Files
acceptance-tests
data
docs
external
Newtonsoft.Json
api-doc-tools
api-snapshot
aspnetwebstack
bdwgc
binary-reference-assemblies
bockbuild
boringssl
cecil
cecil-legacy
corefx
corert
helix-binaries
ikdasm
ikvm
illinker-test-assets
linker
llvm-project
clang
clang-tools-extra
compiler-rt
eng
libcxx
libcxxabi
libunwind
lld
lldb
llvm
bindings
cmake
docs
examples
include
lib
Analysis
AsmParser
BinaryFormat
Bitcode
CodeGen
DebugInfo
Demangle
ExecutionEngine
FuzzMutate
Fuzzer
IR
IRReader
LTO
LineEditor
Linker
MC
Object
ObjectYAML
Option
Passes
ProfileData
Support
Unix
Windows
AMDGPUMetadata.cpp
APFloat.cpp.REMOVED.git-id
APInt.cpp
APSInt.cpp
ARMAttributeParser.cpp
ARMBuildAttrs.cpp
ARMWinEH.cpp
Allocator.cpp
Atomic.cpp
BinaryStreamError.cpp
BinaryStreamReader.cpp
BinaryStreamRef.cpp
BinaryStreamWriter.cpp
BlockFrequency.cpp
BranchProbability.cpp
CMakeLists.txt
COM.cpp
COPYRIGHT.regex
CachePruning.cpp
Chrono.cpp
CodeGenCoverage.cpp
CommandLine.cpp
Compression.cpp
ConvertUTF.cpp
ConvertUTFWrapper.cpp
CrashRecoveryContext.cpp
DAGDeltaAlgorithm.cpp
DataExtractor.cpp
Debug.cpp
DebugCounter.cpp
DeltaAlgorithm.cpp
DynamicLibrary.cpp
Errno.cpp
Error.cpp
ErrorHandling.cpp
FileOutputBuffer.cpp
FileUtilities.cpp
FoldingSet.cpp
FormatVariadic.cpp
FormattedStream.cpp
GlobPattern.cpp
GraphWriter.cpp
Hashing.cpp
Host.cpp
IntEqClasses.cpp
IntervalMap.cpp
JamCRC.cpp
KnownBits.cpp
LEB128.cpp
LLVMBuild.txt
LineIterator.cpp
Locale.cpp
LockFileManager.cpp
LowLevelType.cpp
MD5.cpp
ManagedStatic.cpp
MathExtras.cpp
Memory.cpp
MemoryBuffer.cpp
Mutex.cpp
NativeFormatting.cpp
Options.cpp
Parallel.cpp
Path.cpp
PluginLoader.cpp
PrettyStackTrace.cpp
Process.cpp
Program.cpp
README.txt.system
RWMutex.cpp
RandomNumberGenerator.cpp
Regex.cpp
SHA1.cpp
ScaledNumber.cpp
ScopedPrinter.cpp
Signals.cpp
SmallPtrSet.cpp
SmallVector.cpp
SourceMgr.cpp
SpecialCaseList.cpp
Statistic.cpp
StringExtras.cpp
StringMap.cpp
StringPool.cpp
StringRef.cpp
StringSaver.cpp
SystemUtils.cpp
TarWriter.cpp
TargetParser.cpp
TargetRegistry.cpp
ThreadLocal.cpp
ThreadPool.cpp
Threading.cpp
Timer.cpp
ToolOutputFile.cpp
TrigramIndex.cpp
Triple.cpp
Twine.cpp
Unicode.cpp
Valgrind.cpp
Watchdog.cpp
YAMLParser.cpp
YAMLTraits.cpp
circular_raw_ostream.cpp
raw_os_ostream.cpp
raw_ostream.cpp
regcomp.c
regengine.inc
regerror.c
regex2.h
regex_impl.h
regexec.c
regfree.c
regstrlcpy.c
regutils.h
xxhash.cpp
TableGen
Target
Testing
ToolDrivers
Transforms
WindowsManifest
XRay
CMakeLists.txt
LLVMBuild.txt
projects
resources
runtimes
scripts
test
tools
unittests
utils
.arcconfig
.clang-format
.clang-tidy
.gitattributes
.gitignore
CMakeLists.txt
CODE_OWNERS.TXT
CREDITS.TXT
LICENSE.TXT
LLVMBuild.txt
README.txt
RELEASE_TESTERS.TXT
configure
llvm.spec.in
version.txt.in
nuget
openmp
polly
Directory.Build.props
Directory.Build.targets
NuGet.config
azure-pipelines.yml
build.cmd
build.sh
dir.common.props
global.json
llvm.proj
mxe-Win64.cmake.in
nuget-buildtasks
nunit-lite
roslyn-binaries
rx
xunit-binaries
how-to-bump-roslyn-binaries.md
ikvm-native
llvm
m4
man
mcs
mono
msvc
netcore
po
runtime
samples
scripts
support
tools
COPYING.LIB
LICENSE
Makefile.am
Makefile.in
NEWS
README.md
acinclude.m4
aclocal.m4
autogen.sh
code_of_conduct.md
compile
config.guess
config.h.in
config.rpath
config.sub
configure.REMOVED.git-id
configure.ac.REMOVED.git-id
depcomp
install-sh
ltmain.sh.REMOVED.git-id
missing
mkinstalldirs
mono-uninstalled.pc.in
test-driver
winconfig.h
linux-packaging-mono/external/llvm-project/llvm/lib/Support/regex2.h

166 lines
6.8 KiB
C
Raw Normal View History

/*-
* This code is derived from OpenBSD's libc/regex, original license follows:
*
* Copyright (c) 1992, 1993, 1994 Henry Spencer.
* Copyright (c) 1992, 1993, 1994
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Henry Spencer.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
*/
#ifndef LLVM_SUPPORT_REGEX2_H
#define LLVM_SUPPORT_REGEX2_H
#include "regutils.h"
#include <stddef.h>
/*
* internals of regex_t
*/
#define MAGIC1 ((('r'^0200)<<8) | 'e')
/*
* The internal representation is a *strip*, a sequence of
* operators ending with an endmarker. (Some terminology etc. is a
* historical relic of earlier versions which used multiple strips.)
* Certain oddities in the representation are there to permit running
* the machinery backwards; in particular, any deviation from sequential
* flow must be marked at both its source and its destination. Some
* fine points:
*
* - OPLUS_ and O_PLUS are *inside* the loop they create.
* - OQUEST_ and O_QUEST are *outside* the bypass they create.
* - OCH_ and O_CH are *outside* the multi-way branch they create, while
* OOR1 and OOR2 are respectively the end and the beginning of one of
* the branches. Note that there is an implicit OOR2 following OCH_
* and an implicit OOR1 preceding O_CH.
*
* In state representations, an operator's bit is on to signify a state
* immediately *preceding* "execution" of that operator.
*/
typedef unsigned long sop; /* strip operator */
typedef long sopno;
#define OPRMASK 0xf8000000LU
#define OPDMASK 0x07ffffffLU
#define OPSHIFT ((unsigned)27)
#define OP(n) ((n)&OPRMASK)
#define OPND(n) ((n)&OPDMASK)
#define SOP(op, opnd) ((op)|(opnd))
/* operators meaning operand */
/* (back, fwd are offsets) */
#define OEND (1LU<<OPSHIFT) /* endmarker - */
#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
#define OBOL (3LU<<OPSHIFT) /* left anchor - */
#define OEOL (4LU<<OPSHIFT) /* right anchor - */
#define OANY (5LU<<OPSHIFT) /* . - */
#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
#define OBOW (19LU<<OPSHIFT) /* begin word - */
#define OEOW (20LU<<OPSHIFT) /* end word - */
/*
* Structure for [] character-set representation. Character sets are
* done as bit vectors, grouped 8 to a byte vector for compactness.
* The individual set therefore has both a pointer to the byte vector
* and a mask to pick out the relevant bit of each byte. A hash code
* simplifies testing whether two sets could be identical.
*
* This will get trickier for multicharacter collating elements. As
* preliminary hooks for dealing with such things, we also carry along
* a string of multi-character elements, and decide the size of the
* vectors at run time.
*/
typedef struct {
uch *ptr; /* -> uch [csetsize] */
uch mask; /* bit within array */
uch hash; /* hash code */
size_t smultis;
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
} cset;
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* llvm_regcomp() internal fns */
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
#define MCin(p, cs, cp) mcin(p, cs, cp)
/* stuff for character categories */
typedef unsigned char cat_t;
/*
* main compiled-expression structure
*/
struct re_guts {
int magic;
# define MAGIC2 ((('R'^0200)<<8)|'E')
sop *strip; /* malloced area for strip */
int csetsize; /* number of bits in a cset vector */
int ncsets; /* number of csets in use */
cset *sets; /* -> cset [ncsets] */
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
int cflags; /* copy of llvm_regcomp() cflags argument */
sopno nstates; /* = number of sops */
sopno firststate; /* the initial OEND (normally 0) */
sopno laststate; /* the final OEND */
int iflags; /* internal flags */
# define USEBOL 01 /* used ^ */
# define USEEOL 02 /* used $ */
# define REGEX_BAD 04 /* something wrong */
int nbol; /* number of ^ used */
int neol; /* number of $ used */
int ncategories; /* how many character categories */
cat_t *categories; /* ->catspace[-CHAR_MIN] */
char *must; /* match must contain this string */
int mlen; /* length of must */
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
sopno nplus; /* how deep does it nest +s? */
/* catspace must be last */
cat_t catspace[1]; /* actually [NC] */
};
/* misc utilities */
#define OUT (CHAR_MAX+1) /* a non-character value */
#define ISWORD(c) (isalnum(c&0xff) || (c) == '_')
#endif