#include <owl/pch.h>
#include <owl/defs.h>
#include <algorithm>
//
// Borland includes this in run-time library
//
//#if !defined(BI_COMP_BORLANDC)
#include <owl/private/regexp.h>
namespace owl
{
/////////////////////////////////////
// TRegexp Class
// Element type of a compiled pattern buffer (see the_pattern allocations below).
typedef utchar PatternType;
// Forward declaration of the pattern compiler defined later in this file.
// It returns one of the E_* codes, which gen_pattern() casts to StatVal.
int makepat(const tchar * exp, PatternType * pat, size_t maxpattern);
// Forward declaration of the matcher defined later in this file.
// On return *startpat receives the position where scanning stopped;
// the result is null when nothing matched.
const tchar * matchs(const tchar * str,
const PatternType * pat,
tchar * * startpat);
// Number of PatternType elements allocated for every compiled pattern.
const unsigned TRegexp::maxpat=128;
//
// Builds a regular expression object from the pattern text `str`.
// All work is delegated to gen_pattern(), which allocates the pattern
// buffer and records the compilation result in `stat`.
//
TRegexp::TRegexp(const tchar * str)
{
  gen_pattern(str);
}
//
// Copy constructor: takes a private duplicate of the compiled pattern and
// status held by `r` (see copy_pattern()).
//
TRegexp::TRegexp(const TRegexp & r)
{
  copy_pattern(r);
}
//
// Releases the pattern buffer. The buffer is allocated with new[] in
// gen_pattern()/copy_pattern(), hence the array form of delete.
//
TRegexp::~TRegexp()
{
  delete[] the_pattern;
}
//
// Allocates a new pattern buffer and copies the compiled pattern and the
// status from `r` into this object.
//
// The copy is done element-wise (maxpat elements), not byte-wise: when
// PatternType is wider than one byte, copying only `maxpat` bytes would
// leave the second half of the buffer uninitialized.
//
// Does not release any buffer this object may already own; callers that
// re-assign must free the old buffer themselves.
//
void TRegexp::copy_pattern(const TRegexp & r)
{
  the_pattern = new PatternType[maxpat];
  std::copy_n(r.the_pattern, maxpat, the_pattern);
  stat = r.stat;
}
void TRegexp::gen_pattern(const tchar * str)
{
the_pattern = new PatternType[maxpat];
stat = (StatVal)makepat( str, the_pattern, maxpat );
}
//
// Replaces the current pattern with one compiled from the text `cp`.
//
// The new buffer is created first and the old one released afterwards:
// gen_pattern() assigns the_pattern only after its allocation succeeded, so
// if that allocation throws this object still owns its old, valid buffer.
// The old buffer came from new[], so it must be released with delete[].
//
TRegexp & TRegexp::operator = ( const tchar *cp )
{
  auto * const previous = the_pattern;
  gen_pattern( cp );
  delete[] previous;
  return *this;
}
//
// Copy assignment: replaces the current pattern and status with a private
// duplicate of those held by `r`. Self-assignment is a no-op.
//
// The duplicate is created first and the old buffer released afterwards:
// copy_pattern() assigns the_pattern only after its allocation succeeded, so
// if that allocation throws this object still owns its old, valid buffer.
// The old buffer came from new[], so it must be released with delete[].
//
TRegexp & TRegexp::operator = ( const TRegexp & r )
{
  if(this != &r)
  {
    auto * const previous = the_pattern;
    copy_pattern( r );
    delete[] previous;
  }
  return *this;
}
//
// Returns the status recorded by the last pattern compilation and resets
// the stored status to OK, so a second call reports OK.
//
TRegexp::StatVal TRegexp::status() noexcept
{
  const StatVal reported = stat;
  stat = OK;
  return reported;
}
// These two declarations repeat the forward declarations of makepat() and
// matchs() that already appear near the top of this file; find() below
// calls matchs().
int makepat(const tchar *exp, PatternType *pat, size_t maxpattern);
const tchar * matchs( const tchar *str,
const PatternType *pat,
tchar * *startpat);
size_t TRegexp::find( const tstring & str,
size_t *len,
size_t i ) const
{
PRECONDITION( stat==OK );
tchar * startp;
const tchar * s = str.c_str();
const tchar * endp = matchs( s+i, the_pattern, &startp );
if( endp ){
*len = (size_t)((tchar *)endp - startp + 1);
return (size_t)(startp - (tchar *)s);
}
else{
*len = 0;
return static_cast<size_t>(-1); //JJH added static_cast
}
}
/*
*
* Author: Allen I. Holub
*
* (c) C Gazette. May be used freely as long as author and publication are
* acknowledged
*
*/
/*
*
* Modified by Borland International to compile without warnings as C++.
*
* Modified by Yura Bidus to support UNICODE.
*/
/* Metacharacters in the input: */
/* Note that BOL and NCCL are the same character ('^'); which meaning */
/* applies depends on where the character appears in the expression. */
#define BOL _T('^') /* start-of-line anchor */
#define EOL _T('$') /* end-of-line anchor */
#define ANY _T('.') /* matches any character */
#define CCL _T('[') /* start a character class */
#define CCLEND _T(']') /* end a character class */
#define NCCL _T('^') /* negates character class if 1st char. */
#define CLOSURE _T('*') /* Kleene closure (matches 0 or more) */
#define PCLOSE _T('+') /* Positive closure (1 or more) */
#define OPT _T('?') /* Optional closure (0 or 1) */
// NOTE(review): the original author flags that tagging metacharacters with
// 0x80 does not work in UNICODE builds. Literal characters are stored in the
// pattern unchanged, so any literal whose code equals one of the values
// below is indistinguishable from the corresponding action.
typedef enum action { /* These are put in the pattern string */
/* to represent metacharacters. */
M_BOL = (0x80 | _T('^')),
M_EOL = (0x80 | _T('$')),
M_ANY = (0x80 | _T('.')),
M_CCL = (0x80 | _T('[')),
M_OPT = (0x80 | _T('?')),
M_CLOSE = (0x80 | _T('*')),
M_PCLOSE = (0x80 | _T('+'))
} action;
//typedef unsigned char pattern; /* pattern strings are unsigned char */
/* Element type of a compiled pattern template. */
typedef _TUCHAR pattern;
#define IS_ACTION(x) ((x)&0x80) /* true => element of pat. string is an */
/* action that represents a metacharacter */
/*----------------------------------------------------------------------*/
/* Number of pattern elements in a character class bit map. SETBIT/TSTBIT */
/* use 8 bits per element, so the map distinguishes 128 character codes. */
#define MAPSIZE 16 /* need this many bytes for character class bit map */
/*
* Advance a pointer into the pattern template
* to the next pattern element. This is a +1 for
* all pattern elements but M_CCL, where you
* have to skip past both the M_CCL character and the
* bitmap that follows that character.
* The macro evaluates `pat` more than once and yields the updated pointer.
*/
#define ADVANCE(pat) (pat += (*pat == (pattern)M_CCL) ? (MAPSIZE+1) : 1)
/*
* Bitmap functions. Set bit b in the map and
* test bit b to see if it was set previously.
* Only the low 7 bits of b are used, so character codes that differ only
* above bit 6 share the same map bit.
*/
#define SETBIT(b,map) ((map)[((b) & 0x7f) >>3] |= pattern(1 << ((b) & 0x07)) )
#define TSTBIT(b,map) ((map)[((b) & 0x7f) >>3] & (1<< ((b) & 0x07)) )
/*----------------------------------------------------------------------*/
/* Result codes returned by makepat(). TRegexp::gen_pattern() casts the */
/* returned value to TRegexp::StatVal. */
#define E_NONE 0 /* Pattern compiled successfully. */
#define E_ILLEGAL 1 /* Illegal input pattern. */
#define E_NOMEM 2 /* No pattern buffer was supplied. */
#define E_PAT 3 /* Pattern too long for the buffer. */
/*----------------------------------------------------------------------*/
/* Prototypes for the pattern compiler, the matcher and their helpers. */
/* The functions marked static are used only inside this file. */
static const tchar *doccl(pattern *, const tchar *); /* build a character class bit map */
static int hex2bin(int); /* hex digit -> value */
extern int makepat( const tchar *, pattern *, size_t); /* compile an expression */
extern const tchar *matchs( const tchar *, const pattern *, tchar * *); /* search a string */
static int oct2bin(int); /* octal digit -> value */
static int omatch(const tchar * *, const pattern *, const tchar *); /* match one pattern element */
extern const tchar *patcmp(const tchar *, const pattern *, const tchar *); /* match a whole pattern at one position */
extern int esc( const tchar * *); /* decode one (possibly escaped) character */
/*----------------------------------------------------------------------*/
/*
 * Make a pattern template from the string pointed to by exp. Stop when
 * '\0' or '\n' is found in exp. The pattern template is assembled
 * in pat whose length (in elements) is given by maxpat.
 *
 *   exp     Regular expression
 *   pat     Assembled compiled pattern
 *   maxpat  Number of elements available in pat
 *
 * Return:
 *   E_NONE     Success; pat holds a 0-terminated template.
 *   E_ILLEGAL  Illegal input pattern (null or empty expression, or a
 *              closure operator with nothing valid to apply to).
 *   E_NOMEM    No pattern buffer was supplied.
 *   E_PAT      Pattern too long for the buffer.
 *
 * On any error return the contents of pat are unspecified and the
 * template is not terminated.
 */
int
makepat( const tchar *exp, pattern *pat, size_t maxpat)
{
  if (!exp || !*exp || *exp == _T('\n'))
    return E_ILLEGAL;
  if (*exp == CLOSURE || *exp == PCLOSE || *exp == OPT)
    return E_ILLEGAL;               /* closure with no operand */
  if (!pat)
    return E_NOMEM;                 /* Check for bad pat */

  pattern *cur = pat;               /* pointer to current pattern element */
  pattern *prev = pat;              /* pointer to previous pattern element */

  while (*exp && *exp != _T('\n')) {
    /*
     * Every branch below adds at least one element, and the terminator
     * must still fit afterwards. Comparing indices (rather than forming
     * &pat[maxpat - 1]) also copes with maxpat == 0.
     */
    if (static_cast<size_t>(cur - pat) + 1 >= maxpat)
      return E_PAT;

    switch (*exp) {
      case ANY:
        *cur = (pattern)M_ANY;
        prev = cur++;
        ++exp;
        break;

      case BOL:
        /* '^' is an anchor only as the very first element */
        *cur = (cur == pat) ? (pattern)M_BOL : (pattern)*exp;
        prev = cur++;
        ++exp;
        break;

      case EOL:
        /* '$' is an anchor only as the very last character */
        *cur = (!exp[1] || exp[1] == _T('\n')) ? (pattern)M_EOL : (pattern)*exp;
        prev = cur++;
        ++exp;
        break;

      case CCL:
        /*
         * A class needs the M_CCL element, MAPSIZE bit map elements and
         * must leave one element for the terminator.
         */
        if (static_cast<size_t>(cur - pat) + MAPSIZE + 2 > maxpat)
          return E_PAT;             /* not enough room for bit map */
        prev = cur;
        *cur++ = (pattern)M_CCL;
        exp = doccl(cur, exp);
        cur += MAPSIZE;
        break;

      case OPT:
      case CLOSURE:
      case PCLOSE:
        switch (*prev) {
          case M_BOL:
          case M_EOL:
          case M_OPT:
          case M_PCLOSE:
          case M_CLOSE:
            /* anchors and closures cannot themselves be repeated */
            return E_ILLEGAL;
          default:
            break;
        }
        /*
         * Open a one-element gap at prev so the closure operator precedes
         * its operand in the template.
         */
        std::copy_backward(prev, cur, cur + 1);
        *prev = (*exp == OPT) ? (pattern)M_OPT :
                (*exp == PCLOSE) ? (pattern)M_PCLOSE : (pattern)M_CLOSE;
        ++cur;
        ++exp;
        break;

      default:
        prev = cur;
        *cur++ = (pattern)esc(&exp);  /* esc() advances exp itself */
        break;
    }
  }

  *cur = (pattern)0;                /* terminate the template */
  return E_NONE;
}
/*----------------------------------------------------------------------*/
/*
 * Set bits in the map corresponding to characters specified in the src
 * character class.
 *
 *   map  Receives the bit map; must have room for MAPSIZE elements.
 *   src  Points at the '[' that opens the class.
 *
 * Returns a pointer just past the closing ']', or at the terminating
 * '\0' when the class is not closed.
 *
 * A '-' that is the first or the last character of the class is taken
 * literally; anywhere else it denotes a range between its neighbours.
 */
static const tchar *
doccl( pattern *map, const tchar * src)
{
  int first, last;

  ++src;                                /* skip past the [ */
  const bool negative = (*src == NCCL); /* check for negative ccl */
  if (negative)
    ++src;
  const tchar * const start = src;      /* start of characters in class */

  /*
   * Bitmap initially empty. Clear MAPSIZE *elements*: clearing only
   * MAPSIZE bytes leaves half of the map uninitialized when a pattern
   * element is wider than one byte.
   */
  std::fill_n(map, MAPSIZE, pattern(0));

  while (*src && *src != CCLEND) {
    if (*src != _T('-')) {
      first = esc(&src);                /* Use temp. to avoid macro */
      SETBIT(first, map);               /* side effects. */
    }
    else if (src == start || src[1] == CCLEND || src[1] == _T('\0')) {
      /*
       * Literal dash at start or end. Treating a trailing dash as a range
       * would swallow the ']' (or step past the terminating '\0').
       */
      SETBIT(_T('-'), map);
      ++src;
    }
    else {
      ++src;                            /* skip to end-of-sequence char */
      if (*src < src[-2]) {             /* accept reversed ranges too */
        first = *src;
        last = src[-2];
      }
      else {
        first = src[-2];
        last = *src;
      }
      /* the range start itself was already set on the previous pass */
      while (++first <= last)
        SETBIT(first, map);
      src++;
    }
  }

  if (*src == CCLEND)
    ++src;                              /* Skip CCLEND */

  if (negative)
    for (int i = 0; i < MAPSIZE; ++i)
      map[i] = pattern(~map[i]);        /* invert all bits */

  return src;
}
/*----------------------------------------------------------------------*/
/*
 * Search str for the first position at which the compiled pattern pat
 * matches.
 *
 *   str       Text to search.
 *   pat       Compiled pattern template; a null pointer yields no match.
 *   startpat  Receives the position at which scanning stopped: the start
 *             of the match on success, otherwise the end of str.
 *             Not written when pat is null.
 *
 * Returns what patcmp() returned for the matching position (a pointer to
 * the last matched character), or null when there is no match. An empty
 * str matches only the templates "$", "^" and "^$", in which case str
 * itself is returned.
 */
const tchar *
matchs( const tchar * str, const pattern * pat, tchar * * startpat)
{
  if (!pat)
    return NULL;

  const tchar * found = NULL;

  if (*str != _T('\0')) {
    /* Brute-force substring search: try the pattern at every position. */
    const tchar * const lineStart = str;
    for (; *str; ++str) {
      found = patcmp(str, pat, lineStart);
      if (found)
        break;
    }
  }
  else {
    const bool eolOnly = (*pat == (pattern)M_EOL);
    const bool bolAtEnd = (*pat == (pattern)M_BOL)
                          && (!pat[1] || pat[1] == (pattern)M_EOL);
    if (eolOnly || bolAtEnd)
      found = str;
  }

  *startpat = (tchar*)str;
  return found;
}
/*----------------------------------------------------------------------*/
/*
 * Like strcmp, but compares str against pat. Each element of str is
 * compared with the template until either a mismatch is found or the end
 * of the template is reached. In the former case a 0 is returned; in the
 * latter, a pointer into str (pointing to the last character in the
 * matched pattern) is returned. start points at the first character in
 * the string, which might not be the same thing as str if the search
 * started in the middle of the string.
 */
const tchar *
patcmp( const tchar * str, const pattern * pat, const tchar * start)
{
  if (!pat)                     /* make sure pattern is valid */
    return (NULL);

  while (*pat) {
    if (*pat == (pattern)M_OPT) {
      /*
       * Zero or one matches. It doesn't matter if omatch fails---it will
       * advance str past the character on success, though. Always advance
       * the pattern past both the M_OPT and the operand.
       */
      omatch(&str, ++pat, start);
      ADVANCE(pat);
    }
    else if (!(*pat == (pattern)M_CLOSE || *pat == (pattern)M_PCLOSE)) {
      /*
       * Do a simple match. Note that omatch() fails if there's still
       * something in pat but we're at end of string.
       */
      if (!omatch(&str, pat, start))
        return NULL;
      ADVANCE(pat);
    }
    else {                      /* Process a Kleene or positive closure */
      if (*pat++ == (pattern)M_PCLOSE)  /* one match required */
        if (!omatch(&str, pat, start))
          return NULL;

      /* Match as many as possible, zero is okay */
      const tchar * const bocl = str;   /* beginning of closure string. */
      while (*str && omatch(&str, pat, start)) { /* do nothing */ }

      /*
       * 'str' now points to the character that made us fail. Try to
       * process the rest of the string. If the character following the
       * closure could have been in the closure (as in the pattern "[a-z]*t")
       * the final 't' will be sucked up in the while loop. So, if the match
       * fails, back up a notch and try to match the rest of the string
       * again, repeating this process until we get back to the beginning
       * of the closure.
       *
       * The loop stops once str is back at bocl instead of decrementing
       * first and testing afterwards, so no pointer before the start of
       * the buffer is ever formed.
       */
      if (*ADVANCE(pat)) {
        const tchar * end;      /* return value: end-of-string pointer. */
        for (;;) {
          end = patcmp(str, pat, start);
          if (end || str == bocl)
            break;
          --str;
        }
        return end;
      }
      break;
    }
  }

  /*
   * omatch() advances str to point at the next character to be matched. So
   * str points at the character following the last character matched when
   * you reach the end of the template. The exceptions are templates
   * containing only a BOLN or EOLN token. In these cases omatch doesn't
   * advance. Since we must return a pointer to the last matched character,
   * step back one character, but never to before the beginning of the
   * string.
   *
   * Note that $ is a position, not a character, but in the case of a pattern
   * ^$, a pointer to the end of line character is returned. In ^xyz$, a
   * pointer to the z is returned.
   */
  return (str > start) ? str - 1 : start;
}
/*----------------------------------------------------------------------*/
/*
 * Match one pattern element, pointed at by pat, against the character at
 * **strp.
 *
 *   strp   In/out: current position in the string. Moved one character
 *          forward when the element consumed a character.
 *   pat    The pattern element to try (for M_CCL the bit map follows it).
 *   start  Left edge of the line; this might not be *strp when the search
 *          began in the middle of the string.
 *
 * Returns 0 on failure, 1 when a zero-width element (an anchor) matched
 * and 2 when a character was matched and consumed. Closure is handled one
 * level up by patcmp(). An end-of-line anchor matches '\n' or '\0'.
 */
static int
omatch( const tchar * * strp,
        const pattern * pat,
        const tchar * start )
{
  switch (*pat) {
    case M_BOL:                 /* at the first char in the string? */
      return (*strp == start) ? 1 : 0;

    case M_EOL:
      return (**strp == _T('\n') || **strp == _T('\0')) ? 1 : 0;

    case M_ANY:                 /* anything but newline and end-of-string */
      if (**strp == _T('\n') || **strp == _T('\0'))
        return 0;
      break;

    case M_CCL:                 /* the end of the string must never match */
      if (**strp == _T('\0') || !TSTBIT(**strp, pat + 1))
        return 0;
      break;

    default:                    /* literal match */
      if (**strp != *pat)
        return 0;
      break;
  }

  /* Only the character-consuming elements get here. */
  *strp += 1;
  return 2;
}
/* True when x is a hexadecimal digit. The argument is evaluated more */
/* than once, so it must not have side effects. */
#define ISHEXDIGIT(x) (_istdigit(x) \
|| (_T('a')<=(x) && (x)<=_T('f')) \
|| (_T('A')<=(x) && (x)<=_T('F')) )
/* True when x is an octal digit ('0'-'7'). The argument is evaluated twice. */
#define ISOCTDIGIT(x) (_T('0')<=(x) && (x)<=_T('7'))
/* Convert the hex digit represented by 'c' to its numeric value (0-15).
 * 'c' must be one of: 0123456789abcdefABCDEF
 * The result is always masked to the low four bits.
 */
static int hex2bin( int c )
{
  int value;
  if( _istdigit(c) )
    value = (c)-_T('0');
  else
    value = ((_toupper(c))-_T('A'))+10;
  return value & 0xf;
}
/* Convert the octal digit represented by 'c' to its numeric value (0-7).
 * 'c' must be a digit in the range '0'-'7'.
 * The result is always masked to the low three bits.
 */
static int oct2bin( int c )
{
  const int offset = (c)-_T('0');
  return offset & 0x7;
}
/*------------------------------------------------------------*/
/* Map escape sequences into their equivalent symbols. Return
 * the equivalent ASCII character. *s is advanced past the
 * escape sequence. If no escape sequence is present, the
 * current character is returned and the string is advanced by
 * one. The following are recognized (the letter after the
 * backslash may be upper or lower case):
 *
 *  \b    backspace
 *  \f    formfeed
 *  \n    newline
 *  \r    carriage return
 *  \s    space
 *  \t    tab
 *  \e    ASCII ESC character ('\033')
 *  \DDD  number formed of 1-3 octal digits
 *  \xDDD number formed of 1-3 hex digits
 *  \^C   C = any letter. Control code
 *
 * Any other escaped character stands for itself. *s is never moved
 * beyond the terminating '\0': a backslash that ends the string yields
 * a literal backslash, and "\^" at the end of the string yields a
 * literal '^'.
 */
int esc( const tchar * * s)
{
  if( **s != _T('\\') )
    return *( (*s)++ );

  ++(*s);                                /* Skip the \ */

  if( **s == _T('\0') )                  /* backslash ends the string: */
    return _T('\\');                     /* stay on the terminator     */

  /*
   * Fold ASCII lower case to upper case by hand. The character may be
   * negative or wide, which toupper() does not accept.
   */
  int selector = **s;
  if( selector >= _T('a') && selector <= _T('z') )
    selector -= _T('a') - _T('A');

  int rval;
  switch( selector ) {
    case _T('B'): rval = _T('\b')  ; break;
    case _T('F'): rval = _T('\f')  ; break;
    case _T('N'): rval = _T('\n')  ; break;
    case _T('R'): rval = _T('\r')  ; break;
    case _T('S'): rval = _T(' ')   ; break;
    case _T('T'): rval = _T('\t')  ; break;
    case _T('E'): rval = _T('\033'); break;

    case _T('^'):
      if( (*s)[1] == _T('\0') ) {        /* nothing follows the ^ */
        rval = _T('^');
        break;
      }
      rval = *++(*s);
      /*
       * Fold to upper case by hand: _toupper is, on some runtimes, an
       * unchecked macro that is only valid for lower-case input.
       */
      if( rval >= _T('a') && rval <= _T('z') )
        rval -= _T('a') - _T('A');
      rval -= _T('@');
      break;

    case _T('X'):
      rval = 0;
      ++(*s);                            /* skip the x */
      if( ISHEXDIGIT(**s) ) {
        rval = hex2bin( *(*s)++ );
      }
      if( ISHEXDIGIT(**s) ) {
        rval <<= 4;
        rval |= hex2bin( *(*s)++ );
      }
      if( ISHEXDIGIT(**s) ) {
        rval <<= 4;
        rval |= hex2bin( *(*s)++ );
      }
      --(*s);                            /* back onto the last char used */
      break;

    default:
      if( !ISOCTDIGIT(**s) )
        rval = **s;
      else {
        /* *s already points at the first octal digit: do not skip it */
        rval = oct2bin( *(*s)++ );
        if( ISOCTDIGIT(**s) ) {
          rval <<= 3;
          rval |= oct2bin( *(*s)++ );
        }
        if( ISOCTDIGIT(**s) ) {
          rval <<= 3;
          rval |= oct2bin( *(*s)++ );
        }
        --(*s);                          /* back onto the last digit used */
      }
      break;
  }

  ++(*s);                                /* step past the last char used */
  return rval;
}
}
//#endif
//==============================================================================
↑ V220 Suspicious sequence of types castings: memsize -> 32-bit integer -> memsize. The value being cast: '(cur - pat) + 16'.
↑ V536 Be advised that the utilized constant value is represented by an octal form. Oct: '\033', Dec: 27.