#include <stdio.h>
#include "regexFrontEnd.h"
#include "regexBackEnd.h"
int opStack[8192]={0};
int opStackTop=0;
static int opStackPush(unsigned int op)
{
opStack[opStackTop++]=op;
return opStackTop>4096;
}
static unsigned int opStackPop()
{
if(opStackTop)
{
opStackTop--;
return opStack[opStackTop];
}
else return 0;
}
static unsigned int opStackGetTop()
{
if(opStackTop) return opStack[opStackTop-1];
else return 0;
}
static int opStackIsNull()
{
return opStackTop==0;
}
static int doReversePolishConversion(struct _regexToken tok)
{
struct _regexToken pseudo_tok;
pseudo_tok.type=T_Regex_Bi_OP;
switch(tok.type)
{
case T_Regex_EOL:
{
unsigned int op=0;
while(!opStackIsNull())
{
if((op=opStackPop())=='(')
{
fprintf(stderr,"\tParser: Brackets don't match. Missing ')'s.\n");
return 1;
}
else
{
pseudo_tok.value.ch=op;
if(genNFANodeToQueue(pseudo_tok)) return 1;
}
}
if(genNFANodeToQueue(tok)) return 1;
}
break;
case T_Regex_Ch:
case T_Regex_Gen:
case T_Regex_Range:
case T_Regex_Uni_OP:
if(genNFANodeToQueue(tok)) return 1;
break;
case T_Regex_Bi_OP:
switch(tok.value.ch)
{
case '|':
while(!opStackIsNull())
{
switch(opStackGetTop())
{
case '|':
case '&':
pseudo_tok.value.ch=opStackPop();
if(genNFANodeToQueue(pseudo_tok)) return 1;
break;
default:
goto _exitSymbol;
}
}
_exitSymbol:
if(opStackPush('|'))
{
fprintf(stderr,"\tParser: Stack overflow.\n");
return 1;
}
break;
case '&':
while(!opStackIsNull())
{
if((opStackGetTop()=='&'))
{
pseudo_tok.value.ch=opStackPop();
if(genNFANodeToQueue(pseudo_tok)) return 1;
}
else break;
}
if(opStackPush('&'))
{
fprintf(stderr,"\tParser: Stack overflow.\n");
return 1;
}
break;
default:
fprintf(stderr,"\tParser: Unknown operator.\n");
return 1;
}
break;
case T_Regex_Bra:
if(opStackPush('('))
{
fprintf(stderr,"\tParser: Stack overflow.\n");
return 1;
}
break;
case T_Regex_Ket:
{
int matched=0;
unsigned int op=0;
while(!opStackIsNull())
{
if((op=opStackPop())=='(')
{
matched=1;
break;
}
else
{
pseudo_tok.value.ch=op;
if(genNFANodeToQueue(pseudo_tok)) return 1;
}
}
if(!matched)
{
fprintf(stderr,"\tParser: Brackets don't match. Unmatched ')' found.\n");
return 1;
}
break;
}
break;
}
return 0;
}
int prevType=-1;
static int regexPatternParser(struct _regexToken tok)
{
struct _regexToken pseudo_tok;
pseudo_tok.type=T_Regex_Bi_OP;
pseudo_tok.value.ch='&';
switch(tok.type)
{
case T_Regex_Ch:
case T_Regex_Gen:
case T_Regex_Range:
case T_Regex_Bra:
switch(prevType)
{
case T_Regex_Ch:
case T_Regex_Gen:
case T_Regex_Range:
case T_Regex_Uni_OP:
case T_Regex_Ket:
if(doReversePolishConversion(pseudo_tok)) return 1;
break;
}
break;
}
prevType=tok.type;
return doReversePolishConversion(tok);
}
static void throwError(int count,unsigned int symbol)
{
char symText[16];
if(!symbol)
{
sprintf(symText,"EOL");
}
else if((symbol>=32)&&(symbol<=126))
{
sprintf(symText,"'%c'",symbol);
}
else
{
sprintf(symText,"'\\u%04x'",symbol);
}
fprintf(stderr,"Lexer: Error at char number %d: %s\n",count,symText);
return;
}
int regexPatternLexer(const unsigned int *regexPattern)
{
struct _regexToken tok;
unsigned int uch=0;
int count=0;
int subCount=0;
int state=0;
tok.type=tok.value.ch=0;
uch=regexPattern[count];
while(uch)
{
int isSet=0;
switch(state)
{
case 0:
switch(uch)
{
case '\\':
state=1;
break;
case '%':
state=2;
break;
case '[':
subCount=0;
state=3;
break;
case '*':
tok.type=T_Regex_Uni_OP;
tok.value.ch='*';
isSet=1;
break;
case '+':
tok.type=T_Regex_Uni_OP;
tok.value.ch='+';
isSet=1;
break;
case '?':
tok.type=T_Regex_Uni_OP;
tok.value.ch='?';
isSet=1;
break;
case '|':
tok.type=T_Regex_Bi_OP;
tok.value.ch='|';
isSet=1;
break;
case '(':
tok.type=T_Regex_Bra;
tok.value.ch='(';
isSet=1;
break;
case ')':
tok.type=T_Regex_Ket;
tok.value.ch=')';
isSet=1;
break;
default:
tok.type=T_Regex_Ch;
tok.value.ch=uch;
isSet=1;
break;
}
break;
case 1:
tok.type=T_Regex_Ch;
isSet=1;
switch(uch)
{
case 'r':
tok.value.ch='\r';
break;
case 'n':
tok.value.ch='\n';
break;
case 't':
tok.value.ch='\t';
break;
case 'f':
tok.value.ch='\f';
break;
case 'a':
tok.value.ch='\a';
break;
default:
tok.value.ch=uch;
break;
}
state=0;
break;
case 2:
switch(uch)
{
case 'a':
tok.type=T_Regex_Gen;
tok.value.ch='a';
isSet=1;
break;
case 'd':
tok.type=T_Regex_Gen;
tok.value.ch='d';
isSet=1;
break;
case 's':
tok.type=T_Regex_Gen;
tok.value.ch='s';
isSet=1;
break;
default:
throwError(count,uch);
return 1;
break;
}
state=0;
break;
case 3:
switch(uch)
{
case '\\':
state=4;
break;
case '%':
throwError(count,uch);
return 1;
break;
case '[':
throwError(count,uch);
return 1;
break;
case ']':
throwError(count,uch);
return 1;
break;
case '*':
throwError(count,uch);
return 1;
break;
case '+':
throwError(count,uch);
return 1;
break;
case '?':
throwError(count,uch);
return 1;
break;
case '|':
throwError(count,uch);
return 1;
break;
case '(':
throwError(count,uch);
return 1;
break;
case ')':
throwError(count,uch);
return 1;
break;
case '-':
throwError(count,uch);
return 1;
break;
default:
(tok.value.range.pairs)[subCount].start=uch;
state=5;
break;
}
break;
case 4:
switch(uch)
{
case 'r':
(tok.value.range.pairs)[subCount].start='\r';
break;
case 'n':
(tok.value.range.pairs)[subCount].start='\n';
break;
case 't':
(tok.value.range.pairs)[subCount].start='\t';
break;
case 'f':
(tok.value.range.pairs)[subCount].start='\f';
break;
case 'a':
(tok.value.range.pairs)[subCount].start='\a';
break;
default:
(tok.value.range.pairs)[subCount].start=uch;
break;
}
state=5;
break;
case 5:
switch(uch)
{
case '%':
throwError(count,uch);
return 1;
break;
case '[':
throwError(count,uch);
return 1;
break;
case '*':
throwError(count,uch);
return 1;
break;
case '+':
throwError(count,uch);
return 1;
break;
case '?':
throwError(count,uch);
return 1;
break;
case '|':
throwError(count,uch);
return 1;
break;
case '(':
throwError(count,uch);
return 1;
break;
case ')':
throwError(count,uch);
return 1;
break;
case '-':
state=6;
break;
case ']':
(tok.value.range.pairs)[subCount].end=(tok.value.range.pairs)[subCount].start;
subCount++;
tok.value.range.count=subCount;
tok.type=T_Regex_Range;
isSet=1;
state=0;
break;
case '\\':
(tok.value.range.pairs)[subCount].end=(tok.value.range.pairs)[subCount].start;
subCount++;
state=4;
break;
default:
(tok.value.range.pairs)[subCount].end=(tok.value.range.pairs)[subCount].start;
subCount++;
(tok.value.range.pairs)[subCount].start=uch;
break;
}
if(subCount>62)
{
throwError(count,uch);
return 1;
}
break;
case 6:
switch(uch)
{
case '\\':
state=7;
break;
case '%':
throwError(count,uch);
return 1;
break;
case '[':
throwError(count,uch);
return 1;
break;
case ']':
throwError(count,uch);
return 1;
break;
case '*':
throwError(count,uch);
return 1;
break;
case '+':
throwError(count,uch);
return 1;
break;
case '?':
throwError(count,uch);
return 1;
break;
case '|':
throwError(count,uch);
return 1;
break;
case '(':
throwError(count,uch);
return 1;
break;
case ')':
throwError(count,uch);
return 1;
break;
case '-':
throwError(count,uch);
return 1;
break;
default:
(tok.value.range.pairs)[subCount].end=uch;
state=8;
break;
}
break;
case 7:
switch(uch)
{
case 'r':
(tok.value.range.pairs)[subCount].end='\r';
break;
case 'n':
(tok.value.range.pairs)[subCount].end='\n';
break;
case 't':
(tok.value.range.pairs)[subCount].end='\t';
break;
case 'f':
(tok.value.range.pairs)[subCount].end='\f';
break;
case 'a':
(tok.value.range.pairs)[subCount].end='\a';
break;
default:
(tok.value.range.pairs)[subCount].end=uch;
break;
}
state=8;
break;
case 8:
switch(uch)
{
case '\\':
subCount++;
state=4;
break;
case '%':
throwError(count,uch);
return 1;
break;
case '[':
throwError(count,uch);
return 1;
break;
case '*':
throwError(count,uch);
return 1;
break;
case '+':
throwError(count,uch);
return 1;
break;
case '?':
throwError(count,uch);
return 1;
break;
case '|':
throwError(count,uch);
return 1;
break;
case '(':
throwError(count,uch);
return 1;
break;
case ')':
throwError(count,uch);
return 1;
break;
case '-':
throwError(count,uch);
return 1;
break;
case ']':
subCount++;
tok.value.range.count=subCount;
tok.type=T_Regex_Range;
isSet=1;
state=0;
break;
default:
subCount++;
(tok.value.range.pairs)[subCount].start=uch;
state=5;
break;
}
if(subCount>62)
{
throwError(count,uch);
return 1;
}
break;
}
if(isSet)
{
if(regexPatternParser(tok))
{
throwError(count,uch);
return 2;
}
}
count++;
uch=regexPattern[count];
}
tok.type=T_Regex_EOL;
tok.value.ch=0;
if(state||(regexPatternParser(tok)))
{
throwError(count,0);
return 1+!state;
}
return 0;
}