package ro.sync.lexer.php;
import ro.sync.lexer.AbstractLexer;

@SuppressWarnings("unused")
%%

/* JFlex options controlling the generated scanner class. */
/* Generate a public class named PHPLexer that extends AbstractLexer. */
%public 
%class PHPLexer
%extends AbstractLexer
/* Scan Unicode input and keep the yychar character counter up to date. */
%unicode
%char
/* The scanning method returns ro.sync.lexer.Symbol. */
%type ro.sync.lexer.Symbol
 
/* Every pattern below is matched case-insensitively (keywords, "<?php", hex digits, exponents). */
%ignorecase
/* Exception type the generated scanner throws on an internal scan error. */
%scanerror ro.sync.lexer.LexerException

%{
    // ---------------------------------------------------------------------
    // Local byte aliases for the PHPTokens token types returned by the rule
    // actions. Several aliases deliberately share one PHPTokens value.
    // ---------------------------------------------------------------------

    // Plain text and the invalid-input type.
    private static final byte SYM_INVALID = PHPTokens.INVALID;
    private static final byte SYM_TEXT = PHPTokens.TEXT;

    // PHP open/close marks: both map to PHPTokens.PHP_MARKS.
    private static final byte SYM_PHP_END = PHPTokens.PHP_MARKS;
    private static final byte SYM_PHP_START = PHPTokens.PHP_MARKS;

    // Punctuation, symbolic operators, variables and numeric literals.
    private static final byte SYM_SEMICOLON = PHPTokens.SEMICOLON;
    private static final byte SYM_COMMA = PHPTokens.COMMA;
    private static final byte SYM_CURLY_BRACKET = PHPTokens.CURLY_BRACKET;
    private static final byte SYM_SQUARE_BRACKET = PHPTokens.SQUARE_BRACKET;
    private static final byte SYM_BRACKET = PHPTokens.BRACKET;
    private static final byte SYM_OPERATOR = PHPTokens.OPERATOR;
    private static final byte SYM_NUMBER = PHPTokens.NUMBER;
    private static final byte SYM_VAR_NAME = PHPTokens.VAR_NAME;

    // Keyword families, one per keyword macro in the declarations section.
    private static final byte SYM_PRIMITIVES = PHPTokens.PRIMITIVES;
    private static final byte SYM_DECLARATIVES = PHPTokens.DECLARATIVES;
    private static final byte SYM_OPERATORS = PHPTokens.OPERATORS;
    private static final byte SYM_MODIFIERS = PHPTokens.MODIFIERS;
    private static final byte SYM_INSTRUCTIONS = PHPTokens.INSTRUCTIONS;

    // String literals, by quote style.
    private static final byte SYM_STRING_SQ = PHPTokens.STRING_SQ;
    private static final byte SYM_STRING_DQ = PHPTokens.STRING_DQ;

    // Block comment: opener, body and closer all map to PHPTokens.COMMENT.
    private static final byte SYM_COMMENT_END = PHPTokens.COMMENT;
    private static final byte SYM_COMMENT = PHPTokens.COMMENT;
    private static final byte SYM_COMMENT_START = PHPTokens.COMMENT;

    // The two single-line comment styles.
    private static final byte SYM_UNIX_COMMENT = PHPTokens.UNIX_COMMENT;
    private static final byte SYM_LINE_COMMENT = PHPTokens.LINE_COMMENT;

    /**
     * Creates a lexer with no input attached. Per the original author's
     * note, yyreset is expected to be called later to reset the lexer and
     * assign its reader.
     */
    public PHPLexer() {
        // The superclass no-argument constructor runs implicitly.
    }

    /**
     * Returns this lexer's name.
     *
     * @return the PHP_LEXER constant, which is not declared in this
     *         specification
     */
    public String getName() {
        return PHP_LEXER;
    }
%}

/* Exclusive lexical states: while one of them is active only the rules listed for that state apply. */
%xstate MULTI_LINE_COMMENT, SINGLE_LINE_COMMENT, UNIX_COMMENT, SQ_STRING, DQ_STRING

/* Any single character other than a line terminator; the catch-all used inside the comment states. */
Char = .
/*
 * A character of plain text: anything that is not whitespace and not one of
 * the punctuation or operator characters that have a dedicated rule in the
 * default state. The caret is excluded as well so that a lone caret is
 * reported through the Operator macro instead of being absorbed into text.
 */
GeneralChar = [^-<>/,{}\+~|\^\'\"\[\]\@ \t:=$*\.#\(\)&%!\?;]

/* Identifier shape and a dollar-prefixed variable name. */
UnquotedString = [a-zA-Z_] ([a-zA-Z0-9_])*
VarName = \${UnquotedString}

/* Numeric literals. An integer may carry a leading minus sign. */
Hexa = "0" ("x" | "X") [0-9a-fA-F]+
Digit = [0-9]
Integer = ("-")? {Digit}+
Fraction = "."{Digit}+
Exponent = ("e" | "E") ("+" | "-")? {Digit}+
/* A double is an integer followed by a fraction, an exponent, or both. */
Double = {Integer}({Fraction} {Exponent}? | {Exponent})

Number = {Integer} | {Double} | {Hexa}

/* Keyword families. The ignorecase option makes all of them case-insensitive. */
Instructions = "new" | "clone" | "echo" | "if" | "else" | "elseif" | "for" | "foreach" | "while"
        | "do" | "switch" | "case" | "break" | "continue" | "return"
Modifiers = "default" | "global" | "static" | "abstract" | "public" | "private" | "protected"
Operators = "and" | "or" | "xor" | "require" | "require_once" | "include" | "include_once"
Declaratives = "function" | "instanceof" | "class" | "interface" | "extends" | "implements"
        | "var" | "const"
Primitives = "int" | "float" | "string" | "array" | "object" | "bool"

/*
 * Symbolic operators. Multi-character operators win over their one-character
 * prefixes because the scanner prefers the longest match. The bare caret
 * (bitwise xor) is listed alongside its compound assignment form.
 */
Operator = "+=" | "-=" | "*=" | "/=" | ".=" | "%=" | "&=" | "|=" | "^=" | "<<=" | ">>=" | "=="
        | "!=" | "===" | "!==" | "<" | "<=" | ">" | ">=" | "<>" | "<<" | ">>" | "--" | "++" | "=>"
        | "||" | "&&" | "::" | "->" | "@" | "?" | "$" | ":" | "." | "&" | "|" | "!" | "+" | "-"
        | "*" | "%" | "/" | "~" | "=" | "^"

%%

<YYINITIAL> {
    /*
     * Default state. The scanner takes the longest match and, on a tie, the
     * rule listed first. A keyword rule therefore only wins when the whole
     * run of text characters is exactly that keyword; longer words fall
     * through to the GeneralChar rule at the bottom.
     */

    /* PHP opening marks. NOTE(review): a bare "php" word is also reported as an opening mark - confirm this is intended. */
    "<?" | "<?php" | "php"      {   return symbol(SYM_PHP_START);           }
    ";"                         {   return symbol(SYM_SEMICOLON);           }
    {Instructions}              {   return symbol(SYM_INSTRUCTIONS);        }
    {Modifiers}                 {   return symbol(SYM_MODIFIERS);           }
    {Operators}                 {   return symbol(SYM_OPERATORS);           }
    {Declaratives}              {   return symbol(SYM_DECLARATIVES);        }
    {Operator}                  {   return symbol(SYM_OPERATOR);            }
    {Primitives}                {   return symbol(SYM_PRIMITIVES);          }
    {VarName}                   {   return symbol(SYM_VAR_NAME);            }
    {Number}                    {   return symbol(SYM_NUMBER);              }
    "/*"                        {
                                    // Enter the block comment state and report the opener.
                                    yybegin(MULTI_LINE_COMMENT);
                                    return symbol(SYM_COMMENT_START);
                                }
    "//"                        {
                                    // Enter the double-slash comment state and report the marker.
                                    yybegin(SINGLE_LINE_COMMENT);
                                    return symbol(SYM_LINE_COMMENT);
                                }
    "#"                         {
                                    // Enter the hash comment state and report the marker.
                                    yybegin(UNIX_COMMENT);
                                    return symbol(SYM_UNIX_COMMENT);
                                }
    "\""                        {
                                    // Count the opening quote; nothing is returned here, the
                                    // string state reports the token once it is complete.
                                    cLen = 1;
                                    yybegin(DQ_STRING);
                                }
    "\'"                        {
                                    // Same as above for a single-quoted string.
                                    cLen = 1;
                                    yybegin(SQ_STRING);
                                }
    [ \t]+                      {   return symbol(SYM_TEXT);                }
    "(" | ")"                   {   return symbol(SYM_BRACKET);             }
    "{" | "}"                   {   return symbol(SYM_CURLY_BRACKET);       }
    "[" | "]"                   {   return symbol(SYM_SQUARE_BRACKET);      }
    ","                         {   return symbol(SYM_COMMA);               }
    /* Two characters long, so it beats the one-character "?" operator. */
    "?>"                        {
                                    return symbol(SYM_PHP_END);
                                }
    /*
     * Run of plain text. One-or-more rather than zero-or-more: the rule must
     * never accept the empty string, otherwise it could yield a zero-length
     * TEXT token without consuming any input.
     */
    {GeneralChar}+              {   return symbol(SYM_TEXT);                }

}

<DQ_STRING> {
    /* Backslash followed by a backslash or a double quote: two characters of string content. */
    "\\" ("\\" | "\"")      { cLen += 2; }

    /* Any other character that is not the delimiter. */
    [^\"]                   { cLen++; }

    /* Unescaped delimiter: count it, go back to the default state and emit the string. */
    "\""                    {
                                cLen++;
                                yybegin(YYINITIAL);
                                return flush(SYM_STRING_DQ);
                            }

    /* End of input inside the string: emit what was collected; the state is left unchanged. */
    <<EOF>>                 { return flush(SYM_STRING_DQ); }
}

<SQ_STRING> {
    /* Backslash followed by a backslash or a single quote: two characters of string content. */
    "\\" ("\\" | "\'")      { cLen += 2; }

    /* Any other character that is not the delimiter. */
    [^\']                   { cLen++; }

    /* Unescaped delimiter: count it, go back to the default state and emit the string. */
    "\'"                    {
                                cLen++;
                                yybegin(YYINITIAL);
                                return flush(SYM_STRING_SQ);
                            }

    /* End of input inside the string: emit what was collected; the state is left unchanged. */
    <<EOF>>                 { return flush(SYM_STRING_SQ); }
}

<MULTI_LINE_COMMENT> {
    /*
     * Block comment body. A PHP closing mark does not end a block comment,
     * so this state has no rule for it: only the closing delimiter leaves
     * the state. The delimiter rule must stay ahead of the "up to the
     * delimiter" rule, because both match the same two characters when the
     * delimiter is next and the earlier rule wins the tie.
     */
    "*/"                        {
                                    yybegin(YYINITIAL);
                                    return symbol(SYM_COMMENT_END);
                                }
    ~"*/"                       {
                                    // The match ends with the two delimiter characters; give
                                    // them back so the rule above reports them separately.
                                    yypushback(2);
                                    return symbol(SYM_COMMENT);
                                }
    /* Only reached when no closing delimiter remains in the input: count one comment character. */
    {Char}                      {   cLen ++;   }
    /* Input ended inside the comment: emit what was counted and stay in this state. */
    <<EOF>>                     {   return flush(SYM_COMMENT);             }
}

<SINGLE_LINE_COMMENT> {
    /*
     * Body of a double-slash comment. A PHP closing mark ends the comment.
     * The closing-mark rule must stay ahead of the "up to the closing mark"
     * rule: when the mark is next both match the same two characters and
     * the earlier rule wins the tie.
     */
    "?>"                        {
                                    yybegin(YYINITIAL);
                                    return symbol(SYM_PHP_END);                                     
                                }
    /*
     * Everything up to and including the next closing mark; the mark itself
     * is pushed back so the rule above reports it.
     * NOTE(review): the block comment state returns symbol(...) after its
     * equivalent pushback while this rule returns flush(...). Both helpers
     * are defined outside this file - confirm flush covers the text matched
     * here.
     */
    ~"?>"                       {   
                                    yypushback(2);
                                    return flush(SYM_LINE_COMMENT);      
                                }
    /* Only reached when no closing mark remains in the input: count one comment character. */
    {Char}                      {   cLen++;                                }
    /* End of input ends the comment: return to the default state and emit what was counted. */
    <<EOF>>                     {   
                                    yybegin(YYINITIAL);
                                    return flush(SYM_LINE_COMMENT);
                                }
}

<UNIX_COMMENT> {
    /*
     * Body of a hash comment. A PHP closing mark ends the comment. The
     * closing-mark rule must stay ahead of the "up to the closing mark"
     * rule: when the mark is next both match the same two characters and
     * the earlier rule wins the tie.
     */
    "?>"                        {
                                    yybegin(YYINITIAL);
                                    return symbol(SYM_PHP_END);                                     
                                }
    /*
     * Everything up to and including the next closing mark; the mark itself
     * is pushed back so the rule above reports it.
     * NOTE(review): the block comment state returns symbol(...) after its
     * equivalent pushback while this rule returns flush(...). Both helpers
     * are defined outside this file - confirm flush covers the text matched
     * here.
     */
    ~"?>"                       {   
                                    yypushback(2);
                                    return flush(SYM_UNIX_COMMENT);      
                                }
    /* Only reached when no closing mark remains in the input: count one comment character. */
    {Char}                      {   cLen++;                                }
    /* End of input ends the comment: return to the default state and emit what was counted. */
    <<EOF>>                     {   
                                    yybegin(YYINITIAL);
                                    return flush(SYM_UNIX_COMMENT);
                                }
}