grammar ECMAScript;

options {
    // Allow any char but \uFFFF (16 bit -1)
    charVocabulary='\u0000'..'\uFFFE';
}

/* This comes from section A.5 but is really the starting point, so
 * it is present here. */
program:
    sourceElements EOF
    ;

/* A.1 Lexical Grammar */

sourceCharacter:
    /* any unicode character */ /* see section 6 */
    SOURCE_CHAR
    ;

inputElementDiv:
    whiteSpace | lineTerminator | comment | token | divPunctuator
    ;

inputElementRegExp:
    whiteSpace | lineTerminator | comment | token | regularExpressionLiteral
    ;

whiteSpace:
    TAB | VT | FF | SP | NBSP | USP
    ;

lineTerminator:
    /* no leading '|': an empty alternative would let this rule match nothing */
    LF | CR | LS | PS
    ;

comment:
    multiLineComment | singleLineComment
    ;

multiLineComment:
    /* SPEC: the comment body is optional */
    '/*' (multiLineCommentChars)? '*/'
    ;

multiLineCommentChars:
    /*
     * SPEC:
     *   multiLineNotAsteriskChar multiLineCommentChars(opt)
     * | ASTERISK postAsteriskCommentChars(opt)
     * The optional tails are the base case that ends the recursion.
     */
      multiLineNotAsterikChar (multiLineCommentChars)?
    | ASTERISK (postAsterikCommentChars)?
    ;

postAsterikCommentChars:
    /*
     * SPEC:
     *   multiLineNotForwardSlashOrAsteriskChar multiLineCommentChars(opt)
     * | ASTERISK postAsteriskCommentChars(opt)
     */
      multiLineNotForwardSlashOrAsterikChar (multiLineCommentChars)?
    | ASTERISK (postAsterikCommentChars)?
    ;

multiLineNotAsterikChar:
    sourceCharacter /* but not ASTERISK */
    ;

multiLineNotForwardSlashOrAsterikChar:
    sourceCharacter /* but not FORWARD-SLASH or ASTERISK */
    ;

singleLineComment:
    /* SPEC: the comment text is optional */
    '//' (singleLineCommentChars)?
    ;

singleLineCommentChars:
    /* SPEC: singleLineCommentChar singleLineCommentChars(opt) */
    singleLineCommentChar (singleLineCommentChars)?
    ;

singleLineCommentChar:
    sourceCharacter /* but not lineTerminator */
    ;

token:
      reservedWord
    | identifier
    /* syntactic predicate: numericLiteral (ALT 3) and punctuator (ALT 5) can
     * both start with DOT; try the whole literal so that literals starting
     * with a digit are accepted as well */
    | (numericLiteral)=> numericLiteral
    /* syntactic predicate: stringLiteral (ALT 4) and punctuator (ALT 5) can
     * both start with APOSTROPHE; try the whole literal so that QUOTE-delimited
     * strings are accepted as well */
    | (stringLiteral)=> stringLiteral
    | punctuator
    ;

reservedWord:
    keyword | futureReservedWord | nullLiteral | booleanLiteral
    ;

keyword:
      'break' | 'case' | 'catch' | 'continue' | 'default' | 'delete' | 'do'
    | 'else' | 'finally' | 'for' | 'function' | 'if' | 'in' | 'instanceof'
    | 'new' | 'return' | 'switch' | 'this' | 'throw' | 'try' | 'typeof'
    | 'var' | 'void' | 'while' | 'with'
    ;

futureReservedWord:
      'abstract' | 'boolean' | 'byte' | 'char' | 'class' | 'const' | 'debugger'
    | 'double' | 'enum' | 'export' | 'extends' | 'final' | 'float' | 'goto'
    | 'implements' | 'import' | 'int' | 'interface' | 'long' | 'native'
    | 'package' | 'private' | 'protected' | 'public' | 'short' | 'static'
    | 'super' | 'synchronized' | 'throws' | 'transient' | 'volatile'
    ;

identifier:
    identifierName /* but not reservedWord */
    ;

identifierName:
    /*
     * left recursion removed:
     *
     * SPEC:
     *   identifierStart
     * | identifierName identifierPart
     * ->
     *   identifierStart (identifierPart)*
     */
    identifierStart (identifierPart)*
    ;

identifierStart:
    unicodeLetter | DOLLAR | UNDERSCORE | unicodeEscapeSequence
    ;

identifierPart:
    /* syntactic predicate to remove non-determinism between alts 1 and 5
     * (both can start with a unicode escape)
     *  - always choose the identifierStart when possible */
      (identifierStart)=> identifierStart
    | unicodeCombiningMark
    | unicodeDigit
    | unicodeConnectorPunctuation
    | unicodeEscapeSequence
    ;

unicodeLetter:
    /* any character in the unicode categories:
       "uppercase letter (Lu)",
       "lowercase letter (Ll)",
       "titlecase letter (Lt)",
       "modifier letter (Lm)",
       "other letter (Lo)",
       "letter number (Nl)"
    */
    UNICODE_LETTER
    ;

unicodeCombiningMark:
    /* any character in the unicode categories:
       "non-spacing mark (Mn)"
       "combining spacing mark (Mc)"
    */
    UNICODE_NONSPACING_MARK | UNICODE_COMBINING_MARK
    ;

unicodeDigit:
    /* any character in the unicode category "decimal number (Nd)" */
    UNICODE_DIGIT
    ;

unicodeConnectorPunctuation:
    /* any character in the unicode category "connector punctuation (Pc)" */
    UNICODE_CONNECTOR_PUNCTUATION
    ;
unicodeEscapeSequence:
    '\\u' hexDigit hexDigit hexDigit hexDigit
    ;

hexDigit:
    /* explicitly enumerated in grammar */
    HEXDIGIT
    ;

punctuator:
    /* COMMA is part of the SPEC punctuator list. APOSTROPHE is not, but is
     * kept here because the token rule's predicate comment relies on it. */
      LBRACE | RBRACE | LPAREN | RPAREN | LBRACK | RBRACK
    | DOT | SEMI | COMMA | APOSTROPHE
    | LT | GT | LTEQ | GTEQ | EQ2 | NOTEQ | EQ3 | NOTEQ2
    | PLUS | MINUS | STAR | PERCENT | PLUS2 | MINUS2
    | LSHIFT | RSHIFT | GT3 | AMPER | PIPE | CAROT
    | EXCLAMATION | TILDE | AMPER2 | PIPE2 | QUESTION | COLON
    | EQ | PLUSEQ | MINUSEQ | TIMESEQ | PERCENTEQ
    | LSHIFTEQ | RSHIFTEQ | GT3EQ | AMPEREQ | PIPEEQ | CAROTEQ
    ;

divPunctuator:
    DIVIDE | DIVIDEEQ
    ;

literal:
    nullLiteral | booleanLiteral | numericLiteral | stringLiteral
    ;

nullLiteral:
    'null'
    ;

booleanLiteral:
    'true' | 'false'
    ;

numericLiteral:
    decimalLiteral | hexIntegerLiteral
    ;

decimalLiteral:
    /*
     * SPEC:
     *   decimalIntegerLiteral DOT (decimalDigits)? (exponentPart)?
     * | DOT decimalDigits (exponentPart)?
     * | decimalIntegerLiteral (exponentPart)?
     * ->
     * the tail is optional so that a bare integer such as 42 (third SPEC
     * alternative without exponent) is accepted
     */
      decimalIntegerLiteral (decimalIntegerLiteralTail)?
    | DOT decimalDigits (exponentPart)?
    ;

decimalIntegerLiteralTail:
    /* this is necessary because of the way the grammar gets parsed */
      DOT (decimalDigits)? (exponentPart)?
    | exponentPart
    ;

decimalIntegerLiteral:
      ZERO
    | NON_ZERO_DIGIT (decimalDigits)?
    ;

decimalDigits:
    /*
     * SPEC:
     *   decimalDigit
     * | decimalDigits decimalDigit
     * =>
     *   (decimalDigit)+
     */
    (decimalDigit)+
    ;

decimalDigit:
    DIGIT /* grammar has each one explicitly listed */
    ;

exponentIndicator:
    EXPONENT_INDICATOR /* grammar has both e and E listed */
    ;

signedInteger:
      decimalDigits
    | PLUS decimalDigits
    | MINUS decimalDigits
    ;

hexIntegerLiteral:
    /*
     * SPEC:
     *   '0x' hexDigit
     * | '0X' hexDigit
     * | hexIntegerLiteral hexDigit
     * ->
     *   ('0x'|'0X') (hexDigit)+
     */
    ('0x'|'0X') (hexDigit)+
    ;

stringLiteral:
      QUOTE (doubleStringCharacters)? QUOTE
    | APOSTROPHE (singleStringCharacters)? APOSTROPHE
    ;

doubleStringCharacters:
    doubleStringCharacter (doubleStringCharacters)?
    ;

singleStringCharacters:
    singleStringCharacter (singleStringCharacters)?
    ;

doubleStringCharacter:
    /* two alternatives, mirroring singleStringCharacter: a plain character or
     * a backslash escape */
      sourceCharacter /* but not double quote or backslash or line terminator */
    | BSLASH escapeSequence
    ;

singleStringCharacter:
      sourceCharacter /* but not single quote or backslash or line terminator */
    | BSLASH escapeSequence
    ;

escapeSequence:
      characterEscapeSequence
    | ZERO /* [lookahead not a member of decimalDigit] */
    | hexEscapeSequence
    | unicodeEscapeSequence
    ;

characterEscapeSequence:
    singleEscapeCharacter | nonEscapeCharacter
    ;

singleEscapeCharacter:
      APOSTROPHE | QUOTE | BSLASH
    | LOWER_B | LOWER_F | LOWER_N | LOWER_R | LOWER_T | LOWER_V
    ;

nonEscapeCharacter:
    sourceCharacter /* but not escapeCharacter or lineTerminator */
    ;

escapeCharacter:
    singleEscapeCharacter | decimalDigit | LOWER_X | LOWER_U
    ;

hexEscapeSequence:
    LOWER_X hexDigit hexDigit
    ;

/* defined above
unicodeEscapeSequence:
    LOWER_U hexDigit hexDigit hexDigit hexDigit
    ;*/

regularExpressionLiteral:
    SLASH regularExpressionBody SLASH regularExpressionFlags
    ;

regularExpressionBody:
    regularExpressionFirstChar regularExpressionChars
    ;

regularExpressionChars:
    /*
     * SPEC:
     *   [empty]
     * | regularExpressionChars regularExpressionChar
     * ->
     *   (regularExpressionChar)*
     * The closure supplies the [empty] base case.
     */
    (regularExpressionChar)*
    ;

regularExpressionFirstChar:
      nonTerminator /* but not * or \ or / */
    | backslashSequence
    ;

regularExpressionChar:
      nonTerminator /* but not \ or / */
    | backslashSequence
    ;

backslashSequence:
    BSLASH nonTerminator
    ;

nonTerminator:
    sourceCharacter /* but not lineTerminator */
    ;

regularExpressionFlags:
    /*
     * SPEC:
     *   [empty] | regularExpressionFlags identifierPart
     * ->
     *   (identifierPart)*
     */
    (identifierPart)*
    ;

/* A.2 Number Conversions */

stringNumericLiteral:
    /*
     * SPEC:
     *   (strWhiteSpace)?
     * | (strWhiteSpace)? strNumericLiteral (strWhiteSpace)?
     */
    (strWhiteSpace)? (strNumericLiteral (strWhiteSpace)?)?
    ;

strWhiteSpace:
    strWhiteSpaceChar (strWhiteSpace)?
;

strWhiteSpaceChar:
    TAB | SP | NBSP | FF | VT | CR | LF | LS | PS | USP
    ;

strNumericLiteral:
    strDecimalLiteral | hexIntegerLiteral
    ;

strDecimalLiteral:
      strUnsignedDecimalLiteral
    | PLUS strUnsignedDecimalLiteral
    | MINUS strUnsignedDecimalLiteral
    ;

strUnsignedDecimalLiteral:
    /*
     * SPEC:
     *   'Infinity'
     * | decimalDigits DOT (decimalDigits)? (exponentPart)?
     * | DOT decimalDigits (exponentPart)?
     * | decimalDigits (exponentPart)?
     */
      'Infinity'
    /* syntactic predicate used to remove nondet between ALTs 2 and 4 */
    | (decimalDigits DOT)=> decimalDigits DOT (decimalDigits)? (exponentPart)?
    | DOT decimalDigits (exponentPart)?
    | decimalDigits (exponentPart)?
    ;

/* defined above
decimalDigits:
      decimalDigit
    | decimalDigits decimalDigit
    ;*/

/*
decimalDigit:
    DIGIT // grammar has them explicitly enumerated
    ;*/

exponentPart:
    exponentIndicator signedInteger
    ;

/* redefined above
exponentIndicator:
    EXPONENT_INDICATOR
    ;*/

/* redefined above
signedInteger:
      decimalDigits
    | PLUS decimalDigits
    | MINUS decimalDigits
    ;*/

/* redefined above
hexIntegerLiteral:
      '0x' hexDigit
    | '0X' hexDigit
    | hexIntegerLiteral hexDigit
    ;*/

/* redefined above
hexDigit:
    HEXDIGIT // grammar has them explicitly enumerated
    ;
*/

/* A.3 Expressions */

primaryExpression:
      'this'
    | identifier
    | literal
    | arrayLiteral
    | objectLiteral
    | LPAREN expression RPAREN
    ;

arrayLiteral:
    /*
     * SPEC:
     *   LBRACK (elision)? RBRACK
     * | LBRACK elementList RBRACK
     * | LBRACK elementList COMMA (elision)? RBRACK
     */
    LBRACK
    (   (elision)?
      | elementList (COMMA (elision)?)?
    )
    RBRACK
    ;

elementList:
    /*
     * SPEC:
     *   (elision)? assignmentExpression
     * | elementList COMMA (elision)? assignmentExpression
     */
    (elision)? assignmentExpression (elementListTail)*
    ;

elementListTail:
    COMMA (elision)? assignmentExpression
    ;

elision:
    /*
     * SPEC:
     *   COMMA
     * | elision COMMA
     * ->
     *   (COMMA)+
     */
    (COMMA)+
    ;

objectLiteral:
    /*
     * SPEC:
     *   LBRACE RBRACE
     * | LBRACE propertyNameAndValueList RBRACE
     */
    LBRACE (propertyNameAndValueList)? RBRACE
    ;

propertyNameAndValueList:
    /*
     * SPEC:
     *   propertyName COLON assignmentExpression
     * | propertyNameAndValueList COMMA propertyName COLON assignmentExpression
     */
    propertyName COLON assignmentExpression (propertyNameAndValueListTail)*
    ;

propertyNameAndValueListTail:
    COMMA propertyName COLON assignmentExpression
    ;

propertyName:
    identifier | stringLiteral | numericLiteral
    ;

memberExpression:
    /*
     * SPEC:
     *   primaryExpression
     * | functionExpression
     * | memberExpression LBRACK expression RBRACK
     * | memberExpression DOT identifier
     * | 'new' memberExpression arguments
     * ->
     * left recursion replaced by a (memberExpressionTail)* closure
     */
    (   primaryExpression
      | functionExpression
      | 'new' memberExpression arguments
    )
    (memberExpressionTail)*
    ;

memberExpressionTail:
    /* SPEC: not a part of formal grammar */
      LBRACK expression RBRACK
    | DOT identifier
    ;

newExpression:
    /*
     * SPEC:
     *   memberExpression
     * | 'new' newExpression
     */
    /* syntactic predicate resolves the nondeterminism on 'new': prefer the
     * memberExpression reading (which consumes "new X (args)") and only fall
     * back to the argument-less 'new' newExpression form when that fails;
     * trying 'new' newExpression first would strand the argument list */
      (memberExpression)=> memberExpression
    | 'new' newExpression
    ;

callExpression:
    /*
     * SPEC:
     *   memberExpression arguments
     * | callExpression arguments
     * | callExpression LBRACK expression RBRACK
     * | callExpression DOT identifier
     */
    memberExpression arguments (callExpressionTail)*
    ;

callExpressionTail:
      arguments
    | LBRACK expression RBRACK
    | DOT identifier
    ;

arguments:
    /*
     * SPEC:
     *   LPAREN RPAREN
     * | LPAREN argumentList RPAREN
     */
    LPAREN (argumentList)? RPAREN
    ;

argumentList:
    /*
     * SPEC:
     *   assignmentExpression
     * | argumentList COMMA assignmentExpression
     */
    assignmentExpression (argumentListTail)*
    ;

argumentListTail:
    COMMA assignmentExpression
    ;

leftHandSideExpression:
    /*
     * SPEC:
     *   newExpression
     * | callExpression
     */
    /* both alternatives start with a memberExpression; try the longer
     * callExpression first and otherwise fall back to newExpression, so that
     * a plain member expression without arguments (e.g. an identifier) is
     * still a valid left-hand side */
      (callExpression)=> callExpression
    | newExpression
    ;

postfixExpression:
    /*
     * SPEC:
     *   leftHandSideExpression
     * | leftHandSideExpression / no line terminator here / PLUS2
     * | leftHandSideExpression / no line terminator here / MINUS2
     */
    leftHandSideExpression (PLUS2|MINUS2)?
;

unaryExpression:
      postfixExpression
    | 'delete' unaryExpression
    | 'void' unaryExpression
    | 'typeof' unaryExpression
    | PLUS2 unaryExpression
    | MINUS2 unaryExpression
    | PLUS unaryExpression
    | MINUS unaryExpression
    | TILDE unaryExpression
    | EXCLAMATION unaryExpression
    ;

multiplicativeExpression:
    /*
     * SPEC:
     *   unaryExpression
     * | multiplicativeExpression ASTERISK unaryExpression
     * | multiplicativeExpression DIVIDE unaryExpression
     * | multiplicativeExpression PERCENT unaryExpression
     */
    unaryExpression (multiplicativeExpressionTail)*
    ;

multiplicativeExpressionTail:
      ASTERISK unaryExpression
    | DIVIDE unaryExpression
    | PERCENT unaryExpression
    ;

additiveExpression:
    /*
     * SPEC:
     *   multiplicativeExpression
     * | additiveExpression PLUS multiplicativeExpression
     * | additiveExpression MINUS multiplicativeExpression
     */
    multiplicativeExpression (additiveExpressionTail)*
    ;

additiveExpressionTail:
      PLUS multiplicativeExpression
    | MINUS multiplicativeExpression
    ;

shiftExpression:
    /*
     * SPEC:
     *   additiveExpression
     * | shiftExpression LSHIFT additiveExpression
     * | shiftExpression RSHIFT additiveExpression
     * | shiftExpression GT3 additiveExpression
     */
    additiveExpression (shiftExpressionTail)*
    ;

shiftExpressionTail:
      LSHIFT additiveExpression
    | RSHIFT additiveExpression
    | GT3 additiveExpression
    ;

relationalExpression:
    /*
     * SPEC:
     *   shiftExpression
     * | relationalExpression LT shiftExpression
     * | relationalExpression GT shiftExpression
     * | relationalExpression LTEQ shiftExpression
     * | relationalExpression GTEQ shiftExpression
     * | relationalExpression 'instanceof' shiftExpression
     * | relationalExpression 'in' shiftExpression
     */
    shiftExpression (relationalExpressionTail)*
    ;

relationalExpressionTail:
      LT shiftExpression
    | GT shiftExpression
    | LTEQ shiftExpression
    | GTEQ shiftExpression
    | 'instanceof' shiftExpression
    | 'in' shiftExpression
    ;

relationalExpressionNoln:
    /*
     * SPEC:
     *   shiftExpression
     * | relationalExpressionNoln (LT|GT|LTEQ|GTEQ|'instanceof') shiftExpression
     * Left recursion removed like the other binary rules; the operand on the
     * left must not go through relationalExpression, which would re-admit 'in'.
     */
    shiftExpression ((LT|GT|LTEQ|GTEQ|'instanceof') shiftExpression)*
    ;

equalityExpression:
    /*
     * SPEC:
     *   relationalExpression
     * | equalityExpression (EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpression
     */
    relationalExpression (equalityExpressionTail)*
    ;

equalityExpressionTail:
    (EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpression
    ;

equalityExpressionNoln:
    /*
     * SPEC:
     *   relationalExpressionNoln
     * | equalityExpressionNoln (EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpressionNoln
     */
    relationalExpressionNoln (equalityExpressionNolnTail)*
    ;

equalityExpressionNolnTail:
    (EQ2|NOTEQ|EQ3|NOTEQ2) relationalExpressionNoln
    ;

bitwiseAndExpression:
    /*
     * SPEC:
     *   equalityExpression
     * | bitwiseAndExpression AMPER equalityExpression
     */
    equalityExpression (bitwiseAndExpressionTail)*
    ;

bitwiseAndExpressionTail:
    AMPER equalityExpression
    ;

bitwiseAndExpressionNoln:
    /*
     * SPEC:
     *   equalityExpressionNoln
     * | bitwiseAndExpressionNoln AMPER equalityExpressionNoln
     */
    equalityExpressionNoln (bitwiseAndExpressionNolnTail)*
    ;

bitwiseAndExpressionNolnTail:
    AMPER equalityExpressionNoln
    ;

bitwiseXorExpression:
    /*
     * SPEC:
     *   bitwiseAndExpression
     * | bitwiseXorExpression CAROT bitwiseAndExpression
     */
    bitwiseAndExpression (bitwiseXorExpressionTail)*
    ;

bitwiseXorExpressionTail:
    CAROT bitwiseAndExpression
    ;

bitwiseXorExpressionNoln:
    /*
     * SPEC:
     *   bitwiseAndExpressionNoln
     * | bitwiseXorExpressionNoln CAROT bitwiseAndExpressionNoln
     */
    bitwiseAndExpressionNoln (bitwiseXorExpressionNolnTail)*
    ;

bitwiseXorExpressionNolnTail:
    CAROT bitwiseAndExpressionNoln
    ;

bitwiseOrExpression:
    /*
     * SPEC:
     *   bitwiseXorExpression
     * | bitwiseOrExpression PIPE bitwiseXorExpression
     */
    bitwiseXorExpression (bitwiseOrExpressionTail)*
    ;

bitwiseOrExpressionTail:
    PIPE bitwiseXorExpression
    ;

bitwiseOrExpressionNoln:
    /*
     * SPEC:
     *   bitwiseXorExpressionNoln
     * | bitwiseOrExpressionNoln PIPE bitwiseXorExpressionNoln
     */
    bitwiseXorExpressionNoln (bitwiseOrExpressionNolnTail)*
    ;

bitwiseOrExpressionNolnTail:
    /* the operand is the next-tighter Xor level, not this tail rule itself */
    PIPE bitwiseXorExpressionNoln
    ;

logicalAndExpression:
    /*
     * SPEC:
     *   bitwiseOrExpression
     * | logicalAndExpression AMPER2 bitwiseOrExpression
     */
    bitwiseOrExpression (AMPER2 bitwiseOrExpression)*
    ;

logicalAndExpressionNoln:
    /*
     * SPEC:
     *   bitwiseOrExpressionNoln
     * | logicalAndExpressionNoln AMPER2 bitwiseOrExpressionNoln
     */
    bitwiseOrExpressionNoln (logicalAndExpressionNolnTail)*
    ;

logicalAndExpressionNolnTail:
    AMPER2 bitwiseOrExpressionNoln
    ;

logicalOrExpression:
    /*
     * SPEC:
     *   logicalAndExpression
     * | logicalOrExpression '||' logicalAndExpression
     */
    logicalAndExpression (logicalOrExpressionTail)*
    ;

logicalOrExpressionTail:
    '||' logicalAndExpression
    ;

logicalOrExpressionNoln:
    /*
     * SPEC:
     *   logicalAndExpressionNoln
     * | logicalOrExpressionNoln '||' logicalAndExpressionNoln
     */
    logicalAndExpressionNoln (logicalOrExpressionNolnTail)*
    ;

logicalOrExpressionNolnTail:
    '||' logicalAndExpressionNoln
    ;

conditionalExpression:
    /*
     * SPEC:
     *   logicalOrExpression
     * | logicalOrExpression QUESTION assignmentExpression COLON assignmentExpression
     * left-factored so both alternatives no longer share a prefix
     */
    logicalOrExpression (QUESTION assignmentExpression COLON assignmentExpression)?
    ;

conditionalExpressionNoln:
    /*
     * SPEC:
     *   logicalOrExpressionNoln
     * | logicalOrExpressionNoln QUESTION assignmentExpression COLON assignmentExpressionNoln
     * Only the last operand is restricted; the middle operand is delimited by
     * QUESTION/COLON and may contain 'in'.
     */
    logicalOrExpressionNoln (QUESTION assignmentExpression COLON assignmentExpressionNoln)?
    ;

assignmentExpression:
    /*
     * SPEC:
     *   conditionalExpression
     * | leftHandSideExpression assignmentOperator assignmentExpression
     */
      (leftHandSideExpression assignmentOperator) =>
        leftHandSideExpression assignmentOperator assignmentExpression
    | conditionalExpression
    ;

assignmentExpressionNoln:
    /*
     * SPEC:
     *   conditionalExpressionNoln
     * | leftHandSideExpression assignmentOperator assignmentExpressionNoln
     * same syntactic predicate as assignmentExpression: both alternatives can
     * start with a leftHandSideExpression
     */
      (leftHandSideExpression assignmentOperator) =>
        leftHandSideExpression assignmentOperator assignmentExpressionNoln
    | conditionalExpressionNoln
    ;

assignmentOperator:
    /* note that in the grammar these are listed out explicitly */
      EQ | TIMESEQ | DIVIDEEQ | PERCENTEQ | PLUSEQ | MINUSEQ
    | LSHIFTEQ | RSHIFTEQ | GT3EQ | AMPEREQ | CAROTEQ | PIPEEQ
    ;

expression:
    /*
     * SPEC:
     *   assignmentExpression
     * | expression COMMA assignmentExpression
     */
    assignmentExpression (expressionTail)*
    ;

expressionTail:
    COMMA assignmentExpression
    ;

expressionNoln:
    /*
     * SPEC:
     *   assignmentExpressionNoln
     * | expressionNoln COMMA assignmentExpressionNoln
     */
    assignmentExpressionNoln (expressionNolnTail)*
    ;

expressionNolnTail:
    COMMA assignmentExpressionNoln
    ;

/* A.4 Statements */

statement:
      block
    | variableStatement
    | emptyStatement
    | expressionStatement
| ifStatement
    | iterationStatement
    | continueStatement
    | breakStatement
    | returnStatement
    | withStatement
    | labelledStatement
    | switchStatement
    | throwStatement
    | tryStatement
    ;

block:
    LBRACE (statementList)? RBRACE
    ;

statementList:
    /*
     * SPEC:
     *   statement
     * | statementList statement
     */
    (statement)+
    ;

variableStatement:
    'var' variableDeclarationList SEMI
    ;

variableDeclarationList:
    /*
     * SPEC:
     *   variableDeclaration
     * | variableDeclarationList COMMA variableDeclaration
     */
    variableDeclaration (variableDeclarationTail)*
    ;

variableDeclarationTail:
    COMMA variableDeclaration
    ;

variableDeclarationListNoln:
    /*
     * SPEC:
     *   variableDeclarationNoln
     * | variableDeclarationListNoln COMMA variableDeclarationNoln
     */
    variableDeclarationNoln (variableDeclarationListNolnTail)*
    ;

variableDeclarationListNolnTail:
    COMMA variableDeclarationNoln
    ;

variableDeclaration:
    identifier (initialiser)?
    ;

variableDeclarationNoln:
    identifier (initialiserNoln)?
    ;

initialiser:
    EQ assignmentExpression
    ;

initialiserNoln:
    EQ assignmentExpressionNoln
    ;

emptyStatement:
    /* SPEC: a lone semicolon. An empty rule body here would match nothing and
     * make (statement)+ loop without consuming input. */
    SEMI
    ;

expressionStatement:
    /* [lookahead not a member of {{, function}} */
    expression SEMI
    ;

ifStatement:
    /*
     * SPEC:
     *   'if' LPAREN expression RPAREN statement 'else' statement
     * | 'if' LPAREN expression RPAREN statement
     * left-factored; the optional 'else' binds to the nearest 'if'
     */
    'if' LPAREN expression RPAREN statement ('else' statement)?
    ;

iterationStatement:
      'do' statement 'while' LPAREN expression RPAREN SEMI
    | 'while' LPAREN expression RPAREN statement
    | 'for' LPAREN
      (   (expressionNoln)? SEMI (expression)? SEMI (expression)? RPAREN statement
        | 'var' variableDeclarationListNoln SEMI (expression)? SEMI (expression)? RPAREN statement
        | leftHandSideExpression 'in' expression RPAREN statement
        | 'var' variableDeclarationNoln 'in' expression RPAREN statement
      )
    ;

continueStatement:
    'continue' /* [ no line terminator here ] */ (identifier)? SEMI
    ;

breakStatement:
    'break' /* [ no line terminator here ] */ (identifier)? SEMI
    ;

returnStatement:
    'return' /* [no line terminator here] */ (expression)? SEMI
    ;

withStatement:
    'with' LPAREN expression RPAREN statement
    ;

switchStatement:
    'switch' LPAREN expression RPAREN caseBlock
    ;

caseBlock:
    /*
     * SPEC:
     *   LBRACE (caseClauses)? RBRACE
     * | LBRACE (caseClauses)? defaultClause (caseClauses)? RBRACE
     * left-factored into a single alternative
     */
    LBRACE (caseClauses)? (defaultClause (caseClauses)?)? RBRACE
    ;

caseClauses:
    /*
     * SPEC:
     *   caseClause
     * | caseClauses caseClause
     */
    (caseClause)+
    ;

caseClause:
    'case' expression COLON (statementList)?
    ;

defaultClause:
    'default' COLON (statementList)?
    ;

labelledStatement:
    identifier COLON statement
    ;

throwStatement:
    'throw' /* [no line terminator here] */ expression SEMI
    ;

tryStatement:
    /*
     * SPEC:
     *   'try' block catch_
     * | 'try' block finally_
     * | 'try' block catch_ finally_
     * left-factored on the common 'try' block prefix
     */
    'try' block
    (   catch_ (finally_)?
      | finally_
    )
    ;

catch_:
    'catch' LPAREN identifier RPAREN block
    ;

finally_:
    'finally' block
    ;

/* A.5 Functions and Programs */

functionDeclaration:
    /* SPEC: function Identifier ( FormalParameterList(opt) ) { FunctionBody } */
    'function' identifier LPAREN (formalParameterList)? RPAREN
        LBRACE functionBody RBRACE
    ;

functionExpression:
    /* SPEC: function Identifier(opt) ( FormalParameterList(opt) ) { FunctionBody } */
    'function' (identifier)? LPAREN (formalParameterList)? RPAREN
        LBRACE functionBody RBRACE
    ;

formalParameterList:
    /*
     * SPEC:
     *   identifier
     * | formalParameterList COMMA identifier
     */
    identifier (formalParameterListTail)*
    ;

formalParameterListTail:
    COMMA identifier
    ;

functionBody:
    sourceElements
    ;

/*
 * program is actually the starting element for the grammar so I have commented
 * out this one and then copied it to the very beginning as that is what the
 * start really is.
 */
/*program:
    sourceElements
    ;*/

sourceElements:
    /*
     * SPEC:
     *   sourceElement
     * | sourceElements sourceElement
     */
    (sourceElement)+
    ;

sourceElement:
    statement | functionDeclaration
    ;

/* A.6 URI character classes */

uri:
    (uriCharacters)?
    ;

uriCharacters:
    uriCharacter (uriCharacters)?
;

uriCharacter:
    uriReserved | uriUnescaped | uriEscaped
    ;

uriReserved:
    SEMI | SLASH | QUESTION | COLON | AT | AMPER | EQ | PLUS | DOLLAR | COMMA
    ;

uriUnescaped:
    uriAlpha | decimalDigit | uriMark
    ;

uriEscaped:
    PERCENT hexDigit hexDigit
    ;

uriAlpha:
    ALPHA_CHARACTER /* consists of a-zA-Z */
    ;

uriMark:
      MINUS | UNDERSCORE | DOT | EXCLAMATION | TILDE
    | ASTERISK | APOSTROPHE | LPAREN | RPAREN
    ;

/* A.7 Regular Expressions */

pattern:
    disjunction
    ;

/* misspelled original name of the pattern rule, kept so existing callers
 * that start parsing at "patter" keep working */
patter:
    pattern
    ;

disjunction:
    /*
     * SPEC:
     *   alternative
     * | alternative PIPE disjunction
     */
    alternative (PIPE disjunction)?
    ;

alternative:
    /*
     * SPEC:
     *   -empty-
     * | alternative term
     */
    (term)*
    ;

term:
    /*
     * SPEC:
     *   assertion
     * | atom
     * | atom quantifier
     */
      assertion
    | atom (quantifier)?
    ;

assertion:
      CAROT
    | DOLLAR
    | '\\b' /* double check this - looks like a space in the manual? */
    | '\\B' /* double check this - looks like a space in the manual? */
    ;

quantifier:
    /*
     * SPEC:
     *   quantifierPrefix
     * | quantifierPrefix QUESTION
     */
    quantifierPrefix (QUESTION)?
    ;

quantifierPrefix:
    /*
     * SPEC:
     *   ASTERISK | PLUS | QUESTION
     * | LBRACE decimalDigits RBRACE
     * | LBRACE decimalDigits COMMA RBRACE
     * | LBRACE decimalDigits COMMA decimalDigits RBRACE
     * the three brace forms are left-factored into one alternative
     */
      ASTERISK
    | PLUS
    | QUESTION
    | LBRACE decimalDigits (COMMA (decimalDigits)?)? RBRACE
    ;

atom:
    /*
     * SPEC group forms:
     *   ( disjunction )   (?: disjunction )   (?= disjunction )   (?! disjunction )
     * the COLON / EQ / EXCLAMATION marker is only valid after a QUESTION
     */
      patternCharacter
    | DOT
    | BSLASH atomEscape
    | characterClass
    | LPAREN (QUESTION (COLON|EQ|EXCLAMATION))? disjunction RPAREN
    ;

patternCharacter:
    sourceCharacter /* but not any of: ^ $ \ . * + ? ( ) [ ] { } | */
    ;

atomEscape:
    decimalEscape | characterEscape | characterClassEscape
    ;

characterEscape:
      controlEscape
    | LOWER_C controlLetter
    | hexEscapeSequence
    | unicodeEscapeSequence
    | identityEscape
    ;

controlEscape:
    CONTROL_ESCAPE_CHAR /* one of: fnrtv */
    ;

controlLetter:
    /* one of: a-z A-Z */
    LOWER_ALPHA_CHAR | UPPER_ALPHA_CHAR
    ;

identityEscape:
    sourceCharacter /* but not identifierPart */
    ;

characterClassEscape:
    'd' | 'D' | 's' | 'S' | 'w' | 'W'
    ;

decimalEscape:
    decimalIntegerLiteral /* lookahead not a member of decimalDigit */
    ;

characterClass:
    /*
     * SPEC:
     *   [ [lookahead not a member of {^}] classRanges ]
     * | [ ^ classRanges ]
     */
    LBRACK (CAROT)? classRanges RBRACK
    ;

classRanges:
      /* empty */
    | nonemptyClassRanges
    ;

nonemptyClassRanges:
      classAtom
    | classAtom nonemptyClassRangesNoDash
    | classAtom MINUS classAtom classRanges
    ;

nonemptyClassRangesNoDash:
      classAtom
    | classAtomNoDash nonemptyClassRangesNoDash
    | classAtomNoDash MINUS classAtom classRanges
    ;

classAtom:
    MINUS | classAtomNoDash
    ;

classAtomNoDash:
      sourceCharacter /* but not one of: \ ] - */
    | BSLASH classEscape
    ;

classEscape:
    decimalEscape | LOWER_B | characterEscape | characterClassEscape
    ;

/* a few basic characters and other things that I am going to see */
SEMI: ';';
/* MPER: '&';  -- disabled: misspelled, unreferenced duplicate of AMPER below */
//DASH: '-';
EQ: '=';
COMMA: ',';
SLASH: '/';
BSLASH: '\\';
LBRACK: '[';
RBRACK: ']';
LBRACE: '{';
RBRACE: '}';
LPAREN: '(';
RPAREN: ')';
APOSTROPHE: '\'';
QUOTE: '"';
QUESTION: '?';
COLON: ':';
ASTERISK: '*';
AT : '@';
AMPER : '&';
PLUS : '+';
MINUS:'-';
DOLLAR : '$';
UNDERSCORE:'_';
DOT : '.';
LT : '<';
GT : '>';
LTEQ: '<=';
GTEQ : '>=';
EQ2 : '==';
NOTEQ: '!=';
EQ3 : '===';
NOTEQ2: '!==';
STAR:'*';
PERCENT : '%';
PLUS2:'++';
MINUS2:'--';
LSHIFT:'<<';
RSHIFT:'>>';
GT3:'>>>';
PIPE : '|';
CAROT : '^';
EXCLAMATION: '!';
TILDE:'~';
AMPER2:'&&';
PIPE2 : '||';
PLUSEQ:'+=';
MINUSEQ:'-=';
TIMESEQ:'*=';
PERCENTEQ:'%=';
LSHIFTEQ:'<<=';
RSHIFTEQ: '>>=';
GT3EQ : '>>>=';
AMPEREQ:'&=';
PIPEEQ : '|=';
CAROTEQ : '^=';
DIVIDE : '/';
DIVIDEEQ: '/=';
ZERO : '0';
NON_ZERO_DIGIT :
('1'..'9');
DIGIT : ('0'..'9');
EXPONENT_INDICATOR : 'e'|'E';
CONTROL_ESCAPE_CHAR : 'f'|'n'|'r'|'t'|'v'; /* one of: fnrtv */
LOWER_B: 'b';
LOWER_C : 'c';
LOWER_F: 'f';
LOWER_N: 'n';
LOWER_R: 'r';
LOWER_T: 't';
LOWER_V: 'v';
LOWER_X: 'x';
LOWER_U: 'u';
ALPHA_CHARACTER : ('a'..'z')|('A'..'Z');
LOWER_ALPHA_CHAR : ('a'..'z');
/* upper-case range; together with LOWER_ALPHA_CHAR this gives controlLetter
 * its documented "a-z A-Z" set */
UPPER_ALPHA_CHAR : ('A'..'Z');

WS: // ignore white space as it doesn't matter
    (   ' '
      | '\r' '\n' {newline();}
      | '\n' {newline();}
      | '\t'
    )
    { $setType(Token.SKIP); }
    ;

SOURCE_CHAR: ('\u0000'..'\uFFFE');

TAB: '\u0009';
VT: '\u000b';
FF: '\u000c';
SP: '\u0020';
NBSP: '\u00a0';
USP:  '\u1680'  // OGHAM SPACE MARK
    | '\u2000'  // EN QUAD
    | '\u2001'  // EM QUAD
    | '\u2002'  // EN SPACE
    | '\u2003'  // EM SPACE
    | '\u2004'  // THREE-PER-EM SPACE
    | '\u2005'  // FOUR-PER-EM SPACE
    | '\u2006'  // SIX-PER-EM SPACE
    | '\u2007'  // FIGURE SPACE
    | '\u2008'  // PUNCTUATION SPACE
    | '\u2009'  // THIN SPACE
    | '\u200A'  // HAIR SPACE
    | '\u200B'  // ZERO WIDTH SPACE
    | '\u202F'  // NARROW NO-BREAK SPACE
    | '\u3000'  // IDEOGRAPHIC SPACE
    ;

LF: '\u000a'; // line feed
CR: '\u000d'; // carriage return
LS: '\u2028'; // line separator
PS: '\u2029'; // paragraph separator

/* Letter table used by unicodeLetter. The CJK Extension A, CJK Unified
 * Ideographs and Hangul Syllables blocks are given as ranges (first..last),
 * not as their two isolated end points. */
UNICODE_LETTER:
    ('\u0041'..'\u005A') | ('\u0061'..'\u007A') | '\u00AA' | '\u00B5' |
    '\u00BA' | ('\u00C0'..'\u00D6') | ('\u00D8'..'\u00F6') | ('\u00F8'..'\u021F') |
    ('\u0222'..'\u0233') | ('\u0250'..'\u02AD') | ('\u02B0'..'\u02B8') | ('\u02BB'..'\u02C1') |
    ('\u02D0'..'\u02D1') | ('\u02E0'..'\u02E4') | '\u02EE' | '\u037A' |
    '\u0386' | ('\u0388'..'\u038A') | '\u038C' | ('\u038E'..'\u03A1') |
    ('\u03A3'..'\u03CE') | ('\u03D0'..'\u03D7') | ('\u03DA'..'\u03F3') | ('\u0400'..'\u0481') |
    ('\u048C'..'\u04C4') | ('\u04C7'..'\u04C8') | ('\u04CB'..'\u04CC') | ('\u04D0'..'\u04F5') |
    ('\u04F8'..'\u04F9') | ('\u0531'..'\u0556') | '\u0559' | ('\u0561'..'\u0587') |
    ('\u05D0'..'\u05EA') | ('\u05F0'..'\u05F2') | ('\u0621'..'\u063A') | ('\u0640'..'\u064A') |
    ('\u0671'..'\u06D3') | '\u06D5' | ('\u06E5'..'\u06E6') | ('\u06FA'..'\u06FC') |
    '\u0710' |
    ('\u0712'..'\u072C') | ('\u0780'..'\u07A5') | ('\u0905'..'\u0939') | '\u093D' |
    '\u0950' | ('\u0958'..'\u0961') | ('\u0985'..'\u098C') | ('\u098F'..'\u0990') |
    ('\u0993'..'\u09A8') | ('\u09AA'..'\u09B0') | '\u09B2' | ('\u09B6'..'\u09B9') |
    ('\u09DC'..'\u09DD') | ('\u09DF'..'\u09E1') | ('\u09F0'..'\u09F1') | ('\u0A05'..'\u0A0A') |
    ('\u0A0F'..'\u0A10') | ('\u0A13'..'\u0A28') | ('\u0A2A'..'\u0A30') | ('\u0A32'..'\u0A33') |
    ('\u0A35'..'\u0A36') | ('\u0A38'..'\u0A39') | ('\u0A59'..'\u0A5C') | '\u0A5E' |
    ('\u0A72'..'\u0A74') | ('\u0A85'..'\u0A8B') | '\u0A8D' | ('\u0A8F'..'\u0A91') |
    ('\u0A93'..'\u0AA8') | ('\u0AAA'..'\u0AB0') | ('\u0AB2'..'\u0AB3') | ('\u0AB5'..'\u0AB9') |
    '\u0ABD' | '\u0AD0' | '\u0AE0' | ('\u0B05'..'\u0B0C') |
    ('\u0B0F'..'\u0B10') | ('\u0B13'..'\u0B28') | ('\u0B2A'..'\u0B30') | ('\u0B32'..'\u0B33') |
    ('\u0B36'..'\u0B39') | '\u0B3D' | ('\u0B5C'..'\u0B5D') | ('\u0B5F'..'\u0B61') |
    ('\u0B85'..'\u0B8A') | ('\u0B8E'..'\u0B90') | ('\u0B92'..'\u0B95') | ('\u0B99'..'\u0B9A') |
    '\u0B9C' | ('\u0B9E'..'\u0B9F') | ('\u0BA3'..'\u0BA4') | ('\u0BA8'..'\u0BAA') |
    ('\u0BAE'..'\u0BB5') | ('\u0BB7'..'\u0BB9') | ('\u0C05'..'\u0C0C') | ('\u0C0E'..'\u0C10') |
    ('\u0C12'..'\u0C28') | ('\u0C2A'..'\u0C33') | ('\u0C35'..'\u0C39') | ('\u0C60'..'\u0C61') |
    ('\u0C85'..'\u0C8C') | ('\u0C8E'..'\u0C90') | ('\u0C92'..'\u0CA8') | ('\u0CAA'..'\u0CB3') |
    ('\u0CB5'..'\u0CB9') | '\u0CDE' | ('\u0CE0'..'\u0CE1') | ('\u0D05'..'\u0D0C') |
    ('\u0D0E'..'\u0D10') | ('\u0D12'..'\u0D28') | ('\u0D2A'..'\u0D39') | ('\u0D60'..'\u0D61') |
    ('\u0D85'..'\u0D96') | ('\u0D9A'..'\u0DB1') | ('\u0DB3'..'\u0DBB') | '\u0DBD' |
    ('\u0DC0'..'\u0DC6') | ('\u0E01'..'\u0E30') | ('\u0E32'..'\u0E33') | ('\u0E40'..'\u0E46') |
    ('\u0E81'..'\u0E82') | '\u0E84' | ('\u0E87'..'\u0E88') | '\u0E8A' |
    '\u0E8D' | ('\u0E94'..'\u0E97') | ('\u0E99'..'\u0E9F') | ('\u0EA1'..'\u0EA3') |
    '\u0EA5' | '\u0EA7' | ('\u0EAA'..'\u0EAB') | ('\u0EAD'..'\u0EB0') |
    ('\u0EB2'..'\u0EB3') | ('\u0EBD'..'\u0EC4') | '\u0EC6' | ('\u0EDC'..'\u0EDD') |
    '\u0F00' |
    ('\u0F40'..'\u0F6A') | ('\u0F88'..'\u0F8B') | ('\u1000'..'\u1021') | ('\u1023'..'\u1027') |
    ('\u1029'..'\u102A') | ('\u1050'..'\u1055') | ('\u10A0'..'\u10C5') | ('\u10D0'..'\u10F6') |
    ('\u1100'..'\u1159') | ('\u115F'..'\u11A2') | ('\u11A8'..'\u11F9') | ('\u1200'..'\u1206') |
    ('\u1208'..'\u1246') | '\u1248' | ('\u124A'..'\u124D') | ('\u1250'..'\u1256') |
    '\u1258' | ('\u125A'..'\u125D') | ('\u1260'..'\u1286') | '\u1288' |
    ('\u128A'..'\u128D') | ('\u1290'..'\u12AE') | '\u12B0' | ('\u12B2'..'\u12B5') |
    ('\u12B8'..'\u12BE') | '\u12C0' | ('\u12C2'..'\u12C5') | ('\u12C8'..'\u12CE') |
    ('\u12D0'..'\u12D6') | ('\u12D8'..'\u12EE') | ('\u12F0'..'\u130E') | '\u1310' |
    ('\u1312'..'\u1315') | ('\u1318'..'\u131E') | ('\u1320'..'\u1346') | ('\u1348'..'\u135A') |
    ('\u13A0'..'\u13B0') | ('\u13B1'..'\u13F4') | ('\u1401'..'\u1676') | ('\u1681'..'\u169A') |
    ('\u16A0'..'\u16EA') | ('\u1780'..'\u17B3') | ('\u1820'..'\u1877') | ('\u1880'..'\u18A8') |
    ('\u1E00'..'\u1E9B') | ('\u1EA0'..'\u1EE0') | ('\u1EE1'..'\u1EF9') | ('\u1F00'..'\u1F15') |
    ('\u1F18'..'\u1F1D') | ('\u1F20'..'\u1F39') | ('\u1F3A'..'\u1F45') | ('\u1F48'..'\u1F4D') |
    ('\u1F50'..'\u1F57') | '\u1F59' | '\u1F5B' | '\u1F5D' |
    ('\u1F5F'..'\u1F7D') | ('\u1F80'..'\u1FB4') | ('\u1FB6'..'\u1FBC') | '\u1FBE' |
    ('\u1FC2'..'\u1FC4') | ('\u1FC6'..'\u1FCC') | ('\u1FD0'..'\u1FD3') | ('\u1FD6'..'\u1FDB') |
    ('\u1FE0'..'\u1FEC') | ('\u1FF2'..'\u1FF4') | ('\u1FF6'..'\u1FFC') | '\u207F' |
    '\u2102' | '\u2107' | ('\u210A'..'\u2113') | '\u2115' |
    ('\u2119'..'\u211D') | '\u2124' | '\u2126' | '\u2128' |
    ('\u212A'..'\u212D') | ('\u212F'..'\u2131') | ('\u2133'..'\u2139') | ('\u2160'..'\u2183') |
    ('\u3005'..'\u3007') | ('\u3021'..'\u3029') | ('\u3031'..'\u3035') | ('\u3038'..'\u303A') |
    ('\u3041'..'\u3094') | ('\u309D'..'\u309E') | ('\u30A1'..'\u30FA') | ('\u30FC'..'\u30FE') |
    ('\u3105'..'\u312C') | ('\u3131'..'\u318E') | ('\u31A0'..'\u31B7') |
    ('\u3400'..'\u4DB5') | ('\u4E00'..'\u9FA5') | ('\uA000'..'\uA48C') | ('\uAC00'..'\uD7A3') |
    ('\uF900'..'\uFA2D') |
('\uFB00'..'\uFB06') | ('\uFB13'..'\uFB17') | '\uFB1D' | ('\uFB1F'..'\uFB28') |
    ('\uFB2A'..'\uFB36') | ('\uFB38'..'\uFB3C') | '\uFB3E' | ('\uFB40'..'\uFB41') |
    ('\uFB43'..'\uFB44') | ('\uFB46'..'\uFBB1') | ('\uFBD3'..'\uFD3D') | ('\uFD50'..'\uFD8F') |
    ('\uFD92'..'\uFDC7') | ('\uFDF0'..'\uFDFB') | ('\uFE70'..'\uFE72') | '\uFE74' |
    ('\uFE76'..'\uFEFC') | ('\uFF21'..'\uFF3A') | ('\uFF41'..'\uFF5A') | ('\uFF66'..'\uFFBE') |
    ('\uFFC2'..'\uFFC7') | ('\uFFCA'..'\uFFCF') | ('\uFFD2'..'\uFFD7') | ('\uFFDA'..'\uFFDC')
    ;

// Hexadecimal digits, referenced by the hexDigit parser rule.
HEXDIGIT: ('0'..'9')|('a'..'f')|('A'..'F');

// Decimal digit ranges, referenced by the unicodeDigit parser rule
// (documented there as Unicode category Nd).
UNICODE_DIGIT:
    ('\u0030'..'\u0039') | ('\u0660'..'\u0669') | ('\u06F0'..'\u06F9') | ('\u0966'..'\u096F') |
    ('\u09E6'..'\u09EF') | ('\u0A66'..'\u0A6F') | ('\u0AE6'..'\u0AEF') | ('\u0B66'..'\u0B6F') |
    ('\u0BE7'..'\u0BEF') | ('\u0C66'..'\u0C6F') | ('\u0CE6'..'\u0CEF') | ('\u0D66'..'\u0D6F') |
    ('\u0E50'..'\u0E59') | ('\u0ED0'..'\u0ED9') | ('\u0F20'..'\u0F29') | ('\u1040'..'\u1049') |
    ('\u1369'..'\u1371') | ('\u17E0'..'\u17E9') | ('\u1810'..'\u1819') | ('\uFF10'..'\uFF19')
    ;

// Only the \u0300..\u036F block is listed here.
// And more... see: http://www.fileformat.info/info/unicode/category/Mn/list.htm
UNICODE_NONSPACING_MARK : ('\u0300'..'\u036F');

// Appears to be bogus... see: http://www.fileformat.info/info/unicode/category/Mc/list.htm
// NOTE(review): the first two ranges below (\u0300..\u034E and \u0360..\u0362)
// lie inside UNICODE_NONSPACING_MARK above, so the two lexer rules overlap on
// those characters - confirm which rule is meant to own them.
UNICODE_COMBINING_MARK:
    ('\u0300'..'\u034E') | ('\u0360'..'\u0362') | ('\u0483'..'\u0486') | ('\u0591'..'\u05A1') |
    ('\u05A3'..'\u05B9') | ('\u05BB'..'\u05BD') | '\u05BF' | ('\u05C1'..'\u05C2') |
    '\u05C4' | ('\u064B'..'\u0655') | '\u0670' | ('\u06D6'..'\u06DC') |
    ('\u06DF'..'\u06E4') | ('\u06E7'..'\u06E8') | ('\u06EA'..'\u06ED') | '\u0711' |
    ('\u0730'..'\u074A') | ('\u07A6'..'\u07B0') | ('\u0901'..'\u0903') | '\u093C' |
    ('\u093E'..'\u094D') | ('\u0951'..'\u0954') | ('\u0962'..'\u0963') | ('\u0981'..'\u0983') |
    ('\u09BC'..'\u09C4') | ('\u09C7'..'\u09C8') | ('\u09CB'..'\u09CD') | '\u09D7' |
    ('\u09E2'..'\u09E3') | '\u0A02' | '\u0A3C' | ('\u0A3E'..'\u0A42') |
    ('\u0A47'..'\u0A48') | ('\u0A4B'..'\u0A4D') | ('\u0A70'..'\u0A71') | ('\u0A81'..'\u0A83') |
    '\u0ABC' | ('\u0ABE'..'\u0AC5') | ('\u0AC7'..'\u0AC9') | ('\u0ACB'..'\u0ACD') |
    ('\u0B01'..'\u0B03') | '\u0B3C' | ('\u0B3E'..'\u0B43') | ('\u0B47'..'\u0B48') |
    ('\u0B4B'..'\u0B4D') | ('\u0B56'..'\u0B57') | ('\u0B82'..'\u0B83') | ('\u0BBE'..'\u0BC2') |
    ('\u0BC6'..'\u0BC8') | ('\u0BCA'..'\u0BCD') | '\u0BD7' | ('\u0C01'..'\u0C03') |
    ('\u0C3E'..'\u0C44') | ('\u0C46'..'\u0C48') | ('\u0C4A'..'\u0C4D') | ('\u0C55'..'\u0C56') |
    ('\u0C82'..'\u0C83') | ('\u0CBE'..'\u0CC4') | ('\u0CC6'..'\u0CC8') | ('\u0CCA'..'\u0CCD') |
    ('\u0CD5'..'\u0CD6') | ('\u0D02'..'\u0D03') | ('\u0D3E'..'\u0D43') | ('\u0D46'..'\u0D48') |
    ('\u0D4A'..'\u0D4D') | '\u0D57' | ('\u0D82'..'\u0D83') | '\u0DCA' |
    ('\u0DCF'..'\u0DD4') | '\u0DD6' | ('\u0DD8'..'\u0DDF') | ('\u0DF2'..'\u0DF3') |
    '\u0E31' | ('\u0E34'..'\u0E3A') | ('\u0E47'..'\u0E4E') | '\u0EB1' |
    ('\u0EB4'..'\u0EB9') | ('\u0EBB'..'\u0EBC') | ('\u0EC8'..'\u0ECD') | ('\u0F18'..'\u0F19') |
    '\u0F35' | '\u0F37' | '\u0F39' | ('\u0F3E'..'\u0F3F') |
    ('\u0F71'..'\u0F84') | ('\u0F86'..'\u0F87') | ('\u0F90'..'\u0F97') | ('\u0F99'..'\u0FBC') |
    '\u0FC6' | ('\u102C'..'\u1032') | ('\u1036'..'\u1039') | ('\u1056'..'\u1059') |
    ('\u17B4'..'\u17D3') | '\u18A9' | ('\u20D0'..'\u20DC') |
    '\u20E1' | ('\u302A'..'\u302F') | ('\u3099'..'\u309A') | '\uFB1E' |
    ('\uFE20'..'\uFE23')
    ;

// Connector punctuation characters, referenced by the
// unicodeConnectorPunctuation parser rule (documented there as category Pc).
UNICODE_CONNECTOR_PUNCTUATION:
    '\u005F' | ('\u203F'..'\u2040') | '\u30FB' | ('\uFE33'..'\uFE34') |
    ('\uFE4D'..'\uFE4F') | '\uFF3F' | '\uFF65'
    ;