@@ -13,6 +13,10 @@ module Language.ECMAScript3.Lexer(lexeme,identifier,reserved,operator,reservedOp
1313 ,hexIntLit ,decIntLit , decDigits , decDigitsOpt , exponentPart , decLit ) where
1414
1515import Prelude hiding (lex )
16+ import Data.Char
17+ import Data.Monoid ((<>) , mconcat )
18+ import qualified Data.CharSet as Set
19+ import qualified Data.CharSet.Unicode.Category as Set
1620import Text.Parsec
1721import qualified Text.Parsec.Token as T
1822import Language.ECMAScript3.Parser.State
@@ -21,8 +25,33 @@ import Control.Monad.Identity
2125import Control.Applicative ((<$>) , (<*>) )
2226import Data.Maybe (isNothing )
2327
-- | Characters that may begin an identifier: @$@, @_@, and every character
-- in the Unicode general categories for lowercase, uppercase, titlecase,
-- modifier and other letters, plus letter numbers.
--
-- Whitespace is deliberately not a member; admitting it here would let the
-- lexer absorb spaces into identifiers.
identifierStartCharSet :: Set.CharSet
identifierStartCharSet =
  mconcat
    [ Set.fromDistinctAscList "$_" -- '$' (U+0024) precedes '_' (U+005F)
    , Set.lowercaseLetter
    , Set.uppercaseLetter
    , Set.titlecaseLetter
    , Set.modifierLetter
    , Set.otherLetter
    , Set.letterNumber
    ]
39+
-- | Characters that may follow the first character of an identifier:
-- everything in 'identifierStartCharSet' together with non-spacing marks,
-- spacing combining marks, decimal numbers and connector punctuation.
identifierRestCharSet :: Set.CharSet
identifierRestCharSet = identifierStartCharSet <> continuationOnly
  where
    -- Categories that are valid inside an identifier but not at its start.
    continuationOnly =
      mconcat
        [ Set.nonSpacingMark
        , Set.spacingCombiningMark
        , Set.decimalNumber
        , Set.connectorPunctuation
        ]
49+
-- | Parses a single character that may begin an identifier, i.e. any member
-- of 'identifierStartCharSet'. On failure the parser reports the label
-- given to '<?>' as the expected input.
identifierStart :: Stream s Identity Char => Parser s Char
identifierStart = satisfy startsIdentifier <?> " letter, '$', '_'"
  where
    startsIdentifier c = c `Set.member` identifierStartCharSet
52+
-- | Parses a single character that may appear after the first character of
-- an identifier, i.e. any member of 'identifierRestCharSet'. On failure the
-- parser reports the label given to '<?>' as the expected input.
identifierRest :: Stream s Identity Char => Parser s Char
identifierRest = satisfy continuesIdentifier <?> " letter, digits, '$', '_' ..."
  where
    continuesIdentifier c = c `Set.member` identifierRestCharSet
2655
2756javascriptDef :: Stream s Identity Char => T. GenLanguageDef s ParserState Identity
2857javascriptDef =
@@ -31,7 +60,7 @@ javascriptDef =
3160 " //"
3261 False -- no nested comments
3362 identifierStart
34- (alphaNum <|> oneOf " $_ " ) -- identifier rest
63+ identifierRest
3564 (oneOf " {}<>()~.,?:|&^=!+-*/%!" ) -- operator start
3665 (oneOf " =<>|&+" ) -- operator rest
3766 [" break" , " case" , " catch" , " const" , " continue" , " debugger" ,
0 commit comments