@@ -14,6 +14,9 @@ module Language.ECMAScript3.Lexer(lexeme,identifier,reserved,operator,reservedOp
1414
1515import Prelude hiding (lex )
1616import Data.Char
17+ import Data.Monoid ((<>) , mconcat )
18+ import qualified Data.CharSet as Set
19+ import qualified Data.CharSet.Unicode.Category as Set
1720import Text.Parsec
1821import qualified Text.Parsec.Token as T
1922import Language.ECMAScript3.Parser.State
@@ -22,14 +25,36 @@ import Control.Monad.Identity
2225import Control.Applicative ((<$>) , (<*>) )
2326import Data.Maybe (isNothing )
2427
25- jsLetter :: (Stream s m Char ) => ParsecT s u m Char
26- jsLetter = satisfy (\ x -> isAlpha x && x < '\ 65536 ') <?> " letter"
27-
28- jsAlphaNum :: (Stream s m Char => ParsecT s u m Char )
29- jsAlphaNum = satisfy (\ x -> isAlphaNum x && x < '\ 65536 ') <?> " letter or digit"
-- | Characters allowed as the first character of an identifier: @$@, @_@,
-- and every character in the lowercase, uppercase, titlecase, modifier and
-- other letter categories or the letter-number category, narrowed by
-- 'filterBmpChars'.
identifierStartCharSet :: Set.CharSet
identifierStartCharSet =
  filterBmpChars (mconcat (dollarAndUnderscore : letterCategories))
  where
    -- The literal is already in ascending order ('$' sorts before '_').
    dollarAndUnderscore = Set.fromDistinctAscList "$_"
    letterCategories =
      [ Set.lowercaseLetter
      , Set.uppercaseLetter
      , Set.titlecaseLetter
      , Set.modifierLetter
      , Set.otherLetter
      , Set.letterNumber
      ]
39+
-- | Characters allowed after the first character of an identifier:
-- everything in 'identifierStartCharSet', plus non-spacing marks, spacing
-- combining marks, decimal numbers and connector punctuation (these four
-- categories narrowed by 'filterBmpChars').
identifierRestCharSet :: Set.CharSet
identifierRestCharSet =
  mconcat [identifierStartCharSet, filterBmpChars continuationOnly]
  where
    -- Categories that are valid inside an identifier but not at its start.
    continuationOnly =
      Set.nonSpacingMark
        <> Set.spacingCombiningMark
        <> Set.decimalNumber
        <> Set.connectorPunctuation
49+
-- | Keep only those members of a character set whose code point is below
-- 65536, i.e. at most U+FFFF.
filterBmpChars :: Set.CharSet -> Set.CharSet
filterBmpChars chars = Set.filter belowLimit chars
  where
    belowLimit c = c <= '\xFFFF'
3052
-- | Parse a single character that is a member of 'identifierStartCharSet'.
-- On failure the expected input is reported as @letter, '$', '_'@.
identifierStart :: Stream s Identity Char => Parser s Char
identifierStart = satisfy startsIdentifier <?> "letter, '$', '_'"
  where
    startsIdentifier c = c `Set.member` identifierStartCharSet
55+
-- | Parse a single character that is a member of 'identifierRestCharSet'.
-- On failure the expected input is reported as
-- @letter, digits, '$', '_' ...@.
identifierRest :: Stream s Identity Char => Parser s Char
identifierRest = satisfy continuesIdentifier <?> "letter, digits, '$', '_' ..."
  where
    continuesIdentifier c = c `Set.member` identifierRestCharSet
3358
3459javascriptDef :: Stream s Identity Char => T. GenLanguageDef s ParserState Identity
3560javascriptDef =
@@ -38,7 +63,7 @@ javascriptDef =
3863 " //"
3964 False -- no nested comments
4065 identifierStart
41- (jsAlphaNum <|> oneOf " $_ " ) -- identifier rest
66+ identifierRest
4267 (oneOf " {}<>()~.,?:|&^=!+-*/%!" ) -- operator start
4368 (oneOf " =<>|&+" ) -- operator rest
4469 [" break" , " case" , " catch" , " const" , " continue" , " debugger" ,
0 commit comments