module Language.Haskell.HsColour.Classify
( TokenType(..)
, tokenise
) where
import Data.Char (isSpace, isUpper, isLower, isDigit)
import Data.List
tokenise :: String -> [(TokenType,String)]
tokenise :: [Char] -> [(TokenType, [Char])]
tokenise [Char]
str =
let chunks :: [[Char]]
chunks = [[Char]] -> [[Char]]
glue ([[Char]] -> [[Char]])
-> ([Char] -> [[Char]]) -> [Char] -> [[Char]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [[Char]]
chunk ([Char] -> [[Char]]) -> [Char] -> [[Char]]
forall a b. (a -> b) -> a -> b
$ [Char]
str
in [(TokenType, [Char])] -> [(TokenType, [Char])]
markDefs ([(TokenType, [Char])] -> [(TokenType, [Char])])
-> [(TokenType, [Char])] -> [(TokenType, [Char])]
forall a b. (a -> b) -> a -> b
$ ([Char] -> (TokenType, [Char]))
-> [[Char]] -> [(TokenType, [Char])]
forall a b. (a -> b) -> [a] -> [b]
map (\[Char]
s-> ([Char] -> TokenType
classify [Char]
s,[Char]
s)) [[Char]]
chunks
markDefs :: [(TokenType, String)] -> [(TokenType, String)]
markDefs :: [(TokenType, [Char])] -> [(TokenType, [Char])]
markDefs [] = []
markDefs ((TokenType
Varid, [Char]
s) : [(TokenType, [Char])]
rest) = (TokenType
Definition, [Char]
s) (TokenType, [Char])
-> [(TokenType, [Char])] -> [(TokenType, [Char])]
forall a. a -> [a] -> [a]
: [(TokenType, [Char])] -> [(TokenType, [Char])]
continue [(TokenType, [Char])]
rest
markDefs ((TokenType
Varop, [Char]
">") : (TokenType
Space, [Char]
" ") : (TokenType
Varid, [Char]
d) : [(TokenType, [Char])]
rest) =
(TokenType
Varop, [Char]
">") (TokenType, [Char])
-> [(TokenType, [Char])] -> [(TokenType, [Char])]
forall a. a -> [a] -> [a]
: (TokenType
Space, [Char]
" ") (TokenType, [Char])
-> [(TokenType, [Char])] -> [(TokenType, [Char])]
forall a. a -> [a] -> [a]
: (TokenType
Definition, [Char]
d) (TokenType, [Char])
-> [(TokenType, [Char])] -> [(TokenType, [Char])]
forall a. a -> [a] -> [a]
: [(TokenType, [Char])] -> [(TokenType, [Char])]
continue [(TokenType, [Char])]
rest
markDefs [(TokenType, [Char])]
rest = [(TokenType, [Char])] -> [(TokenType, [Char])]
continue [(TokenType, [Char])]
rest
continue :: [(TokenType, [Char])] -> [(TokenType, [Char])]
continue [(TokenType, [Char])]
rest
= let ([(TokenType, [Char])]
thisLine, [(TokenType, [Char])]
nextLine) = ((TokenType, [Char]) -> Bool)
-> [(TokenType, [Char])]
-> ([(TokenType, [Char])], [(TokenType, [Char])])
forall a. (a -> Bool) -> [a] -> ([a], [a])
span ((TokenType, [Char]) -> (TokenType, [Char]) -> Bool
forall a. Eq a => a -> a -> Bool
/= (TokenType
Space, [Char]
"\n")) [(TokenType, [Char])]
rest
in
case [(TokenType, [Char])]
nextLine of
[] -> [(TokenType, [Char])]
thisLine
((TokenType
Space, [Char]
"\n"):[(TokenType, [Char])]
nextLine') -> ([(TokenType, [Char])]
thisLine [(TokenType, [Char])]
-> [(TokenType, [Char])] -> [(TokenType, [Char])]
forall a. [a] -> [a] -> [a]
++ ((TokenType
Space, [Char]
"\n") (TokenType, [Char])
-> [(TokenType, [Char])] -> [(TokenType, [Char])]
forall a. a -> [a] -> [a]
: ([(TokenType, [Char])] -> [(TokenType, [Char])]
markDefs [(TokenType, [Char])]
nextLine')))
chunk :: String -> [String]
chunk :: [Char] -> [[Char]]
chunk [] = []
chunk (Char
'\r':[Char]
s) = [Char] -> [[Char]]
chunk [Char]
s
chunk (Char
'\n':[Char]
s) = [Char]
"\n"[Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [Char] -> [[Char]]
chunk [Char]
s
chunk (Char
c:[Char]
s) | Char -> Bool
isLinearSpace Char
c
= (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
ss)[Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [Char] -> [[Char]]
chunk [Char]
rest where ([Char]
ss,[Char]
rest) = (Char -> Bool) -> [Char] -> ([Char], [Char])
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isLinearSpace [Char]
s
chunk (Char
'{':Char
'-':[Char]
s) = let ([Char]
com,[Char]
s') = Int -> [Char] -> ([Char], [Char])
nestcomment Int
0 [Char]
s
in (Char
'{'Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:Char
'-'Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
com) [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [Char] -> [[Char]]
chunk [Char]
s'
chunk [Char]
s = case ReadS [Char]
Prelude.lex [Char]
s of
[] -> [[Char] -> Char
forall a. HasCallStack => [a] -> a
head [Char]
s][Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [Char] -> [[Char]]
chunk ([Char] -> [Char]
forall a. HasCallStack => [a] -> [a]
tail [Char]
s)
((tok :: [Char]
tok@(Char
'-':Char
'-':[Char]
_),[Char]
rest):[([Char], [Char])]
_)
| (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'-') [Char]
tok -> ([Char]
tok[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++[Char]
com)[Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [Char] -> [[Char]]
chunk [Char]
s'
where ([Char]
com,[Char]
s') = [Char] -> ([Char], [Char])
eolcomment [Char]
rest
(([Char]
tok,[Char]
rest):[([Char], [Char])]
_) -> [Char]
tok[Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [Char] -> [[Char]]
chunk [Char]
rest
isLinearSpace :: Char -> Bool
isLinearSpace Char
c = Char
c Char -> [Char] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
" \t\f"
glue :: [[Char]] -> [[Char]]
glue ([Char]
q:[Char]
".":[Char]
n:[[Char]]
rest) | Char -> Bool
isUpper ([Char] -> Char
forall a. HasCallStack => [a] -> a
head [Char]
q)
= [[Char]] -> [[Char]]
glue (([Char]
q[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++[Char]
"."[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++[Char]
n)[Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [[Char]]
rest)
glue ([Char]
"`":[[Char]]
rest) =
case [[Char]] -> [[Char]]
glue [[Char]]
rest of
([Char]
qn:[Char]
"`":[[Char]]
rest) -> ([Char]
"`"[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++[Char]
qn[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++[Char]
"`")[Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [[Char]] -> [[Char]]
glue [[Char]]
rest
[[Char]]
_ -> [Char]
"`"[Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [[Char]] -> [[Char]]
glue [[Char]]
rest
glue ([Char]
s:[[Char]]
ss) | (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'-') [Char]
s Bool -> Bool -> Bool
&& [Char] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Char]
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>=Int
2
= ([Char]
s[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++[[Char]] -> [Char]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Char]]
c)[Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [[Char]] -> [[Char]]
glue [[Char]]
rest
where ([[Char]]
c,[[Char]]
rest) = ([Char] -> Bool) -> [[Char]] -> ([[Char]], [[Char]])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char
'\n'Char -> [Char] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) [[Char]]
ss
glue ([Char]
"(":[[Char]]
ss) = case [[Char]]
rest of
[Char]
")":[[Char]]
rest -> ([Char]
"(" [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [[Char]] -> [Char]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Char]]
tuple [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
")") [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [[Char]] -> [[Char]]
glue [[Char]]
rest
[[Char]]
_ -> [Char]
"(" [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [[Char]] -> [[Char]]
glue [[Char]]
ss
where ([[Char]]
tuple,[[Char]]
rest) = ([Char] -> Bool) -> [[Char]] -> ([[Char]], [[Char]])
forall a. (a -> Bool) -> [a] -> ([a], [a])
span ([Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
==[Char]
",") [[Char]]
ss
glue ([Char]
"[":[Char]
"]":[[Char]]
ss) = [Char]
"[]" [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [[Char]] -> [[Char]]
glue [[Char]]
ss
glue ([Char]
"\n":[Char]
"#":[[Char]]
ss)= [Char]
"\n" [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: (Char
'#'Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[[Char]] -> [Char]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [[Char]]
line) [Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [[Char]] -> [[Char]]
glue [[Char]]
rest
where ([[Char]]
line,[[Char]]
rest) = ([Char] -> Bool) -> [[Char]] -> ([[Char]], [[Char]])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char
'\n'Char -> [Char] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) [[Char]]
ss
glue ([Char]
s:[[Char]]
ss) = [Char]
s[Char] -> [[Char]] -> [[Char]]
forall a. a -> [a] -> [a]
: [[Char]] -> [[Char]]
glue [[Char]]
ss
glue [] = []
nestcomment :: Int -> String -> (String,String)
Int
n (Char
'{':Char
'-':[Char]
ss) | Int
nInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>=Int
0 = (([Char]
"{-"[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++[Char]
cs),[Char]
rm)
where ([Char]
cs,[Char]
rm) = Int -> [Char] -> ([Char], [Char])
nestcomment (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) [Char]
ss
nestcomment Int
n (Char
'-':Char
'}':[Char]
ss) | Int
nInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
0 = (([Char]
"-}"[Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++[Char]
cs),[Char]
rm)
where ([Char]
cs,[Char]
rm) = Int -> [Char] -> ([Char], [Char])
nestcomment (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) [Char]
ss
nestcomment Int
n (Char
'-':Char
'}':[Char]
ss) | Int
nInt -> Int -> Bool
forall a. Eq a => a -> a -> Bool
==Int
0 = ([Char]
"-}",[Char]
ss)
nestcomment Int
n (Char
s:[Char]
ss) | Int
nInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>=Int
0 = ((Char
sChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs),[Char]
rm)
where ([Char]
cs,[Char]
rm) = Int -> [Char] -> ([Char], [Char])
nestcomment Int
n [Char]
ss
nestcomment Int
n [] = ([],[])
eolcomment :: String -> (String,String)
s :: [Char]
s@(Char
'\n':[Char]
_) = ([], [Char]
s)
eolcomment (Char
'\r':[Char]
s) = [Char] -> ([Char], [Char])
eolcomment [Char]
s
eolcomment (Char
c:[Char]
s) = (Char
cChar -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[Char]
cs, [Char]
s') where ([Char]
cs,[Char]
s') = [Char] -> ([Char], [Char])
eolcomment [Char]
s
eolcomment [] = ([],[])
data TokenType =
Space | Keyword | Keyglyph | Layout | | Conid | Varid |
Conop | Varop | String | Char | Number | Cpp | Error |
Definition
deriving (TokenType -> TokenType -> Bool
(TokenType -> TokenType -> Bool)
-> (TokenType -> TokenType -> Bool) -> Eq TokenType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: TokenType -> TokenType -> Bool
== :: TokenType -> TokenType -> Bool
$c/= :: TokenType -> TokenType -> Bool
/= :: TokenType -> TokenType -> Bool
Eq,Int -> TokenType -> [Char] -> [Char]
[TokenType] -> [Char] -> [Char]
TokenType -> [Char]
(Int -> TokenType -> [Char] -> [Char])
-> (TokenType -> [Char])
-> ([TokenType] -> [Char] -> [Char])
-> Show TokenType
forall a.
(Int -> a -> [Char] -> [Char])
-> (a -> [Char]) -> ([a] -> [Char] -> [Char]) -> Show a
$cshowsPrec :: Int -> TokenType -> [Char] -> [Char]
showsPrec :: Int -> TokenType -> [Char] -> [Char]
$cshow :: TokenType -> [Char]
show :: TokenType -> [Char]
$cshowList :: [TokenType] -> [Char] -> [Char]
showList :: [TokenType] -> [Char] -> [Char]
Show)
classify :: String -> TokenType
classify :: [Char] -> TokenType
classify s :: [Char]
s@(Char
h:[Char]
t)
| Char -> Bool
isSpace Char
h = TokenType
Space
| (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'-') [Char]
s = TokenType
Comment
| [Char]
"--" [Char] -> [Char] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` [Char]
s
Bool -> Bool -> Bool
&& (Char -> Bool) -> [Char] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Char -> Bool
isSpace [Char]
s = TokenType
Comment
| [Char]
"{-" [Char] -> [Char] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` [Char]
s = TokenType
Comment
| [Char]
s [Char] -> [[Char]] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [[Char]]
keywords = TokenType
Keyword
| [Char]
s [Char] -> [[Char]] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [[Char]]
keyglyphs = TokenType
Keyglyph
| [Char]
s [Char] -> [[Char]] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [[Char]]
layoutchars = TokenType
Layout
| Char -> Bool
isUpper Char
h = TokenType
Conid
| [Char]
s [Char] -> [Char] -> Bool
forall a. Eq a => a -> a -> Bool
== [Char]
"[]" = TokenType
Conid
| Char
h Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'(' Bool -> Bool -> Bool
&& [Char] -> Bool
isTupleTail [Char]
t = TokenType
Conid
| Char
h Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#' = TokenType
Cpp
| Char -> Bool
isLower Char
h = TokenType
Varid
| Char
h Char -> [Char] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
symbols = TokenType
Varop
| Char
hChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
':' = TokenType
Conop
| Char
hChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'`' = TokenType
Varop
| Char
hChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'"' = TokenType
String
| Char
hChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'\'' = TokenType
Char
| Char -> Bool
isDigit Char
h = TokenType
Number
| Bool
otherwise = TokenType
Error
classify [Char]
_ = TokenType
Space
isTupleTail :: [Char] -> Bool
isTupleTail [Char
')'] = Bool
True
isTupleTail (Char
',':[Char]
xs) = [Char] -> Bool
isTupleTail [Char]
xs
isTupleTail [Char]
_ = Bool
False
keywords :: [[Char]]
keywords =
[[Char]
"case",[Char]
"class",[Char]
"data",[Char]
"default",[Char]
"deriving",[Char]
"do",[Char]
"else",[Char]
"forall"
,[Char]
"if",[Char]
"import",[Char]
"in",[Char]
"infix",[Char]
"infixl",[Char]
"infixr",[Char]
"instance",[Char]
"let",[Char]
"module"
,[Char]
"newtype",[Char]
"of",[Char]
"qualified",[Char]
"then",[Char]
"type",[Char]
"where",[Char]
"_"
,[Char]
"foreign",[Char]
"ccall",[Char]
"as",[Char]
"safe",[Char]
"unsafe",[Char]
"family"]
keyglyphs :: [[Char]]
keyglyphs =
[[Char]
"..",[Char]
"::",[Char]
"=",[Char]
"\\",[Char]
"|",[Char]
"<-",[Char]
"->",[Char]
"@",[Char]
"~",[Char]
"=>",[Char]
"[",[Char]
"]"]
layoutchars :: [[Char]]
layoutchars =
(Char -> [Char]) -> [Char] -> [[Char]]
forall a b. (a -> b) -> [a] -> [b]
map (Char -> [Char] -> [Char]
forall a. a -> [a] -> [a]
:[]) [Char]
";{}(),"
symbols :: [Char]
symbols =
[Char]
"!#$%&*+./<=>?@\\^|-~"