Alex User Guide

lot

<c1,c2,...>  regex   { code }
regexp   { code }
alexScanTokens :: String -> [Token]
$ alex Tokens.x
$ alex Tokens.x -o Main.hs
$digit      = [0-9]
$octdig     = [0-7]
$hexdig     = [0-9A-Fa-f]
$special    = [\.\;\,\$\|\*\+\?\#\~\-\{\}\(\)\[\]\^\/]
$graphic    = $printable # $white

@string     = \" ($graphic # \")* \"
@id         = [A-Za-z][A-Za-z'_]*
@smac       = '$' id
@rmac       = '@' id
@char       = ($graphic # $special) | @escape
@escape     = '\\' ($printable | 'x' $hexdig+ | 'o' $octdig+ | $digit+)
@code       = -- curly braces surrounding a Haskell code fragment
alex := [ @code ] [ wrapper ] [ encoding ] { macrodef } @id ':-' { rule } [ @code ]
wrapper := '%wrapper' @string
encoding := '%encoding' @string
action type := '%action' @string
token type := '%token' @string
typeclass(es) := '%typeclass' @string
macrodef  :=  @smac '=' set
           |  @rmac '=' regexp
rule       := [ startcodes ] token
            | startcodes '{' { token } '}'

token      := [ left_ctx ] regexp [ right_ctx ]  rhs

rhs        := @code | ';'
$white+        ;
$white*        ;
left_ctx   := '^'
            | set '^'

right_ctx  := '$'
            | '/' regexp
            | '/' @code
{ ... } :: user       -- predicate state
        -> AlexInput  -- input stream before the token
        -> Int        -- length of the token
        -> AlexInput  -- input stream after the token
        -> Bool       -- True <=> accept the token
startcode  := @id | '0'
startcodes := '<' startcode { ',' startcode } '>'
foo = 1
bar = 2
<0>      ([^\"] | \n)*  ;
<0>      \"             { begin string }
<string> [^\"]          { stringchar }
<string> \"             { begin 0 }
regexp  := rexp2 { '|' rexp2 }

rexp2   := rexp1 { rexp1 }

rexp1   := rexp0 [ '*' | '+' | '?' | repeat ]

rexp0   := set
         | @rmac
         | @string
         | '(' [ regexp ] ')'

repeat  := '{' $digit+ '}'
         | '{' $digit+ ',' '}'
         | '{' $digit+ ',' $digit+ '}'
set     := set '#' set0
        |  set0

set0    := @char [ '-' @char ]
        | '.'
        |  @smac
        | '[' [ '^' ] { set } ']'
        | '~' set0
$lls      = a-z                   -- little letters
$not_lls  = ~a-z                  -- anything but little letters
$ls_ds    = [a-zA-Z0-9]           -- letters and digits
$sym      = [ \! \@ \# \$ ]       -- the symbols !, @, #, and $
$sym_q_nl = [ \' \! \@ \# \$ \n ] -- the above symbols with ' and newline
$quotable = $printable # \'       -- any graphic character except '
$del      = \127                  -- ASCII DEL
type AlexInput
alexGetByte       :: AlexInput -> Maybe (Word8,AlexInput)
alexInputPrevChar :: AlexInput -> Char
alexScan :: AlexInput             -- The current input
         -> Int                   -- The "start code"
         -> AlexReturn action     -- The return value

data AlexReturn action
  = AlexEOF

  | AlexError
      !AlexInput     -- Remaining input

  | AlexSkip
      !AlexInput     -- Remaining input
      !Int           -- Token length

  | AlexToken
      !AlexInput     -- Remaining input
      !Int           -- Token length
      action         -- action value
alexScanUser
         :: user             -- predicate state
         -> AlexInput        -- The current input
         -> Int              -- The "start code"
         -> AlexReturn action
%wrapper "name"
type AlexInput = (Char,      -- previous char
                  [Byte],    -- rest of the bytes for the current char
                  String)    -- rest of the input string

-- Fetch the next byte of input.
--
-- The input triple is (previous char, bytes still pending for the current
-- char, rest of the input string).  The result is Nothing only when both the
-- pending bytes and the remaining string are empty.
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
-- Bytes of the current character are still pending: hand out the next one.
alexGetByte (c,(b:bs),s) = Just (b,(c,bs,s))
-- Nothing pending and no characters left: end of input.
alexGetByte (c,[],[])    = Nothing
-- Nothing pending: encode the next character, return its first byte, keep the
-- remaining bytes as pending, and record the character as the previous char.
alexGetByte (_,[],(c:s)) = case utf8Encode c of
                             -- NOTE(review): there is no alternative for an
                             -- empty encoding; presumably utf8Encode never
                             -- returns [] -- confirm against its definition.
                             (b:bs) -> Just (b, (c, bs, s))

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (c,_,_) = c

-- Scan an entire input string into a list of tokens.
--
-- Scanning starts with '\n' as the previous character, no pending bytes, and
-- start code 0 (the start code is never changed here).
-- alexScanTokens :: String -> [token]
alexScanTokens str = go ('\n',[],str)
  where go inp@(_,_bs,str) =
          case alexScan inp 0 of
                -- End of input: no more tokens.
                AlexEOF -> []
                -- No rule matched: abort via 'error'.
                AlexError _ -> error "lexical error"
                -- Rule without an action: drop the match and keep scanning.
                AlexSkip  inp' len     -> go inp'
                -- Rule with an action: apply it to the first len characters
                -- of the input at this point, then continue with the rest.
                AlexToken inp' len act -> act (take len str) : go inp'
{ ... } :: String -> token
data AlexPosn = AlexPn !Int  -- absolute character offset
                       !Int  -- line number
                       !Int  -- column number

type AlexInput = (AlexPosn,     -- current position,
                  Char,         -- previous char
                  [Byte],       -- rest of the bytes for the current char
                  String)       -- current input string

-- Scan an entire input string into a list of tokens, passing each action the
-- position at which its token starts.
--
-- Scanning starts at alexStartPos with '\n' as the previous character, no
-- pending bytes, and start code 0 (the start code is never changed here).
--alexScanTokens :: String -> [token]
alexScanTokens str = go (alexStartPos,'\n',[],str)
  where go inp@(pos,_,_,str) =
          case alexScan inp 0 of
                -- End of input: no more tokens.
                AlexEOF -> []
                -- No rule matched: abort via 'error', reporting the line and
                -- column stored in the input returned with AlexError.
                AlexError ((AlexPn _ line column),_,_,_) -> error $ "lexical error at " ++ (show line) ++ " line, " ++ (show column) ++ " column"
                -- Rule without an action: drop the match and keep scanning.
                AlexSkip  inp' len     -> go inp'
                -- Rule with an action: apply it to the token's start position
                -- and the first len characters of the input at this point.
                AlexToken inp' len act -> act pos (take len str) : go inp'
{ ... } :: AlexPosn -> String -> token
data AlexState = AlexState {
        alex_pos :: !AlexPosn,  -- position at current input location
        alex_inp :: String,     -- the current input
        alex_chr :: !Char,      -- the character before the input
        alex_bytes :: [Byte],   -- rest of the bytes for the current char
        alex_scd :: !Int        -- the current startcode
    }

newtype Alex a = Alex { unAlex :: AlexState
                               -> Either String (AlexState, a) }

instance Functor Alex where ...
instance Applicative Alex where ...
instance Monad Alex where ...

runAlex          :: String -> Alex a -> Either String a

type AlexInput = (AlexPosn,     -- current position,
                  Char,         -- previous char
                  [Byte],       -- rest of the bytes for the current char
                  String)       -- current input string

alexGetInput     :: Alex AlexInput
alexSetInput     :: AlexInput -> Alex ()

alexError        :: String -> Alex a

alexGetStartCode :: Alex Int
alexSetStartCode :: Int -> Alex ()
alexEOF :: Alex result
alexMonadScan :: Alex result
type AlexAction result = AlexInput -> Int -> Alex result
{ ... }  :: AlexAction result
alexEOF :: Alex result
-- Action that ignores the matched input and its length and simply carries on
-- by running alexMonadScan again.
-- skip :: AlexAction result
skip input len = alexMonadScan

-- Combine an action with a start-code change: first set the start code to
-- `code`, then run `act` on the same input and length.
-- andBegin :: AlexAction result -> Int -> AlexAction result
(act `andBegin` code) input len = do alexSetStartCode code; act input len

-- Action that switches to start code `code` and then behaves like `skip`.
-- begin :: Int -> AlexAction result
begin code = skip `andBegin` code

-- Lift a pure function of the input and token length into an action: the
-- function's result is returned without any other effect.
-- token :: (AlexInput -> Int -> token) -> AlexAction token
token t input len = return (t input len)
data AlexState = AlexState {
        alex_pos :: !AlexPosn,  -- position at current input location
        alex_inp :: String,     -- the current input
        alex_chr :: !Char,      -- the character before the input
        alex_bytes :: [Byte],   -- rest of the bytes for the current char
        alex_scd :: !Int,       -- the current startcode
        alex_ust :: AlexUserState -- AlexUserState will be defined in the user program
    }
-- Run an Alex computation over the given input string.
--
-- The initial state starts at alexStartPos with the whole input unread, '\n'
-- as the previous character, no pending bytes, the user state set to
-- alexInitUserState, and start code 0.  The final state is discarded: a Left
-- message is passed through unchanged, and for Right only the computed value
-- is returned.
runAlex :: String -> Alex a -> Either String a
runAlex input (Alex f)
   = case f (AlexState {alex_pos = alexStartPos,
                        alex_inp = input,
                        alex_chr = '\n',
                        alex_bytes = [],
                        alex_ust = alexInitUserState,
                        alex_scd = 0}) of Left msg -> Left msg
                                          Right ( _, a ) -> Right a
alexGetUserState :: Alex AlexUserState
alexSetUserState :: AlexUserState -> Alex ()
-- User-defined state record; the alex_ust field of AlexState holds a value of
-- this type.
data AlexUserState = AlexUserState
                   {
                       -- presumably the current comment nesting depth
                       -- (inferred from the name -- confirm with the rules
                       -- that update it)
                       lexerCommentDepth  :: Int
                       -- presumably the text of the string literal being
                       -- lexed (inferred from the name -- confirm)
                     , lexerStringValue   :: String
                   }

-- Starting user state: comment depth 0 and an empty string value.
alexInitUserState :: AlexUserState
alexInitUserState =
  AlexUserState { lexerCommentDepth = 0, lexerStringValue = "" }

-- Read the lexerCommentDepth field out of the current user state.
getLexerCommentDepth :: Alex Int
getLexerCommentDepth = fmap lexerCommentDepth alexGetUserState

-- Overwrite the lexerCommentDepth field of the user state, leaving the other
-- fields as they are.
setLexerCommentDepth :: Int -> Alex ()
setLexerCommentDepth depth =
  alexGetUserState >>= \st -> alexSetUserState st { lexerCommentDepth = depth }

-- Read the lexerStringValue field out of the current user state.
getLexerStringValue :: Alex String
getLexerStringValue = fmap lexerStringValue alexGetUserState

-- Overwrite the lexerStringValue field of the user state, leaving the other
-- fields as they are.
setLexerStringValue :: String -> Alex ()
setLexerStringValue str =
  alexGetUserState >>= \st -> alexSetUserState st { lexerStringValue = str }

-- Prepend a character to the lexerStringValue field of the user state.
-- Because each character goes on the front, the stored string is in reverse
-- order of insertion.
addCharToLexerStringValue :: Char -> Alex ()
addCharToLexerStringValue c =
  alexGetUserState >>= \st ->
    alexSetUserState st { lexerStringValue = c : lexerStringValue st }
alexGScan :: StopAction state result -> state -> String -> result

type StopAction state result
         = AlexPosn -> Char -> String -> (Int,state) -> result
{ ... }      :: AlexPosn                -- token position
             -> Char                    -- previous character
             -> String                  -- input string at token
             -> Int                     -- length of token
             -> ((Int,state) -> result) -- continuation
             -> (Int,state)             -- current (startcode,state)
             -> result
import qualified Data.ByteString.Lazy as ByteString

data AlexInput = AlexInput { alexChar :: {-# UNPACK #-} !Char,      -- previous char
                             alexStr ::  !ByteString.ByteString,    -- current input string
                             alexBytePos :: {-# UNPACK #-} !Int64}  -- bytes consumed so far

-- Yields the next byte of the input (a Byte, not a Char), as the basic
-- alexGetByte interface used by alexScan requires.
alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)

alexInputPrevChar :: AlexInput -> Char

-- alexScanTokens :: ByteString.ByteString -> [token]
{ ... } :: ByteString.ByteString -> token
import qualified Data.ByteString.Lazy as ByteString

type AlexInput = (AlexPosn,   -- current position,
                  Char,       -- previous char
                  ByteString.ByteString, -- current input string
                  Int64)           -- bytes consumed so far

-- alexScanTokens :: ByteString.ByteString -> [token]
{ ... } :: AlexPosn -> ByteString.ByteString -> token
import qualified Data.ByteString.Lazy as ByteString

data AlexState = AlexState {
        alex_pos :: !AlexPosn,  -- position at current input location
        alex_bpos:: !Int64,     -- bytes consumed so far
        alex_inp :: ByteString.ByteString, -- the current input
        alex_chr :: !Char,      -- the character before the input
        alex_scd :: !Int        -- the current startcode
    }

newtype Alex a = Alex { unAlex :: AlexState
                               -> Either String (AlexState, a) }

runAlex          :: ByteString.ByteString -> Alex a -> Either String a

type AlexInput = (AlexPosn,     -- current position,
                  Char,         -- previous char
                  ByteString.ByteString,   -- current input string
                  Int64)        -- bytes consumed so far

-- token :: (AlexInput -> Int -> token) -> AlexAction token
import qualified Data.ByteString.Lazy as ByteString

-- Lexer state over a lazy ByteString input that also carries the user state.
data AlexState = AlexState {
        alex_pos :: !AlexPosn,  -- position at current input location
        alex_bpos:: !Int64,     -- bytes consumed so far
        alex_inp :: ByteString.ByteString, -- the current input
        alex_chr :: !Char,      -- the character before the input
        alex_scd :: !Int        -- the current startcode
      , alex_ust :: AlexUserState -- AlexUserState will be defined in the user program
    }

newtype Alex a = Alex { unAlex :: AlexState
                               -> Either String (AlexState, a) }

runAlex          :: ByteString.ByteString -> Alex a -> Either String a

-- token :: (AlexInput -> Int -> token) -> AlexAction token
%wrapper "basic"
%token "Token s"
%typeclass "Read s"

tokens :-

[a-zA-Z0-9]+ { mkToken }
[ \t\r\n]+   ;

{

data Token s = Tok s

-- Build a token by parsing the matched text with 'read'.
-- NOTE(review): 'read' raises an error when the text does not parse as an
-- 's'; fine for illustrating %typeclass, but confirm before reusing as-is.
mkToken :: Read s => String -> Token s
mkToken = Tok . read

lex :: Read s => String -> [Token s]
lex = alexScanTokens

}
%typeclass "Read s, Eq s"
{
{-# LANGUAGE FlexibleContexts #-}

module Lexer where

import Control.Monad.State
import qualified Data.Bits
import Data.Word

}

%action "AlexInput -> Int -> m (Token s)"
%typeclass "Read s, MonadState AlexState m"

tokens :-

[a-zA-Z0-9]+ { mkToken }
[ \t\n\r]+   ;

{

alexEOF :: MonadState AlexState m => m (Token s)
alexEOF = return EOF

-- Build a token from the first len characters of the fourth component of the
-- input tuple, parsed with 'read', and return it in the monad m.
-- NOTE(review): 'read' raises an error when the text does not parse as an 's'.
mkToken :: (Read s, MonadState AlexState m) =>
           AlexInput -> Int -> m (Token s)
mkToken (_, _, _, s) len = return (Tok (read (take len s)))

data Token s = Tok s | EOF

-- Run one step of the monadic scanner.
-- NOTE(review): the `input` argument is not used in the body; presumably the
-- text to scan must already be in the AlexState carried by m -- confirm.
lex :: (MonadState AlexState m, Read s) => String -> m (Token s)
lex input = alexMonadScan

-- "Boilerplate" code from monad wrapper has been omitted

}
$ alex { option } file.x  { option }

Alex User Guide

1. About Alex

1.1. Release Notes for version 3.0

1.2. Release Notes for version 2.2

1.3. Release Notes for version 2.1.0

1.4. Release Notes for version 2.0

1.4.1. Syntax changes

1.4.2. Changes in the form of an Alex file

1.4.3. Usage changes

1.4.4. Implementation changes

1.5. Reporting bugs in Alex

1.6. License

2. Introduction

3. Alex Files

3.1. Lexical syntax

3.2. Syntax of Alex files

3.2.1. Macro definitions

3.2.2. Rules

Contexts

Start codes

4. Regular Expression

4.1. Syntax of regular expressions

4.2. Syntax of character sets

5. The Interface to an Alex-generated lexer

5.1. Unicode and UTF-8

5.2. Basic interface

5.3. Wrappers

5.3.1. The "basic" wrapper

5.3.2. The "posn" wrapper

5.3.3. The "monad" wrapper

5.3.4. The "monadUserState" wrapper

5.3.5. The "gscan" wrapper

5.3.6. The bytestring wrappers

The "basic-bytestring" wrapper

The "posn-bytestring" wrapper

The "monad-bytestring" wrapper

The "monadUserState-bytestring" wrapper

5.4. Type Signatures and Typeclasses

5.4.1. Generating Type Signatures with Wrappers

5.4.2. Generating Type Signatures without Wrappers

6. Invoking Alex