# clex.py - mozsearch

# Enable keyboard shortcuts

# ----------------------------------------------------------------------

# clex.py

# A lexer for ANSI C.

# ----------------------------------------------------------------------

import sys

sys.path.insert(0, "../..")

import ply.lex as lex

# Reserved words
#
# C keywords.  Each upper-case name is used as a token type; the lexer
# looks up the lower-case spelling of an identifier to decide whether it is
# one of these keywords.
reserved = (
    'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE',
    'ELSE', 'ENUM', 'EXTERN', 'FLOAT', 'FOR', 'GOTO', 'IF', 'INT', 'LONG', 'REGISTER',
    'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', 'SWITCH', 'TYPEDEF',
    'UNION', 'UNSIGNED', 'VOID', 'VOLATILE', 'WHILE',
)

# Full list of token type names: the reserved words followed by literals,
# operators and punctuation.
tokens = reserved + (
    # Literals (identifier, integer constant, float constant, string constant,
    # char const)
    'ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST',

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
    'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
    'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
    'LOR', 'LAND', 'LNOT',
    'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
    'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
    'LSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',

    # Increment/decrement (++,--)
    'PLUSPLUS', 'MINUSMINUS',

    # Structure dereference (->)
    'ARROW',

    # Conditional operator (?)
    'CONDOP',

    # Delimiters ( ) [ ] { } , . ; :
    'LPAREN', 'RPAREN',
    'LBRACKET', 'RBRACKET',
    'LBRACE', 'RBRACE',
    'COMMA', 'PERIOD', 'SEMI', 'COLON',

    # Ellipsis (...)
    'ELLIPSIS',
)

# Completely ignored characters: space, tab and form feed (\x0c).
# Newlines are not listed here; they are handled by their own rule so that
# line numbers can be tracked.

t_ignore = ' \t\x0c'

# Newlines

def t_NEWLINE(t):
    r'\n+'
    # The string above is the rule's regular expression (PLY reads it from
    # the docstring), so it must stay the first statement.
    #
    # Consumes a run of newlines.  Nothing is returned; the only effect is
    # advancing the lexer's line counter by the number of newlines matched.
    newline_total = t.value.count("\n")
    t.lexer.lineno = t.lexer.lineno + newline_total

# Operators
#
# Each t_NAME string is the regular expression for the token type NAME
# declared in `tokens`.  Several operators are prefixes of longer ones
# ('<' of '<=' and '<<', '|' of '||', '!' of '!=', ...).  PLY adds
# string-defined rules in order of decreasing regex length, so the longer
# pattern is tried first and the order of these assignments does not matter.

# Arithmetic

t_PLUS = r'\+'

t_MINUS = r'-'

t_TIMES = r'\*'

t_DIVIDE = r'/'

t_MOD = r'%'

# Bitwise (NOT is '~'; the logical '!' is LNOT below)

t_OR = r'\|'

t_AND = r'&'

t_NOT = r'~'

t_XOR = r'\^'

t_LSHIFT = r'<<'

t_RSHIFT = r'>>'

# Logical

t_LOR = r'\|\|'

t_LAND = r'&&'

t_LNOT = r'!'

# Relational and equality

t_LT = r'<'

t_GT = r'>'

t_LE = r'<='

t_GE = r'>='

t_EQ = r'=='

t_NE = r'!='

# Assignment operators
#
# Plain '=' plus the compound forms.  Each compound pattern is the matching
# operator pattern followed by '='; characters that are regex
# metacharacters (*, +, |, ^) are backslash-escaped.

t_EQUALS = r'='

t_TIMESEQUAL = r'\*='

t_DIVEQUAL = r'/='

t_MODEQUAL = r'%='

t_PLUSEQUAL = r'\+='

t_MINUSEQUAL = r'-='

t_LSHIFTEQUAL = r'<<='

t_RSHIFTEQUAL = r'>>='

t_ANDEQUAL = r'&='

t_OREQUAL = r'\|='

t_XOREQUAL = r'\^='

# Increment/decrement (++ and --); '+' is escaped because it is a regex
# metacharacter.

t_PLUSPLUS = r'\+\+'

t_MINUSMINUS = r'--'

# Structure dereference through a pointer: ->

t_ARROW = r'->'

# Conditional operator: ?  (escaped because '?' is a regex metacharacter)

t_CONDOP = r'\?'

# Delimiters: ( ) [ ] { } , . ; :
# Brackets, braces, parentheses and '.' are backslash-escaped because they
# are regex metacharacters.

t_LPAREN = r'\('

t_RPAREN = r'\)'

t_LBRACKET = r'\['

t_RBRACKET = r'\]'

t_LBRACE = r'\{'

t_RBRACE = r'\}'

t_COMMA = r','

t_PERIOD = r'\.'

t_SEMI = r';'

t_COLON = r':'

# Ellipsis: three literal dots.  This pattern is longer than t_PERIOD's, and
# PLY tries longer string-defined patterns first, so '...' is not split into
# three PERIOD tokens.

t_ELLIPSIS = r'\.\.\.'

# Identifiers and reserved words

# Maps each keyword as it is spelled in source text (lower case, e.g.
# "while") to its token type name from `reserved` (upper case, e.g.
# "WHILE").  Built with a comprehension so no loop variable is left behind
# in the module namespace.
reserved_map = {name.lower(): name for name in reserved}

def t_ID(t):
    r'[A-Za-z_][\w_]*'
    # The string above is the rule's regular expression (PLY reads it from
    # the docstring), so it must stay the first statement.
    #
    # An identifier-shaped lexeme is either a reserved word or a plain
    # identifier: use the keyword's token type when the text is a key of
    # reserved_map, otherwise fall back to "ID".
    lexeme = t.value
    t.type = reserved_map[lexeme] if lexeme in reserved_map else "ID"
    return t

# Integer literal: a run of digits with an optional suffix u, l, ul or lu
# (each letter in either case).

t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: either digits '.' digits with an optional exponent, or
# digits with a mandatory exponent; then an optional l/L/f/F suffix.  The
# exponent marker is a lower-case 'e' with an optional sign.
# The spaces around '|' are part of the pattern text, so this relies on the
# regex being compiled in verbose mode (re.VERBOSE is PLY's default reflags).

t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal: double-quoted; the body is any mix of backslash escapes
# and characters other than backslash and newline, matched non-greedily up
# to the first closing quote.

t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c': same body as a string literal but in
# single quotes, with an optional L prefix.
# NOTE(review): PLY tries function-defined rules (such as t_ID) before
# string-defined ones, so the leading 'L' is probably consumed as an ID
# before this pattern is tried -- confirm whether L'c' ever matches here.

t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''

# Comments

def t_comment(t):
    r'/\*(.|\n)*?\*/'
    # The string above is the rule's regular expression (PLY reads it from
    # the docstring), so it must stay the first statement.  It matches a
    # /* ... */ comment non-greedily, newlines included.
    #
    # Nothing is returned; the only effect is adding the newlines inside the
    # comment to the lexer's line counter.
    newlines_in_comment = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + newlines_in_comment

# Preprocessor directive (ignored)

def t_preprocessor(t):
    r'\#(.)*?\n'
    # The string above is the rule's regular expression (PLY reads it from
    # the docstring), so it must stay the first statement.  It matches from
    # a '#' up to and including the next newline.
    #
    # Nothing is returned; the line counter is bumped by one for the newline
    # that ends the directive.
    lexer = t.lexer
    lexer.lineno = lexer.lineno + 1

def t_error(t):
    # Error rule: report the first character of t.value as illegal, then
    # skip one character so lexing can continue.
    offending = t.value[0]
    print("Illegal character %s" % repr(offending))
    t.lexer.skip(1)

# Build the lexer.  Called with no arguments, so PLY takes the rules
# (`tokens`, `t_ignore`, the t_* patterns and functions) from this module.
lexer = lex.lex()

# When run as a script, hand the lexer to PLY's runmain helper.
# NOTE(review): runmain presumably tokenizes a file named on the command
# line (or standard input) and prints each token -- confirm against PLY.
if __name__ == "__main__":

    lex.runmain(lexer)