You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1274 lines
52 KiB
Python

# -*- coding: utf-8 -*-
"""
CParser.py - C parsing library
Copyright 2010 Luke Campagnola
Distributed under MIT/X11 license. See license.txt for more information.
Used for extracting data such as macro definitions, variables, typedefs, and function
signatures from C files (preferably header files).
"""
import sys, re, os
__all__ = ['winDefs', 'CParser']
def winDefs(verbose=False):
    """Return a parser pre-loaded with the Windows headers bundled with CParser.

    The definitions can be used directly, or copied into another parser
    before it reads a different file:

        windefs = CParser.winDefs()
        p = CParser.CParser("headerFile.h", copyFrom=windefs)

    The headers are a selection of files included in Visual Studio (possibly
    not legal to distribute? Who knows.), stored in the 'headers' directory
    next to this module.  Some have been abridged because they take so long
    to parse.  Parse results are cached in 'headers/WinDefs.cache'.
    """
    headerDir = os.path.join(os.path.dirname(__file__), 'headers')
    headerPaths = []
    for fileName in ('WinNt.h', 'WinDef.h', 'WinBase.h', 'BaseTsd.h', 'WTypes.h', 'WinUser.h'):
        headerPaths.append(os.path.join(headerDir, fileName))
    ## Parsing is deferred (processAll=False) so the cache file can be supplied below.
    parser = CParser(
        headerPaths,
        types={'__int64': ('long long')},
        macros={'_WIN32': '', '_MSC_VER': '800', 'CONST': 'const', 'NO_STRICT': None},
        processAll=False
    )
    cachePath = os.path.join(headerDir, 'WinDefs.cache')
    parser.processAll(cache=cachePath, noCacheWarning=True, verbose=verbose)
    return parser
class CParser():
"""Class for parsing C code to extract variable, struct, enum, and function declarations as well as preprocessor macros. This is not a complete C parser; instead, it is meant to simplify the process
of extracting definitions from header files in the absence of a complete build system. Many files
will require some amount of manual intervention to parse properly (see 'replace' and extra arguments
to __init__)
Usage:
## create parser object, load two files
p = CParser(['header1.h', 'header2.h'])
## remove comments, preprocess, and search for declarations
p.processAll()
## just to see what was successfully parsed from the files
p.printAll()
## access parsed declarations
allValues = p.defs['values']
functionSignatures = p.defs['functions']
...
## To see what was not successfully parsed:
unp = p.processAll(returnUnparsed=True)
for s in unp:
print s
"""
cacheVersion = 22 ## increment every time cache structure or parsing changes to invalidate old cache files.
def __init__(self, files=None, replace=None, copyFrom=None, processAll=True, cache=None, verbose=False, **args):
    """Create a C parser object given a file or list of files.

    Files are read into memory and all processing happens on the in-memory copy.

    'files'      a single file name (str) or a list of file names.
    'replace'    optional {'searchStr': 'replaceStr', ...} applied (via re.sub)
                 to each file as it is loaded.
    'copyFrom'   another CParser, or a list/tuple of them, whose definitions
                 are imported into this parser.
    'processAll' if True, processAll(cache=cache, verbose=verbose) is called
                 at the end of construction.

    Extra keyword arguments specify the starting state of the parser; each
    key must be one of the definition categories in self.dataList.  For
    example, missing type declarations can be supplied with
        types={'UINT': ('unsigned int'), 'STRING': ('char', 1)}
    and preprocessor macros with
        macros={'WINAPI': ''}
    """
    self.defs = {}        ## all definitions, keyed by category
    self.fileDefs = {}    ## definitions grouped by the file they came from
    ## Options are remembered so a cache built with different options can be rejected.
    self.initOpts = args.copy()
    self.initOpts['files'] = []
    self.initOpts['replace'] = {}
    self.dataList = ['types', 'variables', 'fnmacros', 'macros', 'structs', 'unions', 'enums', 'functions', 'values']
    self.verbose = False
    self.fileOrder = []
    self.files = {}
    self.packList = {}    ## struct packing rules as defined by #pragma pack

    if files is not None:
        fileNames = [files] if type(files) is str else files
        for fileName in fileNames:
            self.loadFile(fileName, replace)

    ## start every category with an empty definition table
    self.defs.update([(category, {}) for category in self.dataList])
    self.compiledTypes = {}   ## translations from typedefs/structs/unions to fundamental types
    self.currentFile = None

    ## seed the parser with caller-supplied definitions
    for category in args:
        for defName, defValue in args[category].items():
            self.addDef(category, defName, defValue)

    ## pull in definitions from other parsers
    if copyFrom is not None:
        if type(copyFrom) not in [list, tuple]:
            copyFrom = [copyFrom]
        for other in copyFrom:
            self.importDict(other.fileDefs)

    if processAll:
        self.processAll(cache=cache, verbose=verbose)
def processAll(self, cache=None, returnUnparsed=False, printAfterPreprocess=False, noCacheWarning=True, verbose=False):
    """Remove comments, preprocess, and parse declarations from all loaded files.

    Everything operates in memory; the original files are never altered.

    'cache'  optional file where parse results are stored and retrieved.  The
             cache is rejected if the __init__ arguments changed or if any C
             file is newer than the cache.
    'returnUnparsed'  passed directly to parseDefs.
    'printAfterPreprocess'  debugging aid; prints each file after preprocessing.
    'noCacheWarning'  print a notice when files have to be parsed.

    Returns a list with the parseDefs result for each file, or None when the
    definitions were loaded from the cache instead.
    Raises Exception if a file could not be loaded and no valid cache exists.
    """
    self.verbose = verbose
    cacheHit = cache is not None and self.loadCache(cache, checkValidity=True)
    if cacheHit:
        if verbose:
            print("Loaded cached definitions; will skip parsing.")
        return  ## cached values loaded successfully, nothing left to do here

    if noCacheWarning or verbose:
        print("Parsing C header files (no valid cache found). This could take several minutes...")

    results = []
    for fileName in self.fileOrder:
        if self.files[fileName] is None:
            ## the file could not be loaded and there was no cache
            raise Exception('Could not find header file "%s" or a suitable cache file.' % fileName)
        if verbose:
            print("Removing comments from file '%s'..." % fileName)
        self.removeComments(fileName)
        if verbose:
            print("Preprocessing file '%s'..." % fileName)
        self.preprocess(fileName)
        if printAfterPreprocess:
            print("===== PREPROCSSED %s =======" % fileName)
            print(self.files[fileName])
        if verbose:
            print("Parsing definitions in file '%s'..." % fileName)
        results.append(self.parseDefs(fileName, returnUnparsed))

    if cache is not None:
        if verbose:
            print("Writing cache file '%s'" % cache)
        self.writeCache(cache)
    return results
def loadCache(self, cacheFile, checkValidity=False):
    """Load a cache file. Used internally if cache is specified in processAll().

    If 'cacheFile' does not exist as given, it is looked up in the 'headers'
    directory next to this module.

    If checkValidity=True, several checks are run before the cache is used:
      - the cache file must not be older than any existing source file
      - the options recorded in the cache must match the options used to
        initialize this CParser
      - the cache must not have been written with an older cacheVersion

    Returns True if the cached definitions were imported, False otherwise.
    Raises Exception if 'cacheFile' is not a str.
    """
    ## make sure cache file exists
    if type(cacheFile) is not str:
        raise Exception("cache file option must be a string.")
    if not os.path.isfile(cacheFile):
        ## If file doesn't exist, search for it in this module's path
        d = os.path.dirname(__file__)
        cacheFile = os.path.join(d, "headers", cacheFile)
        if not os.path.isfile(cacheFile):
            if self.verbose:
                print("Can't find requested cache file.")
            return False

    ## make sure cache is newer than all input files
    if checkValidity:
        mtime = os.stat(cacheFile).st_mtime
        for f in self.fileOrder:
            ## if file does not exist, then it does not count against the validity of the cache.
            if os.path.isfile(f) and os.stat(f).st_mtime > mtime:
                if self.verbose:
                    print("Cache file is out of date.")
                return False

    try:
        ## read cache file
        ## NOTE(review): pickle can execute arbitrary code while loading; only
        ## point this at cache files from a trusted source.
        import pickle
        with open(cacheFile, 'rb') as fd:  ## closed even if unpickling fails
            cache = pickle.load(fd)

        ## make sure __init__ options match
        if checkValidity:
            if cache['opts'] != self.initOpts:
                if self.verbose:
                    print("Cache file is not valid--created using different initialization options.")
                    print(cache['opts'])
                    print(self.initOpts)
                return False
            elif self.verbose:
                print("Cache init opts are OK:")
                print(cache['opts'])
            if cache['version'] < self.cacheVersion:
                if self.verbose:
                    print("Cache file is not valid--cache format has changed.")
                return False

        ## import all parse results
        self.importDict(cache['fileDefs'])
        return True
    except Exception:
        ## A corrupt or unreadable cache is not fatal: report it and fall back
        ## to parsing.  KeyboardInterrupt/SystemExit are deliberately not caught.
        print("Warning--cache read failed:")
        sys.excepthook(*sys.exc_info())
        return False
def importDict(self, data):
    """Import definitions from a dictionary shaped like CParser.fileDefs.

    'data' maps a file name to {category: {name: value}}.  Each definition is
    registered through addDef with currentFile set to the file it came from.
    Used internally; does not need to be called manually.
    """
    for fileName in list(data.keys()):
        self.currentFile = fileName
        perFile = data[fileName]
        for category in self.dataList:
            for defName in perFile[category]:
                self.addDef(category, defName, perFile[category][defName])
def writeCache(self, cacheFile):
    """Store all parsed declarations to 'cacheFile'. Used internally.

    The pickle holds the init options ('opts'), the per-file definitions
    ('fileDefs') and the cache format version ('version'); loadCache reads
    the same three keys.
    """
    import pickle
    cache = {
        'opts': self.initOpts,
        'fileDefs': self.fileDefs,
        'version': self.cacheVersion,
    }
    ## 'with' guarantees the data is flushed and the handle closed before
    ## returning, instead of relying on garbage collection.
    with open(cacheFile, 'wb') as fd:
        pickle.dump(cache, fd)
def loadFile(self, file, replace=None):
    """Read a file into memory, making replacements if requested.

    Called by __init__; should not be called manually.

    'replace' is an optional {'searchStr': 'replaceStr', ...} dict; each pair
    is applied with re.sub to the whole file text.

    Returns True if the file was read.  If the file does not exist a warning
    is printed, self.files[file] is set to None and False is returned.
    """
    if not os.path.isfile(file):
        ## Not a fatal error since we might be able to function properly if there is a cache file..
        print("Warning: C header '%s' is missing; this may cause trouble." % file)
        self.files[file] = None
        return False

    ## All newline types must be converted to \n.  Python 2 needs the 'U'
    ## flag for that; Python 3 does it by default in text mode and rejects
    ## 'U' from version 3.11 on.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'
    with open(file, mode) as fd:  ## closed even if read() raises
        text = fd.read()

    if replace is not None:
        for s in replace:
            text = re.sub(s, replace[s], text)
    self.files[file] = text

    self.fileOrder.append(file)
    bn = os.path.basename(file)
    self.initOpts['replace'][bn] = replace
    self.initOpts['files'].append(bn)  # only interested in the file names; the directory may change between systems.
    return True
#### Beginning of processing functions
def assertPyparsing(self):
    """Raise an Exception unless the pyparsing module is available.

    Relies on the module-level flag 'hasPyParsing'.
    """
    if hasPyParsing:
        return
    raise Exception("CParser class requires 'pyparsing' library for actual parsing work. Without this library, CParser can only be used with previously cached parse results.")
def removeComments(self, file):
    """Strip all C and C++ style comments from the in-memory copy of 'file'.

    The original file on disk is not altered.
    """
    self.assertPyparsing()
    source = self.files[file]
    blockComment = cStyleComment.suppress()
    lineComment = (Literal("//") + restOfLine).suppress()
    ## quoted strings are matched first so comment markers inside quotes are left alone
    stripper = quotedString | blockComment | lineComment
    self.files[file] = stripper.transformString(source)
def preprocess(self, file):
    """Scan the named file for preprocessor directives, removing them while expanding macros.

    Operates on the in-memory copy (self.files[file]); the original file is
    not altered.  Handles #if/#ifdef/#ifndef/#elif/#else/#endif, #define,
    #undef and '#pragma pack(...)'; any other directive is ignored.

    Every input line produces exactly one output line (directives and
    excluded code become blank lines), so the line indices recorded in
    self.packList[file] stay aligned with the preprocessed text.
    """
    self.assertPyparsing()
    self.buildParser()  ## we need this so that evalExpr works properly
    self.currentFile = file
    packStack = [(None, None)]  ## stack for #pragma pack push/pop
    self.packList[file] = [(0, None)]
    packing = None  ## current packing value
    text = self.files[file]

    ## First join together lines split by \\n
    text = Literal('\\\n').suppress().transformString(text)

    # define the structure of a macro definition
    name = Word(alphas+'_', alphanums+'_')('name')
    self.ppDefine = name.setWhitespaceChars(' \t')("macro") + Optional(lparen + delimitedList(name) + rparen).setWhitespaceChars(' \t')('args') + SkipTo(LineEnd())('value')
    self.ppDefine.setParseAction(self.processMacroDefn)

    ## Replaces 'defined X' / 'defined(X)' with '1' or '0' depending on
    ## whether X is currently a known macro.  Built once, used for every
    ## #if / #elif line.
    def pa(t):
        return ['0', '1'][t['name'] in self.defs['macros'] or t['name'] in self.defs['fnmacros']]
    definedExpr = (
        Keyword('defined') +
        (name | lparen + name + rparen)
    ).setParseAction(pa)

    ## Comb through lines, process all directives
    lines = text.split('\n')
    result = []
    ## whitespace is allowed between '#' and the directive name
    directive = re.compile(r'\s*#\s*([a-zA-Z]+)(.*)$')
    macroHead = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)(.*)$')
    ifTrue = [True]
    ifHit = []
    for i, line in enumerate(lines):
        m = directive.match(line)
        if m is None:  # regular code line
            if ifTrue[-1]:  # only include if we are inside the correct section of an IF block
                result.append(self.expandMacros(line))  # expand all known macros
            else:
                result.append('')
            continue

        ## Directive line: always replaced by a blank line.  Appending it
        ## up front means the early 'continue's below cannot drop the line
        ## and shift the numbering used by packList / packingAt.
        result.append('')
        d = m.groups()[0]
        rest = m.groups()[1]

        if d == 'ifdef':
            d = 'if'
            rest = 'defined '+rest
        elif d == 'ifndef':
            d = 'if'
            rest = '!defined '+rest

        ## Evaluate 'defined' operator before expanding macros
        if d in ['if', 'elif']:
            rest = definedExpr.transformString(rest)
        elif d in ['define', 'undef']:
            head = macroHead.match(rest)
            if head is None:
                ## no macro name follows the directive; skip it instead of crashing
                if self.verbose:
                    print("Ignoring malformed directive: %s" % line)
                continue
            macroName, rest = head.groups()

        ## Expand macros if needed
        if rest is not None and (all(ifTrue) or d in ['if', 'elif']):
            rest = self.expandMacros(rest)

        if d == 'elif':
            if ifHit[-1] or not all(ifTrue[:-1]):
                ev = False
            else:
                ev = self.evalPreprocessorExpr(rest)
            if self.verbose:
                print("%s %s %s" % (" "*(len(ifTrue)-2) + line, rest, ev))
            ifTrue[-1] = ev
            ifHit[-1] = ifHit[-1] or ev
        elif d == 'else':
            if self.verbose:
                print("%s %s" % (" "*(len(ifTrue)-2) + line, not ifHit[-1]))
            ifTrue[-1] = (not ifHit[-1]) and all(ifTrue[:-1])
            ifHit[-1] = True
        elif d == 'endif':
            ifTrue.pop()
            ifHit.pop()
            if self.verbose:
                print(" "*(len(ifTrue)-1) + line)
        elif d == 'if':
            if all(ifTrue):
                ev = self.evalPreprocessorExpr(rest)
            else:
                ev = False
            if self.verbose:
                print("%s %s %s" % (" "*(len(ifTrue)-1) + line, rest, ev))
            ifTrue.append(ev)
            ifHit.append(ev)
        elif d == 'define':
            if not ifTrue[-1]:
                continue
            if self.verbose:
                print("%s %s %s" % (" "*(len(ifTrue)) + "define:", macroName, rest))
            try:
                self.ppDefine.parseString(macroName + ' ' + rest)  ## macro is registered here
            except Exception:
                print("%s %s %s" % ("Error processing macro definition:", macroName, rest))
                print("%s %s" % (" ", sys.exc_info()[1]))
        elif d == 'undef':
            if not ifTrue[-1]:
                continue
            try:
                self.remDef('macros', macroName.strip())
            except Exception:
                ## un-defining an unknown macro (KeyError) is silently accepted
                if sys.exc_info()[0] is not KeyError:
                    sys.excepthook(*sys.exc_info())
                    print("Error removing macro definition '%s'" % macroName.strip())
        elif d == 'pragma':  ## Check for changes in structure packing
            if not ifTrue[-1]:
                continue
            pm = re.match(r'\s+pack\s*\(([^\)]+)\)', rest)
            if pm is None:
                continue
            opts = [s.strip() for s in pm.groups()[0].split(',')]
            pushpop = packId = val = None
            for o in opts:
                if o in ['push', 'pop']:
                    pushpop = o
                elif o.isdigit():
                    val = int(o)
                else:
                    packId = o
            if val is not None:
                packing = val
            if pushpop == 'push':
                packStack.append((packing, packId))
            elif opts[0] == 'pop':
                if packId is None:
                    packStack.pop()
                else:
                    ## 'j', not 'i': the line index must survive for packList below
                    ind = None
                    for j in range(len(packStack)):
                        if packStack[j][1] == packId:
                            ind = j
                            break
                    if ind is not None:
                        packStack = packStack[:ind]
                if not packStack:
                    ## unbalanced pop: fall back to the default-packing entry
                    packStack = [(None, None)]
                if val is None:
                    packing = packStack[-1][0]
            else:
                packing = int(opts[0])
            if self.verbose:
                print(">> Packing changed to %s at line %d" % (str(packing), i))
            self.packList[file].append((i, packing))
        else:
            pass  ## Ignore any other directives

    self.files[file] = '\n'.join(result)
def evalPreprocessorExpr(self, expr):
    """Evaluate the expression of an #if / #elif directive and return a bool.

    'expr' is expected to have had 'defined' operators and known macros
    substituted already.  The C operators '!', '&&' and '||' are rewritten to
    their Python spelling and every remaining identifier (an undefined macro)
    becomes 0, then the result is evaluated as a Python expression.
    Expressions that cannot be evaluated count as False.
    """
    ## make a few alterations so the expression can be eval'd
    macroDiffs = (
        ## '!=' must be tried before '!', otherwise it turns into ' not ='
        ## and every inequality test fails to evaluate.
        Literal('!=') |
        Literal('!').setParseAction(lambda: ' not ') |
        Literal('&&').setParseAction(lambda: ' and ') |
        Literal('||').setParseAction(lambda: ' or ') |
        Word(alphas+'_', alphanums+'_').setParseAction(lambda: '0'))
    expr2 = macroDiffs.transformString(expr)
    try:
        ## NOTE(review): eval() runs text taken from the header file; only
        ## parse headers from trusted sources.
        ev = bool(eval(expr2))
    except Exception:
        if self.verbose:
            print("Error evaluating preprocessor expression: %s [%s]" % (expr, expr2))
            print("%s %s" % (" ", sys.exc_info()[1]))
        ev = False
    return ev
#def updateMacroDefns(self):
##self.macroExpr << MatchFirst( [Keyword(m)('macro') for m in self.defs['macros']] )
##self.macroExpr.setParseAction(self.processMacroRef)
## regex is faster than pyparsing.
## Matches quoted strings and macros
##names = self.defs['macros'].keys() + self.defs['fnmacros'].keys()
#if len(self.macroListString) == 0:
#self.macroRegex = None
#else:
#self.macroRegex = re.compile(
#r'("(\\"|[^"])*")|(\b(%s)\b)' % self.macroListString
#)
#def updateFnMacroDefns(self):
#self.fnMacroExpr << MatchFirst( [(Keyword(m)('macro') + lparen + Group(delimitedList(expression))('args') + rparen) for m in self.defs['fnmacros']] )
#self.fnMacroExpr.setParseAction(self.processFnMacroRef)
def processMacroDefn(self, t):
    """Parse action for #define: register the macro and return its normalized text.

    't' carries the parsed 'macro' name, optional 'args' and 'value'.
      - If the value is the name of an existing function macro, that macro's
        compiled definition is copied under the new name.
      - Without arguments, the value is stored under 'macros' and its
        evaluated result under 'values'.
      - With arguments, the compiled template is stored under 'fnmacros'.
    """
    if self.verbose:
        print("%s %s" % ("MACRO:", t))
    macroName = t.macro
    body = t.value.strip()
    fnmacros = self.defs['fnmacros']

    if body in fnmacros:
        ## alias of an existing function macro
        self.addDef('fnmacros', macroName, fnmacros[body])
        if self.verbose:
            print(" Copy fn macro %s => %s" % (body, macroName))
    elif t.args == '':
        evaluated = self.evalExpr(body)
        self.addDef('macros', macroName, body)
        self.addDef('values', macroName, evaluated)
        if self.verbose:
            print("%s %s %s %s" % (" Add macro:", macroName, "("+str(evaluated)+")", self.defs['macros'][macroName]))
    else:
        compiled = self.compileFnMacro(body, list(t.args))
        self.addDef('fnmacros', macroName, compiled)
        if self.verbose:
            print("%s %s %s %s" % (" Add fn macro:", macroName, t.args, self.defs['fnmacros'][macroName]))
    return "#define " + macroName + " " + body
def compileFnMacro(self, text, args):
    """Turn a function macro spec into a compiled description.

    'text' is the macro body and 'args' the list of its parameter names.
    Returns (template, argOrder): 'template' is a %-format string in which
    each parameter occurrence outside a double-quoted string is replaced by
    '%s', and 'argOrder' lists, per '%s', the index into 'args' of the
    parameter that belongs there.  For example
        compileFnMacro('a + b / a', ['a', 'b'])  ->  ('%s + %s / %s', [0, 1, 0])

    Literal '%' characters in the body are escaped as '%%' so that applying
    the template with the % operator reproduces them instead of failing.
    """
    ## find all instances of each arg in text (quoted strings are matched
    ## first so parameter names inside them are not substituted)
    argRegex = re.compile(r'("(\\"|[^"])*")|(\b(%s)\b)' % ('|'.join(args)))
    start = 0
    parts = []
    argOrder = []
    N = 3  ## index (into groups()) of the matched parameter name
    for m in argRegex.finditer(text):
        arg = m.groups()[N]
        if arg is not None:
            parts.append(text[start:m.start(N)].replace('%', '%%') + '%s')
            start = m.end(N)
            argOrder.append(args.index(arg))
    parts.append(text[start:].replace('%', '%%'))
    return (''.join(parts), argOrder)
def expandMacros(self, line):
    """Return 'line' with all known macros and function macros expanded.

    Identifiers inside double-quoted strings are left alone.  A function
    macro name that is not followed by an argument list is left as-is; any
    other failure while expanding a function macro is reported and re-raised.
    Expansion is a single pass: the replacement text is not rescanned.
    """
    reg = re.compile(r'("(\\"|[^"])*")|(\b(\w+)\b)')
    parts = []
    start = 0
    N = 3  ## the group number to check for macro names
    macros = self.defs['macros']
    fnmacros = self.defs['fnmacros']
    for m in reg.finditer(line):
        name = m.groups()[N]
        ## Skip quoted strings, and skip identifiers inside the argument list
        ## of a function macro that was already expanded: handling them would
        ## move 'start' backwards and duplicate part of the line.
        if name is None or m.start(N) < start:
            continue
        if name in macros:
            parts.append(line[start:m.start(N)])
            start = m.end(N)
            ## a macro registered with value None expands to nothing
            value = macros[name]
            parts.append('' if value is None else value)
        elif name in fnmacros:
            try:
                exp, end = self.expandFnMacro(name, line[m.end(N):])
            except Exception as exc:
                ## expandFnMacro signals "no argument list follows" with a
                ## leading 0 in the exception args; that case is ignored.
                if exc.args[:1] != (0,):
                    print("%s %s %s" % ("Function macro expansion failed:", name, line[m.end(N):]))
                    raise
                continue
            parts.append(line[start:m.start(N)])
            start = end + m.end(N)
            parts.append(exp)
    parts.append(line[start:])
    return ''.join(parts)
#def expandMacros(self, line):
#if self.macroRegex is None:
#return line
#parts = []
#start = 0
#N = 3 ## the group number to check for macro names
#for m in self.macroRegex.finditer(line):
#name = m.groups()[N]
#if name is not None:
#if name in self.defs['macros']:
#parts.append(line[start:m.start(N)])
#start = m.end(N)
#parts.append(self.defs['macros'][name])
#elif name in self.defs['fnmacros']:
#try: ## If function macro expansion fails, just ignore it.
#exp, end = self.expandFnMacro(name, line[m.end(N):])
#parts.append(line[start:m.start(N)])
#start = end + m.end(N)
#parts.append(exp)
#except:
#if sys.exc_info()[1][0] != 0:
#print "Function macro expansion failed:", name, line[m.end(N):]
#raise
#else:
#raise Exception("Macro '%s' not found (internal error)" % name)
#parts.append(line[start:])
#return ''.join(parts)
def expandFnMacro(self, name, text):
    """Expand one call of function macro 'name'.

    'text' is the remainder of the line right after the macro name and must
    start with the parenthesised argument list.

    Returns (expansion, end) where 'end' is the offset in 'text' just past
    the argument list.  Raises Exception(0, message) when no argument list
    follows the name.
    """
    ## the compiled definition looks like ('%s + %s / %s', (0, 0, 1))
    template, argOrder = self.defs['fnmacros'][name][0], self.defs['fnmacros'][name][1]
    callPattern = stringStart + lparen + Group(delimitedList(expression))('args') + rparen
    found = list(callPattern.scanString(text, 1))
    if not found:
        raise Exception(0, "Function macro '%s' not followed by (...)" % name)
    tokens, begin, finish = found[0]
    callArgs = tokens[0]
    substituted = template % tuple([callArgs[idx] for idx in argOrder])
    return (substituted, finish)
# parse action to replace macro references with their respective definition
#def processMacroRef(self, t):
#return self.defs['macros'][t.macro]
#def processFnMacroRef(self, t):
#m = self.defs['fnmacros'][t.macro]
##print "=====>>"
##print "Process FN MACRO:", t
##print " macro defn:", t.macro, m
##print " macro call:", t.args
### m looks like ('a + b', ('a', 'b'))
#newStr = m[0][:]
##print " starting str:", newStr
#try:
#for i in range(len(m[1])):
##print " step", i
#arg = m[1][i]
##print " arg:", arg, '=>', t.args[i]
#newStr = Keyword(arg).copy().setParseAction(lambda: t.args[i]).transformString(newStr)
##print " new str:", newStr
#except:
##sys.excepthook(*sys.exc_info())
#raise
##print "<<====="
#return newStr
def parseDefs(self, file, returnUnparsed=False):
    """Scan the named file for variable, struct, enum, and function declarations.

    Returns the list of successfully parsed token groups.  If returnUnparsed
    is True, returns instead a string holding everything that failed to match
    (with runs of blank lines collapsed), which is useful for debugging.
    """
    self.assertPyparsing()
    self.currentFile = file
    grammar = self.buildParser()
    source = self.files[file]
    if not returnUnparsed:
        return [match[0] for match in grammar.scanString(source)]
    leftover = grammar.suppress().transformString(source)
    return re.sub(r'\n\s*\n', '\n', leftover)
def buildParser(self):
    """Builds the entire tree of parser elements for the C language (the bits we support, anyway).

    The grammar is built once and stored on self.parser; later calls return
    the stored object.  Parse actions attached here (processTypedef,
    processVariable, processFunction, processStruct, processEnum) register
    definitions as a side effect of parsing.

    Returns the top-level parser, which matches a typedef, a variable
    declaration or a function definition.
    """
    if hasattr(self, 'parser'):
        return self.parser
    self.assertPyparsing()
    ## struct and enum types refer to themselves (via typeSpec), so they are
    ## declared as Forward placeholders and filled in further down.
    self.structType = Forward()
    self.enumType = Forward()
    self.typeSpec = (typeQualifier + (
        fundType |
        Optional(kwl(sizeModifiers + signModifiers)) + ident |
        self.structType |
        self.enumType
    ) + typeQualifier + msModifier).setParseAction(recombine)
    #self.argList = Forward()
    ### Abstract declarators for use in function pointer arguments
    # Thus begins the extremely hairy business of parsing C declarators.
    # Whomever decided this was a reasonable syntax should probably never breed.
    # The following parsers combined with the processDeclarator function
    # allow us to turn a nest of type modifiers into a correctly
    # ordered list of modifiers.
    self.declarator = Forward()
    self.abstractDeclarator = Forward()
    ## abstract declarators look like:
    # <empty string>
    # *
    # **[num]
    # (*)(int, int)
    # *( )(int, int)[10]
    # ...etc...
    self.abstractDeclarator << Group(
        typeQualifier + Group(ZeroOrMore('*'))('ptrs') + typeQualifier +
        ((Optional('&')('ref')) | (lparen + self.abstractDeclarator + rparen)('center')) +
        Optional(lparen + Optional(delimitedList(Group(
            self.typeSpec('type') +
            self.abstractDeclarator('decl') +
            Optional(Literal('=').suppress() + expression, default=None)('val')
        )), default=None) + rparen)('args') +
        Group(ZeroOrMore(lbrack + Optional(expression, default='-1') + rbrack))('arrays')
    )
    ## Argument list may consist of declarators or abstract declarators
    #self.argList << delimitedList(Group(
    #self.typeSpec('type') +
    #(self.declarator('decl') | self.abstractDeclarator('decl')) +
    #Optional(Keyword('=')) + expression
    #))
    ## declarators look like:
    # varName
    # *varName
    # **varName[num]
    # (*fnName)(int, int)
    # * fnName(int arg1=0)[10]
    # ...etc...
    self.declarator << Group(
        typeQualifier + callConv + Group(ZeroOrMore('*'))('ptrs') + typeQualifier +
        ((Optional('&')('ref') + ident('name')) | (lparen + self.declarator + rparen)('center')) +
        Optional(lparen + Optional(delimitedList(Group(
            self.typeSpec('type') +
            (self.declarator | self.abstractDeclarator)('decl') +
            Optional(Literal('=').suppress() + expression, default=None)('val')
        )), default=None) + rparen)('args') +
        Group(ZeroOrMore(lbrack + Optional(expression, default='-1') + rbrack))('arrays')
    )
    self.declaratorList = Group(delimitedList(self.declarator))
    ## typedef
    self.typeDecl = Keyword('typedef') + self.typeSpec('type') + self.declaratorList('declList') + semi
    self.typeDecl.setParseAction(self.processTypedef)
    ## variable declaration
    self.variableDecl = Group(self.typeSpec('type') + Optional(self.declaratorList('declList')) + Optional(Literal('=').suppress() + (expression('value') | (lbrace + Group(delimitedList(expression))('arrayValues') + rbrace)))) + semi
    self.variableDecl.setParseAction(self.processVariable)
    ## function definition
    #self.paramDecl = Group(self.typeSpec + (self.declarator | self.abstractDeclarator)) + Optional(Literal('=').suppress() + expression('value'))
    self.typelessFunctionDecl = self.declarator('decl') + nestedExpr('{', '}').suppress()
    self.functionDecl = self.typeSpec('type') + self.declarator('decl') + nestedExpr('{', '}').suppress()
    self.functionDecl.setParseAction(self.processFunction)
    ## Struct definition
    self.structDecl = Forward()
    structKW = (Keyword('struct') | Keyword('union'))
    #self.structType << structKW('structType') + ((Optional(ident)('name') + lbrace + Group(ZeroOrMore( Group(self.structDecl | self.variableDecl.copy().setParseAction(lambda: None)) ))('members') + rbrace) | ident('name'))
    ## A struct member is a variable declaration (copied with a no-op parse
    ## action so members are not registered as global variables) or an
    ## inline function definition, which is matched but suppressed.
    self.structMember = (
        Group(self.variableDecl.copy().setParseAction(lambda: None)) |
        (self.typeSpec + self.declarator + nestedExpr('{', '}')).suppress() |
        (self.declarator + nestedExpr('{', '}')).suppress()
    )
    self.declList = lbrace + Group(OneOrMore(self.structMember))('members') + rbrace
    self.structType << (Keyword('struct') | Keyword('union'))('structType') + ((Optional(ident)('name') + self.declList) | ident('name'))
    self.structType.setParseAction(self.processStruct)
    #self.updateStructDefn()
    self.structDecl = self.structType + semi
    ## enum definition
    enumVarDecl = Group(ident('name') + Optional(Literal('=').suppress() + (integer('value') | ident('valueName'))))
    self.enumType << Keyword('enum') + (Optional(ident)('name') + lbrace + Group(delimitedList(enumVarDecl))('members') + rbrace | ident('name'))
    self.enumType.setParseAction(self.processEnum)
    self.enumDecl = self.enumType + semi
    #self.parser = (self.typeDecl | self.variableDecl | self.structDecl | self.enumDecl | self.functionDecl)
    ## structDecl / enumDecl are not listed as top-level alternatives;
    ## struct and enum types are reachable through typeSpec.
    self.parser = (self.typeDecl | self.variableDecl | self.functionDecl)
    return self.parser
def processDeclarator(self, decl):
    """Process a declarator (without base type) and return a tuple (name, [modifiers]).

    Modifiers are appended in this order: calling convention, pointers,
    array sizes, function arguments, reference, then the modifiers of a
    parenthesised inner declarator.  'name' is None for an abstract
    declarator.  See processType(...) for the meaning of each modifier.
    """
    def filled(key):
        ## True when the declarator has a non-empty entry for 'key'
        return key in decl and len(decl[key]) > 0

    modifiers = []
    if filled('callConv'):
        modifiers.append(decl['callConv'])
    if filled('ptrs'):
        modifiers.append('*' * len(decl['ptrs']))
    if filled('arrays'):
        sizes = []
        for dim in decl['arrays']:
            sizes.append(self.evalExpr(dim))
        modifiers.append(sizes)
    if filled('args'):
        argSpecs = decl['args']
        if argSpecs[0] is None:
            ## empty parameter list
            modifiers.append(())
        else:
            signature = []
            for a in argSpecs:
                signature.append(self.processType(a['type'], a['decl']) + (a['val'][0],))
            modifiers.append(tuple(signature))
    if 'ref' in decl:
        modifiers.append('&')

    declName = None
    if 'center' in decl:
        ## parenthesised inner declarator, e.g. the (*fn) in (*fn)(int)
        innerName, innerModifiers = self.processDeclarator(decl['center'][0])
        if innerName is not None:
            declName = innerName
        modifiers.extend(innerModifiers)
    if 'name' in decl:
        declName = decl['name']
    return (declName, modifiers)
def processType(self, typ, decl):
    """Combine a base type with a declarator into a serialized (name, type) description.

    Returns (name, [basetype, modifier, modifier, ...]) where
      - name is the declarator's name, or None for an abstract declarator
      - basetype is 'typ' as given
      - the modifiers come from processDeclarator and can be:
          '*'    - pointer (multiple pointers "***" allowed)
          '&'    - reference
          '__X'  - calling convention (windows only). X can be 'cdecl' or 'stdcall'
          list   - array. Value(s) indicate the length of each array, -1 for incomplete type.
          tuple  - function; one entry per argument, each being the
                   processType result for that argument plus its default value.
    Examples:
        int *x[10]  =>  ('x', ['int', '*', [10]])
        char fn(int x)  =>  ('fn', ['char', (('x', ['int'], None),)])
    """
    name, modifiers = self.processDeclarator(decl)
    description = [typ]
    description.extend(modifiers)
    return (name, description)
def processEnum(self, s, l, t):
    """Parse action for enum types: register the enum and return 'enum <name>'.

    Anonymous enums are named 'anonEnum<N>' with the first unused N.  If the
    enum is not known yet, each member is added to 'values', the member table
    to 'enums' and the type itself to 'types'.  Members without an explicit
    value continue counting from the previous member.

    Errors are reported through sys.excepthook rather than raised; in that
    case None is returned.
    """
    try:
        if self.verbose:
            print("%s %s" % ("ENUM:", t))
        known = self.defs['enums']
        if t.name == '':
            ## anonymous enum: pick the first free generated name
            n = 0
            name = 'anonEnum%d' % n
            while name in known:
                n += 1
                name = 'anonEnum%d' % n
        else:
            name = t.name[0]
        if self.verbose:
            print("%s %s" % (" name:", name))

        if name not in known:
            counter = 0
            members = {}
            for member in t.members:
                if member.value != '':
                    counter = eval(member.value)
                if member.valueName != '':
                    ## value given as the name of an earlier member
                    counter = members[member.valueName]
                members[member.name] = counter
                self.addDef('values', member.name, counter)
                counter += 1
            if self.verbose:
                print("%s %s" % (" members:", members))
            self.addDef('enums', name, members)
            self.addDef('types', 'enum '+name, ('enum', name))
        return ('enum ' + name)
    except:
        if self.verbose:
            print("%s %s" % ("Error processing enum:", t))
        sys.excepthook(*sys.exc_info())
def processFunction(self, s, l, t):
    """Parse action for function definitions: register the function signature.

    The signature is stored under 'functions' as (returnType, arguments),
    where 'arguments' is the trailing tuple produced by processType.
    Errors are reported through sys.excepthook rather than raised.
    """
    if self.verbose:
        print("%s %s %s" % ("FUNCTION", t, t.keys()))
    try:
        name, decl = self.processType(t.type, t.decl[0])
        ## the last modifier of a function declarator must be its argument tuple
        isFunction = len(decl) != 0 and type(decl[-1]) == tuple
        if not isFunction:
            print(t)
            raise Exception("Incorrect declarator type for function definition.")
        if self.verbose:
            print("%s %s" % (" name:", name))
            print("%s %s" % (" sig:", decl))
        returnType, arguments = decl[:-1], decl[-1]
        self.addDef('functions', name, (returnType, arguments))
    except:
        if self.verbose:
            print("%s %s" % ("Error processing function:", t))
        sys.excepthook(*sys.exc_info())
def packingAt(self, line):
    """Return the structure packing value in effect at the given line number.

    Walks the (line, packing) entries recorded for the current file, which
    are assumed to be in ascending line order, and returns the packing of
    the last entry at or before 'line' (None if there is none).
    """
    current = None
    for startLine, value in self.packList[self.currentFile]:
        if startLine > line:
            break
        current = value
    return current
def processStruct(self, s, l, t):
    """Parse action for struct/union types: register the type and return '<struct|union> <name>'.

    Anonymous structs are named 'anon_<struct|union><N>' with the first
    unused N.  The definition is (re)registered when members are given, when
    the name is unknown, or when the stored definition is empty.  The stored
    value is {'pack': packing, 'members': [(name, type, value), ...]} with
    the packing taken from the #pragma pack state at the struct's line.

    Errors are reported through sys.excepthook rather than raised; in that
    case None is returned.
    """
    try:
        strTyp = t.structType  # struct or union
        table = strTyp + 's'   # 'structs' or 'unions'
        ## check for extra packing rules
        packing = self.packingAt(lineno(l, s))
        if self.verbose:
            print("%s %s %s" % (strTyp.upper(), t.name, t))

        if t.name == '':
            ## anonymous: pick the first free generated name
            n = 0
            sname = 'anon_%s%d' % (strTyp, n)
            while sname in self.defs[table]:
                n += 1
                sname = 'anon_%s%d' % (strTyp, n)
        elif type(t.name) is str:
            sname = t.name
        else:
            sname = t.name[0]
        if self.verbose:
            print("%s %s" % (" NAME:", sname))

        if len(t.members) > 0 or sname not in self.defs[table] or self.defs[table][sname] == {}:
            if self.verbose:
                print(" NEW " + strTyp.upper())
            members = []
            for m in t.members:
                memberDecl = m[0]
                typ = memberDecl.type
                val = self.evalExpr(m)
                if self.verbose:
                    print("%s %s %s %s" % (" member:", m, memberDecl.keys(), memberDecl.declList))
                if len(memberDecl.declList) == 0:  ## anonymous member
                    members.append((None, [typ], None))
                for d in memberDecl.declList:
                    name, decl = self.processType(typ, d)
                    members.append((name, decl, val))
                    if self.verbose:
                        print("%s %s %s %s" % (" ", name, decl, val))
            self.addDef(table, sname, {'pack': packing, 'members': members})
            self.addDef('types', strTyp+' '+sname, (strTyp, sname))
        return strTyp+' '+sname
    except:
        sys.excepthook(*sys.exc_info())
def processVariable(self, s, l, t):
    """Parse action for variable declarations.

    Each declarator in the statement is registered either as a function
    prototype (when its last type modifier is an argument tuple) under
    'functions', or as a variable under 'variables' (value, type) and
    'values'.  Errors are reported through sys.excepthook rather than raised.
    """
    if self.verbose:
        print("%s %s" % ("VARIABLE:", t))
    try:
        statement = t[0]
        val = self.evalExpr(statement)
        for d in statement.declList:
            name, typ = self.processType(statement.type, d)
            if type(typ[-1]) is not tuple:
                if self.verbose:
                    print("%s %s %s %s" % (" Add variable:", name, typ, val))
                self.addDef('variables', name, (val, typ))
                self.addDef('values', name, val)
            else:
                ## trailing tuple of arguments: this is a function prototype
                if self.verbose:
                    print("%s %s %s %s" % (" Add function prototype:", name, typ, val))
                self.addDef('functions', name, (typ[:-1], typ[-1]))
    except:
        sys.excepthook(*sys.exc_info())
def processTypedef(self, s, l, t):
    """Parse action for typedefs: register every declared name under 'types'.

    Each declarator of the typedef is combined with the base type through
    processType and stored as name -> [basetype, modifiers...].
    """
    if self.verbose:
        print("%s %s" % ("TYPE:", t))
    baseType = t.type
    for declarator in t.declList:
        name, description = self.processType(baseType, declarator)
        if self.verbose:
            print("%s %s %s" % (" ", name, description))
        self.addDef('types', name, description)
def evalExpr(self, toks):
    """Evaluate an expression and return its value, or None if that is not possible.

    'toks' is either an expression string or a parse result carrying
    'arrayValues' (a list of expressions, evaluated to a list) or 'value'
    (a single expression).  Names are resolved against self.defs['values'].

    Currently only works for expressions that also happen to be valid Python
    expressions.  Previous variable declarations are not taken into account.
    Evaluation failures are reported only when self.verbose is set.
    """
    ## 'basestring' only exists on Python 2; fall back to str elsewhere so
    ## the string branch is not lost to a NameError.
    try:
        stringTypes = basestring
    except NameError:
        stringTypes = str
    try:
        if isinstance(toks, stringTypes):
            val = self.eval(toks, None, self.defs['values'])
        elif toks.arrayValues != '':
            val = [self.eval(x, None, self.defs['values']) for x in toks.arrayValues]
        elif toks.value != '':
            val = self.eval(toks.value, None, self.defs['values'])
        else:
            val = None
        return val
    except Exception:
        ## Best effort: an expression that cannot be evaluated has no value.
        ## KeyboardInterrupt/SystemExit are deliberately not swallowed.
        if self.verbose:
            print("%s %s" % (" failed eval:", toks))
            print("%s %s" % (" ", sys.exc_info()[1]))
        return None
def eval(self, expr, *args):
    """Evaluate ``expr`` with the builtin eval() after some clean-up.

    The stripped text is first run through a pyparsing transform built from
    ``quotedString | number | cast``, where ``cast`` (a parenthesised
    typeSpec followed by an abstractDeclarator) is suppressed, i.e. removed
    from the text.  Whatever is left is handed to eval() together with any
    extra positional arguments (globals / locals).  Returns None when
    nothing is left to evaluate.

    NOTE(review): this runs eval() on text that presumably originates from
    the parsed source files -- only use it on input you trust.
    """
    cleaned = expr.strip()
    castExpr = lparen + self.typeSpec + self.abstractDeclarator + rparen
    cast = castExpr.suppress()
    rewriter = quotedString | number | cast
    cleaned = rewriter.transformString(cleaned)
    return None if cleaned == '' else eval(cleaned, *args)
def printAll(self, file=None):
    """Pretty-print every category of definitions.  Useful for debugging.

    For each key in ``self.dataList`` a banner line is printed, followed by
    the matching dictionary: taken from ``self.defs`` when ``file`` is None,
    otherwise from ``self.fileDefs[file]``.
    """
    from pprint import pprint
    for category in self.dataList:
        print("============== %s ==================" % category)
        source = self.defs if file is None else self.fileDefs[file]
        pprint(source[category])
def addDef(self, typ, name, val):
    """Store ``val`` as definition ``name`` in category ``typ``.

    The value is written to the global set (``self.defs``) and to the set of
    the file currently being processed (``self.fileDefs``), which is keyed
    by the base name of ``self.currentFile`` (or None when there is no
    current file).  A per-file entry with one empty dict per category in
    ``self.dataList`` is created the first time a file is seen.
    """
    self.defs[typ][name] = val
    current = self.currentFile
    baseName = None if current is None else os.path.basename(current)
    if baseName not in self.fileDefs:
        self.fileDefs[baseName] = dict((k, {}) for k in self.dataList)
    self.fileDefs[baseName][typ][name] = val
def remDef(self, typ, name):
    """Delete definition ``name`` of category ``typ``.

    The entry is removed from the global set (``self.defs``) and from the
    per-file set keyed by the base name of ``self.currentFile`` (None when
    there is no current file).  A KeyError is raised if either entry is
    missing.
    """
    current = self.currentFile
    baseName = os.path.basename(current) if current is not None else None
    del self.defs[typ][name]
    del self.fileDefs[baseName][typ][name]
def isFundType(self, typ):
    """Return True if ``typ`` names a fundamental C type, struct, union or enum.

    Only ``typ[0]`` is inspected.  It counts as fundamental when it starts
    with 'struct ', 'union ' or 'enum ', or when every whitespace-separated
    word in it is one of the known base type, size modifier or sign
    modifier names.
    """
    if typ[0].startswith(('struct ', 'union ', 'enum ')):
        return True
    known = baseTypes + sizeModifiers + signModifiers
    return all(word in known for word in typ[0].split())
def evalType(self, typ):
    """Evaluate a named type into its fundamental type.

    ``typ`` is a sequence whose first element is a type name; any further
    elements are carried over unchanged.  The name is looked up repeatedly
    in ``self.defs['types']`` until self.isFundType() accepts the result.
    The word 'signed' is removed from the final base name.

    The argument is never modified: a new list is returned.  (Assigning to
    ``typ[0]`` in place would alter the caller's list -- possibly a stored
    definition -- and would fail for tuples.)

    Raises Exception if the typedef chain loops back on itself or refers to
    a name that is not in ``self.defs['types']``.
    """
    used = []
    while True:
        if self.isFundType(typ):
            ## remove 'signed' before returning evaluated type
            base = re.sub(r'\bsigned\b', '', typ[0]).strip()
            return [base] + list(typ[1:])
        parent = typ[0]
        if parent in used:
            raise Exception('Recursive loop while evaluating types. (typedefs are %s)' % (' -> '.join(used+[parent])))
        used.append(parent)
        if parent not in self.defs['types']:
            raise Exception('Unknown type "%s" (typedefs are %s)' % (parent, ' -> '.join(used)))
        pt = self.defs['types'][parent]
        ## Substitute the typedef's definition for its name and keep the
        ## remaining elements of the type being resolved.
        typ = pt + typ[1:]
def find(self, name):
    """Search the per-file definitions for ``name``.

    When ``name`` is a string, keys are compared for equality and each hit
    is reported as ``(file, category)``.  Anything else is passed to
    re.match() as the pattern and each hit is reported as
    ``(file, category, key)``.  Returns the list of hits.
    """
    exact = isinstance(name, basestring)
    hits = []
    for fileName, categories in self.fileDefs.items():
        for category, entries in categories.items():
            for key in entries:
                if exact:
                    if key == name:
                        hits.append((fileName, category))
                elif re.match(name, key):
                    hits.append((fileName, category, key))
    return hits
def findText(self, text):
    """Search the text of every loaded file for ``text``.

    ``self.files`` maps a file key to that file's contents as one string.
    Returns a list of ``(file, lineIndex, line)`` tuples, one per line that
    contains ``text``; line indices start at 0.
    """
    hits = []
    for fileName in self.files:
        lines = self.files[fileName].split('\n')
        for index, line in enumerate(lines):
            if text in line:
                hits.append((fileName, index, line))
    return hits
## pyparsing is optional at import time: hasPyParsing records whether it could
## be loaded (and packrat caching enabled) so later code can check the flag.
hasPyParsing = False
try:
    from pyparsing import *
    ParserElement.enablePackrat()
    hasPyParsing = True
except Exception:
    ## no need to do anything yet as we might not be using any parsing functions..
    ## (Not a bare except, so KeyboardInterrupt / SystemExit still propagate.)
    pass
## Define some common language elements if pyparsing is available.
## Everything below is built from pyparsing classes, so it is only attempted
## when the guarded import above succeeded.
if hasPyParsing:
## Some basic definitions
## Forward declaration: the expression grammar is recursive and is filled in
## later with `expression << ...`.
expression = Forward()
## An expression wrapped in parentheses.
pexpr = '(' + expression + ')'
## Names of the numeric base types.
numTypes = ['int', 'float', 'double', '__int64']
## All base type names: the numeric ones plus char, bool and void.
baseTypes = ['char', 'bool', 'void'] + numTypes
## Words that modify the size / signedness of a base type.
sizeModifiers = ['short', 'long']
signModifiers = ['signed', 'unsigned']
## Qualifier words; used below to build typeQualifier.
qualifiers = ['const', 'static', 'volatile', 'inline', 'restrict', 'near', 'far']
## Modifier words used below to build msModifier.
msModifiers = ['__based', '__declspec', '__fastcall', '__restrict', '__sptr', '__uptr', '__w64', '__unaligned', '__nullterminated']
## Every reserved word; used below to build `keyword`, which identifiers
## are not allowed to match.
keywords = ['struct', 'enum', 'union', '__stdcall', '__cdecl'] + qualifiers + baseTypes + sizeModifiers + signModifiers
def kwl(strs):
    """Build a parser element matching any one of ``strs`` as a whole word.

    The strings are joined with '|' into a single regular expression that
    is anchored with word boundaries on both sides.
    """
    #return MatchFirst(map(Keyword,strs))
    alternatives = '|'.join(strs)
    pattern = r'\b(%s)\b' % alternatives
    return Regex(pattern)
## Matches any single reserved word from `keywords`.
keyword = kwl(keywords)
## Characters treated as part of a word when checking word boundaries.
wordchars = alphanums+'_$'
## An identifier: a word starting with a letter or underscore that is not a
## keyword; the parse action returns the first token.
ident = (WordStart(wordchars) + ~keyword + Word(alphas+"_",alphanums+"_$") + WordEnd(wordchars)).setParseAction(lambda t: t[0])
#integer = Combine(Optional("-") + (Word( nums ) | Combine("0x" + Word(hexnums))))
## Punctuation.  Each literal is suppressed, so it is matched but left out of
## the parse results.
semi = Literal(";").ignore(quotedString).suppress()
lbrace = Literal("{").ignore(quotedString).suppress()
rbrace = Literal("}").ignore(quotedString).suppress()
lbrack = Literal("[").ignore(quotedString).suppress()
rbrack = Literal("]").ignore(quotedString).suppress()
lparen = Literal("(").ignore(quotedString).suppress()
rparen = Literal(")").ignore(quotedString).suppress()
## Integer literals; the parse actions strip trailing 'U' / 'L' characters
## (upper case only).
hexint = Regex('-?0x[%s]+[UL]*'%hexnums).setParseAction(lambda t: t[0].rstrip('UL'))
decint = Regex(r'-?\d+[UL]*').setParseAction(lambda t: t[0].rstrip('UL'))
integer = (hexint | decint)
## Floating point literal: optional sign, digits with optional fraction (or a
## bare fraction), optional exponent.
floating = Regex(r'-?((\d+(\.\d*)?)|(\.\d+))([eE]-?\d+)?')
## NOTE(review): `floating` is tried before `decint`, and its regex also
## matches a bare run of digits, so `decint` (and its U/L suffix stripping)
## looks unreachable for decimal literals here -- confirm.
number = (hexint | floating | decint)
## ':' followed by an integer.
bitfieldspec = ":" + integer
## Operator tokens accepted between atoms and before / after an atom.
biOperator = oneOf("+ - / * | & || && ! ~ ^ % == != > < >= <= -> . :: << >> = ? :")
uniRightOperator = oneOf("++ --")
uniLeftOperator = oneOf("++ -- - + * sizeof new")
## Same shape as `ident`, but keywords are not excluded and there is no parse
## action.
name = (WordStart(wordchars) + Word(alphas+"_",alphanums+"_$") + WordEnd(wordchars))
#number = Word(hexnums + ".-+xUL").setParseAction(lambda t: t[0].rstrip('UL'))
#stars = Optional(Word('*&'), default='')('ptrs') ## may need to separate & from * later?
## Optional __cdecl / __stdcall keyword, stored under the results name
## 'callConv'.
callConv = Optional(Keyword('__cdecl')|Keyword('__stdcall'))('callConv')
## Removes '__name' from all type specs.. may cause trouble.
underscore2Ident = (WordStart(wordchars) + ~keyword + '__' + Word(alphanums,alphanums+"_$") + WordEnd(wordchars)).setParseAction(lambda t: t[0])
## Any number of '__name' identifiers (each optionally followed by a nested
## parenthesised expression) or qualifier words; suppressed from the results.
typeQualifier = ZeroOrMore((underscore2Ident + Optional(nestedExpr())) | kwl(qualifiers)).suppress()
## Any number of msModifiers words, each optionally followed by a nested
## parenthesised expression; suppressed from the results.
msModifier = ZeroOrMore(kwl(msModifiers) + Optional(nestedExpr())).suppress()
## '*', '&' or '::ident', each followed by optional (suppressed) qualifiers.
pointerOperator = (
'*' + typeQualifier |
'&' + typeQualifier |
'::' + ident + typeQualifier
)
## language elements
## One or more sign / size / base type words, joined into a single
## space-separated string by the parse action.
fundType = OneOrMore(kwl(signModifiers + sizeModifiers + baseTypes)).setParseAction(lambda t: ' '.join(t))
## Is there a better way to process expressions with cast operators??
## castAtom and uncastAtom are identical except that castAtom also accepts an
## optional, suppressed '(ident)' prefix after the left-hand unary operators.
## The core of both is a call, an indexed identifier, a plain identifier, a
## number, a quoted string or a parenthesised expression.
castAtom = (
ZeroOrMore(uniLeftOperator) + Optional('('+ident+')').suppress() +
((
ident + '(' + Optional(delimitedList(expression)) + ')' |
ident + OneOrMore('[' + expression + ']') |
ident | number | quotedString
) |
('(' + expression + ')')) +
ZeroOrMore(uniRightOperator)
)
uncastAtom = (
ZeroOrMore(uniLeftOperator) +
((
ident + '(' + Optional(delimitedList(expression)) + ')' |
ident + OneOrMore('[' + expression + ']') |
ident | number | quotedString
) |
('(' + expression + ')')) +
ZeroOrMore(uniRightOperator)
)
## The cast form is tried first.
atom = castAtom | uncastAtom
## Fill in the forward-declared expression: atoms separated by binary
## operators, grouped into one result.
expression << Group(
atom + ZeroOrMore(biOperator + atom)
)
## An expression enclosed in (suppressed) square brackets.
arrayOp = lbrack + expression + rbrack
def recombine(tok):
    """Flatten a tree of tokens and join the pieces with single spaces."""
    pieces = flatten(tok.asList())
    return " ".join(pieces)

## Every parsed expression is collapsed back into one string.
expression.setParseAction(recombine)
def flatten(lst):
    """Return the leaves of a nested list/tuple structure as a flat list of strings.

    Elements that are exactly of type list or tuple are descended into
    recursively; every other element is converted with str().
    """
    flat = []
    for item in lst:
        if type(item) not in [list, tuple]:
            flat.append(str(item))
            continue
        flat.extend(flatten(item))
    return flat
def printParseResults(pr, depth=0, name=''):
    """For debugging; pretty-print a parse result object as an indented tree.

    Each line starts with ``name`` padded to 20 characters, a ':' and two
    dots per nesting level.  A ParseResults object prints that prefix alone
    and then recurses into its items, labelling each item with the first
    results name that refers to that very object (or '' if there is none).
    Any other value is printed after the prefix using str().
    """
    prefix = name.ljust(20) + ':' + '..'*depth
    if not isinstance(pr, ParseResults):
        print(prefix + str(pr))
        return
    print(prefix)
    for item in pr:
        label = next((key for key in pr.keys() if pr[key] is item), '')
        printParseResults(item, depth+1, label)
## Just for fun..
## When run as a script: parse every file named on the command line and dump
## everything that was found.
if __name__ == '__main__':
    p = CParser(sys.argv[1:])
    p.processAll()
    p.printAll()