# python_c_parser.py

import sys
import time
from operator import itemgetter, attrgetter
from xml.etree.ElementTree import ElementTree
from xml.sax.saxutils import escape

# parsing config
CONFIG_FILE = "D:/python/cparser.xml"

tr = ElementTree()
m = tr.parse(CONFIG_FILE)
# Prefix prepended to the name of every <...> include (see parseC).
path = m.find("includeDir").text
sXML = m.find("startXML")
# File the generated auto-complete XML is written to.
oXML = m.find("outputXML").text
# The seed XML is only used when its "active" attribute is exactly "true".
startXML = sXML.text if sXML.attrib["active"] == "true" else ""
# end of config parsing

# Normalized paths of the files parseC has already visited; used to avoid
# parsing the same file twice.
inc = []
# Collected entries, each a tuple (kind, name, retval-or-value, param list).
arr = []
# Kind tags stored as the first element of every entry in arr.
defineID = 2
funcID = 1

def trim(l):
	"""Return a new list with the zero-length items of l removed."""
	return [item for item in l if len(item) > 0]

def stripList(l):
	"""Strip surrounding whitespace from every string in l, in place.

	The same list object is returned for convenience.
	"""
	for pos, text in enumerate(l):
		l[pos] = text.strip()
	return l

def contains(lst, srch):
	"""Return True when some element of lst compares equal to srch."""
	return any(item == srch for item in lst)

def cont(wrd, pattern):
	"""Return True when at least one character of wrd occurs in pattern."""
	return any(ch in pattern for ch in wrd)

# Entries accepted so far by filter_existant (the de-duplicated result list).
apTmp = []
# Current bucket: [upper-cased first character, index in apTmp where the
# bucket starts]. None means no bucket has been opened yet.
zSlovo = [None, 0]
def makeHash(word):
	"""Return True when word is already present in the current bucket of apTmp.

	Words are expected to arrive sorted case-insensitively (the caller sorts by
	name.upper()), so all words sharing the same upper-cased first character
	are contiguous. The bucket key is therefore derived from the upper-cased
	word as well; keying on the raw first character would reopen the bucket
	every time the case of the first letter flips and let duplicates through.

	When word opens a new bucket, the bucket start is set to the current end
	of apTmp and False is returned without searching. Otherwise the names
	(entry[1]) appended to apTmp since the bucket start are compared with word
	exactly (case-sensitively).
	"""
	bucketKey = word.upper()[:1]
	if bucketKey != zSlovo[0]:
		zSlovo[0] = bucketKey
		zSlovo[1] = len(apTmp)
		return False
	return any(entry[1] == word for entry in apTmp[zSlovo[1]:])

def filter_existant(ar):
	"""Append to apTmp every entry of ar that passes the name filter.

	An entry is skipped when its name (entry[1]) starts with a digit, contains
	any of the characters space, tab, '.', '(', ')' or '#', or when makeHash
	reports the name as already present.
	"""
	for entry in ar:
		name = entry[1]
		unwanted = name[0].isdigit() or cont(name, " \t.()#")
		if unwanted or makeHash(name):
			continue
		apTmp.append(entry)


# Seed arr with the keywords of an existing auto-complete XML file, if one was
# configured. Every <KeyWord> becomes one entry; a keyword carrying a "func"
# attribute is tagged funcID, any other defineID. When an <Overload> child is
# present its retVal and <Param> names are kept as well.
tree = ElementTree()
try:
	if len(startXML) != 0:
		m = tree.parse(startXML).find("AutoComplete")
		for keyword in m.iter("KeyWord"):
			name = keyword.attrib["name"]
			func = funcID if "func" in keyword.attrib else defineID
			retval = ""
			parm = []
			ov = keyword.find("Overload")
			if ov is not None:
				retval = ov.attrib["retVal"]
				parm = [p.attrib["name"] for p in ov.iter("Param")]
			arr.append((func, name, retval, parm))
except Exception:
	# Best effort: an unreadable or malformed seed file must not abort the run.
	# Only Exception is caught so Ctrl-C and SystemExit still propagate.
	if startXML != "":
		print("loading startXML failed")

def parseDefine(s):
	"""Record the macro of a '#define NAME [VALUE]' line as a defineID entry.

	Tabs are treated like spaces. The second token becomes the name and the
	third token, when there is one, the value (otherwise ''). A line with
	fewer than two tokens raises IndexError.
	"""
	tokens = trim(s.replace("\t", " ").split(" "))
	value = tokens[2] if len(tokens) >= 3 else ''
	arr.append((defineID, tokens[1], value, []))
def parseFunc(s):
	"""Record the function declaration contained in s as a funcID entry in arr.

	s is split at "(": the text before the first "(" supplies the return type
	and the function name (its last whitespace-separated token); the text
	between that "(" and the last ")" found before any further "(" is split at
	"," into the stripped parameter list. An empty parameter list yields [''].

	Nothing is recorded when s has no "(" or when no name precedes it. The
	previous implementation ended up in the same place through exceptions the
	callers discard; its fallback handler re-indexed the same empty token list
	and so could never add an entry.
	"""
	parts = s.split("(")
	if len(parts) < 2:
		return
	head = trim(parts[0].replace("\t", " ").split(" "))
	if not head:
		return
	closed = parts[1].split(")")
	params = stripList(')'.join(closed[:-1]).split(","))
	arr.append((funcID, head[-1], ' '.join(head[:-1]), params))

def parseC(file):
	"""Scan one C source/header file and collect its #defines and function declarations.

	The path is normalized to forward slashes and remembered in inc; a file
	that is already listed there is not parsed again. If the file cannot be
	opened the function returns silently.

	The text is walked one character at a time while tracking block comments,
	"skip the rest of this line" state, string literals, brace depth and
	parenthesis depth. '#define' lines are handed to parseDefine, text that
	ends at a balancing ')' (or at ';' while parentheses are pending) is handed
	to parseFunc; exceptions raised by either are ignored. '#include' lines
	cause a recursive parseC call: <name> is looked up as path + name, "name"
	relative to the directory of the current file.

	Nothing is returned; results accumulate in the module-level arr.
	"""
	global inc
	global arr
	file=file.replace("\\", "/")
	if file not in inc:
		inc.append(file)
	else:
		return
	# nav: True while inside a "..." string literal
	nav = False
	# ncom: 1 while inside a /* ... */ comment
	ncom = 0
	# stInd: index in f where the text of the current statement/directive starts
	stInd = 0
	# ndef: True while on a #define line
	ndef = False
	# nusl: True while the rest of the current line is to be ignored
	nusl = False
	s = ""
	# zag: number of currently open "(" (reset with False, i.e. 0)
	zag = 0
	# bTyp: True from a "typedef" keyword until the next ";"
	bTyp = False
	# nZag2: brace nesting depth
	nZag2 = 0
	# bInc: True while on an #include line
	bInc = False
	try:
		fl = open(file, "r")
		print("parsing file: "+file)
	except:
		return
	f = fl.read()
	fl.close()

	for i in range(0,len(f)):

		# Block comment start. NOTE(review): the i+=1 does not skip a character,
		# the for loop rebinds i from range() on the next pass.
		if f[i:i+2] == "/*":
			ncom = 1
			i+=1
			continue

		# Block comment end: because of the i+=1, stInd ends up at the index
		# right after the closing "/".
		if f[i:i+2] == "*/":
			ncom = 0
			i+=1
			stInd = i+1
			continue

		if ncom == 1:
			continue

		# Line comment: ignore everything up to the line break.
		if f[i:i+2] == "//":
			nusl = True
			continue

		if (f[i]=="\r" or f[i]=="\n"):
			# End of an ignored line; stInd is left unchanged on this path.
			if nusl:
				nusl = False
				continue

			if bInc:
				bInc = False
				s = f[stInd:i]
				s1 = s.split("<")
				spl = s.split("\"")
				if len(s1) > 1:
					# #include <name>: resolved against the configured path prefix
					parseC(path+s1[1].split(">")[0].strip())
				elif len(spl) > 1:
					spl[1] = spl[1].replace("\\", "/")
					if len(spl[1]) > 0:
						spl2 = file.split("/")
						# exactly two quote characters on the line
						if len(spl) == 3:
							# #include "name": relative to the current file's directory
							parseC('/'.join(spl2[:len(spl2)-1])+"/"+spl[1])
			if ndef:
				# The #define line is complete; parse failures are ignored.
				ndef = False
				s = f[stInd:i]
				try:
					parseDefine(s)
				except:
					pass
				s = ""
				continue
			stInd = i+1
			continue

		if nusl:
			continue

		# An opening brace is not counted when the text since stInd contains
		# "extern" (presumably so the contents of extern "C" { ... } are still
		# scanned -- TODO confirm). Closing braces are always counted, so
		# nZag2 can become negative.
		if f[i]=="{" and f[stInd:i].find("extern")==-1:
			nZag2 += 1
		if f[i]=="}":
			nZag2 -= 1

		# Everything inside a counted brace pair is skipped.
		if nZag2 > 0:
			continue

		# Inside a string literal only the closing quote is processed.
		if f[i]!="\"" and nav:
			continue

		if f[i:i+7] == "#define":
			stInd = i
			ndef = True
			continue

		if f[i:i+8] == "#include":
			bInc = True
			stInd = i
			continue

		# Any other "#" directive: ignore the remainder of the line.
		if f[i]=="#":
			nusl = True

		if f[i]==";":
			if zag and not bTyp:
				# Parentheses are still pending at the end of the statement.
				s = f[stInd:i]
				try:
					parseFunc(s)
				except:
					pass
				zag = False
			else:
				bTyp = False
			# NOTE(review): starts two past the ";", i.e. also skips the
			# character that follows it -- presumably the line break.
			stInd = i+2

		# The ")" that brings the depth back to 0 closes a parameter list:
		# the text from stInd through this ")" is parsed as a declaration.
		if f[i]==")" and zag>0 and not ndef and not nusl and not bTyp:
			zag -= 1
			if zag == 0:
				s = f[stInd:i+1]
				try:
					parseFunc(s)
				except:
					pass
				zag = False
				stInd = i+1

		# A double quote toggles the in-string state.
		if f[i]=="\"":
			nav^=True


		if f[i] == "(" and not ndef and not nusl:
			zag += 1

		if f[i:i+7] == "typedef" and not nusl and not ndef:
			stInd = i
			bTyp = True

# Parse the file named on the command line (plus everything it includes),
# then sort the entries case-insensitively by name and drop the unwanted and
# duplicate ones.
# time.clock() was removed in Python 3.8; use perf_counter() where it exists
# and fall back to clock() only on interpreters that predate it.
timer = getattr(time, "perf_counter", None) or time.clock
tmStart = timer()
parseC(sys.argv[1])
print("sorting ...")
# filter_existant relies on this ordering for its duplicate detection.
arr = sorted(arr, key=lambda x: x[1].upper())
print("filtering ...")
filter_existant(arr)
arr = apTmp
print("Took: %.3f seconds" % (timer()-tmStart))

# export
# Write the collected entries as a Notepad++ auto-complete XML file.
# Attribute values are XML-escaped so that names, return types or parameters
# containing &, <, > or " cannot produce a malformed document, and the file is
# closed even if writing fails.
_ATTR_ENTITIES = {'"': "&quot;"}
with open(oXML, "w") as fl:
	fl.write("<?xml version=\"1.0\" encoding=\"Windows-1252\" ?>\n")
	fl.write("<NotepadPlus>\n")
	fl.write("\t<AutoComplete language=\"C\">\n")
	fl.write("\t\t<Environment ignoreCase=\"yes\" startFunc=\"(\" stopFunc=\")\" paramSeparator=\",\" terminal=\";\" />\n")
	for entry in arr:
		kind = entry[0]
		name = escape(entry[1], _ATTR_ENTITIES)
		if kind == defineID:
			fl.write("\t\t\t<KeyWord name=\""+name+"\"/>\n")
		if kind == funcID:
			# Line breaks inside the return type are flattened to spaces.
			retval = escape(entry[2].replace('\n', ' '), _ATTR_ENTITIES)
			fl.write("\t\t\t<KeyWord name=\""+name+"\" func=\"yes\">\n")
			fl.write("\t\t\t\t<Overload retVal=\""+retval+"\">\n")
			for param in entry[3]:
				fl.write("\t\t\t\t\t<Param name=\""+escape(param.strip(), _ATTR_ENTITIES)+"\"/>\n")

			fl.write("\t\t\t\t</Overload>\n")
			fl.write("\t\t\t</KeyWord>\n")

	fl.write("\t</AutoComplete>\n")
	fl.write("</NotepadPlus>\n")