Advertisement
Guest User

ddddddd

a guest
Jun 9th, 2017
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.11 KB | None | 0 0
  1. import struct
  2. import mmh3
  3. import uuid
  4. from datetime import datetime
  5. import os
  6. from OCR import *
  7. from SpellCheck import *
  8. from langdetect import detect_langs, detect
  9. from datetime import datetime
  10. import time
  11. import requests
  12. from sys import exit
  13. from pyPdf import PdfFileReader
  14. import json
  15.  
  16. import ast
  17.  
  18. ORIGINAL_DIRECTORY=0
  19. FILEPATH_DIRECTORY=0
  20. GUI=0
  21. ROOT=0
  22.  
  23. API_ENDPOINT="192.168.133.110"
  24.  
  25. def replacePunctuations(str):
  26. #`~!@#$%^&*()_-+={[}]|\:;"'<,>.?/
  27. str=str.replace("`"," ")
  28. str=str.replace("~"," ")
  29. str=str.replace("!"," ")
  30. str=str.replace("@"," ")
  31. str=str.replace("#"," ")
  32. str=str.replace("$"," ")
  33. str=str.replace("%"," ")
  34. str=str.replace("^"," ")
  35. str=str.replace("&"," ")
  36. str=str.replace("*"," ")
  37. str=str.replace("("," ")
  38. str=str.replace(")"," ")
  39. str=str.replace("-"," ")
  40. str=str.replace("_"," ")
  41. str=str.replace("="," ")
  42. str=str.replace("+"," ")
  43. str=str.replace("["," ")
  44. str=str.replace("{"," ")
  45. str=str.replace("]"," ")
  46. str=str.replace("}"," ")
  47. str=str.replace("\\"," ")
  48. str=str.replace("|"," ")
  49. str=str.replace(";"," ")
  50. str=str.replace(":"," ")
  51. str=str.replace("'"," ")
  52. str=str.replace('"'," ")
  53. str=str.replace(","," ")
  54. str=str.replace("<"," ")
  55. str=str.replace("."," ")
  56. str=str.replace(">"," ")
  57. str=str.replace("/"," ")
  58. str=str.replace("?"," ")
  59. return str
  60.  
  61. def generate_uuid(timestamp, *vals):
  62. """Generates a UUID from the supplied timestamp and unique values.
  63.  
  64. The left half of the UUID is the unix time representation of timestamp
  65. (milliseconds since the epoch). The right half is a murmur3 hash of the
  66. string representation of each of vals, concatenated together with a ';'
  67. character. It is important to always pass the value fields in the same order
  68. to get a repeatable UUID result.
  69.  
  70. Sample usage:
  71. >>> generate_uuid(datetime.utcnow(), "555-1212", "555-6623")
  72. UUID('0000014b-8417-1058-b1b2-7118ad5be2f0')
  73. """
  74. mmh = mmh3.hash_bytes(';'.join(map(str, vals)))
  75. ts = long((timestamp - datetime.utcfromtimestamp(0)).total_seconds()) * 1000
  76. return uuid.UUID(bytes=struct.pack('>Q', ts) + mmh[:8])
  77.  
  78. def setOriginalDirectory(str):
  79. try:
  80. global ORIGINAL_DIRECTORY
  81. ORIGINAL_DIRECTORY=str
  82. except:
  83. print("Fatal Error Setting the Original Directory")
  84. printToConsole("////Fatal Error Setting the Original Directory////")
  85. exit()
  86.  
  87.  
  88. def setFilePathDirectory(str):
  89. global FILEPATH_DIRECTORY
  90. try:
  91. FILEPATH_DIRECTORY=str
  92. except:
  93. print("Error setting the filepath directory, setting to Desktop/PDFs. Please put PDFs into that folder")
  94. printToConsole("////Error setting the filepath directory, setting to Desktop/PDFs. Please put PDFs into that folder////")
  95. FILEPATH_DIRECTORY="/home/jsi/Desktop/PDFs"
  96.  
  97. def setGUI(gui, root):
  98. try:
  99. global GUI, ROOT
  100. GUI=gui
  101. ROOT=root
  102. except:
  103. print("Fatal error setting gui or root references")
  104. printToConsole("////Fatal error setting gui or root references////")
  105. exit()
  106.  
  107. def createFilePathDirectories(optionSaveImages=False,optionSaveSessions=False):
  108. try:
  109. os.chdir(FILEPATH_DIRECTORY)
  110. if optionSaveSessions:
  111. if not os.path.exists("OutputData"):
  112. os.makedirs("OutputData")
  113. os.chdir("./OutputData")
  114. if not os.path.exists("IndividualData"):
  115. os.makedirs("IndividualData")
  116. os.chdir(FILEPATH_DIRECTORY)
  117. if optionSaveImages:
  118. if not os.path.exists("OutputData"):
  119. os.makedirs("OutputData")
  120. os.chdir("./OutputData")
  121. if not os.path.exists("PDFjpegs"):
  122. os.makedirs("PDFjpegs")
  123. except:
  124. print("Error creating file directories, possibility of fatal error")
  125. printToConsole("////Error creating file directories, possibility of fatal error////")
  126.  
  127. def changeDirectory(str=FILEPATH_DIRECTORY):
  128. os.chdir(FILEPATH_DIRECTORY)
  129. try:
  130. os.chdir(str)
  131. except:
  132. os.chdir(FILEPATH_DIRECTORY)
  133. print("Error changing Directory")
  134. printToConsole("////Error Changing Directory////")
  135.  
  136. def sendToServer(payload, optionConnectToServer=False):
  137. if optionConnectToServer:
  138. link="http://10.0.2.2:9092/CollectionExplorer/Collections/sessionSolr/api/sessions/"
  139. try:
  140. print("here")
  141. temp=[payload]
  142. print (json.dumps(temp))
  143. print("yo")
  144. r=requests.post(link,json=temp)
  145. print("now")
  146. print r.status_code
  147. printToConsole("Server Response Code: "+str(r.status_code)+" ")
  148. except:
  149. print("Error connecting to server")
  150. printToConsole("////Error Connection to Server////")
  151. else:
  152. print("Not Connecting to Server")
  153. printToConsole("////Not Connecting to Server////")
  154.  
  155. def createFullTextFile():
  156. try:
  157. changeDirectory("./OutputData")
  158. #wipes(writes) file, then appends to
  159. fullTextFile=open("Fulltext.txt","w")
  160. fullTextFile=open("Fulltext.txt","a")
  161. fullTextFile.close()
  162. return fullTextFile
  163. except:
  164. print("Error creating full text, possibility of fatal error")
  165. printToConsole("////Error creating full text, possibility of fatal error////")
  166. return 0
  167.  
  168. def createInstanceTextFile(filename):
  169. try:
  170. changeDirectory("./OutputData/IndividualData")
  171. invTextFile=open(filename+".txt","w")
  172. invTextFile.close()
  173. return invTextFile
  174. except:
  175. print("Error creating instance text, possibility of fatal error")
  176. printToConsole("////Error creating instance text, possibility of fatal error////")
  177. return 0
  178.  
  179. def openFullTextFile():
  180. try:
  181. changeDirectory("./OutputData")
  182. fullTextFile=open("Fulltext.txt","a")
  183. return fullTextFile
  184. except:
  185. print("Error opening full text file, possibility of fatal error")
  186. printToConsole("////Error opening full text file, possibility of fatal error////")
  187. return fullTextFile
  188.  
  189. def openInvTextFile(filename):
  190. try:
  191. changeDirectory("./OutputData/IndividualData")
  192. invTextFile=open(filename,"w")
  193. return invTextFile
  194. except:
  195. print("Error opening instance text file, possibility of fatal error")
  196. printToConsole("////Error opening instance text file, possibility of fatal error////")
  197. return instanceTextFile
  198.  
  199. def writeToFiles(str,fullTextFile=None,invTextFile=None):
  200. try:
  201. str=str.encode("utf-8")
  202. except:
  203. print("Error encoding string to utf-8, attempting to encode in 'ascii'")
  204. printToConsole("////Error encoding string to utf-8, attempting to encode in 'ascii'////")
  205. try:
  206. str=str.decode("utf-8")
  207. str=str.encode("ascii","ignore")
  208. except:
  209. print("Error encoding string to ascii, possibility of fatal error")
  210. printToConsole("////Error encoding string to ascii, possibility of fatal error////")
  211.  
  212. print(str)
  213. printToConsole(str)
  214. try:
  215. if not isinstance(fullTextFile, int) and not fullTextFile==None:
  216. if not fullTextFile.closed:
  217. fullTextFile.write(str+"\n")
  218. if not isinstance(invTextFile, int) and not invTextFile==None:
  219. if not invTextFile.closed:
  220. invTextFile.write(str+"\n")
  221. except:
  222. print("Error writing to files")
  223. printToConsole("////Error writing to files////")
  224.  
  225. def createContextDictionary(userDict):
  226. try:
  227. os.chdir(ORIGINAL_DIRECTORY)
  228. os.chdir("./LangDictionaries")
  229. contextDictFile=open("ContextDict.txt","w")
  230. for word in userDict:
  231. contextDictFile.write(word.encode("utf-8")+"\n")
  232. contextDictFile.close()
  233. os.chdir(ORIGINAL_DIRECTORY)
  234. printToConsole("////Context Dictionary Successfully Created////")
  235. except:
  236. print("Error creating context dictionary")
  237. printToConsole("////Error creating context dictionary////")
  238.  
  239. def buildUserDict(instanceText):
  240. userWords={}
  241. userDict={}
  242. try:
  243. printToConsole("////Building Context Dictionary////")
  244. for item in instanceText:
  245. print item
  246. item=replacePunctuations(item.lower())
  247. items=item.split()
  248. for word in items:
  249. if not word.isdigit():
  250. word=word.strip(' ')
  251. corrWord=correction(word)
  252. if wordExists(word):
  253. userDict[word]=1
  254. elif word.lower() in userWords:
  255. userWords[word]+=1
  256. if userWords[word]>=3:
  257. userDict[word]=1
  258. else:
  259. userWords[word]=1
  260. createContextDictionary(userDict)
  261. except:
  262. print("Error building context dictionary")
  263. printToConsole("////Error building context dictionary////")
  264.  
  265.  
  266. def writeSessionData(sessionData,fullTextFile,invTextFile,optionSaveSessions=False):
  267. try:
  268. if optionSaveSessions:
  269. fullTextFile=openFullTextFile()
  270. invTextFile=openInvTextFile(sessionData["products"]["filename"])
  271.  
  272. writeToFiles("{",fullTextFile,invTextFile)
  273. for item in sessionData:
  274. if isinstance(sessionData[item],dict):
  275. writeToFiles("\t'"+item+"' : [{",fullTextFile,invTextFile)
  276. for smallerItem in sessionData[item]:
  277. if isinstance(sessionData[item][smallerItem],list):
  278. writeToFiles("\t\t'"+smallerItem+"' : [{",fullTextFile,invTextFile)
  279. for smallestItem in sessionData[item][smallerItem]:
  280. writeToFiles("\t\t\t'"+smallestItem+"',",fullTextFile,invTextFile)
  281. writeToFiles("\t\t}],",fullTextFile,invTextFile)
  282. else:
  283. writeToFiles("\t\t'"+smallerItem+"' : '"+sessionData[item][smallerItem]+"',",fullTextFile,invTextFile)
  284. writeToFiles("\t}],",fullTextFile,invTextFile)
  285. else:
  286. writeToFiles("\t'"+item+"' : '"+sessionData[item]+"',",fullTextFile,invTextFile)
  287. writeToFiles("},",fullTextFile,invTextFile)
  288.  
  289. if not isinstance(fullTextFile, int):
  290. fullTextFile.close()
  291. if not isinstance(invTextFile, int):
  292. invTextFile.close()
  293. printToConsole("////Session Data Successfully Written////")
  294. except:
  295. print("Error write Session Data to files")
  296. printToConsole("////Error write Session Data to files////")
  297.  
  298. def createSessionData(sessionguid,sessionIndex,filename,instanceText,language):
  299. sessionData={}
  300. try:
  301. sessionData["sessionguid"]=sessionguid.urn[9:]
  302. sessionData["starttime"]=datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")+"+00:00"
  303. #sessionData["casename"]="Offnet API Loader Test #"+str(sessionIndex)
  304. sessionData["casename"]="@MultiContentPreviewTestCase"
  305. sessionData["targetname"]="@MultiContentPreviewTestTarget"
  306. sessionData["linename"]="@MultiContentPreviewTestLine"
  307. sessionData["sourcesystemname"]="Sessions API"
  308. sessionData["sessiontype"]="Telephony"
  309. sessionData["isevidence"]=False
  310.  
  311. #sessionData["languages"]=[language]
  312.  
  313. #sessionData["products"]={}
  314. #sessionData["products"]["filename"]=filename+".txt"
  315. #sessionData["products"]["contenttype"]="text/plain"
  316. #sessionData["products"]["data"]=[]
  317. sessionData["fulltext"]=[]
  318. for item in instanceText:
  319. sessionData["fulltext"].append(item)
  320. printToConsole("////Session Data Successfully Created////")
  321. return sessionData
  322. except:
  323. print("Error creating session data")
  324. printToConsole("////Error creating session data////")
  325. return sessionData
  326.  
  327. def spellCheck(instanceText,language="eng",optionSpellCheck=False,optionContextualSpellCheck=False):
  328. try:
  329. x=0
  330. if optionSpellCheck or optionContextualSpellCheck:
  331. for item in instanceText:
  332. instanceText[x]=replacePunctuations(item)
  333. item=instanceText[x]
  334. items=item.split()
  335. y=0
  336. for word in items:
  337. origWord=word
  338. word=word.lower()
  339. length=len(items)
  340. corrWord=word.lower()
  341. if not word.isdigit() and len(word)>=2:
  342. #Finding the Correct word, if it exists
  343. if wordExists(word,language) and optionSpellCheck:
  344. corrWord=word
  345. elif wordExists(word,"con") and optionContextualSpellCheck:
  346. corrWord=word
  347. else:
  348. if optionContextualSpellCheck:
  349. corrWord=correction(word,"con")
  350. if optionSpellCheck:
  351. if word==corrWord:
  352. corrWord=correction(word,language)
  353. #Replacing the worse
  354. if length==1:
  355. instanceText[x]=instanceText[x].replace(origWord,corrWord.title())
  356. elif y==0:
  357. instanceText[x]=instanceText[x].replace(origWord+" ",corrWord.title()+" ",1)
  358. elif y==length-1:
  359. tmpList = instanceText[x].rsplit(origWord, 1)
  360. instanceText[x]= corrWord.title().join(tmpList)
  361. else:
  362. instanceText[x]=instanceText[x].replace(" "+origWord+" "," "+corrWord.title()+" ")
  363. y+=1
  364. x+=1
  365. printToConsole("////Session Data Successfully Spell Checked////")
  366. return instanceText
  367. except:
  368. print("Error applying spell check on instance text")
  369. printToConsole("////Error applying spell check on instance text////")
  370. return instanceText
  371.  
  372. def printToConsole(str):
  373. try:
  374. GUI.printToConsole(str)
  375. ROOT.update()
  376. except:
  377. print("Error printing to Console")
  378.  
  379. def checkPDFValidity(str):
  380. try:
  381. changeDirectory(FILEPATH_DIRECTORY)
  382. doc = PdfFileReader(file(str+".pdf", "rb"))
  383. if doc.numPages>0:
  384. return True
  385. else:
  386. return False
  387. except AssertionError:
  388. print("File is not a valid pdf (AssertionError)")
  389. printToConsole("////File is not a valid pdf////")
  390. except IOError:
  391. print("File is not a valid pdf (IOError)")
  392. printToConsole("////File is not a valid pdf////")
  393. except:
  394. print("Error checking validity of file")
  395. printToConsole("////Error checking validity of file////")
  396.  
  397. def setAPIconfig(apiEP,credUsername,credPassword):
  398. global API_ENDPOINT,CRED_USERNAME,CRED_PASSWORD
  399. API_ENDPOINT=apiEP
  400. CRED_USERNAME=credUsername
  401. CRED_PASSWORD=credPassword
  402.  
  403. def getFullTextSessionData(fullTextFile):
  404. changeDirectory("./OutputData")
  405. fullTextFile=open('Fulltext.txt','r')
  406. sessionData=fullTextFile.read()
  407.  
  408. r=(sessionData)
  409.  
  410. print r
  411.  
  412. return r[0]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement