Guest User

Untitled

a guest
Mar 1st, 2015
505
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.31 KB | None | 0 0
  1. #!/usr/bin/python3
  2. import sys
  3.  
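#File format (as written by the program-start section at the end of this file):
#   header [inputArraySizeLSB,inputArraySizeMSB]            original (uncompressed) length, modulo 65536
#   header [compressedArraySizeLSB,compressedArraySizeMSB]  token stream length, modulo 65536
#   token stream; see the returnLZCodes function notes for the token layout.
#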
  9.  
  10. MAXCODELEN = 126
  11.  
  12. def readFile(file):
  13.         a = []
  14.         f = open(file,'rb')
  15.         b = f.read(1)
  16.         while b!=b'':
  17.                 a.append(ord(b))
  18.                 b = f.read(1)
  19.         f.close()
  20.         return a
  21.  
  22. def writeFile(file,a):
  23.         f = open(file,'wb+')
  24.         f.write(bytes(a))
  25.         f.close()
  26.  
  27. #winsize: default -1 for SoF, else curpos-winsize bounded by SoF.
  28. def resetSlidingWindow(curpos,winsize=None):
  29.         if winsize is None:
  30.             a=0
  31.         else:
  32.             if winsize>curpos:
  33.                 a=0
  34.             else:
  35.                 a=curpos-winsize
  36.         return a
  37.  
  38. #decomp v0.2
  39. #[0zzz zzzz] = (z+1) size of following literal bytes
  40. #[1zzz zzzz,xxxx xxxx] = (z+2) size of bytes at x bytes backref
  41. #
  42. def returnLZCodes(count,backrefptr,symbol):
  43.         a = []
  44.         if isinstance(symbol,list):
  45.             b = symbol
  46.         else:
  47.             b = [symbol]
  48.         if backrefptr == 0:
  49.             a = [count&0x7F] + b
  50.         else:
  51.             a = [count|0x80,backrefptr] # + b
  52.         return a
  53.        
  54.  
  55.        
  56.        
  57.        
  58. # do not allow "compression" of files less than 2 bytes long        
  59. def compress(inArray):
  60.     global MAXCODELEN
  61.     lbhptr = 0    #filestart
  62.     cptr = 1      #filestart+1
  63.     matchfound = 0  #flag1
  64.     foundcount = 0
  65.     foundbackref = 0
  66.     outArray = []
  67.     literalbuf = [inArray[0]]
  68.     EoA = len(inArray)  #not +1, cuz we need var at EoF to emit as literal
  69.     while cptr < EoA:
  70.         if inArray[lbhptr] == inArray[cptr]:
  71.             csrchptr = cptr
  72.             clbhptr = lbhptr
  73.             rlecount = 0
  74.             mcount = 0
  75.             while inArray[clbhptr] == inArray[csrchptr]:
  76.                 if (csrchptr+1)==(EoA-1): #do not allow final tok to be consumed
  77.                     break
  78.                 if mcount >= MAXCODELEN:
  79.                     break
  80.                 if clbhptr==cptr:
  81.                     clbhptr = lbhptr
  82.                     rlecount += 1
  83.                 mcount += 1
  84.                 clbhptr += 1
  85.                 csrchptr += 1
  86.             if (mcount > foundcount) and (mcount > 3):  #replace 3 later with codelen
  87.                 matchfound = 1
  88.                 foundcount = mcount
  89.                 foundbackref = cptr-lbhptr
  90.                 foundposend = csrchptr
  91.         lbhptr += 1
  92.        
  93.         if lbhptr >= cptr:
  94.             if matchfound == 1:
  95.                 if len(literalbuf) > 0:
  96.                    # if len(literalbuf) > 255:
  97.                    #     print("Error literalbuffer overrun!", str(literalbuf))
  98.                     outArray.extend(returnLZCodes(len(literalbuf)-1,0,literalbuf))
  99.                     del literalbuf[:]
  100.                 outArray.extend(returnLZCodes(foundcount-2,foundbackref,inArray[foundposend]))
  101.                # print("Match found: count " + str(foundcount) + ", backref " + str(foundbackref) + ", position " + str(cptr) + ", trailing num " + str(inArray[cptr+foundcount]) + " sanity check match " + str(inArray[foundposend]))
  102.                 cptr = foundposend-1  #to compensate for lookahead literal write, also if last symbol, next check falls through and exits out while loop normally
  103.                 matchfound = 0
  104.                 foundcount = 0
  105.                 foundbackref = 0
  106.             else:
  107.                 literalbuf.append(inArray[cptr])
  108.                 if len(literalbuf) >= MAXCODELEN:  #flush buffer if reached max code size
  109.                     outArray.extend(returnLZCodes(len(literalbuf)-1,0,literalbuf))
  110.                     del literalbuf[:]
  111.                     print("literalbuf filled, forcing buffer flush.")
  112.             cptr += 1
  113.             lbhptr = resetSlidingWindow(cptr,255)
  114.             if cptr == (EoA-1):
  115.                 literalbuf.append(inArray[cptr])
  116.                 break  #break out of the while loop and force a buffer flush
  117.     if len(literalbuf) > 0:
  118.         outArray.extend(returnLZCodes(len(literalbuf)-1,0,literalbuf))
  119.     #later on, return 2D array, [[outArray],[listOfCompressionDetails]]
  120.     return outArray
  121.  
  122. #Ver 0.1: fixed len [size,backref,symbol(s)]
  123. # If size=0, omit backref, use symbol.
  124. # If backref=0, size+1= number of symbols (including trailing symbol)
  125. #
  126. def decompress(inArray):
  127.     pass  #begin routine
  128.     outArray = []
  129.     cptr = 0
  130.     while cptr < len(inArray):
  131.         tempc = inArray[cptr]
  132.         if (tempc&0x80)==0:
  133.             count = (tempc&0x7F)+1
  134.             cptr += 1
  135.             for x in range(count):
  136.                 outArray.append(inArray[cptr])
  137.                 cptr += 1
  138.         else:
  139.             count = (tempc&(0x7F)) + 2
  140.             backref = inArray[cptr+1]
  141.             startptr = len(outArray)-backref
  142.             for x in range(count):
  143.                 outArray.append(outArray[startptr+x])
  144.           #  outArray.append(inArray[cptr+2])
  145.             cptr += 2
  146.     pass #end main loop
  147.     return outArray
  148.  
  149. # ----------------------------------------------------------------------------
  150. # ----------------------------------------------------------------------------
  151. # ----------------------------------------------------------------------------
  152. # ----------------------------------------------------------------------------
  153. # ----------------------------------------------------------------------------
  154. # ----------------------------------------------------------------------------
  155. # Program start
  156.  
  157. if (len(sys.argv) < 2) or (sys.argv[1][0]=='-') or (sys.argv[1][0]=='/'):
  158.     print("Usage: cmprss.ph <infile> <outfile> <options>")
  159.     print("For help: cmprss.ph -h")
  160.     sys.exit(1)
  161. if "-h" in sys.argv:
  162.     print("Usage: cmprss.ph <infile> <outfile> <options>")
  163.     print("Options:")
  164.     print("-t : Test mode (do not output to file)")
  165.     print("-a : Add size header to file")
  166. inFileName = sys.argv[1]
  167. if (len(sys.argv) > 2) and (sys.argv[2][0]!='-'):
  168.     outFileName = sys.argv[2]
  169. else:
  170.     outFileName = ""
  171. if "-a" in sys.argv:
  172.     addHeader = 1
  173. else:
  174.     addHeader = 0
  175. if "-t" in sys.argv:
  176.     testMode = 1
  177. else:
  178.     testMode = 0
  179.    
  180.  
  181.  
  182.  
  183. inFileArray = readFile(inFileName)  #later read from cmdline
  184. dataFreq = [0 for i in range(256)]  #table of num of occurances of bytes
  185. #check
  186. count = 0
  187. while count < len(inFileArray):
  188.     dataFreq[inFileArray[count]] += 1
  189.     count += 1
  190.  
  191. item_freq = 65535
  192. item_code = 0
  193. count = 0
  194. filesum = 0
  195.  
  196. while count < 256:
  197.     if dataFreq[count]<item_freq:
  198.         item_freq = dataFreq[count]
  199.         item_code = count
  200.     filesum += dataFreq[count]
  201.     count += 1
  202.    
  203. sixteensum = 0
  204. for x in range(len(inFileArray)):
  205.     sixteensum += inFileArray[x]
  206. sixteensum = sixteensum & 0x00FFFF
  207.  
  208. print("Sanity check: fSize: " + str(len(inFileArray)) +
  209.       ", summation " + str(filesum) + ", 16-bit chksum " + str(sixteensum))
  210. #print("Item chosen: " + str(item_code) + " with " +
  211. #      str(item_freq) + " occurrances.")
  212.  
  213. #escapecode = item_code
  214. #escapelength = 0
  215. #tmpvar = escapecode
  216. #while 1:
  217. #    escapelength += 1
  218. #    tmpvar //= 2
  219. #    if tmpvar == 0:
  220. #        break
  221.      
  222. # print("Escape code chosen: " + str(escapecode) +
  223. #      ", bit length: " + str(escapelength))
  224.  
  225. resultArray = compress(inFileArray)
  226.  
  227. print("Result array length :" + str(len(resultArray)))
  228.  
  229. for x in range(0,len(resultArray)):
  230.     if resultArray[x]>255:
  231.         print("Error: Array item " + str(x) + " outside bounds (" + str(resultArray[x]) + ")")
  232.         print("Around: " + str(resultArray[x-4:x+4]))
  233. #writeFile("testoutput",resultArray)
  234.  
  235. print("Decompression test start.")
  236. decompTest = decompress(resultArray)
  237. print("Length comparison: original: " + str(len(inFileArray)) + ", decompressed: " + str(len(decompTest)))
  238.  
  239. if str(len(inFileArray)) == str(len(decompTest)):
  240.     print("Data integrity test")
  241.     errcode = 0
  242.     for x in range(len(resultArray)):
  243.         if inFileArray[x] != decompTest[x]:
  244.             errcode = 1
  245.             print("Data mismatch at position " + str(x) + ": " + str(inFileArray[x]) + " vs " + str(decompTest[x]))
  246.     if errcode == 0:
  247.         print("Test successful. No discrepencies detected.")
  248.     else:
  249.         print("Error: Data mismatch found. File not written.")
  250.         print("In: " + str(inFileArray[0:10]) + "\nOut: " + str(resultArray[0:10]) + "\nChk: " + str(decompTest))
  251.         sys.exit(2)
  252. else:
  253.     print("Error: Test length mismatch. File not written.")
  254.     print("In: " + str(inFileArray[0:10]) + "\nOut: " + str(resultArray[0:10]) + "\nChk: " + str(decompTest[0:10]))
  255.     sys.exit(2)
  256. #
  257. sizeLSB = len(resultArray)%256
  258. sizeMSB = (len(resultArray)//256)%256
  259. resultArray = [sizeLSB,sizeMSB] + resultArray
  260.  
  261. sizeLSB = len(inFileArray)%256
  262. sizeMSB = (len(inFileArray)//256)%256
  263. resultArray = [sizeLSB,sizeMSB] + resultArray
  264.  
  265. if addHeader == 1:
  266.     print("Debug: resarraysample: " + str(resultArray[0:10]))
  267.  
  268. if testMode == 0:
  269.     writeFile(outFileName,resultArray)
  270.     print("File [" + outFileName + "] was output.")
  271. else:
  272.     print("Test mode running. No output.")
  273. inSize = len(inFileArray)
  274. outSize = len(resultArray)
  275. print("Success! In: " + str(inSize) + ", Out: " + str(outSize) + " (" + str((((outSize/inSize)*100)//1)) + "% of original)")
Advertisement
Add Comment
Please, Sign In to add comment