eniallator

Compression Program

May 31st, 2016
637
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lua 8.08 KB | None | 0 0
  1. -- This is eniallator's Compression program that takes an input file and then you can choose to either compress it or decompress it.
  2. -- The way compression works is by making a table of all the different words that come up in the file.
  3. -- It will then instead of putting the word in the actual body of the file, it will just put the index of the table that the program has to search for.
  4. -- The index is stored as a single byte; I use string.char() and string.byte() to convert between the two.
  5. --
  6. -- ================================================================================
  7. --
  8. -- I'm currently using the following numbers for special cases:
  9. -- 0 = bigger than 254, 1 = new line, 2 = multiple spaces, 3 = 1 space
  10. --
  11. -- I also don't use 13 because when Lua converts character 13 back to its number, it will be the same result as byte 10.
  12. -- Apart from that, every other byte is just used to index the words.
  13.  
  -- Arguments passed to the program, in order: the mode ("com" or "decom"),
  -- the input file name and the output file name.
  local tArgs = { ... }
  15.  
  -- Function to return a table of the lines of a file.
  --
  -- file: path of the file to read (opened through fs.open in "r" mode).
  -- Returns a table holding every line of the file in order, as produced by
  -- the handle's readLine().
  -- Raises an error naming the file when it cannot be opened, rather than
  -- failing later with an "attempt to index nil" message.
  function fileToLines(file)

    local handle, reason = fs.open(file, "r")

    if not handle then

      local message = "Could not open '" .. tostring(file) .. "' for reading"

      -- Some fs implementations return a reason as a second value; include it when present
      if reason then
        message = message .. ": " .. tostring(reason)
      end

      error(message)
    end

    local lines = {}
    local currLine = handle.readLine()

    -- readLine() returns nil once the end of the file has been reached
    while currLine do

      lines[#lines + 1] = currLine
      currLine = handle.readLine()
    end

    handle.close()
    return lines
  end
  39.  
  -- Function to split a string up at its spaces.
  --
  -- Only the space character (" ") is treated as a separator, because that is
  -- the only character the compressor splits words on. Splitting on every
  -- kind of whitespace would break a word containing e.g. a tab into two
  -- index entries and shift every index after it.
  --
  -- string: the text to split (note: the parameter shadows the string library
  --         inside this function, so only method-call syntax is used on it).
  -- Returns a table of the non-empty pieces found between spaces, in order.
  local function wordSplit(string)

    local out = {}

    -- Each maximal run of non-space characters is one word
    for word in string:gmatch("[^ ]+") do

      out[#out + 1] = word
    end

    return out
  end
  53.  
  -- Function that compression uses to turn a number into its byte form.
  --
  -- While the number is bigger than 254, one byte 0 is emitted and 254 is
  -- taken off the number. What is left is emitted as a single byte, moved up
  -- by one when it is 13 or more so that byte 13 is never produced.
  --
  -- num: the number to encode.
  -- Returns the encoded bytes as a string.
  local function checkNum(num)

    local escapes = 0

    -- Counting how many times 254 has to be taken off
    while num > 254 do

      escapes = escapes + 1
      num = num - 254
    end

    local lastByte = num

    -- Skipping over byte 13
    if lastByte >= 13 then
      lastByte = lastByte + 1
    end

    return string.rep(string.char(0), escapes) .. string.char(lastByte)
  end
  72.  
  73.  
  -- Function to record a word on a line, adding it to the index when it is new.
  --
  -- word:   the word to record; an empty word is ignored.
  -- outTbl: the output table; outTbl[1] is the list of known words and
  --         outTbl[line+1] is the list of numbers for the given line.
  -- line:   the line number the word belongs to.
  -- Returns outTbl (the same table that was passed in).
  local function sortWord(word,outTbl,line)

    if #word == 0 then
      return outTbl
    end

    local known = outTbl[1]
    local position = nil

    -- Looking for the word among the ones already in the index
    for i, candidate in ipairs(known) do
      if candidate == word then
        position = i
        break
      end
    end

    -- A word that was not found goes on the end of the index
    if not position then
      known[#known + 1] = word
      position = #known
    end

    -- Word numbers start at 4 because 0 to 3 are used for the special cases
    local numbers = outTbl[line+1]
    numbers[#numbers + 1] = position + 3

    return outTbl
  end
  102.  
  -- Function to record a run of spaces on a line.
  --
  -- spaces: how many spaces are in the run; nothing is recorded unless it is
  --         bigger than 0.
  -- outTbl: the output table; outTbl[line+1] is the list of numbers for the
  --         given line.
  -- line:   the line number the spaces belong to.
  -- Returns outTbl (the same table that was passed in).
  local function sortSpaces(spaces,outTbl,line)

    if not (spaces > 0) then
      return outTbl
    end

    local numbers = outTbl[line+1]

    if spaces > 1 then

      -- 2 marks a run of spaces and is followed by the length of the run
      numbers[#numbers + 1] = 2
      numbers[#numbers + 1] = spaces
    else

      -- 3 stands for exactly one space
      numbers[#numbers + 1] = 3
    end

    return outTbl
  end
  120.  
  -- The compression function.
  --
  -- lines: a table of the lines of the file to compress.
  -- Returns the compressed file as one string: the first line is the word
  -- index (every distinct word, separated by single spaces) followed by "\n";
  -- after it comes the body, in which every input line is written as the
  -- bytes produced by checkNum() for its words and spaces, ended by byte 1.
  --
  -- The output is collected in a table and joined once at the end, and words
  -- are cut out of the line by position, so the work no longer grows with the
  -- square of the input size the way repeated ".." on a long string does.
  function compress(lines)

    local SPACE = 32 -- string.byte(" ")
    local outTable = {{}}

    for line = 1, #lines do

      local text = lines[line]
      local spaces = 0
      local wordStart = nil
      outTable[line+1] = {}

      -- Walking over the line and handing every finished word or run of spaces to the helpers
      for i = 1, #text do
        if text:byte(i) == SPACE then

          -- A space ends the word that was being read, if there was one
          if wordStart then
            sortWord(text:sub(wordStart, i - 1), outTable, line)
            wordStart = nil
          end

          spaces = spaces + 1
        else

          -- Any other character ends the run of spaces, if there was one
          if spaces > 0 then
            sortSpaces(spaces, outTable, line)
            spaces = 0
          end

          wordStart = wordStart or i
        end
      end

      -- Whatever was still being read when the line ended
      if wordStart then
        sortWord(text:sub(wordStart), outTable, line)
      end
      sortSpaces(spaces, outTable, line)
    end

    -- The index is always line 1 of the output, ended with a real new line
    local pieces = { table.concat(outTable[1], " "), "\n" }

    -- Every other entry is a line of the body, ended with byte 1
    for i = 2, #outTable do
      local numbers = outTable[i]

      for j = 1, #numbers do
        pieces[#pieces + 1] = checkNum(numbers[j])
      end

      pieces[#pieces + 1] = string.char(1)
    end

    return table.concat(pieces)
  end
  182.  
  -- The decompression function.
  --
  -- lines: a table of the lines of a compressed file. lines[1] is the word
  --        index (words separated by spaces); the remaining entries are the
  --        body. The table is only read, never changed.
  -- Returns the decompressed text as one string. The very last number of the
  -- body is not decoded, so the end-of-line marker that closes the final line
  -- does not add a new line to the end of the result.
  -- Raises an error when the body ends in the middle of a number or refers to
  -- a word that is not in the index.
  function decompress(lines)

    -- Nothing to decode at all (e.g. a zero-byte file)
    if #lines == 0 then
      return ""
    end

    -- Separating the index table from the body table
    local index = wordSplit(lines[1])
    local body = {}

    -- Turning the body back into the list of numbers the compressor wrote
    for line = 2, #lines do

      -- The body was cut into lines at every byte 10, so that byte has to be
      -- put back between two neighbouring lines of the body
      if line > 2 then
        body[#body + 1] = 10
      end

      local text = lines[line]

      for i = 1, #text do

        local indexNum = string.byte(text, i)

        -- Undoing the shift that keeps byte 13 out of the file
        if indexNum >= 13 then
          indexNum = indexNum - 1
        end

        body[#body + 1] = indexNum
      end
    end

    -- Reads the number that starts at body[pos]: every leading 0 is worth 254
    -- and the first other value is added on top.
    -- Returns the number and the position of the last value it used.
    local function readNumber(pos)

      local multiples = 0

      while body[pos] == 0 do
        multiples = multiples + 254
        pos = pos + 1
      end

      if body[pos] == nil then
        error("Compressed data ends in the middle of a number")
      end

      return body[pos] + multiples, pos
    end

    local counter = 1
    local pieces = {}

    -- Converting the numbers into the corresponding text
    while counter < #body do

      local current = body[counter]

      if current == 1 then

        -- 1 is the end of a line
        pieces[#pieces + 1] = "\n"

      elseif current == 2 then

        -- 2 is followed by the amount of spaces, which can itself start with
        -- 0s when it is bigger than 254
        local amount
        amount, counter = readNumber(counter + 1)
        pieces[#pieces + 1] = string.rep(" ", amount)

      elseif current == 3 then

        -- 3 is exactly one space
        pieces[#pieces + 1] = " "

      else

        -- Anything else is a word number (with leading 0s when bigger than 254);
        -- word numbers start at 4, so 3 is taken off to get the place in the index
        local wordNum
        wordNum, counter = readNumber(counter)

        local word = index[wordNum - 3]

        if word == nil then
          error("Compressed data refers to word " .. (wordNum - 3) .. " but the index only has " .. #index .. " words")
        end

        pieces[#pieces + 1] = word
      end

      counter = counter + 1
    end

    return table.concat(pieces)
  end
  267.  
  -- Handling program arguments.
  --
  -- tArgs[1] picks the mode ("com" or "decom"), tArgs[2] is the input file
  -- and tArgs[3] is the output file. Both modes go through the same checks,
  -- so a missing output file name is reported as such in either mode, and the
  -- working values are kept local instead of being left behind as globals.
  local modes = {
    com = {
      run = compress,
      badInput = "Error: Compression requires a valid file name as the second argument.",
      noOutput = "Error: Compression requires a third argument",
    },
    decom = {
      run = decompress,
      badInput = "Error: Decompression requires a valid file name as the second argument.",
      noOutput = "Error: Decompression requires a third argument",
    },
  }

  local mode = modes[tArgs[1]]
  local inputName, outputName = tArgs[2], tArgs[3]

  if not mode then

    print('Error: Invalid syntax. Correct syntax is: "compress [com/decom] [input file name] [output file name]"')

  -- A directory also "exists" but cannot be read as a file
  elseif not (inputName and fs.exists(inputName)) or fs.isDir(inputName) then

    print(mode.badInput)

  elseif not outputName then

    print(mode.noOutput)

  else

    local result = mode.run(fileToLines(inputName))

    local openFile = assert(fs.open(outputName,"w"),"Something went wrong when trying to open the output file")
    openFile.write(result)
    openFile.close()
  end
Add Comment
Please, Sign In to add comment