Advertisement
Guest User

Compression Program

a guest
May 31st, 2016
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lua 7.96 KB | None | 0 0
  1. -- This is eniallator's Compression program that takes an input file and then you can choose to either compress it or decompress it.
  2. -- The way compression works is by making a table of all the different words that come up in the file.
  3. -- Instead of putting the word itself in the body of the file, it then just puts the word's index in that table, which the program looks up again when decompressing.
  4. -- The index is stored as a byte, and I use string.char() and string.byte() to convert between the two.
  5. --
  6. -- ================================================================================
  7. --
  8. -- I'm currently using the following byte values for special cases:
  9. -- 0 = index bigger than 254 (each 0 byte adds 254), 1 = new line, 2 = multiple spaces (followed by a count byte), 3 = 1 space
  10. --
  11. -- I also don't use 13 because when Lua converts character 13 back to its number, it will be the same result as byte 10.
  12. -- Apart from that, every other byte I just use to index the words.
  13.  
  -- Arguments given to the program, in order: mode ("com"/"decom"), input file name, output file name.
  local tArgs = {...}
  15.  
  -- Reads a file and returns its lines as an array of strings.
  --
  -- file: path of the file to read (opened through fs.open in "r" mode).
  -- Returns a table holding one string per line, in file order; empty lines
  -- are kept as "".
  -- Raises an error naming the file if it cannot be opened (for example when
  -- the path exists but fs.open returns nil for it).
  local function fileToLines(file)
    local handle, reason = fs.open(file, "r")

    -- fs.open signals failure by returning nil; fail with a readable message
    -- instead of crashing later on a nil index.
    if not handle then
      local message = "Could not open '" .. tostring(file) .. "' for reading"
      if reason then
        message = message .. ": " .. tostring(reason)
      end
      error(message, 0)
    end

    local lines = {}

    -- readLine returns nil once the end of the file is reached
    local currLine = handle.readLine()
    while currLine do
      lines[#lines + 1] = currLine
      currLine = handle.readLine()
    end

    handle.close()
    return lines
  end
  39.  
  -- Splits a string into its words, using the space character as the only
  -- separator.
  --
  -- text: the string to split.
  -- Returns a table of the non-empty pieces found between spaces, in order.
  --
  -- Only " " counts as a separator (not tabs or other whitespace) because the
  -- compressor builds its words by splitting on " " alone and joins the
  -- dictionary with single spaces; splitting on any whitespace here would cut
  -- a word such as "a\tb" in two and shift every later dictionary index.
  local function wordSplit(text)
    local words = {}

    for word in text:gmatch("[^ ]+") do
      words[#words + 1] = word
    end

    return words
  end
  53.  
  -- Encodes a number as the byte string used in the compressed body.
  --
  -- For every 254 that has to be taken off to bring the value down to 254 or
  -- less, one 0 byte is written first. The remaining value is then written as
  -- a single byte, shifted up by one when it is 13 or more so that byte 13
  -- never appears in the output.
  local function checkNum(num)
    local pieces = {}
    local remainder = num

    -- One 0 marker byte per 254 removed
    while remainder > 254 do
      pieces[#pieces + 1] = string.char(0)
      remainder = remainder - 254
    end

    -- Skip over byte 13
    if remainder >= 13 then
      remainder = remainder + 1
    end

    pieces[#pieces + 1] = string.char(remainder)
    return table.concat(pieces)
  end
  72.  
  -- The compression function.
  --
  -- fileName: path of the file to compress (read through fileToLines).
  -- Returns the compressed data as one string:
  --   * first line: every distinct word of the input, in order of first
  --     appearance, joined by single spaces and terminated by "\n";
  --   * after that, the body: one checkNum-encoded code per token, with
  --     code 1 written at the end of every input line.
  -- Codes: 1 = end of line, 2 = run of spaces (followed by the run length),
  -- 3 = single space, N + 3 = the N-th word of the dictionary.
  --
  -- Words are the pieces of a line between space characters; other
  -- whitespace (tabs etc.) stays part of the word.
  local function compress(fileName)
    -- Largest number checkNum writes as a single byte. Space runs are cut
    -- into pieces of at most this size, because the byte after a 2 marker is
    -- read back as the count directly; a longer count would be written with
    -- leading 0 bytes and be misread as "0 spaces" followed by a word index.
    local MAX_RUN = 254

    local lines = fileToLines(fileName)
    local words = {}      -- dictionary, in order of first appearance
    local wordCode = {}   -- word -> code, so lookups need no linear scan
    local encoded = {}    -- encoded body pieces, joined once at the end

    local function emit(num)
      encoded[#encoded + 1] = checkNum(num)
    end

    -- Emits the code of a word, adding the word to the dictionary first if
    -- it has not been seen before. Empty words are ignored.
    local function emitWord(word)
      if #word == 0 then
        return
      end

      local code = wordCode[word]
      if not code then
        words[#words + 1] = word
        code = #words + 3
        wordCode[word] = code
      end

      emit(code)
    end

    -- Emits a run of `count` spaces: 3 for a single space, otherwise 2
    -- followed by the run length.
    local function emitSpaces(count)
      while count > MAX_RUN do
        emit(2)
        emit(MAX_RUN)
        count = count - MAX_RUN
      end

      if count > 1 then
        emit(2)
        emit(count)
      elseif count == 1 then
        emit(3)
      end
    end

    for _, text in ipairs(lines) do
      -- Each match is a (possibly empty) run of spaces followed by a
      -- (possibly empty) word; empty parts are skipped by the emitters.
      for spaces, word in text:gmatch("( *)([^ ]*)") do
        emitSpaces(#spaces)
        emitWord(word)
      end

      -- End-of-line marker
      emit(1)
    end

    -- The dictionary always sits on the first line of the output
    return table.concat(words, " ") .. "\n" .. table.concat(encoded)
  end
  181.  
  -- The decompression function.
  --
  -- fileName: path of a compressed file (read through fileToLines).
  -- Returns the decompressed text as one string. An empty input file gives
  -- an empty string.
  -- Raises an error when the data refers to a word that is not in the
  -- dictionary or stops in the middle of a multi-byte number.
  --
  -- Layout of the input: the first line is the dictionary (words separated
  -- by spaces); everything after it is the body, a stream of byte codes:
  --   0 = add 254 to the number that follows, 1 = new line,
  --   2 = run of spaces (the next number is the count), 3 = one space,
  --   anything else = dictionary index + 3.
  local function decompress(fileName)
    local lines = fileToLines(fileName)

    -- Nothing to decode; also avoids handing nil to wordSplit
    if #lines == 0 then
      return ""
    end

    -- Separating the dictionary from the body
    local index = wordSplit(table.remove(lines, 1))
    local body = {}

    for lineNo, text in ipairs(lines) do
      -- A body byte with value 10 splits the body into several lines when
      -- the file is read, so every line break inside the body is turned back
      -- into the code 10.
      if lineNo > 1 then
        body[#body + 1] = 10
      end

      for i = 1, #text do
        local code = text:byte(i)

        -- Values of 13 and above were shifted up by one when written
        if code >= 13 then
          code = code - 1
        end

        body[#body + 1] = code
      end
    end

    -- Reads the number starting at body[pos]: every leading 0 adds 254 to
    -- the value that follows. Returns the number and the position of its
    -- last byte.
    local function readNumber(pos)
      local multiples = 0

      while body[pos] == 0 do
        multiples = multiples + 254
        pos = pos + 1
      end

      if body[pos] == nil then
        error("Corrupt compressed data: unexpected end of input", 0)
      end

      return body[pos] + multiples, pos
    end

    -- Returns the dictionary word for a code
    local function lookup(code)
      local word = index[code - 3]

      if word == nil then
        error("Corrupt compressed data: unknown word index " .. tostring(code - 3), 0)
      end

      return word
    end

    local out = {}
    local pos = 1

    -- The loop stops before the last code on purpose: the body ends with the
    -- new line code of the final line, and that one is not written back.
    while pos < #body do
      local code = body[pos]

      if code == 0 then
        -- Index bigger than 254
        local number
        number, pos = readNumber(pos)
        out[#out + 1] = lookup(number)
      elseif code == 1 then
        out[#out + 1] = "\n"
      elseif code == 2 then
        -- Run of spaces. The count is read like any other number so that
        -- counts written with leading 0 bytes decode correctly as well.
        local count
        count, pos = readNumber(pos + 1)
        out[#out + 1] = string.rep(" ", count)
      elseif code == 3 then
        out[#out + 1] = " "
      else
        out[#out + 1] = lookup(code)
      end

      pos = pos + 1
    end

    return table.concat(out)
  end
  269.  
  -- Handling program arguments.
  --
  -- Usage: compress [com/decom] [input file name] [output file name]
  -- "com" compresses the input file, "decom" decompresses it; either way the
  -- result is written to the output file. Problems with the arguments are
  -- reported with print.
  local mode, inputName, outputName = tArgs[1], tArgs[2], tArgs[3]

  -- Both modes share the same argument checks and output handling; only the
  -- function to run and the name used in the messages differ.
  local actions = {
    com = { run = compress, label = "Compression" },
    decom = { run = decompress, label = "Decompression" },
  }

  local action = nil
  if mode ~= nil then
    action = actions[mode]
  end

  if not action then
    print('Error: Invalid syntax. Correct syntax is: "compress [com/decom] [input file name] [output file name]"')
  elseif not (inputName and fs.exists(inputName)) then
    print("Error: " .. action.label .. " requires a valid file name as the second argument.")
  elseif not outputName then
    -- Checked on its own so that a missing output name is reported as such
    -- for both modes.
    print("Error: " .. action.label .. " requires a third argument")
  else
    -- Locals, so nothing is left behind in the global environment
    local result = action.run(inputName)

    local outFile = assert(fs.open(outputName, "w"), "Something went wrong when trying to open the output file")
    outFile.write(result)
    outFile.close()
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement