Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/lua
-- Archive format version: written into every archive header by create() and
-- checked by parselarheader().
local FORMAT_VERSION=1
-- Program revision, shown after the format version in the usage text.
local PROGRAM_VERSION=1
-- Number of characters hash() reads back from sha256sum for one block.
local HASH_SIZE=64
-- Seed the generator from /dev/urandom so the temporary names below differ
-- between runs. The pipe is closed explicitly and a failed read is reported
-- with a clear message instead of an arithmetic error on nil.
do
  local urandom=assert(io.popen("tr -cd \"[:digit:]\" < /dev/urandom | head -c 14","r"))
  local entropy=urandom:read("*n")
  urandom:close()
  assert(entropy,"Could not read random digits from /dev/urandom to seed the random number generator.")
  math.randomseed(entropy-os.time())
end
-- Path of the FIFO that hash() uses to feed block data to sha256sum.
local fifofn="/tmp/lar-fifo-"..math.random()
-- Directory that holds blocks temporarily written to disk.
local blockdir="./lar-blocks-"..math.random()
-- Running statistics updated by addfile(): bytes processed, bytes that did
-- not have to be stored again, and number of distinct blocks.
local total=0
local saved=0
local totalblocks=0
-- 16-byte reference string used by addfile() when it picks self-synchronizing
-- block boundaries (candidate windows are compared against it).
local cut=string.char(
  146,29,145,10,
  42,57,141,0,
  201,198,43,27,
  124,130,66,183
)
- ---------------
- --- Helpers ---
- ---------------
-- Prints the full usage/help text, including the format and program version,
-- to stderr. Takes no arguments and returns nothing.
-- Compared to the previous text: the version line is now followed by a line
-- break (the newline right after "[==[" is skipped by Lua, so it used to run
-- into the next paragraph), the "yesterday.lar" example pipes into lar
-- instead of redirecting to a file named "lar", and a few typos are fixed.
function showusage()
  io.stderr:write([==[
lar - Liquid Archive version ]==]..FORMAT_VERSION.."."..PROGRAM_VERSION.."\n"..[==[
Lar reads archives from stdin and writes archives to stdout. Existing files are
overwritten during extraction, if permissions allow that. Liquid archives are
designed to be streamable both during creation and during extraction. They also
eliminate redundancy on a 1MiB block level (or smaller for files smaller than
1MiB). A side effect of these two design goals is that during extraction with
inclusion or exclusion filters, all the data needs to be extracted on disk and
then partially discarded. Lar ignores file ownership both when archiving and
when extracting. For excellent and efficient compression of lar archives we
suggest piping the output through "mbuffer -q -m 1G" and then through "lzip -9"
if you have these tools available.
Usage: lar -c [-v] [-p|-P] [-B SIZ] [-s SNC] [-d NUM] [-b DIR] [-i INC] [-e EXC]
lar -x [-v] [-p|-P] [-d NUM] [-i INC] [-e EXC] [-f]
lar -l [-v] [-p|-P] [-d NUM] [-i INC] [-e EXC]
lar -m [-v]
lar -C N,T
lar -H [-v]
-c Create archive. Outputs to stdout an archive of DIR as seen from
the current directory.
-x Extract archive. Reads an archive from stdin and extracts the
contents to the current working directory.
-l List contents. Same as extract but it will not touch your local
filesystem. You can even combine it with -i and -e to view just
some of the files. It does not fully test the archive for all
kinds of inconsistencies though. Some will only be detected when
you actually extract the archive.
-m Create an empty archive, useful if you want to create empty base
archives for a differential archive.
-i Only include files and directories that match the INC Lua
pattern. Works with -c, -x and -l. Additionally, "|" may be used
to separate multiple Lua patterns as a logical OR.
https://www.lua.org/manual/5.3/manual.html#6.4.1
-e Exclude files that match the EXC Lua pattern. Works with -c,
-x and -l.
-v Be more verbose.
-b DIR is the directory or file that will be added to the archive.
It can only be used with -c.
-d Create or extract a differential archive. You need to
sequentially pipe NUM base archives in the stdin when you create
a differential archive. The same base archives need to be passed
in the stdin (in any order) when you want to extract the
resulting differential archive. Differential archives will not
repeat blocks of data that exist in the base archives, so they
are ideal for incremental backups. You may use differential
archives as base archives for a new differential archive. Doing
so will not cause the new differential archive to require the
base archives of its base archives during its extraction. This
allows you to create a sequence of differential archives where
each one depends on the NUM previous archives.
-p Create a text only archive that contains only printable
characters and whitespace at the cost of an increased archive
size and slower archival. It can only be combined with -c.
-P Same as -p but this time even whitespace is disallowed.
-f Force extraction of files with missing blocks. This will allow
you to partially extract a differential archive even if some of
its base archives are missing.
-B Change the block size to SIZ. Default is 1048576 (1MiB). Bigger
blocks are faster but deduplication is done at the block level
so smaller blocks (but not very small) will result in more data
savings. Don't change this if you are not sure. It can only be
used with -c.
-s Enable self-synchronizing block splitting for files matching the
SNC Lua pattern. Data blocks will not be of a fixed size
anymore, they will vary between 66% and 134% of the size defined
by -B (or the default 1MiB). The sizes are picked in such a way
that if you have two files with different sizes but a substantial
amount of common data at their end, there is a high probability
that the blocks will synchronize, improving the deduplication of
data. This is especially useful if, for example, you are trying
to make differential backups of a MySQL dump file every day when
only a little data changes but also the size of the file
changes. Archival will be much slower. It can only be used
with -c.
Suppose you have these two data streams:
abdefghijklmnopqrstuvwxyz
abdefghijklm123nopqrstuvwxyz
If you split them with fixed size blocks they will look like:
ab|de|fg|hi|jk|lm|no|pq|rs|tu|vw|xy|z
ab|de|fg|hi|jk|lm|12|3n|op|qr|st|uv|wx|yz
Deduplication will work fine for the blocks before the digits
but after the digits the blocks are offset in such a way that
they will never resynchronize and even though the ends of both
streams are the same, deduplication is impossible.
Self-synchronizing blocks will split these data streams like:
ab|def|g|h|ij|kl|mno|pq|rs|t|u|vwx|yz
ab|def|g|h|ij|kl|m1|23n|op|q|rs|t|u|vwx|yz
The blocks eventually resynchronize (after "rs" in the example)
so deduplication of the end of this data stream is possible.
-C This is a handy calculator for differential backups. N is the
number that you intend to pass to -d, i.e. the number of base
archives that each of your differential archives will be based
on. T is the number of differential archives that you intend to
keep. Type those two numbers in (e.g. -C 4,8) and the calculator
will give you information about how much space they will take
and how many archives will be recoverable.
-H If you pipe a Liquid archive through "lar -H", you will get
a special "thin" version of the archive in the output that
contains only the hashes of the blocks in the input. This can be
used to greatly reduce the amount of data transmitted over the
wire in cases where you are creating a differential backup with
the base files being piped over the network as it will move the
block hashing to the remote server instead of transferring the
data to be hashed locally.
Examples:
lar -cv -b Images > Images.lar
Archive all files in the Images folder and name the archive Images.lar.
Verbose output.
lar -xv < Images.lar
Recreate the Images folder that was archived in the previous example under
the current working directory. Verbose output.
lar -xv -i '%.c$|%.h$' -e 'example' < code.lar
Extract the archive code.lar into the current directory. Only extract
files and directories that end with ".c" or ".h". Do not extract files
or directories that include the word "example" in their full path.
cat old.lar older.lar | lar -cd 2 -b Images > new.lar
Archive all files in the Images folder and name the archive new.lar. The
output archive will be a differential archive and will not contain any
blocks of data that exist in old.lar and older.lar.
cat old.lar older.lar new.lar | lar -xd 2
Extract the archive that was created in the previous example. Only
new.lar will be extracted but old.lar and older.lar are needed because
they contain data that was omitted from new.lar during its creation. The
order of old.lar and older.lar may be reversed but new.lar, the archive
that you are actually trying to extract, must be the last one.
(cat DB2.lar.gz | gunzip; cat DB.lar.gz | gunzip) | lar -cvb DB -d 2 | gzip > DB3.lar.gz
Archive all files in the DB folder, pass through gzip to compress the
archive and name it DB3.lar.gz. The output archive will be a
differential archive and will not contain any blocks of data that exist
in DB2.lar.gz and DB.lar.gz.
cat yesterday.lar | lar -d 1 -cvb serverbackup/ -s '%.sql$|%.csv$' > today.lar
Archive all the files in the serverbackup folder and turn on
self-synchronizing blocks for files with ".sql" or ".csv". Do not
include the data blocks that exist in yesterday.lar so the resulting
today.lar will be a differential archive. Self-synchronization helps
with redundancy detection in files like SQL dumps or other files that
may have data inserted or removed from one archive to the next one.
(ssh 'user@192.168.0.5' "cat /home/user/backup1.lar.gz" | gunzip | lar -H; ssh 'user@192.168.0.5' "cat /home/user/backup2.lar.gz" | gunzip | lar -H; ) | lar -v -d 2 -c -b . | gzip | ssh 'root@192.168.0.5' "cat > /home/user/backup0.lar.gz"
ssh 'root@192.168.0.5' "rm /home/user/backup5.lar.gz"
ssh 'root@192.168.0.5' "mv /home/user/backup4.lar.gz /home/user/backup5.lar.gz"
ssh 'root@192.168.0.5' "mv /home/user/backup3.lar.gz /home/user/backup4.lar.gz"
ssh 'root@192.168.0.5' "mv /home/user/backup2.lar.gz /home/user/backup3.lar.gz"
ssh 'root@192.168.0.5' "mv /home/user/backup1.lar.gz /home/user/backup2.lar.gz"
ssh 'root@192.168.0.5' "mv /home/user/backup0.lar.gz /home/user/backup1.lar.gz"
Create a differential archive of the current directory with 2 base
archives. The base archives are stored on a remote server at 192.168.0.5
and are remotely processed with "lar -H" so that only the hashes of the
blocks they contain are transferred over the wire. The resulting
differential archive is also stored back at the same remote server.
After the archival is done the archives are renamed and 5 of them in
total are kept. backup1.lar.gz will be the newest archive and
backup5.lar.gz will be the oldest.
To see what 5 differential archives (each based on the previous 2
archives) mean, you can run "lar -C 2,4" which will give you the
following output:
If your differential archives are based on the last 2 archives (-d 2)
and you keep a total of 5 archives, then you should expect to have 3
recoverable archives. Archives older than the last 3 will not have all
their base archives available and you will therefore be unable to
extract them. You should expect that all 5 archives together will take
about the same space as 1.7 full size (non-differential) archives, but
in some cases they will take up about the same space as 2 full size
archives.
Verbose output:
3% (input=134B output=111.74MiB) ./readme.txt (regular file) [NNNHP]
| | | | | |
| | Data written to stdout | File type |
| | Current filename |
| Data read from stdin |
| Current file's blocks (N=new, P=previously seen in
Percentage of files done current archive, H=previously seen in base archive)
Copyright 2019-2020 Tritonio (www.inshame.com)
This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License version 3 as published by the Free
Software Foundation.
This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.
If you have not received a copy of the GNU General Public License along with
this program then see https://www.gnu.org/licenses/gpl-3.0.txt or download it
using BitTorrent: magnet:?xt=urn:btih:ftlm2r4zr3eepxypisejceebq7gx3wn7
]==])
end
do
  -- os.time() value recorded by the most recent call that returned true.
  local lastreport=0

  -- Throttle for progress messages: returns true when at least one second
  -- (as measured by os.time()) has elapsed since the previous call that
  -- returned true, and false otherwise.
  function timehaspassed()
    local current=os.time()
    if current-lastreport<1 then
      return false
    end
    lastreport=current
    return true
  end
end
-- Rounds n to d decimal places (d defaults to 0); halves are rounded up
-- because the value is shifted by 0.5 before flooring.
function round(n,d)
  local scale=10^(d or 0)
  return math.floor(n*scale+0.5)/scale
end
-- Formats a byte count for display: values below 1024 are printed as-is
-- with a "B" suffix, larger values are scaled to KiB/MiB/GiB/TiB and
-- rounded to two decimals with round().
function formatbytes(bytes)
  local units={{4,"TiB"},{3,"GiB"},{2,"MiB"},{1,"KiB"}}
  for _,unit in ipairs(units) do
    local threshold=1024^unit[1]
    if bytes>=threshold then
      return round(bytes/threshold,2)..unit[2]
    end
  end
  return bytes.."B"
end
-- Returns the upper-cased extension of filename (the text after the last
-- dot of the final path component), or "" when there is none. The dot must
-- be preceded by a non-slash character, so dot-files have no extension.
function getext(filename)
  local extension=string.match(filename,"[^/]%.([^/%.]+)$")
  if not extension then
    extension=""
  end
  return string.upper(extension)
end
-- Returns the last path component of filename (everything after the final
-- slash), converted to upper case.
function stripdirs(filename)
  local basename=string.match(filename,"([^/]*)$")
  return string.upper(basename)
end
-- Returns the number of slashes in filename, used as its nesting depth.
function getdepth(filename)
  -- Deleting every run of non-slash characters leaves only the slashes.
  local slashes=string.gsub(filename,"[^/]+","")
  return #slashes
end
-- Consumes and validates an archive header from instream: the three-byte
-- magic "LAR" followed by the format version number. Raises an error when
-- the stream is empty, the magic differs, or the version is not
-- FORMAT_VERSION. Returns nothing.
function parselarheader(instream)
  local signature=instream:read(3)
  if not signature then
    error("Not enough data. Did you forget to pipe some file to stdin?",0)
  end
  if signature~="LAR" then
    error("Not a Liquid Archive.",0)
  end
  local archiveversion=readnumber(instream)
  if archiveversion~=FORMAT_VERSION then
    error("Unsupported Liquid Archive version: "..archiveversion,0)
  end
end
do
  -- Named byte counters: counter name -> accumulated amount.
  local tallies={}

  -- Adds howmany to the counter called what, creating it on first use.
  function count(what,howmany)
    tallies[what]=howmany+(tallies[what] or 0)
  end

  -- Returns all counters as one space-separated string of "name=size"
  -- entries, sizes formatted by formatbytes(). The order follows pairs()
  -- and is therefore unspecified.
  function getcounters()
    local parts={}
    for name,amount in pairs(tallies) do
      parts[#parts+1]=name.."="..formatbytes(amount)
    end
    return table.concat(parts," ")
  end
end
-- Reads exactly size bytes from instream, issuing further reads until the
-- requested amount has been collected.
-- If the stream ends early: with couldbeless set, whatever was read so far
-- is returned; otherwise an "Unexpected end of archive." error is raised.
-- Returns nil (not "") when no bytes were collected at all.
function reallyread(instream,size,couldbeless)
  local collected=""
  local missing=size-#collected
  while missing>0 do
    local piece=instream:read(missing)
    if not piece then
      if not couldbeless then
        error("Unexpected end of archive.")
      end
      break
    end
    collected=collected..piece
    missing=size-#collected
  end
  if collected~="" then
    return collected
  end
  return nil
end
do
  -- Lookup table: single-byte string -> its two-digit upper-case hex code.
  local digits="0123456789ABCDEF"
  local hexof={}
  for code=0,255 do
    local hi,lo=math.floor(code/16)+1,code%16+1
    hexof[string.char(code)]=string.sub(digits,hi,hi)..string.sub(digits,lo,lo)
  end

  -- Encodes data so that it only contains printable characters (plus space,
  -- LF, CR and tab unless nospaces is set). Every other byte, including "~"
  -- itself, becomes hex digits wrapped in "~...~"; neighbouring escapes are
  -- merged into one run. Returns the result of the final string.gsub, i.e.
  -- the encoded string followed by a replacement count.
  function totextual(data,nospaces)
    local allowedspace=" \x0A\x0D\t"
    if nospaces then
      allowedspace=""
    end
    local unsafe="[^%w`!@#$%%%^&%*%(%)_%+%-=%[%]{};':\",%./<>%?\\|"..allowedspace.."]"
    local escaped=string.gsub(data,unsafe,function (badchar)
      return "~"..hexof[badchar].."~"
    end)
    -- A lone safe character squeezed between two escapes is hex-encoded as
    -- well, which joins the two escapes around it into a single run.
    escaped=string.gsub(escaped,"~([^~])~",hexof)
    -- Adjacent escapes share their delimiters.
    return string.gsub(escaped,"~~","")
  end

  -- Reverses totextual(): each "~HEX...~" run is replaced by the bytes its
  -- hex pairs denote. Returns the result of string.gsub (decoded string
  -- followed by a replacement count).
  function fromtextual(text)
    local function decodepair(hh)
      return string.char(tonumber(hh,16))
    end
    local function decoderun(hexword)
      return string.gsub(hexword,"%x%x",decodepair)
    end
    return string.gsub(text,"~(%x+)~",decoderun)
  end

  -- Load-time round-trip self checks.
  assert(fromtextual(totextual("la r"))=="la r")
  assert(fromtextual(totextual("\0la\nr~a\0a\0a\0aaa\0aa\0"))=="\0la\nr~a\0a\0a\0aaa\0aa\0")
  assert(fromtextual(totextual("la r",true))=="la r")
  assert(fromtextual(totextual("\0la\nr~a\0a\0a\0aaa\0aa\0",true))=="\0la\nr~a\0a\0a\0aaa\0aa\0")
end
-- Returns true when str matches at least one of the Lua patterns in
-- patterns, which is a list of patterns separated by "|". Returns false
-- when none match (including when patterns holds no pattern at all).
function multimatches(str,patterns)
  for alternative in string.gmatch(patterns,"[^|]+") do
    local hit=string.match(str,alternative)
    if hit then
      return true
    end
  end
  return false
end
-- Rejects filenames from an archive that could escape the extraction
-- directory. Raises an error when filename
--   * starts with "..",
--   * is absolute (starts with "/"),
--   * contains a "/.." component followed by a non-alphanumeric character,
--   * ends in "/.." (previously missed because the pattern above needs a
--     character after the dots), or
--   * contains a double slash.
-- Returns nothing when the name is acceptable.
function exploitcheck(filename)
  local unsafe=string.match(filename,"^%.%.")
    or string.match(filename,"^/")
    or string.match(filename,"/%.%.[%W ]")
    or string.match(filename,"/%.%.$")
    or string.match(filename,"//")
  if unsafe then
    error("Invalid filename: "..filename..' Filenames must not be absolute and must not contain parent directories (i.e. "..") or double slashes.')
  end
end
-- Makes filename safe to embed between single quotes in a shell command:
-- each "'" is replaced by '"'"' (close quote, double-quoted quote, reopen).
-- Returns both results of string.gsub: the escaped string and the number of
-- replacements made.
function escapequotes(filename)
  local escaped,replacements=string.gsub(filename,"'","'\"'\"'")
  return escaped,replacements
end
-- Returns the part of filename before its last slash, or nil when filename
-- contains no slash.
function parent(filename)
  local directory=string.match(filename,"^(.-)/[^/]*$")
  return directory
end
- ---------------------
- --- Data encoding ---
- ---------------------
-- Writes one data packet to outstream: the payload length in decimal, a ">"
-- separator, then the payload. With textual set the payload is first
-- encoded by totextual(data,nospaces), and the length written is that of
-- the encoded form.
function writedatapacket(data,outstream,textual,nospaces)
  local payload=data
  if textual then
    payload=totextual(data,nospaces) or data
  end
  outstream:write(string.format("%d>",#payload))
  outstream:write(payload)
end
-- Writes number (a number or numeric string, normalised through tonumber)
-- to outstream followed by the "|" separator.
function writenumber(number,outstream)
  local value=tonumber(number)
  outstream:write(value.."|")
end
-- Reads a number written by writenumber(): a numeral followed by "|".
-- Raises an error when no number can be read or the separator is missing.
function readnumber(instream)
  --io.stderr:write("RN"..instream:seek().."\n")
  local value=tonumber(instream:read("*n"))
  if not value then
    error("Corrupt archive. Unexpected non-numerical data.",0)
  end
  local separator=instream:read(1)
  if separator~="|" then
    error("Corrupt archive. Unexpected number separator.",0)
  end
  return value
end
-- Reads one data packet written by writedatapacket(): a decimal size, a ">"
-- separator and size bytes of payload. With textual set the payload is
-- decoded by fromtextual() before it is returned.
-- Raises an error on a missing or invalid size, a wrong separator, or when
-- the stream ends before the payload is complete.
-- A zero-length packet yields "" (reallyread() reports "nothing read" as
-- nil, which used to crash fromtextual() or leak nil to the caller).
function readdatapacket(instream,textual)
  --io.stderr:write("RDP"..instream:seek().."\n")
  local size=tonumber(instream:read("*n"))
  assert(size,"Corrupt archive. Missing data packet size.")
  assert(size>=0 and size==math.floor(size),"Corrupt archive. Invalid data packet size.")
  assert(instream:read(1)==">","Corrupt archive. Unexpected data packet separator.")
  local somedata=reallyread(instream,size) or ""
  return textual and fromtextual(somedata) or somedata
end
- --------------------------
- --- Modes of operation ---
- --------------------------
-- Creates an archive of dir on outstream.
--   instream      supplies `differential` base archives, read one after the
--                 other so that their blocks are known before archiving
--   include/exclude  "|"-separated Lua pattern lists passed to listfiles()
--   verbose       print progress to stderr
--   textual/nospaces  select the text-only encodings (nospaces implies textual)
--   blocksize/selfsync  passed through to addfile()
-- The archive is the "LAR" magic, the format version, the chunks emitted by
-- addfile()/flushcommandbuffer(), and a final ".".
function create(dir,instream,outstream,include,exclude,verbose,textual,nospaces,differential,blocksize,selfsync)
  if verbose then
    io.stderr:write("Adding what matches \""..include.."\" and does not match \""..exclude.."\"...\n")
  end
  textual=textual or nospaces --nospaces implies textual
  outstream:write("LAR")
  writenumber(FORMAT_VERSION,outstream)
  for basenumber=1,differential do
    if verbose then
      io.stderr:write("("..getcounters()..") Reading blocks from base archive number "..basenumber.."...\n")
    end
    parselarheader(instream)
    -- Only look at the base archive (justlook=true): its blocks are hashed
    -- but not stored on disk.
    repeat
      local finished=witnessstream(instream,true)
      if not finished and verbose and timehaspassed() then
        io.stderr:write("("..getcounters()..") Still reading blocks from base archive number "..basenumber.."...\n")
      end
    until finished
  end
  local filenames=listfiles(dir,include,exclude,verbose)
  if verbose then
    io.stderr:write("("..getcounters()..") Creating archive...\n")
  end
  for position,filename in ipairs(filenames) do
    local progress=round(position/#filenames*100)
    addfile(filename,outstream,textual,nospaces,progress,blocksize,selfsync)
  end
  flushcommandbuffer()
  outstream:write(".")
end
-- Reads an archive from instream and extracts it, lists it (dryrun), or
-- converts it to a thin hash-only archive (thinoutputstream).
--   include/exclude   "|"-separated Lua pattern lists used to filter entries
--   verbose           print progress to stderr
--   dryrun            do not touch the filesystem
--   differential      number of base archives that precede the archive
--   thinoutputstream  when set, block hashes are written there instead of
--                     extracting anything
--   force             tolerate blocks missing from the base archives
-- Permissions and timestamps are not applied while files are written;
-- executestream() returns them as {depth,function} pairs which are run at
-- the end, deepest paths first.
function extract(instream,include,exclude,verbose,dryrun,differential,thinoutputstream,force)
  if verbose then
    if not thinoutputstream then
      io.stderr:write("Extracting "..(dryrun and "(dry run) " or "").."what matches \""..include.."\" and does not match \""..exclude.."\"...\n")
      if force then
        io.stderr:write("Forcing extraction of files even if some of their blocks are missing...\n")
      end
    else
      io.stderr:write("Hashing blocks in input to create a thin version in output...\n")
    end
  end
  for i=1,differential do
    if verbose then io.stderr:write("("..getcounters()..") Reading blocks from base archive number "..i.."...\n") end
    parselarheader(instream)
    while not witnessstream(instream,dryrun) do
      if verbose and timehaspassed() then io.stderr:write("("..getcounters()..") Still reading blocks from base archive number "..i.."...\n") end
    end
  end
  if verbose then io.stderr:write("("..getcounters()..") Reading archive...\n") end
  parselarheader(instream)
  if thinoutputstream then
    thinoutputstream:write("LAR")
    writenumber(FORMAT_VERSION,thinoutputstream)
  end
  local commandbuffer={}
  while true do
    local stop,delayedcommand=executestream(instream,include,exclude,dryrun,thinoutputstream,force)
    if stop then break end
    -- delayedcommand is nil for chunks that defer nothing; inserting nil
    -- leaves the buffer unchanged.
    table.insert(commandbuffer,delayedcommand)
  end
  if thinoutputstream then
    thinoutputstream:write(".")
  end
  if verbose then io.stderr:write("Applying "..(dryrun and "(dry run) " or "").."permissions, modification dates etc...\n") end
  -- Deepest entries first. The comparator used to compare a[1] with itself,
  -- which is always false, so the buffer was never actually ordered.
  table.sort(commandbuffer,function (a,b) return a[1]>b[1] end)
  for _,command in ipairs(commandbuffer) do
    command[2]()
  end
end
do
  -- Blocks seen in base archives: hash -> path of the temporary file that
  -- holds the block, or true when only the hash is known (thin archive).
  local witnessed={}
  -- Number of block chunks seen so far; used to name the temporary files.
  local seen=0

  -- Consumes one chunk of a base archive from instream and remembers the
  -- hash of any data block in it. Unless justlook is set the block data is
  -- also stored in a temporary file under blockdir so it can be fetched
  -- later. Metadata chunks (H, D, P, S, F) are read and discarded.
  -- Returns true when the end-of-archive marker "." was read and nothing
  -- otherwise. Raises an error on a truncated or unknown chunk, and when a
  -- thin ("t") chunk is met while justlook is not set.
  function witnessstream(instream,justlook)
    local chunktype=instream:read(1)
    if not chunktype then
      error("Unexpected end of archive.")
    end
    if chunktype=="." then
      return true
    end
    if chunktype=="T" or chunktype=="B" then
      local blockdata=readdatapacket(instream,chunktype=="T")
      local digest=hash(blockdata)
      local path=blockdir.."/w"..seen
      seen=seen+1
      witnessed[digest]=path
      if not justlook then
        local out=io.open(path,"wb")
        out:write(blockdata)
        out:close()
      end
    elseif chunktype=="t" then
      local digest=readdatapacket(instream,false)
      seen=seen+1
      witnessed[digest]=true
      if not justlook then
        error("You are trying to extract a differential archive but instead of passing the actual base archives in the input you are passing the thin versions of them created by \"lar -H\". The actual data on the base archives is needed so you should pass the actual base archives.")
      end
    elseif chunktype=="H" then
      readdatapacket(instream,true)
    elseif chunktype=="D" or chunktype=="P" then
      -- Directory and fifo records share one layout: name, attrs, mtime.
      readdatapacket(instream,true)
      readnumber(instream)
      readnumber(instream)
    elseif chunktype=="S" then
      readdatapacket(instream,true)
      readnumber(instream)
      readdatapacket(instream,true)
    elseif chunktype=="F" then
      readdatapacket(instream,true)
      readnumber(instream)
      readnumber(instream)
      -- The block count is followed by that many block ids.
      for _=1,readnumber(instream) do
        readnumber(instream)
      end
    else
      error("Corrupt archive. Unknown chunk type: "..chunktype)
    end
  end

  -- Returns true when a block with hash ha was seen in a base archive.
  function iswitnessedblock(ha)
    return witnessed[ha] and true or false
  end

  -- Returns the data of the witnessed block with hash ha and the path of
  -- the file it was read from. For an unknown hash it returns "",false when
  -- force is set and raises an error otherwise.
  function getwitnessedblock(ha,force)
    local path=witnessed[ha]
    if not path then
      if not force then
        error("Unwitnessed hashed block. You are trying to extract a differential archive but you are probably missing one of its base archives.")
      end
      return "",false
    end
    local source=io.open(path,"rb")
    local blockdata=source:read("*a")
    source:close()
    return blockdata,path
  end
end
do
  -- Blocks of the archive being extracted, indexed by block id (the order
  -- in which block chunks appear). Each entry records where the data can
  -- currently be read from:
  --   location  file path (false when the block is missing in force mode,
  --             nil during a dry run of an "H" chunk)
  --   offset    byte offset of the block inside that file
  --   size      block length in bytes (false for "H" chunks in a dry run)
  --   tmp       true while the data still lives in a temporary block file
  local blocks={}

  -- Writes "filename (ftype)" to stderr, prefixed with the counters unless
  -- dontprintcounters is set.
  function printfileinfoline(filename,ftype,dontprintcounters)
    local prefix=""
    if not dontprintcounters then
      prefix="("..getcounters()..") "
    end
    io.stderr:write(prefix..filename.." ("..ftype..")\n")
  end

  -- True when an entry must be skipped because of the include/exclude lists.
  local function filteredout(filename,include,exclude)
    return multimatches(filename,exclude) or not multimatches(filename,include)
  end

  -- Builds the {depth,function} pair that applies permissions and the
  -- modification time to filename once everything has been extracted.
  local function deferredattrs(filename,attrs,modtimestamp,dryrun)
    return {getdepth(filename),function ()
      if not dryrun then
        setattrs(filename,attrs)
        settimestamp(filename,modtimestamp)
      end
    end}
  end

  -- Consumes and executes one chunk of the archive on instream.
  -- Returns true for the end-of-archive marker; false plus a
  -- {depth,function} pair for every entry that was extracted or listed; and
  -- nothing for block chunks, filtered entries and thin-mode metadata.
  -- With thinoutputstream set, block hashes are written there and nothing
  -- is extracted. Files that cannot be opened now raise an error carrying
  -- the reason reported by io.open instead of failing on a nil handle.
  -- NOTE(review): `verbose` is not a parameter of this function; it
  -- resolves to a variable defined outside this block.
  function executestream(instream,include,exclude,dryrun,thinoutputstream,force)
    local chunktype=instream:read(1)
    if not chunktype then
      error("Unexpected end of archive.")
    elseif chunktype=="T" or chunktype=="B" then
      local blockdata=readdatapacket(instream,chunktype=="T")
      local blockid=#blocks+1
      local location=blockdir.."/"..blockid
      if thinoutputstream then
        thinoutputstream:write("t")
        writedatapacket(hash(blockdata),thinoutputstream,false,false)
      elseif not dryrun then
        local bh=assert(io.open(location,"wb"))
        bh:write(blockdata)
        bh:close()
      end
      blocks[blockid]={tmp=true,location=location,offset=0,size=#blockdata}
      if verbose and timehaspassed() then io.stderr:write("("..getcounters()..") Still reading archive...\n") end
    elseif chunktype=="H" then
      local ha=readdatapacket(instream,true)
      if not thinoutputstream then
        local blockdata,location
        if not dryrun then blockdata,location=getwitnessedblock(ha,force) end
        table.insert(blocks,{tmp=true,location=location,offset=0,size=not dryrun and #blockdata})
        if verbose and timehaspassed() then io.stderr:write("("..getcounters()..") Still reading archive...\n") end
      end
    elseif chunktype=="t" then
      error("You are trying to extract or list the contents of a thin archive created with the -H option. Thin archives do not contain data so they can only be used instead of base archives when creating differential archives.")
    elseif chunktype=="D" then
      local filename=readdatapacket(instream,true)
      exploitcheck(filename)
      local attrs=readnumber(instream)
      local modtimestamp=readnumber(instream)
      if not thinoutputstream then
        if filteredout(filename,include,exclude) then return end
        if not dryrun then mkdir(filename) end
        if verbose or dryrun then printfileinfoline(filename,"directory",dryrun and not verbose) end
        return false,deferredattrs(filename,attrs,modtimestamp,dryrun)
      end
    elseif chunktype=="P" then
      local filename=readdatapacket(instream,true)
      exploitcheck(filename)
      local attrs=readnumber(instream)
      local modtimestamp=readnumber(instream)
      if not thinoutputstream then
        if filteredout(filename,include,exclude) then return end
        if not dryrun then
          mkdir(parent(filename))
          mkfifo(filename)
        end
        if verbose or dryrun then printfileinfoline(filename,"fifo",dryrun and not verbose) end
        return false,deferredattrs(filename,attrs,modtimestamp,dryrun)
      end
    elseif chunktype=="S" then
      local filename=readdatapacket(instream,true)
      exploitcheck(filename)
      local modtimestamp=readnumber(instream)
      local target=readdatapacket(instream,true)
      if not thinoutputstream then
        if filteredout(filename,include,exclude) then return end
        if not dryrun then
          mkdir(parent(filename))
        end
        if verbose or dryrun then printfileinfoline(filename,"symbolic link",dryrun and not verbose) end
        -- The link itself is only created in the deferred step.
        return false,{getdepth(filename),function ()
          if not dryrun then
            mksymlink(filename,target)
            settimestamp(filename,modtimestamp)
          end
        end}
      end
    elseif chunktype=="F" then
      local filename=readdatapacket(instream,true)
      exploitcheck(filename)
      local attrs=readnumber(instream)
      local modtimestamp=readnumber(instream)
      local blockcount=readnumber(instream)
      if thinoutputstream or filteredout(filename,include,exclude) then
        -- Skip the block id list of an entry that is not extracted.
        for _=1,blockcount do
          readnumber(instream)
        end
        return
      end
      if not dryrun then
        mkdir(parent(filename))
        local fh=assert(io.open(filename,"wb"))
        for _=1,blockcount do
          local blockid=readnumber(instream)
          local block=blocks[blockid]
          if not block then
            error("Corrupt archive. Invalid block id.")
          end
          -- location==false marks a block that is missing in force mode;
          -- it contributes nothing to the file.
          if block.location~=false then
            local bh=assert(io.open(block.location,"rb"))
            bh:seek("set",block.offset)
            local lastoffset=fh:seek()
            local blockdata=reallyread(bh,block.size,true)
            fh:write(blockdata)
            bh:close()
            if block.tmp then
              -- From now on the block is read back from the extracted file,
              -- so its temporary copy can go.
              deletefile(block.location)
              fh:flush() --flush otherwise we may not find the block later in the new location
              block.location=filename
              block.offset=lastoffset
              block.tmp=false
            end
          end
        end
        fh:close()
      else
        for _=1,blockcount do
          readnumber(instream)
        end
      end
      if verbose or dryrun then printfileinfoline(filename,"regular file",dryrun and not verbose) end
      return false,deferredattrs(filename,attrs,modtimestamp,dryrun)
    elseif chunktype=="." then
      return true
    else
      error("Corrupt archive. Unknown chunk type.")
    end
  end
end
do
  -- Hash of every block already emitted into this archive -> its block id.
  local known={}
  -- Hashes in emission order; its length is the id of the newest block.
  local hasharray={}
  -- Writers for metadata chunks (F, D, P, S), run by flushcommandbuffer().
  local commandbuffer={}

  -- Adds one filesystem entry to the archive on outstream.
  -- Regular files are split into blocks; each block not seen before is
  -- written immediately as an "H" chunk (hash only, block exists in a base
  -- archive) or as a "T"/"B" chunk (text/binary data). The metadata record
  -- of the entry is queued and written later by flushcommandbuffer().
  --   textual/nospaces  choose the text-only block encodings
  --   progress          percentage shown in verbose output
  --   blocksize         nominal block size in bytes
  --   selfsync          "|"-separated Lua patterns; matching files use
  --                     content-defined (self-synchronizing) block ends
  -- Entries that vanish before they can be read are reported and skipped.
  -- NOTE(review): `verbose` is not a parameter of this function; it
  -- resolves to a variable defined outside this block.
  function addfile(filename,outstream,textual,nospaces,progress,blocksize,selfsync)
    local fileinfo=getfileinfo(filename)
    if not fileinfo then
      if verbose then io.stderr:write("File disappeared: "..filename.."\n") end
      return
    end
    -- Was an accidental global; it is only used inside this function.
    local fileinfostring=progress.."% ("..getcounters()..")".." "..filename.." ("..(fileinfo.type or "unknown type")..") ["
    if fileinfo.type=="regular file" or fileinfo.type=="regular empty file" then
      if verbose then io.stderr:write(fileinfostring) end
      local fh=io.open(filename,"rb")
      if not fh then
        io.stderr:write((verbose and "]\n" or "").."File disappeared: "..filename.."\n")
        return
      end
      local fileblockindices={}
      local leftovers=""
      while true do
        local buf
        if selfsync and multimatches(filename,selfsync) then
          buf=reallyread(fh,math.floor(blocksize*1.25)-#leftovers,true)
          if not buf and leftovers=="" then break end
          buf=leftovers..(buf or "")
          -- Scan the 16-byte windows ending in the allowed size range and
          -- keep the one that wins the comparison against `cut`. The
          -- comparison is kept exactly as it was because it decides where
          -- blocks end, and therefore which blocks deduplicate.
          local best,winner=string.rep("\0",17),#buf --was +1
          for i=math.floor(0.66*blocksize),math.min(#buf,math.floor(1.34*blocksize)) do
            local candidate=string.sub(buf,i-15,i)
            if #candidate==16 and ((candidate>=best and (best>cut or cut>=candidate)) or (candidate<=cut and best>cut)) then
              best=candidate
              winner=i
            end
          end
          buf,leftovers=string.sub(buf,1,winner+#best),string.sub(buf,winner+#best+1)
        else
          buf=reallyread(fh,blocksize,true)
          if not buf then break end
        end
        local bufhash=hash(buf)
        if not known[bufhash] then
          totalblocks=totalblocks+1
          -- hash() returns short data unchanged, so only full-length hashes
          -- can refer to a block of a base archive.
          if #bufhash==HASH_SIZE and iswitnessedblock(bufhash) then
            if verbose then io.stderr:write("H") end
            outstream:write("H")
            writedatapacket(bufhash,outstream,true,nospaces)
            saved=saved+#buf-#bufhash
          else
            if verbose then io.stderr:write("N") end
            if textual then
              outstream:write("T")
              writedatapacket(buf,outstream,true,nospaces)
            else
              outstream:write("B")
              writedatapacket(buf,outstream,false,nospaces)
            end
          end
          table.insert(hasharray,bufhash)
          known[bufhash]=#hasharray
        else
          if verbose then io.stderr:write("P") end
          saved=saved+#buf
        end
        total=total+#buf
        table.insert(fileblockindices,known[bufhash])
      end
      fh:close()
      table.insert(commandbuffer,function ()
        outstream:write("F")
        writedatapacket(filename,outstream,true,nospaces)
        writenumber(fileinfo.attrs,outstream)
        writenumber(fileinfo.modts,outstream)
        writenumber(#fileblockindices,outstream)
        for _,blockindex in ipairs(fileblockindices) do
          writenumber(blockindex,outstream)
        end
      end)
    elseif fileinfo.type=="directory" then
      if verbose then io.stderr:write(fileinfostring) end
      table.insert(commandbuffer,function ()
        outstream:write("D")
        writedatapacket(filename,outstream,true,nospaces)
        writenumber(fileinfo.attrs,outstream)
        writenumber(fileinfo.modts,outstream)
      end)
    elseif fileinfo.type=="fifo" then
      if verbose then io.stderr:write(fileinfostring) end
      table.insert(commandbuffer,function ()
        outstream:write("P")
        writedatapacket(filename,outstream,true,nospaces)
        writenumber(fileinfo.attrs,outstream)
        writenumber(fileinfo.modts,outstream)
      end)
    elseif fileinfo.type=="symbolic link" then
      if verbose then io.stderr:write(fileinfostring) end
      local symbolictarget=getsymbolictarget(filename)
      if not symbolictarget then
        io.stderr:write((verbose and "]\n" or "").."File disappeared: "..filename.."\n")
        return
      end
      table.insert(commandbuffer,function ()
        outstream:write("S")
        writedatapacket(filename,outstream,true,nospaces)
        writenumber(fileinfo.modts,outstream)
        writedatapacket(symbolictarget,outstream,true,nospaces)
      end)
    end
    if verbose then io.stderr:write("]\n") end
    -- Keep the number of pending metadata records bounded.
    if #commandbuffer>=10000 then
      flushcommandbuffer()
    end
  end

  -- Writes all queued metadata records to the archive and empties the queue.
  function flushcommandbuffer()
    for _,command in ipairs(commandbuffer) do
      command()
    end
    commandbuffer={}
  end
end
- ---------------------
- --- OS / external ---
- ---------------------
-- Queries `stat` for filename and returns a table with the fields
--   attrs  access rights in octal (stat %a)
--   modts  modification time in seconds since the epoch (stat %Y)
--   type   file type description (stat %F)
-- all as strings, or false when stat could not be started or printed
-- nothing (for example because the file no longer exists).
-- The pipe is closed on every path; it used to leak when stat printed
-- nothing.
function getfileinfo(filename)
  local p=io.popen("stat -c '%a|%Y|%F' '"..escapequotes(filename).."'")
  if not p then
    return false
  end
  local infoline=p:read("*l")
  p:close()
  if not infoline or infoline=="" then
    return false
  end
  local attrs,modts,typ=string.match(infoline,"([^|]+)|([^|]+)|([^|]+)")
  return {attrs=attrs,modts=modts,type=typ}
end
- --[[
- function getowner(filename)
- local p=io.popen("stat -c %u '"..escapequotes(filename).."'")
- local owner=p:read("*l")
- p:close()
- return owner
- end
- function getattrs(filename)
- local p=io.popen("stat -c %a '"..escapequotes(filename).."'")
- local attrs=p:read("*l")
- p:close()
- return attrs
- end
- function getmodtimestamp(filename)
- local p=io.popen("stat -c %Y '"..escapequotes(filename).."'")
- local modts=p:read("*l")
- p:close()
- return modts
- end
- function getfiletype(filename)
- local p=io.popen("stat -c %F '"..escapequotes(filename).."'")
- local typ=p:read("*l")
- p:close()
- return typ
- end
- --]]
-- Returns the first line printed by `readlink` for filename (the target of
-- the symbolic link), or nil when readlink printed nothing.
function getsymbolictarget(filename)
  local quoted=escapequotes(filename)
  local pipe=io.popen("readlink '"..quoted.."'")
  local firstline=pipe:read("*l")
  pipe:close()
  return firstline
end
-- Sets the modification time of filename to modtimestamp (seconds since the
-- epoch) by running `touch -h`, which acts on a symbolic link itself rather
-- than on its target.
function settimestamp(filename,modtimestamp)
  local quoted=escapequotes(filename)
  local command="touch -h --date=@"..modtimestamp.." '"..quoted.."'"
  os.execute(command)
end
-- Applies the permission bits attrs to filename by running `chmod`.
function setattrs(filename,attrs)
  local quoted=escapequotes(filename)
  local command="chmod "..attrs.." '"..quoted.."'"
  os.execute(command)
end
-- Creates the directory filename together with any missing parents
-- (`mkdir -p`). Does nothing when filename is nil or false, so callers may
-- pass the result of parent() directly.
function mkdir(filename)
  if not filename then
    return
  end
  local quoted=escapequotes(filename)
  os.execute("mkdir -p '"..quoted.."'")
end
-- Deletes the file filename.
-- Uses os.remove instead of a shell command: the previous `rm` call spliced
-- the path into the command line without quotes (escapequotes() is only
-- valid inside single quotes) and started a shell for every block file.
-- A failure is reported on stderr, as rm did, and is not raised.
function deletefile(filename)
  local removed,reason=os.remove(filename)
  if not removed then
    io.stderr:write("Cannot remove file: "..tostring(reason).."\n")
  end
end
-- Recursively deletes the directory filename with `rm -rf`.
-- The path is now wrapped in single quotes, which is the context
-- escapequotes() is written for; before, it was spliced into the command
-- line unquoted, so spaces or shell metacharacters would have been
-- interpreted by the shell. "--" stops option parsing for names starting
-- with "-".
function deletedir(filename)
  os.execute("rm -rf -- '"..escapequotes(filename).."'")
end
-- Creates the symbolic link filename pointing at target using `ln -s`.
-- "--" ends option parsing so that a target (or link name) beginning with
-- "-" is treated as an operand instead of being read as options by ln.
function mksymlink(filename,target)
  os.execute("ln -s -- '"..escapequotes(target).."' '"..escapequotes(filename).."'")
end
-- Creates a named pipe at filename by running the `mkfifo` utility.
function mkfifo(filename)
  local quoted=escapequotes(filename)
  os.execute("mkfifo '"..quoted.."'")
end
-- Lists everything `find` reports under dir, keeps the paths that match the
-- include patterns and none of the exclude patterns, and returns them
-- sorted by (upper-cased) extension, then by (upper-cased) file name, then
-- by full path. With verbose set, progress is written to stderr.
function listfiles(dir,include,exclude,verbose)
  if verbose then
    io.stderr:write("Listing files...\n")
  end
  local finder=io.popen("find '"..escapequotes(dir).."'")
  local selected={}
  for filename in finder:lines() do
    if not multimatches(filename,exclude) and multimatches(filename,include) then
      selected[#selected+1]=filename
    end
  end
  finder:close()
  if verbose then
    io.stderr:write("Sorting "..#selected.." filenames...\n")
  end
  -- Strict ordering: extension first, then base name, then whole path.
  local function before(a,b)
    local exta,extb=getext(a),getext(b)
    if exta~=extb then
      return exta<extb
    end
    local fna,fnb=stripdirs(a),stripdirs(b)
    if fna~=fnb then
      return fna<fnb
    end
    return a<b
  end
  table.sort(selected,before)
  return selected
end
--- Produce a fixed-size identifier for a chunk of data.
-- Data shorter than HASH_SIZE bytes is returned unchanged. Longer data is
-- piped through sha256sum by way of the FIFO at `fifofn`, and the first
-- HASH_SIZE bytes of its output are returned.
-- @param data string to hash
-- @return `data` itself when shorter than HASH_SIZE, otherwise what
--         reallyread obtains from sha256sum's output
function hash(data)
  if #data<HASH_SIZE then return data end
  -- Order matters: sha256sum is started on the FIFO before this process
  -- opens the FIFO for writing, so a reader exists for the writer.
  local digestpipe=io.popen("sha256sum "..fifofn,"r")
  local feeder=io.open(fifofn,"wb")
  feeder:write(data)
  feeder:close()
  local digest=reallyread(digestpipe,HASH_SIZE)
  digestpipe:close()
  return digest
  -- Disabled pure-Lua alternative, kept for reference:
  --[[
  local hs={0,7,42,1337,1,2,3,4}
  local rots={1,9,25,31,3,7,15,11}
  for ci=1,#data do
  local v=string.byte(data,ci)
  for p,h in pairs(hs) do
  hs[p]=bit32.bxor(bit32.rrotate(h,rots[p]),v*(p+ci%19))
  end
  end
  print("Hashed: "..#data,bit32.bxor(unpack(hs)),table.concat(hs,"|"))
  return table.concat(hs,"|")
  --]]
end
- ------------------------
- --- Argument parsing ---
- ------------------------
--- Parse single-letter command line options.
-- Every argument must start with "-" and may group several letters ("-cv").
-- A letter declared as "string" consumes the following argument as its value
-- and therefore has to be the last letter of its group.
-- @param args array of command line arguments
-- @param types table mapping each accepted letter to "boolean" or "string"
-- @return table mapping each given letter to true (boolean options) or to
--         its value (string options)
-- Raises an error when a letter is repeated, unknown, a string option is not
-- last in its group or has no value after it, or an argument does not start
-- with "-".
function parseargs(args,types)
  local parsed={}
  local seen={}
  local i=1
  while args[i] do
    -- Keep the current group in a local: i is advanced below when a value is
    -- consumed, and the checks must keep referring to this argument.
    local group=args[i]
    if not string.match(group,"^%-") then
      error("Unexpected argument value: "..group)
    end
    for letter in string.gmatch(group,"%w") do
      assert(not seen[letter],"Command line argument -"..letter.." is defined twice.")
      seen[letter]=true
      local kind=types[letter]
      if kind=="boolean" then
        parsed[letter]=true
      elseif kind=="string" then
        assert(string.match(group,"%w$")==letter,"Command line argument -"..letter.." expects a string after it so it must be the last one in a group of letters.")
        local value=args[i+1]
        -- Previously a missing value was silently stored as nil, which made
        -- a trailing "-b" indistinguishable from not passing -b at all.
        assert(value~=nil,"Command line argument -"..letter.." expects a value after it.")
        parsed[letter]=value
        i=i+1
      else
        error("Unknown command line argument: -"..letter)
      end
    end
    i=i+1
  end
  return parsed
end
- ------------------------
- --- Hook read/writes ---
- ------------------------
-- Replace io.stdin and io.stdout with thin wrappers that forward to the real
-- streams and report the amount of data moved to count().
do
  local rawstdin,rawstdout=io.stdin,io.stdout
  -- Size reported for one read result: string length, number of characters
  -- of a number's string form, or 1 for anything else (e.g. nil).
  local function inputsize(value)
    local kind=type(value)
    if kind=="number" then return #tostring(value) end
    if kind=="string" then return #value end
    return 1
  end
  local function countedread(s,a)
    local data=rawstdin:read(a)
    count("input",inputsize(data))
    return data
  end
  local function countedwrite(s,data)
    local res=rawstdout:write(data)
    count("output",#data)
    return res
  end
  io.stdin={read=countedread}
  io.stdout={write=countedwrite}
end
- ------------
- --- Main ---
- ------------
-- Cleanup/report flags: set inside the protected call below and read by the
-- code that follows it, so they are declared outside.
local mustremoveblockdir,mustremovefifo,mustshowinfo=false,false,false
-- The whole mode dispatch runs under pcall so that the statements after it
-- still execute when an assertion or error aborts the selected mode.
local res,errmsg=pcall(function ()
  local args=parseargs(arg,{
    v='boolean',
    c='boolean',
    x='boolean',
    l='boolean',
    b='string',
    i='string',
    e='string',
    h='boolean',
    p='boolean',
    P='boolean',
    d='string',
    C='string',
    m='boolean',
    B='string',
    s='string',
    H='boolean',
    f='boolean',
  })
  -- Defaults; the -m and -H branches compare against these exact values to
  -- detect that -i / -e were not given.
  args.i=args.i or "."
  args.e=args.e or "/////"
  -- NOTE(review): `verbose` is not declared in this section; it is read again
  -- after the pcall, so it is presumably a file-level variable.
  verbose=args.v
  args.d=args.d or 0
  args.d=tonumber(args.d)
  assert(args.d and args.d%1==0,"-d must be followed by an integer.")
  -- Count the selected modes. The -m flag is tested via args.m; the previous
  -- code tested args.n, which is not a declared option, so -m was never
  -- counted and could be combined with another mode unnoticed.
  local function given(flag) return flag and 1 or 0 end
  local modes=given(args.c)+given(args.x)+given(args.l)+given(args.C)+given(args.m)+given(args.H)
  assert(modes<=1,"Exactly one of -c, -x, -l, -C, -m, -H must be set.")
  assert(not (args.p and args.P),"You cannot set both -p and -P. Perhaps you meant to set only -P?")
  if args.h then
    showusage()
  elseif args.c then
    args.b=args.b or "."
    -- Validate -B on the converted number instead of relying on implicit
    -- string arithmetic; the default is 1024*1024.
    if args.B then
      local blocksize=tonumber(args.B)
      assert(blocksize and blocksize%1==0 and blocksize>0,"-B must be followed by a positive integer")
      args.B=blocksize
    else
      args.B=1024*1024
    end
    mkfifo(fifofn)
    mustremovefifo=true
    mustshowinfo=true
    exploitcheck(args.b)
    create(args.b,io.stdin,io.stdout,args.i,args.e,args.v,args.p,args.P,args.d,args.B,args.s)
  elseif args.m then
    assert(not args.f and not args.b and not args.B and not args.s and args.i=="." and args.e=="/////" and not args.p and not args.P and args.d==0,"You cannot set -b, -B, -s, -i , -e, -p, -P, -f nor -d when you are creating an empty archive with -m.")
    args.b="/dev/null"
    mkfifo(fifofn)
    mustremovefifo=true
    mustshowinfo=true
    create(args.b,io.stdin,io.stdout,args.i,args.e,args.v,args.p,args.P,args.d)
  elseif args.x then
    assert(not args.b and not args.B and not args.s and not args.p and not args.P,"Command line option -x cannot be combined with -p, -B, -s, -P nor -b.")
    mkfifo(fifofn)
    mustremovefifo=true
    mkdir(blockdir)
    mustremoveblockdir=true
    extract(io.stdin,args.i,args.e,args.v,false,args.d,false,args.f)
  elseif args.l then
    assert(not args.b and not args.B and not args.s and not args.p and not args.f and not args.P,"Command line option -l cannot be combined with -p, -B, -s, -P, -f nor -b.")
    mkfifo(fifofn)
    mustremovefifo=true
    extract(io.stdin,args.i,args.e,args.v,true,args.d,false,false)
  elseif args.C then
    assert(not args.f and not args.b and not args.p and not args.P and args.d==0 and not args.s and not args.B,"Command line option -C cannot be combined with -p, -B, -s, -P, -f, -b nor -d.")
    -- -C takes "N,T": two non-negative integers separated by a comma.
    local n,t=string.match(args.C,"^(%d+)%s*,%s*(%d+)$")
    n,t=tonumber(n),tonumber(t)
    assert(n and t and n%1==0 and t%1==0,"N and T (after -C) must be integers.")
    local avg=round(t/(n+1),1)
    local max=math.ceil(t/(n+1))
    assert(n<t,"If your differential archives are based on the last "..n.." archives, then you need to keep at least "..n+1 .." archives available.")
    print("If your differential archives are based on the last "..n.." archives (-d "..n..") and you keep a total of "..t.." archives, then you should expect to have "..t-n.." recoverable archives. Archives older than the last "..t-n.." will not have all their base archives available and you will therefore be unable to extract them. You should expect that all "..t.." archives together will take about the same space as "..avg.." full size (non-differential) archives"..(avg~=max and ", but in some cases they will take up about the same space as "..max.." full size archives." or "."))
  elseif args.H then
    assert(not args.f and not args.b and args.i=="." and args.e=="/////" and not args.p and not args.P and args.d==0 and not args.s and not args.B,"Command line option -H cannot be combined with -b, -i, -e, -p, -P, -f, -s, -B nor -d.")
    mkfifo(fifofn)
    mustremovefifo=true
    extract(io.stdin,args.i,args.e,args.v,true,0,io.stdout,false)
  else
    -- No mode selected: show the help text.
    showusage()
  end
end)
-- Cleanup and reporting. These statements run after the protected call
-- whether it succeeded or failed.
if mustremovefifo then deletefile(fifofn) end
if mustremoveblockdir then deletedir(blockdir) end
if mustshowinfo and verbose then
  -- total starts at 0 and may still be 0 here; avoid computing 0/0 for the
  -- percentage in that case.
  local percent=total>0 and saved/total*100 or 0
  io.stderr:write("Deduplication savings: "..formatbytes(saved).." of "..formatbytes(total).." bytes. ("..round(percent).."%)\n")
  io.stderr:write("Total blocks: "..totalblocks.."\n")
end
if verbose then io.stderr:write("Final counters: "..getcounters().."\n") end
-- Decide on failure from pcall's status, not from the error value, and
-- stringify the value so a non-string error object cannot break the report.
if not res then
  io.stderr:write(tostring(errmsg).."\n")
  os.exit(1)
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement