#!/usr/bin/lua
-- lar - Liquid Archive: a streaming archiver that deduplicates data at block
-- level and can create/extract differential archives.
--
-- NOTE(review): this text was reconstructed from a copy of the file whose
-- newlines had been collapsed. Two spans (marked below) were additionally
-- lost to "<...>" stripping and the file is truncated at the end; those spots
-- are reproduced verbatim and must be restored from the pristine source.
local FORMAT_VERSION=1
local PROGRAM_VERSION=1
local HASH_SIZE=64
-- Seed the PRNG from /dev/urandom so the temporary names below are unpredictable.
math.randomseed(io.popen("tr -cd \"[:digit:]\" < /dev/urandom | head -c 14","r"):read("*n")-os.time())
local fifofn="/tmp/lar-fifo-"..math.random()
local blockdir="./lar-blocks-"..math.random()
local total=0
local saved=0
local totalblocks=0
-- 16-byte reference string used by the self-synchronizing splitter in addfile.
local cut=string.char(
  146,29,145,10,
  42,57,141,0,
  201,198,43,27,
  124,130,66,183
)

---------------
--- Helpers ---
---------------

--- Write the usage/help text to stderr.
-- NOTE(review): the original line breaks inside the help text were lost in
-- the collapsed copy; the text is kept as found apart from typo fixes and the
-- corrected "cat yesterday.lar | lar" example.
function showusage()
  io.stderr:write([==[ lar - Liquid Archive version ]==]..FORMAT_VERSION.."."..PROGRAM_VERSION..[==[ Lar reads archives from stdin and writes archives to stdout. Existing files are overwritten during extraction, if permissions allow that. Liquid archives are designed to be streamable both during creation and during extraction. They also eliminate redundancy on a 1MiB block level (or smaller for files smaller than 1MiB). A side effect of these two design goals is that during extraction with inclusion or exclusion filters, all the data needs to be extracted on disk and then partially discarded. Lar ignores file ownership both when archiving and when extracting. For excellent and efficient compression of lar archives we suggest piping the output through "mbuffer -q -m 1G" and then through "lzip -9" if you have these tools available. Usage: lar -c [-v] [-p|-P] [-B SIZ] [-s SNC] [-d NUM] [-b DIR] [-i INC] [-e EXC] lar -x [-v] [-p|-P] [-d NUM] [-i INC] [-e EXC] [-f] lar -l [-v] [-p|-P] [-d NUM] [-i INC] [-e EXC] lar -m [-v] lar -C N,T lar -H [-v] -c Create archive. Outputs to stdout an archive of DIR as seen from the current directory. -x Extract archive. Reads an archive from stdin and extracts the contents to the current working directory. -l List contents. Same as extract but it will not touch your local filesystem. You can even combine it with -i and -e to view just some of the files. It does not fully test the archive for all kinds of inconsistencies though. 
Some will only be detected when you actually extract the archive. -m Create an empty archive, useful if you want to create empty base archives for a differential archive. -i Only include files and directories that match the INC Lua pattern. Works with -c, -x and -l. Additionally, "|" may be used to separate multiple Lua patterns as a logical OR. https://www.lua.org/manual/5.3/manual.html#6.4.1 -e Exclude files that match the EXC Lua pattern. Works with -c, -x and -l. -v Be more verbose. -b DIR is the directory or file that will be added to the archive. It can only be used with -c. -d Create or extract a differential archive. You need to sequentially pipe NUM base archives in the stdin when you create a differential archive. The same base archives need to be passed in the stdin (in any order) when you want to extract the resulting differential archive. Differential archives will not repeat blocks of data that exist in the base archives, so they are ideal for incremental backups. You may use differential archives as base archives for a new differential archive. Doing so will not cause the new differential archive to require the base archives of its base archives during its extraction. This allows you to create a sequence of differential archives where each one depends on the NUM previous archives. -p Create a text only archive that contains only printable characters and whitespace at the cost of an increased archive size and slower archival. It can only be combined with -c. -P Same as -p but this time even whitespace is disallowed. -f Force extraction of files with missing blocks. This will allow you to partially extract a differential archive even if some of its base archives are missing. -B Change the block size to SIZ. Default is 1048576 (1MiB). Bigger blocks are faster but deduplication is done at the block level so smaller blocks (but not very small) will result in more data savings. Don't change this if you are not sure. It can only be used with -c. 
-s Enable self-synchronizing block splitting for files matching the SNC Lua pattern. Data blocks will not be of a fixed size anymore, they will vary between 66% and 134% of the size defined by -B (or the default 1MiB). The sizes are picked in such a way that if you have two files with different sizes but a substantial amount of common data at their end, there is a high probability that the blocks will synchronize, improving the deduplication of data. This is especially useful if, for example, you are trying to make differential backups of a MySQL dump file every day when only a little data changes but also the size of the file changes. Archival will be much slower. It can only be used with -c. Suppose you have these two data streams: abdefghijklmnopqrstuvwxyz abdefghijklm123nopqrstuvwxyz If you split them with fixed size blocks they will look like: ab|de|fg|hi|jk|lm|no|pq|rs|tu|vw|xy|z ab|de|fg|hi|jk|lm|12|3n|op|qr|st|uv|wx|yz Deduplication will work fine for the blocks before the digits but after the digits the blocks are offset in such a way that they will never resynchronize and even though the ends of both streams are the same, deduplication is impossible. Self-synchronizing blocks will split these data streams like: ab|def|g|h|ij|kl|mno|pq|rs|t|u|vwx|yz ab|def|g|h|ij|kl|m1|23n|op|q|rs|t|u|vwx|yz The blocks eventually resynchronize (after "rs" in the example) so deduplication of the end of this data stream is possible. -C This is a handy calculator for differential backups. N is the number that you intend to pass to -d, i.e. the number of base archives that each of your differential archives will be based on. T is the number of differential archives that you intend to keep. Type those two numbers in (e.g. -C 4,8) and the calculator will give you information about how much space they will take and how many archives will be recoverable. 
-H If you pipe a Liquid archive through "lar -H", you will get a special "thin" version of the archive in the output that contains only the hashes of the blocks in the input. This can be used to greatly reduce the amount of data transmitted over the wire in cases where you are creating a differential backup with the base files being piped over the network as it will move the block hashing to the remote server instead of transferring the data to be hashed locally. Examples: lar -cv -b Images > Images.lar Archive all files in the Images folder and name the archive Images.lar. Verbose output. lar -xv < Images.lar Recreate the Images folder that was archived in the previous example under the current working directory. Verbose output. lar -xv -i '%.c$|%.h$' -e 'example' < code.lar Extract the archive code.lar into the current directory. Only extract files and directories that end with ".c" or ".h". Do not extract files or directories that include the word "example" in their full path. cat old.lar older.lar | lar -cd 2 -b Images > new.lar Archive all files in the Images folder and name the archive new.lar. The output archive will be a differential archive and will not contain any blocks of data that exist in old.lar and older.lar. cat old.lar older.lar new.lar | lar -xd 2 Extract the archive that was created in the previous example. Only new.lar will be extracted but old.lar and older.lar are needed because they contain data that was omitted from new.lar during its creation. The order of old.lar and older.lar may be reversed but new.lar, the archive that you are actually trying to extract, must be the last one. (cat DB2.lar.gz | gunzip; cat DB.lar.gz | gunzip) | lar -cvb DB -d 2 | gzip > DB3.lar.gz Archive all files in the DB folder, pass through gzip to compress the archive and name it DB3.lar.gz. The output archive will be a differential archive and will not contain any blocks of data that exist in DB2.lar.gz and DB.lar.gz. 
cat yesterday.lar | lar -d 1 -cvb serverbackup/ -s '%.sql$|%.csv$' > today.lar Archive all the files in the serverbackup folder and turn on self-synchronizing blocks for files with ".sql" or ".csv". Do not include the data blocks that exist in yesterday.lar so the resulting today.lar will be a differential archive. Self-synchronization helps with redundancy detection in files like SQL dumps or other files that may have data inserted or removed from one archive to the next one. (ssh 'user@192.168.0.5' "cat /home/user/backup1.lar.gz" | gunzip | lar -H; ssh 'user@192.168.0.5' "cat /home/user/backup2.lar.gz" | gunzip | lar -H; ) | lar -v -d 2 -c -b . | gzip | ssh 'root@192.168.0.5' "cat > /home/user/backup0.lar.gz" ssh 'root@192.168.0.5' "rm /home/user/backup5.lar.gz" ssh 'root@192.168.0.5' "mv /home/user/backup4.lar.gz /home/user/backup5.lar.gz" ssh 'root@192.168.0.5' "mv /home/user/backup3.lar.gz /home/user/backup4.lar.gz" ssh 'root@192.168.0.5' "mv /home/user/backup2.lar.gz /home/user/backup3.lar.gz" ssh 'root@192.168.0.5' "mv /home/user/backup1.lar.gz /home/user/backup2.lar.gz" ssh 'root@192.168.0.5' "mv /home/user/backup0.lar.gz /home/user/backup1.lar.gz" Create a differential archive of the current directory with 2 base archives. The base archives are stored on a remote server at 192.168.0.5 and are remotely processed with "lar -H" so that only the hashes of the blocks they contain are transferred over the wire. The resulting differential archive is also stored back at the same remote server. After the archival is done the archives are renamed and 5 of them in total are kept. backup1.lar.gz will be the newest archive and backup5.lar.gz will be the oldest. To see what 5 differential archives (each based on the previous 2 archives) mean, you can run "lar -C 2,4" which will give you the following output: If your differential archives are based on the last 2 archives (-d 2) and you keep a total of 5 archives, then you should expect to have 3 recoverable archives. 
Archives older than the last 3 will not have all their base archives available and you will therefore be unable to extract them. You should expect that all 5 archives together will take about the same space as 1.7 full size (non-differential) archives, but in some cases they will take up about the same space as 2 full size archives. Verbose output: 3% (input=134B output=111.74MiB) ./readme.txt (regular file) [NNNHP] | | | | | | | | Data written to stdout | File type | | | Current filename | | Data read from stdin | | Current file's blocks (N=new, P=previously seen in Percentage of files done current archive, H=previously seen in base archive) Copyright 2019-2020 Tritonio (www.inshame.com) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License version 3 as published by the Free Software Foundation. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
If you have not received a copy of the GNU General Public License along with this program then see https://www.gnu.org/licenses/gpl-3.0.txt or download it using BitTorrent: magnet:?xt=urn:btih:ftlm2r4zr3eepxypisejceebq7gx3wn7 ]==])
end

do
  local last=0
  --- Rate limiter for progress messages.
  -- @return true at most once per second (os.time resolution), false otherwise
  function timehaspassed()
    local now=os.time()
    if now-last>=1 then
      last=now
      return true
    else
      return false
    end
  end
end

--- Round n half-up to d decimal places (d defaults to 0).
function round(n,d)
  d=d or 0
  return math.floor(0.5+n*10^d)/10^d
end

--- Format a byte count with a binary unit suffix (B, KiB, MiB, GiB, TiB).
function formatbytes(bytes)
  if bytes>=1024^4 then
    return round(bytes/1024^4,2).."TiB"
  elseif bytes>=1024^3 then
    return round(bytes/1024^3,2).."GiB"
  elseif bytes>=1024^2 then
    return round(bytes/1024^2,2).."MiB"
  elseif bytes>=1024 then
    return round(bytes/1024,2).."KiB"
  else
    return bytes.."B"
  end
end

--- Upper-cased extension of the last path component, or "" when there is none.
-- The pattern needs a non-slash character before the dot, so a leading-dot
-- name such as ".bashrc" yields "".
function getext(filename)
  return string.upper(string.match(filename,"[^/]%.([^/%.]+)$") or "")
end

--- Upper-cased last path component of filename.
function stripdirs(filename)
  return string.upper(string.match(filename,"([^/]*)$"))
end

--- Number of "/" characters in filename (every non-slash run is removed first).
function getdepth(filename)
  return #string.gsub(filename,"[^/]+","")
end

--- Consume and validate the "LAR" magic and format version from instream.
-- Raises an error when the magic is absent/wrong or the version is unsupported.
function parselarheader(instream)
  local magic=instream:read(3)
  assert(magic,"Not enough data. Did you forget to pipe some file to stdin?")
  assert(magic=="LAR","Not a Liquid Archive.")
  local archiveversion=readnumber(instream)
  assert(archiveversion==FORMAT_VERSION,"Unsupported Liquid Archive version: "..archiveversion)
end

do
  local counters={}
  --- Add howmany to the named byte counter.
  function count(what,howmany)
    counters[what]=(counters[what] or 0)+howmany
  end
  --- Render all counters as "name=size" pairs separated by spaces.
  -- Order follows pairs() and is therefore unspecified.
  function getcounters()
    local str={}
    for i,v in pairs(counters) do
      table.insert(str,i.."="..formatbytes(v))
    end
    return table.concat(str," ")
  end
end

-- NOTE(review): the next line is damaged in the copy under review. Everything
-- between a "<" and the following ">" was stripped, which removed the rest of
-- reallyread, the whole of hash() and the start of totextual() (apparently
-- inside a "do ... end" scope holding a local "hex" table). It is reproduced
-- verbatim; restore it from the pristine source.
function reallyread(instream,size,couldbeless) local data="" while #data%?\\|"..(nospaces and "" or " \x0A\x0D\t").."]",function (badchar) return "~"..hex[badchar].."~" end)
    textual=string.gsub(textual,"~([^~])~",hex)
    return string.gsub(textual,"~~","")
  end
  --- Inverse of totextual: expand every "~<hex digits>~" run back to raw bytes.
  function fromtextual(text)
    return string.gsub(text,"~(%x+)~",function (hexword)
      return string.gsub(hexword,"%x%x",function (hh)
        return string.char(tonumber(hh,16))
      end)
    end)
  end
  -- Load-time round-trip self checks for the textual encoding.
  assert(fromtextual(totextual("la r"))=="la r")
  assert(fromtextual(totextual("\0la\nr~a\0a\0a\0aaa\0aa\0"))=="\0la\nr~a\0a\0a\0aaa\0aa\0")
  assert(fromtextual(totextual("la r",true))=="la r")
  assert(fromtextual(totextual("\0la\nr~a\0a\0a\0aaa\0aa\0",true))=="\0la\nr~a\0a\0a\0aaa\0aa\0")
end

--- True when str matches at least one of the "|"-separated Lua patterns.
function multimatches(str,patterns)
  for pattern in string.gmatch(patterns,"[^|]+") do
    if string.match(str,pattern) then
      return true
    end
  end
  return false
end

--- Reject filenames that could escape the working directory.
-- Raises an error for absolute paths, names starting with "..", a ".." path
-- component (including one at the very end of the name) and double slashes.
function exploitcheck(filename)
  if string.match(filename,"^%.%.")
    or string.match(filename,"^/")
    or string.match(filename,"/%.%.[%W ]")
    or string.match(filename,"/%.%.$") -- trailing "/.." has no following character for the class above
    or string.match(filename,"//") then
    error("Invalid filename: "..filename..' Filenames must not be absolute and must not contain parent directories (i.e. "..") and double slashes.')
  end
end

--- Escape single quotes so the result can sit inside a '...' shell word.
-- Returns gsub's results (string, count); callers use only the string.
function escapequotes(filename)
  return string.gsub(filename,"'","'\"'\"'")
end

--- Everything before the last "/" of filename, or nil when there is no "/".
function parent(filename)
  return string.match(filename,"^(.-)/[^/]*$")
end

---------------------
--- Data encoding ---
---------------------

--- Write a length-prefixed packet ("<size>>" followed by the payload).
-- When textual is set the payload is passed through totextual first.
function writedatapacket(data,outstream,textual,nospaces)
  data=textual and totextual(data,nospaces) or data
  outstream:write(#data..">")
  outstream:write(data)
end

--- Write a number followed by the "|" separator.
function writenumber(number,outstream)
  outstream:write(tonumber(number).."|")
end

--- Read a number terminated by "|"; raises on malformed input.
function readnumber(instream)
  --io.stderr:write("RN"..instream:seek().."\n")
  local n=tonumber(instream:read("*n"))
  assert(n,"Corrupt archive. Unexpected non-numerical data.")
  assert(instream:read(1)=="|","Corrupt archive. Unexpected number separator.")
  return n
end

--- Read a length-prefixed packet; decode it with fromtextual when textual is set.
function readdatapacket(instream,textual)
  --io.stderr:write("RDP"..instream:seek().."\n")
  local size=tonumber(instream:read("*n"))
  assert(size,"Corrupt archive. Missing data packet size.")
  assert(instream:read(1)==">","Corrupt archive. Unexpected data packet separator.")
  local somedata=reallyread(instream,size)
  return textual and fromtextual(somedata) or somedata
end

--------------------------
--- Modes of operation ---
--------------------------

--- Create an archive of dir on outstream.
-- First reads `differential` base archives from instream (recording their
-- block hashes only), then lists, filters and adds the files.
function create(dir,instream,outstream,include,exclude,verbose,textual,nospaces,differential,blocksize,selfsync)
  if verbose then
    io.stderr:write("Adding what matches \""..include.."\" and does not match \""..exclude.."\"...\n")
  end
  textual=textual or nospaces --nospaces implies textual
  outstream:write("LAR")
  writenumber(FORMAT_VERSION,outstream)
  for i=1,differential do
    if verbose then
      io.stderr:write("("..getcounters()..") Reading blocks from base archive number "..i.."...\n")
    end
    parselarheader(instream)
    while not witnessstream(instream,true) do
      if verbose and timehaspassed() then
        io.stderr:write("("..getcounters()..") Still reading blocks from base archive number "..i.."...\n")
      end
    end
  end
  local filenames=listfiles(dir,include,exclude,verbose)
  if verbose then
    io.stderr:write("("..getcounters()..") Creating archive...\n")
  end
  for i,filename in ipairs(filenames) do
    addfile(filename,outstream,textual,nospaces,round(i/#filenames*100),blocksize,selfsync)
  end
  flushcommandbuffer()
  outstream:write(".")
end

--- Extract, list (dryrun) or thin (thinoutputstream) the archive on instream.
-- `differential` base archives are consumed first. Metadata commands
-- (permissions, timestamps, symlinks) returned by executestream are collected
-- and run at the end, deepest paths first.
function extract(instream,include,exclude,verbose,dryrun,differential,thinoutputstream,force)
  if verbose then
    if not thinoutputstream then
      io.stderr:write("Extracting "..(dryrun and "(dry run) " or "").."what matches \""..include.."\" and does not match \""..exclude.."\"...\n")
      if force then
        io.stderr:write("Forcing extraction of files even if some of their blocks are missing...\n")
      end
    else
      io.stderr:write("Hashing blocks in input to create a thin version in output...\n")
    end
  end
  for i=1,differential do
    if verbose then
      io.stderr:write("("..getcounters()..") Reading blocks from base archive number "..i.."...\n")
    end
    parselarheader(instream)
    while not witnessstream(instream,dryrun) do
      if verbose and timehaspassed() then
        io.stderr:write("("..getcounters()..") Still reading blocks from base archive number "..i.."...\n")
      end
    end
  end
  if verbose then
    io.stderr:write("("..getcounters()..") Reading archive...\n")
  end
  parselarheader(instream)
  if thinoutputstream then
    thinoutputstream:write("LAR")
    writenumber(FORMAT_VERSION,thinoutputstream)
  end
  local commandbuffer={}
  while true do
    local stop,delayedcommand=executestream(instream,include,exclude,dryrun,thinoutputstream,force)
    if stop then
      break
    end
    -- delayedcommand is nil for chunks without metadata; appending nil is a no-op.
    table.insert(commandbuffer,delayedcommand)
  end
  if thinoutputstream then
    thinoutputstream:write(".")
  end
  if verbose then
    io.stderr:write("Applying "..(dryrun and "(dry run) " or "").."permissions, modification dates etc...\n")
  end
  -- Deepest entries first, so a directory's attributes and timestamp are
  -- applied after everything inside it. (The comparator used to compare
  -- a[1] with itself, which made the sort a no-op.)
  table.sort(commandbuffer,function (a,b) return a[1]>b[1] end)
  for _,command in ipairs(commandbuffer) do
    command[2]()
  end
end

do
  local hashedblocks={} -- hash -> file holding the block, or true for thin archives
  local hbc=0
  --- Consume one chunk of a base archive, remembering the hash of any data block.
  -- With justlook unset the block data is also stored under blockdir.
  -- @return true once the end-of-archive "." chunk is read, nothing otherwise
  function witnessstream(instream,justlook)
    local chunktype=instream:read(1)
    if not chunktype then
      error("Unexpected end of archive.")
    elseif chunktype=="T" or chunktype=="B" then
      local blockdata=readdatapacket(instream,chunktype=="T")
      local ha=hash(blockdata)
      local fn=blockdir.."/w"..hbc
      hbc=hbc+1
      hashedblocks[ha]=fn
      if not justlook then
        local bh=io.open(fn,"wb")
        bh:write(blockdata)
        bh:close()
      end
    elseif chunktype=="t" then
      local ha=readdatapacket(instream,false)
      hbc=hbc+1
      hashedblocks[ha]=true
      if not justlook then
        error("You are trying to extract a differential archive but instead of passing the actual base archives in the input you are passing the thin versions of them created by \"lar -H\". The actual data on the base archives is needed so you should pass the actual base archives.")
      end
    elseif chunktype=="H" then
      readdatapacket(instream,true)
    elseif chunktype=="D" then
      readdatapacket(instream,true)
      readnumber(instream)
      readnumber(instream)
    elseif chunktype=="P" then
      readdatapacket(instream,true)
      readnumber(instream)
      readnumber(instream)
    elseif chunktype=="S" then
      readdatapacket(instream,true)
      readnumber(instream)
      readdatapacket(instream,true)
    elseif chunktype=="F" then
      readdatapacket(instream,true)
      readnumber(instream)
      readnumber(instream)
      local blockcount=readnumber(instream)
      for i=1,blockcount do
        readnumber(instream)
      end
    elseif chunktype=="." then
      return true
    else
      error("Corrupt archive. Unknown chunk type: "..chunktype)
    end
  end

  --- True when a block with hash ha was seen in a base archive.
  function iswitnessedblock(ha)
    return not not hashedblocks[ha]
  end

  --- Load a witnessed block from disk.
  -- @return block data and its file name; "" and false when the block is
  --         unknown and force is set; raises otherwise
  function getwitnessedblock(ha,force)
    local bfn=hashedblocks[ha]
    if bfn then
      local bh=io.open(bfn,"rb")
      local blockdata=bh:read("*a")
      bh:close()
      return blockdata,bfn
    else
      if force then
        return "",false
      else
        error("Unwitnessed hashed block. You are trying to extract a differential archive but you are probably missing one of its base archives.")
      end
    end
  end
end

do
  local blocks={} -- block id -> {tmp, location, offset, size}

  --- Print "(counters) filename (type)" to stderr; counters are optional.
  function printfileinfoline(filename,ftype,dontprintcounters)
    io.stderr:write((dontprintcounters and "" or ("("..getcounters()..") "))..filename.." ("..ftype..")\n")
  end

  --- Process one chunk of the archive being extracted, listed or thinned.
  -- @return true at the end-of-archive chunk; false plus a {depth, function}
  --         pair for entries whose metadata must be applied later; nothing
  --         for data chunks and filtered-out entries
  -- NOTE(review): `verbose` is not a parameter here; it resolves to a global
  -- that is not set anywhere in the visible code - confirm it is assigned by
  -- the argument parsing.
  function executestream(instream,include,exclude,dryrun,thinoutputstream,force)
    local chunktype=instream:read(1)
    if not chunktype then
      error("Unexpected end of archive.")
    elseif chunktype=="T" or chunktype=="B" then
      local blockdata=readdatapacket(instream,chunktype=="T")
      local blockid=#blocks+1
      local block={tmp=true,location=blockdir.."/"..blockid,offset=0,size=#blockdata}
      if thinoutputstream then
        thinoutputstream:write("t")
        writedatapacket(hash(blockdata),thinoutputstream,false,false)
      else
        if not dryrun then
          local bh=io.open(blockdir.."/"..blockid,"wb")
          bh:write(blockdata)
          bh:close()
        end
      end
      table.insert(blocks,block)
      if verbose and timehaspassed() then
        io.stderr:write("("..getcounters()..") Still reading archive...\n")
      end
    elseif chunktype=="H" then
      local ha=readdatapacket(instream,true)
      if not thinoutputstream then
        local blockdata,location
        if not dryrun then
          blockdata,location=getwitnessedblock(ha,force)
        end
        local blockid=#blocks+1
        local block={tmp=true,location=location,offset=0,size=not dryrun and #blockdata}
        table.insert(blocks,block)
        if verbose and timehaspassed() then
          io.stderr:write("("..getcounters()..") Still reading archive...\n")
        end
      end
    elseif chunktype=="t" then
      error("You are trying to extract or list the contents of a thin archive created with the -H option. Thin archives do not contain data so they can only be used instead of base archives when creating differential archives.")
    elseif chunktype=="D" then
      local filename=readdatapacket(instream,true)
      exploitcheck(filename)
      local attrs=readnumber(instream)
      local modtimestamp=readnumber(instream)
      if not thinoutputstream then
        if multimatches(filename,exclude) or not multimatches(filename,include) then
          return
        end
        if not dryrun then
          mkdir(filename)
        end
        if verbose or dryrun then
          printfileinfoline(filename,"directory",dryrun and not verbose)
        end
        return false,{getdepth(filename),function ()
          if not dryrun then
            setattrs(filename,attrs)
            settimestamp(filename,modtimestamp)
          end
        end}
      end
    elseif chunktype=="P" then
      local filename=readdatapacket(instream,true)
      exploitcheck(filename)
      local attrs=readnumber(instream)
      local modtimestamp=readnumber(instream)
      if not thinoutputstream then
        if multimatches(filename,exclude) or not multimatches(filename,include) then
          return
        end
        if not dryrun then
          mkdir(parent(filename))
          mkfifo(filename)
        end
        if verbose or dryrun then
          printfileinfoline(filename,"fifo",dryrun and not verbose)
        end
        return false,{getdepth(filename),function ()
          if not dryrun then
            setattrs(filename,attrs)
            settimestamp(filename,modtimestamp)
          end
        end}
      end
    elseif chunktype=="S" then
      local filename=readdatapacket(instream,true)
      exploitcheck(filename)
      local modtimestamp=readnumber(instream)
      local target=readdatapacket(instream,true)
      if not thinoutputstream then
        if multimatches(filename,exclude) or not multimatches(filename,include) then
          return
        end
        if not dryrun then
          mkdir(parent(filename))
        end
        if verbose or dryrun then
          printfileinfoline(filename,"symbolic link",dryrun and not verbose)
        end
        -- The link itself is created in the delayed command.
        return false,{getdepth(filename),function ()
          if not dryrun then
            mksymlink(filename,target)
            settimestamp(filename,modtimestamp)
          end
        end}
      end
    elseif chunktype=="F" then
      local filename=readdatapacket(instream,true)
      exploitcheck(filename)
      local attrs=readnumber(instream)
      local modtimestamp=readnumber(instream)
      local blockcount=readnumber(instream)
      if thinoutputstream or multimatches(filename,exclude) or not multimatches(filename,include) then
        -- Skip the block id list of an entry we are not going to write.
        for i=1,blockcount do
          readnumber(instream)
        end
        return
      end
      if not dryrun then
        mkdir(parent(filename))
        local fh=io.open(filename,"wb")
        local fhoffset=0
        for i=1,blockcount do
          local blockid=readnumber(instream)
          local block=blocks[blockid]
          if not block then
            error("Corrupt archive. Invalid block id.")
          else
            -- location is false for a missing block tolerated by -f
            if block.location~=false then
              local bh=io.open(block.location,"rb")
              bh:seek("set",block.offset)
              local lastoffset=fh:seek()
              local blockdata=reallyread(bh,block.size,true)
              fh:write(blockdata)
              bh:close()
              if block.tmp then
                -- From now on the block lives inside the extracted file.
                deletefile(block.location)
                fh:flush() --flush otherwise we may not find the block later in the new location
                block.location=filename
                block.offset=lastoffset
                block.tmp=false
              end
            end
          end
        end
        fh:close()
      else
        for i=1,blockcount do
          readnumber(instream)
        end
      end
      if verbose or dryrun then
        printfileinfoline(filename,"regular file",dryrun and not verbose)
      end
      return false,{getdepth(filename),function ()
        if not dryrun then
          setattrs(filename,attrs)
          settimestamp(filename,modtimestamp)
        end
      end}
    elseif chunktype=="." then
      return true
    else
      error("Corrupt archive. Unknown chunk type.")
    end
  end
end

do
  local known={}         -- block hash -> index of the block in this archive
  local hasharray={}
  local commandbuffer={} -- pending functions that write the file entries

  --- Add one filesystem entry to the archive.
  -- Data blocks of regular files are written to outstream immediately; the
  -- entry record itself is queued and written by flushcommandbuffer.
  -- NOTE(review): `verbose` is not a parameter here; it resolves to a global
  -- that is not set anywhere in the visible code - confirm it is assigned by
  -- the argument parsing.
  function addfile(filename,outstream,textual,nospaces,progress,blocksize,selfsync)
    local fileinfo=getfileinfo(filename)
    if not fileinfo then
      if verbose then
        io.stderr:write("File disappeared: "..filename.."\n")
      end
      return
    end
    -- was an accidental global
    local fileinfostring=progress.."% ("..getcounters()..")".." "..filename.." ("..(fileinfo.type or "unknown type")..") ["
    if fileinfo.type=="regular file" or fileinfo.type=="regular empty file" then
      if verbose then
        io.stderr:write(fileinfostring)
      end
      local fh=io.open(filename,"rb")
      if not fh then
        io.stderr:write((verbose and "]\n" or "").."File disappeared: "..filename.."\n")
        return
      end
      local fileblockindices={}
      local leftovers=""
      --[[
      function morph(t)
        local r={}
        for l in string.gmatch(t,".") do
          table.insert(r,string.byte(l))
        end
        return table.concat(r,",")
      end
      --]]
      while 1 do
        local buf
        if selfsync and multimatches(filename,selfsync) then
          -- Self-synchronizing split: choose the cut position from the 16-byte
          -- windows of the buffer compared against `cut`.
          buf=reallyread(fh,math.floor(blocksize*1.25)-#leftovers,true)
          if not buf and leftovers=="" then
            break
          end
          buf=leftovers..(buf or "")
          local best,winner=string.rep("\0",17),#buf --was +1
          for i=math.floor(0.66*blocksize),math.min(#buf,math.floor(1.34*blocksize)) do
            local candidate=string.sub(buf,i-15,i)
            if #candidate==16 and ((candidate>=best and (best>cut or cut>=candidate)) or (candidate<=cut and best>cut)) then
              best=candidate
              winner=i
            end
          end
          buf,leftovers=string.sub(buf,1,winner+#best),string.sub(buf,winner+#best+1)
          --io.stderr:write(morph(best).."|"..#buf.."\n") --string.sub(buf,-16)
        else
          buf=reallyread(fh,blocksize,true)
          if not buf then
            break
          end
        end
        local bufhash=hash(buf)
        if not known[bufhash] then
          totalblocks=totalblocks+1
          if #bufhash==HASH_SIZE and iswitnessedblock(bufhash) then
            -- Block exists in a base archive: store only its hash.
            if verbose then
              io.stderr:write("H")
            end
            outstream:write("H")
            writedatapacket(bufhash,outstream,true,nospaces)
            saved=saved+#buf-#bufhash
          else
            if verbose then
              io.stderr:write("N")
            end
            if textual then
              outstream:write("T")
              writedatapacket(buf,outstream,true,nospaces)
            else
              outstream:write("B")
              writedatapacket(buf,outstream,false,nospaces)
            end
          end
          table.insert(hasharray,bufhash)
          known[bufhash]=#hasharray
        else
          -- Block already written earlier in this archive.
          if verbose then
            io.stderr:write("P")
          end
          saved=saved+#buf
        end
        total=total+#buf
        table.insert(fileblockindices,known[bufhash])
      end
      fh:close()
      table.insert(commandbuffer,function ()
        outstream:write("F")
        writedatapacket(filename,outstream,true,nospaces)
        writenumber(fileinfo.attrs,outstream)
        writenumber(fileinfo.modts,outstream)
        writenumber(#fileblockindices,outstream)
        for i,blockhash in ipairs(fileblockindices) do
          writenumber(blockhash,outstream)
        end
      end)
    elseif fileinfo.type=="directory" then
      if verbose then
        io.stderr:write(fileinfostring)
      end
      table.insert(commandbuffer,function ()
        outstream:write("D")
        writedatapacket(filename,outstream,true,nospaces)
        writenumber(fileinfo.attrs,outstream)
        writenumber(fileinfo.modts,outstream)
      end)
    elseif fileinfo.type=="fifo" then
      if verbose then
        io.stderr:write(fileinfostring)
      end
      table.insert(commandbuffer,function ()
        outstream:write("P")
        writedatapacket(filename,outstream,true,nospaces)
        writenumber(fileinfo.attrs,outstream)
        writenumber(fileinfo.modts,outstream)
      end)
    elseif fileinfo.type=="symbolic link" then
      if verbose then
        io.stderr:write(fileinfostring)
      end
      local symbolictarget=getsymbolictarget(filename)
      if not symbolictarget then
        io.stderr:write((verbose and "]\n" or "").."File disappeared: "..filename.."\n")
        return
      end
      table.insert(commandbuffer,function ()
        outstream:write("S")
        writedatapacket(filename,outstream,true,nospaces)
        writenumber(fileinfo.modts,outstream)
        writedatapacket(symbolictarget,outstream,true,nospaces)
      end)
    end
    if verbose then
      io.stderr:write("]\n")
    end
    if #commandbuffer>=10000 then
      flushcommandbuffer()
    end
  end

  --- Run and clear all queued entry writers.
  function flushcommandbuffer()
    for _,command in ipairs(commandbuffer) do
      command()
    end
    commandbuffer={}
  end
end

---------------------
--- OS / external ---
---------------------

--- Query permissions, modification time and type of filename via stat(1).
-- @return {attrs=,modts=,type=} (all strings), or false when stat cannot be
--         started or prints nothing
function getfileinfo(filename)
  local p=io.popen("stat -c '%a|%Y|%F' '"..escapequotes(filename).."'")
  if not p then
    return false
  end
  local infoline=p:read("*l")
  p:close() -- close on every path; the empty-output return used to leak the pipe
  if not infoline or infoline=="" then
    return false
  end
  local attrs,modts,typ=string.match(infoline,"([^|]+)|([^|]+)|([^|]+)")
  return {attrs=attrs,modts=modts,type=typ}
end

--[[
function getowner(filename)
  local p=io.popen("stat -c %u '"..escapequotes(filename).."'")
  local owner=p:read("*l")
  p:close()
  return owner
end

function getattrs(filename)
  local p=io.popen("stat -c %a '"..escapequotes(filename).."'")
  local attrs=p:read("*l")
  p:close()
  return attrs
end

function getmodtimestamp(filename)
  local p=io.popen("stat -c %Y '"..escapequotes(filename).."'")
  local modts=p:read("*l")
  p:close()
  return modts
end

function getfiletype(filename)
  local p=io.popen("stat -c %F '"..escapequotes(filename).."'")
  local typ=p:read("*l")
  p:close()
  return typ
end
--]]

--- Target of a symbolic link via readlink(1); nil when nothing is printed.
function getsymbolictarget(filename)
  local p=io.popen("readlink '"..escapequotes(filename).."'")
  local target=p:read("*l")
  p:close()
  return target
end

--- Set the modification time (seconds since the epoch) without following symlinks.
function settimestamp(filename,modtimestamp)
  os.execute("touch -h --date=@"..modtimestamp.." '"..escapequotes(filename).."'")
end

--- Apply the permission bits attrs with chmod(1).
function setattrs(filename,attrs)
  os.execute("chmod "..attrs.." '"..escapequotes(filename).."'")
end

--- Create a directory and its parents; a nil filename is ignored.
function mkdir(filename)
  if filename then
    os.execute("mkdir -p '"..escapequotes(filename).."'")
  end
end

--- Remove a file. The path is quoted like in the sibling helpers.
function deletefile(filename)
  os.execute("rm '"..escapequotes(filename).."'")
end

--- Remove a directory tree. The path is quoted like in the sibling helpers.
function deletedir(filename)
  os.execute("rm -rf '"..escapequotes(filename).."'")
end

--- Create a symbolic link at filename pointing to target.
function mksymlink(filename,target)
  os.execute("ln -s '"..escapequotes(target).."' '"..escapequotes(filename).."'")
end

--- Create a named pipe.
function mkfifo(filename)
  os.execute("mkfifo '"..escapequotes(filename).."'")
end

--- List everything under dir with find(1), keep what passes the include and
-- exclude patterns and sort the result.
function listfiles(dir,include,exclude,verbose)
  if verbose then
    io.stderr:write("Listing files...\n")
  end
  local p=io.popen("find '"..escapequotes(dir).."'")
  local allfiles={}
  while true do
    local filename=p:read("*l")
    if not filename then
      break
    end
    if not multimatches(filename,exclude) and multimatches(filename,include) then
      table.insert(allfiles,filename)
    end
  end
  p:close()
  if verbose then
    io.stderr:write("Sorting "..#allfiles.." filenames...\n")
  end
  table.sort(allfiles,function (a,b)
    local exta,extb=getext(a),getext(b)
    if exta==extb then
      local fna,fnb=stripdirs(a),stripdirs(b)
      if fna==fnb then
        -- NOTE(review): the next line is damaged in the copy under review.
        -- Everything between a "<" and the following ">" was stripped, which
        -- removed the rest of this comparator, the end of listfiles and the
        -- start of the command line handling. It is reproduced verbatim;
        -- restore it from the pristine source.
        return a0),"-B must be followed by a positive integer")
  args.B=tonumber(args.B or 1024*1024)
  mkfifo(fifofn)
  mustremovefifo=true
  mustshowinfo=true
  exploitcheck(args.b)
  create(args.b,io.stdin,io.stdout,args.i,args.e,args.v,args.p,args.P,args.d,args.B,args.s)
elseif args.m then
  assert(not args.f and not args.b and not args.B and not args.s and args.i=="." and args.e=="/////" and not args.p and not args.P and args.d==0,"You cannot set -b, -B, -s, -i , -e, -p, -P, -f nor -d when you are creating an empty archive with -m.")
  args.b="/dev/null"
  mkfifo(fifofn)
  mustremovefifo=true
  mustshowinfo=true
  create(args.b,io.stdin,io.stdout,args.i,args.e,args.v,args.p,args.P,args.d)
elseif args.x then
  assert(not args.b and not args.B and not args.s and not args.p and not args.P,"Command line option -x cannot be combined with -p, -B, -s, -P nor -b.")
  mkfifo(fifofn)
  mustremovefifo=true
  mkdir(blockdir)
  mustremoveblockdir=true
  extract(io.stdin,args.i,args.e,args.v,false,args.d,false,args.f)
elseif args.l then
  assert(not args.b and not args.B and not args.s and not args.p and not args.f and not args.P,"Command line option -l cannot be combined with -p, -B, -s, -P, -f nor -b.")
  mkfifo(fifofn)
  mustremovefifo=true
  extract(io.stdin,args.i,args.e,args.v,true,args.d,false,false)
elseif args.C then
  assert(not args.f and not args.b and not args.p and not args.P and args.d==0 and not args.s and not args.B,"Command line option -C cannot be combined with -p, -B, -s, -P, -f, -b nor -d.")
  local n,t=string.match(args.C,"^(%d+)%s*,%s*(%d+)$")
  n,t=tonumber(n),tonumber(t)
  assert(n and t and n%1==0 and t%1==0,"N and T (after -C) must be integers.")
  local avg=round(t/(n+1),1)
  local max=math.ceil(t/(n+1))
  -- NOTE(review): the copy under review ends in the middle of the next statement.
  assert(n