Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/node
- //
- //mock file index design
- ({
- stat:{},
- '/':{}
- })
- //
- fs=require('fs');
- child_process=require('child_process');
// Command line option rules. The whole object is passed to minimist as its
// options argument. `number` and `description` are not minimist options: they
// are consumed by this script only (`number` by the argument validation below,
// `description` by the --help printer).
var argsRules = {
  '--': true,
  boolean: [
    'help',
    'verbose',
    'deep',
    'fast',
    'careless',
    'interactive',
    'halfinteractive',
    'all',
    'simulate',
    'export',
    'init',
    'md5sumcorrupted',
    'checkindex',
    'forceupdate',
    'legacyconvert'
  ],
  string: [
    'mountpoint',
    'indexdir',
    'importcorrupted'
  ],
  // Options that take a numeric value. `backup` must not be listed under
  // `boolean`: a boolean option never consumes the following argument, so
  // "--backup 5" would yield backup=true and push 5 into the index path list.
  number: [
    'backup'
  ],
  alias: {
    help: ['h'],
    verbose: ['v'],
    deep: ['d'],
    fast: ['f'],
    careless: ['L'],
    interactive: ['i'],
    halfinteractive: ['I'],
    all: ['a'],
    simulate: ['s'],
    export: ['e'],
    importcorrupted: ['import'],
    init: ['g'],
    backup: ['b'],
    mountpoint: ['mnt', 'M']
  },
  description: {
    help: 'show this help',
    verbose: 'be verbose',
    deep: 'force md5 recheck',
    fast: 'force md5 skip',
    careless: 'don\'t recheck md5 if only less important stat values changed (uid,gid,permissions)',
    interactive: 'enable interactive mode',
    halfinteractive: 'enable interactive mode only if index changed',
    mountpoint: 'specify default drives mountpoint',
    indexdir: 'specify default index dir prefix',
    all: 'scan default drives mount dir searching for all available indexes',
    simulate: 'enable simulation mode (do not modify/write any files)',
    export: 'export md5 hashes in md5sum -c readable format for faster validation than --deep',
    importcorrupted: 'import list of files for force md5 validation',
    md5sumcorrupted: 'read importcorrupted file as md5sum -c output',
    init: 'create new index from scratch',
    checkindex: 'Validate index sha512 before parsing',
    forceupdate: 'update extindex file even if there were no changes noted',
    legacyconvert: 'read file in legacy mode (do not expect first 128 bytes to be sha512)',
    backup: 'number of backup copies to keep'
  },
  default: {
    verbose: true,
    halfinteractive: true,
    mountpoint: '/dmnt',
    indexdir: '/etc/md5index',
    checkindex: true,
    backup: 3
  }
};

// Parsed command line; assigned without `var`, like `fs` and `child_process`.
args = require('/usr/lib/node_modules/minimist/')(process.argv.slice(2), argsRules);

// Reject unknown options (exit code 7) and normalise the backup count.
(function () {
  var declared = argsRules.boolean.concat(argsRules.string, argsRules.number);
  // Every declared option name plus its aliases; '--' and '_' are the two
  // positional lists produced by the parser.
  var validArgs = [].concat.apply(['--', '_'], declared.map(function (key) {
    return [key].concat((key in argsRules.alias) ? argsRules.alias[key] : []);
  }));
  var invalidArgs = [];
  Object.keys(args).forEach(function (key) {
    validArgs.indexOf(key) == -1 && invalidArgs.push(key);
  });
  if (invalidArgs.length) {
    console.error('invalidArgs: \n' + invalidArgs.join('\n'));
    process.exit(7);
  }
  // The backup count has to be a non-negative integer. A bare "--backup" /
  // "--no-backup" arrives as true / false and keeps its former meaning of
  // 1 / 0 copies; anything else that is not a whole number is refused.
  var backup = Number(args.backup);
  if (!(backup >= 0) || backup % 1 != 0) {
    console.error('invalid backup count: ' + args.backup);
    process.exit(7);
  }
  ['backup'].concat(argsRules.alias.backup).forEach(function (name) {
    args[name] = backup;
  });
})();
// --help: print one line per documented option (long name, aliases,
// description and default value, if any) and exit with status 0.
if (args.help) {
  var helpKeys = Object.keys(argsRules.description);
  // Single-character names are short flags ("-h"), the rest long ones ("--help").
  var asFlag = function (name) {
    return (name.length > 1 ? '--' : '-') + name;
  };
  // The parsed `args` object is deliberately left alone here; the labels get
  // their own variable. Names are joined with ', ' so that an option without
  // aliases is not followed by a dangling separator.
  var labels = helpKeys.map(function (key) {
    var names = [key].concat(argsRules.alias[key] || []);
    return ' ' + names.map(asFlag).join(', ') + ' ';
  });
  // Width of the widest label; shorter labels are padded up to it so that
  // all descriptions start in the same column.
  var labelWidth = labels.reduce(function (widest, label) {
    return Math.max(widest, label.length);
  }, 0);
  console.log('\n' + labels.map(function (label, i) {
    var key = helpKeys[i];
    var padding = new Array(labelWidth - label.length + 1).join(' ');
    var defaultNote = (key in argsRules.default) ? (' [DEFAULT:' + argsRules.default[key] + ']') : '';
    return label + padding + argsRules.description[key] + defaultNote;
  }).join('\n'));
  process.exit(0);
}
// Option implications: a newly created index always has to be written.
if (args.init) {
  args.forceupdate = true;
}
// Legacy mode reads a file without the leading sha512, so the checksum
// verification is switched off and the index is always rewritten.
if (args.legacyconvert) {
  args.checkindex = false;
  args.forceupdate = true;
}

// Failures recorded while hashing newly found files.
errors = [];
// Results accumulated over all processed indexes: entries whose file is gone
// (`file`), files that had no index entry (`index`) and changed entries (`diff`).
global.missing = { file: [], index: [] };
global.diff = [];
// Builds the list of indexes to work on and refreshes every one of them.
// The statement is one chain:
//   1. collect index paths - with --all every "*.extindex" file found in
//      <mountpoint>/<entry>/<indexdir>, otherwise the positional arguments;
//   2. filter - abort unless the existence of each path agrees with --init;
//   3. map    - load an existing index (checking its leading sha512) or, with
//               --init, create a new one from the directory / md5sum list
//               given after "--";
//   4. filter (validate) - compare the index with the files on disk, re-hash
//               what needs it, then write reports, backups, the index itself
//               and the optional md5sum export.
// The resulting array is stored in `indexes` (assigned without `var`).
indexes = (args.all ?
  (!fs.existsSync(args.mountpoint) ? (function () { throw 'Could not find default drives location'; })() :
    [].concat.apply([], fs.readdirSync(args.mountpoint).map(function (p) {
      return args.mountpoint + '/' + p + '/' + args.indexdir;
    }).filter(function (p) {
      return fs.existsSync(p) && fs.statSync(p).isDirectory();
    }).map(function (p) {
      return fs.readdirSync(p).filter(function (f) {
        return f.match(/.*\.extindex$/);
      }).map(function (f) {
        console.log('Found: ', p + '/' + f);
        return p + '/' + f;
      });
    }))) :
  args._).filter(function (p, i) {
  // With --init the index must not exist yet, without it the index must exist.
  if (args.init ^ (!fs.existsSync(p))) {
    console.error('Index ' + (args.init ? 'already exists' : 'does not exist') + ' - aborting', p);
    process.exit(10);
  }
  // The i-th argument after "--" is the source of the i-th new index.
  if (args.init && !fs.existsSync(args['--'][i])) {
    console.error('Could not find base dir / source md5index for ' + p);
    process.exit(13);
  }
  return true;
}).map(function (p, i) {
  if (!args.init) {
    args.verbose && console.log('Reading index... (', p, ')');
    // File layout: 128 characters of sha512 (hex) followed by the JSON text.
    var bfile = fs.readFileSync(fs.realpathSync(p)),
      sha512 = bfile.slice(0, 128),
      content = bfile.slice(128);
    var sha = child_process.spawnSync('sha512sum', [], {input: content});
    if (sha.error || sha.status) {
      console.error(sha.error || 'sha512sum exit status code: ' + sha.status + ' ' + String(sha.stderr));
      process.exit(45);
    }
    if (String(sha.stdout).substr(0, 128) != sha512) {
      // The stored hash is converted to text so that the message shows the
      // hex digest instead of a raw Buffer dump.
      if (args.checkindex) {
        console.error('Index sha512 check failed - index is probably damaged - aborting\n', String(sha512), '\n', String(sha.stdout).substr(0, 128));
        process.exit(14);
      } else
        console.warn('Index sha512sum check failed - index is probably damaged - forced ignore\n', String(sha512), '\n', String(sha.stdout).substr(0, 128));
    }
    try {
      // Legacy files have no sha512 header, so the whole file is the JSON text.
      var pind = JSON.parse(args.legacyconvert ? bfile : content),
        ostr = String(args.legacyconvert ? bfile : content),
        str = JSON.stringify(pind, null, 2);
      // Round-trip check: re-serialising the parsed data must reproduce the
      // text that was read, character by character.
      for (var i = 0, l = str.length; i < l; i++)
        if (str[i] != ostr[i]) {
          console.error('Soft bug occured - it\'s serious bug and probably classifies as node bug or linux memcache bug. Should be reported');
          throw ('Original string and reparsed don\'t match at ' + i + ' byte - system string conversion malfunction - abtorting');
        }
      if (!(typeof pind == 'object' && !Array.isArray(pind) && 'index' in pind))
        throw 'Invalid index file';
      var curpath = fs.realpathSync(p);
      if (pind.extindex != curpath)
        console.warn('Index has been moved from its previous location - it may result in errors');
      pind.extindex = curpath;
      // A falsy `index` means the entries live in "<extindex>.part<N>" files.
      if (!pind.index) {
        pind.index = [];
        for (var i = 0, l = pind.subindexnum; i < l; ++i)
          pind.index.push(pind.extindex + '.part' + i);
        pind.index = [].concat.apply([], pind.index.map(function (p, i) {
          args.verbose && console.log('Reading subindex... (', p, ')');
          if (!fs.existsSync(p))
            throw 'Expected subindex does not exist';
          var bfile = fs.readFileSync(p),
            sha512 = bfile.slice(0, 128),
            content = bfile.slice(128);
          var sha = child_process.spawnSync('sha512sum', [], {input: content});
          if (sha.error || sha.status)
            throw sha.error || 'sha512sum exit status code for subindex ' + i + ': ' + sha.status + ' ' + String(sha.stderr);
          if ((sha = String(sha.stdout).substr(0, 128)) != sha512) {
            if (args.checkindex)
              throw 'Subindex sha512 check failed - subindex is probably damaged - aborting\n' + sha512 + '\n' + sha;
            else
              console.warn('Subindex sha512 check failed - subindex is probably damaged - forced ignore\n' + sha512 + '\n' + sha);
          }
          var sind = JSON.parse(content),
            ostr = String(content),
            str = JSON.stringify(sind, null, 2);
          for (var i = 0, l = str.length; i < l; i++)
            if (str[i] != ostr[i]) {
              console.error('Soft bug occured - it\'s serious bug and probably classifies as node bug or linux memcache bug. Should be reported');
              throw ('Original string and reparsed don\'t match at ' + i + ' byte - system string conversion malfunction - abtorting');
            }
          return sind;
        }));
      }
      return pind;
    } catch (e) {
      console.error('Could not read index - aborting', p, e);
      process.exit(11);
    }
  } else {
    args.verbose && console.log('Generating index... (', p, ')');
    p.match(/\.extindex$/) || ((p += '.extindex') && console.warn('files index should have .extindex extension - renaming'));
    // The name may just have changed, so existence is checked once more.
    if (fs.existsSync(p)) {
      console.error('Index already exists - aborting', p);
      process.exit(12);
    }
    // The source is either a directory to scan or a list in md5sum format.
    var dirbase = fs.statSync(args['--'][i]).isDirectory();
    // Empty placeholder, created so that realpathSync(p) below can resolve.
    fs.writeFileSync(p, '');
    var newIndex = {
      extindex: fs.realpathSync(p),
      md5index: dirbase ? null : fs.realpathSync(args['--'][i]),
      base: dirbase ? fs.realpathSync(args['--'][i]) : null,
      index: dirbase ? [] : String(fs.readFileSync(args['--'][i])).split('\n').filter(function (line, i) {
        if (!line.match(/^[0-9a-f]{32} .+/)) {
          console.warn('Malformed line: (' + i + ') "' + line + '"');
          return false;
        }
        return true;
      }).map(function (line) {
        // 32 hex characters, a two character separator, then the path.
        return {
          md5: line.substr(0, 32),
          path: line.substr(34),
          stat: null
        };
      }).filter(function (ent) {
        if (!fs.existsSync(ent.path)) {
          console.warn('File ' + ent.path + ' does not exist - skipped');
          return false;
        }
        return true;
      }).map(function (ent) {
        ent.path = fs.realpathSync(ent.path);
        return ent;
      }).sort(function (a, b) {
        return (a.path > b.path ? 1 : (a.path < b.path ? -1 : 0));
      }).map(function (ent, i, arr) {
        args.verbose && arr.length > 100000 && (i % 10000 == 0) && console.log('[stat:] ', (i * 100 / arr.length).toPrecision(3) + '%');
        // JSON round trip stores the stat in the same shape it has after
        // being read back from an index file.
        ent.stat = JSON.parse(JSON.stringify(fs.statSync(ent.path)));
        ent.path = ent.path.split('/').filter(function (s) { return s != ''; });
        return ent;
      })
    };
    if (!newIndex.base) {
      var ind = newIndex.index;
      // Without at least one entry there is no directory to derive the base
      // from. Remove the placeholder created above (so that --init can be
      // repeated) and stop with a clear message instead of a TypeError.
      if (!ind.length) {
        fs.unlinkSync(p);
        console.error('Source md5index contains no usable entries - aborting', p);
        process.exit(15);
      }
      // The base is the longest directory prefix shared by all entries.
      var longTree = ind[0].path.slice(0, -1);
      for (var i = 0, l = ind.length; i < l; i++) {
        for (var j = 0, ll = ind[i].path.length - 1; j < ll; j++)
          if (longTree[j] != ind[i].path[j])
            break;
        longTree = longTree.slice(0, j);
      }
      newIndex.base = '/' + longTree.join('/');
      // Entry paths are stored relative to the base.
      newIndex.index = ind.map(function (ent) {
        return {
          stat: ent.stat,
          md5: ent.md5,
          path: ent.path.slice(longTree.length)
        };
      });
    }
    return newIndex;
  }
}).filter(function validate(index) {
  // Loop counters shared by the plain `for` loops of this function.
  var i, j, l, ll;
  args.verbose && console.log('Refreshing... (', index.extindex, ')');
  //fetch current list of files
  var realindex = child_process.spawnSync('find', [index.base, '-type', 'f'], {cwd: '/'});
  if (realindex.error || realindex.status) {
    console.error(realindex.error || ('find exit status code: ' + realindex.status));
    process.exit(40);
  }
  var strrealindex = String(realindex.stdout);
  if (strrealindex != realindex.stdout) {
    console.error('Soft bug occured - it\'s serious bug and probably classifies as node bug or linux memcache bug. Should be reported');
    // This comparison is made on the whole output, so there is no byte
    // offset to report.
    throw ('Original string and reparsed don\'t match - system string conversion malfunction - abtorting');
  }
  realindex = strrealindex.split('\n').sort(function (a, b) {
    return (a > b ? 1 : (a < b ? -1 : 0));
  }).filter(function (p) {
    if (!fs.existsSync(p)) {
      // The empty string comes from the trailing newline of the output.
      if (p != '') {
        console.error('Something went wrong - find returned non-existing file - aborting ', p);
        process.exit(41);
      }
      return false;
    }
    return true;
  }).map(function (p) {
    return p.split('/').filter(function (s) { return s != ''; });
  });
  var missingFile = [],
    missingIndex = [];
  var diff = [];
  //find missing files
  index.index = index.index.filter(function (ent) {
    if (!fs.existsSync(index.base + '/' + ent.path.join('/')))
      return (missingFile.push(ent) && false);
    return true;
  });
  //find missing index entries
  // Both lists are walked in parallel in sorted order; a path that shows up
  // only in the find output has no index entry yet.
  for (i = 0, j = 0, l = index.index.length, ll = realindex.length; i < l && j < ll;) {
    var pi = index.base + '/' + index.index[i].path.join('/'),
      pr = '/' + realindex[j].join('/');
    if (pi == pr) {
      i++;
      j++;
    } else if (pi > pr) {
      missingIndex.push(pr);
      j++;
    } else { //(pi<pr)
      console.error('something went wrong - there should be no missing files at this step (possible BUG) - aborting');
      process.exit(60);
    }
  }
  if (j != ll)
    for (; j < ll; j++)
      missingIndex.push('/' + realindex[j].join('/'));
  if (i != l)
    for (; i < l; i++) {
      console.error('something went wrong - there should be no missing files at this step (possible BUG) - aborting');
      process.exit(61);
    }
  //compare stat
  var md5queue = [];
  args.verbose && console.log('Comparing stat...');
  for (var i = 0, l = index.index.length; i < l; i++) {
    args.verbose && l > 100000 && (i % 10000 == 0) && console.log('[stat:] ', (i * 100 / l).toPrecision(3) + '%');
    var ent = index.index[i],
      p = index.base + '/' + ent.path.join('/'),
      newStat = JSON.parse(JSON.stringify(fs.statSync(p))),
      oldStat = ent.stat;
    // d maps every changed stat key to [old value, new value].
    var d = {};
    var cmpKeys = ['mode', 'uid', 'gid', 'size', 'mtime'];
    for (var j = 0, ll = cmpKeys.length; j < ll; j++)
      if (oldStat[cmpKeys[j]] != newStat[cmpKeys[j]])
        d[cmpKeys[j]] = [oldStat[cmpKeys[j]], newStat[cmpKeys[j]]];
    if (Object.keys(d).length)
      diff.push({ent: ent, diff: d});
    // With --careless, owner / group / mode changes alone do not cause a re-hash.
    var junkDiff = 0;
    if (args.careless) {
      ('gid' in d) && junkDiff++;
      ('uid' in d) && junkDiff++;
      ('mode' in d) && junkDiff++;
    }
    // Re-hash when forced (--deep) or, unless --fast, when a relevant stat
    // value changed or the entry still carries a placeholder instead of a hash.
    if (args.deep ||
      ((Object.keys(d).length > junkDiff ||
        ent.md5 == '########### skipped ############' ||
        ent.md5 == '############ ERROR #############') &&
        !args.fast))
      md5queue.push({ent: ent, diff: d});
  }
  if (args.importcorrupted) {
    if (!fs.existsSync(args.importcorrupted))
      throw 'Could not find corrupted files list';
    // Every list line below this index's base becomes an array of path
    // segments relative to the base.
    var list = String(fs.readFileSync(args.importcorrupted)).split('\n').filter(function (p) {
      return !args.md5sumcorrupted || p.match(/: FAILED$/);
    }).filter(function (p) {
      return p.indexOf(index.base) == 0;
    }).map(function (p) {
      return (args.md5sumcorrupted ? p.replace(/: FAILED$/, '') : p).split('/').filter(function (s) { return s != ''; }).slice(index.base.split('/').filter(function (s) { return s != ''; }).length);
    });
    // An entry is selected when its path starts with one of the listed
    // paths; the final filter removes duplicates.
    var files = [].concat.apply([], list.map(function (p) {
      var valid = index.index;
      for (var i = 0, l = p.length; i < l; i++)
        valid = valid.filter(function (ent) {
          return ent.path[i] == p[i];
        });
      return valid;
    })).filter(function (e, i, arr) {
      return i == arr.indexOf(e);
    });
    args.verbose && console.log('Following files will be force reloaded in this index: ( ' + index.base + ' )\n' + files.map(function (ent) { return ' ' + ent.path.join('/'); }).join('\n'));
    files.forEach(function (ent) {
      if (md5queue.filter(function (cqueue) { return cqueue.ent == ent; }).length == 0)
        md5queue.push({ent: ent, diff: {}});
    });
  }
  // Total number of bytes to hash, used for the progress percentage.
  for (var i = 0, l = md5queue.length, md5queueSize = 0; i < l; i++)
    md5queueSize += (md5queue[i].diff.size ? md5queue[i].diff.size[1] : md5queue[i].ent.stat.size);
  for (var i = 0, l = md5queue.length, md5queueProgress = 0; i < l; i++) {
    var cqueue = md5queue[i];
    args.verbose &&
      (l < 500 || i % 50 == 0 || (cqueue.diff.size ? cqueue.diff.size[1] : cqueue.ent.stat.size) > 50 * 1024 * 1024) &&
      console.log('[md5:] ', i + 1 + '/' + l, (md5queueProgress * 100 / md5queueSize).toPrecision(3) + '% ' + (l < 500 ? cqueue.ent.path.join('/') : ''));
    var ret = child_process.spawnSync('md5sum', [index.base + '/' + cqueue.ent.path.join('/')], {cwd: '/'});
    if (ret.error || ret.status) {
      console.error(ret.error || 'md5sum exit status code: ' + ret.status + ' ' + String(ret.stderr));
      process.exit(43);
    }
    ret = String(ret.stdout).replace('\n', '').substr(0, 32);
    if (ret != cqueue.ent.md5) {
      // An entry whose stat did not change is not in `diff` yet.
      if (Object.keys(cqueue.diff).length == 0)
        diff.push(cqueue);
      cqueue.diff.md5 = [cqueue.ent.md5, ret];
    }
    md5queueProgress += (cqueue.diff.size ? cqueue.diff.size[1] : cqueue.ent.stat.size);
  }
  //apply diff
  diff.forEach(function (d) {
    var keys = Object.keys(d.diff).filter(function (k) { return k != 'md5'; });
    for (var i = 0, l = keys.length; i < l; i++)
      d.ent.stat[keys[i]] = d.diff[keys[i]][1];
    if (d.diff.md5)
      d.ent.md5 = d.diff.md5[1];
  });
  //add new files to index
  if (missingIndex.length) {
    args.verbose && console.log('Updating extindex... (' + missingIndex.length + ' new files)');
    var baselen = index.base.split('/').filter(function (b) { return b != ''; }).length;
    for (var i = 0, l = missingIndex.length, missingSize = 0; i < l; i++)
      missingSize += fs.statSync(missingIndex[i]).size;
    var missingProgress = 0, l = missingIndex.length;
    missingIndex = missingIndex.map(function (p, i) {
      var s = fs.statSync(p);
      args.verbose &&
        (l < 500 || i % 50 == 0 || s.size > 50 * 1024 * 1024) &&
        console.log(args.fast ? '[stat:] ' : '[md5:] ', i + 1 + '/' + l, (missingProgress * 100 / missingSize).toPrecision(3) + '% ' + (l < 500 ? p : ''));
      // `ret` ends up as "<32 characters> <path>": a real md5sum output line
      // or one of the two 32 character placeholders.
      var ret = '########### skipped ############ ' + p;
      if (!args.fast) {
        ret = child_process.spawnSync('md5sum', [p], {cwd: '/'});
        if (ret.error || ret.status) {
          // A failure here is not fatal: the entry gets the ERROR placeholder
          // (re-hashed on a later run) and the failure is recorded. It has to
          // be recorded before `ret` is replaced by the placeholder string,
          // otherwise status and stderr are already gone.
          console.error(ret.error || 'md5sum exit status code: ' + ret.status + ' ' + String(ret.stderr));
          errors.push([ret.status, String(ret.stderr)]);
          ret = '############ ERROR ############# ' + p;
        } else
          ret = String(ret.stdout).replace('\n', '');
      }
      missingProgress += s.size;
      return {
        stat: s,
        md5: ret.substr(0, 32),
        path: ret.substr(34).split('/').filter(function (s) { return s != ''; }).slice(baselen)
      };
    });
    index.index = index.index.concat(missingIndex).sort(function (a, b) {
      var pa = a.path.join('/'),
        pb = b.path.join('/');
      return (pa > pb ? 1 : (pa < pb ? -1 : 0));
    });
  }
  missing.file = missing.file.concat(missingFile);
  missing.index = missing.index.concat(missingIndex);
  global.diff = global.diff.concat(diff);
  // Reports and backups go to "<index dir>/extindex.old/".
  var now = Date.now(),
    basepath = index.extindex.replace(/\/[^\/]*$/, ''),
    filename = index.extindex.replace(/.*\//, ''),
    newbase = basepath + '/extindex.old/',
    newfile = newbase + filename;
  if ((args.forceupdate || !args.fast) && !args.simulate) {
    if (!fs.existsSync(newbase))
      fs.mkdirSync(newbase);
    if (missingFile.length) {
      console.log('Creating missing list... (' + missingFile.length + ' files missing)');
      fs.writeFileSync(newfile + '.' + now + '.missing', JSON.stringify(missingFile, null, 2));
    }
    if (missingIndex.length) {
      console.log('Creating new files list... (' + missingIndex.length + ' new files)');
      fs.writeFileSync(newfile + '.' + now + '.new', JSON.stringify(missingIndex, null, 2));
    }
    if (diff.length) {
      console.log('Creating diff list... (' + diff.length + ' files different)');
      fs.writeFileSync(newfile + '.' + now + '.diff', JSON.stringify(diff, null, 2));
    }
    if (args.forceupdate || diff.length || missingIndex.length || missingFile.length) {
      args.verbose && console.log('[extindex:] ', index.extindex);
      // Backups are named "<file>.old.<n>", 1 being the most recent one.
      var old = fs.readdirSync(newbase)
        .filter(function (p) { return p.indexOf(filename) == 0; })
        .filter(function (p) { return p.match(/\.old\.[0-9]+$/); });
      // Drop the copies that would exceed the limit after rotation ...
      old.filter(function (p) { return p.match(/[0-9]+$/)[0] >= args.backup; }).forEach(function (p) { fs.unlinkSync(newbase + p); });
      if (args.backup) {
        // ... shift the remaining ones up by one, highest number first ...
        for (var b = args.backup - 1; b > 0; b--)
          old.filter(function (p) { return p.match(/[0-9]+$/)[0] == b; }).forEach(function (p) { fs.renameSync(newbase + p, newbase + p.replace(/[0-9]+$/, b + 1)); });
        // ... and move the current files in as backup number 1.
        fs.renameSync(index.extindex, newfile + '.old.1');
        for (var i = 0, l = index.subindexnum; i < l; i++)
          fs.renameSync(index.extindex + '.part' + i, newfile + '.part' + i + '.old.1');
      }
      // More than 25000 entries are split into ".part<N>" files of 25000
      // entries each; the main file then stores only their count.
      subindexes = [];
      while (index.index.length > 25000)
        subindexes.push(index.index.splice(0, 25000));
      subindexes.push(index.index);
      if (subindexes.length > 1) {
        index.index = null;
        index.subindexnum = subindexes.length;
      } else {
        subindexes = [];
        index.subindexnum = 0;
      }
      var filenames = [index.extindex],
        filecontents = [index];
      for (var i = 0, l = subindexes.length; i < l; i++) {
        filenames.push(index.extindex + '.part' + i);
        filecontents.push(subindexes[i]);
      }
      for (var i = 0, l = filenames.length; i < l; i++) {
        args.verbose && console.log('[extindex:] part ' + (i + 1) + '/' + l);
        var content = JSON.stringify(filecontents[i], null, 2),
          sha512;
        var sha = child_process.spawnSync('sha512sum', [], {input: content});
        if (sha.error || sha.status) {
          console.error(sha.error || 'sha512sum exit status code: ' + sha.status + ' ' + String(sha.stderr));
          process.exit(45);
        }
        sha512 = String(sha.stdout).substr(0, 128);
        // Write, read back and compare; one retry before giving up.
        try {
          fs.writeFileSync(filenames[i], sha512 + content);
          var file = fs.readFileSync(filenames[i]);
          if (file != (sha512 + content))
            throw ':c';
        } catch (e) {
          console.error('Error occured during index write - retry');
          fs.writeFileSync(filenames[i], sha512 + content);
          var file = fs.readFileSync(filenames[i]);
          if (file != (sha512 + content))
            throw 'Could not properly write index file - operation totally failed, all your data is lost and ur doomed :< - aborting';
        }
      }
      // Put the in-memory entry list back together after a split write.
      if (subindexes.length)
        index.index = [].concat.apply([], subindexes);
    }
    if (args.export) {
      var md5indexpath = basepath + '/md5index/' + filename.replace(/\.extindex$/, '.md5index');
      args.verbose && console.log('[md5index:] ', md5indexpath);
      if (!fs.existsSync(basepath + '/md5index'))
        fs.mkdirSync(basepath + '/md5index');
      if (fs.existsSync(md5indexpath + '.old'))
        fs.unlinkSync(md5indexpath + '.old');
      if (fs.existsSync(md5indexpath))
        fs.renameSync(md5indexpath, md5indexpath + '.old');
      // Hash and path are separated by two characters: the --init reader
      // above takes the path from offset 34, so a one character separator
      // would cut off the first character of every path when the export is
      // used as a source again.
      fs.writeFileSync(md5indexpath, index.index.map(function (ent) {
        return ent.md5 + '  ' + index.base + '/' + ent.path.join('/');
      }).join('\n'));
    }
  } else
    args.verbose && console.log((args.fast ? 'Fast' : 'Simulate') + ' mode - log write skipped');
  return true;
});
// Read-only, computed-on-access summaries of the global result lists.

// One "changed,keys -- path" line per entry of the global diff list.
Object.defineProperty(global, 'shortdiff', {
  get: function () {
    return diff.map(function (change) {
      var changedKeys = Object.keys(change.diff).sort();
      return changedKeys.join(',') + ' -- ' + change.ent.path.join('/');
    });
  }
});

// Paths of the index entries whose file no longer exists.
Object.defineProperty(global, 'shortmissing', {
  get: function () {
    return missing.file.map(function (entry) {
      return entry.path.join('/');
    });
  }
});

// Paths of the files that were not in the index yet.
Object.defineProperty(global, 'shortnew', {
  get: function () {
    return missing.index.map(function (entry) {
      return entry.path.join('/');
    });
  }
});
// Hand over to the interactive module when --interactive is set, or when
// --halfinteractive is set and at least one missing file, new file or
// difference was recorded; otherwise just say so (in verbose mode) and finish.
if (!args.interactive && !(args.halfinteractive && (missing.file.length + missing.index.length + diff.length) > 0)) {
  if (args.verbose) {
    console.log('Nothing to do - exiting');
  }
} else {
  require('/usr/lib/node_modules/interactive/').start(this);
}
/**
 * Wraps an index into an "extended index" object.
 *
 * The step that is meant to fill the directory tree only walks over the
 * entries without doing anything, so `tree` is always an empty object.
 *
 * @param {Object} index - index object; `md5index`, `base` and `index` are read
 * @returns {{md5index: *, basePath: *, tree: Object}} the extended index
 */
function buildExtendedTree(index) {
  var result = {
    md5index: index.md5index,
    basePath: index.base,
    tree: null
  };
  var root = {};
  // Placeholder walk over the entries: nothing is added to `node` yet.
  var fillTree = function (entries, node) {
    var remaining = entries.length;
    while (remaining > 0) {
      remaining--;
    }
  };
  fillTree(index.index, root);
  result.tree = root;
  return result;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement