Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import base64
import hashlib
import md5
import os
import pickle
import sgmllib
import urllib
import urlparse
class HTML_Link:
    """A bookmark hyperlink that renders itself as an HTML anchor.

    Keyword arguments (all optional):
        text:     visible link text (default ``" "``).
        href:     link target (default ``'/'``).
        add_date: value written to the ``ADD_DATE`` attribute (default ``'0'``).
        linkType: ``None`` for an ordinary bookmark, or ``"FOLDER"``
                  (compared case-insensitively) for a link to a folder.
    """

    def __init__(self, **kw):
        self.text = kw.get("text", " ")
        self.href = kw.get("href", '/')
        self.add_date = kw.get("add_date", '0')
        self.linkType = kw.get("linkType", None)

    def toHTML(self):
        """Return the link as an HTML fragment (always a string).

        Folder links are rendered without ``ADD_DATE`` and prefixed with
        ``<b>Folder:</b>``.  Every other link type, including unrecognised
        ones, is rendered as a plain bookmark anchor; the original fell off
        the end of the method and returned ``None`` for those.
        """
        # NOTE: attribute values are inserted verbatim -- no HTML escaping is
        # applied, exactly as in the original string concatenation.
        if self.linkType is not None and self.linkType.upper() == "FOLDER":
            anchor = '<A HREF="%s">%s</A>' % (self.href, self.text)
            return '<b>Folder:</b>' + anchor
        # %-formatting (rather than '+') tolerates a non-string add_date.
        return '<A HREF="%s" ADD_DATE="%s">%s</A>' % (
            self.href, self.add_date, self.text)
class Section:
    """A section heading that renders itself as an ``<H3>`` element."""

    def __init__(self, text):
        # text: the heading text, emitted verbatim by toHTML().
        self.text = text

    def toHTML(self):
        """Return the heading wrapped in ``<H3>...</H3>``."""
        return "<H3>%s</H3>" % (self.text,)
class SimplePathWatcher(object):
    """Minimal path watcher: remembers a root and the last header name.

    Derives from ``object`` so that subclasses can use ``super()`` under
    Python 2.  ``On_Enter`` / ``On_Exit`` are no-op hooks for subclasses.
    """

    def __init__(self, root):
        # root: base directory reported by get_Dir() while no entry exists.
        self.root = root
        # bname: name passed to the most recent On_Header() call.
        self.bname = None
        # items: stack of entry dicts; get_Dir() reads the 'full' key of the
        # last one.  The base class itself never pushes anything onto it.
        self.items = []

    def On_Header(self, bname):
        """Record ``bname`` as the current header name."""
        self.bname = bname

    def On_Enter(self):
        """Hook called when a directory is entered; does nothing here."""
        pass

    def On_Exit(self):
        """Hook called when a directory is left; does nothing here."""
        pass

    def get_Dir(self):
        """Return the ``'full'`` path of the newest entry, or ``root`` if none.

        The original expression lacked ``self`` and ``return`` and raised on
        an empty stack.
        """
        if self.items:
            return self.items[-1]['full']
        return self.root


# The class was originally spelled "SimpePathWatcher" while its subclass
# referred to "SimplePathWatcher"; keep the old spelling as an alias.
SimpePathWatcher = SimplePathWatcher
class DictList(list):
    """A list of dicts with keyword-friendly construction and appending.

    ``DictList(d1, d2)`` starts out containing ``d1`` and ``d2`` (each
    positional argument is one entry).
    """

    def __init__(self, *args):
        super(DictList, self).__init__(args)

    def append(self, *args, **kw):
        """Append one dict entry.

        Accepts keyword arguments (``append(a=1)``), a single mapping
        (``append({'a': 1})``), or both, in which case the keywords override
        keys of the mapping.  The mapping is copied before it is stored.

        Raises:
            TypeError: if more than one positional argument is given.
        """
        if len(args) > 1:
            raise TypeError(
                "append() takes at most 1 positional argument (%d given)"
                % len(args))
        entry = dict(args[0]) if args else {}
        entry.update(kw)
        super(DictList, self).append(entry)
class EncodedPathWatcher(SimplePathWatcher):
    """Path watcher that keeps a stack of encoded directory entries.

    Each entry pushed onto ``self.items`` is a dict with the keys:

        bname    encoded directory name
        full     full path of the directory
        nesting  depth of the entry (0 for the first level)
        pathLen  ``len(full)``
        md5      running hash object over the *unencoded* names from the
                 first level down, joined by ``"\\n"``

    When entering a directory below the first level would exceed
    ``MAX_NESTING`` or ``MAX_PATH_LEN``, ``mkLinks`` creates the directory
    directly under ``root`` with a hash-suffixed name and leaves a symlink
    at the original location.

    Keyword arguments (all optional):
        seperator:    path separator, default ``'/'`` (key spelling is the
                      original one).
        writePath:    ``(flag, filename)`` pair, default
                      ``(False, 'encodepath.txt')``.  Stored only.
        encoder:      callable ``encoder(bname, watcher)`` returning the
                      directory name; default is ``defaultEncoder``.
        symLast:      callback stored as ``symLastCB``.  Stored only.
        SymLastWhen:  stored as is, default ``'SOMETIMES'``.  Stored only.
        MAX_PATH_LEN: longest accepted full path, default 200.
        MAX_NESTING:  deepest accepted nesting; 0 means no limit.
        hashFn:       zero-argument hash constructor, default ``hashlib.md5``.
    """

    def __init__(self, root, **kw):
        # The original called super(SimplePathWatcher, self), which skips
        # SimplePathWatcher.__init__ and leaves root/bname unset.
        super(EncodedPathWatcher, self).__init__(root)
        self.items = DictList()
        self.sep = kw.get('seperator', '/')
        self.writePath, self.fname = kw.get(
            'writePath', (False, 'encodepath.txt'))
        self.encoder = kw.get('encoder', self.defaultEncoder)
        self.symLastCB = kw.get('symLast', self.defaultSymLast)
        self.SymLastWhen = kw.get('SymLastWhen', 'SOMETIMES')
        # Typically the maximum path length on Linux is 255, but we want to
        # leave some room for the filename.
        self.MAX_PATH_LEN = kw.get('MAX_PATH_LEN', 200)
        # 0 means no limit on the number of nested directories.
        self.MAX_NESTING = kw.get('MAX_NESTING', 0)
        # NOTE(review): the original assignment was left unfinished
        # ("self.hashFn="); assumed to be the constructor of the hash that
        # On_Enter started with md5.new() -- confirm.
        self.hashFn = kw.get('hashFn', hashlib.md5)

    def On_Enter(self):
        """Push an entry for the directory named by the last On_Header call.

        Raises:
            OSError: propagated from ``mkLinks`` when the directory has to be
                relocated and the filesystem operations fail.
        """
        # self.bname is set by SimplePathWatcher.On_Header.
        dirname = self.encode(self.bname)
        if len(self.items) == 0:
            last = None
            parent = self.root
            nesting = 0
            m = self.hashFn()
        else:
            last = self.items[-1]
            parent = last['full']
            nesting = last['nesting'] + 1
            # Continue the parent's hash; "\n" separates the path components.
            m = last['md5'].copy()
            m.update(b"\n")
        # We're hashing the unencoded name.  Hash objects need bytes.
        raw = self.bname
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        m.update(raw)

        fullpath = parent + self.sep + dirname
        nesting_ok = self.MAX_NESTING == 0 or nesting <= self.MAX_NESTING
        # NOTE(review): the original compared the *parent's* stored length;
        # the length of the path about to be used is checked here instead.
        length_ok = len(fullpath) <= self.MAX_PATH_LEN
        # A first-level entry has no parent to link from, so it is never
        # relocated.
        if last is None or (nesting_ok and length_ok):
            name, full = dirname, fullpath
        else:
            name, full = self.mkLinks(last, dirname, m)
        # Keyword form matches DictList.append's keyword interface.
        self.items.append(bname=name, full=full, nesting=nesting,
                          pathLen=len(full), md5=m)

    def mkLinks(self, last, dirname, m):
        """Create a relocated directory under ``root`` and link to it.

        Args:
            last:    entry dict of the parent directory (needs ``'full'``).
            dirname: encoded name of the directory being entered.
            m:       hash object identifying the directory's logical path.

        Returns:
            ``(dirname2, newdir)``: the hash-suffixed name and its full path.

        Raises:
            OSError: if the directory or the symlink cannot be created.
        """
        # Alt characters are tilde and dash like in Freenet:
        # https://github.com/freenet/wiki/wiki/Signed-Subspace-Key
        dig2 = base64.b64encode(m.digest(), b'~-')
        if not isinstance(dig2, str):
            dig2 = dig2.decode('ascii')
        dirname2 = dirname + "-md5" + dig2
        newdir = self.root + self.sep + dirname2
        if not os.path.exists(newdir):
            print("Making directory " + newdir)
            os.makedirs(newdir)
        # NOTE(review): the original also ran os.symlink(last.full, newdir),
        # which can never succeed because newdir exists at this point.  It
        # was presumably meant to become the unfinished "symLast" back-link;
        # dropped until that contract is defined.
        link = last['full'] + self.sep + dirname
        if not os.path.lexists(link):
            os.symlink(newdir, link)
        return dirname2, newdir

    def On_Exit(self):
        """Pop the entry pushed by the matching ``On_Enter``."""
        self.items.pop()

    def encode(self, bname):
        """Return the directory name for ``bname`` using the encoder."""
        return self.encoder(bname, self)

    def defaultEncoder(self, bname, watcher=None):
        """Percent-encode ``bname`` with no safe characters.

        ``watcher`` is accepted (and ignored) because ``encode`` calls every
        encoder as ``encoder(bname, watcher)``.
        """
        return urllib.quote(bname, safe='')

    def defaultSymLast(self, *args, **kw):
        """Placeholder for the default ``symLast`` callback; does nothing.

        NOTE(review): the original definition was cut off after
        ``def defaultSymLast(`` and nothing in this class invokes
        ``symLastCB``, so the intended signature is unknown.
        """
        return None
class BookMarkParser(sgmllib.SGMLParser):
    """SGML parser that mirrors a bookmark export as a directory tree.

    For every ``<H3 FOLDED>`` header a sub-directory of the current working
    directory ``self.wd`` is created, and when the following ``<DL>`` list
    closes, the items collected for it are written to ``index.html`` inside
    that directory.  While a nested list is being parsed, the parent's items
    are parked in ``list.p`` (pickle) in the parent directory.

    Keyword arguments (all optional):
        watcher: object notified of folder headers; an
                 ``EncodedPathWatcher`` is created when omitted.
        wd:      starting working directory; defaults to the watcher's
                 ``root`` attribute, or ``DEFAULT_WD`` if it has none.
    """

    # Directory hard-coded in the original source.
    DEFAULT_WD = '/root/Downloads/pt'

    def __init__(self, **kw):
        sgmllib.SGMLParser.__init__(self)
        self.STATE = "__init__"
        self.DL = []          # stack of directory base names, one per open <DL>
        self.items = []       # HTML_Link / Section objects of the current list
        self.A = None         # link currently being parsed, if any
        self._a_has_text = False  # True once self.A received its first data
        self.watcher = kw.get('watcher', None)
        if self.watcher is None:
            self.watcher = EncodedPathWatcher(kw.get('wd', self.DEFAULT_WD))
        # The original never assigned self.wd although every method reads it.
        self.wd = kw.get('wd', getattr(self.watcher, 'root', self.DEFAULT_WD))
        if not os.path.exists(self.wd):
            print("Making directory " + self.wd)
            os.makedirs(self.wd)

    def NotifyHeaderWatchers(self, name):
        """Pass a folder header name to the watcher's ``On_Header`` hook.

        NOTE(review): the original called this method without defining it;
        forwarding to ``On_Header`` is the assumed intent -- confirm.
        """
        on_header = getattr(self.watcher, 'On_Header', None)
        if on_header is not None:
            on_header(name)

    def writeHTMLHeader(self, f):
        """Write the opening doctype/html/body lines to ``f``."""
        print('WriteHTMLHeader')
        f.write("%s\n" % '<!DOCTYPE html>')
        f.write("%s\n" % '<html>')
        f.write("%s\n" % '<body>')

    def writeHTMLFooter(self, f):
        """Write the closing body/html lines to ``f``."""
        print('writeHTMLFooter')
        f.write("%s\n" % '</body>')
        f.write("%s\n" % '</html>')

    def writeList(self):
        """Write the current items to ``index.html`` inside ``self.wd``.

        The original opened a bare 'index.html', i.e. whatever directory the
        last os.chdir() had left the process in, which is not always the
        directory of the list being closed.
        """
        print('writeList')
        with open(os.path.join(self.wd, 'index.html'), 'w') as f:
            self.writeHTMLHeader(f)
            for item in self.items:
                if item is not None:
                    f.write("%s\n" % item.toHTML())
                else:
                    f.write("<b>Empty Item!!!!</b>")
            self.writeHTMLFooter(f)

    def storeList(self):
        """Change into ``self.wd`` and pickle the current items to ``list.p``."""
        print('storeList')
        os.chdir(self.wd)
        print(os.getcwd())
        self.wd = os.getcwd()
        with open("list.p", "wb") as f:
            pickle.dump(self.items, f)

    def loadList(self):
        """Change into ``self.wd`` and restore the items from ``list.p``."""
        print('loadList')
        os.chdir(self.wd)
        print(self.wd)
        print(os.getcwd())
        # SECURITY: unpickling can execute arbitrary code; list.p is expected
        # to be the file written by storeList(), never external input.
        with open("list.p", "rb") as f:
            self.items = pickle.load(f)

    def start_h3(self, attributes):
        """Enter state 'FOLDED' for folder headers, 'Started H3' otherwise."""
        print('start_H3')
        self.STATE = 'Started H3'
        for name, value in attributes:
            print(name + "=" + value)
            if (value == 'FOLDED') or (name == 'folded'):
                self.STATE = 'FOLDED'

    def handle_data(self, data):
        """Consume character data according to the current state."""
        print('handleData')
        print("self.STATE=" + self.STATE)
        if self.STATE == 'FOLDED':
            # Directory name on disk, and the same name quoted once more so
            # that its '%' characters survive inside an href.  These two
            # assignments were commented out in the original although the
            # names were still used below.
            dirname = urllib.quote(data, safe='')
            dirname2 = urllib.quote(dirname, safe='')
            self.NotifyHeaderWatchers(data)
            self.items.append(HTML_Link(href=dirname2 + "/index.html",
                                        linkType='FOLDER', text=data))
            # Park the parent's items before descending into the folder.
            self.storeList()
            self.wd = os.path.join(self.wd, dirname)
            print("self.wd=" + self.wd)
            # https://stackoverflow.com/questions/273192/how-can-i-safely-create-a-nested-directory-in-python
            if not os.path.exists(self.wd):
                print("Making directory " + self.wd)
                os.makedirs(self.wd)
            print("Entering:" + str(self.wd))
            self.STATE = "Seeking DL"
        elif self.STATE == "A":
            # The parser may deliver one text in several chunks (e.g. around
            # entity references); join them instead of keeping only the last.
            if self._a_has_text:
                self.A.text += data
            else:
                self.A.text = data
                self._a_has_text = True
        elif self.STATE == "DD":
            self.items.append(Section(data.split('\n')[0]))

    def end_h3(self):  # Probably redundant
        print('end_H3')
        self.FOLDED = False

    def start_dl(self, atributes):
        """Start a fresh item list for the list that is opening."""
        print('start_DL')
        self.items = []
        print(self.DL)
        print(self.wd)
        # Maybe append the full path here instead of the basename.
        self.DL.append(os.path.basename(self.wd))

    def end_dl(self):
        """Write the finished list and return to the parent list, if any."""
        print('end_DL')
        self.writeList()
        if self.DL:
            self.DL.pop()  # the original "self.DL.pop" never called pop
        # list.p only exists for lists that were suspended by a folder
        # header, so there is nothing to reload after the outermost </DL>.
        if self.DL:
            self.wd = os.path.normpath(os.path.join(self.wd, os.pardir))
            self.loadList()

    def start_a(self, atributes):
        """Begin a new link and copy the tag's attributes onto it."""
        print('start_A')
        self.A = HTML_Link()
        self._a_has_text = False
        for key, value in atributes:
            setattr(self.A, key, value)
        self.STATE = 'A'

    def end_a(self):
        """Add the finished link to the current items."""
        print('end_A')
        self.items.append(self.A)
        self.A = None
        self.STATE = 'Ended A'

    def do_dd(self, atributes):
        print('do_DD')
        self.STATE = "DD"

    def do_dt(self, atributes):
        print('do_DT')
        self.STATE = "DT"
# Driver: feed the exported bookmark file to the parser one line at a time.
p = BookMarkParser()
filename = '/root/Downloads/pearltrees_export.html'
BUFSIZE = 8192  # unused while the input is read line by line
# The with-block closes the input file, which the original never did.
with open(filename, "r") as f:
    for data in f:
        print('data=' + str(data))
        p.feed(data)
p.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement