Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sgmllib, urllib, urlparse, os, pickle
class HTML_Link:
    """A single bookmark anchor that can render itself as an HTML ``<A>`` tag.

    Keyword arguments (all optional):
        text: visible link text; defaults to a single space.
        href: link target; defaults to ``'/'``.
        add_date: value emitted in the ``ADD_DATE`` attribute; defaults
            to ``'0'``.
        linkType: ``None`` for an ordinary bookmark, or ``"FOLDER"``
            (compared case-insensitively) for a link to a folder page.
    """

    def __init__(self, **kw):
        self.text = kw.get("text", " ")
        self.href = kw.get("href", '/')
        self.add_date = kw.get("add_date", '0')
        self.linkType = kw.get("linkType", None)

    def toHTML(self):
        """Return this link as an HTML fragment.

        Folder links are rendered without ``ADD_DATE`` and prefixed with
        ``<b>Folder:</b>``.  Every other link, including one whose
        ``linkType`` is set to an unrecognised value, is rendered as an
        ordinary bookmark anchor, so the method always returns a string.

        NOTE: ``text``, ``href`` and ``add_date`` are inserted verbatim;
        no HTML escaping is applied.
        """
        link_type = self.linkType
        if link_type is not None and link_type.upper() == "FOLDER":
            anchor = '<A HREF="' + self.href + '">' + self.text + "</A>"
            return '<b>Folder:</b>' + anchor
        # Plain bookmark, or an unknown linkType: fall back to the default
        # rendering instead of implicitly returning None.
        return ('<A HREF="' + self.href + '" ADD_DATE="' + self.add_date
                + '">' + self.text + "</A>")
class Section:
    """A section heading that renders itself as an ``<H3>`` element."""

    def __init__(self, text):
        # Heading text, emitted verbatim between the <H3> tags.
        self.text = text

    def toHTML(self):
        """Return the heading wrapped in ``<H3>...</H3>``."""
        pieces = ("<H3>", self.text, "</H3>")
        return "".join(pieces)
class BookMarkParser(sgmllib.SGMLParser):
    """SGML parser that splits a bookmark export into a tree of HTML pages.

    Each ``<H3>`` carrying a ``FOLDED`` attribute becomes a sub-directory
    of the working directory, and every ``<DL>`` list is written out as an
    ``index.html`` page inside the directory that is current when the list
    closes.  While a nested list is being parsed, the enclosing list's
    items are parked in a ``list.p`` pickle inside the enclosing directory
    and restored when the nested list ends.

    Keyword arguments:
        wd: root output directory (default ``'/root/Downloads/pt'``).
            It is created if it does not exist.

    Attributes:
        STATE: name of the last structural event seen; ``handle_data``
            uses it to decide what a piece of text means.
        DL: stack with one entry (a directory basename) per open ``<DL>``.
        items: ``HTML_Link`` / ``Section`` objects of the list currently
            being collected.
        wd: directory that receives the current list's ``index.html``.
        A: the ``HTML_Link`` under construction between ``<A>`` and
            ``</A>``, otherwise ``None``.
    """

    def __init__(self, **kw):
        sgmllib.SGMLParser.__init__(self)
        self.STATE = "__init__"
        self.DL = []
        self.items = []
        self.A = None
        # Text chunks received for the anchor currently being parsed.
        self._a_chunks = []
        # Stored as an absolute path so that the os.chdir() calls made by
        # storeList()/loadList() cannot change what the path refers to.
        self.wd = os.path.abspath(kw.get('wd', '/root/Downloads/pt'))
        if not os.path.exists(self.wd):
            print("Making directory " + self.wd)
            os.makedirs(self.wd)

    def writeHTMLHeader(self, f):
        """Write the opening doctype/html/body lines to the file object *f*."""
        print('WriteHTMLHeader')
        for line in ('<!DOCTYPE html>', '<html>', '<body>'):
            f.write("%s\n" % line)

    def writeHTMLFooter(self, f):
        """Write the closing body/html lines to the file object *f*."""
        print('writeHTMLFooter')
        for line in ('</body>', '</html>'):
            f.write("%s\n" % line)

    def writeList(self):
        """Render ``self.items`` into ``index.html`` inside ``self.wd``."""
        print('writeList')
        # Write into the folder's own directory.  Relying on the process
        # cwd put a leaf folder's page into its parent directory, where
        # the parent's page later overwrote it.
        with open(os.path.join(self.wd, 'index.html'), 'w') as f:
            self.writeHTMLHeader(f)
            for item in self.items:
                if item is not None:
                    f.write("%s\n" % item.toHTML())
                else:
                    f.write("<b>Empty Item!!!!</b>")
            self.writeHTMLFooter(f)

    def storeList(self):
        """Pickle ``self.items`` to ``list.p`` in ``self.wd``.

        Side effects: changes the process working directory to ``self.wd``
        and replaces ``self.wd`` with the resulting ``os.getcwd()`` value.
        """
        print('storeList')
        os.chdir(self.wd)
        print(os.getcwd())
        self.wd = os.getcwd()
        with open("list.p", "wb") as fh:
            pickle.dump(self.items, fh)

    def loadList(self):
        """Replace ``self.items`` with the list pickled in ``self.wd``.

        Side effect: changes the process working directory to ``self.wd``.
        """
        print('loadList')
        os.chdir(self.wd)
        print(self.wd)
        print(os.getcwd())
        # SECURITY: unpickling executes code embedded in the file.  This
        # must only ever read list.p files written by storeList().
        with open("list.p", "rb") as fh:
            self.items = pickle.load(fh)

    def start_h3(self, attributes):
        """Handle ``<H3>``; switch to the FOLDED state for folder headings."""
        print('start_H3')
        self.STATE = 'Started H3'
        for name, value in attributes:
            print(name + "=" + value)
            if (value == 'FOLDED') or (name == 'folded'):
                self.STATE = 'FOLDED'

    def handle_data(self, data):
        """Interpret a run of text according to the current ``STATE``.

        * ``FOLDED``: *data* is a folder title.  A folder link is added to
          the current list, the list is parked on disk, and the working
          directory descends into a directory named after the URL-quoted
          title (created if missing).
        * ``A``: *data* is (part of) the text of the current anchor.
        * ``DD``: the first line of *data* becomes a ``Section`` heading.

        Text received in any other state is ignored.
        """
        print('handleData')
        print("self.STATE=" + self.STATE)
        if self.STATE == 'FOLDED':
            # The directory name is the quoted title; the href quotes it a
            # second time so the '%' characters in the on-disk name survive
            # URL decoding.
            dirname = urllib.quote(data, safe='')
            dirname2 = urllib.quote(dirname, safe='')
            self.items.append(HTML_Link(href=dirname2 + "/index.html",
                                        linkType='FOLDER', text=data))
            self.storeList()
            self.wd = os.path.join(self.wd, dirname)
            print("self.wd=" + self.wd)
            # https://stackoverflow.com/questions/273192/how-can-i-safely-create-a-nested-directory-in-python
            if not os.path.exists(self.wd):
                print("Making directory " + self.wd)
                os.makedirs(self.wd)
            print("Entering:" + str(self.wd))
            # NOTE(review): only the first text chunk of a folder title is
            # used; if the title arrives in several chunks the rest is
            # dropped because STATE changes here.
            self.STATE = "Seeking DL"
        elif self.STATE == "A":
            # The parser may deliver one anchor's text in several calls
            # (for example around entity references); keep all of it
            # instead of only the last chunk.
            self._a_chunks.append(data)
            self.A.text = "".join(self._a_chunks)
        elif self.STATE == "DD":
            self.items.append(Section(data.split('\n')[0]))

    def end_h3(self):  # Probably redundant
        """Handle ``</H3>``.

        Only sets a ``FOLDED`` attribute; nothing else in this class
        reads it.
        """
        print('end_H3')
        self.FOLDED = False

    def start_dl(self, atributes):
        """Handle ``<DL>``: start a fresh item list and push a stack entry."""
        print('start_DL')
        self.items = []
        print(self.DL)
        print(self.wd)
        # Maybe append the full path here instead of the basename
        self.DL.append(os.path.basename(self.wd))

    def end_dl(self):
        """Handle ``</DL>``: write the finished list, then resume the parent.

        The parent's items are reloaded only when an enclosing ``<DL>`` is
        still open.  When the outermost list closes there is no parked
        list to restore and the working directory is left unchanged.
        """
        print('end_DL')
        self.writeList()
        if self.DL:
            # The original referenced ``self.DL.pop`` without calling it.
            self.DL.pop()
        if self.DL:
            self.wd = os.path.normpath(os.path.join(self.wd, os.pardir))
            self.loadList()

    def start_a(self, atributes):
        """Handle ``<A>``: begin a new link and copy the tag's attributes."""
        print('start_A')
        self.A = HTML_Link()
        self._a_chunks = []
        for key, value in atributes:
            setattr(self.A, key, value)
        self.STATE = 'A'

    def end_a(self):
        """Handle ``</A>``: add the finished link to the current list."""
        print('end_A')
        self.items.append(self.A)
        self.A = None
        self._a_chunks = []
        self.STATE = 'Ended A'

    def do_dd(self, atributes):
        """Handle ``<DD>``: following text is treated as a section heading."""
        print('do_DD')
        self.STATE = "DD"

    def do_dt(self, atributes):
        """Handle ``<DT>``: only records the state change."""
        print('do_DT')
        self.STATE = "DT"
# Driver: feed the bookmark export to the parser one line at a time.
p = BookMarkParser()
filename = '/root/Downloads/pearltrees_export.html'
# The context manager closes the export file even if parsing raises.
with open(filename, "r") as f:
    for data in f:
        print('data=' + str(data))
        p.feed(data)
# Flush whatever the parser is still buffering once all input has been fed.
p.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement