Advertisement
s243a

PT Bookmark Parser

Dec 6th, 2018
247
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.60 KB | None | 0 0
  1. import sgmllib, urllib, urlparse, os, pickle
  2. class HTML_Link:
  3.     def __init__(self,**kw):
  4.         self.text=kw.get("text"," ")
  5.         self.href=kw.get("href",'/')
  6.         self.add_date=kw.get("add_date",'0')
  7.         self.linkType=kw.get("linkType",None)
  8.  
  9.     def toHTML(self):
  10.        
  11.         if self.linkType is None:
  12.             str='<A HREF="'+self.href+'" ADD_DATE="'+self.add_date+'">'+self.text+"</A>"
  13.             return str
  14.         elif self.linkType.upper()=="FOLDER":
  15.             str='<A HREF="'+self.href+'">'+self.text+"</A>"          
  16.             return '<b>Folder:</b>'+str
  17. class Section:
  18.     def __init__(self,text):
  19.         self.text=text
  20.     def toHTML(self):
  21.         return "<H3>"+self.text+"</H3>"
  22. class BookMarkParser(sgmllib.SGMLParser):
  23.     def __init__(self,**kw):
  24.         sgmllib.SGMLParser.__init__(self)
  25.         self.STATE="__init__"
  26.         self.DL=[]
  27.         self.items=[]
  28.         self.wd=kw.get('wd','/root/Downloads/pt')
  29.         if not os.path.exists(self.wd):
  30.             print("Making directory "+self.wd)
  31.             os.makedirs(self.wd)
  32.     def writeHTMLHeader(self,f):
  33.         print('WriteHTMLHeader')
  34.         f.write("%s\n" % '<!DOCTYPE html>')
  35.         f.write("%s\n" % '<html>')
  36.         f.write("%s\n" % '<body>')
  37.     def writeHTMLFooter(self,f):
  38.         print('writeHTMLFooter')
  39.         f.write("%s\n" % '</body>')        
  40.         f.write("%s\n" % '</html>')  
  41.     def writeList(self):
  42.         print('writeList')
  43.         with open('index.html', 'w') as f:
  44.             self.writeHTMLHeader(f)
  45.             for item in self.items:
  46.                 if item is not None:
  47.                     f.write("%s\n" % item.toHTML())
  48.                 else:
  49.                     f.write("<b>Empty Item!!!!</b>")
  50.                
  51.             self.writeHTMLFooter(f)
  52.     def storeList(self):
  53.         print('storeList')
  54.         os.chdir(self.wd)
  55.         print(os.getcwd())
  56.         self.wd=os.getcwd()
  57.         pickle.dump(self.items,open( "list.p", "wb" ))
  58.     def loadList(self):
  59.         print('loadList')
  60.         os.chdir(self.wd)
  61.         print(self.wd)
  62.         print(os.getcwd())
  63.         self.items=pickle.load(open( "list.p", "rb" ))
  64.     def start_h3(self, attributes):
  65.         print('start_H3')
  66.         self.STATE='Started H3'
  67.         for name, value in attributes:
  68.             print(name+"="+value)
  69.             if (value == 'FOLDED') or (name == 'folded'):
  70.                 self.STATE='FOLDED'
  71.     def handle_data(self,data):
  72.         print('handleData')
  73.         print("self.STATE="+self.STATE)
  74.         if self.STATE=='FOLDED':
  75.             dirname = urllib.quote(data, safe='')
  76.             dirname2= urllib.quote(dirname, safe='')
  77.             self.items.append(HTML_Link(href=dirname2+"/index.html",linkType='FOLDER',text=data))
  78.             self.storeList()
  79.             self.wd=os.path.join(self.wd,dirname)
  80.             print("self.wd="+self.wd)
  81.             #https://stackoverflow.com/questions/273192/how-can-i-safely-create-a-nested-directory-in-python
  82.             if not os.path.exists(self.wd):
  83.                 print("Making directory "+self.wd)
  84.                 os.makedirs(self.wd)
  85.             print("Entering:" + str(self.wd))
  86.             self.STATE="Seeking DL"
  87.  
  88.         if self.STATE=="A":
  89.            
  90.             self.A.text=data
  91.         if self.STATE=="DD":
  92.             self.items.append(Section(data.split('\n')[0]))
  93.     def end_h3(self): #Probably redundant
  94.         print('end_H3')
  95.         self.FOLDED=False        
  96.     def start_dl(self, atributes):
  97.         print('start_DL')
  98.         self.items=[]
  99.         print(self.DL)
  100.         print(self.wd)
  101.         self.DL.append(os.path.basename(self.wd)) #Maybe append the full path here instead of the basename
  102.     def end_dl(self):
  103.         print('end_DL')
  104.         self.writeList()
  105.         self.DL.pop
  106.         self.wd=os.path.join(self.wd,"..")
  107.         self.loadList()
  108.        
  109.     def start_a(self,atributes):
  110.         print('start_A')
  111.         self.A=HTML_Link()
  112.         for key,value in atributes:
  113.            setattr(self.A,key,value)
  114.         self.STATE='A'
  115.     def end_a(self):
  116.         print('end_A')
  117.         self.items.append(self.A)
  118.         self.A=None
  119.         self.STATE='Ended A'
  120.     def do_dd(self, atributes):
  121.         print('do_DD')
  122.         self.STATE="DD"
  123.     def do_dt(self, atributes):
  124.         print('do_DT')
  125.         self.STATE="DT"
  126. p = BookMarkParser()
  127. filename='/root/Downloads/pearltrees_export.html'
  128. f = open(filename, "r")
  129. BUFSIZE = 8192
  130. while True:
  131.     #data = f.read(BUFSIZE)
  132.     data=f.readline()
  133.     print('data='+str(data))
  134.     if not data: break
  135.     p.feed(data)
  136. p.close(  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement