Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import time
- import urllib
- import sys
- import os
def splitTags(txt):
    """Return the distinct hashtags in ``txt``, lower-cased, in first-seen order.

    A tag is a ``#`` followed by one or more ASCII letters or digits.  The
    ``#`` only counts when it starts the text or is preceded by a character
    that is not an ASCII letter, an ASCII digit or ``&``.  This skips things
    like ``abc#def`` and HTML character references such as ``&#39;``.

    :param txt: text to scan.
    :returns: list of unique tag names (without the leading ``#``), each
        lower-cased, ordered by first occurrence.
    """
    # Function-scope import keeps this block self-contained.
    import re

    res = []
    seen = set()
    # The negative lookbehind also succeeds at position 0, so a tag at the very
    # start of the text is accepted.
    for match in re.finditer(r'(?<![A-Za-z0-9&])#([A-Za-z0-9]+)', txt):
        tag = match.group(1).lower()
        if tag not in seen:
            seen.add(tag)
            res.append(tag)
    return res
def _read_text(path, default=''):
    """Return the whole content of ``path``, or ``default`` if it cannot be read."""
    try:
        with open(path, 'r') as handle:
            return handle.read()
    except IOError:
        return default


def _status_url(entry, username):
    """Extract the status link of ``username`` from one Atom entry chunk.

    Raises ValueError when the expected link markup is not present.
    """
    start = entry.index('http://twitter.com/' + username + '/statuses/')
    end = entry.rindex('" rel="alternate"/>')
    return entry[start:end]


if __name__ == '__main__':
    # One polling pass: fetch the user's Atom search feed, record every status
    # that is newer than the one remembered in lastRE.txt, prepend it to a file
    # named after each hashtag it contains, and remember the newest status.
    username = 'yoga1290'

    # NOTE: nothing below writes `res` back to twt.txt; the text is accumulated
    # but not persisted.
    res = _read_text('twt.txt')
    old_serial = _read_text('lastRE.txt')

    # Tags already seen on earlier runs, one per line; blank lines are ignored
    # so an empty file does not produce an empty tag.
    tags = [t for t in _read_text('_tags.txt').strip().split('\n') if t]
    known_tags = set(tags)

    print('>')
    feed = urllib.urlopen("http://search.twitter.com/search.atom?q=from%3A" + username)
    try:
        entries = feed.read().split("</entry>")
    finally:
        feed.close()
    entries.pop()  # whatever follows the final </entry> is not an entry

    for e in entries:
        status_url = _status_url(e, username)
        if status_url == old_serial:
            # Reached the status handled by the previous run; stop here.
            break
        twt = e[e.rindex('<title>') + len('<title>'):e.rindex('</title>')]
        html = twt.replace("\r\n", "<br>").replace("\n", "<br>")
        # NOTE(review): the script previously also called
        # html.replace("'", "\'") and html.replace('"', '\"').  Those escape
        # sequences are just the quote characters themselves, so both calls
        # changed nothing and were dropped.  If this text must be safe inside
        # a JavaScript string literal, real escaping is still missing - TODO
        # confirm the intended output format before adding it.
        res += 'twt[n]="' + html + '";\n'

        for tag in splitTags(twt):
            if tag not in known_tags:
                known_tags.add(tag)
                tags.append(tag)
            # Newest status first: prepend to whatever the tag file holds.
            previous = _read_text(tag)
            with open(tag, 'w') as f:
                f.write(status_url + '\n' + html + '\n' + previous)

    with open("_tags.txt", 'w') as f:
        for t in tags:
            f.write(t + "\n")

    # Remember the newest status link; skipped when the feed had no entries.
    if entries:
        with open('lastRE.txt', 'w') as out:
            out.write(_status_url(entries[0], username))

    print('<')
Advertisement
Add Comment
Please, Sign In to add comment