# TwtBlog.py


import time
import urllib
import sys
import os

def splitTags(txt):
    """Return the hashtags found in *txt*, lower-cased, in first-seen order.

    A hashtag is a '#' followed by one or more ASCII letters or digits.
    The '#' only counts when it starts the text or follows a character that
    is neither an ASCII letter/digit nor '&', so HTML entities such as
    "&#39;" and words such as "C#sharp" are not mistaken for tags.

    txt -- the text to scan (may be empty).

    Returns a list of unique tag names, without the leading '#'.
    """
    tagChars = set('abcdefghijklmnopqrstuvwxyz'
                   'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                   '0123456789')
    # Characters that, directly before a '#', mean "this is not a hashtag".
    blockers = tagChars | set('&')

    res = []
    seen = set()  # O(1) duplicate check; res keeps the order
    hashPos = txt.find('#')
    while hashPos > -1:
        eInd = hashPos + 1
        if hashPos == 0 or txt[hashPos - 1] not in blockers:
            # Extend eInd over the run of tag characters after the '#'.
            while eInd < len(txt) and txt[eInd] in tagChars:
                eInd += 1
            tag = txt[hashPos + 1:eInd].lower()
            if tag and tag not in seen:
                seen.add(tag)
                res.append(tag)
        # Keep scanning the same string (no truncation) so the character in
        # front of the next '#' is still available for the check above.
        hashPos = txt.find('#', eInd)
    return res
def _status_url(entry, username):
    """Return the status link embedded in one Atom ``<entry>`` chunk.

    The link is the text that starts at
    ``http://twitter.com/<username>/statuses/`` and ends just before the
    last ``" rel="alternate"/>`` in *entry*.

    Raises ValueError when either marker is missing from *entry*.
    """
    start = entry.index('http://twitter.com/' + username + '/statuses/')
    return entry[start:entry.rindex('" rel="alternate"/>')]


def main():
    """Fetch the user's recent tweets and file the new ones by hashtag.

    Reads ``twt.txt``, ``lastRE.txt`` and (if present) ``_tags.txt`` from the
    current directory, downloads the Atom search feed for the hard-coded
    user, and walks the entries until it meets the link stored in
    ``lastRE.txt``.  For every entry before that point:

    * the tweet text is prepended (link, text, then previous content) to a
      file named after each hashtag found by ``splitTags``;
    * ``_tags.txt`` is rewritten with every known tag, one per line.

    Finally the link of the first feed entry is stored in ``lastRE.txt``.

    Raises IOError if ``twt.txt`` or ``lastRE.txt`` cannot be read, and
    ValueError if a feed entry lacks the expected link/title markers.
    """
    username = 'yoga1290'

    # NOTE(review): res is read and extended below but never written back;
    # the original author left the code that saved it to twt.txt (together
    # with an "alltg" tag summary) and a 10-minute sleep commented out.
    with open('twt.txt', 'r') as f:
        res = f.read()
    with open('lastRE.txt', 'r') as f:
        old_serial = f.read()

    # Known tags, one per line.  A missing or unreadable file simply means
    # no tags have been recorded yet.
    try:
        with open("_tags.txt", "r") as f:
            # Drop blank lines so an empty file does not yield a '' tag.
            tags = [t for t in f.read().strip().split('\n') if t]
    except IOError:
        tags = []
    known_tags = set(tags)

    # Parenthesised so the line is valid both as a Python 2 print statement
    # and as a print() call; the output is the same.
    print('>')
    feed = urllib.urlopen(
        "http://search.twitter.com/search.atom?q=from%3A" + username).read()
    entries = feed.split("</entry>")
    entries.pop()  # whatever follows the last </entry> is not an entry

    for e in entries:
        link = _status_url(e, username)
        if link == old_serial:  # Don't repeat yourself!
            break
        twt = e[e.rindex('<title>') + len('<title>'):e.rindex('</title>')]
        html = twt.replace("\r\n", "<br>").replace("\n", "<br>")
        # NOTE(review): the original also called replace("'", "\'") and
        # replace('"', '\"').  Both escapes collapse to the bare quote
        # character, so those calls changed nothing and quotes are NOT
        # escaped.  Left as-is to keep the tag files' contents unchanged;
        # confirm what the consumer of these files expects.

        res += 'twt[n]="' + html + '";\n'

        for tag in splitTags(twt):
            if tag not in known_tags:
                known_tags.add(tag)
                tags.append(tag)
            # Each tag has a file named after it; new tweets go on top.
            try:
                with open(tag, 'r') as f:
                    older = f.read()
            except IOError:
                older = ""  # first tweet for this tag
            with open(tag, 'w') as f:
                f.write(link + '\n' + html + '\n' + older)

        # Rewritten after every entry so the tag list on disk always matches
        # the tag files written so far.
        with open("_tags.txt", 'w') as f:
            for t in tags:
                f.write(t + "\n")

    # Memorizing last RE link.  An empty feed has no first entry, so the
    # stored link is left untouched in that case.
    if entries:
        old_serial = _status_url(entries[0], username)
        with open('lastRE.txt', 'w') as out:
            out.write(old_serial)

    print('<')


if __name__ == '__main__':
    main()