Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from __future__ import print_function

import datetime
import nntplib
import sys

import mysql.connector
# Download a range of Usenet articles over NNTP and store selected headers
# plus every body line in the MySQL table HEADER_BODY, one row per line.
#
# Written against the Python 2 nntplib API, where head()/body() return a
# 4-tuple whose last element is the list of lines.

# server
SRVID = 1
SRV = "news_server"
PRT = 119
USR = "user_name"
PWD = "password"

# group
GRPID = 1
GRP = 'comp.os.linux.advocacy'

# python range goes from start to end-1,
# so this will download 704495 through 704499
ARTICLE_START = 704495
ARTICLE_STOP = 704500

# this will capture only the headers you want
myheaders = ['From', 'Date', 'Subject', 'Message-ID', 'User-Agent', 'X-Newsreader', 'References']

# leave this in for now
CREATE_SQL = "CREATE TABLE IF NOT EXISTS HEADER_BODY (SERVERID INTEGER, GROUPID INTEGER, ARTICLEID INTEGER, LINENBR INTEGER, CATEGORY TEXT, DATA TEXT, PRIMARY KEY (SERVERID,GROUPID,ARTICLEID,LINENBR));"

SELECT_SQL = "SELECT SERVERID, GROUPID, ARTICLEID, LINENBR, CATEGORY, DATA FROM HEADER_BODY;"

# INSERT IGNORE means it won't fail and quit when you try to add
# the same articles more than once.  This is the MySQL spelling (SQLite's
# is INSERT OR IGNORE), and mysql.connector wants %s placeholders, not ?.
INSERT_SQL = "INSERT IGNORE INTO HEADER_BODY (SERVERID, GROUPID, ARTICLEID, LINENBR, CATEGORY, DATA) VALUES (%s,%s,%s,%s,%s,%s)"


def split_header(line):
    """Split one header line into a (name, value) pair.

    The split happens at the first colon and surrounding whitespace is
    removed from the value, so "Name: value" and "Name:value" give the
    same result.  A line without a colon is returned as ('na', line).
    """
    name, colon, value = line.partition(':')
    if not colon:
        return 'na', line
    return name, value.strip()


def select_headers(headers):
    """Return (name, value) pairs for the header lines listed in myheaders.

    Blank lines are skipped; the original order of the lines is kept.
    """
    selected = []
    for line in headers:
        line = line.strip()
        if not line:
            continue
        name, value = split_header(line)
        if name in myheaders:
            selected.append((name, value))
    return selected


def print_stored_rows(db):
    """Print every row already in HEADER_BODY (prints nothing when empty)."""
    db.execute(SELECT_SQL)
    for SID, GID, AID, LNBR, CATG, DATA in db.fetchall():
        print(SID, GID, AID, LNBR, CATG, DATA)


def store_article(db, articleID, headers, body):
    """Echo and insert the wanted headers, then every body line.

    LINENBR starts at 1 for each article and increases by one per stored
    row, headers first and body lines after them.
    """
    linenbr = 1
    for hdrname, hdrval in select_headers(headers):
        print(hdrname + ': ' + hdrval)
        db.execute(INSERT_SQL, (SRVID, GRPID, articleID, linenbr, hdrname, hdrval))
        linenbr += 1
    print()
    for bod in body:
        print(bod)
        db.execute(INSERT_SQL, (SRVID, GRPID, articleID, linenbr, 'Body', bod))
        linenbr += 1


def download_articles(news, conn, db):
    """Fetch each article in the configured range and store it.

    An article whose header or body request raises an NNTP error is
    reported and skipped; the loop then moves on to the next article.
    """
    nntp_errors = (nntplib.NNTPTemporaryError, nntplib.NNTPProtocolError,
                   nntplib.NNTPDataError, nntplib.NNTPReplyError)
    for articleID in range(ARTICLE_START, ARTICLE_STOP):
        print('=====================')
        print('article', articleID)
        print('=====================')
        try:
            # last element of the reply tuple is the list of lines
            headers = news.head(str(articleID))[3]
        except nntp_errors as headerError:
            print("(article ID", articleID, "Header NNTP Error", headerError, ")")
            continue
        try:
            body = news.body(str(articleID))[3]
        except nntp_errors as bodyError:
            print("(article ID", articleID, "Body NNTP Error", bodyError, ")")
            continue
        # parse and save header/body
        store_article(db, articleID, headers, body)
        # commit data every Nth article
        if articleID % 10 == 0:
            conn.commit()


def main():
    """Connect to MySQL and the news server, download, commit, clean up."""
    # connect to database
    conn = mysql.connector.connect(user='user', password='password', host='host', database='database')
    try:
        db = conn.cursor()
        try:
            db.execute(CREATE_SQL)
            # once there's data in the table this will print it
            print_stored_rows(db)
            print("connecting to", SRV, "...")
            news = nntplib.NNTP(SRV, PRT, USR, PWD)
            try:
                resp = news.group(GRP)[0]
                print('server response:', resp)
                download_articles(news, conn, db)
                conn.commit()
            finally:
                # always end the NNTP session, even after a failure
                news.quit()
        finally:
            db.close()
    finally:
        conn.close()


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment