Guest User

Untitled

a guest
Sep 29th, 2018
165
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.92 KB | None | 0 0
  1. import sys, nntplib, mysql.connector, datetime
  2.  
  3. #connect to database
  4. conn = mysql.connector.connect(user = 'user', password = 'password', host = 'host', database = 'database')
  5. conn.text_factory = str
  6. db = conn.cursor()
  7.  
  8. #leave this in for now
  9. cSQL = "CREATE TABLE IF NOT EXISTS HEADER_BODY (SERVERID INTEGER, GROUPID INTEGER, ARTICLEID INTEGER, LINENBR INTEGER, CATEGORY TEXT, DATA TEXT, PRIMARY KEY (SERVERID,GROUPID,ARTICLEID,LINENBR));"
  10. db.execute(cSQL)
  11.  
  12. #once there's data in the table this will print it
  13. cSQL = "SELECT SERVERID, GROUPID, ARTICLEID, LINENBR, CATEGORY, DATA FROM HEADER_BODY;"
  14. db.execute(cSQL)
  15. for SID, GID, AID, LNBR, CATG, DATA in db.fetchall():
  16. print SID, GID, AID, LNBR, CATG, DATA
  17.  
  18.  
  19. #server
  20. SRVID = 1
  21. SRV = "news_server"
  22. PRT = 119
  23. USR = "user_name"
  24. PWD = "password"
  25.  
  26.  
  27. #group
  28. GRPID = 1
  29. GRP = 'comp.os.linux.advocacy'
  30.  
  31. print "connecting to",SRV,"..."
  32. news = nntplib.NNTP(SRV,PRT,USR,PWD)
  33. resp, IDs, beginID, endID, grpNm = news.group(GRP)
  34. print 'server response:',resp
  35.  
  36. #INSERT OR IGNORE means it won't fail and quit when you try to add
  37. #the same articles more than once
  38. cSQL = "INSERT OR IGNORE INTO HEADER_BODY (SERVERID, GROUPID, ARTICLEID, LINENBR, CATEGORY, DATA) VALUES (?,?,?,?,?,?) "
  39.  
  40. #this will capture only the headers you want
  41. myheaders = ['From','Date','Subject','Message-ID','User-Agent','X-Newsreader','References']
  42.  
  43. #python range goes from start to end-1
  44. #so this will download 650200 and 650201
  45. for articleID in range(704495,704500):
  46.  
  47. print '====================='
  48. print 'article',articleID
  49. print '====================='
  50.  
  51.  
  52. try: response, artID, msgID, headers = news.head(str(articleID))
  53. except (nntplib.NNTPTemporaryError,nntplib.NNTPProtocolError,nntplib.NNTPDataError,nntplib.NNTPReplyError) as headerError:
  54. print "(article ID",articleID, "Header NNTP Error", headerError,")"
  55. continue
  56.  
  57. try: response, artID, msgID, body = news.body(str(articleID))
  58. except (nntplib.NNTPTemporaryError,nntplib.NNTPProtocolError,nntplib.NNTPDataError,nntplib.NNTPReplyError) as bodyError:
  59. print "(article ID",articleID, "Body NNTP Error", bodyError,")"
  60. continue
  61.  
  62. #parse and save header/body
  63. linenbr = 1
  64. for header in headers:
  65. header = header.strip()
  66. if header != "":
  67. if ':' in header:
  68. hdrname = header[:header.index(':')]
  69. hdrval = header[header.index(':')+2:]
  70. else:
  71. hdrname = 'na'
  72. hdrval = header
  73.  
  74. if hdrname in myheaders:
  75. print hdrname + ': ' + hdrval
  76. db.execute(cSQL, (SRVID,GRPID,articleID,linenbr,hdrname,hdrval))
  77. linenbr += 1
  78.  
  79. print
  80. for bod in body:
  81. print bod
  82. db.execute(cSQL, (SRVID,GRPID,articleID,linenbr,'Body',bod))
  83. linenbr += 1
  84.  
  85. #commit data every Nth article
  86. if articleID % 10 == 0:
  87. conn.commit()
  88.  
  89. #exit
  90. conn.commit()
  91. db.close()
  92. conn.close()
  93. news.quit()
Add Comment
Please, Sign In to add comment