Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Dump the top r/history submissions and their comments to an XML file.

Every submission and every comment becomes one <documento> element under a
single <new> root, with the children <titulo>, <contenido>, <fecha> and
<tipo_entrada>.
"""
import time
from pprint import pprint
from xml.etree.ElementTree import Element, SubElement, ElementTree

import praw


def _add_documento(parent, titulo, contenido, created_utc, tipo_entrada):
    """Append one <documento> record to *parent* and return it.

    Args:
        parent: Element that receives the new <documento> child.
        titulo: Text for the <titulo> child ("" for comments).
        contenido: Text for the <contenido> child.
        created_utc: Creation time as seconds since the epoch.
        tipo_entrada: Text for the <tipo_entrada> child.

    Returns:
        The newly created <documento> element.
    """
    documento = SubElement(parent, "documento")
    SubElement(documento, "titulo").text = titulo
    SubElement(documento, "contenido").text = contenido
    # The format string labels the value as GMT, so the epoch timestamp must
    # be broken down with gmtime(); localtime() would shift it by the
    # machine's UTC offset while still claiming to be GMT.
    SubElement(documento, "fecha").text = time.strftime(
        '%Y-%m-%d %H:%M:%S GMT', time.gmtime(created_utc))
    SubElement(documento, "tipo_entrada").text = tipo_entrada
    return documento


# Create a Reddit instance.
# NOTE(review): these credentials are committed in plain text. They should be
# rotated and loaded from the environment or a praw.ini file instead.
reddit = praw.Reddit(client_id='kgNcJc52YxrqhQ', client_secret="GGV2z1gXvScnRsC5slcPMpxmFKY",
                     password='Correplatano!23', user_agent='script:testsscript:v0.1 by /u/cuakcuak8',
                     username='cuakcuak8')
# Debug dump of the client object's attributes.
pprint(vars(reddit))

subreddit = reddit.subreddit('history')
top = Element('new')
numDocs = 0

# NOTE(review): the pasted source had lost its indentation. The comment loop
# is nested under the submission loop because it reads `submission`; confirm
# this matches the original script.
for submission in subreddit.top(limit=200):
    _add_documento(top, submission.title, submission.selftext,
                   submission.created_utc, "post")
    numDocs += 1
    print("Numero de documentos = "+str(numDocs))

    # limit=0 strips the "load more comments" placeholders without fetching
    # them, so .list() yields only comments that are already loaded.
    submission.comments.replace_more(limit=0)
    for comment in submission.comments.list():
        # Comments have no title; the element is still emitted, empty.
        _add_documento(top, "", comment.body, comment.created_utc,
                       "comentario")
        numDocs += 1

path = '/home/masterbigdata/TGINE-P1/historyNEW.xml'
tree = ElementTree(top)
tree.write(path)
print("Se han obtenido "+str(numDocs)+" documentos y se han volvado en "+path)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement