Advertisement
Guest User

scraper for /r/wallpapers

a guest
Jan 23rd, 2014
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.17 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. #imports
  4. import urllib2
  5. import os
  6. from array import array
  7.  
  8. #variables
  9. sourceholder = ""
  10. index = 0
  11. indexstart = 0
  12. indexend = 0
  13. imagepaths = []
  14. holder = ""
  15. path = "/home/hojlind/Pictures/Wallpapers"
  16. time = 600
  17.  
  18. #get the htmlcode
  19. #response =
  20. req = urllib2.Request('http://www.reddit.com/r/wallpapers')
  21. req.add_unredirected_header('User-Agent', 'Hojlinds wallpaper getter')
  22. sourceholder = urllib2.urlopen(req).read()
  23.  
  24. while index < len(sourceholder):
  25.     indexstart = sourceholder.find("<a class=\"title \"", index)
  26.     indexend = sourceholder.find("</a>", index)
  27.  
  28.     if indexend == -1:
  29.         break
  30.  
  31.     #print "start" + str(indexstart)
  32.     #print "end" + str(indexend)
  33.  
  34.     if sourceholder[indexstart : indexend].find("g") != -1 or sourceholder[indexstart : indexend].find("jpg") != -1 or sourceholder[indexstart : indexend].find("imgur") != -1:
  35.         imagepaths.append(sourceholder[indexstart : indexend + 4])
  36.  
  37.     index = indexend + 4
  38.  
  39. #only sort the images we have a direct link to
  40. for i in range(len(imagepaths)):
  41.  
  42.     indexstart = imagepaths[i].find("href=")
  43.  
  44.     indexend = imagepaths[i].find("jpg")
  45.  
  46.     if indexend == -1 :
  47.         indexend = imagepaths[i].find("png")           
  48.  
  49.     imagepaths[i] = imagepaths[i][indexstart + 6 : indexend + 3]
  50.  
  51. #remove all empty elements
  52. imagepaths = filter(None, imagepaths)
  53.  
  54. #get the pictures
  55. if not os.path.exists(path):
  56.     os.makedirs(path)
  57.  
  58. for i in range(len(imagepaths)):
  59.     if imagepaths[i].find("jpg") != -1 :
  60.         filename = "wallpaper" + str(i) + ".jpg"
  61.  
  62.     else :
  63.         filename = "wallpaper" + str(i) + ".png"
  64.  
  65.     f = open(os.path.join(path, filename), "w")
  66.  
  67.     reqImage = urllib2.Request(imagepaths[i])
  68.     reqImage.add_unredirected_header('User-Agent', 'Hojlinds wallpaper getter')
  69.  
  70.     f.write(urllib2.urlopen(reqImage).read())
  71.  
  72.     f.close()
  73.  
  74. #write all images into the xml file so that the slideshow actually works
  75. #first open the file and set up all the static stuff
  76. filename = "changer.xml"
  77.  
  78. f = open(os.path.join(path, filename), "w")
  79.  
  80. f.write("<background>\n<starttime>\n<year>2009</year>\n<month>08</month>\n<day>04</day>\n<hour>00</hour>\n<minute>00</minute>\n<second>00</second>\n</starttime>")
  81.  
  82. for i in range(len(imagepaths)) :
  83.  
  84.     if imagepaths[i].find("jpg") != -1 :
  85.         filename = "wallpaper" + str(i) + ".jpg"
  86.  
  87.     else :
  88.         filename = "wallpaper" + str(i) + ".png"
  89.  
  90.     if i > 0 :
  91.  
  92.         if imagepaths[i-1].find("jpg") != -1 :
  93.             prevfilename = "wallpaper" + str(i-1) + ".jpg"
  94.  
  95.         else :
  96.             prevfilename = "wallpaper" + str(i-1) + ".png"
  97.  
  98.         f.write("<transition>\n<duration>5.0</duration>\n<from>" + str(os.path.join(path, prevfilename)) + "</from>\n<to>" + str(os.path.join(path, filename)) + "</to>\n</transition>")
  99.    
  100.     f.write("<static>\n<duration>" + str(time) + "</duration>\n<file>" + str(os.path.join(path, filename)) + "</file>\n</static>") 
  101.  
  102.     if i == (len(imagepaths) - 1) :
  103.  
  104.         if imagepaths[0].find("jpg") != -1 :
  105.             prevfilename = "wallpaper" + str(i-1) + ".jpg"
  106.  
  107.         else :
  108.             prevfilename = "wallpaper" + str(i-1) + ".png"
  109.  
  110.         f.write("<transition>\n<duration>5.0</duration>\n<from>" + str(os.path.join(path, filename)) + "</from>\n<to>" + str(os.path.join(path, prevfilename)) + "</to>\n</transition>")   
  111.  
  112.  
  113. f.write("</background>")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement