Advertisement
Guest User

Untitled

a guest
Nov 18th, 2017
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.47 KB | None | 0 0
  1. # режиссер
  2. # сценарий
  3. # продюсер
  4. # оператор
  5. # бюджет
  6. # актеры
  7. url = 'https://www.kinopoisk.ru'
  8. headers = {
  9. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko)' +
  10. 'Chrome/62.0.3202.89 Safari/537.36',
  11. }
  12. g = grab.Grab(headers=headers)
  13. #g.go(url)
  14. # //*[@id="infoTable"]/table/tbody/tr[4]/td[2]/a@href
  15. # itemprop="director"
  16. f = open("urls1", "r")
  17. line = f.readline()
  18. res = []
  19. while (line):
  20. line = line.replace('\n', "")
  21. print line
  22. g.go(url + line)
  23. director = g.xpath_list("//*[contains(@itemprop, 'director')]/a/@href")
  24. director_text = g.xpath_list("//*[contains(@itemprop, 'director')]/a/text()")
  25.  
  26. screenwriter = g.xpath_list("//*[@id=\"infoTable\"]/table[1]/tr[5]/td[2]/a/@href")
  27. screenwriter_text = g.xpath_list("//*[@id=\"infoTable\"]/table[1]/tr[5]/td[2]/a/text()")
  28.  
  29. producer = g.xpath_list("//*[@id=\"infoTable\"]/table/tr[6]/td[2]/a/@href")
  30. producer_text = g.xpath_list("//*[@id=\"infoTable\"]/table/tr[6]/td[2]/a/text()")
  31.  
  32. operator = g.xpath_list("//*[@id=\"infoTable\"]/table/tr[7]/td[2]/a/@href")
  33. operator_text = g.xpath_list("//*[@id=\"infoTable\"]/table/tr[7]/td[2]/a/text()")
  34.  
  35. actors = g.xpath_list("//*[@id=\"actorList\"]/ul[1]/li/a/@href")
  36. actors_text = g.xpath_list("//*[@id=\"actorList\"]/ul[1]/li/a/text()")
  37.  
  38. if (director_text[-1] == "..."):
  39. director = director[:-1]
  40.  
  41. if (screenwriter_text[-1] == "..."):
  42. screenwriter = screenwriter[:-1]
  43.  
  44. if (producer_text[-1] == "..."):
  45. producer = producer[:-1]
  46.  
  47. if (operator_text[-1] == "..."):
  48. operator = operator[:-1]
  49.  
  50. if (actors_text[-1] == "..."):
  51. actors = actors[:-1]
  52.  
  53. budget = g.xpath_list("//*[@id=\"infoTable\"]/table/tr[12]/td[2]/div/text()")
  54. if (budget):
  55. budget = int(budget[0].replace(u'\xa0', "").replace('\n', "").replace(" ", "").replace("$", ""))
  56. else:
  57. budget = 0
  58.  
  59. #print director
  60. #print screenwriter
  61. #print producer
  62. #print operator
  63. #print (budget)
  64. #print actors
  65.  
  66. d = { "url": line, "director": director, "screenwriter": screenwriter,
  67. "operator": operator, "budget": budget, "actors": actors, "producer": producer}
  68.  
  69. res.append(d)
  70. line = f.readline()
  71.  
  72. f = open("json_1", "w")
  73. json_str = json.dumps(res)
  74. f.write(json_str)
  75. f.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement