Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Scrape crew, cast and budget data for a list of Kinopoisk film pages.
#
# Fields collected for every film (these are also the keys of each record
# in the output JSON, next to "url"):
#   director      - director
#   screenwriter  - screenplay
#   producer      - producer
#   operator      - camera operator / cinematographer
#   budget        - budget
#   actors        - actors

url = 'https://www.kinopoisk.ru'
headers = {
    # The product tokens must be space separated; the original literal glued
    # "(KHTML, like Gecko)" and "Chrome/..." together without a space.
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/62.0.3202.89 Safari/537.36',
}

# (record key, XPath selecting the <a> elements for that field).
# "/@href" and "/text()" are appended to each XPath at query time.
# NOTE(review): the infoTable rows are addressed by position (tr[5], tr[6],
# tr[7], tr[12]); presumably that matches the usual page layout, but a film
# page with missing or extra rows will yield the wrong people - confirm.
PERSON_XPATHS = (
    ("director", "//*[contains(@itemprop, 'director')]/a"),
    ("screenwriter", "//*[@id=\"infoTable\"]/table[1]/tr[5]/td[2]/a"),
    ("producer", "//*[@id=\"infoTable\"]/table/tr[6]/td[2]/a"),
    ("operator", "//*[@id=\"infoTable\"]/table/tr[7]/td[2]/a"),
    ("actors", "//*[@id=\"actorList\"]/ul[1]/li/a"),
)
BUDGET_XPATH = "//*[@id=\"infoTable\"]/table/tr[12]/td[2]/div/text()"


def drop_ellipsis(hrefs, texts):
    """Return *hrefs* without its last entry when the last link text is "...".

    hrefs -- list of link targets extracted from a block of anchors.
    texts -- list of the text nodes of the same anchors.

    Both lists may be empty (the XPath matched nothing); in that case
    *hrefs* is returned unchanged instead of raising IndexError.
    """
    if texts and texts[-1] == "...":
        return hrefs[:-1]
    return hrefs


def parse_budget(text_nodes):
    """Convert the budget cell's text nodes to an int, or 0 if unavailable.

    Only the first text node is used. Non-breaking spaces, newlines, plain
    spaces and the dollar sign are removed before conversion. Returns 0 when
    there is no text node or when the remaining text is not a plain integer
    (for example a budget given in another currency), so that a single odd
    page does not abort the whole run.
    """
    if not text_nodes:
        return 0
    cleaned = text_nodes[0]
    for junk in (u'\xa0', '\n', ' ', '$'):
        cleaned = cleaned.replace(junk, '')
    try:
        return int(cleaned)
    except ValueError:
        return 0


def scrape_film(g, path):
    """Load ``url + path`` with the Grab instance *g* and build one record.

    Returns a dict with the keys "url", "director", "screenwriter",
    "producer", "operator", "actors" (lists of link hrefs) and "budget"
    (int, 0 when unknown).
    """
    g.go(url + path)
    record = {"url": path}
    for field, anchor_xpath in PERSON_XPATHS:
        hrefs = g.xpath_list(anchor_xpath + "/@href")
        texts = g.xpath_list(anchor_xpath + "/text()")
        record[field] = drop_ellipsis(hrefs, texts)
    record["budget"] = parse_budget(g.xpath_list(BUDGET_XPATH))
    return record


def main(urls_path="urls1", out_path="json_1"):
    """Scrape every film path listed in *urls_path* and dump JSON to *out_path*.

    *urls_path* holds one site-relative path per line; blank lines are
    skipped. The result is a JSON array with one record per film.
    """
    # Imported here because no import lines are visible alongside this
    # script; a duplicate of an existing top-level import is harmless.
    import json

    import grab

    g = grab.Grab(headers=headers)
    res = []
    with open(urls_path, "r") as urls_file:
        for raw_line in urls_file:
            # strip() also removes a trailing "\r" from CRLF files.
            path = raw_line.strip()
            if not path:
                continue
            print(path)
            res.append(scrape_film(g, path))

    with open(out_path, "w") as out_file:
        out_file.write(json.dumps(res))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement