Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2019
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.85 KB | None | 0 0
  1. import requests
  2. from bs4 import BeautifulSoup
  3. import re
  4. import json
  5. import time
  6. import random
  7. import os
  8. import datetime
  9. import threading
  10. import socket
  11.  
  12.  
  13. def get_html(url):
  14. ua = [{'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; XMP-6250 Build/HAWK) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Safari/537.36 ADAPI/2.0 (UUID:9e7df0ed-2a5c-4a19-bec7-2cc54800f99d) RK3188-ADAPI/1.2.84.533 (MODEL:XMP-6250)'},
  15. {'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1; Mi A1 Build/N2G47H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.36'},
  16. {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1; A37f Build/LMY47V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.93 Mobile Safari/537.36'},
  17. {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0.1; CPH1607 Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/63.0.3239.111 Mobile Safari/537.36'},
  18. {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0.1; Redmi 4A Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.116 Mobile Safari/537.36'},
  19. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 6.0.1; zh-CN; F5121 Build/34.0.A.1.247) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/40.0.2214.89 UCBrowser/11.5.1.944 Mobile Safari/537.36'},
  20. {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; MYA-L22 Build/HUAWEIMYA-L22) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36'},
  21. {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1; A1601 Build/LMY47I) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.98 Mobile Safari/537.36'},
  22. {'User-Agent': 'Mozilla/5.0 (Linux; Android 7.0; TRT-LX2 Build/HUAWEITRT-LX2; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/59.0.3071.125 Mobile Safari/537.36'},
  23. {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; CAM-L21 Build/HUAWEICAM-L21; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/62.0.3202.84 Mobile Safari/537.36'},
  24. {'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1.2; Redmi 4X Build/N2G47H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.111 Mobile Safari/537.36'},
  25. {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1; HUAWEI CUN-L22 Build/HUAWEICUN-L22; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/62.0.3202.84 Mobile Safari/537.36'},
  26. {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.1.1; A37fw Build/LMY47V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36'},
  27. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.4.2; zh-CN; HUAWEI MT7-TL00 Build/HuaweiMT7-TL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/40.0.2214.89 UCBrowser/11.3.8.909 Mobile Safari/537.36'},
  28. {'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1.2; Redmi Note 5A Build/N2G47H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/63.0.3239.111 Mobile Safari/537.36'},
  29. {'User-Agent': 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.111 Mobile Safari/537.36'},
  30. {'User-Agent': 'Mozilla/5.0 (Linux; Android 7.0; BLL-L22 Build/HUAWEIBLL-L22) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.91 Mobile Safari/537.36'},
  31. {'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1.1; CPH1723 Build/N6F26Q) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.98 Mobile Safari/537.36'},
  32. {'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; ASUS_T00J Build/KVT49L) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Safari/537.36'},
  33. {'User-Agent': 'Dalvik/1.6.0 (Linux; U; Android 4.0.4; opensign_x86 Build/IMM76L)'},
  34. {'User-Agent': 'Mozilla/5.0 (Android; Mobile; rv:38.0) Gecko/38.0 Firefox/38.0'},
  35. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; SCH-I535 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  36. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.2; en-us; SCH-I915 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30'},
  37. {'User-Agent': 'Dalvik/1.6.0 (Linux; U; Android 4.4.4; WT22M-FI Build/KTU84Q)'},
  38. {'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; en-us; SAMSUNG SCH-I545 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Version/1.5 Chrome/28.0.1500.94 Mobile Safari/537.36'},
  39. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.2; en-us; SGH-T599N Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  40. {'User-Agent': 'Mozilla/5.0 (Mobile; LYF/F90M/LYF-F90M-000-02-28-130318; Android; rv:48.0) Gecko/48.0 Firefox/48.0 KAIOS/2.0'},
  41. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 2.3.5; en-in; Micromax A87 Build/GINGERBREAD) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'},
  42. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.2; en-us; SAMSUNG-SGH-I467 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30'},
  43. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.0.4; en-us; SCH-S738C Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  44. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; ZTE V768 Build/GINGERBREAD) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'},
  45. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.2; en-US; B1-710 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.1 Safari/534.30'},
  46. {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0.1; SAMSUNG SCH-I545 4G Build/LRX22C) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.1 Chrome/34.0.1847.76 Mobile Safari/537.36'},
  47. {'User-Agent': 'Mozilla/5.0 (Android; Mobile; rv:40.0) Gecko/40.0 Firefox/40.0'},
  48. {'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.4; en-us; SAMSUNG SGH-M919 Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/1.5 Chrome/28.0.1500.94 Mobile Safari/537.36'},
  49. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.2; en-us; SPH-M830 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  50. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 2.3.5; en-us; SCH-I800 Build/GINGERBREAD) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'},
  51. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.0.4; en-us; C5170 Build/IML77) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  52. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; SAMSUNG-SGH-I747 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  53. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.2; en-us; SPH-M840 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  54. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; SPH-L710 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  55. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.2; en-us; SAMSUNG-SGH-I497 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30'},
  56. {'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; 7040N Build/KVT49L) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Mobile Safari/537.36'},
  57. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; KFTT Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30'},
  58. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.1; en-us; Huawei Y301A1 Build/HuaweiY301A1) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  59. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; 0PCV1 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  60. {'User-Agent': 'Mozilla/5.0 (Linux; Android 4.4.2; MS5.V2 Build/MS5.V2) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/30.0.0.0 Mobile Safari/537.36'},
  61. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.3; en-us; SGH-T999L Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  62. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.1; en-us; EVO Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'},
  63. {'User-Agent': 'Mozilla/5.0 (Linux; U; Android 4.1.2; en-us; SPH-L300 Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'}]
  64. headers = {'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'}
  65. try:
  66. r = requests.get(url, headers=ua[random.randint(0, 49)])
  67. except:
  68. time.sleep(2)
  69. r = requests.get(url, headers=ua[random.randint(0, 49)])
  70. return r.text
  71.  
  72. def get_telephone(url):
  73. os.system('start chrome '+url)
  74. print(1)
  75.  
  76. def get_page_data(url_global):
  77. #sock = socket.socket()
  78. #sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
  79. #sock.bind(('', 9092))
  80. #sock.listen(1)
  81. #conn, addr = sock.accept()
  82.  
  83. mass = [1]
  84. hh = 0;
  85. mm = 0;
  86. i = 0
  87. while True:
  88. html = get_html(url_global)
  89. #print(len(html))
  90. #now = datetime.datetime.now()
  91. #print(now.strftime("%d-%m-%c%Y %H:%M"))
  92. #time.sleep(1)
  93. i = i + 1
  94. soup = BeautifulSoup(html, 'lxml')
  95. try:
  96. ads = soup.find_all(attrs={"data-marker": "item/link"})
  97. except:
  98. continue
  99. for ad in ads:
  100. start_time = time.time()
  101. dtime = ad.find(attrs={"data-marker": "item/datetime"}).text.strip()
  102. try:
  103. g = ad.get('href');
  104. url = "https://m.avito.ru" + g
  105. id_new = int(re.findall(r'\d{1,}$',g)[0])
  106. if(id_new in mass): break
  107. mass.append(id_new)
  108. #os.system('start chrome.exe '+url)
  109. except:
  110. url = ''
  111. continue
  112. telephone = ''
  113. try:
  114. html_call = get_html(url)
  115. soupa_call = BeautifulSoup(html_call, 'lxml')
  116. telephone = re.findall(r'\+\d{1,}', soupa_call.find(attrs={"data-marker": "item-contact-bar/call"}).get('href'))
  117. name = soupa_call.find(attrs={"data-marker": "item-contact-bar/name"}).text.strip()
  118. prosm = soupa_call.find(attrs={"data-marker": "item-stats/views"}).text.strip()
  119. pr = re.findall(r'\d{1,}',prosm)
  120. #print(pr)
  121. if(int(pr[0])!=int(pr[1])): continue
  122. except:
  123. telephone = ''
  124. try:
  125. title = ad.find(attrs={"data-marker": "item/title"}).text.strip()+ " " + ad.find('div').find(attrs={"data-marker": "item/address"}).text.strip()
  126. except:
  127. title = ''
  128. try:
  129. price = ad.find(attrs={"data-marker": "item/price"}).text.strip()
  130. except:
  131. price = ''
  132. #threading.Thread(target=get_telephone, args=(url,)).start()
  133. print("--- %s seconds --- fuul" % (time.time() - start_time))
  134. #print(datetime)
  135. #print(id_new)
  136. #if(i>10): conn.send(telephone[0].encode()) #os.system('c:\\adb\\adb shell am start -a android.intent.action.CALL -d tel:'+telephone[0])
  137.  
  138. data = {'datetime':dtime,
  139. 'prosm':prosm,
  140. 'title':title,
  141. 'price':price,
  142. 'telephone':telephone,
  143. 'name':name,
  144. 'url':url}
  145. print(data)
  146.  
  147. def main():
  148. #url = "https://m.avito.ru/tyumen/kvartiry/prodam?owner[]=private&sort=date"
  149. url = "https://m.avito.ru/rossiya/kvartiry/prodam?owner[]=private&sort=date"
  150. random.seed()
  151. #get_page_data(url)
  152.  
  153. threading.Thread(target=get_page_data, args=(url,)).start()
  154. #time.sleep(30)
  155. #threading.Thread(target=get_page_data, args=(url,)).start()
  156.  
  157. if __name__ == '__main__':
  158. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement