Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import urllib.request
- import re
- import threading
- import math
- from pathlib import Path
# First folder index the main loop scans: it walks range(START_FOLDER, 16 * 16)
# and names each folder 'ad' followed by the index in lowercase hex.
START_FOLDER = 35
def partition(data, partitions):
    """Split *data* into ``partitions`` contiguous slices of near-equal size.

    Args:
        data: A sequence that supports ``len()`` and slicing (list, tuple,
            range, ...).
        partitions: Number of slices to produce.

    Returns:
        A list of exactly ``partitions`` slices of *data*, in order, whose
        lengths differ by at most one. An empty list is returned when
        ``partitions`` is not positive.
    """
    if partitions <= 0:
        return []
    # Hand the remainder out one item at a time to the leading slices so that
    # no slice is left empty while another holds more than one extra item.
    base, extra = divmod(len(data), partitions)
    chunks = []
    start = 0
    for index in range(partitions):
        stop = start + base + (1 if index < extra else 0)
        chunks.append(data[start:stop])
        start = stop
    return chunks
def threadnames(partitions):
    """Return the labels 'Thread-1' through 'Thread-<partitions>' as a list."""
    last = partitions + 1
    return [f'Thread-{number}' for number in range(1, last)]
def GetThreads(data, partitions):
    """Build a mapping from thread name to the slice of *data* it handles.

    Names come from ``threadnames(partitions)`` and slices from
    ``partition(data, partitions)``; the i-th name is paired with the i-th
    slice.
    """
    labels = threadnames(partitions)
    slices = partition(data, partitions)
    return {label: slices[position] for position, label in enumerate(labels)}
class myThread(threading.Thread):
    """Thread that runs ``process_data`` over one slice of the work."""

    def __init__(self, threadID, name, data, ans):
        """Store the thread id, its name, its input slice and the result list."""
        super().__init__()
        self.threadID, self.name = threadID, name
        self.data, self.ans = data, ans

    def run(self):
        """Announce start, process this thread's data, then announce exit."""
        print(f"Starting {self.name}")
        process_data(self.data, self.ans)
        print(f"Exiting {self.name}")
def process_data(data, ans):
    """Run ``Parse`` on *data* and append what it returns to *ans*.

    Returns whatever ``ans.append`` returns (``None`` when *ans* is a list).
    """
    record = ans.append
    outcome = Parse(data)
    return record(outcome)
def starting_threads(dataset, n):
    """Process *dataset* on ``n`` worker threads and wait for all of them.

    The work is split with ``GetThreads(dataset, n)``; one ``myThread`` is
    started per entry, and every worker shares a single result list.

    Args:
        dataset: Sequence of items to distribute across the workers.
        n: Number of worker threads to start.

    Returns:
        The shared result list handed to every worker. The previous
        implementation discarded it and always returned a new empty list.
    """
    results = []
    workers = []
    # Start every worker before joining any, so they run concurrently.
    assignments = GetThreads(dataset, n).items()
    for thread_id, (name, data) in enumerate(assignments, start=1):
        worker = myThread(thread_id, name, data, results)
        worker.start()
        workers.append(worker)
    # Wait for all threads to complete.
    for worker in workers:
        worker.join()
    return results
def work(num, timeout=10):
    """Fetch one link and download the picture it resolves to, if any.

    The link id is the module-level ``folder`` name followed by ``num``. The
    URL reported by the response (``result.url``) is stripped of its
    thumbnail query parameters and viewer-page prefix; when the result
    contains 'jpg' it is saved as ``photo_<id>.jpg`` inside ``folder``.

    Args:
        num: Value appended to ``folder`` to form the link id.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the calling thread forever.

    Raises:
        Exception: Network and file errors from ``requests.get`` or
            ``urlretrieve`` propagate to the caller.
    """
    path = folder + str(num)
    result = requests.get(url=f'http://gs.3g.cn/D/{path}/w', timeout=timeout)
    pic_url = result.url
    pic_url = re.sub('&t=image/jpeg&w=100&h=200', "", pic_url)
    # Raw string: '\?' is an invalid escape sequence in a normal string literal.
    pic_url = re.sub(
        r"/mms/v14/index.html\?u=http%3A%2F%2Fgosms.gomocdn.com%2F", "/",
        pic_url)
    if 'jpg' in pic_url:
        # Build the destination with pathlib so the file lands inside the
        # folder on every platform, not only where '\\' is the separator.
        target = Path(folder) / f'photo_{path}.jpg'
        urllib.request.urlretrieve(pic_url, str(target))
        # NOTE(review): indentation was lost in the pasted source; this print
        # is assumed to belong to the download branch - confirm.
        print(num, pic_url)
def Parse(data):
    """Call ``work`` for every number in *data*, retrying failures in hex.

    Each number is first tried as given. If ``work`` raises, the same number
    is retried as a lowercase hex string without the ``0x`` prefix. A number
    whose retry also fails is reported and skipped, so one bad link never
    stops the rest of the batch.

    Args:
        data: Iterable of integers to process.

    Returns:
        None.
    """
    for num in data:
        try:
            work(num)
        except Exception:
            try:
                # Convert the current item. The earlier code converted the
                # global folder index ``num1`` here, so every retry in a
                # folder requested the same id.
                num_hex = hex(num).split('x')[-1]
                work(num_hex)
            except Exception as exc:
                # Best effort: report the failure and move on.
                print(f'skipped {num}: {exc}')
# Scan every folder index from START_FOLDER up to 0xff. `num1` and `folder`
# are module-level names that the worker functions read, so they keep
# their names here.
# NOTE(review): indentation was lost in the pasted source; all four statements
# are assumed to run once per folder - confirm.
for num1 in range(START_FOLDER, 0x100):
    # Lowercase hex digits of the index, without the '0x' prefix.
    num1_hex = hex(num1).rpartition('x')[2]
    folder = 'ad' + num1_hex
    Path(folder).mkdir(parents=True, exist_ok=True)
    # Try suffixes 16..99 for this folder on five worker threads.
    starting_threads(range(16, 100), 5)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement