Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def writer(start, stop, num):
    """Parse the HTML files of one slice of directories into a chunk file.

    Iterates over ``islice(dirs, start, stop)``; for every file in each of
    those directories (located under ``path``) the HTML is parsed with
    BeautifulSoup and one semicolon-separated line per record is appended
    to ``C:\\chunks\\part_<num>.txt``.

    Args:
        start: Index of the first entry of ``dirs`` to process (inclusive).
        stop: Index at which to stop (exclusive, ``islice`` semantics).
        num: Worker number; used in the output file name and in the
            progress messages.

    NOTE(review): ``dirs`` and ``path`` are read as module-level globals.
    In the visible code they are only assigned under the
    ``if __name__ == '__main__'`` guard, so with the multiprocessing
    "spawn" start method (the default on Windows) child processes would
    not have them -- confirm, and pass them in explicitly if so.
    """
    processed_ids = 0
    total_rows = 0
    out_path = r'C:\chunks\part_{}.txt'.format(num)
    # The context manager closes the chunk file even if a worker dies early.
    with open(out_path, 'w') as out:
        for dir_name in islice(dirs, start, stop):
            processed_ids += 1
            dir_path = os.path.join(path, dir_name)
            for file_name in os.listdir(dir_path):
                file_path = os.path.join(dir_path, file_name)
                with open(file_path, 'r') as html_file:
                    soup = BeautifulSoup(html_file.read(), 'html.parser')
                rows_in_file = 0
                # Best effort: a page that fails to parse is reported and
                # skipped so that one bad file does not abort the chunk.
                try:
                    for v in range(get_count(soup)):
                        row = '{};{};{};{};{};{};{}'.format(
                            dir_name,
                            get_status_date(soup),
                            get_status(soup),
                            get_z_name(soup),
                            get_d_name(soup),
                            get_method(soup, v),
                            get_description(soup, v),
                        )
                        out.write(row + '\n')
                        total_rows += 1
                        rows_in_file += 1
                    print('Поток {}. Вставлено {} строк. Всего вставлено {}. Обработано id {}'.format(
                        num, rows_in_file, total_rows, processed_ids))
                except Exception as e:
                    print(e)
if __name__ == '__main__':
    # Split the directories found under `path` into ten contiguous ranges
    # and process each range in its own worker process.
    path = r'C:\html'
    dirs = os.listdir(path)
    maxpoint = len(dirs)
    worker_count = 10

    # Split into ranges. `writer` consumes them with islice(), whose stop
    # index is exclusive, so each range must start exactly where the
    # previous one ended -- starting at "previous end + 1" would silently
    # skip one directory at every boundary.
    params = []
    st = 0
    for i in range(1, worker_count + 1):
        ed = round(maxpoint / worker_count * i)
        params.append((st, ed, i))
        st = ed

    processes = [Process(target=writer, args=args) for args in params]
    for process in processes:
        process.start()
    # Wait for every worker so the script only exits once all chunks are written.
    for process in processes:
        process.join()
- import os
- from multiprocessing import Pool, Lock
# Module-level lock guarding the file writes performed by `writer`.
lock = Lock()


def writer(args_):
    """Open every regular file in one folder for appending and write ''.

    Args:
        args_: A two-item sequence ``(dir_, path_)``: the folder name and
            the parent directory that contains it. Packed into a single
            argument so the function can be used with ``Pool.map``.

    Sub-directories inside the folder are skipped. Writing the empty
    string leaves existing file contents unchanged.
    """
    dir_, path_ = args_
    folder = os.path.join(path_, dir_)
    for file in os.listdir(folder):
        ff = os.path.join(folder, file)
        if not os.path.isfile(ff):
            continue
        # `with lock` releases the lock even if open()/write() raises;
        # a bare acquire()/release() pair would leave it held forever.
        with lock:
            with open(ff, 'a') as f:
                f.write('')
if __name__ == '__main__':
    # Fan the sub-directories of ./TEMP out to a pool of ten worker
    # processes; each task is a [folder_name, parent_path] pair.
    path = os.path.realpath('TEMP')
    entries = os.listdir(path)
    folders = filter(lambda name: os.path.isdir(os.path.join(path, name)), entries)
    with Pool(processes=10) as pool:
        tasks = ([folder, path] for folder in folders)
        pool.map(writer, tasks)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement