Advertisement
trytryhard

test3_sb

Feb 18th, 2023
719
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.06 KB | Source Code | 0 0
# - Alternatively, get the list of members of VKontakte (vk.com) groups whose
#   name/description mentions crypto.
# The goal is to get the best possible information about each client (full name,
# date of birth, or phone number) so that it is later possible to tell who they are.
  5.  
  6. #from bs4 import BeautifulSoup
  7.  
print('\nimpotring libs...\n')
import csv
import html
import math
from datetime import datetime
from pathlib import Path

import pandas
import requests as req
  15.  
  16. vk_api_version = '5.131'
  17. vk_api_method = 'groups.getMembers'
  18. print('Hello, and welcome to vk parcer for test task(3*).\nThis script is working on vk\'s api version=',vk_api_version)
  19. print('For working with this script u should make service-token.',
  20. '\n fyi: https://dev.vk.com/api/access-token/getting-started \n\n')
  21.  
  22. group_link = input('Input link on vk-community. example: https://vk.com/free_nft\nInput link: ')
  23. #check for right link providing
  24. try:
  25.     main_link = req.get(group_link)
  26. except:
  27.     print('wrong link')
  28.     exit()
  29.  
  30. #check for private settings
  31. try:
  32.     gr_id = ((main_link.text.split('owner_id=-'))[1].split('&'))[0]
  33. except:
  34.     print('it\' private group. impossible to parse')
  35.     exit()
  36. group_name = ((main_link.text.split('data-post-owner-type="group">'))[1].split('</a> </div>'))[0]
  37.  
  38. #making output folder
  39. base_path = str(Path.cwd())+'/output_folder/'
  40. base_p = Path(base_path)
  41. base_p.mkdir(exist_ok=True)
  42.  
  43. #creating csv
  44. csv_path = base_path+gr_id+'_'+str(datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))+'.csv'
  45. csv_result_ids = open(csv_path, "x")
  46. csv_result_ids.write('id_of_subs\n')
  47. csv_result_ids.close()
  48.  
  49. #creating and checking for group's readme
  50. try:
  51.     readme = open(base_path+gr_id+'.txt','x')
  52.     readme.write('Group name: '+group_name+'\npermanent link: https://vk.com/club'+gr_id+'\nDescription:'+str(((main_link.text.split('<meta name="description" content="'))[1].split('" />'))[0]))
  53. except:
  54.     print('readme for',group_name,'already created')
  55.  
  56. #run through subs
  57. main_link = req.get('https://vk.com/search?c[section]=people&c[group]='+gr_id)
  58.  
  59. # а тут я вспомнил про три буквы - A P I
  60. #user's own service-token for api requests
  61. ur_service_token = input('input ur vk-api\'s service token.\nInput s-token:')
  62.  
  63. #counts of subs
  64. api_q = req.get('https://api.vk.com/method/groups.getMembers?access_token='+ur_service_token+'&v='+vk_api_version+'&group_id='+gr_id+'&count=0')
  65. amount_of_subs = ((str(api_q.json()).split('\'count\': '))[1].split(','))[0]
  66.  
  67. #saving subs ids
  68. offset_help = 0
  69. csv_result_ids = open(csv_path,'a')#,newline='')
  70. for i in range(int(math.ceil(int(amount_of_subs)/1000))):
  71.     api_q = req.get('https://api.vk.com/method/groups.getMembers?access_token='+ur_service_token+'&v='+vk_api_version+'&group_id='+gr_id+'&offset='+str(offset_help))
  72.     array_of_ids=((str(api_q.json()).split('['))[1].split(']'))[0].split(',')
  73.     for j in array_of_ids:
  74.         (csv.writer(csv_result_ids)).writerow([j])
  75.     offset_help +=1000
  76. csv_result_ids.close()
  77.  
  78. #adding other info: publicity of profile, id, name, city, phone | берем айдишники, идем к апи-запросу
  79. #primerno zdes' nachinaetsya kasha s bol'shim kolichestvom bottle neck'ov i kostiley
  80. n=0
  81. csv_path_finale = base_path+gr_id+'-'+str(datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))+'.csv'
  82. csv_result = open(csv_path_finale,encoding='utf-16',mode='a')
  83. csv_result.write('id,is_closed,name,bday,sex,country,city,mobile_phone,home_phone\n')
  84. # sex: 1-female, 2-male, 0-none
  85.  
  86. print('\nparcing in progress...\n')
  87.  
  88. csv_result_ids=open(csv_path,'r')
  89. read_csv=csv.reader(csv_result_ids)
  90. next(read_csv)
  91. for i in range(int(math.ceil(int(amount_of_subs)/100))):
  92.     users_ids=[]
  93.     users_ids_no_spaces=''
  94.     for j in range(n,99+n):
  95.         try:
  96.             users_ids+=next(read_csv)
  97.         except:
  98.             continue
  99.     for k in range(len(users_ids)):
  100.         users_ids_no_spaces+=users_ids[k].replace(' ','')+','
  101.     api_q = req.get('https://api.vk.com/method/users.get?access_token='+ur_service_token+'&v='+vk_api_version+'&user_ids='+users_ids_no_spaces+'&fields=country,city,contacts,sex,bdate')
  102.     users_info = (str(api_q.json()).split('['))[1].split('}, {')
  103.  
  104.     for j in range(100):
  105.         try:
  106.             user = users_info[j]
  107.             u_id = user.split('id\':')[1].split(',')[0]+','
  108.             u_private =  user.split('is_closed\':')[1].replace('}]}','')+','
  109.             u_name=(user.split('last_name\':')[1].split(',')[0]+'_'+user.split('first_name\':')[1].split(',')[0]).replace('\'','')+','
  110.             u_bday=','
  111.             u_sex=','
  112.             u_country=','
  113.             u_city=','
  114.             u_mobile_phone=','
  115.             u_home_pnohe=''
  116.            
  117.             if 'bdate' in user:
  118.                 u_bday = user.split('bdate\':')[1].split(',')[0].replace('\'','')+','
  119.             if 'sex' in user:
  120.                 u_sex =  user.split('sex\':')[1].split(',')[0]+','
  121.             if 'mobile_phone' in user:
  122.                 u_mobile_phone=(user.split('mobile_phone\':')[1].split(',')[0]).replace('\'','')+','
  123.             if 'home_phone' in user:
  124.                 u_home_pnohe=user.split('home_phone\':')[1].split(',')[0].replace('\'','')
  125.             if 'country' in user:
  126.                 u_country=user.split('country\': {')[1].split(',')[0].replace('\'id\': ','')+'|'+user.split('country\': {')[1].split('title\':')[1].split('}')[0].replace(' ','').replace('\'','')+','
  127.             if 'city' in user:
  128.                 u_city=user.split('city\': {')[1].split(',')[0].replace('\'id\': ','')+'|'+user.split('city\': {')[1].split('title\':')[1].split('}')[0].replace(' ','').replace('\'','')+','
  129.  
  130.             u_result=(u_id+u_private+u_name+u_bday+u_sex+u_country+u_city+u_mobile_phone+u_home_pnohe).replace(' ','')
  131.             csv_result.write(u_result+'\n')
  132.         except:
  133.             continue
  134.     n+=100
  135.  
  136. print('result file was created by path:',csv_path_finale)
  137. Path.unlink(csv_path)
  138.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement