Try95th

danskfolkeparti_api so_q_74939979

Dec 28th, 2022 (edited)
183
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.17 KB | None | 0 0
  1. ## for https://stackoverflow.com/q/74939979/6146136
  2. ## sample outputs [as csv] on Sheet "[API] dfn_prTimeline_api" at
  3. #### https://docs.google.com/spreadsheets/d/1yiDCrMi3Sog7qLcT_6pS-71xL1FkgNQnOS5ngPlMf_M
  4.  
  5. import  cloudscraper
  6. from datetime import datetime
  7.  
  8.  
  9. ########################################## [needed for  main scraper] ##########################################
  10. ## part of getNestedVal [ at https://pastebin.com/NRxKRJPn ] ##
  11. ## will be called by danskfolkeparti_apiScraper to get values from json according to pathsRef ##
  12. def getVal_byPath(obj, kPath:list, printError=False, returnError=False):
  13.     try:
  14.         for k in kPath: obj = obj[k]
  15.         return obj
  16.     except Exception as e:
  17.         errMsg = f'could not access {kPath} - {e}'
  18.         if printError: print(errMsg)
  19.         return errMsg if returnError else None
  20. ################################################################################################################
  21.  
  22.  
  23. ################################################# MAIN SCRAPER #################################################
  24. def danskfolkeparti_apiScraper(searchFor={}, perPage=99, pathsRef={}):
  25.     searchFor = searchFor if isinstance(searchFor, (dict, str)) else {}
  26.     perPage = perPage if isinstance(perPage, int) and 0 < perPage < 100 else 99
  27.     qParams = {
  28.         'categories': '20,85,15,83,73,84,', 'before': '2022-12-28T23:59:59',
  29.         'after': '1990-05-08T01:01:01', 'per_page': perPage, 'page': '1'
  30.     }
  31.     pathsRef =  None if not isinstance(pathsRef, dict) else ({
  32.         'id': ['id'], 'title': ['title', 'rendered'], 'url': ['link'],
  33.         'date': ['date'], # ['date_gmt'],
  34.         'modified': ['modified'], # ['modified_gmt'],
  35.         'category': ['categories'], 'excerpt': ['acf', 'excerpt'],
  36.         'content': ['content', 'rendered'] # html
  37.     } if pathsRef == {} else pathsRef)
  38.     wMsgs = []
  39.  
  40.     #### PREPARE QUERY PARAMETERS ####
  41.     if isinstance(searchFor, str):
  42.         ## search keywords --> fetch postIds for query ##
  43.         sf, srj, errMsg = searchFor.replace(' ', '%20'), None, None
  44.         sUrl = f'https://danskfolkeparti.dk/wp-json/wp/v2/search'
  45.         sUrl = f'{sUrl}?search={sf}&per_page={perPage}'
  46.         sResp = cloudscraper.create_scraper().get(sUrl)
  47.         rsMsg = f'{sResp.status_code} {sResp.reason}'
  48.         try:
  49.             sResp.raise_for_status() # check for request error
  50.             srj, pIds = sResp.json(), []
  51.             if srj and isinstance(srj, list):
  52.                 pIds = [p['id'] for p in sResp.json() if isinstance(
  53.                         p['id'], int) and type(p['type'] == 'post')]
  54.             wMsgs += ([{
  55.                 'msg': f'no postIds - "{searchFor}"','search_json': srj
  56.             }] if not pIds else [])
  57.  
  58.             qParams = {
  59.                 'include': ''.join([f'{p},' for p in pIds]),
  60.                 'per_page': perPage
  61.             }
  62.         except Exception as e:
  63.             return [], {'error': f'searchError: {type(e)} {e}',
  64.                         'warnings':wMsgs, 'response_status':rsMsg}  
  65.     else:
  66.         ## no search keywords --> replace default parameters with VALID inputs ##
  67.         for k, v in searchFor.items():
  68.             try:
  69.                 if k in ['before', 'after']:
  70.                     qParams[k] = datetime.fromisoformat(v).isoformat()
  71.                 elif k == 'categories':
  72.                     if isinstance(v, str):
  73.                         v = [int(i) for i in v.split(',') if i.isdigit()]
  74.                     v = [i for i in v if isinstance(i, int)]
  75.                     if v and isinstance(v, list): qParams[k] = v
  76.                 elif k == 'page' and int(v) > 0: qParams[k] = int(v)
  77.                 else: continue # pass
  78.             except: continue # pass
  79.    
  80.     #### PREPARE API URL FOR QUERY AND CALL API ####
  81.     qStr = '&'.join([f'{k}={v}' for k, v in qParams.items()])
  82.     api_url = f'https://danskfolkeparti.dk/wp-json/wp/v2/posts?{qStr}'
  83.  
  84.     apiResp = cloudscraper.create_scraper().get(api_url)
  85.     rsMsg = f'{apiResp.status_code} {apiResp.reason}'
  86.     try:
  87.         apiResp.raise_for_status() # check for request error
  88.         arjData = apiResp.json()
  89.     except Exception as e:
  90.         return [], {'error': f'searchError: {type(e)} {e}',
  91.                     'warnings': wMsgs, 'response_status': rsMsg}
  92.    
  93.     #### EXTRACT FROM API RESPONSE ACCORDING TO pathsRef ARGUMENT ####
  94.     if not isinstance(arjData, list): tprList = []
  95.     elif pathsRef:
  96.         tprList = [{
  97.             k: getVal_byPath(pr, v) for k, v in pathsRef.items()
  98.         } for pr in arjData if 'id' in pr]
  99.     else: tprList = arjData[:] ## set pathsRef=None to jet full json response
  100.    
  101.     if not tprList:
  102.         wMsgs.append({'msg': 'no posts with ids', 'json': arjData})
  103.    
  104.     wMsgs = wMsgs if wMsgs else None
  105.     return tprList, {'error':None, 'warnings':wMsgs, 'response_status':rsMsg}
  106. ################################################################################################################
  107.  
  108.  
  109. ################################################ GET CATEGORIES ################################################
  110. def danskfolkeparti_categories(cidList='all', printCategories=False):
  111.     cat_url = 'https://danskfolkeparti.dk/wp-json/wp/v2/categories?per_page=50'
  112.     catResp = cloudscraper.create_scraper().get(cat_url)
  113.     catResp.raise_for_status()
  114.     carjData = catResp.json()
  115.  
  116.     cidList = cidList if cidList and isinstance(cidList, list) else 'all'
  117.     catList = [c for c in carjData if cidList == 'all' or c['id'] in cidList]
  118.     mIds = [i for i in cidList if not [c for c in catList if c['id'] == i]]
  119.     if mIds and printCategories: print('could not find IDs: ', mIds)
  120.     clen = len(catList)
  121.  
  122.     for ci, c in enumerate(catList):
  123.         cpi = c['parent'] if 'parent' in c else None
  124.         cpn = [p['name'] for p in carjData if p['id'] == cpi]
  125.         catList[ci]['parent_name'] = cpn[0] if cpn else None
  126.  
  127.         if not printCategories: continue
  128.         cpn = '' if cpi is None else f"<-- [{cpi}] {cpn[0] if cpn else ''}"
  129.         ci = f"[category {ci+1:>2} of {clen}]"
  130.         print(f"# {ci} {c['id']:>2} --> {c['name']} {cpn}")
  131.    
  132.     return [{'id': i, 'error': 'not found'} for i in mIds] + [{
  133.         k: v for k, v in c.items() if not isinstance(v, (list, dict))
  134.     } for c in catList]
  135. ################################################################################################################
  136.  
  137.  
  138. ################################################ CATEGORY NAMES ################################################
  139. ############################## examples: ##############################
  140. # danskfolkeparti_catNames(20) #--> "Dansk Folkeblad"
  141. # danskfolkeparti_catNames([20, 85]) #--> ['Dansk Folkeblad', 'Fokus']
  142. # danskfolkeparti_catNames(85, namePar=True) #--> "Fokus [Nyheder]"
  143. #######################################################################
  144. def danskfolkeparti_catNames(catIds,namePar=False,cDict=None,printErr=False):
  145.     try:
  146.         if not isinstance(cDict, dict):
  147.             catsList = danskfolkeparti_categories()
  148.             cDict = {c['id']: c for c in catsList if 'id' in c}
  149.         if isinstance(catIds, list) and len(catIds) == 1: catIds = catIds[0]
  150.  
  151.         if isinstance(catIds, list):
  152.             if not catIds: return None
  153.             return [danskfolkeparti_catNames(
  154.                 c, namePar=namePar, cDict=cDict, printErr=printErr
  155.             ) for c in catIds]
  156.        
  157.         if catIds in cDict and 'name' in cDict[catIds]:
  158.             cn, cpn = cDict[catIds]['name'], None
  159.             if 'parent_name' in cDict[catIds]:
  160.                 cpn = cDict[catIds]['parent_name']
  161.             return f'{cn} [{cpn}]' if namePar and cpn else cn
  162.     except Exception as e:
  163.         if printErr: print(f'{type(e)} {e}')
  164.     return catIds
  165. ################################################################################################################
  166.  
  167.  
  168. ################################################################################################################
  169. ################################################ ALL CATEGORIES ################################################
  170. # danskfolkeparti_categories(printCategories=True) ## prints all categories
  171.  
  172. # [category  1 of 47] 22 --> Ansat <-- [0]
  173. # [category  2 of 47] 54 --> Arrangement <-- [53] Kategorier - lokalforeninger
  174. # [category  3 of 47] 64 --> Artikler <-- [53] Kategorier - lokalforeninger
  175. # [category  4 of 47] 43 --> Byrådskandidat <-- [18] Politiker
  176. # [category  5 of 47] 42 --> Byrådsmedlem <-- [18] Politiker
  177. # [category  6 of 47] 20 --> Dansk Folkeblad <-- [31] Nyheder
  178. # [category  7 of 47] 85 --> Fokus <-- [31] Nyheder
  179. # [category  8 of 47] 40 --> Folketingskandidat <-- [18] Politiker
  180. # [category  9 of 47] 39 --> Folketingsmedlem <-- [18] Politiker
  181. # [category 10 of 47] 62 --> Forside <-- [53] Kategorier - lokalforeninger
  182. # [category 11 of 47] 47 --> Hovedbestyrelsen <-- [18] Politiker
  183. # [category 12 of 47] 61 --> Hovedmenu <-- [53] Kategorier - lokalforeninger
  184. # [category 13 of 47] 86 --> Kampagne <-- [0]
  185. # [category 14 of 47] 49 --> Kandidat til Europa-Parlamentsvalg <-- [18] Politiker
  186. # [category 15 of 47] 50 --> Kandidat til regionsrådet <-- [18] Politiker
  187. # [category 16 of 47] 53 --> Kategorier - lokalforeninger <-- [0]
  188. # [category 17 of 47] 28 --> Kurser for nye medlemmer <-- [27] Kurser og Arrangementer
  189. # [category 18 of 47] 27 --> Kurser og Arrangementer <-- [0]
  190. # [category 19 of 47] 41 --> Lokalbestyrelsesmedlem <-- [18] Politiker
  191. # [category 20 of 47] 46 --> Lokalformand <-- [18] Politiker
  192. # [category 21 of 47] 65 --> Lokalnæstformand <-- [18] Politiker
  193. # [category 22 of 47] 51 --> Medlem af Europa-Parlamentet <-- [18] Politiker
  194. # [category 23 of 47] 15 --> Mortens Nyhedsbrev <-- [31] Nyheder
  195. # [category 24 of 47] 29 --> new test <-- [27] Kurser og Arrangementer
  196. # [category 25 of 47] 31 --> Nyheder <-- [0]
  197. # [category 26 of 47] 26 --> Partiets historie <-- [0]
  198. # [category 27 of 47] 83 --> Pias Blog <-- [31] Nyheder
  199. # [category 28 of 47] 63 --> Politik <-- [53] Kategorier - lokalforeninger
  200. # [category 29 of 47]  4 --> Politik <-- [0]
  201. # [category 30 of 47] 18 --> Politiker <-- [0]
  202. # [category 31 of 47] 59 --> Politiske Assistenter <-- [22] Ansat
  203. # [category 32 of 47] 73 --> Politiske udspil <-- [31] Nyheder
  204. # [category 33 of 47] 57 --> Presseafdeling <-- [22] Ansat
  205. # [category 34 of 47] 84 --> Pressemeddelelser <-- [31] Nyheder
  206. # [category 35 of 47] 34 --> Region Hovedstaden <-- [33] Regioner - Lokalforening
  207. # [category 36 of 47] 37 --> Region Midtjylland <-- [33] Regioner - Lokalforening
  208. # [category 37 of 47] 38 --> Region Nordjylland <-- [33] Regioner - Lokalforening
  209. # [category 38 of 47] 35 --> Region Sjælland <-- [33] Regioner - Lokalforening
  210. # [category 39 of 47] 36 --> Region Syddanmark <-- [33] Regioner - Lokalforening
  211. # [category 40 of 47] 33 --> Regioner - Lokalforening <-- [0]
  212. # [category 41 of 47] 48 --> Regionsrådsmedlem <-- [18] Politiker
  213. # [category 42 of 47] 60 --> Sekretærer <-- [22] Ansat
  214. # [category 43 of 47] 58 --> Sekretariatet <-- [22] Ansat
  215. # [category 44 of 47] 30 --> test pdf <-- [0]
  216. # [category 45 of 47] 16 --> Ugebrev <-- [0]
  217. # [category 46 of 47]  1 --> Uncategorized <-- [0]
  218. # [category 47 of 47] 87 --> Underside <-- [53] Kategorier - lokalforeninger
Advertisement
Add Comment
Please, Sign In to add comment