Advertisement
Guest User

Untitled

a guest
Jan 5th, 2023
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.52 KB | None | 0 0
  1. import os
  2. import requests
  3. import json
  4. import sys
  5.  
  6.  
  7. RESULT_FILE = "C:/Users/apskaita3/Finansų analizės ir valdymo sprendimai, UAB/Rokas Toomsalu - Power BI analitika/Integracijos/1_Public comapnies analytics/Databasesets/Others/market_news_helsinki5.json" # Need to change!
  8. LIMIT = 200 # Don't change!
  9. DEBUG = True
  10.  
  11.  
  12. def fetch(index: int) -> dict:
  13. url = f"https://api.news.eu.nasdaq.com/news/query.action"
  14. r = requests.get(
  15. url=url,
  16. params={
  17. "cnscategory": "",
  18. "company": "",
  19. "countResults": "false",
  20. "dateMask": "yyyy-MM-dd+HH:mm:ss",
  21. "dir": "ASC",
  22. "displayLanguage": "en",
  23. "freeText": "",
  24. "fromDate": "",
  25. "globalGroup": "exchangeNotice",
  26. "globalName": "NordicMainMarkets",
  27. "language": "en",
  28. "limit": LIMIT,
  29. "market": "Main Market, Helsinki",
  30. "showAttachments": "true",
  31. "showCnsSpecific": "true",
  32. "showCompany": "true",
  33. "start": index,
  34. "timeZone": "CET",
  35. "toDate": "",
  36. "type": "handleResponse",
  37. },
  38. )
  39. r.raise_for_status()
  40. return r.json()
  41.  
  42.  
  43. if __name__ == "__main__":
  44. # Load data from file if file exists.
  45. if os.path.exists(RESULT_FILE):
  46. with open(RESULT_FILE) as f:
  47. results = json.loads(f.read())
  48. else:
  49. results = {"items": {}, "current_index": 0}
  50.  
  51. # Load current page from file if data about current page exists.
  52. start = results["current_index"]
  53.  
  54. # Do nothing if everything was fetched.
  55. if start >= 100:
  56. print(f"Nothing to fetch, aborting...")
  57. sys.exit(0)
  58.  
  59. # Fetch new content.
  60. for i in range(start, 100, LIMIT):
  61. print(f"Grabbing entries {i} to {i+LIMIT}")
  62.  
  63. data = fetch(i)
  64. for entry in data["results"]["item"]:
  65. headline = entry["headline"].strip()
  66. published = entry["published"]
  67. market = entry["market"]
  68.  
  69. # Skip entries too old or from the wrong market.
  70. if (
  71. entry["market"] != "Main Market, Helsinki"
  72. and entry["published"] < "2021-10-20 06:30:00"
  73. ):
  74. print(
  75. f"Found an entry of another market or before 2021-10-20, skipping entry..."
  76. )
  77. continue
  78.  
  79. # Skip (but notify about!) already existing entries.
  80. if entry["disclosureId"] in results["items"]:
  81. msg = (
  82. f"Found an entry with the same disclosureId ({entry['disclosureId']}), "
  83. "skipping..."
  84. )
  85. print(msg)
  86. continue
  87.  
  88. results["items"][entry["disclosureId"]] = {
  89. "company": entry["company"],
  90. "messageUrl": entry["messageUrl"],
  91. "published": entry["published"],
  92. "headline": headline,
  93. }
  94.  
  95. if DEBUG:
  96. print(entry["disclosureId"], end=" ")
  97.  
  98. # Store current state to restart later, if aborted in between.
  99. results["current_index"] = i + LIMIT
  100.  
  101. # Update file with new data (+200 entries). The file needs to be
  102. # overwritten. That's the limitation of using JSON here.
  103. with open(RESULT_FILE, "w") as f:
  104. print(f"\nUpdating result with information from page {i+1}")
  105. json.dump(results, f)
  106.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement