Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import requests
- import json
- import sys
# Path of the JSON file that holds the collected entries together with the
# resume offset ("current_index"). Machine-specific: adjust it before running
# the script on another computer.
RESULT_FILE = "C:/Users/apskaita3/Finansų analizės ir valdymo sprendimai, UAB/Rokas Toomsalu - Power BI analitika/Integracijos/1_Public comapnies analytics/Databasesets/Others/market_news_helsinki5.json" # Need to change!
# Page size: sent as the "limit" query parameter and used as the step between
# successive "start" offsets, so the two always stay in sync.
LIMIT = 200 # Don't change!
# When true, the disclosureId of every newly stored entry is printed.
DEBUG = True
def fetch(index: int, timeout: float = 30.0) -> dict:
    """Fetch one page of exchange notices from the Nasdaq news query API.

    Args:
        index: Offset of the first entry to request; sent as the ``start``
            query parameter. The page size is the module-level ``LIMIT``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://api.news.eu.nasdaq.com/news/query.action"
    r = requests.get(
        url=url,
        params={
            "cnscategory": "",
            "company": "",
            "countResults": "false",
            # NOTE(review): requests percent-encodes the "+" as a literal
            # plus sign; if the API expects a space between date and time,
            # this mask needs a space instead -- confirm against a response.
            "dateMask": "yyyy-MM-dd+HH:mm:ss",
            "dir": "ASC",
            "displayLanguage": "en",
            "freeText": "",
            "fromDate": "",
            "globalGroup": "exchangeNotice",
            "globalName": "NordicMainMarkets",
            "language": "en",
            "limit": LIMIT,
            "market": "Main Market, Helsinki",
            "showAttachments": "true",
            "showCnsSpecific": "true",
            "showCompany": "true",
            "start": index,
            "timeZone": "CET",
            "toDate": "",
            "type": "handleResponse",
        },
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    return r.json()
def should_skip_entry(entry: dict) -> bool:
    """Return True when *entry* is from another market or is too old.

    An entry is skipped if EITHER condition holds. The original code joined
    the two tests with ``and``, which contradicted both its comment and its
    log message and let wrong-market or outdated entries through.

    The date test is a plain string comparison, so it relies on
    ``entry["published"]`` being a sortable ``YYYY-MM-DD HH:MM:SS`` style
    timestamp.
    """
    return (
        entry["market"] != "Main Market, Helsinki"
        or entry["published"] < "2021-10-20 06:30:00"
    )


def load_results(path: str) -> dict:
    """Load the saved state from *path*, or return a fresh empty state.

    The state is a dict with ``"items"`` (entries keyed by disclosureId) and
    ``"current_index"`` (offset at which the next run resumes).
    """
    if os.path.exists(path):
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    return {"items": {}, "current_index": 0}


def save_results(path: str, results: dict) -> None:
    """Write *results* to *path* as JSON without risking a half-written file.

    The data goes to a temporary sibling file first and is then swapped in
    with ``os.replace``, so an abort during the write leaves the previous
    state file intact.
    """
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(results, f)
    os.replace(tmp_path, path)


def store_entries(results: dict, entries, debug: bool = False) -> int:
    """Merge fetched *entries* into ``results["items"]``.

    Entries rejected by ``should_skip_entry`` and entries whose disclosureId
    is already stored are reported and skipped.

    Args:
        results: State dict as returned by ``load_results``; modified in place.
        entries: Iterable of entry dicts taken from the API response.
        debug: When true, print the disclosureId of every stored entry.

    Returns:
        The number of entries that were newly stored.
    """
    items = results["items"]
    added = 0
    for entry in entries:
        # Skip entries too old or from the wrong market.
        if should_skip_entry(entry):
            print(
                "Found an entry of another market or before 2021-10-20, skipping entry..."
            )
            continue

        # JSON object keys are always strings, so ids loaded back from the
        # state file are strings too. Normalising here keeps the duplicate
        # check working across runs even if the API delivers numeric ids.
        disclosure_id = str(entry["disclosureId"])

        # Skip (but notify about!) already existing entries.
        if disclosure_id in items:
            print(
                f"Found an entry with the same disclosureId ({disclosure_id}), "
                "skipping..."
            )
            continue

        items[disclosure_id] = {
            "company": entry["company"],
            "messageUrl": entry["messageUrl"],
            "published": entry["published"],
            "headline": entry["headline"].strip(),
        }
        added += 1
        if debug:
            print(disclosure_id, end=" ")
    return added


def main(max_index: int = 100) -> None:
    """Fetch entries page by page and persist them after every page.

    Resumes from the ``current_index`` stored in ``RESULT_FILE`` and stops
    once that offset reaches *max_index*.
    """
    # Load data (and the resume offset) from file if the file exists.
    results = load_results(RESULT_FILE)
    start = results["current_index"]

    # Do nothing if everything was fetched.
    if start >= max_index:
        print("Nothing to fetch, aborting...")
        return

    # Fetch new content.
    for i in range(start, max_index, LIMIT):
        print(f"Grabbing entries {i} to {i+LIMIT}")
        data = fetch(i)
        store_entries(results, data["results"]["item"], debug=DEBUG)

        # Store current state to restart later, if aborted in between.
        results["current_index"] = i + LIMIT

        # The whole file is rewritten after each page; that is the
        # limitation of using JSON here.
        print(f"\nUpdating result with information from page {i+1}")
        save_results(RESULT_FILE, results)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement