# Untitled

import os
import requests
import json
import sys


# Path of the JSON file that stores the collected items together with the
# index at which the next run resumes. The path is machine-specific, so it has
# to be adjusted before running the script on another computer.
RESULT_FILE = "C:/Users/apskaita3/Finansų analizės ir valdymo sprendimai, UAB/Rokas Toomsalu - Power BI analitika/Integracijos/1_Public comapnies analytics/Databasesets/Others/market_news_helsinki5.json"  # Need to change!
# Page size: sent to the API as the "limit" query parameter and also used as
# the step between consecutive start indexes.
LIMIT = 200  # Don't change!
# When true, the disclosureId of every newly stored entry is printed.
DEBUG = True


def fetch(index: int, timeout: float = 30.0) -> dict:
    """Fetch one page of results from the news query API at api.news.eu.nasdaq.com.

    Args:
        index: Value sent as the ``start`` query parameter, i.e. the offset of
            the first entry of the requested page. The page size is ``LIMIT``.
        timeout: Seconds to wait for the server before giving up. Without an
            explicit timeout ``requests`` would wait indefinitely on a stalled
            connection and the script would hang.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = "https://api.news.eu.nasdaq.com/news/query.action"
    query = {
        "cnscategory": "",
        "company": "",
        "countResults": "false",
        "dateMask": "yyyy-MM-dd+HH:mm:ss",
        "dir": "ASC",
        "displayLanguage": "en",
        "freeText": "",
        "fromDate": "",
        "globalGroup": "exchangeNotice",
        "globalName": "NordicMainMarkets",
        "language": "en",
        "limit": LIMIT,
        "market": "Main Market, Helsinki",
        "showAttachments": "true",
        "showCnsSpecific": "true",
        "showCompany": "true",
        "start": index,
        "timeZone": "CET",
        "toDate": "",
        "type": "handleResponse",
    }
    r = requests.get(url=url, params=query, timeout=timeout)
    # Turn HTTP error statuses into exceptions instead of parsing an error body.
    r.raise_for_status()
    return r.json()


if __name__ == "__main__":
    # Fetching stops at this start index; nothing is requested at or beyond it.
    MAX_INDEX = 100
    # Only entries of this market, published at or after this timestamp, are kept.
    WANTED_MARKET = "Main Market, Helsinki"
    EARLIEST_PUBLISHED = "2021-10-20 06:30:00"

    # Load the previous state if the result file exists, otherwise start fresh.
    # json.dump writes ASCII-only output by default, so reading it back as
    # UTF-8 is always safe regardless of the platform's default encoding.
    if os.path.exists(RESULT_FILE):
        with open(RESULT_FILE, encoding="utf-8") as f:
            results = json.load(f)
    else:
        results = {"items": {}, "current_index": 0}

    # Resume from the index stored by the previous run.
    start = results["current_index"]

    # Do nothing if everything was fetched.
    if start >= MAX_INDEX:
        print("Nothing to fetch, aborting...")
        sys.exit(0)

    # Fetch new content, one page of LIMIT entries per iteration.
    for i in range(start, MAX_INDEX, LIMIT):
        print(f"Grabbing entries {i} to {i+LIMIT}")

        data = fetch(i)
        for entry in data["results"]["item"]:
            headline = entry["headline"].strip()
            published = entry["published"]
            market = entry["market"]

            # Skip entries that are too old OR from the wrong market. The
            # timestamps are compared as strings, which assumes the API returns
            # them in a lexicographically sortable "yyyy-MM-dd HH:mm:ss"-like
            # format -- TODO confirm against a real response.
            if market != WANTED_MARKET or published < EARLIEST_PUBLISHED:
                print(
                    "Found an entry of another market or before 2021-10-20, skipping entry..."
                )
                continue

            # JSON object keys are always strings once the file is written and
            # read back, so normalise the id to a string; otherwise a numeric
            # id would never match the keys loaded from a previous run.
            disclosure_id = str(entry["disclosureId"])

            # Skip (but notify about!) already existing entries.
            if disclosure_id in results["items"]:
                print(
                    f"Found an entry with the same disclosureId ({disclosure_id}), "
                    "skipping..."
                )
                continue

            results["items"][disclosure_id] = {
                "company": entry["company"],
                "messageUrl": entry["messageUrl"],
                "published": published,
                "headline": headline,
            }

            if DEBUG:
                print(disclosure_id, end=" ")

        # Store the resume point once per page. Doing this at page level (not
        # per stored entry) ensures a page whose entries were all skipped is
        # still marked as processed and is not fetched again on the next run.
        results["current_index"] = i + LIMIT

        # Update file with new data (up to LIMIT new entries). The file needs
        # to be overwritten. That's the limitation of using JSON here.
        with open(RESULT_FILE, "w", encoding="utf-8") as f:
            print(f"\nUpdating result with information from page {i+1}")
            json.dump(results, f)