# Untitled

"""
Automatically creates and populates new shelves on Goodreads from the books
on the to-read shelf.

TODO: refactor this top-to-bottom script into functions.
"""
import json
import math
from typing import Dict, List, Set, Tuple
from urllib.parse import urlencode

import pandas as pd
import requests
import xmltodict
from betterreads.client import GoodreadsClient, GoodreadsRequest
from betterreads.request import GoodreadsRequestException

# pandas==0.24.1
# betterreads==0.4.2


def force_list_request(self):
    """Perform this request and return the decoded response body.

    Intended as a stand-in for ``GoodreadsRequest.request``: the XML branch
    passes ``force_list=["shelf"]`` to ``xmltodict.parse`` so that ``shelf``
    elements are always parsed as lists, even when only one is present.

    Returns:
        The ``GoodreadsResponse`` mapping when ``self.req_format`` is
        ``"xml"``, or the decoded JSON document when it is ``"json"``.

    Raises:
        GoodreadsRequestException: If the HTTP status code is not 200.
        Exception: If ``self.req_format`` is neither ``"xml"`` nor ``"json"``.
    """
    response = requests.get(self.host + self.path, params=self.params)
    if response.status_code != 200:
        raise GoodreadsRequestException(response.reason, self.path)

    fmt = self.req_format
    if fmt == "xml":
        parsed = xmltodict.parse(
            response.content, dict_constructor=dict, force_list=["shelf"]
        )
        return parsed["GoodreadsResponse"]
    if fmt == "json":
        return json.loads(response.content)
    raise Exception("Invalid format")


# Monkeypatch GoodreadsRequest.request with the variant defined above, which
# passes force_list=["shelf"] to xmltodict.
GoodreadsRequest.request = force_list_request

# Credentials and the target account are requested interactively at start-up.
CLIENT_KEY: str = input("Enter client key")
CLIENT_SECRET: str = input("Enter client secret")
# input() returns str; convert so the value matches its declared type and a
# non-numeric id fails here rather than part-way through the API calls.
USER_ID: int = int(input("Enter user id"))
BASE_URL: str = GoodreadsClient.base_url
# Shelf whose books are read and re-shelved.
SHELF: str = "to-read"
# Number of reviews requested per "review/list" page.
PER_PAGE: int = 200
# Number of popular shelves kept per book (after dropping ignored ones).
NUM_TOP_SHELVES: int = 5
# A shelf is only kept if at least this many books landed on it.
MIN_BOOKS_PER_SHELF: int = 3
# Popular shelf names that are never used as target shelves.
IGNORED_SHELVES: List[str] = [
    "to-read",
    "currently-reading",
    "read",
    "favorites",
    "owned",
    "books-i-own",
    "favourites",
    "to-buy",
    "owned-books",
]

gc = GoodreadsClient(client_key=CLIENT_KEY, client_secret=CLIENT_SECRET)
gc.authenticate()
# Alias for the OAuth-authenticated POST function.
gc_post = gc.session.session.post

# Query parameters shared by every "review/list" call in this script.
review_list_kwargs = {
    "v": 2,
    "id": USER_ID,
    "page": 1,
    "key": CLIENT_KEY,
    "format": "xml",
    "shelf": SHELF,
    "per_page": 1,  # fetch a single item; only the total count is needed here
}

# gc.request_oauth is an OAuth-authenticated GET function.
total_books: int = int(
    gc.request_oauth("review/list", **review_list_kwargs)["reviews"]["@total"]
)
if total_books == 0:
    # Nothing on the shelf, so there is nothing to organize. Raise SystemExit
    # directly: the bare exit() helper is injected by the site module for
    # interactive use and is not guaranteed to exist in every run mode.
    raise SystemExit

# (book_id, shelf_name) pairs: for every book on SHELF, its first
# NUM_TOP_SHELVES popular shelves that are not in IGNORED_SHELVES.
book_ids_shelves: List[Tuple[int, str]] = []
# Hoisted so each membership test is a hash lookup rather than a list scan.
ignored_shelves = frozenset(IGNORED_SHELVES)
for page in range(1, math.ceil(total_books / PER_PAGE) + 1):
    # NOTE(review): the total-count query earlier in this file passes these
    # parameters as keyword arguments, while this call passes them as one
    # positional dict -- confirm which form gc.request_oauth expects.
    reviews = gc.request_oauth(
        "review/list", {**review_list_kwargs, "page": page, "per_page": PER_PAGE}
    )["reviews"]["review"]
    # A page holding exactly one review may come back as a single mapping
    # instead of a list (xmltodict's default for a non-repeated element, the
    # same reason "shelf" is force-listed in force_list_request). Wrap it so
    # the loop below iterates reviews rather than the mapping's keys.
    if isinstance(reviews, dict):
        reviews = [reviews]
    for review in reviews:
        book_id: int = int(review["book"]["id"]["#text"])
        popular_shelves: List[str] = [
            shelf["@name"]
            for shelf in gc.book(book_id=book_id).popular_shelves
            if shelf["@name"] not in ignored_shelves
        ][:NUM_TOP_SHELVES]
        # A book with no remaining shelves simply contributes no pairs.
        book_ids_shelves.extend((book_id, name) for name in popular_shelves)

# One row per distinct (book, shelf) pairing.
df = pd.DataFrame(book_ids_shelves, columns=["book_id", "shelf"]).drop_duplicates()

# Map each shelf name to the ids of its books, keeping only shelves that
# gathered at least MIN_BOOKS_PER_SHELF books.
ids_by_shelf = df.groupby("shelf")["book_id"].apply(list)
shelves_to_ids: Dict[str, List[int]] = {
    shelf_name: book_ids
    for shelf_name, book_ids in ids_by_shelf.items()
    if len(book_ids) >= MIN_BOOKS_PER_SHELF
}

# Shelf names already present on the account returned by gc.user().
user_shelves: Set[str] = {shelf.name for shelf in gc.user().shelves()}

# Create every target shelf that does not exist yet.
create_shelf_url = BASE_URL + "user_shelves.xml"
for shelf in set(shelves_to_ids) - user_shelves:
    form_body = urlencode({"user_shelf[name]": shelf})
    gc_post(create_shelf_url, data=form_body)

# Add each shelf's books with one request per shelf.
add_books_url = BASE_URL + "shelf/add_books_to_shelves.xml"
for shelf, ids in shelves_to_ids.items():
    form_body = urlencode({"bookids": ",".join(map(str, ids)), "shelves": shelf})
    # timeout=None: the default timeout of 300 will throw an exception for
    # large lists.
    gc_post(add_books_url, data=form_body, timeout=None)