Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Automatically makes and organizes new shelves from to-read shelf on goodreads.
- TODO make functions instead of iterative script.
- """
- import json
- import math
- from typing import Dict, List, Set, Tuple
- from urllib.parse import urlencode
- import pandas as pd
- import requests
- import xmltodict
- from betterreads.client import GoodreadsClient, GoodreadsRequest
- from betterreads.request import GoodreadsRequestException
- # pandas==0.24.1
- # betterreads==0.4.2
def force_list_request(self):
    """Send the GET request described by ``self`` and return the parsed body.

    Reads ``self.host``, ``self.path``, ``self.params`` and
    ``self.req_format``. XML bodies are parsed with ``force_list=["shelf"]``
    so that ``shelf`` elements always come back as a list, even when only
    one is present.

    Returns:
        The ``GoodreadsResponse`` mapping for XML responses, or the decoded
        JSON document for JSON responses.

    Raises:
        GoodreadsRequestException: if the HTTP status code is not 200.
        Exception: if ``self.req_format`` is neither "xml" nor "json".
    """
    response = requests.get(self.host + self.path, params=self.params)
    if response.status_code != 200:
        raise GoodreadsRequestException(response.reason, self.path)

    wanted_format = self.req_format
    if wanted_format == "json":
        return json.loads(response.content)
    if wanted_format != "xml":
        raise Exception("Invalid format")

    parsed = xmltodict.parse(
        response.content,
        dict_constructor=dict,
        force_list=["shelf"],
    )
    return parsed["GoodreadsResponse"]
# Replace GoodreadsRequest.request with force_list_request so that "shelf"
# elements in XML responses are always parsed as lists.
GoodreadsRequest.request = force_list_request

# Credentials and the target user are read interactively at start-up.
CLIENT_KEY: str = input("Enter client key")
CLIENT_SECRET: str = input("Enter client secret")
# input() always returns a string, so the id is annotated as str; it is only
# ever forwarded as a request parameter.
USER_ID: str = input("Enter user id")
BASE_URL: str = GoodreadsClient.base_url

# Shelf whose books are analysed, and the page size used when listing it.
SHELF: str = "to-read"
PER_PAGE: int = 200
# Number of (non-ignored) popular shelves kept per book.
NUM_TOP_SHELVES: int = 5
# A shelf is only created/filled when at least this many books share it.
MIN_BOOKS_PER_SHELF: int = 3
# Popular shelf names that are skipped when picking shelves for a book.
IGNORED_SHELVES: List[str] = [
    "to-read",
    "currently-reading",
    "read",
    "favorites",
    "owned",
    "books-i-own",
    "favourites",
    "to-buy",
    "owned-books",
]
gc = GoodreadsClient(client_key=CLIENT_KEY, client_secret=CLIENT_SECRET)
gc.authenticate()
# Alias the OAuth-authenticated POST function.
gc_post = gc.session.session.post

# Base parameters for every "review/list" call; the paging loop overrides
# "page" and "per_page".
review_list_kwargs = {
    "v": 2,
    "id": USER_ID,
    "page": 1,
    "key": CLIENT_KEY,
    "format": "xml",
    "shelf": SHELF,
    "per_page": 1,  # fetch a single item; only the total count is needed here
}

# NOTE(review): this call spreads the parameters as keyword arguments, while
# the paging loop further down passes them as one positional dict. Confirm
# which form gc.request_oauth actually expects.
total_books: int = int(
    # gc.request_oauth is an OAuth-authenticated GET function.
    gc.request_oauth("review/list", **review_list_kwargs)["reviews"]["@total"]
)

if total_books == 0:
    # Nothing on the shelf, so there is nothing to organise. SystemExit is
    # raised directly because the exit() helper is only installed by the
    # site module and is not guaranteed to exist.
    raise SystemExit(0)
# (book_id, shelf_name) pairs gathered from every book on the source shelf.
book_ids_shelves: List[Tuple[int, str]] = []

for page in range(1, math.ceil(total_books / PER_PAGE) + 1):
    page_response = gc.request_oauth(
        "review/list", {**review_list_kwargs, "page": page, "per_page": PER_PAGE}
    )
    # xmltodict represents a lone child element as a mapping instead of a
    # one-item list (unless the parser was told to force a list), and omits
    # the key entirely when there are no children. Normalise both cases so
    # the loop below always iterates over review mappings.
    page_reviews = page_response["reviews"].get("review") or []
    if isinstance(page_reviews, dict):
        page_reviews = [page_reviews]

    for review in page_reviews:
        book_id: int = int(review["book"]["id"]["#text"])
        # Keep the first NUM_TOP_SHELVES popular shelves that are not ignored.
        popular_shelves: List[str] = [
            shelf["@name"]
            for shelf in gc.book(book_id=book_id).popular_shelves
            if shelf["@name"] not in IGNORED_SHELVES
        ][:NUM_TOP_SHELVES]
        # A book without usable shelves simply contributes no pairs.
        book_ids_shelves.extend((book_id, name) for name in popular_shelves)
# One row per (book_id, shelf) pairing, with repeated pairings collapsed.
df = pd.DataFrame(book_ids_shelves, columns=["book_id", "shelf"]).drop_duplicates()

# Gather the book ids of each shelf, then keep only the shelves shared by at
# least MIN_BOOKS_PER_SHELF books.
ids_by_shelf = df.groupby("shelf")["book_id"].apply(list)
shelves_to_ids: Dict[str, List[int]] = {
    shelf_name: shelf_book_ids
    for shelf_name, shelf_book_ids in ids_by_shelf.items()
    if len(shelf_book_ids) >= MIN_BOOKS_PER_SHELF
}
# Names of the shelves returned for the authenticated user.
user_shelves: Set[str] = {existing.name for existing in gc.user().shelves()}

# Create every target shelf that is not among the user's shelves yet.
for shelf in set(shelves_to_ids) - user_shelves:
    create_payload = urlencode({"user_shelf[name]": shelf})
    gc_post(BASE_URL + "user_shelves.xml", data=create_payload)

# Add each shelf's books to it in a single request per shelf.
for shelf, ids in shelves_to_ids.items():
    joined_ids = ",".join(map(str, ids))
    add_payload = urlencode({"bookids": joined_ids, "shelves": shelf})
    gc_post(
        BASE_URL + "shelf/add_books_to_shelves.xml",
        data=add_payload,
        # No timeout: the default timeout of 300 will throw an exception
        # for large lists.
        timeout=None,
    )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement