Advertisement
Guest User

Untitled

a guest
Jun 19th, 2019
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.65 KB | None | 0 0
  1. """
  2. Automatically creates and organizes new shelves from the books on the to-read shelf on Goodreads.
  3.  
  4. TODO: refactor this top-to-bottom script into functions.
  5. """
  6. import json
  7. import math
  8. from typing import Dict, List, Set, Tuple
  9. from urllib.parse import urlencode
  10.  
  11. import pandas as pd
  12. import requests
  13. import xmltodict
  14. from betterreads.client import GoodreadsClient, GoodreadsRequest
  15. from betterreads.request import GoodreadsRequestException
  16.  
  17. # pandas==0.24.1
  18. # betterreads==0.4.2
  19.  
  20.  
  21. def force_list_request(self):
  22. resp = requests.get(self.host + self.path, params=self.params)
  23. if resp.status_code != 200:
  24. raise GoodreadsRequestException(resp.reason, self.path)
  25. if self.req_format == "xml":
  26. data_dict = xmltodict.parse(
  27. resp.content, dict_constructor=dict, force_list=["shelf"]
  28. )
  29. return data_dict["GoodreadsResponse"]
  30. elif self.req_format == "json":
  31. return json.loads(resp.content)
  32. else:
  33. raise Exception("Invalid format")
  34.  
  35.  
  36. # Override force_list in GoodreadsRequest.request
  37. GoodreadsRequest.request = force_list_request
  38.  
  39. CLIENT_KEY: str = input("Enter client key")
  40. CLIENT_SECRET: str = input("Enter client secret")
  41. USER_ID: int = input("Enter user id")
  42. BASE_URL: str = GoodreadsClient.base_url
  43. SHELF: str = "to-read"
  44. PER_PAGE: int = 200
  45. NUM_TOP_SHELVES: int = 5
  46. MIN_BOOKS_PER_SHELF: int = 3
  47. IGNORED_SHELVES: List[str] = [
  48. "to-read",
  49. "currently-reading",
  50. "read",
  51. "favorites",
  52. "owned",
  53. "books-i-own",
  54. "favourites",
  55. "to-buy",
  56. "owned-books",
  57. ]
  58.  
  59. gc = GoodreadsClient(client_key=CLIENT_KEY, client_secret=CLIENT_SECRET)
  60. gc.authenticate()
  61. # alias ouath authenticated post function
  62. gc_post = gc.session.session.post
  63.  
  64. review_list_kwargs = {
  65. "v": 2,
  66. "id": USER_ID,
  67. "page": 1,
  68. "key": CLIENT_KEY,
  69. "format": "xml",
  70. "shelf": SHELF,
  71. "per_page": 1, # get since item to get page count
  72. }
  73.  
  74. total_books: int = int(
  75. # gc.request_oauth is an oauth authenticated get function
  76. gc.request_oauth("review/list", **review_list_kwargs)["reviews"]["@total"]
  77. )
  78. if total_books == 0:
  79. exit()
  80.  
  81. book_ids_shelves: List[Tuple[int, str]] = []
  82. for page in range(1, math.ceil(total_books / PER_PAGE) + 1):
  83. review_list_kwargs.update()
  84. for review in gc.request_oauth(
  85. "review/list", {**review_list_kwargs, **{"page": page, "per_page": PER_PAGE}}
  86. )["reviews"]["review"]:
  87. book_id: int = int(review["book"]["id"]["#text"])
  88. popular_shelves: List[str] = list(
  89. filter(
  90. lambda s: s not in IGNORED_SHELVES,
  91. [shelf["@name"] for shelf in gc.book(book_id=book_id).popular_shelves],
  92. )
  93. )[:NUM_TOP_SHELVES]
  94. # If shelves is empty, ignore
  95. if not popular_shelves:
  96. continue
  97. book_ids_shelves.extend(
  98. list(zip([book_id] * len(popular_shelves), popular_shelves))
  99. )
  100.  
  101. df = pd.DataFrame(book_ids_shelves, columns=["book_id", "shelf"]).drop_duplicates()
  102.  
  103. shelves_to_ids: Dict[str, List[int]] = (
  104. df.groupby("shelf")
  105. .filter(lambda shelf: len(shelf["book_id"]) >= MIN_BOOKS_PER_SHELF)
  106. .groupby("shelf")["book_id"]
  107. .apply(list)
  108. .to_dict()
  109. )
  110.  
  111. user_shelves: Set[str] = {shelf.name for shelf in gc.user().shelves()}
  112. # Create missing shelves
  113. for shelf in set(shelves_to_ids.keys()).difference(user_shelves):
  114. gc_post(BASE_URL + "user_shelves.xml", data=urlencode({"user_shelf[name]": shelf}))
  115.  
  116. for shelf, ids in shelves_to_ids.items():
  117. gc_post(
  118. BASE_URL + "shelf/add_books_to_shelves.xml",
  119. data=urlencode(
  120. {"bookids": ",".join(str(_id) for _id in ids), "shelves": shelf}
  121. ),
  122. timeout=None, # default timeout of 300 will throw exception for large lists
  123. )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement