Guest User

Untitled

a guest
Nov 23rd, 2024
18
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.03 KB | None | 0 0
  1. import json
  2. import scraper
  3. import re
  4.  
  5. def flatten(xss):
  6. return [x for xs in xss for x in xs]
  7.  
  8. def readUtf8(files: list[str]) -> list[str]:
  9. data = []
  10. for file in files:
  11. f = open(file, "rb")
  12. b = f.read()
  13. f.close()
  14. data.append(str(b, "utf-8"))
  15.  
  16. return data
  17.  
  18. keywords = re.compile(r'(?i)(?:(edge|spank|dildo|pee|special\s*spank|bondage\s*dildo|clothespins)\s*x?(\d+)?)x?|(?:x?(\d+)?x?\s*(edge|spank|dildo|pee|special\s*spank|bondage\s*dildo|clothespins))')
  19.  
  20. tripleHour = re.compile(r'(?i)t{1,}r{1,}i{1,}p{1,}l{1,}e{1,} keyword')
  21.  
  22. def fit_into_numbers(x: int, numbers: list[int]):
  23. window_size = 2
  24. numbers.reverse()
  25. for i in range(len(numbers) - window_size + 1):
  26. items = numbers[i: i + window_size]
  27. if x >= items[1] and x < items[0]:
  28. return items[1]
  29.  
  30. keywordValues = {
  31. 0: {
  32. "edge": 2,
  33. "spank": 10,
  34. "dildo": 5,
  35. "-keywords": 3,
  36. },
  37. 100: {
  38. "edge": 4,
  39. "spank": 10,
  40. "dildo": 5,
  41. "-keywords": 3,
  42. },
  43. 250: {
  44. "edge": 4,
  45. "spank": 20,
  46. "dildo": 5,
  47. "-keywords": 3,
  48. },
  49. 500: {
  50. "edge": 8,
  51. "spank": 20,
  52. "dildo": 10,
  53. "-keywords": 3,
  54. },
  55. 750: {
  56. "edge": 8,
  57. "spank": 20,
  58. "dildo": 10,
  59. "-keywords": 6,
  60. },
  61. 1000: {
  62. "edge": 8,
  63. "spank": 20,
  64. "dildo": 10,
  65. "-keywords": 6,
  66. },
  67. 1250: {
  68. "edge": 8,
  69. "spank": 20,
  70. "dildo": 10,
  71. "-keywords": 6,
  72. },
  73. 1500: {
  74. "edge": 8,
  75. "spank": 20,
  76. "dildo": 10,
  77. "-keywords": 6,
  78. },
  79. 2000: {
  80. "edge": 8,
  81. "spank": 20,
  82. "dildo": 10,
  83. "-keywords": 6,
  84. },
  85. 2500: {
  86. "edge": 8,
  87. "spank": 20,
  88. "dildo": 10,
  89. "pee": 1,
  90. "-keywords": 6,
  91. },
  92. 3000: {
  93. "edge": 16,
  94. "spank": 40,
  95. "dildo": 20,
  96. "pee": 2,
  97. "-keywords": 6,
  98. },
  99. 3500: {
  100. "edge": 16,
  101. "spank": 40,
  102. "dildo": 20,
  103. "pee": 2,
  104. "special spank": 10,
  105. "-keywords": 6,
  106. },
  107. 4000: {
  108. "edge": 16,
  109. "spank": 40,
  110. "dildo": 20,
  111. "pee": 2,
  112. "special spank": 10,
  113. "bondage dildo": 10,
  114. "-keywords": 6,
  115. },
  116. 4500: {
  117. "edge": 16,
  118. "spank": 40,
  119. "dildo": 20,
  120. "pee": 2,
  121. "special spank": 10,
  122. "bondage dildo": 10,
  123. "-keywords": 6,
  124. },
  125. 5000: {
  126. "edge": 16,
  127. "spank": 40,
  128. "dildo": 20,
  129. "pee": 2,
  130. "special spank": 10,
  131. "bondage dildo": 10,
  132. "-keywords": 6,
  133. },
  134. 6000: {
  135. "edge": 16,
  136. "spank": 40,
  137. "dildo": 20,
  138. "pee": 2,
  139. "special spank": 10,
  140. "bondage dildo": 10,
  141. "clothespins": 5,
  142. "-keywords": 6,
  143. },
  144. 7000: {
  145. "edge": 16,
  146. "spank": 40,
  147. "dildo": 20,
  148. "pee": 2,
  149. "special spank": 10,
  150. "bondage dildo": 10,
  151. "clothespins": 5,
  152. "-keywords": 8,
  153. },
  154. 8000: {
  155. "edge": 16,
  156. "spank": 40,
  157. "dildo": 20,
  158. "pee": 2,
  159. "special spank": 10,
  160. "bondage dildo": 10,
  161. "clothespins": 5,
  162. "-keywords": 6,
  163. },
  164. 9000: {
  165. "edge": 32,
  166. "spank": 80,
  167. "dildo": 40,
  168. "pee": 4,
  169. "special spank": 20,
  170. "bondage dildo": 20,
  171. "clothespins": 10,
  172. "-keywords": 6,
  173. },
  174. 10000: {
  175. "edge": 32,
  176. "spank": 80,
  177. "dildo": 40,
  178. "pee": 4,
  179. "special spank": 20,
  180. "bondage dildo": 20,
  181. "clothespins": 10,
  182. "-keywords": 6,
  183. },
  184. }
  185.  
  186. if __name__ == "__main__":
  187. pages = readUtf8([str(x) + ".html" for x in range(477, 478)])
  188. posts = flatten([scraper.get_posts(x) for x in pages])
  189. first = min([post.post_number for post in posts])
  190. last = max([post.post_number for post in posts])
  191. print(f"Going from post #{first} to #{last}")
  192.  
  193. total = {
  194. "edge": 0,
  195. "spank": 0,
  196. "dildo": 0,
  197. "pee": 0,
  198. "special spank": 0,
  199. "bondage dildo": 0,
  200. "clothespins": 0,
  201. }
  202.  
  203. # tkh = [post for post in posts if post.post_number == 7179][0]
  204. # print(tkh)
  205. # print(tkh.author == "Snoek")
  206. # print()
  207. triple_hours = [post.time for post in posts if post.author == "Snoek" and len(tripleHour.findall(post.content)) > 0]
  208. print(triple_hours)
  209.  
  210. for post in posts:
  211. rules = keywordValues[fit_into_numbers(post.post_number, list(keywordValues.keys()))]
  212. # print("Current rules " + str(rules))
  213. inpost = {
  214. "edge": 0,
  215. "spank": 0,
  216. "dildo": 0,
  217. "pee": 0,
  218. "special spank": 0,
  219. "bondage dildo": 0,
  220. "clothespins": 0,
  221. }
  222. for match in keywords.finditer(post.content):
  223. groups = match.groups()
  224. word = [group for group in groups if group is not None and not group.isdigit()][0]
  225. count = [group for group in groups if group is not None and group.isdigit()]
  226. if word.lower() not in inpost:
  227. raise "Unknown word " + word.lower()
  228. if len(count) > 1:
  229. # print(f"Adding {count} to '{word.lower()}'")
  230. inpost[word.lower()] += count * rules[word.lower()]
  231. else:
  232. # print(f"Adding 1 to '{word.lower()}'")
  233. inpost[word.lower()] += 1 * rules[word.lower()]
  234.  
  235. keywordsUsed = sum(inpost.values())
  236.  
  237. if len([x for x in triple_hours if (post.time - x).total_seconds() > 0]) > 0:
  238. for key in inpost.keys():
  239. inpost[key] *= 3
  240.  
  241. inpost["edge"] += rules["edge"] * 2
  242.  
  243. for key in inpost.keys():
  244. total[key] += inpost[key]
  245.  
  246. print(total)
Add Comment
Please, Sign In to add comment