Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import scraper
- import re
def flatten(xss):
    """Concatenate the items of every iterable in ``xss`` into a single list.

    Only one level of nesting is removed; the order of the items is kept.
    """
    flat = []
    for xs in xss:
        flat.extend(xs)
    return flat
def readUtf8(files: list[str]) -> list[str]:
    """Read each file in ``files`` and return its content decoded as UTF-8.

    Args:
        files: Paths of the files to read.

    Returns:
        One decoded string per path, in the same order as ``files``.

    Raises:
        OSError: If a file cannot be opened or read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    data = []
    for file in files:
        # Read bytes and decode explicitly so line endings are returned
        # untouched; the ``with`` block closes the handle even on errors.
        with open(file, "rb") as f:
            data.append(f.read().decode("utf-8"))
    return data
# Matches a keyword with an optional count either after it ("edge x5") or
# before it ("3x edge"). Groups are (word, count) for the first form and
# (count, word) for the second; only one pair is populated per match.
keywords = re.compile(
    r'(?i)(?:(edge|spank|dildo|pee|special\s*spank|bondage\s*dildo|clothespins)\s*x?(\d+)?)x?'
    r'|(?:x?(\d+)?x?\s*(edge|spank|dildo|pee|special\s*spank|bondage\s*dildo|clothespins))'
)

# Matches "triple keyword" case-insensitively, tolerating repeated letters
# in "triple" (e.g. "triiiple keyword").
tripleHour = re.compile(r'(?i)t{1,}r{1,}i{1,}p{1,}l{1,}e{1,} keyword')
def fit_into_numbers(x: int, numbers: list[int]):
    """Return the largest value in ``numbers`` that is less than or equal to ``x``.

    Args:
        x: The value to place.
        numbers: Candidate lower bounds. The list is only read, never
            reordered, and does not need to be sorted.

    Returns:
        The greatest element of ``numbers`` that does not exceed ``x``, or
        ``None`` when every element is greater than ``x`` (or the list is
        empty).
    """
    # A value at or above the largest bound belongs to that last bound, so a
    # plain "largest bound not exceeding x" search covers every case.
    return max((number for number in numbers if number <= x), default=None)
# Per-tier rule table. Each key is the threshold that selects the tier and
# each value maps a keyword name to the amount one occurrence is worth in
# that tier. Every tier also carries a "-keywords" entry.
keywordValues = {
    0: {"edge": 2, "spank": 10, "dildo": 5, "-keywords": 3},
    100: {"edge": 4, "spank": 10, "dildo": 5, "-keywords": 3},
    250: {"edge": 4, "spank": 20, "dildo": 5, "-keywords": 3},
    500: {"edge": 8, "spank": 20, "dildo": 10, "-keywords": 3},
    750: {"edge": 8, "spank": 20, "dildo": 10, "-keywords": 6},
    1000: {"edge": 8, "spank": 20, "dildo": 10, "-keywords": 6},
    1250: {"edge": 8, "spank": 20, "dildo": 10, "-keywords": 6},
    1500: {"edge": 8, "spank": 20, "dildo": 10, "-keywords": 6},
    2000: {"edge": 8, "spank": 20, "dildo": 10, "-keywords": 6},
    2500: {"edge": 8, "spank": 20, "dildo": 10, "pee": 1, "-keywords": 6},
    3000: {"edge": 16, "spank": 40, "dildo": 20, "pee": 2, "-keywords": 6},
    3500: {
        "edge": 16, "spank": 40, "dildo": 20, "pee": 2,
        "special spank": 10, "-keywords": 6,
    },
    4000: {
        "edge": 16, "spank": 40, "dildo": 20, "pee": 2,
        "special spank": 10, "bondage dildo": 10, "-keywords": 6,
    },
    4500: {
        "edge": 16, "spank": 40, "dildo": 20, "pee": 2,
        "special spank": 10, "bondage dildo": 10, "-keywords": 6,
    },
    5000: {
        "edge": 16, "spank": 40, "dildo": 20, "pee": 2,
        "special spank": 10, "bondage dildo": 10, "-keywords": 6,
    },
    6000: {
        "edge": 16, "spank": 40, "dildo": 20, "pee": 2,
        "special spank": 10, "bondage dildo": 10, "clothespins": 5, "-keywords": 6,
    },
    7000: {
        "edge": 16, "spank": 40, "dildo": 20, "pee": 2,
        "special spank": 10, "bondage dildo": 10, "clothespins": 5, "-keywords": 8,
    },
    8000: {
        "edge": 16, "spank": 40, "dildo": 20, "pee": 2,
        "special spank": 10, "bondage dildo": 10, "clothespins": 5, "-keywords": 6,
    },
    9000: {
        "edge": 32, "spank": 80, "dildo": 40, "pee": 4,
        "special spank": 20, "bondage dildo": 20, "clothespins": 10, "-keywords": 6,
    },
    10000: {
        "edge": 32, "spank": 80, "dildo": 40, "pee": 4,
        "special spank": 20, "bondage dildo": 20, "clothespins": 10, "-keywords": 6,
    },
}
if __name__ == "__main__":
    # Tally, per keyword, the value earned across all scraped posts and print
    # the running total. Each post is scored with the rule tier selected by
    # its post number.
    keyword_names = (
        "edge",
        "spank",
        "dildo",
        "pee",
        "special spank",
        "bondage dildo",
        "clothespins",
    )
    # The keyword regex accepts any amount of whitespace (including none)
    # inside the two-word keywords, so matches are looked up with all
    # whitespace removed.
    canonical_names = {name.replace(" ", ""): name for name in keyword_names}

    pages = readUtf8([str(x) + ".html" for x in range(477, 478)])
    posts = flatten([scraper.get_posts(x) for x in pages])
    if not posts:
        raise SystemExit("No posts found in the scraped pages")

    first = min(post.post_number for post in posts)
    last = max(post.post_number for post in posts)
    print(f"Going from post #{first} to #{last}")

    total = dict.fromkeys(keyword_names, 0)

    # Times of the posts by "Snoek" that announce a triple-keyword period.
    triple_hours = [
        post.time
        for post in posts
        if post.author == "Snoek" and tripleHour.search(post.content)
    ]
    print(triple_hours)

    for post in posts:
        # A fresh list is passed on every call because the helper may reorder
        # the list it is given.
        tier = fit_into_numbers(post.post_number, list(keywordValues))
        if tier is None:
            raise ValueError(f"No keyword tier covers post #{post.post_number}")
        rules = keywordValues[tier]

        inpost = dict.fromkeys(keyword_names, 0)
        for match in keywords.finditer(post.content):
            groups = [group for group in match.groups() if group is not None]
            raw_word = next(group for group in groups if not group.isdigit())
            digits = [group for group in groups if group.isdigit()]

            word = canonical_names.get("".join(raw_word.lower().split()))
            if word is None:
                raise ValueError("Unknown word " + raw_word.lower())
            if word not in rules:
                # NOTE(review): assumes a keyword absent from the current
                # tier is not active yet and is worth nothing -- confirm.
                continue

            # An explicit count ("x5" / "5x") multiplies the keyword's value;
            # without one the keyword counts once.
            multiplier = int(digits[0]) if digits else 1
            inpost[word] += multiplier * rules[word]

        # NOTE(review): every post made after ANY triple announcement is
        # tripled, with no end time -- confirm whether the period should be
        # limited (e.g. to one hour).
        if any((post.time - start).total_seconds() > 0 for start in triple_hours):
            for key in inpost:
                inpost[key] *= 3

        # NOTE(review): the original indentation of this line was lost; it is
        # applied to every post here, after tripling -- confirm it was not
        # meant to sit inside the triple branch above.
        inpost["edge"] += rules["edge"] * 2

        for key in inpost:
            total[key] += inpost[key]

    print(total)
Add Comment
Please, Sign In to add comment