Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import re
- from time import sleep
def get_cache(text=None):
    """Load previously recorded link-check results from the wiki report page.

    The page must contain the headings ``==Valid==``, ``==Broken==`` and
    ``==Special cases==`` as exact lines. Wiki links (``[[...]]``)
    found between the first two headings are treated as valid, links between
    the second and third as broken. After the third heading, every
    ``<kbd>query</kbd> || status`` cell contributes its query to the set
    named by the status.

    Args:
        text: Raw wikitext to parse. When ``None`` (the default) the page is
            downloaded from en.wiktionary.org.

    Returns:
        A ``(valid, broken)`` tuple of sets of strings.

    Raises:
        ValueError: If one of the three section headings is missing.
    """
    if text is None:
        text = requests.get(
            "https://en.wiktionary.org/?curid=8756089&action=raw"
        ).text
    lines = text.split("\n")
    a = lines.index("==Valid==")
    b = lines.index("==Broken==")
    c = lines.index("==Special cases==")
    link = r"\[\[(.*?)\]\]"
    kbd = r"<kbd>(.*?)</kbd> *\|\| *"
    # Join with "\n" rather than "": '.' does not match a newline, so the
    # non-greedy groups stay within a single row. With "" a "broken" row
    # followed by a "valid" row was captured as one bogus "valid" query.
    valid_section = "\n".join(lines[a:b])
    broken_section = "\n".join(lines[b:c])
    special_section = "\n".join(lines[c:])
    valid = re.findall(link, valid_section)
    broken = re.findall(link, broken_section)
    valid += re.findall(kbd + "valid", special_section)
    broken += re.findall(kbd + "broken", special_section)
    return set(valid), set(broken)
def get_links(cont=None):
    """Collect every ``exturlusage`` result for www.ancestry.com.

    Queries the en.wiktionary.org API page by page (up to 500 results per
    request, namespace 0) and follows the ``eucontinue`` token until the API
    stops returning one or a page comes back empty.

    Args:
        cont: Continuation token to start from; ``None`` starts at the
            beginning.

    Returns:
        A list with the ``query.exturlusage`` entries of all fetched pages,
        in the order the API returned them.
    """
    endpoint = "https://en.wiktionary.org/w/api.php"
    collected = []
    while True:
        request_params = {
            "action": "query",
            "format": "json",
            "list": "exturlusage",
            "euquery": "www.ancestry.com",
            "eulimit": 500,
            "eunamespace": 0,
            "eucontinue": cont,
        }
        payload = requests.get(endpoint, request_params).json()
        page = payload["query"]["exturlusage"]
        if not page:
            # An empty page ends the listing, even if a token was supplied.
            break
        collected.extend(page)
        if "continue" not in payload:
            break
        cont = payload["continue"]["eucontinue"]
    return collected
def check_link(url, query, cache=None, retries=5):
    """Classify one ancestry.com name-origin link as valid or broken.

    A cached verdict for ``query`` is returned without any network access.
    Otherwise the page at ``url`` is downloaded and counts as valid when it
    contains the text "Dictionary of American Family Names".

    Args:
        url: Full URL to download.
        query: Cache key for this link (the caller passes the surname part
            of the URL).
        cache: Optional ``(valid, broken)`` pair of containers of queries.
        retries: How many additional attempts to make after a failed request.

    Returns:
        ``"valid"``, ``"broken"``, or ``None`` when every attempt failed.
    """
    if cache:
        valid, broken = cache
        if query in valid:
            return "valid"
        if query in broken:
            return "broken"
    # Retry in a loop. The original recursive retry omitted ``url``, which
    # shifted every argument (query fetched as URL, cache used as query).
    while True:
        sleep(1)  # throttle: at most one request per second
        try:
            # The timeout keeps a stalled connection from hanging the run.
            page = requests.get(url, timeout=30).text
        except requests.RequestException:
            if retries <= 0:
                return None
            sleep(60)  # back off before the next attempt
            retries -= 1
        else:
            if "Dictionary of American Family Names" in page:
                return "valid"
            return "broken"
def check_all_links():
    """Check every ancestry.com name-origin link and print a wikitext report.

    Links whose URL does not contain
    ``www.ancestry.com/name-origin?surname=`` are ignored. A link whose
    surname query equals the title of the page it is on is listed under
    "Valid" or "Broken" according to its status; every other link (different
    title, or a status that is neither "valid" nor "broken") becomes a row
    of the "Special cases" table. The report is written to standard output.
    """
    cache = get_cache()
    valid_pages = []
    broken_pages = []
    special_rows = []
    for link in get_links():
        if "www.ancestry.com/name-origin?surname=" not in link["url"]:
            continue
        surname = link["url"].split("?surname=")[1]
        status = check_link(link["url"], surname, cache)
        matches_title = surname == link["title"]
        if matches_title and status == "valid":
            valid_pages.append(f"[[{surname}]]")
        elif matches_title and status == "broken":
            broken_pages.append(f"[[{surname}]]")
        else:
            special_rows.append(
                f"|-\n| [[{link['title']}]] || <kbd>{surname}</kbd> || {status}"
            )
    simple_valid = ", ".join(valid_pages)
    simple_broken = ", ".join(broken_pages)
    special = "\n".join(special_rows)
    collapse_top = "{{collapse-top|class=custom|title=Pages}}"
    collapse_bottom = "{{collapse-bottom}}"
    # The template text starts at column 0 on purpose: the headings must be
    # emitted without leading whitespace.
    report = f"""
==Valid==
{collapse_top}
{simple_valid}
{collapse_bottom}
==Broken==
{collapse_top}
{simple_broken}
{collapse_bottom}
==Special cases==
{{| class="wikitable sortable"
! Page !! Query !! Status
{special}
|}}"""
    print(report)
# Script entry point: run the full check and print the report. The stray
# leading "-" is removed; it negated the function's None return value.
check_all_links()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement