Advertisement
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- import pywikibot
- import re
- from bs4 import BeautifulSoup
- from datetime import date, timedelta
- from requests import get
# Site handle shared by every pywikibot.Page lookup below; no code/family is
# passed, so pywikibot picks the site itself.
wp = pywikibot.Site()
# One 4-tuple per chart, in processing order:
#   [0] slug appended to https://www.billboard.com/charts/
#   [1] title of the wiki page that gets edited
#   [2] title= value used in the generated {{cite magazine}} reference
#   [3] 'song' or 'album'
# Song charts come first, album charts last.
charts = [
    (slug, article, citeTitle, 'song')
    for slug, article, citeTitle in (
        ('adult-pop-songs', 'Adult Top 40', 'Adult Pop Airplay'),
        ('adult-contemporary', 'Adult Contemporary (chart)', 'Adult Contemporary'),
        ('pop-songs', 'Pop Airplay', 'Pop Songs Chart'),
        ('hot-100', 'Billboard Hot 100', 'Billboard Hot 100'),
        ('digital-song-sales', 'Digital Songs', 'Digital Song Sales'),
        ('country-songs', 'Hot Country Songs', 'Hot Country Songs'),
        ('country-airplay', 'Country Airplay', 'Country Airplay'),
        ('hot-mainstream-rock-tracks', 'Mainstream Rock (chart)', 'Mainstream Rock Airplay'),
        ('alternative-airplay', 'Alternative Airplay', 'Alternative Airplay'),
        ('triple-a', 'Adult Alternative Airplay', 'Adult Alternative Airplay'),
        ('rhythmic-40', 'Rhythmic (chart)', 'Rhythmic Airplay'),
        ('dance-electronic-songs', 'Dance/Electronic Songs', 'Top Dance/Electronic Songs'),
        ('hot-dance-airplay', 'Dance/Mix Show Airplay', 'Dance/Mix Show Airplay'),
        ('christian-songs', 'Hot Christian Songs', 'Hot Christian Songs'),
        ('christian-airplay', 'Christian Airplay', 'Christian Airplay'),
        ('billboard-global-200', 'Billboard Global 200', 'Billboard Global 200'),
        ('canadian-hot-100', 'Canadian Hot 100', 'Billboard Canadian Hot 100'),
        ('billboard-argentina-hot-100', 'Argentina Hot 100', 'Billboard Argentina Hot 100'),
        ('brazil-songs-hotw', 'Brazil Songs', 'Brazil Songs'),
        ('croatia-songs-hotw', 'Croatia Songs', 'Croatia Songs'),
        ('indonesia-songs-hotw', 'Indonesia Songs', 'Indonesia Songs'),
        ('philippines-songs-hotw', 'Philippines Songs', 'Philippines Songs'),
        ('south-korea-songs-hotw', 'South Korea Songs', 'South Korea Songs'),
        ('mexico-songs-hotw', 'Mexico Songs', 'Mexico Songs'),
        ('latin-songs', 'Hot Latin Songs', 'Hot Latin Songs'),
        ('latin-airplay', 'Latin Airplay', 'Latin Airplay'),
        ('latin-pop-airplay', 'Latin Pop Airplay', 'Latin Pop Airplay'),
        ('latin-regional-mexican-airplay', 'Regional Mexican Airplay', 'Regional Mexican Airplay'),
        ('latin-tropical-airplay', 'Tropical Airplay', 'Tropical Airplay'),
        ('japan-hot-100', 'Billboard Japan Hot 100', 'Billboard Japan Hot 100'),
    )
] + [
    (slug, article, citeTitle, 'album')
    for slug, article, citeTitle in (
        ('billboard-200', 'Billboard 200', 'Billboard 200'),
        ('dance-electronic-albums', 'Dance/Electronic Albums', 'Top Dance/Electronic Albums'),
        ('christian-albums', 'Top Christian Albums', 'Top Christian Albums'),
        ('comedy-albums', 'Comedy Albums', 'Comedy Albums'),
        ('latin-albums', 'Billboard Top Latin Albums', 'Top Latin Albums'),
        ('latin-pop-albums', 'Latin Pop Albums', 'Latin Pop Albums'),
        ('regional-mexican-albums', 'Regional Mexican Albums', 'Regional Mexican Albums'),
        ('tropical-albums', 'Tropical Albums', 'Tropical Albums'),
    )
]
# Artist credit as scraped from the chart page -> spelling to use instead.
# Applied before any wiki lookup is made for that artist.
names = {
    'Grse': 'Grše',
    'Ke personajes': 'Ke Personajes',
}

# Artist link text -> hand-written wikilink. Applied after the automatic
# lookup, and only when the entry is still exactly the key shown here.
links = {
    'Agust D': '[[Suga (rapper)|Agust D]]',
    'Fuerza Regida': '[[Fuerza Regida]]',
}
def redirectCheck(page):
    """Return the page a redirect points to, or *page* itself otherwise.

    Args:
        page: a pywikibot page object.

    Returns:
        ``page.getRedirectTarget()`` when ``page.isRedirectPage()`` is true,
        else the same ``page`` object.
    """
    if not page.isRedirectPage():
        return page
    # Ask pywikibot for the actual target. Taking the first entry of
    # linkedPages() raised StopIteration when no main-namespace link was
    # listed and was not guaranteed to be the redirect target.
    return page.getRedirectTarget()
def disambigParse(page, i):
    """Try each bulleted wikilink in *page*'s text until catCheck accepts one.

    Args:
        page: page object whose ``text`` is scanned (callers pass a
            disambiguation page).
        i: artist index, forwarded unchanged to ``catCheck``.

    Returns:
        None. Any result is recorded by ``catCheck`` itself.
    """
    # A "*" optionally followed by one whitespace character, then "[[";
    # the captured link target ends at the first "|" or "]]".
    entryPattern = r'\*[\s]?\[\[(.+?)(?:\||\]\])'
    for entry in re.finditer(entryPattern, page.text):
        candidate = pywikibot.Page(wp, entry.group(1))
        if catCheck(candidate, i):
            break
def _artistFallbackPages(page):
    """Return alternative pages to try when *page* is not the artist's article.

    Uses the module-level ``artist`` (the name currently being resolved by the
    calling loop). If "<artist> (disambiguation)" exists, every bulleted
    wikilink on it is a candidate; otherwise the first "<artist> (word)"
    mentioned in *page*'s own text is the only candidate.
    """
    disambigPage = pywikibot.Page(wp, f'{artist} (disambiguation)')
    if disambigPage.exists():
        # Same entry pattern disambigParse uses. It is scanned here instead of
        # calling disambigParse because that calls back into catCheck, which
        # would start this fallback again for every rejected entry and never
        # terminate.
        entries = re.findall(r'\*[\s]?\[\[(.+?)(?:\||\]\])', disambigPage.text)
        return [pywikibot.Page(wp, entry) for entry in entries]
    # The artist name is literal text, not a pattern: escape it so names
    # containing regex metacharacters neither raise re.error nor mis-match.
    altPageMatch = re.search(re.escape(artist) + r' \(\w+\)', page.text)
    return [pywikibot.Page(wp, altPageMatch[0])] if altPageMatch else []


def catCheck(page, i=-1, *, fallback=True):
    """Accept *page* if its categories fit, and record the wikilink for it.

    With ``i != -1`` the page is checked as the article for ``artists[i]``: one
    of its category titles must match "births" or "[mM]usic(al) groups". On
    success ``artists[i]`` becomes the page title without any " (...)" suffix
    and ``artistPageLinks[i]`` becomes the wikilink (piped when a suffix was
    removed).

    With the default ``i == -1`` the page is checked as the work's article: one
    of its category titles must match the module-level ``type`` plus "s". On
    success the module-level ``workPageLink`` is set to the wikilink, piped
    when the title carries an "(... album)" / "(... song)" suffix.

    Args:
        page: pywikibot page object to check.
        i: index into ``artists`` / ``artistPageLinks``, or -1 for a work.
        fallback: when true and an artist page is rejected, also try the pages
            from ``_artistFallbackPages`` (one level deep only).

    Returns:
        True if this page or a fallback page was accepted, otherwise False.
    """
    global workPageLink
    isArtist = i != -1
    regex = '(births|[mM]usic(al)? groups)' if isArtist else type + 's'
    if any(re.search(regex, cat.title()) for cat in page.categories()):
        title = page.title()
        if isArtist:
            # Display name is the title up to any " (" disambiguator.
            artists[i] = title.split(' (')[0]
            target = f'{title}|{artists[i]}' if ' (' in title else title
            artistPageLinks[i] = f'[[{target}]]'
        else:
            if re.search(r'\(.*(album|song)\)', title):
                title += f'|{title.split(" (")[0]}'
            workPageLink = f'[[{title}]]'
        return True
    if isArtist and fallback:
        for candidate in _artistFallbackPages(page):
            # fallback=False bounds the search: a rejected candidate must not
            # start another fallback round.
            if catCheck(candidate, i, fallback=False):
                return True
    return False
# For every chart: scrape the current number one from billboard.com, resolve
# wikilinks for the work and its artists, then rewrite the "current number-one"
# sentence on the chart's wiki page.
# NOTE: artists, artistPageLinks, artist, type and workPageLink must remain
# module-level names because catCheck reads or writes them as globals.
for chart in charts:
    url = 'https://www.billboard.com/charts/' + chart[0]
    chartText = get(url).text
    chartPage = pywikibot.Page(wp, chart[1])
    soup = BeautifulSoup(chartText, 'html.parser')
    # The first matching tags are taken to be the number-one entry.
    titleTag = soup.find('a', {'class': 'c-title__link'})
    artistTag = soup.find('p', {'class': 'lrv-u-margin-r-150'})
    if titleTag is None or artistTag is None:
        # Without this guard a single changed or blocked page aborted the
        # whole run with AttributeError.
        print(f'Skipped {chart[1]} (no chart entry found at {url})')
        continue
    work = str(titleTag.string).strip()
    artists = [str(artistTag.string).strip()]
    type = chart[3]  # 'song' or 'album'; shadows the builtin but catCheck reads it
    # Split a joint credit on the first " + ", " & ", " X " or " x " found.
    andSym = re.search('( [+&Xx] )', artists[0])
    if andSym:
        artists = artists[0].split(andSym[0])

    # Resolve each artist; catCheck overwrites artists[i] / artistPageLinks[i]
    # when it finds a suitable article, otherwise the plain name is kept.
    artistPageLinks = []
    for i, artist in enumerate(artists):
        if artist in names:
            artists[i] = artist = names[artist]
        artistPageLinks.append(artist)
        artistPage = redirectCheck(pywikibot.Page(wp, artist))
        if artistPage.isDisambig():
            disambigParse(artistPage, i)
        else:
            catCheck(artistPage, i)

    # Look for the work's article under "<work>", "<work> (<type>)" and
    # "<work> (<artist> <type>)", for each artist in turn, until one is accepted.
    pageFound = False
    for artist in artists:
        if pageFound:
            break
        for title in [work, f'{work} ({type})', f'{work} ({artist} {type})']:
            workPage = redirectCheck(pywikibot.Page(wp, title))
            workPageLink = work  # plain-text default; catCheck replaces it on success
            # The artist name is literal text, so it is escaped before being
            # placed in the pattern.
            if workPage.exists() and re.search('by.+' + re.escape(artist), workPage.text):
                pageFound = catCheck(workPage)
                if pageFound:
                    break

    # Manual overrides apply only to entries that are still the bare name.
    artistPageLinks = [links.get(link, link) for link in artistPageLinks]

    quotes = "''" if type == 'album' else '"'
    currentStr = f'The current number-one {type} on the chart is '
    workStr = f'{quotes}{workPageLink}{quotes}'
    # "A", "A and B", "A, B and C", ... -- every credited artist is listed
    # (previously everything between the second and the last was dropped).
    if len(artistPageLinks) > 1:
        artistStr = f'{", ".join(artistPageLinks[:-1])} and {artistPageLinks[-1]}'
    else:
        artistStr = artistPageLinks[0]
    nameStr = f'{currentStr}{workStr} by {artistStr}.'

    today = date.today()
    # Dated chart URL is four days ahead of the run date -- presumably the
    # chart's issue date for the expected run day; TODO confirm.
    date1 = (today + timedelta(days=4)).isoformat()
    # Day without zero padding, built by hand: strftime('%-d') is a
    # platform-specific extension.
    date2 = f'{today:%B} {today.day}, {today.year}'
    refStr = f'<ref>{{{{cite magazine|url={url}/{date1}|title={chart[2]}|date={date2}|magazine=Billboard|access-date={date2}}}}}</ref>'
    finalStr = nameStr + refStr
    # A callable replacement inserts finalStr verbatim; passed as a string it
    # would be treated as a template and backslashes in it interpreted.
    chartPage.text = re.sub(re.escape(currentStr) + '.*[.>]', lambda _match: finalStr, chartPage.text)
    chartPage.save('Update current number one', quiet=True)
    print(f'Updated {chartPage.title()} ({finalStr})')
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement