Advertisement
Guest User

Source

a guest
May 8th, 2023
289
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.76 KB | None | 0 0
  1. import pywikibot
  2. import re
  3. from bs4 import BeautifulSoup
  4. from datetime import date, timedelta
  5. from requests import get
  6.  
# Site handle passed to every pywikibot.Page(...) lookup in this script.
# NOTE(review): called with no arguments, so this presumably resolves to the
# family/language set in the local pywikibot user config -- confirm.
wp = pywikibot.Site()
  8. charts = [('adult-pop-songs', 'Adult Top 40', 'Adult Pop Airplay', 'song'),
  9.         ('adult-contemporary', 'Adult Contemporary (chart)', 'Adult Contemporary', 'song'),
  10.         ('pop-songs', 'Pop Airplay', 'Pop Songs Chart', 'song'),
  11.         ('hot-100', 'Billboard Hot 100', 'Billboard Hot 100', 'song'),
  12.         ('digital-song-sales', 'Digital Songs', 'Digital Song Sales', 'song'),
  13.         ('country-songs', 'Hot Country Songs', 'Hot Country Songs', 'song'),
  14.         ('country-airplay', 'Country Airplay', 'Country Airplay', 'song'),
  15.         ('hot-mainstream-rock-tracks', 'Mainstream Rock (chart)', 'Mainstream Rock Airplay', 'song'),
  16.         ('alternative-airplay', 'Alternative Airplay', 'Alternative Airplay', 'song'),
  17.         ('triple-a', 'Adult Alternative Airplay', 'Adult Alternative Airplay', 'song'),
  18.         ('rhythmic-40', 'Rhythmic (chart)', 'Rhythmic Airplay', 'song'),
  19.         ('dance-electronic-songs', 'Dance/Electronic Songs', 'Top Dance/Electronic Songs', 'song'),
  20.         ('hot-dance-airplay', 'Dance/Mix Show Airplay', 'Dance/Mix Show Airplay', 'song'),
  21.         ('christian-songs', 'Hot Christian Songs', 'Hot Christian Songs', 'song'),
  22.         ('christian-airplay', 'Christian Airplay', 'Christian Airplay', 'song'),
  23.         ('billboard-global-200', 'Billboard Global 200', 'Billboard Global 200', 'song'),
  24.         ('canadian-hot-100', 'Canadian Hot 100', 'Billboard Canadian Hot 100', 'song'),
  25.         ('billboard-argentina-hot-100', 'Argentina Hot 100', 'Billboard Argentina Hot 100', 'song'),
  26.         ('brazil-songs-hotw', 'Brazil Songs', 'Brazil Songs', 'song'),
  27.         ('croatia-songs-hotw', 'Croatia Songs', 'Croatia Songs', 'song'),
  28.         ('indonesia-songs-hotw', 'Indonesia Songs', 'Indonesia Songs', 'song'),
  29.         ('philippines-songs-hotw', 'Philippines Songs', 'Philippines Songs', 'song'),
  30.         ('south-korea-songs-hotw', 'South Korea Songs', 'South Korea Songs', 'song'),
  31.         ('mexico-songs-hotw', 'Mexico Songs', 'Mexico Songs', 'song'),
  32.         ('latin-songs', 'Hot Latin Songs', 'Hot Latin Songs', 'song'),
  33.         ('latin-airplay', 'Latin Airplay', 'Latin Airplay', 'song'),
  34.         ('latin-pop-airplay', 'Latin Pop Airplay', 'Latin Pop Airplay', 'song'),
  35.         ('latin-regional-mexican-airplay', 'Regional Mexican Airplay', 'Regional Mexican Airplay', 'song'),
  36.         ('latin-tropical-airplay', 'Tropical Airplay', 'Tropical Airplay', 'song'),
  37.         ('japan-hot-100', 'Billboard Japan Hot 100', 'Billboard Japan Hot 100', 'song'),
  38.         ('billboard-200', 'Billboard 200', 'Billboard 200', 'album'),
  39.         ('dance-electronic-albums', 'Dance/Electronic Albums', 'Top Dance/Electronic Albums', 'album'),
  40.         ('christian-albums', 'Top Christian Albums', 'Top Christian Albums', 'album'),
  41.         ('comedy-albums', 'Comedy Albums', 'Comedy Albums', 'album'),
  42.         ('latin-albums', 'Billboard Top Latin Albums', 'Top Latin Albums', 'album'),
  43.         ('latin-pop-albums', 'Latin Pop Albums', 'Latin Pop Albums', 'album'),
  44.         ('regional-mexican-albums', 'Regional Mexican Albums', 'Regional Mexican Albums', 'album'),
  45.         ('tropical-albums', 'Tropical Albums', 'Tropical Albums', 'album')]
  46.  
  47. names = {'Grse': 'Grše',
  48.         'Ke personajes': 'Ke Personajes'}
  49.  
  50. links = {'Agust D': '[[Suga (rapper)|Agust D]]',
  51.         'Fuerza Regida': '[[Fuerza Regida]]'}
  52.  
  53. def redirectCheck(page):
  54.         if page.isRedirectPage():
  55.                 return next(page.linkedPages(namespaces=0))
  56.         else:
  57.                 return page
  58.  
  59. def disambigParse(page, i):
  60.         for title in re.findall(r'\*[\s]?\[\[(.+?)(?:\||\]\])', page.text):
  61.                 if catCheck(pywikibot.Page(wp, title), i):
  62.                         return
  63.  
  64. def catCheck(page, i=-1):
  65.         regex = '(births|[mM]usic(al)? groups)' if i + 1 else type + 's'
  66.  
  67.         global workPageLink
  68.         if [cat for cat in page.categories() if re.search(regex, cat.title())]:
  69.                 if i + 1:
  70.                         artists[i] = artistPageLinks[i] = page.title()
  71.                         if re.search(r' \(', artists[i]):
  72.                                 artists[i] = artists[i].split(' (')[0]
  73.                                 artistPageLinks[i] += f'|{artists[i]}'
  74.                         artistPageLinks[i] = f'[[{artistPageLinks[i]}]]'
  75.                 else:
  76.                         workPageLink = page.title()
  77.                         if re.search(r'\(.*(album|song)\)', workPageLink):
  78.                                 workPageLink += f'|{workPageLink.split(" (")[0]}'
  79.                         workPageLink = f'[[{workPageLink}]]'
  80.                 return True
  81.         elif i + 1:
  82.                 disambigPage = pywikibot.Page(wp, f'{artist} (disambiguation)')
  83.                 altPageMatch = re.search(artist + r' \(\w+\)', page.text)
  84.                 altPage = pywikibot.Page(wp, altPageMatch[0]) if altPageMatch else None
  85.  
  86.                 if disambigPage.exists():
  87.                         disambigParse(disambigPage, i)
  88.                 elif altPage:
  89.                         catCheck(altPage, i)
  90.  
  91. for chart in charts:
  92.         url = 'https://www.billboard.com/charts/' + chart[0]
  93.         chartText = get(url).text
  94.         chartPage = pywikibot.Page(wp, chart[1])
  95.  
  96.         soup = BeautifulSoup(chartText, 'html.parser')
  97.         work = str(soup.find('a', {'class': 'c-title__link'}).string).strip()
  98.         artists = [str(soup.find('p', {'class': 'lrv-u-margin-r-150'}).string).strip()]
  99.         type = chart[3]
  100.  
  101.         andSym = re.search('( [+&Xx] )', artists[0])
  102.         if andSym:
  103.                 artists = artists[0].split(andSym[0])
  104.  
  105.         artistPageLinks = []
  106.         for i, artist in enumerate(artists):
  107.                 if artist in names:
  108.                         artists[i] = artist = names[artist]
  109.  
  110.                 artistPageLinks.append(artist)
  111.                 artistPage = redirectCheck(pywikibot.Page(wp, artist))
  112.                 if artistPage.isDisambig():
  113.                         disambigParse(artistPage, i)
  114.                 else:
  115.                         catCheck(artistPage, i)
  116.  
  117.         pageFound = False
  118.         for artist in artists:
  119.                 if pageFound:
  120.                         break
  121.  
  122.                 for title in [work, f'{work} ({type})', f'{work} ({artist} {type})']:
  123.                         workPage = redirectCheck(pywikibot.Page(wp, title))
  124.                         workPageLink = work
  125.  
  126.                         if workPage.exists() and re.search('by.+' + artist, workPage.text):
  127.                                 pageFound = catCheck(workPage)
  128.                                 if pageFound:
  129.                                         break
  130.  
  131.         for i, link in enumerate(artistPageLinks):
  132.                 if link in links:
  133.                         artistPageLinks[i] = links[link]
  134.  
  135.         quotes = '\'\'' if type == "album" else '"'
  136.  
  137.         currentStr = f'The current number-one {type} on the chart is '
  138.         workStr = f'{quotes}{workPageLink}{quotes}'
  139.         artistStr = f'{artistPageLinks[0]}{f", {artistPageLinks[1]}" if len(artists) > 2 else ""}{f" and {artistPageLinks[-1]}" if len(artists) > 1 else ""}'
  140.         nameStr = f'{currentStr}{workStr} by {artistStr}.'
  141.  
  142.         date1 = (date.today() + timedelta(days=4)).isoformat()
  143.         date2 = date.today().strftime('%B %-d, %Y')
  144.         refStr = f'<ref>{{{{cite magazine|url={url}/{date1}|title={chart[2]}|date={date2}|magazine=Billboard|access-date={date2}}}}}</ref>'
  145.  
  146.         finalStr = nameStr + refStr
  147.         chartPage.text = re.sub(re.escape(currentStr) + '.*[.>]', finalStr, chartPage.text)
  148.         chartPage.save('Update current number one', quiet=True)
  149.  
  150.         print(f'Updated {chartPage.title()} ({finalStr})')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement