Advertisement
torack

Python: WIP parsing wowdb to create TSM groups. v1.1

Aug 1st, 2018
10,294
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.31 KB | None | 0 0
  1. """
  2. Small script that automatically requests and parses wowdb data and saves the result as TSM4 groups.
  3. Modify, improve, and distribute as you see fit. Probably contains plenty of bugs.
  4.  
  5. !Use at your own risk!
  6.  
  7. ***
  8. IMPORTANT: NOT YET FINISHED. OUTPUT WILL INCLUDE SOME CRAFTED ITEMS AND DARKMOON TRINKETS.
  9.  
  10. Not all crafted items are currently categorized as crafted on wowdb. These may end up in the import string created by this script. If you then import with the option "Move already grouped items?" enabled, these items will be moved from your profession groups to these.
  11. ***
  12.  
  13. Version 1.1:
  14. * Cleaned up the code massively, now utilizing BeautifulSoup to parse the tables and Pandas to handle the data.
  15. * Handles Darkmoon Trinkets now (by excluding Source == 'Created')
  16.  
  17. -torack on 01.08.2018
  18. """
  19.  
  20.  
  21. from bs4 import BeautifulSoup
  22. import urllib.request
  23. import pandas
  24.  
  25. def slot_filter_generator(slot_filters, quality_filters):
  26.     """ Generator to iterate over all slot, quality combinations. """
  27.     for quality_filter in quality_filters:
  28.         for slot_filter in slot_filters:
  29.             yield (quality_filter, quality_filters[quality_filter]), (slot_filter, slot_filters[slot_filter])
  30.  
  31.  
  32. def construct_wowdb_url(qual_filt, slot_filt, itype='armor', bind=2, craft=-2, ilvl_min=226, reqlvl_min=111):
  33.     """ Construct wowdb-url for given filter settings. """
  34.     url = f"https://www.wowdb.com/items/{itype}?filter-bind={bind}&filter-crafted-with={craft}&filter-ilvl-min={ilvl_min}&filter-reqlvl-min={reqlvl_min}&filter-quality={qual_filt}&filter-slot={slot_filt}"
  35.     return url
  36.  
  37.  
  38. def read_source_from_url(url):
  39.     """ Read URL and return source_code as decoded str. """
  40.     response = urllib.request.urlopen(url)
  41.     return response.read().decode()
  42.  
  43.  
  44. def label_item_id(row):
  45.     """ Extract Item_ID from wowdb item url. """
  46.     return int(row['URL'].split('/')[-1].split('-')[0])
  47.  
  48.  
  49. def read_item_table_from_wowdb_url(search_html, quality):
  50.     """ Read and reformat wowdb table returned by url-search. Returns pandas.DataFrame() """
  51.     soup = BeautifulSoup(search_html, 'lxml')
  52.     parsed_table = soup.find_all('table')[1]
  53.     data = [
  54.             [td.a['href'] if td.find('a') else
  55.             ''.join(td.stripped_strings)
  56.             for td in row.find_all('td')]
  57.         for row in parsed_table.find_all('tr')
  58.         ]
  59.  
  60.     df = pandas.DataFrame(data[1:], columns=['URL', 'URL2', 'URL3', 'Item Level', 'Req. Level', 'Slot', 'Source', 'Type'])
  61.     if not df.empty:
  62.         df['Item_ID'] = df.apply (lambda row: label_item_id (row),axis=1)
  63.         df['Quality'] = quality
  64.         df = df.dropna(axis=0, how='any')[['Item_ID', 'Item Level', 'Req. Level', 'Quality', 'Slot', 'Source', 'Type']]
  65.     return df
  66.  
  67.  
  68. def save_df_as_tsm_groups(df_main, outfile='armor_groups.dat'):
  69.     # tsm_group_sorting: quality -> armour_type -> slot
  70.     qualities = ['Epic', 'Uncommon']
  71.     armor_types = ['Plate', 'Mail', 'Leather', 'Cloth', 'Finger', 'Trinket', 'Back']
  72.     slots = ['Chest', 'Feet', 'Hands', 'Head', 'Legs', 'Shoulders', 'Waist', 'Wrists']
  73.     slot_types_as_subgroups = ['Finger', 'Trinket', 'Back']
  74.  
  75.     title = 'BfA BoE Armor'
  76.     out_str = ''
  77.     for quality in qualities:
  78.         for armor_type in armor_types:
  79.             if armor_type in slot_types_as_subgroups:
  80.                     # get items with those attributes
  81.                     df_items = df_main.loc[
  82.                         (df_main['Quality'] == quality) &
  83.                         (df_main['Type'] == armor_type)
  84.                         ]
  85.                    
  86.                     if not df_items.empty:
  87.                         # handle Back, Finger, Trinket group names
  88.                         armor_type_tmp = armor_type
  89.                         if armor_type == 'Back':
  90.                             armor_type_tmp = 'Cloaks'
  91.                         if armor_type == 'Finger':
  92.                             armor_type_tmp = 'Rings'
  93.                         if armor_type == 'Trinket':
  94.                             armor_type_tmp = 'Trinkets'
  95.  
  96.                         # construct group identifier only if items found
  97.                         out_str += f'group:{title}`{quality}`{armor_type_tmp},'
  98.  
  99.                         item_ids = df_items['Item_ID'].values
  100.                         for item_id in item_ids:
  101.                             out_str += f'i:{item_id},'
  102.             else:
  103.                 for slot in slots:
  104.                     # select corresponding items from dataframe
  105.                     df_items = df_main.loc[
  106.                         (df_main['Quality'] == quality) &
  107.                         (df_main['Type'] == armor_type) &
  108.                         (df_main['Slot'] == slot)
  109.                         ]
  110.                    
  111.                     if not df_items.empty:
  112.                         # construct group identifier
  113.                         out_str += f'group:{title}`{quality}`{armor_type}`{slot},'
  114.  
  115.                         item_ids = df_items['Item_ID'].values
  116.                         for item_id in item_ids:
  117.                             out_str += f'i:{item_id},'
  118.  
  119.     with open(outfile, 'w') as f:
  120.         f.writelines(out_str)
  121.    
  122.  
  123. # wowdb filter values per item slot
  124. slot_filters = {
  125.     'Cloak': 65536,
  126.     'Chest': 32,
  127.     'Feet': 256,
  128.     'Finger': 2048,
  129.     'Hands': 1024,
  130.     'Head': 2,
  131.     'Legs': 128,
  132.     # 'Neck': 4,  # not needed, Heart of Azeroth
  133.     'Shoulders': 8,
  134.     'Trinket': 4096,
  135.     'Waist': 64,
  136.     'Wrists': 512,
  137. }
  138.  
  139. # wowdb filter values per item quality
  140. quality_filters = {
  141.     'Epic': 16,
  142.     # 'Rare': 8,  # not needed, currently only crafted Rares
  143.     'Uncommon': 4,
  144. }
  145.  
  146. if __name__ == '__main__':
  147.     df_main = pandas.DataFrame(columns=['Item_ID', 'Item Level', 'Req. Level', 'Quality', 'Slot', 'Source', 'Type'])
  148.     for (quality, qual_filt), (slot, slot_filt) in slot_filter_generator(slot_filters, quality_filters):
  149.         search_url = construct_wowdb_url(qual_filt, slot_filt)
  150.         return_html = read_source_from_url(search_url)
  151.         df = read_item_table_from_wowdb_url(return_html, quality)
  152.  
  153.         if not df.empty:
  154.             # Remove Darkmoon Trinkets, the only items with Source == 'Created'
  155.             df = df[df.Source != 'Created']
  156.  
  157.             df_main = df_main.append(df)
  158.  
  159.     df_main = df_main.reset_index(drop=True)
  160.     save_df_as_tsm_groups(df_main, outfile='armor_groups.dat')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement