Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the four US ranking exports (paths are relative to the working
# directory) and stack them row-wise into a single table.
men = pd.read_csv("US Rank List Men.csv")
women = pd.read_csv("US Rank List Women.csv")
mixed = pd.read_csv("US Rank List Mixed.csv")
us_open = pd.read_csv("US Rank List Open.csv")
athlete_list = pd.concat([men, women, mixed, us_open], axis=0)

# Discard the columns that are not used below; they are addressed by position.
athlete_list.drop(
    athlete_list.columns[[6, 5, 7, 8, 9, 12, 13, 14, 15, 16, 17, 18]],
    axis=1,
    inplace=True,
)

# Remove the parenthesised part of each name. regex=True must be explicit:
# since pandas 2.0 str.replace treats the pattern as a literal string by
# default, which would leave every name untouched. The trailing strip()
# removes the blank that is left where the parenthesis used to be.
# NOTE(review): ".*" is greedy, so a value holding several parenthesised
# groups loses everything between the first "(" and the last ")" - confirm
# that this is intended for team entries.
athlete_list["Person/Team"] = (
    athlete_list["Person/Team"]
    .str.replace(r"\(.*\)", "", regex=True)
    .str.strip()
)
athlete_list["Discipline"] = athlete_list["Discipline"].str.strip()
# Dates that cannot be parsed become NaT instead of raising.
athlete_list["DoB"] = pd.to_datetime(athlete_list["DoB"], errors="coerce")
# Nested lookup of risk values: {sport code: {discipline code: risk}}.
# All codes are strings.
risk_dict = {
    "44": {
        "37": 1.00,
        "39": .986182,
        "36": .980809,
        "35": .963153,
        "38": .952406,
        "40": .943961,
    },
    "1": {
        "55": .017437,
        "54": .017437,
        "61": .017437,
    },
    "12": {
        "7": .605391,
        "90": .570079,
        "52": .566241,
        "6": .565473,
        "8": .559332,
        "57": .558565,
    },
    "51": {
        "224": .525208,
    },
    "48": {
        "42": .286292,
        "41": .053626,
    },
    "42": {
        "180": .27401,
    },
    "52": {
        "47": .177908,
        "46": .161019,
    },
    "38": {
        "85": .171079,
        "84": .151888,
        "86": .140373,
        "87": .136535,
    },
    "20": {
        "15": .152899,
        "16": .12987,
        "17": .129102,
    },
    "22": {
        "164": .136172,
    },
    "8": {
        "155": .132829,
    },
    "36": {
        "31": .125921,
        "135": .125921,
        "34": .125921,
        "136": .11978,
    },
    "27": {
        "174": .096624,
    },
    "85": {
        "200": .094119,
        "244": .094119,
    },
    "47": {
        "81": .081165,
        "80": .068883,
    },
    "90": {
        "122": .074478,
        "124": .065266,
        "121": .064499,
        "125": .062963,
        "123": .062963,
    },
    "18": {
        "162": .067781,
    },
    "30": {
        "22": .065472,
        "23": .065472,
        "24": .065472,
        "21": .065472,
    },
    "34": {
        "201": .062192,
    },
    "107": {
        "194": .062084,
    },
    "41": {
        "202": .061201,
    },
    "15": {
        "216": .059223,
    },
    "88": {
        "219": .056303,
    },
    "43": {
        "139": .055173,
    },
    "111": {
        "68": .047926,
        "69": .047926,
    },
    "7": {
        "154": .040582,
    },
    "19": {
        "176": .038383,
    },
    "21": {
        "163": .037286,
    },
    "94": {
        "82": .034222,
        "83": .033455,
        "110": .026546,
    },
    "49": {
        "147": .033779,
    },
    "89": {
        "183": .032947,
    },
    "6": {
        "2": .032132,
        "3": .015572,
    },
    "33": {
        "64": .032132,
        "25": .028842,
        "27": .021494,
        "75": .01305,
        "77": .010747,
        "26": .008663,
        "29": .005922,
        "28": .005922,
        "76": .032132,
        "63": .028842,
        "62": .021494,
        "133": .01305,
    },
    "29": {
        "175": .027745,
    },
    "143": {
        "196": .02435,
    },
    "110": {
        "184": .022262,
    },
    "16": {
        "9": .021277,
        "10": .021277,
        "11": .021277,
        "102": .02051,
    },
    "11": {
        "158": .020946,
    },
    "57": {
        "49": .017656,
        # NOTE(review): one digit longer than every other value and nearly
        # equal to the entry above; possibly meant .017656 - confirm.
        "48": .0176556,
    },
    "108": {
        "198": .016781,
    },
    "23": {
        "167": .01645,
    },
    "86": {
        "191": .016121,
    },
    "35": {
        "177": .015463,
    },
    "84": {
        "189": .015353,
    },
    "50": {
        "45": .014585,
        "43": .014585,
        "44": .014585,
        "56": .014585,
    },
    "114": {
        "187": .014478,
    },
    "97": {
        "190": .01305,
    },
    "39": {
        "146": .011953,
    },
    "102": {
        "210": .011515,
        "209": .011515,
    },
    "103": {
        "118": .010747,
        "117": .004606,
        # NOTE(review): ten times larger than the sibling value .010747 and
        # out of line with the neighbouring sports; possibly a dropped zero
        # (.010747) - confirm against the source data.
        "119": .10747,
    },
    "25": {
        "173": .010747,
    },
    "9": {
        "4": .005812,
        "88": .005045,
        "89": .004277,
        "5": .00965,
    },
    "17": {
        "12": .007786,
        "13": .007786,
        "14": .007019,
    },
    "2": {
        "148": .007677,
    },
    "5": {
        "151": .007677,
    },
    "40": {
        "179": .007677,
    },
    "24": {
        "168": .007348,
    },
    "123": {
        "193": .006909,
    },
    "101": {
        "73": .006141,
        "72": .006141,
        "74": .006141,
    },
    "138": {
        # NOTE(review): the only four-digit discipline code in the table;
        # possibly meant "199" - confirm.
        "1199": .006141,
    },
    "37": {
        "178": .006141,
    },
    "99": {
        "192": .003071,
    },
    "153": {
        "186": .002303,
    },
    "93": {
        "114": .002303,
        "113": .002303,
    },
    "104": {
        "197": .001535,
    },
}
# TODO: take the athlete_list dataframe and convert its sport/discipline
# names to the codes contained in the risk dictionary.
# Author's note: the replace command does not accept entries whose key and
# value are identical, so such pairs are left out of this mapping.
#
# sport_dict maps a sport name to its sport code (codes are strings).
sport_dict = {
    "Athletics": "44",
    "Archery": "1",
    "Alpine Skiing": "33",
    "Badminton": "2",
    "Baseball": "3",
    "Basketball": "4",
    "Beach Volleyball": "48",
    "Biathlon": "5",
    "BMX": "12",
    "Bobsleigh": "6",
    "Boxing": "8",
    "Canoe Slalom": "9",
    "Canoe Sprint": "9",
    "Cross Country Skiing": "33",
    "Curling": "11",
    "Cycling - Road": "12",
    "Cycling - Track": "12",
    "Diving": "15",
    "Fencing": "17",
    "Figure Skating": "19",
    "Football": "34",
    "Freestyle Skiing": "33",
    "Golf": "142",
    "Gymnastics - Artistic": "20",
    "Gymnastics - Trampolining": "20",
    "Handball": "41",
    "Hockey": "18",
    "Ice Hockey": "21",
    "Judo": "22",
    "Karate": "57",
    "Luge": "23",
    "Modern Pentathlon": "24",
    "Mountain Bike": "12",
    "Nordic Combined": "33",
    "Open Water Swimming": "38",
    "Rowing": "27",
    "Rugby Sevens": "111",
    "Sailing": "29",
    "Shooting": "30",
    "Short Track": "36",
    "Skateboarding": "133",
    "Skeleton": "6",
    "Ski Jumping": "33",
    "Snowboard": "33",
    "Speed Skating": "36",
    "Sport Climbing": "121",
    "Surfing": "141",
    "Swimming": "38",
    "Table Tennis": "40",
    "Taekwondo": "43",
    "Tennis": "42",
    "Triathlon": "47",
    "Volleyball": "48",
    "Water Polo": "49",
    "Weightlifting": "51",
    "Wrestling - Freestyle": "52",
    "Wrestling - Greco-Roman": "52",
    "Gymnastics - Rhythmic": "20",
    "Softball": "35",
    "Synchronised Swimming": "39",
    "Equestrian - Dressage": "16",
    "Equestrian - Eventing": "16",
    "Equestrian - Jumping": "16",
}
# Recode the "Sport" column from sport names to sport codes. Values that are
# not keys of sport_dict are left as they are.
athlete_list = athlete_list.replace({"Sport": sport_dict})
# Per-sport translation of discipline names into discipline codes:
# {sport code: {discipline name: discipline code}}. All codes are strings.
# Keys that were listed twice with the same value appear only once here.
discipline_change_dict = {
    "44": {
        "4 x 100m Relay": "37",
        "4 x 400m Relay": "37",
        "4 x 400m Relay Indoor": "37",
        "60m Hurdles Indoor": "37",
        "50km Walk": "39",
        "60m Indoor": "37",
        "100m": "37",
        "110m Hurdles": "37",
        "200m": "37",
        "400m": "37",
        "400m Hurdles": "37",
        "400m Indoor": "37",
        "800m": "38",
        "800m Indoor": "38",
        "1500m": "38",
        "1500m Indoor": "38",
        "3000m Indoor": "39",
        "3000m Steeplechase": "39",
        "5000m": "39",
        "10000m": "39",
        "Decathlon": "40",
        "Heptathlon": "40",
        "Discus Throw": "35",
        "Hammer Throw": "35",
        "High Jump": "36",
        "Javelin Throw": "35",
        "Long Jump": "36",
        "Marathon": "39",
        "Pole Vault": "36",
        "Shot Put": "35",
        "Triple Jump": "36",
        "100m Hurdles": "37",
        "20km Walk": "39",
    },
    "2": {
        "Doubles": "148",
        "Singles": "148",
    },
    "5": {
        "4 x 7.5km Relay": "151",
        # NOTE(review): "10k Sprint" sits next to "10km Sprint" below; kept
        # in case the data really contains this spelling - confirm.
        "10k Sprint": "151",
        "12.5km Pursuit": "151",
        "15km Mass Start": "151",
        "20km Individual": "151",
        "4 x 6km Relay": "151",
        "7.5km Sprint": "151",
        "10km Pursuit": "151",
        "10km Sprint": "151",
        "12.5km Mass Start": "151",
        "15km Individual": "151",
        "2 x 6 + 2 x 7.5km Relay": "151",
        "6 + 7.5km Relay": "151",
    },
    "6": {
        "2-Man Competition": "2",
        "Individual": "3",
        "4-Man Competition": "2",
    },
    "8": {
        "Bantamweight": "155",
        "Flyweight": "155",
        "Light Flyweight": "155",
        "Light Heavyweight": "155",
        "Light Welterweight": "155",
        "Lightweight": "155",
        "Middleweight": "155",
        "Super Heavyweight": "155",
        "Welterweight": "155",
    },
    "9": {
        "C1 200m": "4",
        "C1 1000m": "89",
        "K1 200m": "4",
        "K1 500m": "88",
        "K1 1000m": "89",
        "K1 5000m": "89",
        "K2 500m": "88",
        "K4 1000m": "89",
        "K4 500m": "88",
        "C1": "5",
        "C1 Team": "5",
        "C2": "5",
        "C2 Team": "5",
        "K1": "5",
        "K1 Cross": "5",
        "K1 Team": "5",
    },
    "11": {
        "Team": "158",
        "Doubles": "158",
    },
    "12": {
        "Individual": "52",
        "1km Time Trial": "8",
        "3km Individual Pursuit": "8",
        "4-Cross": "57",
        "4km Individual Pursuit": "8",
        "500m Time Trial": "8",
        "Cross Country": "6",
        "Cross Country Eliminator": "6",
        "Downhill": "6",
        "Individual time trial": "7",
        "Keirin": "90",
        "Madison": "90",
        "Marathon": "6",
        "Omnium": "90",
        "Points Race": "90",
        "Road race": "7",
        "Scratch": "90",
        "Sprint": "8",
        "Team Pursuit": "8",
        "Team Sprint": "8",
    },
    "15": {
        "10m Platform": "216",
        "10m Platform Synchro": "216",
        "1m Springboard": "216",
        "3m Springboard": "216",
        "3m Springboard Synchro": "216",
    },
    "16": {
        "Individual": "10",
        # Nielson breaks on 3 levels, us on 2, so everything into 1 category
        "Team": "10",
    },
    "17": {
        "Individual Epee": "12",
        "Individual Foil": "13",
        "Individual Sabre": "13",
        "Team Epee": "12",
        "Team Foil": "13",
        "Team Sabre": "13",
    },
    "18": {
        "Team": "162",
    },
    "19": {
        "Ice Dance": "176",
        "Pairs": "176",
        "Singles": "176",
        "Team": "176",
    },
    "142": {
        "Individual": "217",
    },
    "20": {
        "Ball": "16",
        "Beam": "15",
        "Clubs": "16",
        "Floor Exercise": "15",
        "Group All-Around": "16",
        "Group Competition - 3x Clubs + 2x Hoop": "16",
        "Group Competition - 5x Hoop": "16",
        "Group Competition - 5x Ribbon": "16",
        "Hoop": "16",
        "Horizontal Bar": "15",
        "Individual All-Around": "15",
        "Parallel Bars": "15",
        "Pommel Horse": "15",
        "Individual Trampoline": "17",
        "Ribbon": "16",
        "Rings": "15",
        "Team": "15",
        "Uneven Bars": "15",
        "Vault": "15",
    },
    "21": {
        "Team": "163",
    },
    "22": {
        "+100kg": "164",
        "+78kg": "164",
        "100kg": "164",
        "48kg": "164",
        "52kg": "164",
        "57kg": "164",
        "60kg": "164",
        "63kg": "164",
        "66kg": "164",
        "70kg": "164",
        "73kg": "164",
        "78kg": "164",
        "81kg": "164",
        "90kg": "164",
    },
    "57": {
        # We have no history on Kumite, so combining all into karate
        "+68kg Kumite": "48",
        "+84kg Kumite": "48",
        "-50kg Kumite": "48",
        "-55kg Kumite": "48",
        "-60kg Kumite": "48",
        "-61kg Kumite": "48",
        "-67kg Kumite": "48",
        "-68kg Kumite": "48",
        "-75kg Kumite": "48",
        "-84kg Kumite": "48",
        "Individual Kata": "49",
        "Team Kata": "49",
        "Team Kumite": "48",
        "Individual Kumite": "48",
    },
    "23": {
        "Double": "167",
        "Single": "167",
        "Sprint - Double": "167",
        "Sprint - Single": "167",
        "Team - Relay": "167",
    },
    "24": {
        "Individual": "168",
    },
    "27": {
        "Coxed Pair": "174",
        "Double Sculls": "174",
        "Eight": "174",
        "Four": "174",
        "Lightweight Double Sculls": "174",
        "Lightweight Eight": "174",
        "Lightweight Four": "174",
        "Lightweight Pair": "174",
        "Lightweight Quadruple Sculls": "174",
        "Lightweight Single Sculls": "174",
        "Pair": "174",
        "Quadruple Sculls": "174",
        "Single Sculls": "174",
    },
    "111": {
        "Team": "69",
    },
    "29": {
        "470": "175",
        "49er": "175",
        "49er FX": "175",
        "Finn": "175",
        "Laser": "175",
        "Laser Radial": "175",
        "Nacra 17": "175",
        "RS:X": "175",
    },
    "30": {
        "10m Air Pistol": "22",
        "10m Air Rifle": "23",
        "25m Pistol": "22",
        "50m Pistol": "22",
        "50m Rifle 3 Positions": "23",
        "50m Rifle Prone": "23",
        "Double Trap": "21",
        "Skeet": "21",
        "Trap": "21",
        "25m Rapid Fire Pistol": "22",
    },
    "133": {
        "Park": "222",
        "Street": "222",
    },
    "33": {
        "5km Classic Style": "26",
        "5km Freestyle": "26",
        "10km Classic Style": "26",
        "10km Freestyle": "26",
        "15km Classic Style": "26",
        "15km Freestyle": "26",
        "30km Classic Style": "26",
        "30km Freestyle": "26",
        "2 x 10km Skiathlon": "26",
        "4 x 10km Skiathlon": "26",
        "2 x 15km Skiathlon": "26",
        "2 x 5km Skiathlon": "26",
        "2 x 7.5km Skiathlon": "26",
        "4 x 5km Relay": "26",
        "4 x 7.5km Relay": "26",
        "50km Classic Style": "26",
        "50km Freestyle": "26",
        "Aerials": "27",
        "Alpine Combined": "25",
        "Big Air": "27",
        "City Event": "25",
        "Downhill": "25",
        "Dual Moguls": "27",
        "Giant Slalom": "25",
        "Gundersen Large Hill / 10km": "29",
        "Gundersen Normal Hill / 5km": "29",
        "Gundersen Normal Hill / 10km": "29",
        "Gundersen Normal Hill / 15km": "29",
        "Halfpipe": "75",
        "Individual - Large Hill": "28",
        "Individual - Normal Hill": "28",
        "Individual - Ski Flying": "28",
        "Moguls": "27",
        "Parallel Giant Slalom": "25",
        "Parallel Slalom": "25",
        "Pursuit": "26",
        "Ski Cross": "76",
        "Slalom": "25",
        "Slopestyle": "77",
        "Snowboard Cross": "64",
        "Snowboard Cross Team": "64",
        "Sprint Classic Style": "26",
        "Sprint Freestyle": "26",
        "Super G": "25",
        "Team - Large Hill": "28",
        "Team - Normal Hill": "28",
        "Team - Ski Flying": "28",
        "Team Normal Hill / 4 x 5km": "29",
        "Team Sprint Classic Style": "26",
        "Team Sprint Freestyle": "26",
        "Team Sprint Large Hill / 2 x 7.5km": "26",
        "4 x 10km Relay": "26",
        "Team": "25",
    },
    "34": {
        "Team": "201",
    },
    "36": {
        "10000m": "136",
        "1000m": "31",
        "1500m": "31",
        "3000m": "136",
        "3000m Relay": "136",
        "5000m": "136",
        "5000m Relay": "136",
        "500m": "31",
        "Mass Start 16 Laps": "136",
        "Overall": "31",
        "Small Combination": "135",
        "Sprint Combination": "135",
        "Team Pursuit 6 Laps": "31",
        "Team Pursuit 8 Laps": "31",
    },
    "121": {
        # Was "60g": every other code is purely numeric and the neighbouring
        # codes are "58" and "59", so the stray letter is treated as a typo.
        "Bouldering": "60",
        "Combined": "59",
        "Lead": "59",
        "Speed": "58",
    },
    "141": {
        "Longboard": "79",
        "Shortboard": "79",
        "Team": "79",
    },
    "38": {
        "100m Backstroke": "84",
        "100m Backstroke SC": "84",
        "100m Breaststroke": "84",
        "100m Breaststroke SC": "84",
        "100m Butterfly": "84",
        "100m Butterfly SC": "84",
        "100m Freestyle": "84",
        "100m Freestyle SC": "84",
        "100m Individual Medley SC": "84",
        "10km": "87",
        "1500m Freestyle": "86",
        "1500m Freestyle SC": "86",
        "200m Backstroke": "85",
        "200m Backstroke SC": "85",
        "200m Breaststroke": "85",
        "200m Breaststroke SC": "85",
        "200m Butterfly": "85",
        "200m Butterfly SC": "85",
        "200m Freestyle": "85",
        "200m Freestyle SC": "85",
        "200m Individual Medley": "85",
        "200m Individual Medley SC": "85",
        "4 x 100m Freestyle Relay": "85",
        "4 x 100m Freestyle Relay SC": "85",
        "4 x 100m Medley Relay": "85",
        "4 x 100m Medley Relay SC": "85",
        "4 x 200m Freestyle Relay": "86",
        "4 x 200m Freestyle Relay SC": "86",
        "4 x 50m Freestyle Relay SC": "85",
        "4 x 50m Medley Relay SC": "85",
        "400m Freestyle": "85",
        "400m Freestyle SC": "85",
        "400m Individual Medley": "85",
        "400m Individual Medley SC": "85",
        "50m Backstroke": "84",
        "50m Backstroke SC": "84",
        "50m Breaststroke": "84",
        "50m Breaststroke SC": "84",
        "50m Butterfly": "84",
        "50m Butterfly SC": "84",
        "50m Freestyle": "84",
        "50m Freestyle SC": "84",
        "5km": "87",
        # Was " 86" with a leading blank, which matches no discipline code.
        "800m Freestyle": "86",
        "800m Freestyle SC": "86",
    },
    "39": {
        "Duet": "146",
        "Team": "146",
        "Team Free Routine": "146",
        "Team Technical Routine": "146",
    },
    "40": {
        "Doubles": "179",
        "Singles": "179",
        "Team": "179",
    },
    "43": {
        "Bantamweight 49-53kg": "139",
        "Bantamweight 58-63kg": "139",
        "Featherweight 53-57kg": "139",
        "Featherweight 63-68kg": "139",
        "Finweight -46kg": "139",
        "Finweight -54kg": "139",
        "Flyweight 46-49kg": "139",
        "Flyweight 54-58kg": "139",
        "Heavyweight +67kg": "139",
        "Heavyweight +73kg": "139",
        "Heavyweight +80kg": "139",
        "Heavyweight +87kg": "139",
        "Lightweight 57-62kg": "139",
        "Lightweight 68-74kg": "139",
        "Middleweight 67-73kg": "139",
        "Middleweight 80-87kg": "139",
        "Welterweight 57-67kg": "139",
        "Welterweight 62-67kg": "139",
        "Welterweight 68-80kg": "139",
        "Welterweight 74-80kg": "139",
    },
    "41": {
        "Team Handball": "202",
        "Team": "202",
    },
    "42": {
        "Doubles": "180",
        "Singles": "180",
    },
    "47": {
        "Olympic Distance": "80",
        "Sprint Distance": "80",
    },
    "48": {
        "Team": "42",
    },
    "51": {
        "+105kg": "224",
        "+105kg - Clean & Jerk": "224",
        "+105kg - Snatch": "224",
        "+75kg": "224",
        "+75kg Clean & Jerk": "224",
        "+75kg - Snatch": "224",
        "105kg": "224",
        "105kg - Clean & Jerk": "224",
        "105kg - Snatch": "224",
        "48kg": "224",
        "48kg - Clean & Jerk": "224",
        "48kg - Snatch": "224",
        "53kg": "224",
        "53kg - Clean & Jerk": "224",
        "53kg - Snatch": "224",
        "56kg": "224",
        "56kg - Clean & Jerk": "224",
        "56kg - Snatch": "224",
        "58kg": "224",
        "58kg - Clean & Jerk": "224",
        "58kg - Snatch": "224",
        "63kg": "224",
        "63kg - Clean & Jerk": "224",
        "63kg - Snatch": "224",
        "69kg": "224",
        "69kg - Clean & Jerk": "224",
        "69kg - Snatch": "224",
        "75kg": "224",
        "+75kg - Clean & Jerk": "224",
        "75kg - Clean & Jerk": "224",
        "75kg - Snatch": "224",
        "77kg": "224",
        "77kg - Clean & Jerk": "224",
        "77kg - Snatch": "224",
        "85kg": "224",
        "85kg - Clean & Jerk": "224",
        "85kg - Snatch": "224",
        "94kg": "224",
        "94kg - Clean & Jerk": "224",
        "94kg - Snatch": "224",
    },
    "52": {
        "125kg": "47",
        "130kg": "46",
        "48kg": "47",
        "53kg": "47",
        "57kg": "47",
        "58kg": "47",
        "59kg": "46",
        "63kg": "47",
        "65kg": "47",
        "66kg": "46",
        "69kg": "47",
        "74kg": "47",
        "75kg": "46",
        "85kg": "46",
        "86kg": "47",
        "97kg": "47",
        "98kg": "46",
    },
    "1": {
        "Compound Individual": "54",
        "Compound Team": "54",
        "Individual": "55",
        "Team": "55",
    },
    "3": {
        "Team": "149",
    },
    "4": {
        "Team": "150",
    },
    "35": {
        "Team": "177",
    },
    "49": {
        "Team": "147",
    },
}
# Flatten discipline_change_dict into a one-column frame indexed by
# (discipline name, sport code). The nested dict becomes a frame with sport
# codes as columns and discipline names as rows; stack() turns that into a
# two-level index. dropna() keeps only the pairs that are actually defined,
# whether or not stack() itself drops the empty cells of the ragged table.
# (The flat sport_dict cannot be used here: DataFrame() rejects a dict of
# plain scalars, and it has no discipline level to join on.)
sport_name_change_df = (
    pd.DataFrame(discipline_change_dict)
    .stack()
    .dropna()
    .to_frame("Renamed Discipline")
)

# Attach the discipline code to every athlete row. The left join keeps rows
# without a match; their "Renamed Discipline" is NaN.
out_df = athlete_list.merge(
    sport_name_change_df,
    how="left",
    left_on=["Discipline", "Sport"],
    right_index=True,
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement