Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import calendar

import pandas
import requests
from bs4 import BeautifulSoup
# Leap years from 1904 through 2020 inclusive. Every fourth year in this span
# is a leap year (2000 is divisible by 400), so a stepped range reproduces the
# full set.
leapYears = set(range(1904, 2021, 4))

# Module-level accumulator: temperatureSpan() appends one DataFrame per year
# here, and main() concatenates them into the final table.
tempDFList = []
def getTemperature(year, month, day, timeout=30):
    """Scrape one day's temperature reading for KSFO from Wunderground.

    Requests the DailyHistory page for the given date and reads the text of
    the first element matching the CSS selector ``.wx-value`` (main() labels
    this column 'Mean Temperature').

    Args:
        year: Calendar year of the observation.
        month: Month number (1-12).
        day: Day of the month.
        timeout: Seconds to wait for the HTTP response before requests raises
            a Timeout, so a stalled connection cannot hang the whole scrape.

    Returns:
        list: ``[year, month, day, reading]``. ``reading`` is the matched
        element's ``.string``, or ``None`` when the page contains no
        ``.wx-value`` element (e.g. no data for that date).

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    # Build the URL from separate pieces: a backslash continuation inside a
    # string literal would embed the next line's leading text in the URL.
    base = 'https://www.wunderground.com/history/airport/KSFO/'
    query = ('req_city=San+Francisco&req_state=CA&req_statename=California'
             '&reqdb.zip=94102&reqdb.magic=1&reqdb.wmo=99999')
    url = '{}{}/{}/{}/DailyHistory.html?{}'.format(base, year, month, day, query)

    r = requests.get(url, headers={'user-agent': 'Mozilla/5.0'}, timeout=timeout)
    soup = BeautifulSoup(r.content, 'html.parser')

    # select_one returns None when nothing matches; record a missing reading
    # instead of crashing and losing everything scraped so far.
    value = soup.select_one('.wx-value')
    reading = value.string if value is not None else None
    return [year, month, day, reading]
def temperatureSpan(startYear, endYear):
    """Scrape a temperature reading for every day from startYear to endYear.

    For each year in the inclusive range, calls getTemperature() for every
    calendar day, collects the rows into a pandas DataFrame, and appends that
    DataFrame to the module-level ``tempDFList``. Progress (year, month, day)
    is printed as the scrape advances.

    Args:
        startYear: First year to scrape (inclusive).
        endYear: Last year to scrape (inclusive).

    Returns:
        None. Results are accumulated in ``tempDFList``, one DataFrame per year.
    """
    for currentYear in range(startYear, endYear + 1):
        print(currentYear)
        temperatureList = []  # Rows for this year only; a fresh list each year
        for currentMonth in range(1, 13):
            print(currentMonth)
            # monthrange gives the true month length for any year, including
            # leap-year February and 31-day October, so no hand-written
            # per-month table is needed.
            daysInMonth = calendar.monthrange(currentYear, currentMonth)[1]
            for currentDay in range(1, daysInMonth + 1):
                print(currentDay)
                dailyTemp = getTemperature(currentYear, currentMonth, currentDay)
                temperatureList.append(dailyTemp)
        # One data frame per year, appended onto the overall list
        tempDFList.append(pandas.DataFrame(temperatureList))
def main(startYear=1950, endYear=1955):
    """Scrape the given year range and write the readings to a CSV file.

    Runs temperatureSpan() over the inclusive range, concatenates the per-year
    DataFrames collected in ``tempDFList``, labels the columns, and writes the
    result to ``temperature_data_<startYear>-<endYear>.csv`` in the current
    directory.

    Args:
        startYear: First year to scrape (inclusive). Defaults to 1950.
        endYear: Last year to scrape (inclusive). Defaults to 1955.
    """
    temperatureSpan(startYear, endYear)  # Parse temperatures for the input range of years

    # Use pandas to output temperature values into a .csv file
    dfTempFinal = pandas.concat(tempDFList)
    dfTempFinal.columns = ['Year', 'Month', 'Day', 'Mean Temperature']

    # Derive the filename from the scraped range so the two cannot disagree
    # (it was previously hard-coded to a different range than the one scraped).
    outputName = "temperature_data_{}-{}.csv".format(startYear, endYear)
    dfTempFinal.to_csv(outputName, index=False)
# Run the scrape only when executed as a script, so importing this module
# does not trigger a multi-year series of network requests.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement