Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import matplotlib.pyplot as plt
- import random
- import seaborn as sns
- import numpy as np
- from scipy import stats
- from random import randint
# Number of rows pandas parses per chunk while streaming each CSV file.
CHUNK_SIZE = 1000000


def load_csv_in_chunks(path, chunksize=CHUNK_SIZE):
    """Read the CSV file at *path* chunk by chunk and return one DataFrame.

    Args:
        path: Location of the CSV file to read.
        chunksize: Maximum number of rows parsed per chunk.

    Returns:
        A single DataFrame built by concatenating all chunks in file order.

    Raises:
        ValueError: Propagated from ``pd.concat`` when the reader yields
            no chunks at all.
    """
    # Using the reader as a context manager closes the underlying file
    # handle as soon as every chunk has been consumed.
    with pd.read_csv(path, chunksize=chunksize, low_memory=False) as reader:
        # Any per-chunk filtering belongs here, before the chunks are joined.
        chunks = list(reader)
    return pd.concat(chunks)


# import data files
# Every file gets its own reader inside the helper: a chunked reader can be
# iterated only once, so reusing one for a second pass would yield nothing.
intensity_meta_concat = load_csv_in_chunks(
    r'utwente intensiteiten groot amsterdam 1 dag met metadata (2)_intensiteit_00001.csv'
)
travel_times_1_concat = load_csv_in_chunks(
    r'utwente reistijden groot amsterdam _reistijd_00001.csv'
)
travel_times_2_concat = load_csv_in_chunks(
    r'utwente reistijden groot amsterdam _reistijd_00002.csv'
)
travel_times_3_concat = load_csv_in_chunks(
    r'utwente reistijden groot amsterdam _reistijd_00003.csv'
)
travel_times_4_concat = load_csv_in_chunks(
    r'utwente reistijden groot amsterdam _reistijd_00004.csv'
)
# These two files need to be loaded too, each into its own DataFrame,
# following the same `<name>_concat` naming as the frames above.
travel_times_meta_concat = load_csv_in_chunks(
    r'utwente reistijden groot amsterdam 1 dag met metadata_reistijd_00001.csv'
)
speed_meta_concat = load_csv_in_chunks(
    r'utwente snelheden groot amsterdam 1 dag met metadata_snelheid_00001.csv'
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement