Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Aggregate selected worksheets from every ``.xlsm`` workbook found under
# ``root`` into a single DataFrame, ``agg_df``.
import os
import re

import pandas as pd

# Directory tree that is walked recursively looking for workbooks.
root = r"my_dir"

# A worksheet is collected when its name contains any of these keywords.
# Matching is a case-sensitive substring search, exactly as the original
# per-keyword ``re.search`` checks behaved.
SHEET_KEYWORDS = (
    "Apples",
    "Oranges",
    "Grapes",
    "Tomatoes",
    "Peaches",
    "Pears",
    "Bananas",
    "Mangos",
)

# Compiled once so the alternation is not rebuilt for every sheet name.
SHEET_PATTERN = re.compile("|".join(re.escape(word) for word in SHEET_KEYWORDS))


def is_fruit_sheet(sheet_name):
    """Return True when *sheet_name* contains one of ``SHEET_KEYWORDS``."""
    return SHEET_PATTERN.search(sheet_name) is not None


def collect_fruit_sheets(root_dir):
    """Read every matching sheet of every ``.xlsm`` file under *root_dir*.

    Args:
        root_dir: Directory to walk recursively. A missing directory simply
            yields no files (``os.walk`` does not raise for it).

    Returns:
        A DataFrame holding the rows of all matching sheets, each row tagged
        with ``Filepath`` and ``Sheet_Name`` columns. Row indices are kept
        as read from each sheet. An empty DataFrame is returned when nothing
        matched.
    """
    frames = []
    for directory, _subdirectories, files in os.walk(root_dir):
        for file_name in files:
            if not file_name.endswith('.xlsm'):
                continue
            filepath = os.path.join(directory, file_name)
            # Open the workbook once and close it deterministically; every
            # matching sheet is read from this same handle.
            with pd.ExcelFile(filepath) as workbook:
                for sheet in workbook.sheet_names:
                    if not is_fruit_sheet(sheet):
                        continue
                    # The original passed ``sheet_names=`` (plural), which is
                    # not the ``read_excel`` parameter name, so the matched
                    # sheet was never actually the one selected.
                    df_temp = workbook.parse(sheet)
                    df_temp['Filepath'] = filepath
                    df_temp['Sheet_Name'] = sheet
                    frames.append(df_temp)

    # ``pd.concat`` raises on an empty list, so fall back to an empty frame.
    if not frames:
        return pd.DataFrame()
    # One concat replaces repeated ``DataFrame.append`` calls, which were
    # removed in pandas 2.0 and copied the accumulated data on every call.
    return pd.concat(frames)


agg_df = collect_fruit_sheets(root)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement