Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import os
- import pandas as pd
- from datetime import datetime
- import re
- # Bulgarian to English month mapping
- bg_months = {
- 'януари': 'January', 'яну.': 'January',
- 'февруари': 'February', 'февр.': 'February',
- 'март': 'March',
- 'април': 'April', 'апр.': 'April',
- 'май': 'May',
- 'юни': 'June',
- 'юли': 'July',
- 'август': 'August', 'авг.': 'August',
- 'септември': 'September', 'септ.': 'September',
- 'октомври': 'October', 'окт.': 'October',
- 'ноември': 'November', 'ное.': 'November',
- 'декември': 'December', 'дек.': 'December'
- }
- def translate_bg_date(date_str):
- for bg, en in bg_months.items():
- if bg in date_str:
- date_str = date_str.replace(bg, en)
- break
- try:
- dt = datetime.strptime(date_str.strip(), "%d %B %Y %H:%M:%S")
- return dt.strftime("%Y/%m/%d %H:%M:%S")
- except:
- return None
- def process_file(file_path):
- with open(file_path, 'r', encoding='utf-8-sig') as f:
- lines = f.readlines()
- # Extract metadata using regex based on "." and "-"
- meta_line = next((line for line in lines if 'history:' in line.lower()), '').strip()
- sensor_id = room_name = sensor_type = floor = "Unknown"
- try:
- match = re.search(r'history:GATEBMS/([\w-]+)\.\s(.*?)\s-\s(\d{8})\s-\s(\w+)', meta_line)
- if match:
- floor = match.group(1) # FL0, FL1, etc.
- room_name = match.group(2) # Conference room
- sensor_id = match.group(3) # 82000968
- sensor_type = match.group(4) # Temp
- except Exception as e:
- print(f"⚠️ Metadata regex parsing failed in {file_path}: {e}")
- # Find header line for data
- data_start = 0
- for i, line in enumerate(lines):
- if 'timestamp' in line.lower() and 'value' in line.lower():
- data_start = i
- break
- try:
- df = pd.read_csv(file_path, skiprows=data_start)
- df.columns = [col.strip().lower() for col in df.columns]
- if 'timestamp' not in df.columns or 'value (°c)' not in df.columns:
- print(f"⚠️ Skipped file (missing expected columns): {file_path}")
- return pd.DataFrame()
- # Clean and convert data
- df_clean = pd.DataFrame()
- df_clean['time'] = df['timestamp'].astype(str).apply(translate_bg_date)
- df_clean['value'] = df['value (°c)'].astype(str).str.replace(',', '.').astype(float)
- df_clean['ID/key'] = sensor_id
- df_clean['floor'] = floor
- df_clean['room name'] = room_name
- df_clean['sensor type'] = sensor_type
- df_clean = df_clean.dropna(subset=['time', 'value'])
- if df_clean.empty:
- print(f"⚠️ Skipped (no valid rows): {file_path}")
- return df_clean
- except Exception as e:
- print(f"⚠️ Error reading {file_path}: {e}")
- return pd.DataFrame()
- def process_all_csvs(folder_path, output_excel):
- all_dfs = []
- for filename in os.listdir(folder_path):
- if filename.lower().endswith(".csv"):
- file_path = os.path.join(folder_path, filename)
- df = process_file(file_path)
- if not df.empty:
- all_dfs.append(df)
- else:
- print(f"⚠️ No data extracted from: {filename}")
- if all_dfs:
- merged = pd.concat(all_dfs, ignore_index=True)
- os.makedirs(os.path.dirname(output_excel), exist_ok=True)
- merged.to_excel(output_excel, index=False)
- print(f"\n✅ Done! {len(merged)} total rows saved to:\n{output_excel}")
- else:
- print("⚠️ No valid data found.")
- # ==== USAGE ====
- input_folder = r"C:/Working_desktop/GATE_stuff/Fledge/BMS_GATE_data/BSM_ALL_Sensors_1month_by_rooms/Temperature"
- output_file = r"C:/Working_desktop/GATE_stuff/Fledge/BMS_GATE_data/BSM_ALL_Sensors_1month_by_rooms/Temperature/Results/final_Merged_table_Incl_device_IDs.xlsx"
- process_all_csvs(input_folder, output_file)
Advertisement
Add Comment
Please, Sign In to add comment