Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Music Library Linter
- Validates a music library against configurable rules for:
- - Folder structure depth
- - Allowed file extensions
- - Lossy audio file placement
- - Artist/album picture requirements
- - Album folder naming conventions
- - Track numbering in song files
- Written largely by GLM preview
- """
- import os
- import re
- import logging
- from pathlib import Path
- from typing import List, Set, Dict, Optional
- # =============================================================================
- # CONFIGURABLE CONSTANTS
- # =============================================================================
# Root directory of the music library that gets linted.
LIBRARY_PATH = "../Library"

# File that receives the lint report.
LOG_FILE = "lint.log"

# Extensions (lowercase, no dot) treated as lossy audio.
LOSSY_EXTENSIONS: Set[str] = {"opus", "mp3", "m4a"}

# Marker a folder path must contain for lossy audio to be accepted there.
LOSSY_INDICATOR = "[Lossy]"

# Extensions (lowercase, no dot) treated as pictures.
PICTURE_EXTENSIONS: Set[str] = {"jpg", "png"}

# Every extension allowed anywhere in the library: lossy audio, pictures,
# plus lossless audio and text/log sidecar files.
ALLOWED_EXTENSIONS: Set[str] = {
    *LOSSY_EXTENSIONS,
    *PICTURE_EXTENSIONS,
    "flac",
    "txt",
    "log",
}

# Upper bound for a picture file, in bytes (1 MiB).
MAX_PICTURE_SIZE = 1024 * 1024

# File stems (lowercase) that identify the artist picture and the album cover.
ARTIST_PICTURE_NAMES = {"artist"}
ALBUM_PICTURE_NAMES = {"cover"}

# Markers accepted in an album folder name in place of a year.
VALID_YEAR_MARKERS = {"(unknown)", "(various)"}

# Regex patterns; a folder path matching any of them is exempt from the
# track-number requirement.
TRACK_NUMBER_WHITELIST: List[str] = [
    r"\[Lossy\]",  # Lossy folders
]

# Regex a file stem must match to count as starting with a track number.
TRACK_NUMBER_PATTERN = r"^\d+\s*[-.\s]?\s*.+"
- # =============================================================================
- # LOGGING SETUP
- # =============================================================================
def setup_logging(
    log_file: str,
    console_level: int = logging.INFO,
    file_level: int = logging.INFO,
) -> logging.Logger:
    """
    Configure the "music_linter" logger with a console and a file handler.

    The logger itself is set to DEBUG so that each handler decides what it
    emits. Both handlers default to INFO, which means DEBUG records are
    dropped unless a level is lowered to ``logging.DEBUG``.

    Args:
        log_file: Path of the log file; it is truncated on every setup.
        console_level: Minimum level written to the console handler.
        file_level: Minimum level written to the log file.
    Returns:
        The configured logger. If it already has handlers it is returned
        unchanged, so repeated calls never duplicate output.
    """
    logger = logging.getLogger("music_linter")
    logger.setLevel(logging.DEBUG)

    # Prevent duplicate handlers if called multiple times
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        "%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(console_level)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler; mode "w" starts a fresh report on every run
    file_handler = logging.FileHandler(log_file, mode="w", encoding="utf-8")
    file_handler.setLevel(file_level)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    return logger
# Module-wide logger used by every rule; set up once when the module loads.
logger: logging.Logger = setup_logging(LOG_FILE)
- # =============================================================================
- # HELPER FUNCTIONS
- # =============================================================================
def get_extension(filename: str) -> str:
    """Return the lowercase extension of ``filename`` without the dot ("" if none)."""
    suffix = Path(filename).suffix
    lowered = suffix.lower()
    return lowered.lstrip(".")
def get_artist_album_path(folder_path: str) -> str:
    """
    Shorten a folder path to its last two components for warning messages.

    Paths with fewer than two components are returned unchanged.
    """
    components = Path(folder_path).parts
    if len(components) < 2:
        return folder_path
    parent_name, folder_name = components[-2:]
    return str(Path(parent_name).joinpath(folder_name))
def is_audio_file(filename: str) -> bool:
    """
    Check if file is an audio file.

    Audio means lossless "flac" plus every extension in LOSSY_EXTENSIONS,
    so lossy formats declared there (e.g. "m4a") are recognised here too.
    """
    return get_extension(filename) in LOSSY_EXTENSIONS | {"flac"}
def is_picture_file(filename: str) -> bool:
    """Tell whether ``filename`` has one of the PICTURE_EXTENSIONS."""
    return get_extension(filename) in PICTURE_EXTENSIONS
def matches_whitelist(path: str, patterns: List[str]) -> bool:
    """Return True when any regex in ``patterns`` is found in ``path`` (case-insensitive)."""
    return any(
        re.search(regex, path, re.IGNORECASE) is not None
        for regex in patterns
    )
def has_track_number(filename: str) -> bool:
    """
    Tell whether the file stem matches TRACK_NUMBER_PATTERN.

    Accepted shapes include:
    - "0001 song", "01. song", "01 - song", "01-song", "01 song", "1 song"
    """
    stem = Path(filename).stem
    found = re.match(TRACK_NUMBER_PATTERN, stem)
    return found is not None
def _get_track_number(filename: str) -> Optional[int]:
    """
    Parse the leading track number of a filename.

    The name must start with digits that are immediately followed by a
    character other than an ASCII letter or digit; otherwise None is returned.
    """
    leading = re.match(r'^(\d+)[^a-zA-Z0-9]', filename)
    return int(leading.group(1)) if leading else None
def extract_year_from_folder(
    folder_name: str,
    valid_markers: Optional[Set[str]] = None
) -> Optional[str]:
    """
    Extract the year (or a year marker) from an album folder name.

    Args:
        folder_name: Name of the album folder.
        valid_markers: Markers accepted instead of a year; defaults to
            VALID_YEAR_MARKERS. Matching is case-insensitive and anywhere
            in the name.
    Returns:
        The matching marker, the four-digit year as a string, or None when
        the name carries neither.
    """
    if valid_markers is None:
        valid_markers = VALID_YEAR_MARKERS

    # Check for special markers; sorted so the result does not depend on
    # set iteration order when a name contains more than one marker.
    lowered = folder_name.lower()
    for marker in sorted(valid_markers):
        if marker.lower() in lowered:
            return marker

    # Check for year in parentheses: (2020)
    year_match = re.search(r"\((\d{4})\)", folder_name)
    if year_match:
        return year_match.group(1)

    # Check for year at start: "2020 - Album Name"
    start_year_match = re.match(r"^(\d{4})\s*[-.\s]", folder_name)
    if start_year_match:
        return start_year_match.group(1)

    return None
def emit_path_warnings(
    violations: Dict[str, List[str]],
    warning_message: str
) -> int:
    """
    Log every collected violation, one warning per path.

    Args:
        violations: Dict mapping path -> list of detail entries for that path
        warning_message: Text placed before the path in each warning
    Returns:
        Number of paths present in ``violations``
    """
    for location in violations:
        logger.warning(f"{warning_message}: {location}")
        # Individual entries are logged at DEBUG level only.
        for entry in violations[location]:
            logger.debug(f" File: {entry}")
    return len(violations)
- # =============================================================================
- # LINTER RULES
- # =============================================================================
def rule_folder_depth(
    library_path: str,
    max_depth: int = 2
) -> int:
    """
    Rule: the library is laid out as artist/album/song.ext, so no folder may
    sit more than ``max_depth`` levels below the library root.
    Returns: Number of violations found.
    """
    logger.info("Checking folder structure depth...")
    base = Path(library_path)
    too_deep: Dict[str, List[str]] = {}

    for current_dir, _subdirs, _files in os.walk(library_path):
        nesting = len(Path(current_dir).relative_to(base).parts)
        if nesting <= max_depth:
            continue
        # Group offending folders under their shortened two-component path.
        too_deep.setdefault(get_artist_album_path(current_dir), []).append(current_dir)

    count = emit_path_warnings(too_deep, "Folder depth violation")
    logger.info(f"Folder depth violations: {count}")
    return count
def rule_allowed_extensions(
    library_path: str,
    allowed: Set[str] = None
) -> int:
    """
    Rule: every file with an extension must use one from ``allowed``
    (ALLOWED_EXTENSIONS when omitted). Files without an extension are ignored.
    Returns: Number of violations found.
    """
    if allowed is None:
        allowed = ALLOWED_EXTENSIONS
    logger.info("Checking for disallowed file extensions...")

    # Per shortened path: the offending extensions and the offending files.
    bad_exts: Dict[str, Set[str]] = {}
    bad_files: Dict[str, List[str]] = {}

    for current_dir, _subdirs, filenames in os.walk(library_path):
        for name in filenames:
            ext = get_extension(name)
            if not ext or ext in allowed:
                continue
            location = get_artist_album_path(current_dir)
            bad_exts.setdefault(location, set()).add(ext)
            bad_files.setdefault(location, []).append(f"{name} (.{ext})")

    # One warning per path, listing the extensions found there.
    for location, exts in bad_exts.items():
        listing = ", ".join(f".{e}" for e in sorted(exts))
        logger.warning(f"Disallowed file extensions: {location} ({listing})")
        for entry in bad_files[location]:
            logger.debug(f" File: {entry}")

    count = len(bad_exts)
    logger.info(f"Disallowed extension violations: {count}")
    return count
def rule_lossy_folder_placement(
    library_path: str,
    lossy_exts: Optional[Set[str]] = None,
    indicator: Optional[str] = None
) -> int:
    """
    Rule: Non-flac audio files should only exist in folders containing [Lossy].

    Args:
        library_path: Root of the library to walk.
        lossy_exts: Extensions (lowercase, no dot) considered lossy;
            defaults to LOSSY_EXTENSIONS.
        indicator: Text that marks a folder as allowed to hold lossy files
            (matched case-insensitively); defaults to LOSSY_INDICATOR.
    Returns: Number of violations found.
    """
    if lossy_exts is None:
        lossy_exts = LOSSY_EXTENSIONS
    if indicator is None:
        indicator = LOSSY_INDICATOR
    logger.info("Checking lossy audio file placement...")

    violations: Dict[str, List[str]] = {}
    needle = indicator.lower()

    for root, _dirs, files in os.walk(library_path):
        # Match only the part below the library root; otherwise a library
        # path that itself contains the indicator would exempt every folder.
        relative = os.path.relpath(root, library_path)
        if needle in relative.lower():
            continue
        for filename in files:
            if get_extension(filename) in lossy_exts:
                violations.setdefault(get_artist_album_path(root), []).append(filename)

    count = emit_path_warnings(violations, "Lossy file outside [Lossy] folder")
    logger.info(f"Lossy placement violations: {count}")
    return count
def rule_artist_picture(
    library_path: str,
    picture_exts: Optional[Set[str]] = None,
    picture_names: Optional[Set[str]] = None,
    max_size: Optional[int] = None
) -> int:
    """
    Rule: Artist folders should contain exactly one artist picture.

    A file counts as an artist picture when its extension is in
    ``picture_exts`` and its lowercase stem is in ``picture_names``.

    Args:
        library_path: Root of the library; each sub-folder is an artist.
        picture_exts: Accepted extensions (lowercase, no dot); defaults to
            PICTURE_EXTENSIONS.
        picture_names: Accepted lowercase stems; defaults to
            ARTIST_PICTURE_NAMES.
        max_size: Accepted for interface compatibility only. The picture
            size check is disabled, so this value is not enforced.
    Returns: Number of violations found.
    """
    if picture_exts is None:
        picture_exts = PICTURE_EXTENSIONS
    if picture_names is None:
        picture_names = ARTIST_PICTURE_NAMES
    logger.info("Checking artist pictures...")

    violations: Dict[str, List[str]] = {}

    # Sorted so warnings come out in a stable order regardless of filesystem.
    for artist_folder in sorted(Path(library_path).iterdir()):
        if not artist_folder.is_dir():
            continue

        artist_pictures = sorted(
            item for item in artist_folder.iterdir()
            if item.is_file()
            and item.suffix.lower().lstrip(".") in picture_exts
            and item.stem.lower() in picture_names
        )

        # Folder names are unique under one root, so each key is set once.
        if not artist_pictures:
            violations[artist_folder.name] = ["Missing artist picture"]
        elif len(artist_pictures) > 1:
            violations[artist_folder.name] = [
                f"Multiple artist pictures: {[p.name for p in artist_pictures]}"
            ]
        # NOTE: picture size is deliberately not checked (see ``max_size``).

    count = emit_path_warnings(violations, "Artist picture issue")
    logger.info(f"Artist picture violations: {count}")
    return count
def rule_album_picture(
    library_path: str,
    picture_exts: Optional[Set[str]] = None,
    picture_names: Optional[Set[str]] = None,
    max_size: Optional[int] = None
) -> int:
    """
    Rule: Album folders should contain exactly one cover picture.

    A file counts as a cover when its extension is in ``picture_exts`` and
    its lowercase stem is in ``picture_names``.

    Args:
        library_path: Root of the library, laid out as artist/album.
        picture_exts: Accepted extensions (lowercase, no dot); defaults to
            PICTURE_EXTENSIONS.
        picture_names: Accepted lowercase stems; defaults to
            ALBUM_PICTURE_NAMES.
        max_size: Accepted for interface compatibility only. The picture
            size check is disabled, so this value is not enforced.
    Returns: Number of violations found.
    """
    if picture_exts is None:
        picture_exts = PICTURE_EXTENSIONS
    if picture_names is None:
        picture_names = ALBUM_PICTURE_NAMES
    logger.info("Checking album pictures...")

    violations: Dict[str, List[str]] = {}

    # Sorted so warnings come out in a stable order regardless of filesystem.
    for artist_folder in sorted(Path(library_path).iterdir()):
        if not artist_folder.is_dir():
            continue
        for album_folder in sorted(artist_folder.iterdir()):
            if not album_folder.is_dir():
                continue

            album_pictures = sorted(
                item for item in album_folder.iterdir()
                if item.is_file()
                and item.suffix.lower().lstrip(".") in picture_exts
                and item.stem.lower() in picture_names
            )

            # artist/album pairs are unique, so each key is set once.
            path_key = f"{artist_folder.name}/{album_folder.name}"
            if not album_pictures:
                violations[path_key] = [
                    "Missing album picture (expected cover.jpg or cover.png)"
                ]
            elif len(album_pictures) > 1:
                violations[path_key] = [
                    f"Multiple album pictures: {[p.name for p in album_pictures]}"
                ]
            # NOTE: picture size is deliberately not checked (see ``max_size``).

    count = emit_path_warnings(violations, "Album picture issue")
    logger.info(f"Album picture violations: {count}")
    return count
def rule_album_year_prefix(
    library_path: str,
    valid_markers: Optional[Set[str]] = None
) -> int:
    """
    Rule: Album folder names should have a year prefix or special marker
    like "(unknown)" or "(various)".

    Args:
        library_path: Root of the library, laid out as artist/album.
        valid_markers: Markers accepted instead of a year (matched
            case-insensitively anywhere in the folder name); defaults to
            VALID_YEAR_MARKERS. Whatever extract_year_from_folder accepts
            is accepted as well.
    Returns: Number of violations found.
    """
    if valid_markers is None:
        valid_markers = VALID_YEAR_MARKERS
    logger.info("Checking album folder naming...")

    violations: Dict[str, List[str]] = {}
    lowered_markers = [marker.lower() for marker in valid_markers]
    # Sorted so the detail message is identical from run to run.
    expected = sorted(valid_markers)

    for artist_folder in Path(library_path).iterdir():
        if not artist_folder.is_dir():
            continue
        for album_folder in artist_folder.iterdir():
            if not album_folder.is_dir():
                continue
            name = album_folder.name
            # Honour the caller-supplied markers before the generic check.
            lowered_name = name.lower()
            if any(marker in lowered_name for marker in lowered_markers):
                continue
            if extract_year_from_folder(name) is not None:
                continue
            path_key = f"{artist_folder.name}/{name}"
            violations.setdefault(path_key, []).append(
                f"Missing year prefix (expected (YYYY) or {expected})"
            )

    count = emit_path_warnings(violations, "Album folder missing year prefix")
    logger.info(f"Album year prefix violations: {count}")
    return count
def rule_track_numbers(
    library_path: str,
    whitelist: List[str] = None
) -> int:
    """
    Rule: every audio file name should begin with a track number.
    Folders whose path matches a whitelist pattern are exempt
    (TRACK_NUMBER_WHITELIST when ``whitelist`` is omitted).
    Returns: Number of violations found.
    """
    if whitelist is None:
        whitelist = TRACK_NUMBER_WHITELIST
    logger.info("Checking song track numbers...")

    violations: Dict[str, List[str]] = {}

    for current_dir, _subdirs, filenames in os.walk(library_path):
        # The exemption depends on the folder only, so test it once per folder.
        if matches_whitelist(current_dir, whitelist):
            continue
        unnumbered = [
            name for name in filenames
            if is_audio_file(name) and not has_track_number(name)
        ]
        if unnumbered:
            violations.setdefault(get_artist_album_path(current_dir), []).extend(unnumbered)

    count = emit_path_warnings(violations, "Song missing track number")
    logger.info(f"Track number violations: {count}")
    return count
def rule_duplicate_track_numbers(library_path: str) -> int:
    """
    Rule: No two tracks in the same album should have the same track number.

    Only audio files (flac plus LOSSY_EXTENSIONS) are considered, so
    sidecar files such as "2020.log" or "01 - notes.txt" cannot be mistaken
    for tracks.
    Returns: Number of violations found.
    """
    logger.info("Checking for duplicate track numbers...")
    audio_exts = LOSSY_EXTENSIONS | {"flac"}
    violations: Dict[str, List[str]] = {}

    for root, _dirs, files in os.walk(library_path):
        # Track number -> audio files in this folder that carry it.
        track_map: Dict[int, List[str]] = {}
        for filename in files:
            if get_extension(filename) not in audio_exts:
                continue
            track_num = _get_track_number(filename)
            if track_num is not None:
                track_map.setdefault(track_num, []).append(filename)

        path_key = get_artist_album_path(root)
        for track_num, filenames in track_map.items():
            if len(filenames) > 1:
                error_msg = f"Track {track_num} assigned to multiple files: {', '.join(filenames)}"
                violations.setdefault(path_key, []).append(error_msg)
                logger.debug(f"Duplicate found in {path_key}: {error_msg}")

    count = emit_path_warnings(violations, "Duplicate track numbers")
    logger.info(f"Duplicate track violations: {count}")
    return count
def rule_sequential_track_numbers(library_path: str) -> int:
    """
    Rule: Track numbers should be sequential (start from 1, no gaps).

    Only audio files (flac plus LOSSY_EXTENSIONS) are considered. Missing
    numbers are computed against the full range 1..highest track found, so
    every absent number is reported (e.g. tracks 1, 3, 5 miss 2 and 4).
    Returns: Number of violations found.
    """
    logger.info("Checking for sequential track numbering...")
    audio_exts = LOSSY_EXTENSIONS | {"flac"}
    max_listed = 20  # keep detail lines readable when numbering is far off
    violations: Dict[str, List[str]] = {}

    for root, _dirs, files in os.walk(library_path):
        # Distinct track numbers of the audio files in this folder.
        parsed = (
            _get_track_number(f) for f in files
            if get_extension(f) in audio_exts
        )
        track_nums = sorted({num for num in parsed if num is not None})
        if not track_nums:
            continue

        path_key = get_artist_album_path(root)
        album_violations: List[str] = []

        # Check if sequence starts at 1
        if track_nums[0] != 1:
            album_violations.append(f"Starts at {track_nums[0]} instead of 1")

        # Every number between 1 and the highest track must be present.
        missing = sorted(set(range(1, track_nums[-1] + 1)).difference(track_nums))
        if missing:
            if len(missing) > max_listed:
                shown = ", ".join(str(num) for num in missing[:max_listed])
                album_violations.append(
                    f"Missing track numbers: [{shown}, ...] ({len(missing)} total)"
                )
            else:
                album_violations.append(f"Missing track numbers: {missing}")

        # Report where each break in the sequence occurs.
        for lower, upper in zip(track_nums, track_nums[1:]):
            if upper != lower + 1:
                album_violations.append(f"Gap detected between {lower} and {upper}")

        if album_violations:
            violations[path_key] = album_violations
            for detail in album_violations:
                logger.debug(f"Sequence error in {path_key}: {detail}")

    count = emit_path_warnings(violations, "Non-sequential track numbers")
    logger.info(f"Sequential track violations: {count}")
    return count
- # =============================================================================
- # MAIN FUNCTION
- # =============================================================================
def count_album_folders(library_path: str) -> int:
    """Return how many second-level (artist/album) directories the library holds."""
    artist_dirs = (
        entry for entry in Path(library_path).iterdir() if entry.is_dir()
    )
    return sum(
        1
        for artist_dir in artist_dirs
        for child in artist_dir.iterdir()
        if child.is_dir()
    )
def main() -> int:
    """
    Main entry point for the music library linter.

    Runs every enabled rule against LIBRARY_PATH and reports the sum of
    their violation counts.

    Returns:
        1 when the library path is not an existing directory, otherwise the
        total number of violations across all enabled rules (0 when clean).
    """
    total_violations = 0
    print("\nMusic Library Linter")
    print(f"Library: {LIBRARY_PATH}")
    print(f"Log file: {LOG_FILE}\n")

    # Verify library path is a directory; the rules iterate over it.
    if not os.path.isdir(LIBRARY_PATH):
        logger.error(f"Library path does not exist or is not a directory: {LIBRARY_PATH}")
        return 1

    # Count folders for progress
    album_count = count_album_folders(LIBRARY_PATH)
    print(f"Found {album_count} album folders to process.\n")

    # -------------------------------------------------------------------------
    # RULE CALLS - Comment out any rules you want to disable
    # -------------------------------------------------------------------------
    print("Processing rule: folder depth...")
    total_violations += rule_folder_depth(LIBRARY_PATH)
    print("Processing rule: allowed extensions...")
    total_violations += rule_allowed_extensions(LIBRARY_PATH)
    print("Processing rule: lossy folder placement...")
    total_violations += rule_lossy_folder_placement(LIBRARY_PATH)
    print("Processing rule: artist pictures...")
    total_violations += rule_artist_picture(LIBRARY_PATH)
    print("Processing rule: album pictures...")
    total_violations += rule_album_picture(LIBRARY_PATH)
    #print("Processing rule: album year prefixes...")
    #total_violations += rule_album_year_prefix(LIBRARY_PATH)
    #print("Processing rule: track numbers...")
    #total_violations += rule_track_numbers(LIBRARY_PATH)
    # Accumulate (+=) here as well: plain assignment would throw away the
    # counts gathered by all the rules above.
    print("Processing rule: unique track numbers...")
    total_violations += rule_duplicate_track_numbers(LIBRARY_PATH)
    print("Processing rule: sequential track numbers...")
    total_violations += rule_sequential_track_numbers(LIBRARY_PATH)
    # -------------------------------------------------------------------------

    # Report final results
    print("\n" + "=" * 50)
    if total_violations == 0:
        logger.info("Linter complete. Total violations: 0")
        print("All checks passed! No violations found.")
    else:
        logger.warning(f"Linter complete. Total violations: {total_violations}")
        print(f"Found {total_violations} total violation(s).")
        print(f"See {LOG_FILE} for details.")
    print("=" * 50 + "\n")
    return total_violations
if __name__ == "__main__":
    # The violation count doubles as the exit status. Statuses are a single
    # byte on POSIX, so cap at 255; otherwise 256 violations would be
    # reported to the shell as 0 (success). SystemExit also avoids relying
    # on the interactive ``exit`` helper, which the ``site`` module installs.
    raise SystemExit(min(main(), 255))
Add Comment
Please, Sign In to add comment