Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Step 1 - Download song
- # Download the music and collect its metadata (genre, artist) for the label from one of these providers:
- - `spotdl` for Spotify: https://pastebin.com/8JNTHRHc
- - `scdl` for SoundCloud: https://github.com/scdl-org/scdl
- # Optional: if each folder name corresponds to a playlist (e.g. "chill"), prepend that category to each label:
- ```python
- import os # Append prefix tags into each label.txt using folder name.
def modify_txt_files(folder_path):
    """Prepend an "in mood for <folder name>, " tag to every .txt label in a folder.

    Args:
        folder_path: Directory holding the label files. Only its last path
            component (the playlist name, e.g. "chill") is used in the tag.

    Files that already contain the tag are skipped, so the function can be
    run repeatedly on the same folder.
    """
    # Use the folder's own name rather than the raw argument so that passing
    # "music/chill" or "chill/" still yields the tag "in mood for chill, ".
    folder_name = os.path.basename(os.path.abspath(folder_path))
    prefix = f"in mood for {folder_name}, "
    for filename in os.listdir(folder_path):
        if not filename.endswith(".txt"):
            continue
        txt_path = os.path.join(folder_path, filename)
        # errors='ignore' drops undecodable bytes instead of failing on a
        # label that was saved with a broken encoding.
        with open(txt_path, 'r+', encoding='utf-8', errors='ignore') as f:
            content = f.read()
            if prefix in content:
                continue
            f.seek(0)
            f.write(prefix + content)
            # Dropped bytes can make the rewrite shorter than the old file;
            # cut off any stale tail.
            f.truncate()


if __name__ == "__main__":
    modify_txt_files("chill")
- ```
- # Step 2 - Label music.mp3 with a more descriptive caption
- # Option 1: use a Hugging Face model tagged `audio-classification`; for the model "laion/larger_clap_music_and_speech" we can use https://github.com/lyramakesmusic/clap-interrogator
- # Option 2: use a multimodal LLM such as Qwen-Audio, usage: https://pastebin.com/cPXEUrxf (or to label music: https://pastebin.com/G1BB1yTg)
- # However, after comparing quality, I moved to Gemini:
- # gemini_LabelClassification.py
- import os, base64, requests, json, time
- from termcolor import colored
- from tqdm import tqdm
# Gemini API keys; additional keys can be listed to rotate through when one
# hits the free-tier rate limit.
API_KEYS = [
    os.environ.get("GOOGLE_API_KEY"),  # from environment variable
    # "AI.API-Key2",  # If encountering free tier rate limit.
    # "AI.API-Key3",
    # "AI.API-Key4",
]

# REST endpoint root and the model used for labeling.
BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
MODEL = "gemini-1.5-pro-002"

# Headers sent with every request.
HEADERS = {"Content-Type": "application/json"}

# JSON checkpoint file listing the tracks that are already labeled.
PROCESSED_FILES = "processed_files.json"

# Index into API_KEYS of the key currently in use; starts with the first key.
api_key_index = 0
def get_next_api_key(delay=180):
    """Advance to the next entry of API_KEYS after a cool-down and return it.

    Args:
        delay: Seconds to sleep before handing out the next key. Defaults to
            180 (three minutes), the wait used after a failed request.

    Returns:
        The API key selected by the updated module-level ``api_key_index``.
    """
    global api_key_index
    # Wrap around so that a single-key list keeps reusing its only key.
    api_key_index = (api_key_index + 1) % len(API_KEYS)
    time.sleep(delay)
    return API_KEYS[api_key_index]
def generate_content_with_audio(prompt, audio_file_path, timeout=300):
    """Ask Gemini to describe an audio file, retrying until a request succeeds.

    Args:
        prompt: Instruction text sent together with the audio.
        audio_file_path: Path of the MP3 file; it is sent inline as base64.
        timeout: Seconds to wait for each HTTP request before treating it as
            a failure.

    Returns:
        The stripped text of the first candidate. Returns None when the API
        answers 200 but the body carries no usable text, so the caller can
        decide whether to retry.

    Raises:
        OSError: If the audio file cannot be read.
    """
    # The payload does not depend on the API key, so read and encode the
    # audio once instead of on every retry.
    with open(audio_file_path, "rb") as audio_file:
        audio_content = base64.b64encode(audio_file.read()).decode('utf-8')
    payload = {
        "contents": [{"parts": [{"text": prompt}, {"inlineData": {"mimeType": "audio/mp3", "data": audio_content}}]}],
        "generationConfig": {
            "temperature": 0.2,
            "topP": 0.85,
            "topK": 40,
            "maxOutputTokens": 1000,
        },
        "safetySettings": [
            {"category": cat, "threshold": "BLOCK_NONE"} for cat in [
                "HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"
            ]
        ]
    }

    while True:  # Keep retrying indefinitely until successful
        api_key = API_KEYS[api_key_index]
        url = f"{BASE_URL}/models/{MODEL}:generateContent?key={api_key}"
        tqdm.write(colored(f"Prompt: {prompt}", 'blue'))
        try:
            response = requests.post(url, headers=HEADERS, json=payload, timeout=timeout)
        except requests.RequestException as exc:
            # Log only the exception type: its message embeds the request
            # URL, which contains the API key.
            tqdm.write(colored(f"Error: request failed ({type(exc).__name__})", 'red'))
        else:
            if response.status_code == 200:
                try:
                    result = response.json()['candidates'][0]['content']['parts'][0]['text'].strip()
                except (KeyError, IndexError, TypeError, ValueError):
                    # A 200 without candidate text: report it and let the
                    # caller handle the missing label.
                    tqdm.write(colored(f"Error: {response.status_code}, {response.text}", 'red'))
                    return None
                tqdm.write(colored(f"Response: {result}", 'green'))
                return result
            tqdm.write(colored(f"Error: {response.status_code}, {response.text}", 'red'))
        # On any error, wait and switch to the next API key, then retry.
        # str() keeps the log line working when the key is unset (None).
        tqdm.write(colored(f"Switching in 3 min to next API Key: {str(api_key)[:10]}...", 'yellow'))
        get_next_api_key()
def append_at_prefix(file_path, content):
    """Put ``content`` at the start of a text file, before its existing text.

    The existing text is stripped of surrounding whitespace and separated
    from ``content`` by a single space. A missing file is treated as empty,
    so the result is a new file holding just ``content``.

    Args:
        file_path: Path of the UTF-8 text file to update.
        content: Text to place at the beginning of the file.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            existing_content = f.read().strip()
    except FileNotFoundError:
        existing_content = ""
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(f"{content} {existing_content}" if existing_content else content)
def load_processed_files():
    """Load the checkpoint of already-processed files.

    Returns:
        The object stored as JSON in PROCESSED_FILES, or an empty dict when
        that file does not exist yet.
    """
    # Open directly and handle the missing-file case, instead of checking
    # for existence first; the with-block also closes the handle.
    try:
        with open(PROCESSED_FILES, 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        return {}
def save_processed_files(processed):
    """Write the checkpoint of processed files to PROCESSED_FILES as JSON.

    Args:
        processed: JSON-serializable object to store.
    """
    # Write to a sibling temp file and swap it in, so an interruption in the
    # middle of a write cannot leave a truncated checkpoint behind.
    tmp_path = PROCESSED_FILES + ".tmp"
    with open(tmp_path, 'w') as f:
        json.dump(processed, f)
    os.replace(tmp_path, PROCESSED_FILES)
def main(folder_path="myfolder"):
    """Label every .mp3 in a folder with a Gemini-generated description.

    For each track not yet recorded in the checkpoint, the description is
    written at the start of the matching .txt file and the checkpoint is
    saved, so an interrupted run resumes where it stopped.

    Args:
        folder_path: Directory containing the .mp3 files (gemini supports
            audio files of <20MB). Defaults to "myfolder".
    """
    prompt = """You are the CLAP Interrogator. Provide a concise, 25-word description of the audio, including genre, mood, instruments, and notable nuance features and information to label for this music track."""
    processed_files = load_processed_files()
    mp3_files = [f for f in os.listdir(folder_path) if f.endswith(".mp3")]
    for filename in tqdm(mp3_files, desc="Processing", unit="file"):
        if filename in processed_files:
            continue
        stem = filename[:-4]  # file name without the ".mp3" suffix
        mp3_path = os.path.join(folder_path, filename)
        txt_path = os.path.join(folder_path, stem + ".txt")
        tqdm.write(f"Processing '{stem}.txt'")
        # append_at_prefix opens the label in 'r+' mode, so it must exist.
        if not os.path.exists(txt_path):
            open(txt_path, 'w', encoding='utf-8').close()
        while True:
            label = generate_content_with_audio(prompt, mp3_path)
            if label:
                append_at_prefix(txt_path, label)
                # Checkpoint after every track so progress survives a crash.
                processed_files[filename] = True
                save_processed_files(processed_files)
                break
            tqdm.write(colored("Error occurred. Retrying in 3 minutes...", 'yellow'))
            time.sleep(180)


if __name__ == "__main__":
    main()
- # Step 3 (optional) - Append the key and BPM estimated with `librosa` to each .txt label.
- # keyBPMlabel.py:
- import os
- import numpy as np
- import librosa # Fix: pip install "numpy>=1.22.4,<2.3.0" librosa
- from tqdm import tqdm
def extract_key_and_bpm(file_path):
    """Estimate the tempo and dominant pitch class of an audio file.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A dict ``{'tempo': <int BPM>, 'key': <note name>}``, or None when the
        file cannot be processed or the estimates are not usable.
    """
    try:
        y, sr = librosa.load(file_path, sr=None)  # Load at original sample rate
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)

        # beat_track may hand back an array; collapse it to a single value.
        tempo = float(np.mean(tempo))
        if not np.isfinite(tempo) or tempo <= 0:
            return None

        # Average the chroma energy over time and take the strongest bin.
        chroma_profile = np.mean(chroma_stft, axis=1)
        if not np.all(np.isfinite(chroma_profile)):
            return None
        key_idx = int(np.argmax(chroma_profile))
        # NOTE(review): the strongest pitch class is only a rough proxy for
        # the musical key (no major/minor distinction).
        # The chroma index is a pitch class (0 = C), not a MIDI note, so ask
        # for the bare note name; otherwise the label reads like "C-1".
        key = librosa.midi_to_note(key_idx, octave=False)

        return {
            'tempo': int(np.round(tempo)),
            'key': key
        }
    except Exception as e:
        # Best effort: report the failing file and let the caller skip it.
        print(f"Error processing {file_path}: {e}")
        return None
def process_files(folder_path):
    """Add tempo and key information to the .txt label of every .mp3 in a folder.

    An existing label gets " at tempo <N> BPM in the key of <K>" appended
    (skipped when it already ends with that text); a missing label is created
    as "Tempo: <N> BPM, Key: <K>". Tracks whose analysis fails are skipped.

    Args:
        folder_path: Directory containing the .mp3 files and their labels.
    """
    mp3_files = [f for f in os.listdir(folder_path) if f.endswith('.mp3')]
    for filename in tqdm(mp3_files, desc="Processing Files", unit="file"):
        mp3_file = os.path.join(folder_path, filename)
        # Swap only the trailing extension; str.replace would also rewrite
        # any ".mp3" that appears inside the name.
        txt_file = os.path.join(folder_path, filename[:-len('.mp3')] + '.txt')

        audio_info = extract_key_and_bpm(mp3_file)  # Extract audio information
        if not audio_info:
            continue

        if os.path.exists(txt_file):
            with open(txt_file, 'r', encoding='utf-8') as file:
                content = file.read().strip()
            suffix = f"at tempo {audio_info['tempo']} BPM in the key of {audio_info['key']}"
            # Re-running the script must not stack the same suffix twice.
            if content.endswith(suffix):
                continue
            # Avoid a leading space when the label file is empty.
            new_content = f"{content} {suffix}" if content else suffix
            with open(txt_file, 'w', encoding='utf-8') as file:
                file.write(new_content)
        else:
            # If the text file does not exist, create it with the extracted information
            with open(txt_file, 'w', encoding='utf-8') as file:
                file.write(f"Tempo: {audio_info['tempo']} BPM, Key: {audio_info['key']}")


if __name__ == "__main__":
    folder_path = r"C:\path\to\myfolder"
    process_files(folder_path)
- # Step 4 (optional) - Deduplicate: https://pastebin.com/04LgbNsR
Add Comment
Please, Sign In to add comment