Guest User

WorkflowMusicDownload_Labeling.py

a guest
Nov 18th, 2024
129
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.12 KB | Source Code | 0 0
  1. #  Step 1 - Download song
  2. # Download music and get metadata genre,artist for label, from provider:
  3. - `spotdl` for Spotify:    https://pastebin.com/8JNTHRHc
  4. - `scdl`   for SoundCloud: https://github.com/scdl-org/scdl
  5.  
  6. # Optional: if each folder name corresponds to a playlist (e.g. "chill"), prepend it as a category tag to each label:
  7. ```python
  8. import os # Append prefix tags into each label.txt using folder name.
  9. def modify_txt_files(folder_path):
  10.     for filename in os.listdir(folder_path):
  11.         if filename.endswith(".txt"):
  12.             with open(os.path.join(folder_path, filename), 'r+', encoding='utf-8', errors='ignore') as f:
  13.                 content, prefix = f.read(), f"in mood for {folder_path}, "
  14.                 if prefix not in content: f.seek(0), f.write(prefix + content)
  15. if __name__ == "__main__": modify_txt_files("chill")
  16. ```
  17.  
  18.  
  19. #  Step 2 - more descriptive Label music.mp3
  20.  
  21. # Option 1: use a Hugging Face model tagged audio-classification; for the model "laion/larger_clap_music_and_speech" we can use https://github.com/lyramakesmusic/clap-interrogator
  22.  
  23. # Option 2: use a multimodal LLM such as Qwen-Audio, usage: https://pastebin.com/cPXEUrxf (or to label music: https://pastebin.com/G1BB1yTg)
  24.  
  25. # However, after comparing quality, I moved to Gemini:
  26. # gemini_LabelClassification.py
  27. import os, base64, requests, json, time
  28. from termcolor import colored
  29. from tqdm import tqdm
  30.  
  31. API_KEYS = [
  32.     os.environ.get("GOOGLE_API_KEY"),  # from environment variable
  33.     #"AI.API-Key2", # If encountering free tier rate limit.
  34.     #"AI.API-Key3",
  35.     #"AI.API-Key4",
  36. ]
  37.  
  38. BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
  39. HEADERS, PROCESSED_FILES = {"Content-Type": "application/json"}, "processed_files.json"
  40. MODEL = "gemini-1.5-pro-002"
  41. api_key_index = 0  # Start with the first key
  42.  
  43. def get_next_api_key():
  44.     global api_key_index
  45.     api_key_index = (api_key_index + 1) % len(API_KEYS)  # Only increment if error occurs
  46.     time.sleep(180)
  47.     return API_KEYS[api_key_index]
  48.  
  49. def generate_content_with_audio(prompt, audio_file_path):
  50.     global api_key_index
  51.     while True:  # Keep retrying indefinitely until successful
  52.         api_key = API_KEYS[api_key_index]
  53.         url = f"{BASE_URL}/models/{MODEL}:generateContent?key={api_key}"
  54.        
  55.         # Read the audio file and encode it in base64
  56.         with open(audio_file_path, "rb") as audio_file:
  57.             audio_content = base64.b64encode(audio_file.read()).decode('utf-8')
  58.        
  59.         # Construct the request payload
  60.         payload = {
  61.             "contents": [{"parts": [{"text": prompt}, {"inlineData": {"mimeType": "audio/mp3", "data": audio_content}}]}],
  62.             "generationConfig": {
  63.                 "temperature": 0.2,
  64.                 "topP": 0.85,
  65.                 "topK": 40,
  66.                 "maxOutputTokens": 1000,
  67.             },
  68.             "safetySettings": [
  69.                 {"category": cat, "threshold": "BLOCK_NONE"} for cat in [
  70.                     "HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH",
  71.                     "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"
  72.                 ]
  73.             ]
  74.         }
  75.        
  76.         # Make the API request
  77.         tqdm.write(colored(f"Prompt: {prompt}", 'blue'))
  78.         response = requests.post(url, headers=HEADERS, json=payload)
  79.        
  80.         # Handle a successful response
  81.         if response.status_code == 200:
  82.             result = response.json()['candidates'][0]['content']['parts'][0]['text'].strip()
  83.             tqdm.write(colored(f"Response: {result}", 'green'))
  84.             return result
  85.  
  86.         # On any error, switch to the next API key and retry
  87.         else:
  88.             tqdm.write(colored(f"Error: {response.status_code}, {response.text}", 'red'))
  89.             tqdm.write(colored(f"Switching in 3 min to next API Key: {api_key[:10]}...", 'yellow'))
  90.             api_key = get_next_api_key()
  91.  
  92.  
  93. def append_at_prefix(file_path, content):
  94.     with open(file_path, 'r+', encoding='utf-8') as f:
  95.         existing_content = f.read().strip()
  96.         f.seek(0, 0)
  97.         f.write(f"{content} {existing_content}" if existing_content else content)
  98.         f.truncate()
  99.  
  100. def load_processed_files():
  101.     return json.load(open(PROCESSED_FILES, 'r')) if os.path.exists(PROCESSED_FILES) else {}
  102.  
  103. def save_processed_files(processed):
  104.     json.dump(processed, open(PROCESSED_FILES, 'w'))
  105.  
  106. def main():
  107.     folder_path = "myfolder" # gemini supports audio files of <20MB
  108.     prompt = """You are the CLAP Interrogator. Provide a concise, 25-word description of the audio, including genre, mood, instruments, and notable nuance features and information to label for this music track."""
  109.     processed_files = load_processed_files()
  110.     mp3_files = [f for f in os.listdir(folder_path) if f.endswith(".mp3")]
  111.     for filename in tqdm(mp3_files, desc="Processing", unit="file"):
  112.         if filename in processed_files: continue
  113.         mp3_path, txt_path = os.path.join(folder_path, filename), os.path.join(folder_path, filename[:-4] + ".txt")
  114.         tqdm.write(f"Processing '{filename[:-4]}.txt'")
  115.         if not os.path.exists(txt_path): open(txt_path, 'w', encoding='utf-8').close()
  116.         while True:
  117.             label = generate_content_with_audio(prompt, mp3_path)
  118.             if label:
  119.                 append_at_prefix(txt_path, label)
  120.                 processed_files[filename] = True
  121.                 save_processed_files(processed_files)
  122.                 break
  123.             else:
  124.                 tqdm.write(colored(f"Error occurred. Retrying in 3 minutes...", 'yellow'))
  125.                 time.sleep(180)
  126.  
  127. if __name__ == "__main__":
  128.     main()
  129.  
  130.  
  131. # Step 3 Optional. Append key+BPM in .txt from `librosa`.
  132. # keyBPMlabel.py:
  133. import os
  134. import numpy as np
  135. import librosa # Fix: pip install "numpy>=1.22.4,<2.3.0" librosa
  136. from tqdm import tqdm
  137.  
  138. def extract_key_and_bpm(file_path):
  139.     try:
  140.         y, sr = librosa.load(file_path, sr=None)  # Load at original sample rate
  141.         # Extract features relevant for music generation
  142.         tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
  143.         chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
  144.         if isinstance(tempo, np.ndarray): # Ensure tempo is a single value
  145.             tempo = tempo.mean()
  146.         if np.isnan(tempo) or np.isinf(tempo) or tempo <= 0: # Check for invalid tempo values
  147.             return None
  148.  
  149.         key_idx = np.argmax(np.mean(chroma_stft, axis=1))
  150.         key = librosa.hz_to_note(440 * 2**((key_idx - 69)/12))
  151.         if np.isnan(key_idx) or np.isinf(key_idx): # Check for invalid key values
  152.             return None
  153.         return {
  154.             'tempo': int(np.round(tempo)),  # Use np.round for numpy arrays and floats
  155.             'key': key
  156.         }
  157.     except Exception as e:
  158.         print(f"Error processing {file_path}: {e}")
  159.         return None
  160.  
  161. def process_files(folder_path):
  162.     mp3_files = [f for f in os.listdir(folder_path) if f.endswith('.mp3')]
  163.     for filename in tqdm(mp3_files, desc="Processing Files", unit="file"):
  164.         mp3_file = os.path.join(folder_path, filename)
  165.         txt_file = os.path.join(folder_path, filename.replace('.mp3', '.txt'))
  166.         audio_info = extract_key_and_bpm(mp3_file) # Extract audio information
  167.  
  168.         if audio_info:
  169.             if os.path.exists(txt_file):
  170.                 with open(txt_file, 'r', encoding='utf-8') as file:
  171.                     content = file.read().strip()
  172.                 # Append key and BPM information
  173.                 new_content = f"{content} at tempo {audio_info['tempo']} BPM in the key of {audio_info['key']}"
  174.                
  175.                 # Write the new content back to the .txt file
  176.                 with open(txt_file, 'w', encoding='utf-8') as file:
  177.                     file.write(new_content)
  178.             else:
  179.                 # If the text file does not exist, create it with the extracted information
  180.                 with open(txt_file, 'w', encoding='utf-8') as file:
  181.                     file.write(f"Tempo: {audio_info['tempo']} BPM, Key: {audio_info['key']}")
  182.  
  183. folder_path = r"C:\path\to\myfolder"
  184. process_files(folder_path)
  185.  
  186.  
  187. # Step 4 (optional) - Deduplicate: https://pastebin.com/04LgbNsR
Add Comment
Please, Sign In to add comment