Advertisement
Kafke

wav to spectro

Dec 15th, 2022
815
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.27 KB | None | 0 0
  1. import io
  2. import typing as T
  3.  
  4. import numpy as np
  5. from PIL import Image
  6. import pydub
  7. from scipy.io import wavfile
  8. import torch
  9. import torchaudio
  10.  
  11. def spectrogram_image_from_wav(wav_bytes: io.BytesIO, max_volume: float = 50, power_for_image: float = 0.25) -> Image.Image:
  12.     """
  13.    Generate a spectrogram image from a WAV file.
  14.    """
  15.     # Read WAV file from bytes
  16.     sample_rate, waveform = wavfile.read(wav_bytes)
  17.  
  18.     sample_rate = 44100  # [Hz]
  19.     clip_duration_ms = 5000  # [ms]
  20.  
  21.     bins_per_image = 512
  22.     n_mels = 512
  23.     mel_scale = True
  24.  
  25.     # FFT parameters
  26.     window_duration_ms = 100  # [ms]
  27.     padded_duration_ms = 400  # [ms]
  28.     step_size_ms = 10  # [ms]
  29.  
  30.     # Derived parameters
  31.     num_samples = int(512 / float(bins_per_image) * clip_duration_ms) * sample_rate
  32.     n_fft = int(padded_duration_ms / 1000.0 * sample_rate)
  33.     hop_length = int(step_size_ms / 1000.0 * sample_rate)
  34.     win_length = int(window_duration_ms / 1000.0 * sample_rate)
  35.  
  36.     # Compute spectrogram from waveform
  37.     Sxx = spectrogram_from_waveform(
  38.         waveform=waveform,
  39.         sample_rate=sample_rate,
  40.         n_fft=n_fft,
  41.         hop_length=hop_length,
  42.         win_length=win_length,
  43.         mel_scale=mel_scale,
  44.         n_mels=n_mels,
  45.     )
  46.  
  47.     # Convert spectrogram to image
  48.     image = image_from_spectrogram(Sxx, max_volume=max_volume, power_for_image=power_for_image)
  49.  
  50.     return image
  51.  
  52. def spectrogram_from_waveform(
  53.     waveform: np.ndarray,
  54.     sample_rate: int,
  55.     n_fft: int,
  56.     hop_length: int,
  57.     win_length: int,
  58.     mel_scale: bool = True,
  59.     n_mels: int = 512,
  60. ) -> np.ndarray:
  61.     """
  62.    Compute a spectrogram from a waveform.
  63.    """
  64.  
  65.     spectrogram_func = torchaudio.transforms.Spectrogram(
  66.         n_fft=n_fft,
  67.         power=None,
  68.         hop_length=hop_length,
  69.         win_length=win_length,
  70.     )
  71.  
  72.     waveform_tensor = torch.from_numpy(waveform.astype(np.float32)).reshape(1, -1)
  73.     Sxx_complex = spectrogram_func(waveform_tensor).numpy()[0]
  74.  
  75.     Sxx_mag = np.abs(Sxx_complex)
  76.  
  77.     if mel_scale:
  78.         mel_scaler = torchaudio.transforms.MelScale(
  79.             n_mels=n_mels,
  80.             sample_rate=sample_rate,
  81.             f_min=0,
  82.             f_max=10000,
  83.             n_stft=n_fft // 2 + 1,
  84.             norm=None,
  85.             mel_scale="htk",
  86.         )
  87.  
  88.         Sxx_mag = mel_scaler(torch.from_numpy(Sxx_mag)).numpy()
  89.  
  90.     return Sxx_mag
  91.  
  92. def image_from_spectrogram(
  93.     Sxx: np.ndarray, max_volume: float = 50, power_for_image: float = 0.25
  94. ) -> Image.Image:
  95.     """
  96.    Generate an image from a spectrogram magnitude array.
  97.  
  98.    TODO(hayk): Add spectrogram_from_image and call this out as the reverse.
  99.    """
  100.     # Apply power curve
  101.     data = np.power(Sxx, power_for_image)
  102.  
  103.     # Rescale to 255
  104.     data = data * 255 / max_volume
  105.  
  106.     # Invert
  107.     data = 255 - data
  108.  
  109.     # Flip Y
  110.     data = data[::-1, :]
  111.  
  112.     # Convert to 8-bit unsigned integer
  113.     data = data.astype(np.uint8)
  114.  
  115.     # Create image
  116.     image = Image.fromarray(data, mode="L")
  117.  
  118.     return image
  119.  
  120. # Open WAV file
  121. with open('music.wav', 'rb') as f:
  122.     wav_bytes = io.BytesIO(f.read())
  123.  
  124. # Generate spectrogram image
  125. image = spectrogram_image_from_wav(wav_bytes)
  126.  
  127. # Save image to file
  128. image.save('restoredinput.png')
  129.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement