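# Audio-to-MIDI converter. Pipeline: (1) build a bank of 128 narrow FIR
# band-pass filters, one per MIDI note; (2) in each band, detect individual
# oscillation cycles with a hysteresis zero-crossing detector and emit one
# short note per cycle, with velocity taken from the cycle's peak amplitude;
# (3) regroup the notes into tracks by velocity ("colorize") so editors that
# color notes per track render loudness as color.
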
import numpy as np
import mido
import argparse
import os
from scipy.signal import firwin, filtfilt
from scipy.io.wavfile import read
from tqdm import tqdm


class FrequencyBand:
    """A narrow band-pass filter centered on one MIDI note."""

    def __init__(self, midi_note, sr):
        self.note = midi_note
        # Equal-temperament tuning: A4 (MIDI note 69) = 440 Hz.
        self.freq = 440 * (2 ** ((midi_note - 69) / 12))
        self.sr = sr
        self.filter = self.create_filter()

    def create_filter(self):
        # Linear-phase FIR band-pass, +/-5% around the note's fundamental.
        numtaps = 2049
        cutoff = [self.freq * 0.95, self.freq * 1.05]
        return firwin(numtaps, cutoff, fs=self.sr, pass_zero=False)
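
# Sanity check for the tuning math above (values rounded):
#   note 69 -> 440 * 2**0        = 440.00 Hz (A4), band 418.0-462.0 Hz
#   note 60 -> 440 * 2**(-9/12) ~= 261.63 Hz (C4), band 248.5-274.7 Hz
# A semitone step is a ratio of 2**(1/12) ~= 1.0595 (about +5.95%), so the
# +/-5% bands of adjacent notes overlap slightly at their edges.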


class AudioAnalyzer:
    """Loads a WAV file as mono float32, peak-normalized, at 44.1 kHz."""

    def __init__(self, file_path, target_sr=44100):
        self.sr, self.audio = read(file_path)
        if self.audio.ndim > 1:
            self.audio = self.audio.mean(axis=1)  # downmix to mono
        self.audio = self.audio.astype(np.float32)
        peak = np.max(np.abs(self.audio))
        if peak > 0:
            self.audio /= peak  # peak-normalize; skip all-silent input
        if self.sr != target_sr:
            self.resample(target_sr)
            self.sr = target_sr

    def resample(self, target_sr):
        # Linear-interpolation resampling: sample the old signal at
        # fractional indices spaced source_sr / target_sr apart.
        step = self.sr / target_sr
        self.audio = np.interp(
            np.arange(0, len(self.audio), step),
            np.arange(0, len(self.audio)),
            self.audio
        )
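
# Example of the resampling step math: upsampling 22050 Hz audio to 44100 Hz
# gives step = 22050 / 44100 = 0.5, i.e. one new sample every half an old
# sample, doubling the length. Note that plain linear interpolation applies
# no anti-aliasing low-pass, so downsampling this way can alias; it is
# tolerable here because the target matches the common 44.1 kHz source rate.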


class MidiConverter:
    def __init__(self, ppqn=480, bpm=120 * (88200 / 1920)):
        # ppqn matches the ticks_per_beat the MidiFile is created with in
        # main(); bpm works out to 5512.5 (see the timing note below).
        self.ppqn = ppqn
        self.bpm = bpm
        self.ticks_per_sec = self.ppqn * self.bpm / 60  # = 44100
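
    # Timing note: MIDI encodes time in ticks, and ticks/sec =
    # ticks_per_beat * bpm / 60. With 480 ticks per beat (the mido default),
    # 480 * (120 * 88200 / 1920) / 60 = 480 * 5512.5 / 60 = 44100 ticks/sec,
    # i.e. exactly one tick per audio sample at 44.1 kHz, so sample indices
    # convert to ticks with no rounding drift.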

    def convert_events(self, all_events):
        # One track per MIDI note; each track converts absolute event times
        # into the delta-time encoding MIDI requires.
        tracks = {note: mido.MidiTrack() for note in range(128)}
        sorted_events = sorted(all_events, key=lambda x: x['time'])
        last_times = {note: 0 for note in range(128)}
        for event in tqdm(sorted_events, desc="Placing notes"):
            note = event['note']
            track = tracks[note]
            ticks = int(event['time'] * self.ticks_per_sec)
            delta = ticks - last_times[note]  # ticks since previous event on this track
            track.append(mido.Message(
                event['type'],
                note=note,
                velocity=event.get('velocity', 64),
                time=delta
            ))
            last_times[note] = ticks
        # Track 0 is an empty conductor track that later receives the tempo.
        return [mido.MidiTrack()] + list(tracks.values())
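
# Delta-time example: two events on one track at 0.10 s and 0.25 s land at
# ticks 4410 and 11025; the second message is stored with time=6615 (ticks
# elapsed since the previous message on that track), not an absolute time.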


def analyze_band(args):
    band, audio = args
    try:
        filtered = filtfilt(band.filter, [1.0], audio)  # zero-phase band-pass
    except ValueError:
        # filtfilt needs the signal to be longer than its edge padding;
        # skip bands it cannot filter.
        return []
    # Hysteresis zero-crossing detector: record the sample index each time
    # the signal first rises above +HYSTERESIS or first falls below
    # -HYSTERESIS, so low-level noise near zero cannot trigger.
    crossings = []
    state = 0
    HYSTERESIS = 0.01
    for i in tqdm(range(len(filtered)), desc=f"Note {band.note:03d}", leave=False):
        current = filtered[i]
        if state == 0 and current > HYSTERESIS:
            state = 1
            crossings.append(i)
        elif state == 1 and current < -HYSTERESIS:
            state = 0
            crossings.append(i)
    events = []
    # Crossings alternate polarity, so crossings[i-2] and crossings[i] span
    # one full oscillation cycle; each cycle becomes a note_on/note_off pair.
    for i in tqdm(range(2, len(crossings), 2), desc=f"Cycles {band.note:03d}", leave=False):
        start = crossings[i - 2]
        end = crossings[i]
        if end - start < 2:
            continue
        segment = filtered[start:end]
        # Square-root companding of the cycle's peak amplitude boosts quiet
        # cycles into the audible velocity range.
        velocity = int(np.clip(np.max(np.abs(segment) ** 0.5) * 127, 1, 127))
        start_time = start / band.sr
        end_time = end / band.sr
        wavelength = 1.0 / band.freq
        # Clamp cycles that ran long (more than 1.5 wavelengths, e.g. across
        # a quiet gap) down to a single wavelength.
        max_duration = 1.5 * wavelength
        actual_duration = end_time - start_time
        if actual_duration > max_duration:
            end_time = start_time + wavelength
        events.append({'type': 'note_on', 'note': band.note, 'time': start_time, 'velocity': velocity})
        events.append({'type': 'note_off', 'note': band.note, 'time': end_time, 'velocity': 0})
    return events
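
# Scale of the output: for A4 (440 Hz) one wavelength is 1/440 ~= 2.27 ms,
# so each detected cycle yields a separate ~2.3 ms note, and a sustained
# one-second A4 becomes roughly 440 back-to-back notes. That density is what
# lets the rendered MIDI approximate the original waveform.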


def colorize_midi(input_path, output_path, num_tracks=31):
    """Regroups notes into tracks by velocity, so editors that color notes
    per track render velocity as color."""
    mid = mido.MidiFile(input_path)
    tracks = [[] for _ in range(num_tracks)]
    active_notes = {}
    tempo_messages = []
    # First pass: flatten every track to absolute times and bin each note
    # by its velocity.
    for track in mid.tracks:
        current_time = 0
        for msg in track:
            current_time += msg.time
            if msg.type == 'set_tempo':
                tempo_messages.append((current_time, msg))
            elif msg.type == 'note_on' and msg.velocity > 0:
                track_index = min(msg.velocity // (128 // num_tracks), num_tracks - 1)
                tracks[track_index].append(('on', msg.note, current_time, msg.velocity))
                active_notes[msg.note] = (track_index, current_time)
            elif msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0):
                # Close the note on whichever track its note_on landed in.
                if msg.note in active_notes:
                    track_index, _ = active_notes[msg.note]
                    tracks[track_index].append(('off', msg.note, current_time, 0))
                    del active_notes[msg.note]
    # Second pass: rebuild the file with one conductor track for tempo plus
    # one track per velocity bin, converting back to delta times.
    new_mid = mido.MidiFile(ticks_per_beat=mid.ticks_per_beat)
    master_track = mido.MidiTrack()
    prev_time = 0
    for abs_time, tempo_msg in sorted(tempo_messages, key=lambda x: x[0]):
        delta = abs_time - prev_time
        tempo_msg.time = delta
        master_track.append(tempo_msg)
        prev_time = abs_time
    new_mid.tracks.append(master_track)
    for track in tracks:
        if not track:
            continue
        sorted_track = sorted(track, key=lambda x: x[2])
        midi_track = mido.MidiTrack()
        prev_time = 0
        for event in sorted_track:
            delta = event[2] - prev_time
            if event[0] == 'on':
                msg = mido.Message('note_on', note=event[1], velocity=event[3], time=delta)
            else:
                msg = mido.Message('note_off', note=event[1], velocity=0, time=delta)
            midi_track.append(msg)
            prev_time = event[2]
        new_mid.tracks.append(midi_track)
    new_mid.save(output_path)
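
# Binning example with num_tracks=31: 128 // 31 = 4, so velocities 0-3 go
# to track 0, velocities 4-7 to track 1, ..., 120-123 to track 30; 124-127
# would index bin 31 and are clamped to the last track by the min().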


def main(file_path, output_path):
    audio_analyzer = AudioAnalyzer(file_path)
    converter = MidiConverter()
    # One band-pass filter per MIDI note (0-127).
    bands = [FrequencyBand(n, audio_analyzer.sr) for n in tqdm(range(128), desc="Creating bands")]
    all_events = []
    for band in tqdm(bands, desc="Analyzing bands"):
        events = analyze_band((band, audio_analyzer.audio))
        all_events.extend(events)
    midi = mido.MidiFile(type=1, ticks_per_beat=converter.ppqn)
    midi.tracks = converter.convert_events(all_events)
    # The tempo meta message goes on the empty conductor track at time 0.
    midi.tracks[0].append(mido.MetaMessage('set_tempo', tempo=mido.bpm2tempo(converter.bpm)))
    base_path = os.path.splitext(output_path)[0]
    intermediate_path = f"{base_path}_intermediate.mid"
    colored_path = f"{base_path}-colored.mid"
    midi.save(intermediate_path)
    colorize_midi(intermediate_path, colored_path)
    os.remove(intermediate_path)
    print(f"\nSuccessfully saved colored MIDI to {colored_path}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Audio-to-MIDI converter that maps note velocity to track colors')
    parser.add_argument('input', help='Input WAV file')
    parser.add_argument('output', help='Output MIDI file')
    args = parser.parse_args()
    main(args.input, args.output)
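
# Example invocation (the script name is whatever this file is saved as):
#   python audio_to_midi.py input.wav output.mid
# This writes output-colored.mid next to the requested output path and
# deletes the uncolored intermediate file.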