from flask import Flask, render_template, request, jsonify
import requests
import os
import soundfile as sf
import numpy as np
from kokoro import KPipeline  # Your existing Kokoro TTS
import uuid
import base64
import io  # For handling in-memory audio

# Attempt to import NeMo ASR
try:
    import nemo.collections.asr as nemo_asr
    print("NVIDIA NeMo ASR Toolkit imported successfully.")
except ImportError:
    print('NVIDIA NeMo ASR Toolkit not found. Please install it: pip install "nemo_toolkit[asr]"')
    nemo_asr = None
except Exception as e:
    print(f"Error importing NeMo ASR: {e}")
    nemo_asr = None
app = Flask(__name__)

# --- Configuration ---
OLLAMA_API_URL = "http://localhost:11434/api/generate"
OLLAMA_TAGS_URL = "http://localhost:11434/api/tags"
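# For reference, the request/response shapes this app relies on (these match
# the calls made further down; field names are from Ollama's REST API):
#   POST /api/generate with {"model": ..., "prompt": ..., "system": ..., "stream": False}
#     -> JSON object whose "response" field holds the generated text.
#   GET /api/tags
#     -> {"models": [{"name": "llama2:latest", ...}, ...]}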
MODEL_DIR = "models"  # General models directory
ASR_MODEL_DIR = os.path.join(MODEL_DIR, "asr_models")  # Specific to ASR
KOKORO_MODEL_PATH = os.path.join(MODEL_DIR, "kokoro-v1.0.onnx")
KOKORO_VOICES_PATH = os.path.join(MODEL_DIR, "voices-v1.0.bin")
KOKORO_LANG_CODE = "a"  # Kokoro's code for American English
VOICE = "af_bella"  # American-English female voice

# Parakeet ASR model configuration
ASR_MODEL_NAME = "nvidia/parakeet-tdt-0.6b-v2"
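# Note: ASRModel.from_pretrained() downloads and caches this checkpoint the
# first time it runs (a ~0.6B-parameter model), so the initial startup needs
# network access and can take a while.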
# --- Initialize Services ---
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(ASR_MODEL_DIR, exist_ok=True)  # Ensure ASR model directory exists
os.makedirs("static", exist_ok=True)  # For temporary audio files if needed
# Initialize Kokoro TTS
tts_pipeline = None
try:
    # KPipeline may log a repo_id warning that comes from the Hugging Face
    # libraries Kokoro uses internally (the weights live at hexgrad/Kokoro-82M).
    # Kokoro initializes fine despite the warning, so it is left as-is here.
    if os.path.exists(KOKORO_MODEL_PATH) and os.path.exists(KOKORO_VOICES_PATH):
        tts_pipeline = KPipeline(lang_code=KOKORO_LANG_CODE)
        print("Kokoro TTS initialized successfully.")
    else:
        print("Kokoro TTS model/voice files not found. Skipping initialization.")
except Exception as e:
    print(f"Error initializing Kokoro TTS: {str(e)}")
# Initialize NeMo ASR model
asr_model_instance = None
if nemo_asr:
    try:
        print(f"Loading ASR model: {ASR_MODEL_NAME}...")
        asr_model_instance = nemo_asr.models.ASRModel.from_pretrained(
            model_name=ASR_MODEL_NAME,
            map_location='cpu'  # Use 'cuda' if you have a GPU and CUDA installed
        )
        asr_model_instance.eval()  # Set to evaluation mode
        print(f"ASR model '{ASR_MODEL_NAME}' loaded successfully.")
    except Exception as e:
        print(f"Error loading ASR model '{ASR_MODEL_NAME}': {str(e)}")
        print("ASR will not be available.")
else:
    print("NeMo ASR toolkit not available. ASR functionality will be disabled.")
def download_kokoro_model():
    global tts_pipeline  # Re-initialize the module-level pipeline once files exist
    if not os.path.exists(KOKORO_MODEL_PATH) or not os.path.exists(KOKORO_VOICES_PATH):
        print("Attempting to download Kokoro TTS model files...")
        try:
            model_url = "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/kokoro-v1.0.onnx"
            voices_url = "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/voices-v1.0.bin"
            for url, path in [(model_url, KOKORO_MODEL_PATH), (voices_url, KOKORO_VOICES_PATH)]:
                if not os.path.exists(path):
                    print(f"Downloading {url} to {path}")
                    response = requests.get(url, stream=True)
                    response.raise_for_status()
                    with open(path, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
            if not tts_pipeline and os.path.exists(KOKORO_MODEL_PATH) and os.path.exists(KOKORO_VOICES_PATH):
                tts_pipeline = KPipeline(lang_code=KOKORO_LANG_CODE)
                print("Kokoro TTS initialized successfully after download.")
        except Exception as e:
            print(f"Error downloading Kokoro model: {str(e)}")

download_kokoro_model()
def get_ollama_models():
    try:
        response = requests.get(OLLAMA_TAGS_URL)
        response.raise_for_status()
        models_data = response.json().get("models", [])
        return [model["name"] for model in models_data]
    except requests.RequestException as e:
        print(f"Error fetching Ollama models: {str(e)}")
        return ["llama2:latest"]  # Fallback so the UI still renders
@app.route('/')
def index():
    models = get_ollama_models()
    return render_template('index2.html', models=models)
@app.route('/process_voice_input', methods=['POST'])
def process_voice_input():
    if 'audio_data' not in request.files:
        return jsonify({"error": "No audio data found in request"}), 400
    if not asr_model_instance:
        return jsonify({"error": "ASR model not available on server"}), 500

    audio_file = request.files['audio_data']
    temp_audio_filename = f"temp_audio_{uuid.uuid4()}.wav"
    temp_audio_path = os.path.join("static", temp_audio_filename)
    try:
        audio_file.save(temp_audio_path)
        print(f"Temporary audio file saved to: {temp_audio_path}")

        # Transcribe the audio with the NeMo ASR model
        transcription_results = asr_model_instance.transcribe([temp_audio_path])

        user_input_text = ""  # Initialize to empty
        if transcription_results:
            # Depending on the model and settings, NeMo's transcribe() returns:
            #   1. a list of plain strings (the simple case),
            #   2. a list of Hypothesis objects, one per input file, or
            #   3. a list of lists of Hypothesis objects (N-best, even with N=1).
            # The branches below handle all three shapes.
            first_file_result = transcription_results[0]  # Result for the first (and only) file
            if isinstance(first_file_result, str):
                user_input_text = first_file_result
            elif isinstance(first_file_result, list) and len(first_file_result) > 0:
                # N-best list: take the text of the top hypothesis
                if hasattr(first_file_result[0], 'text'):
                    user_input_text = first_file_result[0].text
                else:
                    print(f"Warning: Top hypothesis in N-best list lacks a 'text' attribute: {first_file_result[0]}")
            elif hasattr(first_file_result, 'text'):
                # A single Hypothesis object for this input file
                user_input_text = first_file_result.text
            else:
                print(f"Warning: Unexpected transcription result format: {first_file_result}")
        else:
            print("Warning: ASR returned no transcription results at all.")
        print(f"Transcribed text: '{user_input_text}'")
    except Exception as e:
        print(f"Error during ASR transcription: {str(e)}")
        return jsonify({"error": f"ASR transcription error: {str(e)}"}), 500
    finally:
        # Always clean up the temporary file, even if transcription failed
        if os.path.exists(temp_audio_path):
            try:
                os.remove(temp_audio_path)
                print(f"Temporary audio file {temp_audio_path} removed.")
            except Exception as e_remove:
                print(f"Error removing temporary audio file {temp_audio_path}: {e_remove}")

    if not user_input_text.strip():
        return jsonify({
            "transcribed_text": user_input_text,
            "text": "Could not understand audio or audio was silent.",
            "audio": None
        })
    selected_model = request.form.get('model', get_ollama_models()[0])
    system_prompt = request.form.get('system_prompt', "You are a helpful, friendly AI assistant.")
    available_models = get_ollama_models()
    if selected_model not in available_models:
        selected_model = available_models[0] if available_models else "llama2:latest"

    try:
        ollama_payload = {
            "model": selected_model,
            "prompt": user_input_text,
            "system": system_prompt,
            "stream": False
        }
        print(f"Sending to Ollama: {ollama_payload}")
        ollama_response = requests.post(OLLAMA_API_URL, json=ollama_payload)
        ollama_response.raise_for_status()
        llm_response_text = ollama_response.json().get("response", "")
        print(f"Ollama response: '{llm_response_text}'")
    except requests.RequestException as e:
        print(f"Ollama API error: {str(e)}")
        return jsonify({"error": f"Ollama API error: {str(e)}", "transcribed_text": user_input_text}), 500
    tts_audio_base64 = None
    if tts_pipeline and llm_response_text:
        try:
            # KPipeline yields (graphemes, phonemes, audio) tuples; only the
            # audio arrays are needed here. Kokoro outputs 24 kHz mono audio.
            generator = tts_pipeline(llm_response_text, voice=VOICE)
            audio_chunks = [audio for _, _, audio in generator]
            if audio_chunks:
                final_audio_np = np.concatenate(audio_chunks)
                wav_buffer = io.BytesIO()
                sf.write(wav_buffer, final_audio_np, 24000, format='WAV', subtype='PCM_16')
                wav_buffer.seek(0)
                tts_audio_base64 = base64.b64encode(wav_buffer.read()).decode('utf-8')
                print("Kokoro TTS audio generated.")
            else:
                print("Kokoro TTS produced no audio chunks.")
        except Exception as e:
            print(f"Kokoro TTS generation error: {str(e)}")
            llm_response_text += " (TTS Error)"
    elif not tts_pipeline:
        print("Kokoro TTS pipeline not available.")
    elif not llm_response_text:
        print("LLM response was empty, skipping TTS.")

    return jsonify({
        "transcribed_text": user_input_text,
        "text": llm_response_text,
        "audio": tts_audio_base64
    })
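# Example client call for the voice route (illustrative; the WAV filename is a
# placeholder, and the form field names match the route above):
#   curl -X POST http://localhost:5000/process_voice_input \
#        -F "audio_data=@recording.wav" \
#        -F "model=llama2:latest" \
#        -F "system_prompt=You are a helpful, friendly AI assistant."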
@app.route('/process_typed_text', methods=['POST'])
def process_typed_text():
    data = request.get_json(silent=True) or {}  # Tolerate missing/invalid JSON bodies
    user_input_text = data.get('text')
    selected_model = data.get('model')
    system_prompt = data.get('system_prompt', "You are a helpful, friendly AI assistant.")
    if not user_input_text:
        return jsonify({"error": "No input text provided"}), 400

    available_models = get_ollama_models()
    if selected_model not in available_models:
        selected_model = available_models[0] if available_models else "llama2:latest"

    try:
        ollama_payload = {
            "model": selected_model,
            "prompt": user_input_text,
            "system": system_prompt,
            "stream": False
        }
        print(f"Sending typed text to Ollama: {ollama_payload}")
        ollama_response = requests.post(OLLAMA_API_URL, json=ollama_payload)
        ollama_response.raise_for_status()
        llm_response_text = ollama_response.json().get("response", "")
        print(f"Ollama response to typed text: '{llm_response_text}'")
    except requests.RequestException as e:
        print(f"Ollama API error (typed text): {str(e)}")
        return jsonify({"error": f"Ollama API error: {str(e)}"}), 500
    tts_audio_base64 = None
    if tts_pipeline and llm_response_text:
        try:
            # Same TTS path as the voice route: collect audio chunks and encode as WAV
            generator = tts_pipeline(llm_response_text, voice=VOICE)
            audio_chunks = [audio for _, _, audio in generator]
            if audio_chunks:
                final_audio_np = np.concatenate(audio_chunks)
                wav_buffer = io.BytesIO()
                sf.write(wav_buffer, final_audio_np, 24000, format='WAV', subtype='PCM_16')
                wav_buffer.seek(0)
                tts_audio_base64 = base64.b64encode(wav_buffer.read()).decode('utf-8')
                print("Kokoro TTS for typed text generated.")
        except Exception as e:
            print(f"Kokoro TTS generation error (typed text): {str(e)}")
            llm_response_text += " (TTS Error)"

    return jsonify({
        "transcribed_text": None,
        "text": llm_response_text,
        "audio": tts_audio_base64
    })
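# Example client call for the typed-text route (illustrative):
#   curl -X POST http://localhost:5000/process_typed_text \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Hello there", "model": "llama2:latest"}'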
if __name__ == '__main__':
    # Note: with debug=True, Werkzeug's reloader imports this module twice, so
    # the ASR and TTS models load twice; pass use_reloader=False to avoid that.
    app.run(host='0.0.0.0', port=5000, debug=True)