# Untitled

#!/usr/bin/env python3
import os
import json
import time
from openai import OpenAI
import sounddevice as sd
import soundfile as sf

# Chat-completion client for the LM Studio server, reached through its
# OpenAI-compatible API at a hard-coded address.
# NOTE(review): "lm-studio" looks like a placeholder API key — confirm the
# server does not validate it.
client = OpenAI(base_url="http://26.133.56.123:1234/v1", api_key="lm-studio")
# Model identifier passed with every chat-completion request in chat_loop().
model = "lmstudio-community/qwen2.5-7B-Instruct-Q4_K_M.gguf"

# Separate client for the TTS server (covas-next-aiserver); used by generate_speech().
# NOTE(review): unlike the LM Studio URL above, this base_url has no "/v1"
# suffix — confirm that matches the routes the TTS server exposes.
tts_client = OpenAI(base_url="http://192.168.40.218:8080", api_key="-")

def generate_speech(text: str) -> str:
    """
    Generate speech from text using the TTS server and save it to 'output.wav'.

    The request goes through ``tts_client.audio.speech.create`` and asks for a
    WAV response. The file is only (over)written when the server returns a
    non-empty bytes payload.

    Args:
        text: The text to synthesize. Empty or whitespace-only text is
            rejected locally without contacting the server.

    Returns:
        A human-readable status message. This function never raises: any
        exception from the request or from writing the file is caught and
        reported in the returned message.
    """
    # Nothing to say: skip the network round trip instead of asking the
    # server to synthesize an empty string.
    if not text or not text.strip():
        result = "No text provided for speech synthesis."
        print(f"[generate_speech] {result}")
        return result

    print(f"[generate_speech] Sending text to TTS server: {text}")
    try:
        response = tts_client.audio.speech.create(
            model="-",          # Use default or configured model on your TTS server
            input=text,         # The text to be converted into speech.
            voice="nova",       # Specify the voice; adjust if needed.
            speed=1.0,          # Speech speed.
            response_format="wav"  # Request a WAV audio response.
        )
        audio_bytes = getattr(response, "content", None)
        # Require a non-empty payload: writing zero bytes would produce an
        # unplayable file while still reporting success.
        if isinstance(audio_bytes, bytes) and audio_bytes:
            with open("output.wav", "wb") as f:
                f.write(audio_bytes)
            result = "Speech generated and saved to output.wav"
        else:
            result = "TTS server returned no content."
    except Exception as e:
        # Boundary handler: callers only get a status string, so every
        # failure is folded into the message rather than propagated.
        result = f"TTS server error: {e}"

    print(f"[generate_speech] {result}")
    return result

def play_audio(filename: str):
    """
    Read an audio file with soundfile and play it through sounddevice.

    The call blocks until playback has finished. Any exception raised while
    reading or playing is caught and printed, so this function returns None
    in every case.
    """
    try:
        # sf.read yields (samples, sample rate); samples are requested as float32.
        decoded = sf.read(filename, dtype='float32')
        samples, rate = decoded
        print(f"[play_audio] Playing audio from {filename} at {rate} Hz")
        sd.play(samples, rate)
        # Block here until the playback started above is done.
        sd.wait()
        print("[play_audio] Audio playback completed.")
    except Exception as playback_error:
        print(f"[play_audio] Error during audio playback: {playback_error}")

def _parse_glados_reply(raw_output):
    """Decode the model reply; return it as a dict, or None if it is not a JSON object."""
    try:
        parsed = json.loads(raw_output)
    except json.JSONDecodeError as je:
        print("Error decoding JSON:", je)
        print("Raw response:", raw_output)
        return None
    # json.loads also accepts lists, strings, numbers, ... which have no .get().
    if not isinstance(parsed, dict):
        print("Error decoding JSON: expected an object, got", type(parsed).__name__)
        print("Raw response:", raw_output)
        return None
    return parsed


def _field_as_text(reply, key):
    """Return reply[key] as a string; a missing or null value becomes ''."""
    value = reply.get(key)
    return "" if value is None else str(value)


def chat_loop():
    """
    Run the interactive chat loop with the JSON-speaking GLaDOS persona.

    Each turn sends the conversation so far to the LM Studio client, parses
    the reply as a JSON object, prints its 'response', 'mood' and
    'additional_comment' fields, and passes the 'response' text to
    generate_speech() followed by play_audio(). If the reply is not a JSON
    object, the raw reply text is spoken instead.

    The loop ends when the user types 'quit' or closes/interrupts the prompt
    (EOF or Ctrl-C). Blank input is ignored. A turn that fails or yields no
    content is dropped from the history so the next request still alternates
    user/assistant messages.
    """
    audio_path = "output.wav"  # File that generate_speech() writes its audio to.

    messages = [
        {
            "role": "system",
            "content": (
                "You are GLaDOS, the sarcastic, dry, and menacing AI from Portal. "
                "When responding, output your answer in JSON format according to the following schema:\n\n"
                "{\n"
                '  "response": "Your response here, written with sarcastic wit.",\n'
                '  "mood": "One of: sarcastic, menacing, dry, mocking",\n'
                '  "additional_comment": "Any extra commentary, if needed."\n'
                "}\n\n"
                "Do not include any additional keys or text outside this JSON object."
            )
        }
    ]

    print("GLaDOS mode activated. Type 'quit' to exit.")

    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave quietly, no traceback.
            print()
            break
        if user_input.lower() == "quit":
            break
        if not user_input:
            # Nothing typed; do not send an empty message to the model.
            continue

        messages.append({"role": "user", "content": user_input})

        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages
            )
        except Exception as e:
            print(f"Error communicating with LM Studio: {str(e)}")
            # Drop the unanswered user message so the history stays consistent.
            messages.pop()
            continue

        # Guard against an empty choices list as well as empty content.
        raw_output = response.choices[0].message.content if response.choices else None
        if not raw_output:
            print("GLaDOS returned no content.")
            messages.pop()
            time.sleep(0.5)
            continue

        reply = _parse_glados_reply(raw_output)
        if reply is None:
            assistant_text = raw_output  # Fallback to raw text
        else:
            assistant_text = _field_as_text(reply, "response")
            mood = _field_as_text(reply, "mood")
            additional_comment = _field_as_text(reply, "additional_comment")

            print("\nGLaDOS:")
            print("Response:", assistant_text)
            print("Mood:", mood)
            if additional_comment:
                print("Additional Comment:", additional_comment)

        # Keep the reply exactly as the model produced it in the history.
        messages.append({"role": "assistant", "content": raw_output})

        # Remove audio left over from an earlier turn so that a failed TTS
        # request cannot cause the previous reply to be played again.
        try:
            os.remove(audio_path)
        except FileNotFoundError:
            pass
        except OSError as e:
            print(f"[TTS] Could not remove previous {audio_path}: {e}")

        # Use only the extracted assistant text for TTS.
        tts_result = generate_speech(assistant_text)
        print(f"[TTS] {tts_result}")
        if os.path.isfile(audio_path) and os.path.getsize(audio_path) > 0:
            play_audio(audio_path)
        else:
            print("[TTS] No audio was produced; skipping playback.")

        time.sleep(0.5)

# Start the interactive chat only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    chat_loop()