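# Kobold Translate: OCR a selected screen region with Tesseract and translate the Japanese text via a Kobold AI server.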

import tkinter as tk
import threading
import time
import keyboard
from PIL import Image
import mss
import pytesseract
import sys
import shutil
import os
import queue
import ctypes
import requests

# ========================================================================================
# USER CONFIGURATION
# ========================================================================================
# --- Tesseract Configuration ---
# IMPORTANT: Please verify this path is correct for your Tesseract installation.
# check_prerequisites() exits the script unless this value is the path of an existing file,
# so the placeholder below must be replaced before the first run.
pytesseract.pytesseract.tesseract_cmd = r'[YOUR TESSERACT.EXE PATH GOES HERE]'

# --- Kobold AI API Configuration ---
# This should be the address of your running Kobold AI API server.
# For KoboldCpp, the default is usually http://127.0.0.1:5001
# For the original Kobold AI client, it might be http://127.0.0.1:5000
# No trailing slash: the script appends paths such as "/api/v1/model" and "/api/v1/generate".
KOBOLD_API_URL = "http://127.0.0.1:5001"


# --- Prompting Configuration ---
# These are the "system" prompts that define the AI's role for each task.
# NOTE: Kobold's simple API doesn't have a dedicated "system" prompt.
# This text will be prepended to the user's prompt, separated from it by a blank line.
# One prompt per hotkey action: first translation, alternative translation, explanation.
SYSTEM_PROMPT_TRANSLATE = """As a professional translator specializing in Japanese to English, your task is to provide an accurate translation that sounds natural in English without changing the meaning or context of the original text. Take care to preserve the tone, nuance, and cultural context of the original text. Provide ONLY the final translation. Do not add any notes, additions, or context."""
SYSTEM_PROMPT_ALT = """You are a creative linguistic expert. Your task is to provide an accurate alternative translation for the following Japanese text. The goal is to offer a plausible alternative interpretation of the original text, while remaining true to the original meaning. Take care to preserve the tone, nuance, and cultural context of the original text. Provide a single, alternative English translation. Do not repeat the original or add any commentary."""
SYSTEM_PROMPT_EXPLAIN = """As a Japanese language expert, your task is to provide a brief explanation for the following translation. Focus on any interesting grammar, vocabulary, cultural nuances, or reasons why a particular phrasing was chosen in the translation. Provide a concise explanation. Do not repeat the original text or the translation in your response."""


# --- Hotkey Configuration ---
# These strings are handed unchanged to keyboard.add_hotkey() in TranslationApp.setup_hotkey().
RESELECT_HOTKEY = "alt+r"  # select a region, OCR it and translate
ALT_TRANSLATE_HOTKEY = "ctrl+right arrow"  # alternative translation of the last OCR text
EXPLAIN_TRANSLATION_HOTKEY = "ctrl+down arrow"  # explanation of the last translation
# ========================================================================================

def make_dpi_aware():
    """Opt the process into DPI awareness on Windows; do nothing elsewhere.

    ``shcore.SetProcessDpiAwareness(2)`` is tried first. If that entry point
    is missing (``AttributeError``) or the call fails with ``OSError``, the
    older ``user32.SetProcessDPIAware()`` is used instead. A failure of the
    fallback is ignored on purpose: DPI awareness only fixes screen-scaling
    issues and the script can still run without it.
    """
    if sys.platform != "win32":
        return

    try:
        ctypes.windll.shcore.SetProcessDpiAwareness(2)
    except (AttributeError, OSError):
        # Preferred API unavailable: fall through to the legacy call below.
        pass
    else:
        return

    try:
        ctypes.windll.user32.SetProcessDPIAware()
    except Exception:
        # Best effort only; keep running with the default scaling behaviour.
        pass

def check_prerequisites():
    """Verify that Tesseract and the Kobold AI server are usable.

    Two checks are performed, in order:

    1. The configured ``pytesseract.pytesseract.tesseract_cmd`` must be the
       path of an existing file.
    2. ``GET {KOBOLD_API_URL}/api/v1/model`` must answer within 3 seconds
       with a successful status and a JSON object.

    Returns:
        True when both checks pass.

    Raises:
        SystemExit: With exit code 1 when either check fails; an explanatory
            message is printed first.
    """
    print("--- Checking Prerequisites ---")

    # Check for Tesseract
    tesseract_path = pytesseract.pytesseract.tesseract_cmd
    if not os.path.isfile(tesseract_path):
        print("\n[ERROR] Tesseract OCR not found.")
        print("Please update the path for 'pytesseract.pytesseract.tesseract_cmd' at the top of this script.")
        sys.exit(1)
    print(f"✅ Tesseract OCR found at manual path: {tesseract_path}")

    # Check for Kobold AI server connection
    try:
        # Pinging the /api/v1/model endpoint is a reliable way to check for a Kobold API
        response = requests.get(f"{KOBOLD_API_URL}/api/v1/model", timeout=3)
        response.raise_for_status()
        # Parse inside the try: a body that is not a JSON object means the
        # address is not serving the Kobold API. ValueError covers requests
        # versions whose JSON errors are not RequestException subclasses;
        # AttributeError covers JSON that is not a dict (no ``.get``).
        model_name = response.json().get('result', 'Unknown')
    except (requests.exceptions.RequestException, ValueError, AttributeError) as e:
        print(f"\n[ERROR] Could not connect to Kobold AI server at {KOBOLD_API_URL}")
        # Show the cause so refused/timeout/HTTP-error/bad-payload can be told apart.
        print(f"Details: {e}")
        print("Please ensure your Kobold AI or KoboldCpp server is running and the API is enabled.")
        sys.exit(1)

    print(f"✅ Connected to Kobold AI server at: {KOBOLD_API_URL}")
    print(f"   Model: {model_name}")

    print("----------------------------\n")
    return True

class ScreenRegionSelector:
    """Rubber-band rectangle selector drawn on a translucent overlay window.

    The given ``parent`` window is made semi-transparent and topmost and is
    filled with a canvas. Pressing, dragging and releasing the left mouse
    button draws a red rectangle; on release the result is stored in
    ``region`` and ``parent`` is destroyed.

    ``region`` is ``None`` until a selection is made and stays ``None`` when
    the selection is narrower or shorter than 10 pixels. Otherwise it is a
    dict with the keys ``top``, ``left``, ``width`` and ``height`` built from
    the pointer's screen coordinates.
    """

    def __init__(self, parent):
        self.parent = parent
        for option, value in (("-alpha", 0.3), ("-topmost", True)):
            parent.attributes(option, value)
        parent.configure(bg='grey')

        self.canvas = tk.Canvas(parent, cursor="cross", bg='grey')
        self.canvas.pack(fill=tk.BOTH, expand=True)

        # Pointer position at button press and the canvas item being drawn.
        self.start_x = 0
        self.start_y = 0
        self.rect = None
        self.region = None

        mouse_bindings = (
            ("<ButtonPress-1>", self.on_button_press),
            ("<B1-Motion>", self.on_mouse_drag),
            ("<ButtonRelease-1>", self.on_button_release),
        )
        for sequence, handler in mouse_bindings:
            self.canvas.bind(sequence, handler)

    def _pointer_position(self):
        """Return the current pointer position as an ``(x, y)`` tuple."""
        pointer_x = self.canvas.winfo_pointerx()
        pointer_y = self.canvas.winfo_pointery()
        return pointer_x, pointer_y

    def on_button_press(self, event):
        """Remember the anchor corner and start a fresh rectangle."""
        anchor_x, anchor_y = self._pointer_position()
        self.start_x = anchor_x
        self.start_y = anchor_y
        if self.rect:
            # Drop the rectangle left over from an earlier press.
            self.canvas.delete(self.rect)
        self.rect = self.canvas.create_rectangle(
            anchor_x, anchor_y, anchor_x, anchor_y, outline='red', width=2
        )

    def on_mouse_drag(self, event):
        """Stretch the rectangle from the anchor to the current pointer."""
        pointer_x, pointer_y = self._pointer_position()
        self.canvas.coords(self.rect, self.start_x, self.start_y, pointer_x, pointer_y)

    def on_button_release(self, event):
        """Store the selected region (or ``None``) and close the overlay."""
        release_x, release_y = self._pointer_position()
        # Normalise the corners so dragging in any direction works.
        left, right = sorted((self.start_x, release_x))
        top, bottom = sorted((self.start_y, release_y))
        span_x = right - left
        span_y = bottom - top

        if span_x >= 10 and span_y >= 10:
            self.region = {"top": top, "left": left, "width": span_x, "height": span_y}
        else:
            print("Region is too small.")
            self.region = None
        self.parent.destroy()

class TranslationApp:
    """Main application orchestrating OCR and translation in the console.

    Global hotkeys only put messages on ``message_queue``. ``process_queue``
    polls that queue from the Tk event loop every 100 ms and performs the
    region selection, OCR and Kobold requests there, so all Tk calls happen
    in the thread that runs ``mainloop``.
    """

    # Delay between two polls of the message queue, in milliseconds.
    _POLL_INTERVAL_MS = 100

    def __init__(self):
        # Region chosen by the user ({"top", "left", "width", "height"}) or None.
        self.capture_region = None
        # Text and translation of the most recent successful OCR run; reused
        # by the "alternative" and "explain" hotkeys.
        self.last_ocr_text = ""
        self.last_translation = ""
        # Cleared to stop the polling loop and tear down the Tk root.
        self.running = True
        # Hidden Tk root; created in run().
        self.root = None
        # Messages are (type, data) tuples; a bare None requests shutdown.
        self.message_queue = queue.Queue()

    def select_region(self):
        """Let the user drag a rectangle on a full-screen overlay.

        Blocks (while still servicing Tk events) until the overlay window is
        destroyed.

        Returns:
            The selected region dict, or None when the selection was too
            small.
        """
        print("\nPlease click and drag to select the screen region...")
        selector_window = tk.Toplevel(self.root)
        selector_window.overrideredirect(True)
        width, height = selector_window.winfo_screenwidth(), selector_window.winfo_screenheight()
        selector_window.geometry(f'{width}x{height}+0+0')
        selector_logic = ScreenRegionSelector(selector_window)
        self.root.wait_window(selector_window)
        return selector_logic.region

    def capture_and_ocr(self, region):
        """Grab ``region`` from the screen and run Tesseract on it.

        Args:
            region: Dict with ``top``, ``left``, ``width`` and ``height``, or
                a falsy value.

        Returns:
            The recognised text with every run of whitespace collapsed to a
            single space, or None when ``region`` is falsy or capture/OCR
            raised (the error is printed).
        """
        if not region:
            return None
        try:
            with mss.mss() as sct:
                screenshot = sct.grab(region)
                img = Image.frombytes("RGB", screenshot.size, screenshot.bgra, "raw", "BGRX")
                # Japanese (horizontal + vertical models), page segmentation mode 5.
                custom_config = r'--oem 3 --psm 5 -l jpn+jpn_vert'
                raw_text = pytesseract.image_to_string(img, config=custom_config)
                return " ".join(raw_text.split())
        except Exception as e:
            print(f"An error occurred during OCR: {e}")
            return None

    def get_llm_response(self, system_prompt, user_prompt):
        """Send one generation request to the Kobold AI server.

        Args:
            system_prompt: Instruction text prepended to the prompt.
            user_prompt: Task-specific text; when empty no request is sent.

        Returns:
            The stripped generated text, or a human-readable string starting
            with ``"Error:"`` (``"No text provided."`` for an empty
            ``user_prompt``). This method does not raise for network or
            response-format problems; details are printed instead.
        """
        if not user_prompt:
            return "No text provided."

        payload = {
            # Kobold's simple API takes one prompt, so both parts are joined.
            "prompt": f"{system_prompt}\n\n{user_prompt}",
            "temperature": 0.7,
            "max_length": 512,  # Kobold uses 'max_length' instead of 'max_tokens'
        }

        try:
            response = requests.post(
                f"{KOBOLD_API_URL}/api/v1/generate",
                headers={"Content-Type": "application/json"},
                json=payload,
                timeout=None,  # wait for the generation however long it takes
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Error connecting to Kobold AI server: {e}")
            return "Error: Could not connect to server."
        except Exception as e:
            print(f"An error occurred during model inference: {e}")
            return "Error: Model inference failed."

        # Parse separately so a malformed or non-JSON body is reported as a
        # format problem rather than as a connection failure.
        try:
            return response.json()['results'][0]['text'].strip()
        except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
            print(f"Error parsing Kobold AI response: {e}")
            print(f"Received data: {response.text}")
            return "Error: Unexpected response format from server."

    def setup_hotkey(self):
        """Sets up the global hotkeys to put messages in the queue."""
        bindings = (
            (RESELECT_HOTKEY, "RESELECT_REQUESTED"),
            (ALT_TRANSLATE_HOTKEY, "ALT_TRANSLATE_REQUESTED"),
            (EXPLAIN_TRANSLATION_HOTKEY, "EXPLAIN_REQUESTED"),
        )
        for hotkey, msg_type in bindings:
            # Bind msg_type as a default so each callback keeps its own value.
            keyboard.add_hotkey(
                hotkey,
                lambda msg_type=msg_type: self.message_queue.put((msg_type, None)),
            )

        print(f"\n>>> Press '{RESELECT_HOTKEY}' to select a region and translate.")
        print(f">>> Press '{ALT_TRANSLATE_HOTKEY}' for an alternative translation.")
        print(f">>> Press '{EXPLAIN_TRANSLATION_HOTKEY}' for an explanation.")
        print(">>> Press 'Ctrl+C' in this console to quit the application.\n")

    @staticmethod
    def _print_result(title, text, pad=20):
        """Print ``text`` between a titled header rule and a footer of equal width."""
        header = "=" * pad + f" {title} " + "=" * pad
        print("\n" + header)
        print(f"{text}")
        print("=" * len(header) + "\n")

    def _handle_reselect(self):
        """Select a region, OCR it and print its translation."""
        print("\n--- Hotkey pressed: Select Region ---")
        new_region = self.select_region()
        if not new_region:
            print("Region selection cancelled or invalid.")
            return

        self.capture_region = new_region
        print("Region successfully selected. Performing OCR...")
        ocr_text = self.capture_and_ocr(self.capture_region)
        if not ocr_text:
            print("No text found in the selected region.")
            return

        self.last_ocr_text = ocr_text
        print(f"Detected Text: \"{ocr_text}\"")
        print("Translating...")
        user_prompt = f"Japanese Text:\n\"{ocr_text}\"\n\nEnglish Translation:"
        translation = self.get_llm_response(SYSTEM_PROMPT_TRANSLATE, user_prompt)
        self.last_translation = translation
        self._print_result("TRANSLATION", translation)

    def _handle_alt_translate(self):
        """Print an alternative translation of the last OCR text."""
        if not self.last_ocr_text:
            print("No text has been translated yet.")
            return
        print(f"\n--- Requesting alternative for: {self.last_ocr_text} ---")
        user_prompt = f"Japanese Text:\n\"{self.last_ocr_text}\"\n\nAlternative English Translation:"
        alt_translation = self.get_llm_response(SYSTEM_PROMPT_ALT, user_prompt)
        self._print_result("ALTERNATIVE TRANSLATION", alt_translation, pad=15)

    def _handle_explain(self):
        """Print an explanation of the last translation."""
        if not self.last_ocr_text:
            print("No text has been translated yet.")
            return
        print("\n--- Requesting explanation ---")
        user_prompt = f"Original Japanese Text:\n\"{self.last_ocr_text}\"\n\nEnglish Translation:\n\"{self.last_translation}\"\n\nExplanation:"
        explanation = self.get_llm_response(SYSTEM_PROMPT_EXPLAIN, user_prompt)
        self._print_result("EXPLANATION", explanation)

    def _destroy_root(self):
        """Destroy the Tk root, tolerating a root that is already gone."""
        try:
            self.root.destroy()
        except tk.TclError:
            # The application was already destroyed; nothing left to do.
            pass

    def process_queue(self):
        """Handle at most one queued message, then re-arm the poll timer.

        A ``None`` message is the shutdown request: ``running`` is cleared
        and the Tk root is destroyed exactly once. Messages with an unknown
        type are ignored. While ``running`` is true the method reschedules
        itself, even if a handler raised.
        """
        try:
            try:
                message = self.message_queue.get_nowait()
            except queue.Empty:
                return

            if message is None:
                # Teardown happens once, in the finally clause below.
                self.running = False
                return

            msg_type, _msg_data = message
            handlers = {
                "RESELECT_REQUESTED": self._handle_reselect,
                "ALT_TRANSLATE_REQUESTED": self._handle_alt_translate,
                "EXPLAIN_REQUESTED": self._handle_explain,
            }
            handler = handlers.get(msg_type)
            if handler is not None:
                handler()
        finally:
            if self.running:
                self.root.after(self._POLL_INTERVAL_MS, self.process_queue)
            else:
                self._destroy_root()

    def run(self):
        """Create the hidden Tk root, register hotkeys and run the event loop.

        Returns when the Tk main loop ends. The hotkeys are unregistered even
        if the loop is left through an exception such as KeyboardInterrupt.
        """
        self.root = tk.Tk()
        self.root.withdraw()

        self.setup_hotkey()

        print(f"Script started. Press '{RESELECT_HOTKEY}' to select your first region.")

        try:
            self.process_queue()
            self.root.mainloop()
        finally:
            self.running = False
            keyboard.remove_all_hotkeys()
        print("\nApplication shutting down.")

# Script entry point: run the prerequisite checks, then the application.
if __name__ == "__main__":
    try:
        make_dpi_aware()
        check_prerequisites()
        app = TranslationApp()
        app.run()
    except KeyboardInterrupt:
        # Ctrl+C is the advertised way to quit, so it ends the script normally.
        print("\nCtrl+C detected. Shutting down.")
    except SystemExit as e:
        # Report the code (None means a successful exit, i.e. 0), then
        # re-raise so the process really terminates with that status instead
        # of falling off the end of the script with status 0.
        print(f"Exiting. Exit code: {0 if e.code is None else e.code}")
        raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        # An unexpected failure must not look like success to the caller.
        sys.exit(1)