Not a member of Pastebin yet? Sign up — it unlocks many cool features!
- import tkinter as tk
- import threading
- import time
- import keyboard
- from PIL import Image
- import mss
- import pytesseract
- import sys
- import shutil
- import os
- import queue
- import ctypes
- import requests
# ========================================================================================
# USER CONFIGURATION
# ========================================================================================

# --- Tesseract ---
# IMPORTANT: replace the placeholder below with the location of the Tesseract
# executable on this machine and verify that it is correct.
pytesseract.pytesseract.tesseract_cmd = r'[YOUR TESSERACT.EXE PATH GOES HERE]'

# --- Kobold AI API ---
# Base address of the running Kobold AI API server.
#   KoboldCpp usually listens on            http://127.0.0.1:5001
#   The original Kobold AI client might use http://127.0.0.1:5000
KOBOLD_API_URL = "http://127.0.0.1:5001"

# --- Prompts ---
# One "system" prompt per task, describing the role the model should play.
# NOTE: Kobold's simple API has no dedicated system-prompt field, so this text
# is prepended to the user's prompt before it is sent.
SYSTEM_PROMPT_TRANSLATE = (
    "As a professional translator specializing in Japanese to English, "
    "your task is to provide an accurate translation that sounds natural "
    "in English without changing the meaning or context of the original "
    "text. Take care to preserve the tone, nuance, and cultural context of "
    "the original text. Provide ONLY the final translation. Do not add any "
    "notes, additions, or context."
)
SYSTEM_PROMPT_ALT = (
    "You are a creative linguistic expert. Your task is to provide an "
    "accurate alternative translation for the following Japanese text. "
    "The goal is to offer a plausible alternative interpretation of the "
    "original text, while remaining true to the original meaning. Take "
    "care to preserve the tone, nuance, and cultural context of the "
    "original text. Provide a single, alternative English translation. "
    "Do not repeat the original or add any commentary."
)
SYSTEM_PROMPT_EXPLAIN = (
    "As a Japanese language expert, your task is to provide a brief "
    "explanation for the following translation. Focus on any interesting "
    "grammar, vocabulary, cultural nuances, or reasons why a particular "
    "phrasing was chosen in the translation. Provide a concise "
    "explanation. Do not repeat the original text or the translation in "
    "your response."
)

# --- Hotkeys ---
# Key combinations registered as global hotkeys.
RESELECT_HOTKEY = "alt+r"                        # select a region and translate it
ALT_TRANSLATE_HOTKEY = "ctrl+right arrow"        # alternative translation of the last text
EXPLAIN_TRANSLATION_HOTKEY = "ctrl+down arrow"   # explanation of the last translation
# ========================================================================================
def make_dpi_aware():
    """Opt the process into DPI awareness on Windows; do nothing elsewhere.

    Without this, screen coordinates reported to the application can be
    scaled, which breaks region selection on displays that use scaling.
    Both attempts are best-effort: if neither API is available the function
    returns silently.
    """
    if sys.platform != "win32":
        return

    try:
        # Preferred API; the value 2 requests per-monitor DPI awareness.
        ctypes.windll.shcore.SetProcessDpiAwareness(2)
        return
    except (AttributeError, OSError):
        # shcore (or the function) is unavailable: fall through to the
        # older user32 call below.
        pass

    try:
        ctypes.windll.user32.SetProcessDPIAware()
    except Exception:
        # Deliberately ignored: DPI awareness is an optional nicety.
        pass
def check_prerequisites():
    """Verify that Tesseract and the Kobold AI server are usable.

    The Tesseract command configured on ``pytesseract`` may be either a path
    to the executable or a command name that can be resolved through PATH.
    The Kobold AI server is probed with a short request to its
    ``/api/v1/model`` endpoint, which must answer with JSON.

    Returns:
        True when both checks pass.

    Raises:
        SystemExit: with exit code 1 when Tesseract cannot be located or the
            Kobold AI server cannot be reached / does not answer with JSON.
    """
    print("--- Checking Prerequisites ---")

    # Check for Tesseract. A plain file check alone would reject a command
    # such as "tesseract" that is only resolvable through PATH.
    configured_cmd = pytesseract.pytesseract.tesseract_cmd
    if os.path.isfile(configured_cmd):
        tesseract_path = configured_cmd
    else:
        tesseract_path = shutil.which(configured_cmd)
    if not tesseract_path:
        print("\n[ERROR] Tesseract OCR not found.")
        print("Please update the path for 'pytesseract.pytesseract.tesseract_cmd' at the top of this script.")
        sys.exit(1)
    print(f"✅ Tesseract OCR found at manual path: {tesseract_path}")

    # Check for Kobold AI server connection.
    try:
        # Pinging the /api/v1/model endpoint is a reliable way to check for a Kobold API
        response = requests.get(f"{KOBOLD_API_URL}/api/v1/model", timeout=3)
        response.raise_for_status()
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        # ValueError covers requests versions whose JSON decode error is not
        # a RequestException subclass.
        print(f"\n[ERROR] Could not connect to Kobold AI server at {KOBOLD_API_URL}")
        print(f"Details: {exc}")
        print("Please ensure your Kobold AI or KoboldCpp server is running and the API is enabled.")
        sys.exit(1)

    # The model name is informational only; tolerate an unexpected JSON shape.
    model_name = payload.get('result', 'Unknown') if isinstance(payload, dict) else 'Unknown'
    print(f"✅ Connected to Kobold AI server at: {KOBOLD_API_URL}")
    print(f" Model: {model_name}")
    print("----------------------------\n")
    return True
class ScreenRegionSelector:
    """Drag-to-select overlay that records a rectangular screen region.

    The window passed in is made translucent, grey and always-on-top, and is
    covered by a canvas. Pressing the left mouse button starts a red outline,
    dragging resizes it, and releasing the button stores the result in
    ``region`` and destroys the window. ``region`` is a dict with the keys
    ``top``, ``left``, ``width`` and ``height``, or ``None`` when either side
    of the selection is shorter than 10 pixels.
    """

    def __init__(self, parent):
        self.parent = parent
        for option, value in (("-alpha", 0.3), ("-topmost", True)):
            parent.attributes(option, value)
        parent.configure(bg='grey')

        self.canvas = tk.Canvas(parent, cursor="cross", bg='grey')
        self.canvas.pack(fill=tk.BOTH, expand=True)

        # Anchor corner of the drag, the canvas item id of the outline, and
        # the final selection.
        self.start_x = self.start_y = 0
        self.rect = self.region = None

        mouse_bindings = (
            ("<ButtonPress-1>", self.on_button_press),
            ("<B1-Motion>", self.on_mouse_drag),
            ("<ButtonRelease-1>", self.on_button_release),
        )
        for sequence, handler in mouse_bindings:
            self.canvas.bind(sequence, handler)

    def _pointer(self):
        """Return the current pointer position as an ``(x, y)`` tuple."""
        pointer_x = self.canvas.winfo_pointerx()
        pointer_y = self.canvas.winfo_pointery()
        return pointer_x, pointer_y

    def on_button_press(self, event):
        """Anchor a new selection at the pointer, replacing any old outline."""
        self.start_x, self.start_y = self._pointer()
        if self.rect:
            self.canvas.delete(self.rect)
        # Start as a zero-sized rectangle; dragging stretches it.
        self.rect = self.canvas.create_rectangle(
            self.start_x, self.start_y, self.start_x, self.start_y,
            outline='red', width=2,
        )

    def on_mouse_drag(self, event):
        """Stretch the outline from the anchor to the current pointer."""
        self.canvas.coords(self.rect, self.start_x, self.start_y, *self._pointer())

    def on_button_release(self, event):
        """Store the selected region (or ``None``) and close the overlay."""
        end_x, end_y = self._pointer()
        # Sorting normalises the corners so the drag direction is irrelevant.
        left, right = sorted((self.start_x, end_x))
        top, bottom = sorted((self.start_y, end_y))
        width, height = right - left, bottom - top

        if width < 10 or height < 10:
            print("Region is too small.")
            self.region = None
        else:
            self.region = {"top": top, "left": left, "width": width, "height": height}
        self.parent.destroy()
class TranslationApp:
    """Main application orchestrating OCR and translation in the console.

    Global hotkey callbacks only enqueue messages; ``process_queue`` is
    re-scheduled on the Tk main loop and performs the actual work (region
    selection, OCR, requests to the Kobold AI server), printing results to
    the console.
    """

    # Delay in milliseconds between two polls of the hotkey message queue.
    QUEUE_POLL_INTERVAL_MS = 100
    # (connect, read) timeout for generation requests: give up quickly when
    # the server cannot be reached, but wait as long as needed for the model
    # to finish generating.
    GENERATE_TIMEOUT = (5, None)

    def __init__(self):
        self.capture_region = None   # last region selected by the user
        self.last_ocr_text = ""      # last text recognised by OCR
        self.last_translation = ""   # last translation returned by the model
        self.running = True          # False once shutdown has been requested
        self.root = None             # hidden Tk root, created in run()
        self.message_queue = queue.Queue()

    def select_region(self):
        """Show a full-screen overlay and return the region the user drags.

        Returns:
            A dict with ``top``, ``left``, ``width`` and ``height`` keys, or
            ``None`` when the selection was too small.
        """
        print("\nPlease click and drag to select the screen region...")
        selector_window = tk.Toplevel(self.root)
        selector_window.overrideredirect(True)
        width, height = selector_window.winfo_screenwidth(), selector_window.winfo_screenheight()
        selector_window.geometry(f'{width}x{height}+0+0')
        selector_logic = ScreenRegionSelector(selector_window)
        # Blocks (while still servicing Tk events) until the overlay is destroyed.
        self.root.wait_window(selector_window)
        return selector_logic.region

    def capture_and_ocr(self, region):
        """Capture ``region`` of the screen and run Japanese OCR on it.

        Args:
            region: dict with ``top``, ``left``, ``width`` and ``height``, or
                a falsy value to skip the capture.

        Returns:
            The recognised text with all whitespace runs collapsed to single
            spaces, or ``None`` when no region was given or capture/OCR failed.
        """
        if not region:
            return None
        try:
            with mss.mss() as sct:
                screenshot = sct.grab(region)
                img = Image.frombytes("RGB", screenshot.size, screenshot.bgra, "raw", "BGRX")
            custom_config = r'--oem 3 --psm 5 -l jpn+jpn_vert'
            raw_text = pytesseract.image_to_string(img, config=custom_config)
            return " ".join(raw_text.split()).strip()
        except Exception as e:
            # Best-effort: any capture/OCR failure is reported, not raised.
            print(f"An error occurred during OCR: {e}")
            return None

    def get_llm_response(self, system_prompt, user_prompt):
        """Send a prompt to the Kobold AI server and return the generated text.

        Args:
            system_prompt: role description, prepended to ``user_prompt``
                because Kobold's simple API takes a single prompt string.
            user_prompt: the task-specific prompt.

        Returns:
            The stripped generated text, ``"No text provided."`` when
            ``user_prompt`` is empty, or a string starting with ``"Error:"``
            when the request or the response parsing fails. Never raises.
        """
        if not user_prompt:
            return "No text provided."
        headers = {"Content-Type": "application/json"}
        api_url = f"{KOBOLD_API_URL}/api/v1/generate"
        # Combine system and user prompts for Kobold's single prompt format
        full_prompt = f"{system_prompt}\n\n{user_prompt}"
        payload = {
            "prompt": full_prompt,
            "temperature": 0.7,
            "max_length": 512,  # Kobold uses 'max_length' instead of 'max_tokens'
        }
        try:
            response = requests.post(api_url, headers=headers, json=payload, timeout=self.GENERATE_TIMEOUT)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Error connecting to Kobold AI server: {e}")
            return "Error: Could not connect to server."
        except Exception as e:
            print(f"An error occurred during model inference: {e}")
            return "Error: Model inference failed."

        # Parsed separately so that a malformed body is reported as a format
        # problem rather than as a connection problem.
        try:
            return response.json()['results'][0]['text'].strip()
        except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
            print(f"Error parsing Kobold AI response: {e}")
            print(f"Received data: {response.text}")
            return "Error: Unexpected response format from server."

    def setup_hotkey(self):
        """Sets up the global hotkeys to put messages in the queue."""
        keyboard.add_hotkey(RESELECT_HOTKEY, lambda: self.message_queue.put(("RESELECT_REQUESTED", None)))
        keyboard.add_hotkey(ALT_TRANSLATE_HOTKEY, lambda: self.message_queue.put(("ALT_TRANSLATE_REQUESTED", None)))
        keyboard.add_hotkey(EXPLAIN_TRANSLATION_HOTKEY, lambda: self.message_queue.put(("EXPLAIN_REQUESTED", None)))
        print(f"\n>>> Press '{RESELECT_HOTKEY}' to select a region and translate.")
        print(f">>> Press '{ALT_TRANSLATE_HOTKEY}' for an alternative translation.")
        print(f">>> Press '{EXPLAIN_TRANSLATION_HOTKEY}' for an explanation.")
        print(">>> Press 'Ctrl+C' in this console to quit the application.\n")

    @staticmethod
    def _print_banner(title, body, pad, footer_width):
        """Print ``body`` between a titled header rule and a footer rule."""
        print("\n" + "=" * pad + f" {title} " + "=" * pad)
        print(f"{body}")
        print("=" * footer_width + "\n")

    def _handle_reselect(self):
        """Let the user pick a region, OCR it and print its translation."""
        print("\n--- Hotkey pressed: Select Region ---")
        new_region = self.select_region()
        if not new_region:
            print("Region selection cancelled or invalid.")
            return
        self.capture_region = new_region
        print("Region successfully selected. Performing OCR...")
        ocr_text = self.capture_and_ocr(self.capture_region)
        if not ocr_text:
            print("No text found in the selected region.")
            return
        self.last_ocr_text = ocr_text
        print(f"Detected Text: \"{ocr_text}\"")
        print("Translating...")
        user_prompt = f"Japanese Text:\n\"{ocr_text}\"\n\nEnglish Translation:"
        translation = self.get_llm_response(SYSTEM_PROMPT_TRANSLATE, user_prompt)
        self.last_translation = translation
        self._print_banner("TRANSLATION", translation, 20, 53)

    def _handle_alt_translate(self):
        """Print an alternative translation of the last recognised text."""
        if not self.last_ocr_text:
            print("No text has been translated yet.")
            return
        print(f"\n--- Requesting alternative for: {self.last_ocr_text} ---")
        user_prompt = f"Japanese Text:\n\"{self.last_ocr_text}\"\n\nAlternative English Translation:"
        alt_translation = self.get_llm_response(SYSTEM_PROMPT_ALT, user_prompt)
        self._print_banner("ALTERNATIVE TRANSLATION", alt_translation, 15, 57)

    def _handle_explain(self):
        """Print an explanation of the last translation."""
        if not self.last_ocr_text:
            print("No text has been translated yet.")
            return
        print("\n--- Requesting explanation ---")
        user_prompt = f"Original Japanese Text:\n\"{self.last_ocr_text}\"\n\nEnglish Translation:\n\"{self.last_translation}\"\n\nExplanation:"
        explanation = self.get_llm_response(SYSTEM_PROMPT_EXPLAIN, user_prompt)
        self._print_banner("EXPLANATION", explanation, 20, 53)

    def _destroy_root(self):
        """Destroy the Tk root, tolerating a root that is already gone."""
        try:
            self.root.destroy()
        except tk.TclError:
            # The application was already destroyed; nothing left to do.
            pass

    def process_queue(self):
        """Handle at most one queued hotkey message, then re-schedule itself.

        Runs on the Tk main loop. A ``None`` message is the shutdown
        sentinel: it stops the polling and destroys the root window exactly
        once. Messages with an unknown type are ignored.
        """
        handlers = {
            "RESELECT_REQUESTED": self._handle_reselect,
            "ALT_TRANSLATE_REQUESTED": self._handle_alt_translate,
            "EXPLAIN_REQUESTED": self._handle_explain,
        }
        try:
            message = self.message_queue.get_nowait()
            if message is None:
                # Only flag the shutdown here; the finally block tears the
                # window down, so the root is never touched after destroy().
                self.running = False
                return
            msg_type, _msg_data = message
            handler = handlers.get(msg_type)
            if handler is not None:
                handler()
        except queue.Empty:
            pass
        finally:
            if self.running:
                self.root.after(self.QUEUE_POLL_INTERVAL_MS, self.process_queue)
            else:
                self._destroy_root()

    def run(self):
        """Create the hidden Tk root, register hotkeys and run the main loop.

        The hotkeys are removed when the main loop ends, including when it
        ends because of an exception such as ``KeyboardInterrupt``.
        """
        self.root = tk.Tk()
        self.root.withdraw()
        try:
            self.setup_hotkey()
            print(f"Script started. Press '{RESELECT_HOTKEY}' to select your first region.")
            self.process_queue()
            self.root.mainloop()
        finally:
            self.running = False
            keyboard.remove_all_hotkeys()
            print("\nApplication shutting down.")
if __name__ == "__main__":
    # Script entry point: prepare the process, verify the external tools and
    # run the application. Failures are reported on the console AND reflected
    # in the process exit status.
    try:
        make_dpi_aware()
        check_prerequisites()
        app = TranslationApp()
        app.run()
    except KeyboardInterrupt:
        print("\nCtrl+C detected. Shutting down.")
    except SystemExit as e:
        exit_code = e.code if e.code is not None else 1
        print(f"Exiting. Exit code: {exit_code}")
        # Re-issue the exit; merely printing would make the process finish
        # with status 0 even after sys.exit(1).
        sys.exit(exit_code)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        sys.exit(1)
Advertisement
Add Comment
Please sign in to add a comment.