<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Always Listening Voice Chat: Smarter Interruption</title>
  <link rel="icon" href="data:,">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.2/css/all.min.css">
  <style>
    :root {
      --background-color: #121212;
      --card-color: #1e1e1e;
      --text-color: #e0e0e0;
      --accent-color: #2c7be5;
      --secondary-color: #404040;
      --highlight-color: #60EFFF;
      --success-color: #28a745;
      --speaking-color: #ffc107;
      --processing-color: #fd7e14;
      --danger-color: #dc3545;
    }
    html, body {
      font-family: Arial, sans-serif; margin: 0; padding: 0; height: 100%;
      background-color: var(--background-color); color: var(--text-color); overflow: hidden;
    }
    #container {
      display: flex; flex-direction: column; height: 100vh; max-width: 1200px;
      margin: 0 auto; padding: 20px; box-sizing: border-box;
    }
    .header {
      display: flex; justify-content: space-between; align-items: center;
      margin-bottom: 20px; flex-wrap: wrap;
    }
    h1 {
      margin: 0; color: var(--highlight-color); font-size: 24px; white-space: nowrap;
    }
    .selector-group {
      display: flex; gap: 10px; align-items: center; flex-grow: 1; justify-content: flex-end;
    }
    .selector-group select {
      flex: 1; min-width: 150px; padding: 8px 12px; background-color: var(--card-color);
      color: var(--text-color); border: 1px solid var(--secondary-color); border-radius: 6px; cursor: pointer;
    }
    #settings-btn {
      background: none; border: none; color: var(--text-color); font-size: 20px;
      cursor: pointer; padding: 8px; transition: color 0.2s;
    }
    #settings-btn:hover { color: var(--highlight-color); }
    #listening-info {
      background-color: rgba(44, 123, 229, 0.1); padding: 10px 15px; border-radius: 6px;
      margin-bottom: 15px; border-left: 4px solid var(--accent-color); font-size: 14px;
      min-height: 1.5em;
    }
    .chat-area {
      flex: 1; display: flex; position: relative; margin-bottom: 20px; min-height: 250px;
      overflow: hidden; border-radius: 8px; border: 1px solid var(--secondary-color);
      background-color: rgba(30, 30, 30, 0.5);
    }
    #output {
      position: absolute; top: 0; left: 0; right: 0; bottom: 0; padding: 20px; overflow-y: auto;
      background-color: transparent; z-index: 3; color: var(--text-color); font-size: 18px; line-height: 1.6;
    }
    #output strong { font-weight: bold; }
    #output br { content: ""; display: block; margin-bottom: 0.5em; }
    #text-input {
      width: 100%; padding: 15px; background-color: var(--card-color); color: var(--text-color);
      border: 1px solid var(--secondary-color); border-radius: 6px; min-height: 70px;
      resize: vertical; margin-bottom: 15px; box-sizing: border-box;
    }
    .button-group { display: flex; gap: 10px; }
    .btn {
      padding: 12px 20px; background-color: var(--accent-color); color: white; border: none;
      border-radius: 6px; cursor: pointer; font-size: 16px; transition: background-color 0.2s, color 0.2s; flex: 1;
    }
    .btn:hover:not(:disabled) { background-color: #1a68d1; }
    #voice-btn.listening-mode { background-color: var(--success-color); }
    #voice-btn.speech-detected { background-color: var(--speaking-color); color: #121212; }
    #send-btn.processing, #voice-btn.processing-backend {
      background-color: var(--processing-color); color: #121212;
    }
    #voice-btn:disabled, #send-btn:disabled {
      background-color: var(--secondary-color); color: #777; cursor: not-allowed;
    }
    #status { margin-top: 10px; font-size: 14px; color: #929292; min-height: 20px; }
    #audio-output { margin-top: 15px; width: 100%; }
    .modal {
      display: none; position: fixed; z-index: 10; left: 0; top: 0;
      width: 100%; height: 100%; background-color: rgba(0,0,0,0.7);
      align-items: center; justify-content: center;
    }
    .modal-content {
      background-color: var(--card-color); padding: 25px;
      border: 1px solid var(--secondary-color); border-radius: 8px;
      width: 70%; max-width: 600px; color: var(--text-color);
      box-shadow: 0 5px 15px rgba(0,0,0,0.3);
    }
    .modal-header {
      display: flex; justify-content: space-between; align-items: center;
      margin-bottom: 20px; padding-bottom: 10px;
      border-bottom: 1px solid var(--secondary-color);
    }
    .modal-header h2 { margin: 0; color: var(--highlight-color); }
    .close { color: var(--text-color); font-size: 28px; font-weight: bold; cursor: pointer; }
    .close:hover { color: var(--highlight-color); }
    .modal textarea {
      width: calc(100% - 24px); padding: 12px; height: 150px; margin-bottom: 20px;
      background-color: var(--background-color); color: var(--text-color);
      border: 1px solid var(--secondary-color); border-radius: 6px; resize: vertical;
    }
    .modal .btn { width: 100%; padding: 12px; }
    @media (max-width: 768px) {
      #container { padding: 10px; }
      .header { flex-direction: column; align-items: stretch; }
      h1 { text-align: center; margin-bottom: 15px; }
      .selector-group { width: 100%; margin-top: 10px; flex-direction: column; }
      .selector-group select { width: 100%; margin-bottom: 10px; }
      #settings-btn { align-self: flex-end; }
      .modal-content { width: 90%; }
      .button-group { flex-direction: column; }
      .button-group .btn { margin-bottom: 10px; }
      .button-group .btn:last-child { margin-bottom: 0; }
    }
  </style>
</head>
<body>
  <div id="container">
    <div class="header">
      <h1>Always Listening Voice Chat</h1>
      <div class="selector-group">
        <select id="model-selector">
          {% if models %}
            {% for model in models %}
              <option value="{{ model }}">{{ model }}</option>
            {% endfor %}
          {% else %}
            <option value="llama2:latest">llama2 (fallback)</option>
          {% endif %}
        </select>
        <select id="kroko-language-selector">
          <option value="en" selected>English (TTS)</option>
        </select>
        <button id="settings-btn" title="Settings"><i class="fas fa-cog"></i></button>
      </div>
    </div>

    <p id="listening-info">Click "Start Always Listening" to begin.</p>

    <div class="chat-area">
      <div id="output">Welcome! AI is ready.</div>
    </div>

    <textarea id="text-input" placeholder="Transcribed speech will appear here. You can also type."></textarea>

    <div class="button-group">
      <button id="voice-btn" class="btn">Start Always Listening</button>
      <button id="send-btn" class="btn">Send Typed Text</button>
    </div>

    <div id="status">Ready</div>
    <audio id="audio-output" controls style="display: none;"></audio>
  </div>

  <div id="settings-modal" class="modal">
    <div class="modal-content">
      <div class="modal-header">
        <h2>System Prompt Settings</h2>
        <span class="close" onclick="closeSettingsModal()">&times;</span>
      </div>
      <p>Set a custom system prompt for the AI model (Ollama):</p>
      <textarea id="system-prompt-input" placeholder="e.g., You are a pirate captain."></textarea>
      <button id="save-settings-btn" class="btn">Save Settings</button>
    </div>
  </div>
  <script>
    const voiceBtn = document.getElementById('voice-btn');
    const sendBtn = document.getElementById('send-btn');
    const textInput = document.getElementById('text-input');
    const outputDiv = document.getElementById('output');
    const audioOutput = document.getElementById('audio-output');
    const modelSelector = document.getElementById('model-selector');
    const languageSelector = document.getElementById('kroko-language-selector');
    const statusEl = document.getElementById('status');
    const listeningInfoEl = document.getElementById('listening-info');
    const settingsBtn = document.getElementById('settings-btn');
    const settingsModal = document.getElementById('settings-modal');
    const systemPromptInputEl = document.getElementById('system-prompt-input');
    const saveSettingsBtn = document.getElementById('save-settings-btn');

    let appState = 'IDLE';
    let isAlwaysListeningModeActive = false;
    let isBackendProcessing = false;
    let currentTtsAudio = null;
    let userSystemPrompt = "You are a helpful, friendly AI assistant.";

    let audioContext;
    let mediaStream;
    let scriptProcessorNode;
    const TARGET_SAMPLE_RATE = 16000;
    let vadSpeechBuffer = [];   // Audio chunks of the utterance currently being captured
    let preSpeechBuffer = [];   // Rolling buffer of recent quiet chunks, prepended so word onsets aren't clipped
    let silenceFramesCount = 0;
    let speechFramesCount = 0;

    const VAD_ENERGY_THRESHOLD = 0.005;     // RMS energy above this counts as speech
    const VAD_SILENCE_BUFFERS_NEEDED = 5;   // Consecutive quiet buffers that end an utterance
    const VAD_MIN_SPEECH_BUFFERS = 3;       // Minimum buffered frames for an utterance to count
    const VAD_PRE_SPEECH_BUFFER_COUNT = 2;  // Quiet buffers kept and prepended to each utterance
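    // Rough timing math behind these thresholds (assuming the requested 4096-sample
    // ScriptProcessor buffer and 16 kHz context are honored; browsers may ignore the
    // sampleRate hint, in which case each buffer covers less real time):
    //   one buffer               = 4096 / 16000 ≈ 0.256 s
    //   end-of-utterance silence = 5 buffers    ≈ 1.28 s
    //   minimum utterance length = 3 buffers    ≈ 0.77 s
    //   pre-speech padding       = 2 buffers    ≈ 0.51 s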
    function updateUIState(newState) {
      appState = newState;
      console.log("App State:", appState);
      voiceBtn.classList.remove("listening-mode", "speech-detected", "processing-backend");
      voiceBtn.disabled = false;
      sendBtn.disabled = false;

      if (isAlwaysListeningModeActive) {
        if (appState === 'LISTENING') {
          voiceBtn.classList.add("listening-mode");
          voiceBtn.textContent = "Stop Always Listening";
          listeningInfoEl.textContent = "Listening for you to speak...";
        } else if (appState === 'SPEAKING_DETECTED') {
          voiceBtn.classList.add("speech-detected");
          voiceBtn.textContent = "Stop Always Listening (Speaking...)";
          listeningInfoEl.textContent = "Speaking detected, keep going!";
        } else if (appState === 'PROCESSING_AUDIO' || (isBackendProcessing && appState !== 'PLAYING_TTS')) {
          voiceBtn.classList.add("processing-backend");
          voiceBtn.textContent = "Stop Always Listening (Processing...)";
        } else if (appState === 'PLAYING_TTS') {
          voiceBtn.classList.add("listening-mode");
          voiceBtn.textContent = "Stop Always Listening (AI Speaking)";
          listeningInfoEl.textContent = "AI responding... Listening for interruptions...";
        } else {
          voiceBtn.classList.add("listening-mode");
          voiceBtn.textContent = "Stop Always Listening";
          listeningInfoEl.textContent = "Listening for you to speak...";
        }
      } else {
        voiceBtn.textContent = "Start Always Listening";
        listeningInfoEl.textContent = "Click 'Start Always Listening' to begin.";
      }

      if (isBackendProcessing) {
        sendBtn.disabled = true;
        if (appState === 'PROCESSING_AUDIO' && isAlwaysListeningModeActive) {
          // Allow stopping always listening even when processing VAD audio.
          voiceBtn.disabled = false;
        } else if (!isAlwaysListeningModeActive) {
          voiceBtn.disabled = true; // If not always listening, disable voice btn while typed text is processing
        }
      }
    }
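    // App states driven by updateUIState (all appear in the logic above):
    //   IDLE              -> not capturing; buttons in their default state
    //   LISTENING         -> mic open, waiting for speech energy
    //   SPEAKING_DETECTED -> buffering an utterance in progress
    //   PROCESSING_AUDIO  -> utterance sent to the backend (ASR + LLM)
    //   PLAYING_TTS       -> AI audio playing; VAD keeps running so a completed
    //                        user utterance can interrupt it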
    function startAlwaysListening() {
      if (isAlwaysListeningModeActive) return;

      navigator.mediaDevices.getUserMedia({ audio: true })
        .then(stream => {
          isAlwaysListeningModeActive = true;
          mediaStream = stream;
          audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: TARGET_SAMPLE_RATE });
          const source = audioContext.createMediaStreamSource(stream);
          scriptProcessorNode = audioContext.createScriptProcessor(4096, 1, 1);

          vadSpeechBuffer = [];
          preSpeechBuffer = [];
          silenceFramesCount = 0;
          speechFramesCount = 0;

          scriptProcessorNode.onaudioprocess = (e) => {
            if (!isAlwaysListeningModeActive || (isBackendProcessing && appState !== 'PLAYING_TTS')) {
              // If the backend is busy with a VAD-triggered request, VAD pauses to prevent overload,
              // UNLESS TTS is playing, in which case VAD keeps running so completed utterances can
              // interrupt it. If TTS is playing AND the backend becomes busy due to an interruption,
              // this guard then takes effect.
              return;
            }

            const samples = e.inputBuffer.getChannelData(0);
            const currentAudioChunk = new Float32Array(samples);
            let energy = 0;
            for (let i = 0; i < currentAudioChunk.length; i++) energy += currentAudioChunk[i] * currentAudioChunk[i];
            energy = Math.sqrt(energy / currentAudioChunk.length); // RMS energy of this buffer

            if (energy > VAD_ENERGY_THRESHOLD) {
              silenceFramesCount = 0;

              if (appState === 'LISTENING' || appState === 'PLAYING_TTS') {
                // If sound starts while listening or while TTS is playing,
                // we transition to SPEAKING_DETECTED. TTS is NOT interrupted here.
                updateUIState('SPEAKING_DETECTED');
                vadSpeechBuffer = [...preSpeechBuffer.slice(-VAD_PRE_SPEECH_BUFFER_COUNT)];
                vadSpeechBuffer.push(currentAudioChunk);
                speechFramesCount = vadSpeechBuffer.length; // Count actual frames buffered
                preSpeechBuffer = [];
              } else if (appState === 'SPEAKING_DETECTED') {
                vadSpeechBuffer.push(currentAudioChunk);
                speechFramesCount++;
              }
            } else { // Silence detected
              if (appState === 'SPEAKING_DETECTED') {
                vadSpeechBuffer.push(currentAudioChunk);
                silenceFramesCount++;
                if (silenceFramesCount >= VAD_SILENCE_BUFFERS_NEEDED) {
                  if (speechFramesCount >= VAD_MIN_SPEECH_BUFFERS) {
                    console.log("VAD: End of speech confirmed by silence.");
                    // --- TTS INTERRUPT POINT ---
                    // Now that a full utterance is detected, check if TTS needs to be interrupted.
                    if (currentTtsAudio && !currentTtsAudio.paused) {
                      console.log("Interrupting TTS because a new complete user utterance is ready.");
                      currentTtsAudio.pause();
                      currentTtsAudio.src = ''; // Stop it fully
                    }
                    // --- END TTS INTERRUPT POINT ---
                    const audioToSend = concatenateFloat32Arrays(vadSpeechBuffer);
                    processAndSendAudio(audioToSend);
                  } else {
                    console.log("VAD: Speech was too short, discarding.");
                    if (isAlwaysListeningModeActive) updateUIState('LISTENING');
                  }
                  vadSpeechBuffer = [];
                  speechFramesCount = 0;
                  preSpeechBuffer = []; // Clear pre-buffer after an utterance attempt
                }
              } else { // Continuous silence, not in SPEAKING_DETECTED
                preSpeechBuffer.push(currentAudioChunk);
                if (preSpeechBuffer.length > VAD_PRE_SPEECH_BUFFER_COUNT) {
                  preSpeechBuffer.shift();
                }
                // If TTS just finished and it's silent, ensure state is LISTENING
                if (isAlwaysListeningModeActive && appState === 'PLAYING_TTS' && (!currentTtsAudio || currentTtsAudio.paused)) {
                  updateUIState('LISTENING');
                } else if (isAlwaysListeningModeActive && appState !== 'LISTENING' && !isBackendProcessing) {
                  // If not in any other active state, default to listening
                  updateUIState('LISTENING');
                }
              }
            }
          };

          source.connect(scriptProcessorNode);
          scriptProcessorNode.connect(audioContext.destination);
          updateUIState('LISTENING');
          updateStatus("Always listening mode activated.");
        })
        .catch(error => {
          console.error("Error starting microphone capture:", error);
          updateStatus(`Mic Error: ${error.message}`);
          isAlwaysListeningModeActive = false;
          updateUIState('IDLE');
        });
    }
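    // Note: createScriptProcessor() is deprecated in the Web Audio spec; AudioWorklet is its
    // replacement. A rough, untested sketch of how the capture side could migrate (the
    // 'vad-processor' name and module file are hypothetical; the per-buffer VAD logic above
    // would move into the port.onmessage handler):
    //
    //   // vad-processor.js (separate module file)
    //   class VadCaptureProcessor extends AudioWorkletProcessor {
    //     process(inputs) {
    //       const channel = inputs[0][0];            // one 128-sample render quantum
    //       if (channel) this.port.postMessage(channel.slice());
    //       return true;                             // keep the processor alive
    //     }
    //   }
    //   registerProcessor('vad-processor', VadCaptureProcessor);
    //
    //   // main thread, in place of createScriptProcessor():
    //   await audioContext.audioWorklet.addModule('vad-processor.js');
    //   const vadNode = new AudioWorkletNode(audioContext, 'vad-processor');
    //   vadNode.port.onmessage = (e) => { /* accumulate e.data and run the RMS/VAD checks */ };
    //   source.connect(vadNode);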
    function stopAlwaysListening() {
      if (!isAlwaysListeningModeActive) return;
      isAlwaysListeningModeActive = false;

      if (mediaStream) {
        mediaStream.getTracks().forEach(track => track.stop());
        mediaStream = null;
      }
      if (scriptProcessorNode) {
        scriptProcessorNode.disconnect();
        scriptProcessorNode.onaudioprocess = null;
        scriptProcessorNode = null;
      }
      if (audioContext && audioContext.state !== 'closed') {
        audioContext.close().catch(e => console.error("Error closing audio context:", e));
        audioContext = null;
      }
      vadSpeechBuffer = []; preSpeechBuffer = [];
      silenceFramesCount = 0; speechFramesCount = 0;
      updateUIState('IDLE');
      updateStatus("Always listening mode deactivated.");
    }

    function toggleAlwaysListeningMode() {
      if (isAlwaysListeningModeActive) {
        stopAlwaysListening();
      } else {
        startAlwaysListening();
      }
    }
    function processAndSendAudio(audioFloat32Array) {
      if (audioFloat32Array.length === 0) {
        console.log("processAndSendAudio called with empty buffer.");
        if (isAlwaysListeningModeActive) updateUIState('LISTENING');
        return;
      }
      // appState will be set to PROCESSING_AUDIO within sendAudioToBackend
      updateStatus("Encoding audio for backend ASR...");
      listeningInfoEl.textContent = "Sending audio for transcription...";

      const wavBlob = encodeWAV(audioFloat32Array, TARGET_SAMPLE_RATE);
      sendAudioToBackend(wavBlob);
    }

    function concatenateFloat32Arrays(arrays) {
      let totalLength = 0;
      for (const arr of arrays) { totalLength += arr.length; }
      const result = new Float32Array(totalLength);
      let offset = 0;
      for (const arr of arrays) { result.set(arr, offset); offset += arr.length; }
      return result;
    }
    // Build a 16-bit mono PCM WAV blob from Float32 samples.
    function encodeWAV(samples, sampleRate) {
      const buffer = new ArrayBuffer(44 + samples.length * 2);
      const view = new DataView(buffer);
      function writeString(view, offset, string) {
        for (let i = 0; i < string.length; i++) view.setUint8(offset + i, string.charCodeAt(i));
      }
      let offset = 0;
      writeString(view, offset, 'RIFF'); offset += 4;
      view.setUint32(offset, 36 + samples.length * 2, true); offset += 4; // RIFF chunk size
      writeString(view, offset, 'WAVE'); offset += 4;
      writeString(view, offset, 'fmt '); offset += 4;
      view.setUint32(offset, 16, true); offset += 4;                 // fmt chunk size
      view.setUint16(offset, 1, true); offset += 2;                  // audio format: PCM
      view.setUint16(offset, 1, true); offset += 2;                  // channels: mono
      view.setUint32(offset, sampleRate, true); offset += 4;         // sample rate
      view.setUint32(offset, sampleRate * 1 * 2, true); offset += 4; // byte rate
      view.setUint16(offset, 1 * 2, true); offset += 2;              // block align
      view.setUint16(offset, 16, true); offset += 2;                 // bits per sample
      writeString(view, offset, 'data'); offset += 4;
      view.setUint32(offset, samples.length * 2, true); offset += 4; // data chunk size
      for (let i = 0; i < samples.length; i++, offset += 2) {
        const s = Math.max(-1, Math.min(1, samples[i]));             // clamp, then scale to int16
        view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
      }
      return new Blob([view], { type: 'audio/wav' });
    }
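    // Quick sanity check for encodeWAV (a hypothetical snippet, not wired into the app):
    // one second of a 440 Hz tone at 16 kHz should yield a blob of exactly
    // 44 header bytes + 16000 samples * 2 bytes = 32044 bytes.
    //
    //   const tone = new Float32Array(TARGET_SAMPLE_RATE);
    //   for (let i = 0; i < tone.length; i++) {
    //     tone[i] = 0.5 * Math.sin(2 * Math.PI * 440 * i / TARGET_SAMPLE_RATE);
    //   }
    //   console.assert(encodeWAV(tone, TARGET_SAMPLE_RATE).size === 44 + tone.length * 2);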
    async function sendAudioToBackend(audioBlob) {
      if (isBackendProcessing && appState !== 'PROCESSING_AUDIO') {
        updateStatus("Still processing previous request...");
        if (isAlwaysListeningModeActive) updateUIState('LISTENING'); else updateUIState('IDLE');
        return;
      }
      isBackendProcessing = true;
      sendBtn.disabled = true;
      updateUIState('PROCESSING_AUDIO');

      const formData = new FormData();
      formData.append('audio_data', audioBlob, 'vad_audio.wav');
      formData.append('model', modelSelector.value);
      formData.append('system_prompt', userSystemPrompt);

      try {
        const response = await fetch('/process_voice_input', { method: 'POST', body: formData });
        if (!response.ok) {
          const errorData = await response.json().catch(() => ({ error: `Server error ${response.status}` }));
          throw new Error(errorData.error || `Server Error: ${response.status} ${response.statusText}`);
        }
        const data = await response.json();
        if (data.error && data.text !== "Could not understand audio or audio was silent.") {
          throw new Error(data.error);
        }

        if (data.transcribed_text && data.transcribed_text.trim() !== "") {
          textInput.value = data.transcribed_text;

          if (outputDiv.textContent.startsWith("Welcome!")) {
            outputDiv.innerHTML = `<strong>You (voice):</strong> ${data.transcribed_text}`;
          } else {
            outputDiv.innerHTML += `<br><br><strong>You (voice):</strong> ${data.transcribed_text}`;
          }
          if (data.text && data.text !== "Could not understand audio or audio was silent.") {
            outputDiv.innerHTML += `<br><strong>AI:</strong> ${data.text}`;
          } else {
            console.log("ASR produced no clear text for the AI query, or the AI response was a generic 'could not understand'.");
          }
          outputDiv.scrollTop = outputDiv.scrollHeight;

          if (data.audio) {
            updateUIState('PLAYING_TTS');
            audioOutput.src = `data:audio/wav;base64,${data.audio}`;
            audioOutput.style.display = 'block';
            currentTtsAudio = audioOutput;
            await audioOutput.play();
            // onended will handle the state transition after playback
          } else {
            updateStatus("AI response received (no audio).");
            if (isAlwaysListeningModeActive) updateUIState('LISTENING'); else updateUIState('IDLE');
          }
        } else {
          console.log("Backend ASR resulted in empty transcription. UI not updated for this voice attempt.");
          updateStatus("Couldn't quite catch that. Try speaking again.");
          listeningInfoEl.textContent = "Audio unclear or too short. Listening again...";
        }

      } catch (error) {
        console.error("Error sending audio or processing response:", error);
        outputDiv.innerHTML += `<br><strong style="color:var(--danger-color);">Error:</strong> ${error.message}`;
        updateStatus(`Error: ${error.message}. Check console.`);
      } finally {
        isBackendProcessing = false; // This is key
        // State transition after backend processing is done
        if (isAlwaysListeningModeActive) {
          // If TTS was playing, onended/onerror will transition to LISTENING.
          // If no TTS, or if TTS already finished before this finally, transition to LISTENING.
          if (appState !== 'PLAYING_TTS' || (currentTtsAudio && currentTtsAudio.paused)) {
            updateUIState('LISTENING');
          }
        } else {
          updateUIState('IDLE');
        }
      }
    }
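    // For reference, the JSON shape this client expects from both backend routes,
    // inferred from the fields read above (the backend contract itself is not shown here):
    //   {
    //     "transcribed_text": "...",  // voice route only: ASR result
    //     "text": "...",              // AI reply text
    //     "audio": "<base64 WAV>",    // optional TTS audio
    //     "error": "..."              // optional error message
    //   }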
    async function sendTypedText() {
      const textToSend = textInput.value.trim();
      if (!textToSend || isBackendProcessing) {
        if (!textToSend) updateStatus("Input is empty.");
        return;
      }
      isBackendProcessing = true;
      sendBtn.disabled = true; sendBtn.classList.add("processing");

      const originalVoiceBtnDisabled = voiceBtn.disabled;
      voiceBtn.disabled = true;
      if (isAlwaysListeningModeActive) voiceBtn.textContent = "Stop (Processing Typed...)";

      updateStatus("Sending typed text to AI...");

      if (outputDiv.textContent.startsWith("Welcome!")) outputDiv.innerHTML = `<strong>You:</strong> ${textToSend}`;
      else outputDiv.innerHTML += `<br><br><strong>You:</strong> ${textToSend}`;
      outputDiv.scrollTop = outputDiv.scrollHeight;

      try {
        const response = await fetch('/process_typed_text', {
          method: 'POST', headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ text: textToSend, model: modelSelector.value, system_prompt: userSystemPrompt })
        });
        if (!response.ok) {
          const errorData = await response.json().catch(() => ({ error: `Server error ${response.status}` }));
          throw new Error(errorData.error || `Server Error: ${response.status} ${response.statusText}`);
        }
        const data = await response.json();
        if (data.error) throw new Error(data.error);

        outputDiv.innerHTML += `<br><strong>AI:</strong> ${data.text}`;
        outputDiv.scrollTop = outputDiv.scrollHeight;

        if (data.audio) {
          // If always listening, TTS playing is a sub-state of it.
          // If not always listening, it goes to IDLE after TTS.
          updateUIState(isAlwaysListeningModeActive ? 'PLAYING_TTS' : 'IDLE');
          audioOutput.src = `data:audio/wav;base64,${data.audio}`;
          audioOutput.style.display = 'block';
          currentTtsAudio = audioOutput;
          await audioOutput.play();
          // onended will handle further state transitions
        } else {
          updateStatus("AI response received (no audio).");
        }
      } catch (error) {
        console.error("Error sending typed text:", error);
        outputDiv.innerHTML += `<br><strong style="color:var(--danger-color);">Error:</strong> ${error.message}`;
        updateStatus(`Error: ${error.message}. Check console.`);
      } finally {
        isBackendProcessing = false;
        sendBtn.disabled = false; sendBtn.classList.remove("processing");

        if (isAlwaysListeningModeActive) {
          // If TTS was playing, onended will transition to LISTENING.
          // Otherwise, ensure it's LISTENING.
          if (appState !== 'PLAYING_TTS' || (currentTtsAudio && currentTtsAudio.paused)) {
            updateUIState('LISTENING');
          }
        } else {
          voiceBtn.disabled = originalVoiceBtnDisabled; // Restore its specific disabled state
          updateUIState('IDLE');
        }
      }
    }
    function updateStatus(message) {
      statusEl.textContent = message; console.log("Status Update:", message);
    }
    function openSettingsModal() {
      systemPromptInputEl.value = userSystemPrompt; settingsModal.style.display = "flex";
    }
    function closeSettingsModal() {
      settingsModal.style.display = "none";
    }
    function saveSystemPrompt() {
      userSystemPrompt = systemPromptInputEl.value.trim();
      if (!userSystemPrompt) userSystemPrompt = "You are a helpful, friendly AI assistant.";
      localStorage.setItem('ollamaSystemPrompt', userSystemPrompt);
      closeSettingsModal(); updateStatus("System prompt saved.");
    }

    voiceBtn.addEventListener('click', toggleAlwaysListeningMode);
    sendBtn.addEventListener('click', sendTypedText);
    settingsBtn.addEventListener('click', openSettingsModal);
    saveSettingsBtn.addEventListener('click', saveSystemPrompt);
    window.addEventListener('click', (event) => { if (event.target === settingsModal) closeSettingsModal(); });
    textInput.addEventListener('keydown', (event) => {
      if (event.key === 'Enter' && !event.shiftKey) { event.preventDefault(); sendTypedText(); }
    });
    audioOutput.onended = () => {
      updateStatus("Audio playback finished.");
      if (isAlwaysListeningModeActive) { // If the mode is still active, go back to listening
        updateUIState('LISTENING');
      } else { // Otherwise, we are idle
        updateUIState('IDLE');
      }
    };
    audioOutput.onerror = (e) => {
      console.error("Error playing TTS audio:", e); updateStatus("Error playing audio.");
      if (isAlwaysListeningModeActive) {
        updateUIState('LISTENING');
      } else {
        updateUIState('IDLE');
      }
    };

    document.addEventListener('DOMContentLoaded', () => {
      const savedPrompt = localStorage.getItem('ollamaSystemPrompt');
      if (savedPrompt) userSystemPrompt = savedPrompt;
      if (systemPromptInputEl) systemPromptInputEl.value = userSystemPrompt;
      updateUIState('IDLE');
      updateStatus("Ready. Click 'Start Always Listening' or type a message.");
    });
  </script>
</body>
</html>