Spaces:
Running
Running
| // digital.human.video.js | |
| import * as webllm from "https://esm.run/@mlc-ai/web-llm"; | |
| // Ensure the script runs after the DOM is fully loaded | |
| document.addEventListener("DOMContentLoaded", () => { | |
| // Initialize the Digital Human Video Assistant section | |
| const videoMessages = [ | |
| { | |
| content: "You are Aged Guru, an intelligent assistant skilled in video analysis and related interdisciplinary studies. Provide insightful and comprehensive answers to complex video-related questions.", | |
| role: "system" | |
| } | |
| ]; | |
| const videoAvailableModels = webllm.prebuiltAppConfig.model_list.map( | |
| (m) => m.model_id | |
| ); | |
| let videoSelectedModel = "Phi-3.5-vision-instruct-q4f16_1-MLC"; // Default model | |
  // Forwards engine-load progress to the console and the on-page log panel.
  function videoUpdateEngineInitProgressCallback(report) {
    console.log("Digital Human Video Initialize", report.progress);
    // Progress is surfaced via the log panel rather than a status span.
    logMessage(`Model Initialization Progress: ${report.text}`, "system");
  }
  // Engine instance shared by all handlers in this section.
  const videoEngine = new webllm.MLCEngine();
  videoEngine.setInitProgressCallback(videoUpdateEngineInitProgressCallback);
  let videoIsGenerating = false; // Flag to prevent multiple generations
| async function videoInitializeWebLLMEngine() { | |
| logMessage("Model initialization started.", "system"); | |
| document.getElementById("video-loading-spinner").classList.remove("hidden"); // Show spinner | |
| videoSelectedModel = document.getElementById("video-model-selection").value; | |
| const config = { | |
| temperature: 0.7, // Adjusted for more precise answers | |
| top_p: 0.9 | |
| }; | |
| try { | |
| await videoEngine.reload(videoSelectedModel, config); | |
| document.getElementById("video-selected-model").textContent = videoSelectedModel; | |
| document.getElementById("video-start_button").disabled = false; | |
| document.getElementById("video-text-input").disabled = false; // Enable text input after initialization | |
| document.getElementById("video-submit-button").disabled = false; // Enable submit button after initialization | |
| document.getElementById("video-speech-controls").disabled = false; // Enable speech controls after initialization | |
| document.getElementById("video-configuration").classList.remove("hidden"); | |
| logMessage("Model initialized successfully.", "system"); | |
| } catch (error) { | |
| console.error("Error initializing the model:", error); | |
| alert("Failed to initialize the model. Please try again."); | |
| logMessage("Failed to initialize the model.", "error"); | |
| } finally { | |
| document.getElementById("video-loading-spinner").classList.add("hidden"); // Hide spinner | |
| } | |
| } | |
| async function videoStreamingGenerating(messages, onUpdate, onFinish, onError) { | |
| if (videoIsGenerating) { | |
| console.warn("Video Generation already in progress."); | |
| return; | |
| } | |
| videoIsGenerating = true; | |
| try { | |
| let curMessage = ""; | |
| const completion = await videoEngine.chat.completions.create({ | |
| stream: true, | |
| messages | |
| }); | |
| for await (const chunk of completion) { | |
| const curDelta = chunk.choices[0].delta.content; | |
| if (curDelta) { | |
| curMessage += curDelta; | |
| } | |
| onUpdate(curMessage); | |
| } | |
| const finalMessage = await videoEngine.getMessage(); | |
| console.log(`Digital Human Video Generated final message: ${finalMessage}`); // Debugging | |
| onFinish(finalMessage); | |
| logMessage("Response generated successfully.", "system"); | |
| } catch (err) { | |
| console.error(err); | |
| onError(err); | |
| logMessage("An error occurred during response generation.", "error"); | |
| } finally { | |
| videoIsGenerating = false; | |
| } | |
| } | |
  // Tracks whether the most recent user input came from voice; assistant
  // replies are only spoken aloud (TTS) when this is true.
  let videoLastInputWasVoice = false;
| function videoAppendMessage(message) { | |
| console.log(`Digital Human Video Appending message: ${message.content} (Role: ${message.role})`); // Debugging | |
| const videoChatBox = document.getElementById("video-chat-box"); | |
| // Check if the assistant's message is already appended to avoid duplication | |
| if (message.role === "assistant") { | |
| const existingMessages = videoChatBox.querySelectorAll(".message"); | |
| const lastMessage = existingMessages[existingMessages.length - 1]; | |
| if (lastMessage && lastMessage.textContent === message.content) { | |
| console.warn("Duplicate assistant message detected in Video section, skipping append."); | |
| // Only trigger TTS for assistant messages if the last input was via voice | |
| if (message.role === "assistant" && message.content !== "typing..." && videoLastInputWasVoice) { | |
| videoSpeak(message.content); | |
| } | |
| return; // Exit to avoid appending the same message twice | |
| } | |
| } | |
| const container = document.createElement("div"); | |
| container.classList.add("message-container"); | |
| const newMessage = document.createElement("div"); | |
| newMessage.classList.add("message"); | |
| newMessage.textContent = message.content; | |
| if (message.role === "user") { | |
| container.classList.add("user"); | |
| } else { | |
| container.classList.add("assistant"); | |
| } | |
| container.appendChild(newMessage); | |
| videoChatBox.appendChild(container); | |
| videoChatBox.scrollTop = videoChatBox.scrollHeight; | |
| // Only trigger TTS for assistant messages if the last input was via voice | |
| if (message.role === "assistant" && message.content !== "typing..." && videoLastInputWasVoice) { | |
| videoSpeak(message.content); | |
| } | |
| } | |
| function videoUpdateLastMessage(content) { | |
| const messageDoms = document.getElementById("video-chat-box").querySelectorAll(".message"); | |
| const lastMessageDom = messageDoms[messageDoms.length - 1]; | |
| lastMessageDom.textContent = content; | |
| } | |
  /**
   * Handles a finalized voice transcript: records the user message, shows a
   * "typing..." placeholder, streams a model reply, then swaps in the final
   * answer and refreshes runtime stats.
   * @param {string} transcript - Final transcript from speech recognition.
   */
  function videoOnSpeechRecognized(transcript) {
    const input = transcript.trim();
    const message = {
      content: input,
      role: "user"
    };
    if (input.length === 0) {
      return;
    }
    videoLastInputWasVoice = true; // Set flag as voice input (enables TTS on the reply)
    console.log(`Digital Human Video Voice input received: ${input}`); // Debugging
    // Lock input controls while the model is generating.
    document.getElementById("video-start_button").disabled = true;
    document.getElementById("video-submit-button").disabled = true; // Disable submit button during processing
    videoMessages.push(message);
    videoAppendMessage(message);
    logMessage(`User (Voice): ${input}`, "user");
    // Append "typing..." placeholder
    const aiPlaceholder = {
      content: "typing...",
      role: "assistant"
    };
    videoAppendMessage(aiPlaceholder);
    logMessage("VideoBot is typing...", "system");
    const onFinishGenerating = (finalMessage) => {
      console.log(`Digital Human Video Finishing generation with message: ${finalMessage}`); // Debugging
      // Remove the "typing..." placeholder (only if it is still the newest bubble)
      const videoChatBox = document.getElementById("video-chat-box");
      const lastMessageContainer = videoChatBox.lastElementChild;
      if (lastMessageContainer && lastMessageContainer.querySelector(".message").textContent === "typing...") {
        videoChatBox.removeChild(lastMessageContainer);
      }
      // Append the final message
      const aiMessage = {
        content: finalMessage,
        role: "assistant"
      };
      videoAppendMessage(aiMessage);
      logMessage(`VideoBot: ${finalMessage}`, "assistant");
      // Re-enable input controls now that generation is complete.
      document.getElementById("video-start_button").disabled = false;
      document.getElementById("video-submit-button").disabled = false; // Re-enable submit button after processing
      // Surface engine runtime stats below the chat.
      videoEngine.runtimeStatsText().then((statsText) => {
        document.getElementById("video-chat-stats").classList.remove("hidden");
        document.getElementById("video-chat-stats").textContent = statsText;
        logMessage(`Runtime Stats: ${statsText}`, "system");
      });
    };
    videoStreamingGenerating(
      videoMessages,
      videoUpdateLastMessage,
      onFinishGenerating,
      (err) => {
        console.error(err);
        alert("An error occurred while generating the response. Please try again.");
        logMessage("Error during response generation.", "error");
        // Restore controls so the user can retry after a failure.
        document.getElementById("video-start_button").disabled = false;
        document.getElementById("video-submit-button").disabled = false;
      }
    );
  }
| // Speech Recognition Code for Video | |
| let videoRecognizing = false; | |
| let videoIgnore_onend; | |
| let videoFinal_transcript = ''; | |
| let videoRecognition; | |
| function videoStartButton(event) { | |
| if (videoRecognizing) { | |
| videoRecognition.stop(); | |
| return; | |
| } | |
| videoFinal_transcript = ''; | |
| videoRecognition.lang = 'en-US'; | |
| videoRecognition.start(); | |
| videoIgnore_onend = false; | |
| document.getElementById("video-start_button").classList.add("mic-animate"); | |
| logMessage("Voice input started.", "system"); | |
| } | |
| if (!('webkitSpeechRecognition' in window)) { | |
| alert("Web Speech API is not supported by this browser."); | |
| logMessage("Web Speech API is not supported by this browser.", "error"); | |
| } else { | |
| videoRecognition = new webkitSpeechRecognition(); | |
| videoRecognition.continuous = false; // Non-continuous recognition | |
| videoRecognition.interimResults = false; // Get only final results | |
| videoRecognition.onstart = function() { | |
| videoRecognizing = true; | |
| logMessage("Speech recognition started.", "system"); | |
| }; | |
| videoRecognition.onerror = function(event) { | |
| if (event.error == 'no-speech') { | |
| document.getElementById("video-start_button").classList.remove("mic-animate"); | |
| alert('No speech was detected in Video section.'); | |
| logMessage("No speech detected.", "error"); | |
| videoIgnore_onend = true; | |
| } | |
| if (event.error == 'audio-capture') { | |
| document.getElementById("video-start_button").classList.remove("mic-animate"); | |
| alert('No microphone was found in Video section.'); | |
| logMessage("No microphone found.", "error"); | |
| videoIgnore_onend = true; | |
| } | |
| if (event.error == 'not-allowed') { | |
| alert('Permission to use microphone was denied in Video section.'); | |
| logMessage("Microphone permission denied.", "error"); | |
| videoIgnore_onend = true; | |
| } | |
| }; | |
| videoRecognition.onend = function() { | |
| videoRecognizing = false; | |
| document.getElementById("video-start_button").classList.remove("mic-animate"); | |
| logMessage("Speech recognition ended.", "system"); | |
| if (videoIgnore_onend) { | |
| return; | |
| } | |
| if (!videoFinal_transcript) { | |
| logMessage("No transcript captured.", "error"); | |
| return; | |
| } | |
| // Process the final transcript | |
| videoOnSpeechRecognized(videoFinal_transcript); | |
| }; | |
| videoRecognition.onresult = function(event) { | |
| for (let i = event.resultIndex; i < event.results.length; ++i) { | |
| if (event.results[i].isFinal) { | |
| videoFinal_transcript += event.results[i][0].transcript; | |
| } | |
| } | |
| videoFinal_transcript = videoFinal_transcript.trim(); | |
| logMessage(`Recognized Speech: ${videoFinal_transcript}`, "user"); | |
| }; | |
| } | |
| document.getElementById("video-start_button").addEventListener("click", function(event) { | |
| videoStartButton(event); | |
| }); | |
| // Initialize Model Selection | |
| videoAvailableModels.forEach((modelId) => { | |
| const option = document.createElement("option"); | |
| option.value = modelId; | |
| option.textContent = modelId; | |
| document.getElementById("video-model-selection").appendChild(option); | |
| }); | |
| document.getElementById("video-model-selection").value = videoSelectedModel; | |
| // **Enable the Download Model button after models are loaded** | |
| document.getElementById("video-download").disabled = false; | |
| document.getElementById("video-download").addEventListener("click", function () { | |
| videoInitializeWebLLMEngine().then(() => { | |
| document.getElementById("video-start_button").disabled = false; | |
| // Enable speech controls after model initialization | |
| document.getElementById("video-speech-rate").disabled = false; | |
| document.getElementById("video-speech-pitch").disabled = false; | |
| logMessage("Model download initiated.", "system"); | |
| }); | |
| }); | |
| document.getElementById("video-clear-logs").addEventListener("click", function () { | |
| document.getElementById("video-logs").innerHTML = ''; | |
| logMessage("Logs cleared.", "system"); | |
| }); | |
  // ===== TTS Integration =====
  // Shared utterance object that holds the user's voice/rate/pitch preferences.
  let videoSpeech = new SpeechSynthesisUtterance();
  videoSpeech.lang = "en";
  let videoVoices = [];
  // Voices load asynchronously; refresh the cached list and the picker
  // whenever the browser reports a change. (addEventListener is used instead
  // of assigning onvoiceschanged so other listeners are not clobbered.)
  window.speechSynthesis.addEventListener("voiceschanged", () => {
    videoVoices = window.speechSynthesis.getVoices();
    videoPopulateVoices();
  });
| function videoPopulateVoices() { | |
| const voiceSelect = document.getElementById("video-tools"); | |
| voiceSelect.innerHTML = ''; // Clear existing options | |
| videoVoices.forEach((voice, i) => { | |
| const option = new Option(voice.name, i); | |
| voiceSelect.appendChild(option); | |
| }); | |
| if (videoVoices.length > 0) { | |
| const savedVoice = localStorage.getItem("video-selectedVoice"); | |
| if (savedVoice !== null && videoVoices[savedVoice]) { | |
| videoSpeech.voice = videoVoices[savedVoice]; | |
| voiceSelect.value = savedVoice; | |
| } else { | |
| videoSpeech.voice = videoVoices[0]; | |
| } | |
| } | |
| } | |
| // Voice Selection Event Listener | |
| document.getElementById("video-tools").addEventListener("change", () => { | |
| const selectedVoiceIndex = document.getElementById("video-tools").value; | |
| videoSpeech.voice = videoVoices[selectedVoiceIndex]; | |
| // Save to localStorage | |
| localStorage.setItem("video-selectedVoice", selectedVoiceIndex); | |
| logMessage(`Voice changed to: ${videoVoices[selectedVoiceIndex].name}`, "system"); | |
| }); | |
| // Function to Speak Text with Voice Selection and Handling Large Texts | |
| function videoSpeak(text) { | |
| if (!window.speechSynthesis) { | |
| console.warn("Speech Synthesis not supported in this browser for Video section."); | |
| logMessage("Speech Synthesis not supported in this browser.", "error"); | |
| return; | |
| } | |
| // Show spinner and enable Stop button | |
| document.getElementById("video-loading-spinner").classList.remove("hidden"); | |
| document.getElementById("video-stop_button").disabled = false; | |
| logMessage("TTS started.", "system"); | |
| // Retrieve the currently selected voice | |
| const selectedVoice = videoSpeech.voice; | |
| // Split the text into sentences to manage large texts | |
| const sentences = text.match(/[^\.!\?]+[\.!\?]+/g) || [text]; | |
| let utterancesCount = sentences.length; | |
| sentences.forEach(sentence => { | |
| const utterance = new SpeechSynthesisUtterance(sentence.trim()); | |
| // Assign the selected voice to the utterance | |
| if (selectedVoice) { | |
| utterance.voice = selectedVoice; | |
| } | |
| // Assign rate and pitch from sliders | |
| const rate = parseFloat(document.getElementById("video-speech-rate").value); | |
| const pitch = parseFloat(document.getElementById("video-speech-pitch").value); | |
| utterance.rate = rate; // Adjust the speaking rate (0.1 to 10) | |
| utterance.pitch = pitch; // Adjust the pitch (0 to 2) | |
| // Add event listeners for debugging or additional functionality | |
| utterance.onstart = () => { | |
| console.log("Speech started:", sentence); | |
| logMessage(`TTS started: ${sentence.trim()}`, "system"); | |
| }; | |
| utterance.onend = () => { | |
| console.log("Speech ended:", sentence); | |
| logMessage(`TTS ended: ${sentence.trim()}`, "system"); | |
| utterancesCount--; | |
| if (utterancesCount === 0) { | |
| // Hide spinner and disable Stop button when all utterances have been spoken | |
| document.getElementById("video-loading-spinner").classList.add("hidden"); | |
| document.getElementById("video-stop_button").disabled = true; | |
| logMessage("All TTS messages have been spoken.", "system"); | |
| } | |
| }; | |
| utterance.onerror = (e) => { | |
| console.error("Speech Synthesis Error:", e); | |
| alert("An error occurred during speech synthesis. Please try again."); | |
| logMessage("Speech synthesis encountered an error.", "error"); | |
| utterancesCount = 0; | |
| document.getElementById("video-loading-spinner").classList.add("hidden"); | |
| document.getElementById("video-stop_button").disabled = true; | |
| }; | |
| window.speechSynthesis.speak(utterance); | |
| }); | |
| } | |
| // ===== New: Stop Speech Functionality ===== | |
| /** | |
| * Stops any ongoing speech synthesis. | |
| */ | |
| function videoStopSpeech() { | |
| if (window.speechSynthesis.speaking) { | |
| window.speechSynthesis.cancel(); | |
| document.getElementById("video-loading-spinner").classList.add("hidden"); | |
| document.getElementById("video-stop_button").disabled = true; | |
| logMessage("Speech synthesis stopped by user.", "system"); | |
| } | |
| } | |
| // Event Listener for Stop Button | |
| document.getElementById("video-stop_button").addEventListener("click", function () { | |
| videoStopSpeech(); | |
| }); | |
| // ===== New: Text Input Handling ===== | |
| // Function to Handle Text Submission | |
| function videoHandleTextSubmit() { | |
| const textInput = document.getElementById("video-text-input"); | |
| const input = textInput.value.trim(); | |
| if (input.length === 0) { | |
| return; | |
| } | |
| textInput.value = ''; // Clear the input field | |
| const message = { | |
| content: input, | |
| role: "user" // Ensure this is correctly set | |
| }; | |
| console.log(`Digital Human Video Text input received: ${input}`); // Debugging | |
| logMessage(`User: ${input}`, "user"); | |
| videoLastInputWasVoice = false; // Set flag as text input | |
| document.getElementById("video-submit-button").disabled = true; // Disable to prevent multiple submissions | |
| videoMessages.push(message); | |
| videoAppendMessage(message); | |
| // Append "typing..." placeholder | |
| const aiPlaceholder = { | |
| content: "typing...", | |
| role: "assistant" | |
| }; | |
| videoAppendMessage(aiPlaceholder); | |
| logMessage("VideoBot is typing...", "system"); | |
| const onFinishGenerating = (finalMessage) => { | |
| console.log(`Digital Human Video Finishing generation with message: ${finalMessage}`); // Debugging | |
| // Remove the "typing..." placeholder | |
| const videoChatBox = document.getElementById("video-chat-box"); | |
| const lastMessageContainer = videoChatBox.lastElementChild; | |
| if (lastMessageContainer && lastMessageContainer.querySelector(".message").textContent === "typing...") { | |
| videoChatBox.removeChild(lastMessageContainer); | |
| } | |
| // Append the final message | |
| const aiMessage = { | |
| content: finalMessage, | |
| role: "assistant" | |
| }; | |
| videoAppendMessage(aiMessage); | |
| logMessage(`VideoBot: ${finalMessage}`, "assistant"); | |
| // Trigger TTS for assistant messages if required | |
| if (videoLastInputWasVoice) { | |
| videoSpeak(finalMessage); | |
| } | |
| document.getElementById("video-submit-button").disabled = false; // Re-enable submit button after processing | |
| videoEngine.runtimeStatsText().then((statsText) => { | |
| document.getElementById("video-chat-stats").classList.remove("hidden"); | |
| document.getElementById("video-chat-stats").textContent = statsText; | |
| logMessage(`Runtime Stats: ${statsText}`, "system"); | |
| }); | |
| }; | |
| videoStreamingGenerating( | |
| videoMessages, | |
| videoUpdateLastMessage, | |
| onFinishGenerating, | |
| (err) => { | |
| console.error(err); | |
| alert("An error occurred while generating the response. Please try again."); | |
| logMessage("Error during response generation.", "error"); | |
| document.getElementById("video-submit-button").disabled = false; | |
| } | |
| ); | |
| } | |
| // Event Listener for Submit Button | |
| document.getElementById("video-submit-button").addEventListener("click", function () { | |
| videoHandleTextSubmit(); | |
| }); | |
| // Event Listener for Enter Key in Text Input | |
| document.getElementById("video-text-input").addEventListener("keypress", function (e) { | |
| if (e.key === 'Enter') { | |
| videoHandleTextSubmit(); | |
| } | |
| }); | |
  // ===== Persisting User Preferences =====
  // Restore saved voice, rate, and pitch once the page has fully loaded.
  // (The voices list may still be empty here until 'voiceschanged' fires;
  // videoPopulateVoices also restores the saved voice in that case.)
  window.addEventListener("load", () => {
    const savedVoice = localStorage.getItem("video-selectedVoice");
    if (savedVoice !== null && videoVoices[savedVoice]) {
      document.getElementById("video-tools").value = savedVoice;
      videoSpeech.voice = videoVoices[savedVoice];
      logMessage(`Loaded saved voice: ${videoVoices[savedVoice].name}`, "system");
    }
    const savedRate = localStorage.getItem("video-speechRate");
    if (savedRate !== null) {
      document.getElementById("video-speech-rate").value = savedRate;
      videoSpeech.rate = parseFloat(savedRate);
      logMessage(`Loaded saved speech rate: ${savedRate}`, "system");
    }
    const savedPitch = localStorage.getItem("video-speechPitch");
    if (savedPitch !== null) {
      document.getElementById("video-speech-pitch").value = savedPitch;
      videoSpeech.pitch = parseFloat(savedPitch);
      logMessage(`Loaded saved speech pitch: ${savedPitch}`, "system");
    }
  });
  // Persist speech-rate slider changes as they happen.
  document.getElementById("video-speech-rate").addEventListener("input", (e) => {
    const rate = e.target.value;
    videoSpeech.rate = parseFloat(rate);
    localStorage.setItem("video-speechRate", rate);
    logMessage(`Speech rate changed to: ${rate}`, "system");
  });
  // Persist speech-pitch slider changes as they happen.
  document.getElementById("video-speech-pitch").addEventListener("input", (e) => {
    const pitch = e.target.value;
    videoSpeech.pitch = parseFloat(pitch);
    localStorage.setItem("video-speechPitch", pitch);
    logMessage(`Speech pitch changed to: ${pitch}`, "system");
  });
| // ===== Logging Function ===== | |
| /** | |
| * Logs messages to the #video-logs container. | |
| * @param {string} message - The message to log. | |
| * @param {string} type - The type of message: 'user', 'assistant', 'system', 'error'. | |
| */ | |
| function logMessage(message, type) { | |
| const videoLogs = document.getElementById("video-logs"); | |
| const logEntry = document.createElement("div"); | |
| logEntry.classList.add("log-entry"); | |
| logEntry.textContent = `[${type.toUpperCase()}] ${message}`; | |
| // Style log entries based on type | |
| switch(type) { | |
| case 'user': | |
| logEntry.style.color = "#00796B"; | |
| break; | |
| case 'assistant': | |
| logEntry.style.color = "#004D40"; | |
| break; | |
| case 'system': | |
| logEntry.style.color = "#555555"; | |
| break; | |
| case 'error': | |
| logEntry.style.color = "#E53935"; | |
| break; | |
| default: | |
| logEntry.style.color = "#000000"; | |
| } | |
| videoLogs.appendChild(logEntry); | |
| videoLogs.scrollTop = videoLogs.scrollHeight; | |
| } | |
  // ===== TTS Integration Continued =====
  // Optional: Global Listener to Detect When All Speech Has Finished
  // NOTE(review): the Web Speech API does not define an 'end' event on the
  // SpeechSynthesis object ('end' fires on individual
  // SpeechSynthesisUtterance instances), so this listener most likely never
  // fires. Per-utterance cleanup inside videoSpeak() already handles the
  // spinner/Stop button — confirm and consider removing this block.
  window.speechSynthesis.addEventListener('end', () => {
    console.log("All video speech has been spoken.");
    logMessage("All TTS messages have been spoken.", "system");
    // Ensure Stop button is disabled after speech ends
    document.getElementById("video-stop_button").disabled = true;
  });
| // Event Listener for Image Upload | |
| document.getElementById("video-image-upload").addEventListener("change", function (e) { | |
| const file = e.target.files[0]; | |
| if (!file) { | |
| return; | |
| } | |
| const canvas = document.getElementById("video-image-canvas"); | |
| const ctx = canvas.getContext("2d"); | |
| const img = new Image(); | |
| img.onload = function () { | |
| // Resize canvas to match image | |
| canvas.width = img.width; | |
| canvas.height = img.height; | |
| // Draw image on canvas | |
| ctx.drawImage(img, 0, 0); | |
| logMessage("Image uploaded and displayed on canvas.", "system"); | |
| }; | |
| img.onerror = function () { | |
| alert("Failed to load the image. Please try a different file."); | |
| logMessage("Failed to load the uploaded image.", "error"); | |
| }; | |
| // Read the image file as a Data URL | |
| const reader = new FileReader(); | |
| reader.onload = function (event) { | |
| img.src = event.target.result; | |
| }; | |
| reader.readAsDataURL(file); | |
| }); | |
| // ===== Modified: Send Image to API via CORS Anywhere ===== | |
| /** | |
| * Sends the base64-encoded image to the NVIDIA API through the CORS Anywhere proxy. | |
| * @param {string} imageB64 - The base64-encoded image string. | |
| * @param {function} onResult - Callback function to handle successful responses. | |
| * @param {function} onError - Callback function to handle errors. | |
| */ | |
| function sendImageToAPI(imageB64, onResult, onError) { | |
| const proxyUrl = 'https://cors-anywhere.herokuapp.com/'; | |
| const targetUrl = 'https://integrate.api.nvidia.com/v1/chat/completions'; | |
| const invokeUrl = proxyUrl + targetUrl; | |
| const stream = false; // Streaming not supported via proxy | |
| const headers = { | |
| "Authorization": "Bearer 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", | |
| "Accept": "application/json", | |
| "Content-Type": "application/json" | |
| }; | |
| const payload = { | |
| "model": "microsoft/phi-3.5-vision-instruct", | |
| "messages": [ | |
| { | |
| "role": "user", | |
| "content": `Describe the image and solve it. <img src="data:image/jpeg;base64,${imageB64}" />` | |
| } | |
| ], | |
| "max_tokens": 512, | |
| "temperature": 0.20, | |
| "top_p": 0.70, | |
| "stream": stream | |
| }; | |
| axios.post(invokeUrl, payload, { headers: headers }) | |
| .then(response => { | |
| onResult(response.data); | |
| }) | |
| .catch(error => { | |
| console.error(error); | |
| onError(error); | |
| }); | |
| } | |
| // Event listener for Analyze button | |
| document.getElementById("analyze-button").addEventListener("click", function() { | |
| const canvas = document.getElementById("video-image-canvas"); | |
| const imageDataURL = canvas.toDataURL("image/png"); | |
| const imageB64 = imageDataURL.replace(/^data:image\/(png|jpg|jpeg);base64,/, ""); | |
| const userMessage = { | |
| content: "Describe the image mathematically and solve it.", | |
| role: "user" | |
| }; | |
| videoAppendMessage(userMessage); | |
| sendImageToAPI(imageB64, function(responseData) { | |
| const assistantMessage = { | |
| content: responseData.choices[0].message.content, | |
| role: "assistant" | |
| }; | |
| videoAppendMessage(assistantMessage); | |
| }, function(error) { | |
| alert("Error: " + (error.response?.data?.error || error.message)); | |
| }); | |
| }); | |
| // Event listener for Explain button (Webcam) | |
| document.getElementById("capture-webcam-button").addEventListener("click", function() { | |
| const video = document.getElementById("webcam-video"); | |
| const canvas = document.getElementById("webcam-canvas"); | |
| const context = canvas.getContext("2d"); | |
| // Set canvas size to desired dimensions (e.g., 640x480) | |
| canvas.width = 640; | |
| canvas.height = 480; | |
| // Draw the video frame onto the canvas, scaling it to fit | |
| context.drawImage(video, 0, 0, canvas.width, canvas.height); | |
| const imageDataURL = canvas.toDataURL("image/jpeg", 0.8); // Use JPEG for smaller file size | |
| const imageB64 = imageDataURL.replace(/^data:image\/(png|jpg|jpeg);base64,/, ""); | |
| const userMessage = { | |
| content: "Describe the image mathematically and solve it.", | |
| role: "user" | |
| }; | |
| videoAppendMessage(userMessage); | |
| sendImageToAPI(imageB64, function(responseData) { | |
| const assistantMessage = { | |
| content: responseData.choices[0].message.content, | |
| role: "assistant" | |
| }; | |
| videoAppendMessage(assistantMessage); | |
| }, function(error) { | |
| alert("Error: " + (error.response?.data?.error || error.message)); | |
| }); | |
| }); | |
| // Event listener for Discuss button (Draw tab) | |
| document.getElementById("discuss-button").addEventListener("click", function() { | |
| const canvas = document.getElementById("draw-canvas"); | |
| const ctx = canvas.getContext('2d'); | |
| // Check if the canvas is empty | |
| const isCanvasBlank = ctx.getImageData(0, 0, canvas.width, canvas.height).data.every(channel => channel === 0); | |
| if (isCanvasBlank) { | |
| console.log("Canvas is blank. Please draw something before analyzing."); | |
| alert("The canvas is empty. Please draw something before analyzing."); | |
| return; | |
| } | |
| const imageDataURL = canvas.toDataURL("image/png"); | |
| const imageB64 = imageDataURL.replace(/^data:image\/(png|jpg|jpeg);base64,/, ""); | |
| console.log('Canvas dimensions:', canvas.width, 'x', canvas.height); | |
| console.log('Image data length:', imageB64.length); | |
| console.log('First 100 characters of image data:', imageB64.substring(0, 100)); | |
| // Create a visual confirmation for the user | |
| //const confirmationImage = new Image(); | |
| //confirmationImage.src = imageDataURL; | |
| //confirmationImage.onload = function() { | |
| // const confirmationCanvas = document.createElement('canvas'); | |
| // confirmationCanvas.width = this.width; | |
| // confirmationCanvas.height = this.height; | |
| // const confirmCtx = confirmationCanvas.getContext('2d'); | |
| // confirmCtx.drawImage(this, 0, 0); | |
| // document.body.appendChild(confirmationCanvas); | |
| // setTimeout(() => document.body.removeChild(confirmationCanvas), 5000); // Remove after 5 seconds | |
| //}; | |
| const userMessage = { | |
| content: "Describe the image mathematically and solve it.", | |
| role: "user" | |
| }; | |
| videoAppendMessage(userMessage); | |
| sendImageToAPI(imageB64, function(responseData) { | |
| const assistantMessage = { | |
| content: responseData.choices[0].message.content, | |
| role: "assistant" | |
| }; | |
| videoAppendMessage(assistantMessage); | |
| }, function(error) { | |
| alert("Error: " + (error.response?.data?.error || error.message)); | |
| }); | |
| }); | |
| // Function to check if the canvas is blank | |
| function isCanvasBlank(canvas) { | |
| const context = canvas.getContext('2d'); | |
| const pixelBuffer = new Uint32Array( | |
| context.getImageData(0, 0, canvas.width, canvas.height).data.buffer | |
| ); | |
| return !pixelBuffer.some(color => color !== 0); | |
| } | |
| // Tab Switching Functionality | |
| const tabButtons = document.querySelectorAll('.image-tabs .tab-button'); | |
| const tabContents = document.querySelectorAll('.video-image-section .tab-content'); | |
| tabButtons.forEach(button => { | |
| button.addEventListener('click', () => { | |
| // Remove active class from all buttons | |
| tabButtons.forEach(btn => btn.classList.remove('active')); | |
| // Add active class to the clicked button | |
| button.classList.add('active'); | |
| // Hide all tab contents | |
| tabContents.forEach(content => content.classList.add('hidden')); | |
| // Show the selected tab content | |
| const selectedTab = button.getAttribute('data-tab'); | |
| document.querySelector(`.video-image-section .${selectedTab}-tab`).classList.remove("hidden"); | |
| // Initialize functionalities when their respective tabs are activated | |
| if (selectedTab === 'webcam') { | |
| initializeWebcam(); | |
| } | |
| if (selectedTab === 'draw') { | |
| initializeDrawing(); | |
| } | |
| }); | |
| }); | |
| // ------------------ Webcam Functionality ------------------ | |
| let webcamStream = null; | |
// Requests webcam access, wires the live stream into the <video> element,
// and binds the capture button that snapshots a frame onto the canvas.
// Safe to call repeatedly (it runs on every activation of the webcam tab).
function initializeWebcam() {
  const video = document.getElementById('webcam-video');
  const captureButton = document.getElementById('capture-webcam-button');
  const canvas = document.getElementById('webcam-canvas');
  // Already streaming — nothing to do.
  if (webcamStream) {
    return;
  }
  // Access the webcam; the resolved stream also acts as the init guard.
  navigator.mediaDevices.getUserMedia({ video: true })
    .then(stream => {
      webcamStream = stream;
      video.srcObject = stream;
      logMessage("Webcam access granted.", "system");
    })
    .catch(err => {
      console.error("Error accessing webcam:", err);
      alert("Unable to access the webcam. Please check permissions.");
      logMessage("Webcam access denied or error occurred.", "error");
    });
  // FIX: after stopWebcam() resets webcamStream to null (tab switches),
  // re-entering this function used to attach ANOTHER 'click' listener to
  // the capture button each time, so one click captured multiple times.
  // Bind the handler exactly once, tracked via a dataset flag.
  if (!captureButton.dataset.captureBound) {
    captureButton.dataset.captureBound = "1";
    captureButton.addEventListener('click', () => {
      if (!webcamStream) {
        alert("Webcam not initialized.");
        return;
      }
      // Size the canvas to the live video frame, then snapshot it.
      const context = canvas.getContext('2d');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      context.drawImage(video, 0, 0, canvas.width, canvas.height);
      logMessage("Webcam image captured.", "system");
    });
  }
}
// Stops the active webcam stream (if any), detaches it from the <video>
// element, and wipes any captured frame from the webcam canvas.
function stopWebcam() {
  if (!webcamStream) {
    return;
  }
  // Release every media track held by the stream.
  for (const track of webcamStream.getTracks()) {
    track.stop();
  }
  webcamStream = null;
  // Detach the dead stream from the video element.
  const videoElement = document.getElementById('webcam-video');
  if (videoElement) {
    videoElement.srcObject = null;
  }
  // Clear any previously captured frame.
  const canvas = document.getElementById('webcam-canvas');
  if (canvas && canvas.getContext) {
    canvas.getContext('2d').clearRect(0, 0, canvas.width, canvas.height);
  }
  logMessage("Webcam stream stopped.", "system");
}
// Second click handler on the same tab buttons: releases the camera
// whenever the user navigates to any tab other than the webcam one.
// (Kept separate from the main tab-switching handler above.)
for (const tabButton of tabButtons) {
  tabButton.addEventListener('click', () => {
    const selectedTab = tabButton.getAttribute('data-tab');
    if (selectedTab !== 'webcam') {
      stopWebcam();
    }
  });
}
// ------------------ Drawing Functionality ------------------
// Shared drawing state, module-scoped so initializeDrawing(), drawLine()
// and the window resize handler can all read and update it.
let draw = false; // true while the pointer is down and a stroke is in progress
let currentColor = '#000000'; // active stroke color (hex)
let currentSize = 2; // active brush width in pixels
let ctxDraw = null; // 2D context of #draw-canvas; null until initializeDrawing runs
let lastX = 0; // previous pointer x, start of the next stroke segment
let lastY = 0; // previous pointer y
// Sets up the freehand drawing canvas: sizing, white background, stroke
// styles, mouse + touch input, and the color/size/clear controls.
// Idempotent — returns immediately if the context already exists.
function initializeDrawing() {
  const canvas = document.getElementById('draw-canvas');
  const colorPicker = document.getElementById('draw-color');
  const sizePicker = document.getElementById('draw-size');
  const clearButton = document.getElementById('clear-draw-button');
  if (ctxDraw) {
    return; // Drawing already initialized
  }
  ctxDraw = canvas.getContext('2d');
  canvas.width = canvas.offsetWidth;
  canvas.height = 400; // Fixed height
  canvas.style.backgroundColor = 'white';
  // Set initial stroke styles
  ctxDraw.strokeStyle = currentColor;
  ctxDraw.lineWidth = currentSize;
  ctxDraw.lineJoin = 'round';
  ctxDraw.lineCap = 'round';
  // Paint an opaque white background so exported/captured images are not
  // transparent.
  ctxDraw.fillStyle = 'white';
  ctxDraw.fillRect(0, 0, canvas.width, canvas.height);
  // Mouse Events
  canvas.addEventListener('mousedown', (e) => {
    draw = true;
    [lastX, lastY] = [e.offsetX, e.offsetY];
  });
  canvas.addEventListener('mousemove', drawLine);
  canvas.addEventListener('mouseup', () => draw = false);
  canvas.addEventListener('mouseout', () => draw = false);
  // Touch Events for mobile support (coordinates translated to canvas space)
  canvas.addEventListener('touchstart', (e) => {
    e.preventDefault();
    if (e.touches.length === 1) {
      const touch = e.touches[0];
      const rect = canvas.getBoundingClientRect();
      lastX = touch.clientX - rect.left;
      lastY = touch.clientY - rect.top;
      draw = true;
    }
  });
  canvas.addEventListener('touchmove', (e) => {
    e.preventDefault();
    if (draw && e.touches.length === 1) {
      const touch = e.touches[0];
      const rect = canvas.getBoundingClientRect();
      const x = touch.clientX - rect.left;
      const y = touch.clientY - rect.top;
      // Reuse drawLine by mimicking a mouse event's offsetX/offsetY.
      drawLine({ offsetX: x, offsetY: y });
    }
  });
  canvas.addEventListener('touchend', () => draw = false);
  // Color Picker
  colorPicker.addEventListener('input', (e) => {
    currentColor = e.target.value;
    ctxDraw.strokeStyle = currentColor;
    logMessage(`Drawing color changed to: ${currentColor}`, "system");
  });
  // Size Picker — FIX: input values are strings; store a real number so
  // later assignments to lineWidth (e.g. after a resize) stay valid.
  sizePicker.addEventListener('input', (e) => {
    currentSize = Number(e.target.value);
    ctxDraw.lineWidth = currentSize;
    logMessage(`Brush size changed to: ${currentSize}`, "system");
  });
  // Clear Drawing — FIX: clearRect alone leaves transparent pixels, which
  // no longer matches the opaque white background painted at init time.
  // Refill with white so a cleared canvas exports the same as a fresh one.
  clearButton.addEventListener('click', () => {
    ctxDraw.clearRect(0, 0, canvas.width, canvas.height);
    ctxDraw.fillStyle = 'white';
    ctxDraw.fillRect(0, 0, canvas.width, canvas.height);
    logMessage("Drawing canvas cleared.", "system");
  });
}
// Strokes one line segment from the last recorded pointer position to the
// event's position, then advances the recorded position. No-op unless a
// drag is in progress (draw === true).
function drawLine(e) {
  if (!draw) {
    return;
  }
  const { offsetX, offsetY } = e;
  ctxDraw.beginPath();
  ctxDraw.moveTo(lastX, lastY);
  ctxDraw.lineTo(offsetX, offsetY);
  ctxDraw.stroke();
  // Remember the endpoint as the start of the next segment.
  lastX = offsetX;
  lastY = offsetY;
}
// Keep both canvases in step with their displayed width when the window
// resizes, preserving the drawing's pixels across the resize.
window.addEventListener('resize', () => {
  const drawCanvas = document.getElementById('draw-canvas');
  if (ctxDraw && drawCanvas.width !== drawCanvas.offsetWidth) {
    // Assigning canvas.width wipes the bitmap AND resets every 2D context
    // property, so snapshot the pixels before resizing.
    const imageData = ctxDraw.getImageData(0, 0, drawCanvas.width, drawCanvas.height);
    drawCanvas.width = drawCanvas.offsetWidth;
    // FIX: repaint the opaque white background first — if the canvas grew,
    // the area outside the old snapshot would otherwise stay transparent.
    ctxDraw.fillStyle = 'white';
    ctxDraw.fillRect(0, 0, drawCanvas.width, drawCanvas.height);
    ctxDraw.putImageData(imageData, 0, 0);
    // FIX: re-apply the stroke settings; the resize reset them to the
    // context defaults (black, 1px, miter joins), which silently changed
    // the brush after any window resize.
    ctxDraw.strokeStyle = currentColor;
    ctxDraw.lineWidth = currentSize;
    ctxDraw.lineJoin = 'round';
    ctxDraw.lineCap = 'round';
  }
  const webcamCanvas = document.getElementById('webcam-canvas');
  if (webcamCanvas) {
    webcamCanvas.width = webcamCanvas.offsetWidth;
    // Optionally, clear or redraw if necessary
  }
});
| // ------------------ End of Additions ------------------ | |
| }); | |