Spaces:

Mistral-AI-Game-Jam
/

ParentalControl

Runtime error

App Files Files Community

GitLab CI committed on Jan 27

Commit

c27e5a4

1 Parent(s): 2e1a626

Update game build from GitLab CI

Browse files

Files changed (7) hide show

server/ActionProcessor.py +23 -34
server/AudioTranscriber.py +9 -21
server/TextFilterer.py +72 -0
server/__main__.py +72 -23
server/static/godot/index.html +1 -1
server/static/godot/index.pck +2 -2
server/static/index.html +17 -22

server/ActionProcessor.py CHANGED Viewed

@@ -1,8 +1,10 @@
 import datetime
 import json
 from threading import Thread
 from multiprocessing import Queue
-from typing import Dict, Any, List
 import logging
 import sys
 from mistralai import Mistral
@@ -31,14 +33,13 @@ class ActionProcessor(Thread):
     def __init__(
         self,
-        text_queue: "Queue[str]",
-        action_queue: "Queue[Dict[str, Any]]",
         mistral_api_key: str,
     ):
         super().__init__()
-        self.text_queue = text_queue
         self.action_queue = action_queue
-        self.text_buffers: List[str] = []
         self.mistral_client = Mistral(api_key=mistral_api_key)
         self.daemon = True  # Thread will exit when main program exits
@@ -102,53 +103,41 @@ Output: ["None", "neutralSentiment"]
         return result.strip()
-    def process_text(self, text: str) -> Dict[str, Any] | None:
         """Convert text into an action if a complete command is detected."""
         # Get sentiment first
-        self.text_buffers.append(text)
-        if len(self.text_buffers) < 3:
             return None
-        if len(self.text_buffers) > 3:
-            _ = self.text_buffers.pop(0)
-        candidate = self.text_buffers[1]
-        if len(self.text_buffers[0]) < len(candidate) >= len(self.text_buffers[2]):
-            action_and_sentiment = json.loads(self.get_action_and_sentiment(candidate))
-            if (
-                not isinstance(action_and_sentiment, list)
-                or len(action_and_sentiment) != 2
-            ):
-                return None
-            action, sentiment = action_and_sentiment
-            if action not in self.valid_action:
-                action = "None"
-            return {
-                "action": action,
-                "sentiment": sentiment,
-                "voice": candidate,
-                "time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-            }
-        return None
     def run(self) -> None:
         """Main processing loop."""
         while True:
             try:
                 # Get text from queue, blocks until text is available
-                text = self.text_queue.get()
                 # Process the text into an action
                 action = self.process_text(text)
                 # If we got a valid action, add it to the action queue
                 if action:
-                    self.action_queue.put(action)
             except Exception as e:
                 logger.error(f"Error processing text: {str(e)}")

+from collections import defaultdict
 import datetime
 import json
 from threading import Thread
 from multiprocessing import Queue
+import time
+from typing import Dict, Any, List, Tuple
 import logging
 import sys
 from mistralai import Mistral
     def __init__(
         self,
+        text_queue: "Queue[Tuple[str, str]]",
+        action_queue: "Queue[Tuple[Dict[str, Any], str]]",
         mistral_api_key: str,
     ):
         super().__init__()
+        self.filtered_text_queue = text_queue
         self.action_queue = action_queue
         self.mistral_client = Mistral(api_key=mistral_api_key)
         self.daemon = True  # Thread will exit when main program exits
         return result.strip()
+    def process_text(self, candidate: str) -> Dict[str, Any] | None:
         """Convert text into an action if a complete command is detected."""
         # Get sentiment first
+        action_and_sentiment = json.loads(self.get_action_and_sentiment(candidate))
+        if not isinstance(action_and_sentiment, list) or len(action_and_sentiment) != 2:
             return None
+        action, sentiment = action_and_sentiment
+        if action not in self.valid_action:
+            action = "None"
+        return {
+            "action": action,
+            "sentiment": sentiment,
+            "voice": candidate,
+            "time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        }
     def run(self) -> None:
         """Main processing loop."""
         while True:
             try:
                 # Get text from queue, blocks until text is available
+                text, session_id = self.filtered_text_queue.get()
                 # Process the text into an action
+                start_time = time.time()
                 action = self.process_text(text)
+                processing_time = time.time() - start_time
+                logger.info(f"{processing_time:.2f}s: {text} -> {action}")
                 # If we got a valid action, add it to the action queue
                 if action:
+                    self.action_queue.put((action, session_id))
             except Exception as e:
                 logger.error(f"Error processing text: {str(e)}")

server/AudioTranscriber.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import io
-from typing import List
 import threading
 from multiprocessing import Queue
 from queue import Empty
@@ -22,8 +22,8 @@ logger = logging.getLogger(__name__)
 class AudioTranscriber(threading.Thread):
     def __init__(
         self,
-        audio_queue: "Queue[io.BytesIO]",
-        text_queue: "Queue[str]",
         language: str = "en",
         confidence_threshold: float = 0.5,
     ):
@@ -31,10 +31,8 @@ class AudioTranscriber(threading.Thread):
         self.audio_queue = audio_queue
         self.action_queue = text_queue
         self.daemon = True  # Thread will exit when main program exits
-        self.max_buffer_size = 4
         self.language = language
         self.confidence_threshold = confidence_threshold
-        self.buffer: List[io.BytesIO] = []
         self.transcriber = WhisperModel(
             "large",
             device="cuda",
@@ -45,32 +43,22 @@ class AudioTranscriber(threading.Thread):
         while True:
             try:
                 # Wait for 1 second before timing out and checking again
-                audio_chunk = self.audio_queue.get(timeout=1)
-                self.buffer.append(audio_chunk)
-                while len(self.buffer) >= self.max_buffer_size:
-                    _ = self.buffer.pop(0)
-                # Create a BytesIO object from the joined buffer
-                joined_buffer = io.BytesIO(
-                    b"".join([chunk.getvalue() for chunk in self.buffer])
-                )
-                segments, info = self.transcriber.transcribe(
-                    joined_buffer, language=self.language
                 )
                 # Put the transcription results in the output queue
                 for segment in segments:
                     if segment.no_speech_prob <= self.confidence_threshold:
-                        self.action_queue.put(segment.text)
                         # Still print for debugging
                         logger.info(
-                            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}"
                         )
                     else:
-                        self.action_queue.put("")
             except Empty:
                 continue  # If queue is empty, continue waiting

 import io
+from typing import Tuple
 import threading
 from multiprocessing import Queue
 from queue import Empty
 class AudioTranscriber(threading.Thread):
     def __init__(
         self,
+        audio_queue: "Queue[Tuple[io.BytesIO, str]]",
+        text_queue: "Queue[Tuple[str, str]]",
         language: str = "en",
         confidence_threshold: float = 0.5,
     ):
         self.audio_queue = audio_queue
         self.action_queue = text_queue
         self.daemon = True  # Thread will exit when main program exits
         self.language = language
         self.confidence_threshold = confidence_threshold
         self.transcriber = WhisperModel(
             "large",
             device="cuda",
         while True:
             try:
                 # Wait for 1 second before timing out and checking again
+                audio_data, session_id = self.audio_queue.get(timeout=1)
+                segments, _ = self.transcriber.transcribe(
+                    audio_data, language=self.language
                 )
                 # Put the transcription results in the output queue
                 for segment in segments:
                     if segment.no_speech_prob <= self.confidence_threshold:
+                        self.action_queue.put((segment.text, session_id))
                         # Still print for debugging
                         logger.info(
+                            f"[Thread {threading.get_ident()}] [{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}"
                         )
                     else:
+                        self.action_queue.put(("", session_id))
             except Empty:
                 continue  # If queue is empty, continue waiting

server/TextFilterer.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from threading import Thread
+from multiprocessing import Queue
+from typing import Tuple, Dict, List
+from collections import defaultdict
+import logging
+import sys
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    handlers=[logging.StreamHandler(sys.stdout)],
+)
+logger = logging.getLogger(__name__)
+class TextFilterer(Thread):
+    def __init__(
+        self,
+        text_queue: "Queue[Tuple[str, str]]",
+        filtered_text_queue: "Queue[Tuple[str, str]]",
+    ):
+        super().__init__()
+        self.text_queue = text_queue
+        self.filtered_text_queue = filtered_text_queue
+        self.daemon = True  # Thread will exit when main program exits
+        self.text_buffers: Dict[str, List[str]] = defaultdict(list)
+        self.max_buffer_size = 5
+    def filter_text(self, text: str, session_id: str) -> str | None:
+        self.text_buffers[session_id].append(text)
+        if len(self.text_buffers[session_id]) < self.max_buffer_size:
+            return None
+        while len(self.text_buffers[session_id]) > self.max_buffer_size:
+            _ = self.text_buffers[session_id].pop(0)
+        candidate = self.text_buffers[session_id][-2]
+        print(f"Candidate: {candidate}")
+        if (
+            len(self.text_buffers[session_id][-3])
+            < len(candidate)
+            >= len(self.text_buffers[session_id][-1])
+        ):
+            for past in self.text_buffers[session_id][:-2]:
+                if candidate == past:
+                    return None
+            return candidate
+        return None
+    def run(self) -> None:
+        """Main processing loop."""
+        while True:
+            try:
+                # Get text from queue, blocks until text is available
+                text, session_id = self.text_queue.get()
+                # Run the text through the filter; yields a candidate string or None
+                filtered_text = self.filter_text(text, session_id)
+                # If the filter produced a candidate, forward it to the filtered text queue
+                if filtered_text:
+                    self.filtered_text_queue.put((filtered_text, session_id))
+            except Exception as e:
+                logger.error(f"Error processing text: {str(e)}")
+                continue

server/__main__.py CHANGED Viewed

@@ -4,14 +4,17 @@ import os
 from flask_cors import CORS
 from multiprocessing import Queue
 import base64
-from typing import Any, List, Dict, Tuple
 from multiprocessing import Queue
 import logging
 import sys
 from server.AudioTranscriber import AudioTranscriber
 from server.ActionProcessor import ActionProcessor
 from server.StandaloneApplication import StandaloneApplication
 # Configure logging
 logging.basicConfig(
@@ -25,14 +28,19 @@ logger = logging.getLogger(__name__)
 # Use a directory in the user's home folder for static files
 STATIC_DIR = (
-    "/app/server/static"
-    if os.getenv("DEBUG") != "true"
-    else "/home/gab/work/gogogo/html"
 )
-audio_queue: "Queue[io.BytesIO]" = Queue()
-text_queue: "Queue[str]" = Queue()
-action_queue: "Queue[Dict[str, Any]]" = Queue()
 app = Flask(__name__, static_folder=STATIC_DIR)
@@ -103,11 +111,13 @@ def post_order() -> Tuple[Response, int]:
         action_text: str = data["action"]
         mid_split = len(action_text) // 2
         # Add the text to the queue
-        text_queue.put(action_text[:mid_split])
-        text_queue.put(action_text)
-        text_queue.put(action_text[mid_split:])
         return jsonify({"status": "success"}), 200
@@ -125,6 +135,7 @@ def process_data():
     try:
         # Check content type
         content_type = request.headers.get("Content-Type", "")
         # Handle different content types
         if "application/json" in content_type:
@@ -158,7 +169,7 @@ def process_data():
             )
         # Put the audio chunk in the queue for processing
-        audio_queue.put(io.BytesIO(audio_chunk))
         return jsonify(
             {
@@ -176,15 +187,13 @@ def process_data():
 @app.route("/api/actions", methods=["GET"])
 def get_actions() -> Tuple[Response, int]:
-    """Retrieve and clear all pending actions from the queue"""
-    actions: List[Dict[str, Any]] = []
-    # Drain the queue into our actions list
-    while not action_queue.empty():
-        try:
-            actions.append(action_queue.get_nowait())
-        except Exception:
-            break
     return jsonify({"actions": actions, "status": "success"}), 200
@@ -197,6 +206,31 @@ def serve_static(path: str):
         abort(404, description=f"File {path} not found in static folder")
 if __name__ == "__main__":
     if os.path.exists(app.static_folder):
         logger.info(f"Static folder contents: {os.listdir(app.static_folder)}")
@@ -204,16 +238,31 @@ if __name__ == "__main__":
     os.makedirs(app.static_folder, exist_ok=True)
     # Start the audio transcriber thread
-    transcriber = AudioTranscriber(audio_queue, text_queue)
-    transcriber.start()
     # Start the action processor thread
     MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
     if not MISTRAL_API_KEY:
         raise ValueError("MISTRAL_API_KEY is not set")
-    action_processor = ActionProcessor(text_queue, action_queue, MISTRAL_API_KEY)
-    action_processor.start()
     options: Any = {
         "bind": "0.0.0.0:7860",

 from flask_cors import CORS
 from multiprocessing import Queue
 import base64
+from typing import Any, Dict, Tuple
 from multiprocessing import Queue
 import logging
 import sys
+from threading import Lock
+from multiprocessing import Manager
 from server.AudioTranscriber import AudioTranscriber
 from server.ActionProcessor import ActionProcessor
 from server.StandaloneApplication import StandaloneApplication
+from server.TextFilterer import TextFilterer
 # Configure logging
 logging.basicConfig(
 # Use a directory in the user's home folder for static files
 STATIC_DIR = (
+    "/app/server/static" if os.getenv("DEBUG") != "True" else "/mnt/UE/work/godot/html"
 )
+# Each packet is a tuple of (data, token)
+audio_queue: "Queue[Tuple[io.BytesIO, str]]" = Queue()
+text_queue: "Queue[Tuple[str, str]]" = Queue()
+filtered_text_queue: "Queue[Tuple[str, str]]" = Queue()
+action_queue: "Queue[Tuple[Dict[str, Any], str]]" = Queue()
+# Thread-safe storage for actions by session
+action_storage_lock = Lock()
+manager = Manager()
+action_storage = manager.dict()  # Shared dictionary across processes
 app = Flask(__name__, static_folder=STATIC_DIR)
         action_text: str = data["action"]
+        token: str = request.headers.get("Authorization").split(" ")[1]
         mid_split = len(action_text) // 2
         # Add the text to the queue
+        text_queue.put((action_text[:mid_split], token))
+        text_queue.put((action_text, token))
+        text_queue.put((action_text[mid_split:], token))
         return jsonify({"status": "success"}), 200
     try:
         # Check content type
         content_type = request.headers.get("Content-Type", "")
+        token: str = request.headers.get("Authorization").split(" ")[1]
         # Handle different content types
         if "application/json" in content_type:
             )
         # Put the audio chunk in the queue for processing
+        audio_queue.put((io.BytesIO(audio_chunk), token))
         return jsonify(
             {
 @app.route("/api/actions", methods=["GET"])
 def get_actions() -> Tuple[Response, int]:
+    """Retrieve and clear all pending actions for the current session"""
+    token: str = request.headers.get("Authorization", "").split(" ")[1]
+    with action_storage_lock:
+        # Get and clear actions for this session
+        actions = action_storage.get(token, [])
+        action_storage[token] = []
     return jsonify({"actions": actions, "status": "success"}), 200
         abort(404, description=f"File {path} not found in static folder")
+class ActionConsumer:
+    def __init__(self, action_queue: Queue):
+        self.action_queue = action_queue
+        self.running = True
+    def start(self):
+        import threading
+        self.thread = threading.Thread(target=self.run, daemon=True)
+        self.thread.start()
+    def run(self):
+        while self.running:
+            try:
+                action, token = self.action_queue.get()
+                with action_storage_lock:
+                    if token not in action_storage:
+                        action_storage[token] = []
+                    current_actions = action_storage[token]
+                    current_actions.append(action)
+                    action_storage[token] = current_actions
+            except Exception as e:
+                logger.error(f"Error in ActionConsumer: {e}")
 if __name__ == "__main__":
     if os.path.exists(app.static_folder):
         logger.info(f"Static folder contents: {os.listdir(app.static_folder)}")
     os.makedirs(app.static_folder, exist_ok=True)
     # Start the audio transcriber thread
+    transcribers = [
+        AudioTranscriber(audio_queue, text_queue)
+        for _ in range(4 if os.getenv("DEBUG") == "True" else 32)
+    ]
+    for transcriber in transcribers:
+        transcriber.start()
+    # Start the action consumer thread
+    action_consumer = ActionConsumer(action_queue)
+    action_consumer.start()
     # Start the action processor thread
     MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
     if not MISTRAL_API_KEY:
         raise ValueError("MISTRAL_API_KEY is not set")
+    filterer = TextFilterer(text_queue, filtered_text_queue)
+    filterer.start()
+    actions_processors = [
+        ActionProcessor(filtered_text_queue, action_queue, MISTRAL_API_KEY)
+        for _ in range(4 if os.getenv("DEBUG") == "True" else 16)
+    ]
+    for actions_processor in actions_processors:
+        actions_processor.start()
     options: Any = {
         "bind": "0.0.0.0:7860",

server/static/godot/index.html CHANGED Viewed

@@ -97,7 +97,7 @@ body {
 		<script src="index.js"></script>
 		<script>
-const GODOT_CONFIG = {"args":[],"canvasResizePolicy":2,"ensureCrossOriginIsolationHeaders":false,"executable":"index","experimentalVK":false,"fileSizes":{"index.pck":18582528,"index.wasm":35376909},"focusCanvas":true,"gdextensionLibs":[]};
 const GODOT_THREADS_ENABLED = false;
 const engine = new Engine(GODOT_CONFIG);

 		<script src="index.js"></script>
 		<script>
+const GODOT_CONFIG = {"args":[],"canvasResizePolicy":2,"ensureCrossOriginIsolationHeaders":false,"executable":"index","experimentalVK":false,"fileSizes":{"index.pck":18582560,"index.wasm":35376909},"focusCanvas":true,"gdextensionLibs":[]};
 const GODOT_THREADS_ENABLED = false;
 const engine = new Engine(GODOT_CONFIG);

server/static/godot/index.pck CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aed0401261f4e6874f34383b267790e2465c030c957dc078a8eebb7bdc5a6e3f
-size 18582528

 version https://git-lfs.github.com/spec/v1
+oid sha256:ec1cd2b27c61d036b88d4e08696c9f029e01aee83777894e465bc51906e9be54
+size 18582560

server/static/index.html CHANGED Viewed

@@ -41,8 +41,6 @@
         // Écouter les messages venant de l'iframe
         window.addEventListener('message', function (event) {
-            // Vérifier que le message a la structure attendue
-            console.log(event.data)
             if (event.data?.type === 'game_token') {
                 token = event.data.data
                 console.log("Token reçu :", token)
@@ -70,8 +68,6 @@
                 const errorMessage = error.name === 'AbortError'
                     ? 'Server request timed out. Please try again later.'
                     : 'Could not connect to the server. Please try again later.'
-                console.error('Server check failed:', error)
-                console.error(errorMessage)
                 throw error
             })
@@ -88,21 +84,23 @@
                 .then(stream => {
                     const audioContext = new (window.AudioContext || window.webkitAudioContext)()
                     const mediaRecorder = new MediaRecorder(stream)
-                    const audioChunks = []
                     mediaRecorder.ondataavailable = event => {
-                        audioChunks.push(event.data)
-                    }
-                    mediaRecorder.onstop = () => {
-                        const audioBlob = new Blob(audioChunks, { type: 'audio/webm' })
-                        audioChunks.length = 0 // Clear chunks after creating the Blob
-                        // Convert Blob to base64
                         const reader = new FileReader()
-                        reader.readAsDataURL(audioBlob)
                         reader.onloadend = () => {
-                            // Extract the base64 data (remove the data URL prefix)
                             const base64Audio = reader.result.split(',')[1]
                             // Send as JSON with base64-encoded audio
@@ -112,7 +110,8 @@
                             fetch(serverUrl, {
                                 method: 'POST',
                                 headers: {
-                                    'Content-Type': 'application/json'
                                 },
                                 body: JSON.stringify({
                                     audio_chunk: base64Audio
@@ -123,20 +122,16 @@
                                 console.log('Audio chunk sent successfully')
                             }).catch(error => {
                                 clearTimeout(audioTimeout)
-                                console.error('Error sending audio chunk:', error)
                             })
                         }
                     }
                     // Start recording in intervals
-                    const chunkInterval = 700 // Chunk duration in milliseconds
                     setInterval(() => {
-                        if (mediaRecorder.state === 'recording') {
-                            mediaRecorder.stop()
-                            mediaRecorder.start()
-                        } else {
-                            mediaRecorder.start()
-                        }
                     }, chunkInterval)
                 })
                 .catch(error => {

         // Écouter les messages venant de l'iframe
         window.addEventListener('message', function (event) {
             if (event.data?.type === 'game_token') {
                 token = event.data.data
                 console.log("Token reçu :", token)
                 const errorMessage = error.name === 'AbortError'
                     ? 'Server request timed out. Please try again later.'
                     : 'Could not connect to the server. Please try again later.'
                 throw error
             })
                 .then(stream => {
                     const audioContext = new (window.AudioContext || window.webkitAudioContext)()
                     const mediaRecorder = new MediaRecorder(stream)
+                    const audioBuffer = [] // Buffer holding the most recent recordings (capped at MAX_BUFFER_SIZE)
+                    const MAX_BUFFER_SIZE = 12
                     mediaRecorder.ondataavailable = event => {
+                        // Add new chunk to buffer and maintain max size
+                        audioBuffer.push(event.data)
+                        if (audioBuffer.length > MAX_BUFFER_SIZE) {
+                            audioBuffer.shift() // Remove oldest chunk
+                        }
+                        // Merge all blobs in buffer
+                        const mergedBlob = new Blob(audioBuffer, { type: 'audio/webm' })
+                        // Convert merged Blob to base64
                         const reader = new FileReader()
+                        reader.readAsDataURL(mergedBlob)
                         reader.onloadend = () => {
                             const base64Audio = reader.result.split(',')[1]
                             // Send as JSON with base64-encoded audio
                             fetch(serverUrl, {
                                 method: 'POST',
                                 headers: {
+                                    'Content-Type': 'application/json',
+                                    'Authorization': `Bearer ${token}`
                                 },
                                 body: JSON.stringify({
                                     audio_chunk: base64Audio
                                 console.log('Audio chunk sent successfully')
                             }).catch(error => {
                                 clearTimeout(audioTimeout)
                             })
                         }
                     }
                     // Start recording in intervals
+                    const chunkInterval = 300 // Chunk duration in milliseconds
+                    mediaRecorder.start()
                     setInterval(() => {
+                        mediaRecorder.stop()
+                        mediaRecorder.start()
                     }, chunkInterval)
                 })
                 .catch(error => {