/**
 * Plays audio samples using the Web Audio API.
 * @param {Float32Array} audioSamples - The audio samples to play.
 * @param {number} sampleRate - The sample rate of the audio samples. Defaults to 16000.
 */
function playAudioSamples(audioSamples, sampleRate = 16000) {
    // Create an AudioContext
    const audioContext = new (window.AudioContext || window.webkitAudioContext)();
    // Create a mono AudioBuffer sized to the sample array
    const audioBuffer = audioContext.createBuffer(
        1,                   // number of channels
        audioSamples.length, // length of the buffer in samples
        sampleRate           // sample rate (samples per second)
    );
    // Fill the AudioBuffer with the Float32Array of audio samples
    audioBuffer.getChannelData(0).set(audioSamples);
    // Create a BufferSource node and point it at the buffer
    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;
    // Connect the source to the AudioContext's destination (the speakers)
    source.connect(audioContext.destination);
    // Start playback
    source.start();
}
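// Example usage (a minimal sketch, not part of the demo): generate one second
// of a 440 Hz sine tone and play it back. The tone frequency and amplitude
// here are illustrative assumptions, not values used by Hey Buddy.
// const toneRate = 16000;
// const tone = new Float32Array(toneRate);
// for (let i = 0; i < tone.length; i++) {
//     tone[i] = Math.sin(2 * Math.PI * 440 * (i / toneRate)) * 0.25;
// }
// playAudioSamples(tone, toneRate);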
/**
 * Turns floating-point audio samples into a WAV blob.
 * @param {Float32Array} audioSamples - The audio samples to encode.
 * @param {number} sampleRate - The sample rate of the audio samples. Defaults to 16000.
 * @param {number} numChannels - The number of channels in the audio. Defaults to 1 (mono).
 * @return {Blob} A blob of type `audio/wav`.
 */
function samplesToBlob(audioSamples, sampleRate = 16000, numChannels = 1) {
    // Helper to write an ASCII string to the DataView
    const writeString = (view, offset, string) => {
        for (let i = 0; i < string.length; i++) {
            view.setUint8(offset + i, string.charCodeAt(i));
        }
    };
    // Helper to convert Float32 samples to 16-bit PCM, written little-endian
    const floatTo16BitPCM = (output, offset, input) => {
        for (let i = 0; i < input.length; i++, offset += 2) {
            const s = Math.max(-1, Math.min(1, input[i])); // Clamp to [-1, 1]
            output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
    };
    // Calculate sizes (16-bit PCM = 2 bytes per sample)
    const byteRate = sampleRate * numChannels * 2;
    const blockAlign = numChannels * 2;
    const wavHeaderSize = 44;
    const dataLength = audioSamples.length * numChannels * 2;
    const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
    const view = new DataView(buffer);
    // Write the WAV (RIFF) header
    writeString(view, 0, 'RIFF');              // ChunkID
    view.setUint32(4, 36 + dataLength, true);  // ChunkSize
    writeString(view, 8, 'WAVE');              // Format
    writeString(view, 12, 'fmt ');             // Subchunk1ID
    view.setUint32(16, 16, true);              // Subchunk1Size (PCM = 16)
    view.setUint16(20, 1, true);               // AudioFormat (PCM = 1)
    view.setUint16(22, numChannels, true);     // NumChannels
    view.setUint32(24, sampleRate, true);      // SampleRate
    view.setUint32(28, byteRate, true);        // ByteRate
    view.setUint16(32, blockAlign, true);      // BlockAlign
    view.setUint16(34, 16, true);              // BitsPerSample (16-bit PCM)
    writeString(view, 36, 'data');             // Subchunk2ID
    view.setUint32(40, dataLength, true);      // Subchunk2Size
    // Convert the Float32Array samples to 16-bit PCM after the header
    floatTo16BitPCM(view, wavHeaderSize, audioSamples);
    // Create and return the Blob
    return new Blob([view], { type: 'audio/wav' });
}
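// Example usage (illustrative sketch): encode half a second of silence and
// check the expected size, 44 header bytes plus 2 bytes per 16-bit sample.
// const silence = new Float32Array(8000);
// const wav = samplesToBlob(silence, 16000, 1);
// console.assert(wav.size === 44 + silence.length * 2);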
/**
 * Renders a blob to an audio element with controls.
 * Use `appendChild(result)` to add it to the document or a node.
 * @param {Blob} audioBlob - A blob with a valid audio type.
 * @see samplesToBlob
 */
function blobToAudio(audioBlob) {
    const url = URL.createObjectURL(audioBlob);
    const audio = document.createElement("audio");
    audio.controls = true;
    audio.src = url;
    return audio;
}
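// Example usage (sketch): render a recording for playback. Note that the
// object URL created above is never revoked; if elements are replaced often,
// calling URL.revokeObjectURL(oldAudio.src) on the discarded element avoids
// holding the blob in memory.
// const element = blobToAudio(samplesToBlob(audioSamples));
// document.body.appendChild(element);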
/** Configuration */
const colors = {
    "buddy": [0, 119, 187],
    "hey buddy": [0, 153, 136],
    "hi buddy": [51, 227, 138],
    "sup buddy": [238, 119, 51],
    "yo buddy": [204, 51, 217],
    "okay buddy": [238, 51, 119],
    "hello buddy": [184, 62, 104],
    "speech": [22, 200, 206],
    "frame budget": [25, 255, 25]
};
const rootUrl = "https://huggingface.co/benjamin-paine/hey-buddy/resolve/main";
const wakeWords = ["buddy", "hey buddy", "hi buddy", "sup buddy", "yo buddy", "okay buddy", "hello buddy"];
const canvasSize = { width: 640, height: 100 };
const graphLineWidth = 1;
const options = {
    debug: true,
    modelPath: wakeWords.map((word) => `${rootUrl}/models/${word.replace(' ', '-')}.onnx`),
    vadModelPath: `${rootUrl}/pretrained/silero-vad.onnx`,
    spectrogramModelPath: `${rootUrl}/pretrained/mel-spectrogram.onnx`,
    embeddingModelPath: `${rootUrl}/pretrained/speech-embedding.onnx`,
};
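// Sanity check (illustrative): the modelPath template above yields one ONNX
// URL per wake word; for wakeWords[1] ("hey buddy") that is:
// console.log(options.modelPath[1]);
// // "https://huggingface.co/benjamin-paine/hey-buddy/resolve/main/models/hey-buddy.onnx"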
/** Main */
document.addEventListener("DOMContentLoaded", async () => {
    /** DOM elements */
    const graphsContainer = document.getElementById("graphs");
    const audioContainer = document.getElementById("audio");
    /** Memory for drawing */
    const graphs = {};
    const history = {};
    const current = {};
    const active = {};
    /** Get user media to request permission and start the microphone */
    try {
        await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (error) {
        alert("Microphone access has been denied; this demo will not function. Please reset audio permissions and refresh the page to try again.");
        return;
    }
    /** Instantiate */
    const heyBuddy = new HeyBuddy(options);
    /** Add callbacks */
    // When a frame is processed, update state for the next draw
    heyBuddy.onProcessed((result) => {
        current["frame budget"] = heyBuddy.frameTimeEma;
        current["speech"] = result.speech.probability || 0.0;
        active["speech"] = result.speech.active;
        for (let wakeWord in result.wakeWords) {
            current[wakeWord.replace('-', ' ')] = result.wakeWords[wakeWord].probability || 0.0;
            active[wakeWord.replace('-', ' ')] = result.wakeWords[wakeWord].active;
        }
        if (result.recording) {
            audioContainer.innerHTML = "Recording…";
        }
    });
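    // Shape of `result` as consumed above (inferred from this file's usage;
    // the authoritative definition lives in the hey-buddy library):
    // {
    //   speech:    { probability: number, active: boolean },
    //   wakeWords: { "hey-buddy": { probability: number, active: boolean }, ... },
    //   recording: boolean
    // }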
    // When recording is complete, replace the audio element
    heyBuddy.onRecording((audioSamples) => {
        const audioBlob = samplesToBlob(audioSamples);
        const audioElement = blobToAudio(audioBlob);
        audioContainer.innerHTML = "";
        audioContainer.appendChild(audioElement);
    });
    /** Add graphs */
    for (let graphName of ["wake words", "speech", "frame budget"]) {
        // Create containers for the graph and its label
        const graphContainer = document.createElement("div");
        const graphLabel = document.createElement("label");
        graphLabel.textContent = graphName;
        // Create a canvas for the graph
        const graphCanvas = document.createElement("canvas");
        graphCanvas.className = "graph";
        graphCanvas.width = canvasSize.width;
        graphCanvas.height = canvasSize.height;
        graphs[graphName] = graphCanvas;
        // Add the canvas to the container and the container to the document
        graphContainer.appendChild(graphCanvas);
        graphContainer.appendChild(graphLabel);
        graphsContainer.appendChild(graphContainer);
        // If this is the wake-word graph, also add a legend
        if (graphName === "wake words") {
            const graphLegend = document.createElement("div");
            graphLegend.className = "legend";
            for (let wakeWord of wakeWords) {
                const legendItem = document.createElement("div");
                const [r, g, b] = colors[wakeWord];
                legendItem.style.color = `rgb(${r},${g},${b})`;
                legendItem.textContent = wakeWord;
                graphLegend.appendChild(legendItem);
            }
            graphLabel.appendChild(graphLegend);
        }
    }
    /** Define the draw loop */
    const draw = () => {
        // Draw speech and model graphs
        for (let graphName in graphs) {
            const isWakeWords = graphName === "wake words";
            const isFrameBudget = graphName === "frame budget";
            const subGraphs = isWakeWords ? wakeWords : [graphName];
            let isFirst = true;
            for (let name of subGraphs) {
                // Update history
                history[name] = history[name] || [];
                if (isFrameBudget) {
                    // Normalize frame time to the 120 ms per-frame budget
                    history[name].push((current[name] || 0.0) / 120.0);
                } else {
                    history[name].push(current[name] || 0.0);
                }
                // Trim history to the canvas width (one sample per pixel)
                if (history[name].length > canvasSize.width) {
                    history[name] = history[name].slice(-canvasSize.width);
                }
                // Draw graph
                const canvas = graphs[graphName];
                const ctx = canvas.getContext("2d");
                const [r, g, b] = colors[name];
                const opacity = isFrameBudget || active[name] ? 1.0 : 0.5;
                if (isFirst) {
                    // Clear the canvas before the first sub-graph
                    ctx.clearRect(0, 0, canvas.width, canvas.height);
                    isFirst = false;
                }
                ctx.strokeStyle = `rgba(${r},${g},${b},${opacity})`;
                ctx.fillStyle = `rgba(${r},${g},${b},${opacity / 2})`;
                ctx.lineWidth = graphLineWidth;
                // Draw from left to right (the trace shifts right to left)
                ctx.beginPath();
                let lastX = 0;
                for (let i = 0; i < history[name].length; i++) {
                    const x = i;
                    const y = canvas.height - history[name][i] * canvas.height;
                    if (i === 0) {
                        ctx.moveTo(x, y);
                    } else {
                        ctx.lineTo(x, y);
                    }
                    lastX = x;
                }
                // Extend down to the baseline to close a fillable polygon
                ctx.lineTo(lastX, canvas.height);
                ctx.lineTo(0, canvas.height);
                ctx.closePath();
                ctx.fill();
                ctx.stroke();
            }
        }
        // Request the next frame
        requestAnimationFrame(draw);
    };
    /** Start the loop */
    requestAnimationFrame(draw);
});