Spaces:
Runtime error
Runtime error
| import * as THREE from 'three'; | |
| import {OrbitControls} from 'three/examples/jsm/controls/OrbitControls.js'; | |
| import ThreeMeshUI, {Block, Text} from 'three-mesh-ui'; | |
| import FontJSON from '../assets/RobotoMono-Regular-msdf.json?url'; | |
| import FontImage from '../assets/RobotoMono-Regular.png'; | |
| import {TranslationSentences} from '../types/StreamingTypes'; | |
| import supportedCharSet from './supportedCharSet'; | |
// Augment three-mesh-ui types which aren't implemented.
// These declarations are merged into the library's `Block` and `Text`
// interfaces so that the calls made further down in this file type-check.
declare module 'three-mesh-ui' {
  interface Block {
    // Used below to nest one block inside another and to attach text to a block.
    add(any: any);
    // Used below to update a block's size and background on every frame.
    set(props: BlockOptions);
    // Position of the block; `set` assigns all three coordinates at once.
    position: {
      x: number;
      y: number;
      z: number;
      set: (x: number, y: number, z: number) => void;
    };
  }
  interface Text {
    // Used below to update the text content of an existing Text element.
    set(props: {content: string});
  }
}
// Various configuration parameters
// Text placed in front of the translated sentences when building the transcript.
const INITIAL_PROMPT = 'Listening...\n';
// Number of text blocks created, and the maximum number of lines laid out at once.
const NUM_LINES = 3;
// Maximum number of characters on a line before the next word wraps.
const CHARS_PER_LINE = 37;
// Rate at which characters of the transcript are revealed.
const CHARS_PER_SECOND = 15;
// Width given to every text block when it is first created.
const MAX_WIDTH = 0.89;
// Width budgeted per character when a block is resized to fit its content.
const CHAR_WIDTH = 0.0233;
// Y coordinate from which the bottom (most recent) line is laid out.
const Y_COORD_START = -0.38;
// Z coordinate used for every text block.
const Z_COORD = -1.3;
// Height of a single line / text block.
const LINE_HEIGHT = 0.062;
// Extra vertical gap added between sentences.
const BLOCK_SPACING = 0.02;
// Font size passed to three-mesh-ui for the text.
const FONT_SIZE = 0.038;
// Speed of scrolling of text lines: the Y distance a line moves per update.
const SCROLL_Y_DELTA = 0.01;
// Overlay an extra block for padding due to inflexibilities of native padding
const OFFSET = 0.01;
const OFFSET_WIDTH = OFFSET * 3;
// How often the cursor toggles between shown and hidden, in milliseconds.
const CURSOR_BLINK_INTERVAL_MS = 500;
// Mutable state that drives the transcript animation.
type TranscriptState = {
  // Full text to display: the initial prompt followed by the translated sentences.
  translationText: string;
  // Props for the text blocks that are updated on each frame.
  textBlocksProps: TextBlockProps[];
  // Number of characters of translationText that have been revealed so far.
  lastTranslationStringIndex: number;
  // NOTE(review): only initialised to 0 in the visible code and never read —
  // confirm whether this field is still needed.
  lastTranslationLineStartIndex: number;
  // Wrapped lines from the most recent chunking pass, newest line first.
  transcriptLines: string[];
  // Timestamp (milliseconds) of the last update, used to pace the reveal.
  lastUpdateTime: number;
};
// Display properties for a single line of the transcript.
type TextBlockProps = {
  content: string;
  // The end position when animating
  targetY: number;
  // Current scroll position that caps at targetY
  currentY: number;
  // NOTE(review): computed for every line but not applied to any mesh in the
  // visible code — confirm whether it should be.
  textOpacity: number;
  backgroundOpacity: number;
  // Index of the line within transcriptLines (0 for the initial placeholders).
  index: number;
  // True for the most recent line, which is the one that shows the cursor.
  isBottomLine: boolean;
};
/**
 * Builds the placeholder props for `count` text blocks.
 *
 * Every placeholder is fully transparent and empty, and sits at the starting
 * Y coordinate.
 *
 * @param count Number of placeholder entries to create.
 * @returns A new array with `count` independent placeholder objects.
 */
function initialTextBlockProps(count: number): TextBlockProps[] {
  // Mesh UI crashes if elements are added to / removed from the screen, so the
  // blocks always exist and start out as non-displayed placeholders.
  const makeHiddenPlaceholder = () => ({
    targetY: Y_COORD_START,
    currentY: Y_COORD_START,
    index: 0,
    textOpacity: 0,
    backgroundOpacity: 0,
    width: MAX_WIDTH,
    height: LINE_HEIGHT,
    content: '',
    isBottomLine: true,
  });
  return Array.from({length: count}, makeHiddenPlaceholder);
}
/**
 * Creates a fresh transcript state: no text, nothing revealed yet, hidden
 * placeholder props for every line, and the clock set to the current time.
 *
 * @returns A new state object; nothing is shared between calls.
 */
function initialState(): TranscriptState {
  const now = Date.now();
  const placeholders = initialTextBlockProps(NUM_LINES);
  const freshState: TranscriptState = {
    translationText: '',
    textBlocksProps: placeholders,
    lastTranslationStringIndex: 0,
    lastTranslationLineStartIndex: 0,
    transcriptLines: [],
    lastUpdateTime: now,
  };
  return freshState;
}
// Current transcript state; replaced wholesale by resetState().
let transcriptState: TranscriptState = initialState();
// three.js objects created by init(). They are explicitly null until init()
// has run, matching their declared `| null` types.
let scene: THREE.Scene | null = null;
let camera: THREE.PerspectiveCamera | null = null;
let renderer: THREE.WebGLRenderer | null = null;
// OrbitControls is imported from the three.js examples module; it is not a
// member of the THREE namespace.
let controls: OrbitControls | null = null;
// Toggled by the timer below; read when deciding whether to draw the cursor.
let cursorBlinkOn = false;
setInterval(() => {
  cursorBlinkOn = !cursorBlinkOn;
}, CURSOR_BLINK_INTERVAL_MS);
// The three-mesh-ui objects that make up one rendered line of text.
type TextBlock = {
  // Block that carries the visible background; it is added to the camera.
  textBlockOuterContainer: Block;
  // Block with zero background opacity that holds the text.
  textBlockInnerContainer: Block;
  text: Text;
};
// Text blocks created by init(); addressed by index when updating.
const textBlocks: TextBlock[] = [];
/**
 * Returns the renderer created by init().
 *
 * @returns The current renderer, or a nullish value if init() has not been
 *   called yet.
 */
export function getRenderer(): THREE.WebGLRenderer | null {
  return renderer;
}
/**
 * Creates the scene, camera, renderer and orbit controls, builds the text
 * blocks, and starts the render loop.
 *
 * Calling init() again replaces the previous setup: the earlier renderer's
 * animation loop is stopped and the earlier text blocks are forgotten, so the
 * new blocks are the ones that get updated.
 *
 * @param width Width passed to the renderer and used for the camera aspect ratio.
 * @param height Height passed to the renderer and used for the camera aspect ratio.
 * @param parentElement Element that receives the renderer's canvas; when null
 *   the canvas is created but not attached.
 * @returns The newly created renderer.
 */
export function init(
  width: number,
  height: number,
  parentElement: HTMLDivElement | null,
): THREE.WebGLRenderer {
  // A renderer left over from an earlier init() would keep invoking loop(),
  // so every frame would be processed twice. Stop it first.
  renderer?.setAnimationLoop(null);
  // Blocks are addressed by index when updating. Without clearing, blocks
  // attached to the previous camera would stay at the front of the array and
  // the new ones would never be updated.
  textBlocks.length = 0;

  scene = new THREE.Scene();
  scene.background = new THREE.Color(0x505050);
  camera = new THREE.PerspectiveCamera(60, width / height, 0.1, 1000);
  camera.position.z = 1;
  renderer = new THREE.WebGLRenderer({
    antialias: true,
  });
  renderer.setPixelRatio(window.devicePixelRatio);
  renderer.setSize(width, height);
  renderer.xr.enabled = true;
  renderer.xr.setReferenceSpaceType('local');
  parentElement?.appendChild(renderer.domElement);
  controls = new OrbitControls(camera, renderer.domElement);
  controls.update();
  // The camera must be part of the scene because the text blocks are added to
  // the camera rather than to the scene directly.
  scene.add(camera);
  textBlocks.push(
    ...initialTextBlockProps(NUM_LINES).map((props) => makeTextBlock(props)),
  );
  renderer.setAnimationLoop(loop);
  return renderer;
}
/**
 * Replaces the text to display with the initial prompt followed by the given
 * sentences, one sentence per line.
 *
 * @param translationSentences Sentences to show after the initial prompt.
 */
export function updatetranslationText(
  translationSentences: TranslationSentences,
): void {
  const combinedText = `${INITIAL_PROMPT}${translationSentences.join('\n')}`;
  // Only write when the text actually changed.
  if (transcriptState.translationText !== combinedText) {
    transcriptState.translationText = combinedText;
  }
}
/**
 * Discards the current transcript state and replaces it with a fresh initial
 * state (no text, nothing revealed, hidden placeholder lines).
 */
export function resetState(): void {
  transcriptState = initialState();
}
/**
 * Creates the three-mesh-ui objects for one line of text and attaches them to
 * the camera.
 *
 * The result is an outer block (with the background) containing an inner
 * transparent block, which in turn contains the text.
 *
 * @param props Initial props; only `content` and `backgroundOpacity` are used.
 * @returns The created outer block, inner block and text element.
 * @throws Error if the camera has not been created yet (init() not called).
 */
function makeTextBlock({
  content,
  backgroundOpacity,
}: TextBlockProps): TextBlock {
  // The blocks are parented to the camera, so it has to exist already.
  if (camera == null) {
    throw new Error('makeTextBlock() requires the camera created by init()');
  }
  const width = MAX_WIDTH;
  const height = LINE_HEIGHT;
  const fontProps = {
    fontSize: FONT_SIZE,
    textAlign: 'left',
    // TODO: support more language charsets
    // This renders using MSDF format supported in WebGL. Renderable characters are defined in the "charset" json
    // Currently supports most default keyboard inputs but this would exclude many non latin charset based languages.
    // You can use https://msdf-bmfont.donmccurdy.com/ for easily generating these files
    fontFamily: FontJSON,
    fontTexture: FontImage,
  };
  // The outer block is wider than the inner one by OFFSET_WIDTH; it acts as
  // padding around the text.
  const textBlockOuterContainer = new Block({
    backgroundOpacity,
    width: width + OFFSET_WIDTH,
    height: height,
    borderRadius: 0,
    ...fontProps,
  });
  const text = new Text({content});
  const textBlockInnerContainer = new Block({
    padding: 0,
    backgroundOpacity: 0,
    width,
    height,
  });
  // Adding it to the camera makes the UI follow it.
  camera.add(textBlockOuterContainer);
  textBlockOuterContainer.add(textBlockInnerContainer);
  textBlockInnerContainer.add(text);
  return {
    textBlockOuterContainer,
    textBlockInnerContainer,
    text,
  };
}
/**
 * Applies one line's props to the text block at position `id`: resizes the
 * block to its content, moves it toward its target height, and sets the text.
 *
 * Also writes the resulting Y position back into
 * `transcriptState.textBlocksProps[id].currentY`.
 *
 * @param id Index into both `textBlocks` and `transcriptState.textBlocksProps`.
 * @param props Props of the line to display in that block.
 */
function updateTextBlock(
  id: number,
  {content, targetY, currentY, backgroundOpacity, isBottomLine}: TextBlockProps,
): void {
  const block = textBlocks[id];
  const outer = block.textBlockOuterContainer;
  const inner = block.textBlockInnerContainer;
  const label = block.text;

  // The cursor is only drawn on the bottom line, during the "on" half of the
  // blink, and once all of the available text has been revealed.
  const allTextRevealed =
    transcriptState.lastTranslationStringIndex >=
    transcriptState.translationText.length;
  const showCursor = isBottomLine && cursorBlinkOn && allTextRevealed;
  const displayedText = showCursor ? content + '|' : content;

  // Width is based on the text without the cursor. The bottom line always
  // reserves room for a potential cursor, and short lines get one extra
  // character of width.
  const numChars = content.length;
  const cursorAllowance = isBottomLine ? 1.1 : 0;
  const shortLineAllowance = numChars < 10 ? 1 : 0;
  const width = (numChars + cursorAllowance + shortLineAllowance) * CHAR_WIDTH;
  const height = LINE_HEIGHT;

  // Width starts from 0 and goes 1/2 in each direction so offset x
  const xPosition = width / 2 - MAX_WIDTH / 2 + OFFSET_WIDTH;

  outer.set({
    backgroundOpacity,
    width: width + 2 * OFFSET_WIDTH,
    height: height + OFFSET / 3,
    borderRadius: 0,
  });

  // The bottom line sits directly at its target; older lines scroll up toward
  // theirs by a fixed step, never overshooting.
  let y: number;
  if (isBottomLine) {
    y = targetY;
  } else {
    y = Math.min(currentY + SCROLL_Y_DELTA, targetY);
  }
  transcriptState.textBlocksProps[id].currentY = y;
  outer.position.set(-OFFSET_WIDTH + xPosition, y, Z_COORD);

  inner.set({
    padding: 0,
    backgroundOpacity: 0,
    width,
    height,
  });
  label.set({content: displayedText});
}
/**
 * Splits the revealed part of the text so it fits line by line into the UI.
 *
 * Only `translationText.substring(0, nextTranslationStringIndex)` is used.
 * The result is ordered newest first: index 0 is the line currently being
 * written. Between two sentences (separated by '\n' in the input) a marker
 * entry '\n' is inserted, followed by a fresh '' line for the next sentence.
 *
 * Every `<unk>` token is removed, characters missing from the charset are
 * dropped (and logged), and empty tokens are skipped so that lines never
 * start or end with a stray space. A single word longer than `charsPerLine`
 * is placed on its own line and is not broken up.
 *
 * @param translationText Full text, sentences separated by '\n'.
 * @param nextTranslationStringIndex Number of leading characters to lay out.
 * @param charsPerLine Maximum characters per line; defaults to CHARS_PER_LINE.
 * @param charSet Set of renderable characters; defaults to supportedCharSet().
 * @returns Lines, newest first, with '\n' markers between sentences.
 */
function chunkTranslationTextIntoLines(
  translationText: string,
  nextTranslationStringIndex: number,
  charsPerLine: number = CHARS_PER_LINE,
  charSet: {has(char: string): boolean} = supportedCharSet(),
): string[] {
  // Ideally we continue where we left off but this is complicated when we have mid-words. Recalculating for now
  const sentences = translationText
    .substring(0, nextTranslationStringIndex)
    .split('\n');
  const transcriptLines = [''];
  sentences.forEach((sentence, sentenceIdx) => {
    for (const rawWord of sentence.split(/\s+/)) {
      // Strip the unknown-symbol token first (all occurrences), so it is
      // removed even when '<' or '>' are not part of the charset.
      const word = [...rawWord.replace(/<unk>/g, '')]
        .filter((c) => {
          if (charSet.has(c)) {
            return true;
          }
          console.error(
            `Unsupported char ${c} - make sure this is supported in the font family msdf file`,
          );
          return false;
        })
        .join('');
      // Leading, trailing or repeated whitespace yields empty tokens; nothing
      // to place for those.
      if (word === '') {
        continue;
      }
      const currentLine = transcriptLines[0];
      if (currentLine === '') {
        // First word on a line: no separating space.
        transcriptLines[0] = word;
      } else if (currentLine.length + 1 + word.length <= charsPerLine) {
        transcriptLines[0] = currentLine + ' ' + word;
      } else {
        transcriptLines.unshift(word);
      }
    }
    // Sentence boundary: add the spacing marker and start a new empty line.
    if (sentenceIdx < sentences.length - 1) {
      transcriptLines.unshift('\n');
      transcriptLines.unshift('');
    }
  });
  return transcriptLines;
}
/**
 * Advances the typing animation and recomputes the props of the visible lines.
 *
 * Based on the time since the last update, a number of additional characters
 * of `translationText` is revealed (CHARS_PER_SECOND). The revealed text is
 * re-wrapped into lines and the newest NUM_LINES lines are turned into props,
 * stacked upward from Y_COORD_START. Does nothing when too little time has
 * passed or when there is nothing new to reveal.
 */
function updateTextBlocksProps(): void {
  const {translationText, lastTranslationStringIndex, lastUpdateTime} =
    transcriptState;
  const currentTime = new Date().getTime();
  const charsToRender = Math.round(
    ((currentTime - lastUpdateTime) * CHARS_PER_SECOND) / 1000,
  );
  if (charsToRender < 1) {
    // Wait some more until we render more characters.
    // lastUpdateTime is deliberately left alone so elapsed time accumulates.
    return;
  }
  const nextTranslationStringIndex = Math.min(
    lastTranslationStringIndex + charsToRender,
    translationText.length,
  );
  if (nextTranslationStringIndex === lastTranslationStringIndex) {
    // No new characters to render
    transcriptState.lastUpdateTime = currentTime;
    return;
  }
  // Ideally we continue where we left off but this is complicated when we have mid-words. Recalculating for now
  const transcriptLines = chunkTranslationTextIntoLines(
    translationText,
    nextTranslationStringIndex,
  );
  transcriptState.transcriptLines = transcriptLines;
  transcriptState.lastTranslationStringIndex = nextTranslationStringIndex;
  // Compute the new props for each text block
  const newTextBlocksProps: TextBlockProps[] = [];
  // We start with the most recent line and increment the y coordinate for older lines.
  // If it is a new sentence we increment the y coordinate a little more to leave a visible space
  let y = Y_COORD_START;
  for (const [i, line] of transcriptLines.entries()) {
    // Older lines beyond the visible ones are irrelevant; stop early instead
    // of walking the whole transcript every frame.
    if (newTextBlocksProps.length === NUM_LINES) {
      break;
    }
    if (line === '\n') {
      y += BLOCK_SPACING;
      continue;
    }
    const isBottomLine = newTextBlocksProps.length === 0;
    const textOpacity = 1 - 0.1 * newTextBlocksProps.length;
    const previousProps = transcriptState.textBlocksProps.find(
      (props) => props.index === i,
    );
    const props: TextBlockProps = {
      targetY: y + LINE_HEIGHT / 2,
      // `??` rather than `||`: a previous position of exactly 0 is a valid
      // coordinate and must not be replaced by the fallback.
      currentY: isBottomLine ? y : previousProps?.currentY ?? y,
      index: i,
      textOpacity,
      backgroundOpacity: 1,
      content: line,
      isBottomLine,
    };
    newTextBlocksProps.push(props);
    y += LINE_HEIGHT;
  }
  transcriptState.textBlocksProps = newTextBlocksProps;
  transcriptState.lastUpdateTime = currentTime;
}
/**
 * The main render loop, everything gets rendered here.
 *
 * Each frame: advance the transcript state, push the resulting props into the
 * text blocks, let three-mesh-ui update its layout, update the orbit controls
 * and render the scene. Does nothing if init() has not set everything up.
 */
function loop(): void {
  // All of these are created together by init(); without them there is
  // nothing to update or render.
  if (scene == null || camera == null || renderer == null || controls == null) {
    return;
  }
  updateTextBlocksProps();
  // forEach: the callback is run for its side effects only.
  transcriptState.textBlocksProps.forEach((props, i) =>
    updateTextBlock(i, props),
  );
  ThreeMeshUI.update();
  controls.update();
  renderer.render(scene, camera);
}