Spaces:
Build error
Build error
// Default completion parameters. `llama()` spreads these into every request body
// and lets caller-supplied params override them. The spread is shallow, so the
// object and its nested `stop` array are frozen to keep one request from ever
// altering the defaults seen by the next.
const paramDefaults = Object.freeze({
  stream: true,
  n_predict: 500,
  temperature: 0.2,
  stop: Object.freeze(["</s>"]),
});

// Module-level cache of the server's generation settings. It is filled from the
// final (stop) chunk of a completion, or by `llamaModelInfo()` on first use.
let generation_settings = null;
// Completes the prompt as a generator. Recommended for most use cases.
//
// Example:
//
//    import { llama } from '/completion.js'
//
//    const request = llama("Tell me a joke", {n_predict: 800})
//    for await (const chunk of request) {
//      document.write(chunk.data.content)
//    }
//
// Parameters:
//   prompt - prompt text; sent as the `prompt` field of the JSON request body.
//   params - completion parameters, merged over `paramDefaults`. When
//            `params.api_key` is set it is also sent as an
//            `Authorization: Bearer` header.
//   config - optional `controller` (an AbortController), `api_url` (trailing
//            slashes are stripped) and `endpoint` (defaults to '/completion').
//
// Yields, for every non-empty `data:` line, the stream read-result object with
// its `data` property replaced by the parsed JSON payload.
// The generator's return value is the concatenation of every `data.content`.
//
// Throws whatever fetch, the stream reader or JSON.parse throw, and
// `Error('slot unavailable')` when the server sends an `error:` line whose
// message contains that text. Anything other than an AbortError is logged
// before being rethrown.
//
// Note: the controller, including one supplied by the caller, is aborted when
// the generator finishes, however it finishes.
export async function* llama(prompt, params = {}, config = {}) {
  const controller = config.controller || new AbortController();
  const api_url = config.api_url?.replace(/\/+$/, '') || "";

  const completionParams = { ...paramDefaults, ...params, prompt };

  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
    method: 'POST',
    body: JSON.stringify(completionParams),
    headers: {
      'Connection': 'keep-alive',
      'Content-Type': 'application/json',
      'Accept': 'text/event-stream',
      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
    },
    signal: controller.signal,
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  // Deliberately no `g` flag: a global regex keeps `lastIndex` between exec()
  // calls, so reusing it on a different string makes it skip a field line that
  // directly follows a matched one. The `m` flag stays so that `$` still
  // matches in front of a trailing '\r' on CRLF-terminated lines.
  const sseField = /^(\S+):\s(.*)$/m;

  let content = "";
  let leftover = ""; // Buffer for a partially read line

  try {
    let cont = true;

    while (cont) {
      const result = await reader.read();
      if (result.done) {
        break;
      }

      // `stream: true` makes the decoder hold back an incomplete multi-byte
      // UTF-8 sequence until the next chunk instead of emitting U+FFFD.
      const text = leftover + decoder.decode(result.value, { stream: true });

      // The last element is the unfinished line, or "" when the text ended
      // with a line break. Either way it is the new leftover.
      const lines = text.split('\n');
      leftover = lines.pop();

      for (const line of lines) {
        const match = sseField.exec(line);
        if (!match) {
          continue;
        }

        const [, field, value] = match;
        result[field] = value;

        // React only to the field that was just read. `result.data` and
        // `result.error` may still hold an already-parsed value from an
        // earlier line of the same read.
        if (field === 'data') {
          if (value === '[DONE]') {
            cont = false;
            break;
          }

          if (value) {
            // since we know this is llama.cpp, let's just decode the json in data
            result.data = JSON.parse(value);
            content += result.data.content;

            yield result;

            // if we got a stop token from server, we will break here
            if (result.data.stop) {
              if (result.data.generation_settings) {
                generation_settings = result.data.generation_settings;
              }
              cont = false;
              break;
            }
          }
        } else if (field === 'error' && value) {
          let details;
          try {
            details = JSON.parse(value);
          } catch {
            // Not JSON: report the raw text and carry on reading.
            console.error(`llama.cpp error ${value}`);
            continue;
          }
          result.error = details;

          // Thrown outside the parse try/catch so that it actually reaches
          // upstream callers instead of being swallowed and logged.
          if (String(details?.message ?? '').includes('slot unavailable')) {
            throw new Error('slot unavailable');
          }
          console.error(`llama.cpp error [${details?.code} - ${details?.type}]: ${details?.message}`);
        }
      }
    }
  } catch (e) {
    if (e.name !== 'AbortError') {
      console.error("llama error: ", e);
    }
    throw e;
  } finally {
    controller.abort();
  }

  return content;
}
// Call llama, return an event target that you can subscribe to
//
// Example:
//
//    import { llamaEventTarget } from '/completion.js'
//
//    const conn = llamaEventTarget(prompt)
//    conn.addEventListener("message", (chunk) => {
//      document.write(chunk.detail.content)
//    })
//
// Events dispatched on the returned target (all are CustomEvents):
//   "message"             - detail is the chunk's `data` object
//   "generation_settings" - detail is `data.generation_settings`, when present
//   "timings"             - detail is `data.timings`, when present
//   "done"                - detail is `{ content }`, the concatenated content
//   "error"               - detail is the error that ended the stream; without
//                           this the failure would only surface as an
//                           unhandled promise rejection
//
// The arguments are passed to `llama()` unchanged.
export const llamaEventTarget = (prompt, params = {}, config = {}) => {
  const eventTarget = new EventTarget();
  const emit = (type, detail) => {
    eventTarget.dispatchEvent(new CustomEvent(type, { detail }));
  };

  (async () => {
    let content = "";
    for await (const chunk of llama(prompt, params, config)) {
      const { data } = chunk;
      // Every access below needs `data`; a chunk without it carries nothing
      // to report.
      if (!data) {
        continue;
      }
      content += data.content;
      emit("message", data);
      if (data.generation_settings) {
        emit("generation_settings", data.generation_settings);
      }
      if (data.timings) {
        emit("timings", data.timings);
      }
    }
    emit("done", { content });
  })().catch((error) => {
    emit("error", error);
  });

  return eventTarget;
};
// Call llama, return a promise that resolves to the completed text. This does not support streaming
//
// Example:
//
//     llamaPromise(prompt).then((content) => {
//       document.write(content)
//     })
//
//     or
//
//     const content = await llamaPromise(prompt)
//     document.write(content)
//
// The arguments are passed to `llama()` unchanged. The promise resolves to the
// concatenation of every chunk's `data.content` and rejects with whatever
// error the stream raises.
//
// Written as a plain async function: an async executor inside `new Promise`
// adds nothing here, and errors thrown in such an executor are easy to lose.
export const llamaPromise = async (prompt, params = {}, config = {}) => {
  let content = "";
  for await (const chunk of llama(prompt, params, config)) {
    content += chunk.data.content;
  }
  return content;
};
/**
 * (deprecated)
 *
 * Streams a completion for `params.prompt` and passes each yielded chunk to
 * `callback`. `params` is forwarded to `llama()` as the completion parameters
 * and `controller` as its `config.controller`.
 */
export const llamaComplete = async (params, controller, callback) => {
  const { prompt } = params;
  const stream = llama(prompt, params, { controller });
  for await (const event of stream) {
    callback(event);
  }
};
// Get the model info from the server. This is useful for getting the context window and so on.
//
// Returns the cached module-level `generation_settings` when it is already set.
// Otherwise requests `${api_url}/props` (with trailing slashes stripped from
// `config.api_url`), stores the response's `default_generation_settings` in
// the cache and returns it.
export const llamaModelInfo = async (config = {}) => {
  if (generation_settings) {
    return generation_settings;
  }

  const baseUrl = config.api_url?.replace(/\/+$/, '') || "";
  const response = await fetch(`${baseUrl}/props`);
  const props = await response.json();
  generation_settings = props.default_generation_settings;
  return generation_settings;
};