kotoba_whisper

Runtime error

File size: 1,106 Bytes

bbd4e37

import torch
from faster_whisper import WhisperModel

model = None
model_size = None

def load_model(_model_size):
    global model_size, model

    if _model_size and model_size != _model_size:
        model_size = _model_size

    if torch.cuda.is_available():
        model = WhisperModel(model_size, device="cuda", compute_type="float16")
        # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
    else:
        model = WhisperModel(model_size, device="cpu", compute_type="int8")

def speech_to_text(audio_file, _model_size = None):
    global model_size, model

    load_model(_model_size)

    with torch.no_grad():
        segments, info = model.transcribe(
            audio_file,
            language='ja',
            beam_size=5,
            vad_filter=True,
            without_timestamps=False,
        )

    text_only = ''
    text_with_timestamps = ''
    for segment in segments:
        text_only += f"{segment.text}\n"
        text_with_timestamps += f"{segment.start:.2f}\t{segment.end:.2f}\t{segment.text}\n"

    return text_only, text_with_timestamps