# blender-portable-repo/extensions/.local/lib/python3.11/site-packages/vosk/transcriber/transcriber.py

import json
import subprocess
import srt
import datetime
import os
import logging

from pathlib import Path
from timeit import default_timer as timer
from vosk import KaldiRecognizer, Model

class Transcriber:
    """Transcribe audio files with a Vosk model into SRT subtitles or plain text.

    Input files are decoded by an external ``ffmpeg`` process into raw
    16-bit mono PCM, which is fed chunk by chunk to a ``KaldiRecognizer``.
    """

    # Format of the PCM stream requested from ffmpeg in ``resample_ffmpeg``.
    SAMPLE_RATE = 16000
    BYTES_PER_SAMPLE = 2  # 's16le' => signed 16-bit samples, one channel
    # Number of bytes read from the ffmpeg pipe per recognizer call.
    CHUNK_SIZE = 4000

    def __init__(self, args):
        """Load the model described by ``args`` and keep ``args`` for later use.

        Args:
            args: Object providing ``model``, ``model_name`` and ``lang``
                (passed to ``Model``) and ``output_type`` (read by
                ``format_result``).
        """
        self.model = Model(model_path=args.model, model_name=args.model_name, lang=args.lang)
        self.args = args

    def recognize_stream(self, rec, stream):
        """Feed the whole PCM stream to ``rec`` and collect its results.

        Args:
            rec: Recognizer exposing ``AcceptWaveform``, ``Result`` and
                ``FinalResult``.
            stream: Object whose ``stdout`` attribute is a readable binary
                stream of 16-bit PCM data (e.g. the ``Popen`` returned by
                ``resample_ffmpeg``).

        Returns:
            A ``(result, tot_samples)`` tuple: the list of decoded JSON
            results (the final result is always the last element) and the
            number of audio samples consumed.
        """
        tot_bytes = 0
        result = []
        while True:
            data = stream.stdout.read(self.CHUNK_SIZE)
            if not data:
                break
            # Count every chunk that is consumed, not only the chunks that
            # happen to complete an utterance.
            tot_bytes += len(data)
            if rec.AcceptWaveform(data):
                result.append(json.loads(rec.Result()))
        result.append(json.loads(rec.FinalResult()))
        # The stream carries BYTES_PER_SAMPLE bytes per sample; callers
        # divide the returned count by the sample rate to get seconds.
        return result, tot_bytes // self.BYTES_PER_SAMPLE

    def format_result(self, result, words_per_line=7):
        """Render recognizer results according to ``self.args.output_type``.

        Args:
            result: List of result dicts as returned by ``recognize_stream``.
            words_per_line: Maximum number of words per subtitle (``srt``
                output only).

        Returns:
            The composed SRT document for ``'srt'``, the space-terminated
            texts joined together for ``'txt'``, and an empty string for any
            other output type.
        """
        output_type = self.args.output_type
        if output_type == 'srt':
            subs = []
            for res in result:
                # Results without word-level timing cannot be turned into
                # subtitles.
                if 'result' not in res:
                    continue
                words = res['result']
                for j in range(0, len(words), words_per_line):
                    line = words[j:j + words_per_line]
                    subs.append(srt.Subtitle(
                        index=len(subs),
                        content=' '.join(w['word'] for w in line),
                        start=datetime.timedelta(seconds=line[0]['start']),
                        end=datetime.timedelta(seconds=line[-1]['end'])))
            return srt.compose(subs)
        if output_type == 'txt':
            # Every part is followed by a single space, including the last.
            return ''.join(part['text'] + ' ' for part in result)
        return ''

    def resample_ffmpeg(self, infile):
        """Start ffmpeg decoding ``infile`` to raw PCM on its stdout.

        Args:
            infile: Path of the media file to decode.

        Returns:
            The running ``subprocess.Popen`` object; its ``stdout`` yields
            mono signed 16-bit little-endian PCM at ``SAMPLE_RATE`` Hz. The
            caller is responsible for waiting on / closing the process.
        """
        return subprocess.Popen(
            ['ffmpeg', '-nostdin', '-loglevel', 'quiet', '-i',
             infile,
             '-ar', str(self.SAMPLE_RATE), '-ac', '1', '-f', 's16le', '-'],
            stdout=subprocess.PIPE)

    def process_entry(self, inputdata):
        """Transcribe one file and write or print the formatted result.

        Args:
            inputdata: Pair ``(input_path, output_path)``. When
                ``output_path`` equals ``''`` the result is printed instead
                of being written to a file.

        Returns:
            A ``(final_result, tot_samples)`` tuple with the formatted
            transcription and the number of audio samples processed.
        """
        infile, outfile = inputdata[0], inputdata[1]
        logging.info(f'Recognizing {infile}')

        rec = KaldiRecognizer(self.model, self.SAMPLE_RATE)
        rec.SetWords(True)

        # The context manager closes the pipe and waits for ffmpeg, so the
        # child process is reaped even if recognition raises.
        with self.resample_ffmpeg(infile) as stream:
            result, tot_samples = self.recognize_stream(rec, stream)
        final_result = self.format_result(result)

        if outfile != '':
            with open(outfile, 'w', encoding='utf-8') as fh:
                fh.write(final_result)
        else:
            print(final_result)
        return final_result, tot_samples

    def process_directory(self, args):
        """Transcribe every regular file in ``args.input`` into ``args.output``.

        Each output file is named after the input file's stem with the
        suffix ``'.' + args.output_type``.

        Args:
            args: Object providing ``input`` (source directory), ``output``
                (destination directory) and ``output_type``.
        """
        # Threads are used rather than processes so that neither this object
        # nor its model has to be pickled for the workers.
        from multiprocessing.pool import ThreadPool

        suffix = '.' + args.output_type
        task_list = [
            (Path(args.input, fn), Path(args.output, Path(fn).stem).with_suffix(suffix))
            for fn in os.listdir(args.input)
            if Path(args.input, fn).is_file()  # skip sub-directories
        ]
        with ThreadPool() as pool:
            pool.map(self.process_entry, task_list)

    def process_file(self, args):
        """Transcribe the single file ``args.input`` and log timing statistics.

        Args:
            args: Object providing ``input`` and ``output`` paths; see
                ``process_entry`` for how ``output`` is interpreted.
        """
        start_time = timer()
        final_result, tot_samples = self.process_entry([args.input, args.output])
        elapsed = timer() - start_time
        # Logged "xRT" is seconds of audio processed per second of wall time.
        speed = tot_samples / float(self.SAMPLE_RATE) / float(elapsed)
        logging.info(f'Execution time: {elapsed:.3f} sec; xRT: {speed:.3f}')