2025-12-01

2026-03-17 14:58:51 -06:00
parent 183e865f8b
commit 4b82b57113
6846 changed files with 954887 additions and 162606 deletions
@@ -0,0 +1,89 @@
+import json
+import subprocess
+import srt
+import datetime
+import os
+import logging
+
+from pathlib import Path
+from timeit import default_timer as timer
+from vosk import KaldiRecognizer, Model
+
class Transcriber:
    """Transcribe audio files with a Vosk model into SRT subtitles or text.

    Audio is decoded by an ``ffmpeg`` child process into raw mono signed
    16-bit PCM at ``SAMPLE_RATE`` Hz and fed chunk by chunk to a
    ``KaldiRecognizer``.  The output format is selected by
    ``args.output_type`` (``'srt'`` or ``'txt'``).
    """

    # Sample rate requested from ffmpeg and announced to the recognizer.
    SAMPLE_RATE = 16000
    # ffmpeg is asked for 's16le' output, i.e. two bytes per sample.
    BYTES_PER_SAMPLE = 2
    # Number of bytes read from the ffmpeg pipe per recognizer call.
    CHUNK_SIZE = 4000

    def __init__(self, args):
        """Load the model described by ``args`` and keep ``args`` for later.

        Args:
            args: Object exposing ``model``, ``model_name`` and ``lang``
                (forwarded to ``Model``) and ``output_type`` (read by
                ``format_result``).
        """
        self.model = Model(model_path=args.model, model_name=args.model_name, lang=args.lang)
        self.args = args

    def recognize_stream(self, rec, stream):
        """Feed the whole PCM stream to the recognizer.

        Args:
            rec: Recognizer offering ``AcceptWaveform``, ``Result`` and
                ``FinalResult``.
            stream: Object whose ``stdout`` yields raw PCM bytes.

        Returns:
            Tuple ``(results, tot_samples)``: the list of decoded JSON
            results (the final result is always the last element) and the
            number of audio samples consumed.
        """
        total_bytes = 0
        results = []
        while True:
            data = stream.stdout.read(self.CHUNK_SIZE)
            if not data:
                break
            # Count every chunk, not only those that complete an utterance,
            # otherwise the processed duration is badly underestimated.
            total_bytes += len(data)
            if rec.AcceptWaveform(data):
                results.append(json.loads(rec.Result()))
        results.append(json.loads(rec.FinalResult()))
        # Convert bytes to samples so that dividing by SAMPLE_RATE gives
        # seconds of audio.
        return results, total_bytes // self.BYTES_PER_SAMPLE

    def format_result(self, result, words_per_line=7):
        """Render recognizer results in the format chosen by the arguments.

        Args:
            result: List of result dicts as returned by ``recognize_stream``.
            words_per_line: Maximum number of words per subtitle (SRT only).

        Returns:
            The composed SRT document for ``'srt'``, the space-joined
            utterance texts for ``'txt'``, and an empty string for any other
            output type.
        """
        output_type = self.args.output_type
        if output_type == 'srt':
            subs = []
            for res in result:
                # Results without a word list contribute no subtitles.
                words = res.get('result')
                if not words:
                    continue
                for j in range(0, len(words), words_per_line):
                    line = words[j:j + words_per_line]
                    subs.append(srt.Subtitle(
                        index=len(subs),
                        content=' '.join(word['word'] for word in line),
                        start=datetime.timedelta(seconds=line[0]['start']),
                        end=datetime.timedelta(seconds=line[-1]['end'])))
            return srt.compose(subs)
        if output_type == 'txt':
            # Every part is followed by a single space, including the last.
            return ''.join(part['text'] + ' ' for part in result)
        return ''

    def resample_ffmpeg(self, infile):
        """Start ffmpeg decoding ``infile`` to raw PCM on its stdout.

        Returns:
            The ``subprocess.Popen`` object; the caller is responsible for
            reading ``stdout`` and for reaping the process.
        """
        return subprocess.Popen(
            ['ffmpeg', '-nostdin', '-loglevel', 'quiet', '-i',
             infile,
             '-ar', str(self.SAMPLE_RATE), '-ac', '1', '-f', 's16le', '-'],
            stdout=subprocess.PIPE)

    def process_entry(self, inputdata):
        """Transcribe one file and write or print the formatted result.

        Args:
            inputdata: Pair ``(input_path, output_path)``.  When
                ``output_path`` equals ``''`` the result is printed instead
                of being written to a file.

        Returns:
            Tuple ``(final_result, tot_samples)``.
        """
        logging.info(f'Recognizing {inputdata[0]}')

        rec = KaldiRecognizer(self.model, self.SAMPLE_RATE)
        rec.SetWords(True)

        # The context manager closes the pipe and waits for ffmpeg, so no
        # file descriptor or zombie process is left behind per file.
        with self.resample_ffmpeg(inputdata[0]) as stream:
            result, tot_samples = self.recognize_stream(rec, stream)
        final_result = self.format_result(result)

        if inputdata[1] != '':
            with open(inputdata[1], 'w', encoding='utf-8') as fh:
                fh.write(final_result)
        else:
            print(final_result)
        return final_result, tot_samples

    def process_directory(self, args):
        """Transcribe every regular file directly inside ``args.input``.

        Each result is written to ``args.output`` under the input file's
        stem with ``args.output_type`` as the extension.

        Args:
            args: Object exposing ``input``, ``output`` and ``output_type``.
        """
        # Imported here because the module-level imports do not provide a
        # pool implementation.
        from concurrent.futures import ThreadPoolExecutor

        suffix = '.' + args.output_type
        # Append the suffix to the stem instead of using with_suffix(), which
        # would truncate dotted stems ('a.b.wav' -> 'a.srt') and make
        # different inputs overwrite each other's output.
        task_list = [
            (path, Path(args.output, path.stem + suffix))
            for path in sorted(Path(args.input).iterdir())
            if path.is_file()  # sub-directories are not audio files
        ]
        # Threads rather than processes: a process pool would have to pickle
        # the bound method together with self.model.
        # NOTE(review): presumably the native model is not picklable - confirm.
        with ThreadPoolExecutor() as pool:
            # Drain the iterator so that worker exceptions are raised here.
            list(pool.map(self.process_entry, task_list))

    def process_file(self, args):
        """Transcribe ``args.input`` and log the time taken and the xRT ratio.

        Args:
            args: Object exposing ``input`` and ``output``.
        """
        start_time = timer()
        final_result, tot_samples = self.process_entry([args.input, args.output])
        elapsed = timer() - start_time
        # Seconds of audio processed per second of wall-clock time.
        xrt = tot_samples / float(self.SAMPLE_RATE) / float(elapsed)
        logging.info(f'Execution time: {elapsed:.3f} sec; xRT: {xrt:.3f}')