2025-12-01
This commit is contained in:
@@ -0,0 +1,89 @@
|
||||
import json
|
||||
import subprocess
|
||||
import srt
|
||||
import datetime
|
||||
import os
|
||||
import logging
|
||||
|
||||
from pathlib import Path
|
||||
from timeit import default_timer as timer
|
||||
from vosk import KaldiRecognizer, Model
|
||||
|
||||
class Transcriber:
    """Transcribe audio files with a Vosk model and render plain text or SRT.

    Audio is decoded by an ``ffmpeg`` child process into raw 16-bit mono PCM
    and streamed into a ``KaldiRecognizer``.
    """

    # Format of the PCM stream requested from ffmpeg in resample_ffmpeg().
    SAMPLE_RATE = 16000
    BYTES_PER_SAMPLE = 2  # '-f s16le' => signed 16-bit samples
    CHUNK_SIZE = 4000  # bytes read from the ffmpeg pipe per iteration

    def __init__(self, args):
        """Load the model described by ``args`` and keep ``args`` for later.

        ``args`` must expose ``model``, ``model_name`` and ``lang`` (passed to
        ``Model``) and ``output_type`` (read by ``format_result``).
        """
        self.model = Model(model_path=args.model, model_name=args.model_name, lang=args.lang)
        self.args = args

    def recognize_stream(self, rec, stream):
        """Feed the PCM bytes from ``stream.stdout`` into ``rec``.

        Returns:
            A tuple ``(result, tot_samples)``: the list of decoded JSON
            results (every ``rec.Result()`` emitted while streaming followed
            by ``rec.FinalResult()``) and the number of audio samples read.
        """
        total_bytes = 0
        result = []
        while True:
            data = stream.stdout.read(self.CHUNK_SIZE)
            if not data:
                break
            # Count every chunk, not only those that complete an utterance;
            # otherwise most of the audio is missing from the total.
            total_bytes += len(data)
            if rec.AcceptWaveform(data):
                result.append(json.loads(rec.Result()))
        result.append(json.loads(rec.FinalResult()))
        # The pipe delivers bytes; convert to samples for the caller.
        return result, total_bytes // self.BYTES_PER_SAMPLE

    def format_result(self, result, words_per_line=7):
        """Render recognizer results according to ``self.args.output_type``.

        ``'srt'`` groups the recognized words into subtitles of at most
        ``words_per_line`` words each; ``'txt'`` concatenates each result's
        ``'text'`` followed by a space. Any other output type yields ``''``.
        """
        output_type = self.args.output_type

        if output_type == 'srt':
            subs = []
            for res in result:
                # Results without word-level entries contribute no subtitles.
                if 'result' not in res:
                    continue
                words = res['result']
                for j in range(0, len(words), words_per_line):
                    line = words[j:j + words_per_line]
                    subs.append(srt.Subtitle(
                        index=len(subs),
                        content=' '.join(w['word'] for w in line),
                        start=datetime.timedelta(seconds=line[0]['start']),
                        end=datetime.timedelta(seconds=line[-1]['end'])))
            return srt.compose(subs)

        if output_type == 'txt':
            return ''.join(part['text'] + ' ' for part in result)

        return ''

    def resample_ffmpeg(self, infile):
        """Start ffmpeg decoding ``infile`` to 16 kHz mono s16le on stdout.

        Returns the ``subprocess.Popen`` object; the caller is responsible
        for draining ``stdout`` and reaping the process.
        """
        return subprocess.Popen(
            ['ffmpeg', '-nostdin', '-loglevel', 'quiet', '-i',
             str(infile),
             '-ar', str(self.SAMPLE_RATE), '-ac', '1', '-f', 's16le', '-'],
            stdout=subprocess.PIPE)

    def process_entry(self, inputdata):
        """Transcribe one file.

        ``inputdata`` is an ``(input_path, output_path)`` pair. The formatted
        transcript is written to ``output_path``, or printed when
        ``output_path`` is the empty string.

        Returns:
            ``(final_result, tot_samples)``.
        """
        logging.info(f'Recognizing {inputdata[0]}')

        rec = KaldiRecognizer(self.model, self.SAMPLE_RATE)
        rec.SetWords(True)

        stream = self.resample_ffmpeg(inputdata[0])
        try:
            result, tot_samples = self.recognize_stream(rec, stream)
        finally:
            # Release the pipe and reap ffmpeg even if recognition failed,
            # so no descriptor or zombie process is left behind.
            stream.stdout.close()
            stream.wait()

        final_result = self.format_result(result)

        if inputdata[1] != '':
            with open(inputdata[1], 'w', encoding='utf-8') as fh:
                fh.write(final_result)
        else:
            print(final_result)
        return final_result, tot_samples

    def process_directory(self, args):
        """Transcribe every regular file in ``args.input`` into ``args.output``.

        Each output file is named after the input file's stem with
        ``args.output_type`` as its extension.
        """
        # Thread-backed pool: workers share this instance (and its loaded
        # model) instead of having to pickle it for child processes.
        from multiprocessing.dummy import Pool

        task_list = [
            # Append the extension to the stem; with_suffix() would truncate
            # stems that themselves contain a dot ('a.b.wav' -> 'a.txt').
            (entry, Path(args.output, entry.stem + '.' + args.output_type))
            for entry in sorted(Path(args.input).iterdir())
            if entry.is_file()
        ]
        with Pool() as pool:
            pool.map(self.process_entry, task_list)

    def process_file(self, args):
        """Transcribe ``args.input`` to ``args.output`` and log timing."""
        start_time = timer()
        final_result, tot_samples = self.process_entry([args.input, args.output])
        elapsed = timer() - start_time
        audio_seconds = tot_samples / float(self.SAMPLE_RATE)
        logging.info('Execution time: %.3f sec; xRT: %.3f',
                     elapsed, audio_seconds / float(elapsed))
|
||||
Reference in New Issue
Block a user