Files
GigaMux/encode_VOD_pyro.py
T

447 lines
19 KiB
Python

import hashlib
import os
import re
import subprocess
import sys
from pathlib import Path
import json
import logging
from datetime import datetime
import shutil
import time
# Distributed mode needs tqdm and ffmpeg_distributed.py, which relies on SSH and
# Unix select.poll(); on Windows run it under WSL or on Linux.
#
# Each worker is an (ssh_host, gpu_index) pair. The Unraid boxes (GuiltsCurse,
# Godzilla) are excluded; RenderScrap appears twice because it has 2 GPUs.
DISTRIBUTED_WORKERS_DEFAULT = [
    ("Pyro", 0),
    ("RenderScrap", 0),
    ("RenderScrap", 1),
    ("PostIrony", 0),
]

# FFmpeg encoder arguments handed to every remote worker (video only: "-an").
DISTRIBUTED_REMOTE_ARGS_DEFAULT = (
    "-c:v hevc_nvenc -preset p7 -tune hq "
    "-rc vbr -rc-lookahead 32 "
    "-spatial-aq 1 -aq-strength 15 "
    "-cq 0 -b:v 9000k -maxrate 9000k -bufsize 18000k "
    "-an"
)

# Length, in seconds, of each segment farmed out to a worker.
DISTRIBUTED_SEGMENT_SECONDS = 60
def _parse_workers_env(s):
    """Parse a DISTRIBUTED_WORKERS string into ``[(host, gpu_id), ...]``.

    Example: ``'Pyro:0,RenderScrap:0,RenderScrap:1,PostIrony:0'``.

    Entries are comma separated. ``host:gpu`` selects a GPU index on that
    host; a bare ``host`` means GPU 0. Surrounding whitespace and empty
    entries are ignored. An entry with an empty host, a non-integer GPU
    index or a negative GPU index is skipped and reported through
    ``logging.warning`` instead of being dropped silently.

    Args:
        s: The raw environment value; ``None`` or ``""`` yields ``[]``.

    Returns:
        A list of ``(host, gpu_id)`` tuples in input order.
    """
    workers = []
    for raw in (s or "").split(","):
        spec = raw.strip()
        if not spec:
            continue
        # Split on the LAST colon so hosts that contain ':' keep working.
        host, sep, gpu = spec.rpartition(":")
        if not sep:
            workers.append((spec, 0))
            continue
        host = host.strip()
        try:
            gpu_id = int(gpu.strip())
        except ValueError:
            gpu_id = None
        if not host or gpu_id is None or gpu_id < 0:
            logging.warning("Ignoring invalid DISTRIBUTED_WORKERS entry: %r", spec)
            continue
        workers.append((host, gpu_id))
    return workers
# ANSI escape sequences for colored console output
class Colors:
    """Named ANSI color codes; emit ENDC after colored text to reset the terminal."""

    # Bright foreground colors
    PURPLE = '\033[95m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # Reset all attributes
    ENDC = '\033[0m'
def get_gpu_selection():
    """Ask on the console for a GPU slot until a valid one is entered.

    Returns:
        The chosen slot as a string: ``'0'``, ``'1'`` or ``'2'``.
    """
    menu_lines = ("0 - First GPU", "1 - Second GPU", "2 - Third GPU")
    valid_choices = ('0', '1', '2')
    while True:
        print(f"\n{Colors.BLUE}Select GPU slot:{Colors.ENDC}")
        for entry in menu_lines:
            print(entry)
        choice = input(f"{Colors.YELLOW}Enter GPU number (0-2):{Colors.ENDC} ").strip()
        if choice not in valid_choices:
            # Re-prompt on anything other than the three listed slots.
            print(f"{Colors.RED}Invalid selection. Please try again.{Colors.ENDC}")
            continue
        return choice
# File handler that tolerates I/O failures (the log may sit on a network share)
class SafeFileHandler(logging.FileHandler):
    """FileHandler that never lets a flush or write failure escape or get reported."""

    def flush(self):
        """Flush the underlying stream, discarding any exception it raises."""
        try:
            super().flush()
        except Exception:
            # OSError from a dropped share and any other failure are both ignored.
            pass

    def emit(self, record):
        """Write *record*; route any failure to the (silent) handleError."""
        try:
            super().emit(record)
        except Exception:
            # I/O errors and unexpected errors are handled the same way.
            self.handleError(record)

    def handleError(self, record):
        """Do nothing, so no "--- Logging error ---" report is printed."""
        return None
# Set up logging
# NOTE: this runs at import time; "logs" is relative to the current working
# directory and is created if missing.
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
# One log file per run, named after the start timestamp.
log_file = os.path.join(log_dir, f"encode_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
# Configure logging with custom handler that handles network share errors
handler = SafeFileHandler(log_file, mode='w', encoding='utf-8')
handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
# Root logger: every logging.info()/logging.error() call in this module lands here.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(handler)
# Remove default handlers to avoid duplicate output
# (this assignment replaces the whole handler list, so the file handler above
# ends up as the root logger's only handler)
logger.handlers = [handler]
def get_file_info(input_file):
    """Probe *input_file* with ffprobe and return its JSON report as a dict.

    The report carries the container's duration and size under ``format``
    and, per stream, codec_type, codec_name, width, height, r_frame_rate,
    channels and channel_layout under ``streams`` (where ffprobe has them).

    Raises:
        json.JSONDecodeError: if ffprobe printed nothing parseable.
    """
    wanted_entries = 'format=duration,size:stream=codec_type,codec_name,width,height,r_frame_rate,channels,channel_layout'
    probe = subprocess.run(
        ['ffprobe', '-v', 'error', '-show_entries', wanted_entries, '-of', 'json', input_file],
        capture_output=True,
        text=True,
    )
    return json.loads(probe.stdout)
def get_audio_labels(input_file):
    """Return the ``title`` tag of each audio stream in *input_file*.

    Args:
        input_file: Path of the media file to probe with ffprobe.

    Returns:
        A list with one entry per audio stream, in ffprobe's stream order;
        an entry is ``None`` when that stream has no title tag.

    Raises:
        json.JSONDecodeError: if ffprobe printed nothing parseable.
    """
    cmd = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'a',
        '-show_entries', 'stream=index:stream_tags=title',
        '-of', 'json',
        input_file
    ]
    # Decode explicitly as UTF-8: ffprobe emits UTF-8 JSON, while the default
    # text mode uses the locale codec (e.g. cp1252 on Windows), which garbles
    # or fails on non-ASCII titles that are later written back as metadata.
    result = subprocess.run(cmd, capture_output=True, encoding='utf-8')
    info = json.loads(result.stdout)
    return [stream.get('tags', {}).get('title') for stream in info.get('streams', [])]
def parse_fps(r_frame_rate):
    """Parse an ffprobe ``r_frame_rate`` value into a float.

    Accepts a rational such as ``'60/1'`` or ``'30000/1001'``, or a plain
    number such as ``'25'``.

    Args:
        r_frame_rate: The raw value; it is converted with ``str()`` first.

    Returns:
        The frame rate as a float, or ``None`` when the value is empty,
        not numeric, or has a zero denominator (e.g. ``'0/0'``).
    """
    if not r_frame_rate:
        return None
    numerator, slash, denominator = str(r_frame_rate).strip().partition('/')
    # All float() conversions sit inside the try so malformed input yields
    # None instead of an uncaught ValueError.
    try:
        if not slash:
            return float(numerator)
        divisor = float(denominator)
        if divisor == 0:
            # An undefined rate, not "numerator frames per second".
            return None
        return float(numerator) / divisor
    except ValueError:
        return None
def format_size(size_bytes):
    """Render a byte count as a string with two decimals and a unit.

    The value is divided by 1024 per step through B, KB, MB and GB;
    anything larger is reported in TB.
    """
    units = ('B', 'KB', 'MB', 'GB')
    value = size_bytes
    step = 0
    while step < len(units):
        if value < 1024:
            return f"{value:.2f} {units[step]}"
        value = value / 1024
        step += 1
    # Ran past GB: whatever is left is expressed in TB.
    return f"{value:.2f} TB"
def safe_log_info(message, print_msg=None):
    """Log *message* at INFO level and optionally echo *print_msg* to the console.

    Args:
        message: Text sent to ``logging.info``.
        print_msg: Console text; when ``None`` nothing is printed on success.

    If logging raises, the failure is reported on the console in yellow
    (together with *message*, or after *print_msg* when one was given).
    """
    try:
        logging.info(message)
    except (OSError, IOError) as e:
        # Logging failed (likely a network share problem) - use the console
        if print_msg is not None:
            print(print_msg)
            print(f"{Colors.YELLOW}[Logging failed: {e}]{Colors.ENDC}")
        else:
            print(f"{Colors.YELLOW}[Logging failed: {e}] {message}{Colors.ENDC}")
    except Exception as e:
        # Any other logging failure
        if print_msg is not None:
            print(print_msg)
            print(f"{Colors.YELLOW}[Logging error: {e}]{Colors.ENDC}")
        else:
            print(f"{Colors.YELLOW}[Logging error: {e}] {message}{Colors.ENDC}")
    else:
        # Logged fine: echo to the console only when a console text was supplied
        if print_msg is not None:
            print(print_msg)
def safe_log_error(message, print_msg=None):
    """Log *message* at ERROR level and always show something on the console.

    Args:
        message: Text sent to ``logging.error``.
        print_msg: Console text; when ``None`` the message itself is printed in red.

    If logging raises, the failure is reported on the console in red
    (together with *message*, or after *print_msg* when one was given).
    """
    try:
        logging.error(message)
    except (OSError, IOError) as e:
        # Logging failed (likely a network share problem) - use the console
        if print_msg is not None:
            print(print_msg)
            print(f"{Colors.RED}[Logging failed: {e}]{Colors.ENDC}")
        else:
            print(f"{Colors.RED}[Logging failed: {e}] {message}{Colors.ENDC}")
    except Exception as e:
        # Any other logging failure
        if print_msg is not None:
            print(print_msg)
            print(f"{Colors.RED}[Logging error: {e}]{Colors.ENDC}")
        else:
            print(f"{Colors.RED}[Logging error: {e}] {message}{Colors.ENDC}")
    else:
        # Logged fine: errors are echoed to the console in every case
        if print_msg is None:
            print(f"{Colors.RED}{message}{Colors.ENDC}")
        else:
            print(print_msg)
# Fields of FFmpeg's periodic "-stats" progress line, compiled once.
_STATS_FRAME_RE = re.compile(r'frame=\s*(\d+)')
_STATS_ELAPSED_RE = re.compile(r'elapsed=(\d+):(\d+):(\d+\.?\d*)')
_STATS_SIZE_RE = re.compile(r'size=\s*(\d+)\s*KiB')


def _detect_input_fps(streams):
    """Return the first positive frame rate among ffprobe *streams*, or None.

    Streams are tried in three passes: those with codec_type 'video', then
    those that look like video (have a width, or a codec name starting with
    'h' or 'm'), then every stream.
    """
    def is_video(stream):
        return stream.get('codec_type') == 'video'

    def looks_like_video(stream):
        return 'width' in stream or (stream.get('codec_name') or '').startswith(('h', 'm'))

    def any_stream(stream):
        return True

    for accept in (is_video, looks_like_video, any_stream):
        for stream in streams:
            if not accept(stream):
                continue
            fps = parse_fps(stream.get('r_frame_rate'))
            if fps and fps > 0:
                return fps
    return None


def _fill_progress_stats(text, input_fps):
    """Return progress line *text* with FFmpeg's N/A fields filled in.

    When the line carries frame= and elapsed= values, speed=N/A is replaced;
    with a known *input_fps* the time= and bitrate= placeholders are
    replaced as well. Lines that cannot be parsed are returned unchanged.
    """
    m_frame = _STATS_FRAME_RE.search(text)
    m_elapsed = _STATS_ELAPSED_RE.search(text)
    if not (m_frame and m_elapsed):
        return text
    frames = int(m_frame.group(1))
    hours, minutes, seconds = (float(part) for part in m_elapsed.groups())
    elapsed_sec = hours * 3600 + minutes * 60 + seconds
    if elapsed_sec <= 0:
        return text
    if not (input_fps and input_fps > 0):
        # Without a source frame rate only the raw encoding rate is known.
        return text.replace('speed=N/A', f'speed={frames / elapsed_sec:.0f}fps')

    # Output position in media time, derived from the frame counter.
    video_sec = frames / input_fps
    display = text.replace('speed=N/A', f'speed={video_sec / elapsed_sec:.2f}x')
    t_h = int(video_sec // 3600)
    t_m = int((video_sec % 3600) // 60)
    t_s = video_sec % 60
    display = display.replace('time=N/A', f'time={t_h}:{t_m:02d}:{t_s:06.3f}')

    # Bitrate is bits written per second of MEDIA time (not wall-clock time);
    # it stays N/A until the muxer has written data.
    m_size = _STATS_SIZE_RE.search(text)
    size_kib = int(m_size.group(1)) if m_size else 0
    if size_kib > 0 and video_sec > 0:
        bitrate_kbps = (size_kib * 8192) / (video_sec * 1000)
        display = display.replace('bitrate=N/A', f'bitrate={bitrate_kbps:.0f}kbits/s')
    return display


def encode_dvr(input_file, output_dir, gpu):
    """Re-encode one file to HEVC with NVENC on the local machine.

    The output keeps the input's file name and is written into *output_dir*.
    Audio is stream-copied, all streams are mapped, and audio title tags are
    carried over. FFmpeg's output is streamed to the log and console, with
    speed/time/bitrate filled in where FFmpeg reports N/A.

    Args:
        input_file: Path of the source file.
        output_dir: Directory that receives the encoded file.
        gpu: GPU index passed to FFmpeg's ``-gpu`` option (str or int).

    Returns:
        None. The call is skipped when the output already exists. Encoding
        failures are logged, not raised, and the partial output of a failed
        run is deleted so that a later run does not skip the file.

    Raises:
        Whatever ``get_file_info`` / ``get_audio_labels`` raise: the input
        is probed before the error-handling section begins.
    """
    input_path = Path(input_file)
    output_path = Path(output_dir) / f"{input_path.stem}{input_path.suffix}"

    # Get file info for logging
    file_info = get_file_info(str(input_path))
    input_size = int(file_info['format']['size'])
    duration = float(file_info['format']['duration'])
    streams = file_info.get('streams', [])

    safe_log_info(f"Processing file: {input_path}")
    safe_log_info(f"Input size: {format_size(input_size)}")
    safe_log_info(f"Duration: {duration:.2f} seconds")
    print(f"\n{Colors.BLUE}Processing file: {input_path}{Colors.ENDC}")
    print(f"Input size: {format_size(input_size)}")
    print(f"Duration: {duration:.2f} seconds")

    # Log stream information, labelled with the probed codec_type.
    for i, stream in enumerate(streams):
        stream_type = (stream.get('codec_type') or 'unknown').capitalize()
        safe_log_info(f"Stream {i} ({stream_type}):")
        for key, value in stream.items():
            safe_log_info(f"  {key}: {value}")

    # Skip if output file already exists
    if output_path.exists():
        output_size = output_path.stat().st_size
        safe_log_info(f"Skipping {input_path} - output already exists: {output_path}")
        safe_log_info(f"Output size: {format_size(output_size)}")
        print(f"{Colors.YELLOW}Skipping {input_path} - output already exists{Colors.ENDC}")
        return

    # Audio labels, and the input FPS used to compute speed while stderr is piped
    audio_labels = get_audio_labels(str(input_path))
    safe_log_info(f"Audio labels: {audio_labels}")
    input_fps = _detect_input_fps(streams)

    # FFmpeg command with NVIDIA HEVC encoder and maximum quality
    cmd = [
        'ffmpeg',
        '-v', 'info',            # Lower verbosity to reduce noise
        '-stats',                # Emit periodic stats
        '-stats_period', '1.0',  # Update stats every 1s (more stable)
        '-i', str(input_path),
        '-c:v', 'hevc_nvenc',
        '-gpu', str(gpu),        # argv entries must be strings
        '-preset', 'p7',
        '-tune', 'hq',
        '-rc', 'vbr',
        '-rc-lookahead', '32',
        '-spatial-aq', '1',
        '-aq-strength', '15',
        '-cq', '0',
        '-b:v', '9000k',
        '-maxrate', '9000k',
        '-bufsize', '18000k',
        '-c:a', 'copy',
        '-map', '0',
    ]
    # Add metadata for each audio stream if label exists
    for idx, label in enumerate(audio_labels):
        if label:
            cmd += [f'-metadata:s:a:{idx}', f'title={label}']
    cmd.append(str(output_path))

    succeeded = False
    try:
        # One combined pipe avoids dual-pipe deadlocks on Windows. Decoding as
        # UTF-8 with replacement keeps an odd byte from aborting the read loop,
        # and the context manager closes the pipe and waits for FFmpeg.
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            encoding='utf-8',
            errors='replace',
            bufsize=1,
        ) as process:
            for line in process.stdout:
                text = line.strip()
                try:
                    if text.startswith('frame=') or ' fps=' in text:
                        display = _fill_progress_stats(text, input_fps)
                        safe_log_info(f"Progress: {text}", f"{Colors.PURPLE}Progress: {display}{Colors.ENDC}")
                    else:
                        safe_log_info(f"FFmpeg: {text}", f"{Colors.GREEN}FFmpeg: {text}{Colors.ENDC}")
                except OSError as e:
                    safe_log_error(f"I/O error reading FFmpeg output: {e}")
                    break
                except Exception as e:
                    # Keep draining the pipe so FFmpeg never blocks on a full buffer.
                    safe_log_error(f"Unexpected error processing FFmpeg output: {e}")

        return_code = process.returncode
        if return_code == 0:
            succeeded = True
            # Get output file info
            output_info = get_file_info(str(output_path))
            output_size = int(output_info['format']['size'])
            compression_ratio = input_size / output_size if output_size > 0 else 0
            safe_log_info(f"Successfully encoded: {output_path}", f"{Colors.GREEN}Successfully encoded: {output_path}{Colors.ENDC}")
            safe_log_info(f"Output size: {format_size(output_size)}")
            safe_log_info(f"Compression ratio: {compression_ratio:.2f}x", f"Compression ratio: {compression_ratio:.2f}x")
        else:
            # Windows reports some exit codes as large unsigned ints; show them signed
            if return_code > 2147483647:
                return_code = return_code - 4294967296
            safe_log_error(f"FFmpeg process failed with return code {return_code}",
                           f"{Colors.RED}FFmpeg process failed with return code {return_code}{Colors.ENDC}")
    except Exception as e:
        safe_log_error(f"Unexpected error encoding {input_path}: {type(e).__name__}: {e}",
                       f"{Colors.RED}Unexpected error encoding {input_path}: {e}{Colors.ENDC}")
    finally:
        # The output did not exist when this call started, so anything left
        # after a failure is a partial file that the "already exists" check
        # above would wrongly treat as finished on the next run.
        if not succeeded and output_path.exists():
            try:
                output_path.unlink()
                safe_log_info(f"Removed partial output: {output_path}")
            except OSError as e:
                safe_log_error(f"Could not remove partial output {output_path}: {e}")
def encode_dvr_distributed(input_file, output_dir, workers, segment_seconds=60, remote_args=None, concat_args="-c:a copy", probe_host=None, probe_path=None, remote_ffmpeg_path=None):
    """Encode one file through ffmpeg_distributed (split -> farm -> concat).

    Args:
        input_file: Source file; resolved to an absolute path.
        output_dir: Directory for the result, which keeps the source file name.
        workers: List of ``(host, gpu_id)`` pairs.
        segment_seconds: Segment length handed to the distributed encoder.
        remote_args: Encoder arguments; falls back to the
            DISTRIBUTED_REMOTE_ARGS environment variable, then the module default.
        concat_args: Arguments for the concat step.
        probe_host: Host on which to run ffprobe; falls back to PROBE_HOST.
        probe_path: Path of the input on the probe host; when omitted and
            both a probe host and PROBE_PATH_PREFIX are set, it is
            ``<prefix>/<input file name>``.
        remote_ffmpeg_path: Remote ffmpeg binary; falls back to
            DISTRIBUTED_REMOTE_FFMPEG_PATH.

    Segment temp dirs live under ``<script dir>/tmp/``, named after an MD5 of
    the input path. The call is skipped if the output already exists.
    Failures are logged rather than raised, and the working directory is
    restored on exit.
    """
    source = Path(input_file).resolve()
    target = (Path(output_dir) / (source.stem + source.suffix)).resolve()
    if target.exists():
        safe_log_info(f"Skipping {source} - output already exists: {target}")
        print(f"{Colors.YELLOW}Skipping {source} - output already exists{Colors.ENDC}")
        return

    # Fill unset options from the environment.
    env = os.environ
    if not remote_args:
        remote_args = env.get("DISTRIBUTED_REMOTE_ARGS", DISTRIBUTED_REMOTE_ARGS_DEFAULT)
    if not probe_host:
        probe_host = env.get("PROBE_HOST")
    path_prefix = env.get("PROBE_PATH_PREFIX")
    if probe_path is None and probe_host and path_prefix:
        trimmed_prefix = path_prefix.rstrip("/")
        probe_path = f"{trimmed_prefix}/{source.name}"

    # Per-input segment directory next to this script.
    segments_root = Path(__file__).resolve().parent / "tmp"
    segments_root.mkdir(exist_ok=True)
    hashed_path = os.path.abspath(os.path.expanduser(str(source)))
    digest = hashlib.md5(hashed_path.encode()).hexdigest()
    tmp_dir = str(segments_root / f"ffmpeg_segments_{digest}")

    previous_cwd = os.getcwd()
    try:
        os.chdir(output_dir)
        from ffmpeg_distributed import encode as distributed_encode
        safe_log_info(f"Distributed encode: {source} -> {target} (workers: {workers})")
        print(f"{Colors.BLUE}Distributed encode (HEVC): {source.name}{Colors.ENDC}")
        options = {
            "segment_seconds": segment_seconds,
            "remote_args": remote_args,
            "concat_args": concat_args,
            "tmp_dir": tmp_dir,
            "probe_host": probe_host,
            "probe_path": probe_path,
            "remote_ffmpeg_path": remote_ffmpeg_path or env.get("DISTRIBUTED_REMOTE_FFMPEG_PATH"),
        }
        finished = distributed_encode(workers, str(source), str(target), **options)
        if not (finished and target.exists()):
            safe_log_error("Distributed encode did not produce output (see [4/4] ERROR above)", f"{Colors.RED}Distributed encode did not produce output{Colors.ENDC}")
        else:
            safe_log_info(f"Successfully encoded: {target}", f"{Colors.GREEN}Successfully encoded: {target}{Colors.ENDC}")
    except Exception as e:
        safe_log_error(f"Distributed encode failed: {e}", f"{Colors.RED}Distributed encode failed: {e}{Colors.ENDC}")
    finally:
        os.chdir(previous_cwd)
if __name__ == "__main__":
    # Entry point: encode every .mp4 in ./input to ./output in distributed mode.
    if sys.platform == "win32":
        print(f"{Colors.YELLOW}Distributed mode uses select.poll() and may fail on Windows; use WSL or Linux for best results.{Colors.ENDC}")
    input_dir = "input"
    output_dir = "output"

    # Report a missing input directory instead of crashing in os.listdir().
    if not os.path.isdir(input_dir):
        safe_log_error(f"Input directory not found: {os.path.abspath(input_dir)}")
        sys.exit(1)
    os.makedirs(output_dir, exist_ok=True)

    # Workers come from DISTRIBUTED_WORKERS when set, else the built-in default.
    workers_str = os.environ.get("DISTRIBUTED_WORKERS")
    workers = _parse_workers_env(workers_str) if workers_str else DISTRIBUTED_WORKERS_DEFAULT
    if not workers:
        # Every entry in the variable was invalid; there is nothing to encode on.
        safe_log_error(f"DISTRIBUTED_WORKERS={workers_str!r} contains no valid host[:gpu] entries")
        sys.exit(1)
    workers_desc = ", ".join(f"{h}:gpu{g}" for h, g in workers)
    print(f"{Colors.BLUE}Using workers: {workers_desc}{Colors.ENDC}")
    safe_log_info(f"Distributed mode; workers: {workers}")

    # '.mp4' also matches '.DVR.mp4'; sorting makes the processing order deterministic.
    files = sorted(f for f in os.listdir(input_dir) if f.endswith('.mp4'))
    total_files = len(files)
    if total_files == 0:
        safe_log_info("No files to process in input directory", f"{Colors.YELLOW}No files to process in input directory{Colors.ENDC}")
    else:
        safe_log_info(f"Found {total_files} files to process", f"{Colors.BLUE}Found {total_files} files to process{Colors.ENDC}")
        for i, file in enumerate(files, 1):
            input_file = os.path.join(input_dir, file)
            safe_log_info(f"Processing file {i}/{total_files}: {file}")
            print(f"\n{Colors.BLUE}Processing file {i}/{total_files}: {file}{Colors.ENDC}")
            encode_dvr_distributed(input_file, output_dir, workers, segment_seconds=DISTRIBUTED_SEGMENT_SECONDS)