Refactor file handling in check_files.py to improve clarity and maintainability. Introduce helper functions for input/output file checks and expected output naming. Update the main logic to use these functions for better readability. Adjust output file handling in the encoding scripts to ensure a consistent .mp4 naming convention across all modules.

This commit is contained in:
2026-05-06 19:00:05 -06:00
parent d1221f83b3
commit 3d64eb3094
4 changed files with 70 additions and 49 deletions
+64 -43
View File
@@ -41,6 +41,15 @@ def get_file_info(file_path):
except Exception as e:
return None, f"Error running ffprobe: {str(e)}"
def expected_mp4_output_name(input_filename):
    """Return the ``.mp4`` file name expected for *input_filename*.

    Only the final path component is used, and only its last extension is
    dropped before ``.mp4`` is appended (``show.DVR.mkv`` -> ``show.DVR.mp4``).
    """
    base_name = Path(input_filename).stem
    return f"{base_name}.mp4"
def is_input_video_file(name):
    """Return True when *name* ends with an accepted input video extension.

    The comparison is case-insensitive; accepted extensions are ``.mp4``,
    ``.mkv`` and ``.mov``.
    """
    accepted_suffixes = ('.mp4', '.mkv', '.mov')
    lowered = name.lower()
    return lowered.endswith(accepted_suffixes)
def is_output_mp4_file(name):
    """Return True when *name* ends with ``.mp4``, ignoring letter case."""
    lowered = name.lower()
    return lowered.endswith('.mp4')
def check_file(input_path, output_path):
try:
input_info, input_error = get_file_info(input_path)
@@ -166,7 +175,7 @@ def main():
# Get list of files that exist in both directories, verifying each file exists
input_files = set()
for f in os.listdir(input_dir):
if f.endswith(('.mp4', '.DVR.mp4')):
if is_input_video_file(f):
full_path = os.path.join(input_dir, f)
if os.path.isfile(full_path):
input_files.add(f)
@@ -176,7 +185,7 @@ def main():
output_files = set()
for f in os.listdir(output_dir):
if f.endswith(('.mp4', '.DVR.mp4')):
if is_output_mp4_file(f):
full_path = os.path.join(output_dir, f)
if os.path.isfile(full_path):
output_files.add(f)
@@ -193,47 +202,59 @@ def main():
for f in sorted(output_files):
logging.info(f" {f}")
# Report missing files
missing_in_output = input_files - output_files
missing_in_input = output_files - input_files
if missing_in_output:
input_stems = {Path(f).stem for f in input_files}
# Report missing files (output is always {stem}.mp4 for encoder workflow)
missing_in_output_inputs = sorted(
inf for inf in input_files if expected_mp4_output_name(inf) not in output_files
)
missing_in_input_outputs = sorted(
outf for outf in output_files if Path(outf).stem not in input_stems
)
if missing_in_output_inputs:
print(f"\n{Colors.YELLOW}Files missing from output directory:{Colors.ENDC}")
for f in sorted(missing_in_output):
print(f" {f}")
logging.warning(f"File missing from output: {f}")
issues_found.append((f, "Missing from output"))
for inf in missing_in_output_inputs:
outp = expected_mp4_output_name(inf)
print(f" {inf} → expected {outp}")
logging.warning(f"File missing from output: {inf} (expected {outp})")
issues_found.append((inf, outp, "Missing from output"))
issue_counts["Missing from output"] += 1
if missing_in_input:
if missing_in_input_outputs:
print(f"\n{Colors.YELLOW}Files missing from input directory:{Colors.ENDC}")
for f in sorted(missing_in_input):
print(f" {f}")
logging.warning(f"File missing from input: {f}")
issues_found.append((f, "Missing from input"))
for outf in missing_in_input_outputs:
print(f" {outf}")
logging.warning(f"File missing from input: {outf}")
issues_found.append((None, outf, "Missing from input"))
issue_counts["Missing from input"] += 1
files_to_check = input_files.intersection(output_files)
logging.info(f"Number of matching files found: {len(files_to_check)}")
if not files_to_check:
msg = "No matching files found in both input and output directories"
pairs_to_check = []
for inf in sorted(input_files):
outp = expected_mp4_output_name(inf)
if outp in output_files:
pairs_to_check.append((inf, outp))
logging.info(f"Number of input/output pairs to verify: {len(pairs_to_check)}")
if not pairs_to_check and not missing_in_output_inputs and not missing_in_input_outputs:
msg = "No input video files and no mismatches to report"
print(f"\n{Colors.YELLOW}{msg}{Colors.ENDC}")
logging.warning(msg)
return
for input_file in sorted(files_to_check):
for input_file, output_file in pairs_to_check:
input_path = os.path.join(input_dir, input_file)
output_path = os.path.join(output_dir, input_file)
print(f"\nChecking: {input_file}")
logging.info(f"Checking file: {input_file}")
output_path = os.path.join(output_dir, output_file)
print(f"\nChecking: {input_file}{output_file}")
logging.info(f"Checking file: {input_file} vs output {output_file}")
issue = check_file(input_path, output_path)
if issue:
issues_found.append((input_file, issue))
issues_found.append((input_file, output_file, issue))
print(f"{Colors.RED}ISSUE FOUND: {issue}{Colors.ENDC}")
logging.warning(f"Issue with {input_file}: {issue}")
logging.warning(f"Issue with {input_file}{output_file}: {issue}")
# Count the type of issue
if "Duration mismatch" in issue:
@@ -250,8 +271,8 @@ def main():
issue_counts["Other errors"] += 1
else:
print(f"{Colors.GREEN}No issues found{Colors.ENDC}")
logging.info(f"No issues found for {input_file}")
logging.info(f"No issues found for {input_file}{output_file}")
# Print summary
print("\nSummary of issues found:")
if issues_found:
@@ -261,8 +282,9 @@ def main():
print(f"{issue_type}: {color}{count}{Colors.ENDC}")
print("\nDetailed issues:")
for file, issue in issues_found:
print(f"\n{file}:")
for in_base, out_base, issue in issues_found:
label = f"{in_base}{out_base}" if in_base else str(out_base)
print(f"\n{label}:")
print(f"{Colors.RED} {issue}{Colors.ENDC}")
# Prompt to delete problem files
@@ -273,23 +295,22 @@ def main():
deleted_count = 0
failed_count = 0
for file, issue in issues_found:
# Skip "Missing from output" - nothing to delete
for in_base, out_base, issue in issues_found:
if issue == "Missing from output":
continue
# Delete output file for all other issues
output_path = os.path.join(output_dir, file)
if not out_base:
continue
output_path = os.path.join(output_dir, out_base)
if os.path.exists(output_path):
try:
os.remove(output_path)
deleted_count += 1
logging.info(f"Deleted problematic output file: {output_path} (Issue: {issue})")
print(f"{Colors.GREEN}Deleted: {file}{Colors.ENDC}")
print(f"{Colors.GREEN}Deleted: {out_base}{Colors.ENDC}")
except Exception as e:
failed_count += 1
logging.error(f"Failed to delete {output_path}: {str(e)}")
print(f"{Colors.RED}Failed to delete {file}: {str(e)}{Colors.ENDC}")
print(f"{Colors.RED}Failed to delete {out_base}: {str(e)}{Colors.ENDC}")
print(f"\n{Colors.GREEN}Deleted {deleted_count} file(s){Colors.ENDC}")
if failed_count > 0: