#!/usr/bin/env python
"""Retime Japanese subtitle files against the .mkv files in this folder.

For every .mkv / subtitle pair found next to this script, the subtitle is
aligned with ``alass-cli`` either against a subtitle track extracted from the
.mkv (via ``ffprobe`` + ``mkvextract``) or against the .mkv itself, and the
result is written as ``<name>.ja.<ext>``.

Run with the single argument ``rename_only`` to only rename the subtitle files
to ``<name>.ja.<ext>`` without retiming them.
"""
import json
import os
import os.path
import re
import subprocess
import sys
import time
from multiprocessing import Pool

try:
    # Only referenced by the encoding-detection TODO in ``mopen``; keep it
    # optional so the script still runs when the package is not installed.
    import chardet
except ImportError:
    chardet = None

os.chdir(os.path.dirname(os.path.realpath(__file__)))
# Enable this to get files in the parent directory instead of the current directory.
# os.chdir('..')


class mopen:
    """Thin wrapper around ``open`` that defaults text files to UTF-8.

    Attribute access is forwarded to the underlying file object, and the
    wrapper can be used as a context manager (yielding the file object).
    """

    def __init__(self, filename, mode='r'):
        # TODO: Attempt checking encoding, and only fall back to utf-8 if it fails.
        # self.real_encoding = chardet.detect(open(filename, 'rb').read())['encoding'].lower()
        # ``open`` rejects an encoding argument in binary mode.
        encoding = None if 'b' in mode else "utf-8"
        self.file_o = open(filename, mode, encoding=encoding)

    def __getattr__(self, name):
        # Guard against infinite recursion when ``file_o`` itself is missing
        # (e.g. the instance was created without running ``__init__``).
        if name == "file_o":
            raise AttributeError(name)
        return getattr(self.file_o, name)

    def __enter__(self):
        return self.file_o

    def __exit__(self, exc_type, exc_value, traceback):
        self.file_o.close()


def _dialogue_style(line):
    """Return the style field of an ASS ``Dialogue: `` line.

    Returns None when the line is not a dialogue line or has too few
    comma-separated fields to contain a style.
    """
    if not line.startswith("Dialogue: "):
        return None
    fields = line.split(',', 4)
    return fields[3] if len(fields) > 3 else None


def remove_styles(lines, styles):
    """Return ``lines`` without the dialogue lines whose style is not in ``styles``.

    Non-dialogue lines, and dialogue lines whose style cannot be parsed, are
    kept unchanged.
    """
    wanted = frozenset(styles)
    kept = []
    for line in lines:
        style = _dialogue_style(line)
        if style is not None and style not in wanted:
            continue
        kept.append(line)
    return kept


def count_style_occurences(lines, style):
    """Count the dialogue lines in ``lines`` that use ``style``."""
    total = 0
    for line in lines:
        line_style = _dialogue_style(line)
        if line_style is not None and line_style == style:
            total += 1
    return total


def get_example_lines(lines, style, sample=5):
    """Return the first ``sample`` dialogue lines in ``lines`` that use ``style``.

    Fewer lines are returned when fewer than ``sample`` lines match.
    """
    examples = []
    for line in lines:
        line_style = _dialogue_style(line)
        if line_style is not None and line_style == style:
            examples.append(line)
            if len(examples) == sample:
                return examples
    return examples


def get_styles(lines):
    """Return the style names declared by the ``Style: `` lines in ``lines``."""
    prefix = 'Style: '
    return [line[len(prefix):].split(',')[0] for line in lines if line.startswith(prefix)]


def select_keep_styles(all_styles, lines):
    """Ask the user which of ``all_styles`` to keep and return the chosen names.

    Styles are listed most-used first, with example lines printed for the
    first three. A blank answer keeps every style.
    """
    styles_occ = sorted(
        [(s, count_style_occurences(lines, s)) for s in all_styles],
        key=lambda x: x[1],
        reverse=True,
    )
    for i, s in enumerate(styles_occ):
        print("[{}] {} ({} lines use this style)".format(i, s[0], s[1]))
        if i < 3:
            print(''.join(get_example_lines(lines, s[0])))
    keep_indices = input(
        "Which sub styles to keep? Enter numbers separated by spaces or leave blank for all: "
    ).split()
    if not keep_indices:
        return all_styles
    return [s[0] for i, s in enumerate(styles_occ) if str(i) in keep_indices]


def _prompt_stream_index(all_streams):
    """Ask for a stream id until the answer is one of the keys of ``all_streams``."""
    while True:
        answer = input("Pick the stream to retime against: ")
        try:
            index = int(answer)
        except ValueError:
            index = None
        if index in all_streams:
            return index
        print("Please enter one of the stream ids listed above.")


def extract_eng_subs(mkv, index=None):
    """Extract one subtitle stream of ``mkv`` to ``<name>.EXTRACTED.<codec>``.

    ``index`` is the stream index to extract. When it is None or not a
    subtitle stream of this file, the only subtitle stream is used, or the
    user is asked to pick one when there are several.

    Returns the stream index that was used, so it can be passed to the next
    call. Raises ``Exception`` when the file has no subtitle streams, and
    exits the process when the chosen stream has an unsupported codec.
    """
    print("\nExtracting english subtitles, this may take a while...\n")
    mkv_json = json.loads(subprocess.check_output([
        "ffprobe", "-v", "quiet", "-print_format", "json",
        "-show_streams", "-select_streams", "s", mkv
    ]))
    streams = mkv_json.get("streams") or []

    # Use the subtitle track as provided. If there is not one,
    # ask instead.
    all_streams = {s["index"]: s for s in streams}
    if index not in all_streams:
        index = None
    if index is None:
        if not streams:
            raise Exception("No subtitle streams to extract? Can't do any syncing. {}".format(mkv))
        if len(streams) == 1:
            index = streams[0]["index"]
        else:
            print("[id]: Tag Information")
            for s in streams:
                tags = str(s['tags']) if 'tags' in s else 'Unknown'
                print(f"[{s['index']}]:\n{tags}\n")
            index = _prompt_stream_index(all_streams)

    # Extract
    stream = all_streams[index]
    codec_name = stream['codec_name']
    if codec_name == "subrip":
        codec_name = "srt"
    if codec_name not in ("srt", "ssa", "ass", "idx"):
        print("\n\n")
        print("#" * 20)
        print("Embedded subtitle stream is an image based subtitle file.")
        print("These are not supported by Alass and cannot be used.")
        print("#" * 20)
        sys.exit(1)
    extracted = mkv.replace(".mkv", f".EXTRACTED.{codec_name}")
    subprocess.run(["mkvextract", "tracks", mkv, f"{index}:{extracted}"])
    return index


def fix_styling():
    """Let the user strip unwanted styles from the extracted .ass subtitles.

    Works on every file in the current directory whose name contains
    ``.EXTRACTED.``; does nothing when there are none or when the first one
    is not an .ass file.
    """
    extracted_subs = [f for f in os.listdir() if '.EXTRACTED.' in f]
    if not extracted_subs:
        print("No extracted subs found. Skipping style removal.")
        return
    if extracted_subs[0].split('.')[-1] != 'ass':
        print("Extracted subs are not .ASS format. Skipping style removal.")
        return

    # Read each file once, closing the handle straight away.
    lines_by_file = {}
    for sub in extracted_subs:
        with mopen(sub) as ass:
            lines_by_file[sub] = ass.readlines()

    all_lines = [line for lines in lines_by_file.values() for line in lines]
    all_styles = sorted(set(get_styles(all_lines)))
    keep_styles = select_keep_styles(all_styles, all_lines)

    for sub, lines in lines_by_file.items():
        # The lines still carry their "\n"; let the file object turn each one
        # into a single CRLF instead of joining with an extra "\r\n".
        with open(sub, 'w', encoding="utf-8", newline="\r\n") as ass:
            ass.writelines(remove_styles(lines, keep_styles))


def _run_alass(reference, mkv, srt, conf, echo=False):
    """Align ``srt`` to ``reference`` with alass-cli and store it as ``<mkv name>.ja.<ext>``.

    The original ``srt`` is only removed after alass-cli succeeded; a failing
    run raises ``subprocess.CalledProcessError`` and leaves it in place.
    """
    retimed = mkv.replace(".mkv", ".ja.RETIMED.{}".format(conf['ext']))
    command = ["alass-cli", "--split-penalty", conf['split_pen'], reference, srt, retimed]
    if echo:
        print(command)
    subprocess.run(command, check=True)
    os.remove(srt)
    os.rename(retimed, mkv.replace(".mkv", ".ja.{}".format(conf['ext'])))


def retime_based_on_audio(mkv, srt, conf):
    """Retime ``srt`` using the .mkv file itself as the alass reference.

    ``conf`` provides the subtitle extension (``'ext'``) and the alass split
    penalty (``'split_pen'``).
    """
    _run_alass(mkv, mkv, srt, conf)


def retime(mkv, srt, conf):
    """Retime ``srt`` against the subtitle previously extracted from ``mkv``.

    ``conf`` provides the subtitle extension (``'ext'``) and the alass split
    penalty (``'split_pen'``). The extracted reference file is deleted
    afterwards. Raises ``FileNotFoundError`` when no extracted file exists.
    """
    marker = mkv.replace(".mkv", ".EXTRACTED.")
    extracted = next((f for f in os.listdir() if marker in f), None)
    if extracted is None:
        raise FileNotFoundError("No extracted subtitle found for {}".format(mkv))
    _run_alass(extracted, mkv, srt, conf, echo=True)
    os.remove(extracted)


def fix_common_errors(srt):
    """Rewrite ``srt`` in place without stray blank lines.

    A blank line is dropped when it is followed by a line that is not a
    number; a blank line at the very end of the file is kept.
    """
    with mopen(srt) as subfile:
        lines = subfile.readlines()
    last = len(lines) - 1
    cleaned = [
        line for i, line in enumerate(lines)
        if line.strip() != "" or i == last or lines[i + 1].strip().isdigit()
    ]
    with open(srt, 'w', encoding="utf-8") as subfile:
        subfile.write(''.join(cleaned))


def tryint(s):
    """Return ``int(s)``, or ``s`` unchanged when it cannot be converted."""
    try:
        return int(s)
    except (TypeError, ValueError):
        return s


def alphanum_key(s):
    """Split ``s`` into text and integer chunks, for natural ("ep2" < "ep10") sorting."""
    return [tryint(c) for c in re.split('([0-9]+)', s)]


def _prompt_subtitle_extension():
    """Ask which subtitle format to process and return ``'srt'`` or ``'ass'``."""
    extensions = {"": "srt", "1": "srt", "2": "ass"}
    print('[1] .srt files (Default)')
    print('[2] .ass files')
    while True:
        choice = input("What file type are your Japanese subs? ").strip()
        if choice in extensions:
            return extensions[choice]
        print("Please enter 1 or 2.")


def main():
    """Pair the local .mkv and subtitle files and retime (or rename) them."""
    # Guess the local mkv/srt pairs that need syncing
    files = os.listdir()
    conf = {'ext': _prompt_subtitle_extension()}
    srts = sorted([f for f in files if f.endswith(f".{conf['ext']}")], key=alphanum_key)
    mkvs = [f for f in files if f.endswith(".mkv")]

    while len(mkvs) > len(srts):
        print(
            f"Found more .mkv files than .{conf['ext']} files. This can occur when OP/ED/extras are in the same "
            f"folder as episodes.")
        print("Please enter a pattern to filter out of mkv results. (e.g. OP)")
        pattern = input(">>> Pattern: ")
        removed = [m for m in mkvs if pattern in m]
        mkvs = [m for m in mkvs if pattern not in m]
        print("Ignoring filtered mkv files:\n{}".format("\n".join(removed)))
    mkvs = sorted(mkvs, key=alphanum_key)

    if len(mkvs) == 0 or len(mkvs) != len(srts):
        print(f"ERROR: Some .{conf['ext']} files don't have a matching .mkv!")
        print(f"{len(srts)} {conf['ext']} files found. {len(mkvs)} mkv files found")
        print(f"Ensure auto-sub-retimer folder is in same folder as your .mkv and .{conf['ext']} files.")
        print(f"Ensure you have an equal number of .mkv and .{conf['ext']} files!")
        input("press enter to exit...")
        sys.exit(1)

    if len(sys.argv) > 1 and sys.argv[1] == "rename_only":
        for srt, mkv in zip(srts, mkvs):
            os.rename(srt, mkv.replace('.mkv', ".ja.{}".format(conf['ext'])))
        input("Rename finished!")
        sys.exit(0)

    print("[1] Retime using embedded subs. (Default)")
    print("[2] Retime using audio.")
    retime_choice = input("Enter desired option number: ")
    split_pen = input(
        "Split Penalty? Only change this if default resulted in mis-timed subs. (Default 7): ").strip()
    conf['split_pen'] = split_pen or "7"

    for srt in srts:
        fix_common_errors(srt)

    jobs = [(mkv, srt, conf) for mkv, srt in zip(mkvs, srts)]
    if "2" in retime_choice:
        worker = retime_based_on_audio
    else:
        # Reuse the stream index chosen for the first file for the others.
        index = None
        for mkv in mkvs:
            index = extract_eng_subs(mkv, index)
        print("English Sub Extraction Complete.")
        fix_styling()
        worker = retime

    # The context manager shuts the worker processes down once all jobs are done.
    with Pool() as pool:
        pool.starmap(worker, jobs)

    print("\nSuccess!\n")
    time.sleep(3)


if __name__ == '__main__':
    main()