summary | refs | log | tree | commit | diff
path: root/subsync.py
diff options
context:
space:
mode:
Diffstat (limited to 'subsync.py')
-rw-r--r-- subsync.py 462
1 files changed, 231 insertions, 231 deletions
diff --git a/subsync.py b/subsync.py
index cdd52c2..8473429 100644
--- a/subsync.py
+++ b/subsync.py
@@ -1,231 +1,231 @@
-#!/usr/bin/env python
-
-import os
-import subprocess
-import sys
-import json
-import os.path
-import time
-import re
-from pprint import pprint
-from multiprocessing import Pool
-from collections import defaultdict
-import chardet
-
# Run from the parent of the folder that contains this script: enter the
# script's own (symlink-resolved) directory first, then step up one level.
_script_dir = os.path.dirname(os.path.realpath(__file__))
os.chdir(_script_dir)
os.chdir('..')
-
class mopen:
    """Open a text file using the encoding chardet detects for its bytes.

    The file is first read in binary mode so chardet can guess the encoding,
    then reopened in ``mode`` with that encoding.  Unknown attributes are
    forwarded to the underlying file object, and the instance works as a
    context manager that yields that file object and closes it on exit.

    Any ``encoding`` keyword passed by the caller is only used as a fallback
    when chardet cannot determine an encoding; other keywords are ignored.
    """

    def __init__(self, filename, mode='r', **kwargs):
        # Probe inside a ``with`` so the binary handle is closed right away
        # instead of being left for the garbage collector.
        with open(filename, 'rb') as raw:
            detected = chardet.detect(raw.read())['encoding']
        # chardet reports None when it cannot decide (e.g. an empty file);
        # fall back to the caller's hint, then to UTF-8, rather than crash.
        if detected is None:
            detected = kwargs.get('encoding') or 'utf-8'
        self.real_encoding = detected.lower()
        self.file_o = open(filename, mode, encoding=self.real_encoding)

    def __getattr__(self, name):
        # __getattr__ only runs for attributes that are missing.  If file_o
        # itself is missing, looking it up again would recurse forever.
        if name == 'file_o':
            raise AttributeError(name)
        return getattr(self.file_o, name)

    def __enter__(self):
        return self.file_o

    def __exit__(self, type, value, traceback):
        self.file_o.close()
-
-
def get_lib_folder():
    """Return the relative folder name used as the base for the tool paths."""
    lib_folder = "auto-sub-retimer"
    return lib_folder
-
def remove_styles(lines, styles):
    """Drop every "Dialogue: " line whose style is not listed in *styles*.

    The style is read as the fourth comma-separated field of the line.
    Lines that do not start with "Dialogue: " are always kept, and the
    original order is preserved.
    """
    def is_kept(entry):
        if not entry.startswith("Dialogue: "):
            return True
        return entry.split(',')[3] in styles

    return [entry for entry in lines if is_kept(entry)]
-
def count_style_occurences(lines, style):
    """Count the "Dialogue: " lines whose fourth comma field equals *style*."""
    return sum(
        1
        for entry in lines
        if entry.startswith("Dialogue: ") and entry.split(',')[3] == style
    )
-
def get_example_lines(lines, style, sample=5):
    """Return the first *sample* "Dialogue: " lines that use *style*.

    The style is read as the fourth comma-separated field of each
    "Dialogue: " line.  Fewer lines are returned when fewer match.  A
    *sample* of zero or less yields an empty list; previously the countdown
    skipped past zero and every matching line was returned.
    """
    res = []
    if sample <= 0:
        return res
    for entry in lines:
        if entry.startswith("Dialogue: ") and entry.split(',')[3] == style:
            res.append(entry)
            if len(res) >= sample:
                break
    return res
-
def get_styles(lines):
    """Collect the names declared by "Style: " lines, in input order.

    The name is the text between the "Style: " prefix and the first comma.
    Duplicate names are kept.
    """
    prefix = 'Style: '
    return [
        entry[len(prefix):].split(',')[0]
        for entry in lines
        if entry.startswith(prefix)
    ]
-
def select_keep_styles(all_styles, lines):
    """Ask the user which subtitle styles to keep and return their names.

    Styles are listed most-used first, each with its usage count; the three
    most-used ones are followed by example dialogue lines.  The user answers
    with space-separated list numbers.  A blank answer returns *all_styles*
    unchanged.
    """
    ranked = [(name, count_style_occurences(lines, name)) for name in all_styles]
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    for position, (name, uses) in enumerate(ranked):
        print("[{}] {} ({} lines use this style)".format(position, name, uses))
        if position < 3:
            print(''.join(get_example_lines(lines, name)))

    answer = input("Which sub styles to keep? Enter numbers separated by spaces or leave blank for all: ")
    keep_indices = answer.split()
    if not keep_indices:
        return all_styles
    return [
        name
        for position, (name, _uses) in enumerate(ranked)
        if str(position) in keep_indices
    ]
-
def extract_eng_subs(mkv, index=None):
    """Extract one embedded subtitle track from *mkv* with mkvextract.

    ffprobe is used to list the file's subtitle streams.  The stream with
    the given *index* is used when this file has it; otherwise the only
    stream is used, or, when there are several, the user is asked to pick
    one.  The track is written to the mkv's name with ".mkv" replaced by
    ".EXTRACTED.<codec>".

    Returns the stream index that was used, so a caller can pass it on to
    the next file.

    Raises Exception when the file has no subtitle streams.  Exits the
    process with status 1 when the stream's codec is not one of
    srt/ssa/ass/idx.
    """
    print("\nExtracting english subtitles, this may take a while...\n")
    mkv_json = json.loads(subprocess.check_output([
        os.path.join(get_lib_folder(), 'ffmpeg', 'bin', 'ffprobe'),
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_streams",
        "-select_streams",
        "s",
        mkv
    ]))

    # Treat a missing "streams" key like an empty list so the explicit
    # "no subtitle streams" error below is raised instead of a KeyError.
    streams = mkv_json.get("streams") or []

    # Use the subtitle track as provided. If there is not one,
    # ask instead.
    all_streams = {s["index"]: s for s in streams}
    if index not in all_streams:
        index = None
    if index is None:
        if not streams:
            raise Exception("No subtitle streams to extract? Can't do any syncing. {}".format(mkv))
        elif len(streams) == 1:
            index = streams[0]["index"]
        else:
            print("[id]: Tag Information")
            for s in streams:
                tags = str(s.get('tags', 'Unknown'))
                print(f"[{s['index']}]:\n{tags}\n")
            # Re-ask until the answer names one of the listed streams; a
            # typo would otherwise abort with ValueError or KeyError.
            while index not in all_streams:
                try:
                    index = int(input("Pick the stream to retime against: "))
                except ValueError:
                    index = None

    # Extract
    stream = all_streams[index]
    codec_name = stream['codec_name']
    if codec_name == "subrip":
        codec_name = "srt"
    if codec_name not in ("srt", "ssa", "ass", "idx"):
        print("\n\n")
        print("#"*20)
        print("Embedded subtitle stream is an image based subtitle file.")
        print("These are not supported by Alass and cannot be used.")
        print("#"*20)
        sys.exit(1)
    extracted = mkv.replace(".mkv", f".EXTRACTED.{codec_name}")
    subprocess.run([os.path.join(get_lib_folder(), "mkvtoolnix\\mkvextract.exe"), "tracks", mkv, f"{index}:{extracted}"])
    return index
-
def fix_styling():
    """Let the user strip unwanted styles from the extracted .ass subtitles.

    Every file in the current directory whose name contains ".EXTRACTED."
    is considered.  If the first one does not have the "ass" extension the
    step is skipped.  Otherwise the styles declared across all files are
    offered to the user, and each file is rewritten without the dialogue
    lines whose style was not kept.
    """
    extracted_subs = [f for f in os.listdir() if '.EXTRACTED.' in f]
    if not extracted_subs:
        # Nothing was extracted; indexing [0] below would raise IndexError.
        print("No extracted subs found. Skipping style removal.")
        return
    if extracted_subs[0].split('.')[-1] != 'ass':
        print("Extracted subs are not .ASS format. Skipping style removal.")
        return

    # Read each file once, closing the handle promptly.
    contents = {}
    for sub in extracted_subs:
        with mopen(sub, encoding="utf-8") as ass:
            contents[sub] = ass.readlines()

    all_lines = [line for lines in contents.values() for line in lines]
    all_styles = list(set(get_styles(all_lines)))
    keep_styles = select_keep_styles(all_styles, all_lines)

    for sub, lines in contents.items():
        # readlines() keeps each line's trailing "\n", so joining with
        # "\r\n" would add an extra line break after every line.  Write the
        # lines as they are and let newline='\r\n' produce CRLF endings.
        with open(sub, 'w', encoding="utf-8", newline='\r\n') as ass:
            ass.writelines(remove_styles(lines, keep_styles))
-
def retime_based_on_audio(mkv, srt, conf):
    """Retime *srt* against the *mkv* itself using alass.

    alass is run with ``conf['split_pen']`` as the split penalty and writes
    to a temporary ".ja.RETIMED.<ext>" file next to the mkv.  On success the
    original *srt* is deleted and the retimed file is renamed to the mkv's
    name with ".mkv" replaced by ".ja.<ext>" (``<ext>`` is ``conf['ext']``).

    Raises FileNotFoundError when alass produced no output file; in that
    case the original *srt* is left untouched.
    """
    ext = conf['ext']
    retimed = mkv.replace(".mkv", ".ja.RETIMED.{}".format(ext))
    subprocess.run([os.path.join(get_lib_folder(), "alass\\alass.bat"), "--split-penalty", conf['split_pen'], mkv, srt, retimed])
    # The exit status is not inspected, so confirm the output exists before
    # deleting the user's only copy of the subtitle.
    if not os.path.exists(retimed):
        raise FileNotFoundError("alass did not produce {}; keeping {}".format(retimed, srt))
    os.remove(srt)
    os.rename(retimed, mkv.replace(".mkv", ".ja.{}".format(ext)))
-
def retime(mkv, srt, conf):
    """Retime *srt* against the subtitle track extracted from *mkv*.

    The reference is the first file in the current directory whose name
    contains the mkv's name with ".mkv" replaced by ".EXTRACTED.".  alass is
    run with ``conf['split_pen']`` as the split penalty and writes to a
    temporary ".ja.RETIMED.<ext>" file.  On success the original *srt* is
    deleted, the retimed file is renamed to "<mkv name>.ja.<ext>" and the
    extracted reference is deleted.

    Raises FileNotFoundError when no extracted reference exists or when
    alass produced no output file; nothing is deleted in either case.
    """
    ext = conf['ext']
    retimed = mkv.replace(".mkv", ".ja.RETIMED.{}".format(ext))
    reference_prefix = mkv.replace(".mkv", ".EXTRACTED.")
    candidates = [f for f in os.listdir() if reference_prefix in f]
    if not candidates:
        raise FileNotFoundError("No extracted subtitle found for {}".format(mkv))
    extracted = candidates[0]

    command = [os.path.join(get_lib_folder(), "alass\\alass.bat"), "--split-penalty", conf['split_pen'], extracted, srt, retimed]
    print(command)
    subprocess.run(command)
    # The exit status is not inspected, so confirm the output exists before
    # deleting the user's only copy of the subtitle.
    if not os.path.exists(retimed):
        raise FileNotFoundError("alass did not produce {}; keeping {}".format(retimed, srt))
    os.remove(srt)
    os.rename(retimed, mkv.replace(".mkv", ".ja.{}".format(ext)))
    os.remove(extracted)
-
def fix_common_errors(srt):
    """Rewrite *srt* as UTF-8 without stray blank lines.

    A blank line is dropped unless it is the last line of the file or the
    line after it consists only of digits.  All other lines are written
    back unchanged.
    """
    # Use the context manager so the read handle is closed before the same
    # path is reopened for writing.
    with mopen(srt, encoding='utf-8') as subfile:
        lines = subfile.readlines()
    last = len(lines) - 1
    kept = [
        line
        for i, line in enumerate(lines)
        if line.strip() or i == last or lines[i + 1].strip().isdigit()
    ]
    with open(srt, 'w', encoding="utf-8") as subfile:
        subfile.write(''.join(kept))
-
def tryint(s):
    """Return ``int(s)`` when the conversion works, otherwise *s* unchanged.

    Only conversion failures are absorbed; other exceptions such as
    KeyboardInterrupt propagate instead of being swallowed.
    """
    try:
        return int(s)
    except (ValueError, TypeError, OverflowError):
        return s
-
def alphanum_key(s):
    """Build a sort key from *s* by splitting it around runs of 0-9.

    Each chunk is passed through tryint, so digit runs become integers and
    the remaining chunks stay strings.
    """
    chunks = re.split('([0-9]+)', s)
    return list(map(tryint, chunks))
-
if __name__ == '__main__':
    # Interactive entry point: pair the local subtitle files with the local
    # .mkv files (both in natural sort order), then either just rename the
    # subtitles ("rename_only" argument) or retime them with alass.

    # Guess the local mkv/srt pairs that need syncing
    files = os.listdir()
    CONF = {}
    print('[1] .srt files (Default)')
    print('[2] .ass files')
    # Only a blank answer, "1" or "2" is meaningful.  Anything else used to
    # crash, and "0" selected an empty extension, so ask again instead.
    extensions = {"": "srt", "1": "srt", "2": "ass"}
    choice = input("What file type are your Japanese subs? ").strip()
    while choice not in extensions:
        choice = input("What file type are your Japanese subs? ").strip()
    CONF['ext'] = extensions[choice]

    srts = sorted([f for f in files if f.endswith(f".{CONF['ext']}")], key=alphanum_key)
    mkvs = [f for f in files if f.endswith(".mkv")]
    while len(mkvs) > len(srts):
        print(f"Found more .mkv files than .{CONF['ext']} files. This can occur when OP/ED/extras are in the same folder as episodes.")
        print("Please enter a pattern to filter out of mkv results. (e.g. OP)")
        pattern = input(">>> Pattern: ")
        removed = [m for m in mkvs if pattern in m]
        mkvs = [m for m in mkvs if pattern not in m]
        print("Ignoring filtered mkv files:\n{}".format("\n".join(removed)))
    mkvs = sorted(mkvs, key=alphanum_key)
    if len(mkvs) == 0 or len(mkvs) != len(srts):
        print(f"ERROR: Some .{CONF['ext']} files don't have a matching .mkv!")
        print(f"{len(srts)} {CONF['ext']} files found. {len(mkvs)} mkv files found")
        print(f"Ensure auto-sub-retimer folder is in same folder as your .mkv and .{CONF['ext']} files.")
        print(f"Ensure you have an equal number of .mkv and .{CONF['ext']} files!")
        input("press enter to exit...")
        # sys.exit is always available; the bare exit() helper comes from
        # the site module and may be absent.
        sys.exit(1)
    if len(sys.argv) > 1 and sys.argv[1] == "rename_only":
        for srt, mkv in zip(srts, mkvs):
            os.rename(srt, mkv.replace('.mkv', ".ja.{}".format(CONF['ext'])))
        input("Rename finished!")
        sys.exit(0)

    print("[1] Retime using embedded subs. (Default)")
    print("[2] Retime using audio.")
    retime_choice = input("Enter desired option number: ")
    SPLIT_PEN = input("Split Penalty? Only change this if default resulted in mis-timed subs. (Default 7): ").strip()
    if not SPLIT_PEN:
        SPLIT_PEN = "7"
    CONF['split_pen'] = SPLIT_PEN
    for srt in srts:
        fix_common_errors(srt)

    jobs = list(zip(mkvs, srts, [CONF]*len(mkvs)))
    # Create the pool only once it is needed and release its workers when
    # the retiming is done.
    with Pool() as pool:
        if "2" in retime_choice:
            pool.starmap(retime_based_on_audio, jobs)
        else:
            # Extraction runs sequentially because it may prompt the user;
            # the chosen stream index is carried over to the next file.
            index = None
            for mkv in mkvs:
                index = extract_eng_subs(mkv, index)
            print("English Sub Extraction Complete.")
            fix_styling()
            pool.starmap(retime, jobs)
    print("\nSuccess!\n")
    time.sleep(3)
+#!/usr/bin/env python
+
+import os
+import subprocess
+import sys
+import json
+import os.path
+import time
+import re
+from pprint import pprint
+from multiprocessing import Pool
+from collections import defaultdict
+import chardet
+
# Run from the parent of the folder that contains this script: enter the
# script's own (symlink-resolved) directory first, then step up one level.
_script_dir = os.path.dirname(os.path.realpath(__file__))
os.chdir(_script_dir)
os.chdir('..')
+
class mopen:
    """Open a text file using the encoding chardet detects for its bytes.

    The file is first read in binary mode so chardet can guess the encoding,
    then reopened in ``mode`` with that encoding.  Unknown attributes are
    forwarded to the underlying file object, and the instance works as a
    context manager that yields that file object and closes it on exit.

    Any ``encoding`` keyword passed by the caller is only used as a fallback
    when chardet cannot determine an encoding; other keywords are ignored.
    """

    def __init__(self, filename, mode='r', **kwargs):
        # Probe inside a ``with`` so the binary handle is closed right away
        # instead of being left for the garbage collector.
        with open(filename, 'rb') as raw:
            detected = chardet.detect(raw.read())['encoding']
        # chardet reports None when it cannot decide (e.g. an empty file);
        # fall back to the caller's hint, then to UTF-8, rather than crash.
        if detected is None:
            detected = kwargs.get('encoding') or 'utf-8'
        self.real_encoding = detected.lower()
        self.file_o = open(filename, mode, encoding=self.real_encoding)

    def __getattr__(self, name):
        # __getattr__ only runs for attributes that are missing.  If file_o
        # itself is missing, looking it up again would recurse forever.
        if name == 'file_o':
            raise AttributeError(name)
        return getattr(self.file_o, name)

    def __enter__(self):
        return self.file_o

    def __exit__(self, type, value, traceback):
        self.file_o.close()
+
+
def get_lib_folder():
    """Return the relative folder name used as the base for the tool paths."""
    lib_folder = "auto-sub-retimer"
    return lib_folder
+
def remove_styles(lines, styles):
    """Drop every "Dialogue: " line whose style is not listed in *styles*.

    The style is read as the fourth comma-separated field of the line.
    Lines that do not start with "Dialogue: " are always kept, and the
    original order is preserved.
    """
    def is_kept(entry):
        if not entry.startswith("Dialogue: "):
            return True
        return entry.split(',')[3] in styles

    return [entry for entry in lines if is_kept(entry)]
+
def count_style_occurences(lines, style):
    """Count the "Dialogue: " lines whose fourth comma field equals *style*."""
    return sum(
        1
        for entry in lines
        if entry.startswith("Dialogue: ") and entry.split(',')[3] == style
    )
+
def get_example_lines(lines, style, sample=5):
    """Return the first *sample* "Dialogue: " lines that use *style*.

    The style is read as the fourth comma-separated field of each
    "Dialogue: " line.  Fewer lines are returned when fewer match.  A
    *sample* of zero or less yields an empty list; previously the countdown
    skipped past zero and every matching line was returned.
    """
    res = []
    if sample <= 0:
        return res
    for entry in lines:
        if entry.startswith("Dialogue: ") and entry.split(',')[3] == style:
            res.append(entry)
            if len(res) >= sample:
                break
    return res
+
def get_styles(lines):
    """Collect the names declared by "Style: " lines, in input order.

    The name is the text between the "Style: " prefix and the first comma.
    Duplicate names are kept.
    """
    prefix = 'Style: '
    return [
        entry[len(prefix):].split(',')[0]
        for entry in lines
        if entry.startswith(prefix)
    ]
+
def select_keep_styles(all_styles, lines):
    """Ask the user which subtitle styles to keep and return their names.

    Styles are listed most-used first, each with its usage count; the three
    most-used ones are followed by example dialogue lines.  The user answers
    with space-separated list numbers.  A blank answer returns *all_styles*
    unchanged.
    """
    ranked = [(name, count_style_occurences(lines, name)) for name in all_styles]
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    for position, (name, uses) in enumerate(ranked):
        print("[{}] {} ({} lines use this style)".format(position, name, uses))
        if position < 3:
            print(''.join(get_example_lines(lines, name)))

    answer = input("Which sub styles to keep? Enter numbers separated by spaces or leave blank for all: ")
    keep_indices = answer.split()
    if not keep_indices:
        return all_styles
    return [
        name
        for position, (name, _uses) in enumerate(ranked)
        if str(position) in keep_indices
    ]
+
def extract_eng_subs(mkv, index=None):
    """Extract one embedded subtitle track from *mkv* with mkvextract.

    ffprobe is used to list the file's subtitle streams.  The stream with
    the given *index* is used when this file has it; otherwise the only
    stream is used, or, when there are several, the user is asked to pick
    one.  The track is written to the mkv's name with ".mkv" replaced by
    ".EXTRACTED.<codec>".

    Returns the stream index that was used, so a caller can pass it on to
    the next file.

    Raises Exception when the file has no subtitle streams.  Exits the
    process with status 1 when the stream's codec is not one of
    srt/ssa/ass/idx.
    """
    print("\nExtracting english subtitles, this may take a while...\n")
    mkv_json = json.loads(subprocess.check_output([
        os.path.join(get_lib_folder(), 'ffmpeg', 'bin', 'ffprobe'),
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_streams",
        "-select_streams",
        "s",
        mkv
    ]))

    # Treat a missing "streams" key like an empty list so the explicit
    # "no subtitle streams" error below is raised instead of a KeyError.
    streams = mkv_json.get("streams") or []

    # Use the subtitle track as provided. If there is not one,
    # ask instead.
    all_streams = {s["index"]: s for s in streams}
    if index not in all_streams:
        index = None
    if index is None:
        if not streams:
            raise Exception("No subtitle streams to extract? Can't do any syncing. {}".format(mkv))
        elif len(streams) == 1:
            index = streams[0]["index"]
        else:
            print("[id]: Tag Information")
            for s in streams:
                tags = str(s.get('tags', 'Unknown'))
                print(f"[{s['index']}]:\n{tags}\n")
            # Re-ask until the answer names one of the listed streams; a
            # typo would otherwise abort with ValueError or KeyError.
            while index not in all_streams:
                try:
                    index = int(input("Pick the stream to retime against: "))
                except ValueError:
                    index = None

    # Extract
    stream = all_streams[index]
    codec_name = stream['codec_name']
    if codec_name == "subrip":
        codec_name = "srt"
    if codec_name not in ("srt", "ssa", "ass", "idx"):
        print("\n\n")
        print("#"*20)
        print("Embedded subtitle stream is an image based subtitle file.")
        print("These are not supported by Alass and cannot be used.")
        print("#"*20)
        sys.exit(1)
    extracted = mkv.replace(".mkv", f".EXTRACTED.{codec_name}")
    subprocess.run([os.path.join(get_lib_folder(), "mkvtoolnix\\mkvextract.exe"), "tracks", mkv, f"{index}:{extracted}"])
    return index
+
def fix_styling():
    """Let the user strip unwanted styles from the extracted .ass subtitles.

    Every file in the current directory whose name contains ".EXTRACTED."
    is considered.  If the first one does not have the "ass" extension the
    step is skipped.  Otherwise the styles declared across all files are
    offered to the user, and each file is rewritten without the dialogue
    lines whose style was not kept.
    """
    extracted_subs = [f for f in os.listdir() if '.EXTRACTED.' in f]
    if not extracted_subs:
        # Nothing was extracted; indexing [0] below would raise IndexError.
        print("No extracted subs found. Skipping style removal.")
        return
    if extracted_subs[0].split('.')[-1] != 'ass':
        print("Extracted subs are not .ASS format. Skipping style removal.")
        return

    # Read each file once, closing the handle promptly.
    contents = {}
    for sub in extracted_subs:
        with mopen(sub, encoding="utf-8") as ass:
            contents[sub] = ass.readlines()

    all_lines = [line for lines in contents.values() for line in lines]
    all_styles = list(set(get_styles(all_lines)))
    keep_styles = select_keep_styles(all_styles, all_lines)

    for sub, lines in contents.items():
        # readlines() keeps each line's trailing "\n", so joining with
        # "\r\n" would add an extra line break after every line.  Write the
        # lines as they are and let newline='\r\n' produce CRLF endings.
        with open(sub, 'w', encoding="utf-8", newline='\r\n') as ass:
            ass.writelines(remove_styles(lines, keep_styles))
+
def retime_based_on_audio(mkv, srt, conf):
    """Retime *srt* against the *mkv* itself using alass.

    alass is run with ``conf['split_pen']`` as the split penalty and writes
    to a temporary ".ja.RETIMED.<ext>" file next to the mkv.  On success the
    original *srt* is deleted and the retimed file is renamed to the mkv's
    name with ".mkv" replaced by ".ja.<ext>" (``<ext>`` is ``conf['ext']``).

    Raises FileNotFoundError when alass produced no output file; in that
    case the original *srt* is left untouched.
    """
    ext = conf['ext']
    retimed = mkv.replace(".mkv", ".ja.RETIMED.{}".format(ext))
    subprocess.run([os.path.join(get_lib_folder(), "alass\\alass.bat"), "--split-penalty", conf['split_pen'], mkv, srt, retimed])
    # The exit status is not inspected, so confirm the output exists before
    # deleting the user's only copy of the subtitle.
    if not os.path.exists(retimed):
        raise FileNotFoundError("alass did not produce {}; keeping {}".format(retimed, srt))
    os.remove(srt)
    os.rename(retimed, mkv.replace(".mkv", ".ja.{}".format(ext)))
+
def retime(mkv, srt, conf):
    """Retime *srt* against the subtitle track extracted from *mkv*.

    The reference is the first file in the current directory whose name
    contains the mkv's name with ".mkv" replaced by ".EXTRACTED.".  alass is
    run with ``conf['split_pen']`` as the split penalty and writes to a
    temporary ".ja.RETIMED.<ext>" file.  On success the original *srt* is
    deleted, the retimed file is renamed to "<mkv name>.ja.<ext>" and the
    extracted reference is deleted.

    Raises FileNotFoundError when no extracted reference exists or when
    alass produced no output file; nothing is deleted in either case.
    """
    ext = conf['ext']
    retimed = mkv.replace(".mkv", ".ja.RETIMED.{}".format(ext))
    reference_prefix = mkv.replace(".mkv", ".EXTRACTED.")
    candidates = [f for f in os.listdir() if reference_prefix in f]
    if not candidates:
        raise FileNotFoundError("No extracted subtitle found for {}".format(mkv))
    extracted = candidates[0]

    command = [os.path.join(get_lib_folder(), "alass\\alass.bat"), "--split-penalty", conf['split_pen'], extracted, srt, retimed]
    print(command)
    subprocess.run(command)
    # The exit status is not inspected, so confirm the output exists before
    # deleting the user's only copy of the subtitle.
    if not os.path.exists(retimed):
        raise FileNotFoundError("alass did not produce {}; keeping {}".format(retimed, srt))
    os.remove(srt)
    os.rename(retimed, mkv.replace(".mkv", ".ja.{}".format(ext)))
    os.remove(extracted)
+
def fix_common_errors(srt):
    """Rewrite *srt* as UTF-8 without stray blank lines.

    A blank line is dropped unless it is the last line of the file or the
    line after it consists only of digits.  All other lines are written
    back unchanged.
    """
    # Use the context manager so the read handle is closed before the same
    # path is reopened for writing.
    with mopen(srt, encoding='utf-8') as subfile:
        lines = subfile.readlines()
    last = len(lines) - 1
    kept = [
        line
        for i, line in enumerate(lines)
        if line.strip() or i == last or lines[i + 1].strip().isdigit()
    ]
    with open(srt, 'w', encoding="utf-8") as subfile:
        subfile.write(''.join(kept))
+
def tryint(s):
    """Return ``int(s)`` when the conversion works, otherwise *s* unchanged.

    Only conversion failures are absorbed; other exceptions such as
    KeyboardInterrupt propagate instead of being swallowed.
    """
    try:
        return int(s)
    except (ValueError, TypeError, OverflowError):
        return s
+
def alphanum_key(s):
    """Build a sort key from *s* by splitting it around runs of 0-9.

    Each chunk is passed through tryint, so digit runs become integers and
    the remaining chunks stay strings.
    """
    chunks = re.split('([0-9]+)', s)
    return list(map(tryint, chunks))
+
if __name__ == '__main__':
    # Interactive entry point: pair the local subtitle files with the local
    # .mkv files (both in natural sort order), then either just rename the
    # subtitles ("rename_only" argument) or retime them with alass.

    # Guess the local mkv/srt pairs that need syncing
    files = os.listdir()
    CONF = {}
    print('[1] .srt files (Default)')
    print('[2] .ass files')
    # Only a blank answer, "1" or "2" is meaningful.  Anything else used to
    # crash, and "0" selected an empty extension, so ask again instead.
    extensions = {"": "srt", "1": "srt", "2": "ass"}
    choice = input("What file type are your Japanese subs? ").strip()
    while choice not in extensions:
        choice = input("What file type are your Japanese subs? ").strip()
    CONF['ext'] = extensions[choice]

    srts = sorted([f for f in files if f.endswith(f".{CONF['ext']}")], key=alphanum_key)
    mkvs = [f for f in files if f.endswith(".mkv")]
    while len(mkvs) > len(srts):
        print(f"Found more .mkv files than .{CONF['ext']} files. This can occur when OP/ED/extras are in the same folder as episodes.")
        print("Please enter a pattern to filter out of mkv results. (e.g. OP)")
        pattern = input(">>> Pattern: ")
        removed = [m for m in mkvs if pattern in m]
        mkvs = [m for m in mkvs if pattern not in m]
        print("Ignoring filtered mkv files:\n{}".format("\n".join(removed)))
    mkvs = sorted(mkvs, key=alphanum_key)
    if len(mkvs) == 0 or len(mkvs) != len(srts):
        print(f"ERROR: Some .{CONF['ext']} files don't have a matching .mkv!")
        print(f"{len(srts)} {CONF['ext']} files found. {len(mkvs)} mkv files found")
        print(f"Ensure auto-sub-retimer folder is in same folder as your .mkv and .{CONF['ext']} files.")
        print(f"Ensure you have an equal number of .mkv and .{CONF['ext']} files!")
        input("press enter to exit...")
        # sys.exit is always available; the bare exit() helper comes from
        # the site module and may be absent.
        sys.exit(1)
    if len(sys.argv) > 1 and sys.argv[1] == "rename_only":
        for srt, mkv in zip(srts, mkvs):
            os.rename(srt, mkv.replace('.mkv', ".ja.{}".format(CONF['ext'])))
        input("Rename finished!")
        sys.exit(0)

    print("[1] Retime using embedded subs. (Default)")
    print("[2] Retime using audio.")
    retime_choice = input("Enter desired option number: ")
    SPLIT_PEN = input("Split Penalty? Only change this if default resulted in mis-timed subs. (Default 7): ").strip()
    if not SPLIT_PEN:
        SPLIT_PEN = "7"
    CONF['split_pen'] = SPLIT_PEN
    for srt in srts:
        fix_common_errors(srt)

    jobs = list(zip(mkvs, srts, [CONF]*len(mkvs)))
    # Create the pool only once it is needed and release its workers when
    # the retiming is done.
    with Pool() as pool:
        if "2" in retime_choice:
            pool.starmap(retime_based_on_audio, jobs)
        else:
            # Extraction runs sequentially because it may prompt the user;
            # the chosen stream index is carried over to the next file.
            index = None
            for mkv in mkvs:
                index = extract_eng_subs(mkv, index)
            print("English Sub Extraction Complete.")
            fix_styling()
            pool.starmap(retime, jobs)
    print("\nSuccess!\n")
    time.sleep(3)