diff --git a/unshackle/commands/dl.py b/unshackle/commands/dl.py index 0d9f765..7770d75 100644 --- a/unshackle/commands/dl.py +++ b/unshackle/commands/dl.py @@ -25,6 +25,7 @@ import click import jsonpickle import yaml from construct import ConstError +from langcodes import Language from pymediainfo import MediaInfo from pyplayready.cdm import Cdm as PlayReadyCdm from pyplayready.device import Device as PlayReadyDevice @@ -2025,49 +2026,6 @@ class dl: dl_time = time_elapsed_since(dl_start_time) console.print(Padding(f"Track downloads finished in [progress.elapsed]{dl_time}[/]", (0, 5))) - video_track_n = 0 - - while ( - not title.tracks.subtitles - and not no_subs - and not (hasattr(service, "NO_SUBTITLES") and service.NO_SUBTITLES) - and not video_only - and not no_video - and len(title.tracks.videos) > video_track_n - and any( - x.get("codec_name", "").startswith("eia_") - for x in ffprobe(title.tracks.videos[video_track_n].path).get("streams", []) - ) - ): - with console.status(f"Checking Video track {video_track_n + 1} for Closed Captions..."): - try: - # TODO: Figure out the real language, it might be different - # EIA-CC tracks sadly don't carry language information :( - # TODO: Figure out if the CC language is original lang or not. - # Will need to figure out above first to do so. - video_track = title.tracks.videos[video_track_n] - track_id = f"ccextractor-{video_track.id}" - cc_lang = title.language or video_track.language - cc = video_track.ccextractor( - track_id=track_id, - out_path=config.directories.temp - / config.filenames.subtitle.format(id=track_id, language=cc_lang), - language=cc_lang, - original=False, - ) - if cc: - # will not appear in track listings as it's added after all times it lists - title.tracks.add(cc) - self.log.info(f"Extracted a Closed Caption from Video track {video_track_n + 1}") - else: - self.log.info(f"No Closed Captions were found in Video track {video_track_n + 1}") - except EnvironmentError: - self.log.error( - "Cannot extract Closed Captions as the ccextractor executable was not found..." - ) - break - video_track_n += 1 - # Subtitle output mode configuration (for sidecar originals) subtitle_output_mode = config.subtitle.get("output_mode", "mux") sidecar_format = config.subtitle.get("sidecar_format", "srt") @@ -2133,6 +2091,57 @@ class dl: if has_decrypted: self.log.info(f"Decrypted tracks with {decrypt_tool}") + # Extract Closed Captions from decrypted video tracks + if ( + not no_subs + and not (hasattr(service, "NO_SUBTITLES") and service.NO_SUBTITLES) + and not video_only + and not no_video + ): + for video_track_n, video_track in enumerate(title.tracks.videos): + has_manifest_cc = bool(getattr(video_track, "closed_captions", None)) + has_eia_cc = ( + not has_manifest_cc + and not title.tracks.subtitles + and any( + x.get("codec_name", "").startswith("eia_") + for x in ffprobe(video_track.path).get("streams", []) + ) + ) + if not has_manifest_cc and not has_eia_cc: + continue + + with console.status(f"Checking Video track {video_track_n + 1} for Closed Captions..."): + try: + cc_lang = ( + Language.get(video_track.closed_captions[0]["language"]) + if has_manifest_cc and video_track.closed_captions[0].get("language") + else title.language or video_track.language + ) + track_id = f"ccextractor-{video_track.id}" + cc = video_track.ccextractor( + track_id=track_id, + out_path=config.directories.temp + / config.filenames.subtitle.format(id=track_id, language=cc_lang), + language=cc_lang, + original=False, + ) + if cc: + cc.cc = True + title.tracks.add(cc) + self.log.info( + f"Extracted a Closed Caption from Video track {video_track_n + 1}" + ) + else: + self.log.info( + f"No Closed Captions were found in Video track {video_track_n + 1}" + ) + except EnvironmentError: + self.log.error( + "Cannot extract Closed Captions as the ccextractor executable was not found..." + ) + break + # Now repack the decrypted tracks with console.status("Repackaging tracks with FFMPEG..."): has_repacked = False diff --git a/unshackle/core/manifests/hls.py b/unshackle/core/manifests/hls.py index 122b85c..e04edb4 100644 --- a/unshackle/core/manifests/hls.py +++ b/unshackle/core/manifests/hls.py @@ -112,6 +112,15 @@ class HLS: session_drm = HLS.get_all_drm(session_keys) audio_codecs_by_group_id: dict[str, Audio.Codec] = {} + cc_by_group_id: dict[str, list[dict[str, Any]]] = {} + for media in self.manifest.media: + if media.type == "CLOSED-CAPTIONS": + cc_by_group_id.setdefault(media.group_id, []).append({ + "language": media.language, + "name": media.name, + "instream_id": media.instream_id, + "characteristics": media.characteristics, + }) tracks = Tracks() for playlist in self.manifest.playlists: @@ -161,6 +170,9 @@ class HLS: width=playlist.stream_info.resolution[0] if playlist.stream_info.resolution else None, height=playlist.stream_info.resolution[1] if playlist.stream_info.resolution else None, fps=playlist.stream_info.frame_rate, + closed_captions=cc_by_group_id.get( + (playlist.stream_info.closed_captions or "").strip('"'), [] + ), ) if primary_track_type is Video else {} diff --git a/unshackle/core/tracks/tracks.py b/unshackle/core/tracks/tracks.py index e1770a5..7c6cc7f 100644 --- a/unshackle/core/tracks/tracks.py +++ b/unshackle/core/tracks/tracks.py @@ -103,53 +103,78 @@ class Tracks: tree = Tree("", hide_root=True) for track_type in self.TRACK_ORDER_MAP: tracks = list(x for x in all_tracks if isinstance(x, track_type)) - if not tracks: - continue - num_tracks = len(tracks) - track_type_plural = track_type.__name__ + ("s" if track_type != Audio and num_tracks != 1 else "") - tracks_tree = tree.add(f"[repr.number]{num_tracks}[/] {track_type_plural}") - for track in tracks: - if add_progress and track_type not in (Chapter, Attachment): - progress = Progress( - SpinnerColumn(finished_text=""), - BarColumn(), - "•", - TimeRemainingColumn(compact=True, elapsed_when_finished=True), - "•", - TextColumn("[progress.data.speed]{task.fields[downloaded]}"), - console=console, - speed_estimate_period=10, + if tracks: + num_tracks = len(tracks) + track_type_plural = track_type.__name__ + ("s" if track_type != Audio and num_tracks != 1 else "") + tracks_tree = tree.add(f"[repr.number]{num_tracks}[/] {track_type_plural}") + for track in tracks: + if add_progress and track_type not in (Chapter, Attachment): + progress = Progress( + SpinnerColumn(finished_text=""), + BarColumn(), + "•", + TimeRemainingColumn(compact=True, elapsed_when_finished=True), + "•", + TextColumn("[progress.data.speed]{task.fields[downloaded]}"), + console=console, + speed_estimate_period=10, + ) + task = progress.add_task("", downloaded="-") + state = {"total": 100.0} + + def update_track_progress( + task_id: int = task, + _state: dict[str, float] = state, + _progress: Progress = progress, + **kwargs, + ) -> None: + """ + Ensure terminal status states render as a fully completed bar. + + Some downloaders can report completed slightly below total + before emitting the final "Downloaded" state. + """ + if "total" in kwargs and kwargs["total"] is not None: + _state["total"] = kwargs["total"] + + downloaded_state = kwargs.get("downloaded") + if downloaded_state in {"Downloaded", "Decrypted", "[yellow]SKIPPED"}: + kwargs["completed"] = _state["total"] + _progress.update(task_id=task_id, **kwargs) + + progress_callables.append(update_track_progress) + track_table = Table.grid() + track_table.add_row(str(track)[6:], style="text2") + track_table.add_row(progress) + tracks_tree.add(track_table) + else: + tracks_tree.add(str(track)[6:], style="text2") + + # Show Closed Captions right after Subtitles (even if no subtitle tracks exist) + if track_type is Subtitle: + seen_cc: set[str] = set() + unique_cc: list[str] = [] + for video in (x for x in all_tracks if isinstance(x, Video)): + for cc in getattr(video, "closed_captions", []): + lang = cc.get("language", "und") + name = cc.get("name", "") + instream_id = cc.get("instream_id", "") + key = f"{lang}|{instream_id}" + if key in seen_cc: + continue + seen_cc.add(key) + parts = [f"[CC] | {lang}"] + if name: + parts.append(name) + if instream_id: + parts.append(instream_id) + unique_cc.append(" | ".join(parts)) + if unique_cc: + cc_tree = tree.add( + f"[repr.number]{len(unique_cc)}[/] Closed Caption{'s' if len(unique_cc) != 1 else ''}" ) - task = progress.add_task("", downloaded="-") - state = {"total": 100.0} - - def update_track_progress( - task_id: int = task, - _state: dict[str, float] = state, - _progress: Progress = progress, - **kwargs, - ) -> None: - """ - Ensure terminal status states render as a fully completed bar. - - Some downloaders can report completed slightly below total - before emitting the final "Downloaded" state. - """ - if "total" in kwargs and kwargs["total"] is not None: - _state["total"] = kwargs["total"] - - downloaded_state = kwargs.get("downloaded") - if downloaded_state in {"Downloaded", "Decrypted", "[yellow]SKIPPED"}: - kwargs["completed"] = _state["total"] - _progress.update(task_id=task_id, **kwargs) - - progress_callables.append(update_track_progress) - track_table = Table.grid() - track_table.add_row(str(track)[6:], style="text2") - track_table.add_row(progress) - tracks_tree.add(track_table) - else: - tracks_tree.add(str(track)[6:], style="text2") + for cc_str in unique_cc: + cc_tree.add(cc_str, style="text2") return tree, progress_callables diff --git a/unshackle/core/tracks/video.py b/unshackle/core/tracks/video.py index a8cb97b..1f5674f 100644 --- a/unshackle/core/tracks/video.py +++ b/unshackle/core/tracks/video.py @@ -200,6 +200,7 @@ class Video(Track): height: Optional[int] = None, fps: Optional[Union[str, int, float]] = None, scan_type: Optional[Video.ScanType] = None, + closed_captions: Optional[list[dict[str, Any]]] = None, **kwargs: Any, ) -> None: """ @@ -264,6 +265,7 @@ class Video(Track): raise ValueError("Expected fps to be a number, float, or a string as numerator/denominator form, " + str(e)) self.scan_type = scan_type + self.closed_captions: list[dict[str, Any]] = closed_captions or [] self.needs_duration_fix = False def __str__(self) -> str: @@ -346,22 +348,27 @@ class Video(Track): if not binaries.CCExtractor: raise EnvironmentError("ccextractor executable was not found.") - # ccextractor often fails in weird ways unless we repack - self.repackage() - out_path = Path(out_path) - try: - subprocess.run( - [binaries.CCExtractor, "-trim", "-nobom", "-noru", "-ru1", "-o", out_path, self.path], - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - except subprocess.CalledProcessError as e: - out_path.unlink(missing_ok=True) - if not e.returncode == 10: # No captions found - raise + def _run_ccextractor() -> bool: + try: + subprocess.run( + [binaries.CCExtractor, "-trim", "-nobom", "-noru", "-ru1", "-o", out_path, self.path], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + except subprocess.CalledProcessError as e: + out_path.unlink(missing_ok=True) + if e.returncode != 10: # 10 = No captions found + raise + return out_path.exists() + + # Try on the original file first (preserves container-level CC data like c608 boxes), + # then fall back to repacked file (ccextractor can fail on some container formats). + if not _run_ccextractor(): + self.repackage() + _run_ccextractor() if out_path.exists(): cc_track = Subtitle(