Initial commit
This commit is contained in:
335
devine/core/tracks/track.py
Normal file
335
devine/core/tracks/track.py
Normal file
@@ -0,0 +1,335 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Iterable, Optional, Union
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import m3u8
|
||||
import requests
|
||||
from langcodes import Language
|
||||
|
||||
from devine.core.constants import TERRITORY_MAP
|
||||
from devine.core.downloaders import aria2c
|
||||
from devine.core.drm import DRM_T
|
||||
from devine.core.utilities import get_binary_path
|
||||
|
||||
|
||||
class Track:
|
||||
class DRM(Enum):
|
||||
pass
|
||||
|
||||
class Descriptor(Enum):
|
||||
URL = 1 # Direct URL, nothing fancy
|
||||
M3U = 2 # https://en.wikipedia.org/wiki/M3U (and M3U8)
|
||||
MPD = 3 # https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
id_: str,
|
||||
url: Union[str, list[str]],
|
||||
language: Union[Language, str],
|
||||
is_original_lang: bool = False,
|
||||
descriptor: Descriptor = Descriptor.URL,
|
||||
needs_proxy: bool = False,
|
||||
needs_repack: bool = False,
|
||||
drm: Optional[Iterable[DRM_T]] = None,
|
||||
edition: Optional[str] = None,
|
||||
extra: Optional[Any] = None
|
||||
) -> None:
|
||||
self.id = id_
|
||||
self.url = url
|
||||
# required basic metadata
|
||||
self.language = Language.get(language)
|
||||
self.is_original_lang = bool(is_original_lang)
|
||||
# optional io metadata
|
||||
self.descriptor = descriptor
|
||||
self.needs_proxy = bool(needs_proxy)
|
||||
self.needs_repack = bool(needs_repack)
|
||||
# drm
|
||||
self.drm = drm
|
||||
# extra data
|
||||
self.edition: str = edition
|
||||
self.extra: Any = extra or {} # allow anything for extra, but default to a dict
|
||||
|
||||
# events
|
||||
self.OnSegmentFilter: Optional[Callable] = None
|
||||
self.OnDownloaded: Optional[Callable] = None
|
||||
self.OnDecrypted: Optional[Callable] = None
|
||||
self.OnRepacked: Optional[Callable] = None
|
||||
|
||||
# should only be set internally
|
||||
self.path: Optional[Path] = None
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return "{name}({items})".format(
|
||||
name=self.__class__.__name__,
|
||||
items=", ".join([f"{k}={repr(v)}" for k, v in self.__dict__.items()])
|
||||
)
|
||||
|
||||
def __eq__(self, other: object) -> bool:
|
||||
return isinstance(other, Track) and self.id == other.id
|
||||
|
||||
def get_track_name(self) -> Optional[str]:
|
||||
"""Return the base Track Name. This may be enhanced in sub-classes."""
|
||||
if (self.language.language or "").lower() == (self.language.territory or "").lower():
|
||||
self.language.territory = None # e.g. en-en, de-DE
|
||||
if self.language.territory == "US":
|
||||
self.language.territory = None
|
||||
reduced = self.language.simplify_script()
|
||||
extra_parts = []
|
||||
if reduced.script is not None:
|
||||
extra_parts.append(reduced.script_name(max_distance=25))
|
||||
if reduced.territory is not None:
|
||||
territory = reduced.territory_name(max_distance=25)
|
||||
extra_parts.append(TERRITORY_MAP.get(territory, territory))
|
||||
return ", ".join(extra_parts) or None
|
||||
|
||||
def get_init_segment(self, session: Optional[requests.Session] = None) -> bytes:
|
||||
"""
|
||||
Get the Track's Initial Segment Data Stream.
|
||||
If the Track URL is not detected to be an init segment, it will download
|
||||
up to the first 20,000 (20KB) bytes only.
|
||||
"""
|
||||
if not session:
|
||||
session = requests.Session()
|
||||
|
||||
url = None
|
||||
is_init_stream = False
|
||||
|
||||
if self.descriptor == self.Descriptor.M3U:
|
||||
master = m3u8.loads(session.get(self.url).text, uri=self.url)
|
||||
for segment in master.segments:
|
||||
if not segment.init_section:
|
||||
continue
|
||||
# skip any segment that would be skipped from the download
|
||||
# as we cant consider these a true initial segment
|
||||
if callable(self.OnSegmentFilter) and self.OnSegmentFilter(segment):
|
||||
continue
|
||||
url = ("" if re.match("^https?://", segment.init_section.uri) else segment.init_section.base_uri)
|
||||
url += segment.init_section.uri
|
||||
is_init_stream = True
|
||||
break
|
||||
|
||||
if not url:
|
||||
url = self.url
|
||||
|
||||
if isinstance(url, list):
|
||||
url = url[0]
|
||||
is_init_stream = True
|
||||
|
||||
if is_init_stream:
|
||||
return session.get(url).content
|
||||
|
||||
# likely a full single-file download, get first 20k bytes
|
||||
with session.get(url, stream=True) as s:
|
||||
# assuming enough to contain the pssh/kid
|
||||
for chunk in s.iter_content(20000):
|
||||
# we only want the first chunk
|
||||
return chunk
|
||||
|
||||
def download(self, out: Path, name_template: str = "{type}_{id}", headers: Optional[dict] = None,
|
||||
proxy: Optional[str] = None) -> Path:
|
||||
"""
|
||||
Download the Track and apply any necessary post-edits like Subtitle conversion.
|
||||
|
||||
Parameters:
|
||||
out: Output Directory Path for the downloaded track.
|
||||
name_template: Override the default filename template.
|
||||
Must contain both `{type}` and `{id}` variables.
|
||||
headers: Headers to use when downloading.
|
||||
proxy: Proxy to use when downloading.
|
||||
|
||||
Returns:
|
||||
Where the file was saved, as a Path object.
|
||||
"""
|
||||
if out.is_file():
|
||||
raise ValueError("Path must be to a directory and not a file")
|
||||
|
||||
log = logging.getLogger("download")
|
||||
|
||||
out.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
file_name = name_template.format(
|
||||
type=self.__class__.__name__,
|
||||
id=self.id
|
||||
)
|
||||
|
||||
# we must use .mp4 on tracks:
|
||||
# - as shaka-packager expects mp4 input and mp4 output
|
||||
# - and mkvtoolnix would try to parse the file in raw-bitstream
|
||||
save_path = (out / file_name).with_suffix(".mp4")
|
||||
if self.__class__.__name__ == "Subtitle":
|
||||
save_path = save_path.with_suffix(f".{self.codec.extension}")
|
||||
|
||||
# these would be files like .decrypted, .repack and such.
|
||||
# we cannot trust that these files were not interrupted while writing to disc
|
||||
# lets just delete them before re-attempting a download
|
||||
for existing_file in save_path.parent.glob(f"{save_path.stem}.*{save_path.suffix}"):
|
||||
existing_file.unlink()
|
||||
save_path.with_suffix(".srt").unlink(missing_ok=True)
|
||||
|
||||
if self.descriptor == self.Descriptor.M3U:
|
||||
master = m3u8.loads(
|
||||
requests.get(
|
||||
self.url,
|
||||
headers=headers,
|
||||
proxies={"all": proxy} if self.needs_proxy and proxy else None
|
||||
).text,
|
||||
uri=self.url
|
||||
)
|
||||
|
||||
if not master.segments:
|
||||
raise ValueError("Track URI (an M3U8) has no segments...")
|
||||
|
||||
if all(segment.uri == master.segments[0].uri for segment in master.segments):
|
||||
# all segments use the same file, presumably an EXT-X-BYTERANGE M3U (FUNI)
|
||||
# TODO: This might be a risky way to deal with these kinds of Playlists
|
||||
# What if there's an init section, or one segment is reusing a byte-range
|
||||
segment = master.segments[0]
|
||||
if not re.match("^https?://", segment.uri):
|
||||
segment.uri = urljoin(segment.base_uri, segment.uri)
|
||||
self.url = segment.uri
|
||||
self.descriptor = self.Descriptor.URL
|
||||
else:
|
||||
has_init = False
|
||||
segments = []
|
||||
for segment in master.segments:
|
||||
# merge base uri with uri where needed in both normal and init segments
|
||||
if not re.match("^https?://", segment.uri):
|
||||
segment.uri = segment.base_uri + segment.uri
|
||||
if segment.init_section and not re.match("^https?://", segment.init_section.uri):
|
||||
segment.init_section.uri = segment.init_section.base_uri + segment.init_section.uri
|
||||
|
||||
if segment.discontinuity:
|
||||
has_init = False
|
||||
|
||||
# skip segments we don't want to download (e.g., bumpers, dub cards)
|
||||
if callable(self.OnSegmentFilter) and self.OnSegmentFilter(segment):
|
||||
continue
|
||||
|
||||
if segment.init_section and not has_init:
|
||||
segments.append(segment.init_section.uri)
|
||||
has_init = True
|
||||
segments.append(segment.uri)
|
||||
self.url = list(dict.fromkeys(segments))
|
||||
|
||||
is_segmented = isinstance(self.url, list) and len(self.url) > 1
|
||||
segments_dir = save_path.with_name(save_path.name + "_segments")
|
||||
|
||||
attempts = 1
|
||||
while True:
|
||||
try:
|
||||
asyncio.run(aria2c(
|
||||
self.url,
|
||||
[save_path, segments_dir][is_segmented],
|
||||
headers,
|
||||
proxy if self.needs_proxy else None
|
||||
))
|
||||
break
|
||||
except subprocess.CalledProcessError:
|
||||
log.info(f" - Download attempt {attempts} failed, {['retrying', 'stopping'][attempts == 3]}...")
|
||||
if attempts == 3:
|
||||
raise
|
||||
attempts += 1
|
||||
|
||||
if is_segmented:
|
||||
# merge the segments together
|
||||
with open(save_path, "wb") as f:
|
||||
for file in sorted(segments_dir.iterdir()):
|
||||
data = file.read_bytes()
|
||||
# Apple TV+ needs this done to fix audio decryption
|
||||
data = re.sub(b"(tfhd\x00\x02\x00\x1a\x00\x00\x00\x01\x00\x00\x00)\x02", b"\\g<1>\x01", data)
|
||||
f.write(data)
|
||||
file.unlink() # delete, we don't need it anymore
|
||||
segments_dir.rmdir()
|
||||
|
||||
self.path = save_path
|
||||
|
||||
if self.path.stat().st_size <= 3: # Empty UTF-8 BOM == 3 bytes
|
||||
raise IOError(
|
||||
"Download failed, the downloaded file is empty. "
|
||||
f"This {'was' if self.needs_proxy else 'was not'} downloaded with a proxy." +
|
||||
(
|
||||
" Perhaps you need to set `needs_proxy` as True to use the proxy for this track."
|
||||
if not self.needs_proxy else ""
|
||||
)
|
||||
)
|
||||
|
||||
return self.path
|
||||
|
||||
def delete(self) -> None:
|
||||
if self.path:
|
||||
self.path.unlink()
|
||||
self.path = None
|
||||
|
||||
def repackage(self) -> None:
|
||||
if not self.path or not self.path.exists():
|
||||
raise ValueError("Cannot repackage a Track that has not been downloaded.")
|
||||
|
||||
executable = get_binary_path("ffmpeg")
|
||||
if not executable:
|
||||
raise EnvironmentError("FFmpeg executable \"ffmpeg\" was not found but is required for this call.")
|
||||
|
||||
repacked_path = self.path.with_suffix(f".repack{self.path.suffix}")
|
||||
|
||||
def _ffmpeg(extra_args: list[str] = None):
|
||||
subprocess.run(
|
||||
[
|
||||
executable, "-hide_banner",
|
||||
"-loglevel", "error",
|
||||
"-i", self.path,
|
||||
*(extra_args or []),
|
||||
# Following are very important!
|
||||
"-map_metadata", "-1", # don't transfer metadata to output file
|
||||
"-fflags", "bitexact", # only have minimal tag data, reproducible mux
|
||||
"-codec", "copy",
|
||||
str(repacked_path)
|
||||
],
|
||||
check=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE
|
||||
)
|
||||
|
||||
try:
|
||||
_ffmpeg()
|
||||
except subprocess.CalledProcessError as e:
|
||||
if b"Malformed AAC bitstream detected" in e.stderr:
|
||||
# e.g., TruTV's dodgy encodes
|
||||
_ffmpeg(["-y", "-bsf:a", "aac_adtstoasc"])
|
||||
else:
|
||||
raise
|
||||
|
||||
self.swap(repacked_path)
|
||||
|
||||
def move(self, target: Union[str, Path]) -> bool:
|
||||
"""
|
||||
Move the Track's file from current location, to target location.
|
||||
This will overwrite anything at the target path.
|
||||
"""
|
||||
if not self.path:
|
||||
return False
|
||||
target = Path(target)
|
||||
ok = self.path.rename(target).resolve() == target.resolve()
|
||||
if ok:
|
||||
self.path = target
|
||||
return ok
|
||||
|
||||
def swap(self, target: Union[str, Path]) -> bool:
|
||||
"""
|
||||
Swaps the Track's file with the Target file. The current Track's file is deleted.
|
||||
Returns False if the Track is not yet downloaded, or the target path does not exist.
|
||||
"""
|
||||
target = Path(target)
|
||||
if not target.exists() or not self.path:
|
||||
return False
|
||||
self.path.unlink()
|
||||
ok = target.rename(self.path) == self.path
|
||||
if not ok:
|
||||
return False
|
||||
return self.move(target)
|
||||
|
||||
|
||||
__ALL__ = (Track,)
|
||||
Reference in New Issue
Block a user