Add option for automatic subtitle character encoding normalization (#68)
* Add option for automatic subtitle character encoding normalization

  The rationale behind this function is that some services use ISO-8859-1 (latin1) or Windows-1252 (CP-1252) instead of UTF-8 encoding, whether intentionally or accidentally. Some services even stream subtitles with malformed/mixed encoding (each segment has a different encoding).

* Remove Subtitle parameter `auto_fix_encoding`

  Just always attempt to fix encoding. If the subtitle is neither UTF-8 nor CP-1252, then it should realistically error out instead of producing garbage Subtitle data anyway.

* Move Subtitle encoding fixing code out of if drm tree
* Use chardet as a last-ditch effort to fix Subs, or return original data
* Move Subtitle.fix_encoding method to utilities as try_ensure_utf8
* Add Shivelight as a contributor

---------

Co-authored-by: rlaphoenix <rlaphoenix@pm.me>
This commit is contained in:
@@ -31,7 +31,7 @@ from devine.core.downloaders import downloader
|
||||
from devine.core.downloaders import requests as requests_downloader
|
||||
from devine.core.drm import Widevine
|
||||
from devine.core.tracks import Audio, Subtitle, Tracks, Video
|
||||
from devine.core.utilities import is_close_match
|
||||
from devine.core.utilities import is_close_match, try_ensure_utf8
|
||||
from devine.core.utils.xml import load_xml
|
||||
|
||||
|
||||
@@ -471,7 +471,11 @@ class DASH:
|
||||
if init_data:
|
||||
f.write(init_data)
|
||||
for segment_file in sorted(save_dir.iterdir()):
|
||||
f.write(segment_file.read_bytes())
|
||||
segment_data = segment_file.read_bytes()
|
||||
# TODO: fix encoding after decryption?
|
||||
if not drm and isinstance(track, Subtitle):
|
||||
segment_data = try_ensure_utf8(segment_data)
|
||||
f.write(segment_data)
|
||||
segment_file.unlink()
|
||||
|
||||
if drm:
|
||||
|
||||
@@ -28,7 +28,7 @@ from devine.core.downloaders import downloader
|
||||
from devine.core.downloaders import requests as requests_downloader
|
||||
from devine.core.drm import DRM_T, ClearKey, Widevine
|
||||
from devine.core.tracks import Audio, Subtitle, Tracks, Video
|
||||
from devine.core.utilities import is_close_match
|
||||
from devine.core.utilities import is_close_match, try_ensure_utf8
|
||||
|
||||
|
||||
class HLS:
|
||||
@@ -301,7 +301,10 @@ class HLS:
|
||||
|
||||
with open(save_path, "wb") as f:
|
||||
for segment_file in sorted(save_dir.iterdir()):
|
||||
f.write(segment_file.read_bytes())
|
||||
segment_data = segment_file.read_bytes()
|
||||
if isinstance(track, Subtitle):
|
||||
segment_data = try_ensure_utf8(segment_data)
|
||||
f.write(segment_data)
|
||||
segment_file.unlink()
|
||||
|
||||
progress(downloaded="Downloaded")
|
||||
|
||||
Reference in New Issue
Block a user