Unescape HTML Entities in Subtitles after Downloading
This fixes some Subtitles having e.g., `&amp;` instead of just `&`, but especially for special entities like `&rlm;` (the Right-to-Left Mark), which enables Right-to-Left mode on Hebrew and Arabic Subtitles.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import html
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
@@ -473,6 +474,7 @@ class DASH:
|
||||
track.codec not in (Subtitle.Codec.fVTT, Subtitle.Codec.fTTML)
|
||||
):
|
||||
segment_data = try_ensure_utf8(segment_data)
|
||||
segment_data = html.unescape(segment_data.decode("utf8")).encode("utf8")
|
||||
f.write(segment_data)
|
||||
segment_file.unlink()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user