This commit is contained in:
niksedk 2023-01-15 12:12:22 +01:00
parent 7f99f97a62
commit 8190ce5552
3 changed files with 194 additions and 42 deletions

View File

@ -1715,6 +1715,68 @@ VÄLKOMMEN TILL TEXAS
"<font color=\"cyan\">line 2.2</font>", subtitle.Paragraphs[0].Text);
}
[TestMethod]
public void WebVttItalicCue1()
{
    // Arrange: the outer class span lists only non-italic styles, while the
    // nested span references the style declared with "font-style: italic".
    const string expected = "<i>Hallo world!</i>";
    var webVtt = new WebVTT();
    var loaded = new Subtitle();
    var content = @"WEBVTT
STYLE
::cue(.background-color_transparent) {
background-color: rgba(255,255,255,0.0);
}
::cue(.font-family_proportionalSansSerif) {
font-family: proportionalSansSerif;
}
::cue(.font-style_normal) {
font-style: normal;
}
::cue(.font-weight_normal) {
font-weight: normal;
}
::cue(.text-shadow_black-4%) {
text-shadow: black 4%;
}
::cue(.font-style_italic) {
font-style: italic;
}
00:02:36.840 --> 00:02:39.120 line:81.11% align:center
<c.background-color_transparent.font-family_proportionalSansSerif.font-style_normal.font-weight_normal.text-shadow_black-4%><c.font-style_italic>Hallo world!</c></c>
";
    var lines = content.SplitToLines();

    // Act: load the WebVTT text, then strip native formatting with SubRip as the new format.
    webVtt.LoadSubtitle(loaded, lines, null);
    webVtt.RemoveNativeFormatting(loaded, new SubRip());

    // Assert: only the italic tag pair is left around the text.
    Assert.AreEqual(expected, loaded.Paragraphs[0].Text);
}
[TestMethod]
public void WebVttItalicCue2()
{
    // Arrange: a single class span that references both a bold and an italic cue style.
    const string expected = "<b><i>Hallo world!</i></b>";
    var webVtt = new WebVTT();
    var loaded = new Subtitle();
    var content = @"WEBVTT
STYLE
::cue(.bold) {
font-weight: bold;
}
::cue(.italic) {
font-style: italic;
}
00:02:36.840 --> 00:02:39.120 line:81.11% align:center
<c.bold.italic>Hallo world!</c>
";
    var lines = content.SplitToLines();

    // Act: load the WebVTT text, then strip native formatting with SubRip as the new format.
    webVtt.LoadSubtitle(loaded, lines, null);
    webVtt.RemoveNativeFormatting(loaded, new SubRip());

    // Assert: bold wraps italic, both around the text.
    Assert.AreEqual(expected, loaded.Paragraphs[0].Text);
}
[TestMethod]
public void WebVttEscapeEncoding()
{

View File

@ -62,6 +62,7 @@ namespace Nikse.SubtitleEdit.Core.Common
return new TimeCode(hours, minutes, seconds, 0).TotalMilliseconds;
}
}
return 0;
}
@ -155,8 +156,8 @@ namespace Nikse.SubtitleEdit.Core.Common
/// <summary>
/// Formats the time code as "HH:MM:SS" + decimal separator + "mmm".
/// Whole days are folded into the hour field (days * 24 + hours).
/// </summary>
/// <param name="localize">
/// When true, the decimal separator of the current culture is used;
/// otherwise a comma is used.
/// </param>
/// <returns>The formatted text after it has been passed through PrefixSign.</returns>
public string ToString(bool localize)
{
    var ts = TimeSpan;

    // Each local is declared exactly once; a second declaration of the same
    // name in this scope does not compile.
    var decimalSeparator = localize ? CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator : ",";
    var s = $"{ts.Hours + ts.Days * 24:00}:{ts.Minutes:00}:{ts.Seconds:00}{decimalSeparator}{ts.Milliseconds:000}";
    return PrefixSign(s);
}
@ -164,7 +165,7 @@ namespace Nikse.SubtitleEdit.Core.Common
public string ToShortString(bool localize = false)
{
var ts = TimeSpan;
string decimalSeparator = localize ? CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator : ",";
var decimalSeparator = localize ? CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator : ",";
string s;
if (ts.Minutes == 0 && ts.Hours == 0 && ts.Days == 0)
{
@ -178,20 +179,22 @@ namespace Nikse.SubtitleEdit.Core.Common
{
s = $"{ts.Hours + ts.Days * 24:0}:{ts.Minutes:00}:{ts.Seconds:00}{decimalSeparator}{ts.Milliseconds:000}";
}
return PrefixSign(s);
}
public string ToShortStringHHMMSSFF()
{
string s = ToHHMMSSFF();
string pre = string.Empty;
var s = ToHHMMSSFF();
var pre = string.Empty;
if (s.StartsWith('-'))
{
pre = "-";
s = s.TrimStart('-');
}
int j = 0;
int len = s.Length;
var j = 0;
var len = s.Length;
while (j + 6 < len && s[j] == '0' && s[j + 1] == '0' && s[j + 2] == ':')
{
j += 3;
@ -215,6 +218,7 @@ namespace Nikse.SubtitleEdit.Core.Common
{
s = $"{ts.Days * 24 + ts.Hours:00}:{ts.Minutes:00}:{ts.Seconds:00}:{SubtitleFormat.MillisecondsToFramesMaxFrameRate(ts.Milliseconds):00}";
}
return PrefixSign(s);
}
@ -267,6 +271,7 @@ namespace Nikse.SubtitleEdit.Core.Common
{
s = $"{ts.Seconds:00}:{SubtitleFormat.MillisecondsToFramesMaxFrameRate(ts.Milliseconds):00}";
}
return PrefixSign(s);
}
@ -320,6 +325,5 @@ namespace Nikse.SubtitleEdit.Core.Common
return ToShortString(true);
}
}
}

View File

@ -576,7 +576,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{
v = v.Remove(end + 1);
}
end = v.IndexOf(' ');
if (end >= 0)
{
@ -591,55 +591,56 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
public override void RemoveNativeFormatting(Subtitle subtitle, SubtitleFormat newFormat)
{
var regexWebVttColorMulti = new Regex(@"<c.[A-Za-z0-9-_\.]*>", RegexOptions.Compiled);
var regexRemoveCTags = new Regex(@"\</?c([a-zA-Z\._\-\d%#]*)\>", RegexOptions.Compiled);
var regexRemoveTimeCodes = new Regex(@"\<\d+:\d+:\d+\.\d+\>", RegexOptions.Compiled); // <00:00:10.049>
var regexTagsPlusWhiteSpace = new Regex(@"(\{\\an\d\})[\s\r\n]+", RegexOptions.Compiled); // <00:00:10.049>
var cueStyles = GetCueStyles(subtitle.Header);
var italicStyles = GetStylesWith(cueStyles, "font-style:italic;");
var boldStyles = GetStylesWith(cueStyles, "font-weight:bold;");
foreach (var p in subtitle.Paragraphs)
{
if (p.Text.Contains('<') || p.Text.Contains('&'))
{
var text = p.Text.Replace("&rlm;", string.Empty).Replace("&lrm;", string.Empty); // or use rlm=\u202B, lrm=\u202A ?
foreach (var knownLanguage in KnownLanguages)
{
text = text.Replace("<c." + knownLanguage + ">", string.Empty).Replace("</c." + knownLanguage + ">", string.Empty);
}
text = System.Net.WebUtility.HtmlDecode(text);
var match = regexWebVttColorMulti.Match(text);
var match = regexRemoveCTags.Match(text);
while (match.Success)
{
var tag = match.Value.Substring(3, match.Value.Length - 4);
tag = FindBestColorTagOrDefault(tag);
if (tag == null)
var start = match.Index + 1;
var styles = GetStyles(match.Value);
var hasItalic = italicStyles.Any(st => styles.Contains(st));
var hasBold = boldStyles.Any(st => styles.Contains(st));
var colorTag = FindBestColorTagOrDefault(styles.ToList());
if (hasItalic)
{
text = text.Replace(match.Value, string.Empty);
text = text.Replace(match.Value.Insert(1, "/"), string.Empty);
match = regexWebVttColorMulti.Match(text);
continue;
text = text.Insert(match.Index, "<i>");
start += 3;
}
var fontString = "<font color=\"" + tag + "\">";
fontString = fontString.Trim('"').Trim('\'');
text = text.Remove(match.Index, match.Length).Insert(match.Index, fontString);
var endIndex = text.IndexOf("</c>", match.Index, StringComparison.OrdinalIgnoreCase);
if (endIndex >= 0)
if (hasBold)
{
text = text.Remove(endIndex, 4).Insert(endIndex, "</font>");
text = text.Insert(match.Index, "<b>");
start += 3;
}
else
if (colorTag != null)
{
endIndex = text.IndexOf("</c.", match.Index, StringComparison.OrdinalIgnoreCase);
if (endIndex >= 0)
{
var endEndIndex = text.IndexOf('>', endIndex);
if (endEndIndex > 0)
{
text = text.Remove(endIndex, endEndIndex - endIndex).Insert(endIndex, "</font>");
}
}
var fontString = "<font color=\"" + colorTag + "\">";
fontString = fontString.Trim('"').Trim('\'');
text = text.Insert(match.Index, fontString);
start += fontString.Length;
text = SetEndTag(text, text.IndexOf("<c", match.Index, StringComparison.Ordinal), "</font>");
}
match = regexWebVttColorMulti.Match(text);
if (hasItalic)
{
text = SetEndTag(text, text.IndexOf("<c", match.Index, StringComparison.Ordinal), "</i>");
}
if (hasBold)
{
text = SetEndTag(text, text.IndexOf("<c", match.Index, StringComparison.Ordinal), "</b>");
}
match = regexRemoveCTags.Match(text, start);
}
text = RemoveTag("v", text);
@ -654,9 +655,94 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
}
}
private static string FindBestColorTagOrDefault(string tag)
/// <summary>
/// Inserts <paramref name="endTag"/> directly in front of the closing "&lt;/c" tag that
/// pairs with the opening "&lt;c" tag starting at <paramref name="matchIndex"/>,
/// taking nested "c" tags into account.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="matchIndex">Index of the '&lt;' character of the opening tag to pair.</param>
/// <param name="endTag">End tag to insert, e.g. "&lt;/i&gt;".</param>
/// <returns>
/// The text with the end tag inserted before the paired closing tag, or appended
/// at the end of the text when no paired closing tag is found.
/// </returns>
private static string SetEndTag(string text, int matchIndex, string endTag)
{
    var level = 0;
    var startLevel = 0;

    // Tracked separately from startLevel: closing tags that precede the start tag
    // can drive the level negative, so no level value is safe to use as a
    // "start not seen yet" marker.
    var startFound = false;

    // Every tag test looks at three characters, hence the "- 2".
    for (var i = 0; i < text.Length - 2; i++)
    {
        if (text[i] != '<')
        {
            continue;
        }

        // An opening class span is either "<c.class...>" or a plain "<c>";
        // both must be counted so that nesting stays balanced.
        if (text[i + 1] == 'c' && (text[i + 2] == '.' || text[i + 2] == '>'))
        {
            if (i == matchIndex)
            {
                startLevel = level;
                startFound = true;
            }

            level++;
        }
        else if (text[i + 1] == '/' && text[i + 2] == 'c')
        {
            level--;
            if (startFound && startLevel == level)
            {
                return text.Insert(i, endTag);
            }
        }
    }

    // No paired closing tag: close the formatting at the end of the text.
    return text + endTag;
}
/// <summary>
/// Collects the names (keys) of all cue styles whose value contains
/// <paramref name="searchText"/>, compared ordinally and ignoring case.
/// </summary>
/// <param name="cueStyles">Cue style name mapped to its declaration text.</param>
/// <param name="searchText">Text to look for inside each declaration text.</param>
/// <returns>A new list with the matching style names; empty when none match.</returns>
private static List<string> GetStylesWith(Dictionary<string, string> cueStyles, string searchText)
{
    return cueStyles
        .Where(style => style.Value.Contains(searchText, StringComparison.OrdinalIgnoreCase))
        .Select(style => style.Key)
        .ToList();
}
/// <summary>
/// Parses "::cue(name) { ... }" style rules out of a header text.
/// </summary>
/// <param name="header">Header text to scan; may be null or empty.</param>
/// <returns>
/// A dictionary mapping each cue name (without leading dots) to its declaration
/// text, trimmed and with all space characters removed. When the same cue name
/// occurs in more than one rule, the declaration texts are concatenated in order
/// of appearance. Empty when the header is null/empty or contains no rules.
/// </returns>
private Dictionary<string, string> GetCueStyles(string header)
{
    var dic = new Dictionary<string, string>();
    if (string.IsNullOrEmpty(header))
    {
        return dic;
    }

    var matches = new Regex(@"::cue\(([a-zA-Z\._\-\d%#]*)\)\s*{").Matches(header);
    foreach (Match match in matches)
    {
        // Take the name from the capture group, so any whitespace the pattern
        // allows between ")" and "{" (tabs, line breaks) cannot leak into it.
        var cueName = match.Groups[1].Value.TrimStart('.');

        var contentStart = match.Index + match.Length;
        var end = header.IndexOf('}', contentStart);
        if (end < 0)
        {
            // Rule without a closing brace - nothing to read.
            continue;
        }

        var content = header.Substring(contentStart, end - contentStart).Trim().Replace(" ", string.Empty);

        // A selector may be repeated; merge the declarations instead of letting
        // Dictionary.Add throw on the duplicate key.
        if (dic.TryGetValue(cueName, out var existing))
        {
            dic[cueName] = existing + content;
        }
        else
        {
            dic.Add(cueName, content);
        }
    }

    return dic;
}
/// <summary>
/// Splits a WebVTT class tag such as "&lt;c.bold.italic&gt;" into its
/// dot-separated parts ("bold", "italic").
/// </summary>
/// <param name="cTag">The tag text, including the angle brackets.</param>
/// <returns>
/// The parts left after removing every "&lt;c." occurrence and any trailing '&gt;'
/// characters, split on '.'.
/// </returns>
private static string[] GetStyles(string cTag)
{
    var withoutPrefix = cTag.Replace("<c.", string.Empty);
    var classList = withoutPrefix.TrimEnd('>');
    return classList.Split('.');
}
private static string FindBestColorTagOrDefault(List<string> tags)
{
var tags = tag.Split('.').ToList();
tags.Reverse();
foreach (var s in tags)
{