Handle Unicode zero-width whitespace in SRT time code - thx GJK :)

This commit is contained in:
Nikolaj Olsson 2025-01-16 08:41:18 +01:00
parent f42b17a588
commit 9136e21150
2 changed files with 31 additions and 3 deletions

View File

@ -120,6 +120,26 @@ Line 2";
string expected = "Line 1" + Environment.NewLine + "Line 2";
Assert.AreEqual(expected, actual);
}
/// <summary>
/// Loads a two-entry SRT text through <see cref="SubRip"/> and checks that both entries
/// are parsed and that the first entry's text is "Papa? Papa?".
/// Going by the test name, the time code lines are meant to carry zero-width whitespace
/// characters; those would be invisible in the literal, so avoid retyping it
/// (TODO confirm the characters are actually present in the file).
/// </summary>
[TestMethod]
public void SrtZeroWidthWhitepaceCharInTimeStamp()
{
var target = new SubRip();
var subtitle = new Subtitle();
const string text = @"1
01:05:58,455 --> 01:05:59,260
Papa? Papa?
2
01:06:01,725 --> 01:06:03,805
Es ist einfach, Sie mitzunehmen.";
target.LoadSubtitle(subtitle, GetSrtLines(text), null);
// NOTE(review): Paragraphs[0] is read before the count assertion below, so a failed parse
// would presumably surface as an indexing exception rather than an assertion failure.
string actual = subtitle.Paragraphs[0].Text;
string expected = "Papa? Papa?";
Assert.AreEqual(2, subtitle.Paragraphs.Count);
Assert.AreEqual(expected, actual);
}
[TestMethod]
public void SrtThreeLiner()
{

View File

@ -296,11 +296,15 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
return false;
}
const char zeroWidthSpace = '\u200B';
const char zeroWidthNoBreakSpace = '\uFEFF';
const string defaultSeparator = " --> ";
// Fix some badly formatted separator sequences - anything can happen if you manually edit ;)
var line = input.Replace('،', ',')
.Replace('', ',')
.Replace('¡', ',')
.Replace(zeroWidthSpace, ' ')
.Replace(zeroWidthNoBreakSpace, ' ')
.Replace(" -> ", defaultSeparator)
.Replace(" —> ", defaultSeparator) // em-dash
.Replace(" ——> ", defaultSeparator) // em-dash
@ -310,7 +314,10 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
.Replace(" - -> ", defaultSeparator)
.Replace(" -->> ", defaultSeparator)
.Replace(" ---> ", defaultSeparator)
.Replace(": ", ":").Trim();
.Replace(" ", " ")
.Replace(": ", ":")
.Replace(" :", ":")
.Trim();
// Removed stuff after time codes - like subtitle position
// - example of position info: 00:02:26,407 --> 00:02:31,356 X1:100 X2:100 Y1:100 Y2:100
@ -416,9 +423,9 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
/// </summary>
private static bool IsValidTimeCode(string line)
{
int step = 0;
var step = 0;
var max = line.Length;
for (int i = 0; i < max; i++)
for (var i = 0; i < max; i++)
{
var ch = line[i];
if (char.IsWhiteSpace(ch))
@ -509,6 +516,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
return false;
}
}
return true;
}
}