Fix typo + improve generic parsing a little

This commit is contained in:
niksedk 2019-11-11 10:16:42 +01:00
parent 94c3fb8f5d
commit e821495566
7 changed files with 981 additions and 923 deletions

View File

@ -611,8 +611,8 @@
<Compile Include="TransportStream\Teletext.cs" />
<Compile Include="TransportStream\TransportStreamParser.cs" />
<Compile Include="TransportStream\TransportStreamSubtitle.cs" />
<Compile Include="UknownFormatImporterJson.cs" />
<Compile Include="UknownFormatImporter.cs" />
<Compile Include="UnknownFormatImporterJson.cs" />
<Compile Include="UnknownFormatImporter.cs" />
<Compile Include="Utilities.cs" />
<Compile Include="VideoInfo.cs" />
<Compile Include="VobSub\Helper.cs" />

View File

@ -10,7 +10,7 @@ namespace Nikse.SubtitleEdit.Core
/// <summary>
/// Generic subtitle format parser
/// </summary>
public class UknownFormatImporter
public class UnknownFormatImporter
{
private static readonly char[] ExpectedSplitChars = { '.', ',', ';', ':' };
public bool UseFrames { get; set; }
@ -69,7 +69,7 @@ namespace Nikse.SubtitleEdit.Core
if (subtitle.Paragraphs.Count < 2 || isJson)
{
var jsonSubtitle = new UknownFormatImporterJson().AutoGuessImport(lines);
var jsonSubtitle = new UnknownFormatImporterJson().AutoGuessImport(lines);
if (jsonSubtitle != null && jsonSubtitle.Paragraphs.Count > 2)
{
subtitle = jsonSubtitle;
@ -81,6 +81,13 @@ namespace Nikse.SubtitleEdit.Core
subtitle = ImportSubtitleWithNoLineBreaks(lines[0]);
}
if (subtitle.Paragraphs.Count == 0)
{
var text = string.Join(Environment.NewLine, lines);
subtitle = ImportSubtitleWithNoLineBreaksWithExtraSpaces(text);
}
if (subtitle.Paragraphs.Count > 0 && lines.Count > 0 && lines.Count / subtitle.Paragraphs.Count > 25)
{ // no more than 25 raw lines per subtitle lines
return new Subtitle();
@ -620,6 +627,11 @@ namespace Nikse.SubtitleEdit.Core
}
if (allNumbers && lineWithPerhapsOnlyNumbers.Length > 5)
{
if (line.Contains("->"))
{
line = line.RemoveChar(' ');
}
string[] arr = line.Replace('-', ' ').Replace('>', ' ').Replace('{', ' ').Replace('}', ' ').Replace('[', ' ').Replace(']', ' ').Trim().Split(splitChars, StringSplitOptions.RemoveEmptyEntries);
if (arr.Length == 2)
{
@ -875,8 +887,8 @@ namespace Nikse.SubtitleEdit.Core
var ch = text[i];
if (char.IsNumber(ch))
{
var macth = regex.Match(text.Substring(i));
if (macth.Success)
var match = regex.Match(text.Substring(i));
if (match.Success)
{
if (p != null)
{
@ -884,10 +896,10 @@ namespace Nikse.SubtitleEdit.Core
}
sb.Clear();
var arr = macth.Value.Split(' ');
var arr = match.Value.Split(' ');
if (arr.Length == 4)
{
i += macth.Value.Length;
i += match.Value.Length;
p = new Paragraph
{
StartTime = DecodeTime(arr[1].Split(ExpectedSplitChars)),
@ -909,5 +921,52 @@ namespace Nikse.SubtitleEdit.Core
subtitle.Renumber();
return subtitle;
}
// Time code pair that may contain stray spaces inside and between the numbers,
// e.g.: 00: 00: 01.502 --> 00: 00: 03.604
// The first number group of the start time is optional; the arrow may have zero to three dashes.
// \G anchors the match at the start index handed to Regex.Match(string, int), so the scanner
// below can test "does a time code start exactly here?" without copying the rest of the text.
// Kept as a static field so the compiled regex is built once instead of on every call.
private static readonly Regex TimeCodesWithExtraSpacesRegex = new Regex(@"\G(\d+: *)?\d+:\s*\d+[.,:;] *\d+ -{0,3}> \d+: *\d+:\s*\d+[.,:;] *\d+\b", RegexOptions.Compiled);

/// <summary>
/// Scans <paramref name="text"/> character by character for time code pairs that contain
/// extra spaces (e.g. "00: 00: 01.502 --> 00: 00: 03.604") and builds one paragraph per pair.
/// The characters found between one time code pair and the next (or the end of the text)
/// are trimmed, passed through <c>Utilities.AutoBreakLine</c> and used as the paragraph text.
/// Characters that appear before the first time code pair are discarded.
/// </summary>
/// <param name="text">Raw subtitle text; the caller visible in this file joins all input lines with new lines.</param>
/// <returns>A renumbered subtitle; it has no paragraphs when no time code pair was found.</returns>
private static Subtitle ImportSubtitleWithNoLineBreaksWithExtraSpaces(string text)
{
    var subtitle = new Subtitle();
    var sb = new StringBuilder();
    Paragraph p = null;
    int i = 0;
    while (i < text.Length)
    {
        var ch = text[i];
        if (char.IsNumber(ch))
        {
            // Match in place at index i (the pattern starts with \G) - no Substring copy per digit.
            var match = TimeCodesWithExtraSpacesRegex.Match(text, i);
            if (match.Success)
            {
                // The pattern contains exactly one '>', so a successful match always yields two parts:
                // the start time (with trailing arrow dashes) and the end time.
                var arr = match.Value.Split('>');
                if (arr.Length == 2)
                {
                    // A new time code closes the previous paragraph: everything collected so far is its text.
                    if (p != null)
                    {
                        p.Text = Utilities.AutoBreakLine(sb.ToString().Trim());
                    }

                    sb.Clear();
                    i += match.Value.Length;
                    p = new Paragraph
                    {
                        // Spaces are stripped (and the arrow dashes trimmed off the start part) before the
                        // value is split on the expected separators and handed to DecodeTime.
                        StartTime = DecodeTime(arr[0].RemoveChar(' ').TrimEnd('-').Split(ExpectedSplitChars)),
                        EndTime = DecodeTime(arr[1].RemoveChar(' ').Split(ExpectedSplitChars))
                    };
                    subtitle.Paragraphs.Add(p);
                    continue;
                }
            }
        }

        sb.Append(ch);
        i++;
    }

    // The last paragraph has no following time code to close it, so assign the remaining text here.
    if (p != null && string.IsNullOrEmpty(p.Text))
    {
        p.Text = Utilities.AutoBreakLine(sb.ToString().Trim());
    }

    subtitle.Renumber();
    return subtitle;
}
}
}

View File

@ -7,9 +7,8 @@ using System.Text;
namespace Nikse.SubtitleEdit.Core
{
public class UknownFormatImporterJson
public class UnknownFormatImporterJson
{
public Subtitle AutoGuessImport(List<string> lines)
{

View File

@ -672,7 +672,7 @@ namespace Nikse.SubtitleEdit.Forms
s = rtb.Text;
}
}
var unknownFormatImporter = new UknownFormatImporter { UseFrames = true };
var unknownFormatImporter = new UnknownFormatImporter { UseFrames = true };
var genericParseSubtitle = unknownFormatImporter.AutoGuessImport(s.SplitToLines());
if (genericParseSubtitle.Paragraphs.Count > 1)
{

View File

@ -45,7 +45,7 @@ namespace Nikse.SubtitleEdit.Forms
private void GeneratePreviewReal()
{
var uknownFormatImporter = new UknownFormatImporter { UseFrames = radioButtonTimeCodeFrames.Checked };
var uknownFormatImporter = new UnknownFormatImporter { UseFrames = radioButtonTimeCodeFrames.Checked };
ImportedSubitle = uknownFormatImporter.AutoGuessImport(textBoxText.Lines.ToList());
groupBoxImportResult.Text = string.Format(Configuration.Settings.Language.ImportText.PreviewLinesModifiedX, ImportedSubitle.Paragraphs.Count);
SubtitleListview1.Fill(ImportedSubitle);

View File

@ -2811,7 +2811,7 @@ namespace Nikse.SubtitleEdit.Forms
}
}
var uknownFormatImporter = new UknownFormatImporter { UseFrames = true };
var uknownFormatImporter = new UnknownFormatImporter { UseFrames = true };
var genericParseSubtitle = uknownFormatImporter.AutoGuessImport(s.SplitToLines());
if (genericParseSubtitle.Paragraphs.Count > 1)
{

View File

@ -70,7 +70,7 @@ namespace Test.Logic
}]
}";
var importer = new UknownFormatImporterJson();
var importer = new UnknownFormatImporterJson();
var subtitle = importer.AutoGuessImport(raw.Replace('\'', '"').SplitToLines());
Assert.AreEqual(11, subtitle.Paragraphs.Count);
Assert.AreEqual("development", subtitle.Paragraphs[1].Text);
@ -139,7 +139,7 @@ namespace Test.Logic
}]
}";
var importer = new UknownFormatImporterJson();
var importer = new UnknownFormatImporterJson();
var subtitle = importer.AutoGuessImport(raw.Replace('\'', '"').SplitToLines());
Assert.AreEqual(11, subtitle.Paragraphs.Count);
Assert.AreEqual("Ford" + Environment.NewLine + "BMW" + Environment.NewLine + "Fiat", subtitle.Paragraphs[1].Text);
@ -176,7 +176,7 @@ namespace Test.Logic
}]
}";
var importer = new UknownFormatImporterJson();
var importer = new UnknownFormatImporterJson();
var subtitle = importer.AutoGuessImport(raw.Replace('\'', '"').SplitToLines());
Assert.AreEqual(4, subtitle.Paragraphs.Count);
Assert.AreEqual("Line1", subtitle.Paragraphs[1].Text);