Minor fixes for JacoSub + added two unit tests

Fix #2452
This commit is contained in:
Nikolaj Olsson 2017-06-28 20:26:43 +02:00
parent 3a894dda38
commit 40a86afa12
5 changed files with 314 additions and 229 deletions

View File

@ -217,7 +217,7 @@
<Compile Include="SubtitleFormats\AribB36.cs" />
<Compile Include="SubtitleFormats\AribB24Decoder.cs" />
<Compile Include="SubtitleFormats\Csv5.cs" />
<Compile Include="SubtitleFormats\JacobSub.cs" />
<Compile Include="SubtitleFormats\JacoSub.cs" />
<Compile Include="SubtitleFormats\JsonType11.cs" />
<Compile Include="SubtitleFormats\SmpteTt2052.cs" />
<Compile Include="SubtitleFormats\Ted20.cs" />

View File

@ -0,0 +1,269 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{
/// <summary>
/// Implementation of JacoSub. Specs: (http://unicorn.us.com/jacosub/jscripts.html)
/// </summary>
public class JacoSub : SubtitleFormat
{
// Line layout: H:MM:SS.FF H:MM:SS.FF directive {comment} text {comment} more text...
// Example:     0:30:57.22 0:30:59.46 vm {opening credit} A Film By Akira Kurosawa
// Captures the start time (group 1) and end time (group 2) at the beginning of a line.
private static readonly Regex RegexTimeCode = new Regex(@"^(\d:\d\d:\d\d\.\d\d) (\d:\d\d:\d\d\.\d\d)", RegexOptions.Compiled);

/// <summary>File extension used by this format.</summary>
public override string Extension
{
    get { return ".jss"; }
}

/// <summary>Display name of this format.</summary>
public override string Name
{
    get { return "JACOsub"; }
}

/// <summary>Always true for this format.</summary>
public override bool IsTimeBased
{
    get { return true; }
}
/// <summary>
/// Decides whether the given lines look like a JACOsub script.
/// </summary>
/// <param name="lines">Content of the candidate file, one entry per line.</param>
/// <param name="fileName">Path of the candidate file; its extension is only checked when the file exists.</param>
/// <returns>True when more paragraphs than unreadable lines were found.</returns>
public override bool IsMine(List<string> lines, string fileName)
{
    // The extension is only meaningful for a file that is really on disk.
    if (File.Exists(fileName))
    {
        bool extensionMatches = fileName.EndsWith(Extension, StringComparison.OrdinalIgnoreCase);
        if (!extensionMatches)
        {
            return false;
        }
    }

    // Trial parse: LoadSubtitle resets and fills _errorCount as a side effect.
    var candidate = new Subtitle();
    LoadSubtitle(candidate, lines, fileName);
    int parsedCount = candidate.Paragraphs.Count;
    return parsedCount > _errorCount;
}
/// <summary>
/// Parses JACOsub script lines into <paramref name="subtitle"/>.
/// </summary>
/// <remarks>
/// Blank lines and lines starting with '#' are ignored. Any other line that does not start
/// with two time codes increments <c>_errorCount</c>. A line ending in a backslash is joined
/// with the following line. Lines whose decoded text is empty produce no paragraph.
/// </remarks>
/// <param name="subtitle">Subtitle to fill; existing paragraphs are removed first.</param>
/// <param name="lines">Script content, one entry per line.</param>
/// <param name="fileName">Not used by this method.</param>
public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
{
    // start from a clean slate
    _errorCount = 0;
    subtitle.Paragraphs.Clear();

    // separators for one "H:MM:SS.FF" stamp; only valid for values captured by RegexTimeCode
    char[] separators = { ':', '.', ' ' };
    int total = lines.Count;

    for (int index = 0; index < total; index++)
    {
        string current = lines[index].Trim();
        bool isComment = current.Length > 0 && current.StartsWith('#');

        Match stamps = null;
        if (current.Length >= 21 && !isComment)
        {
            stamps = RegexTimeCode.Match(current);
        }

        if (stamps == null || !stamps.Success)
        {
            // anything that is neither blank nor a '#' line counts as unreadable
            if (current.Length > 0 && !isComment)
            {
                _errorCount++;
            }
            continue;
        }

        // a trailing backslash continues the entry on the next physical line
        string raw = current.Substring(stamps.Value.Length);
        while (raw.EndsWith('\\') && index < total - 1)
        {
            index++;
            raw = raw.TrimEnd('\\') + lines[index].Trim();
        }

        string decoded = DecodeText(raw.TrimEnd('\\').Trim());
        if (string.IsNullOrEmpty(decoded))
        {
            continue;
        }

        var paragraph = new Paragraph();
        paragraph.StartTime = DecodeTime(stamps.Groups[1].Value, separators);
        paragraph.EndTime = DecodeTime(stamps.Groups[2].Value, separators);
        paragraph.Text = decoded;
        subtitle.Paragraphs.Add(paragraph);
    }

    subtitle.Renumber();
}
/// <summary>
/// Serializes the subtitle as JACOsub lines of the form "start end D text".
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are written.</param>
/// <param name="title">Not used by this method.</param>
/// <returns>The script text, one line per paragraph.</returns>
public override string ToText(Subtitle subtitle, string title)
{
    // 0:30:57.22 0:30:59.46 vm {opening credit} A Film By Akira Kurosawa
    var output = new StringBuilder();
    foreach (Paragraph p in subtitle.Paragraphs)
    {
        // line breaks and italic/bold/underline become JACOsub backslash codes
        string body = p.Text
            .Replace(Environment.NewLine, "\\n")
            .Replace("<i>", "\\I")
            .Replace("</i>", "\\i")
            .Replace("<b>", "\\B")
            .Replace("</b>", "\\b")
            .Replace("<u>", "\\U")
            .Replace("</u>", "\\u");

        // whatever markup is left has no JACOsub counterpart here and is dropped
        body = HtmlUtil.RemoveHtmlTags(body, true);

        output.Append(EncodeTime(p.StartTime))
              .Append(' ')
              .Append(EncodeTime(p.EndTime))
              .Append(" D ")
              .Append(body)
              .Append(Environment.NewLine);
    }
    return output.ToString();
}
/// <summary>
/// Converts one "H:MM:SS.FF" stamp into a <c>TimeCode</c>.
/// </summary>
/// <param name="timestamp">Stamp text; expected to be a value captured by RegexTimeCode.</param>
/// <param name="splitChars">Characters separating the four numeric fields.</param>
/// <returns>Time code built from hours, minutes, seconds and the frame field converted to milliseconds.</returns>
private static TimeCode DecodeTime(string timestamp, char[] splitChars)
{
    // fields in order: hours, minutes, seconds, frames
    string[] parts = timestamp.Split(splitChars, StringSplitOptions.RemoveEmptyEntries);
    return new TimeCode(
        int.Parse(parts[0]),
        int.Parse(parts[1]),
        int.Parse(parts[2]),
        FramesToMilliseconds(double.Parse(parts[3])));
}
/// <summary>
/// Formats a time code as H:MM:SS.FF, where FF is the millisecond part converted to frames.
/// </summary>
/// <param name="tc">Time code to format.</param>
/// <returns>The formatted stamp.</returns>
private static string EncodeTime(TimeCode tc)
{
    return string.Format("{0:#0}:{1:00}:{2:00}.{3:00}", tc.Hours, tc.Minutes, tc.Seconds, MillisecondsToFrames(tc.Milliseconds));
}
/// <summary>
/// Converts the part of a JACOsub line that follows the two time codes into subtitle text.
/// </summary>
/// <remarks>
/// <para>A leading directive word is skipped, but only when the field starts with a letter:
/// the directive may be omitted when the text begins with a non-alphabetic character.</para>
/// <para>Backslash codes: \n line break, \{ \~ \\ literals, \D date, \T time,
/// \I \B \U open italic/bold/underline, \i \b \u close them, \N closes every open style.
/// A backslash followed by any other character is dropped.</para>
/// <para>{...} is a comment and is removed, ~ is a hard space. Ordinary spaces that end up at
/// the start or end of the text (typically next to a removed comment) are discarded, while
/// hard spaces are kept.</para>
/// </remarks>
/// <param name="input">Optional directive followed by the text of one logical script line.</param>
/// <returns>Decoded text; styles still open at the end are closed, innermost first.</returns>
private static string DecodeText(string input)
{
    input = input.Trim();
    var sb = new StringBuilder(input.Length);

    // Only a field starting with a letter carries a directive word.
    bool directiveOn = input.Length > 0 && char.IsLetter(input[0]);

    // Closing tags of the currently open styles, innermost first, so that
    // appending the string closes the styles in the correct nesting order.
    string endTags = string.Empty;

    // Length of sb up to the last character that is not an ordinary space;
    // used to cut ordinary trailing spaces without touching hard spaces.
    int keepLength = 0;

    int i = 0;
    while (i < input.Length)
    {
        char ch = input[i];
        int lengthBefore = sb.Length;
        bool softSpace = false;

        if (directiveOn)
        {
            // the directive ends at the first space; the space itself is not text
            if (ch == ' ')
            {
                directiveOn = false;
            }
        }
        else if (ch == '\\' && i < input.Length - 1)
        {
            bool known = true;
            switch (input[i + 1])
            {
                case 'n':
                    sb.AppendLine();
                    break;
                case '{':
                    sb.Append('{');
                    break;
                case '~':
                    sb.Append('~');
                    break;
                case '\\':
                    sb.Append('\\');
                    break;
                case 'D':
                    // DD MMM YYYY, as in 2 Apr 1996 (independent of the current culture)
                    sb.Append(DateTime.Now.ToString("d MMM yyyy", System.Globalization.CultureInfo.InvariantCulture));
                    break;
                case 'T':
                    // HH:MM (24-hour time)
                    sb.Append(DateTime.Now.ToString("HH:mm", System.Globalization.CultureInfo.InvariantCulture));
                    break;
                case 'N':
                    // back to normal style: close everything that is open
                    sb.Append(endTags);
                    endTags = string.Empty;
                    break;
                case 'I':
                    endTags = OpenStyleTag(sb, "i", endTags);
                    break;
                case 'i':
                    endTags = CloseStyleTag(sb, "i", endTags);
                    break;
                case 'B':
                    endTags = OpenStyleTag(sb, "b", endTags);
                    break;
                case 'b':
                    endTags = CloseStyleTag(sb, "b", endTags);
                    break;
                case 'U':
                    endTags = OpenStyleTag(sb, "u", endTags);
                    break;
                case 'u':
                    endTags = CloseStyleTag(sb, "u", endTags);
                    break;
                default:
                    // unknown code: drop the backslash, the next character is handled normally
                    known = false;
                    break;
            }
            if (known)
            {
                i++; // the code letter has been consumed as well
            }
        }
        else if (ch == '{') // comment
        {
            int endComment = input.IndexOf('}', i);
            // an unterminated comment swallows the rest of the line
            i = endComment < 0 ? input.Length : endComment;
        }
        else if (ch == '~') // hard space
        {
            sb.Append(' ');
        }
        else if (ch == ' ')
        {
            // ordinary space: never at the very start, and removable at the end
            if (sb.Length > 0)
            {
                sb.Append(ch);
                softSpace = true;
            }
        }
        else
        {
            sb.Append(ch);
        }

        if (!softSpace && sb.Length > lengthBefore)
        {
            keepLength = sb.Length;
        }
        i++;
    }

    // drop ordinary spaces left at the end (e.g. before a trailing comment)
    sb.Length = keepLength;
    return sb.ToString() + endTags;
}

/// <summary>Writes the opening tag and registers its closing tag as the innermost open style.</summary>
private static string OpenStyleTag(StringBuilder sb, string tag, string endTags)
{
    sb.Append('<').Append(tag).Append('>');
    return "</" + tag + ">" + endTags;
}

/// <summary>Writes the closing tag and removes its first pending occurrence, if any, from the open styles.</summary>
private static string CloseStyleTag(StringBuilder sb, string tag, string endTags)
{
    string closeTag = "</" + tag + ">";
    sb.Append(closeTag);
    int pending = endTags.IndexOf(closeTag, StringComparison.Ordinal);
    return pending < 0 ? endTags : endTags.Remove(pending, closeTag.Length);
}
}
}

View File

@ -1,220 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{
/// <summary>
/// Implementation of JacobSub. Specs: (http://unicorn.us.com/jacosub/jscripts.html)
/// </summary>
public class JacobSub : SubtitleFormat
{
// Line layout: H:MM:SS.FF H:MM:SS.FF directive {comment} text {comment} more text...
// Example:     0:30:57.22 0:30:59.46 vm {opening credit} A Film By Akira Kurosawa
// Captures the start time (group 1) and end time (group 2) at the beginning of a line.
private static readonly Regex RegexTimeCode = new Regex(@"^(\d:\d\d:\d\d\.\d\d) (\d:\d\d:\d\d\.\d\d)", RegexOptions.Compiled);

/// <summary>
/// Matches a leading run of letters and digits that is followed by a space,
/// i.e. a candidate directive word at the start of the text.
/// </summary>
private static readonly Regex RegexDirectives = new Regex("^[a-z\\d]+(?= )", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture);

/// <summary>File extension used by this format.</summary>
public override string Extension
{
    get { return ".jss"; }
}

/// <summary>Display name of this format.</summary>
public override string Name
{
    get { return "JacobSub"; }
}

/// <summary>Always true for this format.</summary>
public override bool IsTimeBased
{
    get { return true; }
}
/// <summary>
/// Decides whether the given lines look like a script of this format.
/// </summary>
/// <param name="lines">Content of the candidate file, one entry per line.</param>
/// <param name="fileName">Path of the candidate file; its extension is only checked when the file exists.</param>
/// <returns>True when more paragraphs than unreadable lines were found.</returns>
public override bool IsMine(List<string> lines, string fileName)
{
    // The extension is only meaningful for a file that is really on disk.
    if (File.Exists(fileName))
    {
        bool extensionMatches = fileName.EndsWith(Extension, StringComparison.OrdinalIgnoreCase);
        if (!extensionMatches)
        {
            return false;
        }
    }

    // Trial parse: LoadSubtitle resets and fills _errorCount as a side effect.
    var candidate = new Subtitle();
    LoadSubtitle(candidate, lines, fileName);
    int parsedCount = candidate.Paragraphs.Count;
    return parsedCount > _errorCount;
}
/// <summary>
/// Parses script lines into <paramref name="subtitle"/>.
/// </summary>
/// <remarks>
/// A line starting with two time codes opens a new paragraph. Following lines without time
/// codes are appended to the current paragraph as extra text lines. Lines that cannot be
/// attached to a paragraph with text increment <c>_errorCount</c>. Paragraphs without text
/// are never added, including the last one: input that contains no time code at all
/// produces no paragraph.
/// </remarks>
/// <param name="subtitle">Subtitle to fill; existing paragraphs are removed first.</param>
/// <param name="lines">Script content, one entry per line.</param>
/// <param name="fileName">Not used by this method.</param>
public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
{
    // reset members
    subtitle.Paragraphs.Clear();
    _errorCount = 0;

    // must only be used with matched value of RegexTimeCode
    char[] timeSplitChar = { ':', '.', ' ' };

    Paragraph paragraph = null;
    foreach (string rawLine in lines)
    {
        string line = rawLine.Trim();

        // a line with two time codes is at least 21 characters long
        Match match = line.Length >= 21 ? RegexTimeCode.Match(line) : null;
        if (match?.Success == true)
        {
            // a new entry starts: store the one collected so far
            AddParagraphIfNotEmpty(subtitle, paragraph);
            paragraph = new Paragraph()
            {
                StartTime = DecodeTime(match.Groups[1].Value, timeSplitChar),
                EndTime = DecodeTime(match.Groups[2].Value, timeSplitChar),
                Text = DecodeText(line.Substring(match.Value.Length))
            };
        }
        else if (paragraph == null || paragraph.Text.Length == 0)
        {
            // nothing to attach this line to
            _errorCount++;
        }
        else
        {
            // continuation line; blank lines add nothing because of TrimEnd
            paragraph.Text += (Environment.NewLine + DecodeText(line)).TrimEnd();
        }
    }

    // store the entry that was still being collected when the input ended
    AddParagraphIfNotEmpty(subtitle, paragraph);
    subtitle.Renumber();
}

/// <summary>Adds the paragraph with trimmed text to the subtitle unless it is missing or has no text.</summary>
private static void AddParagraphIfNotEmpty(Subtitle subtitle, Paragraph paragraph)
{
    if (paragraph == null || paragraph.Text.Length == 0)
    {
        return;
    }
    paragraph.Text = paragraph.Text.Trim();
    subtitle.Paragraphs.Add(paragraph);
}
/// <summary>
/// Serializes the subtitle as lines of the form "start end D text", with all tags removed from the text.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are written.</param>
/// <param name="title">Not used by this method.</param>
/// <returns>The script text, one entry per paragraph.</returns>
public override string ToText(Subtitle subtitle, string title)
{
    // 0:30:57.22 0:30:59.46 vm {opening credit} A Film By Akira Kurosawa
    var output = new StringBuilder();
    foreach (Paragraph p in subtitle.Paragraphs)
    {
        string plainText = HtmlUtil.RemoveHtmlTags(p.Text, true);
        output.Append(EncodeTime(p.StartTime))
              .Append(' ')
              .Append(EncodeTime(p.EndTime))
              .Append(" D ")
              .Append(plainText)
              .Append(Environment.NewLine);
    }
    return output.ToString();
}
/// <summary>
/// Converts one "H:MM:SS.FF" stamp into a <c>TimeCode</c>.
/// </summary>
/// <param name="timestamp">Stamp text; expected to be a value captured by RegexTimeCode.</param>
/// <param name="splitChars">Characters separating the four numeric fields.</param>
/// <returns>Time code built from hours, minutes, seconds and the frame field converted to milliseconds.</returns>
private static TimeCode DecodeTime(string timestamp, char[] splitChars)
{
    // fields in order: hours, minutes, seconds, frames
    string[] parts = timestamp.Split(splitChars, StringSplitOptions.RemoveEmptyEntries);
    return new TimeCode(
        int.Parse(parts[0]),
        int.Parse(parts[1]),
        int.Parse(parts[2]),
        FramesToMilliseconds(double.Parse(parts[3])));
}
/// <summary>
/// Formats a time code as H:MM:SS.FF, where FF is the millisecond part converted to frames.
/// </summary>
/// <param name="tc">Time code to format.</param>
/// <returns>The formatted stamp.</returns>
private static string EncodeTime(TimeCode tc)
{
    return string.Format("{0:#0}:{1:00}:{2:00}.{3:00}", tc.Hours, tc.Minutes, tc.Seconds, MillisecondsToFrames(tc.Milliseconds));
}
/// <summary>
/// Removes {comments}, hard-space markers and a recognized leading directive from the text part of a line.
/// </summary>
/// <param name="input">Text of a line without its time codes.</param>
/// <returns>The cleaned, trimmed text; empty when nothing is left.</returns>
private static string DecodeText(string input)
{
    input = input.Trim();
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    // remove all comments
    int idx = input.IndexOf('{');

    // You don't even need this directive at all, if your text begins with a
    // non-alphabetic character (such as a {comment}, number, etc.).
    bool startsWithComment = idx == 0;
    while (idx >= 0)
    {
        // Search for the closing brace AFTER the opening one; a stray '}' earlier
        // in the line must not stop the removal of later comments.
        int endIdx = input.IndexOf('}', idx);
        if (endIdx < 0)
        {
            break; // unterminated comment: leave the rest untouched
        }
        input = input.Remove(idx, endIdx - idx + 1);
        idx = input.IndexOf('{', idx);
    }

    // remove hard-space markers and the extra spaces left behind by removed comments
    input = input.Replace("~", string.Empty);
    input = input.FixExtraSpaces();
    input = input.TrimStart();

    // do not include directives
    Match matchDirective = RegexDirectives.Match(input);
    if (startsWithComment || !(matchDirective.Success && IsDirective(matchDirective.Value)))
    {
        return input.Trim();
    }
    return input.Substring(matchDirective.Value.Length).Trim();
}
/// <summary>
/// Tells whether a leading word is a directive this reader recognizes.
/// A directive determines a subtitle's position, font, style, color, and so forth; each
/// character code begins with an alphabet character followed by arguments made up of other
/// alphabet characters and numbers.
/// </summary>
/// <param name="input">Candidate directive word.</param>
/// <returns>True only for the default directive "d" (case-insensitive).</returns>
private static bool IsDirective(string input)
{
    // Only the default directive is recognized. Not handled: vertical positioning,
    // horizontal positioning, fonts, genlock fader control (Amiga only), IFF graphic
    // files, special effects, argument directives and time track.
    return input.Equals("d", StringComparison.OrdinalIgnoreCase);
}
}
}

View File

@ -81,7 +81,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats
new ImageLogicAutocaption(),
new IssXml(),
new ItunesTimedText(),
new JacobSub(),
new JacoSub(),
new Json(),
new JsonType2(),
new JsonType3(),

View File

@ -957,20 +957,19 @@ and astronauts.“...""
#endregion
#region JacobSub
#region JacoSub
[TestMethod]
public void JacobSubSubtitleTest()
public void JacoSubSubtitleTest()
{
var jacobSub = new JacobSub();
var jacobSub = new JacoSub();
var subtitle = new Subtitle();
const string text = @"1:55:52.16 1:55:53.20 D [Billy] That might have been my fault.
1:55:53.20 1:55:55.13 D That might have been my fault,
I'm so sorry.
";
1:55:53.20 1:55:55.13 D That might have been my fault,\nI'm so sorry.";
// Test text.
jacobSub.LoadSubtitle(subtitle, new List<string>(text.SplitToLines()), null);
Assert.AreEqual("[Billy] That might have been my fault.", subtitle.Paragraphs[0].Text);
Assert.AreEqual("That might have been my fault,\r\nI'm so sorry.", subtitle.Paragraphs[1].Text);
Assert.AreEqual("That might have been my fault," + Environment.NewLine + "I'm so sorry.", subtitle.Paragraphs[1].Text);
// Test time code.
double expectedTotalMilliseconds = new TimeCode(1, 55, 52, SubtitleFormat.FramesToMilliseconds(16)).TotalMilliseconds;
@ -978,8 +977,45 @@ I'm so sorry.
// Test total lines.
Assert.AreEqual(2, subtitle.Paragraphs[1].NumberOfLines);
}
// Checks that the JACOsub style codes \B \b \I \i and \N are converted to <b>/<i> tags.
// The attribute is required: without it MSTest never runs the method.
[TestMethod]
public void JacoSubSubtitleTestItalicAndBold()
{
    var jacoSub = new JacoSub();
    var subtitle = new Subtitle();

    // Style codes are introduced by a backslash (the verbatim string keeps them as written):
    // line 1 closes bold with \b, line 2 closes italic with \i, line 3 resets all styles with \N.
    const string text = @"1:55:52.16 1:55:53.20 D \BBilly\b That might have been my fault.
1:55:53.20 1:55:55.13 D That might have been my \Ifault\i.
1:55:53.20 1:55:55.13 D That might have been my \Ifault\N.";

    jacoSub.LoadSubtitle(subtitle, new List<string>(text.SplitToLines()), null);

    Assert.AreEqual("<b>Billy</b> That might have been my fault.", subtitle.Paragraphs[0].Text);
    Assert.AreEqual("That might have been my <i>fault</i>.", subtitle.Paragraphs[1].Text);
    Assert.AreEqual("That might have been my <i>fault</i>.", subtitle.Paragraphs[2].Text);
}
#endregion
#region LambdaCap
// Loads a small LambdaCap sample (header line plus two entries) and checks the decoded
// texts, the end time of the first entry and the number of paragraphs.
[TestMethod]
public void LambdaCapTestItalic()
{
// Fixed frame rate for the time checks below.
// NOTE(review): this changes a global setting and does not restore it - confirm that other tests do not depend on it.
Configuration.Settings.General.CurrentFrameRate = 25;
var lambdaCap = new LambdaCap();
var subtitle = new Subtitle();
// First line is the file header; each entry is "number start/end text", and the second entry spans two text lines.
// NOTE(review): the first entry shows no visible italic marker although <i> is expected below -
// presumably the markup characters were lost when this listing was produced; confirm against the original file.
const string text = "Lambda字幕V4 DF0+1 SCENE\"和文標準\"" + @"
1 00000000/00000300 Line 1 with italic word.
2 00000900/00001200 Line 1
Line 2";
lambdaCap.LoadSubtitle(subtitle, new List<string>(text.SplitToLines()), null);
// decoded text of both entries
Assert.AreEqual("Line 1 with <i>italic</i> word.", subtitle.Paragraphs[0].Text);
Assert.AreEqual("Line 1" + Environment.NewLine + "Line 2", subtitle.Paragraphs[1].Text);
// end stamp 00000300 of the first entry is expected to be 3000 ms
Assert.AreEqual(3000, subtitle.Paragraphs[0].EndTime.TotalMilliseconds);
// header line must not produce a paragraph
Assert.AreEqual(2, subtitle.Paragraphs.Count);
}
#endregion
}
}