diff --git a/libse/LibSE.csproj b/libse/LibSE.csproj index ab9b7fcc3..0c46e9217 100644 --- a/libse/LibSE.csproj +++ b/libse/LibSE.csproj @@ -253,6 +253,7 @@ + diff --git a/libse/SubtitleFormats/SubtitleFormat.cs b/libse/SubtitleFormats/SubtitleFormat.cs index 5f6c127bc..d2618f792 100644 --- a/libse/SubtitleFormats/SubtitleFormat.cs +++ b/libse/SubtitleFormats/SubtitleFormat.cs @@ -271,6 +271,7 @@ namespace Nikse.SubtitleEdit.Core.SubtitleFormats new UnknownSubtitle84(), new UnknownSubtitle85(), new UnknownSubtitle86(), + new UnknownSubtitle87(), }; string path = Configuration.PluginsDirectory; diff --git a/libse/SubtitleFormats/UnknownSubtitle87.cs b/libse/SubtitleFormats/UnknownSubtitle87.cs new file mode 100644 index 000000000..0fa7370ff --- /dev/null +++ b/libse/SubtitleFormats/UnknownSubtitle87.cs @@ -0,0 +1,197 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Text; +using System.Text.RegularExpressions; + +namespace Nikse.SubtitleEdit.Core.SubtitleFormats +{ + public class UnknownSubtitle87 : SubtitleFormat + { + private enum ExpectingLine + { + Number, + TimeCodes, + Text + } + + private Paragraph _paragraph; + private Paragraph _lastParagraph; + private ExpectingLine _expecting = ExpectingLine.Number; + private static readonly Regex RegexTimeCodes = new Regex(@"^-?\d+:-?\d+:-?\d+:-?\d\d\d\d\s[–-]\s-?\d+:-?\d+:-?\d+:-?\d\d\d\d$", RegexOptions.Compiled); + + public override string Extension => ".txt"; + + public override string Name => "Unknown 87"; + + public override bool IsMine(List lines, string fileName) + { + var subtitle = new Subtitle(); + LoadSubtitle(subtitle, lines, fileName); + return subtitle.Paragraphs.Count > _errorCount; + } + + public override string ToText(Subtitle subtitle, string title) + { + const string paragraphWriteFormat = "{0}.\r\n{1} – {2}\r\n{3}\r\n\r\n"; + + var sb = new StringBuilder(); + foreach (Paragraph p in subtitle.Paragraphs) + { + sb.AppendFormat(paragraphWriteFormat, p.Number, EncodeTime(p.StartTime), EncodeTime(p.EndTime), p.Text); + } + return sb.ToString().Trim() + Environment.NewLine; + } + + private string EncodeTime(TimeCode tc) + { + return $"{tc.Hours:00}:{tc.Minutes:00}:{tc.Seconds:00}:{tc.Milliseconds:000}0"; + } + + public override void LoadSubtitle(Subtitle subtitle, List lines, string fileName) + { + _paragraph = new Paragraph(); + _expecting = ExpectingLine.Number; + _errorCount = 0; + + subtitle.Paragraphs.Clear(); + for (int i = 0; i < lines.Count; i++) + { + string line = lines[i].TrimEnd(); + line = line.Trim('\u007F'); // 127=delete acscii + + string next = string.Empty; + if (i + 1 < lines.Count) + next = lines[i + 1]; + + string nextNext = string.Empty; + if (i + 2 < lines.Count) + nextNext = lines[i + 2]; + + // A new line is missing between two paragraphs (buggy srt file) + if (_expecting == ExpectingLine.Text && i + 1 < lines.Count && + _paragraph != null && !string.IsNullOrEmpty(_paragraph.Text) && Utilities.IsInteger(line.TrimEnd('.')) && + RegexTimeCodes.IsMatch(lines[i + 1])) + { + ReadLine(subtitle, string.Empty, string.Empty, string.Empty); + } + if (_expecting == ExpectingLine.Number && RegexTimeCodes.IsMatch(line)) + { + _expecting = ExpectingLine.TimeCodes; + } + ReadLine(subtitle, line, next, nextNext); + } + if (_paragraph != null && _paragraph.EndTime.TotalMilliseconds > _paragraph.StartTime.TotalMilliseconds) + subtitle.Paragraphs.Add(_paragraph); + + subtitle.Renumber(); + } + + private void ReadLine(Subtitle subtitle, string line, string next, string nextNext) + { + switch (_expecting) + { + case ExpectingLine.Number: + if (int.TryParse(line.TrimEnd('.'), out var number)) + { + _paragraph.Number = number; + _expecting = ExpectingLine.TimeCodes; + } + else if (!string.IsNullOrWhiteSpace(line)) + { + if (_lastParagraph != null && nextNext != null && (_lastParagraph.Number + 1).ToString(CultureInfo.InvariantCulture) == nextNext) + { + _lastParagraph.Text = (_lastParagraph.Text + Environment.NewLine + line.Trim()).Trim(); + } + else + { + _errorCount++; + } + } + break; + case ExpectingLine.TimeCodes: + if (TryReadTimeCodesLine(line, _paragraph)) + { + _paragraph.Text = string.Empty; + _expecting = ExpectingLine.Text; + } + else if (!string.IsNullOrWhiteSpace(line)) + { + _errorCount++; + _expecting = ExpectingLine.Number; // lets go to next paragraph + } + break; + case ExpectingLine.Text: + if (!string.IsNullOrWhiteSpace(line) || IsText(next)) + { + if (_paragraph.Text.Length > 0) + _paragraph.Text += Environment.NewLine; + _paragraph.Text += RemoveBadChars(line).TrimEnd().Replace(Environment.NewLine + Environment.NewLine, Environment.NewLine); + } + else if (string.IsNullOrEmpty(line) && string.IsNullOrEmpty(_paragraph.Text)) + { + _paragraph.Text = string.Empty; + if (!string.IsNullOrEmpty(next) && (Utilities.IsInteger(next.TrimEnd('.')) || RegexTimeCodes.IsMatch(next))) + { + subtitle.Paragraphs.Add(_paragraph); + _lastParagraph = _paragraph; + _paragraph = new Paragraph(); + _expecting = ExpectingLine.Number; + } + } + else + { + subtitle.Paragraphs.Add(_paragraph); + _lastParagraph = _paragraph; + _paragraph = new Paragraph(); + _expecting = ExpectingLine.Number; + } + break; + } + } + + private static bool IsText(string text) + { + return !(string.IsNullOrWhiteSpace(text) || Utilities.IsInteger(text.TrimEnd('.')) || RegexTimeCodes.IsMatch(text)); + } + + private static string RemoveBadChars(string line) + { + return line.Replace('\0', ' '); + } + + private bool TryReadTimeCodesLine(string line, Paragraph paragraph) + { + if (RegexTimeCodes.IsMatch(line)) + { + string[] parts = line.Replace("–", ":").Replace("-", ":").RemoveChar(' ').Split(':', ','); + try + { + int startHours = int.Parse(parts[0]); + int startMinutes = int.Parse(parts[1]); + int startSeconds = int.Parse(parts[2]); + int startMilliseconds = int.Parse(parts[3]) / 10; + int endHours = int.Parse(parts[4]); + int endMinutes = int.Parse(parts[5]); + int endSeconds = int.Parse(parts[6]); + int endMilliseconds = int.Parse(parts[7]) / 10; + + paragraph.StartTime = new TimeCode(startHours, startMinutes, startSeconds, startMilliseconds); + if (parts[0].StartsWith('-') && paragraph.StartTime.TotalMilliseconds > 0) + paragraph.StartTime.TotalMilliseconds = paragraph.StartTime.TotalMilliseconds * -1; + + paragraph.EndTime = new TimeCode(endHours, endMinutes, endSeconds, endMilliseconds); + if (parts[4].StartsWith('-') && paragraph.EndTime.TotalMilliseconds > 0) + paragraph.EndTime.TotalMilliseconds = paragraph.EndTime.TotalMilliseconds * -1; + + return true; + } + catch + { + return false; + } + } + return false; + } + } +} \ No newline at end of file