2016-02-08 21:11:03 +01:00
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.Text;
|
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
|
|
|
|
|
namespace Nikse.SubtitleEdit.Core.SubtitleFormats
|
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// Subtitle format where every cue is a single text line: a "minutes:seconds"
/// time code (1-3 minute digits, exactly 2 second digits) immediately followed
/// by the cue text, e.g. "1:05Some text".
/// </summary>
public class YouTubeTranscriptOneLine : SubtitleFormat
{
    // ASCII digits only: "\d" in .NET also matches non-ASCII Unicode decimal digits,
    // which int.Parse (used by DecodeTimeCode) rejects with a FormatException.
    private static readonly Regex RegexTimeCodes = new Regex(@"^[0-9]{1,3}:[0-9]{2}.+$", RegexOptions.Compiled);

    // Punctuation stripped from the text before checking whether it starts with a digit.
    private static readonly char[] TrimChars = { '–', '.', ';', ':' };

    /// <summary>File extension used for this format.</summary>
    public override string Extension
    {
        get { return ".txt"; }
    }

    /// <summary>Display name of this format.</summary>
    public override string Name
    {
        get { return "YouTube Transcript one line"; }
    }

    /// <summary>This format stores time codes (always true).</summary>
    public override bool IsTimeBased
    {
        get { return true; }
    }

    /// <summary>
    /// Decides whether <paramref name="lines"/> look like this format by loading
    /// them into a scratch subtitle and comparing the result with the error count.
    /// </summary>
    /// <param name="lines">Lines of the candidate file.</param>
    /// <param name="fileName">File name, forwarded unchanged to <see cref="LoadSubtitle"/>.</param>
    /// <returns>True when more paragraphs were parsed than errors were counted.</returns>
    public override bool IsMine(List<string> lines, string fileName)
    {
        var subtitle = new Subtitle();
        LoadSubtitle(subtitle, lines, fileName);
        return subtitle.Paragraphs.Count > _errorCount;
    }

    /// <summary>
    /// Serializes the subtitle as one line per paragraph: the encoded start time
    /// directly followed (no separator) by the paragraph text.
    /// </summary>
    /// <param name="subtitle">Subtitle whose paragraphs are written.</param>
    /// <param name="title">Not used by this format.</param>
    /// <returns>The complete file content.</returns>
    public override string ToText(Subtitle subtitle, string title)
    {
        var sb = new StringBuilder();
        const string writeFormat = "{0}{1}";
        foreach (Paragraph p in subtitle.Paragraphs)
        {
            // Line breaks inside a paragraph become spaces so each cue stays on one line;
            // the result is then passed through HtmlUtil.RemoveHtmlTags.
            string singleLineText = HtmlUtil.RemoveHtmlTags(p.Text.Replace(Environment.NewLine, " "));
            sb.AppendLine(string.Format(writeFormat, EncodeTimeCode(p.StartTime), singleLineText));
        }
        return sb.ToString();
    }

    /// <summary>
    /// Formats a time code as "totalMinutes:ss", where totalMinutes is
    /// Hours * 60 + Minutes. Milliseconds are not written.
    /// </summary>
    private static string EncodeTimeCode(TimeCode time)
    {
        return string.Format("{0}:{1:00}", time.Hours * 60 + time.Minutes, time.Seconds);
    }

    /// <summary>
    /// Parses <paramref name="lines"/> into <paramref name="subtitle"/>, replacing
    /// any paragraphs it already contains, and resets and updates the error count.
    /// </summary>
    /// <remarks>
    /// Error counting: a line that does not match the time code pattern adds 2;
    /// a matching line whose text (after trimming whitespace and the characters
    /// in TrimChars) starts with a digit adds 1 but is still added as a paragraph.
    /// </remarks>
    /// <param name="subtitle">Target subtitle; its paragraph list is cleared first.</param>
    /// <param name="lines">Lines of the file to parse.</param>
    /// <param name="fileName">Not used by this format.</param>
    public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
    {
        _errorCount = 0;
        subtitle.Paragraphs.Clear();
        foreach (string line in lines)
        {
            if (!RegexTimeCodes.IsMatch(line))
            {
                _errorCount += 2;
                continue;
            }

            // The pattern guarantees the first ':' follows the minute digits and is
            // followed by exactly two second digits, so the time code ends 3 chars after it.
            int splitter = line.IndexOf(':') + 3;
            string text = line.Remove(0, splitter);

            // End time is a zero placeholder here; RecalculateDisplayTimes is called below
            // (presumably to derive the real end times - confirm in Subtitle).
            var p = new Paragraph(DecodeTimeCode(line.Substring(0, splitter)), new TimeCode(0, 0, 0, 0), text);
            subtitle.Paragraphs.Add(p);

            // Text that begins with a digit is counted as one error (paragraph is kept).
            text = text.Trim().Trim(TrimChars).Trim();
            if (text.Length > 0 && char.IsDigit(text[0]))
            {
                _errorCount++;
            }
        }

        foreach (Paragraph p2 in subtitle.Paragraphs)
        {
            p2.Text = Utilities.AutoBreakLine(p2.Text);
        }

        subtitle.RecalculateDisplayTimes(Configuration.Settings.General.SubtitleMaximumDisplayMilliseconds, null);
        subtitle.Renumber();
    }

    /// <summary>
    /// Converts a "minutes:seconds" string into a time code with zero hours and
    /// zero milliseconds. Callers must pass a string already validated by
    /// RegexTimeCodes (ASCII digits on both sides of the colon).
    /// </summary>
    private static TimeCode DecodeTimeCode(string s)
    {
        string[] parts = s.Split(':');

        string minutes = parts[0];
        string seconds = parts[1];

        return new TimeCode(0, int.Parse(minutes), int.Parse(seconds), 0);
    }
}
|
|
|
|
|
}
|