2016-02-08 21:11:03 +01:00
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.Text;
|
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
|
|
|
|
|
namespace Nikse.SubtitleEdit.Core.SubtitleFormats
|
|
|
|
|
{
|
|
|
|
|
/// <summary>
/// Plain-text YouTube transcript format: each entry is a "minutes:seconds"
/// time code line followed by one or more lines of text.
/// </summary>
public class YouTubeTranscript : SubtitleFormat
{
    // One to three digits of minutes, a colon, and two digits of seconds.
    // ASCII digits are spelled out because "\d" also matches other Unicode
    // decimal digits, which int.Parse in DecodeTimeCode would reject with a
    // FormatException.
    private static readonly Regex RegexTimeCodes = new Regex(@"^[0-9]{1,3}:[0-9][0-9]$", RegexOptions.Compiled);

    // Loading is aborted as soon as a single paragraph's text grows beyond this many characters.
    private const int MaxParagraphTextLength = 800;

    /// <summary>File extension used by this format.</summary>
    public override string Extension => ".txt";

    /// <summary>Display name of this format.</summary>
    public override string Name => "YouTube Transcript";

    /// <summary>
    /// Serializes the subtitle: for every paragraph, one line with the start
    /// time code followed by one line with the paragraph text.
    /// </summary>
    /// <param name="subtitle">Subtitle whose paragraphs are written.</param>
    /// <param name="title">Not used by this format.</param>
    /// <returns>The transcript text.</returns>
    public override string ToText(Subtitle subtitle, string title)
    {
        var sb = new StringBuilder();
        foreach (Paragraph p in subtitle.Paragraphs)
        {
            sb.AppendLine(EncodeTimeCode(p.StartTime));

            // Line breaks inside a paragraph are flattened to spaces and the
            // result is passed through HtmlUtil.RemoveHtmlTags, so the text
            // occupies a single output line.
            sb.AppendLine(HtmlUtil.RemoveHtmlTags(p.Text.Replace(Environment.NewLine, " ")));
        }

        return sb.ToString();
    }

    /// <summary>
    /// Determines whether the given lines should be treated as this format.
    /// </summary>
    /// <param name="lines">Lines of the file being probed.</param>
    /// <param name="fileName">Name of the file being probed.</param>
    /// <returns>
    /// <c>false</c> when <c>UnknownSubtitle88</c> recognizes the lines;
    /// otherwise the result of the base class check.
    /// </returns>
    public override bool IsMine(List<string> lines, string fileName)
    {
        // NOTE(review): presumably UnknownSubtitle88 files look similar enough
        // to be mistaken for this format - confirm against that class.
        if (new UnknownSubtitle88().IsMine(lines, fileName))
        {
            return false;
        }

        return base.IsMine(lines, fileName);
    }

    /// <summary>
    /// Formats a time code as total minutes (hours folded into minutes), a
    /// colon, and two-digit seconds. Milliseconds are not written.
    /// </summary>
    private static string EncodeTimeCode(TimeCode time)
    {
        return $"{time.Hours * 60 + time.Minutes}:{time.Seconds:00}";
    }

    /// <summary>
    /// Rebuilds <paramref name="subtitle"/> from transcript lines. A line
    /// matching the time code pattern starts a new paragraph; following
    /// non-blank lines are appended to that paragraph's text. Blank lines and
    /// text appearing before the first time code are ignored.
    /// </summary>
    /// <param name="subtitle">Subtitle to fill; its existing paragraphs are cleared first.</param>
    /// <param name="lines">Lines of the transcript.</param>
    /// <param name="fileName">Not used by this format.</param>
    public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
    {
        _errorCount = 0;
        Paragraph p = null;
        subtitle.Paragraphs.Clear();

        foreach (string line in lines)
        {
            var s = line.TrimEnd();

            if (RegexTimeCodes.IsMatch(s))
            {
                // The end time is a default TimeCode here; display times are
                // recalculated once all lines have been read.
                p = new Paragraph(DecodeTimeCode(s), new TimeCode(), string.Empty);
                subtitle.Paragraphs.Add(p);
                continue;
            }

            // Nothing to do for blank lines, or for text that has no
            // preceding time code to attach to.
            if (p == null || string.IsNullOrWhiteSpace(s))
            {
                continue;
            }

            p.Text = string.IsNullOrEmpty(p.Text)
                ? s
                : p.Text + Environment.NewLine + s;

            if (p.Text.Length > MaxParagraphTextLength)
            {
                // Over-long text: record an error and stop immediately,
                // skipping the line re-breaking, display time recalculation
                // and renumbering below.
                _errorCount++;
                return;
            }
        }

        foreach (Paragraph p2 in subtitle.Paragraphs)
        {
            p2.Text = Utilities.AutoBreakLine(p2.Text);
        }

        subtitle.RecalculateDisplayTimes(Configuration.Settings.General.SubtitleMaximumDisplayMilliseconds, null, Configuration.Settings.General.SubtitleOptimalCharactersPerSeconds);
        subtitle.Renumber();
    }

    /// <summary>
    /// Parses a "minutes:seconds" string that has already been validated by
    /// <see cref="RegexTimeCodes"/>.
    /// </summary>
    /// <param name="s">Time code text, e.g. "12:34".</param>
    /// <returns>A time code built from zero hours, the parsed minutes and seconds, and zero milliseconds.</returns>
    private static TimeCode DecodeTimeCode(string s)
    {
        string[] parts = s.Split(':');

        // Invariant culture: this is file data, not user-facing text.
        var minutes = int.Parse(parts[0], System.Globalization.CultureInfo.InvariantCulture);
        var seconds = int.Parse(parts[1], System.Globalization.CultureInfo.InvariantCulture);

        return new TimeCode(0, minutes, seconds, 0);
    }
}
|
|
|
|
|
}
|