2016-02-08 21:11:03 +01:00
using System ;
using System.Collections.Generic ;
2019-01-15 21:20:23 +01:00
using System.Globalization ;
2016-02-08 21:11:03 +01:00
using System.Text ;
using System.Text.RegularExpressions ;
namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{
/// <summary>
/// http://www.whatwg.org/specs/web-apps/current-work/webvtt.html
/// </summary>
public class WebVTT : SubtitleFormat
{
// Cue timing line with hours on both timestamps, e.g. "00:00:01.000 --> 00:00:02.000".
private static readonly Regex RegexTimeCodes = new Regex ( @"^-?\d+:-?\d+:-?\d+\.-?\d+\s*-->\s*-?\d+:-?\d+:-?\d+\.-?\d+" , RegexOptions . Compiled ) ;
// Cue timing line whose start timestamp has no hours but whose end timestamp has them, e.g. "00:01.000 --> 00:00:02.000".
private static readonly Regex RegexTimeCodesMiddle = new Regex ( @"^-?\d+:-?\d+\.-?\d+\s*-->\s*-?\d+:-?\d+:-?\d+\.-?\d+" , RegexOptions . Compiled ) ;
// Cue timing line where neither timestamp has hours, e.g. "00:01.000 --> 00:02.000".
private static readonly Regex RegexTimeCodesShort = new Regex ( @"^-?\d+:-?\d+\.-?\d+\s*-->\s*-?\d+:-?\d+\.-?\d+" , RegexOptions . Compiled ) ;
2017-08-03 12:43:52 +02:00
/// <summary>File extension reported for this format.</summary>
public override string Extension => ".vtt";
2016-02-08 21:11:03 +01:00
2017-08-03 12:43:52 +02:00
/// <summary>Name reported for this format.</summary>
public override string Name => "WebVTT";
2016-02-08 21:11:03 +01:00
/// <summary>
/// Serializes <paramref name="subtitle"/> as WebVTT text: a "WEBVTT" header line, a blank line,
/// and then one cue per paragraph (timing line with optional position settings, cue text, blank line).
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are written.</param>
/// <param name="title">Not used by this format.</param>
/// <returns>The WebVTT text with leading and trailing whitespace trimmed.</returns>
public override string ToText(Subtitle subtitle, string title)
{
    const string timeCodeFormatHours = "{0:00}:{1:00}:{2:00}.{3:000}"; // hh:mm:ss.mmm
    const string paragraphWriteFormat = "{0} --> {1}{2}{5}{3}{4}{5}";

    var output = new StringBuilder();
    output.AppendLine("WEBVTT");
    output.AppendLine();

    foreach (Paragraph p in subtitle.Paragraphs)
    {
        string start = string.Format(timeCodeFormatHours, p.StartTime.Hours, p.StartTime.Minutes, p.StartTime.Seconds, p.StartTime.Milliseconds);
        string end = string.Format(timeCodeFormatHours, p.EndTime.Hours, p.EndTime.Minutes, p.EndTime.Seconds, p.EndTime.Milliseconds);
        string positionInfo = GetPositionInfoFromAssTag(p);

        // p.Extra is only emitted when the subtitle header is exactly "WEBVTT".
        string style = !string.IsNullOrEmpty(p.Extra) && subtitle.Header == "WEBVTT"
            ? p.Extra
            : string.Empty;

        // The format already ends the cue text with a line break; the extra AppendLine
        // produces the blank line that separates cues.
        output.AppendFormat(paragraphWriteFormat, start, end, positionInfo, FormatText(p), style, Environment.NewLine);
        output.AppendLine();
    }

    return output.ToString().Trim();
}
2017-04-14 21:30:36 +02:00
/// <summary>
/// Translates a leading ASS alignment tag ("{\an1}" .. "{\an9}") in the paragraph text
/// into WebVTT cue settings.
/// </summary>
/// <param name="p">Paragraph whose text is inspected; only the start of the text is checked.</param>
/// <returns>
/// A string such as " position:20% line:50%" (each part optional, each with a leading space),
/// or an empty string when no matching tag leads the text or the tag maps to no setting.
/// </returns>
internal static string GetPositionInfoFromAssTag(Paragraph p)
{
    string text = p.Text;
    if (!text.StartsWith("{\\a", StringComparison.Ordinal))
    {
        return string.Empty;
    }

    // True when the text starts with any of the given tags.
    bool StartsWithAny(params string[] tags)
    {
        foreach (var tag in tags)
        {
            if (text.StartsWith(tag, StringComparison.Ordinal))
            {
                return true;
            }
        }
        return false;
    }

    // horizontal
    string position = null;
    if (StartsWithAny("{\\an1}", "{\\an4}", "{\\an7}"))
    {
        position = "20%"; // left
    }
    else if (StartsWithAny("{\\an3}", "{\\an6}", "{\\an9}"))
    {
        position = "80%"; // right
    }

    // vertical
    string line = null;
    if (StartsWithAny("{\\an7}", "{\\an8}", "{\\an9}"))
    {
        line = "20%"; // top
    }
    else if (StartsWithAny("{\\an4}", "{\\an5}", "{\\an6}"))
    {
        line = "50%"; // middle
    }

    string horizontalPart = string.IsNullOrEmpty(position) ? string.Empty : " position:" + position;
    string verticalPart = string.IsNullOrEmpty(line) ? string.Empty : " line:" + line;
    return horizontalPart + verticalPart;
}
/// <summary>
/// Prepares paragraph text for writing: passes it through Utilities.RemoveSsaTags,
/// collapses consecutive line breaks into one, and converts HTML font colour tags
/// via ColorHtmlToWebVtt.
/// </summary>
/// <param name="p">Paragraph whose text is formatted.</param>
/// <returns>The formatted cue text.</returns>
internal static string FormatText(Paragraph p)
{
    string doubleLineBreak = Environment.NewLine + Environment.NewLine;

    string text = Utilities.RemoveSsaTags(p.Text);

    // Repeat until no pair is left, so runs of three or more breaks are collapsed too.
    while (text.Contains(doubleLineBreak))
    {
        text = text.Replace(doubleLineBreak, Environment.NewLine);
    }

    return ColorHtmlToWebVtt(text);
}
/// <summary>
/// Brings a cue timing line into the full "hh:mm:ss.mmm --> hh:mm:ss.mmm" form.
/// Leading whitespace is removed, and "00:" is added to each timestamp that lacks hours.
/// Lines that match neither hours-less pattern are returned with only the leading
/// whitespace removed.
/// </summary>
private static string NormalizeTimeCodeLine(string line)
{
    string s = line.TrimStart();

    if (RegexTimeCodesMiddle.IsMatch(s))
    {
        // start is without hours, end is with hours
        return "00:" + s;
    }

    if (RegexTimeCodesShort.IsMatch(s))
    {
        // Both timestamps lack hours. Insert the end prefix right after the arrow,
        // whatever whitespace (space, tab, several, none) follows it.
        int afterArrow = s.IndexOf("-->", StringComparison.Ordinal) + 3;
        return "00:" + s.Substring(0, afterArrow) + " 00:" + s.Substring(afterArrow).TrimStart();
    }

    return s;
}

/// <summary>
/// Parses WebVTT lines into <paramref name="subtitle"/>.
/// </summary>
/// <remarks>
/// Each line containing "-->" that matches a timing pattern starts a new paragraph; the
/// following lines are appended to its text. A "WEBVTT" line seen before any paragraph is
/// stored sets the subtitle header. Integer lines that follow an empty line and precede a
/// timing line are counted instead of being added to the text. When the file starts with
/// "WEBVTT FILE" and nearly every cue is numbered, the error count is set above the
/// paragraph count and loading stops so another format can take the file.
/// </remarks>
/// <param name="subtitle">Receives the header and the parsed paragraphs.</param>
/// <param name="lines">Lines of the file.</param>
/// <param name="fileName">Not used by this format.</param>
public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
{
    _errorCount = 0;
    Paragraph p = null;
    string positionInfo = string.Empty;
    bool hadEmptyLine = false;
    int numbers = 0;

    for (var index = 0; index < lines.Count; index++)
    {
        string line = lines[index];
        string next = index < lines.Count - 1 ? lines[index + 1] : string.Empty;

        bool isTimeCode = line.Contains("-->");
        string s = isTimeCode ? NormalizeTimeCodeLine(line) : line;

        if (isTimeCode && RegexTimeCodes.IsMatch(s))
        {
            // A new cue starts: store the one collected so far.
            if (p != null)
            {
                p.Text = p.Text.TrimEnd();
                subtitle.Paragraphs.Add(p);
            }

            try
            {
                var parts = s.Replace("-->", "@").Split(new[] { '@' }, StringSplitOptions.RemoveEmptyEntries);
                p = new Paragraph
                {
                    StartTime = GetTimeCodeFromString(parts[0]),
                    EndTime = GetTimeCodeFromString(parts[1])
                };

                // Alignment tag derived from the cue settings; prepended to the first text line.
                positionInfo = GetPositionInfo(s);
            }
            catch (Exception exception)
            {
                System.Diagnostics.Debug.WriteLine(exception.Message);
                _errorCount++;
                p = null;
            }

            hadEmptyLine = false;
        }
        else if (subtitle.Paragraphs.Count == 0 && line.Trim() == "WEBVTT")
        {
            subtitle.Header = "WEBVTT";
        }
        else if (p != null && hadEmptyLine && Utilities.IsInteger(line) &&
                 (RegexTimeCodesMiddle.IsMatch(next) ||
                  RegexTimeCodesShort.IsMatch(next) ||
                  RegexTimeCodes.IsMatch(next)))
        {
            // Cue number directly before a timing line: count it, do not add it to the text.
            numbers++;
        }
        else if (p != null)
        {
            string text = positionInfo + line.Trim();
            if (string.IsNullOrEmpty(text))
            {
                hadEmptyLine = true;
            }

            // Concatenation also covers a null or empty Text, so no separate first-line case is needed.
            p.Text += text + Environment.NewLine;
            positionInfo = string.Empty;
        }
    }

    if (p != null)
    {
        p.Text = p.Text.TrimEnd();
        subtitle.Paragraphs.Add(p);
    }

    if (subtitle.Paragraphs.Count > 5 &&
        numbers >= subtitle.Paragraphs.Count - 1 &&
        lines[0] == "WEBVTT FILE")
    {
        // let format WebVTTFileWithLineNumber take the subtitle
        _errorCount = subtitle.Paragraphs.Count + 1;
        return;
    }

    foreach (var paragraph in subtitle.Paragraphs)
    {
        paragraph.Text = ColorWebVttToHtml(paragraph.Text);
        paragraph.Text = System.Net.WebUtility.HtmlDecode(paragraph.Text);
    }

    subtitle.Renumber();
}
2017-04-14 21:30:36 +02:00
/// <summary>
/// Converts the "position:" and "line:" cue settings of a timing line into an ASS
/// alignment tag.
/// </summary>
/// <remarks>
/// position: x --- 0% = left, 100% = right (horizontal). Only percentages are evaluated:
/// below 25 is left, above 75 is right, anything else is centre.
/// line: x --- 0 or 0% = top, 16 or 100% = bottom (vertical). Percentages below 25 are top
/// and below 75 are middle; plain line numbers below 7 are top and below 11 are middle.
/// Values that fail to parse leave the default alignment; the number style used here
/// does not accept a sign, so negative line numbers fall into that case.
/// </remarks>
/// <param name="s">The cue timing line, including any cue settings.</param>
/// <returns>
/// A tag "{\an1}" .. "{\an9}", or an empty string for the default bottom-centre alignment.
/// </returns>
internal static string GetPositionInfo(string s)
{
    var pos = GetTag(s, "position:");
    var line = GetTag(s, "line:");

    bool hAlignLeft = false;
    bool hAlignRight = false;
    bool vAlignTop = false;
    bool vAlignMiddle = false;

    if (!string.IsNullOrEmpty(pos) && pos.EndsWith('%') &&
        double.TryParse(pos.TrimEnd('%'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var horizontalPercent))
    {
        if (horizontalPercent < 25)
        {
            hAlignLeft = true;
        }
        else if (horizontalPercent > 75)
        {
            hAlignRight = true;
        }
    }

    // The '%' test must not be part of this outer condition, otherwise the
    // line-number branch below can never run.
    if (!string.IsNullOrEmpty(line))
    {
        if (line.EndsWith('%'))
        {
            if (double.TryParse(line.TrimEnd('%'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var verticalPercent))
            {
                if (verticalPercent < 25)
                {
                    vAlignTop = true;
                }
                else if (verticalPercent < 75)
                {
                    vAlignMiddle = true;
                }
            }
        }
        else if (double.TryParse(line, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var lineNumber))
        {
            if (lineNumber < 7)
            {
                vAlignTop = true;
            }
            else if (lineNumber < 11)
            {
                vAlignMiddle = true;
            }
        }
    }

    if (hAlignLeft)
    {
        if (vAlignTop)
        {
            return "{\\an7}";
        }
        if (vAlignMiddle)
        {
            return "{\\an4}";
        }
        return "{\\an1}";
    }

    if (hAlignRight)
    {
        if (vAlignTop)
        {
            return "{\\an9}";
        }
        if (vAlignMiddle)
        {
            return "{\\an6}";
        }
        return "{\\an3}";
    }

    if (vAlignTop)
    {
        return "{\\an8}";
    }

    if (vAlignMiddle)
    {
        return "{\\an5}";
    }

    // Bottom centre is the default and needs no tag.
    return string.Empty;
}
2017-04-14 21:30:36 +02:00
/// <summary>
/// Extracts the value that follows <paramref name="tag"/> in <paramref name="s"/>.
/// </summary>
/// <param name="s">Text to search, e.g. a cue timing line with settings.</param>
/// <param name="tag">Setting name including the colon, e.g. "position:".</param>
/// <returns>
/// The value up to the first space, with anything after a "%," cut off so the '%' is kept,
/// or null when the tag does not occur.
/// </returns>
private static string GetTag(string s, string tag)
{
    int tagIndex = s.IndexOf(tag, StringComparison.Ordinal);
    if (tagIndex < 0)
    {
        return null;
    }

    string value = s.Substring(tagIndex + tag.Length).Trim();

    // "50%,start" -> "50%": keep the percentage, drop the part after the comma.
    int percentComma = value.IndexOf("%,", StringComparison.Ordinal);
    if (percentComma >= 0)
    {
        value = value.Substring(0, percentComma + 1);
    }

    // The value ends at the next space, where the following setting begins.
    int space = value.IndexOf(' ');
    return space >= 0 ? value.Substring(0, space) : value;
}
2016-02-08 21:11:03 +01:00
// Opening or closing class span tag, e.g. <c>, <c.red>, </c>.
private static readonly Regex RegexRemoveCTags = new Regex(@"\</?c([a-zA-Z\._\d]*)\>", RegexOptions.Compiled);

// Inline timestamp tag, e.g. <00:00:10.049>.
private static readonly Regex RegexRemoveTimeCodes = new Regex(@"\<\d+:\d+:\d+.\d+\>", RegexOptions.Compiled);

// Alignment tag such as {\an8} followed by whitespace; the whitespace is dropped.
private static readonly Regex RegexTagsPlusWhiteSpace = new Regex(@"(\{\\an\d\})[\s\r\n]+", RegexOptions.Compiled);

/// <summary>
/// Strips WebVTT-specific markup (v, rt, ruby and span tags, class spans and inline
/// timestamps) from every paragraph whose text contains a '&lt;'.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraph texts are cleaned in place.</param>
/// <param name="newFormat">Not used by this implementation.</param>
public override void RemoveNativeFormatting(Subtitle subtitle, SubtitleFormat newFormat)
{
    // The regexes live in static fields so they are built once, not on every call.
    foreach (Paragraph p in subtitle.Paragraphs)
    {
        if (!p.Text.Contains('<'))
        {
            continue;
        }

        string text = p.Text;
        text = RemoveTag("v", text);
        text = RemoveTag("rt", text);
        text = RemoveTag("ruby", text);
        text = RemoveTag("span", text);
        text = RegexRemoveCTags.Replace(text, string.Empty).Trim();
        text = RegexRemoveTimeCodes.Replace(text, string.Empty).Trim();

        // Removing tags can leave whitespace directly behind a leading alignment tag.
        text = RegexTagsPlusWhiteSpace.Replace(text, "$1");
        p.Text = text;
    }
}
2017-11-23 21:48:25 +01:00
// Class span whose class is a lower-case name, e.g. <c.red>. The dot is escaped so that
// other tags starting with "<c" (such as <code>) are not mistaken for a colour span.
private static readonly Regex RegexWebVttColor = new Regex(@"<c\.[a-z]*>", RegexOptions.Compiled);

/// <summary>
/// Rewrites WebVTT colour class spans as HTML font tags:
/// "&lt;c.red&gt;text&lt;/c&gt;" becomes "&lt;font color="red"&gt;text&lt;/font&gt;".
/// </summary>
/// <param name="text">Cue text.</param>
/// <returns>
/// The text with every matching opening tag converted, and for each of them the first
/// "&lt;/c&gt;" that follows converted to "&lt;/font&gt;".
/// </returns>
internal static string ColorWebVttToHtml(string text)
{
    var match = RegexWebVttColor.Match(text);
    while (match.Success)
    {
        // match.Value is "<c.NAME>": skip the three-character prefix and the closing '>'.
        var color = match.Value.Substring(3, match.Value.Length - 4);
        var fontString = "<font color=\"" + color + "\">";
        text = text.Remove(match.Index, match.Length).Insert(match.Index, fontString);

        // Pair the converted opening tag with the first closing tag after it.
        var endIndex = text.IndexOf("</c>", match.Index, StringComparison.OrdinalIgnoreCase);
        if (endIndex >= 0)
        {
            text = text.Remove(endIndex, 4).Insert(endIndex, "</font>");
        }

        // Search again from the start: the replacements shifted all later positions.
        match = RegexWebVttColor.Match(text);
    }

    return text;
}
// Font tag with a quoted lower-case colour name, e.g. <font color="red">.
private static readonly Regex RegexHtmlColor = new Regex("<font color=\"[a-z]*\">", RegexOptions.Compiled);

// Font tag with an unquoted lower-case colour name, e.g. <font color=red>.
private static readonly Regex RegexHtmlColor2 = new Regex("<font color=[a-z]*>", RegexOptions.Compiled);

/// <summary>
/// Rewrites HTML font colour tags as WebVTT class spans:
/// "&lt;font color="red"&gt;text&lt;/font&gt;" becomes "&lt;c.red&gt;text&lt;/c&gt;".
/// </summary>
/// <remarks>
/// Every "&lt;/font&gt;" is replaced by "&lt;/c&gt;", including those whose opening tag
/// does not match either pattern (for example a hex colour) and therefore stays as it is.
/// </remarks>
/// <param name="text">Cue text.</param>
/// <returns>The converted text.</returns>
private static string ColorHtmlToWebVtt(string text)
{
    string converted = text.Replace("</font>", "</c>");

    // Quoted form: 13 characters precede the colour name, 2 follow it.
    converted = RegexHtmlColor.Replace(
        converted,
        m => "<c." + m.Value.Substring(13, m.Value.Length - 15) + ">");

    // Unquoted form: 12 characters precede the colour name, 1 follows it.
    converted = RegexHtmlColor2.Replace(
        converted,
        m => "<c." + m.Value.Substring(12, m.Value.Length - 13) + ">");

    return converted;
}
2016-02-08 21:11:03 +01:00
/// <summary>
/// Collects the distinct voice annotations used in "&lt;v ...&gt;" tags across all paragraphs.
/// </summary>
/// <param name="subtitle">Subtitle to scan; may be null.</param>
/// <returns>
/// The trimmed text between "&lt;v" and the next '&gt;' of each voice tag, in order of
/// first appearance and without duplicates. Empty when the subtitle or its paragraph
/// list is null.
/// </returns>
public static List<string> GetVoices(Subtitle subtitle)
{
    var voices = new List<string>();
    if (subtitle?.Paragraphs == null)
    {
        return voices;
    }

    foreach (Paragraph p in subtitle.Paragraphs)
    {
        string text = p.Text;

        // A hit for the three-character "<v " can never sit on the last character,
        // so searching on from tagStart + 1 is always within the string.
        for (int tagStart = text.IndexOf("<v ", StringComparison.Ordinal);
             tagStart >= 0;
             tagStart = text.IndexOf("<v ", tagStart + 1, StringComparison.Ordinal))
        {
            int tagEnd = text.IndexOf('>', tagStart);
            if (tagEnd <= tagStart)
            {
                continue; // unterminated tag: nothing to collect
            }

            string voice = text.Substring(tagStart + 2, tagEnd - tagStart - 2).Trim();
            if (!voices.Contains(voice))
            {
                voices.Add(voice);
            }
        }
    }

    return voices;
}
/// <summary>
/// Removes every opening tag named <paramref name="tag"/> from <paramref name="text"/>,
/// with or without an annotation or class ("&lt;v Bob&gt;", "&lt;v.loud Bob&gt;",
/// "&lt;ruby&gt;"), and, when at least one opening tag was removed, every matching
/// closing tag as well.
/// </summary>
/// <param name="tag">Tag name without angle brackets, e.g. "v".</param>
/// <param name="text">Text to clean; null or empty is returned unchanged.</param>
/// <returns>The text without the tags. Tags with a longer name (e.g. "video" for "v") are kept.</returns>
public static string RemoveTag(string tag, string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    string opener = "<" + tag;
    bool removedAny = false;
    int searchFrom = 0;

    while (searchFrom < text.Length)
    {
        int start = text.IndexOf(opener, searchFrom, StringComparison.Ordinal);
        if (start < 0)
        {
            break;
        }

        int afterName = start + opener.Length;
        if (afterName >= text.Length)
        {
            break; // text ends right after the tag name: no complete tag
        }

        // The name must end here; otherwise this is a different, longer tag name.
        char following = text[afterName];
        if (following != '>' && following != ' ' && following != '\t' && following != '.')
        {
            searchFrom = afterName;
            continue;
        }

        int end = text.IndexOf('>', afterName);
        if (end < 0)
        {
            break; // unterminated tag
        }

        text = text.Remove(start, end - start + 1);
        removedAny = true;
        searchFrom = start; // continue where the removed tag used to begin
    }

    if (removedAny)
    {
        text = text.Replace("</" + tag + ">", string.Empty);
    }

    return text;
}
2017-11-23 18:56:45 +01:00
/// <summary>
/// Parses a timestamp in the form hh:mm:ss.mmm into a <see cref="TimeCode"/>.
/// </summary>
/// <param name="time">
/// Timestamp text. Surrounding whitespace is ignored, and anything after the first four
/// components (separated by ':', '.' or a space) is ignored too.
/// </param>
/// <returns>A time code built from the hours, minutes, seconds and milliseconds components.</returns>
/// <exception cref="FormatException">A component is not an integer.</exception>
/// <exception cref="IndexOutOfRangeException">Fewer than four components are present.</exception>
internal static TimeCode GetTimeCodeFromString(string time)
{
    // hh:mm:ss.mmm
    string[] timeCode = time.Trim().Split(':', '.', ' ');

    // File content is machine-readable, so parsing must not depend on the current culture.
    return new TimeCode(int.Parse(timeCode[0], CultureInfo.InvariantCulture),
                        int.Parse(timeCode[1], CultureInfo.InvariantCulture),
                        int.Parse(timeCode[2], CultureInfo.InvariantCulture),
                        int.Parse(timeCode[3], CultureInfo.InvariantCulture));
}
}
}