2016-02-08 21:11:03 +01:00
using System ;
using System.Collections.Generic ;
2019-02-09 18:10:47 +01:00
using System.Drawing ;
2019-01-15 21:20:23 +01:00
using System.Globalization ;
2019-02-09 18:10:47 +01:00
using System.Linq ;
2016-02-08 21:11:03 +01:00
using System.Text ;
using System.Text.RegularExpressions ;
namespace Nikse.SubtitleEdit.Core.SubtitleFormats
{
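/// <summary>
/// WebVTT (.vtt) subtitle format: cues are written by ToText and read by LoadSubtitle.
/// Specification: http://www.whatwg.org/specs/web-apps/current-work/webvtt.html
/// </summary>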
public class WebVTT : SubtitleFormat
{
// Building blocks for the cue timing patterns below. Every numeric field tolerates a leading '-'.
private const string TimeWithHoursPattern = @"-?\d+:-?\d+:-?\d+\.-?\d+"; // hh:mm:ss.mmm
private const string TimeWithoutHoursPattern = @"-?\d+:-?\d+\.-?\d+"; // mm:ss.mmm
private const string ArrowPattern = @"\s*-->\s*";

/// <summary>Timing line where both start and end carry an hours field.</summary>
private static readonly Regex RegexTimeCodes = new Regex("^" + TimeWithHoursPattern + ArrowPattern + TimeWithHoursPattern, RegexOptions.Compiled);

/// <summary>Timing line where only the end time carries an hours field.</summary>
private static readonly Regex RegexTimeCodesMiddle = new Regex("^" + TimeWithoutHoursPattern + ArrowPattern + TimeWithHoursPattern, RegexOptions.Compiled);

/// <summary>Timing line where neither start nor end carries an hours field.</summary>
private static readonly Regex RegexTimeCodesShort = new Regex("^" + TimeWithoutHoursPattern + ArrowPattern + TimeWithoutHoursPattern, RegexOptions.Compiled);
2019-02-10 12:27:19 +01:00
/// <summary>
/// Colour class names recognised in cue text, mapped to their RGB value.
/// Used both to accept a class name as a colour and to snap a hex colour to the nearest named class.
/// </summary>
private static readonly Dictionary<string, Color> DefaultColorClasses = new Dictionary<string, Color>
{
    ["white"] = Color.FromArgb(255, 255, 255),
    ["lime"] = Color.FromArgb(0, 255, 0),
    ["cyan"] = Color.FromArgb(0, 255, 255),
    ["red"] = Color.FromArgb(255, 0, 0),
    ["yellow"] = Color.FromArgb(255, 255, 0),
    ["magenta"] = Color.FromArgb(255, 0, 255),
    ["blue"] = Color.FromArgb(0, 0, 255),
    ["black"] = Color.FromArgb(0, 0, 0),
};
2020-01-16 05:14:08 +01:00
/// <summary>
/// Class names that denote a language rather than a colour; such &lt;c.language&gt; tags are
/// stripped or left alone instead of being turned into font colours.
/// </summary>
/// <remarks>
/// NOTE(review): "Japanese" is the only capitalised entry and every lookup against this list is
/// case-sensitive - confirm that the casing is intentional.
/// </remarks>
private static readonly string[] KnownLanguages =
{
    "arabic",
    "hebrew",
    "simplifiedchinese",
    "traditionalchinese",
    "thai",
    "korean",
    "Japanese",
    "hungarian",
    "czech",
    "vietnamese",
};
2019-02-25 06:01:25 +01:00
2017-08-03 12:43:52 +02:00
/// <summary>File extension used for this format.</summary>
public override string Extension
{
    get { return ".vtt"; }
}

/// <summary>Display name of this format.</summary>
public override string Name
{
    get { return "WebVTT"; }
}
2016-02-08 21:11:03 +01:00
/// <summary>
/// Serialises the subtitle as WebVTT text: a "WEBVTT" header, a blank line, then one cue per paragraph.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are written.</param>
/// <param name="title">Not used by this format.</param>
/// <returns>The complete file content with leading/trailing white space trimmed.</returns>
public override string ToText(Subtitle subtitle, string title)
{
    const string timeCodeFormat = "{0:00}:{1:00}:{2:00}.{3:000}"; // hh:mm:ss.mmm

    var output = new StringBuilder();
    output.AppendLine("WEBVTT");
    output.AppendLine();

    foreach (Paragraph p in subtitle.Paragraphs)
    {
        var start = string.Format(timeCodeFormat, p.StartTime.Hours, p.StartTime.Minutes, p.StartTime.Seconds, p.StartTime.Milliseconds);
        var end = string.Format(timeCodeFormat, p.EndTime.Hours, p.EndTime.Minutes, p.EndTime.Seconds, p.EndTime.Milliseconds);
        var cueSettings = GetPositionInfoFromAssTag(p);

        // The paragraph's Extra text is only emitted when the subtitle header is exactly "WEBVTT".
        var trailer = !string.IsNullOrEmpty(p.Extra) && subtitle.Header == "WEBVTT"
            ? p.Extra
            : string.Empty;

        // Timing line (with optional cue settings), cue text followed by the trailer, then a blank separator line.
        output.Append(start).Append(" --> ").Append(end).AppendLine(cueSettings);
        output.Append(FormatText(p)).AppendLine(trailer);
        output.AppendLine();
    }

    return output.ToString().Trim();
}
2017-04-14 21:30:36 +02:00
/// <summary>
/// Translates a leading ASS alignment tag ({\an1}..{\an9}) in the paragraph text into WebVTT cue settings.
/// </summary>
/// <param name="p">Paragraph whose text may start with an alignment tag.</param>
/// <returns>
/// A string such as " position:20% line:50%" (each part optional, each with a leading space),
/// or an empty string when there is no alignment tag or the tag needs no settings.
/// </returns>
internal static string GetPositionInfoFromAssTag(Paragraph p)
{
    var text = p.Text;

    // Only a complete "{\anN}" prefix carries alignment information.
    if (text.Length < 6 || text[5] != '}' || !text.StartsWith("{\\an", StringComparison.Ordinal))
    {
        return string.Empty;
    }

    var alignment = text[4];

    // Horizontal: left column = 1/4/7, right column = 3/6/9; the centre column needs no setting.
    var position = "147".IndexOf(alignment) >= 0 ? "20%"
        : "369".IndexOf(alignment) >= 0 ? "80%"
        : null;

    // Vertical: top row = 7/8/9, middle row = 4/5/6; the bottom row needs no setting.
    var line = "789".IndexOf(alignment) >= 0 ? "20%"
        : "456".IndexOf(alignment) >= 0 ? "50%"
        : null;

    var settings = position != null ? " position:" + position : string.Empty;
    if (line != null)
    {
        settings += " line:" + line;
    }

    return settings;
}
/// <summary>
/// Prepares paragraph text for a WebVTT cue: strips SSA tags, collapses runs of line breaks
/// into a single one, converts HTML font colours to WebVTT class tags and escape-encodes the result.
/// </summary>
/// <param name="p">Paragraph to format.</param>
/// <returns>The cue text.</returns>
internal static string FormatText(Paragraph p)
{
    var doubleLineBreak = Environment.NewLine + Environment.NewLine;

    var cueText = Utilities.RemoveSsaTags(p.Text);
    while (cueText.Contains(doubleLineBreak))
    {
        cueText = cueText.Replace(doubleLineBreak, Environment.NewLine);
    }

    return EscapeEncodeText(ColorHtmlToWebVtt(cueText));
}
/// <summary>
/// Parses WebVTT lines into paragraphs and appends them to <paramref name="subtitle"/>.
/// </summary>
/// <remarks>
/// Each line is classified, in this order, as: an X-TIMESTAMP-MAP header (only on the second line),
/// a cue timing line, the "WEBVTT" header, a numeric cue identifier (counted but not kept), or cue text.
/// Timing lines that cannot be parsed increase <c>_errorCount</c>.
/// </remarks>
/// <param name="subtitle">Subtitle that receives the paragraphs and the header.</param>
/// <param name="lines">Lines of the file.</param>
/// <param name="fileName">Not used by this format.</param>
public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
{
    _errorCount = 0;
    Paragraph current = null;
    var pendingAlignmentTag = string.Empty;
    var sawBlankLine = false;
    var cueNumberLines = 0;
    var offsetSeconds = 0d;

    // Trims the collected text of a finished cue and stores the cue.
    void Commit(Paragraph finished)
    {
        if (finished == null)
        {
            return;
        }

        finished.Text = finished.Text.TrimEnd();
        subtitle.Paragraphs.Add(finished);
    }

    for (var i = 0; i < lines.Count; i++)
    {
        var line = lines[i];
        var nextLine = i + 1 < lines.Count ? lines[i + 1] : string.Empty;
        var hasArrow = line.Contains("-->");

        // Normalise abbreviated timings so that both sides carry an hours field.
        var timing = line;
        if (hasArrow && RegexTimeCodesMiddle.IsMatch(timing))
        {
            timing = "00:" + timing; // start is without hours, end is with hours
        }

        if (hasArrow && RegexTimeCodesShort.IsMatch(timing))
        {
            timing = "00:" + timing.Replace("--> ", "--> 00:");
        }

        if (i == 1 &&
            timing.StartsWith("X-TIMESTAMP-MAP=", StringComparison.OrdinalIgnoreCase) &&
            timing.IndexOf("MPEGTS:", StringComparison.OrdinalIgnoreCase) > 0)
        {
            offsetSeconds = GetXTimeStampSeconds(timing);
            continue;
        }

        if (hasArrow && RegexTimeCodes.IsMatch(timing.TrimStart()))
        {
            // A new timing line closes the cue collected so far.
            Commit(current);

            try
            {
                var parts = timing.TrimStart().Replace("-->", "@").Split(new[] { '@' }, StringSplitOptions.RemoveEmptyEntries);
                current = new Paragraph
                {
                    StartTime = GetTimeCodeFromString(parts[0]),
                    EndTime = GetTimeCodeFromString(parts[1])
                };
                pendingAlignmentTag = GetPositionInfo(timing);
            }
            catch (Exception exception)
            {
                System.Diagnostics.Debug.WriteLine(exception.Message);
                _errorCount++;
                current = null;
            }

            sawBlankLine = false;
            continue;
        }

        if (subtitle.Paragraphs.Count == 0 && line.Trim() == "WEBVTT")
        {
            subtitle.Header = "WEBVTT";
            continue;
        }

        // An integer line after a blank line and directly before a timing line is a cue identifier.
        if (current != null && sawBlankLine && Utilities.IsInteger(line.RemoveChar('-')) &&
            (RegexTimeCodesMiddle.IsMatch(nextLine) ||
             RegexTimeCodesShort.IsMatch(nextLine) ||
             RegexTimeCodes.IsMatch(nextLine)))
        {
            cueNumberLines++;
            continue;
        }

        if (current == null)
        {
            continue;
        }

        // Cue text: the alignment tag derived from the timing line is prefixed to the first text line only.
        var text = pendingAlignmentTag + line.Trim();
        if (string.IsNullOrEmpty(text))
        {
            sawBlankLine = true;
        }

        // Concatenation treats a null Text like an empty one, so no separate first-line case is needed.
        current.Text += text + Environment.NewLine;
        pendingAlignmentTag = string.Empty;
    }

    Commit(current);

    if (subtitle.Paragraphs.Count > 5 &&
        cueNumberLines >= subtitle.Paragraphs.Count - 1 &&
        lines[0] == "WEBVTT FILE")
    {
        // let format WebVTTFileWithLineNumber take the subtitle
        _errorCount = subtitle.Paragraphs.Count + 1;
        return;
    }

    var offsetMilliseconds = offsetSeconds * 1000;
    foreach (var paragraph in subtitle.Paragraphs)
    {
        paragraph.Text = ColorWebVttToHtml(paragraph.Text);
        paragraph.Text = EscapeDecodeText(paragraph.Text);
        paragraph.StartTime.TotalMilliseconds += offsetMilliseconds;
        paragraph.EndTime.TotalMilliseconds += offsetMilliseconds;
    }

    subtitle.Renumber();
}
2019-08-14 15:56:46 +02:00
/// <summary>
/// Computes the time offset, in seconds, described by an X-TIMESTAMP-MAP header line:
/// the MPEGTS value divided by the configured WebVTT timescale, minus the LOCAL time code.
/// </summary>
/// <param name="input">The header line; the caller has already checked that it contains "MPEGTS:".</param>
/// <returns>
/// The offset when it is greater than 0 and below 90000 seconds; otherwise 0.
/// Also 0 when the WebVttUseXTimestampMap setting is off.
/// </returns>
private static double GetXTimeStampSeconds(string input)
{
    if (!Configuration.Settings.SubtitleSettings.WebVttUseXTimestampMap)
    {
        return 0;
    }

    // Returns the run of accepted characters that begins at 'from' (empty when there is none).
    string ReadRun(string text, int from, Func<char, bool> accept)
    {
        var end = from;
        while (end < text.Length && accept(text[end]))
        {
            end++;
        }

        return end > from ? text.Substring(from, end - from) : string.Empty;
    }

    var compact = input.RemoveChar(' ');

    // LOCAL:[hh:]mm:ss.mmm - a value without hours gets "00:" prefixed.
    var localStart = compact.IndexOf("LOCAL:", StringComparison.OrdinalIgnoreCase) + 6;
    var localText = ReadRun(compact, localStart, c => char.IsNumber(c) || c == ':' || c == '.');
    var localParts = localText.Split(':', '.');
    if (localParts.Length == 3)
    {
        localParts = ("00:" + localText).Split(':', '.');
    }

    var localSeconds = 0d;
    if (localParts.Length == 4)
    {
        localSeconds = DecodeTimeCodeMsFourParts(localParts).TotalSeconds;
    }

    // MPEGTS:<integer>
    var mpegTsStart = compact.IndexOf("MPEGTS:", StringComparison.OrdinalIgnoreCase) + 7;
    var mpegTsText = ReadRun(compact, mpegTsStart, c => char.IsNumber(c));
    if (mpegTsText.Length > 0 && long.TryParse(mpegTsText, out var mpegTs))
    {
        var seconds = (double)mpegTs / Configuration.Settings.SubtitleSettings.WebVttTimescale - localSeconds;
        if (seconds > 0 && seconds < 90000) // max 25 hours - or wrong timescale
        {
            return seconds;
        }
    }

    return 0;
}
2017-04-14 21:30:36 +02:00
/// <summary>
/// Converts the "position:" and "line:" cue settings of a timing line into an ASS alignment tag.
/// </summary>
/// <remarks>
/// position: x --- 0% = left, 100% = right (horizontal).
/// line: x --- 0 or -16 or 0% = top, 16 or -1 or 100% = bottom (vertical).
/// Only values accepted by <c>NumberStyles.AllowDecimalPoint</c> are evaluated, so signed values leave the default.
/// </remarks>
/// <param name="s">The cue timing line including its settings.</param>
/// <returns>"{\an1}".."{\an9}", or an empty string for the default bottom-centre placement.</returns>
internal static string GetPositionInfo(string s)
{
    const int left = 0, center = 1, right = 2; // column on the numeric keypad
    const int top = 0, middle = 1, bottom = 2; // row on the numeric keypad, counted from the top

    var column = center;
    var row = bottom;

    var pos = GetTag(s, "position:");
    var line = GetTag(s, "line:");

    if (!string.IsNullOrEmpty(pos) && pos.EndsWith('%') &&
        double.TryParse(pos.TrimEnd('%'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var horizontalPercent))
    {
        if (horizontalPercent < 25)
        {
            column = left;
        }
        else if (horizontalPercent > 75)
        {
            column = right;
        }
    }

    if (!string.IsNullOrEmpty(line))
    {
        line = line.Trim();
        var isPercent = line.EndsWith('%');
        var numberText = isPercent ? line.TrimEnd('%') : line;
        if (double.TryParse(numberText, NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out var vertical))
        {
            if (isPercent)
            {
                if (vertical < 25)
                {
                    row = top;
                }
                else if (vertical < 75)
                {
                    row = middle;
                }
            }
            else if (vertical >= 0 && vertical <= 7)
            {
                row = top; // Positive numbers indicate top down
            }
            else if (vertical > 7 && vertical < 11)
            {
                row = middle;
            }
        }
    }

    // Bottom-centre is the default placement and gets no tag.
    if (column == center && row == bottom)
    {
        return string.Empty;
    }

    // Keypad layout: 7 8 9 on the top row, 4 5 6 in the middle, 1 2 3 at the bottom.
    var keypadNumber = (bottom - row) * 3 + column + 1;
    return "{\\an" + keypadNumber.ToString(CultureInfo.InvariantCulture) + "}";
}
2017-04-14 21:30:36 +02:00
/// <summary>
/// Extracts the value that follows <paramref name="tag"/> in a cue settings string.
/// </summary>
/// <param name="s">Text to search, e.g. a cue timing line.</param>
/// <param name="tag">Setting name including the colon, e.g. "position:".</param>
/// <returns>
/// The trimmed value, cut after the '%' of a "%," sequence and at the first space;
/// <c>null</c> when the tag does not occur.
/// </returns>
private static string GetTag(string s, string tag)
{
    var tagStart = s.IndexOf(tag, StringComparison.Ordinal);
    if (tagStart < 0)
    {
        return null;
    }

    var value = s.Substring(tagStart + tag.Length).Trim();

    // "20%,line-left" style values: keep the percentage only.
    var percentComma = value.IndexOf("%,", StringComparison.Ordinal);
    if (percentComma >= 0)
    {
        value = value.Substring(0, percentComma + 1);
    }

    // The value ends where the next setting begins.
    var firstSpace = value.IndexOf(' ');
    return firstSpace >= 0 ? value.Substring(0, firstSpace) : value;
}
2016-02-08 21:11:03 +01:00
// Patterns used by RemoveNativeFormatting; compiled once instead of on every call.
private static readonly Regex RegexNativeColorClasses = new Regex(@"<c.[a-z0-9_\.]*>", RegexOptions.Compiled);
private static readonly Regex RegexNativeClassTags = new Regex(@"\</?c([a-zA-Z\._\d]*)\>", RegexOptions.Compiled);
private static readonly Regex RegexNativeInlineTimeCodes = new Regex(@"\<\d+:\d+:\d+\.\d+\>", RegexOptions.Compiled); // <00:00:10.049>
private static readonly Regex RegexNativeAlignmentTagThenWhiteSpace = new Regex(@"(\{\\an\d\})[\s\r\n]+", RegexOptions.Compiled); // {\an8} followed by white space

/// <summary>
/// Removes WebVTT-specific markup from every paragraph that contains '&lt;' or '&amp;':
/// directional-mark entities, known-language class tags, voice/ruby/span tags, remaining class tags
/// and inline time codes. Class tags that name a colour are converted to HTML font tags.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraph texts are rewritten in place.</param>
/// <param name="newFormat">Not used by this implementation.</param>
public override void RemoveNativeFormatting(Subtitle subtitle, SubtitleFormat newFormat)
{
    foreach (Paragraph p in subtitle.Paragraphs)
    {
        if (p.Text.IndexOf('<') < 0 && p.Text.IndexOf('&') < 0)
        {
            continue;
        }

        // Drop the directional-mark entities while they are still in entity form, i.e. before the HTML decode below.
        var text = p.Text.Replace("&rlm;", string.Empty).Replace("&lrm;", string.Empty);

        foreach (var knownLanguage in KnownLanguages)
        {
            text = text.Replace("<c." + knownLanguage + ">", string.Empty).Replace("</c." + knownLanguage + ">", string.Empty);
        }

        text = System.Net.WebUtility.HtmlDecode(text);

        // Convert colour classes such as <c.red.bold> to <font color="red">. The same pattern is used for
        // every iteration so that a second multi-class tag in the paragraph is converted as well.
        var match = RegexNativeColorClasses.Match(text);
        while (match.Success)
        {
            var color = FindBestColorTagOrDefault(match.Value.Substring(3, match.Value.Length - 4));
            if (color == null)
            {
                break; // no colour in this tag; whatever is left is removed with the other class tags below
            }

            text = text.Remove(match.Index, match.Length).Insert(match.Index, "<font color=\"" + color + "\">");
            var endIndex = text.IndexOf("</c>", match.Index, StringComparison.OrdinalIgnoreCase);
            if (endIndex >= 0)
            {
                text = text.Remove(endIndex, 4).Insert(endIndex, "</font>");
            }

            match = RegexNativeColorClasses.Match(text);
        }

        text = RemoveTag("v", text);
        text = RemoveTag("rt", text);
        text = RemoveTag("ruby", text);
        text = RemoveTag("span", text);
        text = RegexNativeClassTags.Replace(text, string.Empty).Trim();
        text = RegexNativeInlineTimeCodes.Replace(text, string.Empty).Trim();
        text = RegexNativeAlignmentTagThenWhiteSpace.Replace(text, "$1");
        p.Text = text;
    }
}
2019-02-09 18:10:47 +01:00
/// <summary>
/// Picks a colour from a dot-separated list of class names, checking the last class first.
/// </summary>
/// <param name="tag">Class list such as "red.bold" or "color008000".</param>
/// <returns>
/// The lower-cased class name when it is one of the default colour classes, "#" plus the hex digits for a
/// "colorXXXXXX" class, or <c>null</c> when no class names a colour.
/// </returns>
private string FindBestColorTagOrDefault(string tag)
{
    var classNames = tag.Split('.');
    for (var i = classNames.Length - 1; i >= 0; i--)
    {
        var name = classNames[i].ToLowerInvariant();
        if (DefaultColorClasses.ContainsKey(name))
        {
            return name;
        }

        // e.g. color008000 - an ordinal comparison keeps the prefix test exactly five characters long,
        // which the Substring(5) below relies on.
        if (name.Length > 6 && name.StartsWith("color", StringComparison.Ordinal) && Utilities.IsHex(name.Substring(5)))
        {
            return "#" + name.Substring(5);
        }
    }

    return null;
}
2017-11-23 21:48:25 +01:00
// NOTE(review): in both patterns the '.' after "<c" is an unescaped regex wildcard, so any single
// character is accepted in that position - confirm whether a literal dot was intended.

/// <summary>Class tag made of lower-case letters only, e.g. &lt;c.red&gt;.</summary>
private static readonly Regex RegexWebVttColor = new Regex(
    @"<c.[a-z]*>",
    RegexOptions.Compiled);

/// <summary>Class tag made of lower-case letters followed by digits, e.g. &lt;c.color008000&gt;.</summary>
private static readonly Regex RegexWebVttColorHex = new Regex(
    @"<c.[a-z]*\d+>",
    RegexOptions.Compiled);
2017-11-23 21:48:25 +01:00
2019-01-08 18:30:44 +01:00
/// <summary>
/// Converts WebVTT colour class tags to HTML font tags: first the letters-only form,
/// then the form that ends in digits.
/// </summary>
/// <param name="text">Cue text.</param>
/// <returns>The text with the colour class tags replaced.</returns>
internal static string ColorWebVttToHtml(string text)
{
    return RunColorRegEx(RunColorRegEx(text, RegexWebVttColor), RegexWebVttColorHex);
}
/// <summary>
/// Replaces every class tag matched by <paramref name="regex"/> with a font tag, and the next
/// "&lt;/c&gt;" after it with "&lt;/font&gt;". Tags whose class is a known language are left untouched.
/// </summary>
/// <param name="input">Cue text.</param>
/// <param name="regex">Pattern that matches an opening class tag.</param>
/// <returns>The converted text.</returns>
private static string RunColorRegEx(string input, Regex regex)
{
    var output = input;

    // After each tag the search resumes one character past where that tag started.
    for (var match = regex.Match(output); match.Success; match = regex.Match(output, match.Index + 1))
    {
        var tag = match.Value;

        // "<c.colorRRGGBB>" becomes "#RRGGBB"; any other class name is used as the colour as it stands.
        var color = tag.StartsWith("<c.color", StringComparison.Ordinal)
            ? "#" + tag.Substring(8, tag.Length - 9)
            : tag.Substring(3, tag.Length - 4);

        if (Array.IndexOf(KnownLanguages, color) >= 0)
        {
            continue;
        }

        output = output.Remove(match.Index, match.Length).Insert(match.Index, "<font color=\"" + color + "\">");

        var closeIndex = output.IndexOf("</c>", match.Index, StringComparison.OrdinalIgnoreCase);
        if (closeIndex >= 0)
        {
            output = output.Remove(closeIndex, 4).Insert(closeIndex, "</font>");
        }
    }

    return output;
}
/// <summary>Font tag with a quoted, letters-only colour: &lt;font color="red"&gt;.</summary>
private static readonly Regex RegexHtmlColor = new Regex(@"<font color=""[a-z]*"">", RegexOptions.Compiled);

/// <summary>Font tag with an unquoted, letters-only colour: &lt;font color=red&gt;.</summary>
private static readonly Regex RegexHtmlColor2 = new Regex(@"<font color=[a-z]*>", RegexOptions.Compiled);

/// <summary>Font tag with a quoted hex colour: &lt;font color="#ff0000"&gt;.</summary>
private static readonly Regex RegexHtmlColor3 = new Regex(@"<font color=""#[ABCDEFabcdef\d]*"">", RegexOptions.Compiled);
2017-11-23 21:48:25 +01:00
/// <summary>
/// Converts HTML font colour tags to WebVTT class tags. Closing font tags become "&lt;/c&gt;";
/// named colours become "&lt;c.name&gt;"; hex colours become the close default colour class when
/// one exists and "&lt;c.colorRRGGBB&gt;" otherwise.
/// </summary>
/// <param name="text">Cue text with HTML font tags.</param>
/// <returns>The text with WebVTT class tags.</returns>
private static string ColorHtmlToWebVtt(string text)
{
    var result = text.Replace("</font>", "</c>");

    // <font color="name"> : the name sits between the 13-character prefix and the closing quote + '>'.
    result = RegexHtmlColor.Replace(result, m => "<c." + m.Value.Substring(13, m.Value.Length - 15) + ">");

    // <font color=name> : 12-character prefix, closing '>'.
    result = RegexHtmlColor2.Replace(result, m => "<c." + m.Value.Substring(12, m.Value.Length - 13) + ">");

    // <font color="#hex"> : 14-character prefix, closing quote + '>'.
    result = RegexHtmlColor3.Replace(result, m =>
    {
        var hex = m.Value.Substring(14, m.Value.Length - 16);
        var namedColor = GetCloseColor(hex);
        return namedColor != null
            ? "<c." + namedColor + ">"
            : "<c.color" + hex + ">";
    });

    return result;
}
2019-02-09 18:10:47 +01:00
/// <summary>
/// Finds the first default colour class whose red, green and blue components each differ
/// by at most 25 from the given hex colour.
/// </summary>
/// <param name="tag">Hex colour digits, with or without a leading '#'.</param>
/// <returns>The class name, or <c>null</c> when none is close enough or the colour cannot be parsed.</returns>
private static string GetCloseColor(string tag)
{
    const int tolerance = 25;

    try
    {
        var wanted = ColorTranslator.FromHtml("#" + tag.Trim('#'));
        return DefaultColorClasses
            .Where(entry => Math.Abs(entry.Value.R - wanted.R) <= tolerance &&
                            Math.Abs(entry.Value.G - wanted.G) <= tolerance &&
                            Math.Abs(entry.Value.B - wanted.B) <= tolerance)
            .Select(entry => entry.Key)
            .FirstOrDefault();
    }
    catch
    {
        // An unparsable colour simply has no named equivalent.
        return null;
    }
}
2016-02-08 21:11:03 +01:00
/// <summary>
/// Collects the distinct voice names used in "&lt;v Name&gt;" tags, in order of first appearance.
/// </summary>
/// <param name="subtitle">Subtitle to scan; may be <c>null</c>.</param>
/// <returns>The voice names; empty when the subtitle or its paragraph list is <c>null</c>.</returns>
public static List<string> GetVoices(Subtitle subtitle)
{
    var voices = new List<string>();
    if (subtitle?.Paragraphs == null)
    {
        return voices;
    }

    foreach (Paragraph p in subtitle.Paragraphs)
    {
        var text = p.Text;
        for (var open = text.IndexOf("<v ", StringComparison.Ordinal);
             open >= 0;
             open = text.IndexOf("<v ", open + 1, StringComparison.Ordinal))
        {
            var close = text.IndexOf('>', open);
            if (close <= open)
            {
                continue; // unterminated tag
            }

            // Everything between "<v" and '>' is the voice name.
            var voice = text.Substring(open + 2, close - open - 2).Trim();
            if (!voices.Contains(voice))
            {
                voices.Add(voice);
            }
        }
    }

    return voices;
}
/// <summary>
/// Removes every opening "&lt;tag ...&gt;" (tag name followed by a space and an annotation) from the text and,
/// when at least one was removed, every closing "&lt;/tag&gt;" as well.
/// </summary>
/// <remarks>
/// All opening tags are removed, not just the first one, so a cue with several voices
/// ("&lt;v A&gt;..&lt;/v&gt; &lt;v B&gt;..&lt;/v&gt;") is not left with an opening tag that has lost its closing tag.
/// </remarks>
/// <param name="tag">Tag name without brackets, e.g. "v".</param>
/// <param name="text">Text to clean.</param>
/// <returns>The text without the tags; unchanged when no complete opening tag is present.</returns>
public static string RemoveTag(string tag, string text)
{
    var openingPrefix = "<" + tag + " ";
    var result = text;
    var removedAny = false;

    var start = result.IndexOf(openingPrefix, StringComparison.Ordinal);
    while (start >= 0)
    {
        var end = result.IndexOf('>', start);
        if (end < 0)
        {
            break; // unterminated tag: leave the remainder untouched
        }

        result = result.Remove(start, end - start + 1);
        removedAny = true;
        start = result.IndexOf(openingPrefix, start, StringComparison.Ordinal);
    }

    return removedAny
        ? result.Replace("</" + tag + ">", string.Empty)
        : result;
}
2019-08-27 19:43:51 +02:00
/// <summary>
/// Escapes '&lt;', '&gt;' and '&amp;' in cue text as "&amp;lt;", "&amp;gt;" and "&amp;amp;", while leaving
/// recognised tags and existing entities untouched.
/// </summary>
/// <remarks>
/// Recognised as markup: &lt;i&gt;, &lt;b&gt;, &lt;u&gt;, &lt;c&gt;, &lt;v&gt;; any closing tag ("&lt;/"); and tags starting with
/// &lt;ruby, &lt;font, &lt;v, &lt;c or &lt;lang followed by '.' or ' ' (ruby/font need no separator).
/// An ampersand is kept as-is when it starts "&amp;lrm;", "&amp;amp;", "&amp;lt;", "&amp;gt;" or any entity
/// made of two or three letters followed by ';'.
/// </remarks>
/// <param name="input">Cue text.</param>
/// <returns>The escaped text; the same instance when nothing needs escaping.</returns>
internal static string EscapeEncodeText(string input)
{
    if (input.IndexOfAny(new[] { '<', '>', '&' }) < 0)
    {
        return input;
    }

    var sb = new StringBuilder(input.Length);
    var max = input.Length;
    var i = 0;
    var tagOn = false; // true while inside a recognised tag, so that its closing '>' is not escaped
    while (i < max)
    {
        var ch = input[i];
        if (ch == '<')
        {
            if (StartsWithAnyAt(input, i, "<i>", "<b>", "<u>", "<c>", "<v>"))
            {
                sb.Append(input, i, 3);
                i += 3;
            }
            else if (StartsWithAnyAt(input, i, "</"))
            {
                sb.Append("</");
                i += 2;
                tagOn = true;
            }
            else if (StartsWithAnyAt(input, i, "<ruby", "<font", "<v.", "<v ", "<c.", "<c ", "<lang.", "<lang "))
            {
                sb.Append(ch);
                i++;
                tagOn = true;
            }
            else
            {
                sb.Append("&lt;");
                i++;
            }
        }
        else if (ch == '>')
        {
            if (tagOn)
            {
                sb.Append(ch);
                tagOn = false;
            }
            else
            {
                sb.Append("&gt;");
            }

            i++;
        }
        else if (ch == '&')
        {
            var remaining = max - i;
            var isEntity =
                StartsWithAnyAt(input, i, "&lrm;", "&amp;", "&lt;", "&gt;") ||
                remaining > 3 && input[i + 3] == ';' && char.IsLetter(input[i + 2]) && char.IsLetter(input[i + 1]) ||
                remaining > 4 && input[i + 4] == ';' && char.IsLetter(input[i + 3]) && char.IsLetter(input[i + 2]) && char.IsLetter(input[i + 1]);

            if (isEntity)
            {
                sb.Append(ch);
            }
            else
            {
                sb.Append("&amp;");
            }

            i++;
        }
        else
        {
            sb.Append(ch);
            i++;
        }
    }

    return sb.ToString();
}

// True when 'text' contains one of 'prefixes' at position 'index' (ordinal, ignoring case), without allocating a substring.
private static bool StartsWithAnyAt(string text, int index, params string[] prefixes)
{
    foreach (var prefix in prefixes)
    {
        if (string.Compare(text, index, prefix, 0, prefix.Length, StringComparison.OrdinalIgnoreCase) == 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Reverses the cue text escaping: "&amp;gt;", "&amp;lt;" and "&amp;amp;" become '&gt;', '&lt;' and '&amp;'.
/// </summary>
/// <param name="input">Escaped cue text.</param>
/// <returns>The decoded text.</returns>
internal static string EscapeDecodeText(string input)
{
    return input
        .Replace("&gt;", ">")
        .Replace("&lt;", "<")
        .Replace("&amp;", "&"); // last, so that "&amp;lt;" decodes to the literal text "&lt;" and not to '<'
}
2017-11-23 18:56:45 +01:00
/// <summary>
/// Parses a WebVTT time stamp of the form hh:mm:ss.mmm. Anything after the first four
/// fields (for example cue settings following the end time) is ignored.
/// </summary>
/// <param name="time">Time stamp text; surrounding white space is allowed.</param>
/// <returns>The parsed time code.</returns>
/// <exception cref="FormatException">A field is not an integer.</exception>
/// <exception cref="IndexOutOfRangeException">Fewer than four fields are present.</exception>
internal static TimeCode GetTimeCodeFromString(string time)
{
    // hh:mm:ss.mmm
    var fields = time.Trim().Split(':', '.', ' ');

    // File content is machine-readable, so parse it independently of the user's regional settings.
    var culture = CultureInfo.InvariantCulture;
    return new TimeCode(int.Parse(fields[0], culture),
                        int.Parse(fields[1], culture),
                        int.Parse(fields[2], culture),
                        int.Parse(fields[3], culture));
}
}
}