SubtitleEdit/libse/Utilities.cs
2018-09-20 00:05:55 +02:00

2321 lines
97 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using Nikse.SubtitleEdit.Core.ContainerFormats.Matroska;
using Nikse.SubtitleEdit.Core.SubtitleFormats;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Net;
using System.Reflection;
using System.Security.Authentication;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Xml;
namespace Nikse.SubtitleEdit.Core
{
public static class Utilities
{
/// <summary>
/// Font name constant ("Times New Roman").
/// NOTE(review): judging by the identifier this is meant for Windows XP/2000; its usage is not visible in this part of the file.
/// </summary>
public const string WinXP2KUnicodeFontName = "Times New Roman";
/// <summary>
/// Cached environment new line characters for faster lookup.
/// </summary>
public static readonly char[] NewLineChars = { '\r', '\n' };
// TODO: Change to IReadOnlyList in .net >= 4.5
/// <summary>
/// File extensions (each with a leading dot) that are used by <see cref="GetVideoFileFilter"/>
/// to build the video part of a file dialog filter.
/// </summary>
public static ICollection<string> VideoFileExtensions { get; } = new List<string>
{ ".avi", ".mkv", ".wmv", ".mpg", ".mpeg", ".divx", ".mp4", ".asf", ".flv",".mov", ".m4v", ".vob", ".ogv", ".webm", ".ts", ".m2ts", ".avs", ".mxf" };
/// <summary>
/// Builds a file dialog filter string of the form "description|pattern|description|pattern...".
/// The first entry lists every extension from <see cref="VideoFileExtensions"/>, an optional
/// second entry lists a fixed set of audio extensions, and the last entry matches all files.
/// </summary>
/// <param name="includeAudioFiles">When true, the audio files entry is added between the video and "all files" entries.</param>
/// <returns>The assembled filter string.</returns>
public static string GetVideoFileFilter(bool includeAudioFiles)
{
    var general = Configuration.Settings.Language.General;
    var filter = new StringBuilder();

    // Video entry: "<description>|*.avi;*.mkv;..."
    filter.Append(general.VideoFiles + "|");
    bool isFirst = true;
    foreach (string videoExtension in VideoFileExtensions)
    {
        if (!isFirst)
        {
            filter.Append(';');
        }
        filter.Append('*').Append(videoExtension);
        isFirst = false;
    }

    if (includeAudioFiles)
    {
        filter.Append('|')
              .Append(general.AudioFiles)
              .Append("|*.mp3;*.wav;*.wma;*.ogg;*.mpa;*.m4a;*.ape;*.aiff;*.flac;*.aac;*.ac3;*.mka");
    }

    filter.Append('|')
          .Append(general.AllFiles)
          .Append("|*.*");
    return filter.ToString();
}
/// <summary>
/// Tells whether <paramref name="s"/> can be parsed as a 32-bit integer by <see cref="int.TryParse(string, out int)"/>.
/// </summary>
/// <param name="s">Text to test; null yields false.</param>
/// <returns>True when parsing succeeds, otherwise false.</returns>
public static bool IsInteger(string s)
{
    int ignoredValue; // only the success flag matters
    bool parsed = int.TryParse(s, out ignoredValue);
    return parsed;
}
/// <summary>
/// Looks through <c>SubtitleFormat.AllSubtitleFormats</c> for the first format whose
/// <c>FriendlyName</c> or <c>Name</c> equals <paramref name="friendlyName"/>.
/// </summary>
/// <param name="friendlyName">The friendly name (or plain name) to look for.</param>
/// <returns>The first matching format, or null when none matches.</returns>
public static SubtitleFormat GetSubtitleFormatByFriendlyName(string friendlyName)
{
    foreach (SubtitleFormat candidate in SubtitleFormat.AllSubtitleFormats)
    {
        bool isMatch = candidate.FriendlyName == friendlyName || candidate.Name == friendlyName;
        if (!isMatch)
        {
            continue;
        }
        return candidate;
    }
    return null;
}
/// <summary>
/// Formats a byte count for display as "N bytes", "N kb", "N.N mb" or "N.N gb".
/// Kilobytes are shown using integer division; megabytes and gigabytes with one decimal.
/// </summary>
/// <param name="fileSize">Size in bytes.</param>
/// <returns>The display text.</returns>
public static string FormatBytesToDisplayFileSize(long fileSize)
{
    const long kilo = 1024;
    const long mega = kilo * 1024;
    const long giga = mega * 1024;

    if (fileSize <= kilo)
    {
        return $"{fileSize} bytes";
    }
    if (fileSize <= mega)
    {
        return $"{fileSize / kilo} kb";
    }

    // Single precision is kept on purpose so the rounding matches the previous output exactly.
    float size = fileSize;
    if (fileSize <= giga)
    {
        return $"{size / mega:0.0} mb";
    }
    return $"{size / giga:0.0} gb";
}
/// <summary>
/// Fetches the resource at <paramref name="address"/> as text through a <see cref="WebClient"/>
/// whose proxy is taken from <see cref="GetProxy"/>, and returns it with surrounding white space trimmed.
/// </summary>
/// <param name="address">The URI to download.</param>
/// <param name="encoding">Optional encoding assigned to the client before downloading; the client's own default is used when null.</param>
/// <returns>The trimmed response text.</returns>
public static string DownloadString(string address, Encoding encoding = null)
{
    using (var client = new WebClient())
    {
        client.Proxy = GetProxy();
        if (encoding != null)
        {
            client.Encoding = encoding;
        }

        string content = client.DownloadString(address);
        return content.Trim();
    }
}
/// <summary>
/// Configures <see cref="ServicePointManager.SecurityProtocol"/> so that outgoing connections
/// may use TLS 1.0 and TLS 1.2 (GitHub requires TLS 1.2).
/// Does nothing when the OS major version is below 6, where TLS 1.2 is not available.
/// </summary>
/// <remarks>
/// SSL 3.0 is intentionally no longer enabled: the protocol is deprecated and insecure,
/// and leaving it on allows a downgrade to it.
/// </remarks>
public static void SetSecurityProtocol()
{
    if (Environment.OSVersion.Version.Major < 6)
    {
        return; // don't try TLS 1.2 on WinXP as it does not exist and will crash
    }

    // TLS 1.2 has the value 0x0C00; the cast is used because the enum member is not
    // available on older framework versions. TODO: use SecurityProtocolType.Tls12 once the target framework has it.
    const SecurityProtocolType tls12 = (SecurityProtocolType)0x00000C00;
    ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | tls12;
}
/// <summary>
/// Creates a <see cref="WebProxy"/> from the proxy settings in <c>Configuration.Settings.Proxy</c>.
/// </summary>
/// <returns>
/// Null when no proxy address is configured. Otherwise a proxy for that address which uses
/// default credentials when no user name is configured, or explicit credentials
/// (with the domain when one is configured) when a user name is present.
/// </returns>
public static WebProxy GetProxy()
{
    var proxySettings = Configuration.Settings.Proxy;
    if (string.IsNullOrEmpty(proxySettings.ProxyAddress))
    {
        return null;
    }

    var webProxy = new WebProxy(proxySettings.ProxyAddress);
    if (string.IsNullOrEmpty(proxySettings.UserName))
    {
        webProxy.UseDefaultCredentials = true;
        return webProxy;
    }

    webProxy.Credentials = string.IsNullOrEmpty(proxySettings.Domain)
        ? new NetworkCredential(proxySettings.UserName, proxySettings.DecodePassword())
        : new NetworkCredential(proxySettings.UserName, proxySettings.DecodePassword(), proxySettings.Domain);
    return webProxy;
}
/// <summary>
/// Tells whether the character at <paramref name="position"/> is a ',' or '.' that sits
/// directly between two digits (as in "1,5" or "3.14").
/// </summary>
/// <param name="s">Text to inspect; null or white space yields false.</param>
/// <param name="position">Index of the candidate separator; it must have a character on both sides.</param>
/// <returns>True when the separator is surrounded by digits.</returns>
private static bool IsPartOfNumber(string s, int position)
{
    if (string.IsNullOrWhiteSpace(s))
    {
        return false;
    }

    bool hasBothNeighbours = position + 1 < s.Length && position > 0;
    if (!hasBothNeighbours)
    {
        return false;
    }

    char separator = s[position];
    if (separator != ',' && separator != '.')
    {
        return false;
    }

    return char.IsDigit(s[position - 1]) && char.IsDigit(s[position + 1]);
}
/// <summary>
/// Tells whether the characters immediately before and after <paramref name="position"/> are both digits.
/// </summary>
/// <param name="s">Text to inspect; null or empty yields false.</param>
/// <param name="position">Index whose neighbours are checked; must be at least 1 and leave one character after it.</param>
/// <returns>True when both neighbours exist and are digits.</returns>
public static bool IsBetweenNumbers(string s, int position)
{
    bool hasBothNeighbours = !string.IsNullOrEmpty(s) && position >= 1 && position + 2 <= s.Length;
    return hasBothNeighbours
           && char.IsDigit(s[position - 1])
           && char.IsDigit(s[position + 1]);
}
/// <summary>
/// Auto-breaks <paramref name="text"/> using the maximum line length and the
/// "merge lines shorter than" threshold from the current configuration.
/// </summary>
/// <param name="text">Subtitle text to break.</param>
/// <param name="language">Language code passed on to the full overload.</param>
/// <returns>The result of the four-argument <c>AutoBreakLine</c> overload.</returns>
public static string AutoBreakLine(string text, string language)
{
    var maximumLength = Configuration.Settings.General.SubtitleLineMaximumLength;
    var mergeLinesShorterThan = Configuration.Settings.Tools.MergeLinesShorterThan;
    return AutoBreakLine(text, maximumLength, mergeLinesShorterThan, language);
}
/// <summary>
/// Auto-breaks <paramref name="text"/> without a language (an empty language string is passed on).
/// </summary>
/// <param name="text">Subtitle text to break.</param>
/// <returns>The result of the two-argument <c>AutoBreakLine</c> overload.</returns>
public static string AutoBreakLine(string text) => AutoBreakLine(text, string.Empty);
/// <summary>
/// Decides whether a line break may be placed at <paramref name="index"/> in <paramref name="s"/>.
/// The character at the index must be a carriage return, line feed, tab or space, and the text
/// before it must not end with something a break should not follow.
/// </summary>
/// <param name="s">The (tag-free) text being broken.</param>
/// <param name="index">Candidate break index; out-of-range values yield false.</param>
/// <param name="language">Language used to select the "no break after" list when that feature is enabled.</param>
/// <returns>True when breaking at the index is acceptable.</returns>
private static bool CanBreak(string s, int index, string language)
{
    if (index < 0 || index >= s.Length)
    {
        return false;
    }

    char breakChar = s[index];
    bool isWhiteSpace = breakChar == '\r' || breakChar == '\n' || breakChar == '\t' || breakChar == ' ';
    if (!isWhiteSpace)
    {
        return false;
    }

    // Some words we don't like breaking after
    string textBefore = s.Substring(0, index);
    if (Configuration.Settings.Tools.UseNoLineBreakAfter)
    {
        foreach (NoBreakAfterItem noBreakItem in NoBreakAfterList(language))
        {
            if (noBreakItem.IsMatch(textBefore))
            {
                return false;
            }
        }
    }
    else if (textBefore.EndsWith(" mr.", StringComparison.OrdinalIgnoreCase) ||
             textBefore.EndsWith(" dr.", StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    // Do not separate a dialog dash from the sentence end preceding it.
    foreach (string dialogEnding in new[] { "? -", "! -", ". -" })
    {
        if (textBefore.EndsWith(dialogEnding, StringComparison.Ordinal))
        {
            return false;
        }
    }
    return true;
}
// Language of the list currently held in _lastNoBreakAfterList (null until a list has been loaded).
private static string _lastNoBreakAfterListLanguage;
// Most recently loaded "no break after" items; reused while the same language is requested.
private static List<NoBreakAfterItem> _lastNoBreakAfterList = new List<NoBreakAfterItem>();
/// <summary>
/// Returns the "no break after" items for a language, loading them from
/// "[DictionaryFolder][languageName]_NoBreakAfterList.xml" unless that language is the one loaded last.
/// </summary>
/// <param name="languageName">Language name used in the file name; null or empty yields an empty list.</param>
/// <returns>The items for the language (empty when the file does not exist).</returns>
private static IEnumerable<NoBreakAfterItem> NoBreakAfterList(string languageName)
{
    if (string.IsNullOrEmpty(languageName))
    {
        return new List<NoBreakAfterItem>();
    }
    if (languageName == _lastNoBreakAfterListLanguage)
    {
        return _lastNoBreakAfterList;
    }

    // The cache field is replaced first and then filled in place.
    var items = new List<NoBreakAfterItem>();
    _lastNoBreakAfterList = items;

    // load words via xml
    string fileName = DictionaryFolder + languageName + "_NoBreakAfterList.xml";
    var xml = new XmlDocument();
    if (File.Exists(fileName))
    {
        xml.Load(fileName);
        foreach (XmlNode entry in xml.DocumentElement)
        {
            string entryText = entry.InnerText;
            if (string.IsNullOrEmpty(entryText))
            {
                continue;
            }

            var regExAttribute = entry.Attributes?["RegEx"];
            bool isRegEx = regExAttribute != null && regExAttribute.InnerText.Equals("true", StringComparison.OrdinalIgnoreCase);
            if (isRegEx)
            {
                var pattern = new Regex(entryText, RegexOptions.Compiled);
                items.Add(new NoBreakAfterItem(pattern, entryText));
            }
            else
            {
                items.Add(new NoBreakAfterItem(" " + entryText.TrimStart()));
            }
        }
    }

    _lastNoBreakAfterListLanguage = languageName;
    return _lastNoBreakAfterList;
}
/// <summary>
/// Breaks <paramref name="text"/> into three or more lines when the regular two-line
/// auto-break cannot keep the lines within <paramref name="maximumLineLength"/>.
/// </summary>
/// <param name="text">Subtitle text; null or text shorter than 3 characters is returned unchanged.</param>
/// <param name="maximumLineLength">Maximum allowed length of each resulting line.</param>
/// <param name="language">Language code passed on to <c>AutoBreakLine</c>.</param>
/// <returns>
/// The two-line result when its first two lines fit, otherwise the text split into the smallest
/// number of lines (three or more) for which every line fits; the original text when no such split is found.
/// </returns>
public static string AutoBreakLineMoreThanTwoLines(string text, int maximumLineLength, string language)
{
    if (text == null || text.Length < 3)
    {
        return text;
    }

    // Try the regular two-line break first and keep it when the lines fit.
    string s = AutoBreakLine(text, 0, 0, language);
    var arr = s.SplitToLines();
    if ((arr.Count < 2 && arr[0].Length <= maximumLineLength) || (arr[0].Length <= maximumLineLength && arr[1].Length <= maximumLineLength))
    {
        return s;
    }

    // Strip formatting tags, remembering each tag by its position in the tag-free text.
    s = RemoveLineBreaks(s);
    var htmlTags = new Dictionary<int, string>();
    var sb = new StringBuilder(s.Length);
    int six = 0;
    while (six < s.Length)
    {
        var letter = s[six];
        bool tagFound = false;
        if (letter == '<')
        {
            // Take the remainder once instead of once per prefix test.
            string rest = s.Substring(six);
            tagFound = rest.StartsWith("<font", StringComparison.OrdinalIgnoreCase)
                       || rest.StartsWith("</font", StringComparison.OrdinalIgnoreCase)
                       || rest.StartsWith("<u", StringComparison.OrdinalIgnoreCase)
                       || rest.StartsWith("</u", StringComparison.OrdinalIgnoreCase)
                       || rest.StartsWith("<b", StringComparison.OrdinalIgnoreCase)
                       || rest.StartsWith("</b", StringComparison.OrdinalIgnoreCase)
                       || rest.StartsWith("<i", StringComparison.OrdinalIgnoreCase)
                       || rest.StartsWith("</i", StringComparison.OrdinalIgnoreCase);
        }

        int endIndex = -1;
        if (tagFound)
        {
            endIndex = s.IndexOf('>', six + 1);
        }

        if (tagFound && endIndex > 0)
        {
            string tag = s.Substring(six, endIndex - six + 1);
            s = s.Remove(six, tag.Length);
            string tagsAtPosition;
            if (htmlTags.TryGetValue(six, out tagsAtPosition))
            {
                htmlTags[six] = tagsAtPosition + tag;
            }
            else
            {
                htmlTags.Add(six, tag);
            }
        }
        else
        {
            sb.Append(letter);
            six++;
        }
    }
    s = sb.ToString();

    // Try an increasing number of lines and, for each, an increasing target length.
    var words = s.Split(' ');
    for (int numberOfLines = 3; numberOfLines < 9999; numberOfLines++)
    {
        int average = s.Length / numberOfLines + 1;
        for (int len = average; len < maximumLineLength; len++)
        {
            List<int> list = SplitToX(words, numberOfLines, len);
            bool allOk = true;
            foreach (var lineLength in list)
            {
                if (lineLength > maximumLineLength)
                {
                    allOk = false;
                    break;
                }
            }

            if (!allOk)
            {
                continue;
            }

            int index = 0;
            foreach (var item in list)
            {
                index += item;

                // A tag may already be stored at the break position (e.g. an opening tag in front
                // of the first word of the new line). Dictionary.Add would throw for the duplicate
                // key, so the line break is put in front of the existing tag(s) instead.
                string tagsAtBreak;
                if (htmlTags.TryGetValue(index, out tagsAtBreak))
                {
                    htmlTags[index] = Environment.NewLine + tagsAtBreak;
                }
                else
                {
                    htmlTags.Add(index, Environment.NewLine);
                }
            }

            s = ReInsertHtmlTags(s, htmlTags);
            s = s.Replace(" " + Environment.NewLine, Environment.NewLine);
            s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
            // Keep closing tags on the line they belong to.
            s = s.Replace(Environment.NewLine + "</i>", "</i>" + Environment.NewLine);
            s = s.Replace(Environment.NewLine + "</b>", "</b>" + Environment.NewLine);
            s = s.Replace(Environment.NewLine + "</u>", "</u>" + Environment.NewLine);
            s = s.Replace(Environment.NewLine + "</font>", "</font>" + Environment.NewLine);
            return s.TrimEnd();
        }
    }
    return text;
}
/// <summary>
/// Distributes <paramref name="words"/> over lines and returns the length of each line,
/// where every word counts as its length plus one (for the following space).
/// A new line is started when adding the next word (plus a margin of 3) would exceed
/// <paramref name="average"/>, as long as fewer than <paramref name="count"/> lines have been completed;
/// once that limit is reached the remainder is added to the last line.
/// </summary>
/// <param name="words">The words of the text, in order.</param>
/// <param name="count">Maximum number of completed lines before the rest is folded into the last one.</param>
/// <param name="average">Target line length.</param>
/// <returns>The line lengths, in order.</returns>
private static List<int> SplitToX(string[] words, int count, int average)
{
    var lineLengths = new List<int>();
    int completedLines = 0;
    int runningLength = 0;

    for (int w = 0; w < words.Length; w++)
    {
        int wordLength = words[w].Length;
        bool exceedsTarget = runningLength + wordLength + 3 > average;
        if (exceedsTarget && completedLines < count)
        {
            lineLengths.Add(runningLength);
            completedLines++;
            runningLength = 0;
        }
        runningLength += wordLength + 1;
    }

    if (completedLines >= count)
    {
        lineLengths[lineLengths.Count - 1] += runningLength;
    }
    else
    {
        lineLengths.Add(runningLength);
    }
    return lineLengths;
}
/// <summary>
/// Removes the existing line breaks from <paramref name="text"/> and inserts at most one new
/// line break at a position searched for around the middle of the tag-free text.
/// Formatting tags (font/u/b/i and ASS "{\...}" blocks) are taken out before the search and put back afterwards.
/// </summary>
/// <param name="text">Subtitle text; null or text shorter than 3 characters is returned unchanged.</param>
/// <param name="maximumLength">Maximum line length used when validating a candidate split position.</param>
/// <param name="mergeLinesShorterThan">When the tag-free, unbroken text is shorter than this, it is returned as one line.</param>
/// <param name="language">Language code; "ar" changes the dialog check below, and the value is passed on to CanBreak.</param>
/// <returns>The re-broken text with trailing white space trimmed, or the original text for two-line dialogs / music lines.</returns>
public static string AutoBreakLine(string text, int maximumLength, int mergeLinesShorterThan, string language)
{
if (text == null || text.Length < 3)
return text;
// do not autobreak dialogs or music symbol
if (text.Contains(Environment.NewLine) && (text.Contains('-') || text.Contains('♪')))
{
var noTagLines = HtmlUtil.RemoveHtmlTags(text, true).SplitToLines();
if (noTagLines.Count == 2)
{
// First line without surrounding white space and trailing quotes.
var arr0 = noTagLines[0].Trim().TrimEnd('"', '\'').TrimEnd();
// NOTE(review): the char literal in the last StartsWith/EndsWith call of the two conditions below
// appears empty in this copy of the file; the page header warns about ambiguous Unicode characters,
// so a dash-like character was presumably lost - confirm against the repository.
if (language == "ar")
{
// For "ar" the dash is looked for at the END of both lines and the punctuation at the START of the first line.
if (arr0.EndsWith('-') && noTagLines[1].TrimStart().EndsWith('-') && arr0.Length > 1 && (".?!)]".Contains(arr0[0]) || arr0.StartsWith("--", StringComparison.Ordinal) || arr0.StartsWith('')))
return text;
}
else
{
// Both lines start with a dash and the first line ends like a finished sentence: keep the dialog as is.
if (arr0.StartsWith('-') && noTagLines[1].TrimStart().StartsWith('-') && arr0.Length > 1 && (".?!)]".Contains(arr0[arr0.Length - 1]) || arr0.EndsWith("--", StringComparison.Ordinal) || arr0.EndsWith('')))
return text;
}
// Keep lines that are wrapped in music symbols.
if (noTagLines[0].StartsWith('♪') && noTagLines[0].EndsWith('♪') || noTagLines[1].StartsWith('♪') && noTagLines[0].EndsWith('♪'))
return text;
}
}
string s = RemoveLineBreaks(text);
// Short enough to stay on one line.
if (HtmlUtil.RemoveHtmlTags(s, true).Length < mergeLinesShorterThan)
{
return s;
}
// Move tags out of the text; htmlTags maps a position in the tag-free text to the tag(s) that belong in front of it.
var htmlTags = new Dictionary<int, string>();
var sb = new StringBuilder();
int six = 0;
while (six < s.Length)
{
var letter = s[six];
bool tagFound = false;
if (letter == '<')
{
string tagString = s.Substring(six);
tagFound = tagString.StartsWith("<font", StringComparison.OrdinalIgnoreCase)
|| tagString.StartsWith("</font", StringComparison.OrdinalIgnoreCase)
|| tagString.StartsWith("<u", StringComparison.OrdinalIgnoreCase)
|| tagString.StartsWith("</u", StringComparison.OrdinalIgnoreCase)
|| tagString.StartsWith("<b", StringComparison.OrdinalIgnoreCase)
|| tagString.StartsWith("</b", StringComparison.OrdinalIgnoreCase)
|| tagString.StartsWith("<i", StringComparison.OrdinalIgnoreCase)
|| tagString.StartsWith("</i", StringComparison.OrdinalIgnoreCase);
}
else if (letter == '{' && s.Substring(six).StartsWith("{\\"))
{
// ASS/SSA override block "{\...}": stored and removed like an HTML tag when a closing '}' exists.
string tagString = s.Substring(six);
var endIndexAssTag = tagString.IndexOf('}') + 1;
if (endIndexAssTag > 0)
{
tagString = tagString.Substring(0, endIndexAssTag);
if (htmlTags.ContainsKey(six))
htmlTags[six] = htmlTags[six] + tagString;
else
htmlTags.Add(six, tagString);
s = s.Remove(six, endIndexAssTag);
continue;
}
}
int endIndex = -1;
if (tagFound)
endIndex = s.IndexOf('>', six + 1);
if (tagFound && endIndex > 0)
{
string tag = s.Substring(six, endIndex - six + 1);
s = s.Remove(six, tag.Length);
if (htmlTags.ContainsKey(six))
htmlTags[six] = htmlTags[six] + tag;
else
htmlTags.Add(six, tag);
}
else
{
// Ordinary character (or a '<' / '{' that is not a complete known tag): keep it.
sb.Append(letter);
six++;
}
}
s = sb.ToString();
int splitPos = -1;
int mid = s.Length / 2;
// try to find " - " with uppercase letter after (dialog)
if (s.Contains(" - "))
{
// Search outwards from the middle, first to the right (mid + j), then to the left (mid - j).
for (int j = 0; j <= (maximumLength / 2) + 5; j++)
{
if (mid + j + 4 < s.Length)
{
if (s[mid + j] == '-' && s[mid + j + 1] == ' ' && s[mid + j - 1] == ' ')
{
string rest = s.Substring(mid + j + 1).TrimStart();
if (rest.Length > 0 && char.IsUpper(rest[0]))
{
splitPos = mid + j;
break;
}
}
}
if (mid - (j + 1) > 4)
{
if (s[mid - j] == '-' && s[mid - j + 1] == ' ' && s[mid - j - 1] == ' ')
{
string rest = s.Substring(mid - j + 1).TrimStart();
if (rest.Length > 0 && char.IsUpper(rest[0]))
{
if (mid - j > 5)
{
// To the left of the middle the dash is only accepted when the text before it ends a sentence
// (optionally followed by a closing double quote).
if ("!?.…".Contains(s[mid - j - 2]))
{
splitPos = mid - j;
break;
}
var first = s.Substring(0, mid - j - 1);
if (first.EndsWith(".\"", StringComparison.Ordinal) || first.EndsWith("!\"", StringComparison.Ordinal) || first.EndsWith("?\"", StringComparison.Ordinal))
{
splitPos = mid - j;
break;
}
}
}
}
}
}
}
if (splitPos == maximumLength + 1 && s[maximumLength] != ' ') // only allow space for last char (as it does not count)
splitPos = -1;
if (splitPos < 0)
{
// Second attempt: break after sentence-ending punctuation within 15 characters of the middle.
const string expectedChars1 = ".!?0123456789";
const string expectedChars2 = ".!?";
for (int j = 0; j < 15; j++)
{
if (mid + j + 1 < s.Length && mid + j > 0)
{
if (expectedChars2.Contains(s[mid + j]) && !IsPartOfNumber(s, mid + j) && CanBreak(s, mid + j + 1, language))
{
splitPos = mid + j + 1;
if (expectedChars1.Contains(s[splitPos]))
{ // do not break double/tripple end lines like "!!!" or "..."
splitPos++;
if (expectedChars1.Contains(s[mid + j + 1]))
splitPos++;
}
break;
}
if (expectedChars2.Contains(s[mid - j]) && !IsPartOfNumber(s, mid - j) && CanBreak(s, mid - j, language))
{
splitPos = mid - j;
splitPos++;
break;
}
}
}
}
// Discard the candidate when it would make either line longer than maximumLength.
if (splitPos > maximumLength) // too long first line
{
if (splitPos != maximumLength + 1 || s[maximumLength] != ' ') // allow for maxlength+1 char to be space (does not count)
splitPos = -1;
}
else if (splitPos >= 0 && s.Length - splitPos > maximumLength) // too long second line
{
splitPos = -1;
}
if (splitPos < 0)
{
// Third attempt: break at punctuation, a comma or a space within 25 characters of the middle.
const string expectedChars1 = ".!?, ";
const string expectedChars2 = " .!?";
const string expectedChars3 = ".!?";
for (int j = 0; j < 25; j++)
{
if (mid + j + 1 < s.Length && mid + j > 0)
{
if (expectedChars1.Contains(s[mid + j]) && !IsPartOfNumber(s, mid + j) && s.Length > mid + j + 2 && CanBreak(s, mid + j, language))
{
splitPos = mid + j;
// Step over up to two following space/punctuation characters so they stay on the first line.
if (expectedChars2.Contains(s[mid + j + 1]))
{
splitPos++;
if (expectedChars2.Contains(s[mid + j + 2]))
splitPos++;
}
break;
}
if (expectedChars1.Contains(s[mid - j]) && !IsPartOfNumber(s, mid - j) && s.Length > mid + j + 2 && CanBreak(s, mid - j, language))
{
splitPos = mid - j;
// Step back over up to three punctuation characters.
if (expectedChars3.Contains(s[splitPos]))
splitPos--;
if (expectedChars3.Contains(s[splitPos]))
splitPos--;
if (expectedChars3.Contains(s[splitPos]))
splitPos--;
break;
}
}
}
}
if (splitPos < 0)
{
// No acceptable position found: a "-" is placed at mid - 1 (via a temporary new line that is swapped
// for "-" after the tags are re-inserted) and the text is split at mid.
// NOTE(review): splitPos is applied below to the string that already contains the re-inserted tags,
// so the split may not line up with the hyphen when tags precede the middle - confirm intended.
splitPos = mid;
s = s.Insert(mid - 1, Environment.NewLine);
s = ReInsertHtmlTags(s, htmlTags);
htmlTags = new Dictionary<int, string>();
s = s.Replace(Environment.NewLine, "-");
}
if (splitPos < s.Length - 2)
s = s.Substring(0, splitPos) + Environment.NewLine + s.Substring(splitPos);
s = ReInsertHtmlTags(s, htmlTags);
// If the second line starts with a closing tag, move that tag to the end of the first line.
var idx = s.IndexOf(Environment.NewLine + "</", StringComparison.Ordinal);
if (idx > 2)
{
var endIdx = s.IndexOf('>', idx + 2);
if (endIdx > idx)
{
var tag = s.Substring(idx + Environment.NewLine.Length, endIdx - (idx + Environment.NewLine.Length) + 1);
s = s.Insert(idx, tag);
s = s.Remove(idx + tag.Length + Environment.NewLine.Length, tag.Length);
}
}
// Remove a single space on either side of the new line.
s = s.Replace(" " + Environment.NewLine, Environment.NewLine);
s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
return s.TrimEnd();
}
/// <summary>
/// Joins the lines of <paramref name="s"/> into a single line. Closing i/b/u/font tags are kept
/// attached to the text they close, an italic section that continues on the next line is merged,
/// and extra spaces are cleaned up.
/// </summary>
/// <param name="s">Text that may contain line breaks and formatting tags.</param>
/// <returns>The text on one line, trimmed.</returns>
public static string RemoveLineBreaks(string s)
{
    string newLine = Environment.NewLine;
    string[] closingTags = { "</i>", "</b>", "</u>", "</font>" };

    s = HtmlUtil.FixUpperTags(s);

    // A closing tag at the start of a line belongs to the end of the previous line.
    foreach (string closingTag in closingTags)
    {
        s = s.Replace(newLine + closingTag, closingTag + newLine);
    }

    // Italic that ends on one line and restarts on the next becomes one continuous italic section.
    s = s.Replace("</i> " + newLine + "<i>", " ");
    s = s.Replace("</i>" + newLine + " <i>", " ");
    s = s.Replace("</i>" + newLine + "<i>", " ");

    s = s.Replace(newLine, " ");

    // Move a space in front of a closing tag to behind it.
    foreach (string closingTag in closingTags)
    {
        s = s.Replace(" " + closingTag, closingTag + " ");
    }

    return s.FixExtraSpaces().Trim();
}
/// <summary>
/// Puts previously removed tags back into <paramref name="s"/>. The keys of
/// <paramref name="htmlTags"/> are positions counted over the characters of <paramref name="s"/>
/// that are not part of <see cref="Environment.NewLine"/>; each value is inserted in front of
/// the character at that position (or at the very end for a key equal to the character count).
/// </summary>
/// <param name="s">Text without the tags.</param>
/// <param name="htmlTags">Tags keyed by position.</param>
/// <returns>The text with tags inserted; <paramref name="s"/> itself when there are no tags.</returns>
private static string ReInsertHtmlTags(string s, Dictionary<int, string> htmlTags)
{
    if (htmlTags.Count == 0)
    {
        return s;
    }

    var result = new StringBuilder(s.Length);
    int position = 0;
    string tagsHere;
    for (int i = 0; i < s.Length; i++)
    {
        char current = s[i];
        bool isNewLineChar = Environment.NewLine.IndexOf(current) >= 0;
        if (!isNewLineChar)
        {
            // New line characters do not advance the position used for the lookup.
            if (htmlTags.TryGetValue(position, out tagsHere))
            {
                result.Append(tagsHere);
            }
            position++;
        }
        result.Append(current);
    }

    // Tags that belong after the last character.
    if (htmlTags.TryGetValue(position, out tagsHere))
    {
        result.Append(tagsHere);
    }
    return result.ToString();
}
/// <summary>
/// Joins a multi-line text into one line separated by single spaces, collapses runs of spaces
/// and merges i/b/u sections that were closed at the end of one line and re-opened on the next.
/// </summary>
/// <param name="text">Text to unbreak.</param>
/// <returns>The original text when it has exactly one line, otherwise the joined line.</returns>
public static string UnbreakLine(string text)
{
    var lines = text.SplitToLines();
    if (lines.Count == 1)
    {
        return text;
    }

    var singleLine = string.Join(" ", lines);

    // Collapse runs of spaces. The search text must be TWO spaces: looking for a single space
    // and replacing it with a single space never changes the string, so the loop would never end.
    // It is written with escapes so the two spaces cannot be collapsed by tooling.
    const string doubleSpace = "\u0020\u0020";
    while (singleLine.Contains(doubleSpace))
    {
        singleLine = singleLine.Replace(doubleSpace, " ");
    }

    if (singleLine.Contains("</")) // Fix tag
    {
        singleLine = singleLine.Replace("</i> <i>", " ");
        singleLine = singleLine.Replace("</i><i>", " ");
        singleLine = singleLine.Replace("</b> <b>", " ");
        singleLine = singleLine.Replace("</b><b>", " ");
        singleLine = singleLine.Replace("</u> <u>", " ");
        singleLine = singleLine.Replace("</u><u>", " ");
    }
    return singleLine;
}
/// <summary>
/// Removes SSA/ASS override blocks from <paramref name="s"/>. Removal starts at the first
/// "{\" and takes everything up to and including the next '}'; after that, every following
/// '{' is treated as the start of another block. Processing stops at a block without a closing '}'.
/// </summary>
/// <param name="s">Text that may contain override blocks.</param>
/// <returns>The text without the removed blocks.</returns>
public static string RemoveSsaTags(string s)
{
    for (int start = s.IndexOf("{\\", StringComparison.Ordinal); start >= 0; start = s.IndexOf('{', start))
    {
        int end = s.IndexOf('}', start + 1);
        if (end < start)
        {
            break; // unterminated block: leave the rest untouched
        }
        s = s.Remove(start, end - start + 1);
    }
    return s;
}
/// <summary>
/// Folder that holds the dictionary files; forwards to <c>Configuration.DictionariesDirectory</c>.
/// Callers in this class append file names directly to the value, so it is presumably expected
/// to end with a directory separator - NOTE(review): confirm in Configuration.
/// </summary>
public static string DictionaryFolder => Configuration.DictionariesDirectory;
/// <summary>
/// Lists the languages for which a "*.dic" file exists in <see cref="DictionaryFolder"/>,
/// skipping files whose name starts with "hyph".
/// </summary>
/// <returns>
/// One entry per dictionary file: "DisplayName [file name]" when the file name (with '_' replaced by '-')
/// is a known culture, otherwise just "[file name]". Empty when the folder does not exist.
/// </returns>
public static List<string> GetDictionaryLanguages()
{
    var languages = new List<string>();
    if (!Directory.Exists(DictionaryFolder))
    {
        return languages;
    }

    foreach (string dictionaryPath in Directory.GetFiles(DictionaryFolder, "*.dic"))
    {
        string code = Path.GetFileNameWithoutExtension(dictionaryPath);
        if (code.StartsWith("hyph", StringComparison.Ordinal))
        {
            continue;
        }

        string entry;
        try
        {
            var culture = CultureInfo.GetCultureInfo(code.Replace('_', '-'));
            entry = culture.DisplayName + " [" + code + "]";
        }
        catch (Exception exception)
        {
            // Unknown culture name: fall back to the bare file name.
            System.Diagnostics.Debug.WriteLine(exception.Message);
            entry = "[" + code + "]";
        }
        languages.Add(entry);
    }
    return languages;
}
/// <summary>
/// Like <see cref="GetDictionaryLanguages"/>, but region information is dropped: the display name
/// is cut at the first '(' and the two-letter ISO language code is shown, so several regional
/// dictionaries of one language yield a single entry.
/// </summary>
/// <returns>
/// Distinct entries of the form "DisplayName [xx]", or "[file name]" for names that are not a known culture.
/// Empty when the folder does not exist.
/// </returns>
public static List<string> GetDictionaryLanguagesCultureNeutral()
{
    var languages = new List<string>();
    if (!Directory.Exists(DictionaryFolder))
    {
        return languages;
    }

    foreach (string dictionaryPath in Directory.GetFiles(DictionaryFolder, "*.dic"))
    {
        string code = Path.GetFileNameWithoutExtension(dictionaryPath);
        if (code.StartsWith("hyph", StringComparison.Ordinal))
        {
            continue;
        }

        string entry;
        try
        {
            var culture = CultureInfo.GetCultureInfo(code.Replace('_', '-'));
            string languageName = culture.DisplayName;
            int regionStart = languageName.IndexOf('(');
            if (regionStart >= 0)
            {
                languageName = languageName.Remove(regionStart).TrimEnd();
            }
            entry = languageName + " [" + culture.TwoLetterISOLanguageName + "]";
        }
        catch (Exception exception)
        {
            // Unknown culture name: fall back to the bare file name.
            System.Diagnostics.Debug.WriteLine(exception.Message);
            entry = "[" + code + "]";
        }

        if (!languages.Contains(entry))
        {
            languages.Add(entry);
        }
    }
    return languages;
}
/// <summary>
/// Calculates the optimal display time for <paramref name="text"/> using the
/// characters-per-second value from the current configuration.
/// </summary>
/// <param name="text">Subtitle text.</param>
/// <returns>The duration returned by the two-argument overload.</returns>
public static double GetOptimalDisplayMilliseconds(string text)
{
    var configuredCharactersPerSecond = Configuration.Settings.General.SubtitleOptimalCharactersPerSeconds;
    return GetOptimalDisplayMilliseconds(text, configuredCharactersPerSecond);
}
/// <summary>
/// Calculates a display duration from the tag-free length of <paramref name="text"/> and a reading speed,
/// limited to the configured minimum and maximum display times.
/// </summary>
/// <param name="text">Subtitle text; tags are removed before counting characters.</param>
/// <param name="optimalCharactersPerSecond">Reading speed; values below 2 or above 100 are replaced by 14.7.</param>
/// <returns>The duration (length / speed * TimeCode.BaseUnit) after applying the configured limits.</returns>
public static double GetOptimalDisplayMilliseconds(string text, double optimalCharactersPerSecond)
{
    bool outOfRange = optimalCharactersPerSecond < 2 || optimalCharactersPerSecond > 100;
    double charactersPerSecond = outOfRange ? 14.7 : optimalCharactersPerSecond;

    int visibleLength = HtmlUtil.RemoveHtmlTags(text, true).Length;
    double duration = (visibleLength / charactersPerSecond) * TimeCode.BaseUnit;

    // The two limits are applied one after the other (minimum first).
    if (duration < Configuration.Settings.General.SubtitleMinimumDisplayMilliseconds)
    {
        duration = Configuration.Settings.General.SubtitleMinimumDisplayMilliseconds;
    }
    if (duration > Configuration.Settings.General.SubtitleMaximumDisplayMilliseconds)
    {
        duration = Configuration.Settings.General.SubtitleMaximumDisplayMilliseconds;
    }
    return duration;
}
/// <summary>
/// Formats a color as "#rrggbb" using two lower-case hexadecimal digits per channel (alpha is not included).
/// </summary>
/// <param name="c">The color to format.</param>
/// <returns>The hexadecimal color string.</returns>
public static string ColorToHex(Color c) => $"#{c.R:x2}{c.G:x2}{c.B:x2}";
/// <summary>
/// Returns the length of the longest line of <paramref name="text"/> after tags have been removed.
/// </summary>
/// <param name="text">Subtitle text, possibly multi-line and with tags.</param>
/// <returns>The longest line length, or 0 when there are no lines.</returns>
public static int GetMaxLineLength(string text)
{
    int longest = 0;
    foreach (string line in HtmlUtil.RemoveHtmlTags(text, true).SplitToLines())
    {
        longest = Math.Max(longest, line.Length);
    }
    return longest;
}
/// <summary>
/// Calculates the reading speed of a paragraph: its character count (white space handling
/// follows the configuration) divided by its duration in seconds.
/// </summary>
/// <param name="paragraph">The paragraph to measure.</param>
/// <returns>Characters per second, or 999 when the duration is shorter than one millisecond.</returns>
public static double GetCharactersPerSecond(Paragraph paragraph)
{
    var displayTime = paragraph.Duration;
    return displayTime.TotalMilliseconds < 1
        ? 999
        : paragraph.Text.CountCharacters(Configuration.Settings.General.CharactersPerSecondsIgnoreWhiteSpace) / displayTime.TotalSeconds;
}
/// <summary>
/// Detects the Mono runtime by checking whether the type "Mono.Runtime" can be resolved.
/// </summary>
/// <returns>True when the type exists, otherwise false.</returns>
public static bool IsRunningOnMono() => Type.GetType("Mono.Runtime") != null;
/// <summary>
/// Opens the help location followed by <paramref name="parameter"/> via <c>Process.Start</c>.
/// The location comes from the language settings; when none is set the online help address is used.
/// </summary>
/// <param name="parameter">Text appended to the help location (e.g. an anchor).</param>
public static void ShowHelp(string parameter)
{
    string configuredHelp = Configuration.Settings.Language.General.HelpFile;
    string helpLocation = string.IsNullOrEmpty(configuredHelp)
        ? "https://www.nikse.dk/SubtitleEdit/Help"
        : configuredHelp;
    System.Diagnostics.Process.Start(helpLocation + parameter);
}
/// <summary>
/// Version of the entry assembly (the process executable) as text.
/// </summary>
public static string AssemblyVersion => Assembly.GetEntryAssembly().GetName().Version.ToString();
/// <summary>
/// Description of the entry assembly, taken from its <see cref="AssemblyDescriptionAttribute"/>.
/// Returns null when the attribute is missing or when there is no entry assembly.
/// </summary>
/// <remarks>
/// Reading the property has no side effects; the assembly name is no longer written to the console.
/// </remarks>
public static string AssemblyDescription
{
    get
    {
        var assembly = Assembly.GetEntryAssembly();
        if (assembly == null)
        {
            // No managed entry point (e.g. hosted by native code): nothing to describe.
            return null;
        }

        var descriptionAttribute = (AssemblyDescriptionAttribute)Attribute.GetCustomAttribute(assembly, typeof(AssemblyDescriptionAttribute));
        return descriptionAttribute?.Description;
    }
}
/// <summary>
/// Removes <paramref name="word"/> from the user dictionary file
/// "[DictionaryFolder][languageName]_user.xml" and saves the remaining words sorted.
/// When the file does not exist, an empty word list is saved.
/// </summary>
/// <param name="word">Word to remove; surrounding white space is ignored and an empty word does nothing.</param>
/// <param name="languageName">Language name used in the file name.</param>
public static void RemoveFromUserDictionary(string word, string languageName)
{
    word = word.Trim();
    if (word.Length == 0)
    {
        return;
    }

    string fileName = DictionaryFolder + languageName + "_user.xml";
    var xml = new XmlDocument();
    if (File.Exists(fileName))
    {
        xml.Load(fileName);
    }
    else
    {
        xml.LoadXml("<words />");
    }

    // Collect every non-empty word except the one being removed.
    var remaining = new List<string>();
    var wordNodes = xml.DocumentElement?.SelectNodes("word");
    if (wordNodes != null)
    {
        foreach (XmlNode wordNode in wordNodes)
        {
            string existing = wordNode.InnerText.Trim();
            if (existing.Length > 0 && existing != word)
            {
                remaining.Add(existing);
            }
        }
    }
    remaining.Sort();

    // Rebuild the document from the sorted list.
    var root = xml.DocumentElement;
    if (root != null)
    {
        root.RemoveAll();
        foreach (string keep in remaining)
        {
            XmlNode element = xml.CreateElement("word");
            element.InnerText = keep;
            root.AppendChild(element);
        }
    }
    xml.Save(fileName);
}
/// <summary>
/// Adds <paramref name="word"/> to the user dictionary file
/// "[DictionaryFolder][languageName]_user.xml" (unless already present) and saves all words sorted.
/// The file is created when it does not exist.
/// </summary>
/// <param name="word">Word to add; surrounding white space is ignored and an empty word does nothing.</param>
/// <param name="languageName">Language name used in the file name.</param>
public static void AddToUserDictionary(string word, string languageName)
{
    word = word.Trim();
    if (word.Length == 0)
    {
        return;
    }

    string fileName = DictionaryFolder + languageName + "_user.xml";
    var xml = new XmlDocument();
    if (File.Exists(fileName))
    {
        xml.Load(fileName);
    }
    else
    {
        xml.LoadXml("<words />");
    }

    var root = xml.DocumentElement;
    if (root != null)
    {
        // Read the existing non-empty words.
        var allWords = new List<string>();
        var wordNodes = root.SelectNodes("word");
        if (wordNodes != null)
        {
            foreach (XmlNode wordNode in wordNodes)
            {
                string existing = wordNode.InnerText.Trim();
                if (existing.Length > 0)
                {
                    allWords.Add(existing);
                }
            }
        }

        if (!allWords.Contains(word))
        {
            allWords.Add(word);
        }
        allWords.Sort();

        // Rebuild the document from the sorted list.
        root.RemoveAll();
        foreach (string entry in allWords)
        {
            XmlNode element = xml.CreateElement("word");
            element.InnerText = entry;
            root.AppendChild(element);
        }
    }
    xml.Save(fileName);
}
/// <summary>
/// Replaces the content of <paramref name="userWordList"/> with the lower-cased, de-duplicated
/// words from the user dictionary file "[DictionaryFolder][languageName]_user.xml".
/// The list is left empty when the file does not exist.
/// </summary>
/// <param name="userWordList">List to fill; it is cleared first.</param>
/// <param name="languageName">Language name used in the file name.</param>
/// <returns>The full name of the user dictionary file (whether or not it exists).</returns>
public static string LoadUserWordList(List<string> userWordList, string languageName)
{
    userWordList.Clear();
    var userWordDictionary = new XmlDocument();
    string userWordListXmlFileName = DictionaryFolder + languageName + "_user.xml";
    if (File.Exists(userWordListXmlFileName))
    {
        userWordDictionary.Load(userWordListXmlFileName);

        // Same null guard as the HashSet overload.
        var nodes = userWordDictionary.DocumentElement?.SelectNodes("word");
        if (nodes != null)
        {
            // Track seen words in a set so de-duplication does not scan the list for every word.
            var seen = new HashSet<string>();
            foreach (XmlNode node in nodes)
            {
                string s = node.InnerText.ToLower();
                if (seen.Add(s))
                {
                    userWordList.Add(s);
                }
            }
        }
    }
    return userWordListXmlFileName;
}
/// <summary>
/// Replaces the content of <paramref name="userWordList"/> with the lower-cased words from the
/// user dictionary file "[DictionaryFolder][languageName]_user.xml".
/// The set is left empty when the file does not exist.
/// </summary>
/// <param name="userWordList">Set to fill; it is cleared first.</param>
/// <param name="languageName">Language name used in the file name.</param>
/// <returns>The full name of the user dictionary file (whether or not it exists).</returns>
public static string LoadUserWordList(HashSet<string> userWordList, string languageName)
{
    userWordList.Clear();
    string fileName = DictionaryFolder + languageName + "_user.xml";
    if (!File.Exists(fileName))
    {
        return fileName;
    }

    var xml = new XmlDocument();
    xml.Load(fileName);
    var wordNodes = xml.DocumentElement?.SelectNodes("word");
    if (wordNodes == null)
    {
        return fileName;
    }

    foreach (XmlNode wordNode in wordNodes)
    {
        // Adding to a set already ignores words that are present.
        userWordList.Add(wordNode.InnerText.ToLower());
    }
    return fileName;
}
// Letter sets derived from the configured upper-case letters. All five are computed once,
// when the class is initialized, so later changes to the setting are not reflected here.

/// <summary>The configured upper-case letters, upper-cased.</summary>
public static readonly string UppercaseLetters = Configuration.Settings.General.UppercaseLetters.ToUpper();
/// <summary>The configured upper-case letters converted to lower case.</summary>
public static readonly string LowercaseLetters = Configuration.Settings.General.UppercaseLetters.ToLower();
/// <summary><see cref="LowercaseLetters"/> followed by the digits 0-9.</summary>
public static readonly string LowercaseLettersWithNumbers = LowercaseLetters + "0123456789";
/// <summary><see cref="UppercaseLetters"/> followed by <see cref="LowercaseLetters"/>.</summary>
public static readonly string AllLetters = UppercaseLetters + LowercaseLetters;
/// <summary><see cref="UppercaseLetters"/> followed by <see cref="LowercaseLettersWithNumbers"/>.</summary>
public static readonly string AllLettersAndNumbers = UppercaseLetters + LowercaseLettersWithNumbers;
/// <summary>
/// Picks one of twenty fixed colors for a user name, based on the sum of the bytes of its
/// UTF-8 encoding, so the same name always gets the same color.
/// </summary>
/// <param name="userName">The user name; null or empty yields <see cref="Color.Pink"/>.</param>
/// <returns>The color for the name.</returns>
public static Color GetColorFromUserName(string userName)
{
    if (string.IsNullOrEmpty(userName))
    {
        return Color.Pink;
    }

    // Order matters: the index is (byte sum) modulo the palette size.
    Color[] palette =
    {
        Color.Red, Color.Blue, Color.Green, Color.DarkCyan, Color.DarkGreen,
        Color.DarkBlue, Color.DarkTurquoise, Color.DarkViolet, Color.DeepPink, Color.DodgerBlue,
        Color.ForestGreen, Color.Fuchsia, Color.DarkOrange, Color.GreenYellow, Color.IndianRed,
        Color.Indigo, Color.LawnGreen, Color.LightBlue, Color.DarkGoldenrod, Color.Magenta
    };

    long byteSum = 0;
    foreach (byte nameByte in Encoding.UTF8.GetBytes(userName))
    {
        byteSum += nameByte;
    }
    return palette[byteSum % palette.Length];
}
/// <summary>
/// Maps a user name to a number from 0 to 7: the sum of the bytes of its UTF-8 encoding modulo 8.
/// </summary>
/// <param name="userName">The user name; null or empty yields 0.</param>
/// <returns>A value in the range 0-7.</returns>
public static int GetNumber0To7FromUserName(string userName)
{
    if (string.IsNullOrEmpty(userName))
    {
        return 0;
    }

    byte[] nameBytes = Encoding.UTF8.GetBytes(userName);
    long byteSum = 0;
    for (int i = 0; i < nameBytes.Length; i++)
    {
        byteSum += nameBytes[i];
    }
    return (int)(byteSum % 8);
}
/// <summary>
/// The lower-case characters treated as vowels (Latin vowels, accented variants and some phonetic symbols).
/// </summary>
public static string LowercaseVowels => "aeiouyæøåéóáôèòæøåäöïɤəɛʊʉɨ";
/// <summary>
/// Counts the non-overlapping occurrences of <paramref name="tag"/> in <paramref name="text"/>
/// using ordinal comparison.
/// </summary>
/// <param name="text">Text to search; null or empty yields 0.</param>
/// <param name="tag">Text to count; null or empty yields 0.</param>
/// <returns>The number of occurrences.</returns>
public static int CountTagInText(string text, string tag)
{
    // An empty tag is "found" at every index without advancing the search position,
    // which would make the loop below run forever - so it counts as no occurrences.
    if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(tag))
    {
        return 0;
    }

    int count = 0;
    int index = text.IndexOf(tag, StringComparison.Ordinal);
    while (index >= 0)
    {
        count++;
        index += tag.Length; // continue after this occurrence (no overlapping matches)
        if (index >= text.Length)
        {
            break;
        }
        index = text.IndexOf(tag, index, StringComparison.Ordinal);
    }
    return count;
}
/// <summary>
/// Counts how many times the character <paramref name="tag"/> occurs in <paramref name="text"/>.
/// </summary>
/// <param name="text">Text to search.</param>
/// <param name="tag">Character to count.</param>
/// <returns>The number of occurrences.</returns>
public static int CountTagInText(string text, char tag)
{
    int occurrences = 0;
    foreach (char current in text)
    {
        if (current == tag)
        {
            occurrences++;
        }
    }
    return occurrences;
}
/// <summary>
/// Tells whether <paramref name="text"/> is wrapped in the given tags: it must start with the
/// start tag (optionally preceded by "- ", "-" or "- ...") and end with the end tag
/// (optionally followed by ".", "!", "?", "..." or "-"). Runs of spaces are collapsed before checking.
/// </summary>
/// <param name="text">Text to inspect; null or white space yields false.</param>
/// <param name="startTag">Opening tag, e.g. the italic start tag.</param>
/// <param name="endTag">Closing tag, e.g. the italic end tag.</param>
/// <returns>True when the text both starts and ends with the tags as described.</returns>
public static bool StartsAndEndsWithTag(string text, string startTag, string endTag)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        return false;
    }
    if (!text.Contains(startTag) || !text.Contains(endTag))
    {
        return false;
    }

    // Collapse runs of spaces. The search text must be TWO spaces: looking for a single space
    // and replacing it with a single space never changes the string, so the loop would never end.
    // It is written with escapes so the two spaces cannot be collapsed by tooling.
    const string doubleSpace = "\u0020\u0020";
    while (text.Contains(doubleSpace))
    {
        text = text.Replace(doubleSpace, " ");
    }

    string[] acceptedStarts =
    {
        startTag,
        "- " + startTag,
        "-" + startTag,
        "- ..." + startTag,
        "- " + startTag + "..." // - <i>...
    };
    string[] acceptedEnds =
    {
        endTag,
        endTag + ".",
        endTag + "!",
        endTag + "?",
        endTag + "...",
        endTag + "-"
    };

    bool isStart = false;
    foreach (string start in acceptedStarts)
    {
        if (text.StartsWith(start, StringComparison.Ordinal))
        {
            isStart = true;
            break;
        }
    }

    bool isEnd = false;
    foreach (string end in acceptedEnds)
    {
        if (text.EndsWith(end, StringComparison.Ordinal))
        {
            isEnd = true;
            break;
        }
    }
    return isStart && isEnd;
}
/// <summary>
/// Finds the paragraph in <paramref name="originalParagraphs"/> that corresponds to
/// <paramref name="paragraph"/> by comparing start times. Tried in order: the paragraph at
/// <paramref name="index"/> (start times less than 50 apart), any paragraph with an equal
/// start time (difference below 0.01), and finally the first paragraph that starts after
/// 200 before and before TimeCode.BaseUnit after the start time of <paramref name="paragraph"/>
/// (all values in TotalMilliseconds).
/// </summary>
/// <param name="index">Position of the paragraph in the current subtitle; negative values yield null.</param>
/// <param name="paragraph">The paragraph to find the original for.</param>
/// <param name="originalParagraphs">Paragraphs of the original subtitle.</param>
/// <returns>The matching original paragraph, or null when none is found.</returns>
public static Paragraph GetOriginalParagraph(int index, Paragraph paragraph, List<Paragraph> originalParagraphs)
{
    if (index < 0)
    {
        return null;
    }

    // 1) Same position, nearly the same start time.
    if (index < originalParagraphs.Count)
    {
        double sameIndexDifference = Math.Abs(originalParagraphs[index].StartTime.TotalMilliseconds - paragraph.StartTime.TotalMilliseconds);
        if (sameIndexDifference < 50)
        {
            return originalParagraphs[index];
        }
    }

    // 2) Any paragraph with the same start time.
    for (int i = 0; i < originalParagraphs.Count; i++)
    {
        Paragraph candidate = originalParagraphs[i];
        if (Math.Abs(candidate.StartTime.TotalMilliseconds - paragraph.StartTime.TotalMilliseconds) < 0.01)
        {
            return candidate;
        }
    }

    // 3) First paragraph starting inside the tolerance window around the start time.
    for (int i = 0; i < originalParagraphs.Count; i++)
    {
        Paragraph candidate = originalParagraphs[i];
        bool afterWindowStart = candidate.StartTime.TotalMilliseconds > paragraph.StartTime.TotalMilliseconds - 200;
        if (afterWindowStart && candidate.StartTime.TotalMilliseconds < paragraph.StartTime.TotalMilliseconds + TimeCode.BaseUnit)
        {
            return candidate;
        }
    }
    return null;
}
/// <summary>
/// URL-encodes <paramref name="text"/> with <see cref="Uri.EscapeDataString(string)"/>,
/// so no reference to System.Web is needed.
/// </summary>
/// <param name="text">Text to encode.</param>
/// <returns>The escaped text.</returns>
public static string UrlEncode(string text) => Uri.EscapeDataString(text);
/// <summary>
/// URL-decodes <paramref name="text"/> with <see cref="Uri.UnescapeDataString(string)"/>,
/// so no reference to System.Web is needed.
/// </summary>
/// <param name="text">Text to decode.</param>
/// <returns>The decoded text.</returns>
public static string UrlDecode(string text)
{
    // System.Uri does not treat '+' as an encoded space, so it is converted up front.
    // Literal plus signs are normally encoded as %2b, so this should be safe.
    string plusAsSpace = text.Replace('+', ' ');
    return Uri.UnescapeDataString(plusAsSpace);
}
// Matches a run of two or more digits; used by ReverseNumbers.
private static readonly Regex TwoOrMoreDigitsNumber = new Regex(@"\d\d+", RegexOptions.Compiled);
// Characters that ReverseStartAndEndingForRightToLeft moves between the start and the end of a line
// (dashes, space, punctuation, brackets and symbols).
private const string PrePostStringsToReverse = @"-— !?.…""،,():;[]+~*/<>&^%$#\\|";
/// <summary>
/// For every line of <paramref name="s"/>, swaps the leading and trailing runs of characters listed in
/// PrePostStringsToReverse (with parentheses and square brackets mirrored), while keeping leading
/// ASS tags and leading/trailing italic, bold and font tags in place around the text.
/// </summary>
/// <param name="s">Text to process, possibly multi-line.</param>
/// <returns>The processed lines joined with new lines, trimmed.</returns>
public static string ReverseStartAndEndingForRightToLeft(string s)
{
var newLines = new StringBuilder();
// pre/post are reused for every line (cleared below).
var pre = new StringBuilder();
var post = new StringBuilder();
var lines = s.SplitToLines();
foreach (var line in lines)
{
string s2 = line;
// Strip leading ASS override blocks "{\...}" and remember them.
bool startsWithAssTag = false;
string assTag = string.Empty;
while (s2.StartsWith("{\\", StringComparison.Ordinal) && s2.IndexOf('}') > 0)
{
startsWithAssTag = true;
int end = s2.IndexOf('}') + 1;
assTag += s2.Substring(0, end);
s2 = s2.Remove(0, end);
}
// Strip leading tags in the fixed order: italic, bold, font.
bool startsWithItalic = false;
if (s2.StartsWith("<i>", StringComparison.Ordinal))
{
startsWithItalic = true;
s2 = s2.Remove(0, 3);
}
bool startsWithBold = false;
if (s2.StartsWith("<b>", StringComparison.Ordinal))
{
startsWithBold = true;
s2 = s2.Remove(0, 3);
}
var startFontTag = string.Empty;
if (s2.StartsWith("<font ", StringComparison.Ordinal) && s2.IndexOf('>') > 0)
{
int idx = s2.IndexOf('>');
idx++;
startFontTag = s2.Substring(0, idx);
s2 = s2.Remove(0, idx);
}
// Strip trailing tags in the fixed order: font, bold, italic.
var endFontTag = string.Empty;
if (s2.EndsWith("</font>", StringComparison.Ordinal))
{
endFontTag = "</font>";
s2 = s2.Remove(s2.Length - endFontTag.Length);
}
bool endsWithBold = false;
if (s2.EndsWith("</b>", StringComparison.Ordinal))
{
endsWithBold = true;
s2 = s2.Remove(s2.Length - 4, 4);
}
bool endsWithItalic = false;
if (s2.EndsWith("</i>", StringComparison.Ordinal))
{
endsWithItalic = true;
s2 = s2.Remove(s2.Length - 4, 4);
}
pre.Clear();
post.Clear();
// Collect the leading run of reversible characters.
int i = 0;
while (i < s2.Length && PrePostStringsToReverse.Contains(s2[i]) && s2[i] != '{')
{
pre.Append(s2[i]);
i++;
}
// Collect the trailing run, walking backwards - so "post" holds it in reversed order already.
int j = s2.Length - 1;
while (j > i && PrePostStringsToReverse.Contains(s2[j]) && s2[j] != '}')
{
post.Append(s2[j]);
j--;
}
// Rebuild the line: opening tags, (reversed) trailing run, middle text, reversed leading run, closing tags.
if (startsWithAssTag)
newLines.Append(assTag);
if (startsWithItalic)
newLines.Append("<i>");
if (startsWithBold)
newLines.Append("<b>");
newLines.Append(startFontTag);
newLines.Append(ReverseParenthesis(post.ToString()));
newLines.Append(s2.Substring(pre.Length, s2.Length - (pre.Length + post.Length)));
newLines.Append(ReverseParenthesis(ReverseString(pre.ToString())));
newLines.Append(endFontTag);
if (endsWithBold)
newLines.Append("</b>");
if (endsWithItalic)
newLines.Append("</i>");
newLines.AppendLine();
}
return newLines.ToString().Trim();
}
/// <summary>
/// Reverses the character order of every substring matched by the
/// <c>TwoOrMoreDigitsNumber</c> pattern (declared elsewhere in this class).
/// </summary>
/// <param name="s">Text to process.</param>
/// <returns>The text with each match written back-to-front; all other characters are untouched.</returns>
public static string ReverseNumbers(string s)
{
    MatchEvaluator flipMatch = numberMatch => ReverseString(numberMatch.Value);
    return TwoOrMoreDigitsNumber.Replace(s, flipMatch);
}
/// <summary>
/// Returns the given string with its characters in reverse order.
/// </summary>
/// <remarks>
/// UTF-16 surrogate pairs are kept together (high surrogate first) so that characters
/// outside the Basic Multilingual Plane are not turned into invalid text.
/// </remarks>
/// <param name="s">String to reverse; may be null or empty.</param>
/// <returns>The reversed string, or <paramref name="s"/> itself when it is null or shorter than two characters.</returns>
internal static string ReverseString(string s)
{
    if (string.IsNullOrEmpty(s) || s.Length == 1)
    {
        return s;
    }

    int len = s.Length;
    var chars = new char[len];
    int target = len; // next free slot is just below this index; filled from the end backwards
    for (int i = 0; i < len; i++)
    {
        if (char.IsHighSurrogate(s[i]) && i + 1 < len && char.IsLowSurrogate(s[i + 1]))
        {
            // Copy the pair as a unit so it stays in high/low order in the output.
            target -= 2;
            chars[target] = s[i];
            chars[target + 1] = s[i + 1];
            i++;
        }
        else
        {
            chars[--target] = s[i];
        }
    }
    return new string(chars);
}
/// <summary>
/// Mirrors round and square brackets: '(' becomes ')', ')' becomes '(',
/// '[' becomes ']' and ']' becomes '['. All other characters are kept.
/// </summary>
/// <param name="s">Text to process; may be null or empty.</param>
/// <returns>The text with brackets mirrored, or <paramref name="s"/> itself when it is null or empty.</returns>
private static string ReverseParenthesis(string s)
{
    if (string.IsNullOrEmpty(s))
    {
        return s;
    }

    char[] mirrored = s.ToCharArray();
    for (int pos = 0; pos < mirrored.Length; pos++)
    {
        char current = mirrored[pos];
        if (current == '(')
        {
            mirrored[pos] = ')';
        }
        else if (current == ')')
        {
            mirrored[pos] = '(';
        }
        else if (current == '[')
        {
            mirrored[pos] = ']';
        }
        else if (current == ']')
        {
            mirrored[pos] = '[';
        }
    }
    return new string(mirrored);
}
/// <summary>
/// Processes the text line by line: each line is trimmed, its brackets are mirrored
/// (see <c>ReverseParenthesis</c>), and every maximal run of consecutive characters
/// that occur in <paramref name="reverseChars"/> is reversed in place.
/// </summary>
/// <param name="text">Text to process; split into lines with <c>SplitToLines</c>.</param>
/// <param name="reverseChars">The characters whose consecutive runs are reversed.</param>
/// <returns>The processed lines joined with the environment new line, trimmed at both ends.</returns>
public static string FixEnglishTextInRightToLeftLanguage(string text, string reverseChars)
{
    var output = new StringBuilder();
    foreach (string rawLine in text.SplitToLines())
    {
        char[] buffer = ReverseParenthesis(rawLine.Trim()).ToCharArray();
        int runStart = -1; // index where the current run of reversible characters began, -1 when outside a run
        for (int pos = 0; pos < buffer.Length; pos++)
        {
            if (reverseChars.Contains(buffer[pos]))
            {
                if (runStart < 0)
                {
                    runStart = pos;
                }
            }
            else if (runStart >= 0)
            {
                // The run ended just before pos: flip it where it stands.
                Array.Reverse(buffer, runStart, pos - runStart);
                runStart = -1;
            }
        }

        // A run that reaches the end of the line is flipped as well.
        if (runStart >= 0)
        {
            Array.Reverse(buffer, runStart, buffer.Length - runStart);
        }

        output.AppendLine(new string(buffer));
    }
    return output.ToString().Trim();
}
/// <summary>
/// Replaces every character that has an entry in the superscript table with its
/// superscript counterpart. Characters without an entry (for example 'q' or 'C')
/// are copied unchanged.
/// </summary>
/// <param name="text">Text to convert.</param>
/// <returns>The converted text; it has the same length as the input.</returns>
public static string ToSuperscript(string text)
{
    // The two strings are parallel: the character at position n of plainChars
    // is replaced by the character at position n of raisedChars.
    const string plainChars = "0123456789+-=()abcdefghijklmnoprstuvwxyzABDEGHIJKLMNOPRTUW";
    const string raisedChars = "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻⁼⁽⁾ᵃᵇᶜᵈᵉᶠᵍʰⁱʲᵏˡᵐⁿᵒᵖʳˢᵗᵘᵛʷˣʸᶻᴬᴮᴰᴱᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᴿᵀᵁᵂ";

    var converted = new StringBuilder();
    foreach (char ch in text)
    {
        int position = plainChars.IndexOf(ch);
        converted.Append(position < 0 ? ch : raisedChars[position]);
    }
    return converted.ToString();
}
/// <summary>
/// Replaces every character that has an entry in the subscript table with its
/// subscript counterpart. Characters without an entry are copied unchanged.
/// </summary>
/// <param name="text">Text to convert.</param>
/// <returns>The converted text; it has the same length as the input.</returns>
public static string ToSubscript(string text)
{
    // The two strings are parallel: the character at position n of plainChars
    // is replaced by the character at position n of loweredChars.
    const string plainChars = "0123456789+-=()aeioruvx";
    const string loweredChars = "₀₁₂₃₄₅₆₇₈₉₊₋₌₍₎ₐₑᵢₒᵣᵤᵥₓ";

    var converted = new StringBuilder();
    foreach (char ch in text)
    {
        int position = plainChars.IndexOf(ch);
        converted.Append(position < 0 ? ch : loweredChars[position]);
    }
    return converted.ToString();
}
/// <summary>
/// Strips one leading and one trailing double quote (when present) and then
/// collapses every doubled quote (<c>""</c>) into a single one.
/// </summary>
/// <param name="text">Text to unquote; may be null or empty.</param>
/// <returns>The unquoted text, or <paramref name="text"/> itself when it is null or empty.</returns>
public static string FixQuotes(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    const char quote = '"';
    string unquoted = text;

    // A lone quote character is not treated as a leading quote (it is removed as a trailing one below).
    if (unquoted.Length > 1 && unquoted[0] == quote)
    {
        unquoted = unquoted.Remove(0, 1);
    }

    // The string still holds at least one character here, so indexing the last one is safe.
    if (unquoted[unquoted.Length - 1] == quote)
    {
        unquoted = unquoted.Remove(unquoted.Length - 1);
    }

    return unquoted.Replace("\"\"", "\"");
}
/// <summary>
/// Extracts the color of the first <c>&lt;font ...&gt;</c> tag in the text.
/// </summary>
/// <remarks>
/// The text (ignoring trailing white space) must end with <c>&lt;/font&gt;</c>. The color value may be
/// quoted with double or single quotes and is either <c>rgb(r,g,b)</c> or anything accepted by
/// <see cref="ColorTranslator.FromHtml"/>. The attribute is located relative to the tag itself,
/// so a font tag that is not at the very beginning of the text is handled as well.
/// </remarks>
/// <param name="text">Text that may contain a font tag; may be null or empty.</param>
/// <param name="defaultColor">Color returned when no usable color can be extracted.</param>
/// <returns>The parsed color, or <paramref name="defaultColor"/> when there is no font tag, no color attribute or the value cannot be parsed.</returns>
public static Color GetColorFromFontString(string text, Color defaultColor)
{
    if (string.IsNullOrEmpty(text))
    {
        return defaultColor;
    }

    const string colorAttribute = " color=";
    string s = text.TrimEnd();
    int start = s.IndexOf("<font ", StringComparison.OrdinalIgnoreCase);
    if (start < 0 || !s.EndsWith("</font>", StringComparison.OrdinalIgnoreCase))
    {
        return defaultColor;
    }

    int end = s.IndexOf('>', start);
    if (end < 0)
    {
        return defaultColor;
    }

    // The opening tag without its closing '>'. Every index below is relative to this string.
    string tag = s.Substring(start, end - start);
    int colorStart = tag.IndexOf(colorAttribute, StringComparison.OrdinalIgnoreCase);
    if (colorStart < 0)
    {
        return defaultColor;
    }

    int valueStart = colorStart + colorAttribute.Length;
    int valueEnd = tag.Length;
    if (valueStart < tag.Length)
    {
        // Skip the (possible) opening quote and stop at the next double quote inside the tag.
        int closingQuote = tag.IndexOf('"', valueStart + 1);
        if (closingQuote >= 0)
        {
            valueEnd = closingQuote;
        }
    }

    string value = tag.Substring(valueStart, valueEnd - valueStart).Trim('\'').Trim('"').Trim('\'');
    if (value.Length == 0)
    {
        return defaultColor;
    }

    try
    {
        if (value.StartsWith("rgb(", StringComparison.Ordinal))
        {
            var arr = value.Remove(0, 4).TrimEnd(')').Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
            return Color.FromArgb(
                int.Parse(arr[0], CultureInfo.InvariantCulture),
                int.Parse(arr[1], CultureInfo.InvariantCulture),
                int.Parse(arr[2], CultureInfo.InvariantCulture));
        }
        return ColorTranslator.FromHtml(value);
    }
    catch (Exception)
    {
        // Deliberately broad: any malformed color value falls back to the default color.
        return defaultColor;
    }
}
/// <summary>
/// Splits a text into the tokens used when counting changes between two texts.
/// </summary>
/// <remarks>
/// With <paramref name="breakToLetters"/> every character is its own token. Otherwise tokens
/// are separated by spaces and by <see cref="Environment.NewLine"/>; a punctuation character
/// (one of <c>!?.…:;,#%$£</c>) that follows a non-punctuation token starts a new token, while
/// consecutive punctuation stays together. Characters following punctuation without a separator
/// are appended to the punctuation token.
/// </remarks>
/// <param name="s">Text to split; null or empty yields an empty array.</param>
/// <param name="ignoreLineBreaks">When false, each line break is emitted as its own token.</param>
/// <param name="ignoreFormatting">When true, tags are removed with <c>HtmlUtil.RemoveHtmlTags</c> before splitting.</param>
/// <param name="breakToLetters">When true, the text is split into single characters.</param>
/// <returns>The tokens in order of appearance.</returns>
public static string[] SplitForChangedCalc(string s, bool ignoreLineBreaks, bool ignoreFormatting, bool breakToLetters)
{
    const string endChars = "!?.…:;,#%$£";
    var list = new List<string>();
    if (string.IsNullOrEmpty(s))
    {
        return list.ToArray();
    }

    if (ignoreFormatting)
    {
        s = HtmlUtil.RemoveHtmlTags(s, true);
        if (string.IsNullOrEmpty(s))
        {
            return list.ToArray();
        }
    }

    if (breakToLetters)
    {
        foreach (char ch in s)
        {
            list.Add(ch.ToString());
        }
        return list.ToArray();
    }

    string newLine = Environment.NewLine;
    var word = new StringBuilder();
    int i = 0;
    while (i < s.Length)
    {
        char current = s[i];

        // Compare in place instead of allocating a substring for every character.
        if (string.CompareOrdinal(s, i, newLine, 0, newLine.Length) == 0)
        {
            if (word.Length > 0)
            {
                list.Add(word.ToString());
            }
            word.Clear();
            if (!ignoreLineBreaks)
            {
                list.Add(newLine);
            }
            i += newLine.Length;
        }
        else if (current == ' ')
        {
            if (word.Length > 0)
            {
                list.Add(word.ToString());
            }
            word.Clear();
            i++;
        }
        else
        {
            // Punctuation after a normal word closes that word; punctuation after
            // punctuation (or at the start of a token) just extends the token.
            bool startsPunctuationToken = endChars.IndexOf(current) >= 0 &&
                                          word.Length > 0 &&
                                          endChars.IndexOf(word[0]) < 0;
            if (startsPunctuationToken)
            {
                list.Add(word.ToString());
                word.Clear();
            }
            word.Append(current);
            i++;
        }
    }

    if (word.Length > 0)
    {
        list.Add(word.ToString());
    }
    return list.ToArray();
}
/// <summary>
/// Tokenizes both texts with <c>SplitForChangedCalc</c> and adds the results to the running
/// counters: <paramref name="total"/> grows by the larger token count and
/// <paramref name="change"/> by the number of changes reported by <c>GetChangesAdvanced</c>.
/// </summary>
/// <param name="s1">First text.</param>
/// <param name="s2">Second text.</param>
/// <param name="total">Running total of tokens; incremented, not reset.</param>
/// <param name="change">Running total of changed tokens; incremented, not reset.</param>
/// <param name="ignoreLineBreaks">Passed through to <c>SplitForChangedCalc</c>.</param>
/// <param name="ignoreFormatting">Passed through to <c>SplitForChangedCalc</c>.</param>
/// <param name="breakToLetters">Passed through to <c>SplitForChangedCalc</c>.</param>
public static void GetTotalAndChangedWords(string s1, string s2, ref int total, ref int change, bool ignoreLineBreaks, bool ignoreFormatting, bool breakToLetters)
{
    string[] firstTokens = SplitForChangedCalc(s1, ignoreLineBreaks, ignoreFormatting, breakToLetters);
    string[] secondTokens = SplitForChangedCalc(s2, ignoreLineBreaks, ignoreFormatting, breakToLetters);

    int largerCount = firstTokens.Length >= secondTokens.Length ? firstTokens.Length : secondTokens.Length;
    total = total + largerCount;
    change = change + GetChangesAdvanced(firstTokens, secondTokens);
}
/// <summary>
/// Counts how many tokens differ between two token sequences.
/// </summary>
/// <remarks>
/// Both sequences are walked in parallel. On a mismatch, each current token is searched for
/// further ahead in the other sequence (<c>FindNext</c>); the walk re-synchronizes on whichever
/// is found nearer, counting the skipped tokens as changes. When neither is nearer, the
/// mismatch counts as one change. If the walk stops before both sequences are fully consumed,
/// the difference in length is added.
/// </remarks>
/// <param name="parts1">Tokens of the first text.</param>
/// <param name="parts2">Tokens of the second text.</param>
/// <returns>The number of changes.</returns>
private static int GetChangesAdvanced(string[] parts1, string[] parts2)
{
    int pos1 = 0;
    int pos2 = 0;
    int changes = 0;
    int rounds = 0;
    int roundLimit = Math.Max(parts1.Length, parts2.Length);

    for (; rounds < roundLimit && pos1 < parts1.Length && pos2 < parts2.Length; rounds++)
    {
        if (parts1[pos1] == parts2[pos2])
        {
            pos1++;
            pos2++;
            continue;
        }

        int resync1 = FindNext(parts2[pos2], parts1, pos1);
        int resync2 = FindNext(parts1[pos1], parts2, pos2);
        if (resync1 < resync2)
        {
            // The second sequence's token shows up later in the first: tokens in between count as changes.
            changes += resync1 - pos1;
            pos1 = resync1 + 1;
            pos2++;
        }
        else if (resync2 < resync1)
        {
            // The first sequence's token shows up later in the second: tokens in between count as changes.
            changes += resync2 - pos2;
            pos1++;
            pos2 = resync2 + 1;
        }
        else
        {
            pos1++;
            pos2++;
            changes++;
        }
    }

    bool bothConsumed = pos1 == parts1.Length && pos2 == parts2.Length;
    return bothConsumed
        ? changes
        : changes + Math.Abs(parts1.Length - parts2.Length);
}
/// <summary>
/// Finds the first index at or after <paramref name="startIndex"/> whose element equals <paramref name="s"/>.
/// </summary>
/// <param name="s">Value to look for.</param>
/// <param name="parts">Array to search.</param>
/// <param name="startIndex">Index to start searching at.</param>
/// <returns>The index of the first match, or <see cref="int.MaxValue"/> when there is none.</returns>
private static int FindNext(string s, string[] parts, int startIndex)
{
    int cursor = startIndex;
    while (cursor < parts.Length)
    {
        if (parts[cursor] == s)
        {
            return cursor;
        }
        cursor++;
    }
    return int.MaxValue;
}
/// <summary>
/// Keeps only the characters for which <see cref="char.IsDigit(char)"/> is true.
/// </summary>
/// <param name="p">Text to filter; may be null or empty.</param>
/// <returns>The digits of the text in their original order, or <paramref name="p"/> itself when it is null or empty.</returns>
public static string RemoveNonNumbers(string p)
{
    if (string.IsNullOrEmpty(p))
    {
        return p;
    }

    var digits = new char[p.Length];
    int used = 0;
    for (int pos = 0; pos < p.Length; pos++)
    {
        char candidate = p[pos];
        if (char.IsDigit(candidate))
        {
            digits[used++] = candidate;
        }
    }
    return new string(digits, 0, used);
}
/// <summary>
/// Matches a space plus the digit after it when the space is preceded by a whole number
/// and the digit is not followed by <c>/digit</c> (so "1 1/2" is left alone).
/// </summary>
private static readonly Regex RemoveSpaceBetweenNumbersRegex = new Regex(@"(?<=\b\d+) \d(?!/\d)", RegexOptions.Compiled);

/// <summary>
/// Removes single spaces that split a number into groups, e.g. "1 000 000" becomes "1000000".
/// </summary>
/// <param name="text">Text to process; may be null or empty.</param>
/// <returns>The text with the matching spaces removed, or <paramref name="text"/> itself when it is null or empty.</returns>
public static string RemoveSpaceBetweenNumbers(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    // After each removal the search resumes at the same index, because the digits that
    // have just been joined may be followed by yet another separated group.
    for (Match hit = RemoveSpaceBetweenNumbersRegex.Match(text);
         hit.Success;
         hit = RemoveSpaceBetweenNumbersRegex.Match(text, hit.Index))
    {
        text = text.Remove(hit.Index, 1);
    }
    return text;
}
/// <summary>
/// Remove unneeded spaces: drops zero-width/control characters, turns tabs and no-break
/// spaces into ordinary spaces, compacts spaced-out ellipses to "...", and removes stray
/// spaces around punctuation, dialog dashes, quotes and formatting tags (i, b, u, font).
/// </summary>
/// <param name="text">text string to remove unneeded spaces from (it is trimmed first, so it must not be null)</param>
/// <param name="language">two letter language id string; "fr" keeps the space before '?', '!' and ':', and the id selects which conjunctions keep the space after a dash</param>
/// <returns>text with unneeded spaces removed</returns>
public static string RemoveUnneededSpaces(string text, string language)
{
// Characters that are either dropped or replaced by a normal space below.
const char zeroWidthSpace = '\u200B';
const char zeroWidthNoBreakSpace = '\uFEFF';
const char noBreakSpace = '\u00A0';
const char operatingSystemCommand = '\u009D';
text = text.Trim();
int len = text.Length;
int count = 0;
// Filtered copy of the text; count is the number of characters actually kept.
char[] textChars = new char[len];
for (int i = 0; i < len; i++)
{
char ch = text[i];
switch (ch)
{
// Ignore: \u200B, \uFEFF and \u009D.
case zeroWidthSpace:
case zeroWidthNoBreakSpace:
case operatingSystemCommand:
break;
// Replace: \t or \u00A0 with white-space.
case '\t':
case noBreakSpace:
textChars[count++] = ' ';
break;
default:
textChars[count++] = ch;
break;
}
}
// Construct new string from textChars.
text = new string(textChars, 0, count);
// FixExtraSpaces is defined elsewhere in the project (presumably collapses repeated spaces - TODO confirm).
text = text.FixExtraSpaces();
if (text.EndsWith(' '))
text = text.Substring(0, text.Length - 1);
// Compact ellipses that were typed with spaces between the dots.
const string ellipses = "...";
text = text.Replace(". . ..", ellipses);
text = text.Replace(". ...", ellipses);
text = text.Replace(". .. .", ellipses);
text = text.Replace(". . .", ellipses);
text = text.Replace(". ..", ellipses);
text = text.Replace(".. .", ellipses);
// Fix recursive: ...
while (text.Contains("...."))
text = text.Replace("....", ellipses);
// Remove the space between an ellipsis and a line break, also after a leading dash or italic tag.
text = text.Replace(" ..." + Environment.NewLine, "..." + Environment.NewLine);
text = text.Replace(Environment.NewLine + "... ", Environment.NewLine + "...");
text = text.Replace(Environment.NewLine + "<i>... ", Environment.NewLine + "<i>...");
text = text.Replace(Environment.NewLine + "- ... ", Environment.NewLine + "- ...");
text = text.Replace(Environment.NewLine + "<i>- ... ", Environment.NewLine + "<i>- ...");
// NOTE(review): this repeats the "- ... " replacement two lines above - confirm whether a different pattern was intended.
text = text.Replace(Environment.NewLine + "- ... ", Environment.NewLine + "- ...");
// Same clean-up for an ellipsis at the very start or end of the whole text.
if (text.StartsWith("... ", StringComparison.Ordinal))
text = text.Remove(3, 1);
if (text.EndsWith(" ...", StringComparison.Ordinal))
text = text.Remove(text.Length - 4, 1);
if (text.EndsWith(" ...</i>", StringComparison.Ordinal))
text = text.Remove(text.Length - 8, 1);
if (text.StartsWith("- ... ", StringComparison.Ordinal))
text = text.Remove(5, 1);
if (text.StartsWith("<i>... ", StringComparison.Ordinal))
text = text.Remove(6, 1);
// Skipped for French, which keeps the space before '?', '!' and ':'.
if (language != "fr") // special rules for French
{
text = text.Replace("... ?", "...?");
text = text.Replace("... !", "...!");
text = text.Replace(" :", ":");
// NOTE(review): identical to the previous line as it appears here - confirm whether a different white-space character was intended.
text = text.Replace(" :", ":");
}
// Remove the space before a mid-sentence ellipsis unless the text contains a dash-led ellipsis.
if (!text.Contains("- ..."))
text = text.Replace(" ... ", "... ");
// No space before a comma.
while (text.Contains(" ,"))
text = text.Replace(" ,", ",");
// No space before a period or closing quote at the end of the text or of a line.
if (text.EndsWith(" .", StringComparison.Ordinal))
text = text.Remove(text.Length - 2, 1);
if (text.EndsWith(" \"", StringComparison.Ordinal))
text = text.Remove(text.Length - 2, 1);
if (text.Contains(" \"" + Environment.NewLine))
text = text.Replace(" \"" + Environment.NewLine, "\"" + Environment.NewLine);
if (text.Contains(" ." + Environment.NewLine))
text = text.Replace(" ." + Environment.NewLine, "." + Environment.NewLine);
// Skipped for French, which keeps the space before '!' and '?'.
if (language != "fr") // special rules for French
{
if (text.Contains(" !"))
text = text.Replace(" !", "!");
if (text.Contains(" ?"))
text = text.Replace(" ?", "?");
}
// "a . B" -> "a. B": remove the space before a period that sits between a lower-case and an upper-case letter.
if (text.Contains(" . "))
{
var regex = new Regex(@"[a-z] \. [A-Z]");
var match = regex.Match(text);
while (match.Success)
{
text = text.Remove(match.Index + 1, 1);
match = regex.Match(text);
}
}
// No space after inverted question/exclamation marks.
while (text.Contains("¿ "))
text = text.Replace("¿ ", "¿");
while (text.Contains("¡ "))
text = text.Replace("¡ ", "¡");
// Italic
if (text.Contains("<i>", StringComparison.OrdinalIgnoreCase) && text.Contains("</i>", StringComparison.OrdinalIgnoreCase))
text = RemoveSpaceBeforeAfterTag(text, "<i>");
// Bold
if (text.Contains("<b>", StringComparison.OrdinalIgnoreCase) && text.Contains("</b>", StringComparison.OrdinalIgnoreCase))
text = RemoveSpaceBeforeAfterTag(text, "<b>");
// Underline
if (text.Contains("<u>", StringComparison.OrdinalIgnoreCase) && text.Contains("</u>", StringComparison.OrdinalIgnoreCase))
text = RemoveSpaceBeforeAfterTag(text, "<u>");
// Font (only the first font tag is considered)
if (text.Contains("<font ", StringComparison.OrdinalIgnoreCase))
{
var idx = text.IndexOf("<font ", StringComparison.OrdinalIgnoreCase);
var endIdx = text.IndexOf('>', idx + 6);
if (endIdx > idx && endIdx < text.Length - 8)
{
// NOTE(review): the tag is lower-cased here while text is not; a tag with upper-case characters may not match inside RemoveSpaceBeforeAfterTag - confirm.
var color = text.Substring(idx, (endIdx - idx) + 1).ToLower();
text = RemoveSpaceBeforeAfterTag(text, color);
}
}
text = text.Trim();
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
// Dashes inside the text: "- " that follows a word is normally joined to the next word.
if (text.Contains("- ") && text.Length > 5)
{
// Start searching after a possible leading dialog dash (and after a leading <i> tag).
int idx = text.IndexOf("- ", 2, StringComparison.Ordinal);
if (text.StartsWith("<i>", StringComparison.OrdinalIgnoreCase))
idx = text.IndexOf("- ", 5, StringComparison.Ordinal);
while (idx > 0)
{
if (idx > 0 && idx < text.Length - 2)
{
// Collect the letters/digits directly before the dash ...
string before = string.Empty;
int k = idx - 1;
while (k >= 0 && char.IsLetterOrDigit(text[k]))
{
before = text[k--] + before;
}
// ... and the letters directly after "- ".
string after = string.Empty;
k = idx + 2;
while (k < text.Length && char.IsLetter(text[k]))
{
after = after + text[k++];
}
// Same word on both sides ("wait- wait"): always remove the space.
if (after.Length > 0 && after.Equals(before, StringComparison.OrdinalIgnoreCase))
text = text.Remove(idx + 1, 1);
else if (before.Length > 0)
{
// Keep the space when the next word is the language's "and"/"or" conjunction.
if ((language == "en" && (after.Equals("and", StringComparison.OrdinalIgnoreCase) || after.Equals("or", StringComparison.OrdinalIgnoreCase))) ||
(language == "es" && (after.Equals("y", StringComparison.OrdinalIgnoreCase) || after.Equals("o", StringComparison.OrdinalIgnoreCase))) ||
(language == "da" && (after.Equals("og", StringComparison.OrdinalIgnoreCase) || after.Equals("eller", StringComparison.OrdinalIgnoreCase))) ||
(language == "de" && (after.Equals("und", StringComparison.OrdinalIgnoreCase) || after.Equals("oder", StringComparison.OrdinalIgnoreCase))) ||
(language == "fi" && (after.Equals("ja", StringComparison.OrdinalIgnoreCase) || after.Equals("tai", StringComparison.OrdinalIgnoreCase))) ||
(language == "fr" && (after.Equals("et", StringComparison.OrdinalIgnoreCase) || after.Equals("ou", StringComparison.OrdinalIgnoreCase))) ||
(language == "it" && (after.Equals("e", StringComparison.OrdinalIgnoreCase) || after.Equals("o", StringComparison.OrdinalIgnoreCase))) ||
(language == "nl" && (after.Equals("en", StringComparison.OrdinalIgnoreCase) || after.Equals("of", StringComparison.OrdinalIgnoreCase))) ||
(language == "pl" && (after.Equals("i", StringComparison.OrdinalIgnoreCase) || after.Equals("czy", StringComparison.OrdinalIgnoreCase))) ||
(language == "pt" && (after.Equals("e", StringComparison.OrdinalIgnoreCase) || after.Equals("ou", StringComparison.OrdinalIgnoreCase))))
{
}
else
{
text = text.Remove(idx + 1, 1);
}
}
}
// Continue with the next "- " occurrence, if any.
if (idx + 1 < text.Length && idx != -1)
idx = text.IndexOf("- ", idx + 1, StringComparison.Ordinal);
else
break;
}
}
// Exactly one pair of quotes with a quote that has spaces on both sides.
if (CountTagInText(text, '"') == 2 && text.Contains(" \" "))
{
int idx = text.IndexOf(" \" ", StringComparison.Ordinal);
int idxp = text.IndexOf('"');
//"Foo " bar.
if ((idxp >= 0 && idxp < idx) && char.IsLetterOrDigit(text[idx - 1]) && !" \r\n".Contains(text[idxp + 1]))
{
text = text.Remove(idx, 1);
}
//" Foo " bar.
idx = text.IndexOf(" \" ", StringComparison.Ordinal);
idxp = text.IndexOf('"');
if (idxp >= 0 && idx > idxp)
{
if (text[idxp + 1] == ' ' && char.IsLetterOrDigit(text[idxp + 2]))
{
text = text.Remove(idxp + 1, 1);
idx--;
}
text = text.Remove(idx, 1);
}
}
// Fix spaces after quotes
// e.g: Foobar. " Foobar" => Foobar. "Foobar"
// A leading tag is set aside while the quote is fixed and put back afterwards.
string preText = string.Empty;
if (text.LineStartsWithHtmlTag(true, true))
{
int endIdx = text.IndexOf('>') + 1;
preText = text.Substring(0, endIdx);
text = text.Substring(endIdx);
}
if (text.StartsWith('"'))
{
text = '"' + text.Substring(1).TrimStart();
}
text = preText + text;
// Fix spaces before quotes at line ending
// A trailing tag is set aside in the same way.
string postText = string.Empty;
if (text.LineEndsWithHtmlTag(true, true))
{
int endIdx = text.LastIndexOf('<');
postText = text.Substring(endIdx);
text = text.Substring(0, endIdx);
}
if (text.EndsWith(" \""))
{
text = text.Remove(text.Length - 2, 1);
}
text = text + postText;
// No space after an opening quote that follows sentence-ending punctuation, ')' or '>'.
text = text.Replace(". \" ", ". \"");
text = text.Replace("? \" ", "? \"");
text = text.Replace("! \" ", "! \"");
text = text.Replace(") \" ", ") \"");
text = text.Replace("> \" ", "> \"");
// No space before a period that is followed by a space.
while (text.Contains(" . "))
text = text.Replace(" . ", ". ");
return text;
}
/// <summary>
/// Removes spaces and line breaks that sit directly inside a formatting tag pair,
/// e.g. "Hi &lt;i&gt; bad &lt;/i&gt; man!" becomes "Hi &lt;i&gt;bad&lt;/i&gt; man!".
/// </summary>
/// <remarks>
/// The text is first passed through <c>HtmlUtil.FixUpperTags</c> (defined elsewhere in the
/// project; presumably normalizes upper-case tags - TODO confirm). The closing tag is derived
/// from <paramref name="openTag"/>: &lt;i&gt;, &lt;b&gt; and &lt;u&gt; map to their closing
/// counterparts and any tag containing "&lt;font " maps to "&lt;/font&gt;".
/// </remarks>
/// <param name="text">Text to clean up.</param>
/// <param name="openTag">The exact opening tag as it appears in the text, e.g. "&lt;i&gt;" or a complete font tag.</param>
/// <returns>The trimmed text with the unneeded spaces around the tag pair removed.</returns>
public static string RemoveSpaceBeforeAfterTag(string text, string openTag)
{
    text = HtmlUtil.FixUpperTags(text);
    var closeTag = string.Empty;
    switch (openTag)
    {
        case "<i>":
            closeTag = "</i>";
            break;
        case "<b>":
            closeTag = "</b>";
            break;
        case "<u>":
            closeTag = "</u>";
            break;
    }
    if (closeTag.Length == 0 && openTag.Contains("<font ", StringComparison.Ordinal))
        closeTag = "</font>";

    // Open tags
    var open1 = openTag + " ";
    var open2 = Environment.NewLine + openTag + " ";
    var open3 = openTag + Environment.NewLine;

    // Closing tags
    var close1 = "! " + closeTag + Environment.NewLine;
    var close2 = "? " + closeTag + Environment.NewLine;
    var close3 = " " + closeTag;
    var close4 = " " + closeTag + Environment.NewLine;
    var close5 = Environment.NewLine + closeTag;

    if (text.Contains(close1, StringComparison.Ordinal))
        text = text.Replace(close1, "!" + closeTag + Environment.NewLine);

    if (text.Contains(close2, StringComparison.Ordinal))
        text = text.Replace(close2, "?" + closeTag + Environment.NewLine);

    if (text.EndsWith(close3, StringComparison.Ordinal))
        text = text.Substring(0, text.Length - close3.Length) + closeTag;

    if (text.Contains(close4))
        text = text.Replace(close4, closeTag + Environment.NewLine);

    // e.g: ! </i><br>Foobar
    if (text.StartsWith(open1, StringComparison.Ordinal))
        text = openTag + text.Substring(open1.Length);

    // e.g.: <i>\r\n
    if (text.StartsWith(open3, StringComparison.Ordinal))
        text = text.Remove(openTag.Length, Environment.NewLine.Length);

    // e.g.: \r\n</i>
    // The line break starts close5.Length characters before the end of the text. The offset
    // must be based on the closing tag: an opening font tag is much longer than "</font>".
    if (text.EndsWith(close5, StringComparison.Ordinal))
        text = text.Remove(text.Length - close5.Length, Environment.NewLine.Length);

    if (text.Contains(open2, StringComparison.Ordinal))
        text = text.Replace(open2, Environment.NewLine + openTag);

    // Hi <i> bad</i> man! -> Hi <i>bad</i> man!
    text = text.Replace(" " + openTag + " ", " " + openTag);
    text = text.Replace(Environment.NewLine + openTag + " ", Environment.NewLine + openTag);

    // Hi <i>bad </i> man! -> Hi <i>bad</i> man!
    text = text.Replace(" " + closeTag + " ", closeTag + " ");
    text = text.Replace(" " + closeTag + Environment.NewLine, closeTag + Environment.NewLine);

    text = text.Trim();
    // Trimming may have exposed an opening tag followed by a space at the start again.
    if (text.StartsWith(open1, StringComparison.Ordinal))
        text = openTag + text.Substring(open1.Length);

    return text;
}
/// <summary>
/// Creates a task that will complete after a time delay.
/// </summary>
/// <remarks>
/// The underlying timer is disposed as soon as it has fired, or immediately when the
/// delay is rejected. A delay of -1 means "infinite", so the returned task never completes.
/// </remarks>
/// <param name="millisecondsDelay">The number of milliseconds to wait before completing the returned task.</param>
/// <returns>A task that represents the time delay.</returns>
/// <exception cref="ArgumentOutOfRangeException">The delay is less than -1.</exception>
public static Task TaskDelay(int millisecondsDelay)
{
    var tcs = new TaskCompletionSource<object>();

    // The single-argument constructor hands the timer itself to the callback as its state,
    // which lets the callback release the timer without capturing a not-yet-assigned local.
    var timer = new System.Threading.Timer(state =>
    {
        ((System.Threading.Timer)state).Dispose();
        tcs.TrySetResult(null);
    });

    try
    {
        timer.Change(millisecondsDelay, System.Threading.Timeout.Infinite);
    }
    catch
    {
        // Invalid delay: the timer will never fire, so release it before reporting the error.
        timer.Dispose();
        throw;
    }
    return tcs.Task;
}
/// <summary>
/// Fills <paramref name="subtitle"/> with the paragraphs of a Matroska text track and
/// returns the subtitle format that matches the track.
/// </summary>
/// <remarks>
/// A track whose codec private data contains "[script info]" is loaded through
/// <c>LoadMatroskaSSA</c> as Sub Station Alpha (when the data contains "[V4 Styles]") or
/// Advanced Sub Station Alpha (otherwise), and the subtitle header is taken from the codec
/// private data. Any other track is loaded as plain text paragraphs and reported as SubRip.
/// </remarks>
/// <param name="matroskaSubtitleInfo">Track information; supplies the codec private data.</param>
/// <param name="matroska">The Matroska file; its path is passed on when loading SSA/ASS.</param>
/// <param name="sub">The subtitle entries read from the track.</param>
/// <param name="subtitle">The subtitle to fill; its existing paragraphs are cleared.</param>
/// <returns>The format of the loaded subtitle.</returns>
/// <exception cref="ArgumentNullException"><paramref name="subtitle"/> is null.</exception>
public static SubtitleFormat LoadMatroskaTextSubtitle(MatroskaTrackInfo matroskaSubtitleInfo, MatroskaFile matroska, List<MatroskaSubtitle> sub, Subtitle subtitle)
{
    if (subtitle == null)
    {
        throw new ArgumentNullException(nameof(subtitle));
    }

    subtitle.Paragraphs.Clear();
    SubtitleFormat format = new SubRip();
    var codecPrivate = matroskaSubtitleInfo.GetCodecPrivate();

    if (!codecPrivate.Contains("[script info]", StringComparison.OrdinalIgnoreCase))
    {
        // Plain text track: one paragraph per entry.
        foreach (var entry in sub)
        {
            subtitle.Paragraphs.Add(new Paragraph(entry.GetText(matroskaSubtitleInfo), entry.Start, entry.End));
        }
        subtitle.Renumber();
        return format;
    }

    // SSA/ASS track (the codec private data is known to be non-empty from here on).
    format = codecPrivate.Contains("[V4 Styles]", StringComparison.OrdinalIgnoreCase)
        ? (SubtitleFormat)new SubStationAlpha()
        : new AdvancedSubStationAlpha();

    foreach (var paragraph in LoadMatroskaSSA(matroskaSubtitleInfo, matroska.Path, format, sub).Paragraphs)
    {
        subtitle.Paragraphs.Add(paragraph);
    }

    // The header is everything up to and including the first line that opens the events,
    // fonts or graphics part; nothing after that line belongs to the header.
    var header = new StringBuilder();
    foreach (string line in codecPrivate.Replace(Environment.NewLine, "\n").Split('\n'))
    {
        header.AppendLine(line);

        string trimmed = line.Trim();
        bool opensNextPart = trimmed.StartsWith("dialog:", StringComparison.OrdinalIgnoreCase) ||
                             trimmed.Equals("[events]", StringComparison.OrdinalIgnoreCase) ||
                             trimmed.Equals("[fonts]", StringComparison.OrdinalIgnoreCase) ||
                             trimmed.Equals("[graphics]", StringComparison.OrdinalIgnoreCase);
        if (opensNextPart)
        {
            break;
        }
    }

    subtitle.Header = header.ToString().TrimEnd();
    if (!subtitle.Header.Contains("[events]", StringComparison.OrdinalIgnoreCase))
    {
        subtitle.Header += Environment.NewLine + Environment.NewLine + "[Events]" + Environment.NewLine;
    }

    subtitle.Renumber();
    return format;
}
/// <summary>
/// Builds an SSA/ASS subtitle from a Matroska track by re-creating the text of a script
/// file (header, dialogue lines, comments, footer) and loading it with <paramref name="format"/>.
/// </summary>
/// <remarks>
/// The header comes from the track's codec private data. "Comment:" lines found there are
/// re-inserted before the first dialogue line that starts at or after them; "[Fonts]" and
/// "[Graphics]" sections are appended after all dialogue lines. Each Matroska block is expected
/// to be "ReadOrder, Layer, Style, ..., Text"; the ReadOrder field is dropped and the start/end
/// time codes are inserted after the Layer field. Blocks that do not contain these leading
/// fields are skipped.
/// </remarks>
/// <param name="matroskaSubtitleInfo">Track information; supplies the codec private data.</param>
/// <param name="fileName">File name passed on to <c>format.LoadSubtitle</c>.</param>
/// <param name="format">The SSA/ASS format used to parse the generated lines.</param>
/// <param name="sub">The subtitle entries read from the track.</param>
/// <returns>The loaded subtitle.</returns>
public static Subtitle LoadMatroskaSSA(MatroskaTrackInfo matroskaSubtitleInfo, string fileName, SubtitleFormat format, List<MatroskaSubtitle> sub)
{
    var codecPrivate = matroskaSubtitleInfo.GetCodecPrivate();
    var subtitle = new Subtitle { Header = codecPrivate };
    var lines = subtitle.Header.Trim().SplitToLines();
    var footer = new StringBuilder();
    var comments = new Subtitle();
    if (!string.IsNullOrEmpty(codecPrivate))
    {
        // Collect the footer sections and the comment lines (with their start time) from the header.
        bool footerOn = false;
        char[] splitChars = { ':', '.' };
        foreach (string line in lines)
        {
            if (footerOn)
            {
                footer.AppendLine(line);
            }
            else if (line.Trim() == "[Events]")
            {
            }
            else if (line.Trim() == "[Fonts]" || line.Trim() == "[Graphics]")
            {
                footerOn = true;
                footer.AppendLine();
                footer.AppendLine();
                footer.AppendLine(line);
            }
            else if (line.StartsWith("Comment:", StringComparison.Ordinal))
            {
                var arr = line.Split(',');
                if (arr.Length > 3)
                {
                    // Second field is the start time as h:mm:ss.cc
                    arr = arr[1].Split(splitChars);
                    if (arr.Length == 4)
                    {
                        int hour;
                        int min;
                        int sec;
                        int ms;
                        if (int.TryParse(arr[0], out hour) && int.TryParse(arr[1], out min) &&
                            int.TryParse(arr[2], out sec) && int.TryParse(arr[3], out ms))
                        {
                            // The last part is in hundredths of a second, hence * 10 for milliseconds.
                            comments.Paragraphs.Add(new Paragraph(new TimeCode(hour, min, sec, ms * 10), new TimeCode(), line));
                        }
                    }
                }
            }
        }
    }

    // Make sure the header ends with an [Events] section that has a Format line.
    const string headerFormat = "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text";
    if (!subtitle.Header.Contains("[Events]"))
    {
        subtitle.Header = subtitle.Header.Trim() + Environment.NewLine +
                          Environment.NewLine +
                          "[Events]" + Environment.NewLine +
                          headerFormat + Environment.NewLine;
    }
    else if (subtitle.Header.LastIndexOf("Format:", StringComparison.Ordinal) < subtitle.Header.IndexOf("[Events]", StringComparison.Ordinal))
    {
        // [Events] exists but has no Format line after it: rebuild the section.
        subtitle.Header = subtitle.Header.Remove(subtitle.Header.IndexOf("[Events]", StringComparison.Ordinal));
        subtitle.Header = subtitle.Header.Trim() + Environment.NewLine +
                          Environment.NewLine +
                          "[Events]" + Environment.NewLine +
                          headerFormat + Environment.NewLine;
    }
    else
    {
        subtitle.Header = subtitle.Header.Trim() + Environment.NewLine;
    }

    lines = new List<string>();
    foreach (string l in subtitle.Header.Trim().SplitToLines())
        lines.Add(l);

    const string timeCodeFormat = "{0}:{1:00}:{2:00}.{3:00}"; // h:mm:ss.cc
    foreach (var mp in sub)
    {
        var p = new Paragraph(string.Empty, mp.Start, mp.End);
        string start = string.Format(timeCodeFormat, p.StartTime.Hours, p.StartTime.Minutes, p.StartTime.Seconds, p.StartTime.Milliseconds / 10);
        string end = string.Format(timeCodeFormat, p.EndTime.Hours, p.EndTime.Minutes, p.EndTime.Seconds, p.EndTime.Milliseconds / 10);

        //MKS contains this: ReadOrder, Layer, Style, Name, MarginL, MarginR, MarginV, Effect, Text

        // Emit the comments that start at or before this paragraph (in their original order) ...
        for (int commentIndex = 0; commentIndex < comments.Paragraphs.Count; commentIndex++)
        {
            var cp = comments.Paragraphs[commentIndex];
            if (cp.StartTime.TotalMilliseconds <= p.StartTime.TotalMilliseconds)
                lines.Add(cp.Text);
        }
        // ... and remove them from the pending list (backwards, so the indexes stay valid).
        for (int commentIndex = comments.Paragraphs.Count - 1; commentIndex >= 0; commentIndex--)
        {
            var cp = comments.Paragraphs[commentIndex];
            if (cp.StartTime.TotalMilliseconds <= p.StartTime.TotalMilliseconds)
                comments.Paragraphs.RemoveAt(commentIndex);
        }

        string text = mp.GetText(matroskaSubtitleInfo).Replace(Environment.NewLine, "\\N");
        int idx = text.IndexOf(',') + 1;
        if (idx > 0 && idx < text.Length)
        {
            text = text.Remove(0, idx); // remove ReadOrder
            idx = text.IndexOf(',');
            // Without a comma after the Layer field there is nowhere to insert the time codes;
            // skip the malformed block instead of failing on Insert(-1, ...).
            if (idx >= 0)
            {
                text = text.Insert(idx, "," + start + "," + end);
                lines.Add("Dialogue: " + text);
            }
        }
    }

    // Comments that start after the last paragraph.
    for (int commentIndex = 0; commentIndex < comments.Paragraphs.Count; commentIndex++)
    {
        var cp = comments.Paragraphs[commentIndex];
        lines.Add(cp.Text);
    }

    foreach (string l in footer.ToString().SplitToLines())
        lines.Add(l);

    format.LoadSubtitle(subtitle, lines, fileName);
    return subtitle;
}
/// <summary>
/// Counts the lines of a text: one plus the number of line feed ('\n') characters.
/// </summary>
/// <param name="text">Text to inspect; may be null or empty.</param>
/// <returns>0 for a null or empty text, otherwise the number of lines.</returns>
public static int GetNumberOfLines(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return 0;
    }

    int lineCount = 1;
    foreach (char ch in text)
    {
        if (ch == '\n')
        {
            lineCount++;
        }
    }
    return lineCount;
}
/// <summary>
/// Computes the SHA-256 hash of the UTF-8 encoding of a string.
/// </summary>
/// <param name="value">The text to hash.</param>
/// <returns>The 32-byte hash as a Base64 string.</returns>
public static string Sha256Hash(string value)
{
    byte[] utf8 = Encoding.UTF8.GetBytes(value);
    using (var sha256 = new System.Security.Cryptography.SHA256Managed())
    {
        byte[] digest = sha256.ComputeHash(utf8);
        return Convert.ToBase64String(digest);
    }
}
/// <summary>
/// Decides whether two consecutive paragraphs may be merged into one.
/// </summary>
/// <remarks>
/// Both paragraphs need a text, their combined length (tags removed, trimmed) must be below
/// <paramref name="maximumTotalLength"/> and the gap between them below
/// <paramref name="maximumMillisecondsBetweenLines"/>. With
/// <paramref name="onlyContinuationLines"/> the first text must additionally be empty or end
/// with ',', '-', "...", "…" or a character contained in <c>AllLettersAndNumbers</c>.
/// </remarks>
/// <param name="p">The first paragraph.</param>
/// <param name="next">The paragraph that follows <paramref name="p"/>.</param>
/// <param name="maximumMillisecondsBetweenLines">Exclusive upper limit for the gap between the paragraphs.</param>
/// <param name="maximumTotalLength">Exclusive upper limit for the combined text length.</param>
/// <param name="onlyContinuationLines">When true, only merge if the first text looks unfinished.</param>
/// <returns>True when the paragraphs qualify for merging.</returns>
public static bool QualifiesForMerge(Paragraph p, Paragraph next, double maximumMillisecondsBetweenLines, int maximumTotalLength, bool onlyContinuationLines)
{
    if (p?.Text == null || next?.Text == null)
    {
        return false;
    }

    var s = HtmlUtil.RemoveHtmlTags(p.Text.Trim(), true);
    var nextText = HtmlUtil.RemoveHtmlTags(next.Text.Trim(), true);

    bool withinLimits = s.Length + nextText.Length < maximumTotalLength &&
                        next.StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < maximumMillisecondsBetweenLines;
    if (!withinLimits)
    {
        return false;
    }

    if (string.IsNullOrEmpty(s) || !onlyContinuationLines)
    {
        return true;
    }

    // Only continuation lines qualify: the first text must look unfinished.
    return s.EndsWith(',') ||
           s.EndsWith('-') ||
           s.EndsWith("...", StringComparison.Ordinal) ||
           s.EndsWith("…", StringComparison.Ordinal) || // Unicode Character 'HORIZONTAL ELLIPSIS' (U+2026)
           AllLettersAndNumbers.Contains(s.Substring(s.Length - 1));
}
/// <summary>
/// Removes the extension (the last dot and everything after it) from a file name
/// while keeping any directory part.
/// </summary>
/// <remarks>
/// Only a dot inside the last path segment counts, so a dot in a directory name does not
/// truncate the path. A name that starts with a dot (e.g. ".gitignore") is treated as
/// having no extension.
/// </remarks>
/// <param name="fileName">File name, optionally with a path; may be null or empty.</param>
/// <returns>The name without its extension, or <paramref name="fileName"/> itself when there is no extension.</returns>
public static string GetFileNameWithoutExtension(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return fileName;
    }

    int dot = fileName.LastIndexOf('.');
    int separator = fileName.LastIndexOfAny(new[] { '\\', '/' }); // -1 when there is no directory part

    // The dot must come after the first character of the last path segment.
    if (dot > separator + 1)
    {
        return fileName.Substring(0, dot);
    }
    return fileName;
}
}
}