diff --git a/LanguageMaster.xml b/LanguageMaster.xml index 743ec9700..5933ea505 100644 --- a/LanguageMaster.xml +++ b/LanguageMaster.xml @@ -282,6 +282,7 @@ Note: Do check free disk space. Number of differences: {0} ({1:0.##}% of letters changed) Show only differences Ignore line breaks + Ignore formatting Only look for differences in text Cannot compare with image based subtitles diff --git a/libse/AudioToText/PhocketSphinx/SubtitleGenerator.cs b/libse/AudioToText/PhocketSphinx/SubtitleGenerator.cs deleted file mode 100644 index e7428166f..000000000 --- a/libse/AudioToText/PhocketSphinx/SubtitleGenerator.cs +++ /dev/null @@ -1,30 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using Nikse.SubtitleEdit.Core.Forms; - -namespace Nikse.SubtitleEdit.Core.AudioToText.PhocketSphinx -{ - public class SubtitleGenerator - { - private readonly List _resultTexts; - - public SubtitleGenerator(List resultTexts) - { - _resultTexts = resultTexts; - } - - public Subtitle Generate() - { - var subtitle = new Subtitle(); - var currentList = new List(); - foreach (var resultText in _resultTexts) - { - subtitle.Paragraphs.Add(new Paragraph(resultText.Text, resultText.Start * 1000.0, resultText.End * 1000.0)); - } - //SplitLongLinesHelper.SplitLongLinesInSubtitle() - return subtitle; - } - } -} diff --git a/libse/AudioToText/PocketSphinx/PocketSphinxSettings.cs b/libse/AudioToText/PocketSphinx/PocketSphinxSettings.cs new file mode 100644 index 000000000..cd110d033 --- /dev/null +++ b/libse/AudioToText/PocketSphinx/PocketSphinxSettings.cs @@ -0,0 +1,32 @@ +using System.Collections.Generic; + +namespace Nikse.SubtitleEdit.Core.AudioToText.PocketSphinx +{ + public class PocketSphinxSettings + { + /// + /// Language name + ISO 639-1 language code + /// Languages available: https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/ + /// + public Dictionary Languages => new Dictionary + { + { "Dutch", "nl" }, + { 
"English", "en" }, + { "French", "fr" }, + { "German", "de" }, + { "Greek", "el" }, + { "Hindi", "hi" }, + { "Indian", "id" }, + { "Italian", "it" }, + { "Kazakh", "kk" }, + { "Mandarin", "zh" }, + { "Spanish", "es" }, + { "Russian", "ru" }, + }; + + public string FfmpegWaveExtractionParameters => "-i \"{0}\" -vn -ar 24000 -ac 2 -ab 128 -vol 448 -f wav {2} \"{1}\""; + public string VlcWaveExtractionParameters => ""; + public string Name => "PocketSphinx"; + + } +} diff --git a/libse/AudioToText/PhocketSphinx/ResultReader.cs b/libse/AudioToText/PocketSphinx/ResultReader.cs similarity index 72% rename from libse/AudioToText/PhocketSphinx/ResultReader.cs rename to libse/AudioToText/PocketSphinx/ResultReader.cs index 2e411140c..eb1c5ecc0 100644 --- a/libse/AudioToText/PhocketSphinx/ResultReader.cs +++ b/libse/AudioToText/PocketSphinx/ResultReader.cs @@ -2,16 +2,19 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Text.RegularExpressions; +using Nikse.SubtitleEdit.Core.AudioToText.PhocketSphinx; -namespace Nikse.SubtitleEdit.Core.AudioToText.PhocketSphinx +namespace Nikse.SubtitleEdit.Core.AudioToText.PocketSphinx { public class ResultReader { - private List _lines; + private readonly List _lines; + private static readonly Regex EndsWithNumberInParentheses = new Regex(@"\(\d+\)$", RegexOptions.Compiled); public ResultReader(Stream stream) { - _lines = new List(); ; + _lines = new List(); using (var reader = new StreamReader(stream)) { while (!reader.EndOfStream) @@ -51,14 +54,20 @@ namespace Nikse.SubtitleEdit.Core.AudioToText.PhocketSphinx { try { - var t = parts[0]; + var text = parts[0]; + if (text.EndsWith(')') && EndsWithNumberInParentheses.IsMatch(text)) + { + text = text.Substring(0, text.LastIndexOf("(", StringComparison.Ordinal)); + } + var start = double.Parse(parts[1]); var end = double.Parse(parts[2]); var confidence = double.Parse(parts[3]); - list.Add(new ResultText { Text = t, Start = start, End = end, Confidence = 
confidence }); + list.Add(new ResultText { Text = text, Start = start, End = end, Confidence = confidence }); } - catch (Exception e) + catch { + // ignored } } diff --git a/libse/AudioToText/PocketSphinx/SubtitleGenerator.cs b/libse/AudioToText/PocketSphinx/SubtitleGenerator.cs new file mode 100644 index 000000000..d2789600d --- /dev/null +++ b/libse/AudioToText/PocketSphinx/SubtitleGenerator.cs @@ -0,0 +1,182 @@ +using System; +using System.Collections.Generic; +using Nikse.SubtitleEdit.Core.AudioToText.PhocketSphinx; +using Nikse.SubtitleEdit.Core.Dictionaries; + +namespace Nikse.SubtitleEdit.Core.AudioToText.PocketSphinx +{ + public class SubtitleGenerator + { + private readonly List _resultTexts; + + /// + /// Set period if distance to next subtitle is more than this value in milliseconds + /// + public double SetPeriodIfDistanceToNextIsMoreThan { get; set; } + + public SubtitleGenerator(List resultTexts) + { + _resultTexts = resultTexts; + SetPeriodIfDistanceToNextIsMoreThan = 250; + } + + public Subtitle Generate(string language) + { + var subtitle = new Subtitle(); + foreach (var resultText in _resultTexts) + { + subtitle.Paragraphs.Add(new Paragraph(resultText.Text, resultText.Start * 1000.0, resultText.End * 1000.0)); + } + + AddPeriods(subtitle, language); + + // subtitle = MergeShortLines(subtitle, language); + + FixCasing(subtitle, language); + + return subtitle; + } + + private void AddPeriods(Subtitle subtitle, string language) + { + //TODO: check of English non-break words + + for (var index = 0; index < subtitle.Paragraphs.Count - 1; index++) + { + var paragraph = subtitle.Paragraphs[index]; + var next = subtitle.Paragraphs[index + 1]; + if (next.StartTime.TotalMilliseconds - paragraph.EndTime.TotalMilliseconds > SetPeriodIfDistanceToNextIsMoreThan && + !paragraph.Text.EndsWith('.') && + !paragraph.Text.EndsWith('!') && + !paragraph.Text.EndsWith('?') && + !paragraph.Text.EndsWith(':')) + { + paragraph.Text += "."; + } + } + + if 
(subtitle.Paragraphs.Count > 0 && + !subtitle.Paragraphs[subtitle.Paragraphs.Count - 1].Text.EndsWith('.') && + !subtitle.Paragraphs[subtitle.Paragraphs.Count - 1].Text.EndsWith('!') && + !subtitle.Paragraphs[subtitle.Paragraphs.Count - 1].Text.EndsWith('?') && + !subtitle.Paragraphs[subtitle.Paragraphs.Count - 1].Text.EndsWith(':')) + { + subtitle.Paragraphs[subtitle.Paragraphs.Count - 1].Text += "."; + } + } + + private Subtitle MergeShortLines(Subtitle subtitle, string language) + { + int maxMillisecondsBetweenLines = 100; + int maxCharacters = 90; + const bool onlyContinuousLines = true; + + var mergedSubtitle = new Subtitle(); + bool lastMerged = false; + Paragraph p = null; + for (int i = 1; i < subtitle.Paragraphs.Count; i++) + { + if (!lastMerged) + { + p = new Paragraph(subtitle.GetParagraphOrDefault(i - 1)); + mergedSubtitle.Paragraphs.Add(p); + } + var next = subtitle.GetParagraphOrDefault(i); + if (next != null) + { + if (Utilities.QualifiesForMerge(p, next, maxMillisecondsBetweenLines, maxCharacters, onlyContinuousLines)) + { + if (GetStartTag(p.Text) == GetStartTag(next.Text) && + GetEndTag(p.Text) == GetEndTag(next.Text)) + { + string s1 = p.Text.Trim(); + s1 = s1.Substring(0, s1.Length - GetEndTag(s1).Length); + string s2 = next.Text.Trim(); + s2 = s2.Substring(GetStartTag(s2).Length); + p.Text = Utilities.AutoBreakLine(s1 + Environment.NewLine + s2, language); + } + else + { + p.Text = Utilities.AutoBreakLine(p.Text + Environment.NewLine + next.Text, language); + } + p.EndTime = next.EndTime; + + lastMerged = true; + } + else + { + lastMerged = false; + } + } + else + { + lastMerged = false; + } + } + if (!lastMerged) + mergedSubtitle.Paragraphs.Add(new Paragraph(subtitle.GetParagraphOrDefault(subtitle.Paragraphs.Count - 1))); + + return mergedSubtitle; + } + + private static void FixCasing(Subtitle subtitle, string language) + { + // fix casing normal + var fixCasing = new FixCasing(language); + fixCasing.Fix(subtitle); + + // fix casing for names 
+ var nameList = new NameList(Configuration.DictionariesDirectory, language, Configuration.Settings.WordLists.UseOnlineNames, Configuration.Settings.WordLists.NamesUrl); + var nameListInclMulti = nameList.GetAllNames(); + foreach (var paragraph in subtitle.Paragraphs) + { + string text = paragraph.Text; + string textNoTags = HtmlUtil.RemoveHtmlTags(text, true); + if (textNoTags != textNoTags.ToUpper()) + { + if (!string.IsNullOrEmpty(text)) + { + var st = new StrippableText(text); + st.FixCasing(nameListInclMulti, true, false, false, string.Empty); + paragraph.Text = st.MergedString; + } + } + } + } + + private static string GetEndTag(string text) + { + if (string.IsNullOrEmpty(text)) + return string.Empty; + text = text.Trim(); + if (!text.EndsWith('>')) + return string.Empty; + + string endTag = string.Empty; + int start = text.LastIndexOf(" 0 && start >= text.Length - 8) + { + endTag = text.Substring(start); + } + return endTag; + } + + private static string GetStartTag(string text) + { + if (string.IsNullOrEmpty(text)) + return string.Empty; + text = text.Trim(); + if (!text.StartsWith('<')) + return string.Empty; + + string startTag = string.Empty; + int end = text.IndexOf('>'); + if (end > 0 && end < 25) + { + startTag = text.Substring(0, end + 1); + } + return startTag; + } + + } +} diff --git a/libse/FixCasing.cs b/libse/FixCasing.cs new file mode 100644 index 000000000..1c8c7da8d --- /dev/null +++ b/libse/FixCasing.cs @@ -0,0 +1,203 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Linq; +using Nikse.SubtitleEdit.Core.Dictionaries; + +namespace Nikse.SubtitleEdit.Core +{ + public class FixCasing + { + public bool FixNormal = true; + public bool FixNormalOnlyAllUppercase = false; + public bool FixMakeLowercase = false; + public bool FixMakeUppercase = false; + + private readonly string _language; + private readonly List _names; + + public FixCasing(string language) + { + _language = language; + var nameList = 
new NameList(Configuration.DictionariesDirectory, language, Configuration.Settings.WordLists.UseOnlineNames, Configuration.Settings.WordLists.NamesUrl); + _names = nameList.GetAllNames(); + + // Longer names must be first + _names.Sort((s1, s2) => s2.Length.CompareTo(s1.Length)); + } + + public int NoOfLinesChanged { get; set; } + + public void Fix(Subtitle subtitle) + { + var subCulture = GetCultureInfoFromLanguage(_language); + Paragraph last = null; + foreach (Paragraph p in subtitle.Paragraphs) + { + if (last != null) + { + p.Text = Fix(p.Text, last.Text, _names, subCulture, p.StartTime.TotalMilliseconds - last.EndTime.TotalMilliseconds); + } + else + { + p.Text = Fix(p.Text, string.Empty, _names, subCulture, 10000); + } + + // fix casing of English alone i to I + if (FixNormal && _language.StartsWith("en", StringComparison.Ordinal)) + { + p.Text = FixEnglishAloneILowerToUpper(p.Text); + p.Text = FixCasingAfterTitles(p.Text); + } + + last = p; + } + } + + private static CultureInfo GetCultureInfoFromLanguage(string language) + { + try + { + return CultureInfo.GetCultureInfo(language); + } + catch + { + return CultureInfo.CurrentUICulture; + } + } + + private static string FixEnglishAloneILowerToUpper(string text) + { + const string pre = " >¡¿♪♫(["; + const string post = " = 0; indexOfI = text.IndexOf('i', indexOfI + 1)) + { + if (indexOfI == 0 || pre.Contains(text[indexOfI - 1])) + { + if (text.Substring(indexOfI).StartsWith("i-i ", StringComparison.Ordinal)) + { + text = text.Remove(indexOfI, 3).Insert(indexOfI, "I-I"); + } + else if (text.Substring(indexOfI).StartsWith("i-if ", StringComparison.Ordinal)) + { + text = text.Remove(indexOfI, 4).Insert(indexOfI, "I-If"); + } + else if (indexOfI + 1 == text.Length || post.Contains(text[indexOfI + 1])) + { + text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); + } + else if (indexOfI > 1 && indexOfI < text.Length - 2 && "\r\n".Contains(text[indexOfI + 1]) && text[indexOfI - 1] == ' ') + { + text = 
text.Remove(indexOfI, 1).Insert(indexOfI, "I"); + } + } + if (indexOfI > 1 && indexOfI < text.Length - 2 && "\r\n".Contains(text[indexOfI - 1]) && " .?!".Contains(text[indexOfI + 1])) + { + text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); + } + else if (indexOfI > 1 && "\r\n ".Contains(text[indexOfI - 1]) && text.Substring(indexOfI).StartsWith("i-i ", StringComparison.Ordinal)) + { + text = text.Remove(indexOfI, 3).Insert(indexOfI, "I-I"); + } + else if (indexOfI >= 1 && indexOfI < text.Length - 2 && "“\"".Contains(text[indexOfI - 1]) && " .?!".Contains(text[indexOfI + 1])) + { + text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); + } + else if (indexOfI > 2 && text.Substring(indexOfI - 2).StartsWith("I-i ", StringComparison.Ordinal)) + { + text = text.Remove(indexOfI - 2, 3).Insert(indexOfI - 2, "I-I"); + } + else if (indexOfI > 2 && text.Substring(indexOfI - 2).StartsWith("I-it's ", StringComparison.Ordinal)) + { + text = text.Remove(indexOfI - 2, 3).Insert(indexOfI - 2, "I-I"); + } + else if (text.Substring(indexOfI).StartsWith("i'll ", StringComparison.Ordinal)) + { + text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); + } + else if (text.Substring(indexOfI).StartsWith("i've ", StringComparison.Ordinal)) + { + text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); + } + else if (text.Substring(indexOfI).StartsWith("i'm ", StringComparison.Ordinal)) + { + text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); + } + else if (text.Substring(indexOfI).StartsWith("i'd ", StringComparison.Ordinal)) + { + text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); + } + } + return text; + } + + private string FixCasingAfterTitles(string text) + { + var titles = new[] { "Mrs.", "Miss.", "Mr.", "Ms.", "Dr." 
}; + var notChangeWords = new[] { "does", "has", "will", "is", "and", "for", "but", "or", "of" }; + for (int i = 0; i < text.Length - 4; i++) + { + var start = text.Substring(i); + foreach (var title in titles) + { + if (start.StartsWith(title, StringComparison.OrdinalIgnoreCase)) + { + var idx = i + title.Length; + if (idx < text.Length - 2 && text[idx] == ' ') + { + idx++; + var words = text.Substring(idx).Split(' ', '\r', '\n', ',', '"', '?', '!', '.', '\''); + if (words.Length > 0 && !notChangeWords.Contains(words[0])) + { + var upper = text[idx].ToString().ToUpper(); + text = text.Remove(idx, 1).Insert(idx, upper); + } + } + break; + } + } + } + return text; + } + + private string Fix(string text, string lastLine, List nameList, CultureInfo subtitleCulture, double millisecondsFromLast) + { + string original = text; + if (FixNormal) + { + if (FixNormalOnlyAllUppercase && text != text.ToUpper(subtitleCulture)) + return text; + + if (text.Length > 1) + { + // first all to lower + text = text.ToLower(subtitleCulture).Trim(); + text = text.FixExtraSpaces(); + var st = new StrippableText(text); + st.FixCasing(nameList, false, true, true, lastLine, millisecondsFromLast); // fix all casing but names (that's a seperate option) + text = st.MergedString; + } + } + else if (FixMakeUppercase) + { + var st = new StrippableText(text); + text = st.Pre + st.StrippedText.ToUpper(subtitleCulture) + st.Post; + text = HtmlUtil.FixUpperTags(text); // tags inside text + } + else if (FixMakeLowercase) + { + text = text.ToLower(subtitleCulture); + } + if (original != text) + NoOfLinesChanged++; + return text; + } + + } +} diff --git a/libse/Language.cs b/libse/Language.cs index b41b12e0b..c9a5673d0 100644 --- a/libse/Language.cs +++ b/libse/Language.cs @@ -440,6 +440,7 @@ namespace Nikse.SubtitleEdit.Core XNumberOfDifferenceAndPercentLettersChanged = "Number of differences: {0} ({1:0.##}% of letters changed)", ShowOnlyDifferences = "Show only differences", IgnoreLineBreaks = "Ignore 
line breaks", + IgnoreFormatting = "Ignore formatting", OnlyLookForDifferencesInText = "Only look for differences in text", CannotCompareWithImageBasedSubtitles = "Cannot compare with image based subtitles", }; diff --git a/libse/LanguageDeserializer.cs b/libse/LanguageDeserializer.cs index 314d9a279..674eebc30 100644 --- a/libse/LanguageDeserializer.cs +++ b/libse/LanguageDeserializer.cs @@ -718,6 +718,9 @@ namespace Nikse.SubtitleEdit.Core case "CompareSubtitles/IgnoreLineBreaks": language.CompareSubtitles.IgnoreLineBreaks = reader.Value; break; + case "CompareSubtitles/IgnoreFormatting": + language.CompareSubtitles.IgnoreFormatting = reader.Value; + break; case "CompareSubtitles/OnlyLookForDifferencesInText": language.CompareSubtitles.OnlyLookForDifferencesInText = reader.Value; break; diff --git a/libse/LanguageStructure.cs b/libse/LanguageStructure.cs index f8ed4ff2e..d4399e1a2 100644 --- a/libse/LanguageStructure.cs +++ b/libse/LanguageStructure.cs @@ -315,6 +315,7 @@ public string XNumberOfDifferenceAndPercentLettersChanged { get; set; } public string ShowOnlyDifferences { get; set; } public string IgnoreLineBreaks { get; set; } + public string IgnoreFormatting { get; set; } public string OnlyLookForDifferencesInText { get; set; } public string CannotCompareWithImageBasedSubtitles { get; set; } } diff --git a/libse/LibSE.csproj b/libse/LibSE.csproj index 8d2827d4f..d62d00fe1 100644 --- a/libse/LibSE.csproj +++ b/libse/LibSE.csproj @@ -40,8 +40,9 @@ - - + + + @@ -137,6 +138,7 @@ + diff --git a/libse/Settings.cs b/libse/Settings.cs index 1c0dd2d30..82ea6d887 100644 --- a/libse/Settings.cs +++ b/libse/Settings.cs @@ -1256,12 +1256,11 @@ namespace Nikse.SubtitleEdit.Core public bool ShowOnlyDifferences { get; set; } public bool OnlyLookForDifferenceInText { get; set; } public bool IgnoreLineBreaks { get; set; } + public bool IgnoreFormatting { get; set; } public CompareSettings() { - ShowOnlyDifferences = false; OnlyLookForDifferenceInText = true; - 
IgnoreLineBreaks = false; } } @@ -1415,6 +1414,9 @@ namespace Nikse.SubtitleEdit.Core xnode = nodeCompare.SelectSingleNode("IgnoreLineBreaks"); if (xnode != null) settings.Compare.IgnoreLineBreaks = Convert.ToBoolean(xnode.InnerText); + xnode = nodeCompare.SelectSingleNode("IgnoreFormatting"); + if (xnode != null) + settings.Compare.IgnoreFormatting = Convert.ToBoolean(xnode.InnerText); } // Recent files @@ -3353,6 +3355,7 @@ namespace Nikse.SubtitleEdit.Core textWriter.WriteElementString("ShowOnlyDifferences", settings.Compare.ShowOnlyDifferences.ToString()); textWriter.WriteElementString("OnlyLookForDifferenceInText", settings.Compare.OnlyLookForDifferenceInText.ToString()); textWriter.WriteElementString("IgnoreLineBreaks", settings.Compare.IgnoreLineBreaks.ToString()); + textWriter.WriteElementString("IgnoreFormatting", settings.Compare.IgnoreFormatting.ToString()); textWriter.WriteEndElement(); textWriter.WriteStartElement("RecentFiles", string.Empty); diff --git a/libse/Utilities.cs b/libse/Utilities.cs index ca9dae6df..e98e8f0f8 100644 --- a/libse/Utilities.cs +++ b/libse/Utilities.cs @@ -1591,11 +1591,14 @@ namespace Nikse.SubtitleEdit.Core return defaultColor; } - public static string[] SplitForChangedCalc(string s, bool ignoreLineBreaks, bool breakToLetters) + public static string[] SplitForChangedCalc(string s, bool ignoreLineBreaks, bool ignoreFormatting, bool breakToLetters) { const string endChars = "!?.:;,#%$£"; var list = new List(); + if (ignoreFormatting) + s = HtmlUtil.RemoveHtmlTags(s, true); + if (breakToLetters) { foreach (char ch in s) @@ -1648,10 +1651,10 @@ namespace Nikse.SubtitleEdit.Core return list.ToArray(); } - public static void GetTotalAndChangedWords(string s1, string s2, ref int total, ref int change, bool ignoreLineBreaks, bool breakToLetters) + public static void GetTotalAndChangedWords(string s1, string s2, ref int total, ref int change, bool ignoreLineBreaks, bool ignoreFormatting, bool breakToLetters) { - var parts1 = 
SplitForChangedCalc(s1, ignoreLineBreaks, breakToLetters); - var parts2 = SplitForChangedCalc(s2, ignoreLineBreaks, breakToLetters); + var parts1 = SplitForChangedCalc(s1, ignoreLineBreaks, ignoreFormatting, breakToLetters); + var parts2 = SplitForChangedCalc(s2, ignoreLineBreaks, ignoreFormatting, breakToLetters); total += Math.Max(parts1.Length, parts2.Length); change += GetChangesAdvanced(parts1, parts2); } @@ -2319,5 +2322,30 @@ namespace Nikse.SubtitleEdit.Core } } + public static bool QualifiesForMerge(Paragraph p, Paragraph next, double maximumMillisecondsBetweenLines, int maximumTotalLength, bool onlyContinuationLines) + { + if (p?.Text != null && next?.Text != null) + { + var s = HtmlUtil.RemoveHtmlTags(p.Text.Trim(), true); + var nextText = HtmlUtil.RemoveHtmlTags(next.Text.Trim(), true); + if (s.Length + nextText.Length < maximumTotalLength && next.StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < maximumMillisecondsBetweenLines) + { + if (string.IsNullOrEmpty(s)) + return true; + bool isLineContinuation = s.EndsWith(',') || + s.EndsWith('-') || + s.EndsWith("...", StringComparison.Ordinal) || + s.EndsWith("…", StringComparison.Ordinal) || // Unicode Character 'HORIZONTAL ELLIPSIS' (U+2026) + AllLettersAndNumbers.Contains(s.Substring(s.Length - 1)); + + if (!onlyContinuationLines) + return true; + + return isLineContinuation; + } + } + return false; + } + } } diff --git a/src/Forms/AudioToText.Designer.cs b/src/Forms/AudioToText.Designer.cs index b866088f9..f8ee170ec 100644 --- a/src/Forms/AudioToText.Designer.cs +++ b/src/Forms/AudioToText.Designer.cs @@ -32,17 +32,19 @@ this.buttonCancel = new System.Windows.Forms.Button(); this.labelStatus = new System.Windows.Forms.Label(); this.textBoxLog = new System.Windows.Forms.TextBox(); - this.label1 = new System.Windows.Forms.Label(); - this.label2 = new System.Windows.Forms.Label(); + this.labelLog = new System.Windows.Forms.Label(); + this.labelOutput = new System.Windows.Forms.Label(); 
this.textBoxOutput = new System.Windows.Forms.TextBox(); this.progressBar1 = new System.Windows.Forms.ProgressBar(); + this.labelProgress = new System.Windows.Forms.Label(); + this.linkLabelShowMoreLess = new System.Windows.Forms.LinkLabel(); this.SuspendLayout(); // // buttonOK // this.buttonOK.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); this.buttonOK.ImeMode = System.Windows.Forms.ImeMode.NoControl; - this.buttonOK.Location = new System.Drawing.Point(518, 315); + this.buttonOK.Location = new System.Drawing.Point(518, 442); this.buttonOK.Name = "buttonOK"; this.buttonOK.Size = new System.Drawing.Size(75, 21); this.buttonOK.TabIndex = 4; @@ -55,7 +57,7 @@ this.buttonCancel.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Right))); this.buttonCancel.DialogResult = System.Windows.Forms.DialogResult.Cancel; this.buttonCancel.ImeMode = System.Windows.Forms.ImeMode.NoControl; - this.buttonCancel.Location = new System.Drawing.Point(602, 315); + this.buttonCancel.Location = new System.Drawing.Point(602, 442); this.buttonCancel.Name = "buttonCancel"; this.buttonCancel.Size = new System.Drawing.Size(75, 21); this.buttonCancel.TabIndex = 5; @@ -67,7 +69,7 @@ // this.labelStatus.AutoSize = true; this.labelStatus.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0))); - this.labelStatus.Location = new System.Drawing.Point(244, 40); + this.labelStatus.Location = new System.Drawing.Point(12, 25); this.labelStatus.Name = "labelStatus"; this.labelStatus.Size = new System.Drawing.Size(41, 13); this.labelStatus.TabIndex = 8; @@ -77,68 +79,103 @@ // this.textBoxLog.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) | System.Windows.Forms.AnchorStyles.Right))); - 
this.textBoxLog.Location = new System.Drawing.Point(26, 67); + this.textBoxLog.Enabled = false; + this.textBoxLog.Location = new System.Drawing.Point(15, 130); this.textBoxLog.Multiline = true; this.textBoxLog.Name = "textBoxLog"; + this.textBoxLog.ReadOnly = true; this.textBoxLog.ScrollBars = System.Windows.Forms.ScrollBars.Both; - this.textBoxLog.Size = new System.Drawing.Size(651, 88); + this.textBoxLog.Size = new System.Drawing.Size(662, 97); this.textBoxLog.TabIndex = 9; // - // label1 + // labelLog // - this.label1.AutoSize = true; - this.label1.Location = new System.Drawing.Point(23, 51); - this.label1.Name = "label1"; - this.label1.Size = new System.Drawing.Size(25, 13); - this.label1.TabIndex = 10; - this.label1.Text = "Log"; + this.labelLog.AutoSize = true; + this.labelLog.Location = new System.Drawing.Point(12, 114); + this.labelLog.Name = "labelLog"; + this.labelLog.Size = new System.Drawing.Size(25, 13); + this.labelLog.TabIndex = 10; + this.labelLog.Text = "Log"; // - // label2 + // labelOutput // - this.label2.AutoSize = true; - this.label2.Location = new System.Drawing.Point(23, 175); - this.label2.Name = "label2"; - this.label2.Size = new System.Drawing.Size(39, 13); - this.label2.TabIndex = 12; - this.label2.Text = "Output"; + this.labelOutput.AutoSize = true; + this.labelOutput.Location = new System.Drawing.Point(12, 247); + this.labelOutput.Name = "labelOutput"; + this.labelOutput.Size = new System.Drawing.Size(39, 13); + this.labelOutput.TabIndex = 12; + this.labelOutput.Text = "Output"; // // textBoxOutput // this.textBoxOutput.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom) | System.Windows.Forms.AnchorStyles.Left) | System.Windows.Forms.AnchorStyles.Right))); - this.textBoxOutput.Location = new System.Drawing.Point(26, 191); + this.textBoxOutput.Enabled = false; + this.textBoxOutput.Location = new System.Drawing.Point(15, 263); this.textBoxOutput.Multiline = 
true; this.textBoxOutput.Name = "textBoxOutput"; + this.textBoxOutput.ReadOnly = true; this.textBoxOutput.ScrollBars = System.Windows.Forms.ScrollBars.Both; - this.textBoxOutput.Size = new System.Drawing.Size(651, 106); + this.textBoxOutput.Size = new System.Drawing.Size(662, 161); this.textBoxOutput.TabIndex = 11; // // progressBar1 // this.progressBar1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) | System.Windows.Forms.AnchorStyles.Right))); - this.progressBar1.Location = new System.Drawing.Point(26, 13); + this.progressBar1.Location = new System.Drawing.Point(15, 41); this.progressBar1.Name = "progressBar1"; - this.progressBar1.Size = new System.Drawing.Size(651, 23); + this.progressBar1.Size = new System.Drawing.Size(662, 23); this.progressBar1.TabIndex = 13; // + // labelProgress + // + this.labelProgress.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left))); + this.labelProgress.AutoSize = true; + this.labelProgress.Location = new System.Drawing.Point(12, 453); + this.labelProgress.Name = "labelProgress"; + this.labelProgress.Size = new System.Drawing.Size(70, 13); + this.labelProgress.TabIndex = 14; + this.labelProgress.Text = "labelProgress"; + // + // linkLabelShowMoreLess + // + this.linkLabelShowMoreLess.AutoSize = true; + this.linkLabelShowMoreLess.Location = new System.Drawing.Point(13, 71); + this.linkLabelShowMoreLess.Name = "linkLabelShowMoreLess"; + this.linkLabelShowMoreLess.Size = new System.Drawing.Size(60, 13); + this.linkLabelShowMoreLess.TabIndex = 15; + this.linkLabelShowMoreLess.TabStop = true; + this.linkLabelShowMoreLess.Text = "More info..."; + this.linkLabelShowMoreLess.LinkClicked += new System.Windows.Forms.LinkLabelLinkClickedEventHandler(this.linkLabelShowMoreLess_LinkClicked); + // // AudioToText // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); 
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; - this.ClientSize = new System.Drawing.Size(689, 348); + this.ClientSize = new System.Drawing.Size(689, 475); + this.Controls.Add(this.linkLabelShowMoreLess); + this.Controls.Add(this.labelProgress); this.Controls.Add(this.progressBar1); - this.Controls.Add(this.label2); + this.Controls.Add(this.labelOutput); this.Controls.Add(this.textBoxOutput); - this.Controls.Add(this.label1); + this.Controls.Add(this.labelLog); this.Controls.Add(this.textBoxLog); this.Controls.Add(this.labelStatus); this.Controls.Add(this.buttonOK); this.Controls.Add(this.buttonCancel); + this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedSingle; + this.MaximizeBox = false; + this.MinimizeBox = false; this.Name = "AudioToText"; + this.ShowIcon = false; + this.ShowInTaskbar = false; + this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent; this.Text = "AudioToText"; + this.FormClosing += new System.Windows.Forms.FormClosingEventHandler(this.AudioToText_FormClosing); this.Load += new System.EventHandler(this.AudioToText_Load); + this.Shown += new System.EventHandler(this.AudioToText_Shown); this.ResumeLayout(false); this.PerformLayout(); @@ -150,9 +187,11 @@ private System.Windows.Forms.Button buttonCancel; private System.Windows.Forms.Label labelStatus; private System.Windows.Forms.TextBox textBoxLog; - private System.Windows.Forms.Label label1; - private System.Windows.Forms.Label label2; + private System.Windows.Forms.Label labelLog; + private System.Windows.Forms.Label labelOutput; private System.Windows.Forms.TextBox textBoxOutput; private System.Windows.Forms.ProgressBar progressBar1; + private System.Windows.Forms.Label labelProgress; + private System.Windows.Forms.LinkLabel linkLabelShowMoreLess; } } \ No newline at end of file diff --git a/src/Forms/AudioToText.cs b/src/Forms/AudioToText.cs index 370ab83c2..546e596e6 100644 --- a/src/Forms/AudioToText.cs +++ b/src/Forms/AudioToText.cs @@ -7,29 
+7,35 @@ using System.Text; using System.Threading; using System.Windows.Forms; using Nikse.SubtitleEdit.Core; -using Nikse.SubtitleEdit.Core.AudioToText.PhocketSphinx; +using Nikse.SubtitleEdit.Core.AudioToText.PocketSphinx; using Nikse.SubtitleEdit.Core.ContainerFormats.Matroska; +using Nikse.SubtitleEdit.Logic; namespace Nikse.SubtitleEdit.Forms { public partial class AudioToText : Form { public Subtitle Subtitle { get; set; } + private readonly VideoInfo _videoInfo; private readonly string _videoFileName; private int _delayInMilliseconds; private bool _abort; private string _waveFileName; private readonly BackgroundWorker _backgroundWorker; - private static readonly StringBuilder Output = new StringBuilder(); private static readonly StringBuilder Error = new StringBuilder(); + private bool _showMore = true; - - public AudioToText(string videoFileName) + public AudioToText(string videoFileName, VideoInfo videoInfo) { + UiUtil.PreInitialize(this); InitializeComponent(); + UiUtil.FixFonts(this); _videoFileName = videoFileName; + _videoInfo = videoInfo; _backgroundWorker = new BackgroundWorker(); + UiUtil.FixLargeFonts(this, buttonOK); + labelProgress.Text = "Progress: 0%"; } public static Process GetCommandLineProcess(string inputVideoFile, int audioTrackNumber, string outWaveFile, string encodeParamters, out string encoderName) @@ -43,13 +49,11 @@ namespace Nikse.SubtitleEdit.Forms //-i indicates the input //-ac 1 means 1 channel (mono) - string exeFilePath = Configuration.Settings.General.FFmpegLocation; string parameters = string.Format(fFmpegWaveTranscodeSettings, inputVideoFile, outWaveFile, audioParameter); return new Process { StartInfo = new ProcessStartInfo(exeFilePath, parameters) { WindowStyle = ProcessWindowStyle.Hidden } }; } - private void ExtractAudio() { try @@ -138,11 +142,15 @@ namespace Nikse.SubtitleEdit.Forms process.StartInfo.RedirectStandardOutput = true; process.StartInfo.RedirectStandardError = true; process.StartInfo.UseShellExecute = 
false; + using (AutoResetEvent outputWaitHandle = new AutoResetEvent(false)) using (AutoResetEvent errorWaitHandle = new AutoResetEvent(false)) { process.OutputDataReceived += (sender, e) => { + if (_abort) + return; + if (e.Data == null) { outputWaitHandle.Set(); @@ -150,11 +158,18 @@ namespace Nikse.SubtitleEdit.Forms else { Output.AppendLine(e.Data); + var seconds = GetLastTimeStampInSeconds(e.Data); + if (seconds > 0) + { + _backgroundWorker.ReportProgress(seconds); + } } }; process.ErrorDataReceived += (sender, e) => { - if (e.Data == null) + if (_abort) + return; + if (e.Data != null) { errorWaitHandle.Set(); } @@ -168,43 +183,57 @@ namespace Nikse.SubtitleEdit.Forms process.BeginOutputReadLine(); process.BeginErrorReadLine(); - var timeout = 1000 * 60 * 60; //60 min timeout - if (process.WaitForExit(timeout) && - outputWaitHandle.WaitOne(timeout) && - errorWaitHandle.WaitOne(timeout)) + var killed = false; + while (!process.HasExited) { - //Console.WriteLine("Done"); - // Process completed. Check process.ExitCode here. - } - else - { - // Console.WriteLine("Timeout"); - // Timed out. 
+ Application.DoEvents(); + Thread.Sleep(50); + if (_abort && !killed) + { + process.Kill(); + killed = true; + } } + } return Output.ToString(); } - private void button1_Click(object sender, EventArgs e) + private static int GetLastTimeStampInSeconds(string text) { - var fileName = @"E:\PocketSphinx\pocketsphinx\a.txt"; - using (var s = new FileStream(fileName, FileMode.Open)) + var lines = text.SplitToLines(); + lines.Reverse(); + foreach (var line in lines) { - var reader = new ResultReader(s); - var results = reader.Parse(); - var subtitleGenerator = new SubtitleGenerator(results); - Subtitle = subtitleGenerator.Generate(); + if (!line.StartsWith('<') && !line.StartsWith('[')) + { + var parts = line.Split(); + if (parts.Length == 4) + { + double start; + double end; + if (double.TryParse(parts[1], System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out start) && double.TryParse(parts[2], System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out end)) // invariant culture: assumes '.' decimal separator in the recognizer output + { + return (int)Math.Round(end); + } + } + } } + + return -1; } private void buttonOK_Click(object sender, EventArgs e) { + _abort = true; DialogResult = DialogResult.OK; } private void buttonCancel_Click(object sender, EventArgs e) { + _abort = true; + _backgroundWorker.CancelAsync(); DialogResult = DialogResult.Cancel; } @@ -218,19 +247,18 @@ namespace Nikse.SubtitleEdit.Forms return stream; } - private Subtitle Start(string videoFileName) + private Subtitle Start() { - //labelStatus.Text = "Extracting audio from video..."; ExtractAudio(); - Subtitle subtitle = new Subtitle(); + Subtitle subtitle; var result = ExtractTextFromAudio(_waveFileName, _delayInMilliseconds); using (var stream = GenerateStreamFromString(result)) { var reader = new ResultReader(stream); var results = reader.Parse(); var subtitleGenerator = new SubtitleGenerator(results); - subtitle = subtitleGenerator.Generate(); + subtitle = subtitleGenerator.Generate("en"); } // cleanup @@ -248,9 +276,13 @@ namespace Nikse.SubtitleEdit.Forms private void AudioToText_Load(object sender, EventArgs e) { + buttonOK.Enabled = false; _backgroundWorker.DoWork += 
_backgroundWorker_DoWork; _backgroundWorker.RunWorkerCompleted += _backgroundWorker_RunWorkerCompleted; - _backgroundWorker.RunWorkerAsync(_videoFileName); + _backgroundWorker.WorkerReportsProgress = true; + _backgroundWorker.ProgressChanged += _backgroundWorker_ProgressChanged; + _backgroundWorker.WorkerSupportsCancellation = true; + _backgroundWorker.RunWorkerAsync(); progressBar1.Style = ProgressBarStyle.Marquee; progressBar1.Visible = true; labelStatus.Text = "Extracting text from video - this will take a while..."; @@ -262,12 +294,51 @@ namespace Nikse.SubtitleEdit.Forms labelStatus.Text = "Done extracing text from video."; textBoxOutput.Text = Output.ToString(); textBoxLog.Text = Error.ToString(); + labelProgress.Text = "Progress: " + 100 + "%"; progressBar1.Visible = false; + buttonOK.Enabled = true; } private void _backgroundWorker_DoWork(object sender, DoWorkEventArgs e) { - e.Result = Start((string)e.Argument); + e.Result = Start(); + } + + private void _backgroundWorker_ProgressChanged(object sender, ProgressChangedEventArgs e) + { + var positionInSeconds = e.ProgressPercentage; + var percentage = _videoInfo != null && _videoInfo.TotalMilliseconds > 0 ? (int)Math.Round(positionInSeconds * 100.0 / (_videoInfo.TotalMilliseconds / 1000.0)) : 0; // 0 when the video duration is unknown (avoids dividing by zero) + labelProgress.Text = "Progress: " + percentage + "%"; + } + + private void linkLabelShowMoreLess_LinkClicked(object sender, LinkLabelLinkClickedEventArgs e) + { + _showMore = !_showMore; + if (_showMore) + { + linkLabelShowMoreLess.Text = "Show less"; + Height = 500; + } + else + { + linkLabelShowMoreLess.Text = "Show more"; + Height = linkLabelShowMoreLess.Top + linkLabelShowMoreLess.Height + buttonOK.Height + 60; + } + labelLog.Visible = _showMore; + textBoxLog.Visible = _showMore; + labelOutput.Visible = _showMore; + textBoxOutput.Visible = _showMore; + } + + private void AudioToText_FormClosing(object sender, FormClosingEventArgs e) + { + if (!_abort) + e.Cancel = true; + } + + private void AudioToText_Shown(object sender, EventArgs e) + { + 
linkLabelShowMoreLess_LinkClicked(null, null); } } } diff --git a/src/Forms/BatchConvert.Designer.cs b/src/Forms/BatchConvert.Designer.cs index 3b94db2a6..f4afa07f5 100644 --- a/src/Forms/BatchConvert.Designer.cs +++ b/src/Forms/BatchConvert.Designer.cs @@ -29,7 +29,7 @@ private void InitializeComponent() { this.components = new System.ComponentModel.Container(); - Nikse.SubtitleEdit.Core.TimeCode timeCode2 = new Nikse.SubtitleEdit.Core.TimeCode(); + Nikse.SubtitleEdit.Core.TimeCode timeCode1 = new Nikse.SubtitleEdit.Core.TimeCode(); this.buttonConvert = new System.Windows.Forms.Button(); this.buttonCancel = new System.Windows.Forms.Button(); this.groupBoxConvertOptions = new System.Windows.Forms.GroupBox(); @@ -272,7 +272,7 @@ // buttonRemoveTextForHiSettings // this.buttonRemoveTextForHiSettings.ImeMode = System.Windows.Forms.ImeMode.NoControl; - this.buttonRemoveTextForHiSettings.Location = new System.Drawing.Point(183, 69); + this.buttonRemoveTextForHiSettings.Location = new System.Drawing.Point(183, 68); this.buttonRemoveTextForHiSettings.Name = "buttonRemoveTextForHiSettings"; this.buttonRemoveTextForHiSettings.Size = new System.Drawing.Size(104, 21); this.buttonRemoveTextForHiSettings.TabIndex = 3; @@ -303,7 +303,7 @@ // buttonMultipleReplaceSettings // this.buttonMultipleReplaceSettings.ImeMode = System.Windows.Forms.ImeMode.NoControl; - this.buttonMultipleReplaceSettings.Location = new System.Drawing.Point(183, 141); + this.buttonMultipleReplaceSettings.Location = new System.Drawing.Point(183, 140); this.buttonMultipleReplaceSettings.Name = "buttonMultipleReplaceSettings"; this.buttonMultipleReplaceSettings.Size = new System.Drawing.Size(104, 21); this.buttonMultipleReplaceSettings.TabIndex = 30; @@ -434,14 +434,14 @@ this.timeUpDownAdjust.Name = "timeUpDownAdjust"; this.timeUpDownAdjust.Size = new System.Drawing.Size(96, 27); this.timeUpDownAdjust.TabIndex = 1; - timeCode2.Hours = 0; - timeCode2.Milliseconds = 0; - timeCode2.Minutes = 0; - 
timeCode2.Seconds = 0; - timeCode2.TimeSpan = System.TimeSpan.Parse("00:00:00"); - timeCode2.TotalMilliseconds = 0D; - timeCode2.TotalSeconds = 0D; - this.timeUpDownAdjust.TimeCode = timeCode2; + timeCode1.Hours = 0; + timeCode1.Milliseconds = 0; + timeCode1.Minutes = 0; + timeCode1.Seconds = 0; + timeCode1.TimeSpan = System.TimeSpan.Parse("00:00:00"); + timeCode1.TotalMilliseconds = 0D; + timeCode1.TotalSeconds = 0D; + this.timeUpDownAdjust.TimeCode = timeCode1; this.timeUpDownAdjust.UseVideoOffset = false; // // labelHourMinSecMilliSecond diff --git a/src/Forms/ChangeCasing.cs b/src/Forms/ChangeCasing.cs index a174205ce..7ef8047d9 100644 --- a/src/Forms/ChangeCasing.cs +++ b/src/Forms/ChangeCasing.cs @@ -1,18 +1,12 @@ using Nikse.SubtitleEdit.Core; -using Nikse.SubtitleEdit.Core.Dictionaries; using Nikse.SubtitleEdit.Logic; using System; -using System.Collections.Generic; -using System.Globalization; -using System.Linq; using System.Windows.Forms; namespace Nikse.SubtitleEdit.Forms { public sealed partial class ChangeCasing : PositionAndSizeForm { - private int _noOfLinesChanged; - public ChangeCasing() { UiUtil.PreInitialize(this); @@ -40,19 +34,9 @@ namespace Nikse.SubtitleEdit.Forms radioButtonLowercase.Checked = true; } - public int LinesChanged - { - get { return _noOfLinesChanged; } - } + public int LinesChanged { get; private set; } - public bool ChangeNamesToo - { - get - { - return radioButtonFixOnlyNames.Checked || - (radioButtonNormal.Checked && checkBoxFixNames.Checked); - } - } + public bool ChangeNamesToo => radioButtonFixOnlyNames.Checked || radioButtonNormal.Checked && checkBoxFixNames.Checked; private void FixLargeFonts() { @@ -63,179 +47,15 @@ namespace Nikse.SubtitleEdit.Forms internal void FixCasing(Subtitle subtitle, string language) { - var nameList = new NameList(Configuration.DictionariesDirectory, language, Configuration.Settings.WordLists.UseOnlineNames, Configuration.Settings.WordLists.NamesUrl); - var names = nameList.GetAllNames(); - 
var subCulture = GetCultureInfoFromLanguage(language); - - // Longer names must be first - names.Sort((s1, s2) => s2.Length.CompareTo(s1.Length)); - - Paragraph last = null; - foreach (Paragraph p in subtitle.Paragraphs) + var fixCasing = new FixCasing(language) { - if (last != null) - { - p.Text = FixCasing(p.Text, last.Text, names, subCulture, p.StartTime.TotalMilliseconds - last.EndTime.TotalMilliseconds); - } - else - { - p.Text = FixCasing(p.Text, string.Empty, names, subCulture, 10000); - } - - // fix casing of English alone i to I - if (radioButtonNormal.Checked && language.StartsWith("en", StringComparison.Ordinal)) - { - p.Text = FixEnglishAloneILowerToUpper(p.Text); - p.Text = FixCasingAfterTitles(p.Text); - } - - last = p; - } - } - - private CultureInfo GetCultureInfoFromLanguage(string language) - { - try - { - return CultureInfo.GetCultureInfo(language); - } - catch - { - return CultureInfo.CurrentUICulture; - } - } - - public static string FixEnglishAloneILowerToUpper(string text) - { - const string pre = " >¡¿♪♫(["; - const string post = " = 0; indexOfI = text.IndexOf('i', indexOfI + 1)) - { - if (indexOfI == 0 || pre.Contains(text[indexOfI - 1])) - { - if (text.Substring(indexOfI).StartsWith("i-i ", StringComparison.Ordinal)) - { - text = text.Remove(indexOfI, 3).Insert(indexOfI, "I-I"); - } - else if (text.Substring(indexOfI).StartsWith("i-if ", StringComparison.Ordinal)) - { - text = text.Remove(indexOfI, 4).Insert(indexOfI, "I-If"); - } - else if (indexOfI + 1 == text.Length || post.Contains(text[indexOfI + 1])) - { - text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); - } - else if (indexOfI > 1 && indexOfI < text.Length - 2 && "\r\n".Contains(text[indexOfI + 1]) && text[indexOfI - 1] == ' ') - { - text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); - } - } - if (indexOfI > 1 && indexOfI < text.Length - 2 && "\r\n".Contains(text[indexOfI - 1]) && " .?!".Contains(text[indexOfI + 1])) - { - text = text.Remove(indexOfI, 1).Insert(indexOfI, 
"I"); - } - else if (indexOfI > 1 && "\r\n ".Contains(text[indexOfI - 1]) && text.Substring(indexOfI).StartsWith("i-i ", StringComparison.Ordinal)) - { - text = text.Remove(indexOfI, 3).Insert(indexOfI, "I-I"); - } - else if (indexOfI >= 1 && indexOfI < text.Length - 2 && "“\"".Contains(text[indexOfI - 1]) && " .?!".Contains(text[indexOfI + 1])) - { - text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); - } - else if (indexOfI > 2 && text.Substring(indexOfI - 2).StartsWith("I-i ", StringComparison.Ordinal)) - { - text = text.Remove(indexOfI - 2, 3).Insert(indexOfI - 2, "I-I"); - } - else if (indexOfI > 2 && text.Substring(indexOfI - 2).StartsWith("I-it's ", StringComparison.Ordinal)) - { - text = text.Remove(indexOfI - 2, 3).Insert(indexOfI - 2, "I-I"); - } - else if (text.Substring(indexOfI).StartsWith("i'll ", StringComparison.Ordinal)) - { - text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); - } - else if (text.Substring(indexOfI).StartsWith("i've ", StringComparison.Ordinal)) - { - text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); - } - else if (text.Substring(indexOfI).StartsWith("i'm ", StringComparison.Ordinal)) - { - text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); - } - else if (text.Substring(indexOfI).StartsWith("i'd ", StringComparison.Ordinal)) - { - text = text.Remove(indexOfI, 1).Insert(indexOfI, "I"); - } - } - return text; - } - - private string FixCasingAfterTitles(string text) - { - var titles = new[] { "Mrs.", "Miss.", "Mr.", "Ms.", "Dr." 
}; - var notChangeWords = new[] { "does", "has", "will", "is", "and", "for", "but", "or", "of" }; - for (int i = 0; i < text.Length - 4; i++) - { - var start = text.Substring(i); - foreach (var title in titles) - { - if (start.StartsWith(title, StringComparison.OrdinalIgnoreCase)) - { - var idx = i + title.Length; - if (idx < text.Length - 2 && text[idx] == ' ') - { - idx++; - var words = text.Substring(idx).Split(' ', '\r', '\n', ',', '"', '?', '!', '.', '\''); - if (words.Length > 0 && !notChangeWords.Contains(words[0])) - { - var upper = text[idx].ToString().ToUpper(); - text = text.Remove(idx, 1).Insert(idx, upper); - } - } - break; - } - } - } - return text; - } - - private string FixCasing(string text, string lastLine, List nameList, CultureInfo subtitleCulture, double millisecondsFromLast) - { - string original = text; - if (radioButtonNormal.Checked) - { - if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper(subtitleCulture)) - return text; - - if (text.Length > 1) - { - // first all to lower - text = text.ToLower(subtitleCulture).Trim(); - text = text.FixExtraSpaces(); - var st = new StrippableText(text); - st.FixCasing(nameList, false, true, true, lastLine, millisecondsFromLast); // fix all casing but names (that's a seperate option) - text = st.MergedString; - } - } - else if (radioButtonUppercase.Checked) - { - var st = new StrippableText(text); - text = st.Pre + st.StrippedText.ToUpper(subtitleCulture) + st.Post; - text = HtmlUtil.FixUpperTags(text); // tags inside text - } - else if (radioButtonLowercase.Checked) - { - text = text.ToLower(subtitleCulture); - } - if (original != text) - _noOfLinesChanged++; - return text; + FixNormal = radioButtonNormal.Checked, + FixMakeUppercase = radioButtonUppercase.Checked, + FixMakeLowercase = radioButtonLowercase.Checked, + FixNormalOnlyAllUppercase = checkBoxOnlyAllUpper.Checked + }; + fixCasing.Fix(subtitle); + LinesChanged = fixCasing.NoOfLinesChanged; } private void FormChangeCasing_KeyDown(object 
sender, KeyEventArgs e) diff --git a/src/Forms/ChangeCasingNames.cs b/src/Forms/ChangeCasingNames.cs index 7ff941ef6..897a2bad4 100644 --- a/src/Forms/ChangeCasingNames.cs +++ b/src/Forms/ChangeCasingNames.cs @@ -196,7 +196,7 @@ namespace Nikse.SubtitleEdit.Forms int start = lower.IndexOf(name.ToLower(), StringComparison.Ordinal); if (start >= 0) { - bool startOk = (start == 0) || (lower[start - 1] == ' ') || (lower[start - 1] == '-') || (lower[start - 1] == '"') || + bool startOk = start == 0 || lower[start - 1] == ' ' || lower[start - 1] == '-' || lower[start - 1] == '"' || lower[start - 1] == '\'' || lower[start - 1] == '>' || Environment.NewLine.EndsWith(lower[start - 1]); if (startOk) diff --git a/src/Forms/Compare.Designer.cs b/src/Forms/Compare.Designer.cs index e26d57cc6..4fe91f97e 100644 --- a/src/Forms/Compare.Designer.cs +++ b/src/Forms/Compare.Designer.cs @@ -53,6 +53,7 @@ namespace Nikse.SubtitleEdit.Forms this.toolTip1 = new System.Windows.Forms.ToolTip(this.components); this.subtitleListView2 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); + this.checkBoxIgnoreFormatting = new System.Windows.Forms.CheckBox(); this.contextMenuStrip1.SuspendLayout(); this.contextMenuStrip2.SuspendLayout(); this.SuspendLayout(); @@ -234,7 +235,7 @@ namespace Nikse.SubtitleEdit.Forms // this.checkBoxIgnoreLineBreaks.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left))); this.checkBoxIgnoreLineBreaks.AutoSize = true; - this.checkBoxIgnoreLineBreaks.Location = new System.Drawing.Point(330, 570); + this.checkBoxIgnoreLineBreaks.Location = new System.Drawing.Point(582, 534); this.checkBoxIgnoreLineBreaks.Name = "checkBoxIgnoreLineBreaks"; this.checkBoxIgnoreLineBreaks.Size = new System.Drawing.Size(112, 17); this.checkBoxIgnoreLineBreaks.TabIndex = 12; @@ -249,11 +250,13 @@ namespace Nikse.SubtitleEdit.Forms // // 
subtitleListView2 // + this.subtitleListView2.AllowColumnReorder = true; this.subtitleListView2.AllowDrop = true; this.subtitleListView2.FirstVisibleIndex = -1; this.subtitleListView2.Font = new System.Drawing.Font("Tahoma", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.subtitleListView2.FullRowSelect = true; this.subtitleListView2.GridLines = true; + this.subtitleListView2.HeaderStyle = System.Windows.Forms.ColumnHeaderStyle.Nonclickable; this.subtitleListView2.HideSelection = false; this.subtitleListView2.Location = new System.Drawing.Point(490, 56); this.subtitleListView2.Name = "subtitleListView2"; @@ -272,11 +275,13 @@ namespace Nikse.SubtitleEdit.Forms // // subtitleListView1 // + this.subtitleListView1.AllowColumnReorder = true; this.subtitleListView1.AllowDrop = true; this.subtitleListView1.FirstVisibleIndex = -1; this.subtitleListView1.Font = new System.Drawing.Font("Tahoma", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0))); this.subtitleListView1.FullRowSelect = true; this.subtitleListView1.GridLines = true; + this.subtitleListView1.HeaderStyle = System.Windows.Forms.ColumnHeaderStyle.Nonclickable; this.subtitleListView1.HideSelection = false; this.subtitleListView1.Location = new System.Drawing.Point(8, 56); this.subtitleListView1.Name = "subtitleListView1"; @@ -293,11 +298,24 @@ namespace Nikse.SubtitleEdit.Forms this.subtitleListView1.DragDrop += new System.Windows.Forms.DragEventHandler(this.subtitleListView1_DragDrop); this.subtitleListView1.DragEnter += new System.Windows.Forms.DragEventHandler(this.subtitleListView1_DragEnter); // + // checkBoxIgnoreFormatting + // + this.checkBoxIgnoreFormatting.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left))); + this.checkBoxIgnoreFormatting.AutoSize = true; + this.checkBoxIgnoreFormatting.Location = new System.Drawing.Point(582, 552); + 
this.checkBoxIgnoreFormatting.Name = "checkBoxIgnoreFormatting"; + this.checkBoxIgnoreFormatting.Size = new System.Drawing.Size(111, 17); + this.checkBoxIgnoreFormatting.TabIndex = 18; + this.checkBoxIgnoreFormatting.Text = "Ignore formatting"; + this.checkBoxIgnoreFormatting.UseVisualStyleBackColor = true; + this.checkBoxIgnoreFormatting.CheckedChanged += new System.EventHandler(this.checkBoxIgnoreFormatting_CheckedChanged); + // // Compare // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; this.ClientSize = new System.Drawing.Size(974, 591); + this.Controls.Add(this.checkBoxIgnoreFormatting); this.Controls.Add(this.checkBoxIgnoreLineBreaks); this.Controls.Add(this.checkBoxOnlyListDifferencesInText); this.Controls.Add(this.checkBoxShowOnlyDifferences); @@ -354,5 +372,6 @@ namespace Nikse.SubtitleEdit.Forms private System.Windows.Forms.ToolStripMenuItem copyTextToolStripMenuItem; private System.Windows.Forms.ContextMenuStrip contextMenuStrip2; private System.Windows.Forms.ToolStripMenuItem copyTextToolStripMenuItem1; + private System.Windows.Forms.CheckBox checkBoxIgnoreFormatting; } } \ No newline at end of file diff --git a/src/Forms/Compare.cs b/src/Forms/Compare.cs index 451e2ad0e..24832628a 100644 --- a/src/Forms/Compare.cs +++ b/src/Forms/Compare.cs @@ -35,6 +35,7 @@ namespace Nikse.SubtitleEdit.Forms buttonNextDifference.Text = Configuration.Settings.Language.CompareSubtitles.NextDifference; checkBoxShowOnlyDifferences.Text = Configuration.Settings.Language.CompareSubtitles.ShowOnlyDifferences; checkBoxIgnoreLineBreaks.Text = Configuration.Settings.Language.CompareSubtitles.IgnoreLineBreaks; + checkBoxIgnoreFormatting.Text = Configuration.Settings.Language.CompareSubtitles.IgnoreFormatting; checkBoxOnlyListDifferencesInText.Text = Configuration.Settings.Language.CompareSubtitles.OnlyLookForDifferencesInText; buttonOK.Text = Configuration.Settings.Language.General.Ok; 
copyTextToolStripMenuItem.Text = Configuration.Settings.Language.Main.Menu.ContextMenu.Copy; @@ -292,7 +293,7 @@ namespace Nikse.SubtitleEdit.Forms while (index < min) { bool addIndexToDifferences = false; - Utilities.GetTotalAndChangedWords(p1.Text, p2.Text, ref totalWords, ref wordsChanged, checkBoxIgnoreLineBreaks.Checked, ShouldBreakToLetter()); + Utilities.GetTotalAndChangedWords(p1.Text, p2.Text, ref totalWords, ref wordsChanged, checkBoxIgnoreLineBreaks.Checked, checkBoxIgnoreFormatting.Checked, ShouldBreakToLetter()); if (p1.ToString() == emptyParagraphAsString) { addIndexToDifferences = true; @@ -323,7 +324,7 @@ namespace Nikse.SubtitleEdit.Forms const double tolerance = 0.1; while (index < min) { - Utilities.GetTotalAndChangedWords(p1.Text, p2.Text, ref totalWords, ref wordsChanged, checkBoxIgnoreLineBreaks.Checked, ShouldBreakToLetter()); + Utilities.GetTotalAndChangedWords(p1.Text, p2.Text, ref totalWords, ref wordsChanged, checkBoxIgnoreLineBreaks.Checked, checkBoxIgnoreFormatting.Checked, ShouldBreakToLetter()); bool addIndexToDifferences = false; if (p1.ToString() == emptyParagraphAsString) { @@ -461,7 +462,7 @@ namespace Nikse.SubtitleEdit.Forms subtitleListView1.SelectIndexAndEnsureVisible(0); } - private bool ShouldBreakToLetter() => _language1 == null ? 
false : (_language1 == "ja" || _language1 == "zh"); + private bool ShouldBreakToLetter() => _language1 != null && (_language1 == "ja" || _language1 == "zh"); private string FixWhitespace(string p) { @@ -471,10 +472,16 @@ namespace Nikse.SubtitleEdit.Forms while (p.Contains(" ")) p = p.Replace(" ", " "); } + + if (checkBoxIgnoreFormatting.Checked) + { + p = HtmlUtil.RemoveHtmlTags(p, true); + } + return p; } - private static int GetColumnsEqualExceptNumber(Paragraph p1, Paragraph p2) + private int GetColumnsEqualExceptNumber(Paragraph p1, Paragraph p2) { if (p1 == null || p2 == null) return 0; @@ -491,13 +498,16 @@ namespace Nikse.SubtitleEdit.Forms if (Math.Abs(p1.Duration.TotalMilliseconds - p2.Duration.TotalMilliseconds) < tolerance) columnsEqual++; - if (p1.Text.Trim() == p2.Text.Trim()) + if (p1.Text.Trim() == p2.Text.Trim() || + checkBoxIgnoreFormatting.Checked && HtmlUtil.RemoveHtmlTags(p1.Text.Trim()) == HtmlUtil.RemoveHtmlTags(p2.Text.Trim())) + { columnsEqual++; - + } + return columnsEqual; } - private static int GetColumnsEqualExceptNumberAndDuration(Paragraph p1, Paragraph p2) + private int GetColumnsEqualExceptNumberAndDuration(Paragraph p1, Paragraph p2) { if (p1 == null || p2 == null) return 0; @@ -511,8 +521,11 @@ namespace Nikse.SubtitleEdit.Forms if (Math.Abs(p1.EndTime.TotalMilliseconds - p2.EndTime.TotalMilliseconds) < tolerance) columnsEqual++; - if (p1.Text.Trim() == p2.Text.Trim()) + if (p1.Text.Trim() == p2.Text.Trim() || + checkBoxIgnoreFormatting.Checked && HtmlUtil.RemoveHtmlTags(p1.Text.Trim()) == HtmlUtil.RemoveHtmlTags(p2.Text.Trim())) + { columnsEqual++; + } return columnsEqual; } @@ -995,6 +1008,7 @@ namespace Nikse.SubtitleEdit.Forms checkBoxShowOnlyDifferences.Checked = config.ShowOnlyDifferences; checkBoxOnlyListDifferencesInText.Checked = config.OnlyLookForDifferenceInText; checkBoxIgnoreLineBreaks.Checked = config.IgnoreLineBreaks; + checkBoxIgnoreFormatting.Checked = config.IgnoreFormatting; _loadingConfig = false; } @@ -1004,7 
+1018,12 @@ namespace Nikse.SubtitleEdit.Forms config.ShowOnlyDifferences = checkBoxShowOnlyDifferences.Checked; config.OnlyLookForDifferenceInText = checkBoxOnlyListDifferencesInText.Checked; config.IgnoreLineBreaks = checkBoxIgnoreLineBreaks.Checked; + config.IgnoreFormatting = checkBoxIgnoreFormatting.Checked; } + private void checkBoxIgnoreFormatting_CheckedChanged(object sender, EventArgs e) + { + CompareSubtitles(); + } } } diff --git a/src/Forms/GetDictionaries.cs b/src/Forms/GetDictionaries.cs index 7ac54b230..90905336d 100644 --- a/src/Forms/GetDictionaries.cs +++ b/src/Forms/GetDictionaries.cs @@ -14,9 +14,13 @@ namespace Nikse.SubtitleEdit.Forms { private List _dictionaryDownloadLinks = new List(); private List _descriptions = new List(); + private List _englishNames = new List(); private string _xmlName; private int _testAllIndex = -1; + public string SelectedEnglishName { get; private set; } = null; + + public GetDictionaries() { UiUtil.PreInitialize(this); @@ -43,6 +47,7 @@ namespace Nikse.SubtitleEdit.Forms { _dictionaryDownloadLinks = new List(); _descriptions = new List(); + _englishNames = new List(); _xmlName = xmlRessourceName; System.Reflection.Assembly asm = System.Reflection.Assembly.GetExecutingAssembly(); var strm = asm.GetManifestResourceStream(_xmlName); @@ -76,6 +81,7 @@ namespace Nikse.SubtitleEdit.Forms comboBoxDictionaries.Items.Add(name); _dictionaryDownloadLinks.Add(downloadLink); _descriptions.Add(description); + _englishNames.Add(englishName); } } comboBoxDictionaries.SelectedIndex = 0; @@ -122,11 +128,12 @@ namespace Nikse.SubtitleEdit.Forms buttonDownload.Enabled = false; buttonDownloadAll.Enabled = false; comboBoxDictionaries.Enabled = false; - this.Refresh(); + Refresh(); Cursor = Cursors.WaitCursor; int index = comboBoxDictionaries.SelectedIndex; string url = _dictionaryDownloadLinks[index]; + SelectedEnglishName = _englishNames[index]; var wc = new WebClient { Proxy = Utilities.GetProxy() }; wc.DownloadDataCompleted += 
wc_DownloadDataCompleted; diff --git a/src/Forms/Main.cs b/src/Forms/Main.cs index bade2effe..d6fb3fc3d 100644 --- a/src/Forms/Main.cs +++ b/src/Forms/Main.cs @@ -22034,7 +22034,7 @@ namespace Nikse.SubtitleEdit.Forms private void generateTextFromCurrentVideoToolStripMenuItem_Click(object sender, EventArgs e) { - using (var form = new AudioToText(_videoFileName)) + using (var form = new AudioToText(_videoFileName, _videoInfo)) { if (form.ShowDialog(this) == DialogResult.OK) { diff --git a/src/Forms/MergeShortLines.cs b/src/Forms/MergeShortLines.cs index 30fc05e57..ce65ddab8 100644 --- a/src/Forms/MergeShortLines.cs +++ b/src/Forms/MergeShortLines.cs @@ -27,10 +27,7 @@ namespace Nikse.SubtitleEdit.Forms SubtitleListview1.HideColumn(SubtitleListView.SubtitleColumn.WordsPerMinute); } - public Subtitle MergedSubtitle - { - get { return _mergedSubtitle; } - } + public Subtitle MergedSubtitle => _mergedSubtitle; private void MergeShortLines_KeyDown(object sender, KeyEventArgs e) { @@ -121,6 +118,7 @@ namespace Nikse.SubtitleEdit.Forms bool lastMerged = false; Paragraph p = null; var lineNumbers = new StringBuilder(); + bool onlyContinuousLines = checkBoxOnlyContinuationLines.Checked; for (int i = 1; i < subtitle.Paragraphs.Count; i++) { if (!lastMerged) @@ -131,7 +129,7 @@ namespace Nikse.SubtitleEdit.Forms Paragraph next = subtitle.GetParagraphOrDefault(i); if (next != null) { - if (QualifiesForMerge(p, next, maxMillisecondsBetweenLines, maxCharacters) && IsFixAllowed(p)) + if (Utilities.QualifiesForMerge(p, next, maxMillisecondsBetweenLines, maxCharacters, onlyContinuousLines) && IsFixAllowed(p)) { if (GetStartTag(p.Text) == GetStartTag(next.Text) && GetEndTag(p.Text) == GetEndTag(next.Text)) @@ -222,28 +220,7 @@ namespace Nikse.SubtitleEdit.Forms startTag = text.Substring(0, end + 1); } return startTag; - } - - private bool QualifiesForMerge(Paragraph p, Paragraph next, double maximumMillisecondsBetweenLines, int maximumTotalLength) - { - if (p != null && p.Text != 
null && next != null && next.Text != null) - { - var s = HtmlUtil.RemoveHtmlTags(p.Text.Trim(), true); - var nextText = HtmlUtil.RemoveHtmlTags(next.Text.Trim(), true); - if (s.Length + nextText.Length < maximumTotalLength && next.StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < maximumMillisecondsBetweenLines) - { - if (string.IsNullOrEmpty(s)) - return true; - bool isLineContinuation = s.EndsWith(',') || s.EndsWith('-') || s.EndsWith("...", StringComparison.Ordinal) || Utilities.AllLettersAndNumbers.Contains(s.Substring(s.Length - 1)); - - if (!checkBoxOnlyContinuationLines.Checked) - return true; - - return isLineContinuation; - } - } - return false; - } + } private void NumericUpDownMaxCharactersValueChanged(object sender, EventArgs e) { diff --git a/src/Forms/Ocr/VobSubOcr.cs b/src/Forms/Ocr/VobSubOcr.cs index a3982ced5..43469e1d1 100644 --- a/src/Forms/Ocr/VobSubOcr.cs +++ b/src/Forms/Ocr/VobSubOcr.cs @@ -8841,8 +8841,21 @@ namespace Nikse.SubtitleEdit.Forms.Ocr using (var form = new GetDictionaries()) { form.ShowDialog(this); + FillSpellCheckDictionaries(); + if (!string.IsNullOrEmpty(form.SelectedEnglishName)) + { + for (var index = 0; index < comboBoxDictionaries.Items.Count; index++) + { + var item = comboBoxDictionaries.Items[index].ToString(); + if (item.Contains(form.SelectedEnglishName)) + { + comboBoxDictionaries.SelectedIndex = index; + return; + } + } + } } - FillSpellCheckDictionaries(); + if (comboBoxDictionaries.Items.Count > 0) comboBoxDictionaries.SelectedIndex = 0; }