From 001cad505d2be2b3b420d4e5ec99655030ef9b9c Mon Sep 17 00:00:00 2001 From: Nikolaj Olsson Date: Wed, 20 Mar 2024 19:53:32 +0100 Subject: [PATCH] Fix for Whisper Post-processing - thx Purfview/cvrle77 :) Somewhat related to #8044 --- src/Test/Core/AudioToTextTest.cs | 102 ++++++++++++++++++ .../AutoTranslate/MergeAndSplitHelperTest.cs | 13 ++- .../AudioToText/AudioToTextPostProcessor.cs | 57 +++++++--- src/libse/Common/Subtitle.cs | 3 +- 4 files changed, 154 insertions(+), 21 deletions(-) diff --git a/src/Test/Core/AudioToTextTest.cs b/src/Test/Core/AudioToTextTest.cs index 565981aa6..368a3a4bf 100644 --- a/src/Test/Core/AudioToTextTest.cs +++ b/src/Test/Core/AudioToTextTest.cs @@ -115,5 +115,107 @@ Lexington, Massachusetts."; Assert.IsTrue(fixedSubtitle.Paragraphs[4].Text.EndsWith("camp.", StringComparison.Ordinal)); Assert.IsTrue(fixedSubtitle.Paragraphs[5].Text.StartsWith("Hey there", StringComparison.Ordinal)); } + + [TestMethod] + public void TryForWholeSentences1() + { + var raw = @"12 +00:00:25,500 --> 00:00:27,060 +Oh, my... Bob, right? + +13 +00:00:28,560 --> 00:00:29,220 +Could be fun. + +14 +00:00:29,660 --> 00:00:32,580 +Well, we could get to know each other a +little, maybe loosen things up around + +15 +00:00:32,580 --> 00:00:33,060 +here? + +16 +00:00:33,680 --> 00:00:39,160 +Well, I've worked with this lot before, +and, erm... Yeah, this is as loose as they + +17 +00:00:39,160 --> 00:00:40,300 +get. + +18 +00:00:46,120 --> 00:00:46,340 +Hmm. + +19 +00:00:48,160 --> 00:00:49,120 +What's the about that, Bob's? + +20 +00:00:49,120 --> 00:00:49,860 +Oh, no. + +21 +00:00:50,580 --> 00:00:50,700 +Yep. + +22 +00:00:51,240 --> 00:00:52,600 +I felt that soon as I said it. + +23 +00:00:54,860 --> 00:00:56,340 +Right, I'm headed out. + +24 +00:00:57,460 --> 00:00:58,780 +Everyone have a great day, yeah? + +25 +00:00:58,780 --> 00:00:59,600 +Yeah. + +26 +00:01:00,600 --> 00:01:01,880 +Wait. + +27 +00:01:01,880 --> 00:01:02,480 +Wait."; + + var subtitle = new Subtitle(); + new SubRip().LoadSubtitle(subtitle, raw.SplitToLines(), null); + + var fixedSubtitle = AudioToTextPostProcessor.TryForWholeSentences(subtitle, "en", 42); + + Assert.AreEqual(14, fixedSubtitle.Paragraphs.Count); + Assert.IsTrue(fixedSubtitle.Paragraphs[2].Text == "Well, we could get to know each other a" + Environment.NewLine + "little, maybe loosen things up around here?"); + Assert.IsTrue(fixedSubtitle.Paragraphs[3].Text == "Well, I've worked with this lot before, and," + Environment.NewLine + "erm... Yeah, this is as loose as they get."); + Assert.IsTrue(fixedSubtitle.Paragraphs[4].Text == "Hmm."); + Assert.IsTrue(fixedSubtitle.Paragraphs[5].Text == "What's the about that, Bob's?"); + } + + [TestMethod] + public void TryForWholeSentences2() + { + var raw = @"1 +00:00:26,500 --> 00:00:27,060 +Yes, I think this could indeed be very good. But also + +2 +00:00:28,560 --> 00:00:29,220 +that could be fun indeed my friend."; + + var subtitle = new Subtitle(); + new SubRip().LoadSubtitle(subtitle, raw.SplitToLines(), null); + + var fixedSubtitle = AudioToTextPostProcessor.TryForWholeSentences(subtitle, "en", 42); + + Assert.AreEqual(2, fixedSubtitle.Paragraphs.Count); + Assert.IsTrue(fixedSubtitle.Paragraphs[0].Text == "Yes, I think this could" + Environment.NewLine + "indeed be very good."); + Assert.IsTrue(fixedSubtitle.Paragraphs[1].Text == "But also that could be" + Environment.NewLine + "fun indeed my friend."); + } } } diff --git a/src/Test/Logic/AutoTranslate/MergeAndSplitHelperTest.cs b/src/Test/Logic/AutoTranslate/MergeAndSplitHelperTest.cs index 90bf4e849..23daca2ba 100644 --- a/src/Test/Logic/AutoTranslate/MergeAndSplitHelperTest.cs +++ b/src/Test/Logic/AutoTranslate/MergeAndSplitHelperTest.cs @@ -172,15 +172,22 @@ namespace Test.Logic.AutoTranslate Assert.IsTrue(splitResult[0].Length > 5); Assert.IsTrue(splitResult[1].Length > 5); Assert.IsTrue(splitResult[2].Length > 5); - Assert.AreEqual(string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)), string.Join(" ", splitResult)); + + var subtitleText = string.Join("", subtitle.Paragraphs.Select(p => p.Text)).RemoveChar('\n', '\r', ' '); + var splitText = string.Join("", splitResult).RemoveChar('\n', '\r', ' '); + Assert.AreEqual(subtitleText, splitText); + + Assert.AreEqual("Hallo there. In the" + Environment.NewLine + "garden today are we?", splitResult[0]); + Assert.AreEqual("So I will very", splitResult[1]); + Assert.AreEqual("soon be going home to Sweden.", splitResult[2]); Assert.AreEqual("My name is Peter!", splitResult[3]); Assert.AreEqual("My name is Peter! And Jones.", splitResult[4]); Assert.AreEqual("My name is Peter. And Jones.", splitResult[5]); Assert.AreEqual("", splitResult[6]); Assert.AreEqual("Hallo there.", splitResult[7]); - var inputText = string.Join(" ", subtitle.Paragraphs.Select(p => p.Text)).Replace(Environment.NewLine, " "); - var splitResultText = string.Join(" ", splitResult); + var inputText = string.Join("", subtitle.Paragraphs.Select(p => p.Text)).RemoveChar('\n', '\r', ' '); + var splitResultText = string.Join("", splitResult).RemoveChar('\n', '\r', ' '); Assert.AreEqual(inputText, splitResultText); } } diff --git a/src/libse/AudioToText/AudioToTextPostProcessor.cs b/src/libse/AudioToText/AudioToTextPostProcessor.cs index 2adbc1d6e..bb73c304a 100644 --- a/src/libse/AudioToText/AudioToTextPostProcessor.cs +++ b/src/libse/AudioToText/AudioToTextPostProcessor.cs @@ -64,11 +64,7 @@ namespace Nikse.SubtitleEdit.Core.AudioToText continue; } - if (TwoLetterLanguageCode == "en") - { - // anything? - } - else if (TwoLetterLanguageCode == "da") + if (TwoLetterLanguageCode == "da") { if (paragraph.Text.Contains("Danske tekster af nicolai winther", StringComparison.OrdinalIgnoreCase)) { @@ -144,10 +140,12 @@ namespace Nikse.SubtitleEdit.Core.AudioToText return subtitle; } - public static Subtitle TryForWholeSentences(Subtitle inputSubtitle, string language, int lineMxLength) + public static Subtitle TryForWholeSentences(Subtitle inputSubtitle, string language, int lineMaxLength) { var s = new Subtitle(inputSubtitle); const int maxMoveChunkSize = 15; + var deleteIndices = new List(); + for (var i = 0; i < s.Paragraphs.Count - 1; i++) { var p = s.Paragraphs[i]; @@ -158,14 +156,21 @@ namespace Nikse.SubtitleEdit.Core.AudioToText p.EndTime.TotalMilliseconds - next.StartTime.TotalMilliseconds > 100 || p.Text.Contains('<') || next.Text.Contains('<') || - !(p.Text.Contains('.') || next.Text.Contains('.')) || - p.Text.EndsWith('.')) + !(p.Text.Contains('.') || p.Text.Contains('?') || p.Text.Contains('!') || next.Text.Contains('.') || next.Text.Contains('?') || next.Text.Contains('!')) || + p.Text.EndsWith('.') || + p.Text.EndsWith('?') || + p.Text.EndsWith('!')) + { + continue; + } + + if (deleteIndices.Contains(i)) { continue; } // check for period in last part of p - var lastPeriodIdx = p.Text.LastIndexOf('.'); + var lastPeriodIdx = p.Text.LastIndexOfAny(new char[] { '.', '?', '!' }); if (lastPeriodIdx > 3 && lastPeriodIdx > p.Text.Length - maxMoveChunkSize) { var newCurrentText = p.Text.Substring(0, lastPeriodIdx + 1).Trim(); @@ -177,23 +182,31 @@ namespace Nikse.SubtitleEdit.Core.AudioToText var arrayCurrent = newCurrentText.SplitToLines(); var arrayNext = newNextText.SplitToLines(); - var currentOk = arrayCurrent.Count == 1 || (arrayCurrent.Count == 2 && arrayCurrent[0].Length <= lineMxLength); - var nextOk = arrayNext.Count == 1 || (arrayNext.Count == 2 && arrayNext[0].Length <= lineMxLength); + var currentOk = arrayCurrent.Count == 1 || (arrayCurrent.Count == 2 && arrayCurrent[0].Length < lineMaxLength * 2); + var nextOk = arrayNext.Count == 1 || (arrayNext.Count == 2 && arrayNext[0].Length < lineMaxLength * 2); if (currentOk && nextOk) { p.Text = newCurrentText; next.Text = newNextText; - //TODO: calc time + if (string.IsNullOrWhiteSpace(newCurrentText)) + { + deleteIndices.Add(i); + next.StartTime.TotalMilliseconds = p.StartTime.TotalMilliseconds; + } + else + { + //TODO: calc time + } continue; } } // check for period in beginning of next - var firstPeriodIdx = next.Text.IndexOf('.'); - if (firstPeriodIdx > 3 && firstPeriodIdx < maxMoveChunkSize) + var firstPeriodIdx = next.Text.IndexOfAny(new char[] { '.', '?', '!' }); + if (firstPeriodIdx >= 3 && firstPeriodIdx < maxMoveChunkSize) { var newCurrentText = next.Text.Substring(0, firstPeriodIdx + 1).Trim(); var newNextText = next.Text.Remove(0, firstPeriodIdx + 1).Trim(); @@ -204,19 +217,29 @@ namespace Nikse.SubtitleEdit.Core.AudioToText var arrayCurrent = newCurrentText.SplitToLines(); var arrayNext = newNextText.SplitToLines(); - var currentOk = arrayCurrent.Count == 1 || (arrayCurrent.Count == 2 && arrayCurrent[0].Length <= lineMxLength); - var nextOk = arrayNext.Count == 1 || (arrayNext.Count == 2 && arrayNext[0].Length <= lineMxLength); + var currentOk = arrayCurrent.Count == 1 || (arrayCurrent.Count == 2 && arrayCurrent[0].Length < lineMaxLength * 2); + var nextOk = arrayNext.Count == 1 || (arrayNext.Count == 2 && arrayNext[0].Length < lineMaxLength * 2); if (currentOk && nextOk) { p.Text = newCurrentText; next.Text = newNextText; - //TODO: calc time + if (string.IsNullOrWhiteSpace(newNextText)) + { + deleteIndices.Add(i + 1); + p.EndTime.TotalMilliseconds = next.EndTime.TotalMilliseconds; + } + else + { + //TODO: calc time + } } } } + s.RemoveParagraphsByIndices(deleteIndices); + return s; } diff --git a/src/libse/Common/Subtitle.cs b/src/libse/Common/Subtitle.cs index a8a53dc32..4338c74a2 100644 --- a/src/libse/Common/Subtitle.cs +++ b/src/libse/Common/Subtitle.cs @@ -756,7 +756,7 @@ namespace Nikse.SubtitleEdit.Core.Common /// Number of lines deleted public int RemoveParagraphsByIndices(IEnumerable indices) { - int count = 0; + var count = 0; foreach (var index in indices.OrderByDescending(p => p)) { if (index >= 0 && index < Paragraphs.Count) @@ -765,6 +765,7 @@ namespace Nikse.SubtitleEdit.Core.Common count++; } } + return count; }