Testing some German nouns for fix casing after audio-to-text

This commit is contained in:
niksedk 2022-06-29 05:20:18 +02:00
parent 69cfe59371
commit 2d225420aa
3 changed files with 3641 additions and 0 deletions

3616
Dictionaries/deu_Nouns.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -236,6 +236,7 @@ Source: ..\Dictionaries\fra_WordSplitList.txt; DestDir: {userappdata}\Subtit
Source: ..\Dictionaries\ita_WordSplitList.txt; DestDir: {userappdata}\Subtitle Edit\Dictionaries; Flags: ignoreversion onlyifdoesntexist; Components: main Source: ..\Dictionaries\ita_WordSplitList.txt; DestDir: {userappdata}\Subtitle Edit\Dictionaries; Flags: ignoreversion onlyifdoesntexist; Components: main
Source: ..\Dictionaries\pol_WordSplitList.txt; DestDir: {userappdata}\Subtitle Edit\Dictionaries; Flags: ignoreversion onlyifdoesntexist; Components: main Source: ..\Dictionaries\pol_WordSplitList.txt; DestDir: {userappdata}\Subtitle Edit\Dictionaries; Flags: ignoreversion onlyifdoesntexist; Components: main
Source: ..\Dictionaries\spa_WordSplitList.txt; DestDir: {userappdata}\Subtitle Edit\Dictionaries; Flags: ignoreversion onlyifdoesntexist; Components: main Source: ..\Dictionaries\spa_WordSplitList.txt; DestDir: {userappdata}\Subtitle Edit\Dictionaries; Flags: ignoreversion onlyifdoesntexist; Components: main
Source: ..\Dictionaries\deu_Nouns.txt; DestDir: {userappdata}\Subtitle Edit\Dictionaries; Flags: ignoreversion onlyifdoesntexist; Components: main
Source: ..\Ocr\Latin.db; DestDir: {userappdata}\Subtitle Edit\Ocr; Flags: ignoreversion uninsneveruninstall onlyifdoesntexist; Components: main Source: ..\Ocr\Latin.db; DestDir: {userappdata}\Subtitle Edit\Ocr; Flags: ignoreversion uninsneveruninstall onlyifdoesntexist; Components: main
@ -372,6 +373,7 @@ Type: files; Name: {app}\Dictionaries\fra_WordSplitList.txt; Check: Is
Type: files; Name: {app}\Dictionaries\ita_WordSplitList.txt; Check: IsUpgrade() Type: files; Name: {app}\Dictionaries\ita_WordSplitList.txt; Check: IsUpgrade()
Type: files; Name: {app}\Dictionaries\pol_WordSplitList.txt; Check: IsUpgrade() Type: files; Name: {app}\Dictionaries\pol_WordSplitList.txt; Check: IsUpgrade()
Type: files; Name: {app}\Dictionaries\spa_WordSplitList.txt; Check: IsUpgrade() Type: files; Name: {app}\Dictionaries\spa_WordSplitList.txt; Check: IsUpgrade()
Type: files; Name: {app}\Dictionaries\deu_Nouns.txt; Check: IsUpgrade()
Type: dirifempty; Name: {app}\Dictionaries; Check: IsUpgrade() Type: dirifempty; Name: {app}\Dictionaries; Check: IsUpgrade()
Type: files; Name: {app}\TessData\eng.DangAmbigs; Check: IsUpgrade() Type: files; Name: {app}\TessData\eng.DangAmbigs; Check: IsUpgrade()

View File

@ -1,6 +1,8 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO;
using System.Linq; using System.Linq;
using System.Text;
using Nikse.SubtitleEdit.Core.Common; using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.Dictionaries; using Nikse.SubtitleEdit.Core.Dictionaries;
using Nikse.SubtitleEdit.Core.Forms.FixCommonErrors; using Nikse.SubtitleEdit.Core.Forms.FixCommonErrors;
@ -275,6 +277,27 @@ namespace Nikse.SubtitleEdit.Core.AudioToText
} }
} }
// Fix casing of German nouns, using the word list in "deu_Nouns.txt" from the
// dictionaries directory. Nothing is changed if the language is not German or
// the word list file is missing.
if (language == "de")
{
    var inputFile = Path.Combine(Configuration.DictionariesDirectory, "deu_Nouns.txt");
    if (File.Exists(inputFile))
    {
        // Read the noun list once and reuse it for every paragraph.
        var nounList = FileUtil.ReadAllLinesShared(inputFile, Encoding.UTF8);
        foreach (var paragraph in subtitle.Paragraphs)
        {
            var text = paragraph.Text;

            // Guard against null/empty text BEFORE it is handed to any helper;
            // previously this check ran only after the text had been dereferenced.
            if (string.IsNullOrEmpty(text))
            {
                continue;
            }

            // Lines that are entirely uppercase (ignoring tags) are left untouched.
            var textNoTags = HtmlUtil.RemoveHtmlTags(text, true);
            if (textNoTags == textNoTags.ToUpperInvariant())
            {
                continue;
            }

            var st = new StrippableText(text);
            st.FixCasing(nounList, true, false, false, string.Empty);
            paragraph.Text = st.MergedString;
        }
    }
}
return subtitle; return subtitle;
} }