Improve words-without-space-split

This commit is contained in:
niksedk 2021-12-18 17:31:56 +01:00
parent ac395f9b5d
commit fdafbaeff8
3 changed files with 5907 additions and 4064 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -24,8 +24,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
UiUtil.FixFonts(this); UiUtil.FixFonts(this);
FillSpellCheckDictionaries(); FillSpellCheckDictionaries();
_subtitleList = new List<Subtitle>(); _subtitleList = new List<Subtitle>();
comboBoxMinOccurrences.SelectedIndex = 13; comboBoxMinOccurrences.SelectedIndex = 8;
comboBoxMinOccurrencesLongWords.SelectedIndex = 5; comboBoxMinOccurrencesLongWords.SelectedIndex = 3;
listViewInputFiles.AutoSizeLastColumn(); listViewInputFiles.AutoSizeLastColumn();
labelStatus.Text = string.Empty; labelStatus.Text = string.Empty;
} }
@@ -192,20 +192,20 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{ {
if (saveFileDialog.ShowDialog(this) == DialogResult.OK) if (saveFileDialog.ShowDialog(this) == DialogResult.OK)
{ {
var list = new List<string>(); var list = new Dictionary<string, int>();
foreach (var word in wordDictionary) foreach (var word in wordDictionary)
{ {
if (word.Key.Length < 5 && word.Value >= minUseCountSmall || if (word.Key.Length < 5 && word.Value >= minUseCountSmall ||
word.Key.Length >= 5 && word.Value >= minUseCountLarge) word.Key.Length >= 5 && word.Value >= minUseCountLarge)
{ {
list.Add(word.Key); list.Add(word.Key, word.Value);
} }
} }
var sb = new StringBuilder(); var sb = new StringBuilder();
foreach (var word in list.OrderByDescending(prop => prop.Length)) foreach (var word in list.OrderByDescending(p => p.Key.Length).ThenByDescending(p => p.Value))
{ {
sb.AppendLine(word); sb.AppendLine(word.Key);
} }
File.WriteAllText(saveFileDialog.FileName, sb.ToString()); File.WriteAllText(saveFileDialog.FileName, sb.ToString());