From 9be405fba3af03412483ec00446d1937df7595c4 Mon Sep 17 00:00:00 2001 From: niksedk Date: Sat, 24 Dec 2022 14:32:19 +0100 Subject: [PATCH] Default only if word-split-list on non-propercase words - thx JP :) --- LanguageBaseEnglish.xml | 1 + src/libse/Common/Settings.cs | 9 ++++++ .../StringWithoutSpaceSplitToWords.cs | 9 ++++++ src/ui/Forms/Options/Settings.Designer.cs | 31 +++++++++++++------ src/ui/Forms/Options/Settings.cs | 3 ++ src/ui/Logic/Language.cs | 1 + src/ui/Logic/LanguageDeserializer.cs | 3 ++ src/ui/Logic/LanguageStructure.cs | 1 + 8 files changed, 49 insertions(+), 9 deletions(-) diff --git a/LanguageBaseEnglish.xml b/LanguageBaseEnglish.xml index 2ed4957c8..d9dd1f538 100644 --- a/LanguageBaseEnglish.xml +++ b/LanguageBaseEnglish.xml @@ -2348,6 +2348,7 @@ can edit in same subtitle file (collaboration) Music symbols to replace (separate by comma) Fix common OCR errors - also use hard-coded rules Use word split list (OCR + FCE) + Avoid propercase Fix short display time - allow move of start time Skip step one (choose fix rules) Default format diff --git a/src/libse/Common/Settings.cs b/src/libse/Common/Settings.cs index 382ce3cbe..854c50fe2 100644 --- a/src/libse/Common/Settings.cs +++ b/src/libse/Common/Settings.cs @@ -141,6 +141,7 @@ namespace Nikse.SubtitleEdit.Core.Common public string OcrTrainMergedLetters { get; set; } public string OcrTrainSrtFile { get; set; } public bool OcrUseWordSplitList { get; set; } + public bool OcrUseWordSplitListAvoidPropercase { get; set; } public string BDOpenIn { get; set; } public string Interjections { get; set; } public string MicrosoftBingApiId { get; set; } @@ -450,6 +451,7 @@ namespace Nikse.SubtitleEdit.Core.Common OcrTrainFonts = "Arial;Calibri;Corbel;Futura Std Book;Futura Bis;Helvetica Neue;Lucida Console;Tahoma;Trebuchet MS;Verdana"; OcrTrainMergedLetters = "ff ft fi fj fy fl rf rt rv rw ry rt rz ryt tt TV tw yt yw wy wf ryt xy"; OcrUseWordSplitList = true; + OcrUseWordSplitListAvoidPropercase = true; Interjections = "Ah;Ahem;Ahh;Ahhh;Ahhhh;Eh;Ehh;Ehhh;Hm;Hmm;Hmmm;Huh;Mm;Mmm;Mmmm;Phew;Gah;Oh;Ohh;Ohhh;Ow;Oww;Owww;Ugh;Ughh;Uh;Uhh;Uhhh;Whew"; MicrosoftTranslatorTokenEndpoint = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken"; GoogleTranslateNoKeyWarningShow = true; @@ -4575,6 +4577,12 @@ $HorzAlign = Center settings.Tools.OcrUseWordSplitList = Convert.ToBoolean(subNode.InnerText, CultureInfo.InvariantCulture); } + subNode = node.SelectSingleNode("OcrUseWordSplitListAvoidPropercase"); + if (subNode != null) + { + settings.Tools.OcrUseWordSplitListAvoidPropercase = Convert.ToBoolean(subNode.InnerText, CultureInfo.InvariantCulture); + } + subNode = node.SelectSingleNode("BDOpenIn"); if (subNode != null) { @@ -10223,6 +10231,7 @@ $HorzAlign = Center textWriter.WriteElementString("OcrTrainMergedLetters", settings.Tools.OcrTrainMergedLetters); textWriter.WriteElementString("OcrTrainSrtFile", settings.Tools.OcrTrainSrtFile); textWriter.WriteElementString("OcrUseWordSplitList", settings.Tools.OcrUseWordSplitList.ToString(CultureInfo.InvariantCulture)); + textWriter.WriteElementString("OcrUseWordSplitListAvoidPropercase", settings.Tools.OcrUseWordSplitListAvoidPropercase.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("BDOpenIn", settings.Tools.BDOpenIn); textWriter.WriteElementString("Interjections", settings.Tools.Interjections); textWriter.WriteElementString("MicrosoftBingApiId", settings.Tools.MicrosoftBingApiId); diff --git a/src/libse/Dictionaries/StringWithoutSpaceSplitToWords.cs b/src/libse/Dictionaries/StringWithoutSpaceSplitToWords.cs index cf866798e..09983f665 100644 --- a/src/libse/Dictionaries/StringWithoutSpaceSplitToWords.cs +++ b/src/libse/Dictionaries/StringWithoutSpaceSplitToWords.cs @@ -45,6 +45,15 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries return input; } + if (Configuration.Settings.Tools.OcrUseWordSplitListAvoidPropercase && input.Length > 1 && + input.StartsWith(input[0].ToString().ToUpperInvariant()) && + input != input.ToLowerInvariant() && input != input.ToUpperInvariant() && + input.Length < 12) + { + //TODO: Improve... some better way to detect uncommon/special names + return input; + } + for (var i = 0; i < words.Length; i++) { var w = words[i]; diff --git a/src/ui/Forms/Options/Settings.Designer.cs b/src/ui/Forms/Options/Settings.Designer.cs index ba6ec69be..6beee877f 100644 --- a/src/ui/Forms/Options/Settings.Designer.cs +++ b/src/ui/Forms/Options/Settings.Designer.cs @@ -414,6 +414,7 @@ this.labelUpdateFileTypeAssociationsStatus = new System.Windows.Forms.Label(); this.imageListFileTypeAssociations = new System.Windows.Forms.ImageList(this.components); this.toolTipDialogStylePreview = new System.Windows.Forms.ToolTip(this.components); + this.checkBoxUseWordSplitListAvoidPropercase = new System.Windows.Forms.CheckBox(); this.panelGeneral.SuspendLayout(); this.groupBoxMiscellaneous.SuspendLayout(); this.groupBoxGeneralRules.SuspendLayout(); @@ -3532,9 +3533,9 @@ this.groupBoxSpellCheck.Controls.Add(this.checkBoxTreatINQuoteAsING); this.groupBoxSpellCheck.Controls.Add(this.checkBoxSpellCheckOneLetterWords); this.groupBoxSpellCheck.Controls.Add(this.checkBoxSpellCheckAutoChangeNames); - this.groupBoxSpellCheck.Location = new System.Drawing.Point(0, 381); + this.groupBoxSpellCheck.Location = new System.Drawing.Point(0, 389); this.groupBoxSpellCheck.Name = "groupBoxSpellCheck"; - this.groupBoxSpellCheck.Size = new System.Drawing.Size(408, 139); + this.groupBoxSpellCheck.Size = new System.Drawing.Size(408, 131); this.groupBoxSpellCheck.TabIndex = 4; this.groupBoxSpellCheck.TabStop = false; this.groupBoxSpellCheck.Text = "Spell check"; @@ -3591,6 +3592,7 @@ // // groupBoxFixCommonErrors // + this.groupBoxFixCommonErrors.Controls.Add(this.checkBoxUseWordSplitListAvoidPropercase); this.groupBoxFixCommonErrors.Controls.Add(this.checkBoxUseWordSplitList); this.groupBoxFixCommonErrors.Controls.Add(this.buttonFixContinuationStyleSettings); this.groupBoxFixCommonErrors.Controls.Add(this.checkBoxFceSkipStep1); @@ -3602,7 +3604,7 @@ this.groupBoxFixCommonErrors.Controls.Add(this.labelToolsMusicSymbol); this.groupBoxFixCommonErrors.Location = new System.Drawing.Point(0, 123); this.groupBoxFixCommonErrors.Name = "groupBoxFixCommonErrors"; - this.groupBoxFixCommonErrors.Size = new System.Drawing.Size(408, 252); + this.groupBoxFixCommonErrors.Size = new System.Drawing.Size(408, 260); this.groupBoxFixCommonErrors.TabIndex = 3; this.groupBoxFixCommonErrors.TabStop = false; this.groupBoxFixCommonErrors.Text = "Fix common errors"; @@ -3610,7 +3612,7 @@ // checkBoxUseWordSplitList // this.checkBoxUseWordSplitList.AutoSize = true; - this.checkBoxUseWordSplitList.Location = new System.Drawing.Point(15, 137); + this.checkBoxUseWordSplitList.Location = new System.Drawing.Point(15, 125); this.checkBoxUseWordSplitList.Name = "checkBoxUseWordSplitList"; this.checkBoxUseWordSplitList.Size = new System.Drawing.Size(231, 17); this.checkBoxUseWordSplitList.TabIndex = 3; @@ -3619,7 +3621,7 @@ // // buttonFixContinuationStyleSettings // - this.buttonFixContinuationStyleSettings.Location = new System.Drawing.Point(16, 212); + this.buttonFixContinuationStyleSettings.Location = new System.Drawing.Point(16, 220); this.buttonFixContinuationStyleSettings.Name = "buttonFixContinuationStyleSettings"; this.buttonFixContinuationStyleSettings.Size = new System.Drawing.Size(271, 23); this.buttonFixContinuationStyleSettings.TabIndex = 6; @@ -3630,7 +3632,7 @@ // checkBoxFceSkipStep1 // this.checkBoxFceSkipStep1.AutoSize = true; - this.checkBoxFceSkipStep1.Location = new System.Drawing.Point(15, 185); + this.checkBoxFceSkipStep1.Location = new System.Drawing.Point(15, 193); this.checkBoxFceSkipStep1.Name = "checkBoxFceSkipStep1"; this.checkBoxFceSkipStep1.Size = new System.Drawing.Size(176, 17); this.checkBoxFceSkipStep1.TabIndex = 5; @@ -3640,7 +3642,7 @@ // checkBoxFixShortDisplayTimesAllowMoveStartTime // this.checkBoxFixShortDisplayTimesAllowMoveStartTime.AutoSize = true; - this.checkBoxFixShortDisplayTimesAllowMoveStartTime.Location = new System.Drawing.Point(15, 162); + this.checkBoxFixShortDisplayTimesAllowMoveStartTime.Location = new System.Drawing.Point(15, 170); this.checkBoxFixShortDisplayTimesAllowMoveStartTime.Name = "checkBoxFixShortDisplayTimesAllowMoveStartTime"; this.checkBoxFixShortDisplayTimesAllowMoveStartTime.Size = new System.Drawing.Size(252, 17); this.checkBoxFixShortDisplayTimesAllowMoveStartTime.TabIndex = 4; @@ -3650,7 +3652,7 @@ // checkBoxFixCommonOcrErrorsUsingHardcodedRules // this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.AutoSize = true; - this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.Location = new System.Drawing.Point(15, 115); + this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.Location = new System.Drawing.Point(15, 103); this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.Name = "checkBoxFixCommonOcrErrorsUsingHardcodedRules"; this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.Size = new System.Drawing.Size(268, 17); this.checkBoxFixCommonOcrErrorsUsingHardcodedRules.TabIndex = 2; @@ -4971,18 +4973,28 @@ this.toolTipDialogStylePreview.InitialDelay = 500; this.toolTipDialogStylePreview.ReshowDelay = 100; // + // checkBoxUseWordSplitListAvoidPropercase + // + this.checkBoxUseWordSplitListAvoidPropercase.AutoSize = true; + this.checkBoxUseWordSplitListAvoidPropercase.Location = new System.Drawing.Point(34, 146); + this.checkBoxUseWordSplitListAvoidPropercase.Name = "checkBoxUseWordSplitListAvoidPropercase"; + this.checkBoxUseWordSplitListAvoidPropercase.Size = new System.Drawing.Size(102, 17); + this.checkBoxUseWordSplitListAvoidPropercase.TabIndex = 35; + this.checkBoxUseWordSplitListAvoidPropercase.Text = "Skip propercase"; + this.checkBoxUseWordSplitListAvoidPropercase.UseVisualStyleBackColor = true; + // // Settings // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; this.ClientSize = new System.Drawing.Size(1092, 574); this.Controls.Add(this.labelUpdateFileTypeAssociationsStatus); + this.Controls.Add(this.panelTools); this.Controls.Add(this.panelGeneral); this.Controls.Add(this.panelFont); this.Controls.Add(this.panelToolBar); this.Controls.Add(this.panelNetwork); this.Controls.Add(this.panelWaveform); - this.Controls.Add(this.panelTools); this.Controls.Add(this.panelFileTypeAssociations); this.Controls.Add(this.panelShortcuts); this.Controls.Add(this.panelSubtitleFormats); @@ -5494,5 +5506,6 @@ private System.Windows.Forms.Button buttonTranslationAutoSuffix; private System.Windows.Forms.ComboBox comboBoxTranslationAutoSuffix; private System.Windows.Forms.Label labelTranslationAutoSuffix; + private System.Windows.Forms.CheckBox checkBoxUseWordSplitListAvoidPropercase; } } \ No newline at end of file diff --git a/src/ui/Forms/Options/Settings.cs b/src/ui/Forms/Options/Settings.cs index 3bd521c13..976f911ee 100644 --- a/src/ui/Forms/Options/Settings.cs +++ b/src/ui/Forms/Options/Settings.cs @@ -737,6 +737,7 @@ namespace Nikse.SubtitleEdit.Forms.Options labelToolsMusicSymbolsToReplace.Text = language.MusicSymbolsReplace; checkBoxFixCommonOcrErrorsUsingHardcodedRules.Text = language.FixCommonOcrErrorsUseHardcodedRules; checkBoxUseWordSplitList.Text = language.UseWordSplitList; + checkBoxUseWordSplitListAvoidPropercase.Text = language.AvoidPropercase; checkBoxFixShortDisplayTimesAllowMoveStartTime.Text = language.FixCommonerrorsFixShortDisplayTimesAllowMoveStartTime; checkBoxFceSkipStep1.Text = language.FixCommonErrorsSkipStepOne; groupBoxSpellCheck.Text = language.SpellCheck; @@ -954,6 +955,7 @@ namespace Nikse.SubtitleEdit.Forms.Options textBoxMusicSymbolsToReplace.Text = toolsSettings.MusicSymbolReplace; checkBoxFixCommonOcrErrorsUsingHardcodedRules.Checked = toolsSettings.OcrFixUseHardcodedRules; checkBoxUseWordSplitList.Checked = toolsSettings.OcrUseWordSplitList; + checkBoxUseWordSplitListAvoidPropercase.Checked = toolsSettings.OcrUseWordSplitListAvoidPropercase; checkBoxFixShortDisplayTimesAllowMoveStartTime.Checked = toolsSettings.FixShortDisplayTimesAllowMoveStartTime; checkBoxFceSkipStep1.Checked = toolsSettings.FixCommonErrorsSkipStepOne; checkBoxSpellCheckAutoChangeNames.Checked = toolsSettings.SpellCheckAutoChangeNameCasing; @@ -2016,6 +2018,7 @@ namespace Nikse.SubtitleEdit.Forms.Options toolsSettings.OcrFixUseHardcodedRules = checkBoxFixCommonOcrErrorsUsingHardcodedRules.Checked; toolsSettings.OcrUseWordSplitList = checkBoxUseWordSplitList.Checked; + toolsSettings.OcrUseWordSplitListAvoidPropercase = checkBoxUseWordSplitListAvoidPropercase.Checked; toolsSettings.FixShortDisplayTimesAllowMoveStartTime = checkBoxFixShortDisplayTimesAllowMoveStartTime.Checked; toolsSettings.FixCommonErrorsSkipStepOne = checkBoxFceSkipStep1.Checked; toolsSettings.MicrosoftTranslatorApiKey = textBoxBingClientSecret.Text.Trim(); diff --git a/src/ui/Logic/Language.cs b/src/ui/Logic/Language.cs index 46e3cf900..857bf41e1 100644 --- a/src/ui/Logic/Language.cs +++ b/src/ui/Logic/Language.cs @@ -2680,6 +2680,7 @@ can edit in same subtitle file (collaboration)", MusicSymbolsReplace = "Music symbols to replace (separate by comma)", FixCommonOcrErrorsUseHardcodedRules = "Fix common OCR errors - also use hard-coded rules", UseWordSplitList = "Use word split list (OCR + FCE)", + AvoidPropercase = "Avoid propercase", FixCommonerrorsFixShortDisplayTimesAllowMoveStartTime = "Fix short display time - allow move of start time", FixCommonErrorsSkipStepOne = "Skip step one (choose fix rules)", DefaultFormat = "Default format", diff --git a/src/ui/Logic/LanguageDeserializer.cs b/src/ui/Logic/LanguageDeserializer.cs index 333a83a98..36afd9cb0 100644 --- a/src/ui/Logic/LanguageDeserializer.cs +++ b/src/ui/Logic/LanguageDeserializer.cs @@ -6391,6 +6391,9 @@ namespace Nikse.SubtitleEdit.Logic case "Settings/UseWordSplitList": language.Settings.UseWordSplitList = reader.Value; break; + case "Settings/AvoidPropercase": + language.Settings.AvoidPropercase = reader.Value; + break; case "Settings/FixCommonerrorsFixShortDisplayTimesAllowMoveStartTime": language.Settings.FixCommonerrorsFixShortDisplayTimesAllowMoveStartTime = reader.Value; break; diff --git a/src/ui/Logic/LanguageStructure.cs b/src/ui/Logic/LanguageStructure.cs index ab34b4301..c78769df5 100644 --- a/src/ui/Logic/LanguageStructure.cs +++ b/src/ui/Logic/LanguageStructure.cs @@ -2531,6 +2531,7 @@ namespace Nikse.SubtitleEdit.Logic public string MusicSymbolsReplace { get; set; } public string FixCommonOcrErrorsUseHardcodedRules { get; set; } public string UseWordSplitList { get; set; } + public string AvoidPropercase { get; set; } public string FixCommonerrorsFixShortDisplayTimesAllowMoveStartTime { get; set; } public string FixCommonErrorsSkipStepOne { get; set; }