Work on language filter

This commit is contained in:
niksedk 2023-09-25 17:51:16 +02:00
parent e86e5751b5
commit 8c10d3425a
8 changed files with 199 additions and 35 deletions

View File

@ -1,11 +1,11 @@
using Nikse.SubtitleEdit.Logic;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Forms.Ocr;
using Nikse.SubtitleEdit.Logic;
using Nikse.SubtitleEdit.Logic.Ocr;
using System;
using System.Windows.Forms;
using Nikse.SubtitleEdit.Core.Common;
using System.IO;
using System.Linq;
using Nikse.SubtitleEdit.Forms.Ocr;
using System.Windows.Forms;
namespace Nikse.SubtitleEdit.Forms
{
@ -156,7 +156,7 @@ namespace Nikse.SubtitleEdit.Forms
private void buttonGetTesseractDictionaries_Click(object sender, EventArgs e)
{
using (var form = new GetTesseractDictionaries(comboBoxLanguage.Items.Count == 0))
using (var form = new GetTesseractDictionaries())
{
form.ShowDialog(this);
Configuration.Settings.VobSubOcr.TesseractLastLanguage = form.ChosenLanguage;

View File

@ -4,7 +4,6 @@ using Nikse.SubtitleEdit.Forms.Options;
using Nikse.SubtitleEdit.Logic;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.IO.Compression;

View File

@ -39,12 +39,27 @@
//
// comboBoxDictionaries
//
this.comboBoxDictionaries.BackColor = System.Drawing.SystemColors.Window;
this.comboBoxDictionaries.BackColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(240)))), ((int)(((byte)(240)))), ((int)(((byte)(240)))));
this.comboBoxDictionaries.BorderColor = System.Drawing.Color.FromArgb(((int)(((byte)(171)))), ((int)(((byte)(173)))), ((int)(((byte)(179)))));
this.comboBoxDictionaries.BorderColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(120)))), ((int)(((byte)(120)))), ((int)(((byte)(120)))));
this.comboBoxDictionaries.ButtonForeColor = System.Drawing.SystemColors.ControlText;
this.comboBoxDictionaries.ButtonForeColorDown = System.Drawing.Color.Orange;
this.comboBoxDictionaries.ButtonForeColorOver = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.comboBoxDictionaries.DropDownHeight = 400;
this.comboBoxDictionaries.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxDictionaries.DropDownWidth = 256;
this.comboBoxDictionaries.FormattingEnabled = true;
this.comboBoxDictionaries.Location = new System.Drawing.Point(22, 71);
this.comboBoxDictionaries.MaxLength = 32767;
this.comboBoxDictionaries.Name = "comboBoxDictionaries";
this.comboBoxDictionaries.SelectedIndex = -1;
this.comboBoxDictionaries.SelectedItem = null;
this.comboBoxDictionaries.SelectedText = "";
this.comboBoxDictionaries.Size = new System.Drawing.Size(256, 21);
this.comboBoxDictionaries.TabIndex = 21;
this.comboBoxDictionaries.UsePopupWindow = false;
this.comboBoxDictionaries.SelectedIndexChanged += new System.EventHandler(this.comboBoxDictionaries_SelectedIndexChanged);
//
// labelPleaseWait
//
@ -104,7 +119,7 @@
this.labelDescription1.TabIndex = 27;
this.labelDescription1.Text = "Get Tesseract OCR dictionaries from the web";
//
// GetTesseractDictionaries
// GetTesseract302Dictionaries
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
@ -120,7 +135,7 @@
this.KeyPreview = true;
this.MaximizeBox = false;
this.MinimizeBox = false;
this.Name = "GetTesseractDictionaries";
this.Name = "GetTesseract302Dictionaries";
this.ShowIcon = false;
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;

View File

@ -1,8 +1,10 @@
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.Http;
using Nikse.SubtitleEdit.Forms.Options;
using Nikse.SubtitleEdit.Logic;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
@ -15,7 +17,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
public sealed partial class GetTesseract302Dictionaries : Form
{
private List<string> _dictionaryDownloadLinks = new List<string>();
private Dictionary<string, string> _dictionaryDownloadLinks = new Dictionary<string, string>();
private string _xmlName;
private string _dictionaryFileName;
internal string ChosenLanguage { get; private set; }
@ -42,10 +44,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void LoadDictionaryList(string xmlResourceName)
{
_dictionaryDownloadLinks = new List<string>();
var languageFilter = new List<CultureInfo>();
var useAllLanguages = string.IsNullOrEmpty(Configuration.Settings.General.DefaultLanguages);
if (!useAllLanguages)
{
languageFilter = Utilities.GetSubtitleLanguageCultures(true).ToList();
}
_dictionaryDownloadLinks = new Dictionary<string, string>();
_xmlName = xmlResourceName;
System.Reflection.Assembly asm = System.Reflection.Assembly.GetExecutingAssembly();
var asm = System.Reflection.Assembly.GetExecutingAssembly();
var stream = asm.GetManifestResourceStream(_xmlName);
var nameList = new List<string>();
var nameListAll = new List<string>();
if (stream != null)
{
comboBoxDictionaries.Items.Clear();
@ -70,18 +81,63 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
var englishName = node.SelectSingleNode("EnglishName").InnerText;
var downloadLink = node.SelectSingleNode("DownloadLink").InnerText;
if (!string.IsNullOrEmpty(downloadLink))
{
var name = englishName;
nameListAll.Add(englishName);
comboBoxDictionaries.Items.Add(name);
_dictionaryDownloadLinks.Add(downloadLink);
if (useAllLanguages || IsInLanguageFilter(englishName, languageFilter))
{
nameList.Add(englishName);
}
_dictionaryDownloadLinks.Add(englishName, downloadLink);
}
}
comboBoxDictionaries.Items.AddRange(nameList.Count == 0 ? nameListAll.ToArray<object>() : nameList.ToArray<object>());
if (comboBoxDictionaries.Items.Count > 0)
{
comboBoxDictionaries.Items.Add(LanguageSettings.Current.General.ChangeLanguageFilter);
}
for (var i = 0; i < comboBoxDictionaries.Items.Count; i++)
{
if (comboBoxDictionaries.Items[i] is string n && n == "English")
{
comboBoxDictionaries.SelectedIndex = i;
break;
}
}
if (comboBoxDictionaries.SelectedIndex < 0)
{
comboBoxDictionaries.SelectedIndex = 0;
}
}
}
/// <summary>
/// Tells whether a dictionary's English name matches at least one culture in the language filter.
/// </summary>
/// <param name="englishName">English display name of the dictionary; a null or empty name never matches.</param>
/// <param name="languageFilter">Cultures to test the name against.</param>
/// <returns>
/// <c>true</c> if any culture's English name, or its three-letter ISO language name,
/// contains <paramref name="englishName"/> (ordinal, case-insensitive); otherwise <c>false</c>.
/// </returns>
private static bool IsInLanguageFilter(string englishName, List<CultureInfo> languageFilter)
{
    foreach (var culture in languageFilter)
    {
        // Nothing to compare against - an empty name can never match a culture.
        if (string.IsNullOrEmpty(englishName))
        {
            continue;
        }

        var matchesEnglishName = culture.EnglishName.Contains(englishName, StringComparison.OrdinalIgnoreCase);

        // NOTE(review): the ISO code is searched for the full English name, which can only
        // succeed when the name is no longer than the code - confirm this is intended.
        if (matchesEnglishName ||
            culture.ThreeLetterISOLanguageName.Contains(englishName, StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
private void FixLargeFonts()
{
if (labelDescription1.Left + labelDescription1.Width + 5 > Width)
@ -101,9 +157,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
Refresh();
Cursor = Cursors.WaitCursor;
var index = comboBoxDictionaries.SelectedIndex;
var url = _dictionaryDownloadLinks[index];
ChosenLanguage = comboBoxDictionaries.Items[index].ToString();
var language = comboBoxDictionaries.Items[comboBoxDictionaries.SelectedIndex].ToString();
var url = _dictionaryDownloadLinks[language];
ChosenLanguage = language;
try
{
@ -253,5 +309,21 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
e.SuppressKeyPress = true;
}
}
/// <summary>
/// Reacts to a selection change in the dictionary combo box. When the selected entry is the
/// "change language filter" item, the default-languages chooser is shown and the dictionary
/// list is reloaded afterwards.
/// </summary>
private void comboBoxDictionaries_SelectedIndexChanged(object sender, EventArgs e)
{
    // Only the special "change language filter" entry triggers any work here.
    if (comboBoxDictionaries.SelectedIndex < 0 ||
        comboBoxDictionaries.Text != LanguageSettings.Current.General.ChangeLanguageFilter)
    {
        return;
    }

    using (var chooser = new DefaultLanguagesChooser(Configuration.Settings.General.DefaultLanguages))
    {
        var dialogResult = chooser.ShowDialog(this);
        if (dialogResult == DialogResult.OK)
        {
            Configuration.Settings.General.DefaultLanguages = chooser.DefaultLanguages;
        }
    }

    // The list is reloaded whether or not the dialog was confirmed.
    // NOTE(review): the resource name is hard-coded here although LoadDictionaryList stores
    // its argument in _xmlName - confirm both always refer to the same resource.
    LoadDictionaryList("Nikse.SubtitleEdit.Resources.TesseractDictionaries.xml.gz");
}
}
}

View File

@ -39,12 +39,27 @@
//
// comboBoxDictionaries
//
this.comboBoxDictionaries.BackColor = System.Drawing.SystemColors.Window;
this.comboBoxDictionaries.BackColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(240)))), ((int)(((byte)(240)))), ((int)(((byte)(240)))));
this.comboBoxDictionaries.BorderColor = System.Drawing.Color.FromArgb(((int)(((byte)(171)))), ((int)(((byte)(173)))), ((int)(((byte)(179)))));
this.comboBoxDictionaries.BorderColorDisabled = System.Drawing.Color.FromArgb(((int)(((byte)(120)))), ((int)(((byte)(120)))), ((int)(((byte)(120)))));
this.comboBoxDictionaries.ButtonForeColor = System.Drawing.SystemColors.ControlText;
this.comboBoxDictionaries.ButtonForeColorDown = System.Drawing.Color.Orange;
this.comboBoxDictionaries.ButtonForeColorOver = System.Drawing.Color.FromArgb(((int)(((byte)(0)))), ((int)(((byte)(120)))), ((int)(((byte)(215)))));
this.comboBoxDictionaries.DropDownHeight = 400;
this.comboBoxDictionaries.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
this.comboBoxDictionaries.DropDownWidth = 256;
this.comboBoxDictionaries.FormattingEnabled = true;
this.comboBoxDictionaries.Location = new System.Drawing.Point(22, 71);
this.comboBoxDictionaries.MaxLength = 32767;
this.comboBoxDictionaries.Name = "comboBoxDictionaries";
this.comboBoxDictionaries.SelectedIndex = -1;
this.comboBoxDictionaries.SelectedItem = null;
this.comboBoxDictionaries.SelectedText = "";
this.comboBoxDictionaries.Size = new System.Drawing.Size(256, 21);
this.comboBoxDictionaries.TabIndex = 21;
this.comboBoxDictionaries.UsePopupWindow = false;
this.comboBoxDictionaries.SelectedIndexChanged += new System.EventHandler(this.comboBoxDictionaries_SelectedIndexChanged);
//
// labelPleaseWait
//

View File

@ -1,8 +1,10 @@
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.Http;
using Nikse.SubtitleEdit.Forms.Options;
using Nikse.SubtitleEdit.Logic;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
@ -19,7 +21,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private readonly List<TesseractDictionary> _dictionaries;
private readonly CancellationTokenSource _cancellationTokenSource;
public GetTesseractDictionaries(bool first)
public GetTesseractDictionaries()
{
UiUtil.PreInitialize(this);
InitializeComponent();
@ -33,14 +35,22 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
labelPleaseWait.Text = string.Empty;
buttonOK.Text = LanguageSettings.Current.General.Ok;
FixLargeFonts();
_dictionaries = TesseractDictionary.List().OrderBy(p=>p.Name).ToList();
LoadDictionaryList(first);
_dictionaries = TesseractDictionary.List().OrderBy(p => p.Name).ToList();
LoadDictionaryList();
comboBoxDictionaries.UsePopupWindow = true;
_cancellationTokenSource = new CancellationTokenSource();
}
private void LoadDictionaryList(bool first)
private void LoadDictionaryList()
{
var languageFilter = new List<CultureInfo>();
var useAllLanguages = string.IsNullOrEmpty(Configuration.Settings.General.DefaultLanguages);
if (!useAllLanguages)
{
languageFilter = Utilities.GetSubtitleLanguageCultures(true).ToList();
}
var dictionaries = new List<TesseractDictionary>();
comboBoxDictionaries.BeginUpdate();
comboBoxDictionaries.Items.Clear();
for (var i = 0; i < _dictionaries.Count; i++)
@ -48,14 +58,28 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
var d = _dictionaries[i];
if (!string.IsNullOrEmpty(d.Url))
{
comboBoxDictionaries.Items.Add(d);
if (first && d.Name == "English")
if (useAllLanguages || IsInLanguageFilter(d.Name, d.Code, languageFilter))
{
comboBoxDictionaries.SelectedIndex = i;
dictionaries.Add(d);
}
}
}
comboBoxDictionaries.Items.AddRange(dictionaries.Count == 0 ? _dictionaries.ToArray<object>() : dictionaries.ToArray<object>());
if (comboBoxDictionaries.Items.Count > 0)
{
comboBoxDictionaries.Items.Add(LanguageSettings.Current.General.ChangeLanguageFilter);
}
for (var i = 0; i < comboBoxDictionaries.Items.Count; i++)
{
if (comboBoxDictionaries.Items[i] is string n && n == "English")
{
comboBoxDictionaries.SelectedIndex = i;
break;
}
}
if (comboBoxDictionaries.SelectedIndex < 0)
{
comboBoxDictionaries.SelectedIndex = 0;
@ -64,6 +88,26 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
comboBoxDictionaries.EndUpdate();
}
/// <summary>
/// Tells whether a dictionary, identified by name and code, matches at least one culture
/// in the language filter.
/// </summary>
/// <param name="name">Dictionary name, compared against each culture's English name; ignored when null or empty.</param>
/// <param name="code">Dictionary code, compared against each culture's three-letter ISO language name; ignored when null or empty.</param>
/// <param name="languageFilter">Cultures to test against.</param>
/// <returns>
/// <c>true</c> if any culture's English name contains <paramref name="name"/> or its
/// three-letter ISO language name contains <paramref name="code"/> (ordinal, case-insensitive);
/// otherwise <c>false</c>.
/// </returns>
private static bool IsInLanguageFilter(string name, string code, List<CultureInfo> languageFilter)
{
    // These checks do not depend on the culture, so evaluate them once up front.
    var hasName = !string.IsNullOrEmpty(name);
    var hasCode = !string.IsNullOrEmpty(code);

    foreach (var culture in languageFilter)
    {
        var isMatch =
            (hasName && culture.EnglishName.Contains(name, StringComparison.OrdinalIgnoreCase)) ||
            (hasCode && culture.ThreeLetterISOLanguageName.Contains(code, StringComparison.OrdinalIgnoreCase));

        if (isMatch)
        {
            return true;
        }
    }

    return false;
}
private void FixLargeFonts()
{
if (labelDescription1.Left + labelDescription1.Width + 5 > Width)
@ -76,8 +120,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private void buttonDownload_Click(object sender, EventArgs e)
{
var index = comboBoxDictionaries.SelectedIndex;
var url = _dictionaries[index].Url;
var dictionary = comboBoxDictionaries.Items[comboBoxDictionaries.SelectedIndex] as TesseractDictionary;
if (dictionary == null)
{
return;
}
var url = dictionary.Url;
try
{
labelPleaseWait.Text = LanguageSettings.Current.General.PleaseWait;
@ -87,7 +136,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
Refresh();
Cursor = Cursors.WaitCursor;
ChosenLanguage = comboBoxDictionaries.Items[index].ToString();
ChosenLanguage = dictionary.ToString();
var httpClient = DownloaderFactory.MakeHttpClient();
using (var downloadStream = new MemoryStream())
@ -185,7 +234,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
throw new Exception("No content downloaded - missing file or no internet connection!");
}
string dictionaryFolder = Configuration.TesseractDataDirectory;
var dictionaryFolder = Configuration.TesseractDataDirectory;
if (!Directory.Exists(dictionaryFolder))
{
Directory.CreateDirectory(dictionaryFolder);
@ -233,5 +282,21 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
e.SuppressKeyPress = true;
}
}
/// <summary>
/// Reacts to a selection change in the dictionary combo box. When the selected entry is the
/// "change language filter" item, the default-languages chooser is shown and the dictionary
/// list is rebuilt afterwards.
/// </summary>
private void comboBoxDictionaries_SelectedIndexChanged(object sender, EventArgs e)
{
    // Only the special "change language filter" entry triggers any work here.
    if (comboBoxDictionaries.SelectedIndex < 0 ||
        comboBoxDictionaries.Text != LanguageSettings.Current.General.ChangeLanguageFilter)
    {
        return;
    }

    using (var chooser = new DefaultLanguagesChooser(Configuration.Settings.General.DefaultLanguages))
    {
        var dialogResult = chooser.ShowDialog(this);
        if (dialogResult == DialogResult.OK)
        {
            Configuration.Settings.General.DefaultLanguages = chooser.DefaultLanguages;
        }
    }

    // The list is rebuilt whether or not the dialog was confirmed.
    LoadDictionaryList();
}
}
}

View File

@ -3,6 +3,7 @@ using Nikse.SubtitleEdit.Core.BluRaySup;
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.ContainerFormats;
using Nikse.SubtitleEdit.Core.ContainerFormats.TransportStream;
using Nikse.SubtitleEdit.Core.Enums;
using Nikse.SubtitleEdit.Core.Interfaces;
using Nikse.SubtitleEdit.Core.SubtitleFormats;
using Nikse.SubtitleEdit.Core.VobSub;
@ -26,7 +27,6 @@ using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Xml;
using Nikse.SubtitleEdit.Core.Enums;
using MessageBox = Nikse.SubtitleEdit.Forms.SeMsgBox.MessageBox;
namespace Nikse.SubtitleEdit.Forms.Ocr
@ -747,7 +747,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
text = text.RemoveRecursiveLineBreaks();
if (Utilities.GetNumberOfLines(text) > 2)
{
text = Utilities.AutoBreakLine(text);
@ -1037,7 +1037,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
text = text.RemoveRecursiveLineBreaks();
if (Utilities.GetNumberOfLines(text) > 2)
{
text = Utilities.AutoBreakLine(text);
@ -4962,7 +4962,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
text = text.Replace(Environment.NewLine + " ", Environment.NewLine);
text = text.RemoveRecursiveLineBreaks();
if (Utilities.GetNumberOfLines(text) > 2)
{
text = Utilities.AutoBreakLine(text);
@ -8704,7 +8704,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
return;
}
using (var form = new GetTesseractDictionaries(comboBoxTesseractLanguages.Items.Count == 0))
using (var form = new GetTesseractDictionaries())
{
form.ShowDialog(this);
InitializeTesseract(form.ChosenLanguage);

View File

@ -315,8 +315,6 @@ namespace Nikse.SubtitleEdit.Forms
if (editInterjections.ShowDialog(this) == DialogResult.OK)
{
SaveInterjections(editInterjections.GetInterjectionList());
_removeTextForHiLib.ReloadInterjection(fileName);
if (checkBoxRemoveInterjections.Checked)
{
GeneratePreview();