From 5245c3b1bf7e808fc4bf5d6b55e1d6a6ad0b2fdd Mon Sep 17 00:00:00 2001 From: "Martijn van Berkel (Flitskikker)" Date: Wed, 31 Aug 2022 20:09:19 +0200 Subject: [PATCH 1/5] Add basic Google Cloud Vision OCR support --- src/libse/Common/Settings.cs | 18 ++ src/libse/VobSub/Ocr/OCRHelper.cs | 20 ++ .../Ocr/Service/GoogleCloudVisionAPI.cs | 227 ++++++++++++++++++ .../VobSub/Ocr/Service/GoogleOCRService.cs | 46 ++++ src/libse/VobSub/Ocr/Service/IOCRService.cs | 9 + src/libse/VobSub/Ocr/Service/IOCRStrategy.cs | 15 ++ src/libse/VobSub/Ocr/Service/OCRException.cs | 22 ++ src/ui/Forms/Ocr/VobSubOcr.Designer.cs | 122 ++++++++++ src/ui/Forms/Ocr/VobSubOcr.cs | 51 ++++ src/ui/Logic/Language.cs | 3 + src/ui/Logic/LanguageStructure.cs | 3 + 11 files changed, 536 insertions(+) create mode 100644 src/libse/VobSub/Ocr/OCRHelper.cs create mode 100644 src/libse/VobSub/Ocr/Service/GoogleCloudVisionAPI.cs create mode 100644 src/libse/VobSub/Ocr/Service/GoogleOCRService.cs create mode 100644 src/libse/VobSub/Ocr/Service/IOCRService.cs create mode 100644 src/libse/VobSub/Ocr/Service/IOCRStrategy.cs create mode 100644 src/libse/VobSub/Ocr/Service/OCRException.cs diff --git a/src/libse/Common/Settings.cs b/src/libse/Common/Settings.cs index 1db068c72..1ace8877b 100644 --- a/src/libse/Common/Settings.cs +++ b/src/libse/Common/Settings.cs @@ -2063,6 +2063,8 @@ $HorzAlign = Center public bool CaptureTopAlign { get; set; } public int UnfocusedAttentionBlinkCount { get; set; } public int UnfocusedAttentionPlaySoundCount { get; set; } + public string CloudVisionAPIKey { get; set; } + public string CloudVisionLanguage { get; set; } public VobSubOcrSettings() { @@ -2091,6 +2093,8 @@ $HorzAlign = Center CaptureTopAlign = false; UnfocusedAttentionBlinkCount = 50; UnfocusedAttentionPlaySoundCount = 1; + CloudVisionAPIKey = string.Empty; + CloudVisionLanguage = "en"; } } @@ -7379,6 +7383,18 @@ $HorzAlign = Center settings.VobSubOcr.UnfocusedAttentionPlaySoundCount = Convert.ToInt32(subNode.InnerText, 
CultureInfo.InvariantCulture); } + subNode = node.SelectSingleNode("CloudVisionAPIKey"); + if (subNode != null) + { + settings.VobSubOcr.CloudVisionAPIKey = subNode.InnerText; + } + + subNode = node.SelectSingleNode("CloudVisionLanguage"); + if (subNode != null) + { + settings.VobSubOcr.CloudVisionLanguage = subNode.InnerText; + } + + foreach (XmlNode groupNode in doc.DocumentElement.SelectNodes("MultipleSearchAndReplaceGroups/Group")) { var group = new MultipleSearchAndReplaceGroup @@ -10458,6 +10474,8 @@ $HorzAlign = Center textWriter.WriteElementString("CaptureTopAlign", settings.VobSubOcr.CaptureTopAlign.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("UnfocusedAttentionBlinkCount", settings.VobSubOcr.UnfocusedAttentionBlinkCount.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("UnfocusedAttentionPlaySoundCount", settings.VobSubOcr.UnfocusedAttentionPlaySoundCount.ToString(CultureInfo.InvariantCulture)); + textWriter.WriteElementString("CloudVisionAPIKey", settings.VobSubOcr.CloudVisionAPIKey); + textWriter.WriteElementString("CloudVisionLanguage", settings.VobSubOcr.CloudVisionLanguage); textWriter.WriteEndElement();
diff --git a/src/libse/VobSub/Ocr/OCRHelper.cs b/src/libse/VobSub/Ocr/OCRHelper.cs
new file mode 100644
index 000000000..4c1d646b0
--- /dev/null
+++ b/src/libse/VobSub/Ocr/OCRHelper.cs
@@ -0,0 +1,34 @@
+using Nikse.SubtitleEdit.Core.Common;
+using System;
+
+namespace Nikse.SubtitleEdit.Core.VobSub.Ocr
+{
+    /// <summary>
+    /// Post-processing applied to raw text returned by an OCR service.
+    /// </summary>
+    public static class OCRHelper
+    {
+        /// <summary>
+        /// Normalizes the line breaks of <paramref name="input"/> to <see cref="Environment.NewLine"/>
+        /// and trims leading/trailing whitespace.
+        /// </summary>
+        /// <param name="input">Raw OCR text; null or empty yields an empty string.</param>
+        /// <param name="language">Language of the text; not used by the current clean-up.</param>
+        /// <returns>The cleaned-up text, never null.</returns>
+        public static string PostOCR(string input, string language)
+        {
+            if (string.IsNullOrEmpty(input))
+            {
+                return string.Empty;
+            }
+
+            return FixInvalidCarriageReturnLineFeedCharacters(input);
+        }
+
+        private static string FixInvalidCarriageReturnLineFeedCharacters(string input)
+        {
+            // Fix new line chars: split into lines, then re-join with the platform separator
+            return string.Join(Environment.NewLine, input.SplitToLines()).Trim();
+        }
+    }
+}
diff --git a/src/libse/VobSub/Ocr/Service/GoogleCloudVisionAPI.cs b/src/libse/VobSub/Ocr/Service/GoogleCloudVisionAPI.cs
new file mode 
100644 index 000000000..1192a80e0 --- /dev/null +++ b/src/libse/VobSub/Ocr/Service/GoogleCloudVisionAPI.cs @@ -0,0 +1,227 @@ +using Nikse.SubtitleEdit.Core.Common; +using System; +using System.Collections.Generic; +using System.Drawing; +using System.IO; +using System.Net; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Runtime.Serialization; +using System.Runtime.Serialization.Json; +using System.Text; + +namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service +{ + /// + /// OCR via Google Cloud Vision API - see https://cloud.google.com/vision/docs/ocr + /// + public class GoogleCloudVisionAPI : IOCRStrategy + { + private readonly string _apiKey; + private readonly HttpClient _httpClient; + + public string GetName() + { + return "Google Cloud Vision API"; + } + + public int GetMaxImageSize() + { + return 20000000; + } + + public int GetMaximumRequestArraySize() + { + return 16; + } + + public GoogleCloudVisionAPI(string apiKey) + { + _apiKey = apiKey; + _httpClient = HttpClientHelper.MakeHttpClient(); + _httpClient.BaseAddress = new Uri("https://vision.googleapis.com/v1/images:annotate"); + _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); + } + + public string GetUrl() + { + return "https://cloud.google.com/vision/docs/ocr"; + } + + public List PerformOCR(string language, List images) + { + // Create a request body object + var requestBody = new RequestBody(); + + foreach (var image in images) + { + var imageBase64 = string.Empty; + using (MemoryStream memoryStream = new MemoryStream()) + { + image.Save(memoryStream, System.Drawing.Imaging.ImageFormat.Png); + imageBase64 = Convert.ToBase64String(memoryStream.ToArray()); + } + + var request = new RequestBody.Request(imageBase64, language); + requestBody.requests.Add(request); + } + + // Convert to JSON string + var requestBodyString = string.Empty; + using (MemoryStream memoryStream = new MemoryStream()) + { + new 
DataContractJsonSerializer(typeof(RequestBody)).WriteObject(memoryStream, requestBody); + requestBodyString = Encoding.Default.GetString(memoryStream.ToArray()); + } + + // Do request + var uri = $"?key={_apiKey}"; + string content; + try + { + var result = _httpClient.PostAsync(uri, new StringContent(requestBodyString)).Result; + if ((int)result.StatusCode == 400) + { + throw new OCRException("API key invalid (or perhaps billing is not enabled)?"); + } + if ((int)result.StatusCode == 403) + { + throw new OCRException("\"Perhaps billing is not enabled (or API key is invalid)?\""); + } + + if (!result.IsSuccessStatusCode) + { + throw new OCRException($"An error occurred calling Cloud Vision API - status code: {result.StatusCode}"); + } + + content = result.Content.ReadAsStringAsync().Result; + } + catch (WebException webException) + { + var message = string.Empty; + if (webException.Message.Contains("(400) Bad Request")) + { + message = "API key invalid (or perhaps billing is not enabled)?"; + } + else if (webException.Message.Contains("(403) Forbidden.")) + { + message = "Perhaps billing is not enabled (or API key is invalid)?"; + } + throw new OCRException(message, webException); + } + + var resultList = new List(); + var parser = new JsonParser(); + var jsonObject = (Dictionary)parser.Parse(content); + + if (jsonObject.ContainsKey("responses")) + { + if (jsonObject["responses"] is List responses) + { + foreach (var responseObject in responses) + { + var result = string.Empty; + + if (responseObject is Dictionary response) + { + if (response.ContainsKey("textAnnotations")) + { + if (response["textAnnotations"] is List textAnnotations) + { + if (textAnnotations.Count > 0) + { + if (textAnnotations[0] is Dictionary firstTextAnnotation) + { + if (firstTextAnnotation.ContainsKey("description")) + { + if (firstTextAnnotation["description"] is string description) + { + result = OCRHelper.PostOCR(description, language); + } + } + } + } + } + } + } + + 
resultList.Add(result); + } + } + } + + return resultList; + } + + [DataContract, Serializable] + public class RequestBody + { + [DataMember] + public List requests { get; set; } + + public RequestBody() + { + this.requests = new List(); + } + + + [DataContract, Serializable] + public class Request + { + [DataMember] + public Image image { get; set; } + [DataMember] + public ImageContext imageContext { get; set; } + [DataMember] + public List features { get; set; } + + public Request(string imageContent, string language) + { + this.image = new Image(imageContent); + this.imageContext = new ImageContext(new List() { language, "en" }); + this.features = new List() { new Feature("TEXT_DETECTION", 1) }; + } + + + [DataContract, Serializable] + public class Image + { + [DataMember] + public string content { get; set; } + + public Image(string content) + { + this.content = content; + } + } + + [DataContract, Serializable] + public class ImageContext + { + [DataMember] + public List languageHints { get; set; } + + public ImageContext(List languageHints) + { + this.languageHints = languageHints; + } + } + + [DataContract, Serializable] + public class Feature + { + [DataMember] + public string type { get; set; } + [DataMember] + public int maxResults { get; set; } + + public Feature(string type, int maxResults) + { + this.type = type; + this.maxResults = maxResults; + } + } + } + } + } +} diff --git a/src/libse/VobSub/Ocr/Service/GoogleOCRService.cs b/src/libse/VobSub/Ocr/Service/GoogleOCRService.cs new file mode 100644 index 000000000..9e89e7b0e --- /dev/null +++ b/src/libse/VobSub/Ocr/Service/GoogleOCRService.cs @@ -0,0 +1,46 @@ +using System.Collections.Generic; +using System.Drawing; + +namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service +{ + public class GoogleOCRService : IOCRService + { + + private readonly IOCRStrategy _ocrStrategy; + + public GoogleOCRService(IOCRStrategy translationStrategy) + { + _ocrStrategy = translationStrategy; + } + + public string 
GetName() + { + return _ocrStrategy.GetName(); + } + + public override string ToString() + { + return GetName(); + } + + public int GetMaxImageSize() + { + return _ocrStrategy.GetMaxImageSize(); + } + + public int GetMaximumRequestArraySize() + { + return _ocrStrategy.GetMaximumRequestArraySize(); + } + + public string GetUrl() + { + return ""; + } + + public List PerformOCR(string language, List images) + { + return _ocrStrategy.PerformOCR(language, images); + } + } +} \ No newline at end of file diff --git a/src/libse/VobSub/Ocr/Service/IOCRService.cs b/src/libse/VobSub/Ocr/Service/IOCRService.cs new file mode 100644 index 000000000..8de05ed35 --- /dev/null +++ b/src/libse/VobSub/Ocr/Service/IOCRService.cs @@ -0,0 +1,9 @@ +using System.Collections.Generic; + +namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service +{ + public interface IOCRService : IOCRStrategy + { + + } +} diff --git a/src/libse/VobSub/Ocr/Service/IOCRStrategy.cs b/src/libse/VobSub/Ocr/Service/IOCRStrategy.cs new file mode 100644 index 000000000..d7e11534e --- /dev/null +++ b/src/libse/VobSub/Ocr/Service/IOCRStrategy.cs @@ -0,0 +1,15 @@ +using Nikse.SubtitleEdit.Core.Common; +using System.Collections.Generic; +using System.Drawing; + +namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service +{ + public interface IOCRStrategy + { + string GetName(); + string GetUrl(); + List PerformOCR(string language, List images); + int GetMaxImageSize(); + int GetMaximumRequestArraySize(); + } +} diff --git a/src/libse/VobSub/Ocr/Service/OCRException.cs b/src/libse/VobSub/Ocr/Service/OCRException.cs new file mode 100644 index 000000000..4505de2ea --- /dev/null +++ b/src/libse/VobSub/Ocr/Service/OCRException.cs @@ -0,0 +1,22 @@ +using System; +using System.Net; + +namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service +{ + public class OCRException : Exception + { + public OCRException(WebException webException) : base("",webException) + { + } + + public OCRException(string message, Exception exception) : base(message, 
exception) + { + + } + + public OCRException(string message) : base(message) + { + + } + } +} diff --git a/src/ui/Forms/Ocr/VobSubOcr.Designer.cs b/src/ui/Forms/Ocr/VobSubOcr.Designer.cs index bb921f45d..d9605c9a6 100644 --- a/src/ui/Forms/Ocr/VobSubOcr.Designer.cs +++ b/src/ui/Forms/Ocr/VobSubOcr.Designer.cs @@ -55,6 +55,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.buttonCancel = new System.Windows.Forms.Button(); this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox(); this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox(); + this.groupBoxCloudVision = new System.Windows.Forms.GroupBox(); + this.textBoxCloudVisionAPIKey = new System.Windows.Forms.TextBox(); + this.labelCloudVisionAPIKey = new System.Windows.Forms.Label(); this.groupBoxNOCR = new System.Windows.Forms.GroupBox(); this.label3 = new System.Windows.Forms.Label(); this.comboBoxNOcrLineSplitMinHeight = new System.Windows.Forms.ComboBox(); @@ -171,8 +174,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.underlineToolStripMenuItem1 = new System.Windows.Forms.ToolStripMenuItem(); this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); this.timerHideStatus = new System.Windows.Forms.Timer(this.components); + this.labelCloudVisionLanguage = new System.Windows.Forms.Label(); + this.comboBoxCloudVisionLanguageHint = new System.Windows.Forms.ComboBox(); this.contextMenuStripListview.SuspendLayout(); this.groupBoxOcrMethod.SuspendLayout(); + this.groupBoxCloudVision.SuspendLayout(); this.groupBoxNOCR.SuspendLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNOcrMaxWrongPixels)).BeginInit(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit(); @@ -494,6 +500,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr // groupBoxOcrMethod // this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod); + this.groupBoxOcrMethod.Controls.Add(this.groupBoxCloudVision); this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR); 
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod); this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod); @@ -521,6 +528,35 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.comboBoxOcrMethod.TabIndex = 0; this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged); // + // groupBoxCloudVision + // + this.groupBoxCloudVision.Controls.Add(this.comboBoxCloudVisionLanguageHint); + this.groupBoxCloudVision.Controls.Add(this.labelCloudVisionLanguage); + this.groupBoxCloudVision.Controls.Add(this.textBoxCloudVisionAPIKey); + this.groupBoxCloudVision.Controls.Add(this.labelCloudVisionAPIKey); + this.groupBoxCloudVision.Location = new System.Drawing.Point(7, 38); + this.groupBoxCloudVision.Name = "groupBoxCloudVision"; + this.groupBoxCloudVision.Size = new System.Drawing.Size(372, 143); + this.groupBoxCloudVision.TabIndex = 8; + this.groupBoxCloudVision.TabStop = false; + this.groupBoxCloudVision.Text = "Cloud Vision API"; + // + // textBoxCloudVisionAPIKey + // + this.textBoxCloudVisionAPIKey.Location = new System.Drawing.Point(87, 22); + this.textBoxCloudVisionAPIKey.Name = "textBoxCloudVisionAPIKey"; + this.textBoxCloudVisionAPIKey.Size = new System.Drawing.Size(279, 21); + this.textBoxCloudVisionAPIKey.TabIndex = 1; + // + // labelCloudVisionAPIKey + // + this.labelCloudVisionAPIKey.AutoSize = true; + this.labelCloudVisionAPIKey.Location = new System.Drawing.Point(6, 25); + this.labelCloudVisionAPIKey.Name = "labelCloudVisionAPIKey"; + this.labelCloudVisionAPIKey.Size = new System.Drawing.Size(44, 13); + this.labelCloudVisionAPIKey.TabIndex = 0; + this.labelCloudVisionAPIKey.Text = "API key"; + // // groupBoxNOCR // this.groupBoxNOCR.Controls.Add(this.label3); @@ -1964,6 +2000,85 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.timerHideStatus.Interval = 2000; this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick); // + // labelCloudVisionLanguage + // + 
this.labelCloudVisionLanguage.AutoSize = true; + this.labelCloudVisionLanguage.Location = new System.Drawing.Point(6, 52); + this.labelCloudVisionLanguage.Name = "labelCloudVisionLanguage"; + this.labelCloudVisionLanguage.Size = new System.Drawing.Size(75, 13); + this.labelCloudVisionLanguage.TabIndex = 2; + this.labelCloudVisionLanguage.Text = "Language hint"; + // + // comboBoxCloudVisionLanguageHint + // + this.comboBoxCloudVisionLanguageHint.FormattingEnabled = true; + this.comboBoxCloudVisionLanguageHint.Items.AddRange(new object[] { + "af", + "ar", + "be", + "bg", + "bn", + "ca", + "cs", + "da", + "de", + "el", + "en", + "es", + "et", + "fa", + "fi", + "fil", + "fr", + "gu", + "hi", + "hr", + "hu", + "hy", + "id", + "is", + "it", + "iw", + "ja", + "km", + "kn", + "ko", + "lo", + "lt", + "lv", + "mk", + "ml", + "mr", + "ms", + "ne", + "nl", + "no", + "pa", + "pl", + "pt", + "ro", + "ru", + "ru-PETR1708", + "sk", + "sl", + "sq", + "sr", + "sr-Latn", + "sv", + "ta", + "te", + "th", + "tl", + "tr", + "uk", + "vi", + "yi", + "zh"}); + this.comboBoxCloudVisionLanguageHint.Location = new System.Drawing.Point(87, 49); + this.comboBoxCloudVisionLanguageHint.Name = "comboBoxCloudVisionLanguageHint"; + this.comboBoxCloudVisionLanguageHint.Size = new System.Drawing.Size(279, 21); + this.comboBoxCloudVisionLanguageHint.TabIndex = 3; + // // VobSubOcr // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); @@ -1990,6 +2105,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.Resize += new System.EventHandler(this.VobSubOcr_Resize); this.contextMenuStripListview.ResumeLayout(false); this.groupBoxOcrMethod.ResumeLayout(false); + this.groupBoxCloudVision.ResumeLayout(false); + this.groupBoxCloudVision.PerformLayout(); this.groupBoxNOCR.ResumeLayout(false); this.groupBoxNOCR.PerformLayout(); ((System.ComponentModel.ISupportInitialize)(this.numericUpDownNOcrMaxWrongPixels)).EndInit(); @@ -2193,5 +2310,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private 
System.Windows.Forms.ToolStripMenuItem removeAllXToolStripMenuItem; private System.Windows.Forms.ToolStripMenuItem oCRSelectedLinesToolStripMenuItem; private System.Windows.Forms.ToolStripSeparator toolStripSeparatorOcrSelected; + private System.Windows.Forms.GroupBox groupBoxCloudVision; + private System.Windows.Forms.TextBox textBoxCloudVisionAPIKey; + private System.Windows.Forms.Label labelCloudVisionAPIKey; + private System.Windows.Forms.ComboBox comboBoxCloudVisionLanguageHint; + private System.Windows.Forms.Label labelCloudVisionLanguage; } } \ No newline at end of file diff --git a/src/ui/Forms/Ocr/VobSubOcr.cs b/src/ui/Forms/Ocr/VobSubOcr.cs index be45ef156..7ef7d3a5f 100644 --- a/src/ui/Forms/Ocr/VobSubOcr.cs +++ b/src/ui/Forms/Ocr/VobSubOcr.cs @@ -6,6 +6,7 @@ using Nikse.SubtitleEdit.Core.ContainerFormats.TransportStream; using Nikse.SubtitleEdit.Core.Interfaces; using Nikse.SubtitleEdit.Core.SubtitleFormats; using Nikse.SubtitleEdit.Core.VobSub; +using Nikse.SubtitleEdit.Core.VobSub.Ocr.Service; using Nikse.SubtitleEdit.Logic; using Nikse.SubtitleEdit.Logic.Ocr; using Nikse.SubtitleEdit.Logic.Ocr.Binary; @@ -346,6 +347,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private NOcrThreadResult[] _nOcrThreadResults; private bool _ocrThreadStop; + private IOCRService _ocrService; + private readonly Keys _italicShortcut = UiUtil.GetKeys(Configuration.Settings.Shortcuts.MainListViewItalic); private readonly Keys _mainGeneralGoToNextSubtitle = UiUtil.GetKeys(Configuration.Settings.Shortcuts.GeneralGoToNextSubtitle); private readonly Keys _mainGeneralGoToPrevSubtitle = UiUtil.GetKeys(Configuration.Settings.Shortcuts.GeneralGoToPrevSubtitle); @@ -370,6 +373,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private readonly int _ocrMethodTesseract5 = -1; private readonly int _ocrMethodModi = -1; private readonly int _ocrMethodNocr = -1; + private readonly int _ocrMethodCloudVision = -1; private FindReplaceDialogHelper _findHelper; @@ -514,6 +518,7 @@ namespace 
Nikse.SubtitleEdit.Forms.Ocr } _ocrMethodNocr = comboBoxOcrMethod.Items.Add(language.OcrViaNOCR); + _ocrMethodCloudVision = comboBoxOcrMethod.Items.Add(language.OcrViaCloudVision); checkBoxTesseractItalicsOn.Checked = Configuration.Settings.VobSubOcr.UseItalicsInTesseract; checkBoxTesseractItalicsOn.Text = LanguageSettings.Current.General.Italic; @@ -583,6 +588,12 @@ namespace Nikse.SubtitleEdit.Forms.Ocr checkBoxNOcrItalic.Checked = Configuration.Settings.VobSubOcr.LineOcrAdvancedItalic; numericUpDownNOcrMaxWrongPixels.Value = Configuration.Settings.VobSubOcr.LineOcrMaxErrorPixels; + labelCloudVisionAPIKey.Text = language.APIKey; + labelCloudVisionLanguage.Text = language.LanguageHint; + + textBoxCloudVisionAPIKey.Text = Configuration.Settings.VobSubOcr.CloudVisionAPIKey; + comboBoxCloudVisionLanguageHint.Text = Configuration.Settings.VobSubOcr.CloudVisionLanguage; + comboBoxTesseractLanguages.Left = labelTesseractLanguage.Left + labelTesseractLanguage.Width; buttonGetTesseractDictionaries.Left = comboBoxTesseractLanguages.Left + comboBoxTesseractLanguages.Width + 5; @@ -989,6 +1000,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { text = OcrViaNOCR(GetSubtitleBitmap(i), i); } + else if (_ocrMethodIndex == _ocrMethodCloudVision) + { + text = OcrViaCloudVision(GetSubtitleBitmap(i), i); + } else { text = OcrViaTesseract(GetSubtitleBitmap(i), i); @@ -5185,6 +5200,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { _ocrMinLineHeight = -1; } + } + else if (_ocrMethodIndex == _ocrMethodCloudVision) + { + if (_ocrService == null) + { + _ocrService = new GoogleOCRService(new GoogleCloudVisionAPI(textBoxCloudVisionAPIKey.Text)); + } } progressBar1.Maximum = max; @@ -5564,6 +5586,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { text = CallModi(i); } + else if (_ocrMethodIndex == _ocrMethodCloudVision) + { + text = OcrViaCloudVision(bmp, i); + } _lastLine = text; @@ -6763,6 +6789,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } } + private string OcrViaCloudVision(Bitmap bitmap, int 
listViewIndex) + { + var language = comboBoxCloudVisionLanguageHint.Text; + var cloudVisionResult = _ocrService.PerformOCR(language, new List() { bitmap }); + + if (cloudVisionResult.Count > 0) + { + return cloudVisionResult[0]; + } + + return string.Empty; + } + private void InitializeNOcrForBatch(string db) { _ocrMethodIndex = _ocrMethodNocr; @@ -7522,6 +7561,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr ShowOcrMethodGroupBox(groupBoxModiMethod); Configuration.Settings.VobSubOcr.LastOcrMethod = "MODI"; } + else if (_ocrMethodIndex == _ocrMethodCloudVision) + { + ShowOcrMethodGroupBox(groupBoxCloudVision); + Configuration.Settings.VobSubOcr.LastOcrMethod = "CloudVision"; + } _ocrFixEngine = null; SubtitleListView1SelectedIndexChanged(null, null); @@ -7543,6 +7587,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr groupBoxImageCompareMethod.Visible = false; groupBoxModiMethod.Visible = false; groupBoxNOCR.Visible = false; + groupBoxCloudVision.Visible = false; groupBox.Visible = true; groupBox.BringToFront(); @@ -8017,6 +8062,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { comboBoxOcrMethod.SelectedIndex = _ocrMethodBinaryImageCompare; } + else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "CloudVision" && comboBoxOcrMethod.Items.Count > _ocrMethodCloudVision) + { + comboBoxOcrMethod.SelectedIndex = _ocrMethodCloudVision; + } else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "MODI" && comboBoxOcrMethod.Items.Count > _ocrMethodModi) { comboBoxOcrMethod.SelectedIndex = _ocrMethodModi; @@ -8490,6 +8539,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr Configuration.Settings.VobSubOcr.LineOcrMaxErrorPixels = (int)numericUpDownNOcrMaxWrongPixels.Value; Configuration.Settings.VobSubOcr.UseTesseractFallback = checkBoxTesseractFallback.Checked; Configuration.Settings.VobSubOcr.CaptureTopAlign = toolStripMenuItemCaptureTopAlign.Checked; + Configuration.Settings.VobSubOcr.CloudVisionAPIKey = textBoxCloudVisionAPIKey.Text; + 
Configuration.Settings.VobSubOcr.CloudVisionLanguage = comboBoxCloudVisionLanguageHint.Text; if (_ocrMethodIndex == _ocrMethodBinaryImageCompare) { diff --git a/src/ui/Logic/Language.cs b/src/ui/Logic/Language.cs index 70a64104a..9aebdc846 100644 --- a/src/ui/Logic/Language.cs +++ b/src/ui/Logic/Language.cs @@ -3311,6 +3311,7 @@ Keep changes?", OcrViaImageCompare = "Binary image compare", OcrViaModi = "Microsoft Office Document Imaging (MODI). Requires Microsoft Office", OcrViaNOCR = "OCR via nOCR", + OcrViaCloudVision = "OCR via Cloud Vision API", TesseractEngineMode = "Engine mode", TesseractEngineModeLegacy = "Original Tesseract only (can detect italic)", TesseractEngineModeNeural = "Neural nets LSTM only", @@ -3380,6 +3381,8 @@ Keep changes?", StartTraining = "Start training", NowTraining = "Now training font '{1}'. Total chars trained: {0:#,###,##0}, {2:#,###,##0} known", ImagesWithTimeCodesInFileName = "Images with time codes in file name...", + APIKey = "API key", + LanguageHint = "Language hint", }; VobSubOcrCharacter = new LanguageStructure.VobSubOcrCharacter diff --git a/src/ui/Logic/LanguageStructure.cs b/src/ui/Logic/LanguageStructure.cs index 75bfdca58..5efa83d7d 100644 --- a/src/ui/Logic/LanguageStructure.cs +++ b/src/ui/Logic/LanguageStructure.cs @@ -3158,6 +3158,7 @@ public string OcrViaImageCompare { get; set; } public string OcrViaModi { get; set; } public string OcrViaNOCR { get; set; } + public string OcrViaCloudVision { get; set; } public string TesseractEngineMode { get; set; } public string TesseractEngineModeLegacy { get; set; } public string TesseractEngineModeNeural { get; set; } @@ -3227,6 +3228,8 @@ public string StartTraining { get; set; } public string NowTraining { get; set; } public string ImagesWithTimeCodesInFileName { get; set; } + public string APIKey { get; set; } + public string LanguageHint { get; set; } } public class VobSubOcrCharacter From 2b23813c4cc3eb94c10ebc6d84aa54ba1323fec9 Mon Sep 17 00:00:00 2001 From: "Martijn van 
Berkel (Flitskikker)" Date: Wed, 31 Aug 2022 20:25:44 +0200 Subject: [PATCH 2/5] Don't overwrite existing text when aborting --- src/ui/Forms/Ocr/VobSubOcr.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ui/Forms/Ocr/VobSubOcr.cs b/src/ui/Forms/Ocr/VobSubOcr.cs index 7ef7d3a5f..a457d3fb2 100644 --- a/src/ui/Forms/Ocr/VobSubOcr.cs +++ b/src/ui/Forms/Ocr/VobSubOcr.cs @@ -5629,7 +5629,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr if (_abort) { - textBoxCurrentText.Text = text; + if (textBoxCurrentText.Text == "") + { + textBoxCurrentText.Text = text; + } _mainOcrRunning = false; SetButtonsEnabledAfterOcrDone(); return true; From 5c7c2d84b1e5eabed514c2911ef1ece183393310 Mon Sep 17 00:00:00 2001 From: "Martijn van Berkel (Flitskikker)" Date: Wed, 31 Aug 2022 20:44:43 +0200 Subject: [PATCH 3/5] Use typed character (if available) instead of * when skipping --- src/ui/Forms/Ocr/VobSubOcr.cs | 44 +++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/src/ui/Forms/Ocr/VobSubOcr.cs b/src/ui/Forms/Ocr/VobSubOcr.cs index a457d3fb2..e73660176 100644 --- a/src/ui/Forms/Ocr/VobSubOcr.cs +++ b/src/ui/Forms/Ocr/VobSubOcr.cs @@ -3964,7 +3964,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } else { - matches.Add(new CompareMatch("*", false, 0, null)); + string text = _vobSubOcrCharacter.ManualRecognizedCharacters; + + if (text != "") + { + matches.Add(new CompareMatch(text, _vobSubOcrCharacter.IsItalic, 0, null)); + } + else + { + matches.Add(new CompareMatch("*", false, 0, null)); + } } _italicCheckedLast = _vobSubOcrCharacter.IsItalic; @@ -4020,7 +4029,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } else { - matches.Add(new CompareMatch("*", false, 0, null, item)); + string text = _vobSubOcrCharacter.ManualRecognizedCharacters; + + if (text != "") + { + matches.Add(new CompareMatch(text, _vobSubOcrCharacter.IsItalic, 0, null, item)); + } + else + { + matches.Add(new CompareMatch("*", false, 0, null, item)); + } } 
_italicCheckedLast = _vobSubOcrCharacter.IsItalic; @@ -4384,7 +4402,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } else { - matches.Add(new CompareMatch("*", false, 0, null)); + var text = _vobSubOcrNOcrCharacter.NOcrChar.Text; + + if (text != "") + { + matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, 0, null)); + } + else + { + matches.Add(new CompareMatch("*", false, 0, null)); + } } _italicCheckedLast = _vobSubOcrNOcrCharacter.IsItalic; @@ -4419,7 +4446,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } else { - matches.Add(new CompareMatch("*", false, 0, null)); + var text = _vobSubOcrNOcrCharacter.NOcrChar.Text; + + if (text != "") + { + matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, 0, null)); + } + else + { + matches.Add(new CompareMatch("*", false, 0, null)); + } } _italicCheckedLast = _vobSubOcrNOcrCharacter.IsItalic; From 90f49a5b92125ba6f5fc17aaefa969cf6e17c9e5 Mon Sep 17 00:00:00 2001 From: "Martijn van Berkel (Flitskikker)" Date: Wed, 31 Aug 2022 20:57:27 +0200 Subject: [PATCH 4/5] Fix VSF image import with multiple extensions (e.g. 
.jpeg.png) --- src/ui/Forms/ImportImages.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ui/Forms/ImportImages.cs b/src/ui/Forms/ImportImages.cs index ac380ea30..1dcc288a8 100644 --- a/src/ui/Forms/ImportImages.cs +++ b/src/ui/Forms/ImportImages.cs @@ -13,6 +13,7 @@ namespace Nikse.SubtitleEdit.Forms { // 0_00_01_042__0_00_03_919_01.jpeg private static readonly Regex TimeCodeFormat1 = new Regex(@"^\d+_\d+_\d+_\d+__\d+_\d+_\d+_\d+_\d+$", RegexOptions.Compiled); + private static readonly Regex TimeCodeFormat1WithExtension = new Regex(@"^\d+_\d+_\d+_\d+__\d+_\d+_\d+_\d+_\d+\..+$", RegexOptions.Compiled); private static readonly Regex TimeCodeFormat2 = new Regex(@"^\d+_\d+_\d+_\d+__\d+_\d+_\d+_\d+$", RegexOptions.Compiled); public Subtitle Subtitle { get; private set; } @@ -99,7 +100,7 @@ namespace Nikse.SubtitleEdit.Forms p.EndTime.TotalMilliseconds = endTime; } } - else if (TimeCodeFormat1.IsMatch(name) || TimeCodeFormat2.IsMatch(name)) + else if (TimeCodeFormat1.IsMatch(name) || TimeCodeFormat1WithExtension.IsMatch(name) || TimeCodeFormat2.IsMatch(name)) { var arr = name.Replace("__", "_").Split('_'); if (arr.Length >= 8) From 58f8d00fea0c7ee7af7b0f6bdcab6c37100ac68b Mon Sep 17 00:00:00 2001 From: "Martijn van Berkel (Flitskikker)" Date: Wed, 31 Aug 2022 22:57:36 +0200 Subject: [PATCH 5/5] Add support for sending original VSF images --- src/libse/Common/Settings.cs | 9 ++ src/ui/Forms/Ocr/VobSubOcr.Designer.cs | 181 +++++++++++++------------ src/ui/Forms/Ocr/VobSubOcr.cs | 38 +++++- src/ui/Logic/Language.cs | 1 + src/ui/Logic/LanguageStructure.cs | 1 + 5 files changed, 144 insertions(+), 86 deletions(-) diff --git a/src/libse/Common/Settings.cs b/src/libse/Common/Settings.cs index 1ace8877b..6f055861e 100644 --- a/src/libse/Common/Settings.cs +++ b/src/libse/Common/Settings.cs @@ -2065,6 +2065,7 @@ $HorzAlign = Center public int UnfocusedAttentionPlaySoundCount { get; set; } public string CloudVisionAPIKey { get; set; } public string 
CloudVisionLanguage { get; set; } + public bool CloudVisionSendOriginalImages { get; set; } public VobSubOcrSettings() { @@ -2095,6 +2096,7 @@ $HorzAlign = Center UnfocusedAttentionPlaySoundCount = 1; CloudVisionAPIKey = string.Empty; CloudVisionLanguage = "en"; + CloudVisionSendOriginalImages = false; } } @@ -7395,6 +7397,12 @@ $HorzAlign = Center settings.VobSubOcr.CloudVisionLanguage = subNode.InnerText; } + subNode = node.SelectSingleNode("CloudVisionSendOriginalImages"); + if (subNode != null) + { + settings.VobSubOcr.CloudVisionSendOriginalImages = Convert.ToBoolean(subNode.InnerText, CultureInfo.InvariantCulture); + } + foreach (XmlNode groupNode in doc.DocumentElement.SelectNodes("MultipleSearchAndReplaceGroups/Group")) { var group = new MultipleSearchAndReplaceGroup @@ -10476,6 +10484,7 @@ $HorzAlign = Center textWriter.WriteElementString("UnfocusedAttentionPlaySoundCount", settings.VobSubOcr.UnfocusedAttentionPlaySoundCount.ToString(CultureInfo.InvariantCulture)); textWriter.WriteElementString("CloudVisionAPIKey", settings.VobSubOcr.CloudVisionAPIKey); textWriter.WriteElementString("CloudVisionLanguage", settings.VobSubOcr.CloudVisionLanguage); + textWriter.WriteElementString("CloudVisionSendOriginalImages", settings.VobSubOcr.CloudVisionSendOriginalImages.ToString(CultureInfo.InvariantCulture)); textWriter.WriteEndElement(); diff --git a/src/ui/Forms/Ocr/VobSubOcr.Designer.cs b/src/ui/Forms/Ocr/VobSubOcr.Designer.cs index d9605c9a6..6d017e70a 100644 --- a/src/ui/Forms/Ocr/VobSubOcr.Designer.cs +++ b/src/ui/Forms/Ocr/VobSubOcr.Designer.cs @@ -56,6 +56,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox(); this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox(); this.groupBoxCloudVision = new System.Windows.Forms.GroupBox(); + this.checkBoxCloudVisionSendOriginalImages = new System.Windows.Forms.CheckBox(); + this.comboBoxCloudVisionLanguageHint = new System.Windows.Forms.ComboBox(); + 
this.labelCloudVisionLanguageHint = new System.Windows.Forms.Label(); this.textBoxCloudVisionAPIKey = new System.Windows.Forms.TextBox(); this.labelCloudVisionAPIKey = new System.Windows.Forms.Label(); this.groupBoxNOCR = new System.Windows.Forms.GroupBox(); @@ -174,8 +177,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.underlineToolStripMenuItem1 = new System.Windows.Forms.ToolStripMenuItem(); this.subtitleListView1 = new Nikse.SubtitleEdit.Controls.SubtitleListView(); this.timerHideStatus = new System.Windows.Forms.Timer(this.components); - this.labelCloudVisionLanguage = new System.Windows.Forms.Label(); - this.comboBoxCloudVisionLanguageHint = new System.Windows.Forms.ComboBox(); this.contextMenuStripListview.SuspendLayout(); this.groupBoxOcrMethod.SuspendLayout(); this.groupBoxCloudVision.SuspendLayout(); @@ -530,8 +531,9 @@ namespace Nikse.SubtitleEdit.Forms.Ocr // // groupBoxCloudVision // + this.groupBoxCloudVision.Controls.Add(this.checkBoxCloudVisionSendOriginalImages); this.groupBoxCloudVision.Controls.Add(this.comboBoxCloudVisionLanguageHint); - this.groupBoxCloudVision.Controls.Add(this.labelCloudVisionLanguage); + this.groupBoxCloudVision.Controls.Add(this.labelCloudVisionLanguageHint); this.groupBoxCloudVision.Controls.Add(this.textBoxCloudVisionAPIKey); this.groupBoxCloudVision.Controls.Add(this.labelCloudVisionAPIKey); this.groupBoxCloudVision.Location = new System.Drawing.Point(7, 38); @@ -541,6 +543,97 @@ namespace Nikse.SubtitleEdit.Forms.Ocr this.groupBoxCloudVision.TabStop = false; this.groupBoxCloudVision.Text = "Cloud Vision API"; // + // checkBoxCloudVisionSendOriginalImages + // + this.checkBoxCloudVisionSendOriginalImages.AutoSize = true; + this.checkBoxCloudVisionSendOriginalImages.Location = new System.Drawing.Point(9, 80); + this.checkBoxCloudVisionSendOriginalImages.Name = "checkBoxCloudVisionSendOriginalImages"; + this.checkBoxCloudVisionSendOriginalImages.Size = new System.Drawing.Size(123, 17); + 
this.checkBoxCloudVisionSendOriginalImages.TabIndex = 4; + this.checkBoxCloudVisionSendOriginalImages.Text = "Send original images"; + this.checkBoxCloudVisionSendOriginalImages.UseVisualStyleBackColor = true; + this.checkBoxCloudVisionSendOriginalImages.Visible = false; + this.checkBoxCloudVisionSendOriginalImages.CheckedChanged += new System.EventHandler(this.checkBoxCloudVisionSendOriginalImages_CheckedChanged); + // + // comboBoxCloudVisionLanguageHint + // + this.comboBoxCloudVisionLanguageHint.FormattingEnabled = true; + this.comboBoxCloudVisionLanguageHint.Items.AddRange(new object[] { + "af", + "ar", + "be", + "bg", + "bn", + "ca", + "cs", + "da", + "de", + "el", + "en", + "es", + "et", + "fa", + "fi", + "fil", + "fr", + "gu", + "hi", + "hr", + "hu", + "hy", + "id", + "is", + "it", + "iw", + "ja", + "km", + "kn", + "ko", + "lo", + "lt", + "lv", + "mk", + "ml", + "mr", + "ms", + "ne", + "nl", + "no", + "pa", + "pl", + "pt", + "ro", + "ru", + "ru-PETR1708", + "sk", + "sl", + "sq", + "sr", + "sr-Latn", + "sv", + "ta", + "te", + "th", + "tl", + "tr", + "uk", + "vi", + "yi", + "zh"}); + this.comboBoxCloudVisionLanguageHint.Location = new System.Drawing.Point(87, 49); + this.comboBoxCloudVisionLanguageHint.Name = "comboBoxCloudVisionLanguageHint"; + this.comboBoxCloudVisionLanguageHint.Size = new System.Drawing.Size(279, 21); + this.comboBoxCloudVisionLanguageHint.TabIndex = 3; + // + // labelCloudVisionLanguageHint + // + this.labelCloudVisionLanguageHint.AutoSize = true; + this.labelCloudVisionLanguageHint.Location = new System.Drawing.Point(6, 52); + this.labelCloudVisionLanguageHint.Name = "labelCloudVisionLanguageHint"; + this.labelCloudVisionLanguageHint.Size = new System.Drawing.Size(75, 13); + this.labelCloudVisionLanguageHint.TabIndex = 2; + this.labelCloudVisionLanguageHint.Text = "Language hint"; + // // textBoxCloudVisionAPIKey // this.textBoxCloudVisionAPIKey.Location = new System.Drawing.Point(87, 22); @@ -2000,85 +2093,6 @@ namespace 
Nikse.SubtitleEdit.Forms.Ocr this.timerHideStatus.Interval = 2000; this.timerHideStatus.Tick += new System.EventHandler(this.timerHideStatus_Tick); // - // labelCloudVisionLanguage - // - this.labelCloudVisionLanguage.AutoSize = true; - this.labelCloudVisionLanguage.Location = new System.Drawing.Point(6, 52); - this.labelCloudVisionLanguage.Name = "labelCloudVisionLanguage"; - this.labelCloudVisionLanguage.Size = new System.Drawing.Size(75, 13); - this.labelCloudVisionLanguage.TabIndex = 2; - this.labelCloudVisionLanguage.Text = "Language hint"; - // - // comboBoxCloudVisionLanguageHint - // - this.comboBoxCloudVisionLanguageHint.FormattingEnabled = true; - this.comboBoxCloudVisionLanguageHint.Items.AddRange(new object[] { - "af", - "ar", - "be", - "bg", - "bn", - "ca", - "cs", - "da", - "de", - "el", - "en", - "es", - "et", - "fa", - "fi", - "fil", - "fr", - "gu", - "hi", - "hr", - "hu", - "hy", - "id", - "is", - "it", - "iw", - "ja", - "km", - "kn", - "ko", - "lo", - "lt", - "lv", - "mk", - "ml", - "mr", - "ms", - "ne", - "nl", - "no", - "pa", - "pl", - "pt", - "ro", - "ru", - "ru-PETR1708", - "sk", - "sl", - "sq", - "sr", - "sr-Latn", - "sv", - "ta", - "te", - "th", - "tl", - "tr", - "uk", - "vi", - "yi", - "zh"}); - this.comboBoxCloudVisionLanguageHint.Location = new System.Drawing.Point(87, 49); - this.comboBoxCloudVisionLanguageHint.Name = "comboBoxCloudVisionLanguageHint"; - this.comboBoxCloudVisionLanguageHint.Size = new System.Drawing.Size(279, 21); - this.comboBoxCloudVisionLanguageHint.TabIndex = 3; - // // VobSubOcr // this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F); @@ -2314,6 +2328,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr private System.Windows.Forms.TextBox textBoxCloudVisionAPIKey; private System.Windows.Forms.Label labelCloudVisionAPIKey; private System.Windows.Forms.ComboBox comboBoxCloudVisionLanguageHint; - private System.Windows.Forms.Label labelCloudVisionLanguage; + private System.Windows.Forms.Label 
labelCloudVisionLanguageHint; + private System.Windows.Forms.CheckBox checkBoxCloudVisionSendOriginalImages; } } \ No newline at end of file diff --git a/src/ui/Forms/Ocr/VobSubOcr.cs b/src/ui/Forms/Ocr/VobSubOcr.cs index e73660176..b22c3b770 100644 --- a/src/ui/Forms/Ocr/VobSubOcr.cs +++ b/src/ui/Forms/Ocr/VobSubOcr.cs @@ -589,10 +589,12 @@ namespace Nikse.SubtitleEdit.Forms.Ocr numericUpDownNOcrMaxWrongPixels.Value = Configuration.Settings.VobSubOcr.LineOcrMaxErrorPixels; labelCloudVisionAPIKey.Text = language.APIKey; - labelCloudVisionLanguage.Text = language.LanguageHint; + labelCloudVisionLanguageHint.Text = language.LanguageHint; + checkBoxCloudVisionSendOriginalImages.Text = language.SendOriginalImages; textBoxCloudVisionAPIKey.Text = Configuration.Settings.VobSubOcr.CloudVisionAPIKey; comboBoxCloudVisionLanguageHint.Text = Configuration.Settings.VobSubOcr.CloudVisionLanguage; + checkBoxCloudVisionSendOriginalImages.Checked = Configuration.Settings.VobSubOcr.CloudVisionSendOriginalImages; comboBoxTesseractLanguages.Left = labelTesseractLanguage.Left + labelTesseractLanguage.Width; buttonGetTesseractDictionaries.Left = comboBoxTesseractLanguages.Left + comboBoxTesseractLanguages.Width + 5; @@ -1670,10 +1672,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr } else { - foreach (string fn in fileNames) { fullFileName = Path.Combine(Path.GetDirectoryName(_bdnFileName), fn); + + if (checkBoxCloudVisionSendOriginalImages.Visible && checkBoxCloudVisionSendOriginalImages.Checked) + { + var originalFileName = GetVSFOriginalImageFileName(fullFileName); + if (originalFileName != fullFileName && File.Exists(originalFileName)) + { + fullFileName = originalFileName; + } + } + if (!File.Exists(fullFileName)) { // fix AVISubDetector lines @@ -8073,6 +8084,17 @@ namespace Nikse.SubtitleEdit.Forms.Ocr pictureBoxEmphasis2.BackColor = Color.White; } + checkBoxCloudVisionSendOriginalImages.Visible = false; + if (bdnSubtitle.Paragraphs.Count > 0) + { + var firstImageFileName = 
bdnSubtitle.Paragraphs[0].Text; + var originalImageFileName = GetVSFOriginalImageFileName(firstImageFileName); + if (firstImageFileName != originalImageFileName && File.Exists(originalImageFileName)) + { + checkBoxCloudVisionSendOriginalImages.Visible = true; + } + } + SetButtonsStartOcr(); progressBar1.Visible = false; progressBar1.Maximum = 100; @@ -8092,7 +8114,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr autoTransparentBackgroundToolStripMenuItem.Checked = true; autoTransparentBackgroundToolStripMenuItem.Visible = true; - } private void SetOcrMethod() @@ -10142,5 +10163,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr _mainOcrSelectedIndices = subtitleListView1.GetSelectedIndices().ToList(); ButtonStartOcrClick(null, null); } + + private string GetVSFOriginalImageFileName(string fileName) + { + return fileName.Replace("\\RGBResults", "\\RGBImages").Replace("\\TXTImages", "\\RGBImages").Replace(".jpeg.png", ".jpeg").Replace(".png", ".jpeg"); + } + + private void checkBoxCloudVisionSendOriginalImages_CheckedChanged(object sender, EventArgs e) + { + // Toggle subtitle image refresh + SubtitleListView1SelectedIndexChanged(sender, e); + } } } diff --git a/src/ui/Logic/Language.cs b/src/ui/Logic/Language.cs index 9aebdc846..bf6966bc8 100644 --- a/src/ui/Logic/Language.cs +++ b/src/ui/Logic/Language.cs @@ -3383,6 +3383,7 @@ Keep changes?", ImagesWithTimeCodesInFileName = "Images with time codes in file name...", APIKey = "API key", LanguageHint = "Language hint", + SendOriginalImages = "Send original images", }; VobSubOcrCharacter = new LanguageStructure.VobSubOcrCharacter diff --git a/src/ui/Logic/LanguageStructure.cs b/src/ui/Logic/LanguageStructure.cs index 5efa83d7d..d258ae98c 100644 --- a/src/ui/Logic/LanguageStructure.cs +++ b/src/ui/Logic/LanguageStructure.cs @@ -3230,6 +3230,7 @@ public string ImagesWithTimeCodesInFileName { get; set; } public string APIKey { get; set; } public string LanguageHint { get; set; } + public string SendOriginalImages { get; set; } } 
public class VobSubOcrCharacter