Merge pull request #6195 from Flitskikker/feature/cloudvision-ocr

Add support for OCR via Google's Cloud Vision API
This commit is contained in:
Nikolaj Olsson 2022-09-01 01:34:10 -04:00 committed by GitHub
commit f57520a7f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 642 additions and 8 deletions

View File

@ -2063,6 +2063,9 @@ $HorzAlign = Center
public bool CaptureTopAlign { get; set; }
public int UnfocusedAttentionBlinkCount { get; set; }
public int UnfocusedAttentionPlaySoundCount { get; set; }
// API key used for OCR via Google's Cloud Vision API (the constructor defaults it to an empty string).
public string CloudVisionAPIKey { get; set; }
// Language hint used for Cloud Vision OCR (the constructor defaults it to "en").
public string CloudVisionLanguage { get; set; }
// Whether the "send original images" option is enabled for Cloud Vision OCR (the constructor defaults it to false).
public bool CloudVisionSendOriginalImages { get; set; }
public VobSubOcrSettings()
{
@ -2091,6 +2094,9 @@ $HorzAlign = Center
CaptureTopAlign = false;
UnfocusedAttentionBlinkCount = 50;
UnfocusedAttentionPlaySoundCount = 1;
CloudVisionAPIKey = string.Empty;
CloudVisionLanguage = "en";
CloudVisionSendOriginalImages = false;
}
}
@ -7379,6 +7385,24 @@ $HorzAlign = Center
settings.VobSubOcr.UnfocusedAttentionPlaySoundCount = Convert.ToInt32(subNode.InnerText, CultureInfo.InvariantCulture);
}
// Cloud Vision settings: each value is only overwritten when its XML node is present,
// so a settings file without these nodes keeps the values already held by settings.VobSubOcr.
subNode = node.SelectSingleNode("CloudVisionAPIKey");
if (subNode != null)
{
settings.VobSubOcr.CloudVisionAPIKey = subNode.InnerText;
}
subNode = node.SelectSingleNode("CloudVisionLanguage");
if (subNode != null)
{
settings.VobSubOcr.CloudVisionLanguage = subNode.InnerText;
}
subNode = node.SelectSingleNode("CloudVisionSendOriginalImages");
if (subNode != null)
{
// Parsed with the invariant culture to mirror how the value is written (bool.ToString with InvariantCulture).
settings.VobSubOcr.CloudVisionSendOriginalImages = Convert.ToBoolean(subNode.InnerText, CultureInfo.InvariantCulture);
}
foreach (XmlNode groupNode in doc.DocumentElement.SelectNodes("MultipleSearchAndReplaceGroups/Group"))
{
var group = new MultipleSearchAndReplaceGroup
@ -10458,6 +10482,9 @@ $HorzAlign = Center
textWriter.WriteElementString("CaptureTopAlign", settings.VobSubOcr.CaptureTopAlign.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("UnfocusedAttentionBlinkCount", settings.VobSubOcr.UnfocusedAttentionBlinkCount.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("UnfocusedAttentionPlaySoundCount", settings.VobSubOcr.UnfocusedAttentionPlaySoundCount.ToString(CultureInfo.InvariantCulture));
// Cloud Vision settings; the element names must match the node names looked up when the settings are read.
textWriter.WriteElementString("CloudVisionAPIKey", settings.VobSubOcr.CloudVisionAPIKey);
textWriter.WriteElementString("CloudVisionLanguage", settings.VobSubOcr.CloudVisionLanguage);
textWriter.WriteElementString("CloudVisionSendOriginalImages", settings.VobSubOcr.CloudVisionSendOriginalImages.ToString(CultureInfo.InvariantCulture));
textWriter.WriteEndElement();

View File

@ -0,0 +1,20 @@
using Nikse.SubtitleEdit.Core.Common;
using System;
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr
{
/// <summary>
/// Post-processing helpers for text returned by an OCR service.
/// </summary>
public static class OCRHelper
{
    /// <summary>
    /// Cleans up raw OCR text: line breaks are normalized to <see cref="Environment.NewLine"/>
    /// and leading/trailing whitespace is trimmed.
    /// </summary>
    /// <param name="input">Raw text from the OCR service; null or empty yields an empty string.</param>
    /// <param name="language">Language of the text. Currently not used by the post-processing.</param>
    /// <returns>The cleaned text, never null.</returns>
    public static string PostOCR(string input, string language)
    {
        // Guard against a missing description so callers always get a usable string
        // instead of a failure inside the line splitting.
        if (string.IsNullOrEmpty(input))
        {
            return string.Empty;
        }

        return FixInvalidCarriageReturnLineFeedCharacters(input);
    }

    /// <summary>
    /// Re-joins the lines of <paramref name="input"/> with the platform line separator and trims the result.
    /// </summary>
    private static string FixInvalidCarriageReturnLineFeedCharacters(string input)
    {
        // SplitToLines is an extension method declared outside this block.
        return string.Join(Environment.NewLine, input.SplitToLines()).Trim();
    }
}
}

View File

@ -0,0 +1,227 @@
using Nikse.SubtitleEdit.Core.Common;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Json;
using System.Text;
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
{
/// <summary>
/// OCR via Google Cloud Vision API - see https://cloud.google.com/vision/docs/ocr
/// </summary>
public class GoogleCloudVisionAPI : IOCRStrategy
{
    private const string InvalidApiKeyMessage = "API key invalid (or perhaps billing is not enabled)?";
    private const string BillingNotEnabledMessage = "Perhaps billing is not enabled (or API key is invalid)?";

    private readonly string _apiKey;
    private readonly HttpClient _httpClient;

    /// <summary>
    /// Gets the display name of this OCR strategy.
    /// </summary>
    public string GetName()
    {
        return "Google Cloud Vision API";
    }

    /// <summary>
    /// Gets the maximum image size reported for this service.
    /// NOTE(review): the unit is not stated in this file (presumably bytes) - confirm.
    /// </summary>
    public int GetMaxImageSize()
    {
        return 20000000;
    }

    /// <summary>
    /// Gets the maximum request array size reported for this service.
    /// NOTE(review): presumably the number of images per request; <see cref="PerformOCR"/> does not enforce it.
    /// </summary>
    public int GetMaximumRequestArraySize()
    {
        return 16;
    }

    /// <summary>
    /// Creates a client that posts to the Cloud Vision "images:annotate" endpoint.
    /// </summary>
    /// <param name="apiKey">API key appended as the "key" query parameter of every request.</param>
    public GoogleCloudVisionAPI(string apiKey)
    {
        _apiKey = apiKey;
        _httpClient = HttpClientHelper.MakeHttpClient();
        _httpClient.BaseAddress = new Uri("https://vision.googleapis.com/v1/images:annotate");
        _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
    }

    /// <summary>
    /// Gets the documentation URL for this OCR service.
    /// </summary>
    public string GetUrl()
    {
        return "https://cloud.google.com/vision/docs/ocr";
    }

    /// <summary>
    /// Sends the images to the Cloud Vision API and returns the recognized text.
    /// </summary>
    /// <param name="language">Language hint sent along with every image ("en" is always added as a hint).</param>
    /// <param name="images">Images to recognize; each one is sent PNG-encoded as base64.</param>
    /// <returns>
    /// One entry per response object returned by the service; an entry is empty when the response
    /// has no text annotation. An empty list is returned when <paramref name="images"/> is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="images"/> is null.</exception>
    /// <exception cref="OCRException">The service rejected the request or could not be reached.</exception>
    public List<string> PerformOCR(string language, List<Bitmap> images)
    {
        if (images == null)
        {
            throw new ArgumentNullException(nameof(images));
        }

        if (images.Count == 0)
        {
            // Nothing to recognize - skip the round trip (and the billing) entirely.
            return new List<string>();
        }

        var requestBodyString = BuildRequestJson(language, images);
        var content = PostRequest(requestBodyString);
        return ParseResponses(content, language);
    }

    /// <summary>
    /// Serializes one annotate request per image into the JSON request body.
    /// </summary>
    private static string BuildRequestJson(string language, List<Bitmap> images)
    {
        var requestBody = new RequestBody();
        foreach (var image in images)
        {
            string imageBase64;
            using (var memoryStream = new MemoryStream())
            {
                image.Save(memoryStream, System.Drawing.Imaging.ImageFormat.Png);
                imageBase64 = Convert.ToBase64String(memoryStream.ToArray());
            }

            requestBody.requests.Add(new RequestBody.Request(imageBase64, language));
        }

        using (var memoryStream = new MemoryStream())
        {
            new DataContractJsonSerializer(typeof(RequestBody)).WriteObject(memoryStream, requestBody);

            // The serializer writes UTF-8, so decode with UTF-8 rather than the system default code page.
            return Encoding.UTF8.GetString(memoryStream.ToArray());
        }
    }

    /// <summary>
    /// Posts the JSON body and returns the raw response text, translating failures into <see cref="OCRException"/>.
    /// </summary>
    private string PostRequest(string requestBodyString)
    {
        // Escape the key so unexpected characters cannot corrupt the query string.
        var uri = $"?key={Uri.EscapeDataString(_apiKey ?? string.Empty)}";
        try
        {
            using (var requestContent = new StringContent(requestBodyString, Encoding.UTF8, "application/json"))
            using (var result = _httpClient.PostAsync(uri, requestContent).Result)
            {
                var statusCode = (int)result.StatusCode;
                if (statusCode == 400)
                {
                    throw new OCRException(InvalidApiKeyMessage);
                }

                if (statusCode == 403)
                {
                    throw new OCRException(BillingNotEnabledMessage);
                }

                if (!result.IsSuccessStatusCode)
                {
                    throw new OCRException($"An error occurred calling Cloud Vision API - status code: {result.StatusCode}");
                }

                return result.Content.ReadAsStringAsync().Result;
            }
        }
        catch (AggregateException aggregateException) when (aggregateException.InnerException is HttpRequestException)
        {
            // Blocking on the task wraps HttpClient failures (DNS, connection, TLS) in an AggregateException.
            throw new OCRException("An error occurred calling Cloud Vision API - " + aggregateException.InnerException.Message, aggregateException);
        }
        catch (WebException webException)
        {
            // Fall back to the original message so the exception is never thrown with an empty text.
            var message = webException.Message;
            if (webException.Message.Contains("(400) Bad Request"))
            {
                message = InvalidApiKeyMessage;
            }
            else if (webException.Message.Contains("(403) Forbidden."))
            {
                message = BillingNotEnabledMessage;
            }

            throw new OCRException(message, webException);
        }
    }

    /// <summary>
    /// Extracts one text per entry of the "responses" array in the service reply.
    /// </summary>
    private static List<string> ParseResponses(string content, string language)
    {
        var resultList = new List<string>();
        var parsed = new JsonParser().Parse(content);
        if (parsed is Dictionary<string, object> jsonObject &&
            jsonObject.TryGetValue("responses", out var responsesObject) &&
            responsesObject is List<object> responses)
        {
            foreach (var responseObject in responses)
            {
                // Always add an entry (possibly empty) so results stay aligned with the responses.
                resultList.Add(ExtractText(responseObject, language));
            }
        }

        return resultList;
    }

    /// <summary>
    /// Returns the post-processed "description" of the first text annotation, or an empty string when absent.
    /// </summary>
    private static string ExtractText(object responseObject, string language)
    {
        if (responseObject is Dictionary<string, object> response &&
            response.TryGetValue("textAnnotations", out var annotationsObject) &&
            annotationsObject is List<object> textAnnotations &&
            textAnnotations.Count > 0 &&
            textAnnotations[0] is Dictionary<string, object> firstTextAnnotation &&
            firstTextAnnotation.TryGetValue("description", out var descriptionObject) &&
            descriptionObject is string description)
        {
            return OCRHelper.PostOCR(description, language);
        }

        return string.Empty;
    }

    /// <summary>
    /// JSON request body. Member names are lower-case on purpose: they are serialized as-is into the JSON sent to the service.
    /// </summary>
    [DataContract, Serializable]
    public class RequestBody
    {
        [DataMember]
        public List<Request> requests { get; set; }

        public RequestBody()
        {
            this.requests = new List<Request>();
        }

        /// <summary>
        /// A single annotate request: one image, its language hints and the requested feature.
        /// </summary>
        [DataContract, Serializable]
        public class Request
        {
            [DataMember]
            public Image image { get; set; }

            [DataMember]
            public ImageContext imageContext { get; set; }

            [DataMember]
            public List<Feature> features { get; set; }

            /// <param name="imageContent">Base64 encoded image data.</param>
            /// <param name="language">Preferred language hint; "en" is always included as well.</param>
            public Request(string imageContent, string language)
            {
                // Skip a blank hint and avoid sending "en" twice when it is already the chosen language.
                var languageHints = new List<string>();
                if (!string.IsNullOrWhiteSpace(language))
                {
                    languageHints.Add(language);
                }

                if (!languageHints.Contains("en"))
                {
                    languageHints.Add("en");
                }

                this.image = new Image(imageContent);
                this.imageContext = new ImageContext(languageHints);
                this.features = new List<Feature>() { new Feature("TEXT_DETECTION", 1) };
            }

            /// <summary>
            /// Image payload (base64 content).
            /// </summary>
            [DataContract, Serializable]
            public class Image
            {
                [DataMember]
                public string content { get; set; }

                public Image(string content)
                {
                    this.content = content;
                }
            }

            /// <summary>
            /// Extra context for the image - here only the language hints.
            /// </summary>
            [DataContract, Serializable]
            public class ImageContext
            {
                [DataMember]
                public List<string> languageHints { get; set; }

                public ImageContext(List<string> languageHints)
                {
                    this.languageHints = languageHints;
                }
            }

            /// <summary>
            /// Requested feature type and its maximum number of results.
            /// </summary>
            [DataContract, Serializable]
            public class Feature
            {
                [DataMember]
                public string type { get; set; }

                [DataMember]
                public int maxResults { get; set; }

                public Feature(string type, int maxResults)
                {
                    this.type = type;
                    this.maxResults = maxResults;
                }
            }
        }
    }
}
}

View File

@ -0,0 +1,46 @@
using System.Collections.Generic;
using System.Drawing;
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
{
/// <summary>
/// OCR service that forwards every call to the <see cref="IOCRStrategy"/> it was created with.
/// </summary>
public class GoogleOCRService : IOCRService
{
    private readonly IOCRStrategy _ocrStrategy;

    /// <summary>
    /// Creates the service around the given strategy.
    /// </summary>
    /// <param name="translationStrategy">Strategy that performs the actual OCR (parameter name kept for compatibility).</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="translationStrategy"/> is null.</exception>
    public GoogleOCRService(IOCRStrategy translationStrategy)
    {
        // Fail at construction rather than with a null reference on first use.
        _ocrStrategy = translationStrategy ?? throw new System.ArgumentNullException(nameof(translationStrategy));
    }

    /// <summary>
    /// Gets the name reported by the underlying strategy.
    /// </summary>
    public string GetName()
    {
        return _ocrStrategy.GetName();
    }

    /// <summary>
    /// Returns the strategy name, so the service shows a readable text when used as a list item.
    /// </summary>
    public override string ToString()
    {
        return GetName();
    }

    /// <summary>
    /// Gets the maximum image size reported by the underlying strategy.
    /// </summary>
    public int GetMaxImageSize()
    {
        return _ocrStrategy.GetMaxImageSize();
    }

    /// <summary>
    /// Gets the maximum request array size reported by the underlying strategy.
    /// </summary>
    public int GetMaximumRequestArraySize()
    {
        return _ocrStrategy.GetMaximumRequestArraySize();
    }

    /// <summary>
    /// Gets the URL reported by the underlying strategy, consistent with the other delegating members.
    /// </summary>
    public string GetUrl()
    {
        return _ocrStrategy.GetUrl();
    }

    /// <summary>
    /// Runs OCR on the images via the underlying strategy.
    /// </summary>
    /// <param name="language">Language passed through to the strategy.</param>
    /// <param name="images">Images passed through to the strategy.</param>
    /// <returns>The texts returned by the strategy.</returns>
    public List<string> PerformOCR(string language, List<Bitmap> images)
    {
        return _ocrStrategy.PerformOCR(language, images);
    }
}
}

View File

@ -0,0 +1,9 @@
using System.Collections.Generic;
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
{
/// <summary>
/// An OCR service. Adds no members of its own; the contract is inherited from <see cref="IOCRStrategy"/>.
/// </summary>
public interface IOCRService : IOCRStrategy
{
}
}

View File

@ -0,0 +1,15 @@
using Nikse.SubtitleEdit.Core.Common;
using System.Collections.Generic;
using System.Drawing;
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
{
/// <summary>
/// Contract for an OCR implementation that turns images into text.
/// </summary>
public interface IOCRStrategy
{
/// <summary>Gets the name identifying this OCR strategy.</summary>
string GetName();
/// <summary>Gets a URL associated with this strategy (presumably a documentation link - confirm against implementations).</summary>
string GetUrl();
/// <summary>
/// Runs OCR on the given images.
/// </summary>
/// <param name="language">Language to use for recognition.</param>
/// <param name="images">Images to recognize.</param>
/// <returns>The recognized texts (presumably one entry per image, in order - confirm against implementations).</returns>
List<string> PerformOCR(string language, List<Bitmap> images);
/// <summary>Gets the maximum image size supported. NOTE(review): the unit is not specified here - confirm.</summary>
int GetMaxImageSize();
/// <summary>Gets the maximum request array size (presumably the number of images per request - confirm).</summary>
int GetMaximumRequestArraySize();
}
}

View File

@ -0,0 +1,22 @@
using System;
using System.Net;
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
{
/// <summary>
/// Exception raised when an OCR request fails.
/// </summary>
public class OCRException : Exception
{
    /// <summary>
    /// Creates an exception carrying only a message.
    /// </summary>
    public OCRException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Creates an exception with a message and the exception that caused it.
    /// </summary>
    public OCRException(string message, Exception exception)
        : base(message, exception)
    {
    }

    /// <summary>
    /// Wraps a <see cref="WebException"/> as the inner exception, with an empty message.
    /// </summary>
    public OCRException(WebException webException)
        : this(string.Empty, webException)
    {
    }
}
}

View File

@ -13,6 +13,7 @@ namespace Nikse.SubtitleEdit.Forms
{
// 0_00_01_042__0_00_03_919_01.jpeg
// Four digit groups, a double underscore, then five digit groups (the last is a trailing number) - no file extension.
private static readonly Regex TimeCodeFormat1 = new Regex(@"^\d+_\d+_\d+_\d+__\d+_\d+_\d+_\d+_\d+$", RegexOptions.Compiled);
// Same layout as TimeCodeFormat1, followed by a dot and at least one more character (e.g. a file extension).
private static readonly Regex TimeCodeFormat1WithExtension = new Regex(@"^\d+_\d+_\d+_\d+__\d+_\d+_\d+_\d+_\d+\..+$", RegexOptions.Compiled);
// Four digit groups, a double underscore, then four digit groups - no trailing number.
private static readonly Regex TimeCodeFormat2 = new Regex(@"^\d+_\d+_\d+_\d+__\d+_\d+_\d+_\d+$", RegexOptions.Compiled);
public Subtitle Subtitle { get; private set; }
@ -99,7 +100,7 @@ namespace Nikse.SubtitleEdit.Forms
p.EndTime.TotalMilliseconds = endTime;
}
}
else if (TimeCodeFormat1.IsMatch(name) || TimeCodeFormat2.IsMatch(name))
else if (TimeCodeFormat1.IsMatch(name) || TimeCodeFormat1WithExtension.IsMatch(name) || TimeCodeFormat2.IsMatch(name))
{
var arr = name.Replace("__", "_").Split('_');
if (arr.Length >= 8)

View File

@ -55,6 +55,12 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.buttonCancel = new System.Windows.Forms.Button();
this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox();
this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox();
this.groupBoxCloudVision = new System.Windows.Forms.GroupBox();
this.checkBoxCloudVisionSendOriginalImages = new System.Windows.Forms.CheckBox();
this.comboBoxCloudVisionLanguageHint = new System.Windows.Forms.ComboBox();
this.labelCloudVisionLanguageHint = new System.Windows.Forms.Label();
this.textBoxCloudVisionAPIKey = new System.Windows.Forms.TextBox();
this.labelCloudVisionAPIKey = new System.Windows.Forms.Label();
this.groupBoxNOCR = new System.Windows.Forms.GroupBox();
this.label3 = new System.Windows.Forms.Label();
this.comboBoxNOcrLineSplitMinHeight = new System.Windows.Forms.ComboBox();
@ -173,6 +179,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.timerHideStatus = new System.Windows.Forms.Timer(this.components);
this.contextMenuStripListview.SuspendLayout();
this.groupBoxOcrMethod.SuspendLayout();
this.groupBoxCloudVision.SuspendLayout();
this.groupBoxNOCR.SuspendLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNOcrMaxWrongPixels)).BeginInit();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit();
@ -494,6 +501,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
// groupBoxOcrMethod
//
this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxCloudVision);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod);
@ -521,6 +529,127 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.comboBoxOcrMethod.TabIndex = 0;
this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged);
//
// groupBoxCloudVision
//
this.groupBoxCloudVision.Controls.Add(this.checkBoxCloudVisionSendOriginalImages);
this.groupBoxCloudVision.Controls.Add(this.comboBoxCloudVisionLanguageHint);
this.groupBoxCloudVision.Controls.Add(this.labelCloudVisionLanguageHint);
this.groupBoxCloudVision.Controls.Add(this.textBoxCloudVisionAPIKey);
this.groupBoxCloudVision.Controls.Add(this.labelCloudVisionAPIKey);
this.groupBoxCloudVision.Location = new System.Drawing.Point(7, 38);
this.groupBoxCloudVision.Name = "groupBoxCloudVision";
this.groupBoxCloudVision.Size = new System.Drawing.Size(372, 143);
this.groupBoxCloudVision.TabIndex = 8;
this.groupBoxCloudVision.TabStop = false;
this.groupBoxCloudVision.Text = "Cloud Vision API";
//
// checkBoxCloudVisionSendOriginalImages
//
this.checkBoxCloudVisionSendOriginalImages.AutoSize = true;
this.checkBoxCloudVisionSendOriginalImages.Location = new System.Drawing.Point(9, 80);
this.checkBoxCloudVisionSendOriginalImages.Name = "checkBoxCloudVisionSendOriginalImages";
this.checkBoxCloudVisionSendOriginalImages.Size = new System.Drawing.Size(123, 17);
this.checkBoxCloudVisionSendOriginalImages.TabIndex = 4;
this.checkBoxCloudVisionSendOriginalImages.Text = "Send original images";
this.checkBoxCloudVisionSendOriginalImages.UseVisualStyleBackColor = true;
this.checkBoxCloudVisionSendOriginalImages.Visible = false;
this.checkBoxCloudVisionSendOriginalImages.CheckedChanged += new System.EventHandler(this.checkBoxCloudVisionSendOriginalImages_CheckedChanged);
//
// comboBoxCloudVisionLanguageHint
//
this.comboBoxCloudVisionLanguageHint.FormattingEnabled = true;
this.comboBoxCloudVisionLanguageHint.Items.AddRange(new object[] {
"af",
"ar",
"be",
"bg",
"bn",
"ca",
"cs",
"da",
"de",
"el",
"en",
"es",
"et",
"fa",
"fi",
"fil",
"fr",
"gu",
"hi",
"hr",
"hu",
"hy",
"id",
"is",
"it",
"iw",
"ja",
"km",
"kn",
"ko",
"lo",
"lt",
"lv",
"mk",
"ml",
"mr",
"ms",
"ne",
"nl",
"no",
"pa",
"pl",
"pt",
"ro",
"ru",
"ru-PETR1708",
"sk",
"sl",
"sq",
"sr",
"sr-Latn",
"sv",
"ta",
"te",
"th",
"tl",
"tr",
"uk",
"vi",
"yi",
"zh"});
this.comboBoxCloudVisionLanguageHint.Location = new System.Drawing.Point(87, 49);
this.comboBoxCloudVisionLanguageHint.Name = "comboBoxCloudVisionLanguageHint";
this.comboBoxCloudVisionLanguageHint.Size = new System.Drawing.Size(279, 21);
this.comboBoxCloudVisionLanguageHint.TabIndex = 3;
//
// labelCloudVisionLanguageHint
//
this.labelCloudVisionLanguageHint.AutoSize = true;
this.labelCloudVisionLanguageHint.Location = new System.Drawing.Point(6, 52);
this.labelCloudVisionLanguageHint.Name = "labelCloudVisionLanguageHint";
this.labelCloudVisionLanguageHint.Size = new System.Drawing.Size(75, 13);
this.labelCloudVisionLanguageHint.TabIndex = 2;
this.labelCloudVisionLanguageHint.Text = "Language hint";
//
// textBoxCloudVisionAPIKey
//
this.textBoxCloudVisionAPIKey.Location = new System.Drawing.Point(87, 22);
this.textBoxCloudVisionAPIKey.Name = "textBoxCloudVisionAPIKey";
this.textBoxCloudVisionAPIKey.Size = new System.Drawing.Size(279, 21);
this.textBoxCloudVisionAPIKey.TabIndex = 1;
//
// labelCloudVisionAPIKey
//
this.labelCloudVisionAPIKey.AutoSize = true;
this.labelCloudVisionAPIKey.Location = new System.Drawing.Point(6, 25);
this.labelCloudVisionAPIKey.Name = "labelCloudVisionAPIKey";
this.labelCloudVisionAPIKey.Size = new System.Drawing.Size(44, 13);
this.labelCloudVisionAPIKey.TabIndex = 0;
this.labelCloudVisionAPIKey.Text = "API key";
//
// groupBoxNOCR
//
this.groupBoxNOCR.Controls.Add(this.label3);
@ -1990,6 +2119,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
this.Resize += new System.EventHandler(this.VobSubOcr_Resize);
this.contextMenuStripListview.ResumeLayout(false);
this.groupBoxOcrMethod.ResumeLayout(false);
this.groupBoxCloudVision.ResumeLayout(false);
this.groupBoxCloudVision.PerformLayout();
this.groupBoxNOCR.ResumeLayout(false);
this.groupBoxNOCR.PerformLayout();
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNOcrMaxWrongPixels)).EndInit();
@ -2193,5 +2324,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private System.Windows.Forms.ToolStripMenuItem removeAllXToolStripMenuItem;
private System.Windows.Forms.ToolStripMenuItem oCRSelectedLinesToolStripMenuItem;
private System.Windows.Forms.ToolStripSeparator toolStripSeparatorOcrSelected;
private System.Windows.Forms.GroupBox groupBoxCloudVision;
private System.Windows.Forms.TextBox textBoxCloudVisionAPIKey;
private System.Windows.Forms.Label labelCloudVisionAPIKey;
private System.Windows.Forms.ComboBox comboBoxCloudVisionLanguageHint;
private System.Windows.Forms.Label labelCloudVisionLanguageHint;
private System.Windows.Forms.CheckBox checkBoxCloudVisionSendOriginalImages;
}
}

View File

@ -6,6 +6,7 @@ using Nikse.SubtitleEdit.Core.ContainerFormats.TransportStream;
using Nikse.SubtitleEdit.Core.Interfaces;
using Nikse.SubtitleEdit.Core.SubtitleFormats;
using Nikse.SubtitleEdit.Core.VobSub;
using Nikse.SubtitleEdit.Core.VobSub.Ocr.Service;
using Nikse.SubtitleEdit.Logic;
using Nikse.SubtitleEdit.Logic.Ocr;
using Nikse.SubtitleEdit.Logic.Ocr.Binary;
@ -346,6 +347,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private NOcrThreadResult[] _nOcrThreadResults;
private bool _ocrThreadStop;
private IOCRService _ocrService;
private readonly Keys _italicShortcut = UiUtil.GetKeys(Configuration.Settings.Shortcuts.MainListViewItalic);
private readonly Keys _mainGeneralGoToNextSubtitle = UiUtil.GetKeys(Configuration.Settings.Shortcuts.GeneralGoToNextSubtitle);
private readonly Keys _mainGeneralGoToPrevSubtitle = UiUtil.GetKeys(Configuration.Settings.Shortcuts.GeneralGoToPrevSubtitle);
@ -370,6 +373,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
private readonly int _ocrMethodTesseract5 = -1;
private readonly int _ocrMethodModi = -1;
private readonly int _ocrMethodNocr = -1;
private readonly int _ocrMethodCloudVision = -1;
private FindReplaceDialogHelper _findHelper;
@ -514,6 +518,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
_ocrMethodNocr = comboBoxOcrMethod.Items.Add(language.OcrViaNOCR);
_ocrMethodCloudVision = comboBoxOcrMethod.Items.Add(language.OcrViaCloudVision);
checkBoxTesseractItalicsOn.Checked = Configuration.Settings.VobSubOcr.UseItalicsInTesseract;
checkBoxTesseractItalicsOn.Text = LanguageSettings.Current.General.Italic;
@ -583,6 +588,14 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
checkBoxNOcrItalic.Checked = Configuration.Settings.VobSubOcr.LineOcrAdvancedItalic;
numericUpDownNOcrMaxWrongPixels.Value = Configuration.Settings.VobSubOcr.LineOcrMaxErrorPixels;
labelCloudVisionAPIKey.Text = language.APIKey;
labelCloudVisionLanguageHint.Text = language.LanguageHint;
checkBoxCloudVisionSendOriginalImages.Text = language.SendOriginalImages;
textBoxCloudVisionAPIKey.Text = Configuration.Settings.VobSubOcr.CloudVisionAPIKey;
comboBoxCloudVisionLanguageHint.Text = Configuration.Settings.VobSubOcr.CloudVisionLanguage;
checkBoxCloudVisionSendOriginalImages.Checked = Configuration.Settings.VobSubOcr.CloudVisionSendOriginalImages;
comboBoxTesseractLanguages.Left = labelTesseractLanguage.Left + labelTesseractLanguage.Width;
buttonGetTesseractDictionaries.Left = comboBoxTesseractLanguages.Left + comboBoxTesseractLanguages.Width + 5;
@ -989,6 +1002,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
text = OcrViaNOCR(GetSubtitleBitmap(i), i);
}
else if (_ocrMethodIndex == _ocrMethodCloudVision)
{
text = OcrViaCloudVision(GetSubtitleBitmap(i), i);
}
else
{
text = OcrViaTesseract(GetSubtitleBitmap(i), i);
@ -1655,10 +1672,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
else
{
foreach (string fn in fileNames)
{
fullFileName = Path.Combine(Path.GetDirectoryName(_bdnFileName), fn);
// When the "send original images" option is both shown and checked, load the file returned by
// GetVSFOriginalImageFileName instead - but only if that name differs and the file exists on disk.
if (checkBoxCloudVisionSendOriginalImages.Visible && checkBoxCloudVisionSendOriginalImages.Checked)
{
var originalFileName = GetVSFOriginalImageFileName(fullFileName);
if (originalFileName != fullFileName && File.Exists(originalFileName))
{
fullFileName = originalFileName;
}
}
if (!File.Exists(fullFileName))
{
// fix AVISubDetector lines
@ -3949,7 +3975,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
else
{
matches.Add(new CompareMatch("*", false, 0, null));
string text = _vobSubOcrCharacter.ManualRecognizedCharacters;
if (text != "")
{
matches.Add(new CompareMatch(text, _vobSubOcrCharacter.IsItalic, 0, null));
}
else
{
matches.Add(new CompareMatch("*", false, 0, null));
}
}
_italicCheckedLast = _vobSubOcrCharacter.IsItalic;
@ -4005,7 +4040,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
else
{
matches.Add(new CompareMatch("*", false, 0, null, item));
// Use the manually recognized characters when there are any, otherwise fall back to the "*" placeholder.
// The split item is passed along in both cases: the line this logic replaced handed `item` to
// CompareMatch, and dropping it would lose the link between the match and its image piece.
string text = _vobSubOcrCharacter.ManualRecognizedCharacters;
if (text != "")
{
    matches.Add(new CompareMatch(text, _vobSubOcrCharacter.IsItalic, 0, null, item));
}
else
{
    matches.Add(new CompareMatch("*", false, 0, null, item));
}
}
_italicCheckedLast = _vobSubOcrCharacter.IsItalic;
@ -4369,7 +4413,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
else
{
matches.Add(new CompareMatch("*", false, 0, null));
var text = _vobSubOcrNOcrCharacter.NOcrChar.Text;
if (text != "")
{
matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, 0, null));
}
else
{
matches.Add(new CompareMatch("*", false, 0, null));
}
}
_italicCheckedLast = _vobSubOcrNOcrCharacter.IsItalic;
@ -4404,7 +4457,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
else
{
matches.Add(new CompareMatch("*", false, 0, null));
var text = _vobSubOcrNOcrCharacter.NOcrChar.Text;
if (text != "")
{
matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, 0, null));
}
else
{
matches.Add(new CompareMatch("*", false, 0, null));
}
}
_italicCheckedLast = _vobSubOcrNOcrCharacter.IsItalic;
@ -5185,6 +5247,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
_ocrMinLineHeight = -1;
}
}
else if (_ocrMethodIndex == _ocrMethodCloudVision)
{
    // Build the service from the API key currently in the text box every time this runs.
    // Caching the first instance would keep using a key the user has since corrected,
    // because the key is fixed when GoogleCloudVisionAPI is constructed.
    _ocrService = new GoogleOCRService(new GoogleCloudVisionAPI(textBoxCloudVisionAPIKey.Text.Trim()));
}
progressBar1.Maximum = max;
@ -5564,6 +5633,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
text = CallModi(i);
}
else if (_ocrMethodIndex == _ocrMethodCloudVision)
{
text = OcrViaCloudVision(bmp, i);
}
_lastLine = text;
@ -5603,7 +5676,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
if (_abort)
{
textBoxCurrentText.Text = text;
if (textBoxCurrentText.Text == "")
{
textBoxCurrentText.Text = text;
}
_mainOcrRunning = false;
SetButtonsEnabledAfterOcrDone();
return true;
@ -6763,6 +6839,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
}
}
/// <summary>
/// Runs OCR on a single bitmap via the Cloud Vision service.
/// </summary>
/// <param name="bitmap">Image to recognize.</param>
/// <param name="listViewIndex">Index of the line being processed. Currently not used by this method.</param>
/// <returns>The first text returned by the service, or an empty string when nothing was returned.</returns>
private string OcrViaCloudVision(Bitmap bitmap, int listViewIndex)
{
    // This method is called from more than one place; create the service on demand so a caller
    // that did not initialize it first does not hit a null reference.
    if (_ocrService == null)
    {
        _ocrService = new GoogleOCRService(new GoogleCloudVisionAPI(textBoxCloudVisionAPIKey.Text));
    }

    var language = comboBoxCloudVisionLanguageHint.Text;
    var cloudVisionResult = _ocrService.PerformOCR(language, new List<Bitmap> { bitmap });
    if (cloudVisionResult != null && cloudVisionResult.Count > 0)
    {
        return cloudVisionResult[0];
    }

    return string.Empty;
}
private void InitializeNOcrForBatch(string db)
{
_ocrMethodIndex = _ocrMethodNocr;
@ -7522,6 +7611,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
ShowOcrMethodGroupBox(groupBoxModiMethod);
Configuration.Settings.VobSubOcr.LastOcrMethod = "MODI";
}
else if (_ocrMethodIndex == _ocrMethodCloudVision)
{
ShowOcrMethodGroupBox(groupBoxCloudVision);
Configuration.Settings.VobSubOcr.LastOcrMethod = "CloudVision";
}
_ocrFixEngine = null;
SubtitleListView1SelectedIndexChanged(null, null);
@ -7543,6 +7637,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
groupBoxImageCompareMethod.Visible = false;
groupBoxModiMethod.Visible = false;
groupBoxNOCR.Visible = false;
groupBoxCloudVision.Visible = false;
groupBox.Visible = true;
groupBox.BringToFront();
@ -7989,6 +8084,17 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
pictureBoxEmphasis2.BackColor = Color.White;
}
// Hide the "send original images" option by default; show it only when the first paragraph's
// image file name maps (via GetVSFOriginalImageFileName) to a different file that exists on disk.
// NOTE(review): the name is checked as-is, without combining it with a directory - confirm that
// Paragraphs[0].Text holds a usable path at this point.
checkBoxCloudVisionSendOriginalImages.Visible = false;
if (bdnSubtitle.Paragraphs.Count > 0)
{
var firstImageFileName = bdnSubtitle.Paragraphs[0].Text;
var originalImageFileName = GetVSFOriginalImageFileName(firstImageFileName);
if (firstImageFileName != originalImageFileName && File.Exists(originalImageFileName))
{
checkBoxCloudVisionSendOriginalImages.Visible = true;
}
}
SetButtonsStartOcr();
progressBar1.Visible = false;
progressBar1.Maximum = 100;
@ -8008,7 +8114,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
autoTransparentBackgroundToolStripMenuItem.Checked = true;
autoTransparentBackgroundToolStripMenuItem.Visible = true;
}
private void SetOcrMethod()
@ -8017,6 +8122,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
{
comboBoxOcrMethod.SelectedIndex = _ocrMethodBinaryImageCompare;
}
else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "CloudVision" && comboBoxOcrMethod.Items.Count > _ocrMethodCloudVision)
{
comboBoxOcrMethod.SelectedIndex = _ocrMethodCloudVision;
}
else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "MODI" && comboBoxOcrMethod.Items.Count > _ocrMethodModi)
{
comboBoxOcrMethod.SelectedIndex = _ocrMethodModi;
@ -8490,6 +8599,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
Configuration.Settings.VobSubOcr.LineOcrMaxErrorPixels = (int)numericUpDownNOcrMaxWrongPixels.Value;
Configuration.Settings.VobSubOcr.UseTesseractFallback = checkBoxTesseractFallback.Checked;
Configuration.Settings.VobSubOcr.CaptureTopAlign = toolStripMenuItemCaptureTopAlign.Checked;
// Persist the Cloud Vision options shown in the form.
Configuration.Settings.VobSubOcr.CloudVisionAPIKey = textBoxCloudVisionAPIKey.Text;
Configuration.Settings.VobSubOcr.CloudVisionLanguage = comboBoxCloudVisionLanguageHint.Text;
// The checkbox is initialized from this setting and the setting is written to the settings file,
// so store the current state too - otherwise the user's choice is never remembered.
Configuration.Settings.VobSubOcr.CloudVisionSendOriginalImages = checkBoxCloudVisionSendOriginalImages.Checked;
if (_ocrMethodIndex == _ocrMethodBinaryImageCompare)
{
@ -10052,5 +10163,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
_mainOcrSelectedIndices = subtitleListView1.GetSelectedIndices().ToList();
ButtonStartOcrClick(null, null);
}
/// <summary>
/// Maps an image file name to the name of its original image: "\RGBResults" and "\TXTImages"
/// become "\RGBImages", and a ".jpeg.png" or ".png" occurrence becomes ".jpeg".
/// </summary>
/// <param name="fileName">File name to map.</param>
/// <returns>The mapped name; identical to <paramref name="fileName"/> when nothing matched.</returns>
private string GetVSFOriginalImageFileName(string fileName)
{
    // Point both result folders at the folder with the original images.
    var mapped = fileName.Replace("\\RGBResults", "\\RGBImages");
    mapped = mapped.Replace("\\TXTImages", "\\RGBImages");

    // Handle the double extension first so it is not turned into ".jpeg.jpeg".
    mapped = mapped.Replace(".jpeg.png", ".jpeg");
    return mapped.Replace(".png", ".jpeg");
}
/// <summary>
/// Refreshes the displayed subtitle image when the "send original images" option is toggled,
/// by re-running the list view selection handler.
/// </summary>
private void checkBoxCloudVisionSendOriginalImages_CheckedChanged(object sender, EventArgs e)
    => SubtitleListView1SelectedIndexChanged(sender, e);
}
}

View File

@ -3311,6 +3311,7 @@ Keep changes?",
OcrViaImageCompare = "Binary image compare",
OcrViaModi = "Microsoft Office Document Imaging (MODI). Requires Microsoft Office",
OcrViaNOCR = "OCR via nOCR",
OcrViaCloudVision = "OCR via Cloud Vision API",
TesseractEngineMode = "Engine mode",
TesseractEngineModeLegacy = "Original Tesseract only (can detect italic)",
TesseractEngineModeNeural = "Neural nets LSTM only",
@ -3380,6 +3381,9 @@ Keep changes?",
StartTraining = "Start training",
NowTraining = "Now training font '{1}'. Total chars trained: {0:#,###,##0}, {2:#,###,##0} known",
ImagesWithTimeCodesInFileName = "Images with time codes in file name...",
APIKey = "API key",
LanguageHint = "Language hint",
SendOriginalImages = "Send original images",
};
VobSubOcrCharacter = new LanguageStructure.VobSubOcrCharacter

View File

@ -3158,6 +3158,7 @@
public string OcrViaImageCompare { get; set; }
public string OcrViaModi { get; set; }
public string OcrViaNOCR { get; set; }
public string OcrViaCloudVision { get; set; }
public string TesseractEngineMode { get; set; }
public string TesseractEngineModeLegacy { get; set; }
public string TesseractEngineModeNeural { get; set; }
@ -3227,6 +3228,9 @@
public string StartTraining { get; set; }
public string NowTraining { get; set; }
public string ImagesWithTimeCodesInFileName { get; set; }
public string APIKey { get; set; }
public string LanguageHint { get; set; }
public string SendOriginalImages { get; set; }
}
public class VobSubOcrCharacter