mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-25 20:52:44 +01:00
Merge pull request #6195 from Flitskikker/feature/cloudvision-ocr
Add support for OCR via Google's Cloud Vision API
This commit is contained in:
commit
f57520a7f8
@ -2063,6 +2063,9 @@ $HorzAlign = Center
|
||||
public bool CaptureTopAlign { get; set; }
|
||||
public int UnfocusedAttentionBlinkCount { get; set; }
|
||||
public int UnfocusedAttentionPlaySoundCount { get; set; }
|
||||
public string CloudVisionAPIKey { get; set; }
|
||||
public string CloudVisionLanguage { get; set; }
|
||||
public bool CloudVisionSendOriginalImages { get; set; }
|
||||
|
||||
public VobSubOcrSettings()
|
||||
{
|
||||
@ -2091,6 +2094,9 @@ $HorzAlign = Center
|
||||
CaptureTopAlign = false;
|
||||
UnfocusedAttentionBlinkCount = 50;
|
||||
UnfocusedAttentionPlaySoundCount = 1;
|
||||
CloudVisionAPIKey = string.Empty;
|
||||
CloudVisionLanguage = "en";
|
||||
CloudVisionSendOriginalImages = false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -7379,6 +7385,24 @@ $HorzAlign = Center
|
||||
settings.VobSubOcr.UnfocusedAttentionPlaySoundCount = Convert.ToInt32(subNode.InnerText, CultureInfo.InvariantCulture);
|
||||
}
|
||||
|
||||
subNode = node.SelectSingleNode("CloudVisionAPIKey");
|
||||
if (subNode != null)
|
||||
{
|
||||
settings.VobSubOcr.CloudVisionAPIKey = subNode.InnerText;
|
||||
}
|
||||
|
||||
subNode = node.SelectSingleNode("CloudVisionLanguage");
|
||||
if (subNode != null)
|
||||
{
|
||||
settings.VobSubOcr.CloudVisionLanguage = subNode.InnerText;
|
||||
}
|
||||
|
||||
subNode = node.SelectSingleNode("CloudVisionSendOriginalImages");
|
||||
if (subNode != null)
|
||||
{
|
||||
settings.VobSubOcr.CloudVisionSendOriginalImages = Convert.ToBoolean(subNode.InnerText, CultureInfo.InvariantCulture);
|
||||
}
|
||||
|
||||
foreach (XmlNode groupNode in doc.DocumentElement.SelectNodes("MultipleSearchAndReplaceGroups/Group"))
|
||||
{
|
||||
var group = new MultipleSearchAndReplaceGroup
|
||||
@ -10458,6 +10482,9 @@ $HorzAlign = Center
|
||||
textWriter.WriteElementString("CaptureTopAlign", settings.VobSubOcr.CaptureTopAlign.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("UnfocusedAttentionBlinkCount", settings.VobSubOcr.UnfocusedAttentionBlinkCount.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("UnfocusedAttentionPlaySoundCount", settings.VobSubOcr.UnfocusedAttentionPlaySoundCount.ToString(CultureInfo.InvariantCulture));
|
||||
textWriter.WriteElementString("CloudVisionAPIKey", settings.VobSubOcr.CloudVisionAPIKey);
|
||||
textWriter.WriteElementString("CloudVisionLanguage", settings.VobSubOcr.CloudVisionLanguage);
|
||||
textWriter.WriteElementString("CloudVisionSendOriginalImages", settings.VobSubOcr.CloudVisionSendOriginalImages.ToString(CultureInfo.InvariantCulture));
|
||||
|
||||
textWriter.WriteEndElement();
|
||||
|
||||
|
20
src/libse/VobSub/Ocr/OCRHelper.cs
Normal file
20
src/libse/VobSub/Ocr/OCRHelper.cs
Normal file
@ -0,0 +1,20 @@
|
||||
using Nikse.SubtitleEdit.Core.Common;
|
||||
using System;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr
|
||||
{
|
||||
/// <summary>
/// Post-processing helpers applied to text returned by an OCR service.
/// </summary>
public static class OCRHelper
{
    /// <summary>
    /// Cleans up raw OCR output: line breaks are rewritten to <see cref="Environment.NewLine"/>
    /// and the result is trimmed.
    /// </summary>
    /// <param name="input">Raw text returned by the OCR service.</param>
    /// <param name="language">Language of the text; currently not used by the clean-up.</param>
    /// <returns>The cleaned text.</returns>
    public static string PostOCR(string input, string language) =>
        FixInvalidCarriageReturnLineFeedCharacters(input);

    /// <summary>
    /// Splits the text into lines, joins them again with the platform line separator
    /// and strips leading/trailing white space.
    /// </summary>
    private static string FixInvalidCarriageReturnLineFeedCharacters(string input)
    {
        var lines = input.SplitToLines();
        var normalized = string.Join(Environment.NewLine, lines);
        return normalized.Trim();
    }
}
|
||||
}
|
227
src/libse/VobSub/Ocr/Service/GoogleCloudVisionAPI.cs
Normal file
227
src/libse/VobSub/Ocr/Service/GoogleCloudVisionAPI.cs
Normal file
@ -0,0 +1,227 @@
|
||||
using Nikse.SubtitleEdit.Core.Common;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Drawing;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Runtime.Serialization;
|
||||
using System.Runtime.Serialization.Json;
|
||||
using System.Text;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
|
||||
{
|
||||
/// <summary>
/// OCR via Google Cloud Vision API - see https://cloud.google.com/vision/docs/ocr
/// </summary>
/// <remarks>
/// Every bitmap is sent as a base64 encoded PNG in a single <c>images:annotate</c> call using the
/// <c>TEXT_DETECTION</c> feature. For each response the description of the first text annotation
/// is post-processed with <see cref="OCRHelper.PostOCR"/> and returned.
/// </remarks>
public class GoogleCloudVisionAPI : IOCRStrategy
{
    private const string InvalidKeyMessage = "API key invalid (or perhaps billing is not enabled)?";
    private const string BillingMessage = "Perhaps billing is not enabled (or API key is invalid)?";

    private readonly string _apiKey;
    private readonly HttpClient _httpClient;

    /// <summary>Display name of this OCR back end.</summary>
    public string GetName()
    {
        return "Google Cloud Vision API";
    }

    /// <summary>
    /// Maximum image size accepted by the service.
    /// NOTE(review): presumably bytes - the unit is not established by this class; confirm against the API limits.
    /// </summary>
    public int GetMaxImageSize()
    {
        return 20000000;
    }

    /// <summary>
    /// Maximum number of entries in one request array.
    /// NOTE(review): <see cref="PerformOCR"/> does not enforce this limit itself; callers are expected to batch.
    /// </summary>
    public int GetMaximumRequestArraySize()
    {
        return 16;
    }

    /// <summary>
    /// Creates the client for the <c>images:annotate</c> endpoint.
    /// </summary>
    /// <param name="apiKey">Google Cloud API key appended to every request as the <c>key</c> query parameter.</param>
    public GoogleCloudVisionAPI(string apiKey)
    {
        _apiKey = apiKey;
        _httpClient = HttpClientHelper.MakeHttpClient();
        _httpClient.BaseAddress = new Uri("https://vision.googleapis.com/v1/images:annotate");
        _httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
    }

    /// <summary>Link to the documentation of the service.</summary>
    public string GetUrl()
    {
        return "https://cloud.google.com/vision/docs/ocr";
    }

    /// <summary>
    /// Runs text detection on all <paramref name="images"/> in one request.
    /// </summary>
    /// <param name="language">Language hint sent along with every image ("en" is always added as a second hint).</param>
    /// <param name="images">Bitmaps to recognize.</param>
    /// <returns>
    /// One entry per response returned by the service, in response order; an entry is empty when the
    /// response holds no text annotation. An empty list when <paramref name="images"/> is null or empty
    /// (no request is sent in that case).
    /// </returns>
    /// <exception cref="OCRException">The service answered with a non-success status code or a response that is not a JSON object.</exception>
    public List<string> PerformOCR(string language, List<Bitmap> images)
    {
        if (images == null || images.Count == 0)
        {
            // Nothing to recognize - avoid a pointless (and billable/failing) remote call.
            return new List<string>();
        }

        var requestBody = new RequestBody();
        foreach (var image in images)
        {
            requestBody.requests.Add(new RequestBody.Request(ToBase64Png(image), language));
        }

        var content = PostRequest(SerializeRequestBody(requestBody));
        return ParseResponses(content, language);
    }

    /// <summary>Encodes a bitmap as PNG and returns the bytes as a base64 string.</summary>
    private static string ToBase64Png(Bitmap image)
    {
        using (var memoryStream = new MemoryStream())
        {
            image.Save(memoryStream, System.Drawing.Imaging.ImageFormat.Png);
            return Convert.ToBase64String(memoryStream.ToArray());
        }
    }

    /// <summary>Serializes the request body to a JSON string.</summary>
    private static string SerializeRequestBody(RequestBody requestBody)
    {
        using (var memoryStream = new MemoryStream())
        {
            new DataContractJsonSerializer(typeof(RequestBody)).WriteObject(memoryStream, requestBody);

            // DataContractJsonSerializer emits UTF-8, so decode with UTF-8 rather than the
            // machine dependent Encoding.Default.
            return Encoding.UTF8.GetString(memoryStream.ToArray());
        }
    }

    /// <summary>Posts the JSON body to the service and returns the raw response text.</summary>
    private string PostRequest(string requestBodyString)
    {
        // Escape the key so characters such as '&' or '+' cannot corrupt the query string.
        var uri = "?key=" + Uri.EscapeDataString(_apiKey ?? string.Empty);

        try
        {
            using (var requestContent = new StringContent(requestBodyString, Encoding.UTF8, "application/json"))
            using (var response = _httpClient.PostAsync(uri, requestContent).Result)
            {
                if (response.StatusCode == HttpStatusCode.BadRequest)
                {
                    throw new OCRException(InvalidKeyMessage);
                }

                if (response.StatusCode == HttpStatusCode.Forbidden)
                {
                    throw new OCRException(BillingMessage);
                }

                if (!response.IsSuccessStatusCode)
                {
                    throw new OCRException($"An error occurred calling Cloud Vision API - status code: {response.StatusCode}");
                }

                return response.Content.ReadAsStringAsync().Result;
            }
        }
        catch (WebException webException)
        {
            // NOTE(review): transport failures from HttpClient surface through .Result as an
            // AggregateException (wrapping HttpRequestException), so this handler is probably
            // never reached - confirm and consider handling that case as well.
            string message;
            if (webException.Message.Contains("(400) Bad Request"))
            {
                message = InvalidKeyMessage;
            }
            else if (webException.Message.Contains("(403) Forbidden."))
            {
                message = BillingMessage;
            }
            else
            {
                // Do not hide the cause behind an empty message.
                message = webException.Message;
            }

            throw new OCRException(message, webException);
        }
    }

    /// <summary>Extracts one text per entry of the "responses" array of the service answer.</summary>
    private static List<string> ParseResponses(string content, string language)
    {
        var jsonObject = new JsonParser().Parse(content) as Dictionary<string, object>;
        if (jsonObject == null)
        {
            throw new OCRException("Unexpected response from Cloud Vision API - no JSON object returned");
        }

        var resultList = new List<string>();
        if (jsonObject.TryGetValue("responses", out var responsesObject) && responsesObject is List<object> responses)
        {
            foreach (var responseObject in responses)
            {
                // Always add an entry so result positions keep matching the response order.
                resultList.Add(ExtractText(responseObject, language));
            }
        }

        return resultList;
    }

    /// <summary>
    /// Returns the post-processed description of the first text annotation of a single response,
    /// or an empty string when the response does not have that shape.
    /// </summary>
    private static string ExtractText(object responseObject, string language)
    {
        if (responseObject is Dictionary<string, object> response &&
            response.TryGetValue("textAnnotations", out var annotationsObject) &&
            annotationsObject is List<object> textAnnotations &&
            textAnnotations.Count > 0 &&
            textAnnotations[0] is Dictionary<string, object> firstTextAnnotation &&
            firstTextAnnotation.TryGetValue("description", out var descriptionObject) &&
            descriptionObject is string description)
        {
            return OCRHelper.PostOCR(description, language);
        }

        return string.Empty;
    }

    /// <summary>JSON payload of an <c>images:annotate</c> call (member names match the JSON field names).</summary>
    [DataContract, Serializable]
    public class RequestBody
    {
        [DataMember]
        public List<Request> requests { get; set; }

        public RequestBody()
        {
            this.requests = new List<Request>();
        }

        /// <summary>A single image together with its context and the requested feature.</summary>
        [DataContract, Serializable]
        public class Request
        {
            [DataMember]
            public Image image { get; set; }
            [DataMember]
            public ImageContext imageContext { get; set; }
            [DataMember]
            public List<Feature> features { get; set; }

            /// <param name="imageContent">Base64 encoded image bytes.</param>
            /// <param name="language">Preferred language hint; "en" is always sent as an additional hint.</param>
            public Request(string imageContent, string language)
            {
                this.image = new Image(imageContent);
                this.imageContext = new ImageContext(BuildLanguageHints(language));
                this.features = new List<Feature>() { new Feature("TEXT_DETECTION", 1) };
            }

            /// <summary>
            /// Builds the hint list: the given language first (when not blank) followed by "en",
            /// without sending "en" twice or sending an empty hint.
            /// </summary>
            private static List<string> BuildLanguageHints(string language)
            {
                var hints = new List<string>();
                if (!string.IsNullOrWhiteSpace(language))
                {
                    hints.Add(language.Trim());
                }

                if (!hints.Contains("en"))
                {
                    hints.Add("en");
                }

                return hints;
            }

            /// <summary>Image payload.</summary>
            [DataContract, Serializable]
            public class Image
            {
                [DataMember]
                public string content { get; set; }

                public Image(string content)
                {
                    this.content = content;
                }
            }

            /// <summary>Additional context for the image (language hints).</summary>
            [DataContract, Serializable]
            public class ImageContext
            {
                [DataMember]
                public List<string> languageHints { get; set; }

                public ImageContext(List<string> languageHints)
                {
                    this.languageHints = languageHints;
                }
            }

            /// <summary>Requested detection feature and its maximum number of results.</summary>
            [DataContract, Serializable]
            public class Feature
            {
                [DataMember]
                public string type { get; set; }
                [DataMember]
                public int maxResults { get; set; }

                public Feature(string type, int maxResults)
                {
                    this.type = type;
                    this.maxResults = maxResults;
                }
            }
        }
    }
}
|
||||
}
|
46
src/libse/VobSub/Ocr/Service/GoogleOCRService.cs
Normal file
46
src/libse/VobSub/Ocr/Service/GoogleOCRService.cs
Normal file
@ -0,0 +1,46 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Drawing;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
|
||||
{
|
||||
/// <summary>
/// <see cref="IOCRService"/> implementation that forwards every call to the
/// <see cref="IOCRStrategy"/> supplied at construction time.
/// </summary>
public class GoogleOCRService : IOCRService
{
    private readonly IOCRStrategy _ocrStrategy;

    /// <summary>
    /// Wraps the given strategy.
    /// </summary>
    /// <param name="translationStrategy">The OCR strategy all calls are delegated to (parameter name kept for source compatibility).</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="translationStrategy"/> is null.</exception>
    public GoogleOCRService(IOCRStrategy translationStrategy)
    {
        // Fail at construction instead of with a NullReferenceException on first use.
        _ocrStrategy = translationStrategy ?? throw new System.ArgumentNullException(nameof(translationStrategy));
    }

    /// <summary>Name of the wrapped strategy.</summary>
    public string GetName()
    {
        return _ocrStrategy.GetName();
    }

    /// <summary>Returns the same text as <see cref="GetName"/>.</summary>
    public override string ToString()
    {
        return GetName();
    }

    /// <summary>Maximum image size reported by the wrapped strategy.</summary>
    public int GetMaxImageSize()
    {
        return _ocrStrategy.GetMaxImageSize();
    }

    /// <summary>Maximum request array size reported by the wrapped strategy.</summary>
    public int GetMaximumRequestArraySize()
    {
        return _ocrStrategy.GetMaximumRequestArraySize();
    }

    /// <summary>URL reported by the wrapped strategy (delegated like every other member).</summary>
    public string GetUrl()
    {
        return _ocrStrategy.GetUrl();
    }

    /// <summary>Runs OCR on the images via the wrapped strategy.</summary>
    /// <param name="language">Language passed through to the strategy.</param>
    /// <param name="images">Images passed through to the strategy.</param>
    /// <returns>The list returned by the strategy.</returns>
    public List<string> PerformOCR(string language, List<Bitmap> images)
    {
        return _ocrStrategy.PerformOCR(language, images);
    }
}
|
||||
}
|
9
src/libse/VobSub/Ocr/Service/IOCRService.cs
Normal file
9
src/libse/VobSub/Ocr/Service/IOCRService.cs
Normal file
@ -0,0 +1,9 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
|
||||
{
|
||||
/// <summary>
/// An OCR service. Adds no members of its own; the contract is the one
/// inherited from <see cref="IOCRStrategy"/>.
/// </summary>
public interface IOCRService : IOCRStrategy
{
}
|
||||
}
|
15
src/libse/VobSub/Ocr/Service/IOCRStrategy.cs
Normal file
15
src/libse/VobSub/Ocr/Service/IOCRStrategy.cs
Normal file
@ -0,0 +1,15 @@
|
||||
using Nikse.SubtitleEdit.Core.Common;
|
||||
using System.Collections.Generic;
|
||||
using System.Drawing;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
|
||||
{
|
||||
/// <summary>
/// Contract implemented by an OCR back end.
/// </summary>
public interface IOCRStrategy
{
    // --- Identification -------------------------------------------------

    /// <summary>Display name of the back end.</summary>
    string GetName();

    /// <summary>URL associated with the back end.</summary>
    string GetUrl();

    // --- Limits ---------------------------------------------------------

    /// <summary>Maximum image size supported by the back end.</summary>
    int GetMaxImageSize();

    /// <summary>Maximum number of entries in one request array.</summary>
    int GetMaximumRequestArraySize();

    // --- Recognition ----------------------------------------------------

    /// <summary>Recognizes the text of the given images.</summary>
    /// <param name="language">Language of the text on the images.</param>
    /// <param name="images">Images to recognize.</param>
    /// <returns>The recognized texts.</returns>
    List<string> PerformOCR(string language, List<Bitmap> images);
}
|
||||
}
|
22
src/libse/VobSub/Ocr/Service/OCRException.cs
Normal file
22
src/libse/VobSub/Ocr/Service/OCRException.cs
Normal file
@ -0,0 +1,22 @@
|
||||
using System;
|
||||
using System.Net;
|
||||
|
||||
namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
|
||||
{
|
||||
/// <summary>
/// Error raised when an OCR request fails.
/// </summary>
public class OCRException : Exception
{
    /// <summary>Creates the exception with a message only.</summary>
    /// <param name="message">Description of the failure.</param>
    public OCRException(string message)
        : base(message)
    {
    }

    /// <summary>Creates the exception with a message and the underlying cause.</summary>
    /// <param name="message">Description of the failure.</param>
    /// <param name="exception">The exception that caused this one.</param>
    public OCRException(string message, Exception exception)
        : base(message, exception)
    {
    }

    /// <summary>Wraps a <see cref="WebException"/>; the message of the new exception is empty.</summary>
    /// <param name="webException">The web exception that caused this one.</param>
    public OCRException(WebException webException)
        : base("", webException)
    {
    }
}
|
||||
}
|
@ -13,6 +13,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
{
|
||||
// 0_00_01_042__0_00_03_919_01.jpeg
|
||||
private static readonly Regex TimeCodeFormat1 = new Regex(@"^\d+_\d+_\d+_\d+__\d+_\d+_\d+_\d+_\d+$", RegexOptions.Compiled);
|
||||
private static readonly Regex TimeCodeFormat1WithExtension = new Regex(@"^\d+_\d+_\d+_\d+__\d+_\d+_\d+_\d+_\d+\..+$", RegexOptions.Compiled);
|
||||
private static readonly Regex TimeCodeFormat2 = new Regex(@"^\d+_\d+_\d+_\d+__\d+_\d+_\d+_\d+$", RegexOptions.Compiled);
|
||||
|
||||
public Subtitle Subtitle { get; private set; }
|
||||
@ -99,7 +100,7 @@ namespace Nikse.SubtitleEdit.Forms
|
||||
p.EndTime.TotalMilliseconds = endTime;
|
||||
}
|
||||
}
|
||||
else if (TimeCodeFormat1.IsMatch(name) || TimeCodeFormat2.IsMatch(name))
|
||||
else if (TimeCodeFormat1.IsMatch(name) || TimeCodeFormat1WithExtension.IsMatch(name) || TimeCodeFormat2.IsMatch(name))
|
||||
{
|
||||
var arr = name.Replace("__", "_").Split('_');
|
||||
if (arr.Length >= 8)
|
||||
|
137
src/ui/Forms/Ocr/VobSubOcr.Designer.cs
generated
137
src/ui/Forms/Ocr/VobSubOcr.Designer.cs
generated
@ -55,6 +55,12 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.buttonCancel = new System.Windows.Forms.Button();
|
||||
this.groupBoxOcrMethod = new System.Windows.Forms.GroupBox();
|
||||
this.comboBoxOcrMethod = new System.Windows.Forms.ComboBox();
|
||||
this.groupBoxCloudVision = new System.Windows.Forms.GroupBox();
|
||||
this.checkBoxCloudVisionSendOriginalImages = new System.Windows.Forms.CheckBox();
|
||||
this.comboBoxCloudVisionLanguageHint = new System.Windows.Forms.ComboBox();
|
||||
this.labelCloudVisionLanguageHint = new System.Windows.Forms.Label();
|
||||
this.textBoxCloudVisionAPIKey = new System.Windows.Forms.TextBox();
|
||||
this.labelCloudVisionAPIKey = new System.Windows.Forms.Label();
|
||||
this.groupBoxNOCR = new System.Windows.Forms.GroupBox();
|
||||
this.label3 = new System.Windows.Forms.Label();
|
||||
this.comboBoxNOcrLineSplitMinHeight = new System.Windows.Forms.ComboBox();
|
||||
@ -173,6 +179,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.timerHideStatus = new System.Windows.Forms.Timer(this.components);
|
||||
this.contextMenuStripListview.SuspendLayout();
|
||||
this.groupBoxOcrMethod.SuspendLayout();
|
||||
this.groupBoxCloudVision.SuspendLayout();
|
||||
this.groupBoxNOCR.SuspendLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNOcrMaxWrongPixels)).BeginInit();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNumberOfPixelsIsSpaceNOCR)).BeginInit();
|
||||
@ -494,6 +501,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
// groupBoxOcrMethod
|
||||
//
|
||||
this.groupBoxOcrMethod.Controls.Add(this.comboBoxOcrMethod);
|
||||
this.groupBoxOcrMethod.Controls.Add(this.groupBoxCloudVision);
|
||||
this.groupBoxOcrMethod.Controls.Add(this.groupBoxNOCR);
|
||||
this.groupBoxOcrMethod.Controls.Add(this.groupBoxImageCompareMethod);
|
||||
this.groupBoxOcrMethod.Controls.Add(this.GroupBoxTesseractMethod);
|
||||
@ -521,6 +529,127 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.comboBoxOcrMethod.TabIndex = 0;
|
||||
this.comboBoxOcrMethod.SelectedIndexChanged += new System.EventHandler(this.ComboBoxOcrMethodSelectedIndexChanged);
|
||||
//
|
||||
// groupBoxCloudVision
|
||||
//
|
||||
this.groupBoxCloudVision.Controls.Add(this.checkBoxCloudVisionSendOriginalImages);
|
||||
this.groupBoxCloudVision.Controls.Add(this.comboBoxCloudVisionLanguageHint);
|
||||
this.groupBoxCloudVision.Controls.Add(this.labelCloudVisionLanguageHint);
|
||||
this.groupBoxCloudVision.Controls.Add(this.textBoxCloudVisionAPIKey);
|
||||
this.groupBoxCloudVision.Controls.Add(this.labelCloudVisionAPIKey);
|
||||
this.groupBoxCloudVision.Location = new System.Drawing.Point(7, 38);
|
||||
this.groupBoxCloudVision.Name = "groupBoxCloudVision";
|
||||
this.groupBoxCloudVision.Size = new System.Drawing.Size(372, 143);
|
||||
this.groupBoxCloudVision.TabIndex = 8;
|
||||
this.groupBoxCloudVision.TabStop = false;
|
||||
this.groupBoxCloudVision.Text = "Cloud Vision API";
|
||||
//
|
||||
// checkBoxCloudVisionSendOriginalImages
|
||||
//
|
||||
this.checkBoxCloudVisionSendOriginalImages.AutoSize = true;
|
||||
this.checkBoxCloudVisionSendOriginalImages.Location = new System.Drawing.Point(9, 80);
|
||||
this.checkBoxCloudVisionSendOriginalImages.Name = "checkBoxCloudVisionSendOriginalImages";
|
||||
this.checkBoxCloudVisionSendOriginalImages.Size = new System.Drawing.Size(123, 17);
|
||||
this.checkBoxCloudVisionSendOriginalImages.TabIndex = 4;
|
||||
this.checkBoxCloudVisionSendOriginalImages.Text = "Send original images";
|
||||
this.checkBoxCloudVisionSendOriginalImages.UseVisualStyleBackColor = true;
|
||||
this.checkBoxCloudVisionSendOriginalImages.Visible = false;
|
||||
this.checkBoxCloudVisionSendOriginalImages.CheckedChanged += new System.EventHandler(this.checkBoxCloudVisionSendOriginalImages_CheckedChanged);
|
||||
//
|
||||
// comboBoxCloudVisionLanguageHint
|
||||
//
|
||||
this.comboBoxCloudVisionLanguageHint.FormattingEnabled = true;
|
||||
this.comboBoxCloudVisionLanguageHint.Items.AddRange(new object[] {
|
||||
"af",
|
||||
"ar",
|
||||
"be",
|
||||
"bg",
|
||||
"bn",
|
||||
"ca",
|
||||
"cs",
|
||||
"da",
|
||||
"de",
|
||||
"el",
|
||||
"en",
|
||||
"es",
|
||||
"et",
|
||||
"fa",
|
||||
"fi",
|
||||
"fil",
|
||||
"fr",
|
||||
"gu",
|
||||
"hi",
|
||||
"hr",
|
||||
"hu",
|
||||
"hy",
|
||||
"id",
|
||||
"is",
|
||||
"it",
|
||||
"iw",
|
||||
"ja",
|
||||
"km",
|
||||
"kn",
|
||||
"ko",
|
||||
"lo",
|
||||
"lt",
|
||||
"lv",
|
||||
"mk",
|
||||
"ml",
|
||||
"mr",
|
||||
"ms",
|
||||
"ne",
|
||||
"nl",
|
||||
"no",
|
||||
"pa",
|
||||
"pl",
|
||||
"pt",
|
||||
"ro",
|
||||
"ru",
|
||||
"ru-PETR1708",
|
||||
"sk",
|
||||
"sl",
|
||||
"sq",
|
||||
"sr",
|
||||
"sr-Latn",
|
||||
"sv",
|
||||
"ta",
|
||||
"te",
|
||||
"th",
|
||||
"tl",
|
||||
"tr",
|
||||
"uk",
|
||||
"vi",
|
||||
"yi",
|
||||
"zh"});
|
||||
this.comboBoxCloudVisionLanguageHint.Location = new System.Drawing.Point(87, 49);
|
||||
this.comboBoxCloudVisionLanguageHint.Name = "comboBoxCloudVisionLanguageHint";
|
||||
this.comboBoxCloudVisionLanguageHint.Size = new System.Drawing.Size(279, 21);
|
||||
this.comboBoxCloudVisionLanguageHint.TabIndex = 3;
|
||||
//
|
||||
// labelCloudVisionLanguageHint
|
||||
//
|
||||
this.labelCloudVisionLanguageHint.AutoSize = true;
|
||||
this.labelCloudVisionLanguageHint.Location = new System.Drawing.Point(6, 52);
|
||||
this.labelCloudVisionLanguageHint.Name = "labelCloudVisionLanguageHint";
|
||||
this.labelCloudVisionLanguageHint.Size = new System.Drawing.Size(75, 13);
|
||||
this.labelCloudVisionLanguageHint.TabIndex = 2;
|
||||
this.labelCloudVisionLanguageHint.Text = "Language hint";
|
||||
//
|
||||
// textBoxCloudVisionAPIKey
|
||||
//
|
||||
this.textBoxCloudVisionAPIKey.Location = new System.Drawing.Point(87, 22);
|
||||
this.textBoxCloudVisionAPIKey.Name = "textBoxCloudVisionAPIKey";
|
||||
this.textBoxCloudVisionAPIKey.Size = new System.Drawing.Size(279, 21);
|
||||
this.textBoxCloudVisionAPIKey.TabIndex = 1;
|
||||
//
|
||||
// labelCloudVisionAPIKey
|
||||
//
|
||||
this.labelCloudVisionAPIKey.AutoSize = true;
|
||||
this.labelCloudVisionAPIKey.Location = new System.Drawing.Point(6, 25);
|
||||
this.labelCloudVisionAPIKey.Name = "labelCloudVisionAPIKey";
|
||||
this.labelCloudVisionAPIKey.Size = new System.Drawing.Size(44, 13);
|
||||
this.labelCloudVisionAPIKey.TabIndex = 0;
|
||||
this.labelCloudVisionAPIKey.Text = "API key";
|
||||
//
|
||||
// groupBoxNOCR
|
||||
//
|
||||
this.groupBoxNOCR.Controls.Add(this.label3);
|
||||
@ -1990,6 +2119,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
this.Resize += new System.EventHandler(this.VobSubOcr_Resize);
|
||||
this.contextMenuStripListview.ResumeLayout(false);
|
||||
this.groupBoxOcrMethod.ResumeLayout(false);
|
||||
this.groupBoxCloudVision.ResumeLayout(false);
|
||||
this.groupBoxCloudVision.PerformLayout();
|
||||
this.groupBoxNOCR.ResumeLayout(false);
|
||||
this.groupBoxNOCR.PerformLayout();
|
||||
((System.ComponentModel.ISupportInitialize)(this.numericUpDownNOcrMaxWrongPixels)).EndInit();
|
||||
@ -2193,5 +2324,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
private System.Windows.Forms.ToolStripMenuItem removeAllXToolStripMenuItem;
|
||||
private System.Windows.Forms.ToolStripMenuItem oCRSelectedLinesToolStripMenuItem;
|
||||
private System.Windows.Forms.ToolStripSeparator toolStripSeparatorOcrSelected;
|
||||
private System.Windows.Forms.GroupBox groupBoxCloudVision;
|
||||
private System.Windows.Forms.TextBox textBoxCloudVisionAPIKey;
|
||||
private System.Windows.Forms.Label labelCloudVisionAPIKey;
|
||||
private System.Windows.Forms.ComboBox comboBoxCloudVisionLanguageHint;
|
||||
private System.Windows.Forms.Label labelCloudVisionLanguageHint;
|
||||
private System.Windows.Forms.CheckBox checkBoxCloudVisionSendOriginalImages;
|
||||
}
|
||||
}
|
@ -6,6 +6,7 @@ using Nikse.SubtitleEdit.Core.ContainerFormats.TransportStream;
|
||||
using Nikse.SubtitleEdit.Core.Interfaces;
|
||||
using Nikse.SubtitleEdit.Core.SubtitleFormats;
|
||||
using Nikse.SubtitleEdit.Core.VobSub;
|
||||
using Nikse.SubtitleEdit.Core.VobSub.Ocr.Service;
|
||||
using Nikse.SubtitleEdit.Logic;
|
||||
using Nikse.SubtitleEdit.Logic.Ocr;
|
||||
using Nikse.SubtitleEdit.Logic.Ocr.Binary;
|
||||
@ -346,6 +347,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
private NOcrThreadResult[] _nOcrThreadResults;
|
||||
private bool _ocrThreadStop;
|
||||
|
||||
private IOCRService _ocrService;
|
||||
|
||||
private readonly Keys _italicShortcut = UiUtil.GetKeys(Configuration.Settings.Shortcuts.MainListViewItalic);
|
||||
private readonly Keys _mainGeneralGoToNextSubtitle = UiUtil.GetKeys(Configuration.Settings.Shortcuts.GeneralGoToNextSubtitle);
|
||||
private readonly Keys _mainGeneralGoToPrevSubtitle = UiUtil.GetKeys(Configuration.Settings.Shortcuts.GeneralGoToPrevSubtitle);
|
||||
@ -370,6 +373,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
private readonly int _ocrMethodTesseract5 = -1;
|
||||
private readonly int _ocrMethodModi = -1;
|
||||
private readonly int _ocrMethodNocr = -1;
|
||||
private readonly int _ocrMethodCloudVision = -1;
|
||||
|
||||
private FindReplaceDialogHelper _findHelper;
|
||||
|
||||
@ -514,6 +518,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
}
|
||||
|
||||
_ocrMethodNocr = comboBoxOcrMethod.Items.Add(language.OcrViaNOCR);
|
||||
_ocrMethodCloudVision = comboBoxOcrMethod.Items.Add(language.OcrViaCloudVision);
|
||||
|
||||
checkBoxTesseractItalicsOn.Checked = Configuration.Settings.VobSubOcr.UseItalicsInTesseract;
|
||||
checkBoxTesseractItalicsOn.Text = LanguageSettings.Current.General.Italic;
|
||||
@ -583,6 +588,14 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
checkBoxNOcrItalic.Checked = Configuration.Settings.VobSubOcr.LineOcrAdvancedItalic;
|
||||
numericUpDownNOcrMaxWrongPixels.Value = Configuration.Settings.VobSubOcr.LineOcrMaxErrorPixels;
|
||||
|
||||
labelCloudVisionAPIKey.Text = language.APIKey;
|
||||
labelCloudVisionLanguageHint.Text = language.LanguageHint;
|
||||
checkBoxCloudVisionSendOriginalImages.Text = language.SendOriginalImages;
|
||||
|
||||
textBoxCloudVisionAPIKey.Text = Configuration.Settings.VobSubOcr.CloudVisionAPIKey;
|
||||
comboBoxCloudVisionLanguageHint.Text = Configuration.Settings.VobSubOcr.CloudVisionLanguage;
|
||||
checkBoxCloudVisionSendOriginalImages.Checked = Configuration.Settings.VobSubOcr.CloudVisionSendOriginalImages;
|
||||
|
||||
comboBoxTesseractLanguages.Left = labelTesseractLanguage.Left + labelTesseractLanguage.Width;
|
||||
buttonGetTesseractDictionaries.Left = comboBoxTesseractLanguages.Left + comboBoxTesseractLanguages.Width + 5;
|
||||
|
||||
@ -989,6 +1002,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
text = OcrViaNOCR(GetSubtitleBitmap(i), i);
|
||||
}
|
||||
else if (_ocrMethodIndex == _ocrMethodCloudVision)
|
||||
{
|
||||
text = OcrViaCloudVision(GetSubtitleBitmap(i), i);
|
||||
}
|
||||
else
|
||||
{
|
||||
text = OcrViaTesseract(GetSubtitleBitmap(i), i);
|
||||
@ -1655,10 +1672,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
foreach (string fn in fileNames)
|
||||
{
|
||||
fullFileName = Path.Combine(Path.GetDirectoryName(_bdnFileName), fn);
|
||||
|
||||
if (checkBoxCloudVisionSendOriginalImages.Visible && checkBoxCloudVisionSendOriginalImages.Checked)
|
||||
{
|
||||
var originalFileName = GetVSFOriginalImageFileName(fullFileName);
|
||||
if (originalFileName != fullFileName && File.Exists(originalFileName))
|
||||
{
|
||||
fullFileName = originalFileName;
|
||||
}
|
||||
}
|
||||
|
||||
if (!File.Exists(fullFileName))
|
||||
{
|
||||
// fix AVISubDetector lines
|
||||
@ -3949,7 +3975,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
}
|
||||
else
|
||||
{
|
||||
matches.Add(new CompareMatch("*", false, 0, null));
|
||||
string text = _vobSubOcrCharacter.ManualRecognizedCharacters;
|
||||
|
||||
if (text != "")
|
||||
{
|
||||
matches.Add(new CompareMatch(text, _vobSubOcrCharacter.IsItalic, 0, null));
|
||||
}
|
||||
else
|
||||
{
|
||||
matches.Add(new CompareMatch("*", false, 0, null));
|
||||
}
|
||||
}
|
||||
|
||||
_italicCheckedLast = _vobSubOcrCharacter.IsItalic;
|
||||
@ -4005,7 +4040,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
}
|
||||
else
|
||||
{
|
||||
matches.Add(new CompareMatch("*", false, 0, null, item));
|
||||
string text = _vobSubOcrCharacter.ManualRecognizedCharacters;
|
||||
|
||||
if (text != "")
|
||||
{
|
||||
matches.Add(new CompareMatch(text, _vobSubOcrCharacter.IsItalic, 0, null));
|
||||
}
|
||||
else
|
||||
{
|
||||
matches.Add(new CompareMatch("*", false, 0, null));
|
||||
}
|
||||
}
|
||||
|
||||
_italicCheckedLast = _vobSubOcrCharacter.IsItalic;
|
||||
@ -4369,7 +4413,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
}
|
||||
else
|
||||
{
|
||||
matches.Add(new CompareMatch("*", false, 0, null));
|
||||
var text = _vobSubOcrNOcrCharacter.NOcrChar.Text;
|
||||
|
||||
if (text != "")
|
||||
{
|
||||
matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, 0, null));
|
||||
}
|
||||
else
|
||||
{
|
||||
matches.Add(new CompareMatch("*", false, 0, null));
|
||||
}
|
||||
}
|
||||
|
||||
_italicCheckedLast = _vobSubOcrNOcrCharacter.IsItalic;
|
||||
@ -4404,7 +4457,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
}
|
||||
else
|
||||
{
|
||||
matches.Add(new CompareMatch("*", false, 0, null));
|
||||
var text = _vobSubOcrNOcrCharacter.NOcrChar.Text;
|
||||
|
||||
if (text != "")
|
||||
{
|
||||
matches.Add(new CompareMatch(text, _vobSubOcrNOcrCharacter.IsItalic, 0, null));
|
||||
}
|
||||
else
|
||||
{
|
||||
matches.Add(new CompareMatch("*", false, 0, null));
|
||||
}
|
||||
}
|
||||
|
||||
_italicCheckedLast = _vobSubOcrNOcrCharacter.IsItalic;
|
||||
@ -5185,6 +5247,13 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
_ocrMinLineHeight = -1;
|
||||
}
|
||||
}
|
||||
else if (_ocrMethodIndex == _ocrMethodCloudVision)
|
||||
{
|
||||
if (_ocrService == null)
|
||||
{
|
||||
_ocrService = new GoogleOCRService(new GoogleCloudVisionAPI(textBoxCloudVisionAPIKey.Text));
|
||||
}
|
||||
}
|
||||
|
||||
progressBar1.Maximum = max;
|
||||
@ -5564,6 +5633,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
text = CallModi(i);
|
||||
}
|
||||
else if (_ocrMethodIndex == _ocrMethodCloudVision)
|
||||
{
|
||||
text = OcrViaCloudVision(bmp, i);
|
||||
}
|
||||
|
||||
_lastLine = text;
|
||||
|
||||
@ -5603,7 +5676,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
|
||||
if (_abort)
|
||||
{
|
||||
textBoxCurrentText.Text = text;
|
||||
if (textBoxCurrentText.Text == "")
|
||||
{
|
||||
textBoxCurrentText.Text = text;
|
||||
}
|
||||
_mainOcrRunning = false;
|
||||
SetButtonsEnabledAfterOcrDone();
|
||||
return true;
|
||||
@ -6763,6 +6839,19 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Sends a single bitmap to the configured OCR service, using the text of the
/// language hint combo box as language.
/// </summary>
/// <param name="bitmap">Image to recognize.</param>
/// <param name="listViewIndex">Index of the subtitle line; not used by this method.</param>
/// <returns>The first text returned by the service, or an empty string when nothing was returned.</returns>
private string OcrViaCloudVision(Bitmap bitmap, int listViewIndex)
{
    var languageHint = comboBoxCloudVisionLanguageHint.Text;
    var singleImage = new List<Bitmap> { bitmap };
    var texts = _ocrService.PerformOCR(languageHint, singleImage);

    return texts.Count > 0 ? texts[0] : string.Empty;
}
|
||||
|
||||
private void InitializeNOcrForBatch(string db)
|
||||
{
|
||||
_ocrMethodIndex = _ocrMethodNocr;
|
||||
@ -7522,6 +7611,11 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
ShowOcrMethodGroupBox(groupBoxModiMethod);
|
||||
Configuration.Settings.VobSubOcr.LastOcrMethod = "MODI";
|
||||
}
|
||||
else if (_ocrMethodIndex == _ocrMethodCloudVision)
|
||||
{
|
||||
ShowOcrMethodGroupBox(groupBoxCloudVision);
|
||||
Configuration.Settings.VobSubOcr.LastOcrMethod = "CloudVision";
|
||||
}
|
||||
|
||||
_ocrFixEngine = null;
|
||||
SubtitleListView1SelectedIndexChanged(null, null);
|
||||
@ -7543,6 +7637,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
groupBoxImageCompareMethod.Visible = false;
|
||||
groupBoxModiMethod.Visible = false;
|
||||
groupBoxNOCR.Visible = false;
|
||||
groupBoxCloudVision.Visible = false;
|
||||
|
||||
groupBox.Visible = true;
|
||||
groupBox.BringToFront();
|
||||
@ -7989,6 +8084,17 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
pictureBoxEmphasis2.BackColor = Color.White;
|
||||
}
|
||||
|
||||
checkBoxCloudVisionSendOriginalImages.Visible = false;
|
||||
if (bdnSubtitle.Paragraphs.Count > 0)
|
||||
{
|
||||
var firstImageFileName = bdnSubtitle.Paragraphs[0].Text;
|
||||
var originalImageFileName = GetVSFOriginalImageFileName(firstImageFileName);
|
||||
if (firstImageFileName != originalImageFileName && File.Exists(originalImageFileName))
|
||||
{
|
||||
checkBoxCloudVisionSendOriginalImages.Visible = true;
|
||||
}
|
||||
}
|
||||
|
||||
SetButtonsStartOcr();
|
||||
progressBar1.Visible = false;
|
||||
progressBar1.Maximum = 100;
|
||||
@ -8008,7 +8114,6 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
|
||||
autoTransparentBackgroundToolStripMenuItem.Checked = true;
|
||||
autoTransparentBackgroundToolStripMenuItem.Visible = true;
|
||||
|
||||
}
|
||||
|
||||
private void SetOcrMethod()
|
||||
@ -8017,6 +8122,10 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
{
|
||||
comboBoxOcrMethod.SelectedIndex = _ocrMethodBinaryImageCompare;
|
||||
}
|
||||
else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "CloudVision" && comboBoxOcrMethod.Items.Count > _ocrMethodCloudVision)
|
||||
{
|
||||
comboBoxOcrMethod.SelectedIndex = _ocrMethodCloudVision;
|
||||
}
|
||||
else if (Configuration.Settings.VobSubOcr.LastOcrMethod == "MODI" && comboBoxOcrMethod.Items.Count > _ocrMethodModi)
|
||||
{
|
||||
comboBoxOcrMethod.SelectedIndex = _ocrMethodModi;
|
||||
@ -8490,6 +8599,8 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
Configuration.Settings.VobSubOcr.LineOcrMaxErrorPixels = (int)numericUpDownNOcrMaxWrongPixels.Value;
|
||||
Configuration.Settings.VobSubOcr.UseTesseractFallback = checkBoxTesseractFallback.Checked;
|
||||
Configuration.Settings.VobSubOcr.CaptureTopAlign = toolStripMenuItemCaptureTopAlign.Checked;
|
||||
Configuration.Settings.VobSubOcr.CloudVisionAPIKey = textBoxCloudVisionAPIKey.Text;
|
||||
Configuration.Settings.VobSubOcr.CloudVisionLanguage = comboBoxCloudVisionLanguageHint.Text;
|
||||
|
||||
if (_ocrMethodIndex == _ocrMethodBinaryImageCompare)
|
||||
{
|
||||
@ -10052,5 +10163,16 @@ namespace Nikse.SubtitleEdit.Forms.Ocr
|
||||
_mainOcrSelectedIndices = subtitleListView1.GetSelectedIndices().ToList();
|
||||
ButtonStartOcrClick(null, null);
|
||||
}
|
||||
|
||||
/// <summary>
/// Derives the file name of the original image from the file name of a processed image
/// by rewriting the folder and extension parts of the path.
/// </summary>
/// <param name="fileName">File name of the processed image.</param>
/// <returns>
/// The rewritten file name; identical to <paramref name="fileName"/> when none of the
/// replaced fragments occur in it.
/// </returns>
private string GetVSFOriginalImageFileName(string fileName)
{
    // Point the "RGBResults" / "TXTImages" folders at the "RGBImages" folder.
    var inOriginalFolder = fileName
        .Replace("\\RGBResults", "\\RGBImages")
        .Replace("\\TXTImages", "\\RGBImages");

    // "x.jpeg.png" and "x.png" both end up as "x.jpeg".
    var withOriginalExtension = inOriginalFolder
        .Replace(".jpeg.png", ".jpeg")
        .Replace(".png", ".jpeg");

    return withOriginalExtension;
}
|
||||
|
||||
/// <summary>
/// Re-runs the list view selection handler when the "send original images" check box is
/// toggled, so the displayed subtitle image is refreshed.
/// </summary>
private void checkBoxCloudVisionSendOriginalImages_CheckedChanged(object sender, EventArgs e) =>
    SubtitleListView1SelectedIndexChanged(sender, e);
|
||||
}
|
||||
}
|
||||
|
@ -3311,6 +3311,7 @@ Keep changes?",
|
||||
OcrViaImageCompare = "Binary image compare",
|
||||
OcrViaModi = "Microsoft Office Document Imaging (MODI). Requires Microsoft Office",
|
||||
OcrViaNOCR = "OCR via nOCR",
|
||||
OcrViaCloudVision = "OCR via Cloud Vision API",
|
||||
TesseractEngineMode = "Engine mode",
|
||||
TesseractEngineModeLegacy = "Original Tesseract only (can detect italic)",
|
||||
TesseractEngineModeNeural = "Neural nets LSTM only",
|
||||
@ -3380,6 +3381,9 @@ Keep changes?",
|
||||
StartTraining = "Start training",
|
||||
NowTraining = "Now training font '{1}'. Total chars trained: {0:#,###,##0}, {2:#,###,##0} known",
|
||||
ImagesWithTimeCodesInFileName = "Images with time codes in file name...",
|
||||
APIKey = "API key",
|
||||
LanguageHint = "Language hint",
|
||||
SendOriginalImages = "Send original images",
|
||||
};
|
||||
|
||||
VobSubOcrCharacter = new LanguageStructure.VobSubOcrCharacter
|
||||
|
@ -3158,6 +3158,7 @@
|
||||
public string OcrViaImageCompare { get; set; }
|
||||
public string OcrViaModi { get; set; }
|
||||
public string OcrViaNOCR { get; set; }
|
||||
public string OcrViaCloudVision { get; set; }
|
||||
public string TesseractEngineMode { get; set; }
|
||||
public string TesseractEngineModeLegacy { get; set; }
|
||||
public string TesseractEngineModeNeural { get; set; }
|
||||
@ -3227,6 +3228,9 @@
|
||||
public string StartTraining { get; set; }
|
||||
public string NowTraining { get; set; }
|
||||
public string ImagesWithTimeCodesInFileName { get; set; }
|
||||
public string APIKey { get; set; }
|
||||
public string LanguageHint { get; set; }
|
||||
public string SendOriginalImages { get; set; }
|
||||
}
|
||||
|
||||
public class VobSubOcrCharacter
|
||||
|
Loading…
Reference in New Issue
Block a user