Merge pull request #6463 from Flitskikker/feature/improve-ocr-linebreaks-empty-images

Fix exception in JsonToStringList for empty images
This commit is contained in:
Nikolaj Olsson 2022-11-25 15:49:28 -05:00 committed by GitHub
commit 6adb81221d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -232,82 +232,85 @@ namespace Nikse.SubtitleEdit.Core.VobSub.Ocr.Service
if (Configuration.Settings.Tools.OcrGoogleCloudVisionSeHandlesTextMerge)
{
var annotations = GetAnnotations(content).ToList();
var lines = new List<List<Annotation>>();
Annotation last = null;
var lineThreshold = Math.Max(9, annotations.Average(p => p.Height) / 4.0);
var lineIndex = 0;
// Split to lines
foreach (var a in annotations.OrderBy(p => p.GetMediumY()))
if (annotations.Count > 0)
{
if (last != null)
{
var diff = Math.Abs(last.GetMediumY() - a.GetMediumY());
if (diff > lineThreshold && last.Vertices.Max(p => p.Y) <= a.Vertices.Min(p => p.Y))
{
lineIndex++;
lines.Add(new List<Annotation>());
lines[lineIndex].Add(a);
}
else
{
lines[lineIndex].Add(a);
}
}
else
{
lines.Add(new List<Annotation>());
lines[lineIndex].Add(a);
}
var lines = new List<List<Annotation>>();
Annotation last = null;
var lineThreshold = Math.Max(9, annotations.Average(p => p.Height) / 4.0);
var lineIndex = 0;
last = a;
}
// Merge lines ordered by X
var sb = new StringBuilder();
var spaceThreshold = Math.Max(12, annotations.Average(p => p.Width) / 2.7);
foreach (var line in lines)
{
var sbLine = new StringBuilder();
last = null;
foreach (var l in line.OrderBy(p => p.Vertices.Min(p2 => p2.X)))
// Split to lines
foreach (var a in annotations.OrderBy(p => p.GetMediumY()))
{
if (last != null)
{
var diff = l.Vertices.Min(p => p.X) - last.Vertices.Max(p => p.X);
if (diff > spaceThreshold || last.DetectedBreak == "SPACE" || last.DetectedBreak == "EOL_SURE_SPACE" || last.DetectedBreak == "LINE_BREAK")
var diff = Math.Abs(last.GetMediumY() - a.GetMediumY());
if (diff > lineThreshold && last.Vertices.Max(p => p.Y) <= a.Vertices.Min(p => p.Y))
{
sbLine.Append(" ");
lineIndex++;
lines.Add(new List<Annotation>());
lines[lineIndex].Add(a);
}
else
{
lines[lineIndex].Add(a);
}
sbLine.Append(l.Text);
}
else
{
sbLine.Append(l.Text);
lines.Add(new List<Annotation>());
lines[lineIndex].Add(a);
}
last = l;
last = a;
}
if (language == "fr")
// Merge lines ordered by X
var sb = new StringBuilder();
var spaceThreshold = Math.Max(12, annotations.Average(p => p.Width) / 2.7);
foreach (var line in lines)
{
sb.AppendLine(sbLine.ToString().Trim());
}
else
{
sb.AppendLine(sbLine.ToString().Trim()
.Replace(" .", ".")
.Replace(" ,", ",")
.Replace(" ?", "?")
.Replace(" !", "!"));
}
}
var sbLine = new StringBuilder();
last = null;
foreach (var l in line.OrderBy(p => p.Vertices.Min(p2 => p2.X)))
{
if (last != null)
{
var diff = l.Vertices.Min(p => p.X) - last.Vertices.Max(p => p.X);
if (diff > spaceThreshold || last.DetectedBreak == "SPACE" || last.DetectedBreak == "EOL_SURE_SPACE" || last.DetectedBreak == "LINE_BREAK")
{
sbLine.Append(" ");
}
var ocrResult = sb.ToString().Trim();
if (ocrResult.Length > 0)
{
return new List<string> { ocrResult };
sbLine.Append(l.Text);
}
else
{
sbLine.Append(l.Text);
}
last = l;
}
if (language == "fr")
{
sb.AppendLine(sbLine.ToString().Trim());
}
else
{
sb.AppendLine(sbLine.ToString().Trim()
.Replace(" .", ".")
.Replace(" ,", ",")
.Replace(" ?", "?")
.Replace(" !", "!"));
}
}
var ocrResult = sb.ToString().Trim();
if (ocrResult.Length > 0)
{
return new List<string> { ocrResult };
}
}
}