mirror of
https://github.com/SubtitleEdit/subtitleedit.git
synced 2024-11-25 20:52:44 +01:00
More work related to word-split-list
This commit is contained in:
parent
4f4f22120b
commit
5ca26ec918
@ -88,6 +88,7 @@
|
|||||||
<word>colonoscopy</word>
|
<word>colonoscopy</word>
|
||||||
<word>colours</word>
|
<word>colours</word>
|
||||||
<word>contractualism</word>
|
<word>contractualism</word>
|
||||||
|
<word>copain</word>
|
||||||
<word>copernicium</word>
|
<word>copernicium</word>
|
||||||
<word>copper</word>
|
<word>copper</word>
|
||||||
<word>copperish</word>
|
<word>copperish</word>
|
||||||
@ -96,6 +97,9 @@
|
|||||||
<word>cottonoid</word>
|
<word>cottonoid</word>
|
||||||
<word>could've</word>
|
<word>could've</word>
|
||||||
<word>craniotomy</word>
|
<word>craniotomy</word>
|
||||||
|
<word>crossbone</word>
|
||||||
|
<word>crosshair</word>
|
||||||
|
<word>crosshairs</word>
|
||||||
<word>cryo</word>
|
<word>cryo</word>
|
||||||
<word>cryotube</word>
|
<word>cryotube</word>
|
||||||
<word>cudgelled</word>
|
<word>cudgelled</word>
|
||||||
@ -113,6 +117,7 @@
|
|||||||
<word>douchey</word>
|
<word>douchey</word>
|
||||||
<word>dreamt</word>
|
<word>dreamt</word>
|
||||||
<word>dubnium</word>
|
<word>dubnium</word>
|
||||||
|
<word>dumbass</word>
|
||||||
<word>dysesthesia</word>
|
<word>dysesthesia</word>
|
||||||
<word>dysprosium</word>
|
<word>dysprosium</word>
|
||||||
<word>einsteinium</word>
|
<word>einsteinium</word>
|
||||||
@ -130,8 +135,10 @@
|
|||||||
<word>everything's</word>
|
<word>everything's</word>
|
||||||
<word>extradural</word>
|
<word>extradural</word>
|
||||||
<word>faggot</word>
|
<word>faggot</word>
|
||||||
|
<word>failsafe</word>
|
||||||
<word>falafel</word>
|
<word>falafel</word>
|
||||||
<word>fallin'</word>
|
<word>fallin'</word>
|
||||||
|
<word>fanboy</word>
|
||||||
<word>favour</word>
|
<word>favour</word>
|
||||||
<word>favoured</word>
|
<word>favoured</word>
|
||||||
<word>favourite</word>
|
<word>favourite</word>
|
||||||
@ -145,6 +152,7 @@
|
|||||||
<word>fluorine</word>
|
<word>fluorine</word>
|
||||||
<word>flushin'</word>
|
<word>flushin'</word>
|
||||||
<word>flyer</word>
|
<word>flyer</word>
|
||||||
|
<word>forevermore</word>
|
||||||
<word>francium</word>
|
<word>francium</word>
|
||||||
<word>fuckable</word>
|
<word>fuckable</word>
|
||||||
<word>fundraiser</word>
|
<word>fundraiser</word>
|
||||||
@ -178,6 +186,7 @@
|
|||||||
<word>hematoma</word>
|
<word>hematoma</word>
|
||||||
<word>hiei</word>
|
<word>hiei</word>
|
||||||
<word>hijab</word>
|
<word>hijab</word>
|
||||||
|
<word>hitman</word>
|
||||||
<word>hm</word>
|
<word>hm</word>
|
||||||
<word>holmium</word>
|
<word>holmium</word>
|
||||||
<word>hottie</word>
|
<word>hottie</word>
|
||||||
@ -190,6 +199,7 @@
|
|||||||
<word>hypothermic</word>
|
<word>hypothermic</word>
|
||||||
<word>immersive</word>
|
<word>immersive</word>
|
||||||
<word>immunotherapy</word>
|
<word>immunotherapy</word>
|
||||||
|
<word>inbox</word>
|
||||||
<word>incontinentia</word>
|
<word>incontinentia</word>
|
||||||
<word>indium</word>
|
<word>indium</word>
|
||||||
<word>intercostal</word>
|
<word>intercostal</word>
|
||||||
@ -219,6 +229,7 @@
|
|||||||
<word>laryngopharyngeal</word>
|
<word>laryngopharyngeal</word>
|
||||||
<word>lawrencium</word>
|
<word>lawrencium</word>
|
||||||
<word>lead</word>
|
<word>lead</word>
|
||||||
|
<word>lightsaber</word>
|
||||||
<word>lithium</word>
|
<word>lithium</word>
|
||||||
<word>lobectomy</word>
|
<word>lobectomy</word>
|
||||||
<word>lockdown</word>
|
<word>lockdown</word>
|
||||||
@ -265,6 +276,8 @@
|
|||||||
<word>neuroscientific</word>
|
<word>neuroscientific</word>
|
||||||
<word>neurotypical</word>
|
<word>neurotypical</word>
|
||||||
<word>neurovascular</word>
|
<word>neurovascular</word>
|
||||||
|
<word>newsfeed</word>
|
||||||
|
<word>newsfeeds</word>
|
||||||
<word>nickel</word>
|
<word>nickel</word>
|
||||||
<word>niobium</word>
|
<word>niobium</word>
|
||||||
<word>nitrogen</word>
|
<word>nitrogen</word>
|
||||||
@ -330,6 +343,8 @@
|
|||||||
<word>rubidium</word>
|
<word>rubidium</word>
|
||||||
<word>ruthenium</word>
|
<word>ruthenium</word>
|
||||||
<word>rutherfordium</word>
|
<word>rutherfordium</word>
|
||||||
|
<word>safehouse</word>
|
||||||
|
<word>safeword</word>
|
||||||
<word>saké</word>
|
<word>saké</word>
|
||||||
<word>saltimbocca</word>
|
<word>saltimbocca</word>
|
||||||
<word>samarium</word>
|
<word>samarium</word>
|
||||||
@ -346,6 +361,8 @@
|
|||||||
<word>señorita</word>
|
<word>señorita</word>
|
||||||
<word>sensei</word>
|
<word>sensei</word>
|
||||||
<word>sharpshoot</word>
|
<word>sharpshoot</word>
|
||||||
|
<word>shitbox</word>
|
||||||
|
<word>shithead</word>
|
||||||
<word>shithead's</word>
|
<word>shithead's</word>
|
||||||
<word>shithole</word>
|
<word>shithole</word>
|
||||||
<word>should've</word>
|
<word>should've</word>
|
||||||
@ -379,6 +396,7 @@
|
|||||||
<word>sudoku</word>
|
<word>sudoku</word>
|
||||||
<word>sulfur</word>
|
<word>sulfur</word>
|
||||||
<word>sulphur</word>
|
<word>sulphur</word>
|
||||||
|
<word>sunglass</word>
|
||||||
<word>supervolcano</word>
|
<word>supervolcano</word>
|
||||||
<word>synchronicity</word>
|
<word>synchronicity</word>
|
||||||
<word>syncopal</word>
|
<word>syncopal</word>
|
||||||
@ -440,6 +458,7 @@
|
|||||||
<word>vegetations</word>
|
<word>vegetations</word>
|
||||||
<word>voicemail</word>
|
<word>voicemail</word>
|
||||||
<word>voila</word>
|
<word>voila</word>
|
||||||
|
<word>walkthrough</word>
|
||||||
<word>weirding</word>
|
<word>weirding</word>
|
||||||
<word>what'd</word>
|
<word>what'd</word>
|
||||||
<word>what're</word>
|
<word>what're</word>
|
||||||
|
@ -135,6 +135,7 @@ namespace Nikse.SubtitleEdit.Core.Common
|
|||||||
public string OcrTrainFonts { get; set; }
|
public string OcrTrainFonts { get; set; }
|
||||||
public string OcrTrainMergedLetters { get; set; }
|
public string OcrTrainMergedLetters { get; set; }
|
||||||
public string OcrTrainSrtFile { get; set; }
|
public string OcrTrainSrtFile { get; set; }
|
||||||
|
public bool OcrUseWordSplitList { get; set; }
|
||||||
public string BDOpenIn { get; set; }
|
public string BDOpenIn { get; set; }
|
||||||
public string Interjections { get; set; }
|
public string Interjections { get; set; }
|
||||||
public string MicrosoftBingApiId { get; set; }
|
public string MicrosoftBingApiId { get; set; }
|
||||||
@ -384,6 +385,7 @@ namespace Nikse.SubtitleEdit.Core.Common
|
|||||||
OcrAddLetterRow2 = "♫;Á;É;Í;Ó;Ö;Ő;Ú;Ü;Ű;Ç;Ñ;Å;¡";
|
OcrAddLetterRow2 = "♫;Á;É;Í;Ó;Ö;Ő;Ú;Ü;Ű;Ç;Ñ;Å;¡";
|
||||||
OcrTrainFonts = "Arial;Calibri;Corbel;Futura Std Book;Futura Bis;Helvetica Neue;Lucida Console;Tahoma;Trebuchet MS;Verdana";
|
OcrTrainFonts = "Arial;Calibri;Corbel;Futura Std Book;Futura Bis;Helvetica Neue;Lucida Console;Tahoma;Trebuchet MS;Verdana";
|
||||||
OcrTrainMergedLetters = "ff ft fi fj fy fl rf rt rv rw ry rt rz ryt tt TV tw yt yw wy wf ryt xy";
|
OcrTrainMergedLetters = "ff ft fi fj fy fl rf rt rv rw ry rt rz ryt tt TV tw yt yw wy wf ryt xy";
|
||||||
|
OcrUseWordSplitList = true;
|
||||||
Interjections = "Ah;Ahem;Ahh;Ahhh;Ahhhh;Eh;Ehh;Ehhh;Hm;Hmm;Hmmm;Huh;Mm;Mmm;Mmmm;Phew;Gah;Oh;Ohh;Ohhh;Ow;Oww;Owww;Ugh;Ughh;Uh;Uhh;Uhhh;Whew";
|
Interjections = "Ah;Ahem;Ahh;Ahhh;Ahhhh;Eh;Ehh;Ehhh;Hm;Hmm;Hmmm;Huh;Mm;Mmm;Mmmm;Phew;Gah;Oh;Ohh;Ohhh;Ow;Oww;Owww;Ugh;Ughh;Uh;Uhh;Uhhh;Whew";
|
||||||
MicrosoftTranslatorTokenEndpoint = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken";
|
MicrosoftTranslatorTokenEndpoint = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken";
|
||||||
GoogleTranslateNoKeyWarningShow = true;
|
GoogleTranslateNoKeyWarningShow = true;
|
||||||
@ -4143,6 +4145,12 @@ $HorzAlign = Center
|
|||||||
settings.Tools.OcrTrainSrtFile = subNode.InnerText;
|
settings.Tools.OcrTrainSrtFile = subNode.InnerText;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
subNode = node.SelectSingleNode("OcrUseWordSplitList");
|
||||||
|
if (subNode != null)
|
||||||
|
{
|
||||||
|
settings.Tools.OcrUseWordSplitList = Convert.ToBoolean(subNode.InnerText, CultureInfo.InvariantCulture);
|
||||||
|
}
|
||||||
|
|
||||||
subNode = node.SelectSingleNode("BDOpenIn");
|
subNode = node.SelectSingleNode("BDOpenIn");
|
||||||
if (subNode != null)
|
if (subNode != null)
|
||||||
{
|
{
|
||||||
@ -9198,6 +9206,7 @@ $HorzAlign = Center
|
|||||||
textWriter.WriteElementString("OcrTrainFonts", settings.Tools.OcrTrainFonts);
|
textWriter.WriteElementString("OcrTrainFonts", settings.Tools.OcrTrainFonts);
|
||||||
textWriter.WriteElementString("OcrTrainMergedLetters", settings.Tools.OcrTrainMergedLetters);
|
textWriter.WriteElementString("OcrTrainMergedLetters", settings.Tools.OcrTrainMergedLetters);
|
||||||
textWriter.WriteElementString("OcrTrainSrtFile", settings.Tools.OcrTrainSrtFile);
|
textWriter.WriteElementString("OcrTrainSrtFile", settings.Tools.OcrTrainSrtFile);
|
||||||
|
textWriter.WriteElementString("OcrUseWordSplitList", settings.Tools.OcrUseWordSplitList.ToString(CultureInfo.InvariantCulture));
|
||||||
textWriter.WriteElementString("BDOpenIn", settings.Tools.BDOpenIn);
|
textWriter.WriteElementString("BDOpenIn", settings.Tools.BDOpenIn);
|
||||||
textWriter.WriteElementString("Interjections", settings.Tools.Interjections);
|
textWriter.WriteElementString("Interjections", settings.Tools.Interjections);
|
||||||
textWriter.WriteElementString("MicrosoftBingApiId", settings.Tools.MicrosoftBingApiId);
|
textWriter.WriteElementString("MicrosoftBingApiId", settings.Tools.MicrosoftBingApiId);
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
|
using Nikse.SubtitleEdit.Core.Common;
|
||||||
|
|
||||||
namespace Nikse.SubtitleEdit.Core.Dictionaries
|
namespace Nikse.SubtitleEdit.Core.Dictionaries
|
||||||
{
|
{
|
||||||
@ -8,6 +9,11 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
|
|||||||
{
|
{
|
||||||
public static string SplitWord(string[] words, string input)
|
public static string SplitWord(string[] words, string input)
|
||||||
{
|
{
|
||||||
|
if (!Configuration.Settings.Tools.OcrUseWordSplitList)
|
||||||
|
{
|
||||||
|
return input;
|
||||||
|
}
|
||||||
|
|
||||||
var usedWords = new List<string>();
|
var usedWords = new List<string>();
|
||||||
var result = SplitWord(words, input, string.Empty, usedWords);
|
var result = SplitWord(words, input, string.Empty, usedWords);
|
||||||
if (result != input)
|
if (result != input)
|
||||||
|
Loading…
Reference in New Issue
Block a user