From 86fb003192f5b156e819d8295c91a4d47cab16db Mon Sep 17 00:00:00 2001 From: niksedk Date: Fri, 21 Jan 2022 19:44:40 +0100 Subject: [PATCH] Update dictionaries --- Dictionaries/en_US_user.xml | 10 ++++++++++ Dictionaries/en_names.xml | 2 ++ Dictionaries/names.xml | 2 ++ src/ui/Forms/Ocr/WordSplitDictionaryGenerator.cs | 10 ++++++++-- 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Dictionaries/en_US_user.xml b/Dictionaries/en_US_user.xml index 65d4955f6..2c8d99a50 100644 --- a/Dictionaries/en_US_user.xml +++ b/Dictionaries/en_US_user.xml @@ -198,11 +198,13 @@ financials flavour flavours + floricultural fluorine flushin' flyer forevermore francium + frenemy fuckable fundraiser gadolinium @@ -229,6 +231,7 @@ hashtag hassium hatchling + hawala heh helium heloooooo @@ -356,6 +359,8 @@ parents' percutaneously perfusionist + pescatarian + pescatarians phosphorus photoshopped piccaninny @@ -436,6 +441,7 @@ splenectomy spork spotteth + squirrelly stenosis stent stenting @@ -450,6 +456,8 @@ sulfur sulphur sunglass + supervillain + supervillains supervolcano synchronicity syncopal @@ -512,8 +520,10 @@ voicemail voila walkthrough + weaponized weirding what'd + what'll what're when'd where'd diff --git a/Dictionaries/en_names.xml b/Dictionaries/en_names.xml index c942d0f06..ac4e17e0e 100644 --- a/Dictionaries/en_names.xml +++ b/Dictionaries/en_names.xml @@ -1531,6 +1531,7 @@ Peruvians Philippines Pinella + Pippi Longstocking Pinkman Pitcairn Islands Poindexter @@ -1585,6 +1586,7 @@ Rayne Rebekah Reece + Reddington Regan Reid Remington diff --git a/Dictionaries/names.xml b/Dictionaries/names.xml index a5ec3a446..2c7cca656 100644 --- a/Dictionaries/names.xml +++ b/Dictionaries/names.xml @@ -2561,6 +2561,7 @@ This file is case sensitive. Idris Elba Igor III + Ikaris Ikea Ilana Ilithyia @@ -3918,6 +3919,7 @@ This file is case sensitive. Munich Muray Murdoch + Murdock Muriel Murmillo Murphy diff --git a/src/ui/Forms/Ocr/WordSplitDictionaryGenerator.cs b/src/ui/Forms/Ocr/WordSplitDictionaryGenerator.cs index bf3fc9878..a2626d423 100644 --- a/src/ui/Forms/Ocr/WordSplitDictionaryGenerator.cs +++ b/src/ui/Forms/Ocr/WordSplitDictionaryGenerator.cs @@ -14,7 +14,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { public partial class WordSplitDictionaryGenerator : Form { - private List _subtitleList; + private readonly List _subtitleList; private Hunspell _hunspell; public WordSplitDictionaryGenerator() @@ -103,7 +103,7 @@ namespace Nikse.SubtitleEdit.Forms.Ocr if (!FileUtil.IsBluRaySup(fileName) && !FileUtil.IsVobSub(fileName) && !((ext == ".mkv" || ext == ".mks") && FileUtil.IsMatroskaFile(fileName))) { - SubtitleFormat format = sub.LoadSubtitle(fileName, out _, null); + var format = sub.LoadSubtitle(fileName, out _, null); if (format == null) { @@ -151,6 +151,12 @@ namespace Nikse.SubtitleEdit.Forms.Ocr { foreach (var p in subtitle.Paragraphs) { + if (p.Text.Contains("Synced and corrected by", StringComparison.OrdinalIgnoreCase) || + p.Text.Contains("www.")) + { + continue; + } + var words = SpellCheckWordLists.Split(HtmlUtil.RemoveHtmlTags(p.Text, true)); foreach (var word in words) {