Work on interjections skip list

This commit is contained in:
niksedk 2023-11-04 15:35:10 +01:00
parent e9f877da6b
commit e6e08bb753
6 changed files with 62 additions and 15 deletions

View File

@ -77,11 +77,12 @@ namespace Test.Logic.Forms
/// <summary>
/// Builds the <see cref="InterjectionRemoveContext"/> used to remove interjections from <paramref name="text"/>.
/// </summary>
/// <param name="text">The text that interjections will be removed from.</param>
/// <param name="onlyInSeparatedLine">Value assigned to <c>OnlySeparatedLines</c> on the returned context.</param>
/// <returns>A context holding the interjection list, its "skip if starts with" list and the text.</returns>
private static InterjectionRemoveContext GetRemoveInterjectionContext(string text, bool onlyInSeparatedLine)
{
    SetInterjections();

    // Fetch the interjections and the skip list with a single call so both come from the same load,
    // and assign each context member exactly once (a member may only appear once in an object initializer).
    var interjections = RemoveTextForHI.GetInterjectionList(_interjectionsLanguageCode, out var skipList);
    return new InterjectionRemoveContext
    {
        OnlySeparatedLines = onlyInSeparatedLine,
        Interjections = interjections,
        InterjectionsSkipIfStartsWith = skipList,
        Text = text,
    };
}

View File

@ -0,0 +1,10 @@
using System.Collections.Generic;
namespace Nikse.SubtitleEdit.Core.Common
{
/// <summary>
/// Holds the two lists loaded for a language: the interjection words themselves and the
/// phrases that, when found at the start of a match, cause the interjection to be left alone.
/// </summary>
public class InterjectionsLists
{
    /// <summary>
    /// The interjection words. Never null by default; an empty list means "no interjections".
    /// </summary>
    public List<string> Interjections { get; set; } = new List<string>();

    /// <summary>
    /// Phrases used for the "skip if starts with" check. Never null by default.
    /// </summary>
    public List<string> SkipIfStartsWith { get; set; } = new List<string>();
}
}

View File

@ -11,13 +11,13 @@ namespace Nikse.SubtitleEdit.Core.Common
private const string UserFileName = "_interjections_user.xml";
private const string SeFileName = "_interjections_se.xml";
public static List<string> LoadInterjections(string twoLetterIsoLanguageName)
public static InterjectionsLists LoadInterjections(string twoLetterIsoLanguageName)
{
var seFileName = twoLetterIsoLanguageName + SeFileName;
var userFileName = twoLetterIsoLanguageName + UserFileName;
var interjections = new List<string>();
var se = LoadInterjections(seFileName, out _);
var user = LoadInterjections(userFileName, out var ignoreList);
var se = LoadInterjections(seFileName, out _, out var skipIfStartsWithList);
var user = LoadInterjections(userFileName, out var ignoreList, out var skipIfStartsWithList2);
foreach (var w in se)
{
@ -35,14 +35,19 @@ namespace Nikse.SubtitleEdit.Core.Common
}
}
return interjections.OrderBy(p => p).ToList();
skipIfStartsWithList.AddRange(skipIfStartsWithList2);
return new InterjectionsLists
{
Interjections = interjections.OrderBy(p => p).ToList(),
SkipIfStartsWith = skipIfStartsWithList.OrderByDescending(p=>p.Length).ToList(),
};
}
public static void SaveInterjections(string twoLetterIsoLanguageName, List<string> interjections)
{
var seFileName = twoLetterIsoLanguageName + SeFileName;
var userFileName = twoLetterIsoLanguageName + UserFileName;
var se = LoadInterjections(seFileName, out _);
var se = LoadInterjections(seFileName, out _, out _);
var ignoreList = new List<string>();
foreach (var w in se)
@ -77,9 +82,10 @@ namespace Nikse.SubtitleEdit.Core.Common
xmlDocument.Save(fullFileName);
}
private static List<string> LoadInterjections(string fileName, out List<string> ignoreList)
private static List<string> LoadInterjections(string fileName, out List<string> ignoreList, out List<string> skipIfStartsWithList)
{
ignoreList = new List<string>();
skipIfStartsWithList = new List<string>();
var interjections = new List<string>();
var fullFileName = Path.Combine(Configuration.DictionariesDirectory, fileName);
if (File.Exists(fullFileName))
@ -104,6 +110,14 @@ namespace Nikse.SubtitleEdit.Core.Common
}
}
foreach (XmlNode node in xmlDocument.DocumentElement.SelectNodes("skipIfStartsWith/text"))
{
var w = node.InnerText.Trim();
if (!string.IsNullOrEmpty(w) && !skipIfStartsWithList.Contains(w))
{
skipIfStartsWithList.Add(w);
}
}
}
return interjections;

View File

@ -17,6 +17,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
/// The check list that will be used to check interjections.
/// </summary>
public IList<string> Interjections { get; set; }
/// <summary>
/// Phrases that protect an interjection from removal: when the text at a matched interjection
/// starts with one of these entries (compared case-insensitively), that match is not removed.
/// </summary>
public IList<string> InterjectionsSkipIfStartsWith { get; set; }
/// <summary>
/// Text from which the interjections will be removed from.
@ -26,8 +27,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
public class RemoveInterjection
{
// https://github.com/SubtitleEdit/subtitleedit/issues/1421
private IList<string> _ignoreList = new List<string>();
// https://github.com/SubtitleEdit/subtitleedit/issues/1421 + https://github.com/SubtitleEdit/subtitleedit/issues/7563
public string Invoke(InterjectionRemoveContext context)
{
@ -51,6 +51,22 @@ namespace Nikse.SubtitleEdit.Core.Forms
if (match.Success)
{
var index = match.Index;
var fromIndexPart = text.Substring(match.Index);
var doSkip = false;
foreach (var skipIfStartsWith in context.InterjectionsSkipIfStartsWith)
{
if (fromIndexPart.StartsWith(skipIfStartsWith, StringComparison.OrdinalIgnoreCase))
{
doSkip = true;
break;
}
}
if (doSkip)
{
break;
}
var temp = text.Remove(index, s.Length);
if (index == 0 && temp.StartsWith("... ", StringComparison.Ordinal))

View File

@ -19,6 +19,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
private readonly InterjectionRemoveContext _interjectionRemoveContext;
private readonly RemoveInterjection _removeInterjection;
private IList<string> _interjections;
private IList<string> _interjectionsSkipIfStartsWith;
public RemoveTextForHI(RemoveTextForHISettings removeTextForHISettings)
{
@ -1030,6 +1031,7 @@ namespace Nikse.SubtitleEdit.Core.Forms
_interjectionRemoveContext.Text = text;
_interjectionRemoveContext.OnlySeparatedLines = Settings.RemoveInterjectionsOnlySeparateLine;
_interjectionRemoveContext.Interjections = _interjections;
_interjectionRemoveContext.InterjectionsSkipIfStartsWith = _interjectionsSkipIfStartsWith;
text = _removeInterjection.Invoke(_interjectionRemoveContext);
}
@ -1603,10 +1605,14 @@ namespace Nikse.SubtitleEdit.Core.Forms
return words;
}
public static IList<string> GetInterjectionList(string twoLetterIsoLanguageName)
public static IList<string> GetInterjectionList(string twoLetterIsoLanguageName, out List<string> skipIfStartsWith)
{
var interjections = InterjectionsRepository.LoadInterjections(twoLetterIsoLanguageName);
skipIfStartsWith = interjections.SkipIfStartsWith;
var interjectionList = new HashSet<string>();
foreach (var s in InterjectionsRepository.LoadInterjections(twoLetterIsoLanguageName))
foreach (var s in interjections.Interjections)
{
if (s.Length <= 0)
{
@ -1626,7 +1632,8 @@ namespace Nikse.SubtitleEdit.Core.Forms
/// <summary>
/// Reloads the interjection list and its "skip if starts with" list for the given language.
/// </summary>
/// <param name="twoLetterIsoLanguageName">Two-letter ISO language name passed on to <c>GetInterjectionList</c>.</param>
public void ReloadInterjection(string twoLetterIsoLanguageName)
{
    // The out argument is a List<string> while the field is an IList<string>; out arguments must
    // match the parameter type exactly, so go through a local instead of passing the field.
    _interjections = GetInterjectionList(twoLetterIsoLanguageName, out var skipList);
    _interjectionsSkipIfStartsWith = skipList;
}
}
}

View File

@ -8,7 +8,6 @@ using System.Drawing;
using System.Globalization;
using System.Linq;
using System.Windows.Forms;
using System.Xml;
namespace Nikse.SubtitleEdit.Forms
{
@ -314,7 +313,7 @@ namespace Nikse.SubtitleEdit.Forms
lang = l.Code.TwoLetterISOLanguageName;
}
using (var editInterjections = new InterjectionsEditList(InterjectionsRepository.LoadInterjections(lang)))
using (var editInterjections = new InterjectionsEditList(InterjectionsRepository.LoadInterjections(lang).Interjections))
{
if (editInterjections.ShowDialog(this) == DialogResult.OK)
{