Optimize reading of names lists

About 45% faster: the names lists are now read with a streaming "XmlReader" instead of being loaded into an "XmlDocument" (the exact gain may also depend on disk speed).
This commit is contained in:
Nikolaj Olsson 2017-04-16 17:37:50 +02:00
parent 5f14741133
commit 7f4439f013

View File

@ -22,37 +22,29 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
_namesMultiList = new HashSet<string>(); _namesMultiList = new HashSet<string>();
_blackList = new HashSet<string>(); _blackList = new HashSet<string>();
// Should be called 1st in order to init blacklist.
LoadNamesList(GetLocalNamesFileName()); // e.g: en_names.xml (culture insensitive) LoadNamesList(GetLocalNamesFileName()); // e.g: en_names.xml (culture insensitive)
if (useOnlineNamesEtc && !string.IsNullOrEmpty(namesEtcUrl)) if (useOnlineNamesEtc && !string.IsNullOrEmpty(namesEtcUrl))
{ {
try try
{ {
// load name from https://raw.githubusercontent.com/SubtitleEdit/subtitleedit/master/Dictionaries/names.xml LoadNamesList(Configuration.Settings.WordLists.NamesUrl);
var xml = Utilities.DownloadString(Configuration.Settings.WordLists.NamesUrl);
var nameListXml = new XmlDocument();
nameListXml.LoadXml(xml);
// name present in blacklist won't be loaded!
LoadNamesList(nameListXml);
} }
#if DEBUG
catch (Exception exception) catch (Exception exception)
{ {
System.Diagnostics.Debug.WriteLine(exception.Message); System.Diagnostics.Debug.WriteLine(exception.Message);
} }
#else
catch
{
// ignore
}
#endif
} }
else else
{ {
// names present in blacklist won't be loaded!
LoadNamesList(Path.Combine(_dictionaryFolder, "names.xml")); LoadNamesList(Path.Combine(_dictionaryFolder, "names.xml"));
} }
foreach (var name in _blackList)
{
if (_namesList.Contains(name))
_namesList.Remove(name);
if (_namesMultiList.Contains(name))
_namesMultiList.Remove(name);
}
} }
public List<string> GetAllNames() public List<string> GetAllNames()
@ -73,32 +65,9 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
return _namesMultiList; return _namesMultiList;
} }
private void InitBlackList(XmlDocument namesXml)
{
if (namesXml == null || namesXml.DocumentElement == null)
{
return;
}
XmlNode xnode = namesXml.DocumentElement.SelectSingleNode("blacklist");
// No blacklist element present.
if (xnode == null)
{
return;
}
foreach (XmlNode node in xnode.SelectNodes("name"))
{
string name = node.InnerText.Trim();
if (name.Length > 0)
{
_blackList.Add(name);
}
}
}
/// <summary> /// <summary>
/// Returns two letters ISO language name (Neutral culture). /// Returns two letters ISO language name (Neutral culture).
/// </summary> /// </summary>
/// <returns></returns>
private string GetLocalNamesFileName() private string GetLocalNamesFileName()
{ {
// Converts e.g en_US => en (Neutral culture). // Converts e.g en_US => en (Neutral culture).
@ -112,41 +81,57 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
private void LoadNamesList(string fileName) private void LoadNamesList(string fileName)
{ {
if (!File.Exists(fileName)) if (string.IsNullOrEmpty(fileName) ||
(!File.Exists(fileName) &&
!fileName.StartsWith("http", StringComparison.InvariantCultureIgnoreCase) &&
!fileName.StartsWith("\\", StringComparison.InvariantCultureIgnoreCase)))
{ {
return; return;
} }
var nameListXml = new XmlDocument();
nameListXml.Load(fileName);
if (nameListXml.DocumentElement == null)
{
return;
}
LoadNamesList(nameListXml);
}
private void LoadNamesList(XmlDocument nameListXml) using (XmlReader reader = XmlReader.Create(fileName))
{
// Initialize blacklist.
InitBlackList(nameListXml);
foreach (XmlNode node in nameListXml.DocumentElement.SelectNodes("name"))
{ {
string name = node.InnerText.Trim(); reader.MoveToContent();
// skip names in blacklist while (reader.Read())
if (_blackList.Contains(name))
{ {
continue; if (reader.NodeType == XmlNodeType.Element)
} {
if (name.Contains(' ') && !_namesMultiList.Contains(name)) if (reader.Name == "blacklist")
{ {
_namesMultiList.Add(name); while (reader.Read() && reader.NodeType != XmlNodeType.EndElement)
} {
else if (!_namesList.Contains(name)) if (reader.Name == "name")
{ {
_namesList.Add(name); string name = reader.ReadElementContentAsString().Trim();
if (name.Length > 0 && !_blackList.Contains(name))
{
_blackList.Add(name);
}
}
}
}
else if (reader.Name == "name")
{
string name = reader.ReadElementContentAsString().Trim();
if (name.Length > 0)
{
if (name.Contains(' '))
{
if (!_namesMultiList.Contains(name))
{
_namesMultiList.Add(name);
}
}
else if (!_namesList.Contains(name))
{
_namesList.Add(name);
}
}
}
}
} }
} }
} }
public bool Remove(string name) public bool Remove(string name)
@ -195,6 +180,7 @@ namespace Nikse.SubtitleEdit.Core.Dictionaries
} }
catch catch
{ {
System.Diagnostics.Debug.WriteLine("NamesList.RemoveRemove failed");
} }
} }
return false; return false;