mirror of
https://github.com/Radarr/Radarr.git
synced 2024-11-04 10:02:40 +01:00
Fixed: some Parser problems (Improved editions and German releases)
- Moved the ReportEditionRegex up because we use it in another 2 regexes (so it's not 3 times the same regex). Also added an optional bracket at the beginning. - Added Recut to the edition regex - The Regex for german and french tracker formats (ReportMovieTitleLenientRegexBefore) has been updated to support the same editions as the english versions, but the regex is only used if ParsingLeniency is set to Lenient. Should resolve a lot of cases for german releases where the movietitle wasn't parsed correctly before. - Updated acronym method. Fixed wrong dots for "World War Z.", "World War Z. 2", but still supports "R.I.P.D.", "V.H.S. 2", "G.I. Joe" and "2 Tage in L.A." - Added a lot of tests for this changes (there is a new test for german releases that works without lenient parsing, and another one that only works if lenient parsing is activated)
This commit is contained in:
parent
1ef31e8094
commit
73eba0f95d
@ -1,8 +1,10 @@
|
||||
using System.Linq;
|
||||
using FluentAssertions;
|
||||
using FluentAssertions.Execution;
|
||||
using NUnit.Framework;
|
||||
using NzbDrone.Core.Languages;
|
||||
using NzbDrone.Core.Parser;
|
||||
using NzbDrone.Core.Parser.Model;
|
||||
using NzbDrone.Core.Test.Framework;
|
||||
|
||||
namespace NzbDrone.Core.Test.ParserTests
|
||||
@ -56,12 +58,58 @@ public void should_remove_accents_from_title()
|
||||
[TestCase("Leaving Jeruselem by Railway (1897) [DVD].mp4", "Leaving Jeruselem by Railway")]
|
||||
[TestCase("Climax.2018.1080p.AMZN.WEB-DL.DD5.1.H.264-NTG", "Climax")]
|
||||
[TestCase("Movie.Title.Imax.2018.1080p.AMZN.WEB-DL.DD5.1.H.264-NTG", "Movie Title")]
|
||||
[TestCase("World.War.Z.EXTENDED.2013.German.DL.1080p.BluRay.AVC-XANOR", "World War Z")]
|
||||
[TestCase("World.War.Z.2.EXTENDED.2013.German.DL.1080p.BluRay.AVC-XANOR", "World War Z 2")]
|
||||
[TestCase("G.I.Joe.Retaliation.2013.THEATRiCAL.COMPLETE.BLURAY-GLiMMER", "G.I. Joe Retaliation")]
|
||||
[TestCase("www.Torrenting.org - Revenge.2008.720p.X264-DIMENSION", "Revenge")]
|
||||
public void should_parse_movie_title(string postTitle, string title)
|
||||
{
|
||||
Parser.Parser.ParseMovieTitle(postTitle, true).MovieTitle.Should().Be(title);
|
||||
}
|
||||
|
||||
[TestCase("Avatar.Aufbruch.nach.Pandora.Extended.2009.German.DTS.720p.BluRay.x264-SoW", "Avatar Aufbruch nach Pandora", "Extended", 2009)]
|
||||
[TestCase("Drop.Zone.1994.German.AC3D.DL.720p.BluRay.x264-KLASSiGERHD", "Drop Zone", "", 1994)]
|
||||
[TestCase("Kick.Ass.2.2013.German.DTS.DL.720p.BluRay.x264-Pate", "Kick Ass 2", "", 2013)]
|
||||
[TestCase("Paradise.Hills.2019.German.DL.AC3.Dubbed.1080p.BluRay.x264-muhHD", "Paradise Hills", "", 2019)]
|
||||
[TestCase("96.Hours.Taken.3.EXTENDED.2014.German.DL.1080p.BluRay.x264-ENCOUNTERS", "96 Hours Taken 3", "EXTENDED", 2014)]
|
||||
[TestCase("World.War.Z.EXTENDED.CUT.2013.German.DL.1080p.BluRay.x264-HQX", "World War Z", "EXTENDED CUT", 2013)]
|
||||
[TestCase("Sin.City.2005.RECUT.EXTENDED.German.DL.1080p.BluRay.x264-DETAiLS", "Sin City", "RECUT EXTENDED", 2005)]
|
||||
[TestCase("Die.Klasse.von.1999.1990.German.720p.HDTV.x264-NORETAiL", "Die Klasse von 1999", "", 1990)] //year in the title
|
||||
[TestCase("2.Tage.in.L.A.1996.GERMAN.DL.720p.WEB.H264-SOV", "2 Tage in L.A.", "", 1996)]
|
||||
[TestCase("8.2019.GERMAN.720p.BluRay.x264-UNiVERSUM", "8", "", 2019)]
|
||||
[TestCase("Life.Partners.2014.German.DL.PAL.DVDR-ETM", "Life Partners", "", 2014)]
|
||||
[TestCase("Joe.Dreck.2.EXTENDED.EDITION.2015.German.DL.PAL.DVDR-ETM", "Joe Dreck 2", "EXTENDED EDITION", 2015)]
|
||||
[TestCase("Rango.EXTENDED.2011.HDRip.AC3.German.XviD-POE", "Rango", "EXTENDED", 2011)]
|
||||
[TestCase("Suicide.Squad.2016.EXTENDED.German.DL.AC3.BDRip.x264-hqc", "Suicide Squad", "EXTENDED", 2016)] //edition after year
|
||||
public void should_parse_german_movie(string postTitle, string title, string edition, int year)
|
||||
{
|
||||
ParsedMovieInfo movie = Parser.Parser.ParseMovieTitle(postTitle, false);
|
||||
using (new AssertionScope())
|
||||
{
|
||||
movie.MovieTitle.Should().Be(title);
|
||||
movie.Edition.Should().Be(edition);
|
||||
movie.Year.Should().Be(year);
|
||||
}
|
||||
}
|
||||
|
||||
[TestCase("Der.Hobbit.Eine.Unerwartete.Reise.Extended.German.720p.BluRay.x264-EXQUiSiTE", "Der Hobbit Eine Unerwartete Reise", "Extended", 0)] //no year
|
||||
[TestCase("Die.Unfassbaren.Now.You.See.Me.EXTENDED.German.DTS.720p.BluRay.x264-RHD", "Die Unfassbaren Now You See Me", "EXTENDED", 0)] //no year
|
||||
[TestCase("Der.Soldat.James.German.Bluray.FuckYou.Pso.Why.cant.you.follow.scene.rules.1998", "Der Soldat James", "", 1998)]
|
||||
[TestCase("Passengers.German.DL.AC3.Dubbed..BluRay.x264-PsO", "Passengers", "", 0)] //no year
|
||||
[TestCase("Das.A.Team.Der.Film.Extended.Cut.German.720p.BluRay.x264-ANCIENT", "Das A Team Der Film", "Extended Cut", 0)] //no year
|
||||
[TestCase("Cars.2.German.DL.720p.BluRay.x264-EmpireHD", "Cars 2", "", 0)] //no year
|
||||
[TestCase("Der.Film.deines.Lebens.German.2011.PAL.DVDR-ETM", "Der Film deines Lebens", "", 2011)] //year at wrong position
|
||||
public void should_parse_german_movie_lenient(string postTitle, string title, string edition, int year)
|
||||
{
|
||||
ParsedMovieInfo movie = Parser.Parser.ParseMovieTitle(postTitle, true);
|
||||
using (new AssertionScope())
|
||||
{
|
||||
movie.MovieTitle.Should().Be(title);
|
||||
movie.Edition.Should().Be(edition);
|
||||
movie.Year.Should().Be(year);
|
||||
}
|
||||
}
|
||||
|
||||
[TestCase("(1995) Ghost in the Shell", "Ghost in the Shell")]
|
||||
public void should_parse_movie_folder_name(string postTitle, string title)
|
||||
{
|
||||
|
@ -16,10 +16,14 @@ public static class Parser
|
||||
{
|
||||
private static readonly Logger Logger = NzbDroneLogger.GetLogger(typeof(Parser));
|
||||
|
||||
private static readonly Regex ReportYearRegex = new Regex(@"^.*(?<year>(19|20)\d{2}).*$", RegexOptions.Compiled);
|
||||
|
||||
private static readonly Regex ReportEditionRegex = new Regex(@"\(?\b(?<edition>(((Recut.|Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\b\)?", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
private static readonly Regex[] ReportMovieTitleRegex = new[]
|
||||
{
|
||||
//Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.Special.Edition.2011
|
||||
new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*\(?\b(?<edition>(((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\b\)?.{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)",
|
||||
new Regex(@"^(?<title>(?![(\[]).+?)?(?:(?:[-_\W](?<![)\[!]))*" + ReportEditionRegex + @".{1,3}(?<year>(1(8|9)|20)\d{2}(?!p|i|\d+|\]|\W\d+)))+(\W+|_|$)(?!\\)",
|
||||
RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
|
||||
//Special, Despecialized, etc. Edition Movies, e.g: Mission.Impossible.3.2011.Special.Edition //TODO: Seems to slow down parsing heavily!
|
||||
@ -48,7 +52,7 @@ public static class Parser
|
||||
private static readonly Regex[] ReportMovieTitleLenientRegexBefore = new[]
|
||||
{
|
||||
//Some german or french tracker formats
|
||||
new Regex(@"^(?<title>(?![(\[]).+?)((\W|_))(?:(?<!(19|20)\d{2}.)(German|French|TrueFrench))(.+?)(?=((19|20)\d{2}|$))(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+))?(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
new Regex(@"^(?<title>(?![(\[]).+?)((\W|_))(" + ReportEditionRegex + @".{1,3})?(?:(?<!(19|20)\d{2}.)(German|French|TrueFrench))(.+?)(?=((19|20)\d{2}|$))(?<year>(19|20)\d{2}(?!p|i|\d+|\]|\W\d+))?(\W+|_|$)(?!\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled),
|
||||
};
|
||||
|
||||
private static readonly Regex[] ReportMovieTitleLenientRegexAfter = new Regex[]
|
||||
@ -135,10 +139,6 @@ public static class Parser
|
||||
|
||||
private static readonly Regex RequestInfoRegex = new Regex(@"^(?:\[.+?\])+", RegexOptions.Compiled);
|
||||
|
||||
private static readonly Regex ReportYearRegex = new Regex(@"^.*(?<year>(19|20)\d{2}).*$", RegexOptions.Compiled);
|
||||
|
||||
private static readonly Regex ReportEditionRegex = new Regex(@"\b(?<edition>(((Extended.|Ultimate.)?(Director.?s|Collector.?s|Theatrical|Ultimate|Final(?=(.(Cut|Edition|Version)))|Extended|Rogue|Special|Despecialized|\d{2,3}(th)?.Anniversary)(.(Cut|Edition|Version))?(.(Extended|Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit))?|((Uncensored|Remastered|Unrated|Uncut|IMAX|Fan.?Edit|Edition|Restored|((2|3|4)in1))))))\)?\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
private static readonly string[] Numbers = new[] { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine" };
|
||||
private static Dictionary<string, string> _umlautMappings = new Dictionary<string, string>
|
||||
{
|
||||
@ -488,13 +488,19 @@ private static ParsedMovieInfo ParseMovieMatchCollection(MatchCollection matchCo
|
||||
{
|
||||
nextPart = parts[n + 1];
|
||||
}
|
||||
else
|
||||
{
|
||||
nextPart = "";
|
||||
}
|
||||
|
||||
if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out n))
|
||||
if (part.Length == 1 && part.ToLower() != "a" && !int.TryParse(part, out _) &&
|
||||
(previousAcronym || n < parts.Length - 1) &&
|
||||
(previousAcronym || nextPart.Length != 1 || !int.TryParse(nextPart, out _)))
|
||||
{
|
||||
movieName += part + ".";
|
||||
previousAcronym = true;
|
||||
}
|
||||
else if (part.ToLower() == "a" && (previousAcronym == true || nextPart.Length == 1))
|
||||
else if (part.ToLower() == "a" && (previousAcronym || nextPart.Length == 1))
|
||||
{
|
||||
movieName += part + ".";
|
||||
previousAcronym = true;
|
||||
|
Loading…
Reference in New Issue
Block a user