From e9ce98caa44ff2adaae25eecf0ed7fa082e2e87c Mon Sep 17 00:00:00 2001 From: Mark McDowall Date: Sat, 23 Apr 2011 01:04:30 -0700 Subject: [PATCH] Removed Year from EpisodeParseResult (we never used it anyways). Episode parsing REGEX will properly handle filenames with S01E01/1x01 naming. Added REGEX to support seasons with more than 100 episodes (0-99)... Stupid soaps. Title Normalizing REGEX will keep the year (has to start with 19 or 20 and be exactly 4 digits long) --- NzbDrone.Core.Test/ParserTest.cs | 30 +++++++++++++++---- NzbDrone.Core/Model/EpisodeParseResult.cs | 1 - NzbDrone.Core/Parser.cs | 36 ++++++++--------------- 3 files changed, 38 insertions(+), 29 deletions(-) diff --git a/NzbDrone.Core.Test/ParserTest.cs b/NzbDrone.Core.Test/ParserTest.cs index 1163457b9..08a2ca125 100644 --- a/NzbDrone.Core.Test/ParserTest.cs +++ b/NzbDrone.Core.Test/ParserTest.cs @@ -24,13 +24,18 @@ public class ParserTest [Row("Chuck.4x05.HDTV.XviD-LOL", "Chuck", 4, 5)] [Row("The.Girls.Next.Door.S03E06.DVDRip.XviD-WiDE", "The.Girls.Next.Door", 3, 6)] [Row("Degrassi.S10E27.WS.DSR.XviD-2HD", "Degrassi", 10, 27)] - [Row("Parenthood.2010.S02E14.HDTV.XviD-LOL", "Parenthood", 2, 14)] - [Row("Hawaii Five 0 S01E19 720p WEB DL DD5 1 H 264 NT", "Hawaii Five0", 1, 19)] + [Row("Parenthood.2010.S02E14.HDTV.XviD-LOL", "Parenthood 2010", 2, 14)] + [Row("Hawaii Five 0 S01E19 720p WEB DL DD5 1 H 264 NT", "Hawaii Five", 1, 19)] [Row("The Event S01E14 A Message Back 720p WEB DL DD5 1 H264 SURFER", "The Event", 1, 14)] [Row("Adam Hills In Gordon St Tonight S01E07 WS PDTV XviD FUtV", "Adam Hills In Gordon St Tonight", 1, 7)] [Row("Adam Hills In Gordon St Tonight S01E07 WS PDTV XviD FUtV", "Adam Hills In Gordon St Tonight", 1, 7)] [Row("Adventure.Inc.S03E19.DVDRip.XviD-OSiTV", "Adventure.Inc", 3, 19)] - //[Row("The.Kennedys.Part.2.DSR.XviD-SYS", 1, 2)] + [Row("S03E09 WS PDTV XviD FUtV", "", 3, 9)] + [Row("5x10 WS PDTV XviD FUtV", "", 5, 10)] + [Row("Castle.2009.S01E14.HDTV.XviD-LOL", "Castle 2009", 1, 14)] + [Row("Pride.and.Prejudice.1995.S03E20.HDTV.XviD-LOL", "Pride and Prejudice 1995", 3, 20)] + //[Row(@"Season 4\07 WS PDTV XviD FUtV", "", 4, 7)] + [Row("The.Office.S03E115.DVDRip.XviD-OSiTV", "The.Office", 3, 115)] public void episode_parse(string postTitle, string title, int season, int episode) { var result = Parser.ParseEpisodeInfo(postTitle); @@ -109,6 +114,18 @@ public void episode_daily_parse(string postTitle, string title, int year, int mo Assert.AreEqual(airDate, result.AirDate); } + [Test] + [Row("Conan", "conan")] + [Row("The Tonight Show With Jay Leno", "tonightshowwithjayleno")] + [Row("The.Daily.Show", "dailyshow")] + [Row("Castle (2009)", "castle2009")] + [Row("Parenthood.2010", "parenthood2010")] + public void series_name_normalize(string parsedSeriesName, string seriesName) + { + var result = Parser.NormalizeTitle(parsedSeriesName); + Assert.AreEqual(seriesName, result); + } + [Test] [Row(@"c:\test\", @"c:\test")] [Row(@"c:\\test\\", @"c:\test")] @@ -163,8 +180,11 @@ public void Normalize_removed_common_words(string word) [Test] [Row("the")] - [Row("And")] - [Row("Or")] + [Row("and")] + [Row("or")] + [Row("a")] + [Row("an")] + [Row("of")] public void Normalize_not_removed_common_words_in_the_middle(string word) { var dirtyFormat = new[] diff --git a/NzbDrone.Core/Model/EpisodeParseResult.cs b/NzbDrone.Core/Model/EpisodeParseResult.cs index 64312c2e4..0cbd32756 100644 --- a/NzbDrone.Core/Model/EpisodeParseResult.cs +++ b/NzbDrone.Core/Model/EpisodeParseResult.cs @@ -11,7 +11,6 @@ public class EpisodeParseResult internal int SeasonNumber { get; set; } internal List Episodes { get; set; } - internal int Year { get; set; } internal string EpisodeTitle { get; set; } diff --git a/NzbDrone.Core/Parser.cs b/NzbDrone.Core/Parser.cs index ef5cb08b5..9e6bef3b0 100644 --- a/NzbDrone.Core/Parser.cs +++ b/NzbDrone.Core/Parser.cs @@ -13,18 +13,17 @@ public static class Parser private static readonly Logger Logger = LogManager.GetCurrentClassLogger(); private static readonly Regex[] ReportTitleRegex = new[] - { - new Regex( - @"(?.+?)?\W?(?<year>\d{4}?)?\W+(?<airyear>\d{4})\W+(?<airmonth>\d{2})\W+(?<airday>\d{2})\W?(?!\\)", - RegexOptions.IgnoreCase | RegexOptions.Compiled), - new Regex( - @"(?<title>.+?)?\W?(?<year>\d{4}?)?(?:\WS?(?<season>\d{1,2})(?:(?:\-|\.|[ex]|\s|to)+(?<episode>\d+))+)+\W?(?!\\)", - RegexOptions.IgnoreCase | RegexOptions.Compiled), - new Regex( - @"(?<title>.+?)?\W?(?<year>\d{4}?)?(?:\W(?<season>\d+)(?<episode>\d{2}))+\W?(?!\\)", - RegexOptions.IgnoreCase | RegexOptions.Compiled) - //Supports 103/113 naming - }; + { + new Regex(@"^(?<title>.+?)?\W?(?<year>\d{4}?)?\W+(?<airyear>\d{4})\W+(?<airmonth>\d{2})\W+(?<airday>\d{2})\W?(?!\\)", + RegexOptions.IgnoreCase | RegexOptions.Compiled), + new Regex(@"^(?<title>.*?)?(?:\W?S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|\.|[ex]|\s|to)+(?<episode>\d{1,2}(?!\d+)))+)+\W?(?!\\)", + RegexOptions.IgnoreCase | RegexOptions.Compiled), + new Regex(@"^(?<title>.+?)?\W?(?<year>\d{4}?)?(?:\W(?<season>\d+)(?<episode>\d{2}))+\W?(?!\\)", + RegexOptions.IgnoreCase | RegexOptions.Compiled), + //Supports 103/113 naming + new Regex(@"^(?<title>.*?)?(?:\W?S?(?<season>\d{1,2}(?!\d+))(?:(?:\-|\.|[ex]|\s|to)+(?<episode>\d+))+)+\W?(?!\\)", + RegexOptions.IgnoreCase | RegexOptions.Compiled) + }; private static readonly Regex[] SeasonReportTitleRegex = new[] { @@ -34,7 +33,7 @@ public static class Parser RegexOptions.Compiled), }; - private static readonly Regex NormalizeRegex = new Regex(@"((^|\W)(a|an|the|and|or|of)($|\W))|\W", + private static readonly Regex NormalizeRegex = new Regex(@"((^|\W)(a|an|the|and|or|of)($|\W))|\W|\b(?!(?:19\d{2}|20\d{2}))\d+\b", RegexOptions.IgnoreCase | RegexOptions.Compiled); /// <summary> @@ -55,14 +54,7 @@ internal static EpisodeParseResult ParseEpisodeInfo(string title) if (match.Count != 0) { var seriesName = NormalizeTitle(match[0].Groups["title"].Value); - var year = 0; - Int32.TryParse(match[0].Groups["year"].Value, out year); - - if (year < 1900 || year > DateTime.Now.Year + 1) - { - year = 0; - } - + var airyear = 0; Int32.TryParse(match[0].Groups["airyear"].Value, out airyear); @@ -78,7 +70,6 @@ internal static EpisodeParseResult ParseEpisodeInfo(string title) Proper = title.ToLower().Contains("proper"), CleanTitle = seriesName, SeasonNumber = season, - Year = year, Episodes = new List<int>() }; @@ -107,7 +98,6 @@ internal static EpisodeParseResult ParseEpisodeInfo(string title) { Proper = title.ToLower().Contains("proper"), CleanTitle = seriesName, - Year = year, AirDate = new DateTime(airyear, airmonth, airday) }; }