diff --git a/src/NzbDrone.Core/DecisionEngine/DownloadDecisionMaker.cs b/src/NzbDrone.Core/DecisionEngine/DownloadDecisionMaker.cs
index 32e7087c6..093e16a89 100644
--- a/src/NzbDrone.Core/DecisionEngine/DownloadDecisionMaker.cs
+++ b/src/NzbDrone.Core/DecisionEngine/DownloadDecisionMaker.cs
@@ -52,6 +52,13 @@ private IEnumerable GetDecisions(List reports, Se
                 _logger.ProgressInfo("No reports found");
             }
 
+            // get series from search criteria
+            Tv.Series series = null;
+            if (searchCriteria != null)
+            {
+                series = searchCriteria.Series;
+            }
+
             var reportNumber = 1;
 
             foreach (var report in reports)
@@ -61,8 +68,21 @@ private IEnumerable GetDecisions(List reports, Se
 
                 try
                 {
+                    // use parsing service to parse episode info (this allows us to do episode title searches against the episode repository)
                     var parsedEpisodeInfo = Parser.Parser.ParseTitle(report.Title);
 
+                    // do we have a possible special episode?
+                    if (parsedEpisodeInfo == null || parsedEpisodeInfo.IsPossibleSpecialEpisode())
+                    {
+                        // try to parse as a special episode
+                        var specialEpisodeInfo = _parsingService.ParseSpecialEpisodeTitle(report.Title, series);
+                        if (specialEpisodeInfo != null)
+                        {
+                            // use special episode
+                            parsedEpisodeInfo = specialEpisodeInfo;
+                        }
+                    }
+
                     if (parsedEpisodeInfo != null && !string.IsNullOrWhiteSpace(parsedEpisodeInfo.SeriesTitle))
                     {
                         var remoteEpisode = _parsingService.Map(parsedEpisodeInfo, report.TvRageId, searchCriteria);
diff --git a/src/NzbDrone.Core/Parser/Model/ParsedEpisodeInfo.cs b/src/NzbDrone.Core/Parser/Model/ParsedEpisodeInfo.cs
index 7ae94f647..2b6a808af 100644
--- a/src/NzbDrone.Core/Parser/Model/ParsedEpisodeInfo.cs
+++ b/src/NzbDrone.Core/Parser/Model/ParsedEpisodeInfo.cs
@@ -33,6 +33,25 @@ public bool IsAbsoluteNumbering()
             return AbsoluteEpisodeNumbers.Any();
         }
 
+        // Heuristic used to decide whether a title should also be matched against episode names.
+        // Returns true when no air date was parsed and either the episode numbers are missing,
+        // the season number is 0, or the series title is blank.
+        // (callers in this change then retry the title via ParseSpecialEpisodeTitle)
+        public bool IsPossibleSpecialEpisode()
+        {
+            // an air date was parsed, so do not treat the title as a special
+            if (!string.IsNullOrEmpty(AirDate))
+            {
+                return false;
+            }
+
+            // if we don't have any episode numbers we are likely a special episode and need to do a search by episode title
+            return EpisodeNumbers == null ||
+                   EpisodeNumbers.Length == 0 ||
+                   SeasonNumber == 0 ||
+                   string.IsNullOrWhiteSpace(SeriesTitle);
+        }
+
         public override string ToString()
         {
             string episodeString = "[Unknown Episode]";
diff --git a/src/NzbDrone.Core/Parser/Parser.cs b/src/NzbDrone.Core/Parser/Parser.cs
index 7797a61cd..d59ce4e80 100644
--- a/src/NzbDrone.Core/Parser/Parser.cs
+++ b/src/NzbDrone.Core/Parser/Parser.cs
@@ -114,6 +114,11 @@ public static class Parser
         private static readonly Regex YearInTitleRegex = new Regex(@"^(?<title>.+?)(?:\W|_)?(?<year>\d{4})",
                                                                 RegexOptions.IgnoreCase | RegexOptions.Compiled);
 
+        private static readonly Regex NonWordRegex = new Regex(@"\W+", RegexOptions.Compiled);
+        private static readonly Regex CommonWordRegex = new Regex(@"\b(a|an|the|and|or|of|part)\b\s?",
+                                                                RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+
         public static ParsedEpisodeInfo ParsePath(string path)
         {
             var fileInfo = new FileInfo(path);
@@ -220,6 +225,26 @@ public static string CleanupEpisodeTitle(string title)
             return MultiPartCleanupRegex.Replace(title, string.Empty).Trim();
         }
 
+        // Normalizes an episode title for fuzzy comparison: every run of non-word characters
+        // becomes a single space, the text is lower-cased, common filler words are removed and
+        // the result is trimmed. Returns an empty string for a null or blank title.
+        public static string NormalizeEpisodeTitle(string title)
+        {
+            if (String.IsNullOrWhiteSpace(title))
+            {
+                return String.Empty;
+            }
+
+            // convert any non-word characters to a single space (invariant casing keeps matching independent of the current culture)
+            string normalizedSpaces = NonWordRegex.Replace(title, " ").ToLowerInvariant();
+
+            // remove common words
+            string normalized = CommonWordRegex.Replace(normalizedSpaces, String.Empty);
+
+            // punctuation or a removed word at either end leaves a stray space that would break substring matching
+            return normalized.Trim();
+        }
+
         public static string ParseReleaseGroup(string title)
         {
             const string defaultReleaseGroup = "DRONE";
diff --git a/src/NzbDrone.Core/Parser/ParsingService.cs b/src/NzbDrone.Core/Parser/ParsingService.cs
index 0134bbe4a..a2dfee939 100644
--- a/src/NzbDrone.Core/Parser/ParsingService.cs
+++ b/src/NzbDrone.Core/Parser/ParsingService.cs
@@ -12,6 +12,7 @@ namespace NzbDrone.Core.Parser
 {
     public interface IParsingService
     {
+        ParsedEpisodeInfo ParseSpecialEpisodeTitle(string title, Series series);
         LocalEpisode GetEpisodes(string filename, Series series, bool sceneSource);
         Series GetSeries(string title);
         RemoteEpisode
Map(ParsedEpisodeInfo parsedEpisodeInfo, int tvRageId, SearchCriteriaBase searchCriteria = null);
@@ -39,10 +40,79 @@ public ParsingService(IEpisodeService episodeService,
             _logger = logger;
         }
 
+        // Tries to resolve a release/file title to a special (season 0) episode by matching episode
+        // names. When no series is supplied, it is looked up from the title with an inexact match.
+        // Returns null when the title is blank, nothing matches, or the lookup fails (failures are logged, not thrown).
+        public ParsedEpisodeInfo ParseSpecialEpisodeTitle(string title, Series series)
+        {
+            // nothing to match against
+            if (string.IsNullOrWhiteSpace(title))
+            {
+                return null;
+            }
+
+            try
+            {
+                if (series == null)
+                {
+                    // find series if we don't have it already
+                    // we use an inexact match here since the series name is often mangled with the episode title
+                    series = _seriesService.FindByTitleInexact(title);
+                    if (series == null)
+                    {
+                        // no series matched
+                        return null;
+                    }
+                }
+
+                // find special episode in series season 0
+                var episode = _episodeService.FindEpisodeByName(series.Id, 0, title);
+                if (episode == null)
+                {
+                    return null;
+                }
+
+                // create parsed info from the tv episode that we found
+                var info = new ParsedEpisodeInfo
+                {
+                    SeriesTitle = series.Title,
+                    SeriesTitleInfo = new SeriesTitleInfo { Title = series.Title },
+                    SeasonNumber = episode.SeasonNumber,
+                    EpisodeNumbers = new int[] { episode.EpisodeNumber },
+                    FullSeason = false,
+                    Quality = QualityParser.ParseQuality(title),
+                    ReleaseGroup = Parser.ParseReleaseGroup(title)
+                };
+
+                _logger.Info("Found special episode {0} for title '{1}'", info, title);
+                return info;
+            }
+            catch (Exception e)
+            {
+                // best effort: callers treat null as "not a special" and continue with the regular parse result
+                _logger.ErrorException("An error has occurred while trying to parse special episode " + title, e);
+            }
+
+            return null;
+        }
+
         public LocalEpisode GetEpisodes(string filename, Series series, bool sceneSource)
         {
             var parsedEpisodeInfo = Parser.ParsePath(filename);
 
+            // do we have a possible special episode?
+            if (parsedEpisodeInfo == null || parsedEpisodeInfo.IsPossibleSpecialEpisode())
+            {
+                // try to parse as a special episode
+                var title = System.IO.Path.GetFileNameWithoutExtension(filename);
+                var specialEpisodeInfo = ParseSpecialEpisodeTitle(title, series);
+                if (specialEpisodeInfo != null)
+                {
+                    // use special episode
+                    parsedEpisodeInfo = specialEpisodeInfo;
+                }
+            }
+
             if (parsedEpisodeInfo == null)
             {
                 return null;
diff --git a/src/NzbDrone.Core/Tv/EpisodeService.cs b/src/NzbDrone.Core/Tv/EpisodeService.cs
index 96bfb1e52..5d8064ec5 100644
--- a/src/NzbDrone.Core/Tv/EpisodeService.cs
+++ b/src/NzbDrone.Core/Tv/EpisodeService.cs
@@ -15,6 +15,7 @@ public interface IEpisodeService
         Episode GetEpisode(int id);
         Episode FindEpisode(int seriesId, int seasonNumber, int episodeNumber, bool useScene = false);
         Episode FindEpisode(int seriesId, int absoluteEpisodeNumber);
+        Episode FindEpisodeByName(int seriesId, int seasonNumber, string episodeTitle);
         Episode GetEpisode(int seriesId, String date);
         Episode FindEpisode(int seriesId, String date);
         List<Episode> GetEpisodeBySeries(int seriesId);
@@ -88,6 +89,36 @@ public List<Episode> GetEpisodesBySeason(int seriesId, int seasonNumber)
             return _episodeRepository.GetEpisodes(seriesId, seasonNumber);
         }
 
+        // Finds the episode of the given season whose normalized title occurs inside the normalized
+        // search text. When several titles match, the longest one wins so that a short generic title
+        // (e.g. "Special") does not shadow a more specific one (e.g. "Christmas Special").
+        // Returns null when the search text is blank or nothing matches.
+        public Episode FindEpisodeByName(int seriesId, int seasonNumber, string episodeTitle)
+        {
+            if (String.IsNullOrWhiteSpace(episodeTitle))
+            {
+                return null;
+            }
+
+            // TODO: can replace this search mechanism with something smarter/faster/better
+            var search = Parser.Parser.NormalizeEpisodeTitle(episodeTitle);
+
+            return _episodeRepository.GetEpisodes(seriesId, seasonNumber)
+                                     // episodes without a title cannot be matched by name
+                                     .Where(e => !String.IsNullOrWhiteSpace(e.Title))
+                                     .Select(e => new
+                                     {
+                                         Episode = e,
+                                         // trim so stray edge spaces left by normalization do not defeat Contains
+                                         Title = Parser.Parser.NormalizeEpisodeTitle(e.Title).Trim()
+                                     })
+                                     // find episode title within search string
+                                     .Where(c => c.Title.Length > 0 && search.Contains(c.Title))
+                                     // the sort is stable, so equally long titles keep repository order
+                                     .OrderByDescending(c => c.Title.Length)
+                                     .Select(c => c.Episode)
+                                     .FirstOrDefault();
+        }
+
         public PagingSpec<Episode> EpisodesWithoutFiles(PagingSpec<Episode> pagingSpec)
         {
             var episodeResult = _episodeRepository.EpisodesWithoutFiles(pagingSpec, false);
diff --git
a/src/NzbDrone.Core/Tv/SeriesService.cs b/src/NzbDrone.Core/Tv/SeriesService.cs
index 18b67a732..60937f7a9 100644
--- a/src/NzbDrone.Core/Tv/SeriesService.cs
+++ b/src/NzbDrone.Core/Tv/SeriesService.cs
@@ -20,6 +20,7 @@ public interface ISeriesService
         Series FindByTvRageId(int tvRageId);
         Series FindByTitle(string title);
         Series FindByTitle(string title, int year);
+        Series FindByTitleInexact(string title);
         void SetSeriesType(int seriesId, SeriesTypes seriesTypes);
         void DeleteSeries(int seriesId, bool deleteFiles);
         List<Series> GetAllSeries();
@@ -100,6 +101,63 @@ public Series FindByTitle(string title)
             return _seriesRepository.FindByTitle(Parser.Parser.CleanSeriesTitle(title));
         }
 
+        // Finds a series whose clean title occurs inside the cleaned release title (inexact match).
+        // The leftmost match wins; among matches at the same position the longest clean title wins.
+        // Returns null when the title is blank or no series matches.
+        public Series FindByTitleInexact(string title)
+        {
+            if (string.IsNullOrWhiteSpace(title))
+            {
+                return null;
+            }
+
+            // TODO: can replace this search mechanism with something smarter/faster/better
+
+            // find any series clean title within the provided release title
+            string cleanTitle = Parser.Parser.CleanSeriesTitle(title);
+            if (string.IsNullOrEmpty(cleanTitle))
+            {
+                return null;
+            }
+
+            var candidates = _seriesRepository.All()
+                // an empty clean title is contained in every string and a null one would throw, so skip both
+                .Where(s => !string.IsNullOrEmpty(s.CleanTitle))
+                .Select(s => new
+                {
+                    Series = s,
+                    // ordinal search: same semantics as String.Contains, independent of the current culture
+                    Position = cleanTitle.IndexOf(s.CleanTitle, System.StringComparison.Ordinal)
+                })
+                .Where(c => c.Position >= 0)
+                .ToList();
+
+            if (candidates.Count == 0)
+            {
+                // no series matched
+                return null;
+            }
+
+            // get the leftmost series that is the longest
+            // series are usually the first thing in release title, so we select the leftmost and longest match
+            // we could have multiple matches for series which have a common prefix like "Love it", "Love it Too" so we pick the longest one
+            var match = candidates.OrderBy(c => c.Position)
+                                  .ThenByDescending(c => c.Series.CleanTitle.Length)
+                                  .First()
+                                  .Series;
+
+            if (candidates.Count > 1)
+            {
+                _logger.Trace("Multiple series matched {0} from title {1}", match.Title, title);
+                foreach (var candidate in candidates)
+                {
+                    _logger.Trace("Multiple series match candidate: {0} cleantitle: {1}", candidate.Series.Title, candidate.Series.CleanTitle);
+                }
+            }
+
+            return match;
+        }
+
         public Series FindByTitle(string title, int year)
         {
             return _seriesRepository.FindByTitle(title, year);