jellyfin/MediaBrowser.Providers/Plugins/Tmdb/Movies/TmdbSearch.cs

292 lines
12 KiB
C#
Raw Normal View History

#pragma warning disable CS1591
using System;
2014-01-30 14:23:54 -07:00
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Net;
2020-08-17 12:10:02 -07:00
using System.Net.Http;
using System.Net.Http.Headers;
2020-05-30 23:23:09 -07:00
using System.Text.RegularExpressions;
2014-01-30 14:23:54 -07:00
using System.Threading;
using System.Threading.Tasks;
using MediaBrowser.Controller.Library;
using MediaBrowser.Controller.Providers;
using MediaBrowser.Model.Entities;
using MediaBrowser.Model.Providers;
using MediaBrowser.Model.Serialization;
2020-05-30 23:23:09 -07:00
using MediaBrowser.Providers.Plugins.Tmdb.Models.Search;
using Microsoft.Extensions.Logging;
2014-01-30 14:23:54 -07:00
2020-05-30 23:23:09 -07:00
namespace MediaBrowser.Providers.Plugins.Tmdb.Movies
2014-01-30 14:23:54 -07:00
{
public class TmdbSearch
2014-01-30 14:23:54 -07:00
{
// Culture used when parsing TMDB date strings and when formatting TMDB ids.
private static readonly CultureInfo _usCulture = new CultureInfo("en-US");
// Matches from an opening bracket-type character (Unicode category Ps) through a closing one (Pe).
// The ".*" is greedy, so the match extends from the first opener to the last closer in the string.
private static readonly Regex _cleanEnclosed = new Regex(@"\p{Ps}.*\p{Pe}", RegexOptions.Compiled);
// Matches one or more consecutive non-word characters or underscores.
private static readonly Regex _cleanNonWord = new Regex(@"[\W_]+", RegexOptions.Compiled);
// Matches the first release-style token (year, season/episode marker, resolution, codec, rip type, ...)
// that begins at a word boundary, together with everything after it up to the end of the line.
// There is no trailing word boundary, so a token also matches as the prefix of a longer word.
// The pattern is compiled with IgnorePatternWhitespace, so the "# ..." fragments inside it are regex comments.
private static readonly Regex _cleanStopWords = new Regex(@"\b( # Start at word boundary
19[0-9]{2}|20[0-9]{2}| # 1900-2099
S[0-9]{2}| # Season
E[0-9]{2}| # Episode
(2160|1080|720|576|480)[ip]?| # Resolution
[xh]?264| # Encoding
(web|dvd|bd|hdtv|hd)rip| # *Rip
web|hdtv|mp4|bluray|ktr|dl|single|imageset|internal|doku|dubbed|retail|xxx|flac
).* # Match rest of string",
RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);
2020-08-07 10:26:28 -07:00
// Composite format string for a TMDB search request.
// Placeholders as filled in by the callers in this class:
// {0} = URL-encoded query, {1} = API key, {2} = language, {3} = search type ("tv", "movie", "collection").
private const string SearchUrl = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}";
// Same as SearchUrl plus {4} = year, sent as first_air_date_year; only used by the TV search in this class.
private const string SearchUrlWithYear = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}&first_air_date_year={4}";
2016-06-23 10:04:18 -07:00
2014-01-30 14:23:54 -07:00
// Logger used to record which name/year is being searched for.
private readonly ILogger _logger;
// Serializer used to deserialize the TMDB search responses.
private readonly IJsonSerializer _json;
2014-11-17 19:48:22 -07:00
// Library manager used to split a raw item name into a title and an optional year (ParseName).
private readonly ILibraryManager _libraryManager;
2014-01-30 14:23:54 -07:00
/// <summary>
/// Initializes a new instance of the <see cref="TmdbSearch"/> class.
/// </summary>
/// <param name="logger">The logger used for search diagnostics.</param>
/// <param name="json">The serializer used to read TMDB responses.</param>
/// <param name="libraryManager">The library manager used to parse item names.</param>
public TmdbSearch(ILogger logger, IJsonSerializer json, ILibraryManager libraryManager)
    => (_logger, _json, _libraryManager) = (logger, json, libraryManager);
2014-03-02 10:09:35 -07:00
/// <summary>
/// Searches TMDB for TV series matching the supplied lookup info.
/// </summary>
/// <param name="idInfo">The series lookup info (name, year, metadata language).</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>A task producing the matching remote search results.</returns>
public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(SeriesInfo idInfo, CancellationToken cancellationToken)
    => GetSearchResults(idInfo, "tv", cancellationToken);
2014-03-02 10:09:35 -07:00
/// <summary>
/// Searches TMDB for movies matching the supplied lookup info.
/// </summary>
/// <param name="idInfo">The item lookup info (name, year, metadata language).</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>A task producing the matching remote search results.</returns>
public Task<IEnumerable<RemoteSearchResult>> GetMovieSearchResults(ItemLookupInfo idInfo, CancellationToken cancellationToken)
    => GetSearchResults(idInfo, "movie", cancellationToken);
2014-03-02 10:09:35 -07:00
/// <summary>
/// Searches TMDB for collections (box sets) matching the supplied lookup info.
/// </summary>
/// <param name="idInfo">The box set lookup info (name, year, metadata language).</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>A task producing the matching remote search results.</returns>
public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(BoxSetInfo idInfo, CancellationToken cancellationToken)
    => GetSearchResults(idInfo, "collection", cancellationToken);
2014-03-02 10:09:35 -07:00
/// <summary>
/// Searches TMDB for the item described by <paramref name="idInfo"/>.
/// The name is parsed and normalized, searched in the item's metadata language and then in English,
/// and, if that still yields nothing, searched again with a more aggressively cleaned name.
/// </summary>
/// <param name="idInfo">The lookup info providing the raw name, optional year and metadata language.</param>
/// <param name="searchType">The TMDB search type ("tv", "movie" or "collection" for the callers in this class).</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>
/// The search results, filtered lazily so that results with a known production year are at most
/// one year away from the requested year. Empty when there is no usable name to search for.
/// </returns>
private async Task<IEnumerable<RemoteSearchResult>> GetSearchResults(ItemLookupInfo idInfo, string searchType, CancellationToken cancellationToken)
{
    var name = idInfo.Name;
    var year = idInfo.Year;

    if (string.IsNullOrWhiteSpace(name))
    {
        return new List<RemoteSearchResult>();
    }

    var tmdbSettings = await TmdbMovieProvider.Current.GetTmdbSettings(cancellationToken).ConfigureAwait(false);
    var tmdbImageUrl = tmdbSettings.images.GetImageUrl("original");

    // ParseName is required here.
    // The caller provides the filename with the extension stripped, NOT the parsed filename.
    var parsedName = _libraryManager.ParseName(name);
    year ??= parsedName.Year;

    var language = idInfo.MetadataLanguage.ToLowerInvariant();

    // TMDB expects a space-separated list of words, so collapse every run of
    // non-word characters into a single space.
    name = string.IsNullOrWhiteSpace(parsedName.Name)
        ? string.Empty
        : _cleanNonWord.Replace(parsedName.Name, " ").Trim();

    // A name made up solely of punctuation (or an empty parse result) leaves nothing to query.
    // Return "no results" instead of letting the lower-level search throw ArgumentException.
    if (name.Length == 0)
    {
        return new List<RemoteSearchResult>();
    }

    var results = await SearchWithEnglishFallback(name).ConfigureAwait(false);

    // TODO: retrying alternatives should be done outside the search
    // provider so that the retry logic can be common for all search
    // providers
    if (results.Count == 0)
    {
        // Remove things enclosed in []{}() etc.
        var name2 = _cleanEnclosed.Replace(parsedName.Name, string.Empty);

        // Replace sequences of non-word characters with a space.
        name2 = _cleanNonWord.Replace(name2, " ");

        // Drop everything from the first common stop word / release token onwards.
        name2 = _cleanStopWords.Replace(name2, string.Empty).Trim();

        // Search again only if the cleaned name is usable and actually different.
        if (!string.Equals(name2, name, StringComparison.Ordinal) && !string.IsNullOrWhiteSpace(name2))
        {
            results = await SearchWithEnglishFallback(name2).ConfigureAwait(false);
        }
    }

    // Allow one year of tolerance; results (or requests) without a year are always kept.
    return results.Where(i =>
        !year.HasValue
        || !i.ProductionYear.HasValue
        || Math.Abs(year.Value - i.ProductionYear.Value) <= 1);

    // Searches in the metadata language first and retries in English when nothing was found.
    async Task<List<RemoteSearchResult>> SearchWithEnglishFallback(string query)
    {
        _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", query, year);

        var found = await GetSearchResults(query, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);

        if (found.Count == 0 && !string.Equals(language, "en", StringComparison.OrdinalIgnoreCase))
        {
            found = await GetSearchResults(query, searchType, year, "en", tmdbImageUrl, cancellationToken).ConfigureAwait(false);
        }

        return found;
    }
}
2016-06-23 10:04:18 -07:00
/// <summary>
/// Routes a search to the TV-specific implementation when <paramref name="type"/> is "tv",
/// and to the generic implementation for every other search type.
/// </summary>
/// <param name="name">The search text.</param>
/// <param name="type">The TMDB search type.</param>
/// <param name="year">The optional year to narrow the search.</param>
/// <param name="language">The language code for the request.</param>
/// <param name="baseImageUrl">Prefix prepended to poster paths in the results.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>A task producing the list of search results.</returns>
private Task<List<RemoteSearchResult>> GetSearchResults(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
{
    return type == "tv"
        ? GetSearchResultsTv(name, year, language, baseImageUrl, cancellationToken)
        : GetSearchResultsGeneric(name, type, year, language, baseImageUrl, cancellationToken);
}
/// <summary>
/// Queries the TMDB search endpoint for a non-TV search type and maps the response
/// to <see cref="RemoteSearchResult"/> entries.
/// </summary>
/// <param name="name">The search text; must not be null, empty or whitespace.</param>
/// <param name="type">The TMDB search type placed in the request URL.</param>
/// <param name="year">Optional year; only sent to TMDB (as <c>primary_release_year</c>) when <paramref name="type"/> is "movie".</param>
/// <param name="language">The language code sent as the <c>language</c> query parameter.</param>
/// <param name="baseImageUrl">Prefix prepended to each result's poster path.</param>
/// <param name="cancellationToken">The cancellation token (not consumed here: the request helper is called without one).</param>
/// <returns>The mapped search results; empty when the response contains none.</returns>
/// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
private async Task<List<RemoteSearchResult>> GetSearchResultsGeneric(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(name))
    {
        throw new ArgumentException("String can't be null or empty.", nameof(name));
    }

    var url3 = string.Format(
        CultureInfo.InvariantCulture,
        SearchUrl,
        WebUtility.UrlEncode(name),
        TmdbUtils.ApiKey,
        language,
        type);

    // Only the movie search is narrowed by release year here.
    if (year.HasValue && string.Equals(type, "movie", StringComparison.OrdinalIgnoreCase))
    {
        url3 += "&primary_release_year=" + year.Value.ToString(CultureInfo.InvariantCulture);
    }

    using var requestMessage = new HttpRequestMessage(HttpMethod.Get, url3);
    foreach (var header in TmdbUtils.AcceptHeaders)
    {
        requestMessage.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(header));
    }

    using var response = await TmdbMovieProvider.Current.GetMovieDbResponse(requestMessage).ConfigureAwait(false);
    await using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
    var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<MovieResult>>(stream).ConfigureAwait(false);

    // Treat a missing payload or a missing result list as "no results".
    var results = searchResults?.Results ?? new List<MovieResult>();

    return results
        .Select(i =>
        {
            var remoteResult = new RemoteSearchResult
            {
                SearchProviderName = TmdbMovieProvider.Current.Name,
                Name = i.Title ?? i.Name ?? i.Original_Title,
                ImageUrl = string.IsNullOrWhiteSpace(i.Poster_Path) ? null : baseImageUrl + i.Poster_Path
            };

            // These dates are always in this exact format.
            if (!string.IsNullOrWhiteSpace(i.Release_Date)
                && DateTime.TryParseExact(i.Release_Date, "yyyy-MM-dd", _usCulture, DateTimeStyles.None, out var r))
            {
                remoteResult.PremiereDate = r.ToUniversalTime();

                // Take the year from the date as published by TMDB. The parsed value has an
                // unspecified kind, so ToUniversalTime() shifts it by the local UTC offset and
                // can move a January 1 release into the previous year.
                remoteResult.ProductionYear = r.Year;
            }

            remoteResult.SetProviderId(MetadataProvider.Tmdb, i.Id.ToString(_usCulture));
            return remoteResult;
        })
        .ToList();
}
/// <summary>
/// Queries the TMDB TV search endpoint and maps the response to <see cref="RemoteSearchResult"/> entries.
/// </summary>
/// <param name="name">The search text; must not be null, empty or whitespace.</param>
/// <param name="year">Optional year; when present it is sent to TMDB as <c>first_air_date_year</c>.</param>
/// <param name="language">The language code sent as the <c>language</c> query parameter.</param>
/// <param name="baseImageUrl">Prefix prepended to each result's poster path.</param>
/// <param name="cancellationToken">The cancellation token (not consumed here: the request helper is called without one).</param>
/// <returns>The mapped search results; empty when the response contains none.</returns>
/// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
private async Task<List<RemoteSearchResult>> GetSearchResultsTv(string name, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(name))
    {
        throw new ArgumentException("String can't be null or empty.", nameof(name));
    }

    var encodedName = WebUtility.UrlEncode(name);
    var url3 = year.HasValue
        ? string.Format(
            CultureInfo.InvariantCulture,
            SearchUrlWithYear,
            encodedName,
            TmdbUtils.ApiKey,
            language,
            "tv",
            year.Value)
        : string.Format(
            CultureInfo.InvariantCulture,
            SearchUrl,
            encodedName,
            TmdbUtils.ApiKey,
            language,
            "tv");

    using var requestMessage = new HttpRequestMessage(HttpMethod.Get, url3);
    foreach (var header in TmdbUtils.AcceptHeaders)
    {
        requestMessage.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(header));
    }

    using var response = await TmdbMovieProvider.Current.GetMovieDbResponse(requestMessage).ConfigureAwait(false);
    await using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
    var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<TvResult>>(stream).ConfigureAwait(false);

    // Treat a missing payload or a missing result list as "no results".
    var results = searchResults?.Results ?? new List<TvResult>();

    return results
        .Select(i =>
        {
            var remoteResult = new RemoteSearchResult
            {
                SearchProviderName = TmdbMovieProvider.Current.Name,
                Name = i.Name ?? i.Original_Name,
                ImageUrl = string.IsNullOrWhiteSpace(i.Poster_Path) ? null : baseImageUrl + i.Poster_Path
            };

            // These dates are always in this exact format.
            if (!string.IsNullOrWhiteSpace(i.First_Air_Date)
                && DateTime.TryParseExact(i.First_Air_Date, "yyyy-MM-dd", _usCulture, DateTimeStyles.None, out var r))
            {
                remoteResult.PremiereDate = r.ToUniversalTime();

                // Take the year from the date as published by TMDB. The parsed value has an
                // unspecified kind, so ToUniversalTime() shifts it by the local UTC offset and
                // can move a January 1 premiere into the previous year.
                remoteResult.ProductionYear = r.Year;
            }

            remoteResult.SetProviderId(MetadataProvider.Tmdb, i.Id.ToString(_usCulture));
            return remoteResult;
        })
        .ToList();
}
2014-01-30 14:23:54 -07:00
}
}