gallery-dl/gallery_dl/extractor/gfycat.py

# -*- coding: utf-8 -*-

# Copyright 2017-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://gfycat.com/"""

from .common import Extractor, Message
from .. import text
from ..cache import cache


class GfycatExtractor(Extractor):
    """Common machinery shared by all gfycat extractors"""
    category = "gfycat"
    filename_fmt = "{category}_{gfyName}{title:?_//}.{extension}"
    archive_fmt = "{gfyName}"
    root = "https://gfycat.com"

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.key = match.group(1)
        # the configured format is tried first, then the built-in fallbacks
        preferred = self.config("format", "mp4")
        self.formats = (preferred, "mp4", "webm", "gif")

    def items(self):
        """Yield a Directory and a Url message for every gfycat item"""
        extra = self.metadata()
        for item in self.gfycats():
            # the URL is selected before the extra metadata is merged in
            media_url = self._select_format(item)
            item.update(extra)
            yield Message.Directory, item
            yield Message.Url, media_url, item

    def _select_format(self, gfyitem):
        """Return the URL of the first format from 'self.formats'
        for which 'gfyitem' has a '<format>Url' key

        The part of that URL after its last '.' is stored
        in gfyitem["extension"]. If no such key exists,
        an empty string is returned and 'gfyitem' is left untouched.
        """
        for name in self.formats:
            field = name + "Url"
            if field not in gfyitem:
                continue
            location = gfyitem[field]
            gfyitem["extension"] = location.rpartition(".")[2]
            return location
        return ""

    def metadata(self):
        """Return key-value pairs that get merged into every item"""
        return dict()

    def gfycats(self):
        """Return an iterable of gfycat item dicts (none by default)"""
        return tuple()


class GfycatUserExtractor(GfycatExtractor):
    """Extractor for the gfycats listed on a gfycat user profile"""
    subcategory = "user"
    directory_fmt = ("{category}", "{userName}")
    pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)"
    test = ("https://gfycat.com/@gretta", {
        "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
        "count": ">= 100",
    })

    def gfycats(self):
        """Return the API's gfycat listing for the user name from the URL"""
        api = GfycatAPI(self)
        return api.user(self.key)


class GfycatSearchExtractor(GfycatExtractor):
    """Extractor for gfycat search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search}")
    pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)"
    test = ("https://gfycat.com/gifs/search/funny+animals", {
        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
        "archive": False,
        "range": "100-300",
        "count": "> 200",
    })

    def metadata(self):
        """Decode the search query from the URL and return it as metadata

        '+' is translated to a space BEFORE percent-decoding, so that a
        percent-encoded literal plus sign ('%2B') survives as '+' instead
        of being turned into a space as well.
        (assumes text.unquote() percent-decodes like urllib.parse.unquote)

        The decoded query replaces 'self.key', which gfycats() reads
        afterwards; returns {"search": <decoded query>}.
        """
        self.key = text.unquote(self.key.replace("+", " "))
        return {"search": self.key}

    def gfycats(self):
        """Return the API's search results for the query in 'self.key'"""
        return GfycatAPI(self).search(self.key)


class GfycatImageExtractor(GfycatExtractor):
    """Extractor for a single gfycat.com item identified by its name"""
    subcategory = "image"
    pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
               r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")
    test = (
        ("https://gfycat.com/GrayGenerousCowrie", {
            "url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
            "content": "5786028e04b155baa20b87c5f4f77453cd5edc37",
            "keyword": {
                "gfyId": "graygenerouscowrie",
                "gfyName": "GrayGenerousCowrie",
                "gfyNumber": "755075459",
                "title": "Bottom's up",
                "userName": "jackson3oh3",
                "createDate": 1495884169,
                "md5": "a4796e05b0db9ba9ce5140145cd318aa",
                "width": 400,
                "height": 224,
                "frameRate": 23,
                "numFrames": 158,
                "views": int,
            },
        }),
        (("https://thumbs.gfycat.com/SillyLameIsabellinewheatear"
          "-size_restricted.gif"), {
            "url": "13b32e6cc169d086577d7dd3fd36ee6cdbc02726",
        }),
        ("https://gfycat.com/detail/UnequaledHastyAnkole?tagname=aww", {
            "url": "e24c9f69897fd223343782425a429c5cab6a768e",
        }),
        ("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),
        ("https://gfycat.com/ifr/UnequaledHastyAnkole"),
        ("https://gfycat.com/ru/UnequaledHastyAnkole"),
    )

    def gfycats(self):
        """Fetch the item named by 'self.key' and return it as a 1-tuple"""
        response = self.request(
            "https://api.gfycat.com/v1/gfycats/" + self.key)
        item = response.json()["gfyItem"]
        return (item,)


class GfycatAPI():
    """Small client for the gfycat web API

    All HTTP traffic goes through the request() method of the extractor
    object this client is created with.
    """
    API_ROOT = "https://api.gfycat.com"
    ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"

    def __init__(self, extractor):
        self.extractor = extractor
        # request headers; "Authorization" is (re)set on every _call()
        self.headers = {}

    def gfycat(self, gfycat_id):
        """Return the "gfyItem" object for a single gfycat ID"""
        endpoint = "/v1/gfycats/" + gfycat_id
        return self._call(endpoint)["gfyItem"]

    def user(self, user):
        """Return a generator over the gfycats of 'user' (lowercased)"""
        endpoint = "/v1/users/{}/gfycats".format(user.lower())
        params = {"count": 100}
        return self._pagination(endpoint, params)

    def search(self, query):
        """Return a generator over the search results for 'query'"""
        endpoint = "/v1/gfycats/search"
        params = {"search_text": query, "count": 150}
        return self._pagination(endpoint, params)

    # NOTE(review): presumably caches the token per 'category' argument
    # for 3600 seconds -- confirm against the 'cache' decorator
    @cache(keyarg=1, maxage=3600)
    def _authenticate_impl(self, category):
        """Request a web token and return it as "Bearer <token>" string

        POSTs ACCESS_KEY as JSON to
        https://weblogin.<category>.com/oauth/webtoken
        with Referer and Origin headers derived from the extractor's root.
        """
        url = "https://weblogin." + category + ".com/oauth/webtoken"
        data = {"access_key": self.ACCESS_KEY}
        headers = {"Referer": self.extractor.root + "/",
                   "Origin" : self.extractor.root}
        response = self.extractor.request(
            url, method="POST", headers=headers, json=data)
        return "Bearer " + response.json()["access_token"]

    def _call(self, endpoint, params=None):
        """Send an authorized request to 'endpoint'; return decoded JSON"""
        url = self.API_ROOT + endpoint
        self.headers["Authorization"] = self._authenticate_impl(
            self.extractor.category)
        return self.extractor.request(
            url, params=params, headers=self.headers).json()

    def _pagination(self, endpoint, params):
        """Yield all gfycats of a paginated endpoint

        'params' must contain "count" and gets its "cursor" entry updated
        in place between requests.

        Iteration stops when
        - a page contains no gfycats,
        - the response has no "found" key and the page holds fewer
          than 'count' entries, or
        - the response provides no (or an empty) "cursor", since there
          is nothing to request the next page with.
        """
        while True:
            data = self._call(endpoint, params)
            gfycats = data["gfycats"]
            yield from gfycats

            if not gfycats:
                return
            if "found" not in data and len(gfycats) < params["count"]:
                return

            # a missing cursor used to raise KeyError here; an empty one
            # would be sent back as-is and presumably restart at page one
            cursor = data.get("cursor")
            if not cursor:
                return
            params["cursor"] = cursor
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00			`# -*- coding: utf-8 -*-`

[redgifs] add 'user' and 'search' extractors (closes #724) 2020-06-10 22:03:52 +02:00			`# Copyright 2017-2020 Mike Fährmann`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

[gfycat] add 'user' and 'search' extractors 2020-07-16 14:48:31 +02:00			`"""Extractors for https://gfycat.com/"""`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00
			`from .common import Extractor, Message`
[gfycat] add 'user' and 'search' extractors 2020-07-16 14:48:31 +02:00			`from .. import text`
			`from ..cache import cache`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00

			`class GfycatExtractor(Extractor):`
			`"""Base class for gfycat extractors"""`
			`category = "gfycat"`
[gfycat] include title in default filenames (closes #434) 2019-10-02 21:46:01 +02:00			`filename_fmt = "{category}_{gfyName}{title:?_//}.{extension}"`
set 'archive_fmt' values These are going to be used to create an unique id for each image. 2018-01-30 22:49:16 +01:00			`archive_fmt = "{gfyName}"`
[gfycat] test-updates and code-adjustments 2018-08-18 18:47:28 +02:00			`root = "https://gfycat.com"`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00
propagate 'match' to base extractor constructor 2019-02-11 13:31:10 +01:00			`def __init__(self, match):`
			`Extractor.__init__(self, match)`
[redgifs] add 'user' and 'search' extractors (closes #724) 2020-06-10 22:03:52 +02:00			`self.key = match.group(1)`
[gfycat] add "format" config key to select a video format Possible values: - one of "mp4" (default), "webm", "gif", "webp", "mjpg" If the selected format is not available, "mp4", "webm" and "gif" (in that order) will be tried instead, until an available format is found. 2017-05-29 09:24:59 +02:00			`self.formats = (self.config("format", "mp4"), "mp4", "webm", "gif")`

[redgifs] add 'user' and 'search' extractors (closes #724) 2020-06-10 22:03:52 +02:00			`def items(self):`
			`metadata = self.metadata()`
			`for gfycat in self.gfycats():`
			`url = self._select_format(gfycat)`
			`gfycat.update(metadata)`
			`yield Message.Directory, gfycat`
			`yield Message.Url, url, gfycat`

[gfycat] test-updates and code-adjustments 2018-08-18 18:47:28 +02:00			`def _select_format(self, gfyitem):`
[gfycat] add "format" config key to select a video format Possible values: - one of "mp4" (default), "webm", "gif", "webp", "mjpg" If the selected format is not available, "mp4", "webm" and "gif" (in that order) will be tried instead, until an available format is found. 2017-05-29 09:24:59 +02:00			`for fmt in self.formats:`
			`key = fmt + "Url"`
[gfycat] test-updates and code-adjustments 2018-08-18 18:47:28 +02:00			`if key in gfyitem:`
			`url = gfyitem[key]`
			`gfyitem["extension"] = url.rpartition(".")[2]`
[gfycat] add "format" config key to select a video format Possible values: - one of "mp4" (default), "webm", "gif", "webp", "mjpg" If the selected format is not available, "mp4", "webm" and "gif" (in that order) will be tried instead, until an available format is found. 2017-05-29 09:24:59 +02:00			`return url`
[gfycat] test-updates and code-adjustments 2018-08-18 18:47:28 +02:00			`return ""`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00
[redgifs] add 'user' and 'search' extractors (closes #724) 2020-06-10 22:03:52 +02:00			`def metadata(self):`
			`return {}`

			`def gfycats(self):`
			`return ()`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00

[gfycat] add 'user' and 'search' extractors 2020-07-16 14:48:31 +02:00			`class GfycatUserExtractor(GfycatExtractor):`
			`"""Extractor for gfycat user profiles"""`
			`subcategory = "user"`
			`directory_fmt = ("{category}", "{userName}")`
			`pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)"`
			`test = ("https://gfycat.com/@gretta", {`
			`"pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",`
			`"count": ">= 100",`
			`})`

			`def gfycats(self):`
			`return GfycatAPI(self).user(self.key)`


			`class GfycatSearchExtractor(GfycatExtractor):`
			`"""Extractor for gfycat search results"""`
			`subcategory = "search"`
			`directory_fmt = ("{category}", "Search", "{search}")`
			`pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)"`
			`test = ("https://gfycat.com/gifs/search/funny+animals", {`
			`"pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",`
			`"archive": False,`
			`"range": "100-300",`
			`"count": "> 200",`
			`})`

			`def metadata(self):`
			`self.key = text.unquote(self.key).replace("+", " ")`
			`return {"search": self.key}`

			`def gfycats(self):`
			`return GfycatAPI(self).search(self.key)`


[gfycat] add image extractor 2017-05-28 17:09:54 +02:00			`class GfycatImageExtractor(GfycatExtractor):`
			`"""Extractor for individual images from gfycat.com"""`
			`subcategory = "image"`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 2019-02-08 13:45:40 +01:00			`pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"`
[gfycat] add 'user' and 'search' extractors 2020-07-16 14:48:31 +02:00			`r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 2019-02-08 13:45:40 +01:00			`test = (`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00			`("https://gfycat.com/GrayGenerousCowrie", {`
[gfycat] add "format" config key to select a video format Possible values: - one of "mp4" (default), "webm", "gif", "webp", "mjpg" If the selected format is not available, "mp4", "webm" and "gif" (in that order) will be tried instead, until an available format is found. 2017-05-29 09:24:59 +02:00			`"url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",`
update/restore tests 2018-08-23 15:47:40 +02:00			`"content": "5786028e04b155baa20b87c5f4f77453cd5edc37",`
[gfycat] test-updates and code-adjustments 2018-08-18 18:47:28 +02:00			`"keyword": {`
			`"gfyId": "graygenerouscowrie",`
			`"gfyName": "GrayGenerousCowrie",`
			`"gfyNumber": "755075459",`
			`"title": "Bottom's up",`
			`"userName": "jackson3oh3",`
[gfycat] fix extraction /cajax/get/<id> doesn't work anymore 2018-11-28 13:26:21 +01:00			`"createDate": 1495884169,`
[gfycat] test-updates and code-adjustments 2018-08-18 18:47:28 +02:00			`"md5": "a4796e05b0db9ba9ce5140145cd318aa",`
[gfycat] fix extraction /cajax/get/<id> doesn't work anymore 2018-11-28 13:26:21 +01:00			`"width": 400,`
			`"height": 224,`
			`"frameRate": 23,`
			`"numFrames": 158,`
[gfycat] test-updates and code-adjustments 2018-08-18 18:47:28 +02:00			`"views": int,`
			`},`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00			`}),`
			`(("https://thumbs.gfycat.com/SillyLameIsabellinewheatear"`
			`"-size_restricted.gif"), {`
[gfycat] add "format" config key to select a video format Possible values: - one of "mp4" (default), "webm", "gif", "webp", "mjpg" If the selected format is not available, "mp4", "webm" and "gif" (in that order) will be tried instead, until an available format is found. 2017-05-29 09:24:59 +02:00			`"url": "13b32e6cc169d086577d7dd3fd36ee6cdbc02726",`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00			`}),`
			`("https://gfycat.com/detail/UnequaledHastyAnkole?tagname=aww", {`
[gfycat] add "format" config key to select a video format Possible values: - one of "mp4" (default), "webm", "gif", "webp", "mjpg" If the selected format is not available, "mp4", "webm" and "gif" (in that order) will be tried instead, until an available format is found. 2017-05-29 09:24:59 +02:00			`"url": "e24c9f69897fd223343782425a429c5cab6a768e",`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00			`}),`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 2019-02-08 13:45:40 +01:00			`("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),`
			`("https://gfycat.com/ifr/UnequaledHastyAnkole"),`
			`("https://gfycat.com/ru/UnequaledHastyAnkole"),`
			`)`
[gfycat] add image extractor 2017-05-28 17:09:54 +02:00
[redgifs] add 'user' and 'search' extractors (closes #724) 2020-06-10 22:03:52 +02:00			`def gfycats(self):`
			`url = "https://api.gfycat.com/v1/gfycats/" + self.key`
			`return (self.request(url).json()["gfyItem"],)`
[gfycat] add 'user' and 'search' extractors 2020-07-16 14:48:31 +02:00

			`class GfycatAPI():`
			`API_ROOT = "https://api.gfycat.com"`
			`ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"`

			`def __init__(self, extractor):`
			`self.extractor = extractor`
			`self.headers = {}`

			`def gfycat(self, gfycat_id):`
			`endpoint = "/v1/gfycats/" + gfycat_id`
			`return self._call(endpoint)["gfyItem"]`

			`def user(self, user):`
			`endpoint = "/v1/users/{}/gfycats".format(user.lower())`
			`params = {"count": 100}`
			`return self._pagination(endpoint, params)`

			`def search(self, query):`
			`endpoint = "/v1/gfycats/search"`
			`params = {"search_text": query, "count": 150}`
			`return self._pagination(endpoint, params)`

			`@cache(keyarg=1, maxage=3600)`
			`def _authenticate_impl(self, category):`
			`url = "https://weblogin." + category + ".com/oauth/webtoken"`
			`data = {"access_key": self.ACCESS_KEY}`
			`headers = {"Referer": self.extractor.root + "/",`
			`"Origin" : self.extractor.root}`
			`response = self.extractor.request(`
			`url, method="POST", headers=headers, json=data)`
			`return "Bearer " + response.json()["access_token"]`

			`def _call(self, endpoint, params=None):`
			`url = self.API_ROOT + endpoint`
			`self.headers["Authorization"] = self._authenticate_impl(`
			`self.extractor.category)`
			`return self.extractor.request(`
			`url, params=params, headers=self.headers).json()`

			`def _pagination(self, endpoint, params):`
			`while True:`
			`data = self._call(endpoint, params)`
			`gfycats = data["gfycats"]`
			`yield from gfycats`

			`if "found" not in data and len(gfycats) < params["count"] or \`
			`not data["gfycats"]:`
			`return`
			`params["cursor"] = data["cursor"]`