2017-05-28 17:09:54 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2020-06-10 22:03:52 +02:00
|
|
|
# Copyright 2017-2020 Mike Fährmann
|
2017-05-28 17:09:54 +02:00
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2020-07-16 14:48:31 +02:00
|
|
|
"""Extractors for https://gfycat.com/"""
|
2017-05-28 17:09:54 +02:00
|
|
|
|
|
|
|
from .common import Extractor, Message
|
2020-07-16 14:48:31 +02:00
|
|
|
from .. import text
|
|
|
|
from ..cache import cache
|
2017-05-28 17:09:54 +02:00
|
|
|
|
|
|
|
|
|
|
|
class GfycatExtractor(Extractor):
    """Common functionality shared by all gfycat extractors

    Subclasses provide the media objects via gfycats() and, optionally,
    additional keywords via metadata(); items() converts them to messages.
    """
    category = "gfycat"
    filename_fmt = "{category}_{gfyName}{title:?_//}.{extension}"
    archive_fmt = "{gfyName}"
    root = "https://gfycat.com"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # First capture group of the subclass' URL pattern
        self.key = match.group(1)

        # Value of the "format" option first, then fixed fallbacks;
        # _select_format() tries these names in order
        preferred = self.config("format", "mp4")
        self.formats = (preferred, "mp4", "webm", "gif")

    def items(self):
        """Yield one Directory and one Url message per gfycat"""
        extra = self.metadata()
        for gfycat in self.gfycats():
            # Select the URL first: this stores "extension" in 'gfycat',
            # and the metadata merged afterwards overrides equal keys
            media_url = self._select_format(gfycat)
            gfycat.update(extra)
            yield Message.Directory, gfycat
            yield Message.Url, media_url, gfycat

    def _select_format(self, gfyitem):
        """Return the URL of the first available format in self.formats

        On success the URL's suffix is stored as gfyitem["extension"].
        An empty string is returned when none of the "<format>Url"
        keys exists; 'gfyitem' is left unmodified in that case.
        """
        for name in self.formats:
            try:
                url = gfyitem[name + "Url"]
            except KeyError:
                continue
            _, _, extension = url.rpartition(".")
            gfyitem["extension"] = extension
            return url
        return ""

    def metadata(self):
        """Return extra keywords merged into every gfycat (none here)"""
        return {}

    def gfycats(self):
        """Return an iterable of gfycat objects (none here)"""
        return ()
|
2017-05-28 17:09:54 +02:00
|
|
|
|
|
|
|
|
2020-07-16 14:48:31 +02:00
|
|
|
class GfycatUserExtractor(GfycatExtractor):
    """Extractor for the gfycats listed on a gfycat user profile"""
    subcategory = "user"
    directory_fmt = ("{category}", "{userName}")
    pattern = r"(?:https?://)?gfycat\.com/@([^/?&#]+)"
    test = ("https://gfycat.com/@gretta", {
        "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
        "count": ">= 100",
    })

    def gfycats(self):
        """Return the gfycats of the user whose name was matched in the URL"""
        # self.key holds the user name captured by 'pattern'
        api = GfycatAPI(self)
        return api.user(self.key)
|
|
|
|
|
|
|
|
|
|
|
|
class GfycatSearchExtractor(GfycatExtractor):
    """Extractor for gfycat search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search}")
    pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?&#]+)"
    test = ("https://gfycat.com/gifs/search/funny+animals", {
        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
        "archive": False,
        "range": "100-300",
        "count": "> 200",
    })

    def __init__(self, match):
        GfycatExtractor.__init__(self, match)
        # Decode the search term exactly once, here, so that metadata()
        # and gfycats() can be called in any order and any number of times.
        #
        # '+' must be turned into a space BEFORE unquoting (the order
        # urllib.parse.unquote_plus uses): otherwise an escaped literal
        # plus sign ("%2B") is decoded to '+' and then wrongly replaced
        # by a space as well.
        # NOTE(review): assumes text.unquote() performs percent-decoding
        self.key = text.unquote(self.key.replace("+", " "))

    def metadata(self):
        """Return the decoded search term as the 'search' keyword"""
        return {"search": self.key}

    def gfycats(self):
        """Return the gfycats found for the decoded search term"""
        return GfycatAPI(self).search(self.key)
|
|
|
|
|
|
|
|
|
2017-05-28 17:09:54 +02:00
|
|
|
class GfycatImageExtractor(GfycatExtractor):
    """Extractor for individual images from gfycat.com"""
    subcategory = "image"
    pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
               r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")
    test = (
        ("https://gfycat.com/GrayGenerousCowrie", {
            "url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
            "content": "5786028e04b155baa20b87c5f4f77453cd5edc37",
            "keyword": {
                "gfyId": "graygenerouscowrie",
                "gfyName": "GrayGenerousCowrie",
                "gfyNumber": "755075459",
                "title": "Bottom's up",
                "userName": "jackson3oh3",
                "createDate": 1495884169,
                "md5": "a4796e05b0db9ba9ce5140145cd318aa",
                "width": 400,
                "height": 224,
                "frameRate": 23,
                "numFrames": 158,
                "views": int,
            },
        }),
        (("https://thumbs.gfycat.com/SillyLameIsabellinewheatear"
          "-size_restricted.gif"), {
            "url": "13b32e6cc169d086577d7dd3fd36ee6cdbc02726",
        }),
        ("https://gfycat.com/detail/UnequaledHastyAnkole?tagname=aww", {
            "url": "e24c9f69897fd223343782425a429c5cab6a768e",
        }),
        ("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),
        ("https://gfycat.com/ifr/UnequaledHastyAnkole"),
        ("https://gfycat.com/ru/UnequaledHastyAnkole"),
    )

    def gfycats(self):
        """Return a 1-tuple holding the gfycat object named in the URL"""
        # Go through GfycatAPI like the user and search extractors do,
        # instead of duplicating the API root and endpoint path here;
        # GfycatAPI.gfycat() requests "/v1/gfycats/<id>" and returns
        # the response's "gfyItem" object.
        return (GfycatAPI(self).gfycat(self.key),)
|
2020-07-16 14:48:31 +02:00
|
|
|
|
|
|
|
|
|
|
|
class GfycatAPI():
    """Small client for the gfycat JSON API

    All HTTP traffic is sent through the request() method of the
    extractor given to the constructor, with an "Authorization" header
    obtained from the site's webtoken endpoint.
    """
    API_ROOT = "https://api.gfycat.com"
    ACCESS_KEY = "Anr96uuqt9EdamSCwK4txKPjMsf2M95Rfa5FLLhPFucu8H5HTzeutyAa"

    def __init__(self, extractor):
        self.extractor = extractor
        # Request headers reused by every _call()
        self.headers = {}

    def gfycat(self, gfycat_id):
        """Return the "gfyItem" object for a single gfycat ID"""
        endpoint = "/v1/gfycats/" + gfycat_id
        return self._call(endpoint)["gfyItem"]

    def user(self, user):
        """Return a generator over all gfycats of 'user'"""
        # The user name is lowercased for the endpoint path
        endpoint = "/v1/users/{}/gfycats".format(user.lower())
        params = {"count": 100}
        return self._pagination(endpoint, params)

    def search(self, query):
        """Return a generator over all gfycats matching 'query'"""
        endpoint = "/v1/gfycats/search"
        params = {"search_text": query, "count": 150}
        return self._pagination(endpoint, params)

    # NOTE(review): presumably caches the token per 'category' argument
    # for 3600 seconds - confirm against the project's cache module
    @cache(keyarg=1, maxage=3600)
    def _authenticate_impl(self, category):
        """Request a web token and return it as "Bearer <token>" """
        url = "https://weblogin." + category + ".com/oauth/webtoken"
        data = {"access_key": self.ACCESS_KEY}
        headers = {
            "Referer": self.extractor.root + "/",
            "Origin": self.extractor.root,
        }
        response = self.extractor.request(
            url, method="POST", headers=headers, json=data)
        return "Bearer " + response.json()["access_token"]

    def _call(self, endpoint, params=None):
        """Send an authorized request to 'endpoint'; return decoded JSON"""
        url = self.API_ROOT + endpoint
        self.headers["Authorization"] = self._authenticate_impl(
            self.extractor.category)
        return self.extractor.request(
            url, params=params, headers=self.headers).json()

    def _pagination(self, endpoint, params):
        """Yield the "gfycats" of every result page of 'endpoint'

        'params' must contain the page size as "count"; it is updated
        in place with the "cursor" of each following page.
        """
        while True:
            data = self._call(endpoint, params)
            gfycats = data["gfycats"]
            yield from gfycats

            # An empty page always ends the listing
            if not gfycats:
                return
            # Without a "found" entry, a short page is the last page
            if "found" not in data and len(gfycats) < params["count"]:
                return

            # Stop instead of raising KeyError (missing cursor) or
            # sending an empty cursor back when the final page happens
            # to be completely filled
            cursor = data.get("cursor")
            if not cursor:
                return
            params["cursor"] = cursor
|