mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 02:32:33 +01:00
[urlshortener] add support for bit.ly & t.co
This commit is contained in:
parent
82f83c18e8
commit
9e2a945013
@ -1270,6 +1270,22 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<td></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td colspan="4"><strong>URL Shorteners</strong></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Bitly</td>
|
||||
<td>https://bit.ly/</td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Twitter t.co</td>
|
||||
<td>https://t.co/</td>
|
||||
<td></td>
|
||||
<td></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td colspan="4"><strong>vichan Imageboards</strong></td>
|
||||
</tr>
|
||||
|
@ -153,6 +153,7 @@ modules = [
|
||||
"twitter",
|
||||
"unsplash",
|
||||
"uploadir",
|
||||
"urlshortener",
|
||||
"vanillarock",
|
||||
"vichan",
|
||||
"vk",
|
||||
|
59
gallery_dl/extractor/urlshortener.py
Normal file
59
gallery_dl/extractor/urlshortener.py
Normal file
@ -0,0 +1,59 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractor for general-purpose URL shorteners"""
|
||||
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import exception
|
||||
|
||||
|
||||
class UrlshortenerExtractor(BaseExtractor):
    """Base class for general-purpose URL shorteners

    A short URL is resolved with a single HEAD request that does not
    follow redirects; the value of the response's 'Location' header is
    then queued for further processing.
    """
    basecategory = "urlshortener"
    test = (
        ("https://bit.ly/3cWIUgq", {
            "count": 1,
            "pattern": "^https://gumroad.com/l/storm_b1"
        }),
        ("https://t.co/bCgBY8Iv5n", {
            "count": 1,
            "pattern": ("^https://twitter.com/elonmusk/status/"
                        "1421395561324896257/photo/1")
        }),
    )

    def __init__(self, match):
        """Store the matched short URL and this instance's header overrides"""
        BaseExtractor.__init__(self, match)
        # Optional per-instance request headers from INSTANCES;
        # None when the instance does not define any.
        self.headers = INSTANCES[self.category].get("headers")
        self.url = match.group()

    def request(self, url, **kwargs):
        """Send a request that includes this instance's header overrides

        Headers passed explicitly by the caller are merged on top of the
        instance headers instead of being discarded. Without caller
        headers, the same header values as before are sent.
        """
        headers = kwargs.get("headers")
        if self.headers is not None:
            merged = dict(self.headers)
            if headers:
                # caller-supplied values take precedence
                merged.update(headers)
            headers = merged
        kwargs["headers"] = headers
        return BaseExtractor.request(self, url, **kwargs)

    def items(self):
        """Yield a single Queue message with the short URL's target

        Raises exception.StopExtraction when the response carries no
        'Location' header.
        """
        # HEAD without following redirects: only the redirect target
        # is of interest, not the destination page itself.
        response = self.request(
            self.url, method="HEAD", allow_redirects=False, notfound="URL")

        location = response.headers.get("location")
        if location is None:
            raise exception.StopExtraction("Unable to resolve short URL")
        yield Message.Queue, location, {}
|
||||
|
||||
|
||||
# Supported URL shortener services, keyed by category name.
# Each entry provides the service's root URL and a regex fragment
# matching its domain; "headers" is an optional set of request
# header overrides read by UrlshortenerExtractor.__init__().
INSTANCES = {
    "bitly": {
        "root": "https://bit.ly",
        "pattern": r"bit\.ly",
    },
    "tco": {
        # t.co sends 'http-equiv="refresh"' (200) when using browser UA
        "headers": {"User-Agent": None},
        "root": "https://t.co",
        "pattern": r"t\.co",
    },
}

# Combined pattern for all instances, followed by one path segment
# (the short link ID) without '/', '?', '#', or '&'.
UrlshortenerExtractor.pattern = (
    UrlshortenerExtractor.update(INSTANCES) + r"/[^/?#&]+"
)
|
@ -112,6 +112,7 @@ CATEGORY_MAP = {
|
||||
"subscribestar" : "SubscribeStar",
|
||||
"tbib" : "The Big ImageBoard",
|
||||
"tcbscans" : "TCB Scans",
|
||||
"tco" : "Twitter t.co",
|
||||
"thatpervert" : "ThatPervert",
|
||||
"thebarchive" : "The /b/ Archive",
|
||||
"thecollection" : "The /co/llection",
|
||||
@ -132,6 +133,7 @@ CATEGORY_MAP = {
|
||||
}
|
||||
|
||||
SUBCATEGORY_MAP = {
|
||||
"" : "",
|
||||
"art" : "Art",
|
||||
"audio" : "Audio",
|
||||
"doujin" : "Doujin",
|
||||
@ -266,6 +268,7 @@ BASE_MAP = {
|
||||
"lynxchan" : "LynxChan Imageboards",
|
||||
"moebooru" : "Moebooru and MyImouto",
|
||||
"szurubooru" : "szurubooru Instances",
|
||||
"urlshortener": "URL Shorteners",
|
||||
"vichan" : "vichan Imageboards",
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user