1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 04:02:32 +01:00

merge #3841: [urlshortener] add support for bit.ly & t.co

This commit is contained in:
Mike Fährmann 2023-04-15 18:08:21 +02:00
commit d253a3c542
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
5 changed files with 93 additions and 0 deletions

View File

@ -317,6 +317,10 @@
"archive": "~/gallery-dl/custom-archive-file-for-TBIB.db",
"filename": "{id}_{md5}.{extension}",
"sleep-request": [0, 1.2]
},
"urlshortener": {
"tinyurl": {"root": "https://tinyurl.com"}
}
},

View File

@ -1270,6 +1270,22 @@ Consider all sites to be NSFW unless otherwise known.
<td></td>
</tr>
<tr>
<td colspan="4"><strong>URL Shorteners</strong></td>
</tr>
<tr>
<td>Bitly</td>
<td>https://bit.ly/</td>
<td>Links</td>
<td></td>
</tr>
<tr>
<td>Twitter t.co</td>
<td>https://t.co/</td>
<td>Links</td>
<td></td>
</tr>
<tr>
<td colspan="4"><strong>vichan Imageboards</strong></td>
</tr>

View File

@ -153,6 +153,7 @@ modules = [
"twitter",
"unsplash",
"uploadir",
"urlshortener",
"vanillarock",
"vichan",
"vk",

View File

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for general-purpose URL shorteners"""
from .common import BaseExtractor, Message
from .. import exception
class UrlshortenerExtractor(BaseExtractor):
    """Common parent class shared by all URL shortener extractors"""

    # groups every shortener instance under one base category
    basecategory = "urlshortener"
# Built-in shortener services, keyed by category name.
# Every entry provides the service's root URL and a regex for its domain;
# an entry may additionally carry custom request headers.
INSTANCES = {
    "bitly": {
        "root": "https://bit.ly",
        "pattern": r"bit\.ly",
    },
    "tco": {
        # A browser User-Agent makes t.co reply with a 200 response
        # containing 'http-equiv="refresh"', so the header is set to None.
        "headers": {"User-Agent": None},
        "root": "https://t.co",
        "pattern": r"t\.co",
    },
}
# Registers INSTANCES with the base class and keeps the returned value,
# which UrlshortenerLinkExtractor.pattern uses as its regex prefix.
# NOTE(review): presumably this matches scheme + host of every instance;
# confirm against BaseExtractor.update().
BASE_PATTERN = UrlshortenerExtractor.update(INSTANCES)
class UrlshortenerLinkExtractor(UrlshortenerExtractor):
    """Extractor for general-purpose URL shorteners"""
    subcategory = "link"
    # BASE_PATTERN followed by one path segment: the short-link ID
    pattern = BASE_PATTERN + r"/([^/?&#]+)"
    test = (
        ("https://bit.ly/3cWIUgq", {
            "count": 1,
            "pattern": "^https://gumroad.com/l/storm_b1",
        }),
        ("https://t.co/bCgBY8Iv5n", {
            "count": 1,
            "pattern": "^https://twitter.com/elonmusk/status/"
                       "1421395561324896257/photo/1",
        }),
        ("https://t.co/abcdefghij", {
            "exception": exception.NotFoundError,
        }),
    )

    def __init__(self, match):
        """Store the short-link ID and the instance's request headers.

        'match' is the regex match object produced by 'pattern'.
        """
        UrlshortenerExtractor.__init__(self, match)
        # the ID is captured by the last group of 'pattern'
        self.id = match.group(match.lastindex)

        # Custom headers are optional: fall back to None both for
        # categories missing from INSTANCES and for entries without a
        # "headers" key, without masking unrelated errors.
        self.headers = INSTANCES.get(self.category, {}).get("headers")

    def items(self):
        """Resolve the short URL and queue its redirect target.

        Sends one HEAD request with redirects disabled and yields a
        single Message.Queue entry for the 'location' response header.

        Raises exception.StopExtraction when the response has no
        'location' header.
        """
        url = "{}/{}".format(self.root, self.id)
        response = self.request(
            url, headers=self.headers,
            method="HEAD", allow_redirects=False, notfound="URL")

        # Guard only the header lookup, so a KeyError thrown into the
        # generator at the yield below is not misreported.
        try:
            location = response.headers["location"]
        except KeyError:
            raise exception.StopExtraction("Unable to resolve short URL")

        yield Message.Queue, location, {}

View File

@ -112,6 +112,7 @@ CATEGORY_MAP = {
"subscribestar" : "SubscribeStar",
"tbib" : "The Big ImageBoard",
"tcbscans" : "TCB Scans",
"tco" : "Twitter t.co",
"thatpervert" : "ThatPervert",
"thebarchive" : "The /b/ Archive",
"thecollection" : "The /co/llection",
@ -132,6 +133,7 @@ CATEGORY_MAP = {
}
SUBCATEGORY_MAP = {
"" : "",
"art" : "Art",
"audio" : "Audio",
"doujin" : "Doujin",
@ -266,6 +268,7 @@ BASE_MAP = {
"lynxchan" : "LynxChan Imageboards",
"moebooru" : "Moebooru and MyImouto",
"szurubooru" : "szurubooru Instances",
"urlshortener": "URL Shorteners",
"vichan" : "vichan Imageboards",
}