1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-23 03:02:50 +01:00

support redirects on 4chan archives

This commit is contained in:
Mike Fährmann 2017-07-14 13:24:09 +02:00
parent 98464d1f1b
commit 30d3a5f9b2
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
2 changed files with 18 additions and 10 deletions

View File

@@ -75,12 +75,12 @@ class FutabaThreadExtractor(Extractor):
@staticmethod
def _extract_post(post):
return text.extract_all(post, (
("no" , 'name="', '"'),
("post" , '<b>', '</b>'),
("name" , '<b>', ' </b>'),
("now" , '</font> ', ' '),
(None , '<blockquote', ''),
("com" , '>', '</blockquote>'),
("no" , 'name="', '"'),
("post", '<b>', '</b>'),
("name", '<b>', ' </b>'),
("now" , '</font> ', ' '),
(None , '<blockquote', ''),
("com" , '>', '</blockquote>'),
))[0]
@staticmethod

View File

@@ -10,6 +10,7 @@
from .common import Extractor, Message
from .. import text
import itertools
class ChanThreadExtractor(Extractor):
@@ -76,13 +77,22 @@ class FoolfuukaThreadExtractor(Extractor):
def items(self):
op = True
yield Message.Version, 1
yield Message.Headers, self.session.headers
for post in self.posts():
if op:
yield Message.Directory, post
op = False
if not post["media"]:
continue
url = post["media"]["media_link"]
media = post["media"]
url = media["media_link"]
if not url and "remote_media_link" in media:
needle = '<meta http-equiv="Refresh" content="0; url='
page = self.request(media["remote_media_link"]).text
url = text.extract(page, needle, '"')[0]
post["extension"] = url.rpartition(".")[2]
yield Message.Url, url, post
@@ -90,6 +100,4 @@ class FoolfuukaThreadExtractor(Extractor):
url = self.root + "/_/api/chan/thread/"
params = {"board": self.board, "num": self.thread}
data = self.request(url, params=params).json()[self.thread]
yield data["op"]
yield from data["posts"].values()
return itertools.chain((data["op"],), data["posts"].values())