1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 20:22:36 +01:00

implement decorator for cloudflare bypass

This method of enabling and caching a Cloudflare bypass for a
requests.Session object allows different cache timeouts to be used
for different domains.
This commit is contained in:
Mike Fährmann 2016-11-20 18:05:49 +01:00
parent 6e98538d36
commit 9e3788175e
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88
3 changed files with 20 additions and 7 deletions

View File

@ -14,13 +14,20 @@ import urllib.parse
from . import text
from .cache import cache
def bypass(url, maxage):
    """Return a method decorator that applies a Cloudflare bypass for 'url'

    The decorated function is expected to be a method of an object that
    exposes its session as 'self.session'. Before every call of the
    decorated method, the session's cookies are replaced with the result
    of solve_challenge(session, url).

    Each decorator wraps solve_challenge() in its own cache() instance, so
    every use of bypass() can pick its own 'maxage'.

    url    -- address handed to solve_challenge()
    maxage -- forwarded unchanged to cache() as its 'maxage' argument
              (presumably a lifetime in seconds -- confirm against cache())
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import functools

    def decorator(func):
        # keyarg=1 presumably selects the 'url' argument as the cache key;
        # verify against the cache() implementation.
        solve = cache(maxage=maxage, keyarg=1)(solve_challenge)

        # Preserve the wrapped method's name and docstring, and forward
        # keyword arguments as well as positional ones so that calls such
        # as self.request(url, params=...) keep working.
        @functools.wraps(func)
        def wrap(self, *args, **kwargs):
            self.session.cookies = solve(self.session, url)
            return func(self, *args, **kwargs)
        return wrap
    return decorator
def bypass_ddos_protection(session, url):
    """Install the cookies from solve_challenge() for 'url' on 'session'

    The same session object is returned after its 'cookies' attribute has
    been replaced.
    """
    cookies = solve_challenge(session, url)
    session.cookies = cookies
    return session
# TODO: this is only a temporary workaround for readcomiconline.to
@cache(maxage=30*60, keyarg=1)
def solve_challenge(session, url):
session.headers["Referer"] = url
page = session.get(url).text
@ -30,7 +37,8 @@ def solve_challenge(session, url):
))[0]
params["jschl_answer"] = solve_jschl(url, page)
time.sleep(4)
session.get(urllib.parse.urljoin(url, "/cdn-cgi/l/chk_jschl"), params=params)
url = urllib.parse.urljoin(url, "/cdn-cgi/l/chk_jschl")
session.get(url, params=params)
return session.cookies
def solve_jschl(url, page):
@ -50,7 +58,7 @@ def solve_jschl(url, page):
value = evaluate_expression(expr[vlength+2:])
solution = func(solution, value)
elif expr.startswith("a.value"):
return solution + len(urllib.parse.urlparse(url).netloc)
return solution + len(urllib.parse.urlsplit(url).netloc)
def evaluate_expression(expr):
"""Evaluate a Javascript expression for the challange and return its value"""

View File

@ -22,6 +22,9 @@ class KissmangaExtractor(Extractor):
# Run the base Extractor initialisation, keep the complete matched text
# (match.group(0)) as this extractor's URL and set the session's Referer
# header to the class's 'url_base'.
def __init__(self, match):
Extractor.__init__(self)
self.url = match.group(0)
self.session.headers["Referer"] = self.url_base
# Class-level override: 'request' becomes Extractor.request wrapped by the
# decorator returned from cloudflare.bypass(); 24*60*60 is passed as the
# decorator's second ('maxage') argument.
request = cloudflare.bypass(url_base, 24*60*60)(Extractor.request)
class KissmangaMangaExtractor(KissmangaExtractor):
@ -33,7 +36,6 @@ class KissmangaMangaExtractor(KissmangaExtractor):
})]
def items(self):
cloudflare.bypass_ddos_protection(self.session, self.url_base)
yield Message.Version, 1
for chapter in self.get_chapters():
yield Message.Queue, self.url_base + chapter
@ -62,7 +64,6 @@ class KissmangaChapterExtractor(KissmangaExtractor):
]
def items(self):
cloudflare.bypass_ddos_protection(self.session, self.url_base)
page = self.request(self.url).text
data = self.get_job_metadata(page)
imgs = self.get_image_urls(page)

View File

@ -9,7 +9,7 @@
"""Extract comic-issues and entire comics from http://readcomiconline.to/"""
from .common import Extractor
from .. import text
from .. import text, cloudflare
from . import kissmanga
import re
@ -23,6 +23,10 @@ class ReadcomiconlineExtractor(Extractor):
# Run the base Extractor initialisation, keep the complete matched text
# (match.group(0)) as this extractor's URL and set the session's Referer
# header to the class's 'url_base'.
def __init__(self, match):
Extractor.__init__(self)
self.url = match.group(0)
self.session.headers["Referer"] = self.url_base
# Class-level override: 'request' becomes Extractor.request wrapped by the
# decorator returned from cloudflare.bypass(); 30*60 is passed as the
# decorator's second ('maxage') argument.
request = cloudflare.bypass(url_base, 30*60)(Extractor.request)
class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor,