1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 18:53:21 +01:00
gallery-dl/gallery_dl/cloudflare.py

85 lines
2.6 KiB
Python
Raw Normal View History

2015-11-07 02:30:08 +01:00
# -*- coding: utf-8 -*-
# Copyright 2015, 2016 Mike Fährmann
2015-11-07 02:30:08 +01:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
2015-11-07 13:06:23 +01:00
"""Methods to access sites behind Cloudflare protection"""
2015-11-07 02:30:08 +01:00
import time
import operator
2015-11-07 13:06:23 +01:00
import urllib.parse
2015-11-07 02:30:08 +01:00
from . import text
from .cache import cache
2015-11-07 02:30:08 +01:00
def bypass_ddos_protection(session, url):
    """Give a requests session the cookies needed to reach 'url'

    The cookie jar returned by solve_challenge() replaces the jar of
    'session'; the very same session object is handed back to the caller.
    """
    cookies = solve_challenge(session, url)
    session.cookies = cookies
    return session
@cache(maxage=24*60*60, keyarg=1)
def solve_challenge(session, url):
    """Answer the challenge served at 'url' and return the session's cookies

    NOTE(review): the decorator arguments presumably mean "keep results for
    one day (in seconds), keyed on the argument at index 1 ('url')" -
    confirm against the cache module.
    """
    session.headers["Referer"] = url
    challenge_page = session.get(url).text

    # form fields of the challenge page that are sent back with the answer
    form_fields = (
        ('jschl_vc', 'name="jschl_vc" value="', '"'),
        ('pass'    , 'name="pass" value="', '"'),
    )
    params = text.extract_all(challenge_page, form_fields)[0]
    params["jschl_answer"] = solve_jschl(url, challenge_page)

    # wait before submitting; presumably an answer sent too early is
    # rejected by the server - TODO confirm
    time.sleep(4)

    answer_url = urllib.parse.urljoin(url, "/cdn-cgi/l/chk_jschl")
    session.get(answer_url, params=params)
    return session.cookies
2015-11-07 02:30:08 +01:00
2015-11-07 13:06:23 +01:00
def solve_jschl(url, page):
    """Compute the 'jschl_answer' value for the challenge inside 'page'

    The start value is taken from the expression extracted as 'expr'.
    Every following statement that begins with '<var>.<key>' applies one
    arithmetic operator to the running result. The first statement that
    begins with 'a.value' ends the calculation: the length of the network
    location of 'url' is added and the sum is returned.
    """
    fields, offset = text.extract_all(page, (
        ('var' , 'var t,r,a,f, ', '='),
        ('key' , '"', '"'),
        ('expr', ':', '}')
    ))
    result = evaluate_expression(fields["expr"])

    target = "{}.{}".format(fields["var"], fields["key"])
    prefix_len = len(target)

    script = text.extract(page, "'challenge-form');", "f.submit();", offset)[0]
    statements = script.split(";")[1:]

    for statement in statements:
        if statement.startswith(target):
            # the character right after the variable name is the operator;
            # the operand starts two characters after the variable name
            apply_operator = operator_functions[statement[prefix_len]]
            operand = evaluate_expression(statement[prefix_len+2:])
            result = apply_operator(result, operand)
            continue
        if statement.startswith("a.value"):
            hostname = urllib.parse.urlparse(url).netloc
            return result + len(hostname)

    # no 'a.value' statement was found
    return None
2015-11-07 02:30:08 +01:00
2015-11-07 13:06:23 +01:00
def evaluate_expression(expr):
    """Evaluate a Javascript expression for the challenge and return its value

    Each parenthesized group that is enclosed by another group is split on
    "[]"; the values of the resulting fragments (see 'expression_values')
    are summed up to one number. The numbers of all groups are written next
    to each other, in the order in which the groups are closed, and the
    resulting string is converted to int. An expression without such
    enclosed groups is handled as one single group.
    """
    open_positions = []
    groups = []
    for position, symbol in enumerate(expr):
        if symbol == "(":
            open_positions.append(position + 1)
        elif symbol == ")":
            start = open_positions.pop()
            # a group only counts while an enclosing group is still open
            if open_positions:
                groups.append(expr[start:position])

    if not groups:
        groups = (expr,)

    numbers = [
        str(sum(expression_values[fragment] for fragment in group.split("[]")))
        for group in groups
    ]
    return int("".join(numbers))
# Arithmetic operator characters mapped to the functions implementing them
operator_functions = dict((
    ("+", operator.add),
    ("-", operator.sub),
    ("*", operator.mul),
))
# Numeric value of every fragment that evaluate_expression() looks up
# after splitting a sub-expression on "[]"
expression_values = {fragment: 0 for fragment in ("", "+")}
expression_values.update({fragment: 1 for fragment in ("!+", "+!!")})