[pixhost] add extractor

2024-11-22 18:53:21 +01:00 · 2016-11-09 12:03:14 +01:00 · 2016-11-09 12:03:14 +01:00 · 88193718e8
commit 88193718e8
parent 07e9e2c4f1
1 changed file with 18 additions and 1 deletion
--- a/gallery_dl/extractor/imagehosts.py
+++ b/gallery_dl/extractor/imagehosts.py
@ -20,6 +20,7 @@ class ImagehostImageExtractor(Extractor):
    https = False
    method = "post"
    params = "simple"
+    cookies = None

    def __init__(self, match):
        Extractor.__init__(self)
@ -42,7 +43,8 @@ class ImagehostImageExtractor(Extractor):
            self.method = "get"

    def items(self):
-        page = self.request(self.url, method=self.method, data=self.params).text
+        page = self.request(self.url, method=self.method, data=self.params,
+                            cookies=self.cookies).text
        url, filename = self.get_info(page)
        data = text.nameext_from_url(filename, {"token": self.token})
        if self.https and url.startswith("http:"):
@ -249,6 +251,21 @@ class ImgtrexImageExtractor(ImagehostImageExtractor):
        return url, filename


+class PixhostImageExtractor(ImagehostImageExtractor):
+    """Extract a single image from a pixhost.org "show" page"""
+    category = "pixhost"
+    pattern = [(r"(?:https?://)?((?:www\.)?pixhost\.org/show/"
+                r"\d+/(\d+)_[^/]+)")]
+    params = None  # replaces the base-class default of "simple"
+    cookies = {"pixhostads": "1", "pixhosttest": "1"}  # sent with the request
+
+    def get_info(self, page):
+        title, offset = text.extract(page, '<div id="text">\n<h2>', '</h2>')
+        url, _ = text.extract(page, '<img id="show_image" src="', '"', offset)
+        _, found, name = title.partition("_")  # split at the first "_"
+        return url, (name if found else url)  # no "_": reuse the URL as name
+
+
 class TurboimagehostImageExtractor(ImagehostImageExtractor):
    """Extractor for single images from turboimagehost.com"""
    category = "turboimagehost"