[urlgalleries] add support

2024-11-22 02:32:33 +01:00 · 2023-12-05 07:07:06 -05:00 · 2023-12-05 07:07:06 -05:00 · 1770c31e63
commit 1770c31e63
parent 4dde36889c
3 changed files with 50 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -889,6 +889,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Files</td>
    <td></td>
 </tr>
+<tr>
+    <td>Urlgalleries</td>
+    <td>https://urlgalleries.net/</td>
+    <td>Galleries</td>
+    <td></td>
+</tr>
 <tr>
    <td>Vipergirls</td>
    <td>https://vipergirls.to/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -155,6 +155,7 @@ modules = [
    "tumblrgallery",
    "twibooru",
    "twitter",
+    "urlgalleries",
    "unsplash",
    "uploadir",
    "urlshortener",
--- a/gallery_dl/extractor/urlgalleries.py
+++ b/gallery_dl/extractor/urlgalleries.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://urlgalleries.net/"""
+
+from .common import GalleryExtractor
+from .. import text
+
+
+class UrlgalleriesExtractor(GalleryExtractor):
+    """Extractor for galleries at BLOG.urlgalleries.net/GALLERY/TITLE"""
+    category = "urlgalleries"
+    root = "urlgalleries.net"
+    directory_fmt = ("{category}", "{title}")
+    pattern = r"(?:https?://)?([^/?#]+)\.urlgalleries\.net/([^/?#]+)/([^/?#]+)"
+    example = "https://blog.urlgalleries.net/gallery-1234567/a-title--1234"
+
+    def __init__(self, match):
+        """Build the gallery page URL ('&a=10000' appended) from 'match'"""
+        self.blog, self.gallery_id, self.title = match.groups()
+        url = "{}.urlgalleries.net/{}/{}&a=10000".format(
+            self.blog, self.gallery_id, self.title)
+        GalleryExtractor.__init__(self, match, text.ensure_http_scheme(url))
+
+    def images(self, page):
+        """Return (url, None) pairs for each href in the 'id="wtf"' block"""
+        links = text.extr(page, 'id="wtf"', "</div>")
+        return [  # every href is appended to 'root' and given a scheme
+            (text.ensure_http_scheme(self.root + path), None)
+            for path in text.extract_iter(links, "href='", "'")
+        ]
+
+    def metadata(self, page):
+        """Return 'title' (from the URL) and 'date' (parsed from 'page')"""
+        date = text.extr(  # the date is the last ' | '-separated field
+            page, "float:left;'>  ", '</div>').split(" | ")[-1]
+        return {
+            'title': self.title,
+            'date': text.parse_datetime(date, format='%B %d, %Y T%H:%M')
+        }