[urlgalleries] add support

2024-11-25 12:12:34 +01:00 · 2023-12-05 07:07:06 -05:00 · 2023-12-05 07:07:06 -05:00 · 1770c31e63
commit 1770c31e63
parent 4dde36889c
3 changed files with 50 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -889,6 +889,12 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Files</td>
    <td></td>
 </tr>
 <tr>
    <td>Urlgalleries</td>
    <td>https://urlgalleries.net/</td>
    <td>Galleries</td>
    <td></td>
 </tr>
 <tr>
    <td>Vipergirls</td>
    <td>https://vipergirls.to/</td>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@ -155,6 +155,7 @@ modules = [
    "tumblrgallery",
    "twibooru",
    "twitter",
    "urlgalleries",
    "unsplash",
    "uploadir",
    "urlshortener",
--- a/gallery_dl/extractor/urlgalleries.py
+++ b/gallery_dl/extractor/urlgalleries.py
@ -0,0 +1,43 @@
 # -*- coding: utf-8 -*-
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.
 """Extractors for https://urlgalleries.net/"""
 from .common import GalleryExtractor
 from .. import text
 class UrlgalleriesExtractor(GalleryExtractor):
    """Extractor for galleries hosted on <blog>.urlgalleries.net

    A gallery URL has the shape
    https://<blog>.urlgalleries.net/<gallery-id>/<title>
    and its three variable parts are captured by 'pattern' and stored
    on the instance as 'blog', 'gallery_id' and 'title'.
    """
    category = "urlgalleries"
    root = "urlgalleries.net"
    directory_fmt = ("{category}", "{title}")
    # The blog subdomain group is mandatory: with an optional group a URL
    # like "https://.urlgalleries.net/a/b" would match with group(1) being
    # None and __init__ would then request "None.urlgalleries.net/...".
    pattern = r"(?:https?://)([^/?#]+)\.urlgalleries\.net/([^/?#]+)/([^/?#]+)"
    example = "https://blog.urlgalleries.net/gallery-1234567/a-title--1234"

    def __init__(self, match):
        """Store the matched URL components and build the gallery page URL

        'match' is the match object produced by applying 'pattern' to the
        input URL; its three groups are the blog subdomain, the gallery
        path segment and the title path segment.
        """
        self.blog, self.gallery_id, self.title = match.groups()

        # NOTE(review): "&a=10000" is appended straight to the path instead
        # of after a "?"; presumably it asks the site to list every image
        # on a single page -- confirm against the live site.
        url = "{}.urlgalleries.net/{}/{}&a=10000".format(
            self.blog, self.gallery_id, self.title)

        # 'url' is built without a scheme, so one is added before it is
        # handed to the base class.
        GalleryExtractor.__init__(self, match, text.ensure_http_scheme(url))

    def images(self, page):
        """Return a list of (url, None) tuples for the links found in 'page'

        Only the part of 'page' between 'id="wtf"' and the following
        '</div>' is searched. Every single-quoted href value found there is
        appended to 'root' to form the resulting URL.
        """
        links = text.extr(page, 'id="wtf"', "</div>")
        root = self.root
        return [
            (text.ensure_http_scheme(root + href), None)
            for href in text.extract_iter(links, "href='", "'")
        ]

    def metadata(self, page):
        """Return a dict with the gallery's 'title' and 'date'

        'title' is the title segment taken from the URL. 'date' is parsed
        from the last " | "-separated field of the extracted page snippet.
        """
        info = text.extr(page, "float:left;'>  ", '</div>')
        date = info.split(" | ")[-1]
        return {
            'title': self.title,
            'date': text.parse_datetime(date, format='%B %d, %Y T%H:%M'),
        }