1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-25 12:12:34 +01:00
gallery-dl/gallery_dl/extractor/desktopography.py

96 lines
3.1 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
2021-09-17 20:09:24 +02:00
"""Extractors for https://desktopography.net/"""
from .common import Extractor, Message
from .. import text
# Regex prefix shared by all extractors in this module: an optional
# http(s) scheme followed by the desktopography.net host. Each extractor's
# 'pattern' appends its own path expression to it.
BASE_PATTERN = r"(?:https?://)?desktopography\.net"
class DesktopographyExtractor(Extractor):
    """Shared settings for every desktopography.net extractor"""
    # Site origin; the subclasses build their request URLs from it.
    root = "https://desktopography.net"
    category = "desktopography"
    # Format string made of the 'filename' field only.
    # NOTE(review): presumably consumed by the download archive - confirm
    # against the Extractor base class.
    archive_fmt = "{filename}"
2021-09-17 20:09:24 +02:00
class DesktopographySiteExtractor(DesktopographyExtractor):
    """Extractor queueing every exhibition linked from the front page"""
    subcategory = "site"
    pattern = BASE_PATTERN + r"/$"
    example = "https://desktopography.net/"

    def items(self):
        """Yield one Message.Queue tuple per exhibition link found"""
        html = self.request(self.root).text
        # Each queued URL is tagged with the exhibition extractor class.
        queue_data = {"_extractor": DesktopographyExhibitionExtractor}

        # Text between these two markers is the part of the link that
        # follows "exhibition-" (e.g. the year).
        link_begin = '<a href="https://desktopography.net/exhibition-'
        link_end = '/">'

        for year in text.extract_iter(html, link_begin, link_end):
            exhibition_url = "{}/exhibition-{}/".format(self.root, year)
            yield Message.Queue, exhibition_url, queue_data
class DesktopographyExhibitionExtractor(DesktopographyExtractor):
    """Extractor queueing every entry of a single yearly exhibition"""
    subcategory = "exhibition"
    pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
    example = "https://desktopography.net/exhibition-2020/"

    def __init__(self, match):
        DesktopographyExtractor.__init__(self, match)
        # First capture group of 'pattern': the text after "exhibition-".
        self.year = match.group(1)

    def items(self):
        """Yield one Message.Queue tuple per portfolio link found"""
        portfolio_root = "https://desktopography.net/portfolios/"
        exhibition_url = self.root + "/exhibition-" + self.year + "/"
        html = self.request(exhibition_url).text

        # Passed along with every queued URL: the extractor class to use
        # and the exhibition year this entry was found under.
        queue_data = {
            "_extractor": DesktopographyEntryExtractor,
            "year": self.year,
        }

        # The marker ends right after the portfolio root, so each match
        # is only the remainder of the link and the root is re-prepended.
        anchor_begin = '<a class="overlay-background" href="' + portfolio_root
        for remainder in text.extract_iter(html, anchor_begin, '">'):
            yield Message.Queue, portfolio_root + remainder, queue_data
class DesktopographyEntryExtractor(DesktopographyExtractor):
    """Extractor for all resolutions of a desktopography wallpaper"""
    subcategory = "entry"
    pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
    example = "https://desktopography.net/portfolios/NAME/"

    def __init__(self, match):
        DesktopographyExtractor.__init__(self, match)
        # First capture group of 'pattern': the portfolio name in the URL.
        self.entry = match.group(1)

    def items(self):
        """Yield a directory message, then one URL message per wallpaper

        Anchors that start like a wallpaper link but do not carry the
        'wallpaper-button' class and 'download' attribute are skipped.
        """
        url = "{}/portfolios/{}".format(self.root, self.entry)
        page = self.request(url).text

        entry_data = {"entry": self.entry}
        yield Message.Directory, entry_data

        for image_data in text.extract_iter(
                page,
                '<a target="_blank" href="https://desktopography.net',
                '">'):

            # image_data is expected to look like
            #   <path>" class="wallpaper-button" download="<filename>
            path, separator, filename = image_data.partition(
                '" class="wallpaper-button" download="')
            if not separator:
                # partition() found no separator: 'path' would hold the
                # anchor's raw attribute text and 'filename' would be
                # empty, so this is not a wallpaper download link.
                continue

            # NOTE(review): presumably stores filename/extension fields
            # in entry_data (the return value is unused) - confirm in
            # the text module.
            text.nameext_from_url(filename, entry_data)
            yield Message.Url, self.root + path, entry_data