2021-09-17 19:59:51 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License version 2 as
|
|
|
|
# published by the Free Software Foundation.
|
|
|
|
|
2021-09-17 20:09:24 +02:00
|
|
|
"""Extractors for https://desktopography.net/"""
|
2021-09-17 19:59:51 +02:00
|
|
|
|
|
|
|
from .common import Extractor, Message
|
|
|
|
from .. import text
|
|
|
|
|
|
|
|
# Regex prefix shared by the extractor patterns in this module: matches the
# desktopography.net host, with the "http://" or "https://" scheme optional.
BASE_PATTERN = r"(?:https?://)?desktopography\.net"
|
|
|
|
|
|
|
|
|
|
|
|
class DesktopographyExtractor(Extractor):
    """Base class for desktopography extractors"""
    # Site root; the subclasses in this module build their request URLs
    # and result URLs from it.
    root = "https://desktopography.net"
    category = "desktopography"
    # Format string that uses only the "filename" field.
    archive_fmt = "{filename}"
|
|
|
|
|
|
|
|
|
2021-09-17 20:09:24 +02:00
|
|
|
class DesktopographySiteExtractor(DesktopographyExtractor):
    """Extractor for all desktopography exhibitions"""
    subcategory = "site"
    example = "https://desktopography.net/"
    pattern = BASE_PATTERN + r"/$"

    def items(self):
        """Queue every exhibition linked from the site's front page"""
        # Markers surrounding the exhibition identifier inside each link
        link_start = '<a href="https://desktopography.net/exhibition-'
        link_end = '/">'

        html = self.request(self.root).text

        # Each queued URL is handed over together with the extractor
        # class that should process it.
        queue_data = {"_extractor": DesktopographyExhibitionExtractor}
        url_prefix = self.root + "/exhibition-"

        for year in text.extract_iter(html, link_start, link_end):
            yield Message.Queue, url_prefix + year + "/", queue_data
|
2021-09-17 19:59:51 +02:00
|
|
|
|
|
|
|
|
|
|
|
class DesktopographyExhibitionExtractor(DesktopographyExtractor):
    """Extractor for a yearly desktopography exhibition"""
    subcategory = "exhibition"
    example = "https://desktopography.net/exhibition-2020/"
    pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"

    def __init__(self, match):
        """Store the URL part captured after "exhibition-" as 'year'"""
        super().__init__(match)
        self.year = match.group(1)

    def items(self):
        """Queue every portfolio entry linked from the exhibition page"""
        portfolios = "https://desktopography.net/portfolios/"
        # Entry links are located by this exact anchor prefix; whatever
        # follows it up to the closing '">' is appended to 'portfolios'.
        link_start = '<a class="overlay-background" href="' + portfolios

        exhibition_url = "{}/exhibition-{}/".format(self.root, self.year)
        html = self.request(exhibition_url).text

        queue_data = {
            "_extractor": DesktopographyEntryExtractor,
            "year": self.year,
        }

        for slug in text.extract_iter(html, link_start, '">'):
            yield Message.Queue, portfolios + slug, queue_data
|
2021-09-17 19:59:51 +02:00
|
|
|
|
|
|
|
|
|
|
|
class DesktopographyEntryExtractor(DesktopographyExtractor):
    """Extractor for all resolutions of a desktopography wallpaper"""
    subcategory = "entry"
    example = "https://desktopography.net/portfolios/NAME/"
    pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"

    def __init__(self, match):
        """Store the portfolio name captured from the URL as 'entry'"""
        super().__init__(match)
        self.entry = match.group(1)

    def items(self):
        """Yield a directory message, then one URL message per link"""
        link_start = '<a target="_blank" href="https://desktopography.net'
        attr_sep = '" class="wallpaper-button" download="'

        entry_url = "{}/portfolios/{}".format(self.root, self.entry)
        html = self.request(entry_url).text

        metadata = {"entry": self.entry}
        yield Message.Directory, metadata

        for anchor in text.extract_iter(html, link_start, '">'):
            # Text before the separator is the path following the host
            # name; text after it is the value of the download attribute.
            pieces = anchor.partition(attr_sep)
            # The same dict object is passed in and re-yielded for every
            # link; nameext_from_url() presumably fills its name and
            # extension fields in place (its return value is unused).
            text.nameext_from_url(pieces[2], metadata)
            yield Message.Url, self.root + pieces[0], metadata
|