# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://desktopography.net/"""
from .common import Extractor, Message
from .. import text
# Regex prefix shared by the extractor URL patterns below: an optional
# "http://" or "https://" scheme followed by the desktopography.net host.
BASE_PATTERN = r"(?:https?://)?desktopography\.net"
class DesktopographyExtractor(Extractor):
    """Common base for every desktopography extractor in this module"""

    # Site identifier shared by all subclasses.
    category = "desktopography"

    # Scheme and host that the subclasses prepend to site-relative paths.
    root = "https://desktopography.net"

    # Format template keyed on the file name only.
    # NOTE(review): presumably consumed by the Extractor base class to build
    # per-file archive IDs -- confirm against the base implementation.
    archive_fmt = "{filename}"
class DesktopographySiteExtractor(DesktopographyExtractor):
    """Extractor for all desktopography exhibitions"""
    subcategory = "site"
    pattern = BASE_PATTERN + r"/$"
    example = "https://desktopography.net/"

    def items(self):
        """Queue every yearly exhibition linked from the site's front page.

        Requests ``self.root`` and scans the returned HTML for exhibition
        links.

        Yields:
            ``(Message.Queue, url, data)`` tuples, one per link found, where
            ``url`` is ``<root>/exhibition-<year>/`` and ``data`` carries
            ``DesktopographyExhibitionExtractor`` under ``"_extractor"``.
        """
        page = self.request(self.root).text
        data = {"_extractor": DesktopographyExhibitionExtractor}

        # The scan needs a begin *and* an end marker; previously only a
        # single empty string was passed.  The captured text is the part
        # between ".../exhibition-" and the closing "/" of the href, which
        # is exactly what the URL below is rebuilt from.
        # NOTE(review): the surrounding anchor markup ('<a href="' ... '">')
        # is reconstructed -- confirm against the live front-page HTML.
        begin = '<a href="' + self.root + '/exhibition-'
        end = '/">'

        for exhibition_year in text.extract_iter(page, begin, end):
            url = self.root + "/exhibition-" + exhibition_year + "/"
            yield Message.Queue, url, data
class DesktopographyExhibitionExtractor(DesktopographyExtractor):
    """Extractor for a yearly desktopography exhibition"""
    subcategory = "exhibition"
    pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
    example = "https://desktopography.net/exhibition-2020/"

    def __init__(self, match):
        """Store the exhibition identifier captured by ``pattern``.

        Args:
            match: Regex match object for ``pattern``; group 1 is the text
                following ``/exhibition-`` in the URL.
        """
        DesktopographyExtractor.__init__(self, match)
        self.year = match.group(1)

    def items(self):
        """Queue every portfolio entry linked from the exhibition page.

        Yields:
            ``(Message.Queue, url, data)`` tuples, one per entry link found,
            where ``url`` is ``<root>/portfolios/<entry>`` and ``data``
            carries ``DesktopographyEntryExtractor`` under ``"_extractor"``
            plus the exhibition ``"year"``.
        """
        exhibition_url = "{}/exhibition-{}/".format(self.root, self.year)
        # Derived from self.root rather than a second hard-coded copy of the
        # host, so it stays consistent with the rest of the module.
        base_entry_url = self.root + "/portfolios/"
        page = self.request(exhibition_url).text

        data = {
            "_extractor": DesktopographyEntryExtractor,
            "year": self.year,
        }

        # The scan needs a begin *and* an end marker; previously only a
        # single empty string was passed.  The captured text is whatever
        # follows the portfolio base URL inside the href attribute.
        # NOTE(review): the anchor's class attribute is reconstructed --
        # confirm against the live exhibition-page HTML.
        begin = '<a class="overlay-background" href="' + base_entry_url
        end = '">'

        for entry_path in text.extract_iter(page, begin, end):
            yield Message.Queue, base_entry_url + entry_path, data
class DesktopographyEntryExtractor(DesktopographyExtractor):
    """Extractor for all resolutions of a desktopography wallpaper"""
    subcategory = "entry"
    pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
    example = "https://desktopography.net/portfolios/NAME/"

    def __init__(self, match):
        """Store the portfolio entry name captured by ``pattern``.

        Args:
            match: Regex match object for ``pattern``; group 1 is the entry
                name following ``/portfolios/`` in the URL.
        """
        DesktopographyExtractor.__init__(self, match)
        self.entry = match.group(1)

    def items(self):
        """Emit one download URL per wallpaper link on the entry page.

        Yields:
            First ``(Message.Directory, entry_data)``, then one
            ``(Message.Url, url, entry_data)`` tuple per link found.
            ``entry_data`` is a single dict that is updated in place with the
            current file's name fields before each ``Message.Url`` is yielded.
        """
        url = "{}/portfolios/{}".format(self.root, self.entry)
        page = self.request(url).text

        entry_data = {"entry": self.entry}
        yield Message.Directory, entry_data

        # The scan needs a begin *and* an end marker; previously only a
        # single empty string was passed.  Each captured chunk must start
        # with the root-relative file path (self.root is prepended below)
        # and contain the class/download separator used by partition().
        # NOTE(review): the anchor's target attribute is reconstructed --
        # confirm against the live entry-page HTML.
        begin = '<a target="_blank" href="' + self.root
        end = '">'

        for image_data in text.extract_iter(page, begin, end):
            # Left of the separator: site-relative path of the image file.
            # Right of it: the value of the link's download attribute.
            path, _, filename = image_data.partition(
                '" class="wallpaper-button" download="')
            text.nameext_from_url(filename, entry_data)
            yield Message.Url, self.root + path, entry_data