From af61d2b03702c19c4ef8b2e9c3f129c75b9fcc9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 10 Feb 2024 03:00:58 +0100 Subject: [PATCH] [wikimedia] combine most wikimedia.org sites (#1443) add wikidata.org and wikivoyage.org --- docs/supportedsites.md | 42 +++---------------------------- gallery_dl/extractor/wikimedia.py | 41 ++++++------------------------ scripts/supportedsites.py | 3 ++- test/results/wikidata.py | 23 +++++++++++++++++ test/results/wikivoyage.py | 23 +++++++++++++++++ 5 files changed, 59 insertions(+), 73 deletions(-) create mode 100644 test/results/wikidata.py create mode 100644 test/results/wikivoyage.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 869c003d..07aff3d9 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -124,7 +124,7 @@ Consider all listed sites to potentially be NSFW. Bluesky https://bsky.app/ - Likes, Media Files, Posts, Replies, User Profiles + Feeds, Followed Users, Likes, Lists, Media Files, Posts, Replies, User Profiles Supported @@ -1470,44 +1470,8 @@ Consider all listed sites to potentially be NSFW. Wikimedia Instances - Wikipedia - https://www.wikipedia.org/ - Articles - - - - Wiktionary - https://www.wiktionary.org/ - Articles - - - - Wikiquote - https://www.wikiquote.org/ - Articles - - - - Wikibooks - https://www.wikibooks.org/ - Articles - - - - Wikisource - https://www.wikisource.org/ - Articles - - - - Wikinews - https://www.wikinews.org/ - Articles - - - - Wikiversity - https://www.wikiversity.org/ + Wikimedia + https://www.wikimedia.org/ Articles diff --git a/gallery_dl/extractor/wikimedia.py b/gallery_dl/extractor/wikimedia.py index 44a462e3..7f4b763d 100644 --- a/gallery_dl/extractor/wikimedia.py +++ b/gallery_dl/extractor/wikimedia.py @@ -25,7 +25,9 @@ class WikimediaExtractor(BaseExtractor): BaseExtractor.__init__(self, match) path = match.group(match.lastindex) - if self.category == "fandom": + if self.category == "wikimedia": + self.category = self.root.split(".")[-2] + elif self.category == "fandom": self.category = \ "fandom-" + self.root.partition(".")[0].rpartition("/")[2] @@ -119,39 +121,12 @@ class WikimediaExtractor(BaseExtractor): BASE_PATTERN = WikimediaExtractor.update({ - "wikipedia": { + "wikimedia": { "root": None, - "pattern": r"[a-z]{2,}\.wikipedia\.org", - "api-path": "/w/api.php", - }, - "wiktionary": { - "root": None, - "pattern": r"[a-z]{2,}\.wiktionary\.org", - "api-path": "/w/api.php", - }, - "wikiquote": { - "root": None, - "pattern": r"[a-z]{2,}\.wikiquote\.org", - "api-path": "/w/api.php", - }, - "wikibooks": { - "root": None, - "pattern": r"[a-z]{2,}\.wikibooks\.org", - "api-path": "/w/api.php", - }, - "wikisource": { - "root": None, - "pattern": r"[a-z]{2,}\.wikisource\.org", - "api-path": "/w/api.php", - }, - "wikinews": { - "root": None, - "pattern": r"[a-z]{2,}\.wikinews\.org", - "api-path": "/w/api.php", - }, - "wikiversity": { - "root": None, - "pattern": r"[a-z]{2,}\.wikiversity\.org", + "pattern": r"[a-z]{2,}\." + r"wik(?:i(?:pedia|quote|books|source|news|versity|data" + r"|voyage)|tionary)" + r"\.org", "api-path": "/w/api.php", }, "wikispecies": { diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 68db90e9..98a23234 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -325,7 +325,8 @@ BASE_MAP = { } URL_MAP = { - "blogspot": "https://www.blogger.com/", + "blogspot" : "https://www.blogger.com/", + "wikimedia": "https://www.wikimedia.org/", } _OAUTH = 'OAuth' diff --git a/test/results/wikidata.py b/test/results/wikidata.py new file mode 100644 index 00000000..c0e2eb6a --- /dev/null +++ b/test/results/wikidata.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import wikimedia + + +__tests__ = ( +{ + "#url" : "https://www.wikidata.org/wiki/Title", + "#category": ("wikimedia", "wikidata", "article"), + "#class" : wikimedia.WikimediaArticleExtractor, +}, + +{ + "#url" : "https://en.wikidata.org/wiki/Category:Title", + "#category": ("wikimedia", "wikidata", "category"), + "#class" : wikimedia.WikimediaArticleExtractor, +}, + +) diff --git a/test/results/wikivoyage.py b/test/results/wikivoyage.py new file mode 100644 index 00000000..860e68de --- /dev/null +++ b/test/results/wikivoyage.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import wikimedia + + +__tests__ = ( +{ + "#url" : "https://www.wikivoyage.org/wiki/Title", + "#category": ("wikimedia", "wikivoyage", "article"), + "#class" : wikimedia.WikimediaArticleExtractor, +}, + +{ + "#url" : "https://en.wikivoyage.org/wiki/Category:Title", + "#category": ("wikimedia", "wikivoyage", "category"), + "#class" : wikimedia.WikimediaArticleExtractor, +}, + +)