1
0
mirror of https://github.com/mikf/gallery-dl.git synced 2024-11-22 02:32:33 +01:00

Merge branch 'mikf:master' into master

This commit is contained in:
Fannovel16 2023-09-21 21:36:17 +07:00 committed by GitHub
commit 9e5b2ef10e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
445 changed files with 20117 additions and 10139 deletions

View File

@ -32,8 +32,7 @@ jobs:
- name: Install dependencies
run: |
pip install -r requirements.txt
pip install "flake8<4" "importlib-metadata<5"
pip install youtube-dl
pip install flake8 youtube-dl
- name: Install yt-dlp
run: |
@ -53,7 +52,14 @@ jobs:
- name: Lint with flake8
run: |
flake8 .
case "${{ matrix.python-version }}" in
3.4|3.5|3.6|3.7)
flake8 --extend-exclude scripts/export_tests.py .
;;
*)
flake8 .
;;
esac
- name: Run tests
run: |

View File

@ -390,7 +390,6 @@ Description
* ``e621`` (*)
* ``e926`` (*)
* ``exhentai``
* ``gfycat``
* ``idolcomplex``
* ``imgbb``
* ``inkbunny``
@ -534,7 +533,7 @@ extractor.*.user-agent
Type
``string``
Default
``"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0"``
``"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0"``
Description
User-Agent header value to be used for HTTP requests.
@ -566,6 +565,21 @@ Description
browser would use HTTP/2.
extractor.*.referer
-------------------
Type
* ``bool``
* ``string``
Default
``true``
Description
Send `Referer <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referer>`__
headers with all outgoing HTTP requests.
If this is a ``string``, send it as Referer
instead of the extractor's ``root`` domain.
extractor.*.headers
-------------------
Type
@ -577,7 +591,8 @@ Default
"User-Agent" : "<extractor.*.user-agent>",
"Accept" : "*/*",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate"
"Accept-Encoding": "gzip, deflate",
"Referer" : "<extractor.*.referer>"
}
Description
@ -714,7 +729,7 @@ Type
Default
``["oauth", "recursive", "test"]`` + current extractor category
Example
``["imgur", "gfycat:user", "*:image"]``
``["imgur", "redgifs:user", "*:image"]``
Description
A list of extractor identifiers to ignore (or allow)
when spawning child extractors for unknown URLs,
@ -723,7 +738,7 @@ Description
Each identifier can be
* A category or basecategory name (``"imgur"``, ``"mastodon"``)
* | A (base)category-subcategory pair, where both names are separated by a colon (``"gfycat:user"``).
* | A (base)category-subcategory pair, where both names are separated by a colon (``"redgifs:user"``).
| Both names can be a ``*`` or left empty, matching all possible names (``"*:image"``, ``":user"``).
Note: Any ``blacklist`` setting will automatically include
@ -1475,6 +1490,22 @@ Description
* ``"exhentai.org"``: Use ``exhentai.org`` for all URLs
extractor.exhentai.fav
----------------------
Type
``string``
Example
``"4"``
Description
After downloading a gallery,
add it to your account's favorites as the given category number.
Note: Set this to ``"favdel"`` to remove galleries from your favorites.
Note: This will remove any Favorite Notes when applied
to already favorited galleries.
extractor.exhentai.limits
-------------------------
Type
@ -1690,29 +1721,6 @@ Description
even ones without a ``generic:`` prefix.
extractor.gfycat.format
-----------------------
Type
* ``string``
* ``list`` of ``strings``
Default
``["mp4", "webm", "mobile", "gif"]``
Description
List of names of the preferred animation format, which can be
``"mp4"``,
``"webm"``,
``"mobile"``,
``"gif"``, or
``"webp"``.
If a selected format is not available, the next one in the list will be
tried until an available format is found.
If the format is given as ``string``, it will be extended with
``["mp4", "webm", "mobile", "gif"]``. Use a list with one element to
restrict it to only one possible format.
extractor.gofile.api-token
--------------------------
Type
@ -3336,7 +3344,7 @@ extractor.twitter.users
Type
``string``
Default
``"timeline"``
``"user"``
Example
``"https://twitter.com/search?q=from:{legacy[screen_name]}"``
Description
@ -3347,7 +3355,8 @@ Description
Special values:
* ``"timeline"``: ``https://twitter.com/i/user/{rest_id}``
* ``"user"``: ``https://twitter.com/i/user/{rest_id}``
* ``"timeline"``: ``https://twitter.com/id:{rest_id}/timeline``
* ``"tweets"``: ``https://twitter.com/id:{rest_id}/tweets``
* ``"media"``: ``https://twitter.com/id:{rest_id}/media``

View File

@ -165,7 +165,7 @@
"reddit":
{
"#": "only spawn child extractors for links to specific sites",
"whitelist": ["imgur", "redgifs", "gfycat"],
"whitelist": ["imgur", "redgifs"],
"#": "put files from child extractors into the reddit directory",
"parent-directory": true,

View File

@ -10,7 +10,7 @@
"proxy": null,
"skip": true,
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
"retries": 4,
"timeout": 30.0,
"verify": true,
@ -125,10 +125,6 @@
"api-key": null,
"user-id": null
},
"gfycat":
{
"format": ["mp4", "webm", "mobile", "gif"]
},
"gofile": {
"api-token": null,
"website-token": null
@ -339,7 +335,7 @@
"text-tweets": false,
"twitpic": false,
"unique": true,
"users": "timeline",
"users": "user",
"videos": true
},
"unsplash":

View File

@ -88,7 +88,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>ArtStation</td>
<td>https://www.artstation.com/</td>
<td>Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results, User Profiles</td>
<td>Albums, Artwork Listings, Challenges, Followed Users, individual Images, Likes, Search Results, User Profiles</td>
<td></td>
</tr>
<tr>
@ -148,7 +148,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>DeviantArt</td>
<td>https://www.deviantart.com/</td>
<td>Collections, Deviations, Favorites, Folders, Galleries, Gallery Searches, Journals, Popular Images, Scraps, Search Results, Sta.sh, Status Updates, Tag Searches, User Profiles, Watches</td>
<td>Collections, Deviations, Favorites, Folders, Followed Users, Galleries, Gallery Searches, Journals, Popular Images, Scraps, Search Results, Sta.sh, Status Updates, Tag Searches, User Profiles, Watches</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>
<tr>
@ -226,7 +226,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Fur Affinity</td>
<td>https://www.furaffinity.net/</td>
<td>Favorites, Galleries, Posts, Scraps, Search Results, User Profiles</td>
<td>Favorites, Followed Users, Galleries, Posts, Scraps, Search Results, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
</tr>
<tr>
@ -247,12 +247,6 @@ Consider all sites to be NSFW unless otherwise known.
<td>Favorites, Pools, Posts, Redirects, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Gfycat</td>
<td>https://gfycat.com/</td>
<td>Collections, individual Images, Search Results, User Profiles</td>
<td>Supported</td>
</tr>
<tr>
<td>Gofile</td>
<td>https://gofile.io/</td>
@ -357,7 +351,7 @@ Consider all sites to be NSFW unless otherwise known.
</tr>
<tr>
<td>Imagevenue</td>
<td>https://imagevenue.com/</td>
<td>https://www.imagevenue.com/</td>
<td>individual Images</td>
<td></td>
</tr>
@ -406,13 +400,13 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Inkbunny</td>
<td>https://inkbunny.net/</td>
<td>Favorites, Pools, Posts, Search Results, User Profiles</td>
<td>Favorites, Followed Users, Pools, Posts, Search Results, User Profiles</td>
<td>Supported</td>
</tr>
<tr>
<td>Instagram</td>
<td>https://www.instagram.com/</td>
<td>Avatars, Collections, Guides, Highlights, Posts, Reels, Saved Posts, Stories, Tag Searches, Tagged Posts, User Profiles</td>
<td>Avatars, Collections, Followed Users, Guides, Highlights, Posts, Reels, Saved Posts, Stories, Tag Searches, Tagged Posts, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
</tr>
<tr>
@ -435,7 +429,7 @@ Consider all sites to be NSFW unless otherwise known.
</tr>
<tr>
<td>JPG Fish</td>
<td>https://jpeg.pet/</td>
<td>https://jpg1.su/</td>
<td>Albums, individual Images, User Profiles</td>
<td></td>
</tr>
@ -574,7 +568,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Newgrounds</td>
<td>https://www.newgrounds.com/</td>
<td>Art, Audio, Favorites, Games, individual Images, Media Files, Movies, Search Results, User Profiles</td>
<td>Art, Audio, Favorites, Followed Users, Games, individual Images, Media Files, Movies, Search Results, User Profiles</td>
<td>Supported</td>
</tr>
<tr>
@ -699,7 +693,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Pornhub</td>
<td>https://www.pornhub.com/</td>
<td>Galleries, User Profiles</td>
<td>Galleries, Gifs, Photos, User Profiles</td>
<td></td>
</tr>
<tr>
@ -783,7 +777,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Skeb</td>
<td>https://skeb.jp/</td>
<td>Posts, Search Results, User Profiles</td>
<td>Followed Users, Posts, Search Results, User Profiles</td>
<td></td>
</tr>
<tr>
@ -879,7 +873,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Twitter</td>
<td>https://twitter.com/</td>
<td>Avatars, Backgrounds, Bookmarks, Events, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles</td>
<td>Avatars, Backgrounds, Bookmarks, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles</td>
<td>Supported</td>
</tr>
<tr>
@ -1126,6 +1120,12 @@ Consider all sites to be NSFW unless otherwise known.
<td>Favorites, Pools, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Xbooru</td>
<td>https://xbooru.com/</td>
<td>Favorites, Pools, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td colspan="4"><strong>jschan Imageboards</strong></td>
@ -1165,19 +1165,19 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Misskey.io</td>
<td>https://misskey.io/</td>
<td>Favorites, Images from Notes, User Profiles</td>
<td>Favorites, Followed Users, Images from Notes, User Profiles</td>
<td></td>
</tr>
<tr>
<td>Lesbian.energy</td>
<td>https://lesbian.energy/</td>
<td>Favorites, Images from Notes, User Profiles</td>
<td>Favorites, Followed Users, Images from Notes, User Profiles</td>
<td></td>
</tr>
<tr>
<td>Sushi.ski</td>
<td>https://sushi.ski/</td>
<td>Favorites, Images from Notes, User Profiles</td>
<td>Favorites, Followed Users, Images from Notes, User Profiles</td>
<td></td>
</tr>
@ -1477,19 +1477,19 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>mastodon.social</td>
<td>https://mastodon.social/</td>
<td>Bookmarks, Images from Statuses, User Profiles</td>
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>
<tr>
<td>Pawoo</td>
<td>https://pawoo.net/</td>
<td>Bookmarks, Images from Statuses, User Profiles</td>
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>
<tr>
<td>baraag</td>
<td>https://baraag.net/</td>
<td>Bookmarks, Images from Statuses, User Profiles</td>
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>

View File

@ -196,16 +196,15 @@ def main():
elif args.list_extractors:
write = sys.stdout.write
fmt = "{}\n{}\nCategory: {} - Subcategory: {}{}\n\n".format
fmt = ("{}{}\nCategory: {} - Subcategory: {}"
"\nExample : {}\n\n").format
for extr in extractor.extractors():
if not extr.__doc__:
continue
test = next(extr._get_tests(), None)
write(fmt(
extr.__name__, extr.__doc__,
extr.__name__,
"\n" + extr.__doc__ if extr.__doc__ else "",
extr.category, extr.subcategory,
"\nExample : " + test[0] if test else "",
extr.example,
))
elif args.clear_cache:
@ -297,7 +296,7 @@ def main():
return retval
except KeyboardInterrupt:
sys.exit("\nKeyboardInterrupt")
raise SystemExit("\nKeyboardInterrupt")
except BrokenPipeError:
pass
except OSError as exc:

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2017-2019 Mike Fährmann
# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,4 +17,4 @@ if __package__ is None and not hasattr(sys, "frozen"):
import gallery_dl
if __name__ == "__main__":
sys.exit(gallery_dl.main())
raise SystemExit(gallery_dl.main())

View File

@ -9,7 +9,6 @@
""" """
import re
import sys
import logging
import operator
from . import util, exception
@ -98,7 +97,7 @@ def action_exit(opts):
pass
def _exit(args):
sys.exit(opts)
raise SystemExit(opts)
return _exit

View File

@ -100,12 +100,12 @@ def load(files=None, strict=False, loads=util.json_loads):
except OSError as exc:
if strict:
log.error(exc)
sys.exit(1)
raise SystemExit(1)
except Exception as exc:
log.error("%s when loading '%s': %s",
exc.__class__.__name__, path, exc)
if strict:
sys.exit(2)
raise SystemExit(2)
else:
if not _config:
_config.update(conf)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2022 Mike Fährmann
# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -20,26 +20,8 @@ class _2chanThreadExtractor(Extractor):
filename_fmt = "{tim}.{extension}"
archive_fmt = "{board}_{thread}_{tim}"
url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
pattern = r"(?:https?://)?([\w-]+)\.2chan\.net/([^/]+)/res/(\d+)"
test = ("https://dec.2chan.net/70/res/14565.htm", {
"pattern": r"https://dec\.2chan\.net/70/src/\d{13}\.jpg",
"count": ">= 3",
"keyword": {
"board": "70",
"board_name": "新板提案",
"com": str,
"fsize": r"re:\d+",
"name": "名無し",
"no": r"re:1[45]\d\d\d",
"now": r"re:22/../..\(.\)..:..:..",
"post": "無題",
"server": "dec",
"thread": "14565",
"tim": r"re:^\d{13}$",
"time": r"re:^\d{10}$",
"title": "ヒロアカ板"
},
})
pattern = r"(?:https?://)?([\w-]+)\.2chan\.net/([^/?#]+)/res/(\d+)"
example = "https://dec.2chan.net/12/res/12345.htm"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -21,26 +21,7 @@ class _2chenThreadExtractor(Extractor):
filename_fmt = "{time} {filename}.{extension}"
archive_fmt = "{board}_{thread}_{hash}_{time}"
pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
test = (
("https://sturdychan.help/tv/268929", {
"pattern": r"https://sturdychan\.help/assets/images"
r"/src/\w{40}\.\w+$",
"count": ">= 179",
"keyword": {
"board": "tv",
"date": "type:datetime",
"hash": r"re:[0-9a-f]{40}",
"name": "Anonymous",
"no": r"re:\d+",
"thread": "268929",
"time": int,
"title": "「/ttg/ #118: 🇧🇷 edition」",
"url": str,
},
}),
("https://2chen.club/tv/1"),
("https://2chen.moe/jp/303786"),
)
example = "https://sturdychan.help/a/12345/"
def __init__(self, match):
Extractor.__init__(self, match)
@ -101,14 +82,7 @@ class _2chenBoardExtractor(Extractor):
subcategory = "board"
root = "https://sturdychan.help"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/catalog|/?$)"
test = (
("https://sturdychan.help/co/", {
"pattern": _2chenThreadExtractor.pattern
}),
("https://2chen.moe/co"),
("https://2chen.club/tv"),
("https://2chen.moe/co/catalog"),
)
example = "https://sturdychan.help/a/"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann
# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -101,20 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
subcategory = "user"
pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
r"/(?!photo_|genre_|tags/|rating/)([^/?#]+)")
test = (
("https://35photo.pro/liya", {
"pattern": r"https://([a-z][0-9]\.)?35photo\.pro"
r"/photos_(main|series)/.*\.jpg",
"count": 9,
}),
("https://35photo.pro/suhoveev", {
# last photo ID (1267028) isn't given as 'photo-id="<id>"
# there are only 23 photos without the last one
"count": ">= 33",
}),
("https://en.35photo.pro/liya"),
("https://ru.35photo.pro/liya"),
)
example = "https://35photo.pro/USER"
def __init__(self, match):
_35photoExtractor.__init__(self, match)
@ -143,11 +130,7 @@ class _35photoTagExtractor(_35photoExtractor):
directory_fmt = ("{category}", "Tags", "{search_tag}")
archive_fmt = "t{search_tag}_{id}_{num}"
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?#]+)"
test = ("https://35photo.pro/tags/landscape/", {
"range": "1-25",
"count": 25,
"archive": False,
})
example = "https://35photo.pro/tags/TAG/"
def __init__(self, match):
_35photoExtractor.__init__(self, match)
@ -180,7 +163,7 @@ class _35photoGenreExtractor(_35photoExtractor):
directory_fmt = ("{category}", "Genre", "{genre}")
archive_fmt = "g{genre_id}_{id}_{num}"
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/genre_(\d+)(/new/)?"
test = ("https://35photo.pro/genre_109/",)
example = "https://35photo.pro/genre_12345/"
def __init__(self, match):
_35photoExtractor.__init__(self, match)
@ -212,24 +195,7 @@ class _35photoImageExtractor(_35photoExtractor):
"""Extractor for individual images from 35photo.pro"""
subcategory = "image"
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/photo_(\d+)"
test = ("https://35photo.pro/photo_753340/", {
"count": 1,
"keyword": {
"url" : r"re:https://35photo\.pro/photos_main/.*\.jpg",
"id" : 753340,
"title" : "Winter walk",
"description": str,
"tags" : list,
"views" : int,
"favorites" : int,
"score" : int,
"type" : 0,
"date" : "15 авг, 2014",
"user" : "liya",
"user_id" : 20415,
"user_name" : "Liya Mirzaeva",
},
})
example = "https://35photo.pro/photo_12345/"
def __init__(self, match):
_35photoExtractor.__init__(self, match)

View File

@ -27,10 +27,7 @@ class _3dbooruTagExtractor(_3dbooruBase, moebooru.MoebooruTagExtractor):
"""Extractor for images from behoimi.org based on search-tags"""
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org/post"
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)")
test = ("http://behoimi.org/post?tags=himekawa_azuru+dress", {
"url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1",
"content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
})
example = "http://behoimi.org/post?tags=TAG"
def posts(self):
params = {"tags": self.tags}
@ -40,10 +37,7 @@ class _3dbooruTagExtractor(_3dbooruBase, moebooru.MoebooruTagExtractor):
class _3dbooruPoolExtractor(_3dbooruBase, moebooru.MoebooruPoolExtractor):
"""Extractor for image-pools from behoimi.org"""
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"
test = ("http://behoimi.org/pool/show/27", {
"url": "da75d2d1475449d5ef0c266cb612683b110a30f2",
"content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
})
example = "http://behoimi.org/pool/show/12345"
def posts(self):
params = {"tags": "pool:" + self.pool_id}
@ -53,17 +47,7 @@ class _3dbooruPoolExtractor(_3dbooruBase, moebooru.MoebooruPoolExtractor):
class _3dbooruPostExtractor(_3dbooruBase, moebooru.MoebooruPostExtractor):
"""Extractor for single images from behoimi.org"""
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"
test = ("http://behoimi.org/post/show/140852", {
"url": "ce874ea26f01d6c94795f3cc3aaaaa9bc325f2f6",
"content": "26549d55b82aa9a6c1686b96af8bfcfa50805cd4",
"options": (("tags", True),),
"keyword": {
"tags_character": "furude_rika",
"tags_copyright": "higurashi_no_naku_koro_ni",
"tags_model": "himekawa_azuru",
"tags_general": str,
},
})
example = "http://behoimi.org/post/show/12345"
def posts(self):
params = {"tags": "id:" + self.post_id}
@ -76,7 +60,4 @@ class _3dbooruPopularExtractor(
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org"
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
r"(?:\?(?P<query>[^#]*))?")
test = ("http://behoimi.org/post/popular_by_month?month=2&year=2013", {
"pattern": r"http://behoimi\.org/data/../../[0-9a-f]{32}\.jpg",
"count": 20,
})
example = "http://behoimi.org/post/popular_by_month"

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2019 Mike Fährmann
# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -21,17 +21,7 @@ class _4chanThreadExtractor(Extractor):
archive_fmt = "{board}_{thread}_{tim}"
pattern = (r"(?:https?://)?boards\.4chan(?:nel)?\.org"
r"/([^/]+)/thread/(\d+)")
test = (
("https://boards.4chan.org/tg/thread/15396072/", {
"url": "39082ad166161966d7ba8e37f2173a824eb540f0",
"keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
"content": "20b7b51afa51c9c31a0020a0737b889532c8d7ec",
}),
("https://boards.4channel.org/tg/thread/15396072/", {
"url": "39082ad166161966d7ba8e37f2173a824eb540f0",
"keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
}),
)
example = "https://boards.4channel.org/a/thread/12345/"
def __init__(self, match):
Extractor.__init__(self, match)
@ -65,10 +55,7 @@ class _4chanBoardExtractor(Extractor):
category = "4chan"
subcategory = "board"
pattern = r"(?:https?://)?boards\.4chan(?:nel)?\.org/([^/?#]+)/\d*$"
test = ("https://boards.4channel.org/po/", {
"pattern": _4chanThreadExtractor.pattern,
"count": ">= 100",
})
example = "https://boards.4channel.org/a/"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -21,21 +21,7 @@ class _4chanarchivesThreadExtractor(Extractor):
filename_fmt = "{no}-{filename}.{extension}"
archive_fmt = "{board}_{thread}_{no}"
pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)/thread/(\d+)"
test = (
("https://4chanarchives.com/board/c/thread/2707110", {
"pattern": r"https://i\.imgur\.com/(0wLGseE|qbByWDc)\.jpg",
"count": 2,
"keyword": {
"board": "c",
"com": str,
"name": "Anonymous",
"no": int,
"thread": "2707110",
"time": r"re:2016-07-1\d \d\d:\d\d:\d\d",
"title": "Ren Kagami from 'Oyako Neburi'",
},
}),
)
example = "https://4chanarchives.com/board/a/thread/12345/"
def __init__(self, match):
Extractor.__init__(self, match)
@ -106,15 +92,7 @@ class _4chanarchivesBoardExtractor(Extractor):
subcategory = "board"
root = "https://4chanarchives.com"
pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)(?:/(\d+))?/?$"
test = (
("https://4chanarchives.com/board/c/", {
"pattern": _4chanarchivesThreadExtractor.pattern,
"range": "1-40",
"count": 40,
}),
("https://4chanarchives.com/board/c"),
("https://4chanarchives.com/board/c/10"),
)
example = "https://4chanarchives.com/board/a/"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -23,9 +23,6 @@ class _500pxExtractor(Extractor):
root = "https://500px.com"
cookies_domain = ".500px.com"
def _init(self):
self.session.headers["Referer"] = self.root + "/"
def items(self):
data = self.metadata()
@ -96,15 +93,7 @@ class _500pxUserExtractor(_500pxExtractor):
"""Extractor for photos from a user's photostream on 500px.com"""
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])"
test = (
("https://500px.com/p/light_expression_photography", {
"pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2",
"range": "1-99",
"count": 99,
}),
("https://500px.com/light_expression_photography"),
("https://web.500px.com/light_expression_photography"),
)
example = "https://500px.com/USER"
def __init__(self, match):
_500pxExtractor.__init__(self, match)
@ -134,17 +123,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}")
pattern = (BASE_PATTERN + r"/(?!photo/)(?:p/)?"
r"([^/?#]+)/galleries/([^/?#]+)")
test = (
("https://500px.com/p/fashvamp/galleries/lera", {
"url": "002dc81dee5b4a655f0e31ad8349e8903b296df6",
"count": 3,
"keyword": {
"gallery": dict,
"user": dict,
},
}),
("https://500px.com/fashvamp/galleries/lera"),
)
example = "https://500px.com/USER/galleries/GALLERY"
def __init__(self, match):
_500pxExtractor.__init__(self, match)
@ -200,7 +179,7 @@ class _500pxFavoriteExtractor(_500pxExtractor):
"""Extractor for favorite 500px photos"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/liked/?$"
test = ("https://500px.com/liked",)
example = "https://500px.com/liked"
def photos(self):
variables = {"pageSize": 20}
@ -224,50 +203,7 @@ class _500pxImageExtractor(_500pxExtractor):
"""Extractor for individual images from 500px.com"""
subcategory = "image"
pattern = BASE_PATTERN + r"/photo/(\d+)"
test = ("https://500px.com/photo/222049255/queen-of-coasts", {
"url": "fbdf7df39325cae02f5688e9f92935b0e7113315",
"count": 1,
"keyword": {
"camera": "Canon EOS 600D",
"camera_info": dict,
"comments": list,
"comments_count": int,
"created_at": "2017-08-01T08:40:05+00:00",
"description": str,
"editored_by": None,
"editors_choice": False,
"extension": "jpg",
"feature": "popular",
"feature_date": "2017-08-01T09:58:28+00:00",
"focal_length": "208",
"height": 3111,
"id": 222049255,
"image_format": "jpg",
"image_url": list,
"images": list,
"iso": "100",
"lens": "EF-S55-250mm f/4-5.6 IS II",
"lens_info": dict,
"liked": None,
"location": None,
"location_details": dict,
"name": "Queen Of Coasts",
"nsfw": False,
"privacy": False,
"profile": True,
"rating": float,
"status": 1,
"tags": list,
"taken_at": "2017-05-04T17:36:51+00:00",
"times_viewed": int,
"url": "/photo/222049255/Queen-Of-Coasts-by-Alice-Nabieva",
"user": dict,
"user_id": 12847235,
"votes_count": int,
"watermark": True,
"width": 4637,
},
})
example = "https://500px.com/photo/12345/TITLE"
def __init__(self, match):
_500pxExtractor.__init__(self, match)

View File

@ -57,48 +57,7 @@ class _8chanThreadExtractor(_8chanExtractor):
filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
test = (
("https://8chan.moe/vhs/res/4.html", {
"pattern": r"https://8chan\.moe/\.media/[0-9a-f]{64}\.\w+$",
"count": 14,
"keyword": {
"archived": False,
"autoSage": False,
"boardDescription": "Film and Cinema",
"boardMarkdown": None,
"boardName": "Movies",
"boardUri": "vhs",
"creation": r"re:\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{3}Z",
"cyclic": False,
"email": None,
"id": "re:^[0-9a-f]{6}$",
"locked": False,
"markdown": str,
"maxFileCount": 5,
"maxFileSize": "32.00 MB",
"maxMessageLength": 8001,
"message": str,
"mime": str,
"name": "Anonymous",
"num": int,
"originalName": str,
"path": r"re:/.media/[0-9a-f]{64}\.\w+$",
"pinned": False,
"postId": int,
"signedRole": None,
"size": int,
"threadId": 4,
"thumb": r"re:/.media/t_[0-9a-f]{64}$",
"uniquePosters": 9,
"usesCustomCss": True,
"usesCustomJs": False,
"?wsPort": 8880,
"?wssPort": 2087,
},
}),
("https://8chan.se/vhs/res/4.html"),
("https://8chan.cc/vhs/res/4.html"),
)
example = "https://8chan.moe/a/res/12345.html"
def __init__(self, match):
_8chanExtractor.__init__(self, match)
@ -137,23 +96,12 @@ class _8chanBoardExtractor(_8chanExtractor):
"""Extractor for 8chan boards"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$"
test = (
("https://8chan.moe/vhs/"),
("https://8chan.moe/vhs/2.html", {
"pattern": _8chanThreadExtractor.pattern,
"count": 23,
}),
("https://8chan.se/vhs/"),
("https://8chan.cc/vhs/"),
)
example = "https://8chan.moe/a/"
def __init__(self, match):
_8chanExtractor.__init__(self, match)
_, self.board, self.page = match.groups()
def _init(self):
self.session.headers["Referer"] = self.root + "/"
def items(self):
page = text.parse_int(self.page, 1)
url = "{}/{}/{}.json".format(self.root, self.board, page)

View File

@ -22,51 +22,7 @@ class _8musesAlbumExtractor(Extractor):
root = "https://comics.8muses.com"
pattern = (r"(?:https?://)?(?:comics\.|www\.)?8muses\.com"
r"(/comics/album/[^?#]+)(\?[^#]+)?")
test = (
("https://comics.8muses.com/comics/album/Fakku-Comics/mogg/Liar", {
"url": "6286ac33087c236c5a7e51f8a9d4e4d5548212d4",
"pattern": r"https://comics.8muses.com/image/fl/[\w-]+",
"keyword": {
"url" : str,
"hash" : str,
"page" : int,
"count": 6,
"album": {
"id" : 10467,
"title" : "Liar",
"path" : "Fakku Comics/mogg/Liar",
"parts" : ["Fakku Comics", "mogg", "Liar"],
"private": False,
"url" : "https://comics.8muses.com/comics"
"/album/Fakku-Comics/mogg/Liar",
"parent" : 10464,
"views" : int,
"likes" : int,
"date" : "dt:2018-07-10 00:00:00",
},
},
}),
("https://www.8muses.com/comics/album/Fakku-Comics/santa", {
"count": ">= 3",
"pattern": pattern,
"keyword": {
"url" : str,
"name" : str,
"private": False,
},
}),
# custom sorting
("https://www.8muses.com/comics/album/Fakku-Comics/11?sort=az", {
"count": ">= 70",
"keyword": {"name": r"re:^[R-Zr-z]"},
}),
# non-ASCII characters
(("https://comics.8muses.com/comics/album/Various-Authors/Chessire88"
"/From-Trainers-to-Pokmons"), {
"count": 2,
"keyword": {"name": "re:From Trainers to Pokémons"},
}),
)
example = "https://comics.8muses.com/comics/album/PATH/TITLE"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -50,7 +50,6 @@ modules = [
"gelbooru",
"gelbooru_v01",
"gelbooru_v02",
"gfycat",
"gofile",
"hbrowse",
"hentai2read",

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019 Mike Fährmann
# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -18,17 +18,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
root = "https://www.adultempire.com"
pattern = (r"(?:https?://)?(?:www\.)?adult(?:dvd)?empire\.com"
r"(/(\d+)/gallery\.html)")
test = (
("https://www.adultempire.com/5998/gallery.html", {
"range": "1",
"keyword": "5b3266e69801db0d78c22181da23bc102886e027",
"content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
}),
("https://www.adultdvdempire.com/5683/gallery.html", {
"url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
"keyword": "8d448d79c4ac5f5b10a3019d5b5129ddb43655e5",
}),
)
example = "https://www.adultempire.com/12345/gallery.html"
def __init__(self, match):
GalleryExtractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann
# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -21,25 +21,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
filename_fmt = "{filename}.{extension}"
archive_fmt = "{gid}_{num}"
pattern = r"(?:https?://)?architizer\.com/projects/([^/?#]+)"
test = ("https://architizer.com/projects/house-lo/", {
"pattern": r"https://architizer-prod\.imgix\.net/media/mediadata"
r"/uploads/.+\.jpg$",
"keyword": {
"count": 27,
"description": str,
"firm": "Atelier Lina Bellovicova",
"gid": "225496",
"location": "Czechia",
"num": int,
"size": "1000 sqft - 3000 sqft",
"slug": "house-lo",
"status": "Built",
"subcategory": "project",
"title": "House LO",
"type": "Residential Private House",
"year": "2020",
},
})
example = "https://architizer.com/projects/NAME/"
def __init__(self, match):
url = "{}/projects/{}/".format(self.root, match.group(1))
@ -47,11 +29,13 @@ class ArchitizerProjectExtractor(GalleryExtractor):
def metadata(self, page):
extr = text.extract_from(page)
extr('id="Pages"', "")
return {
"title" : extr("data-name='", "'"),
"slug" : extr("data-slug='", "'"),
"gid" : extr("data-gid='", "'").rpartition(".")[2],
"firm" : extr("data-firm-leaders-str='", "'"),
"title" : extr('data-name="', '"'),
"slug" : extr('data-slug="', '"'),
"gid" : extr('data-gid="', '"').rpartition(".")[2],
"firm" : extr('data-firm-leaders-str="', '"'),
"location" : extr("<h2>", "<").strip(),
"type" : text.unescape(text.remove_html(extr(
'<div class="title">Type</div>', '<br'))),
@ -70,7 +54,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
return [
(url, None)
for url in text.extract_iter(
page, "property='og:image:secure_url' content='", "?")
page, 'property="og:image:secure_url" content="', "?")
]
@ -80,10 +64,7 @@ class ArchitizerFirmExtractor(Extractor):
subcategory = "firm"
root = "https://architizer.com"
pattern = r"(?:https?://)?architizer\.com/firms/([^/?#]+)"
test = ("https://architizer.com/firms/olson-kundig/", {
"pattern": ArchitizerProjectExtractor.pattern,
"count": ">= 90",
})
example = "https://architizer.com/firms/NAME/"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -117,7 +117,6 @@ class ArtstationExtractor(Extractor):
headers = {
"Accept" : "application/json, text/plain, */*",
"Origin" : self.root,
"Referer": self.root + "/",
}
if json:
@ -147,7 +146,6 @@ class ArtstationExtractor(Extractor):
headers = {
"Accept" : "*/*",
"Origin" : self.root,
"Referer": self.root + "/",
}
return self.request(
url, method="POST", headers=headers, json={},
@ -178,17 +176,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)(?:/albums/all)?"
r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$")
test = (
("https://www.artstation.com/sungchoi/", {
"pattern": r"https://\w+\.artstation\.com/p/assets/images"
r"/images/\d+/\d+/\d+/(4k|large|medium|small)/[^/]+",
"range": "1-10",
"count": ">= 10",
}),
("https://www.artstation.com/sungchoi/albums/all/"),
("https://sungchoi.artstation.com/"),
("https://sungchoi.artstation.com/projects/"),
)
example = "https://www.artstation.com/USER"
def projects(self):
url = "{}/users/{}/projects.json".format(self.root, self.user)
@ -205,15 +193,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)"
r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)")
test = (
("https://www.artstation.com/huimeiye/albums/770899", {
"count": 2,
}),
("https://www.artstation.com/huimeiye/albums/770898", {
"exception": exception.NotFoundError,
}),
("https://huimeiye.artstation.com/albums/770899"),
)
example = "https://www.artstation.com/USER/albums/12345"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@ -247,17 +227,7 @@ class ArtstationLikesExtractor(ArtstationExtractor):
archive_fmt = "f_{userinfo[id]}_{asset[id]}"
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)/likes/?")
test = (
("https://www.artstation.com/mikf/likes", {
"pattern": r"https://\w+\.artstation\.com/p/assets/images"
r"/images/\d+/\d+/\d+/(4k|large|medium|small)/[^/]+",
"count": 6,
}),
# no likes
("https://www.artstation.com/sungchoi/likes", {
"count": 0,
}),
)
example = "https://www.artstation.com/USER/likes"
def projects(self):
url = "{}/users/{}/likes.json".format(self.root, self.user)
@ -274,14 +244,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
r"/contests/[^/?#]+/challenges/(\d+)"
r"/?(?:\?sorting=([a-z]+))?")
test = (
("https://www.artstation.com/contests/thu-2017/challenges/20"),
(("https://www.artstation.com/contests/beyond-human"
"/challenges/23?sorting=winners"), {
"range": "1-30",
"count": 30,
}),
)
example = "https://www.artstation.com/contests/NAME/challenges/12345"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@ -327,10 +290,7 @@ class ArtstationSearchExtractor(ArtstationExtractor):
archive_fmt = "s_{search[query]}_{asset[id]}"
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/search/?\?([^#]+)")
test = ("https://www.artstation.com/search?query=ancient&sort_by=rank", {
"range": "1-20",
"count": 20,
})
example = "https://www.artstation.com/search?query=QUERY"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@ -377,10 +337,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
archive_fmt = "A_{asset[id]}"
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
r"/artwork/?\?([^#]+)")
test = ("https://www.artstation.com/artwork?sorting=latest", {
"range": "1-20",
"count": 20,
})
example = "https://www.artstation.com/artwork?sorting=SORT"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@ -400,32 +357,7 @@ class ArtstationImageExtractor(ArtstationExtractor):
pattern = (r"(?:https?://)?(?:"
r"(?:\w+\.)?artstation\.com/(?:artwork|projects|search)"
r"|artstn\.co/p)/(\w+)")
test = (
("https://www.artstation.com/artwork/LQVJr", {
"pattern": r"https?://\w+\.artstation\.com/p/assets"
r"/images/images/008/760/279/4k/.+",
"content": "7b113871465fdc09d127adfdc2767d51cf45a7e9",
# SHA1 hash without _no_cache()
# "content": "44b80f9af36d40efc5a2668cdd11d36d6793bae9",
}),
# multiple images per project
("https://www.artstation.com/artwork/Db3dy", {
"count": 4,
}),
# embedded youtube video
("https://www.artstation.com/artwork/g4WPK", {
"range": "2",
"options": (("external", True),),
"pattern": "ytdl:https://www.youtube.com/embed/JNFfJtwwrU0",
}),
# 404 (#3016)
("https://www.artstation.com/artwork/3q3mXB", {
"count": 0,
}),
# alternate URL patterns
("https://sungchoi.artstation.com/projects/LQVJr"),
("https://artstn.co/p/LQVJr"),
)
example = "https://www.artstation.com/artwork/abcde"
def __init__(self, match):
ArtstationExtractor.__init__(self, match)
@ -453,10 +385,7 @@ class ArtstationFollowingExtractor(ArtstationExtractor):
subcategory = "following"
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
r"/(?!artwork|projects|search)([^/?#]+)/following")
test = ("https://www.artstation.com/sungchoi/following", {
"pattern": ArtstationUserExtractor.pattern,
"count": ">= 50",
})
example = "https://www.artstation.com/USER/following"
def items(self):
url = "{}/users/{}/following.json".format(self.root, self.user)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2020-2022 Mike Fährmann
# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -176,16 +176,7 @@ class AryionGalleryExtractor(AryionExtractor):
subcategory = "gallery"
categorytransfer = True
pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?#]+)"
test = (
("https://aryion.com/g4/gallery/jameshoward", {
"options": (("recursive", False),),
"pattern": r"https://aryion\.com/g4/data\.php\?id=\d+$",
"range": "48-52",
"count": 5,
}),
("https://aryion.com/g4/user/jameshoward"),
("https://aryion.com/g4/latest.php?name=jameshoward"),
)
example = "https://aryion.com/g4/gallery/USER"
def __init__(self, match):
AryionExtractor.__init__(self, match)
@ -215,9 +206,7 @@ class AryionTagExtractor(AryionExtractor):
directory_fmt = ("{category}", "tags", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
"count": ">= 5",
})
example = "https://aryion.com/g4/tags.php?tag=TAG"
def _init(self):
self.params = text.parse_query(self.user)
@ -235,40 +224,7 @@ class AryionPostExtractor(AryionExtractor):
"""Extractor for individual posts on eka's portal"""
subcategory = "post"
pattern = BASE_PATTERN + r"/view/(\d+)"
test = (
("https://aryion.com/g4/view/510079", {
"url": "f233286fa5558c07ae500f7f2d5cb0799881450e",
"keyword": {
"artist" : "jameshoward",
"user" : "jameshoward",
"filename" : "jameshoward-510079-subscribestar_150",
"extension": "jpg",
"id" : 510079,
"width" : 1665,
"height" : 1619,
"size" : 784239,
"title" : "I'm on subscribestar now too!",
"description": r"re:Doesn't hurt to have a backup, right\?",
"tags" : ["Non-Vore", "subscribestar"],
"date" : "dt:2019-02-16 19:30:34",
"path" : [],
"views" : int,
"favorites": int,
"comments" : int,
"_mtime" : "Sat, 16 Feb 2019 19:30:34 GMT",
},
}),
# x-folder (#694)
("https://aryion.com/g4/view/588928", {
"pattern": pattern,
"count": ">= 8",
}),
# x-comic-folder (#945)
("https://aryion.com/g4/view/537379", {
"pattern": pattern,
"count": 2,
}),
)
example = "https://aryion.com/g4/view/12345"
def posts(self):
post_id, self.user = self.user, None

View File

@ -23,18 +23,7 @@ class BbcGalleryExtractor(GalleryExtractor):
filename_fmt = "{num:>02}.{extension}"
archive_fmt = "{programme}_{num}"
pattern = BASE_PATTERN + r"[^/?#]+(?!/galleries)(?:/[^/?#]+)?)$"
test = (
("https://www.bbc.co.uk/programmes/p084qtzs/p085g9kg", {
"pattern": r"https://ichef\.bbci\.co\.uk"
r"/images/ic/1920xn/\w+\.jpg",
"count": 37,
"keyword": {
"programme": "p084qtzs",
"path": ["BBC One", "Doctor Who", "The Timeless Children"],
},
}),
("https://www.bbc.co.uk/programmes/p084qtzs"),
)
example = "https://www.bbc.co.uk/programmes/PATH"
def metadata(self, page):
data = util.json_loads(text.extr(
@ -72,17 +61,7 @@ class BbcProgrammeExtractor(Extractor):
subcategory = "programme"
root = "https://www.bbc.co.uk"
pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?"
test = (
("https://www.bbc.co.uk/programmes/b006q2x0/galleries", {
"pattern": BbcGalleryExtractor.pattern,
"range": "1-50",
"count": ">= 50",
}),
("https://www.bbc.co.uk/programmes/b006q2x0/galleries?page=40", {
"pattern": BbcGalleryExtractor.pattern,
"count": ">= 100",
}),
)
example = "https://www.bbc.co.uk/programmes/ID/galleries"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -35,9 +35,8 @@ class BehanceExtractor(Extractor):
def _request_graphql(self, endpoint, variables):
url = self.root + "/v3/graphql"
headers = {
"Origin" : self.root,
"Referer": self.root + "/",
"X-BCP" : self._bcp,
"Origin": self.root,
"X-BCP" : self._bcp,
"X-Requested-With": "XMLHttpRequest",
}
data = {
@ -84,43 +83,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
filename_fmt = "{category}_{id}_{num:>02}.{extension}"
archive_fmt = "{id}_{num}"
pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"
test = (
("https://www.behance.net/gallery/17386197/A-Short-Story", {
"count": 2,
"url": "ab79bd3bef8d3ae48e6ac74fd995c1dfaec1b7d2",
"keyword": {
"id": 17386197,
"name": 're:"Hi". A short story about the important things ',
"owners": ["Place Studio", "Julio César Velazquez"],
"fields": ["Animation", "Character Design", "Directing"],
"tags": list,
"module": dict,
"date": "dt:2014-06-03 15:41:51",
},
}),
("https://www.behance.net/gallery/21324767/Nevada-City", {
"count": 6,
"url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d",
"keyword": {"owners": ["Alex Strohl"]},
}),
# 'media_collection' modules
("https://www.behance.net/gallery/88276087/Audi-R8-RWD", {
"count": 20,
"url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f",
"pattern": r"https://mir-s3-cdn-cf\.behance\.net/project_modules"
r"/source/[0-9a-f]+.[0-9a-f]+\.jpg"
}),
# 'video' modules (#1282)
("https://www.behance.net/gallery/101185577/COLCCI", {
"pattern": r"https://cdn-prod-ccv\.adobe\.com/\w+"
r"/rend/\w+_720\.mp4\?",
"count": 3,
}),
# mature content (#4417)
("https://www.behance.net/gallery/177464639/Kimori", {
"exception": exception.AuthorizationError,
}),
)
example = "https://www.behance.net/gallery/12345/TITLE"
def __init__(self, match):
BehanceExtractor.__init__(self, match)
@ -177,7 +140,13 @@ class BehanceGalleryExtractor(BehanceExtractor):
append((url, module))
elif mtype == "VideoModule":
renditions = module["videoData"]["renditions"]
try:
renditions = module["videoData"]["renditions"]
except Exception:
self.log.warning("No download URLs for video %s",
module.get("id") or "???")
continue
try:
url = [
r["url"] for r in renditions
@ -186,6 +155,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
except Exception as exc:
self.log.debug("%s: %s", exc.__class__.__name__, exc)
url = "ytdl:" + renditions[-1]["url"]
append((url, module))
elif mtype == "MediaCollectionModule":
@ -210,10 +180,7 @@ class BehanceUserExtractor(BehanceExtractor):
subcategory = "user"
categorytransfer = True
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
test = ("https://www.behance.net/alexstrohl", {
"count": ">= 11",
"pattern": BehanceGalleryExtractor.pattern,
})
example = "https://www.behance.net/USER"
def __init__(self, match):
BehanceExtractor.__init__(self, match)
@ -223,7 +190,7 @@ class BehanceUserExtractor(BehanceExtractor):
endpoint = "GetProfileProjects"
variables = {
"username": self.user,
"after" : "MAo=",
"after" : "MAo=", # "0" in base64
}
while True:
@ -241,10 +208,7 @@ class BehanceCollectionExtractor(BehanceExtractor):
subcategory = "collection"
categorytransfer = True
pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
test = ("https://www.behance.net/collection/71340149/inspiration", {
"count": ">= 150",
"pattern": BehanceGalleryExtractor.pattern,
})
example = "https://www.behance.net/collection/12345/TITLE"
def __init__(self, match):
BehanceExtractor.__init__(self, match)
@ -253,7 +217,7 @@ class BehanceCollectionExtractor(BehanceExtractor):
def galleries(self):
endpoint = "GetMoodboardItemsAndRecommendations"
variables = {
"afterItem": "MAo=",
"afterItem": "MAo=", # "0" in base64
"firstItem": 40,
"id" : int(self.collection_id),
"shouldGetItems" : True,

View File

@ -95,59 +95,8 @@ class BloggerExtractor(Extractor):
class BloggerPostExtractor(BloggerExtractor):
"""Extractor for a single blog post"""
subcategory = "post"
pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?#]+\.html)"
test = (
("https://julianbphotography.blogspot.com/2010/12/moon-rise.html", {
"url": "9928429fb62f712eb4de80f53625eccecc614aae",
"pattern": r"https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg",
"keyword": {
"blog": {
"date" : "dt:2010-11-21 18:19:42",
"description": "",
"id" : "5623928067739466034",
"kind" : "blogger#blog",
"locale" : dict,
"name" : "Julian Bunker Photography",
"pages" : int,
"posts" : int,
"published" : "2010-11-21T10:19:42-08:00",
"updated" : str,
"url" : "http://julianbphotography.blogspot.com/",
},
"post": {
"author" : "Julian Bunker",
"content" : str,
"date" : "dt:2010-12-26 01:08:00",
"etag" : str,
"id" : "6955139236418998998",
"kind" : "blogger#post",
"published" : "2010-12-25T17:08:00-08:00",
"replies" : "0",
"title" : "Moon Rise",
"updated" : "2011-12-06T05:21:24-08:00",
"url" : "re:.+/2010/12/moon-rise.html$",
},
"num": int,
"url": str,
},
}),
("blogger:http://www.julianbunker.com/2010/12/moon-rise.html"),
# video (#587)
(("http://cfnmscenesinmovies.blogspot.com/2011/11/"
"cfnm-scene-jenna-fischer-in-office.html"), {
"pattern": r"https://.+\.googlevideo\.com/videoplayback",
}),
# image URLs with width/height (#1061)
# ("https://aaaninja.blogspot.com/2020/08/altera-boob-press-2.html", {
# "pattern": r"https://1.bp.blogspot.com/.+/s0/altera_.+png",
# }),
# new image domain (#2204)
(("https://randomthingsthroughmyletterbox.blogspot.com/2022/01"
"/bitter-flowers-by-gunnar-staalesen-blog.html"), {
"pattern": r"https://blogger.googleusercontent.com/img/a/.+=s0$",
"count": 8,
}),
)
pattern = BASE_PATTERN + r"(/\d\d\d\d/\d\d/[^/?#]+\.html)"
example = "https://BLOG.blogspot.com/1970/01/TITLE.html"
def __init__(self, match):
BloggerExtractor.__init__(self, match)
@ -161,17 +110,7 @@ class BloggerBlogExtractor(BloggerExtractor):
"""Extractor for an entire Blogger blog"""
subcategory = "blog"
pattern = BASE_PATTERN + r"/?$"
test = (
("https://julianbphotography.blogspot.com/", {
"range": "1-25",
"count": 25,
"pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
}),
("blogger:https://www.kefblog.com.ng/", {
"range": "1-25",
"count": 25,
}),
)
example = "https://BLOG.blogspot.com/"
def posts(self, blog):
return self.api.blog_posts(blog["id"])
@ -181,12 +120,7 @@ class BloggerSearchExtractor(BloggerExtractor):
"""Extractor for Blogger search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
test = (
("https://julianbphotography.blogspot.com/search?q=400mm", {
"count": "< 10",
"keyword": {"query": "400mm"},
}),
)
example = "https://BLOG.blogspot.com/search?q=QUERY"
def __init__(self, match):
BloggerExtractor.__init__(self, match)
@ -203,13 +137,7 @@ class BloggerLabelExtractor(BloggerExtractor):
"""Extractor for Blogger posts by label"""
subcategory = "label"
pattern = BASE_PATTERN + r"/search/label/([^/?#]+)"
test = (
("https://dmmagazine.blogspot.com/search/label/D%26D", {
"range": "1-25",
"count": 25,
"keyword": {"label": "D&D"},
}),
)
example = "https://BLOG.blogspot.com/search/label/LABEL"
def __init__(self, match):
BloggerExtractor.__init__(self, match)

View File

@ -15,6 +15,7 @@ from urllib.parse import urlsplit, urlunsplit
MEDIA_DOMAIN_OVERRIDES = {
"cdn9.bunkr.ru" : "c9.bunkr.ru",
"cdn12.bunkr.ru": "media-files12.bunkr.la",
"cdn-pizza.bunkr.ru": "pizza.bunkr.ru",
}
CDN_HOSTED_EXTENSIONS = (
@ -28,53 +29,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
category = "bunkr"
root = "https://bunkrr.su"
pattern = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)/a/([^/?#]+)"
test = (
("https://bunkrr.su/a/Lktg9Keq", {
"pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
"album_id": "Lktg9Keq",
"album_name": 'test テスト "&>',
"count": 1,
"filename": 'test-テスト-"&>-QjgneIQv',
"id": "QjgneIQv",
"name": 'test-テスト-"&>',
"num": int,
},
}),
# mp4 (#2239)
("https://app.bunkr.ru/a/ptRHaCn2", {
"pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",
"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
}),
# cdn4
("https://bunkr.is/a/iXTTc1o2", {
"pattern": r"https://(cdn|media-files)4\.bunkr\.ru/",
"content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",
"keyword": {
"album_id": "iXTTc1o2",
"album_name": "test2",
"album_size": "691.1 KB",
"count": 2,
"description": "072022",
"filename": "re:video-wFO9FtxG|image-sZrQUeOx",
"id": "re:wFO9FtxG|sZrQUeOx",
"name": "re:video|image",
"num": int,
},
}),
# cdn12 .ru TLD (#4147)
("https://bunkrr.su/a/j1G29CnD", {
"pattern": r"https://(cdn12.bunkr.ru|media-files12.bunkr.la)/\w+",
"count": 8,
}),
("https://bunkrr.su/a/Lktg9Keq"),
("https://bunkr.la/a/Lktg9Keq"),
("https://bunkr.su/a/Lktg9Keq"),
("https://bunkr.ru/a/Lktg9Keq"),
("https://bunkr.is/a/Lktg9Keq"),
("https://bunkr.to/a/Lktg9Keq"),
)
example = "https://bunkrr.su/a/ID"
def fetch_album(self, album_id):
# album metadata
@ -87,7 +42,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
cdn = None
files = []
append = files.append
headers = {"Referer": self.root + "/"}
pos = page.index('class="grid-images')
for url in text.extract_iter(page, '<a href="', '"', pos):
@ -108,7 +62,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
else:
domain = domain.replace("cdn", "media-files", 1)
url = urlunsplit((scheme, domain, path, query, fragment))
append({"file": url, "_http_headers": headers})
append({"file": url})
return files, {
"album_id" : self.album_id,

View File

@ -21,22 +21,7 @@ class CatboxAlbumExtractor(GalleryExtractor):
directory_fmt = ("{category}", "{album_name} ({album_id})")
archive_fmt = "{album_id}_{filename}"
pattern = r"(?:https?://)?(?:www\.)?catbox\.moe(/c/[^/?#]+)"
test = (
("https://catbox.moe/c/1igcbe", {
"url": "35866a88c29462814f103bc22ec031eaeb380f8a",
"content": "70ddb9de3872e2d17cc27e48e6bf395e5c8c0b32",
"pattern": r"https://files\.catbox\.moe/\w+\.\w{3}$",
"count": 3,
"keyword": {
"album_id": "1igcbe",
"album_name": "test",
"date": "dt:2022-08-18 00:00:00",
"description": "album test &>",
},
}),
("https://www.catbox.moe/c/cd90s1"),
("https://catbox.moe/c/w7tm47#"),
)
example = "https://catbox.moe/c/ID"
def metadata(self, page):
extr = text.extract_from(page)
@ -62,15 +47,7 @@ class CatboxFileExtractor(Extractor):
subcategory = "file"
archive_fmt = "{filename}"
pattern = r"(?:https?://)?(?:files|litter|de)\.catbox\.moe/([^/?#]+)"
test = (
("https://files.catbox.moe/8ih3y7.png", {
"pattern": r"^https://files\.catbox\.moe/8ih3y7\.png$",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"count": 1,
}),
("https://litter.catbox.moe/t8v3n9.png"),
("https://de.catbox.moe/bjdmz1.jpg"),
)
example = "https://files.catbox.moe/NAME.EXT"
def items(self):
url = text.ensure_http_scheme(self.url)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann
# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -25,19 +25,7 @@ class ComicvineTagExtractor(BooruExtractor):
archive_fmt = "{id}"
pattern = (r"(?:https?://)?comicvine\.gamespot\.com"
r"(/([^/?#]+)/(\d+-\d+)/images/.*)")
test = (
("https://comicvine.gamespot.com/jock/4040-5653/images/", {
"pattern": r"https://comicvine\.gamespot\.com/a/uploads"
r"/original/\d+/\d+/\d+-.+\.(jpe?g|png)",
"count": ">= 140",
}),
(("https://comicvine.gamespot.com/batman/4005-1699"
"/images/?tag=Fan%20Art%20%26%20Cosplay"), {
"pattern": r"https://comicvine\.gamespot\.com/a/uploads"
r"/original/\d+/\d+/\d+-.+",
"count": ">= 450",
}),
)
example = "https://comicvine.gamespot.com/TAG/123-45/images/"
def __init__(self, match):
BooruExtractor.__init__(self, match)

View File

@ -35,7 +35,6 @@ class Extractor():
cookies_domain = ""
browser = None
root = ""
test = None
request_interval = 0.0
request_interval_min = 0.0
request_timestamp = 0.0
@ -299,7 +298,7 @@ class Extractor():
useragent = self.config("user-agent")
if useragent is None:
useragent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
"rv:115.0) Gecko/20100101 Firefox/115.0")
"rv:109.0) Gecko/20100101 Firefox/115.0")
elif useragent == "browser":
useragent = _browser_useragent()
headers["User-Agent"] = useragent
@ -311,6 +310,13 @@ class Extractor():
else:
headers["Accept-Encoding"] = "gzip, deflate"
custom_referer = self.config("referer", True)
if custom_referer:
if isinstance(custom_referer, str):
headers["Referer"] = custom_referer
elif self.root:
headers["Referer"] = self.root + "/"
custom_headers = self.config("headers")
if custom_headers:
headers.update(custom_headers)
@ -508,21 +514,6 @@ class Extractor():
result.append((Message.Queue, url, {"_extractor": extr}))
return iter(result)
@classmethod
def _get_tests(cls):
    """Yield an extractor's test cases as (URL, RESULTS) tuples.

    ``cls.test`` may take any of these shapes:

    * falsy (``None``, ``()``, ``""``) -- nothing is yielded
    * a single URL string
    * a single ``(URL, RESULTS)`` pair, where RESULTS is falsy or a dict
    * a sequence of URL strings and/or ``(URL, RESULTS)`` pairs

    Entries that consist of only a URL string are yielded as
    ``(URL, None)``; all other entries are yielded unchanged.
    """
    tests = cls.test
    if not tests:
        return

    if isinstance(tests, str):
        # A bare URL string is one test case; without this check it
        # would be iterated character by character below.
        tests = (tests,)
    elif len(tests) == 2 and (not tests[1] or isinstance(tests[1], dict)):
        # A lone (URL, RESULTS) pair rather than a sequence of tests.
        tests = (tests,)

    for test in tests:
        if isinstance(test, str):
            test = (test, None)
        yield test
@classmethod
def _dump(cls, obj):
util.dump_json(obj, ensure_ascii=False, indent=2)
@ -831,8 +822,8 @@ _browser_cookies = {}
HTTP_HEADERS = {
"firefox": (
("User-Agent", "Mozilla/5.0 ({}; rv:115.0) "
"Gecko/20100101 Firefox/115.0"),
("User-Agent", "Mozilla/5.0 ({}; "
"rv:109.0) Gecko/20100101 Firefox/115.0"),
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
"image/avif,image/webp,*/*;q=0.8"),
("Accept-Language", "en-US,en;q=0.5"),

View File

@ -14,32 +14,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
category = "cyberdrop"
root = "https://cyberdrop.me"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)"
test = (
# images
("https://cyberdrop.me/a/keKRjm4t", {
"pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.(jpg|png|webp)$",
"keyword": {
"album_id": "keKRjm4t",
"album_name": "Fate (SFW)",
"album_size": 150069254,
"count": 62,
"date": "dt:2020-06-18 13:14:20",
"description": "",
"id": r"re:\w{8}",
},
}),
# videos
("https://cyberdrop.to/a/l8gIAXVD", {
"pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.mp4$",
"count": 31,
"keyword": {
"album_id": "l8gIAXVD",
"album_name": "Achelois17 videos",
"album_size": 652037121,
"date": "dt:2020-06-16 15:40:44",
},
}),
)
example = "https://cyberdrop.me/a/ID"
def fetch_album(self, album_id):
url = self.root + "/a/" + self.album_id

View File

@ -173,38 +173,7 @@ class DanbooruTagExtractor(DanbooruExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)"
test = (
("https://danbooru.donmai.us/posts?tags=bonocho", {
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
}),
# test page transitions
("https://danbooru.donmai.us/posts?tags=mushishi", {
"count": ">= 300",
}),
# 'external' option (#1747)
("https://danbooru.donmai.us/posts?tags=pixiv_id%3A1476533", {
"options": (("external", True),),
"pattern": r"https://i\.pximg\.net/img-original/img"
r"/2008/08/28/02/35/48/1476533_p0\.jpg",
}),
("https://booru.allthefallen.moe/posts?tags=yume_shokunin", {
"count": 12,
}),
("https://aibooru.online/posts?tags=center_frills&z=1", {
"pattern": r"https://cdn\.aibooru\.online/original"
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w+",
"count": ">= 3",
}),
("https://booru.borvar.art/posts?tags=chibi&z=1", {
"pattern": r"https://booru\.borvar\.art/data/original"
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w+",
"count": ">= 3",
}),
("https://hijiribe.donmai.us/posts?tags=bonocho"),
("https://sonohara.donmai.us/posts?tags=bonocho"),
("https://safebooru.donmai.us/posts?tags=bonocho"),
("https://safe.aibooru.online/posts?tags=center_frills"),
)
example = "https://danbooru.donmai.us/posts?tags=TAG"
def __init__(self, match):
DanbooruExtractor.__init__(self, match)
@ -238,21 +207,7 @@ class DanbooruPoolExtractor(DanbooruExtractor):
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
archive_fmt = "p_{pool[id]}_{id}"
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
test = (
("https://danbooru.donmai.us/pools/7659", {
"content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
}),
("https://booru.allthefallen.moe/pools/9", {
"url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5",
"count": 6,
}),
("https://booru.borvar.art/pools/2", {
"url": "77fa3559a3fc919f72611f4e3dd0f919d19d3e0d",
"count": 4,
}),
("https://aibooru.online/pools/1"),
("https://danbooru.donmai.us/pool/show/7659"),
)
example = "https://danbooru.donmai.us/pools/12345"
def __init__(self, match):
DanbooruExtractor.__init__(self, match)
@ -275,26 +230,7 @@ class DanbooruPostExtractor(DanbooruExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
test = (
("https://danbooru.donmai.us/posts/294929", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"keyword": {"date": "dt:2008-08-12 04:46:05"},
}),
("https://danbooru.donmai.us/posts/3613024", {
"pattern": r"https?://.+\.zip$",
"options": (("ugoira", True),)
}),
("https://booru.allthefallen.moe/posts/22", {
"content": "21dda68e1d7e0a554078e62923f537d8e895cac8",
}),
("https://aibooru.online/posts/1", {
"content": "54d548743cd67799a62c77cbae97cfa0fec1b7e9",
}),
("https://booru.borvar.art/posts/1487", {
"content": "91273ac1ea413a12be468841e2b5804656a50bff",
}),
("https://danbooru.donmai.us/post/show/294929"),
)
example = "https://danbooru.donmai.us/posts/12345"
def __init__(self, match):
DanbooruExtractor.__init__(self, match)
@ -315,17 +251,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
archive_fmt = "P_{scale[0]}_{date}_{id}"
pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?"
test = (
("https://danbooru.donmai.us/explore/posts/popular"),
(("https://danbooru.donmai.us/explore/posts/popular"
"?date=2013-06-06&scale=week"), {
"range": "1-120",
"count": 120,
}),
("https://booru.allthefallen.moe/explore/posts/popular"),
("https://aibooru.online/explore/posts/popular"),
("https://booru.borvar.art/explore/posts/popular"),
)
example = "https://danbooru.donmai.us/explore/posts/popular"
def __init__(self, match):
DanbooruExtractor.__init__(self, match)

View File

@ -23,7 +23,7 @@ class DesktopographySiteExtractor(DesktopographyExtractor):
"""Extractor for all desktopography exhibitions """
subcategory = "site"
pattern = BASE_PATTERN + r"/$"
test = ("https://desktopography.net/",)
example = "https://desktopography.net/"
def items(self):
page = self.request(self.root).text
@ -42,7 +42,7 @@ class DesktopographyExhibitionExtractor(DesktopographyExtractor):
"""Extractor for a yearly desktopography exhibition"""
subcategory = "exhibition"
pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
test = ("https://desktopography.net/exhibition-2020/",)
example = "https://desktopography.net/exhibition-2020/"
def __init__(self, match):
DesktopographyExtractor.__init__(self, match)
@ -71,7 +71,7 @@ class DesktopographyEntryExtractor(DesktopographyExtractor):
"""Extractor for all resolutions of a desktopography wallpaper"""
subcategory = "entry"
pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
test = ("https://desktopography.net/portfolios/new-era/",)
example = "https://desktopography.net/portfolios/NAME/"
def __init__(self, match):
DesktopographyExtractor.__init__(self, match)

View File

@ -440,18 +440,7 @@ class DeviantartUserExtractor(DeviantartExtractor):
"""Extractor for an artist's user profile"""
subcategory = "user"
pattern = BASE_PATTERN + r"/?$"
test = (
("https://www.deviantart.com/shimoda7", {
"pattern": r"/shimoda7/gallery$",
}),
("https://www.deviantart.com/shimoda7", {
"options": (("include", "all"),),
"pattern": r"/shimoda7/"
r"(gallery(/scraps)?|posts(/statuses)?|favourites)$",
"count": 5,
}),
("https://shimoda7.deviantart.com/"),
)
example = "https://www.deviantart.com/USER"
def initialize(self):
pass
@ -475,84 +464,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
subcategory = "gallery"
archive_fmt = "g_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$"
test = (
("https://www.deviantart.com/shimoda7/gallery/", {
"pattern": r"https://(images-)?wixmp-[^.]+\.wixmp\.com"
r"/f/.+/.+\.(jpg|png)\?token=.+",
"count": ">= 30",
"keyword": {
"allows_comments": bool,
"author": {
"type": "regular",
"usericon": str,
"userid": "9AE51FC7-0278-806C-3FFF-F4961ABF9E2B",
"username": "shimoda7",
},
"category_path": str,
"content": {
"filesize": int,
"height": int,
"src": str,
"transparency": bool,
"width": int,
},
"da_category": str,
"date": "type:datetime",
"deviationid": str,
"?download_filesize": int,
"extension": str,
"index": int,
"is_deleted": bool,
"is_downloadable": bool,
"is_favourited": bool,
"is_mature": bool,
"preview": {
"height": int,
"src": str,
"transparency": bool,
"width": int,
},
"published_time": int,
"stats": {
"comments": int,
"favourites": int,
},
"target": dict,
"thumbs": list,
"title": str,
"url": r"re:https://www.deviantart.com/shimoda7/art/[^/]+-\d+",
"username": "shimoda7",
},
}),
# group
("https://www.deviantart.com/yakuzafc/gallery", {
"pattern": r"https://www.deviantart.com/yakuzafc/gallery"
r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}/",
"count": ">= 15",
}),
# 'folders' option (#276)
("https://www.deviantart.com/justatest235723/gallery", {
"count": 3,
"options": (("metadata", 1), ("folders", 1), ("original", 0)),
"keyword": {
"description": str,
"folders": list,
"is_watching": bool,
"license": str,
"tags": list,
},
}),
("https://www.deviantart.com/shimoda8/gallery/", {
"exception": exception.NotFoundError,
}),
("https://www.deviantart.com/shimoda7/gallery"),
("https://www.deviantart.com/shimoda7/gallery/all"),
("https://www.deviantart.com/shimoda7/gallery/?catpath=/"),
("https://shimoda7.deviantart.com/gallery/"),
("https://shimoda7.deviantart.com/gallery/all/"),
("https://shimoda7.deviantart.com/gallery/?catpath=/"),
)
example = "https://www.deviantart.com/USER/gallery/"
def deviations(self):
if self.flat and not self.group:
@ -567,32 +479,7 @@ class DeviantartFolderExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)"
test = (
# user
("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", {
"count": 5,
"options": (("original", False),),
}),
# group
("https://www.deviantart.com/yakuzafc/gallery/37412168/Crafts", {
"count": ">= 4",
"options": (("original", False),),
}),
# uuid
(("https://www.deviantart.com/shimoda7/gallery"
"/B38E3C6A-2029-6B45-757B-3C8D3422AD1A/misc"), {
"count": 5,
"options": (("original", False),),
}),
# name starts with '_', special characters (#1451)
(("https://www.deviantart.com/justatest235723"
"/gallery/69302698/-test-b-c-d-e-f-"), {
"count": 1,
"options": (("original", False),),
}),
("https://shimoda7.deviantart.com/gallery/722019/Miscellaneous"),
("https://yakuzafc.deviantart.com/gallery/37412168/Crafts"),
)
example = "https://www.deviantart.com/USER/gallery/12345/TITLE"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@ -621,33 +508,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
subcategory = "stash"
archive_fmt = "{index}.{extension}"
pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
test = (
("https://sta.sh/022c83odnaxc", {
"pattern": r"https://wixmp-[^.]+\.wixmp\.com"
r"/f/.+/.+\.png\?token=.+",
"content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
"count": 1,
}),
# multiple stash items
("https://sta.sh/21jf51j7pzl2", {
"options": (("original", False),),
"count": 4,
}),
# downloadable, but no "content" field (#307)
("https://sta.sh/024t4coz16mi", {
"pattern": r"https://wixmp-[^.]+\.wixmp\.com"
r"/f/.+/.+\.rar\?token=.+",
"count": 1,
}),
# mixed folders and images (#659)
("https://sta.sh/215twi387vfj", {
"options": (("original", False),),
"count": 4,
}),
("https://sta.sh/abcdefghijkl", {
"count": 0,
}),
)
example = "https://sta.sh/abcde"
skip = Extractor.skip
@ -692,20 +553,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "Favourites")
archive_fmt = "f_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
test = (
("https://www.deviantart.com/h3813067/favourites/", {
"options": (("metadata", True), ("flat", False)), # issue #271
"count": 1,
}),
("https://www.deviantart.com/h3813067/favourites/", {
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/h3813067/favourites/all"),
("https://www.deviantart.com/h3813067/favourites/?catpath=/"),
("https://h3813067.deviantart.com/favourites/"),
("https://h3813067.deviantart.com/favourites/all"),
("https://h3813067.deviantart.com/favourites/?catpath=/"),
)
example = "https://www.deviantart.com/USER/favourites/"
def deviations(self):
if self.flat:
@ -722,20 +570,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
"{collection[title]}")
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/favourites/([^/?#]+)/([^/?#]+)"
test = (
(("https://www.deviantart.com/pencilshadings/favourites"
"/70595441/3D-Favorites"), {
"count": ">= 15",
"options": (("original", False),),
}),
(("https://www.deviantart.com/pencilshadings/favourites"
"/F050486B-CB62-3C66-87FB-1105A7F6379F/3D Favorites"), {
"count": ">= 15",
"options": (("original", False),),
}),
("https://pencilshadings.deviantart.com"
"/favourites/70595441/3D-Favorites"),
)
example = "https://www.deviantart.com/USER/favourites/12345/TITLE"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@ -766,24 +601,7 @@ class DeviantartJournalExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "{username}", "Journal")
archive_fmt = "j_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
test = (
("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
"url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
}),
("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
"url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e",
"options": (("journals", "text"),),
}),
("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
"count": 0,
"options": (("journals", "none"),),
}),
("https://www.deviantart.com/shimoda7/posts/"),
("https://www.deviantart.com/shimoda7/journal/"),
("https://www.deviantart.com/shimoda7/journal/?catpath=/"),
("https://shimoda7.deviantart.com/journal/"),
("https://shimoda7.deviantart.com/journal/?catpath=/"),
)
example = "https://www.deviantart.com/USER/posts/journals/"
# Return whatever the API client's browse_user_journals() yields for
# self.user, passing self.offset as the second argument.
# NOTE(review): presumably an iterable of journal deviations beginning at
# that offset -- confirm against the API class.
def deviations(self):
return self.api.browse_user_journals(self.user, self.offset)
@ -796,45 +614,7 @@ class DeviantartStatusExtractor(DeviantartExtractor):
filename_fmt = "{category}_{index}_{title}_{date}.{extension}"
archive_fmt = "S_{_username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/posts/statuses"
test = (
("https://www.deviantart.com/t1na/posts/statuses", {
"count": 0,
}),
("https://www.deviantart.com/justgalym/posts/statuses", {
"count": 4,
"url": "bf4c44c0c60ff2648a880f4c3723464ad3e7d074",
}),
# shared deviation
("https://www.deviantart.com/justgalym/posts/statuses", {
"options": (("journals", "none"),),
"count": 1,
"pattern": r"https://images-wixmp-\w+\.wixmp\.com/f"
r"/[^/]+/[^.]+\.jpg\?token=",
}),
# shared sta.sh item
("https://www.deviantart.com/vanillaghosties/posts/statuses", {
"options": (("journals", "none"), ("original", False)),
"range": "5-",
"count": 1,
"keyword": {
"index" : int,
"index_base36": "re:^[0-9a-z]+$",
"url" : "re:^https://sta.sh",
},
}),
# "deleted" deviations in 'items'
("https://www.deviantart.com/AndrejSKalin/posts/statuses", {
"options": (("journals", "none"), ("original", 0),
("image-filter", "deviationid[:8] == '147C8B03'")),
"count": 2,
"archive": False,
"keyword": {"deviationid": "147C8B03-7D34-AE93-9241-FA3C6DBBC655"}
}),
("https://www.deviantart.com/justgalym/posts/statuses", {
"options": (("journals", "text"),),
"url": "c8744f7f733a3029116607b826321233c5ca452d",
}),
)
example = "https://www.deviantart.com/USER/posts/statuses/"
def deviations(self):
for status in self.api.user_statuses(self.user, self.offset):
@ -898,19 +678,7 @@ class DeviantartPopularExtractor(DeviantartExtractor):
r"(?:deviations/?)?\?order=(popular-[^/?#]+)"
r"|((?:[\w-]+/)*)(popular-[^/?#]+)"
r")/?(?:\?([^#]*))?")
test = (
("https://www.deviantart.com/?order=popular-all-time", {
"options": (("original", False),),
"range": "1-30",
"count": 30,
}),
("https://www.deviantart.com/popular-24-hours/?q=tree+house", {
"options": (("original", False),),
"range": "1-30",
"count": 30,
}),
("https://www.deviantart.com/artisan/popular-all-time/?q=tree"),
)
example = "https://www.deviantart.com/popular-24-hours/"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@ -955,11 +723,7 @@ class DeviantartTagExtractor(DeviantartExtractor):
directory_fmt = ("{category}", "Tags", "{search_tags}")
archive_fmt = "T_{search_tags}_{index}.{extension}"
pattern = r"(?:https?://)?www\.deviantart\.com/tag/([^/?#]+)"
test = ("https://www.deviantart.com/tag/nature", {
"options": (("original", False),),
"range": "1-30",
"count": 30,
})
example = "https://www.deviantart.com/tag/TAG"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@ -978,10 +742,7 @@ class DeviantartWatchExtractor(DeviantartExtractor):
subcategory = "watch"
pattern = (r"(?:https?://)?(?:www\.)?deviantart\.com"
r"/(?:watch/deviations|notifications/watch)()()")
test = (
("https://www.deviantart.com/watch/deviations"),
("https://www.deviantart.com/notifications/watch"),
)
example = "https://www.deviantart.com/watch/deviations"
# Delegate entirely to the API client's browse_deviantsyouwatch();
# no user or offset is passed (the URL pattern above only has empty groups).
def deviations(self):
return self.api.browse_deviantsyouwatch()
@ -991,7 +752,7 @@ class DeviantartWatchPostsExtractor(DeviantartExtractor):
"""Extractor for Posts from watched users"""
subcategory = "watch-posts"
pattern = r"(?:https?://)?(?:www\.)?deviantart\.com/watch/posts()()"
test = ("https://www.deviantart.com/watch/posts",)
example = "https://www.deviantart.com/watch/posts"
# Delegate entirely to the API client's browse_posts_deviantsyouwatch();
# no arguments are passed.
def deviations(self):
return self.api.browse_posts_deviantsyouwatch()
@ -1009,100 +770,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
r"(\d+)" # bare deviation ID without slug
r"|(?:https?://)?fav\.me/d([0-9a-z]+)") # base36
test = (
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
"options": (("original", 0),),
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/zzz/art/zzz-1234567890", {
"exception": exception.NotFoundError,
}),
(("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
"options": (("comments", True),),
"keyword": {"comments": list},
"pattern": r"https://wixmp-[^.]+\.wixmp\.com"
r"/f/.+/.+\.jpg\?token=.+",
}),
# wixmp URL rewrite
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com/f"
r"/[^/]+/[^.]+\.jpg\?token="),
}),
# GIF (#242)
(("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
"pattern": r"https://wixmp-\w+\.wixmp\.com/f/03fd2413-efe9-4e5c-"
r"8734-2b72605b3fbb/dcxbsnb-1bbf0b38-42af-4070-8878-"
r"f30961955bec\.gif\?token=ey...",
}),
# Flash animation with GIF preview (#1731)
("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", {
"pattern": r"https://wixmp-[^.]+\.wixmp\.com"
r"/f/.+/.+\.swf\?token=.+",
"keyword": {
"filename": "flash_comic_tutorial_by_yuumei-d3juatd",
"extension": "swf",
},
}),
# sta.sh URLs from description (#302)
(("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
"options": (("extra", 1), ("original", 0)),
"pattern": DeviantartStashExtractor.pattern,
"range": "2-",
"count": 4,
}),
# sta.sh URL from deviation["text_content"]["body"]["features"]
(("https://www.deviantart.com"
"/cimar-wildehopps/art/Honorary-Vixen-859809305"), {
"options": (("extra", 1),),
"pattern": ("text:<!DOCTYPE html>\n|" +
DeviantartStashExtractor.pattern),
"count": 2,
}),
# journal
("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", {
"url": "d34b2c9f873423e665a1b8ced20fcb75951694a3",
"pattern": "text:<!DOCTYPE html>\n",
}),
# journal-like post with isJournal == False (#419)
("https://www.deviantart.com/gliitchlord/art/brashstrokes-812942668", {
"url": "e2e0044bd255304412179b6118536dbd9bb3bb0e",
"pattern": "text:<!DOCTYPE html>\n",
}),
# /view/ URLs
("https://deviantart.com/view/904858796/", {
"content": "8770ec40ad1c1d60f6b602b16301d124f612948f",
}),
("http://www.deviantart.com/view/890672057", {
"content": "1497e13d925caeb13a250cd666b779a640209236",
}),
("https://www.deviantart.com/view/706871727", {
"content": "3f62ae0c2fca2294ac28e41888ea06bb37c22c65",
}),
("https://www.deviantart.com/view/1", {
"exception": exception.NotFoundError,
}),
# /deviation/ (#3558)
("https://www.deviantart.com/deviation/817215762"),
# fav.me (#3558)
("https://fav.me/ddijrpu", {
"count": 1,
}),
("https://fav.me/dddd", {
"exception": exception.NotFoundError,
}),
# old-style URLs
("https://shimoda7.deviantart.com"
"/art/For-the-sake-of-a-memory-10073852"),
("https://myria-moon.deviantart.com"
"/art/Aime-Moi-part-en-vadrouille-261986576"),
("https://zzz.deviantart.com/art/zzz-1234567890"),
# old /view/ URLs from the Wayback Machine
("https://www.deviantart.com/view.php?id=14864502"),
("http://www.deviantart.com/view-full.php?id=100842"),
("https://www.fxdeviantart.com/zzz/art/zzz-1234567890"),
("https://www.fxdeviantart.com/view/1234567890"),
)
example = "https://www.deviantart.com/UsER/art/TITLE-12345"
skip = Extractor.skip
@ -1133,13 +801,7 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
archive_fmt = "s_{_username}_{index}.{extension}"
cookies_domain = ".deviantart.com"
pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
test = (
("https://www.deviantart.com/shimoda7/gallery/scraps", {
"count": 12,
}),
("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
)
example = "https://www.deviantart.com/USER/gallery/scraps"
def deviations(self):
self.login()
@ -1157,11 +819,7 @@ class DeviantartSearchExtractor(DeviantartExtractor):
cookies_domain = ".deviantart.com"
pattern = (r"(?:https?://)?www\.deviantart\.com"
r"/search(?:/deviations)?/?\?([^#]+)")
test = (
("https://www.deviantart.com/search?q=tree"),
("https://www.deviantart.com/search/deviations?order=popular-1-week"),
)
example = "https://www.deviantart.com/search?q=QUERY"
skip = Extractor.skip
def __init__(self, match):
@ -1212,13 +870,7 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
archive_fmt = "g_{_username}_{index}.{extension}"
cookies_domain = ".deviantart.com"
pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
test = (
("https://www.deviantart.com/shimoda7/gallery?q=memory", {
"options": (("original", 0),),
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/shimoda7/gallery?q=memory&sort=popular"),
)
example = "https://www.deviantart.com/USER/gallery?q=QUERY"
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
@ -1250,11 +902,7 @@ class DeviantartFollowingExtractor(DeviantartExtractor):
"""Extractor for user's watched users"""
subcategory = "following"
pattern = BASE_PATTERN + "/about#watching$"
test = ("https://www.deviantart.com/shimoda7/about#watching", {
"pattern": DeviantartUserExtractor.pattern,
"range": "1-50",
"count": 50,
})
example = "https://www.deviantart.com/USER/about#watching"
def items(self):
eclipse_api = DeviantartEclipseAPI(self)
@ -1774,11 +1422,9 @@ class DeviantartEclipseAPI():
def _call(self, endpoint, params):
url = "https://www.deviantart.com/_napi" + endpoint
headers = {"Referer": "https://www.deviantart.com/"}
params["csrf_token"] = self.csrf_token or self._fetch_csrf_token()
response = self.request(
url, params=params, headers=headers, fatal=None)
response = self.request(url, params=params, fatal=None)
if response.status_code == 404:
raise exception.StopExtraction(

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2022 Mike Fährmann
# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -20,36 +20,7 @@ class DirectlinkExtractor(Extractor):
pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\."
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
r"(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$")
test = (
(("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
"url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
"keyword": "105770a3f4393618ab7b811b731b22663b5d3794",
}),
# empty path
(("https://example.org/file.webm"), {
"url": "2d807ed7059d1b532f1bb71dc24b510b80ff943f",
"keyword": "29dad729c40fb09349f83edafa498dba1297464a",
}),
# more complex example
("https://example.org/path/to/file.webm?que=1?&ry=2/#fragment", {
"url": "6fb1061390f8aada3db01cb24b51797c7ee42b31",
"keyword": "3d7abc31d45ba324e59bc599c3b4862452d5f29c",
}),
# percent-encoded characters
("https://example.org/%27%3C%23/%23%3E%27.jpg?key=%3C%26%3E", {
"url": "2627e8140727fdf743f86fe18f69f99a052c9718",
"keyword": "831790fddda081bdddd14f96985ab02dc5b5341f",
}),
# upper case file extension (#296)
("https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw"
".JUzkGb4V6dj9DXjLclrOoqR64uDxHFUO5KDriRdKpGwg.88mCtd4iT1NHlpVKSCaUpP"
"mZPiDgT8hmQdQ5K_gYyu0g.JPEG/2.JPG"),
# internationalized domain name
("https://räksmörgås.josefsson.org/raksmorgas.jpg", {
"url": "a65667f670b194afbd1e3ea5e7a78938d36747da",
"keyword": "fd5037fe86eebd4764e176cbaf318caec0f700be",
}),
)
example = "https://en.wikipedia.org/static/images/project-logos/enwiki.png"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -43,18 +43,7 @@ class DynastyscansBase():
class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
"""Extractor for manga-chapters from dynasty-scans.com"""
pattern = BASE_PATTERN + r"(/chapters/[^/?#]+)"
test = (
(("http://dynasty-scans.com/chapters/"
"hitoribocchi_no_oo_seikatsu_ch33"), {
"url": "dce64e8c504118f1ab4135c00245ea12413896cb",
"keyword": "b67599703c27316a2fe4f11c3232130a1904e032",
}),
(("http://dynasty-scans.com/chapters/"
"new_game_the_spinoff_special_13"), {
"url": "dbe5bbb74da2edcfb1832895a484e2a40bc8b538",
"keyword": "6b674eb3a274999153f6be044973b195008ced2f",
}),
)
example = "https://dynasty-scans.com/chapters/NAME"
def metadata(self, page):
extr = text.extract_from(page)
@ -93,10 +82,7 @@ class DynastyscansMangaExtractor(DynastyscansBase, MangaExtractor):
chapterclass = DynastyscansChapterExtractor
reverse = False
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
test = ("https://dynasty-scans.com/series/hitoribocchi_no_oo_seikatsu", {
"pattern": DynastyscansChapterExtractor.pattern,
"count": ">= 100",
})
example = "https://dynasty-scans.com/series/NAME"
def chapters(self, page):
return [
@ -112,16 +98,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
filename_fmt = "{image_id}.{extension}"
archive_fmt = "i_{image_id}"
pattern = BASE_PATTERN + r"/images/?(?:\?([^#]+))?$"
test = (
("https://dynasty-scans.com/images?with[]=4930&with[]=5211", {
"url": "22cf0fb64e12b29e79b0a3d26666086a48f9916a",
"keyword": "11cbc555a15528d25567977b8808e10369c4c3ee",
}),
("https://dynasty-scans.com/images", {
"range": "1",
"count": 1,
}),
)
example = "https://dynasty-scans.com/images?QUERY"
def __init__(self, match):
Extractor.__init__(self, match)
@ -150,10 +127,7 @@ class DynastyscansImageExtractor(DynastyscansSearchExtractor):
"""Extractor for individual images on dynasty-scans.com"""
subcategory = "image"
pattern = BASE_PATTERN + r"/images/(\d+)"
test = ("https://dynasty-scans.com/images/1245", {
"url": "15e54bd94148a07ed037f387d046c27befa043b2",
"keyword": "0d8976c2d6fbc9ed6aa712642631b96e456dc37f",
})
example = "https://dynasty-scans.com/images/12345"
# Return a one-element tuple containing self.query.
# NOTE(review): self.query is presumably the numeric ID captured by this
# class's pattern (r"/images/(\d+)") -- it is assigned outside this view.
def images(self):
return (self.query,)

View File

@ -84,48 +84,13 @@ BASE_PATTERN = E621Extractor.update({
# Tag-search extractor: combines the e621 base class with the tag extractor
# from the danbooru module; only class attributes are defined here.
class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
"""Extractor for e621 posts from tag searches"""
# After BASE_PATTERN: "/post" or "/posts", then either a query string
# containing "tags=" or the path form "/index/<digits>/". The capture
# group added here is the tag text up to the next "&" or "#".
pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)"
# Sample URLs for the e621.net, e926.net and e6ai.net hosts. Entries with
# a dict carry expected "url" and "content" hash strings; bare entries
# carry no expectations.
test = (
("https://e621.net/posts?tags=anry", {
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
}),
("https://e621.net/post/index/1/anry"),
("https://e621.net/post?tags=anry"),
("https://e926.net/posts?tags=anry", {
"url": "12198b275c62ffe2de67cca676c8e64de80c425d",
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
}),
("https://e926.net/post/index/1/anry"),
("https://e926.net/post?tags=anry"),
("https://e6ai.net/posts?tags=anry"),
("https://e6ai.net/post/index/1/anry"),
("https://e6ai.net/post?tags=anry"),
)
# Representative URL with a "TAG" placeholder.
example = "https://e621.net/posts?tags=TAG"
class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
"""Extractor for e621 pools"""
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
test = (
("https://e621.net/pools/73", {
"url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
"content": "91abe5d5334425d9787811d7f06d34c77974cd22",
}),
("https://e621.net/pool/show/73"),
("https://e926.net/pools/73", {
"url": "6936f1b6a18c5c25bee7cad700088dbc2503481b",
"content": "91abe5d5334425d9787811d7f06d34c77974cd22",
}),
("https://e926.net/pool/show/73"),
("https://e6ai.net/pools/3", {
"url": "a6d1ad67a3fa9b9f73731d34d5f6f26f7e85855f",
}),
("https://e6ai.net/pool/show/3"),
)
example = "https://e621.net/pools/12345"
def posts(self):
self.log.info("Fetching posts of pool %s", self.pool_id)
@ -151,67 +116,7 @@ class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
"""Extractor for single e621 posts"""
pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
test = (
("https://e621.net/posts/535", {
"url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
"keyword": {"date": "dt:2007-02-17 19:02:32"},
}),
("https://e621.net/posts/3181052", {
"options": (("metadata", "notes,pools"),),
"pattern": r"https://static\d\.e621\.net/data/c6/8c"
r"/c68cca0643890b615f75fb2719589bff\.png",
"keyword": {
"notes": [
{
"body": "Little Legends 2",
"created_at": "2022-05-16T13:58:38.877-04:00",
"creator_id": 517450,
"creator_name": "EeveeCuddler69",
"height": 475,
"id": 321296,
"is_active": True,
"post_id": 3181052,
"updated_at": "2022-05-16T13:59:02.050-04:00",
"version": 3,
"width": 809,
"x": 83,
"y": 117,
},
],
"pools": [
{
"category": "series",
"created_at": "2022-02-17T00:29:22.669-05:00",
"creator_id": 1077440,
"creator_name": "Yeetus90",
"description": "* \"Little Legends\":/pools/27971\r\n"
"* Little Legends 2\r\n"
"* \"Little Legends 3\":/pools/27481",
"id": 27492,
"is_active": False,
"name": "Little Legends 2",
"post_count": 39,
"post_ids": list,
"updated_at": "2022-03-27T06:30:03.382-04:00"
},
],
},
}),
("https://e621.net/post/show/535"),
("https://e926.net/posts/535", {
"url": "17aec8ebd8fab098d321adcb62a2db59dab1f4bf",
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
}),
("https://e926.net/post/show/535"),
("https://e6ai.net/posts/23", {
"url": "3c85a806b3d9eec861948af421fe0e8ad6b8f881",
"content": "a05a484e4eb64637d56d751c02e659b4bc8ea5d5",
}),
("https://e6ai.net/post/show/23"),
)
example = "https://e621.net/posts/12345"
def posts(self):
url = "{}/posts/{}.json".format(self.root, self.post_id)
@ -221,23 +126,7 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
"""Extractor for popular images from e621"""
pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
test = (
("https://e621.net/explore/posts/popular"),
(("https://e621.net/explore/posts/popular"
"?date=2019-06-01&scale=month"), {
"pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
"count": ">= 70",
}),
("https://e926.net/explore/posts/popular"),
(("https://e926.net/explore/posts/popular"
"?date=2019-06-01&scale=month"), {
"pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+",
"count": ">= 70",
}),
("https://e6ai.net/explore/posts/popular"),
)
example = "https://e621.net/explore/posts/popular"
# Return the result of self._pagination() for the "/popular.json" endpoint,
# forwarding self.params unchanged.
# NOTE(review): self.params presumably holds the query captured by the
# pattern's optional group -- it is assigned outside this view.
def posts(self):
return self._pagination("/popular.json", self.params)
@ -249,21 +138,7 @@ class E621FavoriteExtractor(E621Extractor):
directory_fmt = ("{category}", "Favorites", "{user_id}")
archive_fmt = "f_{user_id}_{id}"
pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
test = (
("https://e621.net/favorites"),
("https://e621.net/favorites?page=2&user_id=53275", {
"pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
"count": "> 260",
}),
("https://e926.net/favorites"),
("https://e926.net/favorites?page=2&user_id=53275", {
"pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+",
"count": "> 260",
}),
("https://e6ai.net/favorites"),
)
example = "https://e621.net/favorites"
def __init__(self, match):
E621Extractor.__init__(self, match)

View File

@ -91,29 +91,7 @@ class EromeAlbumExtractor(EromeExtractor):
"""Extractor for albums on erome.com"""
subcategory = "album"
pattern = BASE_PATTERN + r"/a/(\w+)"
test = (
("https://www.erome.com/a/NQgdlWvk", {
"pattern": r"https://v\d+\.erome\.com/\d+"
r"/NQgdlWvk/j7jlzmYB_480p\.mp4",
"count": 1,
"keyword": {
"album_id": "NQgdlWvk",
"num": 1,
"title": "porn",
"user": "yYgWBZw8o8qsMzM",
},
}),
("https://www.erome.com/a/TdbZ4ogi", {
"pattern": r"https://s\d+\.erome\.com/\d+/TdbZ4ogi/\w+",
"count": 6,
"keyword": {
"album_id": "TdbZ4ogi",
"num": int,
"title": "82e78cfbb461ad87198f927fcb1fda9a1efac9ff.",
"user": "yYgWBZw8o8qsMzM",
},
}),
)
example = "https://www.erome.com/a/ID"
# Return a one-element tuple containing self.item.
# NOTE(review): self.item is presumably the album ID captured by this
# class's pattern (r"/a/(\w+)") -- it is assigned outside this view.
def albums(self):
return (self.item,)
@ -122,10 +100,7 @@ class EromeAlbumExtractor(EromeExtractor):
class EromeUserExtractor(EromeExtractor):
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)"
test = ("https://www.erome.com/yYgWBZw8o8qsMzM", {
"range": "1-25",
"count": 25,
})
example = "https://www.erome.com/USER"
def albums(self):
url = "{}/{}".format(self.root, self.item)
@ -135,10 +110,7 @@ class EromeUserExtractor(EromeExtractor):
class EromeSearchExtractor(EromeExtractor):
subcategory = "search"
pattern = BASE_PATTERN + r"/search\?q=([^&#]+)"
test = ("https://www.erome.com/search?q=cute", {
"range": "1-25",
"count": 25,
})
example = "https://www.erome.com/search?q=QUERY"
def albums(self):
url = self.root + "/search"

View File

@ -45,7 +45,6 @@ class ExhentaiExtractor(Extractor):
if self.version != "ex":
self.cookies.set("nw", "1", domain=self.cookies_domain)
self.session.headers["Referer"] = self.root + "/"
self.original = self.config("original", True)
limits = self.config("limits", False)
@ -109,61 +108,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
pattern = (BASE_PATTERN +
r"(?:/g/(\d+)/([\da-f]{10})"
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
test = (
("https://exhentai.org/g/1200119/d55c44d3d0/", {
"options": (("original", False),),
"keyword": {
"cost": int,
"date": "dt:2018-03-18 20:14:00",
"eh_category": "Non-H",
"expunged": False,
"favorites": r"re:^[12]\d$",
"filecount": "4",
"filesize": 1488978,
"gid": 1200119,
"height": int,
"image_token": "re:[0-9a-f]{10}",
"lang": "ja",
"language": "Japanese",
"parent": "",
"rating": r"re:\d\.\d+",
"size": int,
"tags": [
"parody:komi-san wa komyushou desu.",
"character:shouko komi",
"group:seventh lowlife",
"other:sample",
],
"thumb": "https://exhentai.org/t/ce/0a/ce0a5bcb583229a9b07c0f8"
"3bcb1630ab1350640-624622-736-1036-jpg_250.jpg",
"title": "C93 [Seventh_Lowlife] Komi-san ha Tokidoki Daitan de"
"su (Komi-san wa Komyushou desu) [Sample]",
"title_jpn": "(C93) [Comiketjack (わ!)] 古見さんは、時々大胆"
"です。 (古見さんは、コミュ症です。) [見本]",
"token": "d55c44d3d0",
"torrentcount": "0",
"uploader": "klorpa",
"width": int,
},
"content": ("2c68cff8a7ca540a78c36fdbf5fbae0260484f87",
"e9891a4c017ed0bb734cd1efba5cd03f594d31ff"),
}),
("https://exhentai.org/g/960461/4f0e369d82/", {
"exception": exception.NotFoundError,
}),
("http://exhentai.org/g/962698/7f02358e00/", {
"exception": exception.AuthorizationError,
}),
("https://exhentai.org/s/f68367b4c8/1200119-3", {
"options": (("original", False),),
"count": 2,
}),
("https://e-hentai.org/s/f68367b4c8/1200119-3", {
"options": (("original", False),),
"count": 2,
}),
("https://g.e-hentai.org/g/1200119/d55c44d3d0/"),
)
example = "https://e-hentai.org/g/12345/67890abcde/"
def __init__(self, match):
ExhentaiExtractor.__init__(self, match)
@ -179,6 +124,20 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
if source == "hitomi":
self.items = self._items_hitomi
# Send an "addfav" POST request for the current gallery to the site's
# "/gallerypopups.php" endpoint.
#   slot: value submitted as the "favcat" form field (default "0").
#         NOTE(review): presumably the favorites category index -- confirm.
# Nothing is returned; the response object from self.request() is discarded.
def favorite(self, slot="0"):
url = self.root + "/gallerypopups.php"
# Query-string parameters: gallery ID, gallery token and the action name.
params = {
"gid": self.gallery_id,
"t" : self.gallery_token,
"act": "addfav",
}
# Form fields sent in the POST body.
data = {
"favcat" : slot,
"apply" : "Apply Changes",
"update" : "1",
}
self.request(url, method="POST", params=params, data=data)
def items(self):
self.login()
@ -223,6 +182,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
data["_http_validate"] = None
yield Message.Url, url, data
fav = self.config("fav")
if fav is not None:
self.favorite(fav)
def _items_hitomi(self):
if self.config("metadata", False):
data = self.metadata_from_api()
@ -463,26 +426,10 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
"""Extractor for exhentai search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/(?:\?([^#]*)|tag/([^/?#]+))"
test = (
("https://e-hentai.org/?f_search=touhou"),
("https://exhentai.org/?f_cats=767&f_search=touhou"),
("https://exhentai.org/tag/parody:touhou+project"),
(("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0"
"&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0"
"&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), {
"pattern": ExhentaiGalleryExtractor.pattern,
"range": "1-30",
"count": 30,
"keyword": {
"gallery_id": int,
"gallery_token": r"re:^[0-9a-f]{10}$"
},
}),
)
example = "https://e-hentai.org/?f_search=QUERY"
def __init__(self, match):
ExhentaiExtractor.__init__(self, match)
self.search_url = self.root
_, query, tag = match.groups()
if tag:
@ -497,6 +444,9 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
if "next" not in self.params:
self.params["page"] = text.parse_int(self.params.get("page"))
# Set the URL used for search requests to the site root.
# NOTE(review): the caller of _init() is not visible in this view --
# presumably the base Extractor invokes it during setup; confirm.
def _init(self):
self.search_url = self.root
def items(self):
self.login()
data = {"_extractor": ExhentaiGalleryExtractor}
@ -533,15 +483,7 @@ class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
"""Extractor for favorited exhentai galleries"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites\.php(?:\?([^#]*)())?"
test = (
("https://e-hentai.org/favorites.php", {
"count": 1,
"pattern": r"https?://e-hentai\.org/g/1200119/d55c44d3d0"
}),
("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"
"&f_apply=Search+Favorites"),
)
example = "https://e-hentai.org/favorites.php"
def __init__(self, match):
ExhentaiSearchExtractor.__init__(self, match)
def _init(self):
self.search_url = self.root + "/favorites.php"

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2019 Mike Fährmann
# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -13,24 +13,11 @@ from .. import text, util
class FallenangelsChapterExtractor(ChapterExtractor):
"""Extractor for manga-chapters from fascans.com"""
"""Extractor for manga chapters from fascans.com"""
category = "fallenangels"
pattern = (r"(?:https?://)?(manga|truyen)\.fascans\.com"
r"/manga/([^/?#]+)/([^/?#]+)")
test = (
("https://manga.fascans.com/manga/chronos-ruler/20/1", {
"url": "4604a7914566cc2da0ff789aa178e2d1c8c241e3",
"keyword": "2dfcc50020e32cd207be88e2a8fac0933e36bdfb",
}),
("http://truyen.fascans.com/manga/hungry-marie/8", {
"url": "1f923d9cb337d5e7bbf4323719881794a951c6ae",
"keyword": "2bdb7334c0e3eceb9946ffd3132df679b4a94f6a",
}),
("http://manga.fascans.com/manga/rakudai-kishi-no-eiyuutan/19.5", {
"url": "273f6863966c83ea79ad5846a2866e08067d3f0e",
"keyword": "d1065685bfe0054c4ff2a0f20acb089de4cec253",
}),
)
example = "https://manga.fascans.com/manga/NAME/CHAPTER/"
def __init__(self, match):
self.version, self.manga, self.chapter = match.groups()
@ -66,16 +53,7 @@ class FallenangelsMangaExtractor(MangaExtractor):
chapterclass = FallenangelsChapterExtractor
category = "fallenangels"
pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
test = (
("https://manga.fascans.com/manga/chronos-ruler", {
"url": "eea07dd50f5bc4903aa09e2cc3e45c7241c9a9c2",
"keyword": "c414249525d4c74ad83498b3c59a813557e59d7e",
}),
("https://truyen.fascans.com/manga/rakudai-kishi-no-eiyuutan", {
"url": "51a731a6b82d5eb7a335fbae6b02d06aeb2ab07b",
"keyword": "2d2a2a5d9ea5925eb9a47bb13d848967f3af086c",
}),
)
example = "https://manga.fascans.com/manga/NAME"
def __init__(self, match):
url = "https://" + match.group(1)

View File

@ -10,7 +10,6 @@ from .common import Extractor, Message
from .. import text
import re
BASE_PATTERN = (
r"(?:https?://)?(?:"
r"(?!www\.)([\w-]+)\.fanbox\.cc|"
@ -30,12 +29,12 @@ class FanboxExtractor(Extractor):
def _init(self):
self.embeds = self.config("embeds", True)
def items(self):
if self._warning:
if not self.cookies_check(("FANBOXSESSID",)):
self.log.warning("no 'FANBOXSESSID' cookie set")
FanboxExtractor._warning = False
def items(self):
for content_body, post in self.posts():
yield Message.Directory, post
yield from self._get_urls_from_post(content_body, post)
@ -243,20 +242,7 @@ class FanboxCreatorExtractor(FanboxExtractor):
"""Extractor for a Fanbox creator's works"""
subcategory = "creator"
pattern = BASE_PATTERN + r"(?:/posts)?/?$"
test = (
("https://xub.fanbox.cc", {
"range": "1-15",
"count": ">= 15",
"keyword": {
"creatorId" : "xub",
"tags" : list,
"title" : str,
},
}),
("https://xub.fanbox.cc/posts"),
("https://www.fanbox.cc/@xub/"),
("https://www.fanbox.cc/@xub/posts"),
)
example = "https://USER.fanbox.cc/"
def __init__(self, match):
FanboxExtractor.__init__(self, match)
@ -271,55 +257,7 @@ class FanboxPostExtractor(FanboxExtractor):
"""Extractor for media from a single Fanbox post"""
subcategory = "post"
pattern = BASE_PATTERN + r"/posts/(\d+)"
test = (
("https://www.fanbox.cc/@xub/posts/1910054", {
"count": 3,
"keyword": {
"title": "えま★おうがすと",
"tags": list,
"hasAdultContent": True,
"isCoverImage": False
},
}),
# entry post type, image embedded in html of the post
("https://nekoworks.fanbox.cc/posts/915", {
"count": 2,
"keyword": {
"title": "【SAYORI FAN CLUB】お届け内容",
"tags": list,
"html": str,
"hasAdultContent": True
},
}),
# article post type, imageMap, 2 twitter embeds, fanbox embed
("https://steelwire.fanbox.cc/posts/285502", {
"options": (("embeds", True),),
"count": 10,
"keyword": {
"title": "イラスト+SS義足の炭鉱少年が義足を見せてくれるだけ 【全体公開版】",
"tags": list,
"articleBody": dict,
"hasAdultContent": True
},
}),
# 'content' metadata (#3020)
("https://www.fanbox.cc/@official-en/posts/4326303", {
"keyword": {
"content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, "
r"September 5th, 2022, we are happy to announce "
r"the start of the FANBOX hashtag event "
r"#MySetupTour ! \nAbout the event\nTo join this "
r"event .+ \nPlease check this page for further "
r"details regarding the Privacy & Terms.\n"
r"https://fanbox.pixiv.help/.+/10184952456601\n\n\n"
r"Thank you for your continued support of FANBOX.$",
},
}),
# imageMap file order (#2718)
("https://mochirong.fanbox.cc/posts/3746116", {
"url": "c92ddd06f2efc4a5fe30ec67e21544f79a5c4062",
}),
)
example = "https://USER.fanbox.cc/posts/12345"
def __init__(self, match):
FanboxExtractor.__init__(self, match)
@ -334,9 +272,7 @@ class FanboxRedirectExtractor(Extractor):
category = "fanbox"
subcategory = "redirect"
pattern = r"(?:https?://)?(?:www\.)?pixiv\.net/fanbox/creator/(\d+)"
test = ("https://www.pixiv.net/fanbox/creator/52336352", {
"pattern": FanboxCreatorExtractor.pattern,
})
example = "https://www.pixiv.net/fanbox/creator/12345"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -7,7 +7,7 @@
"""Extractors for https://fanleaks.club/"""
from .common import Extractor, Message
from .. import text, exception
from .. import text
class FanleaksExtractor(Extractor):
@ -36,34 +36,10 @@ class FanleaksExtractor(Extractor):
class FanleaksPostExtractor(FanleaksExtractor):
"""Extractor for individual posts on fanleak.club"""
"""Extractor for individual posts on fanleaks.club"""
subcategory = "post"
pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)"
test = (
("https://fanleaks.club/selti/880", {
"pattern": (r"https://fanleaks\.club//models"
r"/selti/images/selti_0880\.jpg"),
"keyword": {
"model_id": "selti",
"model" : "Selti",
"id" : 880,
"type" : "photo",
},
}),
("https://fanleaks.club/daisy-keech/1038", {
"pattern": (r"https://fanleaks\.club//models"
r"/daisy-keech/videos/daisy-keech_1038\.mp4"),
"keyword": {
"model_id": "daisy-keech",
"model" : "Daisy Keech",
"id" : 1038,
"type" : "video",
},
}),
("https://fanleaks.club/hannahowo/000", {
"exception": exception.NotFoundError,
}),
)
example = "https://fanleaks.club/MODEL/12345"
def __init__(self, match):
FanleaksExtractor.__init__(self, match)
@ -79,22 +55,7 @@ class FanleaksModelExtractor(FanleaksExtractor):
subcategory = "model"
pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club"
r"/(?!latest/?$)([^/?#]+)/?$")
test = (
("https://fanleaks.club/hannahowo", {
"pattern": (r"https://fanleaks\.club//models"
r"/hannahowo/(images|videos)/hannahowo_\d+\.\w+"),
"range" : "1-100",
"count" : 100,
}),
("https://fanleaks.club/belle-delphine", {
"pattern": (r"https://fanleaks\.club//models"
r"/belle-delphine/(images|videos)"
r"/belle-delphine_\d+\.\w+"),
"range" : "1-100",
"count" : 100,
}),
("https://fanleaks.club/daisy-keech"),
)
example = "https://fanleaks.club/MODEL"
def items(self):
page_num = 1
@ -102,8 +63,7 @@ class FanleaksModelExtractor(FanleaksExtractor):
self.root + "/" + self.model_id, notfound="model").text
data = {
"model_id": self.model_id,
"model" : text.unescape(
text.extr(page, 'mt-4">', "</h1>")),
"model" : text.unescape(text.extr(page, 'mt-4">', "</h1>")),
"type" : "photo",
}
page_url = text.extr(page, "url: '", "'")

View File

@ -22,7 +22,6 @@ class FantiaExtractor(Extractor):
def _init(self):
self.headers = {
"Accept" : "application/json, text/plain, */*",
"Referer": self.root,
"X-Requested-With": "XMLHttpRequest",
}
self._empty_plan = {
@ -65,11 +64,9 @@ class FantiaExtractor(Extractor):
def _pagination(self, url):
params = {"page": 1}
headers = self.headers.copy()
del headers["X-Requested-With"]
while True:
page = self.request(url, params=params, headers=headers).text
page = self.request(url, params=params).text
self._csrf_token(page)
post_id = None
@ -173,17 +170,7 @@ class FantiaCreatorExtractor(FantiaExtractor):
"""Extractor for a Fantia creator's works"""
subcategory = "creator"
pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/fanclubs/(\d+)"
test = (
("https://fantia.jp/fanclubs/6939", {
"range": "1-25",
"count": ">= 25",
"keyword": {
"fanclub_user_id" : 52152,
"tags" : list,
"title" : str,
},
}),
)
example = "https://fantia.jp/fanclubs/12345"
def __init__(self, match):
FantiaExtractor.__init__(self, match)
@ -198,53 +185,7 @@ class FantiaPostExtractor(FantiaExtractor):
"""Extractor for media from a single Fantia post"""
subcategory = "post"
pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/posts/(\d+)"
test = (
("https://fantia.jp/posts/1166373", {
"pattern": r"https://("
r"c\.fantia\.jp/uploads/post/file/1166373/|"
r"cc\.fantia\.jp/uploads/post_content_photo"
r"/file/732549[01]|"
r"fantia\.jp/posts/1166373/album_image\?)",
"keyword": {
"blogpost_text": r"re:^$|"
r"This is a test.\n\nThis is a test.\n\n|"
r"Link to video:\nhttps://www.youtube.com"
r"/watch\?v=5SSdvNcAagI\n\nhtml img from "
r"another site:\n\n\n\n\n\n",
"comment": "\n\n",
"content_category": "re:thumb|blog|photo_gallery",
"content_comment": str,
"content_filename": "re:|",
"content_title": r"re:Test (Blog Content \d+|Image Gallery)"
r"|thumb",
"date": "dt:2022-03-09 16:46:12",
"fanclub_id": 356320,
"fanclub_name": "Test Fantia",
"fanclub_url": "https://fantia.jp/fanclubs/356320",
"fanclub_user_id": 7487131,
"fanclub_user_name": "2022/03/08 15:13:52の名無し",
"file_url": str,
"filename": str,
"num": int,
"plan": dict,
"post_id": 1166373,
"post_title": "Test Fantia Post",
"post_url": "https://fantia.jp/posts/1166373",
"posted_at": "Thu, 10 Mar 2022 01:46:12 +0900",
"rating": "general",
"tags": [],
},
}),
("https://fantia.jp/posts/508363", {
"count": 6,
"keyword": {
"post_title": "zunda逆バニーでおしりコッショリ",
"tags": list,
"rating": "adult",
"post_id": 508363
},
}),
)
example = "https://fantia.jp/posts/12345"
def __init__(self, match):
FantiaExtractor.__init__(self, match)

View File

@ -14,25 +14,13 @@ class FapachiPostExtractor(Extractor):
"""Extractor for individual posts on fapachi.com"""
category = "fapachi"
subcategory = "post"
root = "https://fapachi.com"
directory_fmt = ("{category}", "{user}")
filename_fmt = "{user}_{id}.{extension}"
archive_fmt = "{user}_{id}"
pattern = (r"(?:https?://)?(?:www\.)?fapachi\.com"
r"/(?!search/)([^/?#]+)/media/(\d+)")
root = "https://fapachi.com"
test = (
# NSFW
("https://fapachi.com/sonson/media/0082", {
"pattern": (r"https://fapachi\.com/models/s/o/"
r"sonson/1/full/sonson_0082\.jpeg"),
"keyword": {
"user": "sonson",
"id" : "0082",
},
}),
# NSFW
("https://fapachi.com/ferxiita/media/0159"),
)
example = "https://fapachi.com/MODEL/media/12345"
def __init__(self, match):
Extractor.__init__(self, match)
@ -54,17 +42,10 @@ class FapachiUserExtractor(Extractor):
"""Extractor for all posts from a fapachi user"""
category = "fapachi"
subcategory = "user"
root = "https://fapachi.com"
pattern = (r"(?:https?://)?(?:www\.)?fapachi\.com"
r"/(?!search(?:/|$))([^/?#]+)(?:/page/(\d+))?$")
root = "https://fapachi.com"
test = (
("https://fapachi.com/sonson", {
"pattern": FapachiPostExtractor.pattern,
"range" : "1-50",
"count" : 50,
}),
("https://fapachi.com/ferxiita/page/3"),
)
example = "https://fapachi.com/MODEL"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -19,32 +19,7 @@ class FapelloPostExtractor(Extractor):
archive_fmt = "{type}_{model}_{id}"
pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)")
test = (
("https://fapello.com/carrykey/530/", {
"pattern": (r"https://fapello\.com/content/c/a"
r"/carrykey/1000/carrykey_0530\.jpg"),
"keyword": {
"model": "carrykey",
"id" : 530,
"type" : "photo",
"thumbnail": "",
},
}),
("https://fapello.com/vladislava-661/693/", {
"pattern": (r"https://cdn\.fapello\.com/content/v/l"
r"/vladislava-661/1000/vladislava-661_0693\.mp4"),
"keyword": {
"model": "vladislava-661",
"id" : 693,
"type" : "video",
"thumbnail": ("https://fapello.com/content/v/l"
"/vladislava-661/1000/vladislava-661_0693.jpg"),
},
}),
("https://fapello.com/carrykey/000/", {
"exception": exception.NotFoundError,
}),
)
example = "https://fapello.com/MODEL/12345/"
def __init__(self, match):
Extractor.__init__(self, match)
@ -77,14 +52,7 @@ class FapelloModelExtractor(Extractor):
r"/(?!top-(?:likes|followers)|popular_videos"
r"|videos|trending|search/?$)"
r"([^/?#]+)/?$")
test = (
("https://fapello.com/hyoon/", {
"pattern": FapelloPostExtractor.pattern,
"range" : "1-50",
"count" : 50,
}),
("https://fapello.com/kobaebeefboo/"),
)
example = "https://fapello.com/model/"
def __init__(self, match):
Extractor.__init__(self, match)
@ -112,22 +80,7 @@ class FapelloPathExtractor(Extractor):
pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
r"/(?!search/?$)(top-(?:likes|followers)|videos|trending"
r"|popular_videos/[^/?#]+)/?$")
test = (
("https://fapello.com/top-likes/", {
"pattern": FapelloModelExtractor.pattern,
"range" : "1-10",
"count" : 10,
}),
("https://fapello.com/videos/", {
"pattern": FapelloPostExtractor.pattern,
"range" : "1-10",
"count" : 10,
}),
("https://fapello.com/top-followers/"),
("https://fapello.com/trending/"),
("https://fapello.com/popular_videos/twelve_hours/"),
("https://fapello.com/popular_videos/week/"),
)
example = "https://fapello.com/trending/"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2022 Mike Fährmann
# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -64,42 +64,7 @@ class FlickrImageExtractor(FlickrExtractor):
r"(?:(?:www\.|secure\.|m\.)?flickr\.com/photos/[^/?#]+/"
r"|[\w-]+\.static\.?flickr\.com/(?:\d+/)+)(\d+)"
r"|flic\.kr/p/([A-Za-z1-9]+))")
test = (
("https://www.flickr.com/photos/departingyyz/16089302239", {
"pattern": pattern,
"content": ("3133006c6d657fe54cf7d4c46b82abbcb0efaf9f",
"0821a28ee46386e85b02b67cf2720063440a228c"),
"keyword": {
"comments": int,
"description": str,
"extension": "jpg",
"filename": "16089302239_de18cd8017_b",
"id": 16089302239,
"height": 683,
"label": "Large",
"media": "photo",
"url": str,
"views": int,
"width": 1024,
},
}),
("https://secure.flickr.com/photos/departingyyz/16089302239"),
("https://m.flickr.com/photos/departingyyz/16089302239"),
("https://flickr.com/photos/departingyyz/16089302239"),
("https://www.flickr.com/photos/145617051@N08/46733161535", {
"count": 1,
"keyword": {"media": "video"},
}),
("http://c2.staticflickr.com/2/1475/24531000464_9a7503ae68_b.jpg", {
"pattern": pattern}),
("https://farm2.static.flickr.com/1035/1188352415_cb139831d0.jpg", {
"pattern": pattern}),
("https://flic.kr/p/FPVo9U", {
"pattern": pattern}),
("https://www.flickr.com/photos/zzz/16089302238", {
"exception": exception.NotFoundError}),
)
example = "https://www.flickr.com/photos/USER/12345"
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@ -145,18 +110,7 @@ class FlickrAlbumExtractor(FlickrExtractor):
"Albums", "{album[id]} {album[title]}")
archive_fmt = "a_{album[id]}_{id}"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?"
test = (
(("https://www.flickr.com/photos/shona_s/albums/72157633471741607"), {
"pattern": FlickrImageExtractor.pattern,
"count": 6,
}),
("https://www.flickr.com/photos/shona_s/albums", {
"pattern": pattern,
"count": 2,
}),
("https://secure.flickr.com/photos/shona_s/albums"),
("https://m.flickr.com/photos/shona_s/albums"),
)
example = "https://www.flickr.com/photos/USER/albums/12345"
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@ -194,11 +148,7 @@ class FlickrGalleryExtractor(FlickrExtractor):
"Galleries", "{gallery[gallery_id]} {gallery[title]}")
archive_fmt = "g_{gallery[id]}_{id}"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/galleries/(\d+)"
test = (("https://www.flickr.com/photos/flickr/"
"galleries/72157681572514792/"), {
"pattern": FlickrImageExtractor.pattern,
"count": ">= 10",
})
example = "https://www.flickr.com/photos/USER/galleries/12345/"
def __init__(self, match):
FlickrExtractor.__init__(self, match)
@ -219,10 +169,7 @@ class FlickrGroupExtractor(FlickrExtractor):
directory_fmt = ("{category}", "Groups", "{group[groupname]}")
archive_fmt = "G_{group[nsid]}_{id}"
pattern = BASE_PATTERN + r"/groups/([^/?#]+)"
test = ("https://www.flickr.com/groups/bird_headshots/", {
"pattern": FlickrImageExtractor.pattern,
"count": "> 150",
})
example = "https://www.flickr.com/groups/NAME/"
def metadata(self):
self.group = self.api.urls_lookupGroup(self.item_id)
@ -237,10 +184,7 @@ class FlickrUserExtractor(FlickrExtractor):
subcategory = "user"
archive_fmt = "u_{user[nsid]}_{id}"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/?$"
test = ("https://www.flickr.com/photos/shona_s/", {
"pattern": FlickrImageExtractor.pattern,
"count": 28,
})
example = "https://www.flickr.com/photos/USER/"
def photos(self):
return self.api.people_getPhotos(self.user["nsid"])
@ -252,10 +196,7 @@ class FlickrFavoriteExtractor(FlickrExtractor):
directory_fmt = ("{category}", "{user[username]}", "Favorites")
archive_fmt = "f_{user[nsid]}_{id}"
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/favorites"
test = ("https://www.flickr.com/photos/shona_s/favorites", {
"pattern": FlickrImageExtractor.pattern,
"count": 4,
})
example = "https://www.flickr.com/photos/USER/favorites"
def photos(self):
return self.api.favorites_getList(self.user["nsid"])
@ -267,11 +208,7 @@ class FlickrSearchExtractor(FlickrExtractor):
directory_fmt = ("{category}", "Search", "{search[text]}")
archive_fmt = "s_{search}_{id}"
pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
test = (
("https://flickr.com/search/?text=mountain"),
("https://flickr.com/search/?text=tree%20cloud%20house"
"&color_codes=4&styles=minimalism"),
)
example = "https://flickr.com/search/?text=QUERY"
def __init__(self, match):
FlickrExtractor.__init__(self, match)

View File

@ -25,9 +25,6 @@ class FoolfuukaExtractor(BaseExtractor):
if self.category == "b4k":
self.remote = self._remote_direct
def _init(self):
self.session.headers["Referer"] = self.root + "/"
def items(self):
yield Message.Directory, self.metadata()
for post in self.posts():
@ -111,43 +108,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
directory_fmt = ("{category}", "{board[shortname]}",
"{thread_num} {title|comment[:50]}")
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
test = (
("https://archive.4plebs.org/tg/thread/54059290", {
"url": "fd823f17b5001442b941fddcd9ec91bafedfbc79",
}),
("https://archived.moe/gd/thread/309639/", {
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
"content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
}),
("https://archived.moe/a/thread/159767162/", {
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
}),
("https://archiveofsins.com/h/thread/4668813/", {
"url": "f612d287087e10a228ef69517cf811539db9a102",
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
}),
("https://arch.b4k.co/meta/thread/196/", {
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
}),
("https://desuarchive.org/a/thread/159542679/", {
"url": "e7d624aded15a069194e38dc731ec23217a422fb",
}),
("https://boards.fireden.net/sci/thread/11264294/", {
"url": "61cab625c95584a12a30049d054931d64f8d20aa",
}),
("https://archive.palanq.win/c/thread/4209598/", {
"url": "1f9b5570d228f1f2991c827a6631030bc0e5933c",
}),
("https://rbt.asia/g/thread/61487650/", {
"url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
}),
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
"url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
}),
("https://thebarchive.com/b/thread/739772332/", {
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
}),
)
example = "https://archived.moe/a/thread/12345/"
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
@ -175,17 +136,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
test = (
("https://archive.4plebs.org/tg/"),
("https://archived.moe/gd/"),
("https://archiveofsins.com/h/"),
("https://arch.b4k.co/meta/"),
("https://desuarchive.org/a/"),
("https://boards.fireden.net/sci/"),
("https://archive.palanq.win/c/"),
("https://rbt.asia/g/"),
("https://thebarchive.com/b/"),
)
example = "https://archived.moe/a/"
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
@ -217,18 +168,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
subcategory = "search"
directory_fmt = ("{category}", "search", "{search}")
pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
example = "https://archived.moe/_/search/text/QUERY/"
request_interval = 1.0
test = (
("https://archive.4plebs.org/_/search/text/test/"),
("https://archived.moe/_/search/text/test/"),
("https://archiveofsins.com/_/search/text/test/"),
("https://archiveofsins.com/_/search/text/test/"),
("https://desuarchive.org/_/search/text/test/"),
("https://boards.fireden.net/_/search/text/test/"),
("https://archive.palanq.win/_/search/text/test/"),
("https://rbt.asia/_/search/text/test/"),
("https://thebarchive.com/_/search/text/test/"),
)
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
@ -283,17 +224,7 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
subcategory = "gallery"
directory_fmt = ("{category}", "{board}", "gallery")
pattern = BASE_PATTERN + r"/([^/?#]+)/gallery(?:/(\d+))?"
test = (
("https://archive.4plebs.org/tg/gallery/1"),
("https://archived.moe/gd/gallery/2"),
("https://archiveofsins.com/h/gallery/3"),
("https://arch.b4k.co/meta/gallery/"),
("https://desuarchive.org/a/gallery/5"),
("https://boards.fireden.net/sci/gallery/6"),
("https://archive.palanq.win/c/gallery"),
("https://rbt.asia/g/gallery/8"),
("https://thebarchive.com/b/gallery/9"),
)
example = "https://archived.moe/a/gallery"
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)

View File

@ -53,13 +53,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
test = (
(("https://read.powermanga.org"
"/read/one_piece_digital_colour_comics/en/0/75/"), {
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
"keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe",
}),
)
example = "https://read.powermanga.org/read/MANGA/en/0/123/"
def items(self):
page = self.request(self.gallery_url).text
@ -103,23 +97,7 @@ class FoolslideMangaExtractor(FoolslideExtractor):
subcategory = "manga"
categorytransfer = True
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
test = (
(("https://read.powermanga.org"
"/series/one_piece_digital_colour_comics/"), {
"count": ">= 1",
"keyword": {
"chapter": int,
"chapter_minor": str,
"chapter_string": str,
"group": "PowerManga",
"lang": "en",
"language": "English",
"manga": "One Piece Digital Colour Comics",
"title": str,
"volume": int,
},
}),
)
example = "https://read.powermanga.org/series/MANGA/"
def items(self):
page = self.request(self.gallery_url).text

View File

@ -31,6 +31,7 @@ class FuraffinityExtractor(Extractor):
def _init(self):
self.offset = 0
self.external = self.config("external", False)
if self.config("descriptions") == "html":
self._process_description = str.strip
@ -41,13 +42,12 @@ class FuraffinityExtractor(Extractor):
else:
self._new_layout = None
def items(self):
if self._warning:
if not self.cookies_check(self.cookies_names):
self.log.warning("no 'a' and 'b' session cookies set")
FuraffinityExtractor._warning = False
external = self.config("external", False)
def items(self):
metadata = self.metadata()
for post_id in util.advance(self.posts(), self.offset):
post = self._parse_post(post_id)
@ -57,7 +57,7 @@ class FuraffinityExtractor(Extractor):
yield Message.Directory, post
yield Message.Url, post["url"], post
if external:
if self.external:
for url in text.extract_iter(
post["_description"], 'href="http', '"'):
yield Message.Queue, "http" + url, post
@ -219,12 +219,7 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's gallery"""
subcategory = "gallery"
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)"
test = ("https://www.furaffinity.net/gallery/mirlinthloth/", {
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
r"/art/mirlinthloth/\d+/\d+.\w+\.\w+",
"range": "45-50",
"count": 6,
})
example = "https://www.furaffinity.net/gallery/USER/"
def posts(self):
return self._pagination("gallery")
@ -235,11 +230,7 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor):
subcategory = "scraps"
directory_fmt = ("{category}", "{user!l}", "Scraps")
pattern = BASE_PATTERN + r"/scraps/([^/?#]+)"
test = ("https://www.furaffinity.net/scraps/mirlinthloth/", {
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
r"/art/[^/]+(/stories)?/\d+/\d+.\w+.",
"count": ">= 3",
})
example = "https://www.furaffinity.net/scraps/USER/"
def posts(self):
return self._pagination("scraps")
@ -250,13 +241,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
subcategory = "favorite"
directory_fmt = ("{category}", "{user!l}", "Favorites")
pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
r"/art/[^/]+/\d+/\d+.\w+\.\w+",
"keyword": {"favorite_id": int},
"range": "45-50",
"count": 6,
})
example = "https://www.furaffinity.net/favorites/USER/"
def posts(self):
return self._pagination_favorites()
@ -273,19 +258,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
subcategory = "search"
directory_fmt = ("{category}", "Search", "{search}")
pattern = BASE_PATTERN + r"/search(?:/([^/?#]+))?/?[?&]([^#]+)"
test = (
("https://www.furaffinity.net/search/?q=cute", {
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
r"/art/[^/]+/\d+/\d+.\w+\.\w+",
"range": "45-50",
"count": 6,
}),
# first page of search results (#2402)
("https://www.furaffinity.net/search/?q=leaf&range=1day", {
"range": "1-3",
"count": 3,
}),
)
example = "https://www.furaffinity.net/search/?q=QUERY"
def __init__(self, match):
FuraffinityExtractor.__init__(self, match)
@ -304,65 +277,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
"""Extractor for individual posts on furaffinity"""
subcategory = "post"
pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
test = (
("https://www.furaffinity.net/view/21835115/", {
"pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/(download/)?art"
r"/mirlinthloth/music/1488278723/1480267446.mirlinthlot"
r"h_dj_fennmink_-_bude_s_4_ever\.mp3",
"keyword": {
"artist" : "mirlinthloth",
"artist_url" : "mirlinthloth",
"date" : "dt:2016-11-27 17:24:06",
"description": "A Song made playing the game Cosmic DJ.",
"extension" : "mp3",
"filename" : r"re:\d+\.\w+_dj_fennmink_-_bude_s_4_ever",
"id" : 21835115,
"tags" : list,
"title" : "Bude's 4 Ever",
"url" : r"re:https://d\d?\.f(uraffinity|acdn)\.net/art",
"user" : "mirlinthloth",
"views" : int,
"favorites" : int,
"comments" : int,
"rating" : "General",
"fa_category": "Music",
"theme" : "All",
"species" : "Unspecified / Any",
"gender" : "Any",
"width" : 120,
"height" : 120,
},
}),
# 'external' option (#1492)
("https://www.furaffinity.net/view/42166511/", {
"options": (("external", True),),
"pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/"
r"|http://www\.postybirb\.com",
"count": 2,
}),
# no tags (#2277)
("https://www.furaffinity.net/view/45331225/", {
"keyword": {
"artist": "Kota_Remminders",
"artist_url": "kotaremminders",
"date": "dt:2022-01-03 17:49:33",
"fa_category": "Adoptables",
"filename": "1641232173.kotaremminders_chidopts1",
"gender": "Any",
"height": 905,
"id": 45331225,
"rating": "General",
"species": "Unspecified / Any",
"tags": [],
"theme": "All",
"title": "REMINDER",
"width": 1280,
},
}),
("https://furaffinity.net/view/21835115/"),
("https://sfw.furaffinity.net/view/21835115/"),
("https://www.furaffinity.net/full/21835115/"),
)
example = "https://www.furaffinity.net/view/12345/"
def posts(self):
post_id = self.user
@ -375,16 +290,7 @@ class FuraffinityUserExtractor(FuraffinityExtractor):
subcategory = "user"
cookies_domain = None
pattern = BASE_PATTERN + r"/user/([^/?#]+)"
test = (
("https://www.furaffinity.net/user/mirlinthloth/", {
"pattern": r"/gallery/mirlinthloth/$",
}),
("https://www.furaffinity.net/user/mirlinthloth/", {
"options": (("include", "all"),),
"pattern": r"/(gallery|scraps|favorites)/mirlinthloth/$",
"count": 3,
}),
)
example = "https://www.furaffinity.net/user/USER/"
def initialize(self):
pass
@ -402,11 +308,7 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor):
"""Extractor for a furaffinity user's watched users"""
subcategory = "following"
pattern = BASE_PATTERN + "/watchlist/by/([^/?#]+)"
test = ("https://www.furaffinity.net/watchlist/by/mirlinthloth/", {
"pattern": FuraffinityUserExtractor.pattern,
"range": "176-225",
"count": 50,
})
example = "https://www.furaffinity.net/watchlist/by/USER/"
def items(self):
url = "{}/watchlist/by/{}/".format(self.root, self.user)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019 Mike Fährmann
# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -18,22 +18,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
category = "fuskator"
root = "https://fuskator.com"
pattern = r"(?:https?://)?fuskator\.com/(?:thumbs|expanded)/([^/?#]+)"
test = (
("https://fuskator.com/thumbs/d0GnIzXrSKU/", {
"pattern": r"https://i\d+.fuskator.com/large/d0GnIzXrSKU/.+\.jpg",
"count": 22,
"keyword": {
"gallery_id": 473023,
"gallery_hash": "d0GnIzXrSKU",
"title": "re:Shaved Brunette Babe Maria Ryabushkina with ",
"views": int,
"score": float,
"count": 22,
"tags": list,
},
}),
("https://fuskator.com/expanded/gXpKzjgIidA/index.html"),
)
example = "https://fuskator.com/thumbs/ID/"
def __init__(self, match):
self.gallery_hash = match.group(1)
@ -82,13 +67,7 @@ class FuskatorSearchExtractor(Extractor):
subcategory = "search"
root = "https://fuskator.com"
pattern = r"(?:https?://)?fuskator\.com(/(?:search|page)/.+)"
test = (
("https://fuskator.com/search/red_swimsuit/", {
"pattern": FuskatorGalleryExtractor.pattern,
"count": ">= 40",
}),
("https://fuskator.com/page/3/swimsuit/quality/"),
)
example = "https://fuskator.com/search/TAG/"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -115,18 +115,7 @@ class GelbooruTagExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02TagExtractor):
"""Extractor for images from gelbooru.com based on search-tags"""
pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]+)"
test = (
("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
"count": 5,
}),
("https://gelbooru.com/index.php?page=post&s=list&tags=meiya_neon", {
"range": "196-204",
"url": "845a61aa1f90fb4ced841e8b7e62098be2e967bf",
"pattern": r"https://img\d\.gelbooru\.com"
r"/images/../../[0-9a-f]{32}\.jpg",
"count": 9,
}),
)
example = "https://gelbooru.com/index.php?page=post&s=list&tags=TAG"
class GelbooruPoolExtractor(GelbooruBase,
@ -134,11 +123,7 @@ class GelbooruPoolExtractor(GelbooruBase,
"""Extractor for gelbooru pools"""
per_page = 45
pattern = BASE_PATTERN + r"page=pool&s=show&id=(\d+)"
test = (
("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
"count": 6,
}),
)
example = "https://gelbooru.com/index.php?page=pool&s=show&id=12345"
skip = GelbooruBase._skip_offset
@ -169,9 +154,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
"""Extractor for gelbooru favorites"""
per_page = 100
pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)"
test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=279415", {
"count": 3,
})
example = "https://gelbooru.com/index.php?page=favorites&s=view&id=12345"
skip = GelbooruBase._skip_offset
@ -221,76 +204,21 @@ class GelbooruPostExtractor(GelbooruBase,
r"(?=(?:[^#]+&)?page=post(?:&|#|$))"
r"(?=(?:[^#]+&)?s=view(?:&|#|$))"
r"(?:[^#]+&)?id=(\d+)")
test = (
("https://gelbooru.com/index.php?page=post&s=view&id=313638", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"count": 1,
}),
("https://gelbooru.com/index.php?page=post&s=view&id=313638"),
("https://gelbooru.com/index.php?s=view&page=post&id=313638"),
("https://gelbooru.com/index.php?page=post&id=313638&s=view"),
("https://gelbooru.com/index.php?s=view&id=313638&page=post"),
("https://gelbooru.com/index.php?id=313638&page=post&s=view"),
("https://gelbooru.com/index.php?id=313638&s=view&page=post"),
("https://gelbooru.com/index.php?page=post&s=view&id=6018318", {
"options": (("tags", True),),
"content": "977caf22f27c72a5d07ea4d4d9719acdab810991",
"keyword": {
"tags_artist": "kirisaki_shuusei",
"tags_character": str,
"tags_copyright": "vocaloid",
"tags_general": str,
"tags_metadata": str,
},
}),
# video
("https://gelbooru.com/index.php?page=post&s=view&id=5938076", {
"content": "6360452fa8c2f0c1137749e81471238564df832a",
"pattern": r"https://img\d\.gelbooru\.com/images"
r"/22/61/226111273615049235b001b381707bd0\.webm",
}),
# notes
("https://gelbooru.com/index.php?page=post&s=view&id=5997331", {
"options": (("notes", True),),
"keyword": {
"notes": [
{
"body": "Look over this way when you talk~",
"height": 553,
"width": 246,
"x": 35,
"y": 72,
},
{
"body": "Hey~\nAre you listening~?",
"height": 557,
"width": 246,
"x": 1233,
"y": 109,
},
],
},
}),
)
example = "https://gelbooru.com/index.php?page=post&s=view&id=12345"
class GelbooruRedirectExtractor(GelbooruBase, Extractor):
subcategory = "redirect"
pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com"
r"/redirect\.php\?s=([^&#]+)")
test = (("https://gelbooru.com/redirect.php?s=Ly9nZWxib29ydS5jb20vaW5kZXgu"
"cGhwP3BhZ2U9cG9zdCZzPXZpZXcmaWQ9MTgzMDA0Ng=="), {
"pattern": r"https://gelbooru.com/index.php"
r"\?page=post&s=view&id=1830046"
})
example = "https://gelbooru.com/redirect.php?s=BASE64"
def __init__(self, match):
Extractor.__init__(self, match)
self.redirect_url = text.ensure_http_scheme(
binascii.a2b_base64(match.group(1)).decode())
self.url_base64 = match.group(1)
def items(self):
url = text.ensure_http_scheme(binascii.a2b_base64(
self.url_base64).decode())
data = {"_extractor": GelbooruPostExtractor}
yield Message.Queue, self.redirect_url, data
yield Message.Queue, url, data

View File

@ -90,24 +90,7 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
test = (
(("https://the-collection.booru.org"
"/index.php?page=post&s=list&tags=parody"), {
"range": "1-25",
"count": 25,
}),
(("https://illusioncards.booru.org"
"/index.php?page=post&s=list&tags=koikatsu"), {
"range": "1-25",
"count": 25,
}),
("https://allgirl.booru.org/index.php?page=post&s=list&tags=dress", {
"range": "1-25",
"count": 25,
}),
("https://drawfriends.booru.org/index.php?page=post&s=list&tags=all"),
("https://vidyart2.booru.org/index.php?page=post&s=list&tags=all"),
)
example = "https://allgirl.booru.org/index.php?page=post&s=list&tags=TAG"
def __init__(self, match):
GelbooruV01Extractor.__init__(self, match)
@ -128,21 +111,7 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
archive_fmt = "f_{favorite_id}_{id}"
per_page = 50
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
test = (
(("https://the-collection.booru.org"
"/index.php?page=favorites&s=view&id=1166"), {
"count": 2,
}),
(("https://illusioncards.booru.org"
"/index.php?page=favorites&s=view&id=84887"), {
"count": 2,
}),
("https://allgirl.booru.org/index.php?page=favorites&s=view&id=380", {
"count": 4,
}),
("https://drawfriends.booru.org/index.php?page=favorites&s=view&id=1"),
("https://vidyart2.booru.org/index.php?page=favorites&s=view&id=1"),
)
example = "https://allgirl.booru.org/index.php?page=favorites&s=view&id=1"
def __init__(self, match):
GelbooruV01Extractor.__init__(self, match)
@ -161,40 +130,7 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
test = (
(("https://the-collection.booru.org"
"/index.php?page=post&s=view&id=100520"), {
"url": "0329ac8588bb93cf242ca0edbe3e995b4ba554e8",
"content": "1e585874e7b874f7937df1060dd1517fef2f4dfb",
}),
(("https://illusioncards.booru.org"
"/index.php?page=post&s=view&id=82746"), {
"url": "3f9cd2fadf78869b90bc5422f27b48f1af0e0909",
"content": "159e60b92d05597bd1bb63510c2c3e4a4bada1dc",
}),
("https://allgirl.booru.org/index.php?page=post&s=view&id=107213", {
"url": "b416800d2d2b072f80d3b37cfca9cb806fb25d51",
"content": "3e3c65e0854a988696e11adf0de52f8fa90a51c7",
"keyword": {
"created_at": "2021-02-13 16:27:39",
"date": "dt:2021-02-13 16:27:39",
"file_url": "https://img.booru.org/allgirl//images/107"
"/2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb.jpg",
"height": "1200",
"id": "107213",
"md5": "2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb",
"rating": "s",
"score": str,
"source": "",
"tags": "blush dress green_eyes green_hair hatsune_miku "
"long_hair twintails vocaloid",
"uploader": "Honochi31",
"width": "1600"
},
}),
("https://drawfriends.booru.org/index.php?page=post&s=view&id=107474"),
("https://vidyart2.booru.org/index.php?page=post&s=view&id=39168"),
)
example = "https://allgirl.booru.org/index.php?page=post&s=view&id=12345"
def __init__(self, match):
GelbooruV01Extractor.__init__(self, match)

View File

@ -183,6 +183,10 @@ INSTANCES = {
"root": "https://hypnohub.net",
"pattern": r"hypnohub\.net",
},
"xbooru": {
"root": "https://xbooru.com",
"pattern": r"xbooru\.com",
},
}
BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
@ -193,27 +197,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
test = (
("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
"content": ("5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
"622e80be3f496672c44aab5c47fbc6941c61bc79"),
"pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
"count": 2,
}),
("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
}),
("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
"count": ">= 64",
}),
("https://tbib.org/index.php?page=post&s=list&tags=yuyaiyaui", {
"count": ">= 120",
}),
("https://hypnohub.net/index.php?page=post&s=list&tags=gonoike_biwa", {
"url": "fe662b86d38c331fcac9c62af100167d404937dc",
}),
)
example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
@ -232,21 +216,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)"
test = (
("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
"count": 3,
}),
("https://safebooru.org/index.php?page=pool&s=show&id=11", {
"count": 5,
}),
("https://realbooru.com/index.php?page=pool&s=show&id=1", {
"count": 3,
}),
("https://hypnohub.net/index.php?page=pool&s=show&id=61", {
"url": "d314826280073441a2da609f70ee814d1f4b9407",
"count": 3,
}),
)
example = "https://safebooru.org/index.php?page=pool&s=show&id=12345"
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
@ -298,23 +268,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
archive_fmt = "f_{favorite_id}_{id}"
per_page = 50
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
test = (
("https://rule34.xxx/index.php?page=favorites&s=view&id=1030218", {
"count": 3,
}),
("https://safebooru.org/index.php?page=favorites&s=view&id=17567", {
"count": 2,
}),
("https://realbooru.com/index.php?page=favorites&s=view&id=274", {
"count": 2,
}),
("https://tbib.org/index.php?page=favorites&s=view&id=7881", {
"count": 3,
}),
("https://hypnohub.net/index.php?page=favorites&s=view&id=43546", {
"count": 3,
}),
)
example = "https://safebooru.org/index.php?page=favorites&s=view&id=12345"
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)
@ -335,112 +289,7 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
test = (
("https://rule34.xxx/index.php?page=post&s=view&id=863", {
"pattern": r"https://api-cdn\.rule34\.xxx/images"
r"/1/6aafbdb3e22f3f3b412ea2cf53321317a37063f3\.jpg",
"content": ("a43f418aa350039af0d11cae501396a33bbe2201",
"67b516295950867e1c1ab6bc13b35d3b762ed2a3"),
"options": (("tags", True), ("notes", True)),
"keyword": {
"tags_artist": "reverse_noise yamu_(reverse_noise)",
"tags_character": "hong_meiling",
"tags_copyright": "touhou",
"tags_general": str,
"tags_metadata": "censored translated",
"notes": [
{
"body": "It feels angry, I'm losing myself... "
"It won't calm down!",
"height": 65,
"id": 93586,
"width": 116,
"x": 22,
"y": 333,
},
{
"body": "REPUTATION OF RAGE",
"height": 272,
"id": 93587,
"width": 199,
"x": 78,
"y": 442,
},
],
},
}),
("https://hypnohub.net/index.php?page=post&s=view&id=1439", {
"pattern": r"https://hypnohub\.net/images"
r"/90/24/90245c3c5250c2a8173255d3923a010b\.jpg",
"content": "5987c5d2354f22e5fa9b7ee7ce4a6f7beb8b2b71",
"options": (("tags", True), ("notes", True)),
"keyword": {
"tags_artist": "brokenteapot",
"tags_character": "hsien-ko",
"tags_copyright": "capcom darkstalkers",
"tags_general": str,
"tags_metadata": "dialogue text translated",
"notes": [
{
"body": "Master Master Master "
"Master Master Master",
"height": 83,
"id": 10577,
"width": 129,
"x": 259,
"y": 20,
},
{
"body": "Response Response Response "
"Response Response Response",
"height": 86,
"id": 10578,
"width": 125,
"x": 126,
"y": 20,
},
{
"body": "Obedience Obedience Obedience "
"Obedience Obedience Obedience",
"height": 80,
"id": 10579,
"width": 98,
"x": 20,
"y": 20,
},
],
},
}),
("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",
"options": (("tags", True),),
"keyword": {
"tags_artist": "kawanakajima",
"tags_character": "heath_ledger ronald_mcdonald the_joker",
"tags_copyright": "dc_comics mcdonald's the_dark_knight",
"tags_general": str,
},
}),
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
"pattern": r"https://realbooru\.com//?images/dc/b5"
r"/dcb5c0ce9ec0bf74a6930608985f4719\.jpeg",
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
"options": (("tags", True),),
"keyword": {
"tags_general": "1girl blonde blonde_hair blue_eyes cute "
"female female_only looking_at_viewer smile "
"solo solo_female teeth",
"tags_model": "jennifer_lawrence",
},
}),
("https://tbib.org/index.php?page=post&s=view&id=9233957", {
"url": "5a6ebe07bfff8e6d27f7c30b5480f27abcb577d2",
"content": "1c3831b6fbaa4686e3c79035b5d98460b1c85c43",
}),
)
example = "https://safebooru.org/index.php?page=post&s=view&id=12345"
def __init__(self, match):
GelbooruV02Extractor.__init__(self, match)

View File

@ -34,31 +34,7 @@ class GenericExtractor(Extractor):
r"(?:\?(?P<query>[^#]*))?" # optional query
r"(?:\#(?P<fragment>.*))?" # optional fragment
)
test = (
("generic:https://www.nongnu.org/lzip/", {
"count": 1,
"content": "40be5c77773d3e91db6e1c5df720ee30afb62368",
"keyword": {
"description": "Lossless data compressor",
"imageurl": "https://www.nongnu.org/lzip/lzip.png",
"keywords": "lzip, clzip, plzip, lzlib, LZMA, bzip2, "
"gzip, data compression, GNU, free software",
"pageurl": "https://www.nongnu.org/lzip/",
},
}),
# internationalized domain name
("generic:https://räksmörgås.josefsson.org/", {
"count": 2,
"pattern": "^https://räksmörgås.josefsson.org/",
}),
("g:https://en.wikipedia.org/Main_Page"),
("g:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
("g:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
("generic:https://en.wikipedia.org/Main_Page"),
("generic:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
("generic:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
)
example = "generic:https://www.nongnu.org/lzip/"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -1,306 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2017-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://gfycat.com/"""
from .common import Extractor, Message
from .. import text, exception
from ..cache import cache
class GfycatExtractor(Extractor):
    """Common base for all gfycat extractors

    Subclasses provide the posts to process through gfycats() and may
    contribute extra metadata fields through metadata().
    """
    category = "gfycat"
    filename_fmt = "{category}_{gfyName}{title:?_//}.{extension}"
    archive_fmt = "{gfyName}"
    root = "https://gfycat.com"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first capture group of the matched URL, lowercased
        self.key = match.group(1).lower()

    def _init(self):
        """Set self.formats from the 'format' config option

        No value selects the built-in order; a single string is tried
        first and followed by the built-in order; any other value is
        stored unchanged.
        """
        defaults = ("mp4", "webm", "mobile", "gif")
        selected = self.config("format")
        if selected is None:
            selected = defaults
        elif isinstance(selected, str):
            selected = (selected,) + defaults
        self.formats = selected

    def items(self):
        """Yield a Directory and a Url message for every usable post"""
        extra = self.metadata()

        for post in self.gfycats():
            if "gfyName" not in post:
                self.log.warning("Skipping '%s' (malformed)", post["gfyId"])
                continue

            url = self._process(post)
            if url:
                post.update(extra)
                yield Message.Directory, post
                yield Message.Url, url, post
            else:
                self.log.warning("Skipping '%s' (format not available)",
                                 post["gfyId"])

    def _process(self, gfycat):
        """Add '_fallback' and 'date' to 'gfycat'; return its first URL

        Returns None when none of the configured formats is present.
        """
        candidates = self._formats(gfycat)
        # the same generator is stored and consumed below, so '_fallback'
        # only yields the URLs that come after the returned one
        gfycat["_fallback"] = candidates
        gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
        return next(candidates, None)

    def _formats(self, gfycat):
        """Lazily yield the URLs of all configured formats found in 'gfycat'

        'extension' is overwritten each time a URL is produced.
        """
        for name in self.formats:
            field = name + "Url"
            if field not in gfycat:
                continue
            url = gfycat[field]
            # upgrade plain-HTTP links to HTTPS
            if url.startswith("http:"):
                url = "https" + url[4:]
            gfycat["extension"] = url.rpartition(".")[2]
            yield url

    def metadata(self):
        """Return additional metadata for every post (none by default)"""
        return {}

    def gfycats(self):
        """Return the posts to process (none by default)"""
        return ()
class GfycatUserExtractor(GfycatExtractor):
    """Extractor for the uploads of a gfycat user profile"""
    subcategory = "user"
    directory_fmt = ("{category}", "{username}")
    pattern = r"(?:https?://)?gfycat\.com/@([^/?#]+)/?(?:$|\?|#)"
    test = ("https://gfycat.com/@gretta", {
        "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
        "count": ">= 100",
    })

    def gfycats(self):
        """Return the posts of the requested user

        The name "me" is routed to the API's me() endpoint instead of
        being looked up as a regular user name.
        """
        api = GfycatAPI(self)
        return api.me() if self.key == "me" else api.user(self.key)
class GfycatCollectionExtractor(GfycatExtractor):
    """Extractor for the posts of a single gfycat collection"""
    subcategory = "collection"
    directory_fmt = ("{category}", "{collection_owner}",
                     "{collection_name|collection_id}")
    pattern = (r"(?:https?://)?gfycat\.com/@([^/?#]+)/collections"
               r"/(\w+)(?:/([^/?#]+))?")
    test = ("https://gfycat.com/@reactions/collections/nHgy2DtE/no-text", {
        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
        "count": ">= 100",
    })

    def __init__(self, match):
        GfycatExtractor.__init__(self, match)
        # group 2: collection ID, group 3: optional trailing name
        # (None when the URL has no such path segment)
        self.collection_id, self.collection_name = match.group(2, 3)

    def metadata(self):
        """Return the collection's owner, name and ID"""
        info = {}
        info["collection_owner"] = self.key
        info["collection_name"] = self.collection_name
        info["collection_id"] = self.collection_id
        return info

    def gfycats(self):
        """Return the posts inside this collection"""
        api = GfycatAPI(self)
        return api.collection(self.key, self.collection_id)
class GfycatCollectionsExtractor(GfycatExtractor):
    """Extractor for all collections of a gfycat user"""
    subcategory = "collections"
    pattern = r"(?:https?://)?gfycat\.com/@([^/?#]+)/collections/?(?:$|\?|#)"
    test = ("https://gfycat.com/@sannahparker/collections", {
        "pattern": GfycatCollectionExtractor.pattern,
        "count": ">= 20",
    })

    def items(self):
        """Queue every collection for GfycatCollectionExtractor"""
        template = "https://gfycat.com/@{}/collections/{}/{}"

        for entry in GfycatAPI(self).collections(self.key):
            target = template.format(
                entry["userId"], entry["folderId"], entry["linkText"])
            # name the extractor class that should handle the queued URL
            entry["_extractor"] = GfycatCollectionExtractor
            yield Message.Queue, target, entry
class GfycatSearchExtractor(GfycatExtractor):
    """Extractor for gfycat search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search}")
    pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?#]+)"
    test = ("https://gfycat.com/gifs/search/funny+animals", {
        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
        "archive": False,
        "range": "100-300",
        "count": "> 200",
    })

    def metadata(self):
        """Decode the search term from the URL and return it as metadata

        The decoded term is written back to self.key, which gfycats()
        then passes to the API.
        """
        # Translate '+' to spaces *before* percent-decoding. Decoding first
        # would turn an escaped plus sign ("%2B") into "+" and then
        # wrongly rewrite it to a space as well.
        self.key = text.unquote(self.key.replace("+", " "))
        return {"search": self.key}

    def gfycats(self):
        """Return the search results for the term stored in self.key"""
        return GfycatAPI(self).search(self.key)
class GfycatImageExtractor(GfycatExtractor):
    """Extractor for a single gfycat.com post"""
    subcategory = "image"
    pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
               r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")
    test = (
        ("https://gfycat.com/GrayGenerousCowrie", {
            "url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
            "content": "5786028e04b155baa20b87c5f4f77453cd5edc37",
            "keyword": {
                "gfyId": "graygenerouscowrie",
                "gfyName": "GrayGenerousCowrie",
                "gfyNumber": 755075459,
                "title": "Bottom's up",
                "username": "jackson3oh3",
                "createDate": 1495884169,
                "date": "dt:2017-05-27 11:22:49",
                "md5": "a4796e05b0db9ba9ce5140145cd318aa",
                "width": 400,
                "height": 224,
                "frameRate": 23.0,
                "numFrames": 158.0,
                "views": int,
            },
        }),
        (("https://thumbs.gfycat.com/SillyLameIsabellinewheatear"
          "-size_restricted.gif"), {
            "url": "13b32e6cc169d086577d7dd3fd36ee6cdbc02726",
        }),
        ("https://gfycat.com/detail/UnequaledHastyAnkole?tagname=aww", {
            "url": "e24c9f69897fd223343782425a429c5cab6a768e",
        }),
        # retry 404'ed videos on redgifs (#874)
        ("https://www.gfycat.com/foolishforkedabyssiniancat", {
            "pattern": "https://redgifs.com/watch/foolishforkedabyssiniancat",
        }),
        # malformed API response (#902)
        ("https://gfycat.com/illexcitablehairstreak", {
            "count": 0,
        }),
        ("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),
        ("https://gfycat.com/ifr/UnequaledHastyAnkole"),
        ("https://gfycat.com/ru/UnequaledHastyAnkole"),
    )

    def items(self):
        """Yield the messages for one post

        When the API request raises an HttpError, the same ID is queued
        as a redgifs URL instead.
        """
        try:
            post = GfycatAPI(self).gfycat(self.key)
        except exception.HttpError:
            # imported here rather than at module level
            from .redgifs import RedgifsImageExtractor
            target = "https://redgifs.com/watch/" + self.key
            info = {"_extractor": RedgifsImageExtractor}
            yield Message.Queue, target, info
            return

        if "gfyName" not in post:
            self.log.warning("Skipping '%s' (malformed)", post["gfyId"])
            return

        url = self._process(post)
        if url:
            yield Message.Directory, post
            yield Message.Url, url, post
        else:
            self.log.warning("Skipping '%s' (format not available)",
                             post["gfyId"])
class GfycatAPI:
    """Minimal client for the gfycat web API

    All requests are sent through the given extractor's request() method.
    """
    API_ROOT = "https://api.gfycat.com"

    def __init__(self, extractor):
        self.extractor = extractor
        # extra headers sent with every API call; authenticate() adds
        # the 'Authorization' entry
        self.headers = {}
        self.username, self.password = extractor._get_auth_info()

    def collection(self, user, collection):
        """Yield the posts of a user's collection"""
        path = "/v1/users/{}/collections/{}/gfycats".format(
            user, collection)
        return self._pagination(path, {"count": 100})

    def collections(self, user):
        """Yield the collections of a user"""
        path = "/v1/users/{}/collections".format(user)
        return self._pagination(path, {"count": 100}, "gfyCollections")

    def gfycat(self, gfycat_id):
        """Return the 'gfyItem' object of a single post"""
        response = self._call("/v1/gfycats/" + gfycat_id)
        return response["gfyItem"]

    def me(self):
        """Yield the posts of the '/v1/me' account endpoint"""
        return self._pagination("/v1/me/gfycats", {"count": 100})

    def search(self, query):
        """Yield the posts matching a search query"""
        query_params = {"search_text": query, "count": 150}
        return self._pagination("/v1/gfycats/search", query_params)

    def user(self, user):
        """Yield the posts of a user (name is lowercased)"""
        path = "/v1/users/{}/gfycats".format(user.lower())
        return self._pagination(path, {"count": 100})

    def authenticate(self):
        """Store a bearer token for the configured account in self.headers"""
        token = self._authenticate_impl(self.username, self.password)
        self.headers["Authorization"] = token

    # NOTE(review): presumably caches the result for 3600 seconds, keyed on
    # argument 1 (username) -- confirm against the project's cache module
    @cache(maxage=3600, keyarg=1)
    def _authenticate_impl(self, username, password):
        """Log in with username and password; return 'Bearer <token>'

        Raises exception.AuthenticationError when the login response
        carries an 'errorMessage'.
        """
        request = self.extractor.request
        self.extractor.log.info("Logging in as %s", username)

        # step 1: obtain a web token using the access key
        url = "https://weblogin.gfycat.com/oauth/webtoken"
        headers = {"Origin": "https://gfycat.com"}
        payload = {
            "access_key": "Anr96uuqt9EdamSCwK4txKPjMsf2"
                          "M95Rfa5FLLhPFucu8H5HTzeutyAa",
        }
        webtoken = request(
            url, method="POST", headers=headers, json=payload).json()

        # step 2: exchange the web token plus credentials for a login token
        url = "https://weblogin.gfycat.com/oauth/weblogin"
        headers["authorization"] = "Bearer " + webtoken["access_token"]
        payload = {
            "grant_type": "password",
            "username"  : username,
            "password"  : password,
        }
        login = request(
            url, method="POST", headers=headers, json=payload,
            fatal=None).json()

        if "errorMessage" in login:
            raise exception.AuthenticationError(
                login["errorMessage"]["description"])
        return "Bearer " + login["access_token"]

    def _call(self, endpoint, params=None):
        """Request an API endpoint and return the decoded JSON response"""
        # only authenticate when a username is configured
        if self.username:
            self.authenticate()
        response = self.extractor.request(
            self.API_ROOT + endpoint, params=params, headers=self.headers)
        return response.json()

    def _pagination(self, endpoint, params, key="gfycats"):
        """Yield the entries under 'key' from every page of an endpoint

        Follows the response's 'cursor' value until it is empty.
        Note that 'params' is updated in place.
        """
        while True:
            page = self._call(endpoint, params)
            yield from page[key]

            cursor = page["cursor"]
            if not cursor:
                return
            params["cursor"] = cursor

View File

@ -4,6 +4,8 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://gofile.io/"""
from .common import Extractor, Message
from .. import text, exception
from ..cache import cache, memcache
@ -17,49 +19,7 @@ class GofileFolderExtractor(Extractor):
directory_fmt = ("{category}", "{name} ({code})")
archive_fmt = "{id}"
pattern = r"(?:https?://)?(?:www\.)?gofile\.io/d/([^/?#]+)"
test = (
("https://gofile.io/d/k6BomI", {
"pattern": r"https://store\d+\.gofile\.io/download"
r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}"
r"/test-%E3%83%86%E3%82%B9%E3%83%88-%2522%26!\.png",
"keyword": {
"createTime": int,
"directLink": "re:https://store5.gofile.io/download/direct/.+",
"downloadCount": int,
"extension": "png",
"filename": "test-テスト-%22&!",
"folder": {
"childs": [
"b0367d79-b8ba-407f-8342-aaf8eb815443",
"7fd4a36a-c1dd-49ff-9223-d93f7d24093f"
],
"code": "k6BomI",
"createTime": 1654076165,
"id": "fafb59f9-a7c7-4fea-a098-b29b8d97b03c",
"name": "root",
"public": True,
"totalDownloadCount": int,
"totalSize": 182,
"type": "folder"
},
"id": r"re:\w{8}-\w{4}-\w{4}-\w{4}-\w{12}",
"link": r"re:https://store5.gofile.io/download/.+\.png",
"md5": "re:[0-9a-f]{32}",
"mimetype": "image/png",
"name": "test-テスト-%22&!.png",
"num": int,
"parentFolder": "fafb59f9-a7c7-4fea-a098-b29b8d97b03c",
"serverChoosen": "store5",
"size": 182,
"thumbnail": r"re:https://store5.gofile.io/download/.+\.png",
"type": "file"
},
}),
("https://gofile.io/d/7fd4a36a-c1dd-49ff-9223-d93f7d24093f", {
"options": (("website-token", None),),
"content": "0c8768055e4e20e7c7259608b67799171b691140",
}),
)
example = "https://gofile.io/d/ID"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -47,11 +47,7 @@ class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
"{page:>03}.{extension}")
archive_fmt = "{manga_id}_{chapter}_{page}"
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/(\d+)/c(\d+))"
test = ("https://www.hbrowse.com/10363/c00000", {
"url": "6feefbc9f4b98e20d8425ddffa9dd111791dc3e6",
"keyword": "274996f6c809e5250b6ff3abbc5147e29f89d9a5",
"content": "44578ebbe176c2c27434966aef22945787e2781e",
})
example = "https://www.hbrowse.com/12345/c00000"
def __init__(self, match):
self.path, self.gid, self.chapter = match.groups()
@ -75,10 +71,7 @@ class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
chapterclass = HbrowseChapterExtractor
reverse = False
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/\d+)/?$"
test = ("https://www.hbrowse.com/10363", {
"url": "b89682bfb86c11d2af0dc47463804ec3ac4aadd6",
"keyword": "4b15fda1858a69de1fbf5afddfe47dd893397312",
})
example = "https://www.hbrowse.com/12345"
def chapters(self, page):
results = []

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2022 Mike Fährmann
# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -23,31 +23,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
"""Extractor for a single manga chapter from hentai2read.com"""
archive_fmt = "{chapter_id}_{page}"
pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/([^/?#]+))"
test = (
("https://hentai2read.com/amazon_elixir/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
"keyword": "85645b02d34aa11b3deb6dadd7536863476e1bad",
}),
("https://hentai2read.com/popuni_kei_joshi_panic/2.5/", {
"pattern": r"https://hentaicdn\.com/hentai"
r"/13088/2\.5y/ccdn00\d+\.jpg",
"count": 36,
"keyword": {
"author": "Kurisu",
"chapter": 2,
"chapter_id": 75152,
"chapter_minor": ".5",
"count": 36,
"lang": "en",
"language": "English",
"manga": "Popuni Kei Joshi Panic!",
"manga_id": 13088,
"page": int,
"title": "Popuni Kei Joshi Panic! 2.5",
"type": "Original",
},
}),
)
example = "https://hentai2read.com/TITLE/1/"
def __init__(self, match):
self.chapter = match.group(2)
@ -85,31 +61,7 @@ class Hentai2readMangaExtractor(Hentai2readBase, MangaExtractor):
"""Extractor for hmanga from hentai2read.com"""
chapterclass = Hentai2readChapterExtractor
pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+)/?$"
test = (
("https://hentai2read.com/amazon_elixir/", {
"url": "273073752d418ec887d7f7211e42b832e8c403ba",
"keyword": "5c1b712258e78e120907121d3987c71f834d13e1",
}),
("https://hentai2read.com/oshikage_riot/", {
"url": "6595f920a3088a15c2819c502862d45f8eb6bea6",
"keyword": "a2e9724acb221040d4b29bf9aa8cb75b2240d8af",
}),
("https://hentai2read.com/popuni_kei_joshi_panic/", {
"pattern": Hentai2readChapterExtractor.pattern,
"range": "2-3",
"keyword": {
"chapter": int,
"chapter_id": int,
"chapter_minor": ".5",
"lang": "en",
"language": "English",
"manga": "Popuni Kei Joshi Panic!",
"manga_id": 13088,
"title": str,
"type": "Original",
},
}),
)
example = "https://hentai2read.com/TITLE/"
def chapters(self, page):
results = []

View File

@ -21,36 +21,7 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor):
pattern = r"((?:https?://)?(?:\w{2}\.)?" \
r"(hentai-cosplays|hentai-img|porn-images-xxx)\.com)/" \
r"(?:image|story)/([\w-]+)"
test = (
("https://hentai-cosplays.com/image/---devilism--tide-kurihara-/", {
"pattern": r"https://static\d?.hentai-cosplays.com/upload/"
r"\d+/\d+/\d+/\d+.jpg$",
"keyword": {
"count": 18,
"site": "hentai-cosplays",
"slug": "---devilism--tide-kurihara-",
"title": "艦 こ れ-devilism の tide Kurihara 憂",
},
}),
("https://fr.porn-images-xxx.com/image/enako-enako-24/", {
"pattern": r"https://static\d?.porn-images-xxx.com/upload/"
r"\d+/\d+/\d+/\d+.jpg$",
"keyword": {
"count": 11,
"site": "porn-images-xxx",
"title": str,
},
}),
("https://ja.hentai-img.com/image/hollow-cora-502/", {
"pattern": r"https://static\d?.hentai-img.com/upload/"
r"\d+/\d+/\d+/\d+.jpg$",
"keyword": {
"count": 2,
"site": "hentai-img",
"title": str,
},
}),
)
example = "https://hentai-cosplays.com/image/TITLE/"
def __init__(self, match):
root, self.site, self.slug = match.groups()

View File

@ -168,7 +168,7 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
"""Extractor for a hentaifoundry user profile"""
subcategory = "user"
pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile"
test = ("https://www.hentai-foundry.com/user/Tenpura/profile",)
example = "https://www.hentai-foundry.com/user/USER/profile"
def initialize(self):
pass
@ -192,12 +192,7 @@ class HentaifoundryPicturesExtractor(HentaifoundryExtractor):
"""Extractor for all pictures of a hentaifoundry user"""
subcategory = "pictures"
pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$"
test = (
("https://www.hentai-foundry.com/pictures/user/Tenpura", {
"url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
}),
("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3"),
)
example = "https://www.hentai-foundry.com/pictures/user/USER"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@ -209,13 +204,7 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
subcategory = "scraps"
directory_fmt = ("{category}", "{user}", "Scraps")
pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)/scraps"
test = (
("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", {
"url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7",
}),
("https://www.hentai-foundry.com"
"/pictures/user/Evulchibi/scraps/page/3"),
)
example = "https://www.hentai-foundry.com/pictures/user/USER/scraps"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@ -229,13 +218,7 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
directory_fmt = ("{category}", "{user}", "Favorites")
archive_fmt = "f_{user}_{index}"
pattern = BASE_PATTERN + r"/user/([^/?#]+)/faves/pictures"
test = (
("https://www.hentai-foundry.com/user/Tenpura/faves/pictures", {
"url": "56f9ae2e89fe855e9fe1da9b81e5ec6212b0320b",
}),
("https://www.hentai-foundry.com"
"/user/Tenpura/faves/pictures/page/3"),
)
example = "https://www.hentai-foundry.com/user/USER/faves/pictures"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@ -249,10 +232,7 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor):
directory_fmt = ("{category}", "Recent Pictures", "{date}")
archive_fmt = "r_{index}"
pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)"
test = ("https://www.hentai-foundry.com/pictures/recent/2018-09-20", {
"pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
"range": "20-30",
})
example = "https://www.hentai-foundry.com/pictures/recent/1970-01-01"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@ -268,10 +248,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
directory_fmt = ("{category}", "Popular Pictures")
archive_fmt = "p_{index}"
pattern = BASE_PATTERN + r"/pictures/popular()"
test = ("https://www.hentai-foundry.com/pictures/popular", {
"pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
"range": "20-30",
})
example = "https://www.hentai-foundry.com/pictures/popular"
def __init__(self, match):
HentaifoundryExtractor.__init__(self, match)
@ -283,34 +260,8 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
subcategory = "image"
pattern = (r"(https?://)?(?:www\.|pictures\.)?hentai-foundry\.com"
r"/(?:pictures/user|[^/?#])/([^/?#]+)/(\d+)")
test = (
(("https://www.hentai-foundry.com"
"/pictures/user/Tenpura/407501/shimakaze"), {
"url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3",
"content": "91bf01497c39254b6dfb234a18e8f01629c77fd1",
"keyword": {
"artist" : "Tenpura",
"date" : "dt:2016-02-22 14:41:19",
"description": "Thank you!",
"height" : 700,
"index" : 407501,
"media" : "Other digital art",
"ratings": ["Sexual content", "Contains female nudity"],
"score" : int,
"tags" : ["collection", "kancolle", "kantai", "shimakaze"],
"title" : "shimakaze",
"user" : "Tenpura",
"views" : int,
"width" : 495,
},
}),
("http://www.hentai-foundry.com/pictures/user/Tenpura/407501/", {
"pattern": "http://pictures.hentai-foundry.com/t/Tenpura/407501/",
}),
("https://www.hentai-foundry.com/pictures/user/Tenpura/407501/"),
("https://pictures.hentai-foundry.com"
"/t/Tenpura/407501/Tenpura-407501-shimakaze.png"),
)
example = "https://www.hentai-foundry.com/pictures/user/USER/12345/TITLE"
skip = Extractor.skip
def __init__(self, match):
@ -331,24 +282,7 @@ class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
subcategory = "stories"
archive_fmt = "s_{index}"
pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)(?:/page/(\d+))?/?$"
test = ("https://www.hentai-foundry.com/stories/user/SnowWolf35", {
"count": ">= 35",
"keyword": {
"author" : "SnowWolf35",
"chapters" : int,
"comments" : int,
"date" : "type:datetime",
"description": str,
"index" : int,
"rating" : int,
"ratings" : list,
"status" : "re:(Inc|C)omplete",
"title" : str,
"user" : "SnowWolf35",
"views" : int,
"words" : int,
},
})
example = "https://www.hentai-foundry.com/stories/user/USER"
def items(self):
self._init_site_filters()
@ -367,11 +301,8 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
subcategory = "story"
archive_fmt = "s_{index}"
pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)/(\d+)"
test = (("https://www.hentai-foundry.com/stories/user/SnowWolf35"
"/26416/Overwatch-High-Chapter-Voting-Location"), {
"url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8",
"keyword": {"title": "Overwatch High Chapter Voting Location"},
})
example = "https://www.hentai-foundry.com/stories/user/USER/12345/TITLE"
skip = Extractor.skip
def __init__(self, match):

View File

@ -21,40 +21,7 @@ class HentaifoxBase():
class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
"""Extractor for image galleries on hentaifox.com"""
pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
test = (
("https://hentaifox.com/gallery/56622/", {
"pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
"keyword": "bcd6b67284f378e5cc30b89b761140e3e60fcd92",
"count": 24,
}),
# 'split_tag' element (#1378)
("https://hentaifox.com/gallery/630/", {
"keyword": {
"artist": ["beti", "betty", "magi", "mimikaki"],
"characters": [
"aerith gainsborough",
"tifa lockhart",
"yuffie kisaragi"
],
"count": 32,
"gallery_id": 630,
"group": ["cu-little2"],
"parody": ["darkstalkers | vampire", "final fantasy vii"],
"tags": ["femdom", "fingering", "masturbation", "yuri"],
"title": "Cu-Little Bakanya~",
"type": "doujinshi",
},
}),
# email-protected title (#4201)
("https://hentaifox.com/gallery/35261/", {
"keyword": {
"gallery_id": 35261,
"title": "ManageM@ster!",
"artist": ["haritama hiroki"],
"group": ["studio n.ball"],
},
}),
)
example = "https://hentaifox.com/gallery/12345/"
def __init__(self, match):
GalleryExtractor.__init__(self, match)
@ -116,22 +83,7 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
subcategory = "search"
pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com"
r"(/(?:parody|tag|artist|character|search|group)/[^/?%#]+)")
test = (
("https://hentaifox.com/parody/touhou-project/"),
("https://hentaifox.com/character/reimu-hakurei/"),
("https://hentaifox.com/artist/distance/"),
("https://hentaifox.com/search/touhou/"),
("https://hentaifox.com/group/v-slash/"),
("https://hentaifox.com/tag/heterochromia/", {
"pattern": HentaifoxGalleryExtractor.pattern,
"count": ">= 60",
"keyword": {
"url" : str,
"gallery_id": int,
"title" : str,
},
}),
)
example = "https://hentaifox.com/tag/TAG/"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -17,27 +17,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
category = "hentaihand"
root = "https://hentaihand.com"
pattern = r"(?:https?://)?(?:www\.)?hentaihand\.com/\w+/comic/([\w-]+)"
test = (
(("https://hentaihand.com/en/comic/c75-takumi-na-muchi-choudenji-hou-"
"no-aishi-kata-how-to-love-a-super-electromagnetic-gun-toaru-kagaku-"
"no-railgun-english"), {
"pattern": r"https://cdn.hentaihand.com/.*/images/37387/\d+.jpg$",
"count": 50,
"keyword": {
"artists" : ["Takumi Na Muchi"],
"date" : "dt:2014-06-28 00:00:00",
"gallery_id": 37387,
"lang" : "en",
"language" : "English",
"parodies" : ["Toaru Kagaku No Railgun"],
"relationships": list,
"tags" : list,
"title" : r"re:\(C75\) \[Takumi na Muchi\] Choudenji Hou ",
"title_alt" : r"re:\(C75\) \[たくみなむち\] 超電磁砲のあいしかた",
"type" : "Doujinshi",
},
}),
)
example = "https://hentaihand.com/en/comic/TITLE"
def __init__(self, match):
self.slug = match.group(1)
@ -76,15 +56,7 @@ class HentaihandTagExtractor(Extractor):
pattern = (r"(?i)(?:https?://)?(?:www\.)?hentaihand\.com"
r"/\w+/(parody|character|tag|artist|group|language"
r"|category|relationship)/([^/?#]+)")
test = (
("https://hentaihand.com/en/artist/takumi-na-muchi", {
"pattern": HentaihandGalleryExtractor.pattern,
"count": ">= 6",
}),
("https://hentaihand.com/en/tag/full-color"),
("https://hentaihand.com/fr/language/japanese"),
("https://hentaihand.com/zh/category/manga"),
)
example = "https://hentaihand.com/en/tag/TAG"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -23,32 +23,7 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
"""Extractor for a single manga chapter from hentaihere.com"""
archive_fmt = "{chapter_id}_{page}"
pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/([^/?#]+)"
test = (
("https://hentaihere.com/m/S13812/1/1/", {
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
"keyword": "0207d20eea3a15d2a8d1496755bdfa49de7cfa9d",
}),
("https://hentaihere.com/m/S23048/1.5/1/", {
"pattern": r"https://hentaicdn\.com/hentai"
r"/23048/1\.5/ccdn00\d+\.jpg",
"count": 32,
"keyword": {
"author": "Shinozuka Yuuji",
"chapter": 1,
"chapter_id": 80186,
"chapter_minor": ".5",
"count": 32,
"lang": "en",
"language": "English",
"manga": "High School Slut's Love Consultation",
"manga_id": 23048,
"page": int,
"title": "High School Slut's Love Consultation + "
"Girlfriend [Full Color]",
"type": "Original",
},
}),
)
example = "https://hentaihere.com/m/S12345/1/1/"
def __init__(self, match):
self.manga_id, self.chapter = match.groups()
@ -87,26 +62,7 @@ class HentaihereMangaExtractor(HentaihereBase, MangaExtractor):
"""Extractor for hmanga from hentaihere.com"""
chapterclass = HentaihereChapterExtractor
pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com(/m/S\d+)/?$"
test = (
("https://hentaihere.com/m/S13812", {
"url": "d1ba6e28bb2162e844f8559c2b2725ba0a093559",
"keyword": "5c1b712258e78e120907121d3987c71f834d13e1",
}),
("https://hentaihere.com/m/S7608", {
"url": "6c5239758dc93f6b1b4175922836c10391b174f7",
"keyword": {
"chapter": int,
"chapter_id": int,
"chapter_minor": "",
"lang": "en",
"language": "English",
"manga": "Oshikake Riot",
"manga_id": 7608,
"title": r"re:Oshikake Riot( \d+)?",
"type": "Original",
},
}),
)
example = "https://hentaihere.com/m/S12345"
def chapters(self, page):
results = []

View File

@ -31,7 +31,7 @@ class HiperdexBase():
return {
"manga" : text.unescape(extr(
"<title>", "<").rpartition(" - ")[0].strip()),
"<title>", "<").rpartition(" Manga - ")[0].strip()),
"url" : text.unescape(extr(
'property="og:url" content="', '"')),
"score" : text.parse_float(extr(
@ -69,30 +69,7 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for manga chapters from hiperdex.com"""
pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
test = (
("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
"pattern": r"https://(1st)?hiperdex\d?.(com|net|info)"
r"/wp-content/uploads/WP-manga/data"
r"/manga_\w+/[0-9a-f]{32}/\d+\.webp",
"count": 9,
"keyword": {
"artist" : "Sasuga Kei",
"author" : "Sasuga Kei",
"chapter": 154,
"chapter_minor": ".5",
"description": "re:Natsuo Fujii is in love with his teacher, ",
"genre" : list,
"manga" : "Domestic na Kanojo",
"release": 2014,
"score" : float,
"type" : "Manga",
},
}),
("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"),
("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"),
)
example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
def __init__(self, match):
root, path, self.manga, self.chapter = match.groups()
@ -114,30 +91,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"""Extractor for manga from hiperdex.com"""
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
test = (
("https://hiperdex.com/manga/1603231576-youre-not-that-special/", {
"count": 51,
"pattern": HiperdexChapterExtractor.pattern,
"keyword": {
"artist" : "Bolp",
"author" : "Abyo4",
"chapter": int,
"chapter_minor": "",
"description": "re:I didnt think much of the creepy girl in ",
"genre" : list,
"manga" : "Youre Not That Special!",
"release": 2019,
"score" : float,
"status" : "Completed",
"type" : "Manhwa",
},
}),
("https://hiperdex.com/manga/youre-not-that-special/"),
("https://1sthiperdex.com/manga/youre-not-that-special/"),
("https://hiperdex2.com/manga/youre-not-that-special/"),
("https://hiperdex.net/manga/youre-not-that-special/"),
("https://hiperdex.info/manga/youre-not-that-special/"),
)
example = "https://hiperdex.com/manga/MANGA/"
def __init__(self, match):
root, path, self.manga = match.groups()
@ -173,16 +127,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
chapterclass = HiperdexMangaExtractor
reverse = False
pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
test = (
("https://1sthiperdex.com/manga-artist/beck-ho-an/"),
("https://hiperdex.net/manga-artist/beck-ho-an/"),
("https://hiperdex2.com/manga-artist/beck-ho-an/"),
("https://hiperdex.info/manga-artist/beck-ho-an/"),
("https://hiperdex.com/manga-author/viagra/", {
"pattern": HiperdexMangaExtractor.pattern,
"count": ">= 6",
}),
)
example = "https://hiperdex.com/manga-artist/NAME/"
def __init__(self, match):
self.root = text.ensure_http_scheme(match.group(1))

View File

@ -23,47 +23,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
pattern = (r"(?:https?://)?hitomi\.la"
r"/(?:manga|doujinshi|cg|gamecg|galleries|reader)"
r"/(?:[^/?#]+-)?(\d+)")
test = (
("https://hitomi.la/galleries/867789.html", {
"pattern": r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+"
r"/[0-9a-f]{64}\.webp",
"keyword": "86af5371f38117a07407f11af689bdd460b09710",
"count": 16,
}),
# download test
("https://hitomi.la/galleries/1401410.html", {
"range": "1",
"content": "d75d5a3d1302a48469016b20e53c26b714d17745",
}),
# Game CG with scenes (#321)
("https://hitomi.la/galleries/733697.html", {
"count": 210,
}),
# fallback for galleries only available through /reader/ URLs
("https://hitomi.la/galleries/1045954.html", {
"count": 1413,
}),
# gallery with "broken" redirect
("https://hitomi.la/cg/scathacha-sama-okuchi-ecchi-1291900.html", {
"count": 10,
"options": (("format", "original"),),
"pattern": r"https://[a-c]b\.hitomi\.la/images/\d+/\d+"
r"/[0-9a-f]{64}\.jpg",
}),
# no tags
("https://hitomi.la/cg/1615823.html", {
"count": 22,
"options": (("format", "avif"),),
"pattern": r"https://[a-c]a\.hitomi\.la/avif/\d+/\d+"
r"/[0-9a-f]{64}\.avif",
}),
("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"),
("https://hitomi.la/manga/867789.html"),
("https://hitomi.la/doujinshi/867789.html"),
("https://hitomi.la/cg/867789.html"),
("https://hitomi.la/gamecg/867789.html"),
("https://hitomi.la/reader/867789.html"),
)
example = "https://hitomi.la/manga/TITLE-867789.html"
def __init__(self, match):
self.gid = match.group(1)
@ -149,17 +109,7 @@ class HitomiTagExtractor(Extractor):
pattern = (r"(?:https?://)?hitomi\.la/"
r"(tag|artist|group|series|type|character)/"
r"([^/?#]+)\.html")
test = (
("https://hitomi.la/tag/screenshots-japanese.html", {
"pattern": HitomiGalleryExtractor.pattern,
"count": ">= 35",
}),
("https://hitomi.la/artist/a1-all-1.html"),
("https://hitomi.la/group/initial%2Dg-all-1.html"),
("https://hitomi.la/series/amnesia-all-1.html"),
("https://hitomi.la/type/doujinshi-all-1.html"),
("https://hitomi.la/character/a2-all-1.html"),
)
example = "https://hitomi.la/tag/TAG-LANG.html"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -21,9 +21,6 @@ class HotleakExtractor(Extractor):
archive_fmt = "{type}_{creator}_{id}"
root = "https://hotleak.vip"
def _init(self):
self.session.headers["Referer"] = self.root + "/"
def items(self):
for post in self.posts():
yield Message.Directory, post
@ -59,30 +56,7 @@ class HotleakPostExtractor(HotleakExtractor):
subcategory = "post"
pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
r"([^/]+)/(photo|video)/(\d+)")
test = (
("https://hotleak.vip/kaiyakawaii/photo/1617145", {
"pattern": r"https://hotleak\.vip/storage/images/3625"
r"/1617145/fefdd5988dfcf6b98cc9e11616018868\.jpg",
"keyword": {
"id": 1617145,
"creator": "kaiyakawaii",
"type": "photo",
"filename": "fefdd5988dfcf6b98cc9e11616018868",
"extension": "jpg",
},
}),
("https://hotleak.vip/lilmochidoll/video/1625538", {
"pattern": r"ytdl:https://cdn8-leak\.camhdxx\.com"
r"/1661/1625538/index\.m3u8",
"keyword": {
"id": 1625538,
"creator": "lilmochidoll",
"type": "video",
"filename": "index",
"extension": "mp4",
},
}),
)
example = "https://hotleak.vip/MODEL/photo/12345"
def __init__(self, match):
HotleakExtractor.__init__(self, match)
@ -118,18 +92,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
subcategory = "creator"
pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
r"([^/?#]+)/?$")
test = (
("https://hotleak.vip/kaiyakawaii", {
"range": "1-200",
"count": 200,
}),
("https://hotleak.vip/stellaviolet", {
"count": "> 600"
}),
("https://hotleak.vip/doesnotexist", {
"exception": exception.NotFoundError,
}),
)
example = "https://hotleak.vip/MODEL"
def __init__(self, match):
HotleakExtractor.__init__(self, match)
@ -182,20 +145,7 @@ class HotleakCategoryExtractor(HotleakExtractor):
"""Extractor for hotleak categories"""
subcategory = "category"
pattern = BASE_PATTERN + r"/(hot|creators|videos|photos)(?:/?\?([^#]+))?"
test = (
("https://hotleak.vip/photos", {
"pattern": HotleakPostExtractor.pattern,
"range": "1-50",
"count": 50,
}),
("https://hotleak.vip/videos"),
("https://hotleak.vip/creators", {
"pattern": HotleakCreatorExtractor.pattern,
"range": "1-50",
"count": 50,
}),
("https://hotleak.vip/hot"),
)
example = "https://hotleak.vip/photos"
def __init__(self, match):
HotleakExtractor.__init__(self, match)
@ -217,14 +167,7 @@ class HotleakSearchExtractor(HotleakExtractor):
"""Extractor for hotleak search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search(?:/?\?([^#]+))"
test = (
("https://hotleak.vip/search?search=gallery-dl", {
"count": 0,
}),
("https://hotleak.vip/search?search=hannah", {
"count": "> 30",
}),
)
example = "https://hotleak.vip/search?search=QUERY"
def __init__(self, match):
HotleakExtractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2021 Mike Fährmann
# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -133,20 +133,7 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "t_{search_tags}_{id}"
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
test = (
("https://idol.sankakucomplex.com/?tags=lyumos", {
"count": 5,
"range": "18-22",
"pattern": r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
}),
("https://idol.sankakucomplex.com/?tags=order:favcount", {
"count": 5,
"range": "18-22",
}),
("https://idol.sankakucomplex.com"
"/?tags=lyumos+wreath&page=3&next=694215"),
)
example = "https://idol.sankakucomplex.com/?tags=TAGS"
per_page = 20
def __init__(self, match):
@ -214,9 +201,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pool/show/(\d+)"
test = ("https://idol.sankakucomplex.com/pool/show/145", {
"count": 3,
})
example = "https://idol.sankakucomplex.com/pool/show/12345"
per_page = 24
def __init__(self, match):
@ -251,17 +236,7 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor):
subcategory = "post"
archive_fmt = "{id}"
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"
test = ("https://idol.sankakucomplex.com/post/show/694215", {
"content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
"options": (("tags", True),),
"keyword": {
"tags_character": "shani_(the_witcher)",
"tags_copyright": "the_witcher",
"tags_idol": str,
"tags_medium": str,
"tags_general": str,
},
})
example = "https://idol.sankakucomplex.com/post/show/12345"
def __init__(self, match):
IdolcomplexExtractor.__init__(self, match)

View File

@ -9,7 +9,7 @@
"""Extractors for https://www.imagebam.com/"""
from .common import Extractor, Message
from .. import text, exception
from .. import text
import re
@ -46,26 +46,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
archive_fmt = "{gallery_key}_{image_key}"
pattern = (r"(?:https?://)?(?:www\.)?imagebam\.com"
r"(/(?:gallery/|view/G)[a-zA-Z0-9]+)")
test = (
("https://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
"url": "76d976788ae2757ac81694736b07b72356f5c4c8",
"keyword": "b048478b1bbba3072a7fa9fcc40630b3efad1f6c",
"content": "596e6bfa157f2c7169805d50075c2986549973a8",
}),
("http://www.imagebam.com/gallery/op9dwcklwdrrguibnkoe7jxgvig30o5p", {
# more than 100 images; see issue #219
"count": 107,
"url": "32ae6fe5dc3e4ca73ff6252e522d16473595d1d1",
}),
("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
"exception": exception.HttpError,
}),
# /view/ path (#2378)
("https://www.imagebam.com/view/GA3MT1", {
"url": "35018ce1e00a2d2825a33d3cd37857edaf804919",
"keyword": "3a9f98178f73694c527890c0d7ca9a92b46987ba",
}),
)
example = "https://www.imagebam.com/view/GID"
def items(self):
page = self.request(self.root + self.path).text
@ -110,24 +91,7 @@ class ImagebamImageExtractor(ImagebamExtractor):
archive_fmt = "{image_key}"
pattern = (r"(?:https?://)?(?:\w+\.)?imagebam\.com"
r"(/(?:image/|view/M|(?:[0-9a-f]{2}/){3})[a-zA-Z0-9]+)")
test = (
("https://www.imagebam.com/image/94d56c502511890", {
"url": "5e9ba3b1451f8ded0ae3a1b84402888893915d4a",
"keyword": "2a4380d4b57554ff793898c2d6ec60987c86d1a1",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
}),
("http://images3.imagebam.com/1d/8c/44/94d56c502511890.png"),
# NSFW (#1534)
("https://www.imagebam.com/image/0850951366904951", {
"url": "d37297b17ed1615b4311c8ed511e50ce46e4c748",
}),
# /view/ path (#2378)
("https://www.imagebam.com/view/ME8JOQP", {
"url": "4dca72bbe61a0360185cf4ab2bed8265b49565b8",
"keyword": "15a494c02fd30846b41b42a26117aedde30e4ceb",
"content": "f81008666b17a42d8834c4749b910e1dc10a6e83",
}),
)
example = "https://www.imagebam.com/view/MID"
def items(self):
path = self.path

View File

@ -18,29 +18,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
category = "imagechest"
root = "https://imgchest.com"
pattern = r"(?:https?://)?(?:www\.)?imgchest\.com/p/([A-Za-z0-9]{11})"
test = (
("https://imgchest.com/p/3na7kr3by8d", {
"pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)",
"keyword": {
"count": 3,
"gallery_id": "3na7kr3by8d",
"num": int,
"title": "Wizardry - Video Game From The Mid 80's",
},
"url": "7328ca4ec2459378d725e3be19f661d2b045feda",
"content": "076959e65be30249a2c651fbe6090dc30ba85193",
"count": 3
}),
# "Load More Files" button (#4028)
("https://imgchest.com/p/9p4n3q2z7nq", {
"pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)",
"url": "f5674e8ba79d336193c9f698708d9dcc10e78cc7",
"count": 52,
}),
("https://imgchest.com/p/xxxxxxxxxxx", {
"exception": exception.NotFoundError,
}),
)
example = "https://imgchest.com/p/abcdefghijk"
def __init__(self, match):
self.gallery_id = match.group(1)

View File

@ -23,9 +23,6 @@ class ImagefapExtractor(Extractor):
archive_fmt = "{gallery_id}_{image_id}"
request_interval = (2.0, 4.0)
def _init(self):
self.session.headers["Referer"] = self.root + "/"
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
@ -43,50 +40,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
"""Extractor for image galleries from imagefap.com"""
subcategory = "gallery"
pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)"
test = (
("https://www.imagefap.com/gallery/7102714", {
"pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg",
"keyword": "bdcb75b1e4b9dddc718f3d66e1a58afa9d81a38b",
"content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
}),
("https://www.imagefap.com/gallery/7876223", {
"pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/\d+/\d+/\d+\.jpg",
"keyword": {
"categories": ["Asses", "Softcore", "Pornstars"],
"count": 44,
"description": "",
"gallery_id": 7876223,
"image_id": int,
"num": int,
"tags": ["big ass", "panties", "horny",
"pussy", "exposed", "outdoor"],
"title": "Kelsi Monroe in lingerie",
"uploader": "BdRachel",
},
"count": 44,
}),
# description (#3905)
("https://www.imagefap.com/gallery/6180555", {
"range": "1",
"keyword": {
"categories": ["Amateur", "Softcore", "Homemade"],
"count": 36,
"description": "Nude and dressed sluts showing off the goods",
"gallery_id": 6180555,
"image_id": int,
"num": int,
"tags": [] ,
"title": "Dressed or Undressed MG*",
"uploader": "splitopen",
},
}),
("https://www.imagefap.com/pictures/7102714"),
("https://www.imagefap.com/gallery.php?gid=7102714"),
("https://beta.imagefap.com/gallery.php?gid=7102714"),
)
example = "https://www.imagefap.com/gallery/12345"
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
@ -157,22 +111,7 @@ class ImagefapImageExtractor(ImagefapExtractor):
"""Extractor for single images from imagefap.com"""
subcategory = "image"
pattern = BASE_PATTERN + r"/photo/(\d+)"
test = (
("https://www.imagefap.com/photo/1962981893", {
"pattern": r"https://cdnh?\.imagefap\.com"
r"/images/full/65/196/1962981893\.jpg",
"keyword": {
"date": "21/08/2014",
"gallery_id": 7876223,
"height": 1600,
"image_id": 1962981893,
"title": "Kelsi Monroe in lingerie",
"uploader": "BdRachel",
"width": 1066,
},
}),
("https://beta.imagefap.com/photo/1962981893"),
)
example = "https://www.imagefap.com/photo/12345"
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
@ -213,35 +152,7 @@ class ImagefapFolderExtractor(ImagefapExtractor):
pattern = (BASE_PATTERN + r"/(?:organizer/|"
r"(?:usergallery\.php\?user(id)?=([^&#]+)&"
r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
test = (
("https://www.imagefap.com/organizer/409758", {
"pattern": r"https://www\.imagefap\.com/gallery/7876223",
"url": "37822523e6e4a56feb9dea35653760c86b44ff89",
"count": 1,
}),
(("https://www.imagefap.com/usergallery.php"
"?userid=1981976&folderid=409758"), {
"url": "37822523e6e4a56feb9dea35653760c86b44ff89",
}),
(("https://www.imagefap.com/usergallery.php"
"?user=BdRachel&folderid=409758"), {
"url": "37822523e6e4a56feb9dea35653760c86b44ff89",
}),
("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", {
"pattern": ImagefapGalleryExtractor.pattern,
"range": "1-40",
}),
(("https://www.imagefap.com/usergallery.php"
"?userid=1981976&folderid=-1"), {
"pattern": ImagefapGalleryExtractor.pattern,
"range": "1-40",
}),
(("https://www.imagefap.com/usergallery.php"
"?user=BdRachel&folderid=-1"), {
"pattern": ImagefapGalleryExtractor.pattern,
"range": "1-40",
}),
)
example = "https://www.imagefap.com/organizer/12345"
def __init__(self, match):
ImagefapExtractor.__init__(self, match)
@ -293,20 +204,7 @@ class ImagefapUserExtractor(ImagefapExtractor):
pattern = (BASE_PATTERN +
r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?"
r"|usergallery\.php\?userid=(\d+))(?:$|#)")
test = (
("https://www.imagefap.com/profile/BdRachel", {
"pattern": ImagefapFolderExtractor.pattern,
"count": ">= 18",
}),
("https://www.imagefap.com/usergallery.php?userid=1862791", {
"pattern": r"https://www\.imagefap\.com"
r"/profile/LucyRae/galleries\?folderid=-1",
"count": 1,
}),
("https://www.imagefap.com/profile/BdRachel/galleries"),
("https://www.imagefap.com/profile.php?user=BdRachel"),
("https://beta.imagefap.com/profile.php?user=BdRachel"),
)
example = "https://www.imagefap.com/profile/USER"
def __init__(self, match):
ImagefapExtractor.__init__(self, match)

View File

@ -74,34 +74,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
category = "imxto"
pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
r"/(?:i/|img-)(\w+)(\.html)?)")
test = (
("https://imx.to/i/1qdeva", { # new-style URL
"url": "ab2173088a6cdef631d7a47dec4a5da1c6a00130",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
"size" : 18,
"width" : 64,
"height": 32,
"hash" : "94d56c599223c59f3feb71ea603484d1",
},
}),
("https://imx.to/img-57a2050547b97.html", { # old-style URL
"url": "a83fe6ef1909a318c4d49fcf2caf62f36c3f9204",
"content": "54592f2635674c25677c6872db3709d343cdf92f",
"keyword": {
"size" : 5284,
"width" : 320,
"height": 160,
"hash" : "40da6aaa7b8c42b18ef74309bbc713fc",
},
}),
("https://img.yt/img-57a2050547b97.html", { # img.yt domain
"url": "a83fe6ef1909a318c4d49fcf2caf62f36c3f9204",
}),
("https://imx.to/img-57a2050547b98.html", {
"exception": exception.NotFoundError,
}),
)
example = "https://imx.to/i/ID"
_params = "simple"
_encoding = "utf-8"
@ -140,11 +113,7 @@ class ImxtoGalleryExtractor(ImagehostImageExtractor):
category = "imxto"
subcategory = "gallery"
pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))"
test = ("https://imx.to/g/ozdy", {
"pattern": ImxtoImageExtractor.pattern,
"keyword": {"title": "untitled gallery"},
"count": 40,
})
example = "https://imx.to/g/ID"
def items(self):
page = self.request(self.page_url).text
@ -162,11 +131,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from acidimg.cc"""
category = "acidimg"
pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)"
test = ("https://acidimg.cc/img-5acb6b9de4640.html", {
"url": "f132a630006e8d84f52d59555191ed82b3b64c04",
"keyword": "135347ab4345002fc013863c0d9419ba32d98f78",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
example = "https://acidimg.cc/img-abc123.html"
_params = "simple"
_encoding = "utf-8"
@ -189,26 +154,13 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
category = "imagevenue"
pattern = (r"(?:https?://)?((?:www|img\d+)\.imagevenue\.com"
r"/([A-Z0-9]{8,10}|view/.*|img\.php\?.*))")
test = (
("https://www.imagevenue.com/ME13LS07", {
"pattern": r"https://cdn-images\.imagevenue\.com"
r"/10/ac/05/ME13LS07_o\.png",
"keyword": "ae15d6e3b2095f019eee84cd896700cd34b09c36",
"content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee",
}),
(("https://www.imagevenue.com/view/o?i=92518_13732377"
"annakarina424200712535AM_122_486lo.jpg&h=img150&l=loc486"), {
"url": "8bf0254e29250d8f5026c0105bbdda3ee3d84980",
}),
(("http://img28116.imagevenue.com/img.php"
"?image=th_52709_test_122_64lo.jpg"), {
"url": "f98e3091df7f48a05fb60fbd86f789fc5ec56331",
}),
)
example = "https://www.imagevenue.com/ME123456789"
def get_info(self, page):
    """Return ``(image_url, filename)`` parsed from an image page's HTML.

    Scanning starts at the first ``class="card-body`` marker;
    ``str.index`` raises ``ValueError`` when that marker is absent.
    """
    start = page.index('class="card-body')
    src, cursor = text.extract(page, '<img src="', '"', start)
    # NOTE(review): "/loader.svg" is presumably a placeholder image;
    # when the first <img> points at it, use the next <img> instead.
    if src.endswith("/loader.svg"):
        src, cursor = text.extract(page, '<img src="', '"', cursor)
    # the filename is read from the next alt="..." attribute after the image
    alt_text, cursor = text.extract(page, 'alt="', '"', cursor)
    return src, text.unescape(alt_text)
@ -218,17 +170,7 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
category = "imagetwist"
pattern = (r"(?:https?://)?((?:www\.|phun\.)?"
r"image(?:twist|haha)\.com/([a-z0-9]{12}))")
test = (
("https://imagetwist.com/f1i2s4vhvbrq/test.png", {
"url": "8d5e168c0bee30211f821c6f3b2116e419d42671",
"keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
}),
("https://www.imagetwist.com/f1i2s4vhvbrq/test.png"),
("https://phun.imagetwist.com/f1i2s4vhvbrq/test.png"),
("https://imagehaha.com/f1i2s4vhvbrq/test.png"),
("https://www.imagehaha.com/f1i2s4vhvbrq/test.png"),
)
example = "https://imagetwist.com/123456abcdef/NAME.EXT"
@property
@memcache(maxage=3*3600)
@ -245,11 +187,7 @@ class ImgspiceImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imgspice.com"""
category = "imgspice"
pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?#]+))"
test = ("https://imgspice.com/nwfwtpyog50y/test.png.html", {
"url": "b8c30a8f51ee1012959a4cfd46197fabf14de984",
"keyword": "100e310a19a2fa22d87e1bbc427ecb9f6501e0c0",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
example = "https://imgspice.com/ID/NAME.EXT.html"
def get_info(self, page):
pos = page.find('id="imgpreview"')
@ -265,11 +203,7 @@ class PixhostImageExtractor(ImagehostImageExtractor):
category = "pixhost"
pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
r"/show/\d+/(\d+)_[^/?#]+)")
test = ("http://pixhost.to/show/190/130327671_test-.png", {
"url": "4e5470dcf6513944773044d40d883221bbc46cff",
"keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
example = "https://pixhost.to/show/123/12345_NAME.EXT"
_cookies = {"pixhostads": "1", "pixhosttest": "1"}
def get_info(self, page):
@ -284,10 +218,7 @@ class PixhostGalleryExtractor(ImagehostImageExtractor):
subcategory = "gallery"
pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
r"/gallery/([^/?#]+))")
test = ("https://pixhost.to/gallery/jSMFq", {
"pattern": PixhostImageExtractor.pattern,
"count": 3,
})
example = "https://pixhost.to/gallery/ID"
def items(self):
page = text.extr(self.request(
@ -300,13 +231,9 @@ class PixhostGalleryExtractor(ImagehostImageExtractor):
class PostimgImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from postimages.org"""
category = "postimg"
pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
r"/(?!gallery/)(?:image/)?([^/?#]+)/?)")
test = ("https://postimg.cc/Wtn2b3hC", {
"url": "72f3c8b1d6c6601a20ad58f35635494b4891a99e",
"keyword": "2d05808d04e4e83e33200db83521af06e3147a84",
"content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee",
})
pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
r"\.(?:cc|org)/(?!gallery/)(?:image/)?([^/?#]+)/?)")
example = "https://postimages.org/ID"
def get_info(self, page):
pos = page.index(' id="download"')
@ -319,12 +246,9 @@ class PostimgGalleryExtractor(ImagehostImageExtractor):
"""Extractor for images galleries from postimages.org"""
category = "postimg"
subcategory = "gallery"
pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
r"/(?:gallery/)([^/?#]+)/?)")
test = ("https://postimg.cc/gallery/wxpDLgX", {
"pattern": PostimgImageExtractor.pattern,
"count": 22,
})
pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
r"\.(?:cc|org)/gallery/([^/?#]+))")
example = "https://postimages.org/gallery/ID"
def items(self):
page = self.request(self.page_url).text
@ -338,11 +262,7 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
category = "turboimagehost"
pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com"
r"/p/(\d+)/[^/?#]+\.html)")
test = ("https://www.turboimagehost.com/p/39078423/test--.png.html", {
"url": "b94de43612318771ced924cb5085976f13b3b90e",
"keyword": "704757ca8825f51cec516ec44c1e627c1f2058ca",
"content": "f38b54b17cd7462e687b58d83f00fca88b1b105a",
})
example = "https://www.turboimagehost.com/p/12345/NAME.EXT.html"
def get_info(self, page):
url = text.extract(page, 'src="', '"', page.index("<img "))[0]
@ -353,10 +273,7 @@ class ViprImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from vipr.im"""
category = "vipr"
pattern = r"(?:https?://)?(vipr\.im/(\w+))"
test = ("https://vipr.im/kcd5jcuhgs3v.html", {
"url": "88f6a3ecbf3356a11ae0868b518c60800e070202",
"keyword": "c432e8a1836b0d97045195b745731c2b1bb0e771",
})
example = "https://vipr.im/abc123.html"
def get_info(self, page):
url = text.extr(page, '<img src="', '"')
@ -367,11 +284,7 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from imgclick.net"""
category = "imgclick"
pattern = r"(?:https?://)?((?:www\.)?imgclick\.net/([^/?#]+))"
test = ("http://imgclick.net/4tbrre1oxew9/test-_-_.png.html", {
"url": "140dcb250a325f2d26b2d918c18b8ac6a2a0f6ab",
"keyword": "6895256143eab955622fc149aa367777a8815ba3",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
example = "http://imgclick.net/abc123/NAME.EXT.html"
_https = False
_params = "complex"
@ -385,11 +298,7 @@ class FappicImageExtractor(ImagehostImageExtractor):
"""Extractor for single images from fappic.com"""
category = "fappic"
pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)"
test = ("https://www.fappic.com/98wxqcklyh8k/test.png", {
"pattern": r"https://img\d+\.fappic\.com/img/\w+/test\.png",
"keyword": "433b1d310b0ff12ad8a71ac7b9d8ba3f8cd1e898",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
})
example = "https://fappic.com/abc123/NAME.EXT"
def get_info(self, page):
url , pos = text.extract(page, '<a href="#"><img src="', '"')

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019 Mike Fährmann
# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -114,27 +114,7 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
subcategory = "album"
directory_fmt = ("{category}", "{user}", "{album_name} {album_id}")
pattern = r"(?:https?://)?ibb\.co/album/([^/?#]+)/?(?:\?([^#]+))?"
test = (
("https://ibb.co/album/i5PggF", {
"range": "1-80",
"url": "70afec9fcc3a6de62a6b644b487d892d8d47cf1a",
"keyword": "569e1d88ebdd27655387559cdf1cd526a3e1ab69",
}),
("https://ibb.co/album/i5PggF?sort=title_asc", {
"range": "1-80",
"url": "afdf5fc95d8e09d77e8f44312f3e9b843987bb5a",
"keyword": "f090e14d0e5f7868595082b2c95da1309c84872d",
}),
# no user data (#471)
("https://ibb.co/album/kYKpwF", {
"url": "ac0abcfcb89f4df6adc2f7e4ff872f3b03ef1bc7",
"keyword": {"user": ""},
}),
# private
("https://ibb.co/album/hqgWrF", {
"exception": exception.HttpError,
}),
)
example = "https://ibb.co/album/ID"
def __init__(self, match):
ImgbbExtractor.__init__(self, match)
@ -169,10 +149,7 @@ class ImgbbUserExtractor(ImgbbExtractor):
"""Extractor for user profiles in imgbb.com"""
subcategory = "user"
pattern = r"(?:https?://)?([\w-]+)\.imgbb\.com/?(?:\?([^#]+))?$"
test = ("https://folkie.imgbb.com", {
"range": "1-80",
"pattern": r"https?://i\.ibb\.co/\w+/[^/?#]+",
})
example = "https://USER.imgbb.com"
def __init__(self, match):
ImgbbExtractor.__init__(self, match)
@ -196,19 +173,7 @@ class ImgbbUserExtractor(ImgbbExtractor):
class ImgbbImageExtractor(ImgbbExtractor):
subcategory = "image"
pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?#]+)"
test = ("https://ibb.co/fUqh5b", {
"pattern": r"https://i\.ibb\.co/g3kvx80/Arundel-Ireeman-5\.jpg",
"content": "c5a0965178a8b357acd8aa39660092918c63795e",
"keyword": {
"id" : "fUqh5b",
"title" : "Arundel Ireeman 5",
"url" : "https://i.ibb.co/g3kvx80/Arundel-Ireeman-5.jpg",
"width" : 960,
"height": 719,
"user" : "folkie",
"extension": "jpg",
},
})
example = "https://ibb.co/ID"
def __init__(self, match):
ImgbbExtractor.__init__(self, match)

View File

@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2019 Mike Fährmann
# Copyright 2014-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extract images from galleries at https://imgbox.com/"""
"""Extractors for https://imgbox.com/"""
from .common import Extractor, Message, AsynchronousMixin
from .. import text, exception
@ -63,20 +63,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
filename_fmt = "{num:>03}-{filename}.{extension}"
archive_fmt = "{gallery_key}_{image_key}"
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"
test = (
("https://imgbox.com/g/JaX5V5HX7g", {
"url": "da4f15b161461119ee78841d4b8e8d054d95f906",
"keyword": "4b1e62820ac2c6205b7ad0b6322cc8e00dbe1b0c",
"content": "d20307dc8511ac24d688859c55abf2e2cc2dd3cc",
}),
("https://imgbox.com/g/cUGEkRbdZZ", {
"url": "76506a3aab175c456910851f66227e90484ca9f7",
"keyword": "fb0427b87983197849fb2887905e758f3e50cb6e",
}),
("https://imgbox.com/g/JaX5V5HX7h", {
"exception": exception.NotFoundError,
}),
)
example = "https://imgbox.com/g/12345abcde"
def __init__(self, match):
ImgboxExtractor.__init__(self, match)
@ -106,16 +93,7 @@ class ImgboxImageExtractor(ImgboxExtractor):
subcategory = "image"
archive_fmt = "{image_key}"
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"
test = (
("https://imgbox.com/qHhw7lpG", {
"url": "ee9cdea6c48ad0161c1b5f81f6b0c9110997038c",
"keyword": "dfc72310026b45f3feb4f9cada20c79b2575e1af",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
}),
("https://imgbox.com/qHhw7lpH", {
"exception": exception.NotFoundError,
}),
)
example = "https://imgbox.com/1234abcd"
def __init__(self, match):
ImgboxExtractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2022 Mike Fährmann
# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -17,24 +17,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
category = "imgth"
root = "https://imgth.com"
pattern = r"(?:https?://)?(?:www\.)?imgth\.com/gallery/(\d+)"
test = (
("https://imgth.com/gallery/37/wallpaper-anime", {
"url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
"pattern": r"https://imgth\.com/images/2009/11/25"
r"/wallpaper-anime_\w+\.jpg",
"keyword": {
"count": 12,
"date": "dt:2009-11-25 18:21:00",
"extension": "jpg",
"filename": r"re:wallpaper-anime_\w+",
"gallery_id": 37,
"num": int,
"title": "Wallpaper anime",
"user": "celebrities",
},
}),
("https://www.imgth.com/gallery/37/wallpaper-anime"),
)
example = "https://imgth.com/gallery/123/TITLE"
def __init__(self, match):
self.gallery_id = gid = match.group(1)

View File

@ -11,7 +11,6 @@
from .common import Extractor, Message
from .. import text, exception
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.(?:com|io)"
@ -65,69 +64,7 @@ class ImgurImageExtractor(ImgurExtractor):
archive_fmt = "{id}"
pattern = (BASE_PATTERN + r"/(?!gallery|search)"
r"(?:r/\w+/)?(\w{7}|\w{5})[sbtmlh]?")
test = (
("https://imgur.com/21yMxCS", {
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
"account_id" : 0,
"comment_count" : int,
"cover_id" : "21yMxCS",
"date" : "dt:2016-11-10 14:24:35",
"description" : "",
"downvote_count": int,
"duration" : 0,
"ext" : "png",
"favorite" : False,
"favorite_count": 0,
"has_sound" : False,
"height" : 32,
"id" : "21yMxCS",
"image_count" : 1,
"in_most_viral" : False,
"is_ad" : False,
"is_album" : False,
"is_animated" : False,
"is_looping" : False,
"is_mature" : False,
"is_pending" : False,
"mime_type" : "image/png",
"name" : "test-テスト",
"point_count" : int,
"privacy" : "",
"score" : int,
"size" : 182,
"title" : "Test",
"upvote_count" : int,
"url" : "https://i.imgur.com/21yMxCS.png",
"view_count" : int,
"width" : 64,
},
}),
("http://imgur.com/0gybAXR", { # gifv/mp4 video
"url": "a2220eb265a55b0c95e0d3d721ec7665460e3fd7",
"content": "a3c080e43f58f55243ab830569ba02309d59abfc",
}),
("https://imgur.com/XFfsmuC", { # missing title in API response (#467)
"keyword": {"title": "Tears are a natural response to irritants"},
}),
("https://imgur.com/1Nily2P", { # animated png
"pattern": "https://i.imgur.com/1Nily2P.png",
}),
("https://imgur.com/zzzzzzz", { # not found
"exception": exception.HttpError,
}),
("https://m.imgur.com/r/Celebs/iHJ7tsM"),
("https://www.imgur.com/21yMxCS"), # www
("https://m.imgur.com/21yMxCS"), # mobile
("https://imgur.com/zxaY6"), # 5 character key
("https://imgur.io/zxaY6"), # .io
("https://i.imgur.com/21yMxCS.png"), # direct link
("https://i.imgur.io/21yMxCS.png"), # direct link .io
("https://i.imgur.com/21yMxCSh.png"), # direct link thumbnail
("https://i.imgur.com/zxaY6.gif"), # direct link (short)
("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb)
)
example = "https://imgur.com/abcdefg"
def items(self):
image = self.api.image(self.key)
@ -152,71 +89,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}"
archive_fmt = "{album[id]}_{id}"
pattern = BASE_PATTERN + r"/a/(\w{7}|\w{5})"
test = (
("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
"keyword": {
"album": {
"account_id" : 0,
"comment_count" : int,
"cover_id" : "693j2Kr",
"date" : "dt:2015-10-09 10:37:50",
"description" : "",
"downvote_count": 0,
"favorite" : False,
"favorite_count": 0,
"id" : "TcBmP",
"image_count" : 19,
"in_most_viral" : False,
"is_ad" : False,
"is_album" : True,
"is_mature" : False,
"is_pending" : False,
"privacy" : "private",
"score" : int,
"title" : "138",
"upvote_count" : int,
"url" : "https://imgur.com/a/TcBmP",
"view_count" : int,
"virality" : int,
},
"account_id" : 0,
"count" : 19,
"date" : "type:datetime",
"description": "",
"ext" : "jpg",
"has_sound" : False,
"height" : int,
"id" : str,
"is_animated": False,
"is_looping" : False,
"mime_type" : "image/jpeg",
"name" : str,
"num" : int,
"size" : int,
"title" : str,
"type" : "image",
"updated_at" : None,
"url" : str,
"width" : int,
},
}),
("https://imgur.com/a/eD9CT", { # large album
"url": "de748c181a04d18bef1de9d4f4866ef0a06d632b",
}),
("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash
"url": "695ef0c950023362a0163ee5041796300db76674",
}),
("https://imgur.com/a/TcBmQ", {
"exception": exception.HttpError,
}),
("https://imgur.com/a/pjOnJA0", { # empty, no 'media' (#2557)
"count": 0,
}),
("https://www.imgur.com/a/TcBmP"), # www
("https://imgur.io/a/TcBmP"), # .io
("https://m.imgur.com/a/TcBmP"), # mobile
)
example = "https://imgur.com/a/abcde"
def items(self):
album = self.api.album(self.key)
@ -249,17 +122,7 @@ class ImgurGalleryExtractor(ImgurExtractor):
"""Extractor for imgur galleries"""
subcategory = "gallery"
pattern = BASE_PATTERN + r"/(?:gallery|t/\w+)/(\w{7}|\w{5})"
test = (
("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380)
"pattern": "https://imgur.com/zf2fIms",
}),
("https://imgur.com/gallery/eD9CT", {
"pattern": "https://imgur.com/a/eD9CT",
}),
("https://imgur.com/t/unmuted/26sEhNr"),
("https://imgur.com/t/cat/qSB8NbN"),
("https://imgur.io/t/cat/qSB8NbN"), # .io
)
example = "https://imgur.com/gallery/abcde"
def items(self):
if self.api.gallery(self.key)["is_album"]:
@ -275,15 +138,7 @@ class ImgurUserExtractor(ImgurExtractor):
"""Extractor for all images posted by a user"""
subcategory = "user"
pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/posts|/submitted)?/?$"
test = (
("https://imgur.com/user/Miguenzo", {
"range": "1-100",
"count": 100,
"pattern": r"https://imgur\.com(/a)?/\w+$",
}),
("https://imgur.com/user/Miguenzo/posts"),
("https://imgur.com/user/Miguenzo/submitted"),
)
example = "https://imgur.com/user/USER"
def items(self):
    """Return the queued results for the matched account's submissions.

    Calls ``self.api.account_submissions`` with ``self.key`` and hands the
    result to ``self._items_queue``, whose return value is passed through
    unchanged.
    """
    submissions = self.api.account_submissions(self.key)
    return self._items_queue(submissions)
@ -293,11 +148,7 @@ class ImgurFavoriteExtractor(ImgurExtractor):
"""Extractor for a user's favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/?$"
test = ("https://imgur.com/user/Miguenzo/favorites", {
"range": "1-100",
"count": 100,
"pattern": r"https://imgur\.com(/a)?/\w+$",
})
example = "https://imgur.com/user/USER/favorites"
def items(self):
    """Return the queued results for the matched account's favorites.

    Calls ``self.api.account_favorites`` with ``self.key`` and hands the
    result to ``self._items_queue``, whose return value is passed through
    unchanged.
    """
    favorites = self.api.account_favorites(self.key)
    return self._items_queue(favorites)
@ -307,16 +158,7 @@ class ImgurFavoriteFolderExtractor(ImgurExtractor):
"""Extractor for a user's favorites folder"""
subcategory = "favorite-folder"
pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/folder/(\d+)"
test = (
("https://imgur.com/user/mikf1/favorites/folder/11896757/public", {
"pattern": r"https://imgur\.com(/a)?/\w+$",
"count": 3,
}),
("https://imgur.com/user/mikf1/favorites/folder/11896741/private", {
"pattern": r"https://imgur\.com(/a)?/\w+$",
"count": 5,
}),
)
example = "https://imgur.com/user/USER/favorites/folder/12345/TITLE"
def __init__(self, match):
ImgurExtractor.__init__(self, match)
@ -331,11 +173,7 @@ class ImgurSubredditExtractor(ImgurExtractor):
"""Extractor for a subreddit's imgur links"""
subcategory = "subreddit"
pattern = BASE_PATTERN + r"/r/([^/?#]+)/?$"
test = ("https://imgur.com/r/pics", {
"range": "1-100",
"count": 100,
"pattern": r"https://imgur\.com(/a)?/\w+$",
})
example = "https://imgur.com/r/SUBREDDIT"
def items(self):
    """Return the queued results for the matched subreddit gallery.

    Calls ``self.api.gallery_subreddit`` with ``self.key`` and hands the
    result to ``self._items_queue``, whose return value is passed through
    unchanged.
    """
    gallery = self.api.gallery_subreddit(self.key)
    return self._items_queue(gallery)
@ -345,11 +183,7 @@ class ImgurTagExtractor(ImgurExtractor):
"""Extractor for imgur tag searches"""
subcategory = "tag"
pattern = BASE_PATTERN + r"/t/([^/?#]+)$"
test = ("https://imgur.com/t/animals", {
"range": "1-100",
"count": 100,
"pattern": r"https://imgur\.com(/a)?/\w+$",
})
example = "https://imgur.com/t/TAG"
def items(self):
    """Return the queued results for the matched tag gallery.

    Calls ``self.api.gallery_tag`` with ``self.key`` and hands the result
    to ``self._items_queue``, whose return value is passed through
    unchanged.
    """
    gallery = self.api.gallery_tag(self.key)
    return self._items_queue(gallery)
@ -359,11 +193,7 @@ class ImgurSearchExtractor(ImgurExtractor):
"""Extractor for imgur search results"""
subcategory = "search"
pattern = BASE_PATTERN + r"/search(?:/[^?#]+)?/?\?q=([^&#]+)"
test = ("https://imgur.com/search?q=cute+cat", {
"range": "1-100",
"count": 100,
"pattern": r"https://imgur\.com(/a)?/\w+$",
})
example = "https://imgur.com/search?q=QUERY"
def items(self):
key = text.unquote(self.key.replace("+", " "))
@ -451,11 +281,7 @@ class ImgurAPI():
params["client_id"] = self.client_id
params["page"] = 0
params["sort"] = "newest"
headers = {
"Referer": "https://imgur.com/",
"Origin": "https://imgur.com",
}
headers = {"Origin": "https://imgur.com"}
while True:
data = self._call(endpoint, params, headers)["data"]

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2020-2022 Mike Fährmann
# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -72,51 +72,7 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
"""Extractor for inkbunny user profiles"""
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])"
test = (
("https://inkbunny.net/soina", {
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
r"/\d+/\d+_soina_.+",
"range": "20-50",
"keyword": {
"date" : "type:datetime",
"deleted" : bool,
"file_id" : "re:[0-9]+",
"filename" : r"re:[0-9]+_soina_\w+",
"full_file_md5": "re:[0-9a-f]{32}",
"mimetype" : str,
"submission_id": "re:[0-9]+",
"user_id" : "20969",
"comments_count" : "re:[0-9]+",
"deleted" : bool,
"favorite" : bool,
"favorites_count": "re:[0-9]+",
"friends_only" : bool,
"guest_block" : bool,
"hidden" : bool,
"pagecount" : "re:[0-9]+",
"pools" : list,
"pools_count" : int,
"public" : bool,
"rating_id" : "re:[0-9]+",
"rating_name" : str,
"ratings" : list,
"scraps" : bool,
"tags" : list,
"title" : str,
"type_name" : str,
"username" : "soina",
"views" : str,
},
}),
("https://inkbunny.net/gallery/soina", {
"range": "1-25",
"keyword": {"scraps": False},
}),
("https://inkbunny.net/scraps/soina", {
"range": "1-25",
"keyword": {"scraps": True},
}),
)
example = "https://inkbunny.net/USER"
def __init__(self, match):
kind, self.user = match.groups()
@ -148,14 +104,7 @@ class InkbunnyPoolExtractor(InkbunnyExtractor):
pattern = (BASE_PATTERN + r"/(?:"
r"poolview_process\.php\?pool_id=(\d+)|"
r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))")
test = (
("https://inkbunny.net/poolview_process.php?pool_id=28985", {
"count": 9,
"keyword": {"pool_id": "28985"},
}),
("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
"&mode=pool&pool_id=28985&page=1&orderby=pool_order&random=no"),
)
example = "https://inkbunny.net/poolview_process.php?pool_id=12345"
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
@ -185,16 +134,8 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
pattern = (BASE_PATTERN + r"/(?:"
r"userfavorites_process\.php\?favs_user_id=(\d+)|"
r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))")
test = (
("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", {
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
r"/\d+/\d+_\w+_.+",
"range": "20-50",
"keyword": {"favs_user_id": "20969"},
}),
("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
"&mode=userfavs&random=no&orderby=fav_datetime&page=1&user_id=20969"),
)
example = ("https://inkbunny.net/userfavorites_process.php"
"?favs_user_id=12345")
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
@ -225,26 +166,8 @@ class InkbunnySearchExtractor(InkbunnyExtractor):
subcategory = "search"
pattern = (BASE_PATTERN +
r"/submissionsviewall\.php\?([^#]+&mode=search&[^#]+)")
test = (("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
"&mode=search&page=1&orderby=create_datetime&text=cute"
"&stringtype=and&keywords=yes&title=yes&description=no&artist="
"&favsby=&type=&days=&keyword_id=&user_id=&random=&md5="), {
"range": "1-10",
"count": 10,
"keyword": {
"search": {
"rid": "ffffffffff",
"mode": "search",
"page": "1",
"orderby": "create_datetime",
"text": "cute",
"stringtype": "and",
"keywords": "yes",
"title": "yes",
"description": "no",
},
},
})
example = ("https://inkbunny.net/submissionsviewall.php"
"?text=TAG&mode=search&type=")
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
@ -279,15 +202,8 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor):
pattern = (BASE_PATTERN + r"/(?:"
r"watchlist_process\.php\?mode=watching&user_id=(\d+)|"
r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))")
test = (
(("https://inkbunny.net/watchlist_process.php"
"?mode=watching&user_id=20969"), {
"pattern": InkbunnyUserExtractor.pattern,
"count": ">= 90",
}),
("https://inkbunny.net/usersviewall.php?rid=ffffffffff"
"&mode=watching&page=1&user_id=20969&orderby=added&namesonly="),
)
example = ("https://inkbunny.net/watchlist_process.php"
"?mode=watching&user_id=12345")
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)
@ -324,16 +240,7 @@ class InkbunnyPostExtractor(InkbunnyExtractor):
"""Extractor for individual Inkbunny posts"""
subcategory = "post"
pattern = BASE_PATTERN + r"/s/(\d+)"
test = (
("https://inkbunny.net/s/1829715", {
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
r"/2626/2626843_soina_dscn2296\.jpg",
"content": "cf69d8dddf0822a12b4eef1f4b2258bd600b36c8",
}),
("https://inkbunny.net/s/2044094", {
"count": 4,
}),
)
example = "https://inkbunny.net/s/12345"
def __init__(self, match):
InkbunnyExtractor.__init__(self, match)

View File

@ -90,7 +90,9 @@ class InstagramExtractor(Extractor):
file["_http_headers"] = video_headers
text.nameext_from_url(url, file)
yield Message.Url, url, file
if not previews:
if previews:
file["media_id"] += "p"
else:
continue
url = file["display_url"]
@ -396,11 +398,7 @@ class InstagramUserExtractor(InstagramExtractor):
"""Extractor for an Instagram user profile"""
subcategory = "user"
pattern = USER_PATTERN + r"/?(?:$|[?#])"
test = (
("https://www.instagram.com/instagram/"),
("https://www.instagram.com/instagram/?hl=en"),
("https://www.instagram.com/id:25025320/"),
)
example = "https://www.instagram.com/USER/"
def initialize(self):
pass
@ -425,10 +423,7 @@ class InstagramPostsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's posts"""
subcategory = "posts"
pattern = USER_PATTERN + r"/posts"
test = ("https://www.instagram.com/instagram/posts/", {
"range": "1-16",
"count": ">= 16",
})
example = "https://www.instagram.com/USER/posts/"
def posts(self):
uid = self.api.user_id(self.item)
@ -439,10 +434,7 @@ class InstagramReelsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's reels"""
subcategory = "reels"
pattern = USER_PATTERN + r"/reels"
test = ("https://www.instagram.com/instagram/reels/", {
"range": "40-60",
"count": ">= 20",
})
example = "https://www.instagram.com/USER/reels/"
def posts(self):
uid = self.api.user_id(self.item)
@ -453,15 +445,7 @@ class InstagramTaggedExtractor(InstagramExtractor):
"""Extractor for an Instagram user's tagged posts"""
subcategory = "tagged"
pattern = USER_PATTERN + r"/tagged"
test = ("https://www.instagram.com/instagram/tagged/", {
"range": "1-16",
"count": ">= 16",
"keyword": {
"tagged_owner_id" : "25025320",
"tagged_username" : "instagram",
"tagged_full_name": "Instagram",
},
})
example = "https://www.instagram.com/USER/tagged/"
def metadata(self):
if self.item.startswith("id:"):
@ -485,11 +469,7 @@ class InstagramGuideExtractor(InstagramExtractor):
"""Extractor for an Instagram guide"""
subcategory = "guide"
pattern = USER_PATTERN + r"/guide/[^/?#]+/(\d+)"
test = (("https://www.instagram.com/kadakaofficial/guide"
"/knit-i-need-collection/18131821684305217/"), {
"range": "1-16",
"count": ">= 16",
})
example = "https://www.instagram.com/USER/guide/NAME/12345"
def __init__(self, match):
InstagramExtractor.__init__(self, match)
@ -506,10 +486,7 @@ class InstagramSavedExtractor(InstagramExtractor):
"""Extractor for an Instagram user's saved media"""
subcategory = "saved"
pattern = USER_PATTERN + r"/saved(?:/all-posts)?/?$"
test = (
("https://www.instagram.com/instagram/saved/"),
("https://www.instagram.com/instagram/saved/all-posts/"),
)
example = "https://www.instagram.com/USER/saved/"
def posts(self):
    """Return the result of ``self.api.user_saved()``.

    The API call takes no user argument; its return value is passed
    through unchanged.
    """
    saved = self.api.user_saved()
    return saved
@ -519,9 +496,7 @@ class InstagramCollectionExtractor(InstagramExtractor):
"""Extractor for Instagram collection"""
subcategory = "collection"
pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)"
test = (
"https://www.instagram.com/instagram/saved/collection_name/123456789/",
)
example = "https://www.instagram.com/USER/saved/COLLECTION/12345"
def __init__(self, match):
InstagramExtractor.__init__(self, match)
@ -543,14 +518,7 @@ class InstagramStoriesExtractor(InstagramExtractor):
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
r"/s(?:tories/(?:highlights/(\d+)|([^/?#]+)(?:/(\d+))?)"
r"|/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)")
test = (
("https://www.instagram.com/stories/instagram/"),
("https://www.instagram.com/stories/highlights/18042509488170095/"),
("https://instagram.com/stories/geekmig/2724343156064789461"),
("https://www.instagram.com/s/aGlnaGxpZ2h0OjE4MDQyNTA5NDg4MTcwMDk1"),
("https://www.instagram.com/s/aGlnaGxpZ2h0OjE4MDQyNTA5NDg4MTcwMDk1"
"?story_media_id=2724343156064789461"),
)
example = "https://www.instagram.com/stories/USER/"
def __init__(self, match):
h1, self.user, m1, h2, m2 = match.groups()
@ -585,22 +553,33 @@ class InstagramHighlightsExtractor(InstagramExtractor):
"""Extractor for an Instagram user's story highlights"""
subcategory = "highlights"
pattern = USER_PATTERN + r"/highlights"
test = ("https://www.instagram.com/instagram/highlights",)
example = "https://www.instagram.com/USER/highlights/"
def posts(self):
    """Return highlight media for the matched user.

    Resolves ``self.item`` to a user ID via ``api.user_id`` and returns
    whatever ``api.highlights_media`` produces for that ID.
    """
    api = self.api
    return api.highlights_media(api.user_id(self.item))
class InstagramFollowingExtractor(InstagramExtractor):
    """Extractor for an Instagram user's followed users"""
    subcategory = "following"
    pattern = USER_PATTERN + r"/following"
    example = "https://www.instagram.com/USER/following/"

    def items(self):
        """Yield one ``Message.Queue`` entry per followed account.

        ``self.item`` is resolved to a user ID through the API, then each
        user record returned by ``api.user_following`` is tagged with
        ``InstagramUserExtractor`` under ``"_extractor"`` and emitted
        together with a profile URL built from ``self.root`` and the
        record's ``"username"``.
        """
        api = self.api
        user_id = api.user_id(self.item)

        for followed in api.user_following(user_id):
            # Tag the record with the extractor class for the queued URL.
            followed["_extractor"] = InstagramUserExtractor
            profile_url = "{}/{}".format(self.root, followed["username"])
            yield Message.Queue, profile_url, followed
class InstagramTagExtractor(InstagramExtractor):
"""Extractor for Instagram tags"""
subcategory = "tag"
directory_fmt = ("{category}", "{subcategory}", "{tag}")
pattern = BASE_PATTERN + r"/explore/tags/([^/?#]+)"
test = ("https://www.instagram.com/explore/tags/instagram/", {
"range": "1-16",
"count": ">= 16",
})
example = "https://www.instagram.com/explore/tags/TAG/"
def metadata(self):
    """Return a dict with the URL-unquoted ``self.item`` under ``"tag"``."""
    tag = text.unquote(self.item)
    return {"tag": tag}
@ -613,10 +592,7 @@ class InstagramAvatarExtractor(InstagramExtractor):
"""Extractor for an Instagram user's avatar"""
subcategory = "avatar"
pattern = USER_PATTERN + r"/avatar"
test = ("https://www.instagram.com/instagram/avatar", {
"pattern": r"https://instagram\.[\w.-]+\.fbcdn\.net/v/t51\.2885-19"
r"/281440578_1088265838702675_6233856337905829714_n\.jpg",
})
example = "https://www.instagram.com/USER/avatar/"
def posts(self):
if self._logged_in:
@ -656,102 +632,7 @@ class InstagramPostExtractor(InstagramExtractor):
subcategory = "post"
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
r"/(?:[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)")
test = (
# GraphImage
("https://www.instagram.com/p/BqvsDleB3lV/", {
"pattern": r"https://[^/]+\.(cdninstagram\.com|fbcdn\.net)"
r"/v(p/[0-9a-f]+/[0-9A-F]+)?/t51.2885-15/e35"
r"/44877605_725955034447492_3123079845831750529_n.jpg",
"keyword": {
"date": "dt:2018-11-29 01:04:04",
"description": str,
"height": int,
"likes": int,
"location_id": "214424288",
"location_slug": "hong-kong",
"location_url": "re:/explore/locations/214424288/hong-kong/",
"media_id": "1922949326347663701",
"shortcode": "BqvsDleB3lV",
"post_id": "1922949326347663701",
"post_shortcode": "BqvsDleB3lV",
"post_url": "https://www.instagram.com/p/BqvsDleB3lV/",
"tags": ["#WHPsquares"],
"typename": "GraphImage",
"username": "instagram",
"width": int,
}
}),
# GraphSidecar
("https://www.instagram.com/p/BoHk1haB5tM/", {
"count": 5,
"keyword": {
"sidecar_media_id": "1875629777499953996",
"sidecar_shortcode": "BoHk1haB5tM",
"post_id": "1875629777499953996",
"post_shortcode": "BoHk1haB5tM",
"post_url": "https://www.instagram.com/p/BoHk1haB5tM/",
"num": int,
"likes": int,
"username": "instagram",
}
}),
# GraphVideo
("https://www.instagram.com/p/Bqxp0VSBgJg/", {
"pattern": r"/46840863_726311431074534_7805566102611403091_n\.mp4",
"keyword": {
"date": "dt:2018-11-29 19:23:58",
"description": str,
"height": int,
"likes": int,
"media_id": "1923502432034620000",
"post_url": "https://www.instagram.com/p/Bqxp0VSBgJg/",
"shortcode": "Bqxp0VSBgJg",
"tags": ["#ASMR"],
"typename": "GraphVideo",
"username": "instagram",
"width": int,
}
}),
# GraphVideo (IGTV)
("https://www.instagram.com/tv/BkQjCfsBIzi/", {
"pattern": r"/10000000_597132547321814_702169244961988209_n\.mp4",
"keyword": {
"date": "dt:2018-06-20 19:51:32",
"description": str,
"height": int,
"likes": int,
"media_id": "1806097553666903266",
"post_url": "https://www.instagram.com/p/BkQjCfsBIzi/",
"shortcode": "BkQjCfsBIzi",
"typename": "GraphVideo",
"username": "instagram",
"width": int,
}
}),
# GraphSidecar with 2 embedded GraphVideo objects
("https://www.instagram.com/p/BtOvDOfhvRr/", {
"count": 2,
"keyword": {
"post_url": "https://www.instagram.com/p/BtOvDOfhvRr/",
"sidecar_media_id": "1967717017113261163",
"sidecar_shortcode": "BtOvDOfhvRr",
"video_url": str,
}
}),
# GraphImage with tagged user
("https://www.instagram.com/p/B_2lf3qAd3y/", {
"keyword": {
"tagged_users": [{
"id" : "1246468638",
"username" : "kaaymbl",
"full_name": "Call Me Kay",
}]
}
}),
# URL with username (#2085)
("https://www.instagram.com/dm/p/CW042g7B9CY/"),
("https://www.instagram.com/reel/CDg_6Y1pxWu/"),
)
example = "https://www.instagram.com/p/abcdefg/"
def posts(self):
    """Return the result of ``self.api.media`` for ``self.item``.

    ``self.item`` is presumably the post shortcode captured by the URL
    pattern; verify against the base class.
    """
    media = self.api.media(self.item)
    return media
@ -869,6 +750,11 @@ class InstagramRestAPI():
params = {"count": 30}
return self._pagination(endpoint, params)
def user_following(self, user_id):
    """Return the paginated "following" list for ``user_id``.

    Builds the ``/v1/friendships/<user_id>/following/`` endpoint and
    delegates to ``self._pagination_following`` with a page size of 12.
    """
    page_params = {"count": 12}
    return self._pagination_following(
        "/v1/friendships/{}/following/".format(user_id), page_params)
def user_saved(self):
endpoint = "/v1/feed/saved/posts/"
params = {"count": 50}
@ -958,6 +844,20 @@ class InstagramRestAPI():
return extr._update_cursor(None)
params["max_id"] = extr._update_cursor(data["next_max_id"])
def _pagination_following(self, endpoint, params):
    """Yield user records from an offset-paginated "following" endpoint.

    ``params["max_id"]`` starts at the integer value of the extractor's
    initial cursor and is advanced by ``params["count"]`` after each page.
    Iteration ends, and the extractor's cursor is reset with ``None``, as
    soon as a page holds fewer than ``params["count"]`` users.
    """
    extractor = self.extractor
    start = text.parse_int(extractor._init_cursor())
    params["max_id"] = start

    while True:
        response = self._call(endpoint, params=params)
        users = response["users"]
        yield from users

        # A short page is treated as the last one.
        if len(users) < params["count"]:
            return extractor._update_cursor(None)

        # Record the next offset through the extractor's cursor.
        next_offset = params["max_id"] + params["count"]
        params["max_id"] = extractor._update_cursor(next_offset)
class InstagramGraphqlAPI():

View File

@ -26,31 +26,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
filename_fmt = "{num:>03}.{extension}"
archive_fmt = "{document[publicationId]}_{num}"
pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)"
test = ("https://issuu.com/issuu/docs/motions-1-2019/", {
"pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg",
"count" : 36,
"keyword": {
"document": {
"access" : "PUBLIC",
"contentRating" : {
"isAdsafe" : True,
"isExplicit": False,
"isReviewed": True,
},
"date" : "dt:2019-09-16 00:00:00",
"description" : "re:Motions, the brand new publication by I",
"documentName" : "motions-1-2019",
"downloadable" : False,
"pageCount" : 36,
"publicationId" : "d99ec95935f15091b040cb8060f05510",
"title" : "Motions by Issuu - Issue 1",
"username" : "issuu",
},
"extension": "jpg",
"filename" : r"re:page_\d+",
"num" : int,
},
})
example = "https://issuu.com/issuu/docs/TITLE/"
def metadata(self, page):
data = util.json_loads(text.rextract(
@ -78,10 +54,7 @@ class IssuuUserExtractor(IssuuBase, Extractor):
"""Extractor for all publications of a user/publisher"""
subcategory = "user"
pattern = r"(?:https?://)?issuu\.com/([^/?#]+)/?$"
test = ("https://issuu.com/issuu", {
"pattern": IssuuPublicationExtractor.pattern,
"count" : "> 25",
})
example = "https://issuu.com/USER"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2022 Mike Fährmann
# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -63,12 +63,7 @@ class ItakuGalleryExtractor(ItakuExtractor):
"""Extractor for posts from an itaku user gallery"""
subcategory = "gallery"
pattern = BASE_PATTERN + r"/profile/([^/?#]+)/gallery"
test = ("https://itaku.ee/profile/piku/gallery", {
"pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
r"/[^/?#]+\.(jpg|png|gif)",
"range": "1-10",
"count": 10,
})
example = "https://itaku.ee/profile/USER/gallery"
def posts(self):
    """Return the result of ``self.api.galleries_images`` for ``self.item``.

    ``self.item`` is presumably the profile name captured by the URL
    pattern; verify against the base class.
    """
    images = self.api.galleries_images(self.item)
    return images
@ -77,62 +72,7 @@ class ItakuGalleryExtractor(ItakuExtractor):
class ItakuImageExtractor(ItakuExtractor):
subcategory = "image"
pattern = BASE_PATTERN + r"/images/(\d+)"
test = (
("https://itaku.ee/images/100471", {
"pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
r"/220504_oUNIAFT\.png",
"count": 1,
"keyword": {
"already_pinned": None,
"blacklisted": {
"blacklisted_tags": [],
"is_blacklisted": False
},
"can_reshare": True,
"date": "dt:2022-05-05 19:21:17",
"date_added": "2022-05-05T19:21:17.674148Z",
"date_edited": "2022-05-25T14:37:46.220612Z",
"description": "sketch from drawpile",
"extension": "png",
"filename": "220504_oUNIAFT",
"hotness_score": float,
"id": 100471,
"image": "https://d1wmr8tlk3viaj.cloudfront.net/gallery_imgs"
"/220504_oUNIAFT.png",
"image_xl": "https://d1wmr8tlk3viaj.cloudfront.net"
"/gallery_imgs/220504_oUNIAFT/lg.jpg",
"liked_by_you": False,
"maturity_rating": "SFW",
"num_comments": int,
"num_likes": int,
"num_reshares": int,
"obj_tags": 136446,
"owner": 16775,
"owner_avatar": "https://d1wmr8tlk3viaj.cloudfront.net"
"/profile_pics/av2022r_vKYVywc/md.jpg",
"owner_displayname": "Piku",
"owner_username": "piku",
"reshared_by_you": False,
"sections": ["Fanart/Miku"],
"tags": list,
"tags_character": ["hatsune_miku"],
"tags_copyright": ["vocaloid"],
"tags_general" : ["twintails", "green_hair", "flag",
"gloves", "green_eyes", "female",
"racing_miku"],
"title": "Racing Miku 2022 Ver.",
"too_mature": False,
"uncompressed_filesize": "0.62",
"video": None,
"visibility": "PUBLIC",
},
}),
# video
("https://itaku.ee/images/19465", {
"pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_vids"
r"/sleepy_af_OY5GHWw\.mp4",
}),
)
example = "https://itaku.ee/images/12345"
def posts(self):
    """Return a one-element tuple holding ``self.api.image(self.item)``.

    Wrapping the single result in a tuple gives callers an iterable of
    posts.
    """
    image = self.api.image(self.item)
    return (image,)
@ -145,7 +85,6 @@ class ItakuAPI():
self.root = extractor.root + "/api"
self.headers = {
"Accept": "application/json, text/plain, */*",
"Referer": extractor.root + "/",
}
def galleries_images(self, username, section=None):

View File

@ -21,28 +21,7 @@ class ItchioGameExtractor(Extractor):
filename_fmt = "{game[title]} ({id}).{extension}"
archive_fmt = "{id}"
pattern = r"(?:https?://)?(\w+).itch\.io/([\w-]+)"
test = (
("https://sirtartarus.itch.io/a-craft-of-mine", {
"pattern": r"https://\w+\.ssl\.hwcdn\.net/upload2"
r"/game/1983311/7723751\?",
"count": 1,
"keyword": {
"extension": "",
"filename": "7723751",
"game": {
"id": 1983311,
"noun": "game",
"title": "A Craft Of Mine",
"url": "https://sirtartarus.itch.io/a-craft-of-mine",
},
"user": {
"id": 4060052,
"name": "SirTartarus",
"url": "https://sirtartarus.itch.io",
},
},
}),
)
example = "https://USER.itch.io/GAME"
def __init__(self, match):
self.user, self.slug = match.groups()

View File

@ -4,18 +4,18 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://jpeg.pet/"""
"""Extractors for https://jpg1.su/"""
from .common import Extractor, Message
from .. import text
BASE_PATTERN = r"(?:https?://)?jpe?g\.(?:pet|fish(?:ing)?|church)"
BASE_PATTERN = r"(?:https?://)?jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)"
class JpgfishExtractor(Extractor):
"""Base class for jpgfish extractors"""
category = "jpgfish"
root = "https://jpeg.pet"
root = "https://jpg1.su"
directory_fmt = ("{category}", "{user}", "{album}",)
archive_fmt = "{id}"
@ -35,28 +35,7 @@ class JpgfishImageExtractor(JpgfishExtractor):
"""Extractor for jpgfish Images"""
subcategory = "image"
pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))"
test = (
("https://jpeg.pet/img/funnymeme.LecXGS", {
"pattern": r"https://simp3\.jpg\.church/images/funnymeme\.jpg",
"content": "098e5e9b17ad634358426e0ffd1c93871474d13c",
"keyword": {
"album": "",
"extension": "jpg",
"filename": "funnymeme",
"id": "LecXGS",
"url": "https://simp3.jpg.church/images/funnymeme.jpg",
"user": "exearco",
},
}),
("https://jpg.church/img/auCruA", {
"pattern": r"https://simp2\.jpg\.church/hannahowo_00457\.jpg",
"keyword": {"album": "401-500"},
}),
("https://jpg.pet/img/funnymeme.LecXGS"),
("https://jpg.fishing/img/funnymeme.LecXGS"),
("https://jpg.fish/img/funnymeme.LecXGS"),
("https://jpg.church/img/funnymeme.LecXGS"),
)
example = "https://jpg1.su/img/TITLE.ID"
def __init__(self, match):
JpgfishExtractor.__init__(self, match)
@ -83,21 +62,7 @@ class JpgfishAlbumExtractor(JpgfishExtractor):
"""Extractor for jpgfish Albums"""
subcategory = "album"
pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?"
test = (
("https://jpeg.pet/album/CDilP/?sort=date_desc&page=1", {
"count": 2,
}),
("https://jpg.fishing/a/gunggingnsk.N9OOI", {
"count": 114,
}),
("https://jpg.fish/a/101-200.aNJ6A/", {
"count": 100,
}),
("https://jpg.church/a/hannahowo.aNTdH/sub", {
"count": 606,
}),
("https://jpg.pet/album/CDilP/?sort=date_desc&page=1"),
)
example = "https://jpg1.su/album/TITLE.ID"
def __init__(self, match):
JpgfishExtractor.__init__(self, match)
@ -121,18 +86,7 @@ class JpgfishUserExtractor(JpgfishExtractor):
"""Extractor for jpgfish Users"""
subcategory = "user"
pattern = BASE_PATTERN + r"/(?!img|a(?:lbum)?)([^/?#]+)(/albums)?"
test = (
("https://jpeg.pet/exearco", {
"count": 3,
}),
("https://jpg.church/exearco/albums", {
"count": 1,
}),
("https://jpg.pet/exearco"),
("https://jpg.fishing/exearco"),
("https://jpg.fish/exearco"),
("https://jpg.church/exearco"),
)
example = "https://jpg1.su/USER"
def __init__(self, match):
JpgfishExtractor.__init__(self, match)

View File

@ -31,12 +31,7 @@ class JschanThreadExtractor(JschanExtractor):
filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
archive_fmt = "{board}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html"
test = (
("https://94chan.org/art/thread/25.html", {
"pattern": r"https://94chan.org/file/[0-9a-f]{64}(\.\w+)?",
"count": ">= 15"
})
)
example = "https://94chan.org/a/thread/12345.html"
def __init__(self, match):
JschanExtractor.__init__(self, match)
@ -71,15 +66,7 @@ class JschanBoardExtractor(JschanExtractor):
subcategory = "board"
pattern = (BASE_PATTERN + r"/([^/?#]+)"
r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)")
test = (
("https://94chan.org/art/", {
"pattern": JschanThreadExtractor.pattern,
"count": ">= 30"
}),
("https://94chan.org/art/2.html"),
("https://94chan.org/art/catalog.html"),
("https://94chan.org/art/index.html"),
)
example = "https://94chan.org/a/"
def __init__(self, match):
JschanExtractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2020 Mike Fährmann
# Copyright 2020-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -21,16 +21,7 @@ class KabeuchiUserExtractor(Extractor):
archive_fmt = "{id}_{num}"
root = "https://kabe-uchiroom.com"
pattern = r"(?:https?://)?kabe-uchiroom\.com/mypage/?\?id=(\d+)"
test = (
("https://kabe-uchiroom.com/mypage/?id=919865303848255493", {
"pattern": (r"https://kabe-uchiroom\.com/accounts/upfile/3/"
r"919865303848255493/\w+\.jpe?g"),
"count": ">= 24",
}),
("https://kabe-uchiroom.com/mypage/?id=123456789", {
"exception": exception.NotFoundError,
}),
)
example = "https://kabe-uchiroom.com/mypage/?id=12345"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019 Mike Fährmann
# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -20,28 +20,7 @@ class KeenspotComicExtractor(Extractor):
filename_fmt = "{filename}.{extension}"
archive_fmt = "{comic}_{filename}"
pattern = r"(?:https?://)?(?!www\.|forums\.)([\w-]+)\.keenspot\.com(/.+)?"
test = (
("http://marksmen.keenspot.com/", { # link
"range": "1-3",
"url": "83bcf029103bf8bc865a1988afa4aaeb23709ba6",
}),
("http://barkercomic.keenspot.com/", { # id
"range": "1-3",
"url": "c4080926db18d00bac641fdd708393b7d61379e6",
}),
("http://crowscare.keenspot.com/", { # id v2
"range": "1-3",
"url": "a00e66a133dd39005777317da90cef921466fcaa"
}),
("http://supernovas.keenspot.com/", { # ks
"range": "1-3",
"url": "de21b12887ef31ff82edccbc09d112e3885c3aab"
}),
("http://twokinds.keenspot.com/comic/1066/", { # "random" access
"range": "1-3",
"url": "6a784e11370abfb343dcad9adbb7718f9b7be350",
})
)
example = "http://COMIC.keenspot.com/"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -37,7 +37,6 @@ class KemonopartyExtractor(Extractor):
Extractor.__init__(self, match)
def _init(self):
self.session.headers["Referer"] = self.root + "/"
self._prepare_ddosguard_cookies()
self._find_inline = re.compile(
r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
@ -216,19 +215,7 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing"""
subcategory = "user"
pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
test = (
("https://kemono.party/fanbox/user/6993449", {
"range": "1-25",
"count": 25,
}),
# 'max-posts' option, 'o' query parameter (#1674)
("https://kemono.party/patreon/user/881792?o=150", {
"options": (("max-posts", 25),),
"count": "< 100",
}),
("https://kemono.su/subscribestar/user/alcorart"),
("https://kemono.party/subscribestar/user/alcorart"),
)
example = "https://kemono.party/SERVICE/user/12345"
def __init__(self, match):
_, _, service, user_id, offset = match.groups()
@ -256,87 +243,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post"""
subcategory = "post"
pattern = USER_PATTERN + r"/post/([^/?#]+)"
test = (
("https://kemono.party/fanbox/user/6993449/post/506575", {
"pattern": r"https://kemono.party/data/21/0f"
r"/210f35388e28bbcf756db18dd516e2d82ce75[0-9a-f]+\.jpg",
"content": "900949cefc97ab8dc1979cc3664785aac5ba70dd",
"keyword": {
"added": "Wed, 06 May 2020 20:28:02 GMT",
"content": str,
"count": 1,
"date": "dt:2019-08-11 02:09:04",
"edited": None,
"embed": dict,
"extension": "jpeg",
"filename": "P058kDFYus7DbqAkGlfWTlOr",
"hash": "210f35388e28bbcf756db18dd516e2d8"
"2ce758e0d32881eeee76d43e1716d382",
"id": "506575",
"num": 1,
"published": "Sun, 11 Aug 2019 02:09:04 GMT",
"service": "fanbox",
"shared_file": False,
"subcategory": "fanbox",
"title": "c96取り置き",
"type": "file",
"user": "6993449",
},
}),
# inline image (#1286)
("https://kemono.party/fanbox/user/7356311/post/802343", {
"pattern": r"https://kemono\.party/data/47/b5/47b5c014ecdcfabdf2c8"
r"5eec53f1133a76336997ae8596f332e97d956a460ad2\.jpg",
"keyword": {"hash": "47b5c014ecdcfabdf2c85eec53f1133a"
"76336997ae8596f332e97d956a460ad2"},
}),
# kemono.party -> data.kemono.party
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
"pattern": r"https://kemono\.party/data/("
r"a4/7b/a47bfe938d8c1682eef06e885927484cd8df1b.+\.jpg|"
r"c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)",
}),
# username (#1548, #1652)
("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", {
"options": (("metadata", True),),
"keyword": {"username": "Kudalyn's Creations"},
}),
# skip patreon duplicates
("https://kemono.party/patreon/user/4158582/post/32099982", {
"count": 2,
}),
# allow duplicates (#2440)
("https://kemono.party/patreon/user/4158582/post/32099982", {
"options": (("duplicates", True),),
"count": 3,
}),
# DMs (#2008)
("https://kemono.party/patreon/user/34134344/post/38129255", {
"options": (("dms", True),),
"keyword": {"dms": [{
"body": r"re:Hi! Thank you very much for supporting the work I"
r" did in May. Here's your reward pack! I hope you fin"
r"d something you enjoy in it. :\)\n\nhttps://www.medi"
r"afire.com/file/\w+/Set13_tier_2.zip/file",
"date": "2021-07-31 02:47:51.327865",
}]},
}),
# coomer.party (#2100)
("https://coomer.party/onlyfans/user/alinity/post/125962203", {
"pattern": r"https://coomer\.party/data/7d/3f/7d3fd9804583dc224968"
r"c0591163ec91794552b04f00a6c2f42a15b68231d5a8\.jpg",
}),
# invalid file (#3510)
("https://kemono.party/patreon/user/19623797/post/29035449", {
"pattern": r"907ba78b4545338d3539683e63ecb51c"
r"f51c10adc9dabd86e92bd52339f298b9\.txt",
"content": "da39a3ee5e6b4b0d3255bfef95601890afd80709", # empty
}),
("https://kemono.su/subscribestar/user/alcorart/post/184330"),
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"),
)
example = "https://kemono.party/SERVICE/user/12345/post/12345"
def __init__(self, match):
_, _, service, user_id, post_id = match.groups()
@ -359,30 +266,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
archive_fmt = "discord_{server}_{id}_{num}"
pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
test = (
(("https://kemono.party/discord"
"/server/488668827274444803#finish-work"), {
"count": 4,
"keyword": {"channel_name": "finish-work"},
}),
(("https://kemono.su/discord"
"/server/256559665620451329/channel/462437519519383555#"), {
"pattern": r"https://kemono\.su/data/("
r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|"
r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)",
"keyword": {"hash": "re:e377e3525164559484ace2e64425b0cec1db08"
"|51453640a5e0a4d23fbf57fb85390f9c5ec154"},
"count": ">= 2",
}),
# 'inline' files
(("https://kemono.party/discord"
"/server/315262215055736843/channel/315262215055736843#general"), {
"pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$",
"options": (("image-filter", "type == 'inline'"),),
"keyword": {"hash": ""},
"range": "1-5",
}),
)
example = "https://kemono.party/discord/server/12345#CHANNEL"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
@ -461,16 +345,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
subcategory = "discord-server"
pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
test = (
("https://kemono.party/discord/server/488668827274444803", {
"pattern": KemonopartyDiscordExtractor.pattern,
"count": 13,
}),
("https://kemono.su/discord/server/488668827274444803", {
"pattern": KemonopartyDiscordExtractor.pattern,
"count": 13,
}),
)
example = "https://kemono.party/discord/server/12345"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
@ -492,23 +367,7 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.party favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?"
test = (
("https://kemono.party/favorites", {
"pattern": KemonopartyUserExtractor.pattern,
"url": "f4b5b796979bcba824af84206578c79101c7f0e1",
"count": 3,
}),
("https://kemono.party/favorites?type=post", {
"pattern": KemonopartyPostExtractor.pattern,
"url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f",
"count": 3,
}),
("https://kemono.su/favorites?type=post", {
"pattern": KemonopartyPostExtractor.pattern,
"url": "4be8e84cb384a907a8e7997baaf6287b451783b5",
"count": 3,
}),
)
example = "https://kemono.party/favorites"
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
@ -522,7 +381,7 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
if self.favorites == "artist":
users = self.request(
self.root + "/api/favorites?type=artist").json()
self.root + "/api/v1/account/favorites?type=artist").json()
for user in users:
user["_extractor"] = KemonopartyUserExtractor
url = "{}/{}/user/{}".format(
@ -531,7 +390,7 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
elif self.favorites == "post":
posts = self.request(
self.root + "/api/favorites?type=post").json()
self.root + "/api/v1/account/favorites?type=post").json()
for post in posts:
post["_extractor"] = KemonopartyPostExtractor
url = "{}/{}/user/{}/post/{}".format(

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2022 Mike Fährmann
# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -16,30 +16,13 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
"""Extractor for soundtracks from khinsider.com"""
category = "khinsider"
subcategory = "soundtrack"
root = "https://downloads.khinsider.com"
directory_fmt = ("{category}", "{album[name]}")
archive_fmt = "{filename}.{extension}"
pattern = (r"(?:https?://)?downloads\.khinsider\.com"
r"/game-soundtracks/album/([^/?#]+)")
root = "https://downloads.khinsider.com"
test = (("https://downloads.khinsider.com"
"/game-soundtracks/album/horizon-riders-wii"), {
"pattern": r"https?://vgm(site|downloads)\.com"
r"/soundtracks/horizon-riders-wii/[^/]+"
r"/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack\.mp3",
"keyword": {
"album": {
"count": 1,
"date": "Sep 18th, 2016",
"name": "Horizon Riders",
"platform": "Wii",
"size": 26214400,
"type": "Gamerip",
},
"extension": "mp3",
"filename": "Horizon Riders Wii - Full Soundtrack",
},
"count": 1,
})
example = ("https://downloads.khinsider.com"
"/game-soundtracks/album/TITLE")
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2018-2022 Mike Fährmann
# Copyright 2018-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -48,18 +48,7 @@ class KomikcastBase():
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
"""Extractor for manga-chapters from komikcast.site"""
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
test = (
(("https://komikcast.site/chapter"
"/apotheosis-chapter-02-2-bahasa-indonesia/"), {
"url": "f6b43fbc027697749b3ea1c14931c83f878d7936",
"keyword": "f3938e1aff9ad1f302f52447e9781b21f6da26d4",
}),
(("https://komikcast.me/chapter"
"/soul-land-ii-chapter-300-1-bahasa-indonesia/"), {
"url": "efd00a9bd95461272d51990d7bc54b79ff3ff2e6",
"keyword": "cb646cfed3d45105bd645ab38b2e9f7d8c436436",
}),
)
example = "https://komikcast.site/chapter/TITLE/"
def metadata(self, page):
info = text.extr(page, "<title>", " - Komikcast<")
@ -79,13 +68,7 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
"""Extractor for manga from komikcast.site"""
chapterclass = KomikcastChapterExtractor
pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
test = (
("https://komikcast.site/komik/090-eko-to-issho/", {
"url": "19d3d50d532e84be6280a3d61ff0fd0ca04dd6b4",
"keyword": "837a7e96867344ff59d840771c04c20dc46c0ab1",
}),
("https://komikcast.me/tonari-no-kashiwagi-san/"),
)
example = "https://komikcast.site/komik/TITLE"
def chapters(self, page):
results = []

View File

@ -48,19 +48,7 @@ class LensdumpBase():
class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
subcategory = "album"
pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))"
test = (
("https://lensdump.com/a/1IhJr", {
"pattern": r"https://[abcd]\.l3n\.co/i/tq\w{4}\.png",
"keyword": {
"extension": "png",
"name": str,
"num": int,
"title": str,
"url": str,
"width": int,
},
}),
)
example = "https://lensdump.com/a/ID"
def __init__(self, match):
GalleryExtractor.__init__(self, match, match.string)
@ -100,7 +88,7 @@ class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
"""Extractor for album list from lensdump.com"""
subcategory = "albums"
pattern = BASE_PATTERN + r"/\w+/albums"
test = ("https://lensdump.com/vstar925/albums",)
example = "https://lensdump.com/USER/albums"
def items(self):
for node in self.nodes():
@ -117,22 +105,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
directory_fmt = ("{category}",)
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/i/(\w+)"
test = (
("https://lensdump.com/i/tyoAyM", {
"pattern": r"https://c\.l3n\.co/i/tyoAyM\.webp",
"content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46",
"keyword": {
"date": "dt:2022-08-01 08:24:28",
"extension": "webp",
"filename": "tyoAyM",
"height": 400,
"id": "tyoAyM",
"title": "MYOBI clovis bookcaseset",
"url": "https://c.l3n.co/i/tyoAyM.webp",
"width": 620,
},
}),
)
example = "https://lensdump.com/i/ID"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -20,37 +20,7 @@ class LexicaSearchExtractor(Extractor):
directory_fmt = ("{category}", "{search_tags}")
archive_fmt = "{id}"
pattern = r"(?:https?://)?lexica\.art/?\?q=([^&#]+)"
test = (
("https://lexica.art/?q=tree", {
"pattern": r"https://lexica-serve-encoded-images2\.sharif\."
r"workers.dev/full_jpg/[0-9a-f-]{36}$",
"range": "1-80",
"count": 80,
"keyword": {
"height": int,
"id": str,
"upscaled_height": int,
"upscaled_width": int,
"userid": str,
"width": int,
"prompt": {
"c": int,
"grid": bool,
"height": int,
"id": str,
"images": list,
"initImage": None,
"initImageStrength": None,
"model": "lexica-aperture-v2",
"negativePrompt": str,
"prompt": str,
"seed": str,
"timestamp": r"re:\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\dZ",
"width": int,
},
},
}),
)
example = "https://lexica.art/?q=QUERY"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -18,24 +18,7 @@ class LightroomGalleryExtractor(Extractor):
filename_fmt = "{num:>04}_{id}.{extension}"
archive_fmt = "{id}"
pattern = r"(?:https?://)?lightroom\.adobe\.com/shares/([0-9a-f]+)"
test = (
(("https://lightroom.adobe.com/shares/"
"0c9cce2033f24d24975423fe616368bf"), {
"keyword": {
"title": "Sterne und Nachtphotos",
"user": "Christian Schrang",
},
"count": ">= 55",
}),
(("https://lightroom.adobe.com/shares/"
"7ba68ad5a97e48608d2e6c57e6082813"), {
"keyword": {
"title": "HEBFC Snr/Res v Brighton",
"user": "",
},
"count": ">= 180",
}),
)
example = "https://lightroom.adobe.com/shares/0123456789abcdef"
def __init__(self, match):
Extractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2020 Mike Fährmann
# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -84,32 +84,7 @@ class LivedoorBlogExtractor(LivedoorExtractor):
"""Extractor for a user's blog on blog.livedoor.jp"""
subcategory = "blog"
pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/?(?:$|[?#])"
test = (
("http://blog.livedoor.jp/zatsu_ke/", {
"range": "1-50",
"count": 50,
"archive": False,
"pattern": r"https?://livedoor.blogimg.jp/\w+/imgs/\w/\w/\w+\.\w+",
"keyword": {
"post": {
"categories" : tuple,
"date" : "type:datetime",
"description": str,
"id" : int,
"tags" : list,
"title" : str,
"user" : "zatsu_ke"
},
"filename": str,
"hash" : r"re:\w{4,}",
"num" : int,
},
}),
("http://blog.livedoor.jp/uotapo/", {
"range": "1-5",
"count": 5,
}),
)
example = "http://blog.livedoor.jp/USER/"
def posts(self):
url = "{}/{}".format(self.root, self.user)
@ -129,20 +104,7 @@ class LivedoorPostExtractor(LivedoorExtractor):
"""Extractor for images from a blog post on blog.livedoor.jp"""
subcategory = "post"
pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/archives/(\d+)"
test = (
("http://blog.livedoor.jp/zatsu_ke/archives/51493859.html", {
"url": "9ca3bbba62722c8155be79ad7fc47be409e4a7a2",
"keyword": "1f5b558492e0734f638b760f70bfc0b65c5a97b9",
}),
("http://blog.livedoor.jp/amaumauma/archives/7835811.html", {
"url": "204bbd6a9db4969c50e0923855aeede04f2e4a62",
"keyword": "05821c7141360e6057ef2d382b046f28326a799d",
}),
("http://blog.livedoor.jp/uotapo/archives/1050616939.html", {
"url": "4b5ab144b7309eb870d9c08f8853d1abee9946d2",
"keyword": "84fbf6e4eef16675013d6333039a7cfcb22c2d50",
}),
)
example = "http://blog.livedoor.jp/USER/archives/12345.html"
def __init__(self, match):
LivedoorExtractor.__init__(self, match)

View File

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2021-2022 Mike Fährmann
# Copyright 2021-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -30,17 +30,7 @@ BASE_PATTERN = LolisafeExtractor.update({
class LolisafeAlbumExtractor(LolisafeExtractor):
subcategory = "album"
pattern = BASE_PATTERN + "/a/([^/?#]+)"
test = (
("https://xbunkr.com/a/TA0bu3F4", {
"pattern": r"https://media\.xbunkr\.com/[^.]+\.\w+",
"count": 861,
"keyword": {
"album_id": "TA0bu3F4",
"album_name": "Hannahowo Onlyfans Photos",
}
}),
("https://xbunkr.com/a/GNQc2I5d"),
)
example = "https://xbunkr.com/a/ID"
def __init__(self, match):
LolisafeExtractor.__init__(self, match)

View File

@ -47,73 +47,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
archive_fmt = "{album[id]}_{id}"
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
r"/(?:albums|pictures/c/[^/?#]+/album)/[^/?#]+_(\d+)")
test = (
("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
"pattern": r"https://storage\.bhs\.cloud\.ovh\.net/v1/AUTH_\w+"
r"/images/NTRshouldbeillegal/277031"
r"/luscious_net_\d+_\d+\.jpg$",
# "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
"keyword": {
"album": {
"__typename" : "Album",
"audiences" : list,
"content" : "Hentai",
"cover" : "re:https://\\w+.luscious.net/.+/277031/",
"created" : 1479625853,
"created_by" : "NTRshouldbeillegal",
"date" : "dt:2016-11-20 07:10:53",
"description" : "Enjoy.",
"download_url": "re:/download/(r/)?824778/277031/",
"genres" : list,
"id" : 277031,
"is_manga" : True,
"labels" : list,
"language" : "English",
"like_status" : "none",
"modified" : int,
"permissions" : list,
"rating" : float,
"slug" : "okinami-no-koigokoro",
"status" : None,
"tags" : list,
"title" : "Okinami no Koigokoro",
"url" : "/albums/okinami-no-koigokoro_277031/",
"marked_for_deletion": False,
"marked_for_processing": False,
"number_of_animated_pictures": 0,
"number_of_favorites": int,
"number_of_pictures": 18,
},
"aspect_ratio": r"re:\d+:\d+",
"category" : "luscious",
"created" : int,
"date" : "type:datetime",
"height" : int,
"id" : int,
"is_animated" : False,
"like_status" : "none",
"position" : int,
"resolution" : r"re:\d+x\d+",
"status" : None,
"tags" : list,
"thumbnail" : str,
"title" : str,
"width" : int,
"number_of_comments": int,
"number_of_favorites": int,
},
}),
("https://luscious.net/albums/not-found_277035/", {
"exception": exception.NotFoundError,
}),
("https://members.luscious.net/albums/login-required_323871/", {
"count": 64,
}),
("https://www.luscious.net/albums/okinami_277031/"),
("https://members.luscious.net/albums/okinami_277031/"),
("https://luscious.net/pictures/c/video_game_manga/album"
"/okinami-no-koigokoro_277031/sorted/position/id/16528978/@_1"),
)
example = "https://luscious.net/albums/TITLE_12345/"
def __init__(self, match):
LusciousExtractor.__init__(self, match)
@ -338,15 +272,7 @@ class LusciousSearchExtractor(LusciousExtractor):
subcategory = "search"
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
r"/albums/list/?(?:\?([^#]+))?")
test = (
("https://members.luscious.net/albums/list/"),
("https://members.luscious.net/albums/list/"
"?display=date_newest&language_ids=%2B1&tagged=+full_color&page=1", {
"pattern": LusciousAlbumExtractor.pattern,
"range": "41-60",
"count": 20,
}),
)
example = "https://luscious.net/albums/list/?tagged=TAG"
def __init__(self, match):
LusciousExtractor.__init__(self, match)

View File

@ -40,22 +40,7 @@ class LynxchanThreadExtractor(LynxchanExtractor):
filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
archive_fmt = "{boardUri}_{postId}_{num}"
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
test = (
("https://bbw-chan.nl/bbwdraw/res/499.html", {
"pattern": r"https://bbw-chan\.nl/\.media/[0-9a-f]{64}(\.\w+)?$",
"count": ">= 352",
}),
("https://bbw-chan.nl/bbwdraw/res/489.html"),
("https://kohlchan.net/a/res/4594.html", {
"pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$",
"count": ">= 80",
}),
("https://endchan.org/yuri/res/193483.html", {
"pattern": r"https://endchan\.org/\.media/[^.]+(\.\w+)?$",
"count" : ">= 19",
}),
("https://endchan.org/yuri/res/33621.html"),
)
example = "https://bbw-chan.nl/a/res/12345.html"
def __init__(self, match):
LynxchanExtractor.__init__(self, match)
@ -86,24 +71,7 @@ class LynxchanBoardExtractor(LynxchanExtractor):
"""Extractor for LynxChan boards"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
test = (
("https://bbw-chan.nl/bbwdraw/", {
"pattern": LynxchanThreadExtractor.pattern,
"count": ">= 148",
}),
("https://bbw-chan.nl/bbwdraw/2.html"),
("https://kohlchan.net/a/", {
"pattern": LynxchanThreadExtractor.pattern,
"count": ">= 100",
}),
("https://kohlchan.net/a/2.html"),
("https://kohlchan.net/a/catalog.html"),
("https://endchan.org/yuri/", {
"pattern": LynxchanThreadExtractor.pattern,
"count" : ">= 9",
}),
("https://endchan.org/yuri/catalog.html"),
)
example = "https://bbw-chan.nl/a/"
def __init__(self, match):
LynxchanExtractor.__init__(self, match)

View File

@ -98,25 +98,8 @@ class MangadexChapterExtractor(MangadexExtractor):
"""Extractor for manga-chapters from mangadex.org"""
subcategory = "chapter"
pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
test = (
("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", {
"keyword": "e86128a79ebe7201b648f1caa828496a2878dc8f",
# "content": "50383a4c15124682057b197d40261641a98db514",
}),
# oneshot
("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", {
"count": 64,
"keyword": "d11ed057a919854696853362be35fc0ba7dded4c",
}),
# MANGA Plus (#1154)
("https://mangadex.org/chapter/74149a55-e7c4-44ea-8a37-98e879c1096f", {
"exception": exception.StopExtraction,
}),
# 'externalUrl', but still downloadable (#2503)
("https://mangadex.org/chapter/364728a4-6909-4164-9eea-6b56354f7c78", {
"count": 0, # 404
}),
)
example = ("https://mangadex.org/chapter"
"/01234567-89ab-cdef-0123-456789abcdef")
def items(self):
try:
@ -148,48 +131,8 @@ class MangadexMangaExtractor(MangadexExtractor):
"""Extractor for manga from mangadex.org"""
subcategory = "manga"
pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)"
test = (
("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", {
"count": ">= 5",
"keyword": {
"manga" : "Souten no Koumori",
"manga_id": "f90c4398-8aad-4f51-8a1f-024ca09fdcbc",
"title" : "re:One[Ss]hot",
"volume" : 0,
"chapter" : 0,
"chapter_minor": "",
"chapter_id": str,
"date" : "type:datetime",
"lang" : str,
"language": str,
"artist" : ["Arakawa Hiromu"],
"author" : ["Arakawa Hiromu"],
"status" : "completed",
"tags" : ["Oneshot", "Historical", "Action",
"Martial Arts", "Drama", "Tragedy"],
},
}),
# mutliple values for 'lang' (#4093)
("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", {
"options": (("lang", "fr,it"),),
"count": 2,
"keyword": {
"manga" : "Souten no Koumori",
"lang" : "re:fr|it",
"language": "re:French|Italian",
},
}),
("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", {
"options": (("lang", "en"),),
"count": ">= 100",
}),
("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
"count": 1,
}),
("https://mangadex.org/title/584ef094-b2ab-40ce-962c-bce341fb9d10", {
"count": ">= 20",
})
)
example = ("https://mangadex.org/title"
"/01234567-89ab-cdef-0123-456789abcdef")
def chapters(self):
return self.api.manga_feed(self.uuid)
@ -199,7 +142,7 @@ class MangadexFeedExtractor(MangadexExtractor):
"""Extractor for chapters from your Followed Feed"""
subcategory = "feed"
pattern = BASE_PATTERN + r"/title/feed$()"
test = ("https://mangadex.org/title/feed",)
example = "https://mangadex.org/title/feed"
def chapters(self):
return self.api.user_follows_manga_feed()

View File

@ -20,23 +20,13 @@ class MangafoxChapterExtractor(ChapterExtractor):
root = "https://m.fanfox.net"
pattern = BASE_PATTERN + \
r"(/manga/[^/?#]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))"
test = (
("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
"keyword": "5661dab258d42d09d98f194f7172fb9851a49766",
"content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
}),
("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/"),
("http://fanfox.net/manga/black_clover/vTBD/c295/1.html"),
)
example = "https://fanfox.net/manga/TITLE/v01/c001/1.html"
def __init__(self, match):
base, self.cstr, self.volume, self.chapter, self.minor = match.groups()
self.urlbase = self.root + base
ChapterExtractor.__init__(self, match, self.urlbase + "/1.html")
def _init(self):
self.session.headers["Referer"] = self.root + "/"
def metadata(self, page):
manga, pos = text.extract(page, "<title>", "</title>")
count, pos = text.extract(
@ -73,36 +63,7 @@ class MangafoxMangaExtractor(MangaExtractor):
root = "https://m.fanfox.net"
chapterclass = MangafoxChapterExtractor
pattern = BASE_PATTERN + r"(/manga/[^/?#]+)/?$"
test = (
("https://fanfox.net/manga/kanojo_mo_kanojo", {
"pattern": MangafoxChapterExtractor.pattern,
"count": ">=60",
"keyword": {
"author": "HIROYUKI",
"chapter": int,
"chapter_minor": r"re:^(\.\d+)?$",
"chapter_string": r"re:(v\d+/)?c\d+",
"date": "type:datetime",
"description": "High school boy Naoya gets a confession from M"
"omi, a cute and friendly girl. However, Naoya "
"already has a girlfriend, Seki... but Momi is "
"too good a catch to let go. Momi and Nagoya's "
"goal becomes clear: convince Seki to accept be"
"ing an item with the two of them. Will she bud"
"ge?",
"lang": "en",
"language": "English",
"manga": "Kanojo mo Kanojo",
"tags": ["Comedy", "Romance", "School Life", "Shounen"],
"volume": int,
},
}),
("https://mangafox.me/manga/shangri_la_frontier", {
"pattern": MangafoxChapterExtractor.pattern,
"count": ">=45",
}),
("https://m.fanfox.net/manga/sentai_daishikkaku"),
)
example = "https://fanfox.net/manga/TITLE"
def chapters(self, page):
results = []

View File

@ -25,18 +25,7 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
"""Extractor for manga-chapters from mangahere.cc"""
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
r"([^/]+(?:/v0*(\d+))?/c([^/?#]+))")
test = (
("https://www.mangahere.cc/manga/dongguo_xiaojie/c004.2/", {
"keyword": "7c98d7b50a47e6757b089aa875a53aa970cac66f",
"content": "708d475f06893b88549cbd30df1e3f9428f2c884",
}),
# URLs without HTTP scheme (#1070)
("https://www.mangahere.cc/manga/beastars/c196/1.html", {
"pattern": "https://zjcdn.mangahere.org/.*",
}),
("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
("http://m.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
)
example = "https://www.mangahere.cc/manga/TITLE/c001/1.html"
def __init__(self, match):
self.part, self.volume, self.chapter = match.groups()
@ -95,24 +84,7 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
chapterclass = MangahereChapterExtractor
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]"
r"(/manga/[^/?#]+/?)(?:#.*)?$")
test = (
("https://www.mangahere.cc/manga/aria/", {
"url": "9c2e54ec42e9a87ad53096c328b33c90750af3e4",
"keyword": "71503c682c5d0c277a50409a8c5fd78e871e3d69",
"count": 71,
}),
("https://www.mangahere.cc/manga/hiyokoi/#50", {
"url": "654850570aa03825cd57e2ae2904af489602c523",
"keyword": "c8084d89a9ea6cf40353093669f9601a39bf5ca2",
}),
# adult filter (#556)
("http://www.mangahere.cc/manga/gunnm_mars_chronicle/", {
"pattern": MangahereChapterExtractor.pattern,
"count": ">= 50",
}),
("https://www.mangahere.co/manga/aria/"),
("https://m.mangahere.co/manga/aria/"),
)
example = "https://www.mangahere.cc/manga/TITLE"
def _init(self):
self.cookies.set("isAdult", "1", domain="www.mangahere.cc")

View File

@ -19,30 +19,18 @@ BASE_PATTERN = r"(?:https?://)?(?:ww[\dw]?\.)?mangakakalot\.tv"
class MangakakalotBase():
"""Base class for mangakakalot extractors"""
category = "mangakakalot"
root = "https://ww3.mangakakalot.tv"
root = "https://ww6.mangakakalot.tv"
class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
"""Extractor for manga chapters from mangakakalot.tv"""
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/chapter[_-][^/?#]+)"
test = (
("https://ww3.mangakakalot.tv/chapter/manga-jk986845/chapter-34.2", {
"pattern": r"https://cm\.blazefast\.co"
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
"keyword": "0f1586ff52f0f9cbbb25306ae64ab718f8a6a633",
"count": 9,
}),
("https://mangakakalot.tv/chapter"
"/hatarakanai_futari_the_jobless_siblings/chapter_20.1"),
)
example = "https://ww6.mangakakalot.tv/chapter/manga-ID/chapter-01"
def __init__(self, match):
self.path = match.group(1)
ChapterExtractor.__init__(self, match, self.root + self.path)
def _init(self):
self.session.headers['Referer'] = self.root + "/"
def metadata(self, page):
_ , pos = text.extract(page, '<span itemprop="title">', '<')
manga , pos = text.extract(page, '<span itemprop="title">', '<', pos)
@ -78,13 +66,7 @@ class MangakakalotMangaExtractor(MangakakalotBase, MangaExtractor):
"""Extractor for manga from mangakakalot.tv"""
chapterclass = MangakakalotChapterExtractor
pattern = BASE_PATTERN + r"(/manga/[^/?#]+)"
test = (
("https://ww3.mangakakalot.tv/manga/manga-jk986845", {
"pattern": MangakakalotChapterExtractor.pattern,
"count": ">= 30",
}),
("https://mangakakalot.tv/manga/lk921810"),
)
example = "https://ww6.mangakakalot.tv/manga/manga-ID"
def chapters(self, page):
data = {"lang": "en", "language": "English"}

View File

@ -23,8 +23,6 @@ class ManganeloBase():
super().__init__(match, "https://" + domain + path)
def _init(self):
self.session.headers['Referer'] = self.root + "/"
if self._match_chapter is None:
ManganeloBase._match_chapter = re.compile(
r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
@ -55,27 +53,7 @@ class ManganeloBase():
class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
"""Extractor for manga chapters from manganelo.com"""
pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
test = (
("https://chapmanganato.com/manga-gn983696/chapter-23", {
"pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/03/23"
r"/39/gn983696/vol_3_chapter_23_24_yen/\d+-[no]\.jpg",
"keyword": "17faaea7f0fb8c2675a327bf3aa0bcd7a6311d68",
"count": 25,
}),
("https://chapmanganelo.com/manga-ti107776/chapter-4", {
"pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/01/92"
r"/08/ti970565/chapter_4_caster/\d+-o\.jpg",
"keyword": "06e01fa9b3fc9b5b954c0d4a98f0153b40922ded",
"count": 45,
}),
("https://chapmanganato.com/manga-no991297/chapter-8", {
"keyword": {"chapter": 8, "chapter_minor": "-1"},
"count": 20,
}),
("https://readmanganato.com/manga-gn983696/chapter-23"),
("https://manganelo.com/chapter/gamers/chapter_15"),
("https://manganelo.com/chapter/gq921227/chapter_23"),
)
example = "https://chapmanganato.com/manga-ID/chapter-01"
def metadata(self, page):
extr = text.extract_from(page)
@ -104,19 +82,7 @@ class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
"""Extractor for manga from manganelo.com"""
chapterclass = ManganeloChapterExtractor
pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
test = (
("https://chapmanganato.com/manga-gn983696", {
"pattern": ManganeloChapterExtractor.pattern,
"count": ">= 25",
}),
("https://m.manganelo.com/manga-ti107776", {
"pattern": ManganeloChapterExtractor.pattern,
"count": ">= 12",
}),
("https://readmanganato.com/manga-gn983696"),
("https://manganelo.com/manga/read_otome_no_teikoku"),
("https://manganelo.com/manga/ol921234/"),
)
example = "https://manganato.com/manga-ID"
def chapters(self, page):
results = []

View File

@ -35,39 +35,7 @@ class MangaparkBase():
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
"""Extractor for manga-chapters from mangapark.net"""
pattern = BASE_PATTERN + r"/title/[^/?#]+/(\d+)"
test = (
("https://mangapark.net/title/114972-aria/6710214-en-ch.60.2", {
"count": 70,
"pattern": r"https://[\w-]+\.mpcdn\.org/comic/2002/e67"
r"/61e29278a583b9227964076e/\d+_\d+_\d+_\d+\.jpeg"
r"\?acc=[^&#]+&exp=\d+",
"keyword": {
"artist": [],
"author": ["Amano Kozue"],
"chapter": 60,
"chapter_id": 6710214,
"chapter_minor": ".2",
"count": 70,
"date": "dt:2022-01-15 09:25:03",
"extension": "jpeg",
"filename": str,
"genre": ["adventure", "comedy", "drama", "sci_fi",
"shounen", "slice_of_life"],
"lang": "en",
"language": "English",
"manga": "Aria",
"manga_id": 114972,
"page": int,
"source": "Koala",
"title": "Special Navigation - Aquaria Ii",
"volume": 12,
},
}),
("https://mangapark.com/title/114972-aria/6710214-en-ch.60.2"),
("https://mangapark.org/title/114972-aria/6710214-en-ch.60.2"),
("https://mangapark.io/title/114972-aria/6710214-en-ch.60.2"),
("https://mangapark.me/title/114972-aria/6710214-en-ch.60.2"),
)
example = "https://mangapark.net/title/MANGA/12345-en-ch.01"
def __init__(self, match):
self.root = text.root_from_url(match.group(0))
@ -115,41 +83,7 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
"""Extractor for manga from mangapark.net"""
subcategory = "manga"
pattern = BASE_PATTERN + r"/title/(\d+)(?:-[^/?#]*)?/?$"
test = (
("https://mangapark.net/title/114972-aria", {
"count": 141,
"pattern": MangaparkChapterExtractor.pattern,
"keyword": {
"chapter": int,
"chapter_id": int,
"chapter_minor": str,
"date": "type:datetime",
"lang": "en",
"language": "English",
"manga_id": 114972,
"source": "re:Horse|Koala",
"source_id": int,
"title": str,
"volume": int,
},
}),
# 'source' option
("https://mangapark.net/title/114972-aria", {
"options": (("source", "koala"),),
"count": 70,
"pattern": MangaparkChapterExtractor.pattern,
"keyword": {
"source": "Koala",
"source_id": 15150116,
},
}),
("https://mangapark.com/title/114972-"),
("https://mangapark.com/title/114972"),
("https://mangapark.com/title/114972-aria"),
("https://mangapark.org/title/114972-aria"),
("https://mangapark.io/title/114972-aria"),
("https://mangapark.me/title/114972-aria"),
)
example = "https://mangapark.net/title/12345-MANGA"
def __init__(self, match):
self.root = text.root_from_url(match.group(0))

Some files were not shown because too many files have changed in this diff Show More