mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-22 02:32:33 +01:00
Merge branch 'mikf:master' into master
This commit is contained in:
commit
9e5b2ef10e
12
.github/workflows/tests.yml
vendored
12
.github/workflows/tests.yml
vendored
@ -32,8 +32,7 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install "flake8<4" "importlib-metadata<5"
|
||||
pip install youtube-dl
|
||||
pip install flake8 youtube-dl
|
||||
|
||||
- name: Install yt-dlp
|
||||
run: |
|
||||
@ -53,7 +52,14 @@ jobs:
|
||||
|
||||
- name: Lint with flake8
|
||||
run: |
|
||||
flake8 .
|
||||
case "${{ matrix.python-version }}" in
|
||||
3.4|3.5|3.6|3.7)
|
||||
flake8 --extend-exclude scripts/export_tests.py .
|
||||
;;
|
||||
*)
|
||||
flake8 .
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
|
@ -390,7 +390,6 @@ Description
|
||||
* ``e621`` (*)
|
||||
* ``e926`` (*)
|
||||
* ``exhentai``
|
||||
* ``gfycat``
|
||||
* ``idolcomplex``
|
||||
* ``imgbb``
|
||||
* ``inkbunny``
|
||||
@ -534,7 +533,7 @@ extractor.*.user-agent
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0"``
|
||||
``"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0"``
|
||||
Description
|
||||
User-Agent header value to be used for HTTP requests.
|
||||
|
||||
@ -566,6 +565,21 @@ Description
|
||||
browser would use HTTP/2.
|
||||
|
||||
|
||||
extractor.*.referer
|
||||
-------------------
|
||||
Type
|
||||
* ``bool``
|
||||
* ``string``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Send `Referer <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referer>`__
|
||||
headers with all outgoing HTTP requests.
|
||||
|
||||
If this is a ``string``, send it as Referer
|
||||
instead of the extractor's ``root`` domain.
|
||||
|
||||
|
||||
extractor.*.headers
|
||||
-------------------
|
||||
Type
|
||||
@ -577,7 +591,8 @@ Default
|
||||
"User-Agent" : "<extractor.*.user-agent>",
|
||||
"Accept" : "*/*",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Accept-Encoding": "gzip, deflate"
|
||||
"Accept-Encoding": "gzip, deflate",
|
||||
"Referer" : "<extractor.*.referer>"
|
||||
}
|
||||
|
||||
Description
|
||||
@ -714,7 +729,7 @@ Type
|
||||
Default
|
||||
``["oauth", "recursive", "test"]`` + current extractor category
|
||||
Example
|
||||
``["imgur", "gfycat:user", "*:image"]``
|
||||
``["imgur", "redgifs:user", "*:image"]``
|
||||
Description
|
||||
A list of extractor identifiers to ignore (or allow)
|
||||
when spawning child extractors for unknown URLs,
|
||||
@ -723,7 +738,7 @@ Description
|
||||
Each identifier can be
|
||||
|
||||
* A category or basecategory name (``"imgur"``, ``"mastodon"``)
|
||||
* | A (base)category-subcategory pair, where both names are separated by a colon (``"gfycat:user"``).
|
||||
* | A (base)category-subcategory pair, where both names are separated by a colon (``"redgifs:user"``).
|
||||
| Both names can be a `*` or left empty, matching all possible names (``"*:image"``, ``":user"``).
|
||||
|
||||
Note: Any ``blacklist`` setting will automatically include
|
||||
@ -1475,6 +1490,22 @@ Description
|
||||
* ``"exhentai.org"``: Use ``exhentai.org`` for all URLs
|
||||
|
||||
|
||||
extractor.exhentai.fav
|
||||
----------------------
|
||||
Type
|
||||
``string``
|
||||
Example
|
||||
``"4"``
|
||||
Description
|
||||
After downloading a gallery,
|
||||
add it to your account's favorites as the given category number.
|
||||
|
||||
Note: Set this to `"favdel"` to remove galleries from your favorites.
|
||||
|
||||
Note: This will remove any Favorite Notes when applied
|
||||
to already favorited galleries.
|
||||
|
||||
|
||||
extractor.exhentai.limits
|
||||
-------------------------
|
||||
Type
|
||||
@ -1690,29 +1721,6 @@ Description
|
||||
even ones without a ``generic:`` prefix.
|
||||
|
||||
|
||||
extractor.gfycat.format
|
||||
-----------------------
|
||||
Type
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``["mp4", "webm", "mobile", "gif"]``
|
||||
Description
|
||||
List of names of the preferred animation format, which can be
|
||||
``"mp4"``,
|
||||
``"webm"``,
|
||||
``"mobile"``,
|
||||
``"gif"``, or
|
||||
``"webp"``.
|
||||
|
||||
If a selected format is not available, the next one in the list will be
|
||||
tried until an available format is found.
|
||||
|
||||
If the format is given as ``string``, it will be extended with
|
||||
``["mp4", "webm", "mobile", "gif"]``. Use a list with one element to
|
||||
restrict it to only one possible format.
|
||||
|
||||
|
||||
extractor.gofile.api-token
|
||||
--------------------------
|
||||
Type
|
||||
@ -3336,7 +3344,7 @@ extractor.twitter.users
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``"timeline"``
|
||||
``"user"``
|
||||
Example
|
||||
``"https://twitter.com/search?q=from:{legacy[screen_name]}"``
|
||||
Description
|
||||
@ -3347,7 +3355,8 @@ Description
|
||||
|
||||
Special values:
|
||||
|
||||
* ``"timeline"``: ``https://twitter.com/i/user/{rest_id}``
|
||||
* ``"user"``: ``https://twitter.com/i/user/{rest_id}``
|
||||
* ``"timeline"``: ``https://twitter.com/id:{rest_id}/timeline``
|
||||
* ``"tweets"``: ``https://twitter.com/id:{rest_id}/tweets``
|
||||
* ``"media"``: ``https://twitter.com/id:{rest_id}/media``
|
||||
|
||||
|
@ -165,7 +165,7 @@
|
||||
"reddit":
|
||||
{
|
||||
"#": "only spawn child extractors for links to specific sites",
|
||||
"whitelist": ["imgur", "redgifs", "gfycat"],
|
||||
"whitelist": ["imgur", "redgifs"],
|
||||
|
||||
"#": "put files from child extractors into the reddit directory",
|
||||
"parent-directory": true,
|
||||
|
@ -10,7 +10,7 @@
|
||||
"proxy": null,
|
||||
"skip": true,
|
||||
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0",
|
||||
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
|
||||
"retries": 4,
|
||||
"timeout": 30.0,
|
||||
"verify": true,
|
||||
@ -125,10 +125,6 @@
|
||||
"api-key": null,
|
||||
"user-id": null
|
||||
},
|
||||
"gfycat":
|
||||
{
|
||||
"format": ["mp4", "webm", "mobile", "gif"]
|
||||
},
|
||||
"gofile": {
|
||||
"api-token": null,
|
||||
"website-token": null
|
||||
@ -339,7 +335,7 @@
|
||||
"text-tweets": false,
|
||||
"twitpic": false,
|
||||
"unique": true,
|
||||
"users": "timeline",
|
||||
"users": "user",
|
||||
"videos": true
|
||||
},
|
||||
"unsplash":
|
||||
|
@ -88,7 +88,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>ArtStation</td>
|
||||
<td>https://www.artstation.com/</td>
|
||||
<td>Albums, Artwork Listings, Challenges, individual Images, Likes, Search Results, User Profiles</td>
|
||||
<td>Albums, Artwork Listings, Challenges, Followed Users, individual Images, Likes, Search Results, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -148,7 +148,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>DeviantArt</td>
|
||||
<td>https://www.deviantart.com/</td>
|
||||
<td>Collections, Deviations, Favorites, Folders, Galleries, Gallery Searches, Journals, Popular Images, Scraps, Search Results, Sta.sh, Status Updates, Tag Searches, User Profiles, Watches</td>
|
||||
<td>Collections, Deviations, Favorites, Folders, Followed Users, Galleries, Gallery Searches, Journals, Popular Images, Scraps, Search Results, Sta.sh, Status Updates, Tag Searches, User Profiles, Watches</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -226,7 +226,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>Fur Affinity</td>
|
||||
<td>https://www.furaffinity.net/</td>
|
||||
<td>Favorites, Galleries, Posts, Scraps, Search Results, User Profiles</td>
|
||||
<td>Favorites, Followed Users, Galleries, Posts, Scraps, Search Results, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -247,12 +247,6 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<td>Favorites, Pools, Posts, Redirects, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Gfycat</td>
|
||||
<td>https://gfycat.com/</td>
|
||||
<td>Collections, individual Images, Search Results, User Profiles</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Gofile</td>
|
||||
<td>https://gofile.io/</td>
|
||||
@ -357,7 +351,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Imagevenue</td>
|
||||
<td>https://imagevenue.com/</td>
|
||||
<td>https://www.imagevenue.com/</td>
|
||||
<td>individual Images</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
@ -406,13 +400,13 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>Inkbunny</td>
|
||||
<td>https://inkbunny.net/</td>
|
||||
<td>Favorites, Pools, Posts, Search Results, User Profiles</td>
|
||||
<td>Favorites, Followed Users, Pools, Posts, Search Results, User Profiles</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Instagram</td>
|
||||
<td>https://www.instagram.com/</td>
|
||||
<td>Avatars, Collections, Guides, Highlights, Posts, Reels, Saved Posts, Stories, Tag Searches, Tagged Posts, User Profiles</td>
|
||||
<td>Avatars, Collections, Followed Users, Guides, Highlights, Posts, Reels, Saved Posts, Stories, Tag Searches, Tagged Posts, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -435,7 +429,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
</tr>
|
||||
<tr>
|
||||
<td>JPG Fish</td>
|
||||
<td>https://jpeg.pet/</td>
|
||||
<td>https://jpg1.su/</td>
|
||||
<td>Albums, individual Images, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
@ -574,7 +568,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>Newgrounds</td>
|
||||
<td>https://www.newgrounds.com/</td>
|
||||
<td>Art, Audio, Favorites, Games, individual Images, Media Files, Movies, Search Results, User Profiles</td>
|
||||
<td>Art, Audio, Favorites, Followed Users, Games, individual Images, Media Files, Movies, Search Results, User Profiles</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -699,7 +693,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>Pornhub</td>
|
||||
<td>https://www.pornhub.com/</td>
|
||||
<td>Galleries, User Profiles</td>
|
||||
<td>Galleries, Gifs, Photos, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -783,7 +777,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>Skeb</td>
|
||||
<td>https://skeb.jp/</td>
|
||||
<td>Posts, Search Results, User Profiles</td>
|
||||
<td>Followed Users, Posts, Search Results, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -879,7 +873,7 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>Twitter</td>
|
||||
<td>https://twitter.com/</td>
|
||||
<td>Avatars, Backgrounds, Bookmarks, Events, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles</td>
|
||||
<td>Avatars, Backgrounds, Bookmarks, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -1126,6 +1120,12 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<td>Favorites, Pools, Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Xbooru</td>
|
||||
<td>https://xbooru.com/</td>
|
||||
<td>Favorites, Pools, Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td colspan="4"><strong>jschan Imageboards</strong></td>
|
||||
@ -1165,19 +1165,19 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>Misskey.io</td>
|
||||
<td>https://misskey.io/</td>
|
||||
<td>Favorites, Images from Notes, User Profiles</td>
|
||||
<td>Favorites, Followed Users, Images from Notes, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lesbian.energy</td>
|
||||
<td>https://lesbian.energy/</td>
|
||||
<td>Favorites, Images from Notes, User Profiles</td>
|
||||
<td>Favorites, Followed Users, Images from Notes, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Sushi.ski</td>
|
||||
<td>https://sushi.ski/</td>
|
||||
<td>Favorites, Images from Notes, User Profiles</td>
|
||||
<td>Favorites, Followed Users, Images from Notes, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
|
||||
@ -1477,19 +1477,19 @@ Consider all sites to be NSFW unless otherwise known.
|
||||
<tr>
|
||||
<td>mastodon.social</td>
|
||||
<td>https://mastodon.social/</td>
|
||||
<td>Bookmarks, Images from Statuses, User Profiles</td>
|
||||
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Pawoo</td>
|
||||
<td>https://pawoo.net/</td>
|
||||
<td>Bookmarks, Images from Statuses, User Profiles</td>
|
||||
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>baraag</td>
|
||||
<td>https://baraag.net/</td>
|
||||
<td>Bookmarks, Images from Statuses, User Profiles</td>
|
||||
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
|
||||
</tr>
|
||||
|
||||
|
@ -196,16 +196,15 @@ def main():
|
||||
|
||||
elif args.list_extractors:
|
||||
write = sys.stdout.write
|
||||
fmt = "{}\n{}\nCategory: {} - Subcategory: {}{}\n\n".format
|
||||
fmt = ("{}{}\nCategory: {} - Subcategory: {}"
|
||||
"\nExample : {}\n\n").format
|
||||
|
||||
for extr in extractor.extractors():
|
||||
if not extr.__doc__:
|
||||
continue
|
||||
test = next(extr._get_tests(), None)
|
||||
write(fmt(
|
||||
extr.__name__, extr.__doc__,
|
||||
extr.__name__,
|
||||
"\n" + extr.__doc__ if extr.__doc__ else "",
|
||||
extr.category, extr.subcategory,
|
||||
"\nExample : " + test[0] if test else "",
|
||||
extr.example,
|
||||
))
|
||||
|
||||
elif args.clear_cache:
|
||||
@ -297,7 +296,7 @@ def main():
|
||||
return retval
|
||||
|
||||
except KeyboardInterrupt:
|
||||
sys.exit("\nKeyboardInterrupt")
|
||||
raise SystemExit("\nKeyboardInterrupt")
|
||||
except BrokenPipeError:
|
||||
pass
|
||||
except OSError as exc:
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2017-2019 Mike Fährmann
|
||||
# Copyright 2017-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -17,4 +17,4 @@ if __package__ is None and not hasattr(sys, "frozen"):
|
||||
import gallery_dl
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(gallery_dl.main())
|
||||
raise SystemExit(gallery_dl.main())
|
||||
|
@ -9,7 +9,6 @@
|
||||
""" """
|
||||
|
||||
import re
|
||||
import sys
|
||||
import logging
|
||||
import operator
|
||||
from . import util, exception
|
||||
@ -98,7 +97,7 @@ def action_exit(opts):
|
||||
pass
|
||||
|
||||
def _exit(args):
|
||||
sys.exit(opts)
|
||||
raise SystemExit(opts)
|
||||
return _exit
|
||||
|
||||
|
||||
|
@ -100,12 +100,12 @@ def load(files=None, strict=False, loads=util.json_loads):
|
||||
except OSError as exc:
|
||||
if strict:
|
||||
log.error(exc)
|
||||
sys.exit(1)
|
||||
raise SystemExit(1)
|
||||
except Exception as exc:
|
||||
log.error("%s when loading '%s': %s",
|
||||
exc.__class__.__name__, path, exc)
|
||||
if strict:
|
||||
sys.exit(2)
|
||||
raise SystemExit(2)
|
||||
else:
|
||||
if not _config:
|
||||
_config.update(conf)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2017-2022 Mike Fährmann
|
||||
# Copyright 2017-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -20,26 +20,8 @@ class _2chanThreadExtractor(Extractor):
|
||||
filename_fmt = "{tim}.{extension}"
|
||||
archive_fmt = "{board}_{thread}_{tim}"
|
||||
url_fmt = "https://{server}.2chan.net/{board}/src/{filename}"
|
||||
pattern = r"(?:https?://)?([\w-]+)\.2chan\.net/([^/]+)/res/(\d+)"
|
||||
test = ("https://dec.2chan.net/70/res/14565.htm", {
|
||||
"pattern": r"https://dec\.2chan\.net/70/src/\d{13}\.jpg",
|
||||
"count": ">= 3",
|
||||
"keyword": {
|
||||
"board": "70",
|
||||
"board_name": "新板提案",
|
||||
"com": str,
|
||||
"fsize": r"re:\d+",
|
||||
"name": "名無し",
|
||||
"no": r"re:1[45]\d\d\d",
|
||||
"now": r"re:22/../..\(.\)..:..:..",
|
||||
"post": "無題",
|
||||
"server": "dec",
|
||||
"thread": "14565",
|
||||
"tim": r"re:^\d{13}$",
|
||||
"time": r"re:^\d{10}$",
|
||||
"title": "ヒロアカ板"
|
||||
},
|
||||
})
|
||||
pattern = r"(?:https?://)?([\w-]+)\.2chan\.net/([^/?#]+)/res/(\d+)"
|
||||
example = "https://dec.2chan.net/12/res/12345.htm"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -21,26 +21,7 @@ class _2chenThreadExtractor(Extractor):
|
||||
filename_fmt = "{time} {filename}.{extension}"
|
||||
archive_fmt = "{board}_{thread}_{hash}_{time}"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/(\d+)"
|
||||
test = (
|
||||
("https://sturdychan.help/tv/268929", {
|
||||
"pattern": r"https://sturdychan\.help/assets/images"
|
||||
r"/src/\w{40}\.\w+$",
|
||||
"count": ">= 179",
|
||||
"keyword": {
|
||||
"board": "tv",
|
||||
"date": "type:datetime",
|
||||
"hash": r"re:[0-9a-f]{40}",
|
||||
"name": "Anonymous",
|
||||
"no": r"re:\d+",
|
||||
"thread": "268929",
|
||||
"time": int,
|
||||
"title": "「/ttg/ #118: 🇧🇷 edition」",
|
||||
"url": str,
|
||||
},
|
||||
}),
|
||||
("https://2chen.club/tv/1"),
|
||||
("https://2chen.moe/jp/303786"),
|
||||
)
|
||||
example = "https://sturdychan.help/a/12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
@ -101,14 +82,7 @@ class _2chenBoardExtractor(Extractor):
|
||||
subcategory = "board"
|
||||
root = "https://sturdychan.help"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/catalog|/?$)"
|
||||
test = (
|
||||
("https://sturdychan.help/co/", {
|
||||
"pattern": _2chenThreadExtractor.pattern
|
||||
}),
|
||||
("https://2chen.moe/co"),
|
||||
("https://2chen.club/tv"),
|
||||
("https://2chen.moe/co/catalog"),
|
||||
)
|
||||
example = "https://sturdychan.help/a/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019-2022 Mike Fährmann
|
||||
# Copyright 2019-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -101,20 +101,7 @@ class _35photoUserExtractor(_35photoExtractor):
|
||||
subcategory = "user"
|
||||
pattern = (r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro"
|
||||
r"/(?!photo_|genre_|tags/|rating/)([^/?#]+)")
|
||||
test = (
|
||||
("https://35photo.pro/liya", {
|
||||
"pattern": r"https://([a-z][0-9]\.)?35photo\.pro"
|
||||
r"/photos_(main|series)/.*\.jpg",
|
||||
"count": 9,
|
||||
}),
|
||||
("https://35photo.pro/suhoveev", {
|
||||
# last photo ID (1267028) isn't given as 'photo-id="<id>"
|
||||
# there are only 23 photos without the last one
|
||||
"count": ">= 33",
|
||||
}),
|
||||
("https://en.35photo.pro/liya"),
|
||||
("https://ru.35photo.pro/liya"),
|
||||
)
|
||||
example = "https://35photo.pro/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
_35photoExtractor.__init__(self, match)
|
||||
@ -143,11 +130,7 @@ class _35photoTagExtractor(_35photoExtractor):
|
||||
directory_fmt = ("{category}", "Tags", "{search_tag}")
|
||||
archive_fmt = "t{search_tag}_{id}_{num}"
|
||||
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/tags/([^/?#]+)"
|
||||
test = ("https://35photo.pro/tags/landscape/", {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
"archive": False,
|
||||
})
|
||||
example = "https://35photo.pro/tags/TAG/"
|
||||
|
||||
def __init__(self, match):
|
||||
_35photoExtractor.__init__(self, match)
|
||||
@ -180,7 +163,7 @@ class _35photoGenreExtractor(_35photoExtractor):
|
||||
directory_fmt = ("{category}", "Genre", "{genre}")
|
||||
archive_fmt = "g{genre_id}_{id}_{num}"
|
||||
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/genre_(\d+)(/new/)?"
|
||||
test = ("https://35photo.pro/genre_109/",)
|
||||
example = "https://35photo.pro/genre_12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
_35photoExtractor.__init__(self, match)
|
||||
@ -212,24 +195,7 @@ class _35photoImageExtractor(_35photoExtractor):
|
||||
"""Extractor for individual images from 35photo.pro"""
|
||||
subcategory = "image"
|
||||
pattern = r"(?:https?://)?(?:[a-z]+\.)?35photo\.pro/photo_(\d+)"
|
||||
test = ("https://35photo.pro/photo_753340/", {
|
||||
"count": 1,
|
||||
"keyword": {
|
||||
"url" : r"re:https://35photo\.pro/photos_main/.*\.jpg",
|
||||
"id" : 753340,
|
||||
"title" : "Winter walk",
|
||||
"description": str,
|
||||
"tags" : list,
|
||||
"views" : int,
|
||||
"favorites" : int,
|
||||
"score" : int,
|
||||
"type" : 0,
|
||||
"date" : "15 авг, 2014",
|
||||
"user" : "liya",
|
||||
"user_id" : 20415,
|
||||
"user_name" : "Liya Mirzaeva",
|
||||
},
|
||||
})
|
||||
example = "https://35photo.pro/photo_12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
_35photoExtractor.__init__(self, match)
|
||||
|
@ -27,10 +27,7 @@ class _3dbooruTagExtractor(_3dbooruBase, moebooru.MoebooruTagExtractor):
|
||||
"""Extractor for images from behoimi.org based on search-tags"""
|
||||
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org/post"
|
||||
r"(?:/(?:index)?)?\?tags=(?P<tags>[^&#]+)")
|
||||
test = ("http://behoimi.org/post?tags=himekawa_azuru+dress", {
|
||||
"url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1",
|
||||
"content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a",
|
||||
})
|
||||
example = "http://behoimi.org/post?tags=TAG"
|
||||
|
||||
def posts(self):
|
||||
params = {"tags": self.tags}
|
||||
@ -40,10 +37,7 @@ class _3dbooruTagExtractor(_3dbooruBase, moebooru.MoebooruTagExtractor):
|
||||
class _3dbooruPoolExtractor(_3dbooruBase, moebooru.MoebooruPoolExtractor):
|
||||
"""Extractor for image-pools from behoimi.org"""
|
||||
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(?P<pool>\d+)"
|
||||
test = ("http://behoimi.org/pool/show/27", {
|
||||
"url": "da75d2d1475449d5ef0c266cb612683b110a30f2",
|
||||
"content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554",
|
||||
})
|
||||
example = "http://behoimi.org/pool/show/12345"
|
||||
|
||||
def posts(self):
|
||||
params = {"tags": "pool:" + self.pool_id}
|
||||
@ -53,17 +47,7 @@ class _3dbooruPoolExtractor(_3dbooruBase, moebooru.MoebooruPoolExtractor):
|
||||
class _3dbooruPostExtractor(_3dbooruBase, moebooru.MoebooruPostExtractor):
|
||||
"""Extractor for single images from behoimi.org"""
|
||||
pattern = r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(?P<post>\d+)"
|
||||
test = ("http://behoimi.org/post/show/140852", {
|
||||
"url": "ce874ea26f01d6c94795f3cc3aaaaa9bc325f2f6",
|
||||
"content": "26549d55b82aa9a6c1686b96af8bfcfa50805cd4",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_character": "furude_rika",
|
||||
"tags_copyright": "higurashi_no_naku_koro_ni",
|
||||
"tags_model": "himekawa_azuru",
|
||||
"tags_general": str,
|
||||
},
|
||||
})
|
||||
example = "http://behoimi.org/post/show/12345"
|
||||
|
||||
def posts(self):
|
||||
params = {"tags": "id:" + self.post_id}
|
||||
@ -76,7 +60,4 @@ class _3dbooruPopularExtractor(
|
||||
pattern = (r"(?:https?://)?(?:www\.)?behoimi\.org"
|
||||
r"/post/popular_(?P<scale>by_(?:day|week|month)|recent)"
|
||||
r"(?:\?(?P<query>[^#]*))?")
|
||||
test = ("http://behoimi.org/post/popular_by_month?month=2&year=2013", {
|
||||
"pattern": r"http://behoimi\.org/data/../../[0-9a-f]{32}\.jpg",
|
||||
"count": 20,
|
||||
})
|
||||
example = "http://behoimi.org/post/popular_by_month"
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2019 Mike Fährmann
|
||||
# Copyright 2015-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -21,17 +21,7 @@ class _4chanThreadExtractor(Extractor):
|
||||
archive_fmt = "{board}_{thread}_{tim}"
|
||||
pattern = (r"(?:https?://)?boards\.4chan(?:nel)?\.org"
|
||||
r"/([^/]+)/thread/(\d+)")
|
||||
test = (
|
||||
("https://boards.4chan.org/tg/thread/15396072/", {
|
||||
"url": "39082ad166161966d7ba8e37f2173a824eb540f0",
|
||||
"keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
|
||||
"content": "20b7b51afa51c9c31a0020a0737b889532c8d7ec",
|
||||
}),
|
||||
("https://boards.4channel.org/tg/thread/15396072/", {
|
||||
"url": "39082ad166161966d7ba8e37f2173a824eb540f0",
|
||||
"keyword": "7ae2f4049adf0d2f835eb91b6b26b7f4ec882e0a",
|
||||
}),
|
||||
)
|
||||
example = "https://boards.4channel.org/a/thread/12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
@ -65,10 +55,7 @@ class _4chanBoardExtractor(Extractor):
|
||||
category = "4chan"
|
||||
subcategory = "board"
|
||||
pattern = r"(?:https?://)?boards\.4chan(?:nel)?\.org/([^/?#]+)/\d*$"
|
||||
test = ("https://boards.4channel.org/po/", {
|
||||
"pattern": _4chanThreadExtractor.pattern,
|
||||
"count": ">= 100",
|
||||
})
|
||||
example = "https://boards.4channel.org/a/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -21,21 +21,7 @@ class _4chanarchivesThreadExtractor(Extractor):
|
||||
filename_fmt = "{no}-{filename}.{extension}"
|
||||
archive_fmt = "{board}_{thread}_{no}"
|
||||
pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)/thread/(\d+)"
|
||||
test = (
|
||||
("https://4chanarchives.com/board/c/thread/2707110", {
|
||||
"pattern": r"https://i\.imgur\.com/(0wLGseE|qbByWDc)\.jpg",
|
||||
"count": 2,
|
||||
"keyword": {
|
||||
"board": "c",
|
||||
"com": str,
|
||||
"name": "Anonymous",
|
||||
"no": int,
|
||||
"thread": "2707110",
|
||||
"time": r"re:2016-07-1\d \d\d:\d\d:\d\d",
|
||||
"title": "Ren Kagami from 'Oyako Neburi'",
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://4chanarchives.com/board/a/thread/12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
@ -106,15 +92,7 @@ class _4chanarchivesBoardExtractor(Extractor):
|
||||
subcategory = "board"
|
||||
root = "https://4chanarchives.com"
|
||||
pattern = r"(?:https?://)?4chanarchives\.com/board/([^/?#]+)(?:/(\d+))?/?$"
|
||||
test = (
|
||||
("https://4chanarchives.com/board/c/", {
|
||||
"pattern": _4chanarchivesThreadExtractor.pattern,
|
||||
"range": "1-40",
|
||||
"count": 40,
|
||||
}),
|
||||
("https://4chanarchives.com/board/c"),
|
||||
("https://4chanarchives.com/board/c/10"),
|
||||
)
|
||||
example = "https://4chanarchives.com/board/a/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -23,9 +23,6 @@ class _500pxExtractor(Extractor):
|
||||
root = "https://500px.com"
|
||||
cookies_domain = ".500px.com"
|
||||
|
||||
def _init(self):
|
||||
self.session.headers["Referer"] = self.root + "/"
|
||||
|
||||
def items(self):
|
||||
data = self.metadata()
|
||||
|
||||
@ -96,15 +93,7 @@ class _500pxUserExtractor(_500pxExtractor):
|
||||
"""Extractor for photos from a user's photostream on 500px.com"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/(?!photo/|liked)(?:p/)?([^/?#]+)/?(?:$|[?#])"
|
||||
test = (
|
||||
("https://500px.com/p/light_expression_photography", {
|
||||
"pattern": r"https?://drscdn.500px.org/photo/\d+/m%3D4096/v2",
|
||||
"range": "1-99",
|
||||
"count": 99,
|
||||
}),
|
||||
("https://500px.com/light_expression_photography"),
|
||||
("https://web.500px.com/light_expression_photography"),
|
||||
)
|
||||
example = "https://500px.com/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
_500pxExtractor.__init__(self, match)
|
||||
@ -134,17 +123,7 @@ class _500pxGalleryExtractor(_500pxExtractor):
|
||||
directory_fmt = ("{category}", "{user[username]}", "{gallery[name]}")
|
||||
pattern = (BASE_PATTERN + r"/(?!photo/)(?:p/)?"
|
||||
r"([^/?#]+)/galleries/([^/?#]+)")
|
||||
test = (
|
||||
("https://500px.com/p/fashvamp/galleries/lera", {
|
||||
"url": "002dc81dee5b4a655f0e31ad8349e8903b296df6",
|
||||
"count": 3,
|
||||
"keyword": {
|
||||
"gallery": dict,
|
||||
"user": dict,
|
||||
},
|
||||
}),
|
||||
("https://500px.com/fashvamp/galleries/lera"),
|
||||
)
|
||||
example = "https://500px.com/USER/galleries/GALLERY"
|
||||
|
||||
def __init__(self, match):
|
||||
_500pxExtractor.__init__(self, match)
|
||||
@ -200,7 +179,7 @@ class _500pxFavoriteExtractor(_500pxExtractor):
|
||||
"""Extractor for favorite 500px photos"""
|
||||
subcategory = "favorite"
|
||||
pattern = BASE_PATTERN + r"/liked/?$"
|
||||
test = ("https://500px.com/liked",)
|
||||
example = "https://500px.com/liked"
|
||||
|
||||
def photos(self):
|
||||
variables = {"pageSize": 20}
|
||||
@ -224,50 +203,7 @@ class _500pxImageExtractor(_500pxExtractor):
|
||||
"""Extractor for individual images from 500px.com"""
|
||||
subcategory = "image"
|
||||
pattern = BASE_PATTERN + r"/photo/(\d+)"
|
||||
test = ("https://500px.com/photo/222049255/queen-of-coasts", {
|
||||
"url": "fbdf7df39325cae02f5688e9f92935b0e7113315",
|
||||
"count": 1,
|
||||
"keyword": {
|
||||
"camera": "Canon EOS 600D",
|
||||
"camera_info": dict,
|
||||
"comments": list,
|
||||
"comments_count": int,
|
||||
"created_at": "2017-08-01T08:40:05+00:00",
|
||||
"description": str,
|
||||
"editored_by": None,
|
||||
"editors_choice": False,
|
||||
"extension": "jpg",
|
||||
"feature": "popular",
|
||||
"feature_date": "2017-08-01T09:58:28+00:00",
|
||||
"focal_length": "208",
|
||||
"height": 3111,
|
||||
"id": 222049255,
|
||||
"image_format": "jpg",
|
||||
"image_url": list,
|
||||
"images": list,
|
||||
"iso": "100",
|
||||
"lens": "EF-S55-250mm f/4-5.6 IS II",
|
||||
"lens_info": dict,
|
||||
"liked": None,
|
||||
"location": None,
|
||||
"location_details": dict,
|
||||
"name": "Queen Of Coasts",
|
||||
"nsfw": False,
|
||||
"privacy": False,
|
||||
"profile": True,
|
||||
"rating": float,
|
||||
"status": 1,
|
||||
"tags": list,
|
||||
"taken_at": "2017-05-04T17:36:51+00:00",
|
||||
"times_viewed": int,
|
||||
"url": "/photo/222049255/Queen-Of-Coasts-by-Alice-Nabieva",
|
||||
"user": dict,
|
||||
"user_id": 12847235,
|
||||
"votes_count": int,
|
||||
"watermark": True,
|
||||
"width": 4637,
|
||||
},
|
||||
})
|
||||
example = "https://500px.com/photo/12345/TITLE"
|
||||
|
||||
def __init__(self, match):
|
||||
_500pxExtractor.__init__(self, match)
|
||||
|
@ -57,48 +57,7 @@ class _8chanThreadExtractor(_8chanExtractor):
|
||||
filename_fmt = "{postId}{num:?-//} {filename[:200]}.{extension}"
|
||||
archive_fmt = "{boardUri}_{postId}_{num}"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
|
||||
test = (
|
||||
("https://8chan.moe/vhs/res/4.html", {
|
||||
"pattern": r"https://8chan\.moe/\.media/[0-9a-f]{64}\.\w+$",
|
||||
"count": 14,
|
||||
"keyword": {
|
||||
"archived": False,
|
||||
"autoSage": False,
|
||||
"boardDescription": "Film and Cinema",
|
||||
"boardMarkdown": None,
|
||||
"boardName": "Movies",
|
||||
"boardUri": "vhs",
|
||||
"creation": r"re:\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{3}Z",
|
||||
"cyclic": False,
|
||||
"email": None,
|
||||
"id": "re:^[0-9a-f]{6}$",
|
||||
"locked": False,
|
||||
"markdown": str,
|
||||
"maxFileCount": 5,
|
||||
"maxFileSize": "32.00 MB",
|
||||
"maxMessageLength": 8001,
|
||||
"message": str,
|
||||
"mime": str,
|
||||
"name": "Anonymous",
|
||||
"num": int,
|
||||
"originalName": str,
|
||||
"path": r"re:/.media/[0-9a-f]{64}\.\w+$",
|
||||
"pinned": False,
|
||||
"postId": int,
|
||||
"signedRole": None,
|
||||
"size": int,
|
||||
"threadId": 4,
|
||||
"thumb": r"re:/.media/t_[0-9a-f]{64}$",
|
||||
"uniquePosters": 9,
|
||||
"usesCustomCss": True,
|
||||
"usesCustomJs": False,
|
||||
"?wsPort": 8880,
|
||||
"?wssPort": 2087,
|
||||
},
|
||||
}),
|
||||
("https://8chan.se/vhs/res/4.html"),
|
||||
("https://8chan.cc/vhs/res/4.html"),
|
||||
)
|
||||
example = "https://8chan.moe/a/res/12345.html"
|
||||
|
||||
def __init__(self, match):
|
||||
_8chanExtractor.__init__(self, match)
|
||||
@ -137,23 +96,12 @@ class _8chanBoardExtractor(_8chanExtractor):
|
||||
"""Extractor for 8chan boards"""
|
||||
subcategory = "board"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/(?:(\d+)\.html)?$"
|
||||
test = (
|
||||
("https://8chan.moe/vhs/"),
|
||||
("https://8chan.moe/vhs/2.html", {
|
||||
"pattern": _8chanThreadExtractor.pattern,
|
||||
"count": 23,
|
||||
}),
|
||||
("https://8chan.se/vhs/"),
|
||||
("https://8chan.cc/vhs/"),
|
||||
)
|
||||
example = "https://8chan.moe/a/"
|
||||
|
||||
def __init__(self, match):
|
||||
_8chanExtractor.__init__(self, match)
|
||||
_, self.board, self.page = match.groups()
|
||||
|
||||
def _init(self):
|
||||
self.session.headers["Referer"] = self.root + "/"
|
||||
|
||||
def items(self):
|
||||
page = text.parse_int(self.page, 1)
|
||||
url = "{}/{}/{}.json".format(self.root, self.board, page)
|
||||
|
@ -22,51 +22,7 @@ class _8musesAlbumExtractor(Extractor):
|
||||
root = "https://comics.8muses.com"
|
||||
pattern = (r"(?:https?://)?(?:comics\.|www\.)?8muses\.com"
|
||||
r"(/comics/album/[^?#]+)(\?[^#]+)?")
|
||||
test = (
|
||||
("https://comics.8muses.com/comics/album/Fakku-Comics/mogg/Liar", {
|
||||
"url": "6286ac33087c236c5a7e51f8a9d4e4d5548212d4",
|
||||
"pattern": r"https://comics.8muses.com/image/fl/[\w-]+",
|
||||
"keyword": {
|
||||
"url" : str,
|
||||
"hash" : str,
|
||||
"page" : int,
|
||||
"count": 6,
|
||||
"album": {
|
||||
"id" : 10467,
|
||||
"title" : "Liar",
|
||||
"path" : "Fakku Comics/mogg/Liar",
|
||||
"parts" : ["Fakku Comics", "mogg", "Liar"],
|
||||
"private": False,
|
||||
"url" : "https://comics.8muses.com/comics"
|
||||
"/album/Fakku-Comics/mogg/Liar",
|
||||
"parent" : 10464,
|
||||
"views" : int,
|
||||
"likes" : int,
|
||||
"date" : "dt:2018-07-10 00:00:00",
|
||||
},
|
||||
},
|
||||
}),
|
||||
("https://www.8muses.com/comics/album/Fakku-Comics/santa", {
|
||||
"count": ">= 3",
|
||||
"pattern": pattern,
|
||||
"keyword": {
|
||||
"url" : str,
|
||||
"name" : str,
|
||||
"private": False,
|
||||
},
|
||||
}),
|
||||
# custom sorting
|
||||
("https://www.8muses.com/comics/album/Fakku-Comics/11?sort=az", {
|
||||
"count": ">= 70",
|
||||
"keyword": {"name": r"re:^[R-Zr-z]"},
|
||||
}),
|
||||
# non-ASCII characters
|
||||
(("https://comics.8muses.com/comics/album/Various-Authors/Chessire88"
|
||||
"/From-Trainers-to-Pokmons"), {
|
||||
"count": 2,
|
||||
"keyword": {"name": "re:From Trainers to Pokémons"},
|
||||
}),
|
||||
)
|
||||
example = "https://comics.8muses.com/comics/album/PATH/TITLE"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -50,7 +50,6 @@ modules = [
|
||||
"gelbooru",
|
||||
"gelbooru_v01",
|
||||
"gelbooru_v02",
|
||||
"gfycat",
|
||||
"gofile",
|
||||
"hbrowse",
|
||||
"hentai2read",
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019 Mike Fährmann
|
||||
# Copyright 2019-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -18,17 +18,7 @@ class AdultempireGalleryExtractor(GalleryExtractor):
|
||||
root = "https://www.adultempire.com"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?adult(?:dvd)?empire\.com"
|
||||
r"(/(\d+)/gallery\.html)")
|
||||
test = (
|
||||
("https://www.adultempire.com/5998/gallery.html", {
|
||||
"range": "1",
|
||||
"keyword": "5b3266e69801db0d78c22181da23bc102886e027",
|
||||
"content": "5c6beb31e5e3cdc90ee5910d5c30f9aaec977b9e",
|
||||
}),
|
||||
("https://www.adultdvdempire.com/5683/gallery.html", {
|
||||
"url": "b12cd1a65cae8019d837505adb4d6a2c1ed4d70d",
|
||||
"keyword": "8d448d79c4ac5f5b10a3019d5b5129ddb43655e5",
|
||||
}),
|
||||
)
|
||||
example = "https://www.adultempire.com/12345/gallery.html"
|
||||
|
||||
def __init__(self, match):
|
||||
GalleryExtractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2021 Mike Fährmann
|
||||
# Copyright 2021-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -21,25 +21,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
|
||||
filename_fmt = "{filename}.{extension}"
|
||||
archive_fmt = "{gid}_{num}"
|
||||
pattern = r"(?:https?://)?architizer\.com/projects/([^/?#]+)"
|
||||
test = ("https://architizer.com/projects/house-lo/", {
|
||||
"pattern": r"https://architizer-prod\.imgix\.net/media/mediadata"
|
||||
r"/uploads/.+\.jpg$",
|
||||
"keyword": {
|
||||
"count": 27,
|
||||
"description": str,
|
||||
"firm": "Atelier Lina Bellovicova",
|
||||
"gid": "225496",
|
||||
"location": "Czechia",
|
||||
"num": int,
|
||||
"size": "1000 sqft - 3000 sqft",
|
||||
"slug": "house-lo",
|
||||
"status": "Built",
|
||||
"subcategory": "project",
|
||||
"title": "House LO",
|
||||
"type": "Residential › Private House",
|
||||
"year": "2020",
|
||||
},
|
||||
})
|
||||
example = "https://architizer.com/projects/NAME/"
|
||||
|
||||
def __init__(self, match):
|
||||
url = "{}/projects/{}/".format(self.root, match.group(1))
|
||||
@ -47,11 +29,13 @@ class ArchitizerProjectExtractor(GalleryExtractor):
|
||||
|
||||
def metadata(self, page):
|
||||
extr = text.extract_from(page)
|
||||
extr('id="Pages"', "")
|
||||
|
||||
return {
|
||||
"title" : extr("data-name='", "'"),
|
||||
"slug" : extr("data-slug='", "'"),
|
||||
"gid" : extr("data-gid='", "'").rpartition(".")[2],
|
||||
"firm" : extr("data-firm-leaders-str='", "'"),
|
||||
"title" : extr('data-name="', '"'),
|
||||
"slug" : extr('data-slug="', '"'),
|
||||
"gid" : extr('data-gid="', '"').rpartition(".")[2],
|
||||
"firm" : extr('data-firm-leaders-str="', '"'),
|
||||
"location" : extr("<h2>", "<").strip(),
|
||||
"type" : text.unescape(text.remove_html(extr(
|
||||
'<div class="title">Type</div>', '<br'))),
|
||||
@ -70,7 +54,7 @@ class ArchitizerProjectExtractor(GalleryExtractor):
|
||||
return [
|
||||
(url, None)
|
||||
for url in text.extract_iter(
|
||||
page, "property='og:image:secure_url' content='", "?")
|
||||
page, 'property="og:image:secure_url" content="', "?")
|
||||
]
|
||||
|
||||
|
||||
@ -80,10 +64,7 @@ class ArchitizerFirmExtractor(Extractor):
|
||||
subcategory = "firm"
|
||||
root = "https://architizer.com"
|
||||
pattern = r"(?:https?://)?architizer\.com/firms/([^/?#]+)"
|
||||
test = ("https://architizer.com/firms/olson-kundig/", {
|
||||
"pattern": ArchitizerProjectExtractor.pattern,
|
||||
"count": ">= 90",
|
||||
})
|
||||
example = "https://architizer.com/firms/NAME/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -117,7 +117,6 @@ class ArtstationExtractor(Extractor):
|
||||
headers = {
|
||||
"Accept" : "application/json, text/plain, */*",
|
||||
"Origin" : self.root,
|
||||
"Referer": self.root + "/",
|
||||
}
|
||||
|
||||
if json:
|
||||
@ -147,7 +146,6 @@ class ArtstationExtractor(Extractor):
|
||||
headers = {
|
||||
"Accept" : "*/*",
|
||||
"Origin" : self.root,
|
||||
"Referer": self.root + "/",
|
||||
}
|
||||
return self.request(
|
||||
url, method="POST", headers=headers, json={},
|
||||
@ -178,17 +176,7 @@ class ArtstationUserExtractor(ArtstationExtractor):
|
||||
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
|
||||
r"/(?!artwork|projects|search)([^/?#]+)(?:/albums/all)?"
|
||||
r"|((?!www)\w+)\.artstation\.com(?:/projects)?)/?$")
|
||||
test = (
|
||||
("https://www.artstation.com/sungchoi/", {
|
||||
"pattern": r"https://\w+\.artstation\.com/p/assets/images"
|
||||
r"/images/\d+/\d+/\d+/(4k|large|medium|small)/[^/]+",
|
||||
"range": "1-10",
|
||||
"count": ">= 10",
|
||||
}),
|
||||
("https://www.artstation.com/sungchoi/albums/all/"),
|
||||
("https://sungchoi.artstation.com/"),
|
||||
("https://sungchoi.artstation.com/projects/"),
|
||||
)
|
||||
example = "https://www.artstation.com/USER"
|
||||
|
||||
def projects(self):
|
||||
url = "{}/users/{}/projects.json".format(self.root, self.user)
|
||||
@ -205,15 +193,7 @@ class ArtstationAlbumExtractor(ArtstationExtractor):
|
||||
pattern = (r"(?:https?://)?(?:(?:www\.)?artstation\.com"
|
||||
r"/(?!artwork|projects|search)([^/?#]+)"
|
||||
r"|((?!www)\w+)\.artstation\.com)/albums/(\d+)")
|
||||
test = (
|
||||
("https://www.artstation.com/huimeiye/albums/770899", {
|
||||
"count": 2,
|
||||
}),
|
||||
("https://www.artstation.com/huimeiye/albums/770898", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
("https://huimeiye.artstation.com/albums/770899"),
|
||||
)
|
||||
example = "https://www.artstation.com/USER/albums/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
ArtstationExtractor.__init__(self, match)
|
||||
@ -247,17 +227,7 @@ class ArtstationLikesExtractor(ArtstationExtractor):
|
||||
archive_fmt = "f_{userinfo[id]}_{asset[id]}"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
|
||||
r"/(?!artwork|projects|search)([^/?#]+)/likes/?")
|
||||
test = (
|
||||
("https://www.artstation.com/mikf/likes", {
|
||||
"pattern": r"https://\w+\.artstation\.com/p/assets/images"
|
||||
r"/images/\d+/\d+/\d+/(4k|large|medium|small)/[^/]+",
|
||||
"count": 6,
|
||||
}),
|
||||
# no likes
|
||||
("https://www.artstation.com/sungchoi/likes", {
|
||||
"count": 0,
|
||||
}),
|
||||
)
|
||||
example = "https://www.artstation.com/USER/likes"
|
||||
|
||||
def projects(self):
|
||||
url = "{}/users/{}/likes.json".format(self.root, self.user)
|
||||
@ -274,14 +244,7 @@ class ArtstationChallengeExtractor(ArtstationExtractor):
|
||||
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
|
||||
r"/contests/[^/?#]+/challenges/(\d+)"
|
||||
r"/?(?:\?sorting=([a-z]+))?")
|
||||
test = (
|
||||
("https://www.artstation.com/contests/thu-2017/challenges/20"),
|
||||
(("https://www.artstation.com/contests/beyond-human"
|
||||
"/challenges/23?sorting=winners"), {
|
||||
"range": "1-30",
|
||||
"count": 30,
|
||||
}),
|
||||
)
|
||||
example = "https://www.artstation.com/contests/NAME/challenges/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
ArtstationExtractor.__init__(self, match)
|
||||
@ -327,10 +290,7 @@ class ArtstationSearchExtractor(ArtstationExtractor):
|
||||
archive_fmt = "s_{search[query]}_{asset[id]}"
|
||||
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
|
||||
r"/search/?\?([^#]+)")
|
||||
test = ("https://www.artstation.com/search?query=ancient&sort_by=rank", {
|
||||
"range": "1-20",
|
||||
"count": 20,
|
||||
})
|
||||
example = "https://www.artstation.com/search?query=QUERY"
|
||||
|
||||
def __init__(self, match):
|
||||
ArtstationExtractor.__init__(self, match)
|
||||
@ -377,10 +337,7 @@ class ArtstationArtworkExtractor(ArtstationExtractor):
|
||||
archive_fmt = "A_{asset[id]}"
|
||||
pattern = (r"(?:https?://)?(?:\w+\.)?artstation\.com"
|
||||
r"/artwork/?\?([^#]+)")
|
||||
test = ("https://www.artstation.com/artwork?sorting=latest", {
|
||||
"range": "1-20",
|
||||
"count": 20,
|
||||
})
|
||||
example = "https://www.artstation.com/artwork?sorting=SORT"
|
||||
|
||||
def __init__(self, match):
|
||||
ArtstationExtractor.__init__(self, match)
|
||||
@ -400,32 +357,7 @@ class ArtstationImageExtractor(ArtstationExtractor):
|
||||
pattern = (r"(?:https?://)?(?:"
|
||||
r"(?:\w+\.)?artstation\.com/(?:artwork|projects|search)"
|
||||
r"|artstn\.co/p)/(\w+)")
|
||||
test = (
|
||||
("https://www.artstation.com/artwork/LQVJr", {
|
||||
"pattern": r"https?://\w+\.artstation\.com/p/assets"
|
||||
r"/images/images/008/760/279/4k/.+",
|
||||
"content": "7b113871465fdc09d127adfdc2767d51cf45a7e9",
|
||||
# SHA1 hash without _no_cache()
|
||||
# "content": "44b80f9af36d40efc5a2668cdd11d36d6793bae9",
|
||||
}),
|
||||
# multiple images per project
|
||||
("https://www.artstation.com/artwork/Db3dy", {
|
||||
"count": 4,
|
||||
}),
|
||||
# embedded youtube video
|
||||
("https://www.artstation.com/artwork/g4WPK", {
|
||||
"range": "2",
|
||||
"options": (("external", True),),
|
||||
"pattern": "ytdl:https://www.youtube.com/embed/JNFfJtwwrU0",
|
||||
}),
|
||||
# 404 (#3016)
|
||||
("https://www.artstation.com/artwork/3q3mXB", {
|
||||
"count": 0,
|
||||
}),
|
||||
# alternate URL patterns
|
||||
("https://sungchoi.artstation.com/projects/LQVJr"),
|
||||
("https://artstn.co/p/LQVJr"),
|
||||
)
|
||||
example = "https://www.artstation.com/artwork/abcde"
|
||||
|
||||
def __init__(self, match):
|
||||
ArtstationExtractor.__init__(self, match)
|
||||
@ -453,10 +385,7 @@ class ArtstationFollowingExtractor(ArtstationExtractor):
|
||||
subcategory = "following"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?artstation\.com"
|
||||
r"/(?!artwork|projects|search)([^/?#]+)/following")
|
||||
test = ("https://www.artstation.com/sungchoi/following", {
|
||||
"pattern": ArtstationUserExtractor.pattern,
|
||||
"count": ">= 50",
|
||||
})
|
||||
example = "https://www.artstation.com/USER/following"
|
||||
|
||||
def items(self):
|
||||
url = "{}/users/{}/following.json".format(self.root, self.user)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2020-2022 Mike Fährmann
|
||||
# Copyright 2020-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -176,16 +176,7 @@ class AryionGalleryExtractor(AryionExtractor):
|
||||
subcategory = "gallery"
|
||||
categorytransfer = True
|
||||
pattern = BASE_PATTERN + r"/(?:gallery/|user/|latest.php\?name=)([^/?#]+)"
|
||||
test = (
|
||||
("https://aryion.com/g4/gallery/jameshoward", {
|
||||
"options": (("recursive", False),),
|
||||
"pattern": r"https://aryion\.com/g4/data\.php\?id=\d+$",
|
||||
"range": "48-52",
|
||||
"count": 5,
|
||||
}),
|
||||
("https://aryion.com/g4/user/jameshoward"),
|
||||
("https://aryion.com/g4/latest.php?name=jameshoward"),
|
||||
)
|
||||
example = "https://aryion.com/g4/gallery/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
AryionExtractor.__init__(self, match)
|
||||
@ -215,9 +206,7 @@ class AryionTagExtractor(AryionExtractor):
|
||||
directory_fmt = ("{category}", "tags", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern = BASE_PATTERN + r"/tags\.php\?([^#]+)"
|
||||
test = ("https://aryion.com/g4/tags.php?tag=star+wars&p=19", {
|
||||
"count": ">= 5",
|
||||
})
|
||||
example = "https://aryion.com/g4/tags.php?tag=TAG"
|
||||
|
||||
def _init(self):
|
||||
self.params = text.parse_query(self.user)
|
||||
@ -235,40 +224,7 @@ class AryionPostExtractor(AryionExtractor):
|
||||
"""Extractor for individual posts on eka's portal"""
|
||||
subcategory = "post"
|
||||
pattern = BASE_PATTERN + r"/view/(\d+)"
|
||||
test = (
|
||||
("https://aryion.com/g4/view/510079", {
|
||||
"url": "f233286fa5558c07ae500f7f2d5cb0799881450e",
|
||||
"keyword": {
|
||||
"artist" : "jameshoward",
|
||||
"user" : "jameshoward",
|
||||
"filename" : "jameshoward-510079-subscribestar_150",
|
||||
"extension": "jpg",
|
||||
"id" : 510079,
|
||||
"width" : 1665,
|
||||
"height" : 1619,
|
||||
"size" : 784239,
|
||||
"title" : "I'm on subscribestar now too!",
|
||||
"description": r"re:Doesn't hurt to have a backup, right\?",
|
||||
"tags" : ["Non-Vore", "subscribestar"],
|
||||
"date" : "dt:2019-02-16 19:30:34",
|
||||
"path" : [],
|
||||
"views" : int,
|
||||
"favorites": int,
|
||||
"comments" : int,
|
||||
"_mtime" : "Sat, 16 Feb 2019 19:30:34 GMT",
|
||||
},
|
||||
}),
|
||||
# x-folder (#694)
|
||||
("https://aryion.com/g4/view/588928", {
|
||||
"pattern": pattern,
|
||||
"count": ">= 8",
|
||||
}),
|
||||
# x-comic-folder (#945)
|
||||
("https://aryion.com/g4/view/537379", {
|
||||
"pattern": pattern,
|
||||
"count": 2,
|
||||
}),
|
||||
)
|
||||
example = "https://aryion.com/g4/view/12345"
|
||||
|
||||
def posts(self):
|
||||
post_id, self.user = self.user, None
|
||||
|
@ -23,18 +23,7 @@ class BbcGalleryExtractor(GalleryExtractor):
|
||||
filename_fmt = "{num:>02}.{extension}"
|
||||
archive_fmt = "{programme}_{num}"
|
||||
pattern = BASE_PATTERN + r"[^/?#]+(?!/galleries)(?:/[^/?#]+)?)$"
|
||||
test = (
|
||||
("https://www.bbc.co.uk/programmes/p084qtzs/p085g9kg", {
|
||||
"pattern": r"https://ichef\.bbci\.co\.uk"
|
||||
r"/images/ic/1920xn/\w+\.jpg",
|
||||
"count": 37,
|
||||
"keyword": {
|
||||
"programme": "p084qtzs",
|
||||
"path": ["BBC One", "Doctor Who", "The Timeless Children"],
|
||||
},
|
||||
}),
|
||||
("https://www.bbc.co.uk/programmes/p084qtzs"),
|
||||
)
|
||||
example = "https://www.bbc.co.uk/programmes/PATH"
|
||||
|
||||
def metadata(self, page):
|
||||
data = util.json_loads(text.extr(
|
||||
@ -72,17 +61,7 @@ class BbcProgrammeExtractor(Extractor):
|
||||
subcategory = "programme"
|
||||
root = "https://www.bbc.co.uk"
|
||||
pattern = BASE_PATTERN + r"[^/?#]+/galleries)(?:/?\?page=(\d+))?"
|
||||
test = (
|
||||
("https://www.bbc.co.uk/programmes/b006q2x0/galleries", {
|
||||
"pattern": BbcGalleryExtractor.pattern,
|
||||
"range": "1-50",
|
||||
"count": ">= 50",
|
||||
}),
|
||||
("https://www.bbc.co.uk/programmes/b006q2x0/galleries?page=40", {
|
||||
"pattern": BbcGalleryExtractor.pattern,
|
||||
"count": ">= 100",
|
||||
}),
|
||||
)
|
||||
example = "https://www.bbc.co.uk/programmes/ID/galleries"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -35,9 +35,8 @@ class BehanceExtractor(Extractor):
|
||||
def _request_graphql(self, endpoint, variables):
|
||||
url = self.root + "/v3/graphql"
|
||||
headers = {
|
||||
"Origin" : self.root,
|
||||
"Referer": self.root + "/",
|
||||
"X-BCP" : self._bcp,
|
||||
"Origin": self.root,
|
||||
"X-BCP" : self._bcp,
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
}
|
||||
data = {
|
||||
@ -84,43 +83,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
|
||||
filename_fmt = "{category}_{id}_{num:>02}.{extension}"
|
||||
archive_fmt = "{id}_{num}"
|
||||
pattern = r"(?:https?://)?(?:www\.)?behance\.net/gallery/(\d+)"
|
||||
test = (
|
||||
("https://www.behance.net/gallery/17386197/A-Short-Story", {
|
||||
"count": 2,
|
||||
"url": "ab79bd3bef8d3ae48e6ac74fd995c1dfaec1b7d2",
|
||||
"keyword": {
|
||||
"id": 17386197,
|
||||
"name": 're:"Hi". A short story about the important things ',
|
||||
"owners": ["Place Studio", "Julio César Velazquez"],
|
||||
"fields": ["Animation", "Character Design", "Directing"],
|
||||
"tags": list,
|
||||
"module": dict,
|
||||
"date": "dt:2014-06-03 15:41:51",
|
||||
},
|
||||
}),
|
||||
("https://www.behance.net/gallery/21324767/Nevada-City", {
|
||||
"count": 6,
|
||||
"url": "0258fe194fe7d828d6f2c7f6086a9a0a4140db1d",
|
||||
"keyword": {"owners": ["Alex Strohl"]},
|
||||
}),
|
||||
# 'media_collection' modules
|
||||
("https://www.behance.net/gallery/88276087/Audi-R8-RWD", {
|
||||
"count": 20,
|
||||
"url": "6bebff0d37f85349f9ad28bd8b76fd66627c1e2f",
|
||||
"pattern": r"https://mir-s3-cdn-cf\.behance\.net/project_modules"
|
||||
r"/source/[0-9a-f]+.[0-9a-f]+\.jpg"
|
||||
}),
|
||||
# 'video' modules (#1282)
|
||||
("https://www.behance.net/gallery/101185577/COLCCI", {
|
||||
"pattern": r"https://cdn-prod-ccv\.adobe\.com/\w+"
|
||||
r"/rend/\w+_720\.mp4\?",
|
||||
"count": 3,
|
||||
}),
|
||||
# mature content (#4417)
|
||||
("https://www.behance.net/gallery/177464639/Kimori", {
|
||||
"exception": exception.AuthorizationError,
|
||||
}),
|
||||
)
|
||||
example = "https://www.behance.net/gallery/12345/TITLE"
|
||||
|
||||
def __init__(self, match):
|
||||
BehanceExtractor.__init__(self, match)
|
||||
@ -177,7 +140,13 @@ class BehanceGalleryExtractor(BehanceExtractor):
|
||||
append((url, module))
|
||||
|
||||
elif mtype == "VideoModule":
|
||||
renditions = module["videoData"]["renditions"]
|
||||
try:
|
||||
renditions = module["videoData"]["renditions"]
|
||||
except Exception:
|
||||
self.log.warning("No download URLs for video %s",
|
||||
module.get("id") or "???")
|
||||
continue
|
||||
|
||||
try:
|
||||
url = [
|
||||
r["url"] for r in renditions
|
||||
@ -186,6 +155,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
|
||||
except Exception as exc:
|
||||
self.log.debug("%s: %s", exc.__class__.__name__, exc)
|
||||
url = "ytdl:" + renditions[-1]["url"]
|
||||
|
||||
append((url, module))
|
||||
|
||||
elif mtype == "MediaCollectionModule":
|
||||
@ -210,10 +180,7 @@ class BehanceUserExtractor(BehanceExtractor):
|
||||
subcategory = "user"
|
||||
categorytransfer = True
|
||||
pattern = r"(?:https?://)?(?:www\.)?behance\.net/([^/?#]+)/?$"
|
||||
test = ("https://www.behance.net/alexstrohl", {
|
||||
"count": ">= 11",
|
||||
"pattern": BehanceGalleryExtractor.pattern,
|
||||
})
|
||||
example = "https://www.behance.net/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
BehanceExtractor.__init__(self, match)
|
||||
@ -223,7 +190,7 @@ class BehanceUserExtractor(BehanceExtractor):
|
||||
endpoint = "GetProfileProjects"
|
||||
variables = {
|
||||
"username": self.user,
|
||||
"after" : "MAo=",
|
||||
"after" : "MAo=", # "0" in base64
|
||||
}
|
||||
|
||||
while True:
|
||||
@ -241,10 +208,7 @@ class BehanceCollectionExtractor(BehanceExtractor):
|
||||
subcategory = "collection"
|
||||
categorytransfer = True
|
||||
pattern = r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"
|
||||
test = ("https://www.behance.net/collection/71340149/inspiration", {
|
||||
"count": ">= 150",
|
||||
"pattern": BehanceGalleryExtractor.pattern,
|
||||
})
|
||||
example = "https://www.behance.net/collection/12345/TITLE"
|
||||
|
||||
def __init__(self, match):
|
||||
BehanceExtractor.__init__(self, match)
|
||||
@ -253,7 +217,7 @@ class BehanceCollectionExtractor(BehanceExtractor):
|
||||
def galleries(self):
|
||||
endpoint = "GetMoodboardItemsAndRecommendations"
|
||||
variables = {
|
||||
"afterItem": "MAo=",
|
||||
"afterItem": "MAo=", # "0" in base64
|
||||
"firstItem": 40,
|
||||
"id" : int(self.collection_id),
|
||||
"shouldGetItems" : True,
|
||||
|
@ -95,59 +95,8 @@ class BloggerExtractor(Extractor):
|
||||
class BloggerPostExtractor(BloggerExtractor):
|
||||
"""Extractor for a single blog post"""
|
||||
subcategory = "post"
|
||||
pattern = BASE_PATTERN + r"(/\d{4}/\d\d/[^/?#]+\.html)"
|
||||
test = (
|
||||
("https://julianbphotography.blogspot.com/2010/12/moon-rise.html", {
|
||||
"url": "9928429fb62f712eb4de80f53625eccecc614aae",
|
||||
"pattern": r"https://3.bp.blogspot.com/.*/s0/Icy-Moonrise-.*.jpg",
|
||||
"keyword": {
|
||||
"blog": {
|
||||
"date" : "dt:2010-11-21 18:19:42",
|
||||
"description": "",
|
||||
"id" : "5623928067739466034",
|
||||
"kind" : "blogger#blog",
|
||||
"locale" : dict,
|
||||
"name" : "Julian Bunker Photography",
|
||||
"pages" : int,
|
||||
"posts" : int,
|
||||
"published" : "2010-11-21T10:19:42-08:00",
|
||||
"updated" : str,
|
||||
"url" : "http://julianbphotography.blogspot.com/",
|
||||
},
|
||||
"post": {
|
||||
"author" : "Julian Bunker",
|
||||
"content" : str,
|
||||
"date" : "dt:2010-12-26 01:08:00",
|
||||
"etag" : str,
|
||||
"id" : "6955139236418998998",
|
||||
"kind" : "blogger#post",
|
||||
"published" : "2010-12-25T17:08:00-08:00",
|
||||
"replies" : "0",
|
||||
"title" : "Moon Rise",
|
||||
"updated" : "2011-12-06T05:21:24-08:00",
|
||||
"url" : "re:.+/2010/12/moon-rise.html$",
|
||||
},
|
||||
"num": int,
|
||||
"url": str,
|
||||
},
|
||||
}),
|
||||
("blogger:http://www.julianbunker.com/2010/12/moon-rise.html"),
|
||||
# video (#587)
|
||||
(("http://cfnmscenesinmovies.blogspot.com/2011/11/"
|
||||
"cfnm-scene-jenna-fischer-in-office.html"), {
|
||||
"pattern": r"https://.+\.googlevideo\.com/videoplayback",
|
||||
}),
|
||||
# image URLs with width/height (#1061)
|
||||
# ("https://aaaninja.blogspot.com/2020/08/altera-boob-press-2.html", {
|
||||
# "pattern": r"https://1.bp.blogspot.com/.+/s0/altera_.+png",
|
||||
# }),
|
||||
# new image domain (#2204)
|
||||
(("https://randomthingsthroughmyletterbox.blogspot.com/2022/01"
|
||||
"/bitter-flowers-by-gunnar-staalesen-blog.html"), {
|
||||
"pattern": r"https://blogger.googleusercontent.com/img/a/.+=s0$",
|
||||
"count": 8,
|
||||
}),
|
||||
)
|
||||
pattern = BASE_PATTERN + r"(/\d\d\d\d/\d\d/[^/?#]+\.html)"
|
||||
example = "https://BLOG.blogspot.com/1970/01/TITLE.html"
|
||||
|
||||
def __init__(self, match):
|
||||
BloggerExtractor.__init__(self, match)
|
||||
@ -161,17 +110,7 @@ class BloggerBlogExtractor(BloggerExtractor):
|
||||
"""Extractor for an entire Blogger blog"""
|
||||
subcategory = "blog"
|
||||
pattern = BASE_PATTERN + r"/?$"
|
||||
test = (
|
||||
("https://julianbphotography.blogspot.com/", {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
"pattern": r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
|
||||
}),
|
||||
("blogger:https://www.kefblog.com.ng/", {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
}),
|
||||
)
|
||||
example = "https://BLOG.blogspot.com/"
|
||||
|
||||
def posts(self, blog):
|
||||
return self.api.blog_posts(blog["id"])
|
||||
@ -181,12 +120,7 @@ class BloggerSearchExtractor(BloggerExtractor):
|
||||
"""Extractor for Blogger search results"""
|
||||
subcategory = "search"
|
||||
pattern = BASE_PATTERN + r"/search/?\?q=([^&#]+)"
|
||||
test = (
|
||||
("https://julianbphotography.blogspot.com/search?q=400mm", {
|
||||
"count": "< 10",
|
||||
"keyword": {"query": "400mm"},
|
||||
}),
|
||||
)
|
||||
example = "https://BLOG.blogspot.com/search?q=QUERY"
|
||||
|
||||
def __init__(self, match):
|
||||
BloggerExtractor.__init__(self, match)
|
||||
@ -203,13 +137,7 @@ class BloggerLabelExtractor(BloggerExtractor):
|
||||
"""Extractor for Blogger posts by label"""
|
||||
subcategory = "label"
|
||||
pattern = BASE_PATTERN + r"/search/label/([^/?#]+)"
|
||||
test = (
|
||||
("https://dmmagazine.blogspot.com/search/label/D%26D", {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
"keyword": {"label": "D&D"},
|
||||
}),
|
||||
)
|
||||
example = "https://BLOG.blogspot.com/search/label/LABEL"
|
||||
|
||||
def __init__(self, match):
|
||||
BloggerExtractor.__init__(self, match)
|
||||
|
@ -15,6 +15,7 @@ from urllib.parse import urlsplit, urlunsplit
|
||||
MEDIA_DOMAIN_OVERRIDES = {
|
||||
"cdn9.bunkr.ru" : "c9.bunkr.ru",
|
||||
"cdn12.bunkr.ru": "media-files12.bunkr.la",
|
||||
"cdn-pizza.bunkr.ru": "pizza.bunkr.ru",
|
||||
}
|
||||
|
||||
CDN_HOSTED_EXTENSIONS = (
|
||||
@ -28,53 +29,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
category = "bunkr"
|
||||
root = "https://bunkrr.su"
|
||||
pattern = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)/a/([^/?#]+)"
|
||||
test = (
|
||||
("https://bunkrr.su/a/Lktg9Keq", {
|
||||
"pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
"keyword": {
|
||||
"album_id": "Lktg9Keq",
|
||||
"album_name": 'test テスト "&>',
|
||||
"count": 1,
|
||||
"filename": 'test-テスト-"&>-QjgneIQv',
|
||||
"id": "QjgneIQv",
|
||||
"name": 'test-テスト-"&>',
|
||||
"num": int,
|
||||
},
|
||||
}),
|
||||
# mp4 (#2239)
|
||||
("https://app.bunkr.ru/a/ptRHaCn2", {
|
||||
"pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",
|
||||
"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
|
||||
}),
|
||||
# cdn4
|
||||
("https://bunkr.is/a/iXTTc1o2", {
|
||||
"pattern": r"https://(cdn|media-files)4\.bunkr\.ru/",
|
||||
"content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",
|
||||
"keyword": {
|
||||
"album_id": "iXTTc1o2",
|
||||
"album_name": "test2",
|
||||
"album_size": "691.1 KB",
|
||||
"count": 2,
|
||||
"description": "072022",
|
||||
"filename": "re:video-wFO9FtxG|image-sZrQUeOx",
|
||||
"id": "re:wFO9FtxG|sZrQUeOx",
|
||||
"name": "re:video|image",
|
||||
"num": int,
|
||||
},
|
||||
}),
|
||||
# cdn12 .ru TLD (#4147)
|
||||
("https://bunkrr.su/a/j1G29CnD", {
|
||||
"pattern": r"https://(cdn12.bunkr.ru|media-files12.bunkr.la)/\w+",
|
||||
"count": 8,
|
||||
}),
|
||||
("https://bunkrr.su/a/Lktg9Keq"),
|
||||
("https://bunkr.la/a/Lktg9Keq"),
|
||||
("https://bunkr.su/a/Lktg9Keq"),
|
||||
("https://bunkr.ru/a/Lktg9Keq"),
|
||||
("https://bunkr.is/a/Lktg9Keq"),
|
||||
("https://bunkr.to/a/Lktg9Keq"),
|
||||
)
|
||||
example = "https://bunkrr.su/a/ID"
|
||||
|
||||
def fetch_album(self, album_id):
|
||||
# album metadata
|
||||
@ -87,7 +42,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
cdn = None
|
||||
files = []
|
||||
append = files.append
|
||||
headers = {"Referer": self.root + "/"}
|
||||
|
||||
pos = page.index('class="grid-images')
|
||||
for url in text.extract_iter(page, '<a href="', '"', pos):
|
||||
@ -108,7 +62,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
else:
|
||||
domain = domain.replace("cdn", "media-files", 1)
|
||||
url = urlunsplit((scheme, domain, path, query, fragment))
|
||||
append({"file": url, "_http_headers": headers})
|
||||
append({"file": url})
|
||||
|
||||
return files, {
|
||||
"album_id" : self.album_id,
|
||||
|
@ -21,22 +21,7 @@ class CatboxAlbumExtractor(GalleryExtractor):
|
||||
directory_fmt = ("{category}", "{album_name} ({album_id})")
|
||||
archive_fmt = "{album_id}_{filename}"
|
||||
pattern = r"(?:https?://)?(?:www\.)?catbox\.moe(/c/[^/?#]+)"
|
||||
test = (
|
||||
("https://catbox.moe/c/1igcbe", {
|
||||
"url": "35866a88c29462814f103bc22ec031eaeb380f8a",
|
||||
"content": "70ddb9de3872e2d17cc27e48e6bf395e5c8c0b32",
|
||||
"pattern": r"https://files\.catbox\.moe/\w+\.\w{3}$",
|
||||
"count": 3,
|
||||
"keyword": {
|
||||
"album_id": "1igcbe",
|
||||
"album_name": "test",
|
||||
"date": "dt:2022-08-18 00:00:00",
|
||||
"description": "album test &>",
|
||||
},
|
||||
}),
|
||||
("https://www.catbox.moe/c/cd90s1"),
|
||||
("https://catbox.moe/c/w7tm47#"),
|
||||
)
|
||||
example = "https://catbox.moe/c/ID"
|
||||
|
||||
def metadata(self, page):
|
||||
extr = text.extract_from(page)
|
||||
@ -62,15 +47,7 @@ class CatboxFileExtractor(Extractor):
|
||||
subcategory = "file"
|
||||
archive_fmt = "{filename}"
|
||||
pattern = r"(?:https?://)?(?:files|litter|de)\.catbox\.moe/([^/?#]+)"
|
||||
test = (
|
||||
("https://files.catbox.moe/8ih3y7.png", {
|
||||
"pattern": r"^https://files\.catbox\.moe/8ih3y7\.png$",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
"count": 1,
|
||||
}),
|
||||
("https://litter.catbox.moe/t8v3n9.png"),
|
||||
("https://de.catbox.moe/bjdmz1.jpg"),
|
||||
)
|
||||
example = "https://files.catbox.moe/NAME.EXT"
|
||||
|
||||
def items(self):
|
||||
url = text.ensure_http_scheme(self.url)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2021 Mike Fährmann
|
||||
# Copyright 2021-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -25,19 +25,7 @@ class ComicvineTagExtractor(BooruExtractor):
|
||||
archive_fmt = "{id}"
|
||||
pattern = (r"(?:https?://)?comicvine\.gamespot\.com"
|
||||
r"(/([^/?#]+)/(\d+-\d+)/images/.*)")
|
||||
test = (
|
||||
("https://comicvine.gamespot.com/jock/4040-5653/images/", {
|
||||
"pattern": r"https://comicvine\.gamespot\.com/a/uploads"
|
||||
r"/original/\d+/\d+/\d+-.+\.(jpe?g|png)",
|
||||
"count": ">= 140",
|
||||
}),
|
||||
(("https://comicvine.gamespot.com/batman/4005-1699"
|
||||
"/images/?tag=Fan%20Art%20%26%20Cosplay"), {
|
||||
"pattern": r"https://comicvine\.gamespot\.com/a/uploads"
|
||||
r"/original/\d+/\d+/\d+-.+",
|
||||
"count": ">= 450",
|
||||
}),
|
||||
)
|
||||
example = "https://comicvine.gamespot.com/TAG/123-45/images/"
|
||||
|
||||
def __init__(self, match):
|
||||
BooruExtractor.__init__(self, match)
|
||||
|
@ -35,7 +35,6 @@ class Extractor():
|
||||
cookies_domain = ""
|
||||
browser = None
|
||||
root = ""
|
||||
test = None
|
||||
request_interval = 0.0
|
||||
request_interval_min = 0.0
|
||||
request_timestamp = 0.0
|
||||
@ -299,7 +298,7 @@ class Extractor():
|
||||
useragent = self.config("user-agent")
|
||||
if useragent is None:
|
||||
useragent = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; "
|
||||
"rv:115.0) Gecko/20100101 Firefox/115.0")
|
||||
"rv:109.0) Gecko/20100101 Firefox/115.0")
|
||||
elif useragent == "browser":
|
||||
useragent = _browser_useragent()
|
||||
headers["User-Agent"] = useragent
|
||||
@ -311,6 +310,13 @@ class Extractor():
|
||||
else:
|
||||
headers["Accept-Encoding"] = "gzip, deflate"
|
||||
|
||||
custom_referer = self.config("referer", True)
|
||||
if custom_referer:
|
||||
if isinstance(custom_referer, str):
|
||||
headers["Referer"] = custom_referer
|
||||
elif self.root:
|
||||
headers["Referer"] = self.root + "/"
|
||||
|
||||
custom_headers = self.config("headers")
|
||||
if custom_headers:
|
||||
headers.update(custom_headers)
|
||||
@ -508,21 +514,6 @@ class Extractor():
|
||||
result.append((Message.Queue, url, {"_extractor": extr}))
|
||||
return iter(result)
|
||||
|
||||
@classmethod
|
||||
def _get_tests(cls):
|
||||
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""
|
||||
tests = cls.test
|
||||
if not tests:
|
||||
return
|
||||
|
||||
if len(tests) == 2 and (not tests[1] or isinstance(tests[1], dict)):
|
||||
tests = (tests,)
|
||||
|
||||
for test in tests:
|
||||
if isinstance(test, str):
|
||||
test = (test, None)
|
||||
yield test
|
||||
|
||||
@classmethod
|
||||
def _dump(cls, obj):
|
||||
util.dump_json(obj, ensure_ascii=False, indent=2)
|
||||
@ -831,8 +822,8 @@ _browser_cookies = {}
|
||||
|
||||
HTTP_HEADERS = {
|
||||
"firefox": (
|
||||
("User-Agent", "Mozilla/5.0 ({}; rv:115.0) "
|
||||
"Gecko/20100101 Firefox/115.0"),
|
||||
("User-Agent", "Mozilla/5.0 ({}; "
|
||||
"rv:109.0) Gecko/20100101 Firefox/115.0"),
|
||||
("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,"
|
||||
"image/avif,image/webp,*/*;q=0.8"),
|
||||
("Accept-Language", "en-US,en;q=0.5"),
|
||||
|
@ -14,32 +14,7 @@ class CyberdropAlbumExtractor(lolisafe.LolisafeAlbumExtractor):
|
||||
category = "cyberdrop"
|
||||
root = "https://cyberdrop.me"
|
||||
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.(?:me|to)/a/([^/?#]+)"
|
||||
test = (
|
||||
# images
|
||||
("https://cyberdrop.me/a/keKRjm4t", {
|
||||
"pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.(jpg|png|webp)$",
|
||||
"keyword": {
|
||||
"album_id": "keKRjm4t",
|
||||
"album_name": "Fate (SFW)",
|
||||
"album_size": 150069254,
|
||||
"count": 62,
|
||||
"date": "dt:2020-06-18 13:14:20",
|
||||
"description": "",
|
||||
"id": r"re:\w{8}",
|
||||
},
|
||||
}),
|
||||
# videos
|
||||
("https://cyberdrop.to/a/l8gIAXVD", {
|
||||
"pattern": r"https://fs-\d+\.cyberdrop\.to/.*\.mp4$",
|
||||
"count": 31,
|
||||
"keyword": {
|
||||
"album_id": "l8gIAXVD",
|
||||
"album_name": "Achelois17 videos",
|
||||
"album_size": 652037121,
|
||||
"date": "dt:2020-06-16 15:40:44",
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://cyberdrop.me/a/ID"
|
||||
|
||||
def fetch_album(self, album_id):
|
||||
url = self.root + "/a/" + self.album_id
|
||||
|
@ -173,38 +173,7 @@ class DanbooruTagExtractor(DanbooruExtractor):
|
||||
directory_fmt = ("{category}", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern = BASE_PATTERN + r"/posts\?(?:[^&#]*&)*tags=([^&#]*)"
|
||||
test = (
|
||||
("https://danbooru.donmai.us/posts?tags=bonocho", {
|
||||
"content": "b196fb9f1668109d7774a0a82efea3ffdda07746",
|
||||
}),
|
||||
# test page transitions
|
||||
("https://danbooru.donmai.us/posts?tags=mushishi", {
|
||||
"count": ">= 300",
|
||||
}),
|
||||
# 'external' option (#1747)
|
||||
("https://danbooru.donmai.us/posts?tags=pixiv_id%3A1476533", {
|
||||
"options": (("external", True),),
|
||||
"pattern": r"https://i\.pximg\.net/img-original/img"
|
||||
r"/2008/08/28/02/35/48/1476533_p0\.jpg",
|
||||
}),
|
||||
("https://booru.allthefallen.moe/posts?tags=yume_shokunin", {
|
||||
"count": 12,
|
||||
}),
|
||||
("https://aibooru.online/posts?tags=center_frills&z=1", {
|
||||
"pattern": r"https://cdn\.aibooru\.online/original"
|
||||
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w+",
|
||||
"count": ">= 3",
|
||||
}),
|
||||
("https://booru.borvar.art/posts?tags=chibi&z=1", {
|
||||
"pattern": r"https://booru\.borvar\.art/data/original"
|
||||
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.\w+",
|
||||
"count": ">= 3",
|
||||
}),
|
||||
("https://hijiribe.donmai.us/posts?tags=bonocho"),
|
||||
("https://sonohara.donmai.us/posts?tags=bonocho"),
|
||||
("https://safebooru.donmai.us/posts?tags=bonocho"),
|
||||
("https://safe.aibooru.online/posts?tags=center_frills"),
|
||||
)
|
||||
example = "https://danbooru.donmai.us/posts?tags=TAG"
|
||||
|
||||
def __init__(self, match):
|
||||
DanbooruExtractor.__init__(self, match)
|
||||
@ -238,21 +207,7 @@ class DanbooruPoolExtractor(DanbooruExtractor):
|
||||
directory_fmt = ("{category}", "pool", "{pool[id]} {pool[name]}")
|
||||
archive_fmt = "p_{pool[id]}_{id}"
|
||||
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
|
||||
test = (
|
||||
("https://danbooru.donmai.us/pools/7659", {
|
||||
"content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99",
|
||||
}),
|
||||
("https://booru.allthefallen.moe/pools/9", {
|
||||
"url": "902549ffcdb00fe033c3f63e12bc3cb95c5fd8d5",
|
||||
"count": 6,
|
||||
}),
|
||||
("https://booru.borvar.art/pools/2", {
|
||||
"url": "77fa3559a3fc919f72611f4e3dd0f919d19d3e0d",
|
||||
"count": 4,
|
||||
}),
|
||||
("https://aibooru.online/pools/1"),
|
||||
("https://danbooru.donmai.us/pool/show/7659"),
|
||||
)
|
||||
example = "https://danbooru.donmai.us/pools/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
DanbooruExtractor.__init__(self, match)
|
||||
@ -275,26 +230,7 @@ class DanbooruPostExtractor(DanbooruExtractor):
|
||||
subcategory = "post"
|
||||
archive_fmt = "{id}"
|
||||
pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
|
||||
test = (
|
||||
("https://danbooru.donmai.us/posts/294929", {
|
||||
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
|
||||
"keyword": {"date": "dt:2008-08-12 04:46:05"},
|
||||
}),
|
||||
("https://danbooru.donmai.us/posts/3613024", {
|
||||
"pattern": r"https?://.+\.zip$",
|
||||
"options": (("ugoira", True),)
|
||||
}),
|
||||
("https://booru.allthefallen.moe/posts/22", {
|
||||
"content": "21dda68e1d7e0a554078e62923f537d8e895cac8",
|
||||
}),
|
||||
("https://aibooru.online/posts/1", {
|
||||
"content": "54d548743cd67799a62c77cbae97cfa0fec1b7e9",
|
||||
}),
|
||||
("https://booru.borvar.art/posts/1487", {
|
||||
"content": "91273ac1ea413a12be468841e2b5804656a50bff",
|
||||
}),
|
||||
("https://danbooru.donmai.us/post/show/294929"),
|
||||
)
|
||||
example = "https://danbooru.donmai.us/posts/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
DanbooruExtractor.__init__(self, match)
|
||||
@ -315,17 +251,7 @@ class DanbooruPopularExtractor(DanbooruExtractor):
|
||||
directory_fmt = ("{category}", "popular", "{scale}", "{date}")
|
||||
archive_fmt = "P_{scale[0]}_{date}_{id}"
|
||||
pattern = BASE_PATTERN + r"/(?:explore/posts/)?popular(?:\?([^#]*))?"
|
||||
test = (
|
||||
("https://danbooru.donmai.us/explore/posts/popular"),
|
||||
(("https://danbooru.donmai.us/explore/posts/popular"
|
||||
"?date=2013-06-06&scale=week"), {
|
||||
"range": "1-120",
|
||||
"count": 120,
|
||||
}),
|
||||
("https://booru.allthefallen.moe/explore/posts/popular"),
|
||||
("https://aibooru.online/explore/posts/popular"),
|
||||
("https://booru.borvar.art/explore/posts/popular"),
|
||||
)
|
||||
example = "https://danbooru.donmai.us/explore/posts/popular"
|
||||
|
||||
def __init__(self, match):
|
||||
DanbooruExtractor.__init__(self, match)
|
||||
|
@ -23,7 +23,7 @@ class DesktopographySiteExtractor(DesktopographyExtractor):
|
||||
"""Extractor for all desktopography exhibitions """
|
||||
subcategory = "site"
|
||||
pattern = BASE_PATTERN + r"/$"
|
||||
test = ("https://desktopography.net/",)
|
||||
example = "https://desktopography.net/"
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.root).text
|
||||
@ -42,7 +42,7 @@ class DesktopographyExhibitionExtractor(DesktopographyExtractor):
|
||||
"""Extractor for a yearly desktopography exhibition"""
|
||||
subcategory = "exhibition"
|
||||
pattern = BASE_PATTERN + r"/exhibition-([^/?#]+)/"
|
||||
test = ("https://desktopography.net/exhibition-2020/",)
|
||||
example = "https://desktopography.net/exhibition-2020/"
|
||||
|
||||
def __init__(self, match):
|
||||
DesktopographyExtractor.__init__(self, match)
|
||||
@ -71,7 +71,7 @@ class DesktopographyEntryExtractor(DesktopographyExtractor):
|
||||
"""Extractor for all resolutions of a desktopography wallpaper"""
|
||||
subcategory = "entry"
|
||||
pattern = BASE_PATTERN + r"/portfolios/([\w-]+)"
|
||||
test = ("https://desktopography.net/portfolios/new-era/",)
|
||||
example = "https://desktopography.net/portfolios/NAME/"
|
||||
|
||||
def __init__(self, match):
|
||||
DesktopographyExtractor.__init__(self, match)
|
||||
|
@ -440,18 +440,7 @@ class DeviantartUserExtractor(DeviantartExtractor):
|
||||
"""Extractor for an artist's user profile"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/?$"
|
||||
test = (
|
||||
("https://www.deviantart.com/shimoda7", {
|
||||
"pattern": r"/shimoda7/gallery$",
|
||||
}),
|
||||
("https://www.deviantart.com/shimoda7", {
|
||||
"options": (("include", "all"),),
|
||||
"pattern": r"/shimoda7/"
|
||||
r"(gallery(/scraps)?|posts(/statuses)?|favourites)$",
|
||||
"count": 5,
|
||||
}),
|
||||
("https://shimoda7.deviantart.com/"),
|
||||
)
|
||||
example = "https://www.deviantart.com/USER"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
@ -475,84 +464,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
|
||||
subcategory = "gallery"
|
||||
archive_fmt = "g_{_username}_{index}.{extension}"
|
||||
pattern = BASE_PATTERN + r"/gallery(?:/all|/?\?catpath=)?/?$"
|
||||
test = (
|
||||
("https://www.deviantart.com/shimoda7/gallery/", {
|
||||
"pattern": r"https://(images-)?wixmp-[^.]+\.wixmp\.com"
|
||||
r"/f/.+/.+\.(jpg|png)\?token=.+",
|
||||
"count": ">= 30",
|
||||
"keyword": {
|
||||
"allows_comments": bool,
|
||||
"author": {
|
||||
"type": "regular",
|
||||
"usericon": str,
|
||||
"userid": "9AE51FC7-0278-806C-3FFF-F4961ABF9E2B",
|
||||
"username": "shimoda7",
|
||||
},
|
||||
"category_path": str,
|
||||
"content": {
|
||||
"filesize": int,
|
||||
"height": int,
|
||||
"src": str,
|
||||
"transparency": bool,
|
||||
"width": int,
|
||||
},
|
||||
"da_category": str,
|
||||
"date": "type:datetime",
|
||||
"deviationid": str,
|
||||
"?download_filesize": int,
|
||||
"extension": str,
|
||||
"index": int,
|
||||
"is_deleted": bool,
|
||||
"is_downloadable": bool,
|
||||
"is_favourited": bool,
|
||||
"is_mature": bool,
|
||||
"preview": {
|
||||
"height": int,
|
||||
"src": str,
|
||||
"transparency": bool,
|
||||
"width": int,
|
||||
},
|
||||
"published_time": int,
|
||||
"stats": {
|
||||
"comments": int,
|
||||
"favourites": int,
|
||||
},
|
||||
"target": dict,
|
||||
"thumbs": list,
|
||||
"title": str,
|
||||
"url": r"re:https://www.deviantart.com/shimoda7/art/[^/]+-\d+",
|
||||
"username": "shimoda7",
|
||||
},
|
||||
}),
|
||||
# group
|
||||
("https://www.deviantart.com/yakuzafc/gallery", {
|
||||
"pattern": r"https://www.deviantart.com/yakuzafc/gallery"
|
||||
r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}/",
|
||||
"count": ">= 15",
|
||||
}),
|
||||
# 'folders' option (#276)
|
||||
("https://www.deviantart.com/justatest235723/gallery", {
|
||||
"count": 3,
|
||||
"options": (("metadata", 1), ("folders", 1), ("original", 0)),
|
||||
"keyword": {
|
||||
"description": str,
|
||||
"folders": list,
|
||||
"is_watching": bool,
|
||||
"license": str,
|
||||
"tags": list,
|
||||
},
|
||||
}),
|
||||
("https://www.deviantart.com/shimoda8/gallery/", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
|
||||
("https://www.deviantart.com/shimoda7/gallery"),
|
||||
("https://www.deviantart.com/shimoda7/gallery/all"),
|
||||
("https://www.deviantart.com/shimoda7/gallery/?catpath=/"),
|
||||
("https://shimoda7.deviantart.com/gallery/"),
|
||||
("https://shimoda7.deviantart.com/gallery/all/"),
|
||||
("https://shimoda7.deviantart.com/gallery/?catpath=/"),
|
||||
)
|
||||
example = "https://www.deviantart.com/USER/gallery/"
|
||||
|
||||
def deviations(self):
|
||||
if self.flat and not self.group:
|
||||
@ -567,32 +479,7 @@ class DeviantartFolderExtractor(DeviantartExtractor):
|
||||
directory_fmt = ("{category}", "{username}", "{folder[title]}")
|
||||
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
|
||||
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)/([^/?#]+)"
|
||||
test = (
|
||||
# user
|
||||
("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", {
|
||||
"count": 5,
|
||||
"options": (("original", False),),
|
||||
}),
|
||||
# group
|
||||
("https://www.deviantart.com/yakuzafc/gallery/37412168/Crafts", {
|
||||
"count": ">= 4",
|
||||
"options": (("original", False),),
|
||||
}),
|
||||
# uuid
|
||||
(("https://www.deviantart.com/shimoda7/gallery"
|
||||
"/B38E3C6A-2029-6B45-757B-3C8D3422AD1A/misc"), {
|
||||
"count": 5,
|
||||
"options": (("original", False),),
|
||||
}),
|
||||
# name starts with '_', special characters (#1451)
|
||||
(("https://www.deviantart.com/justatest235723"
|
||||
"/gallery/69302698/-test-b-c-d-e-f-"), {
|
||||
"count": 1,
|
||||
"options": (("original", False),),
|
||||
}),
|
||||
("https://shimoda7.deviantart.com/gallery/722019/Miscellaneous"),
|
||||
("https://yakuzafc.deviantart.com/gallery/37412168/Crafts"),
|
||||
)
|
||||
example = "https://www.deviantart.com/USER/gallery/12345/TITLE"
|
||||
|
||||
def __init__(self, match):
|
||||
DeviantartExtractor.__init__(self, match)
|
||||
@ -621,33 +508,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
|
||||
subcategory = "stash"
|
||||
archive_fmt = "{index}.{extension}"
|
||||
pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
|
||||
test = (
|
||||
("https://sta.sh/022c83odnaxc", {
|
||||
"pattern": r"https://wixmp-[^.]+\.wixmp\.com"
|
||||
r"/f/.+/.+\.png\?token=.+",
|
||||
"content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
|
||||
"count": 1,
|
||||
}),
|
||||
# multiple stash items
|
||||
("https://sta.sh/21jf51j7pzl2", {
|
||||
"options": (("original", False),),
|
||||
"count": 4,
|
||||
}),
|
||||
# downloadable, but no "content" field (#307)
|
||||
("https://sta.sh/024t4coz16mi", {
|
||||
"pattern": r"https://wixmp-[^.]+\.wixmp\.com"
|
||||
r"/f/.+/.+\.rar\?token=.+",
|
||||
"count": 1,
|
||||
}),
|
||||
# mixed folders and images (#659)
|
||||
("https://sta.sh/215twi387vfj", {
|
||||
"options": (("original", False),),
|
||||
"count": 4,
|
||||
}),
|
||||
("https://sta.sh/abcdefghijkl", {
|
||||
"count": 0,
|
||||
}),
|
||||
)
|
||||
example = "https://sta.sh/abcde"
|
||||
|
||||
skip = Extractor.skip
|
||||
|
||||
@ -692,20 +553,7 @@ class DeviantartFavoriteExtractor(DeviantartExtractor):
|
||||
directory_fmt = ("{category}", "{username}", "Favourites")
|
||||
archive_fmt = "f_{_username}_{index}.{extension}"
|
||||
pattern = BASE_PATTERN + r"/favourites(?:/all|/?\?catpath=)?/?$"
|
||||
test = (
|
||||
("https://www.deviantart.com/h3813067/favourites/", {
|
||||
"options": (("metadata", True), ("flat", False)), # issue #271
|
||||
"count": 1,
|
||||
}),
|
||||
("https://www.deviantart.com/h3813067/favourites/", {
|
||||
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
|
||||
}),
|
||||
("https://www.deviantart.com/h3813067/favourites/all"),
|
||||
("https://www.deviantart.com/h3813067/favourites/?catpath=/"),
|
||||
("https://h3813067.deviantart.com/favourites/"),
|
||||
("https://h3813067.deviantart.com/favourites/all"),
|
||||
("https://h3813067.deviantart.com/favourites/?catpath=/"),
|
||||
)
|
||||
example = "https://www.deviantart.com/USER/favourites/"
|
||||
|
||||
def deviations(self):
|
||||
if self.flat:
|
||||
@ -722,20 +570,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
|
||||
"{collection[title]}")
|
||||
archive_fmt = "C_{collection[uuid]}_{index}.{extension}"
|
||||
pattern = BASE_PATTERN + r"/favourites/([^/?#]+)/([^/?#]+)"
|
||||
test = (
|
||||
(("https://www.deviantart.com/pencilshadings/favourites"
|
||||
"/70595441/3D-Favorites"), {
|
||||
"count": ">= 15",
|
||||
"options": (("original", False),),
|
||||
}),
|
||||
(("https://www.deviantart.com/pencilshadings/favourites"
|
||||
"/F050486B-CB62-3C66-87FB-1105A7F6379F/3D Favorites"), {
|
||||
"count": ">= 15",
|
||||
"options": (("original", False),),
|
||||
}),
|
||||
("https://pencilshadings.deviantart.com"
|
||||
"/favourites/70595441/3D-Favorites"),
|
||||
)
|
||||
example = "https://www.deviantart.com/USER/favourites/12345/TITLE"
|
||||
|
||||
def __init__(self, match):
|
||||
DeviantartExtractor.__init__(self, match)
|
||||
@ -766,24 +601,7 @@ class DeviantartJournalExtractor(DeviantartExtractor):
|
||||
directory_fmt = ("{category}", "{username}", "Journal")
|
||||
archive_fmt = "j_{_username}_{index}.{extension}"
|
||||
pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
|
||||
test = (
|
||||
("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
|
||||
"url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
|
||||
}),
|
||||
("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
|
||||
"url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e",
|
||||
"options": (("journals", "text"),),
|
||||
}),
|
||||
("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
|
||||
"count": 0,
|
||||
"options": (("journals", "none"),),
|
||||
}),
|
||||
("https://www.deviantart.com/shimoda7/posts/"),
|
||||
("https://www.deviantart.com/shimoda7/journal/"),
|
||||
("https://www.deviantart.com/shimoda7/journal/?catpath=/"),
|
||||
("https://shimoda7.deviantart.com/journal/"),
|
||||
("https://shimoda7.deviantart.com/journal/?catpath=/"),
|
||||
)
|
||||
example = "https://www.deviantart.com/USER/posts/journals/"
|
||||
|
||||
def deviations(self):
|
||||
return self.api.browse_user_journals(self.user, self.offset)
|
||||
@ -796,45 +614,7 @@ class DeviantartStatusExtractor(DeviantartExtractor):
|
||||
filename_fmt = "{category}_{index}_{title}_{date}.{extension}"
|
||||
archive_fmt = "S_{_username}_{index}.{extension}"
|
||||
pattern = BASE_PATTERN + r"/posts/statuses"
|
||||
test = (
|
||||
("https://www.deviantart.com/t1na/posts/statuses", {
|
||||
"count": 0,
|
||||
}),
|
||||
("https://www.deviantart.com/justgalym/posts/statuses", {
|
||||
"count": 4,
|
||||
"url": "bf4c44c0c60ff2648a880f4c3723464ad3e7d074",
|
||||
}),
|
||||
# shared deviation
|
||||
("https://www.deviantart.com/justgalym/posts/statuses", {
|
||||
"options": (("journals", "none"),),
|
||||
"count": 1,
|
||||
"pattern": r"https://images-wixmp-\w+\.wixmp\.com/f"
|
||||
r"/[^/]+/[^.]+\.jpg\?token=",
|
||||
}),
|
||||
# shared sta.sh item
|
||||
("https://www.deviantart.com/vanillaghosties/posts/statuses", {
|
||||
"options": (("journals", "none"), ("original", False)),
|
||||
"range": "5-",
|
||||
"count": 1,
|
||||
"keyword": {
|
||||
"index" : int,
|
||||
"index_base36": "re:^[0-9a-z]+$",
|
||||
"url" : "re:^https://sta.sh",
|
||||
},
|
||||
}),
|
||||
# "deleted" deviations in 'items'
|
||||
("https://www.deviantart.com/AndrejSKalin/posts/statuses", {
|
||||
"options": (("journals", "none"), ("original", 0),
|
||||
("image-filter", "deviationid[:8] == '147C8B03'")),
|
||||
"count": 2,
|
||||
"archive": False,
|
||||
"keyword": {"deviationid": "147C8B03-7D34-AE93-9241-FA3C6DBBC655"}
|
||||
}),
|
||||
("https://www.deviantart.com/justgalym/posts/statuses", {
|
||||
"options": (("journals", "text"),),
|
||||
"url": "c8744f7f733a3029116607b826321233c5ca452d",
|
||||
}),
|
||||
)
|
||||
example = "https://www.deviantart.com/USER/posts/statuses/"
|
||||
|
||||
def deviations(self):
|
||||
for status in self.api.user_statuses(self.user, self.offset):
|
||||
@ -898,19 +678,7 @@ class DeviantartPopularExtractor(DeviantartExtractor):
|
||||
r"(?:deviations/?)?\?order=(popular-[^/?#]+)"
|
||||
r"|((?:[\w-]+/)*)(popular-[^/?#]+)"
|
||||
r")/?(?:\?([^#]*))?")
|
||||
test = (
|
||||
("https://www.deviantart.com/?order=popular-all-time", {
|
||||
"options": (("original", False),),
|
||||
"range": "1-30",
|
||||
"count": 30,
|
||||
}),
|
||||
("https://www.deviantart.com/popular-24-hours/?q=tree+house", {
|
||||
"options": (("original", False),),
|
||||
"range": "1-30",
|
||||
"count": 30,
|
||||
}),
|
||||
("https://www.deviantart.com/artisan/popular-all-time/?q=tree"),
|
||||
)
|
||||
example = "https://www.deviantart.com/popular-24-hours/"
|
||||
|
||||
def __init__(self, match):
|
||||
DeviantartExtractor.__init__(self, match)
|
||||
@ -955,11 +723,7 @@ class DeviantartTagExtractor(DeviantartExtractor):
|
||||
directory_fmt = ("{category}", "Tags", "{search_tags}")
|
||||
archive_fmt = "T_{search_tags}_{index}.{extension}"
|
||||
pattern = r"(?:https?://)?www\.deviantart\.com/tag/([^/?#]+)"
|
||||
test = ("https://www.deviantart.com/tag/nature", {
|
||||
"options": (("original", False),),
|
||||
"range": "1-30",
|
||||
"count": 30,
|
||||
})
|
||||
example = "https://www.deviantart.com/tag/TAG"
|
||||
|
||||
def __init__(self, match):
|
||||
DeviantartExtractor.__init__(self, match)
|
||||
@ -978,10 +742,7 @@ class DeviantartWatchExtractor(DeviantartExtractor):
|
||||
subcategory = "watch"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?deviantart\.com"
|
||||
r"/(?:watch/deviations|notifications/watch)()()")
|
||||
test = (
|
||||
("https://www.deviantart.com/watch/deviations"),
|
||||
("https://www.deviantart.com/notifications/watch"),
|
||||
)
|
||||
example = "https://www.deviantart.com/watch/deviations"
|
||||
|
||||
def deviations(self):
|
||||
return self.api.browse_deviantsyouwatch()
|
||||
@ -991,7 +752,7 @@ class DeviantartWatchPostsExtractor(DeviantartExtractor):
|
||||
"""Extractor for Posts from watched users"""
|
||||
subcategory = "watch-posts"
|
||||
pattern = r"(?:https?://)?(?:www\.)?deviantart\.com/watch/posts()()"
|
||||
test = ("https://www.deviantart.com/watch/posts",)
|
||||
example = "https://www.deviantart.com/watch/posts"
|
||||
|
||||
def deviations(self):
|
||||
return self.api.browse_posts_deviantsyouwatch()
|
||||
@ -1009,100 +770,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
|
||||
r"(?:view/|deviation/|view(?:-full)?\.php/*\?(?:[^#]+&)?id=)"
|
||||
r"(\d+)" # bare deviation ID without slug
|
||||
r"|(?:https?://)?fav\.me/d([0-9a-z]+)") # base36
|
||||
test = (
|
||||
(("https://www.deviantart.com/shimoda7/art/For-the-sake-10073852"), {
|
||||
"options": (("original", 0),),
|
||||
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
|
||||
}),
|
||||
("https://www.deviantart.com/zzz/art/zzz-1234567890", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
(("https://www.deviantart.com/myria-moon/art/Aime-Moi-261986576"), {
|
||||
"options": (("comments", True),),
|
||||
"keyword": {"comments": list},
|
||||
"pattern": r"https://wixmp-[^.]+\.wixmp\.com"
|
||||
r"/f/.+/.+\.jpg\?token=.+",
|
||||
}),
|
||||
# wixmp URL rewrite
|
||||
(("https://www.deviantart.com/citizenfresh/art/Hverarond-789295466"), {
|
||||
"pattern": (r"https://images-wixmp-\w+\.wixmp\.com/f"
|
||||
r"/[^/]+/[^.]+\.jpg\?token="),
|
||||
}),
|
||||
# GIF (#242)
|
||||
(("https://www.deviantart.com/skatergators/art/COM-Moni-781571783"), {
|
||||
"pattern": r"https://wixmp-\w+\.wixmp\.com/f/03fd2413-efe9-4e5c-"
|
||||
r"8734-2b72605b3fbb/dcxbsnb-1bbf0b38-42af-4070-8878-"
|
||||
r"f30961955bec\.gif\?token=ey...",
|
||||
}),
|
||||
# Flash animation with GIF preview (#1731)
|
||||
("https://www.deviantart.com/yuumei/art/Flash-Comic-214724929", {
|
||||
"pattern": r"https://wixmp-[^.]+\.wixmp\.com"
|
||||
r"/f/.+/.+\.swf\?token=.+",
|
||||
"keyword": {
|
||||
"filename": "flash_comic_tutorial_by_yuumei-d3juatd",
|
||||
"extension": "swf",
|
||||
},
|
||||
}),
|
||||
# sta.sh URLs from description (#302)
|
||||
(("https://www.deviantart.com/uotapo/art/INANAKI-Memo-590297498"), {
|
||||
"options": (("extra", 1), ("original", 0)),
|
||||
"pattern": DeviantartStashExtractor.pattern,
|
||||
"range": "2-",
|
||||
"count": 4,
|
||||
}),
|
||||
# sta.sh URL from deviation["text_content"]["body"]["features"]
|
||||
(("https://www.deviantart.com"
|
||||
"/cimar-wildehopps/art/Honorary-Vixen-859809305"), {
|
||||
"options": (("extra", 1),),
|
||||
"pattern": ("text:<!DOCTYPE html>\n|" +
|
||||
DeviantartStashExtractor.pattern),
|
||||
"count": 2,
|
||||
}),
|
||||
# journal
|
||||
("https://www.deviantart.com/shimoda7/journal/ARTility-583755752", {
|
||||
"url": "d34b2c9f873423e665a1b8ced20fcb75951694a3",
|
||||
"pattern": "text:<!DOCTYPE html>\n",
|
||||
}),
|
||||
# journal-like post with isJournal == False (#419)
|
||||
("https://www.deviantart.com/gliitchlord/art/brashstrokes-812942668", {
|
||||
"url": "e2e0044bd255304412179b6118536dbd9bb3bb0e",
|
||||
"pattern": "text:<!DOCTYPE html>\n",
|
||||
}),
|
||||
# /view/ URLs
|
||||
("https://deviantart.com/view/904858796/", {
|
||||
"content": "8770ec40ad1c1d60f6b602b16301d124f612948f",
|
||||
}),
|
||||
("http://www.deviantart.com/view/890672057", {
|
||||
"content": "1497e13d925caeb13a250cd666b779a640209236",
|
||||
}),
|
||||
("https://www.deviantart.com/view/706871727", {
|
||||
"content": "3f62ae0c2fca2294ac28e41888ea06bb37c22c65",
|
||||
}),
|
||||
("https://www.deviantart.com/view/1", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
# /deviation/ (#3558)
|
||||
("https://www.deviantart.com/deviation/817215762"),
|
||||
# fav.me (#3558)
|
||||
("https://fav.me/ddijrpu", {
|
||||
"count": 1,
|
||||
}),
|
||||
("https://fav.me/dddd", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
# old-style URLs
|
||||
("https://shimoda7.deviantart.com"
|
||||
"/art/For-the-sake-of-a-memory-10073852"),
|
||||
("https://myria-moon.deviantart.com"
|
||||
"/art/Aime-Moi-part-en-vadrouille-261986576"),
|
||||
("https://zzz.deviantart.com/art/zzz-1234567890"),
|
||||
# old /view/ URLs from the Wayback Machine
|
||||
("https://www.deviantart.com/view.php?id=14864502"),
|
||||
("http://www.deviantart.com/view-full.php?id=100842"),
|
||||
|
||||
("https://www.fxdeviantart.com/zzz/art/zzz-1234567890"),
|
||||
("https://www.fxdeviantart.com/view/1234567890"),
|
||||
)
|
||||
example = "https://www.deviantart.com/UsER/art/TITLE-12345"
|
||||
|
||||
skip = Extractor.skip
|
||||
|
||||
@ -1133,13 +801,7 @@ class DeviantartScrapsExtractor(DeviantartExtractor):
|
||||
archive_fmt = "s_{_username}_{index}.{extension}"
|
||||
cookies_domain = ".deviantart.com"
|
||||
pattern = BASE_PATTERN + r"/gallery/(?:\?catpath=)?scraps\b"
|
||||
test = (
|
||||
("https://www.deviantart.com/shimoda7/gallery/scraps", {
|
||||
"count": 12,
|
||||
}),
|
||||
("https://www.deviantart.com/shimoda7/gallery/?catpath=scraps"),
|
||||
("https://shimoda7.deviantart.com/gallery/?catpath=scraps"),
|
||||
)
|
||||
example = "https://www.deviantart.com/USER/gallery/scraps"
|
||||
|
||||
def deviations(self):
|
||||
self.login()
|
||||
@ -1157,11 +819,7 @@ class DeviantartSearchExtractor(DeviantartExtractor):
|
||||
cookies_domain = ".deviantart.com"
|
||||
pattern = (r"(?:https?://)?www\.deviantart\.com"
|
||||
r"/search(?:/deviations)?/?\?([^#]+)")
|
||||
test = (
|
||||
("https://www.deviantart.com/search?q=tree"),
|
||||
("https://www.deviantart.com/search/deviations?order=popular-1-week"),
|
||||
)
|
||||
|
||||
example = "https://www.deviantart.com/search?q=QUERY"
|
||||
skip = Extractor.skip
|
||||
|
||||
def __init__(self, match):
|
||||
@ -1212,13 +870,7 @@ class DeviantartGallerySearchExtractor(DeviantartExtractor):
|
||||
archive_fmt = "g_{_username}_{index}.{extension}"
|
||||
cookies_domain = ".deviantart.com"
|
||||
pattern = BASE_PATTERN + r"/gallery/?\?(q=[^#]+)"
|
||||
test = (
|
||||
("https://www.deviantart.com/shimoda7/gallery?q=memory", {
|
||||
"options": (("original", 0),),
|
||||
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
|
||||
}),
|
||||
("https://www.deviantart.com/shimoda7/gallery?q=memory&sort=popular"),
|
||||
)
|
||||
example = "https://www.deviantart.com/USER/gallery?q=QUERY"
|
||||
|
||||
def __init__(self, match):
|
||||
DeviantartExtractor.__init__(self, match)
|
||||
@ -1250,11 +902,7 @@ class DeviantartFollowingExtractor(DeviantartExtractor):
|
||||
"""Extractor for user's watched users"""
|
||||
subcategory = "following"
|
||||
pattern = BASE_PATTERN + "/about#watching$"
|
||||
test = ("https://www.deviantart.com/shimoda7/about#watching", {
|
||||
"pattern": DeviantartUserExtractor.pattern,
|
||||
"range": "1-50",
|
||||
"count": 50,
|
||||
})
|
||||
example = "https://www.deviantart.com/USER/about#watching"
|
||||
|
||||
def items(self):
|
||||
eclipse_api = DeviantartEclipseAPI(self)
|
||||
@ -1774,11 +1422,9 @@ class DeviantartEclipseAPI():
|
||||
|
||||
def _call(self, endpoint, params):
|
||||
url = "https://www.deviantart.com/_napi" + endpoint
|
||||
headers = {"Referer": "https://www.deviantart.com/"}
|
||||
params["csrf_token"] = self.csrf_token or self._fetch_csrf_token()
|
||||
|
||||
response = self.request(
|
||||
url, params=params, headers=headers, fatal=None)
|
||||
response = self.request(url, params=params, fatal=None)
|
||||
|
||||
if response.status_code == 404:
|
||||
raise exception.StopExtraction(
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2017-2022 Mike Fährmann
|
||||
# Copyright 2017-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -20,36 +20,7 @@ class DirectlinkExtractor(Extractor):
|
||||
pattern = (r"(?i)https?://(?P<domain>[^/?#]+)/(?P<path>[^?#]+\."
|
||||
r"(?:jpe?g|jpe|png|gif|web[mp]|mp4|mkv|og[gmv]|opus))"
|
||||
r"(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?$")
|
||||
test = (
|
||||
(("https://en.wikipedia.org/static/images/project-logos/enwiki.png"), {
|
||||
"url": "18c5d00077332e98e53be9fed2ee4be66154b88d",
|
||||
"keyword": "105770a3f4393618ab7b811b731b22663b5d3794",
|
||||
}),
|
||||
# empty path
|
||||
(("https://example.org/file.webm"), {
|
||||
"url": "2d807ed7059d1b532f1bb71dc24b510b80ff943f",
|
||||
"keyword": "29dad729c40fb09349f83edafa498dba1297464a",
|
||||
}),
|
||||
# more complex example
|
||||
("https://example.org/path/to/file.webm?que=1?&ry=2/#fragment", {
|
||||
"url": "6fb1061390f8aada3db01cb24b51797c7ee42b31",
|
||||
"keyword": "3d7abc31d45ba324e59bc599c3b4862452d5f29c",
|
||||
}),
|
||||
# percent-encoded characters
|
||||
("https://example.org/%27%3C%23/%23%3E%27.jpg?key=%3C%26%3E", {
|
||||
"url": "2627e8140727fdf743f86fe18f69f99a052c9718",
|
||||
"keyword": "831790fddda081bdddd14f96985ab02dc5b5341f",
|
||||
}),
|
||||
# upper case file extension (#296)
|
||||
("https://post-phinf.pstatic.net/MjAxOTA1MjlfMTQ4/MDAxNTU5MTI2NjcyNTkw"
|
||||
".JUzkGb4V6dj9DXjLclrOoqR64uDxHFUO5KDriRdKpGwg.88mCtd4iT1NHlpVKSCaUpP"
|
||||
"mZPiDgT8hmQdQ5K_gYyu0g.JPEG/2.JPG"),
|
||||
# internationalized domain name
|
||||
("https://räksmörgås.josefsson.org/raksmorgas.jpg", {
|
||||
"url": "a65667f670b194afbd1e3ea5e7a78938d36747da",
|
||||
"keyword": "fd5037fe86eebd4764e176cbaf318caec0f700be",
|
||||
}),
|
||||
)
|
||||
example = "https://en.wikipedia.org/static/images/project-logos/enwiki.png"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -43,18 +43,7 @@ class DynastyscansBase():
|
||||
class DynastyscansChapterExtractor(DynastyscansBase, ChapterExtractor):
|
||||
"""Extractor for manga-chapters from dynasty-scans.com"""
|
||||
pattern = BASE_PATTERN + r"(/chapters/[^/?#]+)"
|
||||
test = (
|
||||
(("http://dynasty-scans.com/chapters/"
|
||||
"hitoribocchi_no_oo_seikatsu_ch33"), {
|
||||
"url": "dce64e8c504118f1ab4135c00245ea12413896cb",
|
||||
"keyword": "b67599703c27316a2fe4f11c3232130a1904e032",
|
||||
}),
|
||||
(("http://dynasty-scans.com/chapters/"
|
||||
"new_game_the_spinoff_special_13"), {
|
||||
"url": "dbe5bbb74da2edcfb1832895a484e2a40bc8b538",
|
||||
"keyword": "6b674eb3a274999153f6be044973b195008ced2f",
|
||||
}),
|
||||
)
|
||||
example = "https://dynasty-scans.com/chapters/NAME"
|
||||
|
||||
def metadata(self, page):
|
||||
extr = text.extract_from(page)
|
||||
@ -93,10 +82,7 @@ class DynastyscansMangaExtractor(DynastyscansBase, MangaExtractor):
|
||||
chapterclass = DynastyscansChapterExtractor
|
||||
reverse = False
|
||||
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
|
||||
test = ("https://dynasty-scans.com/series/hitoribocchi_no_oo_seikatsu", {
|
||||
"pattern": DynastyscansChapterExtractor.pattern,
|
||||
"count": ">= 100",
|
||||
})
|
||||
example = "https://dynasty-scans.com/series/NAME"
|
||||
|
||||
def chapters(self, page):
|
||||
return [
|
||||
@ -112,16 +98,7 @@ class DynastyscansSearchExtractor(DynastyscansBase, Extractor):
|
||||
filename_fmt = "{image_id}.{extension}"
|
||||
archive_fmt = "i_{image_id}"
|
||||
pattern = BASE_PATTERN + r"/images/?(?:\?([^#]+))?$"
|
||||
test = (
|
||||
("https://dynasty-scans.com/images?with[]=4930&with[]=5211", {
|
||||
"url": "22cf0fb64e12b29e79b0a3d26666086a48f9916a",
|
||||
"keyword": "11cbc555a15528d25567977b8808e10369c4c3ee",
|
||||
}),
|
||||
("https://dynasty-scans.com/images", {
|
||||
"range": "1",
|
||||
"count": 1,
|
||||
}),
|
||||
)
|
||||
example = "https://dynasty-scans.com/images?QUERY"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
@ -150,10 +127,7 @@ class DynastyscansImageExtractor(DynastyscansSearchExtractor):
|
||||
"""Extractor for individual images on dynasty-scans.com"""
|
||||
subcategory = "image"
|
||||
pattern = BASE_PATTERN + r"/images/(\d+)"
|
||||
test = ("https://dynasty-scans.com/images/1245", {
|
||||
"url": "15e54bd94148a07ed037f387d046c27befa043b2",
|
||||
"keyword": "0d8976c2d6fbc9ed6aa712642631b96e456dc37f",
|
||||
})
|
||||
example = "https://dynasty-scans.com/images/12345"
|
||||
|
||||
def images(self):
|
||||
return (self.query,)
|
||||
|
@ -84,48 +84,13 @@ BASE_PATTERN = E621Extractor.update({
|
||||
class E621TagExtractor(E621Extractor, danbooru.DanbooruTagExtractor):
|
||||
"""Extractor for e621 posts from tag searches"""
|
||||
pattern = BASE_PATTERN + r"/posts?(?:\?.*?tags=|/index/\d+/)([^&#]+)"
|
||||
test = (
|
||||
("https://e621.net/posts?tags=anry", {
|
||||
"url": "8021e5ea28d47c474c1ffc9bd44863c4d45700ba",
|
||||
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
|
||||
}),
|
||||
("https://e621.net/post/index/1/anry"),
|
||||
("https://e621.net/post?tags=anry"),
|
||||
|
||||
("https://e926.net/posts?tags=anry", {
|
||||
"url": "12198b275c62ffe2de67cca676c8e64de80c425d",
|
||||
"content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58",
|
||||
}),
|
||||
("https://e926.net/post/index/1/anry"),
|
||||
("https://e926.net/post?tags=anry"),
|
||||
|
||||
("https://e6ai.net/posts?tags=anry"),
|
||||
("https://e6ai.net/post/index/1/anry"),
|
||||
("https://e6ai.net/post?tags=anry"),
|
||||
)
|
||||
example = "https://e621.net/posts?tags=TAG"
|
||||
|
||||
|
||||
class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
|
||||
"""Extractor for e621 pools"""
|
||||
pattern = BASE_PATTERN + r"/pool(?:s|/show)/(\d+)"
|
||||
test = (
|
||||
("https://e621.net/pools/73", {
|
||||
"url": "1bd09a72715286a79eea3b7f09f51b3493eb579a",
|
||||
"content": "91abe5d5334425d9787811d7f06d34c77974cd22",
|
||||
}),
|
||||
("https://e621.net/pool/show/73"),
|
||||
|
||||
("https://e926.net/pools/73", {
|
||||
"url": "6936f1b6a18c5c25bee7cad700088dbc2503481b",
|
||||
"content": "91abe5d5334425d9787811d7f06d34c77974cd22",
|
||||
}),
|
||||
("https://e926.net/pool/show/73"),
|
||||
|
||||
("https://e6ai.net/pools/3", {
|
||||
"url": "a6d1ad67a3fa9b9f73731d34d5f6f26f7e85855f",
|
||||
}),
|
||||
("https://e6ai.net/pool/show/3"),
|
||||
)
|
||||
example = "https://e621.net/pools/12345"
|
||||
|
||||
def posts(self):
|
||||
self.log.info("Fetching posts of pool %s", self.pool_id)
|
||||
@ -151,67 +116,7 @@ class E621PoolExtractor(E621Extractor, danbooru.DanbooruPoolExtractor):
|
||||
class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
|
||||
"""Extractor for single e621 posts"""
|
||||
pattern = BASE_PATTERN + r"/post(?:s|/show)/(\d+)"
|
||||
test = (
|
||||
("https://e621.net/posts/535", {
|
||||
"url": "f7f78b44c9b88f8f09caac080adc8d6d9fdaa529",
|
||||
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
|
||||
"keyword": {"date": "dt:2007-02-17 19:02:32"},
|
||||
}),
|
||||
("https://e621.net/posts/3181052", {
|
||||
"options": (("metadata", "notes,pools"),),
|
||||
"pattern": r"https://static\d\.e621\.net/data/c6/8c"
|
||||
r"/c68cca0643890b615f75fb2719589bff\.png",
|
||||
"keyword": {
|
||||
"notes": [
|
||||
{
|
||||
"body": "Little Legends 2",
|
||||
"created_at": "2022-05-16T13:58:38.877-04:00",
|
||||
"creator_id": 517450,
|
||||
"creator_name": "EeveeCuddler69",
|
||||
"height": 475,
|
||||
"id": 321296,
|
||||
"is_active": True,
|
||||
"post_id": 3181052,
|
||||
"updated_at": "2022-05-16T13:59:02.050-04:00",
|
||||
"version": 3,
|
||||
"width": 809,
|
||||
"x": 83,
|
||||
"y": 117,
|
||||
},
|
||||
],
|
||||
"pools": [
|
||||
{
|
||||
"category": "series",
|
||||
"created_at": "2022-02-17T00:29:22.669-05:00",
|
||||
"creator_id": 1077440,
|
||||
"creator_name": "Yeetus90",
|
||||
"description": "* \"Little Legends\":/pools/27971\r\n"
|
||||
"* Little Legends 2\r\n"
|
||||
"* \"Little Legends 3\":/pools/27481",
|
||||
"id": 27492,
|
||||
"is_active": False,
|
||||
"name": "Little Legends 2",
|
||||
"post_count": 39,
|
||||
"post_ids": list,
|
||||
"updated_at": "2022-03-27T06:30:03.382-04:00"
|
||||
},
|
||||
],
|
||||
},
|
||||
}),
|
||||
("https://e621.net/post/show/535"),
|
||||
|
||||
("https://e926.net/posts/535", {
|
||||
"url": "17aec8ebd8fab098d321adcb62a2db59dab1f4bf",
|
||||
"content": "66f46e96a893fba8e694c4e049b23c2acc9af462",
|
||||
}),
|
||||
("https://e926.net/post/show/535"),
|
||||
|
||||
("https://e6ai.net/posts/23", {
|
||||
"url": "3c85a806b3d9eec861948af421fe0e8ad6b8f881",
|
||||
"content": "a05a484e4eb64637d56d751c02e659b4bc8ea5d5",
|
||||
}),
|
||||
("https://e6ai.net/post/show/23"),
|
||||
)
|
||||
example = "https://e621.net/posts/12345"
|
||||
|
||||
def posts(self):
|
||||
url = "{}/posts/{}.json".format(self.root, self.post_id)
|
||||
@ -221,23 +126,7 @@ class E621PostExtractor(E621Extractor, danbooru.DanbooruPostExtractor):
|
||||
class E621PopularExtractor(E621Extractor, danbooru.DanbooruPopularExtractor):
|
||||
"""Extractor for popular images from e621"""
|
||||
pattern = BASE_PATTERN + r"/explore/posts/popular(?:\?([^#]*))?"
|
||||
test = (
|
||||
("https://e621.net/explore/posts/popular"),
|
||||
(("https://e621.net/explore/posts/popular"
|
||||
"?date=2019-06-01&scale=month"), {
|
||||
"pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
|
||||
"count": ">= 70",
|
||||
}),
|
||||
|
||||
("https://e926.net/explore/posts/popular"),
|
||||
(("https://e926.net/explore/posts/popular"
|
||||
"?date=2019-06-01&scale=month"), {
|
||||
"pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+",
|
||||
"count": ">= 70",
|
||||
}),
|
||||
|
||||
("https://e6ai.net/explore/posts/popular"),
|
||||
)
|
||||
example = "https://e621.net/explore/posts/popular"
|
||||
|
||||
def posts(self):
|
||||
return self._pagination("/popular.json", self.params)
|
||||
@ -249,21 +138,7 @@ class E621FavoriteExtractor(E621Extractor):
|
||||
directory_fmt = ("{category}", "Favorites", "{user_id}")
|
||||
archive_fmt = "f_{user_id}_{id}"
|
||||
pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
|
||||
test = (
|
||||
("https://e621.net/favorites"),
|
||||
("https://e621.net/favorites?page=2&user_id=53275", {
|
||||
"pattern": r"https://static\d.e621.net/data/../../[0-9a-f]+",
|
||||
"count": "> 260",
|
||||
}),
|
||||
|
||||
("https://e926.net/favorites"),
|
||||
("https://e926.net/favorites?page=2&user_id=53275", {
|
||||
"pattern": r"https://static\d.e926.net/data/../../[0-9a-f]+",
|
||||
"count": "> 260",
|
||||
}),
|
||||
|
||||
("https://e6ai.net/favorites"),
|
||||
)
|
||||
example = "https://e621.net/favorites"
|
||||
|
||||
def __init__(self, match):
|
||||
E621Extractor.__init__(self, match)
|
||||
|
@ -91,29 +91,7 @@ class EromeAlbumExtractor(EromeExtractor):
|
||||
"""Extractor for albums on erome.com"""
|
||||
subcategory = "album"
|
||||
pattern = BASE_PATTERN + r"/a/(\w+)"
|
||||
test = (
|
||||
("https://www.erome.com/a/NQgdlWvk", {
|
||||
"pattern": r"https://v\d+\.erome\.com/\d+"
|
||||
r"/NQgdlWvk/j7jlzmYB_480p\.mp4",
|
||||
"count": 1,
|
||||
"keyword": {
|
||||
"album_id": "NQgdlWvk",
|
||||
"num": 1,
|
||||
"title": "porn",
|
||||
"user": "yYgWBZw8o8qsMzM",
|
||||
},
|
||||
}),
|
||||
("https://www.erome.com/a/TdbZ4ogi", {
|
||||
"pattern": r"https://s\d+\.erome\.com/\d+/TdbZ4ogi/\w+",
|
||||
"count": 6,
|
||||
"keyword": {
|
||||
"album_id": "TdbZ4ogi",
|
||||
"num": int,
|
||||
"title": "82e78cfbb461ad87198f927fcb1fda9a1efac9ff.",
|
||||
"user": "yYgWBZw8o8qsMzM",
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://www.erome.com/a/ID"
|
||||
|
||||
def albums(self):
|
||||
return (self.item,)
|
||||
@ -122,10 +100,7 @@ class EromeAlbumExtractor(EromeExtractor):
|
||||
class EromeUserExtractor(EromeExtractor):
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/(?!a/|search\?)([^/?#]+)"
|
||||
test = ("https://www.erome.com/yYgWBZw8o8qsMzM", {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
})
|
||||
example = "https://www.erome.com/USER"
|
||||
|
||||
def albums(self):
|
||||
url = "{}/{}".format(self.root, self.item)
|
||||
@ -135,10 +110,7 @@ class EromeUserExtractor(EromeExtractor):
|
||||
class EromeSearchExtractor(EromeExtractor):
|
||||
subcategory = "search"
|
||||
pattern = BASE_PATTERN + r"/search\?q=([^&#]+)"
|
||||
test = ("https://www.erome.com/search?q=cute", {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
})
|
||||
example = "https://www.erome.com/search?q=QUERY"
|
||||
|
||||
def albums(self):
|
||||
url = self.root + "/search"
|
||||
|
@ -45,7 +45,6 @@ class ExhentaiExtractor(Extractor):
|
||||
|
||||
if self.version != "ex":
|
||||
self.cookies.set("nw", "1", domain=self.cookies_domain)
|
||||
self.session.headers["Referer"] = self.root + "/"
|
||||
self.original = self.config("original", True)
|
||||
|
||||
limits = self.config("limits", False)
|
||||
@ -109,61 +108,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
||||
pattern = (BASE_PATTERN +
|
||||
r"(?:/g/(\d+)/([\da-f]{10})"
|
||||
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
|
||||
test = (
|
||||
("https://exhentai.org/g/1200119/d55c44d3d0/", {
|
||||
"options": (("original", False),),
|
||||
"keyword": {
|
||||
"cost": int,
|
||||
"date": "dt:2018-03-18 20:14:00",
|
||||
"eh_category": "Non-H",
|
||||
"expunged": False,
|
||||
"favorites": r"re:^[12]\d$",
|
||||
"filecount": "4",
|
||||
"filesize": 1488978,
|
||||
"gid": 1200119,
|
||||
"height": int,
|
||||
"image_token": "re:[0-9a-f]{10}",
|
||||
"lang": "ja",
|
||||
"language": "Japanese",
|
||||
"parent": "",
|
||||
"rating": r"re:\d\.\d+",
|
||||
"size": int,
|
||||
"tags": [
|
||||
"parody:komi-san wa komyushou desu.",
|
||||
"character:shouko komi",
|
||||
"group:seventh lowlife",
|
||||
"other:sample",
|
||||
],
|
||||
"thumb": "https://exhentai.org/t/ce/0a/ce0a5bcb583229a9b07c0f8"
|
||||
"3bcb1630ab1350640-624622-736-1036-jpg_250.jpg",
|
||||
"title": "C93 [Seventh_Lowlife] Komi-san ha Tokidoki Daitan de"
|
||||
"su (Komi-san wa Komyushou desu) [Sample]",
|
||||
"title_jpn": "(C93) [Comiketjack (わ!)] 古見さんは、時々大胆"
|
||||
"です。 (古見さんは、コミュ症です。) [見本]",
|
||||
"token": "d55c44d3d0",
|
||||
"torrentcount": "0",
|
||||
"uploader": "klorpa",
|
||||
"width": int,
|
||||
},
|
||||
"content": ("2c68cff8a7ca540a78c36fdbf5fbae0260484f87",
|
||||
"e9891a4c017ed0bb734cd1efba5cd03f594d31ff"),
|
||||
}),
|
||||
("https://exhentai.org/g/960461/4f0e369d82/", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
("http://exhentai.org/g/962698/7f02358e00/", {
|
||||
"exception": exception.AuthorizationError,
|
||||
}),
|
||||
("https://exhentai.org/s/f68367b4c8/1200119-3", {
|
||||
"options": (("original", False),),
|
||||
"count": 2,
|
||||
}),
|
||||
("https://e-hentai.org/s/f68367b4c8/1200119-3", {
|
||||
"options": (("original", False),),
|
||||
"count": 2,
|
||||
}),
|
||||
("https://g.e-hentai.org/g/1200119/d55c44d3d0/"),
|
||||
)
|
||||
example = "https://e-hentai.org/g/12345/67890abcde/"
|
||||
|
||||
def __init__(self, match):
|
||||
ExhentaiExtractor.__init__(self, match)
|
||||
@ -179,6 +124,20 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
||||
if source == "hitomi":
|
||||
self.items = self._items_hitomi
|
||||
|
||||
def favorite(self, slot="0"):
|
||||
url = self.root + "/gallerypopups.php"
|
||||
params = {
|
||||
"gid": self.gallery_id,
|
||||
"t" : self.gallery_token,
|
||||
"act": "addfav",
|
||||
}
|
||||
data = {
|
||||
"favcat" : slot,
|
||||
"apply" : "Apply Changes",
|
||||
"update" : "1",
|
||||
}
|
||||
self.request(url, method="POST", params=params, data=data)
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
|
||||
@ -223,6 +182,10 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
||||
data["_http_validate"] = None
|
||||
yield Message.Url, url, data
|
||||
|
||||
fav = self.config("fav")
|
||||
if fav is not None:
|
||||
self.favorite(fav)
|
||||
|
||||
def _items_hitomi(self):
|
||||
if self.config("metadata", False):
|
||||
data = self.metadata_from_api()
|
||||
@ -463,26 +426,10 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
|
||||
"""Extractor for exhentai search results"""
|
||||
subcategory = "search"
|
||||
pattern = BASE_PATTERN + r"/(?:\?([^#]*)|tag/([^/?#]+))"
|
||||
test = (
|
||||
("https://e-hentai.org/?f_search=touhou"),
|
||||
("https://exhentai.org/?f_cats=767&f_search=touhou"),
|
||||
("https://exhentai.org/tag/parody:touhou+project"),
|
||||
(("https://exhentai.org/?f_doujinshi=0&f_manga=0&f_artistcg=0"
|
||||
"&f_gamecg=0&f_western=0&f_non-h=1&f_imageset=0&f_cosplay=0"
|
||||
"&f_asianporn=0&f_misc=0&f_search=touhou&f_apply=Apply+Filter"), {
|
||||
"pattern": ExhentaiGalleryExtractor.pattern,
|
||||
"range": "1-30",
|
||||
"count": 30,
|
||||
"keyword": {
|
||||
"gallery_id": int,
|
||||
"gallery_token": r"re:^[0-9a-f]{10}$"
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://e-hentai.org/?f_search=QUERY"
|
||||
|
||||
def __init__(self, match):
|
||||
ExhentaiExtractor.__init__(self, match)
|
||||
self.search_url = self.root
|
||||
|
||||
_, query, tag = match.groups()
|
||||
if tag:
|
||||
@ -497,6 +444,9 @@ class ExhentaiSearchExtractor(ExhentaiExtractor):
|
||||
if "next" not in self.params:
|
||||
self.params["page"] = text.parse_int(self.params.get("page"))
|
||||
|
||||
def _init(self):
|
||||
self.search_url = self.root
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
data = {"_extractor": ExhentaiGalleryExtractor}
|
||||
@ -533,15 +483,7 @@ class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
|
||||
"""Extractor for favorited exhentai galleries"""
|
||||
subcategory = "favorite"
|
||||
pattern = BASE_PATTERN + r"/favorites\.php(?:\?([^#]*)())?"
|
||||
test = (
|
||||
("https://e-hentai.org/favorites.php", {
|
||||
"count": 1,
|
||||
"pattern": r"https?://e-hentai\.org/g/1200119/d55c44d3d0"
|
||||
}),
|
||||
("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"
|
||||
"&f_apply=Search+Favorites"),
|
||||
)
|
||||
example = "https://e-hentai.org/favorites.php"
|
||||
|
||||
def __init__(self, match):
|
||||
ExhentaiSearchExtractor.__init__(self, match)
|
||||
def _init(self):
|
||||
self.search_url = self.root + "/favorites.php"
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2017-2019 Mike Fährmann
|
||||
# Copyright 2017-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -13,24 +13,11 @@ from .. import text, util
|
||||
|
||||
|
||||
class FallenangelsChapterExtractor(ChapterExtractor):
|
||||
"""Extractor for manga-chapters from fascans.com"""
|
||||
"""Extractor for manga chapters from fascans.com"""
|
||||
category = "fallenangels"
|
||||
pattern = (r"(?:https?://)?(manga|truyen)\.fascans\.com"
|
||||
r"/manga/([^/?#]+)/([^/?#]+)")
|
||||
test = (
|
||||
("https://manga.fascans.com/manga/chronos-ruler/20/1", {
|
||||
"url": "4604a7914566cc2da0ff789aa178e2d1c8c241e3",
|
||||
"keyword": "2dfcc50020e32cd207be88e2a8fac0933e36bdfb",
|
||||
}),
|
||||
("http://truyen.fascans.com/manga/hungry-marie/8", {
|
||||
"url": "1f923d9cb337d5e7bbf4323719881794a951c6ae",
|
||||
"keyword": "2bdb7334c0e3eceb9946ffd3132df679b4a94f6a",
|
||||
}),
|
||||
("http://manga.fascans.com/manga/rakudai-kishi-no-eiyuutan/19.5", {
|
||||
"url": "273f6863966c83ea79ad5846a2866e08067d3f0e",
|
||||
"keyword": "d1065685bfe0054c4ff2a0f20acb089de4cec253",
|
||||
}),
|
||||
)
|
||||
example = "https://manga.fascans.com/manga/NAME/CHAPTER/"
|
||||
|
||||
def __init__(self, match):
|
||||
self.version, self.manga, self.chapter = match.groups()
|
||||
@ -66,16 +53,7 @@ class FallenangelsMangaExtractor(MangaExtractor):
|
||||
chapterclass = FallenangelsChapterExtractor
|
||||
category = "fallenangels"
|
||||
pattern = r"(?:https?://)?((manga|truyen)\.fascans\.com/manga/[^/]+)/?$"
|
||||
test = (
|
||||
("https://manga.fascans.com/manga/chronos-ruler", {
|
||||
"url": "eea07dd50f5bc4903aa09e2cc3e45c7241c9a9c2",
|
||||
"keyword": "c414249525d4c74ad83498b3c59a813557e59d7e",
|
||||
}),
|
||||
("https://truyen.fascans.com/manga/rakudai-kishi-no-eiyuutan", {
|
||||
"url": "51a731a6b82d5eb7a335fbae6b02d06aeb2ab07b",
|
||||
"keyword": "2d2a2a5d9ea5925eb9a47bb13d848967f3af086c",
|
||||
}),
|
||||
)
|
||||
example = "https://manga.fascans.com/manga/NAME"
|
||||
|
||||
def __init__(self, match):
|
||||
url = "https://" + match.group(1)
|
||||
|
@ -10,7 +10,6 @@ from .common import Extractor, Message
|
||||
from .. import text
|
||||
import re
|
||||
|
||||
|
||||
BASE_PATTERN = (
|
||||
r"(?:https?://)?(?:"
|
||||
r"(?!www\.)([\w-]+)\.fanbox\.cc|"
|
||||
@ -30,12 +29,12 @@ class FanboxExtractor(Extractor):
|
||||
def _init(self):
|
||||
self.embeds = self.config("embeds", True)
|
||||
|
||||
def items(self):
|
||||
if self._warning:
|
||||
if not self.cookies_check(("FANBOXSESSID",)):
|
||||
self.log.warning("no 'FANBOXSESSID' cookie set")
|
||||
FanboxExtractor._warning = False
|
||||
|
||||
def items(self):
|
||||
for content_body, post in self.posts():
|
||||
yield Message.Directory, post
|
||||
yield from self._get_urls_from_post(content_body, post)
|
||||
@ -243,20 +242,7 @@ class FanboxCreatorExtractor(FanboxExtractor):
|
||||
"""Extractor for a Fanbox creator's works"""
|
||||
subcategory = "creator"
|
||||
pattern = BASE_PATTERN + r"(?:/posts)?/?$"
|
||||
test = (
|
||||
("https://xub.fanbox.cc", {
|
||||
"range": "1-15",
|
||||
"count": ">= 15",
|
||||
"keyword": {
|
||||
"creatorId" : "xub",
|
||||
"tags" : list,
|
||||
"title" : str,
|
||||
},
|
||||
}),
|
||||
("https://xub.fanbox.cc/posts"),
|
||||
("https://www.fanbox.cc/@xub/"),
|
||||
("https://www.fanbox.cc/@xub/posts"),
|
||||
)
|
||||
example = "https://USER.fanbox.cc/"
|
||||
|
||||
def __init__(self, match):
|
||||
FanboxExtractor.__init__(self, match)
|
||||
@ -271,55 +257,7 @@ class FanboxPostExtractor(FanboxExtractor):
|
||||
"""Extractor for media from a single Fanbox post"""
|
||||
subcategory = "post"
|
||||
pattern = BASE_PATTERN + r"/posts/(\d+)"
|
||||
test = (
|
||||
("https://www.fanbox.cc/@xub/posts/1910054", {
|
||||
"count": 3,
|
||||
"keyword": {
|
||||
"title": "えま★おうがすと",
|
||||
"tags": list,
|
||||
"hasAdultContent": True,
|
||||
"isCoverImage": False
|
||||
},
|
||||
}),
|
||||
# entry post type, image embedded in html of the post
|
||||
("https://nekoworks.fanbox.cc/posts/915", {
|
||||
"count": 2,
|
||||
"keyword": {
|
||||
"title": "【SAYORI FAN CLUB】お届け内容",
|
||||
"tags": list,
|
||||
"html": str,
|
||||
"hasAdultContent": True
|
||||
},
|
||||
}),
|
||||
# article post type, imageMap, 2 twitter embeds, fanbox embed
|
||||
("https://steelwire.fanbox.cc/posts/285502", {
|
||||
"options": (("embeds", True),),
|
||||
"count": 10,
|
||||
"keyword": {
|
||||
"title": "イラスト+SS|義足の炭鉱少年が義足を見せてくれるだけ 【全体公開版】",
|
||||
"tags": list,
|
||||
"articleBody": dict,
|
||||
"hasAdultContent": True
|
||||
},
|
||||
}),
|
||||
# 'content' metadata (#3020)
|
||||
("https://www.fanbox.cc/@official-en/posts/4326303", {
|
||||
"keyword": {
|
||||
"content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, "
|
||||
r"September 5th, 2022, we are happy to announce "
|
||||
r"the start of the FANBOX hashtag event "
|
||||
r"#MySetupTour ! \nAbout the event\nTo join this "
|
||||
r"event .+ \nPlease check this page for further "
|
||||
r"details regarding the Privacy & Terms.\n"
|
||||
r"https://fanbox.pixiv.help/.+/10184952456601\n\n\n"
|
||||
r"Thank you for your continued support of FANBOX.$",
|
||||
},
|
||||
}),
|
||||
# imageMap file order (#2718)
|
||||
("https://mochirong.fanbox.cc/posts/3746116", {
|
||||
"url": "c92ddd06f2efc4a5fe30ec67e21544f79a5c4062",
|
||||
}),
|
||||
)
|
||||
example = "https://USER.fanbox.cc/posts/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
FanboxExtractor.__init__(self, match)
|
||||
@ -334,9 +272,7 @@ class FanboxRedirectExtractor(Extractor):
|
||||
category = "fanbox"
|
||||
subcategory = "redirect"
|
||||
pattern = r"(?:https?://)?(?:www\.)?pixiv\.net/fanbox/creator/(\d+)"
|
||||
test = ("https://www.pixiv.net/fanbox/creator/52336352", {
|
||||
"pattern": FanboxCreatorExtractor.pattern,
|
||||
})
|
||||
example = "https://www.pixiv.net/fanbox/creator/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -7,7 +7,7 @@
|
||||
"""Extractors for https://fanleaks.club/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from .. import text
|
||||
|
||||
|
||||
class FanleaksExtractor(Extractor):
|
||||
@ -36,34 +36,10 @@ class FanleaksExtractor(Extractor):
|
||||
|
||||
|
||||
class FanleaksPostExtractor(FanleaksExtractor):
|
||||
"""Extractor for individual posts on fanleak.club"""
|
||||
"""Extractor for individual posts on fanleaks.club"""
|
||||
subcategory = "post"
|
||||
pattern = r"(?:https?://)?(?:www\.)?fanleaks\.club/([^/?#]+)/(\d+)"
|
||||
test = (
|
||||
("https://fanleaks.club/selti/880", {
|
||||
"pattern": (r"https://fanleaks\.club//models"
|
||||
r"/selti/images/selti_0880\.jpg"),
|
||||
"keyword": {
|
||||
"model_id": "selti",
|
||||
"model" : "Selti",
|
||||
"id" : 880,
|
||||
"type" : "photo",
|
||||
},
|
||||
}),
|
||||
("https://fanleaks.club/daisy-keech/1038", {
|
||||
"pattern": (r"https://fanleaks\.club//models"
|
||||
r"/daisy-keech/videos/daisy-keech_1038\.mp4"),
|
||||
"keyword": {
|
||||
"model_id": "daisy-keech",
|
||||
"model" : "Daisy Keech",
|
||||
"id" : 1038,
|
||||
"type" : "video",
|
||||
},
|
||||
}),
|
||||
("https://fanleaks.club/hannahowo/000", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
)
|
||||
example = "https://fanleaks.club/MODEL/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
FanleaksExtractor.__init__(self, match)
|
||||
@ -79,22 +55,7 @@ class FanleaksModelExtractor(FanleaksExtractor):
|
||||
subcategory = "model"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?fanleaks\.club"
|
||||
r"/(?!latest/?$)([^/?#]+)/?$")
|
||||
test = (
|
||||
("https://fanleaks.club/hannahowo", {
|
||||
"pattern": (r"https://fanleaks\.club//models"
|
||||
r"/hannahowo/(images|videos)/hannahowo_\d+\.\w+"),
|
||||
"range" : "1-100",
|
||||
"count" : 100,
|
||||
}),
|
||||
("https://fanleaks.club/belle-delphine", {
|
||||
"pattern": (r"https://fanleaks\.club//models"
|
||||
r"/belle-delphine/(images|videos)"
|
||||
r"/belle-delphine_\d+\.\w+"),
|
||||
"range" : "1-100",
|
||||
"count" : 100,
|
||||
}),
|
||||
("https://fanleaks.club/daisy-keech"),
|
||||
)
|
||||
example = "https://fanleaks.club/MODEL"
|
||||
|
||||
def items(self):
|
||||
page_num = 1
|
||||
@ -102,8 +63,7 @@ class FanleaksModelExtractor(FanleaksExtractor):
|
||||
self.root + "/" + self.model_id, notfound="model").text
|
||||
data = {
|
||||
"model_id": self.model_id,
|
||||
"model" : text.unescape(
|
||||
text.extr(page, 'mt-4">', "</h1>")),
|
||||
"model" : text.unescape(text.extr(page, 'mt-4">', "</h1>")),
|
||||
"type" : "photo",
|
||||
}
|
||||
page_url = text.extr(page, "url: '", "'")
|
||||
|
@ -22,7 +22,6 @@ class FantiaExtractor(Extractor):
|
||||
def _init(self):
|
||||
self.headers = {
|
||||
"Accept" : "application/json, text/plain, */*",
|
||||
"Referer": self.root,
|
||||
"X-Requested-With": "XMLHttpRequest",
|
||||
}
|
||||
self._empty_plan = {
|
||||
@ -65,11 +64,9 @@ class FantiaExtractor(Extractor):
|
||||
|
||||
def _pagination(self, url):
|
||||
params = {"page": 1}
|
||||
headers = self.headers.copy()
|
||||
del headers["X-Requested-With"]
|
||||
|
||||
while True:
|
||||
page = self.request(url, params=params, headers=headers).text
|
||||
page = self.request(url, params=params).text
|
||||
self._csrf_token(page)
|
||||
|
||||
post_id = None
|
||||
@ -173,17 +170,7 @@ class FantiaCreatorExtractor(FantiaExtractor):
|
||||
"""Extractor for a Fantia creator's works"""
|
||||
subcategory = "creator"
|
||||
pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/fanclubs/(\d+)"
|
||||
test = (
|
||||
("https://fantia.jp/fanclubs/6939", {
|
||||
"range": "1-25",
|
||||
"count": ">= 25",
|
||||
"keyword": {
|
||||
"fanclub_user_id" : 52152,
|
||||
"tags" : list,
|
||||
"title" : str,
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://fantia.jp/fanclubs/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
FantiaExtractor.__init__(self, match)
|
||||
@ -198,53 +185,7 @@ class FantiaPostExtractor(FantiaExtractor):
|
||||
"""Extractor for media from a single Fantia post"""
|
||||
subcategory = "post"
|
||||
pattern = r"(?:https?://)?(?:www\.)?fantia\.jp/posts/(\d+)"
|
||||
test = (
|
||||
("https://fantia.jp/posts/1166373", {
|
||||
"pattern": r"https://("
|
||||
r"c\.fantia\.jp/uploads/post/file/1166373/|"
|
||||
r"cc\.fantia\.jp/uploads/post_content_photo"
|
||||
r"/file/732549[01]|"
|
||||
r"fantia\.jp/posts/1166373/album_image\?)",
|
||||
"keyword": {
|
||||
"blogpost_text": r"re:^$|"
|
||||
r"This is a test.\n\nThis is a test.\n\n|"
|
||||
r"Link to video:\nhttps://www.youtube.com"
|
||||
r"/watch\?v=5SSdvNcAagI\n\nhtml img from "
|
||||
r"another site:\n\n\n\n\n\n",
|
||||
"comment": "\n\n",
|
||||
"content_category": "re:thumb|blog|photo_gallery",
|
||||
"content_comment": str,
|
||||
"content_filename": "re:|",
|
||||
"content_title": r"re:Test (Blog Content \d+|Image Gallery)"
|
||||
r"|thumb",
|
||||
"date": "dt:2022-03-09 16:46:12",
|
||||
"fanclub_id": 356320,
|
||||
"fanclub_name": "Test Fantia",
|
||||
"fanclub_url": "https://fantia.jp/fanclubs/356320",
|
||||
"fanclub_user_id": 7487131,
|
||||
"fanclub_user_name": "2022/03/08 15:13:52の名無し",
|
||||
"file_url": str,
|
||||
"filename": str,
|
||||
"num": int,
|
||||
"plan": dict,
|
||||
"post_id": 1166373,
|
||||
"post_title": "Test Fantia Post",
|
||||
"post_url": "https://fantia.jp/posts/1166373",
|
||||
"posted_at": "Thu, 10 Mar 2022 01:46:12 +0900",
|
||||
"rating": "general",
|
||||
"tags": [],
|
||||
},
|
||||
}),
|
||||
("https://fantia.jp/posts/508363", {
|
||||
"count": 6,
|
||||
"keyword": {
|
||||
"post_title": "zunda逆バニーでおしりコッショリ",
|
||||
"tags": list,
|
||||
"rating": "adult",
|
||||
"post_id": 508363
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://fantia.jp/posts/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
FantiaExtractor.__init__(self, match)
|
||||
|
@ -14,25 +14,13 @@ class FapachiPostExtractor(Extractor):
|
||||
"""Extractor for individual posts on fapachi.com"""
|
||||
category = "fapachi"
|
||||
subcategory = "post"
|
||||
root = "https://fapachi.com"
|
||||
directory_fmt = ("{category}", "{user}")
|
||||
filename_fmt = "{user}_{id}.{extension}"
|
||||
archive_fmt = "{user}_{id}"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?fapachi\.com"
|
||||
r"/(?!search/)([^/?#]+)/media/(\d+)")
|
||||
root = "https://fapachi.com"
|
||||
test = (
|
||||
# NSFW
|
||||
("https://fapachi.com/sonson/media/0082", {
|
||||
"pattern": (r"https://fapachi\.com/models/s/o/"
|
||||
r"sonson/1/full/sonson_0082\.jpeg"),
|
||||
"keyword": {
|
||||
"user": "sonson",
|
||||
"id" : "0082",
|
||||
},
|
||||
}),
|
||||
# NSFW
|
||||
("https://fapachi.com/ferxiita/media/0159"),
|
||||
)
|
||||
example = "https://fapachi.com/MODEL/media/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
@ -54,17 +42,10 @@ class FapachiUserExtractor(Extractor):
|
||||
"""Extractor for all posts from a fapachi user"""
|
||||
category = "fapachi"
|
||||
subcategory = "user"
|
||||
root = "https://fapachi.com"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?fapachi\.com"
|
||||
r"/(?!search(?:/|$))([^/?#]+)(?:/page/(\d+))?$")
|
||||
root = "https://fapachi.com"
|
||||
test = (
|
||||
("https://fapachi.com/sonson", {
|
||||
"pattern": FapachiPostExtractor.pattern,
|
||||
"range" : "1-50",
|
||||
"count" : 50,
|
||||
}),
|
||||
("https://fapachi.com/ferxiita/page/3"),
|
||||
)
|
||||
example = "https://fapachi.com/MODEL"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -19,32 +19,7 @@ class FapelloPostExtractor(Extractor):
|
||||
archive_fmt = "{type}_{model}_{id}"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
|
||||
r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)")
|
||||
test = (
|
||||
("https://fapello.com/carrykey/530/", {
|
||||
"pattern": (r"https://fapello\.com/content/c/a"
|
||||
r"/carrykey/1000/carrykey_0530\.jpg"),
|
||||
"keyword": {
|
||||
"model": "carrykey",
|
||||
"id" : 530,
|
||||
"type" : "photo",
|
||||
"thumbnail": "",
|
||||
},
|
||||
}),
|
||||
("https://fapello.com/vladislava-661/693/", {
|
||||
"pattern": (r"https://cdn\.fapello\.com/content/v/l"
|
||||
r"/vladislava-661/1000/vladislava-661_0693\.mp4"),
|
||||
"keyword": {
|
||||
"model": "vladislava-661",
|
||||
"id" : 693,
|
||||
"type" : "video",
|
||||
"thumbnail": ("https://fapello.com/content/v/l"
|
||||
"/vladislava-661/1000/vladislava-661_0693.jpg"),
|
||||
},
|
||||
}),
|
||||
("https://fapello.com/carrykey/000/", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
)
|
||||
example = "https://fapello.com/MODEL/12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
@ -77,14 +52,7 @@ class FapelloModelExtractor(Extractor):
|
||||
r"/(?!top-(?:likes|followers)|popular_videos"
|
||||
r"|videos|trending|search/?$)"
|
||||
r"([^/?#]+)/?$")
|
||||
test = (
|
||||
("https://fapello.com/hyoon/", {
|
||||
"pattern": FapelloPostExtractor.pattern,
|
||||
"range" : "1-50",
|
||||
"count" : 50,
|
||||
}),
|
||||
("https://fapello.com/kobaebeefboo/"),
|
||||
)
|
||||
example = "https://fapello.com/model/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
@ -112,22 +80,7 @@ class FapelloPathExtractor(Extractor):
|
||||
pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
|
||||
r"/(?!search/?$)(top-(?:likes|followers)|videos|trending"
|
||||
r"|popular_videos/[^/?#]+)/?$")
|
||||
test = (
|
||||
("https://fapello.com/top-likes/", {
|
||||
"pattern": FapelloModelExtractor.pattern,
|
||||
"range" : "1-10",
|
||||
"count" : 10,
|
||||
}),
|
||||
("https://fapello.com/videos/", {
|
||||
"pattern": FapelloPostExtractor.pattern,
|
||||
"range" : "1-10",
|
||||
"count" : 10,
|
||||
}),
|
||||
("https://fapello.com/top-followers/"),
|
||||
("https://fapello.com/trending/"),
|
||||
("https://fapello.com/popular_videos/twelve_hours/"),
|
||||
("https://fapello.com/popular_videos/week/"),
|
||||
)
|
||||
example = "https://fapello.com/trending/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2017-2022 Mike Fährmann
|
||||
# Copyright 2017-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -64,42 +64,7 @@ class FlickrImageExtractor(FlickrExtractor):
|
||||
r"(?:(?:www\.|secure\.|m\.)?flickr\.com/photos/[^/?#]+/"
|
||||
r"|[\w-]+\.static\.?flickr\.com/(?:\d+/)+)(\d+)"
|
||||
r"|flic\.kr/p/([A-Za-z1-9]+))")
|
||||
test = (
|
||||
("https://www.flickr.com/photos/departingyyz/16089302239", {
|
||||
"pattern": pattern,
|
||||
"content": ("3133006c6d657fe54cf7d4c46b82abbcb0efaf9f",
|
||||
"0821a28ee46386e85b02b67cf2720063440a228c"),
|
||||
"keyword": {
|
||||
"comments": int,
|
||||
"description": str,
|
||||
"extension": "jpg",
|
||||
"filename": "16089302239_de18cd8017_b",
|
||||
"id": 16089302239,
|
||||
"height": 683,
|
||||
"label": "Large",
|
||||
"media": "photo",
|
||||
"url": str,
|
||||
"views": int,
|
||||
"width": 1024,
|
||||
},
|
||||
}),
|
||||
("https://secure.flickr.com/photos/departingyyz/16089302239"),
|
||||
("https://m.flickr.com/photos/departingyyz/16089302239"),
|
||||
("https://flickr.com/photos/departingyyz/16089302239"),
|
||||
|
||||
("https://www.flickr.com/photos/145617051@N08/46733161535", {
|
||||
"count": 1,
|
||||
"keyword": {"media": "video"},
|
||||
}),
|
||||
("http://c2.staticflickr.com/2/1475/24531000464_9a7503ae68_b.jpg", {
|
||||
"pattern": pattern}),
|
||||
("https://farm2.static.flickr.com/1035/1188352415_cb139831d0.jpg", {
|
||||
"pattern": pattern}),
|
||||
("https://flic.kr/p/FPVo9U", {
|
||||
"pattern": pattern}),
|
||||
("https://www.flickr.com/photos/zzz/16089302238", {
|
||||
"exception": exception.NotFoundError}),
|
||||
)
|
||||
example = "https://www.flickr.com/photos/USER/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
FlickrExtractor.__init__(self, match)
|
||||
@ -145,18 +110,7 @@ class FlickrAlbumExtractor(FlickrExtractor):
|
||||
"Albums", "{album[id]} {album[title]}")
|
||||
archive_fmt = "a_{album[id]}_{id}"
|
||||
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/(?:album|set)s(?:/(\d+))?"
|
||||
test = (
|
||||
(("https://www.flickr.com/photos/shona_s/albums/72157633471741607"), {
|
||||
"pattern": FlickrImageExtractor.pattern,
|
||||
"count": 6,
|
||||
}),
|
||||
("https://www.flickr.com/photos/shona_s/albums", {
|
||||
"pattern": pattern,
|
||||
"count": 2,
|
||||
}),
|
||||
("https://secure.flickr.com/photos/shona_s/albums"),
|
||||
("https://m.flickr.com/photos/shona_s/albums"),
|
||||
)
|
||||
example = "https://www.flickr.com/photos/USER/albums/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
FlickrExtractor.__init__(self, match)
|
||||
@ -194,11 +148,7 @@ class FlickrGalleryExtractor(FlickrExtractor):
|
||||
"Galleries", "{gallery[gallery_id]} {gallery[title]}")
|
||||
archive_fmt = "g_{gallery[id]}_{id}"
|
||||
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/galleries/(\d+)"
|
||||
test = (("https://www.flickr.com/photos/flickr/"
|
||||
"galleries/72157681572514792/"), {
|
||||
"pattern": FlickrImageExtractor.pattern,
|
||||
"count": ">= 10",
|
||||
})
|
||||
example = "https://www.flickr.com/photos/USER/galleries/12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
FlickrExtractor.__init__(self, match)
|
||||
@ -219,10 +169,7 @@ class FlickrGroupExtractor(FlickrExtractor):
|
||||
directory_fmt = ("{category}", "Groups", "{group[groupname]}")
|
||||
archive_fmt = "G_{group[nsid]}_{id}"
|
||||
pattern = BASE_PATTERN + r"/groups/([^/?#]+)"
|
||||
test = ("https://www.flickr.com/groups/bird_headshots/", {
|
||||
"pattern": FlickrImageExtractor.pattern,
|
||||
"count": "> 150",
|
||||
})
|
||||
example = "https://www.flickr.com/groups/NAME/"
|
||||
|
||||
def metadata(self):
|
||||
self.group = self.api.urls_lookupGroup(self.item_id)
|
||||
@ -237,10 +184,7 @@ class FlickrUserExtractor(FlickrExtractor):
|
||||
subcategory = "user"
|
||||
archive_fmt = "u_{user[nsid]}_{id}"
|
||||
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/?$"
|
||||
test = ("https://www.flickr.com/photos/shona_s/", {
|
||||
"pattern": FlickrImageExtractor.pattern,
|
||||
"count": 28,
|
||||
})
|
||||
example = "https://www.flickr.com/photos/USER/"
|
||||
|
||||
def photos(self):
|
||||
return self.api.people_getPhotos(self.user["nsid"])
|
||||
@ -252,10 +196,7 @@ class FlickrFavoriteExtractor(FlickrExtractor):
|
||||
directory_fmt = ("{category}", "{user[username]}", "Favorites")
|
||||
archive_fmt = "f_{user[nsid]}_{id}"
|
||||
pattern = BASE_PATTERN + r"/photos/([^/?#]+)/favorites"
|
||||
test = ("https://www.flickr.com/photos/shona_s/favorites", {
|
||||
"pattern": FlickrImageExtractor.pattern,
|
||||
"count": 4,
|
||||
})
|
||||
example = "https://www.flickr.com/photos/USER/favorites"
|
||||
|
||||
def photos(self):
|
||||
return self.api.favorites_getList(self.user["nsid"])
|
||||
@ -267,11 +208,7 @@ class FlickrSearchExtractor(FlickrExtractor):
|
||||
directory_fmt = ("{category}", "Search", "{search[text]}")
|
||||
archive_fmt = "s_{search}_{id}"
|
||||
pattern = BASE_PATTERN + r"/search/?\?([^#]+)"
|
||||
test = (
|
||||
("https://flickr.com/search/?text=mountain"),
|
||||
("https://flickr.com/search/?text=tree%20cloud%20house"
|
||||
"&color_codes=4&styles=minimalism"),
|
||||
)
|
||||
example = "https://flickr.com/search/?text=QUERY"
|
||||
|
||||
def __init__(self, match):
|
||||
FlickrExtractor.__init__(self, match)
|
||||
|
@ -25,9 +25,6 @@ class FoolfuukaExtractor(BaseExtractor):
|
||||
if self.category == "b4k":
|
||||
self.remote = self._remote_direct
|
||||
|
||||
def _init(self):
|
||||
self.session.headers["Referer"] = self.root + "/"
|
||||
|
||||
def items(self):
|
||||
yield Message.Directory, self.metadata()
|
||||
for post in self.posts():
|
||||
@ -111,43 +108,7 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
||||
directory_fmt = ("{category}", "{board[shortname]}",
|
||||
"{thread_num} {title|comment[:50]}")
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)"
|
||||
test = (
|
||||
("https://archive.4plebs.org/tg/thread/54059290", {
|
||||
"url": "fd823f17b5001442b941fddcd9ec91bafedfbc79",
|
||||
}),
|
||||
("https://archived.moe/gd/thread/309639/", {
|
||||
"url": "fdd533840e2d535abd162c02d6dfadbc12e2dcd8",
|
||||
"content": "c27e2a7be3bc989b5dd859f7789cc854db3f5573",
|
||||
}),
|
||||
("https://archived.moe/a/thread/159767162/", {
|
||||
"url": "ffec05a1a1b906b5ca85992513671c9155ee9e87",
|
||||
}),
|
||||
("https://archiveofsins.com/h/thread/4668813/", {
|
||||
"url": "f612d287087e10a228ef69517cf811539db9a102",
|
||||
"content": "0dd92d0d8a7bf6e2f7d1f5ac8954c1bcf18c22a4",
|
||||
}),
|
||||
("https://arch.b4k.co/meta/thread/196/", {
|
||||
"url": "d309713d2f838797096b3e9cb44fe514a9c9d07a",
|
||||
}),
|
||||
("https://desuarchive.org/a/thread/159542679/", {
|
||||
"url": "e7d624aded15a069194e38dc731ec23217a422fb",
|
||||
}),
|
||||
("https://boards.fireden.net/sci/thread/11264294/", {
|
||||
"url": "61cab625c95584a12a30049d054931d64f8d20aa",
|
||||
}),
|
||||
("https://archive.palanq.win/c/thread/4209598/", {
|
||||
"url": "1f9b5570d228f1f2991c827a6631030bc0e5933c",
|
||||
}),
|
||||
("https://rbt.asia/g/thread/61487650/", {
|
||||
"url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
|
||||
}),
|
||||
("https://archive.rebeccablacktech.com/g/thread/61487650/", {
|
||||
"url": "fadd274b25150a1bdf03a40c58db320fa3b617c4",
|
||||
}),
|
||||
("https://thebarchive.com/b/thread/739772332/", {
|
||||
"url": "e8b18001307d130d67db31740ce57c8561b5d80c",
|
||||
}),
|
||||
)
|
||||
example = "https://archived.moe/a/thread/12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
@ -175,17 +136,7 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
||||
"""Base extractor for FoolFuuka based boards/archives"""
|
||||
subcategory = "board"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
|
||||
test = (
|
||||
("https://archive.4plebs.org/tg/"),
|
||||
("https://archived.moe/gd/"),
|
||||
("https://archiveofsins.com/h/"),
|
||||
("https://arch.b4k.co/meta/"),
|
||||
("https://desuarchive.org/a/"),
|
||||
("https://boards.fireden.net/sci/"),
|
||||
("https://archive.palanq.win/c/"),
|
||||
("https://rbt.asia/g/"),
|
||||
("https://thebarchive.com/b/"),
|
||||
)
|
||||
example = "https://archived.moe/a/"
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
@ -217,18 +168,8 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||
subcategory = "search"
|
||||
directory_fmt = ("{category}", "search", "{search}")
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/search((?:/[^/?#]+/[^/?#]+)+)"
|
||||
example = "https://archived.moe/_/search/text/QUERY/"
|
||||
request_interval = 1.0
|
||||
test = (
|
||||
("https://archive.4plebs.org/_/search/text/test/"),
|
||||
("https://archived.moe/_/search/text/test/"),
|
||||
("https://archiveofsins.com/_/search/text/test/"),
|
||||
("https://archiveofsins.com/_/search/text/test/"),
|
||||
("https://desuarchive.org/_/search/text/test/"),
|
||||
("https://boards.fireden.net/_/search/text/test/"),
|
||||
("https://archive.palanq.win/_/search/text/test/"),
|
||||
("https://rbt.asia/_/search/text/test/"),
|
||||
("https://thebarchive.com/_/search/text/test/"),
|
||||
)
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
@ -283,17 +224,7 @@ class FoolfuukaGalleryExtractor(FoolfuukaExtractor):
|
||||
subcategory = "gallery"
|
||||
directory_fmt = ("{category}", "{board}", "gallery")
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/gallery(?:/(\d+))?"
|
||||
test = (
|
||||
("https://archive.4plebs.org/tg/gallery/1"),
|
||||
("https://archived.moe/gd/gallery/2"),
|
||||
("https://archiveofsins.com/h/gallery/3"),
|
||||
("https://arch.b4k.co/meta/gallery/"),
|
||||
("https://desuarchive.org/a/gallery/5"),
|
||||
("https://boards.fireden.net/sci/gallery/6"),
|
||||
("https://archive.palanq.win/c/gallery"),
|
||||
("https://rbt.asia/g/gallery/8"),
|
||||
("https://thebarchive.com/b/gallery/9"),
|
||||
)
|
||||
example = "https://archived.moe/a/gallery"
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
|
@ -53,13 +53,7 @@ class FoolslideChapterExtractor(FoolslideExtractor):
|
||||
"{manga}_c{chapter:>03}{chapter_minor:?//}_{page:>03}.{extension}")
|
||||
archive_fmt = "{id}"
|
||||
pattern = BASE_PATTERN + r"(/read/[^/?#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"
|
||||
test = (
|
||||
(("https://read.powermanga.org"
|
||||
"/read/one_piece_digital_colour_comics/en/0/75/"), {
|
||||
"url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
|
||||
"keyword": "a60c42f2634b7387899299d411ff494ed0ad6dbe",
|
||||
}),
|
||||
)
|
||||
example = "https://read.powermanga.org/read/MANGA/en/0/123/"
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.gallery_url).text
|
||||
@ -103,23 +97,7 @@ class FoolslideMangaExtractor(FoolslideExtractor):
|
||||
subcategory = "manga"
|
||||
categorytransfer = True
|
||||
pattern = BASE_PATTERN + r"(/series/[^/?#]+)"
|
||||
test = (
|
||||
(("https://read.powermanga.org"
|
||||
"/series/one_piece_digital_colour_comics/"), {
|
||||
"count": ">= 1",
|
||||
"keyword": {
|
||||
"chapter": int,
|
||||
"chapter_minor": str,
|
||||
"chapter_string": str,
|
||||
"group": "PowerManga",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga": "One Piece Digital Colour Comics",
|
||||
"title": str,
|
||||
"volume": int,
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://read.powermanga.org/series/MANGA/"
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.gallery_url).text
|
||||
|
@ -31,6 +31,7 @@ class FuraffinityExtractor(Extractor):
|
||||
|
||||
def _init(self):
|
||||
self.offset = 0
|
||||
self.external = self.config("external", False)
|
||||
|
||||
if self.config("descriptions") == "html":
|
||||
self._process_description = str.strip
|
||||
@ -41,13 +42,12 @@ class FuraffinityExtractor(Extractor):
|
||||
else:
|
||||
self._new_layout = None
|
||||
|
||||
def items(self):
|
||||
if self._warning:
|
||||
if not self.cookies_check(self.cookies_names):
|
||||
self.log.warning("no 'a' and 'b' session cookies set")
|
||||
FuraffinityExtractor._warning = False
|
||||
|
||||
external = self.config("external", False)
|
||||
def items(self):
|
||||
metadata = self.metadata()
|
||||
for post_id in util.advance(self.posts(), self.offset):
|
||||
post = self._parse_post(post_id)
|
||||
@ -57,7 +57,7 @@ class FuraffinityExtractor(Extractor):
|
||||
yield Message.Directory, post
|
||||
yield Message.Url, post["url"], post
|
||||
|
||||
if external:
|
||||
if self.external:
|
||||
for url in text.extract_iter(
|
||||
post["_description"], 'href="http', '"'):
|
||||
yield Message.Queue, "http" + url, post
|
||||
@ -219,12 +219,7 @@ class FuraffinityGalleryExtractor(FuraffinityExtractor):
|
||||
"""Extractor for a furaffinity user's gallery"""
|
||||
subcategory = "gallery"
|
||||
pattern = BASE_PATTERN + r"/gallery/([^/?#]+)"
|
||||
test = ("https://www.furaffinity.net/gallery/mirlinthloth/", {
|
||||
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
|
||||
r"/art/mirlinthloth/\d+/\d+.\w+\.\w+",
|
||||
"range": "45-50",
|
||||
"count": 6,
|
||||
})
|
||||
example = "https://www.furaffinity.net/gallery/USER/"
|
||||
|
||||
def posts(self):
|
||||
return self._pagination("gallery")
|
||||
@ -235,11 +230,7 @@ class FuraffinityScrapsExtractor(FuraffinityExtractor):
|
||||
subcategory = "scraps"
|
||||
directory_fmt = ("{category}", "{user!l}", "Scraps")
|
||||
pattern = BASE_PATTERN + r"/scraps/([^/?#]+)"
|
||||
test = ("https://www.furaffinity.net/scraps/mirlinthloth/", {
|
||||
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
|
||||
r"/art/[^/]+(/stories)?/\d+/\d+.\w+.",
|
||||
"count": ">= 3",
|
||||
})
|
||||
example = "https://www.furaffinity.net/scraps/USER/"
|
||||
|
||||
def posts(self):
|
||||
return self._pagination("scraps")
|
||||
@ -250,13 +241,7 @@ class FuraffinityFavoriteExtractor(FuraffinityExtractor):
|
||||
subcategory = "favorite"
|
||||
directory_fmt = ("{category}", "{user!l}", "Favorites")
|
||||
pattern = BASE_PATTERN + r"/favorites/([^/?#]+)"
|
||||
test = ("https://www.furaffinity.net/favorites/mirlinthloth/", {
|
||||
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
|
||||
r"/art/[^/]+/\d+/\d+.\w+\.\w+",
|
||||
"keyword": {"favorite_id": int},
|
||||
"range": "45-50",
|
||||
"count": 6,
|
||||
})
|
||||
example = "https://www.furaffinity.net/favorites/USER/"
|
||||
|
||||
def posts(self):
|
||||
return self._pagination_favorites()
|
||||
@ -273,19 +258,7 @@ class FuraffinitySearchExtractor(FuraffinityExtractor):
|
||||
subcategory = "search"
|
||||
directory_fmt = ("{category}", "Search", "{search}")
|
||||
pattern = BASE_PATTERN + r"/search(?:/([^/?#]+))?/?[?&]([^#]+)"
|
||||
test = (
|
||||
("https://www.furaffinity.net/search/?q=cute", {
|
||||
"pattern": r"https://d\d?\.f(uraffinity|acdn)\.net"
|
||||
r"/art/[^/]+/\d+/\d+.\w+\.\w+",
|
||||
"range": "45-50",
|
||||
"count": 6,
|
||||
}),
|
||||
# first page of search results (#2402)
|
||||
("https://www.furaffinity.net/search/?q=leaf&range=1day", {
|
||||
"range": "1-3",
|
||||
"count": 3,
|
||||
}),
|
||||
)
|
||||
example = "https://www.furaffinity.net/search/?q=QUERY"
|
||||
|
||||
def __init__(self, match):
|
||||
FuraffinityExtractor.__init__(self, match)
|
||||
@ -304,65 +277,7 @@ class FuraffinityPostExtractor(FuraffinityExtractor):
|
||||
"""Extractor for individual posts on furaffinity"""
|
||||
subcategory = "post"
|
||||
pattern = BASE_PATTERN + r"/(?:view|full)/(\d+)"
|
||||
test = (
|
||||
("https://www.furaffinity.net/view/21835115/", {
|
||||
"pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/(download/)?art"
|
||||
r"/mirlinthloth/music/1488278723/1480267446.mirlinthlot"
|
||||
r"h_dj_fennmink_-_bude_s_4_ever\.mp3",
|
||||
"keyword": {
|
||||
"artist" : "mirlinthloth",
|
||||
"artist_url" : "mirlinthloth",
|
||||
"date" : "dt:2016-11-27 17:24:06",
|
||||
"description": "A Song made playing the game Cosmic DJ.",
|
||||
"extension" : "mp3",
|
||||
"filename" : r"re:\d+\.\w+_dj_fennmink_-_bude_s_4_ever",
|
||||
"id" : 21835115,
|
||||
"tags" : list,
|
||||
"title" : "Bude's 4 Ever",
|
||||
"url" : r"re:https://d\d?\.f(uraffinity|acdn)\.net/art",
|
||||
"user" : "mirlinthloth",
|
||||
"views" : int,
|
||||
"favorites" : int,
|
||||
"comments" : int,
|
||||
"rating" : "General",
|
||||
"fa_category": "Music",
|
||||
"theme" : "All",
|
||||
"species" : "Unspecified / Any",
|
||||
"gender" : "Any",
|
||||
"width" : 120,
|
||||
"height" : 120,
|
||||
},
|
||||
}),
|
||||
# 'external' option (#1492)
|
||||
("https://www.furaffinity.net/view/42166511/", {
|
||||
"options": (("external", True),),
|
||||
"pattern": r"https://d\d*\.f(uraffinity|acdn)\.net/"
|
||||
r"|http://www\.postybirb\.com",
|
||||
"count": 2,
|
||||
}),
|
||||
# no tags (#2277)
|
||||
("https://www.furaffinity.net/view/45331225/", {
|
||||
"keyword": {
|
||||
"artist": "Kota_Remminders",
|
||||
"artist_url": "kotaremminders",
|
||||
"date": "dt:2022-01-03 17:49:33",
|
||||
"fa_category": "Adoptables",
|
||||
"filename": "1641232173.kotaremminders_chidopts1",
|
||||
"gender": "Any",
|
||||
"height": 905,
|
||||
"id": 45331225,
|
||||
"rating": "General",
|
||||
"species": "Unspecified / Any",
|
||||
"tags": [],
|
||||
"theme": "All",
|
||||
"title": "REMINDER",
|
||||
"width": 1280,
|
||||
},
|
||||
}),
|
||||
("https://furaffinity.net/view/21835115/"),
|
||||
("https://sfw.furaffinity.net/view/21835115/"),
|
||||
("https://www.furaffinity.net/full/21835115/"),
|
||||
)
|
||||
example = "https://www.furaffinity.net/view/12345/"
|
||||
|
||||
def posts(self):
|
||||
post_id = self.user
|
||||
@ -375,16 +290,7 @@ class FuraffinityUserExtractor(FuraffinityExtractor):
|
||||
subcategory = "user"
|
||||
cookies_domain = None
|
||||
pattern = BASE_PATTERN + r"/user/([^/?#]+)"
|
||||
test = (
|
||||
("https://www.furaffinity.net/user/mirlinthloth/", {
|
||||
"pattern": r"/gallery/mirlinthloth/$",
|
||||
}),
|
||||
("https://www.furaffinity.net/user/mirlinthloth/", {
|
||||
"options": (("include", "all"),),
|
||||
"pattern": r"/(gallery|scraps|favorites)/mirlinthloth/$",
|
||||
"count": 3,
|
||||
}),
|
||||
)
|
||||
example = "https://www.furaffinity.net/user/USER/"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
@ -402,11 +308,7 @@ class FuraffinityFollowingExtractor(FuraffinityExtractor):
|
||||
"""Extractor for a furaffinity user's watched users"""
|
||||
subcategory = "following"
|
||||
pattern = BASE_PATTERN + "/watchlist/by/([^/?#]+)"
|
||||
test = ("https://www.furaffinity.net/watchlist/by/mirlinthloth/", {
|
||||
"pattern": FuraffinityUserExtractor.pattern,
|
||||
"range": "176-225",
|
||||
"count": 50,
|
||||
})
|
||||
example = "https://www.furaffinity.net/watchlist/by/USER/"
|
||||
|
||||
def items(self):
|
||||
url = "{}/watchlist/by/{}/".format(self.root, self.user)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019 Mike Fährmann
|
||||
# Copyright 2019-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -18,22 +18,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
|
||||
category = "fuskator"
|
||||
root = "https://fuskator.com"
|
||||
pattern = r"(?:https?://)?fuskator\.com/(?:thumbs|expanded)/([^/?#]+)"
|
||||
test = (
|
||||
("https://fuskator.com/thumbs/d0GnIzXrSKU/", {
|
||||
"pattern": r"https://i\d+.fuskator.com/large/d0GnIzXrSKU/.+\.jpg",
|
||||
"count": 22,
|
||||
"keyword": {
|
||||
"gallery_id": 473023,
|
||||
"gallery_hash": "d0GnIzXrSKU",
|
||||
"title": "re:Shaved Brunette Babe Maria Ryabushkina with ",
|
||||
"views": int,
|
||||
"score": float,
|
||||
"count": 22,
|
||||
"tags": list,
|
||||
},
|
||||
}),
|
||||
("https://fuskator.com/expanded/gXpKzjgIidA/index.html"),
|
||||
)
|
||||
example = "https://fuskator.com/thumbs/ID/"
|
||||
|
||||
def __init__(self, match):
|
||||
self.gallery_hash = match.group(1)
|
||||
@ -82,13 +67,7 @@ class FuskatorSearchExtractor(Extractor):
|
||||
subcategory = "search"
|
||||
root = "https://fuskator.com"
|
||||
pattern = r"(?:https?://)?fuskator\.com(/(?:search|page)/.+)"
|
||||
test = (
|
||||
("https://fuskator.com/search/red_swimsuit/", {
|
||||
"pattern": FuskatorGalleryExtractor.pattern,
|
||||
"count": ">= 40",
|
||||
}),
|
||||
("https://fuskator.com/page/3/swimsuit/quality/"),
|
||||
)
|
||||
example = "https://fuskator.com/search/TAG/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -115,18 +115,7 @@ class GelbooruTagExtractor(GelbooruBase,
|
||||
gelbooru_v02.GelbooruV02TagExtractor):
|
||||
"""Extractor for images from gelbooru.com based on search-tags"""
|
||||
pattern = BASE_PATTERN + r"page=post&s=list&tags=([^&#]+)"
|
||||
test = (
|
||||
("https://gelbooru.com/index.php?page=post&s=list&tags=bonocho", {
|
||||
"count": 5,
|
||||
}),
|
||||
("https://gelbooru.com/index.php?page=post&s=list&tags=meiya_neon", {
|
||||
"range": "196-204",
|
||||
"url": "845a61aa1f90fb4ced841e8b7e62098be2e967bf",
|
||||
"pattern": r"https://img\d\.gelbooru\.com"
|
||||
r"/images/../../[0-9a-f]{32}\.jpg",
|
||||
"count": 9,
|
||||
}),
|
||||
)
|
||||
example = "https://gelbooru.com/index.php?page=post&s=list&tags=TAG"
|
||||
|
||||
|
||||
class GelbooruPoolExtractor(GelbooruBase,
|
||||
@ -134,11 +123,7 @@ class GelbooruPoolExtractor(GelbooruBase,
|
||||
"""Extractor for gelbooru pools"""
|
||||
per_page = 45
|
||||
pattern = BASE_PATTERN + r"page=pool&s=show&id=(\d+)"
|
||||
test = (
|
||||
("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
|
||||
"count": 6,
|
||||
}),
|
||||
)
|
||||
example = "https://gelbooru.com/index.php?page=pool&s=show&id=12345"
|
||||
|
||||
skip = GelbooruBase._skip_offset
|
||||
|
||||
@ -169,9 +154,7 @@ class GelbooruFavoriteExtractor(GelbooruBase,
|
||||
"""Extractor for gelbooru favorites"""
|
||||
per_page = 100
|
||||
pattern = BASE_PATTERN + r"page=favorites&s=view&id=(\d+)"
|
||||
test = ("https://gelbooru.com/index.php?page=favorites&s=view&id=279415", {
|
||||
"count": 3,
|
||||
})
|
||||
example = "https://gelbooru.com/index.php?page=favorites&s=view&id=12345"
|
||||
|
||||
skip = GelbooruBase._skip_offset
|
||||
|
||||
@ -221,76 +204,21 @@ class GelbooruPostExtractor(GelbooruBase,
|
||||
r"(?=(?:[^#]+&)?page=post(?:&|#|$))"
|
||||
r"(?=(?:[^#]+&)?s=view(?:&|#|$))"
|
||||
r"(?:[^#]+&)?id=(\d+)")
|
||||
test = (
|
||||
("https://gelbooru.com/index.php?page=post&s=view&id=313638", {
|
||||
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
|
||||
"count": 1,
|
||||
}),
|
||||
|
||||
("https://gelbooru.com/index.php?page=post&s=view&id=313638"),
|
||||
("https://gelbooru.com/index.php?s=view&page=post&id=313638"),
|
||||
("https://gelbooru.com/index.php?page=post&id=313638&s=view"),
|
||||
("https://gelbooru.com/index.php?s=view&id=313638&page=post"),
|
||||
("https://gelbooru.com/index.php?id=313638&page=post&s=view"),
|
||||
("https://gelbooru.com/index.php?id=313638&s=view&page=post"),
|
||||
|
||||
("https://gelbooru.com/index.php?page=post&s=view&id=6018318", {
|
||||
"options": (("tags", True),),
|
||||
"content": "977caf22f27c72a5d07ea4d4d9719acdab810991",
|
||||
"keyword": {
|
||||
"tags_artist": "kirisaki_shuusei",
|
||||
"tags_character": str,
|
||||
"tags_copyright": "vocaloid",
|
||||
"tags_general": str,
|
||||
"tags_metadata": str,
|
||||
},
|
||||
}),
|
||||
# video
|
||||
("https://gelbooru.com/index.php?page=post&s=view&id=5938076", {
|
||||
"content": "6360452fa8c2f0c1137749e81471238564df832a",
|
||||
"pattern": r"https://img\d\.gelbooru\.com/images"
|
||||
r"/22/61/226111273615049235b001b381707bd0\.webm",
|
||||
}),
|
||||
# notes
|
||||
("https://gelbooru.com/index.php?page=post&s=view&id=5997331", {
|
||||
"options": (("notes", True),),
|
||||
"keyword": {
|
||||
"notes": [
|
||||
{
|
||||
"body": "Look over this way when you talk~",
|
||||
"height": 553,
|
||||
"width": 246,
|
||||
"x": 35,
|
||||
"y": 72,
|
||||
},
|
||||
{
|
||||
"body": "Hey~\nAre you listening~?",
|
||||
"height": 557,
|
||||
"width": 246,
|
||||
"x": 1233,
|
||||
"y": 109,
|
||||
},
|
||||
],
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://gelbooru.com/index.php?page=post&s=view&id=12345"
|
||||
|
||||
|
||||
class GelbooruRedirectExtractor(GelbooruBase, Extractor):
|
||||
subcategory = "redirect"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com"
|
||||
r"/redirect\.php\?s=([^&#]+)")
|
||||
test = (("https://gelbooru.com/redirect.php?s=Ly9nZWxib29ydS5jb20vaW5kZXgu"
|
||||
"cGhwP3BhZ2U9cG9zdCZzPXZpZXcmaWQ9MTgzMDA0Ng=="), {
|
||||
"pattern": r"https://gelbooru.com/index.php"
|
||||
r"\?page=post&s=view&id=1830046"
|
||||
})
|
||||
example = "https://gelbooru.com/redirect.php?s=BASE64"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self.redirect_url = text.ensure_http_scheme(
|
||||
binascii.a2b_base64(match.group(1)).decode())
|
||||
self.url_base64 = match.group(1)
|
||||
|
||||
def items(self):
|
||||
url = text.ensure_http_scheme(binascii.a2b_base64(
|
||||
self.url_base64).decode())
|
||||
data = {"_extractor": GelbooruPostExtractor}
|
||||
yield Message.Queue, self.redirect_url, data
|
||||
yield Message.Queue, url, data
|
||||
|
@ -90,24 +90,7 @@ class GelbooruV01TagExtractor(GelbooruV01Extractor):
|
||||
directory_fmt = ("{category}", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
|
||||
test = (
|
||||
(("https://the-collection.booru.org"
|
||||
"/index.php?page=post&s=list&tags=parody"), {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
}),
|
||||
(("https://illusioncards.booru.org"
|
||||
"/index.php?page=post&s=list&tags=koikatsu"), {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
}),
|
||||
("https://allgirl.booru.org/index.php?page=post&s=list&tags=dress", {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
}),
|
||||
("https://drawfriends.booru.org/index.php?page=post&s=list&tags=all"),
|
||||
("https://vidyart2.booru.org/index.php?page=post&s=list&tags=all"),
|
||||
)
|
||||
example = "https://allgirl.booru.org/index.php?page=post&s=list&tags=TAG"
|
||||
|
||||
def __init__(self, match):
|
||||
GelbooruV01Extractor.__init__(self, match)
|
||||
@ -128,21 +111,7 @@ class GelbooruV01FavoriteExtractor(GelbooruV01Extractor):
|
||||
archive_fmt = "f_{favorite_id}_{id}"
|
||||
per_page = 50
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
|
||||
test = (
|
||||
(("https://the-collection.booru.org"
|
||||
"/index.php?page=favorites&s=view&id=1166"), {
|
||||
"count": 2,
|
||||
}),
|
||||
(("https://illusioncards.booru.org"
|
||||
"/index.php?page=favorites&s=view&id=84887"), {
|
||||
"count": 2,
|
||||
}),
|
||||
("https://allgirl.booru.org/index.php?page=favorites&s=view&id=380", {
|
||||
"count": 4,
|
||||
}),
|
||||
("https://drawfriends.booru.org/index.php?page=favorites&s=view&id=1"),
|
||||
("https://vidyart2.booru.org/index.php?page=favorites&s=view&id=1"),
|
||||
)
|
||||
example = "https://allgirl.booru.org/index.php?page=favorites&s=view&id=1"
|
||||
|
||||
def __init__(self, match):
|
||||
GelbooruV01Extractor.__init__(self, match)
|
||||
@ -161,40 +130,7 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
|
||||
subcategory = "post"
|
||||
archive_fmt = "{id}"
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
|
||||
test = (
|
||||
(("https://the-collection.booru.org"
|
||||
"/index.php?page=post&s=view&id=100520"), {
|
||||
"url": "0329ac8588bb93cf242ca0edbe3e995b4ba554e8",
|
||||
"content": "1e585874e7b874f7937df1060dd1517fef2f4dfb",
|
||||
}),
|
||||
(("https://illusioncards.booru.org"
|
||||
"/index.php?page=post&s=view&id=82746"), {
|
||||
"url": "3f9cd2fadf78869b90bc5422f27b48f1af0e0909",
|
||||
"content": "159e60b92d05597bd1bb63510c2c3e4a4bada1dc",
|
||||
}),
|
||||
("https://allgirl.booru.org/index.php?page=post&s=view&id=107213", {
|
||||
"url": "b416800d2d2b072f80d3b37cfca9cb806fb25d51",
|
||||
"content": "3e3c65e0854a988696e11adf0de52f8fa90a51c7",
|
||||
"keyword": {
|
||||
"created_at": "2021-02-13 16:27:39",
|
||||
"date": "dt:2021-02-13 16:27:39",
|
||||
"file_url": "https://img.booru.org/allgirl//images/107"
|
||||
"/2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb.jpg",
|
||||
"height": "1200",
|
||||
"id": "107213",
|
||||
"md5": "2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb",
|
||||
"rating": "s",
|
||||
"score": str,
|
||||
"source": "",
|
||||
"tags": "blush dress green_eyes green_hair hatsune_miku "
|
||||
"long_hair twintails vocaloid",
|
||||
"uploader": "Honochi31",
|
||||
"width": "1600"
|
||||
},
|
||||
}),
|
||||
("https://drawfriends.booru.org/index.php?page=post&s=view&id=107474"),
|
||||
("https://vidyart2.booru.org/index.php?page=post&s=view&id=39168"),
|
||||
)
|
||||
example = "https://allgirl.booru.org/index.php?page=post&s=view&id=12345"
|
||||
|
||||
def __init__(self, match):
|
||||
GelbooruV01Extractor.__init__(self, match)
|
||||
|
@ -183,6 +183,10 @@ INSTANCES = {
|
||||
"root": "https://hypnohub.net",
|
||||
"pattern": r"hypnohub\.net",
|
||||
},
|
||||
"xbooru": {
|
||||
"root": "https://xbooru.com",
|
||||
"pattern": r"xbooru\.com",
|
||||
},
|
||||
}
|
||||
|
||||
BASE_PATTERN = GelbooruV02Extractor.update(INSTANCES)
|
||||
@ -193,27 +197,7 @@ class GelbooruV02TagExtractor(GelbooruV02Extractor):
|
||||
directory_fmt = ("{category}", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=list&tags=([^&#]+)"
|
||||
test = (
|
||||
("https://rule34.xxx/index.php?page=post&s=list&tags=danraku", {
|
||||
"content": ("5c6ae9ee13e6d4bc9cb8bdce224c84e67fbfa36c",
|
||||
"622e80be3f496672c44aab5c47fbc6941c61bc79"),
|
||||
"pattern": r"https?://.*rule34\.xxx/images/\d+/[0-9a-f]+\.jpg",
|
||||
"count": 2,
|
||||
}),
|
||||
("https://safebooru.org/index.php?page=post&s=list&tags=bonocho", {
|
||||
"url": "17c61b386530cf4c30842c9f580d15ef1cd09586",
|
||||
"content": "e5ad4c5bf241b1def154958535bef6c2f6b733eb",
|
||||
}),
|
||||
("https://realbooru.com/index.php?page=post&s=list&tags=wine", {
|
||||
"count": ">= 64",
|
||||
}),
|
||||
("https://tbib.org/index.php?page=post&s=list&tags=yuyaiyaui", {
|
||||
"count": ">= 120",
|
||||
}),
|
||||
("https://hypnohub.net/index.php?page=post&s=list&tags=gonoike_biwa", {
|
||||
"url": "fe662b86d38c331fcac9c62af100167d404937dc",
|
||||
}),
|
||||
)
|
||||
example = "https://safebooru.org/index.php?page=post&s=list&tags=TAG"
|
||||
|
||||
def __init__(self, match):
|
||||
GelbooruV02Extractor.__init__(self, match)
|
||||
@ -232,21 +216,7 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
|
||||
directory_fmt = ("{category}", "pool", "{pool}")
|
||||
archive_fmt = "p_{pool}_{id}"
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=pool&s=show&id=(\d+)"
|
||||
test = (
|
||||
("https://rule34.xxx/index.php?page=pool&s=show&id=179", {
|
||||
"count": 3,
|
||||
}),
|
||||
("https://safebooru.org/index.php?page=pool&s=show&id=11", {
|
||||
"count": 5,
|
||||
}),
|
||||
("https://realbooru.com/index.php?page=pool&s=show&id=1", {
|
||||
"count": 3,
|
||||
}),
|
||||
("https://hypnohub.net/index.php?page=pool&s=show&id=61", {
|
||||
"url": "d314826280073441a2da609f70ee814d1f4b9407",
|
||||
"count": 3,
|
||||
}),
|
||||
)
|
||||
example = "https://safebooru.org/index.php?page=pool&s=show&id=12345"
|
||||
|
||||
def __init__(self, match):
|
||||
GelbooruV02Extractor.__init__(self, match)
|
||||
@ -298,23 +268,7 @@ class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):
|
||||
archive_fmt = "f_{favorite_id}_{id}"
|
||||
per_page = 50
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"
|
||||
test = (
|
||||
("https://rule34.xxx/index.php?page=favorites&s=view&id=1030218", {
|
||||
"count": 3,
|
||||
}),
|
||||
("https://safebooru.org/index.php?page=favorites&s=view&id=17567", {
|
||||
"count": 2,
|
||||
}),
|
||||
("https://realbooru.com/index.php?page=favorites&s=view&id=274", {
|
||||
"count": 2,
|
||||
}),
|
||||
("https://tbib.org/index.php?page=favorites&s=view&id=7881", {
|
||||
"count": 3,
|
||||
}),
|
||||
("https://hypnohub.net/index.php?page=favorites&s=view&id=43546", {
|
||||
"count": 3,
|
||||
}),
|
||||
)
|
||||
example = "https://safebooru.org/index.php?page=favorites&s=view&id=12345"
|
||||
|
||||
def __init__(self, match):
|
||||
GelbooruV02Extractor.__init__(self, match)
|
||||
@ -335,112 +289,7 @@ class GelbooruV02PostExtractor(GelbooruV02Extractor):
|
||||
subcategory = "post"
|
||||
archive_fmt = "{id}"
|
||||
pattern = BASE_PATTERN + r"/index\.php\?page=post&s=view&id=(\d+)"
|
||||
test = (
|
||||
("https://rule34.xxx/index.php?page=post&s=view&id=863", {
|
||||
"pattern": r"https://api-cdn\.rule34\.xxx/images"
|
||||
r"/1/6aafbdb3e22f3f3b412ea2cf53321317a37063f3\.jpg",
|
||||
"content": ("a43f418aa350039af0d11cae501396a33bbe2201",
|
||||
"67b516295950867e1c1ab6bc13b35d3b762ed2a3"),
|
||||
"options": (("tags", True), ("notes", True)),
|
||||
"keyword": {
|
||||
"tags_artist": "reverse_noise yamu_(reverse_noise)",
|
||||
"tags_character": "hong_meiling",
|
||||
"tags_copyright": "touhou",
|
||||
"tags_general": str,
|
||||
"tags_metadata": "censored translated",
|
||||
"notes": [
|
||||
{
|
||||
"body": "It feels angry, I'm losing myself... "
|
||||
"It won't calm down!",
|
||||
"height": 65,
|
||||
"id": 93586,
|
||||
"width": 116,
|
||||
"x": 22,
|
||||
"y": 333,
|
||||
},
|
||||
{
|
||||
"body": "REPUTATION OF RAGE",
|
||||
"height": 272,
|
||||
"id": 93587,
|
||||
"width": 199,
|
||||
"x": 78,
|
||||
"y": 442,
|
||||
},
|
||||
],
|
||||
|
||||
},
|
||||
}),
|
||||
("https://hypnohub.net/index.php?page=post&s=view&id=1439", {
|
||||
"pattern": r"https://hypnohub\.net/images"
|
||||
r"/90/24/90245c3c5250c2a8173255d3923a010b\.jpg",
|
||||
"content": "5987c5d2354f22e5fa9b7ee7ce4a6f7beb8b2b71",
|
||||
"options": (("tags", True), ("notes", True)),
|
||||
"keyword": {
|
||||
"tags_artist": "brokenteapot",
|
||||
"tags_character": "hsien-ko",
|
||||
"tags_copyright": "capcom darkstalkers",
|
||||
"tags_general": str,
|
||||
"tags_metadata": "dialogue text translated",
|
||||
"notes": [
|
||||
{
|
||||
"body": "Master Master Master "
|
||||
"Master Master Master",
|
||||
"height": 83,
|
||||
"id": 10577,
|
||||
"width": 129,
|
||||
"x": 259,
|
||||
"y": 20,
|
||||
},
|
||||
{
|
||||
"body": "Response Response Response "
|
||||
"Response Response Response",
|
||||
"height": 86,
|
||||
"id": 10578,
|
||||
"width": 125,
|
||||
"x": 126,
|
||||
"y": 20,
|
||||
},
|
||||
{
|
||||
"body": "Obedience Obedience Obedience "
|
||||
"Obedience Obedience Obedience",
|
||||
"height": 80,
|
||||
"id": 10579,
|
||||
"width": 98,
|
||||
"x": 20,
|
||||
"y": 20,
|
||||
},
|
||||
],
|
||||
|
||||
},
|
||||
}),
|
||||
("https://safebooru.org/index.php?page=post&s=view&id=1169132", {
|
||||
"url": "cf05e37a3c62b2d55788e2080b8eabedb00f999b",
|
||||
"content": "93b293b27dabd198afafabbaf87c49863ac82f27",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_artist": "kawanakajima",
|
||||
"tags_character": "heath_ledger ronald_mcdonald the_joker",
|
||||
"tags_copyright": "dc_comics mcdonald's the_dark_knight",
|
||||
"tags_general": str,
|
||||
},
|
||||
}),
|
||||
("https://realbooru.com/index.php?page=post&s=view&id=668483", {
|
||||
"pattern": r"https://realbooru\.com//?images/dc/b5"
|
||||
r"/dcb5c0ce9ec0bf74a6930608985f4719\.jpeg",
|
||||
"content": "7f5873ce3b6cd295ea2e81fcb49583098ea9c8da",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_general": "1girl blonde blonde_hair blue_eyes cute "
|
||||
"female female_only looking_at_viewer smile "
|
||||
"solo solo_female teeth",
|
||||
"tags_model": "jennifer_lawrence",
|
||||
},
|
||||
}),
|
||||
("https://tbib.org/index.php?page=post&s=view&id=9233957", {
|
||||
"url": "5a6ebe07bfff8e6d27f7c30b5480f27abcb577d2",
|
||||
"content": "1c3831b6fbaa4686e3c79035b5d98460b1c85c43",
|
||||
}),
|
||||
)
|
||||
example = "https://safebooru.org/index.php?page=post&s=view&id=12345"
|
||||
|
||||
def __init__(self, match):
|
||||
GelbooruV02Extractor.__init__(self, match)
|
||||
|
@ -34,31 +34,7 @@ class GenericExtractor(Extractor):
|
||||
r"(?:\?(?P<query>[^#]*))?" # optional query
|
||||
r"(?:\#(?P<fragment>.*))?" # optional fragment
|
||||
)
|
||||
|
||||
test = (
|
||||
("generic:https://www.nongnu.org/lzip/", {
|
||||
"count": 1,
|
||||
"content": "40be5c77773d3e91db6e1c5df720ee30afb62368",
|
||||
"keyword": {
|
||||
"description": "Lossless data compressor",
|
||||
"imageurl": "https://www.nongnu.org/lzip/lzip.png",
|
||||
"keywords": "lzip, clzip, plzip, lzlib, LZMA, bzip2, "
|
||||
"gzip, data compression, GNU, free software",
|
||||
"pageurl": "https://www.nongnu.org/lzip/",
|
||||
},
|
||||
}),
|
||||
# internationalized domain name
|
||||
("generic:https://räksmörgås.josefsson.org/", {
|
||||
"count": 2,
|
||||
"pattern": "^https://räksmörgås.josefsson.org/",
|
||||
}),
|
||||
("g:https://en.wikipedia.org/Main_Page"),
|
||||
("g:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
|
||||
("g:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
|
||||
("generic:https://en.wikipedia.org/Main_Page"),
|
||||
("generic:https://example.org/path/to/file?que=1?&ry=2/#fragment"),
|
||||
("generic:https://example.org/%27%3C%23/%23%3E%27.htm?key=%3C%26%3E"),
|
||||
)
|
||||
example = "generic:https://www.nongnu.org/lzip/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -1,306 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2017-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://gfycat.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from ..cache import cache
|
||||
|
||||
|
||||
class GfycatExtractor(Extractor):
    """Base class for gfycat extractors"""
    category = "gfycat"
    filename_fmt = "{category}_{gfyName}{title:?_//}.{extension}"
    archive_fmt = "{gfyName}"
    root = "https://gfycat.com"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first capture group of the subclass pattern (user name,
        # search term, or gfycat ID), normalized to lowercase
        self.key = match.group(1).lower()

    def _init(self):
        # 'format' option: None selects the default order, a string is
        # tried first with the defaults as fallback, and any other value
        # is used unchanged as the sequence of format names
        formats = self.config("format")
        if formats is None:
            formats = ("mp4", "webm", "mobile", "gif")
        elif isinstance(formats, str):
            formats = (formats, "mp4", "webm", "mobile", "gif")
        self.formats = formats

    def items(self):
        """Yield a Directory and a Url message for each usable gfycat"""
        metadata = self.metadata()
        for gfycat in self.gfycats():
            if "gfyName" not in gfycat:
                # a malformed object may lack 'gfyId' as well; use .get()
                # so that the warning itself cannot raise KeyError
                self.log.warning("Skipping '%s' (malformed)",
                                 gfycat.get("gfyId"))
                continue

            url = self._process(gfycat)
            if not url:
                self.log.warning("Skipping '%s' (format not available)",
                                 gfycat.get("gfyId"))
                continue

            gfycat.update(metadata)
            yield Message.Directory, gfycat
            yield Message.Url, url, gfycat

    def _process(self, gfycat):
        """Prepare 'gfycat' for download and return its primary URL

        Stores the generator of candidate URLs as '_fallback' and the
        parsed 'createDate' as 'date'. Returns the first candidate URL,
        or None if none of the configured formats is present.
        """
        gfycat["_fallback"] = formats = self._formats(gfycat)
        gfycat["date"] = text.parse_timestamp(gfycat.get("createDate"))
        # consumes the first candidate; the remaining ones stay available
        # through the same generator stored in '_fallback'
        return next(formats, None)

    def _formats(self, gfycat):
        """Yield a URL for every configured format found in 'gfycat'

        Sets gfycat["extension"] to match each URL before yielding it.
        """
        for fmt in self.formats:
            key = fmt + "Url"
            if key in gfycat:
                url = gfycat[key]
                if url.startswith("http:"):
                    # upgrade plain-HTTP URLs to HTTPS
                    url = "https" + url[4:]
                gfycat["extension"] = url.rpartition(".")[2]
                yield url

    def metadata(self):
        """Return extra metadata merged into every gfycat (none here)"""
        return {}

    def gfycats(self):
        """Return an iterable of gfycat objects (empty here)"""
        return ()
|
||||
|
||||
|
||||
class GfycatUserExtractor(GfycatExtractor):
    """Extractor for gfycat user profiles"""
    subcategory = "user"
    directory_fmt = ("{category}", "{username}")
    pattern = r"(?:https?://)?gfycat\.com/@([^/?#]+)/?(?:$|\?|#)"
    test = ("https://gfycat.com/@gretta", {
        "pattern": r"https://giant\.gfycat\.com/[A-Za-z]+\.mp4",
        "count": ">= 100",
    })

    def gfycats(self):
        """Return the gfycats of the requested user

        The user name "me" is routed to the API's 'me' endpoint
        instead of the regular per-user endpoint.
        """
        api = GfycatAPI(self)
        return api.me() if self.key == "me" else api.user(self.key)
|
||||
|
||||
|
||||
class GfycatCollectionExtractor(GfycatExtractor):
    """Extractor for a gfycat collection"""
    subcategory = "collection"
    directory_fmt = ("{category}", "{collection_owner}",
                     "{collection_name|collection_id}")
    pattern = (r"(?:https?://)?gfycat\.com/@([^/?#]+)/collections"
               r"/(\w+)(?:/([^/?#]+))?")
    test = ("https://gfycat.com/@reactions/collections/nHgy2DtE/no-text", {
        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
        "count": ">= 100",
    })

    def __init__(self, match):
        GfycatExtractor.__init__(self, match)
        # group 2: collection ID, group 3: optional collection name
        self.collection_id, self.collection_name = match.group(2, 3)

    def metadata(self):
        """Return owner, name, and ID of the collection"""
        info = {"collection_owner": self.key}
        info["collection_name"] = self.collection_name
        info["collection_id"] = self.collection_id
        return info

    def gfycats(self):
        """Return the gfycats contained in this collection"""
        api = GfycatAPI(self)
        return api.collection(self.key, self.collection_id)
|
||||
|
||||
|
||||
class GfycatCollectionsExtractor(GfycatExtractor):
    """Extractor for a gfycat user's collections"""
    subcategory = "collections"
    pattern = r"(?:https?://)?gfycat\.com/@([^/?#]+)/collections/?(?:$|\?|#)"
    test = ("https://gfycat.com/@sannahparker/collections", {
        "pattern": GfycatCollectionExtractor.pattern,
        "count": ">= 20",
    })

    def items(self):
        """Queue every collection of the user for GfycatCollectionExtractor"""
        url_fmt = "https://gfycat.com/@{}/collections/{}/{}"
        api = GfycatAPI(self)

        for collection in api.collections(self.key):
            target = url_fmt.format(
                collection["userId"],
                collection["folderId"],
                collection["linkText"],
            )
            # name the extractor class that handles the queued URL
            collection["_extractor"] = GfycatCollectionExtractor
            yield Message.Queue, target, collection
|
||||
|
||||
|
||||
class GfycatSearchExtractor(GfycatExtractor):
    """Extractor for gfycat search results"""
    subcategory = "search"
    directory_fmt = ("{category}", "Search", "{search}")
    pattern = r"(?:https?://)?gfycat\.com/gifs/search/([^/?#]+)"
    test = ("https://gfycat.com/gifs/search/funny+animals", {
        "pattern": r"https://\w+\.gfycat\.com/[A-Za-z]+\.mp4",
        "archive": False,
        "range": "100-300",
        "count": "> 200",
    })

    def metadata(self):
        """Decode the search term from the URL and return it as metadata

        The decoded term is also written back to 'self.key', which
        gfycats() passes on to the API.
        """
        # '+' separates words in the URL. Translate it to a space BEFORE
        # unquoting, so that an encoded literal plus sign ('%2B') is kept
        # as '+' instead of being turned into a space as well.
        self.key = text.unquote(self.key.replace("+", " "))
        return {"search": self.key}

    def gfycats(self):
        """Return the search results for the current search term"""
        return GfycatAPI(self).search(self.key)
|
||||
|
||||
|
||||
class GfycatImageExtractor(GfycatExtractor):
    """Extractor for individual images from gfycat.com"""
    subcategory = "image"
    pattern = (r"(?:https?://)?(?:\w+\.)?gfycat\.com"
               r"/(?:gifs/detail/|\w+/)?([A-Za-z]{8,})")
    test = (
        ("https://gfycat.com/GrayGenerousCowrie", {
            "url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
            "content": "5786028e04b155baa20b87c5f4f77453cd5edc37",
            "keyword": {
                "gfyId": "graygenerouscowrie",
                "gfyName": "GrayGenerousCowrie",
                "gfyNumber": 755075459,
                "title": "Bottom's up",
                "username": "jackson3oh3",
                "createDate": 1495884169,
                "date": "dt:2017-05-27 11:22:49",
                "md5": "a4796e05b0db9ba9ce5140145cd318aa",
                "width": 400,
                "height": 224,
                "frameRate": 23.0,
                "numFrames": 158.0,
                "views": int,
            },
        }),
        (("https://thumbs.gfycat.com/SillyLameIsabellinewheatear"
          "-size_restricted.gif"), {
            "url": "13b32e6cc169d086577d7dd3fd36ee6cdbc02726",
        }),
        ("https://gfycat.com/detail/UnequaledHastyAnkole?tagname=aww", {
            "url": "e24c9f69897fd223343782425a429c5cab6a768e",
        }),
        # retry 404'ed videos on redgifs (#874)
        ("https://www.gfycat.com/foolishforkedabyssiniancat", {
            "pattern": "https://redgifs.com/watch/foolishforkedabyssiniancat",
        }),
        # malformed API response (#902)
        ("https://gfycat.com/illexcitablehairstreak", {
            "count": 0,
        }),
        ("https://gfycat.com/gifs/detail/UnequaledHastyAnkole"),
        ("https://gfycat.com/ifr/UnequaledHastyAnkole"),
        ("https://gfycat.com/ru/UnequaledHastyAnkole"),
    )

    def items(self):
        """Yield the messages for a single gfycat

        If the API request fails with an HTTP error, the ID is queued
        for the redgifs extractor instead. Malformed objects and objects
        without any of the configured formats are skipped with a warning.
        """
        try:
            gfycat = GfycatAPI(self).gfycat(self.key)
        except exception.HttpError:
            # imported lazily, only when the redgifs fallback is needed
            from .redgifs import RedgifsImageExtractor
            url = "https://redgifs.com/watch/" + self.key
            data = {"_extractor": RedgifsImageExtractor}
            yield Message.Queue, url, data
        else:
            if "gfyName" not in gfycat:
                # a malformed object may lack 'gfyId' as well; use .get()
                # so that the warning itself cannot raise KeyError
                self.log.warning("Skipping '%s' (malformed)",
                                 gfycat.get("gfyId"))
                return
            url = self._process(gfycat)
            if not url:
                self.log.warning("Skipping '%s' (format not available)",
                                 gfycat.get("gfyId"))
                return
            yield Message.Directory, gfycat
            yield Message.Url, url, gfycat
|
||||
|
||||
|
||||
class GfycatAPI():
    """Minimal client for the gfycat API used by the extractors above"""
    API_ROOT = "https://api.gfycat.com"

    def __init__(self, extractor):
        # the extractor supplies request(), logging, and credentials
        self.extractor = extractor
        self.headers = {}
        credentials = extractor._get_auth_info()
        self.username, self.password = credentials

    def collection(self, user, collection):
        """Iterate over the gfycats of a user's collection"""
        path = "/v1/users/{}/collections/{}/gfycats".format(
            user, collection)
        return self._pagination(path, {"count": 100})

    def collections(self, user):
        """Iterate over the collections of a user"""
        path = "/v1/users/{}/collections".format(user)
        return self._pagination(path, {"count": 100}, "gfyCollections")

    def gfycat(self, gfycat_id):
        """Return the 'gfyItem' object for a single gfycat ID"""
        result = self._call("/v1/gfycats/" + gfycat_id)
        return result["gfyItem"]

    def me(self):
        """Iterate over the gfycats of the '/v1/me' endpoint"""
        return self._pagination("/v1/me/gfycats", {"count": 100})

    def search(self, query):
        """Iterate over the search results for 'query'"""
        query_params = {"search_text": query, "count": 150}
        return self._pagination("/v1/gfycats/search", query_params)

    def user(self, user):
        """Iterate over the gfycats of the given user"""
        path = "/v1/users/{}/gfycats".format(user.lower())
        return self._pagination(path, {"count": 100})

    def authenticate(self):
        """Store the Authorization header value for later API calls"""
        token = self._authenticate_impl(self.username, self.password)
        self.headers["Authorization"] = token

    @cache(maxage=3600, keyarg=1)
    def _authenticate_impl(self, username, password):
        """Log in and return the 'Bearer ...' authorization value"""
        request = self.extractor.request
        self.extractor.log.info("Logging in as %s", username)

        # step 1: request a web token using the access key
        headers = {"Origin": "https://gfycat.com"}
        payload = {
            "access_key": "Anr96uuqt9EdamSCwK4txKPjMsf2"
                          "M95Rfa5FLLhPFucu8H5HTzeutyAa",
        }
        webtoken = request(
            "https://weblogin.gfycat.com/oauth/webtoken",
            method="POST", headers=headers, json=payload).json()

        # step 2: log in with the user's credentials, authorized by
        # the token from step 1
        headers["authorization"] = "Bearer " + webtoken["access_token"]
        payload = {
            "grant_type": "password",
            "username"  : username,
            "password"  : password,
        }
        login = request(
            "https://weblogin.gfycat.com/oauth/weblogin",
            method="POST", headers=headers, json=payload, fatal=None).json()

        if "errorMessage" in login:
            raise exception.AuthenticationError(
                login["errorMessage"]["description"])
        return "Bearer " + login["access_token"]

    def _call(self, endpoint, params=None):
        """Send a request to 'endpoint' and return the decoded JSON"""
        if self.username:
            self.authenticate()

        return self.extractor.request(
            self.API_ROOT + endpoint,
            params=params,
            headers=self.headers,
        ).json()

    def _pagination(self, endpoint, params, key="gfycats"):
        """Yield the items under 'key' from every result page

        Follows the 'cursor' value of each response and stops once it
        is empty. Note that 'params' is updated in place.
        """
        while True:
            page = self._call(endpoint, params)
            yield from page[key]

            cursor = page["cursor"]
            if not cursor:
                return
            params["cursor"] = cursor
|
@ -4,6 +4,8 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://gofile.io/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from ..cache import cache, memcache
|
||||
@ -17,49 +19,7 @@ class GofileFolderExtractor(Extractor):
|
||||
directory_fmt = ("{category}", "{name} ({code})")
|
||||
archive_fmt = "{id}"
|
||||
pattern = r"(?:https?://)?(?:www\.)?gofile\.io/d/([^/?#]+)"
|
||||
test = (
|
||||
("https://gofile.io/d/k6BomI", {
|
||||
"pattern": r"https://store\d+\.gofile\.io/download"
|
||||
r"/\w{8}-\w{4}-\w{4}-\w{4}-\w{12}"
|
||||
r"/test-%E3%83%86%E3%82%B9%E3%83%88-%2522%26!\.png",
|
||||
"keyword": {
|
||||
"createTime": int,
|
||||
"directLink": "re:https://store5.gofile.io/download/direct/.+",
|
||||
"downloadCount": int,
|
||||
"extension": "png",
|
||||
"filename": "test-テスト-%22&!",
|
||||
"folder": {
|
||||
"childs": [
|
||||
"b0367d79-b8ba-407f-8342-aaf8eb815443",
|
||||
"7fd4a36a-c1dd-49ff-9223-d93f7d24093f"
|
||||
],
|
||||
"code": "k6BomI",
|
||||
"createTime": 1654076165,
|
||||
"id": "fafb59f9-a7c7-4fea-a098-b29b8d97b03c",
|
||||
"name": "root",
|
||||
"public": True,
|
||||
"totalDownloadCount": int,
|
||||
"totalSize": 182,
|
||||
"type": "folder"
|
||||
},
|
||||
"id": r"re:\w{8}-\w{4}-\w{4}-\w{4}-\w{12}",
|
||||
"link": r"re:https://store5.gofile.io/download/.+\.png",
|
||||
"md5": "re:[0-9a-f]{32}",
|
||||
"mimetype": "image/png",
|
||||
"name": "test-テスト-%22&!.png",
|
||||
"num": int,
|
||||
"parentFolder": "fafb59f9-a7c7-4fea-a098-b29b8d97b03c",
|
||||
"serverChoosen": "store5",
|
||||
"size": 182,
|
||||
"thumbnail": r"re:https://store5.gofile.io/download/.+\.png",
|
||||
"type": "file"
|
||||
},
|
||||
}),
|
||||
("https://gofile.io/d/7fd4a36a-c1dd-49ff-9223-d93f7d24093f", {
|
||||
"options": (("website-token", None),),
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
}),
|
||||
)
|
||||
example = "https://gofile.io/d/ID"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -47,11 +47,7 @@ class HbrowseChapterExtractor(HbrowseBase, ChapterExtractor):
|
||||
"{page:>03}.{extension}")
|
||||
archive_fmt = "{manga_id}_{chapter}_{page}"
|
||||
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/(\d+)/c(\d+))"
|
||||
test = ("https://www.hbrowse.com/10363/c00000", {
|
||||
"url": "6feefbc9f4b98e20d8425ddffa9dd111791dc3e6",
|
||||
"keyword": "274996f6c809e5250b6ff3abbc5147e29f89d9a5",
|
||||
"content": "44578ebbe176c2c27434966aef22945787e2781e",
|
||||
})
|
||||
example = "https://www.hbrowse.com/12345/c00000"
|
||||
|
||||
def __init__(self, match):
|
||||
self.path, self.gid, self.chapter = match.groups()
|
||||
@ -75,10 +71,7 @@ class HbrowseMangaExtractor(HbrowseBase, MangaExtractor):
|
||||
chapterclass = HbrowseChapterExtractor
|
||||
reverse = False
|
||||
pattern = r"(?:https?://)?(?:www\.)?hbrowse\.com(/\d+)/?$"
|
||||
test = ("https://www.hbrowse.com/10363", {
|
||||
"url": "b89682bfb86c11d2af0dc47463804ec3ac4aadd6",
|
||||
"keyword": "4b15fda1858a69de1fbf5afddfe47dd893397312",
|
||||
})
|
||||
example = "https://www.hbrowse.com/12345"
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016-2022 Mike Fährmann
|
||||
# Copyright 2016-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -23,31 +23,7 @@ class Hentai2readChapterExtractor(Hentai2readBase, ChapterExtractor):
|
||||
"""Extractor for a single manga chapter from hentai2read.com"""
|
||||
archive_fmt = "{chapter_id}_{page}"
|
||||
pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+/([^/?#]+))"
|
||||
test = (
|
||||
("https://hentai2read.com/amazon_elixir/1/", {
|
||||
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
|
||||
"keyword": "85645b02d34aa11b3deb6dadd7536863476e1bad",
|
||||
}),
|
||||
("https://hentai2read.com/popuni_kei_joshi_panic/2.5/", {
|
||||
"pattern": r"https://hentaicdn\.com/hentai"
|
||||
r"/13088/2\.5y/ccdn00\d+\.jpg",
|
||||
"count": 36,
|
||||
"keyword": {
|
||||
"author": "Kurisu",
|
||||
"chapter": 2,
|
||||
"chapter_id": 75152,
|
||||
"chapter_minor": ".5",
|
||||
"count": 36,
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga": "Popuni Kei Joshi Panic!",
|
||||
"manga_id": 13088,
|
||||
"page": int,
|
||||
"title": "Popuni Kei Joshi Panic! 2.5",
|
||||
"type": "Original",
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://hentai2read.com/TITLE/1/"
|
||||
|
||||
def __init__(self, match):
|
||||
self.chapter = match.group(2)
|
||||
@ -85,31 +61,7 @@ class Hentai2readMangaExtractor(Hentai2readBase, MangaExtractor):
|
||||
"""Extractor for hmanga from hentai2read.com"""
|
||||
chapterclass = Hentai2readChapterExtractor
|
||||
pattern = r"(?:https?://)?(?:www\.)?hentai2read\.com(/[^/?#]+)/?$"
|
||||
test = (
|
||||
("https://hentai2read.com/amazon_elixir/", {
|
||||
"url": "273073752d418ec887d7f7211e42b832e8c403ba",
|
||||
"keyword": "5c1b712258e78e120907121d3987c71f834d13e1",
|
||||
}),
|
||||
("https://hentai2read.com/oshikage_riot/", {
|
||||
"url": "6595f920a3088a15c2819c502862d45f8eb6bea6",
|
||||
"keyword": "a2e9724acb221040d4b29bf9aa8cb75b2240d8af",
|
||||
}),
|
||||
("https://hentai2read.com/popuni_kei_joshi_panic/", {
|
||||
"pattern": Hentai2readChapterExtractor.pattern,
|
||||
"range": "2-3",
|
||||
"keyword": {
|
||||
"chapter": int,
|
||||
"chapter_id": int,
|
||||
"chapter_minor": ".5",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga": "Popuni Kei Joshi Panic!",
|
||||
"manga_id": 13088,
|
||||
"title": str,
|
||||
"type": "Original",
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://hentai2read.com/TITLE/"
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
|
@ -21,36 +21,7 @@ class HentaicosplaysGalleryExtractor(GalleryExtractor):
|
||||
pattern = r"((?:https?://)?(?:\w{2}\.)?" \
|
||||
r"(hentai-cosplays|hentai-img|porn-images-xxx)\.com)/" \
|
||||
r"(?:image|story)/([\w-]+)"
|
||||
test = (
|
||||
("https://hentai-cosplays.com/image/---devilism--tide-kurihara-/", {
|
||||
"pattern": r"https://static\d?.hentai-cosplays.com/upload/"
|
||||
r"\d+/\d+/\d+/\d+.jpg$",
|
||||
"keyword": {
|
||||
"count": 18,
|
||||
"site": "hentai-cosplays",
|
||||
"slug": "---devilism--tide-kurihara-",
|
||||
"title": "艦 こ れ-devilism の tide Kurihara 憂",
|
||||
},
|
||||
}),
|
||||
("https://fr.porn-images-xxx.com/image/enako-enako-24/", {
|
||||
"pattern": r"https://static\d?.porn-images-xxx.com/upload/"
|
||||
r"\d+/\d+/\d+/\d+.jpg$",
|
||||
"keyword": {
|
||||
"count": 11,
|
||||
"site": "porn-images-xxx",
|
||||
"title": str,
|
||||
},
|
||||
}),
|
||||
("https://ja.hentai-img.com/image/hollow-cora-502/", {
|
||||
"pattern": r"https://static\d?.hentai-img.com/upload/"
|
||||
r"\d+/\d+/\d+/\d+.jpg$",
|
||||
"keyword": {
|
||||
"count": 2,
|
||||
"site": "hentai-img",
|
||||
"title": str,
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://hentai-cosplays.com/image/TITLE/"
|
||||
|
||||
def __init__(self, match):
|
||||
root, self.site, self.slug = match.groups()
|
||||
|
@ -168,7 +168,7 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
|
||||
"""Extractor for a hentaifoundry user profile"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/user/([^/?#]+)/profile"
|
||||
test = ("https://www.hentai-foundry.com/user/Tenpura/profile",)
|
||||
example = "https://www.hentai-foundry.com/user/USER/profile"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
@ -192,12 +192,7 @@ class HentaifoundryPicturesExtractor(HentaifoundryExtractor):
|
||||
"""Extractor for all pictures of a hentaifoundry user"""
|
||||
subcategory = "pictures"
|
||||
pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)(?:/page/(\d+))?/?$"
|
||||
test = (
|
||||
("https://www.hentai-foundry.com/pictures/user/Tenpura", {
|
||||
"url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
|
||||
}),
|
||||
("https://www.hentai-foundry.com/pictures/user/Tenpura/page/3"),
|
||||
)
|
||||
example = "https://www.hentai-foundry.com/pictures/user/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
HentaifoundryExtractor.__init__(self, match)
|
||||
@ -209,13 +204,7 @@ class HentaifoundryScrapsExtractor(HentaifoundryExtractor):
|
||||
subcategory = "scraps"
|
||||
directory_fmt = ("{category}", "{user}", "Scraps")
|
||||
pattern = BASE_PATTERN + r"/pictures/user/([^/?#]+)/scraps"
|
||||
test = (
|
||||
("https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps", {
|
||||
"url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7",
|
||||
}),
|
||||
("https://www.hentai-foundry.com"
|
||||
"/pictures/user/Evulchibi/scraps/page/3"),
|
||||
)
|
||||
example = "https://www.hentai-foundry.com/pictures/user/USER/scraps"
|
||||
|
||||
def __init__(self, match):
|
||||
HentaifoundryExtractor.__init__(self, match)
|
||||
@ -229,13 +218,7 @@ class HentaifoundryFavoriteExtractor(HentaifoundryExtractor):
|
||||
directory_fmt = ("{category}", "{user}", "Favorites")
|
||||
archive_fmt = "f_{user}_{index}"
|
||||
pattern = BASE_PATTERN + r"/user/([^/?#]+)/faves/pictures"
|
||||
test = (
|
||||
("https://www.hentai-foundry.com/user/Tenpura/faves/pictures", {
|
||||
"url": "56f9ae2e89fe855e9fe1da9b81e5ec6212b0320b",
|
||||
}),
|
||||
("https://www.hentai-foundry.com"
|
||||
"/user/Tenpura/faves/pictures/page/3"),
|
||||
)
|
||||
example = "https://www.hentai-foundry.com/user/USER/faves/pictures"
|
||||
|
||||
def __init__(self, match):
|
||||
HentaifoundryExtractor.__init__(self, match)
|
||||
@ -249,10 +232,7 @@ class HentaifoundryRecentExtractor(HentaifoundryExtractor):
|
||||
directory_fmt = ("{category}", "Recent Pictures", "{date}")
|
||||
archive_fmt = "r_{index}"
|
||||
pattern = BASE_PATTERN + r"/pictures/recent/(\d\d\d\d-\d\d-\d\d)"
|
||||
test = ("https://www.hentai-foundry.com/pictures/recent/2018-09-20", {
|
||||
"pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
|
||||
"range": "20-30",
|
||||
})
|
||||
example = "https://www.hentai-foundry.com/pictures/recent/1970-01-01"
|
||||
|
||||
def __init__(self, match):
|
||||
HentaifoundryExtractor.__init__(self, match)
|
||||
@ -268,10 +248,7 @@ class HentaifoundryPopularExtractor(HentaifoundryExtractor):
|
||||
directory_fmt = ("{category}", "Popular Pictures")
|
||||
archive_fmt = "p_{index}"
|
||||
pattern = BASE_PATTERN + r"/pictures/popular()"
|
||||
test = ("https://www.hentai-foundry.com/pictures/popular", {
|
||||
"pattern": r"https://pictures.hentai-foundry.com/[^/]/[^/?#]+/\d+/",
|
||||
"range": "20-30",
|
||||
})
|
||||
example = "https://www.hentai-foundry.com/pictures/popular"
|
||||
|
||||
def __init__(self, match):
|
||||
HentaifoundryExtractor.__init__(self, match)
|
||||
@ -283,34 +260,8 @@ class HentaifoundryImageExtractor(HentaifoundryExtractor):
|
||||
subcategory = "image"
|
||||
pattern = (r"(https?://)?(?:www\.|pictures\.)?hentai-foundry\.com"
|
||||
r"/(?:pictures/user|[^/?#])/([^/?#]+)/(\d+)")
|
||||
test = (
|
||||
(("https://www.hentai-foundry.com"
|
||||
"/pictures/user/Tenpura/407501/shimakaze"), {
|
||||
"url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3",
|
||||
"content": "91bf01497c39254b6dfb234a18e8f01629c77fd1",
|
||||
"keyword": {
|
||||
"artist" : "Tenpura",
|
||||
"date" : "dt:2016-02-22 14:41:19",
|
||||
"description": "Thank you!",
|
||||
"height" : 700,
|
||||
"index" : 407501,
|
||||
"media" : "Other digital art",
|
||||
"ratings": ["Sexual content", "Contains female nudity"],
|
||||
"score" : int,
|
||||
"tags" : ["collection", "kancolle", "kantai", "shimakaze"],
|
||||
"title" : "shimakaze",
|
||||
"user" : "Tenpura",
|
||||
"views" : int,
|
||||
"width" : 495,
|
||||
},
|
||||
}),
|
||||
("http://www.hentai-foundry.com/pictures/user/Tenpura/407501/", {
|
||||
"pattern": "http://pictures.hentai-foundry.com/t/Tenpura/407501/",
|
||||
}),
|
||||
("https://www.hentai-foundry.com/pictures/user/Tenpura/407501/"),
|
||||
("https://pictures.hentai-foundry.com"
|
||||
"/t/Tenpura/407501/Tenpura-407501-shimakaze.png"),
|
||||
)
|
||||
example = "https://www.hentai-foundry.com/pictures/user/USER/12345/TITLE"
|
||||
|
||||
skip = Extractor.skip
|
||||
|
||||
def __init__(self, match):
|
||||
@ -331,24 +282,7 @@ class HentaifoundryStoriesExtractor(HentaifoundryExtractor):
|
||||
subcategory = "stories"
|
||||
archive_fmt = "s_{index}"
|
||||
pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)(?:/page/(\d+))?/?$"
|
||||
test = ("https://www.hentai-foundry.com/stories/user/SnowWolf35", {
|
||||
"count": ">= 35",
|
||||
"keyword": {
|
||||
"author" : "SnowWolf35",
|
||||
"chapters" : int,
|
||||
"comments" : int,
|
||||
"date" : "type:datetime",
|
||||
"description": str,
|
||||
"index" : int,
|
||||
"rating" : int,
|
||||
"ratings" : list,
|
||||
"status" : "re:(Inc|C)omplete",
|
||||
"title" : str,
|
||||
"user" : "SnowWolf35",
|
||||
"views" : int,
|
||||
"words" : int,
|
||||
},
|
||||
})
|
||||
example = "https://www.hentai-foundry.com/stories/user/USER"
|
||||
|
||||
def items(self):
|
||||
self._init_site_filters()
|
||||
@ -367,11 +301,8 @@ class HentaifoundryStoryExtractor(HentaifoundryExtractor):
|
||||
subcategory = "story"
|
||||
archive_fmt = "s_{index}"
|
||||
pattern = BASE_PATTERN + r"/stories/user/([^/?#]+)/(\d+)"
|
||||
test = (("https://www.hentai-foundry.com/stories/user/SnowWolf35"
|
||||
"/26416/Overwatch-High-Chapter-Voting-Location"), {
|
||||
"url": "5a67cfa8c3bf7634c8af8485dd07c1ea74ee0ae8",
|
||||
"keyword": {"title": "Overwatch High Chapter Voting Location"},
|
||||
})
|
||||
example = "https://www.hentai-foundry.com/stories/user/USER/12345/TITLE"
|
||||
|
||||
skip = Extractor.skip
|
||||
|
||||
def __init__(self, match):
|
||||
|
@ -21,40 +21,7 @@ class HentaifoxBase():
|
||||
class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
|
||||
"""Extractor for image galleries on hentaifox.com"""
|
||||
pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
|
||||
test = (
|
||||
("https://hentaifox.com/gallery/56622/", {
|
||||
"pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg",
|
||||
"keyword": "bcd6b67284f378e5cc30b89b761140e3e60fcd92",
|
||||
"count": 24,
|
||||
}),
|
||||
# 'split_tag' element (#1378)
|
||||
("https://hentaifox.com/gallery/630/", {
|
||||
"keyword": {
|
||||
"artist": ["beti", "betty", "magi", "mimikaki"],
|
||||
"characters": [
|
||||
"aerith gainsborough",
|
||||
"tifa lockhart",
|
||||
"yuffie kisaragi"
|
||||
],
|
||||
"count": 32,
|
||||
"gallery_id": 630,
|
||||
"group": ["cu-little2"],
|
||||
"parody": ["darkstalkers | vampire", "final fantasy vii"],
|
||||
"tags": ["femdom", "fingering", "masturbation", "yuri"],
|
||||
"title": "Cu-Little Bakanya~",
|
||||
"type": "doujinshi",
|
||||
},
|
||||
}),
|
||||
# email-protected title (#4201)
|
||||
("https://hentaifox.com/gallery/35261/", {
|
||||
"keyword": {
|
||||
"gallery_id": 35261,
|
||||
"title": "ManageM@ster!",
|
||||
"artist": ["haritama hiroki"],
|
||||
"group": ["studio n.ball"],
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://hentaifox.com/gallery/12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
GalleryExtractor.__init__(self, match)
|
||||
@ -116,22 +83,7 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor):
|
||||
subcategory = "search"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?hentaifox\.com"
|
||||
r"(/(?:parody|tag|artist|character|search|group)/[^/?%#]+)")
|
||||
test = (
|
||||
("https://hentaifox.com/parody/touhou-project/"),
|
||||
("https://hentaifox.com/character/reimu-hakurei/"),
|
||||
("https://hentaifox.com/artist/distance/"),
|
||||
("https://hentaifox.com/search/touhou/"),
|
||||
("https://hentaifox.com/group/v-slash/"),
|
||||
("https://hentaifox.com/tag/heterochromia/", {
|
||||
"pattern": HentaifoxGalleryExtractor.pattern,
|
||||
"count": ">= 60",
|
||||
"keyword": {
|
||||
"url" : str,
|
||||
"gallery_id": int,
|
||||
"title" : str,
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://hentaifox.com/tag/TAG/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -17,27 +17,7 @@ class HentaihandGalleryExtractor(GalleryExtractor):
|
||||
category = "hentaihand"
|
||||
root = "https://hentaihand.com"
|
||||
pattern = r"(?:https?://)?(?:www\.)?hentaihand\.com/\w+/comic/([\w-]+)"
|
||||
test = (
|
||||
(("https://hentaihand.com/en/comic/c75-takumi-na-muchi-choudenji-hou-"
|
||||
"no-aishi-kata-how-to-love-a-super-electromagnetic-gun-toaru-kagaku-"
|
||||
"no-railgun-english"), {
|
||||
"pattern": r"https://cdn.hentaihand.com/.*/images/37387/\d+.jpg$",
|
||||
"count": 50,
|
||||
"keyword": {
|
||||
"artists" : ["Takumi Na Muchi"],
|
||||
"date" : "dt:2014-06-28 00:00:00",
|
||||
"gallery_id": 37387,
|
||||
"lang" : "en",
|
||||
"language" : "English",
|
||||
"parodies" : ["Toaru Kagaku No Railgun"],
|
||||
"relationships": list,
|
||||
"tags" : list,
|
||||
"title" : r"re:\(C75\) \[Takumi na Muchi\] Choudenji Hou ",
|
||||
"title_alt" : r"re:\(C75\) \[たくみなむち\] 超電磁砲のあいしかた",
|
||||
"type" : "Doujinshi",
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://hentaihand.com/en/comic/TITLE"
|
||||
|
||||
def __init__(self, match):
|
||||
self.slug = match.group(1)
|
||||
@ -76,15 +56,7 @@ class HentaihandTagExtractor(Extractor):
|
||||
pattern = (r"(?i)(?:https?://)?(?:www\.)?hentaihand\.com"
|
||||
r"/\w+/(parody|character|tag|artist|group|language"
|
||||
r"|category|relationship)/([^/?#]+)")
|
||||
test = (
|
||||
("https://hentaihand.com/en/artist/takumi-na-muchi", {
|
||||
"pattern": HentaihandGalleryExtractor.pattern,
|
||||
"count": ">= 6",
|
||||
}),
|
||||
("https://hentaihand.com/en/tag/full-color"),
|
||||
("https://hentaihand.com/fr/language/japanese"),
|
||||
("https://hentaihand.com/zh/category/manga"),
|
||||
)
|
||||
example = "https://hentaihand.com/en/tag/TAG"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -23,32 +23,7 @@ class HentaihereChapterExtractor(HentaihereBase, ChapterExtractor):
|
||||
"""Extractor for a single manga chapter from hentaihere.com"""
|
||||
archive_fmt = "{chapter_id}_{page}"
|
||||
pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com/m/S(\d+)/([^/?#]+)"
|
||||
test = (
|
||||
("https://hentaihere.com/m/S13812/1/1/", {
|
||||
"url": "964b942cf492b3a129d2fe2608abfc475bc99e71",
|
||||
"keyword": "0207d20eea3a15d2a8d1496755bdfa49de7cfa9d",
|
||||
}),
|
||||
("https://hentaihere.com/m/S23048/1.5/1/", {
|
||||
"pattern": r"https://hentaicdn\.com/hentai"
|
||||
r"/23048/1\.5/ccdn00\d+\.jpg",
|
||||
"count": 32,
|
||||
"keyword": {
|
||||
"author": "Shinozuka Yuuji",
|
||||
"chapter": 1,
|
||||
"chapter_id": 80186,
|
||||
"chapter_minor": ".5",
|
||||
"count": 32,
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga": "High School Slut's Love Consultation",
|
||||
"manga_id": 23048,
|
||||
"page": int,
|
||||
"title": "High School Slut's Love Consultation + "
|
||||
"Girlfriend [Full Color]",
|
||||
"type": "Original",
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://hentaihere.com/m/S12345/1/1/"
|
||||
|
||||
def __init__(self, match):
|
||||
self.manga_id, self.chapter = match.groups()
|
||||
@ -87,26 +62,7 @@ class HentaihereMangaExtractor(HentaihereBase, MangaExtractor):
|
||||
"""Extractor for hmanga from hentaihere.com"""
|
||||
chapterclass = HentaihereChapterExtractor
|
||||
pattern = r"(?:https?://)?(?:www\.)?hentaihere\.com(/m/S\d+)/?$"
|
||||
test = (
|
||||
("https://hentaihere.com/m/S13812", {
|
||||
"url": "d1ba6e28bb2162e844f8559c2b2725ba0a093559",
|
||||
"keyword": "5c1b712258e78e120907121d3987c71f834d13e1",
|
||||
}),
|
||||
("https://hentaihere.com/m/S7608", {
|
||||
"url": "6c5239758dc93f6b1b4175922836c10391b174f7",
|
||||
"keyword": {
|
||||
"chapter": int,
|
||||
"chapter_id": int,
|
||||
"chapter_minor": "",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga": "Oshikake Riot",
|
||||
"manga_id": 7608,
|
||||
"title": r"re:Oshikake Riot( \d+)?",
|
||||
"type": "Original",
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://hentaihere.com/m/S12345"
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
|
@ -31,7 +31,7 @@ class HiperdexBase():
|
||||
|
||||
return {
|
||||
"manga" : text.unescape(extr(
|
||||
"<title>", "<").rpartition(" - ")[0].strip()),
|
||||
"<title>", "<").rpartition(" Manga - ")[0].strip()),
|
||||
"url" : text.unescape(extr(
|
||||
'property="og:url" content="', '"')),
|
||||
"score" : text.parse_float(extr(
|
||||
@ -69,30 +69,7 @@ class HiperdexBase():
|
||||
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
|
||||
"""Extractor for manga chapters from hiperdex.com"""
|
||||
pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
|
||||
test = (
|
||||
("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
|
||||
"pattern": r"https://(1st)?hiperdex\d?.(com|net|info)"
|
||||
r"/wp-content/uploads/WP-manga/data"
|
||||
r"/manga_\w+/[0-9a-f]{32}/\d+\.webp",
|
||||
"count": 9,
|
||||
"keyword": {
|
||||
"artist" : "Sasuga Kei",
|
||||
"author" : "Sasuga Kei",
|
||||
"chapter": 154,
|
||||
"chapter_minor": ".5",
|
||||
"description": "re:Natsuo Fujii is in love with his teacher, ",
|
||||
"genre" : list,
|
||||
"manga" : "Domestic na Kanojo",
|
||||
"release": 2014,
|
||||
"score" : float,
|
||||
"type" : "Manga",
|
||||
},
|
||||
}),
|
||||
("https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/"),
|
||||
("https://hiperdex2.com/manga/domestic-na-kanojo/154-5/"),
|
||||
("https://hiperdex.net/manga/domestic-na-kanojo/154-5/"),
|
||||
("https://hiperdex.info/manga/domestic-na-kanojo/154-5/"),
|
||||
)
|
||||
example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
|
||||
|
||||
def __init__(self, match):
|
||||
root, path, self.manga, self.chapter = match.groups()
|
||||
@ -114,30 +91,7 @@ class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
|
||||
"""Extractor for manga from hiperdex.com"""
|
||||
chapterclass = HiperdexChapterExtractor
|
||||
pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
|
||||
test = (
|
||||
("https://hiperdex.com/manga/1603231576-youre-not-that-special/", {
|
||||
"count": 51,
|
||||
"pattern": HiperdexChapterExtractor.pattern,
|
||||
"keyword": {
|
||||
"artist" : "Bolp",
|
||||
"author" : "Abyo4",
|
||||
"chapter": int,
|
||||
"chapter_minor": "",
|
||||
"description": "re:I didn’t think much of the creepy girl in ",
|
||||
"genre" : list,
|
||||
"manga" : "You’re Not That Special!",
|
||||
"release": 2019,
|
||||
"score" : float,
|
||||
"status" : "Completed",
|
||||
"type" : "Manhwa",
|
||||
},
|
||||
}),
|
||||
("https://hiperdex.com/manga/youre-not-that-special/"),
|
||||
("https://1sthiperdex.com/manga/youre-not-that-special/"),
|
||||
("https://hiperdex2.com/manga/youre-not-that-special/"),
|
||||
("https://hiperdex.net/manga/youre-not-that-special/"),
|
||||
("https://hiperdex.info/manga/youre-not-that-special/"),
|
||||
)
|
||||
example = "https://hiperdex.com/manga/MANGA/"
|
||||
|
||||
def __init__(self, match):
|
||||
root, path, self.manga = match.groups()
|
||||
@ -173,16 +127,7 @@ class HiperdexArtistExtractor(HiperdexBase, MangaExtractor):
|
||||
chapterclass = HiperdexMangaExtractor
|
||||
reverse = False
|
||||
pattern = BASE_PATTERN + r"(/manga-a(?:rtist|uthor)/(?:[^/?#]+))"
|
||||
test = (
|
||||
("https://1sthiperdex.com/manga-artist/beck-ho-an/"),
|
||||
("https://hiperdex.net/manga-artist/beck-ho-an/"),
|
||||
("https://hiperdex2.com/manga-artist/beck-ho-an/"),
|
||||
("https://hiperdex.info/manga-artist/beck-ho-an/"),
|
||||
("https://hiperdex.com/manga-author/viagra/", {
|
||||
"pattern": HiperdexMangaExtractor.pattern,
|
||||
"count": ">= 6",
|
||||
}),
|
||||
)
|
||||
example = "https://hiperdex.com/manga-artist/NAME/"
|
||||
|
||||
def __init__(self, match):
|
||||
self.root = text.ensure_http_scheme(match.group(1))
|
||||
|
@ -23,47 +23,7 @@ class HitomiGalleryExtractor(GalleryExtractor):
|
||||
pattern = (r"(?:https?://)?hitomi\.la"
|
||||
r"/(?:manga|doujinshi|cg|gamecg|galleries|reader)"
|
||||
r"/(?:[^/?#]+-)?(\d+)")
|
||||
test = (
|
||||
("https://hitomi.la/galleries/867789.html", {
|
||||
"pattern": r"https://[a-c]a\.hitomi\.la/webp/\d+/\d+"
|
||||
r"/[0-9a-f]{64}\.webp",
|
||||
"keyword": "86af5371f38117a07407f11af689bdd460b09710",
|
||||
"count": 16,
|
||||
}),
|
||||
# download test
|
||||
("https://hitomi.la/galleries/1401410.html", {
|
||||
"range": "1",
|
||||
"content": "d75d5a3d1302a48469016b20e53c26b714d17745",
|
||||
}),
|
||||
# Game CG with scenes (#321)
|
||||
("https://hitomi.la/galleries/733697.html", {
|
||||
"count": 210,
|
||||
}),
|
||||
# fallback for galleries only available through /reader/ URLs
|
||||
("https://hitomi.la/galleries/1045954.html", {
|
||||
"count": 1413,
|
||||
}),
|
||||
# gallery with "broken" redirect
|
||||
("https://hitomi.la/cg/scathacha-sama-okuchi-ecchi-1291900.html", {
|
||||
"count": 10,
|
||||
"options": (("format", "original"),),
|
||||
"pattern": r"https://[a-c]b\.hitomi\.la/images/\d+/\d+"
|
||||
r"/[0-9a-f]{64}\.jpg",
|
||||
}),
|
||||
# no tags
|
||||
("https://hitomi.la/cg/1615823.html", {
|
||||
"count": 22,
|
||||
"options": (("format", "avif"),),
|
||||
"pattern": r"https://[a-c]a\.hitomi\.la/avif/\d+/\d+"
|
||||
r"/[0-9a-f]{64}\.avif",
|
||||
}),
|
||||
("https://hitomi.la/manga/amazon-no-hiyaku-867789.html"),
|
||||
("https://hitomi.la/manga/867789.html"),
|
||||
("https://hitomi.la/doujinshi/867789.html"),
|
||||
("https://hitomi.la/cg/867789.html"),
|
||||
("https://hitomi.la/gamecg/867789.html"),
|
||||
("https://hitomi.la/reader/867789.html"),
|
||||
)
|
||||
example = "https://hitomi.la/manga/TITLE-867789.html"
|
||||
|
||||
def __init__(self, match):
|
||||
self.gid = match.group(1)
|
||||
@ -149,17 +109,7 @@ class HitomiTagExtractor(Extractor):
|
||||
pattern = (r"(?:https?://)?hitomi\.la/"
|
||||
r"(tag|artist|group|series|type|character)/"
|
||||
r"([^/?#]+)\.html")
|
||||
test = (
|
||||
("https://hitomi.la/tag/screenshots-japanese.html", {
|
||||
"pattern": HitomiGalleryExtractor.pattern,
|
||||
"count": ">= 35",
|
||||
}),
|
||||
("https://hitomi.la/artist/a1-all-1.html"),
|
||||
("https://hitomi.la/group/initial%2Dg-all-1.html"),
|
||||
("https://hitomi.la/series/amnesia-all-1.html"),
|
||||
("https://hitomi.la/type/doujinshi-all-1.html"),
|
||||
("https://hitomi.la/character/a2-all-1.html"),
|
||||
)
|
||||
example = "https://hitomi.la/tag/TAG-LANG.html"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -21,9 +21,6 @@ class HotleakExtractor(Extractor):
|
||||
archive_fmt = "{type}_{creator}_{id}"
|
||||
root = "https://hotleak.vip"
|
||||
|
||||
def _init(self):
|
||||
self.session.headers["Referer"] = self.root + "/"
|
||||
|
||||
def items(self):
|
||||
for post in self.posts():
|
||||
yield Message.Directory, post
|
||||
@ -59,30 +56,7 @@ class HotleakPostExtractor(HotleakExtractor):
|
||||
subcategory = "post"
|
||||
pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
|
||||
r"([^/]+)/(photo|video)/(\d+)")
|
||||
test = (
|
||||
("https://hotleak.vip/kaiyakawaii/photo/1617145", {
|
||||
"pattern": r"https://hotleak\.vip/storage/images/3625"
|
||||
r"/1617145/fefdd5988dfcf6b98cc9e11616018868\.jpg",
|
||||
"keyword": {
|
||||
"id": 1617145,
|
||||
"creator": "kaiyakawaii",
|
||||
"type": "photo",
|
||||
"filename": "fefdd5988dfcf6b98cc9e11616018868",
|
||||
"extension": "jpg",
|
||||
},
|
||||
}),
|
||||
("https://hotleak.vip/lilmochidoll/video/1625538", {
|
||||
"pattern": r"ytdl:https://cdn8-leak\.camhdxx\.com"
|
||||
r"/1661/1625538/index\.m3u8",
|
||||
"keyword": {
|
||||
"id": 1625538,
|
||||
"creator": "lilmochidoll",
|
||||
"type": "video",
|
||||
"filename": "index",
|
||||
"extension": "mp4",
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://hotleak.vip/MODEL/photo/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
HotleakExtractor.__init__(self, match)
|
||||
@ -118,18 +92,7 @@ class HotleakCreatorExtractor(HotleakExtractor):
|
||||
subcategory = "creator"
|
||||
pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
|
||||
r"([^/?#]+)/?$")
|
||||
test = (
|
||||
("https://hotleak.vip/kaiyakawaii", {
|
||||
"range": "1-200",
|
||||
"count": 200,
|
||||
}),
|
||||
("https://hotleak.vip/stellaviolet", {
|
||||
"count": "> 600"
|
||||
}),
|
||||
("https://hotleak.vip/doesnotexist", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
)
|
||||
example = "https://hotleak.vip/MODEL"
|
||||
|
||||
def __init__(self, match):
|
||||
HotleakExtractor.__init__(self, match)
|
||||
@ -182,20 +145,7 @@ class HotleakCategoryExtractor(HotleakExtractor):
|
||||
"""Extractor for hotleak categories"""
|
||||
subcategory = "category"
|
||||
pattern = BASE_PATTERN + r"/(hot|creators|videos|photos)(?:/?\?([^#]+))?"
|
||||
test = (
|
||||
("https://hotleak.vip/photos", {
|
||||
"pattern": HotleakPostExtractor.pattern,
|
||||
"range": "1-50",
|
||||
"count": 50,
|
||||
}),
|
||||
("https://hotleak.vip/videos"),
|
||||
("https://hotleak.vip/creators", {
|
||||
"pattern": HotleakCreatorExtractor.pattern,
|
||||
"range": "1-50",
|
||||
"count": 50,
|
||||
}),
|
||||
("https://hotleak.vip/hot"),
|
||||
)
|
||||
example = "https://hotleak.vip/photos"
|
||||
|
||||
def __init__(self, match):
|
||||
HotleakExtractor.__init__(self, match)
|
||||
@ -217,14 +167,7 @@ class HotleakSearchExtractor(HotleakExtractor):
|
||||
"""Extractor for hotleak search results"""
|
||||
subcategory = "search"
|
||||
pattern = BASE_PATTERN + r"/search(?:/?\?([^#]+))"
|
||||
test = (
|
||||
("https://hotleak.vip/search?search=gallery-dl", {
|
||||
"count": 0,
|
||||
}),
|
||||
("https://hotleak.vip/search?search=hannah", {
|
||||
"count": "> 30",
|
||||
}),
|
||||
)
|
||||
example = "https://hotleak.vip/search?search=QUERY"
|
||||
|
||||
def __init__(self, match):
|
||||
HotleakExtractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2018-2021 Mike Fährmann
|
||||
# Copyright 2018-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -133,20 +133,7 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
||||
directory_fmt = ("{category}", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
|
||||
test = (
|
||||
("https://idol.sankakucomplex.com/?tags=lyumos", {
|
||||
"count": 5,
|
||||
"range": "18-22",
|
||||
"pattern": r"https://is\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}"
|
||||
r"/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
|
||||
}),
|
||||
("https://idol.sankakucomplex.com/?tags=order:favcount", {
|
||||
"count": 5,
|
||||
"range": "18-22",
|
||||
}),
|
||||
("https://idol.sankakucomplex.com"
|
||||
"/?tags=lyumos+wreath&page=3&next=694215"),
|
||||
)
|
||||
example = "https://idol.sankakucomplex.com/?tags=TAGS"
|
||||
per_page = 20
|
||||
|
||||
def __init__(self, match):
|
||||
@ -214,9 +201,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
|
||||
directory_fmt = ("{category}", "pool", "{pool}")
|
||||
archive_fmt = "p_{pool}_{id}"
|
||||
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pool/show/(\d+)"
|
||||
test = ("https://idol.sankakucomplex.com/pool/show/145", {
|
||||
"count": 3,
|
||||
})
|
||||
example = "https://idol.sankakucomplex.com/pool/show/12345"
|
||||
per_page = 24
|
||||
|
||||
def __init__(self, match):
|
||||
@ -251,17 +236,7 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor):
|
||||
subcategory = "post"
|
||||
archive_fmt = "{id}"
|
||||
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"
|
||||
test = ("https://idol.sankakucomplex.com/post/show/694215", {
|
||||
"content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
|
||||
"options": (("tags", True),),
|
||||
"keyword": {
|
||||
"tags_character": "shani_(the_witcher)",
|
||||
"tags_copyright": "the_witcher",
|
||||
"tags_idol": str,
|
||||
"tags_medium": str,
|
||||
"tags_general": str,
|
||||
},
|
||||
})
|
||||
example = "https://idol.sankakucomplex.com/post/show/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
IdolcomplexExtractor.__init__(self, match)
|
||||
|
@ -9,7 +9,7 @@
|
||||
"""Extractors for https://www.imagebam.com/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from .. import text
|
||||
import re
|
||||
|
||||
|
||||
@ -46,26 +46,7 @@ class ImagebamGalleryExtractor(ImagebamExtractor):
|
||||
archive_fmt = "{gallery_key}_{image_key}"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?imagebam\.com"
|
||||
r"(/(?:gallery/|view/G)[a-zA-Z0-9]+)")
|
||||
test = (
|
||||
("https://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", {
|
||||
"url": "76d976788ae2757ac81694736b07b72356f5c4c8",
|
||||
"keyword": "b048478b1bbba3072a7fa9fcc40630b3efad1f6c",
|
||||
"content": "596e6bfa157f2c7169805d50075c2986549973a8",
|
||||
}),
|
||||
("http://www.imagebam.com/gallery/op9dwcklwdrrguibnkoe7jxgvig30o5p", {
|
||||
# more than 100 images; see issue #219
|
||||
"count": 107,
|
||||
"url": "32ae6fe5dc3e4ca73ff6252e522d16473595d1d1",
|
||||
}),
|
||||
("http://www.imagebam.com/gallery/gsl8teckymt4vbvx1stjkyk37j70va2c", {
|
||||
"exception": exception.HttpError,
|
||||
}),
|
||||
# /view/ path (#2378)
|
||||
("https://www.imagebam.com/view/GA3MT1", {
|
||||
"url": "35018ce1e00a2d2825a33d3cd37857edaf804919",
|
||||
"keyword": "3a9f98178f73694c527890c0d7ca9a92b46987ba",
|
||||
}),
|
||||
)
|
||||
example = "https://www.imagebam.com/view/GID"
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.root + self.path).text
|
||||
@ -110,24 +91,7 @@ class ImagebamImageExtractor(ImagebamExtractor):
|
||||
archive_fmt = "{image_key}"
|
||||
pattern = (r"(?:https?://)?(?:\w+\.)?imagebam\.com"
|
||||
r"(/(?:image/|view/M|(?:[0-9a-f]{2}/){3})[a-zA-Z0-9]+)")
|
||||
test = (
|
||||
("https://www.imagebam.com/image/94d56c502511890", {
|
||||
"url": "5e9ba3b1451f8ded0ae3a1b84402888893915d4a",
|
||||
"keyword": "2a4380d4b57554ff793898c2d6ec60987c86d1a1",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
}),
|
||||
("http://images3.imagebam.com/1d/8c/44/94d56c502511890.png"),
|
||||
# NSFW (#1534)
|
||||
("https://www.imagebam.com/image/0850951366904951", {
|
||||
"url": "d37297b17ed1615b4311c8ed511e50ce46e4c748",
|
||||
}),
|
||||
# /view/ path (#2378)
|
||||
("https://www.imagebam.com/view/ME8JOQP", {
|
||||
"url": "4dca72bbe61a0360185cf4ab2bed8265b49565b8",
|
||||
"keyword": "15a494c02fd30846b41b42a26117aedde30e4ceb",
|
||||
"content": "f81008666b17a42d8834c4749b910e1dc10a6e83",
|
||||
}),
|
||||
)
|
||||
example = "https://www.imagebam.com/view/MID"
|
||||
|
||||
def items(self):
|
||||
path = self.path
|
||||
|
@ -18,29 +18,7 @@ class ImagechestGalleryExtractor(GalleryExtractor):
|
||||
category = "imagechest"
|
||||
root = "https://imgchest.com"
|
||||
pattern = r"(?:https?://)?(?:www\.)?imgchest\.com/p/([A-Za-z0-9]{11})"
|
||||
test = (
|
||||
("https://imgchest.com/p/3na7kr3by8d", {
|
||||
"pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)",
|
||||
"keyword": {
|
||||
"count": 3,
|
||||
"gallery_id": "3na7kr3by8d",
|
||||
"num": int,
|
||||
"title": "Wizardry - Video Game From The Mid 80's",
|
||||
},
|
||||
"url": "7328ca4ec2459378d725e3be19f661d2b045feda",
|
||||
"content": "076959e65be30249a2c651fbe6090dc30ba85193",
|
||||
"count": 3
|
||||
}),
|
||||
# "Load More Files" button (#4028)
|
||||
("https://imgchest.com/p/9p4n3q2z7nq", {
|
||||
"pattern": r"https://cdn\.imgchest\.com/files/\w+\.(jpg|png)",
|
||||
"url": "f5674e8ba79d336193c9f698708d9dcc10e78cc7",
|
||||
"count": 52,
|
||||
}),
|
||||
("https://imgchest.com/p/xxxxxxxxxxx", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
)
|
||||
example = "https://imgchest.com/p/abcdefghijk"
|
||||
|
||||
def __init__(self, match):
|
||||
self.gallery_id = match.group(1)
|
||||
|
@ -23,9 +23,6 @@ class ImagefapExtractor(Extractor):
|
||||
archive_fmt = "{gallery_id}_{image_id}"
|
||||
request_interval = (2.0, 4.0)
|
||||
|
||||
def _init(self):
|
||||
self.session.headers["Referer"] = self.root + "/"
|
||||
|
||||
def request(self, url, **kwargs):
|
||||
response = Extractor.request(self, url, **kwargs)
|
||||
|
||||
@ -43,50 +40,7 @@ class ImagefapGalleryExtractor(ImagefapExtractor):
|
||||
"""Extractor for image galleries from imagefap.com"""
|
||||
subcategory = "gallery"
|
||||
pattern = BASE_PATTERN + r"/(?:gallery\.php\?gid=|gallery/|pictures/)(\d+)"
|
||||
|
||||
test = (
|
||||
("https://www.imagefap.com/gallery/7102714", {
|
||||
"pattern": r"https://cdnh?\.imagefap\.com"
|
||||
r"/images/full/\d+/\d+/\d+\.jpg",
|
||||
"keyword": "bdcb75b1e4b9dddc718f3d66e1a58afa9d81a38b",
|
||||
"content": "694a0a57385980a6f90fbc296cadcd6c11ba2dab",
|
||||
}),
|
||||
("https://www.imagefap.com/gallery/7876223", {
|
||||
"pattern": r"https://cdnh?\.imagefap\.com"
|
||||
r"/images/full/\d+/\d+/\d+\.jpg",
|
||||
"keyword": {
|
||||
"categories": ["Asses", "Softcore", "Pornstars"],
|
||||
"count": 44,
|
||||
"description": "",
|
||||
"gallery_id": 7876223,
|
||||
"image_id": int,
|
||||
"num": int,
|
||||
"tags": ["big ass", "panties", "horny",
|
||||
"pussy", "exposed", "outdoor"],
|
||||
"title": "Kelsi Monroe in lingerie",
|
||||
"uploader": "BdRachel",
|
||||
},
|
||||
"count": 44,
|
||||
}),
|
||||
# description (#3905)
|
||||
("https://www.imagefap.com/gallery/6180555", {
|
||||
"range": "1",
|
||||
"keyword": {
|
||||
"categories": ["Amateur", "Softcore", "Homemade"],
|
||||
"count": 36,
|
||||
"description": "Nude and dressed sluts showing off the goods",
|
||||
"gallery_id": 6180555,
|
||||
"image_id": int,
|
||||
"num": int,
|
||||
"tags": [] ,
|
||||
"title": "Dressed or Undressed MG*",
|
||||
"uploader": "splitopen",
|
||||
},
|
||||
}),
|
||||
("https://www.imagefap.com/pictures/7102714"),
|
||||
("https://www.imagefap.com/gallery.php?gid=7102714"),
|
||||
("https://beta.imagefap.com/gallery.php?gid=7102714"),
|
||||
)
|
||||
example = "https://www.imagefap.com/gallery/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
ImagefapExtractor.__init__(self, match)
|
||||
@ -157,22 +111,7 @@ class ImagefapImageExtractor(ImagefapExtractor):
|
||||
"""Extractor for single images from imagefap.com"""
|
||||
subcategory = "image"
|
||||
pattern = BASE_PATTERN + r"/photo/(\d+)"
|
||||
test = (
|
||||
("https://www.imagefap.com/photo/1962981893", {
|
||||
"pattern": r"https://cdnh?\.imagefap\.com"
|
||||
r"/images/full/65/196/1962981893\.jpg",
|
||||
"keyword": {
|
||||
"date": "21/08/2014",
|
||||
"gallery_id": 7876223,
|
||||
"height": 1600,
|
||||
"image_id": 1962981893,
|
||||
"title": "Kelsi Monroe in lingerie",
|
||||
"uploader": "BdRachel",
|
||||
"width": 1066,
|
||||
},
|
||||
}),
|
||||
("https://beta.imagefap.com/photo/1962981893"),
|
||||
)
|
||||
example = "https://www.imagefap.com/photo/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
ImagefapExtractor.__init__(self, match)
|
||||
@ -213,35 +152,7 @@ class ImagefapFolderExtractor(ImagefapExtractor):
|
||||
pattern = (BASE_PATTERN + r"/(?:organizer/|"
|
||||
r"(?:usergallery\.php\?user(id)?=([^&#]+)&"
|
||||
r"|profile/([^/?#]+)/galleries\?)folderid=)(\d+|-1)")
|
||||
test = (
|
||||
("https://www.imagefap.com/organizer/409758", {
|
||||
"pattern": r"https://www\.imagefap\.com/gallery/7876223",
|
||||
"url": "37822523e6e4a56feb9dea35653760c86b44ff89",
|
||||
"count": 1,
|
||||
}),
|
||||
(("https://www.imagefap.com/usergallery.php"
|
||||
"?userid=1981976&folderid=409758"), {
|
||||
"url": "37822523e6e4a56feb9dea35653760c86b44ff89",
|
||||
}),
|
||||
(("https://www.imagefap.com/usergallery.php"
|
||||
"?user=BdRachel&folderid=409758"), {
|
||||
"url": "37822523e6e4a56feb9dea35653760c86b44ff89",
|
||||
}),
|
||||
("https://www.imagefap.com/profile/BdRachel/galleries?folderid=-1", {
|
||||
"pattern": ImagefapGalleryExtractor.pattern,
|
||||
"range": "1-40",
|
||||
}),
|
||||
(("https://www.imagefap.com/usergallery.php"
|
||||
"?userid=1981976&folderid=-1"), {
|
||||
"pattern": ImagefapGalleryExtractor.pattern,
|
||||
"range": "1-40",
|
||||
}),
|
||||
(("https://www.imagefap.com/usergallery.php"
|
||||
"?user=BdRachel&folderid=-1"), {
|
||||
"pattern": ImagefapGalleryExtractor.pattern,
|
||||
"range": "1-40",
|
||||
}),
|
||||
)
|
||||
example = "https://www.imagefap.com/organizer/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
ImagefapExtractor.__init__(self, match)
|
||||
@ -293,20 +204,7 @@ class ImagefapUserExtractor(ImagefapExtractor):
|
||||
pattern = (BASE_PATTERN +
|
||||
r"/(?:profile(?:\.php\?user=|/)([^/?#]+)(?:/galleries)?"
|
||||
r"|usergallery\.php\?userid=(\d+))(?:$|#)")
|
||||
test = (
|
||||
("https://www.imagefap.com/profile/BdRachel", {
|
||||
"pattern": ImagefapFolderExtractor.pattern,
|
||||
"count": ">= 18",
|
||||
}),
|
||||
("https://www.imagefap.com/usergallery.php?userid=1862791", {
|
||||
"pattern": r"https://www\.imagefap\.com"
|
||||
r"/profile/LucyRae/galleries\?folderid=-1",
|
||||
"count": 1,
|
||||
}),
|
||||
("https://www.imagefap.com/profile/BdRachel/galleries"),
|
||||
("https://www.imagefap.com/profile.php?user=BdRachel"),
|
||||
("https://beta.imagefap.com/profile.php?user=BdRachel"),
|
||||
)
|
||||
example = "https://www.imagefap.com/profile/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
ImagefapExtractor.__init__(self, match)
|
||||
|
@ -74,34 +74,7 @@ class ImxtoImageExtractor(ImagehostImageExtractor):
|
||||
category = "imxto"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?((?:imx\.to|img\.yt)"
|
||||
r"/(?:i/|img-)(\w+)(\.html)?)")
|
||||
test = (
|
||||
("https://imx.to/i/1qdeva", { # new-style URL
|
||||
"url": "ab2173088a6cdef631d7a47dec4a5da1c6a00130",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
"keyword": {
|
||||
"size" : 18,
|
||||
"width" : 64,
|
||||
"height": 32,
|
||||
"hash" : "94d56c599223c59f3feb71ea603484d1",
|
||||
},
|
||||
}),
|
||||
("https://imx.to/img-57a2050547b97.html", { # old-style URL
|
||||
"url": "a83fe6ef1909a318c4d49fcf2caf62f36c3f9204",
|
||||
"content": "54592f2635674c25677c6872db3709d343cdf92f",
|
||||
"keyword": {
|
||||
"size" : 5284,
|
||||
"width" : 320,
|
||||
"height": 160,
|
||||
"hash" : "40da6aaa7b8c42b18ef74309bbc713fc",
|
||||
},
|
||||
}),
|
||||
("https://img.yt/img-57a2050547b97.html", { # img.yt domain
|
||||
"url": "a83fe6ef1909a318c4d49fcf2caf62f36c3f9204",
|
||||
}),
|
||||
("https://imx.to/img-57a2050547b98.html", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
)
|
||||
example = "https://imx.to/i/ID"
|
||||
_params = "simple"
|
||||
_encoding = "utf-8"
|
||||
|
||||
@ -140,11 +113,7 @@ class ImxtoGalleryExtractor(ImagehostImageExtractor):
|
||||
category = "imxto"
|
||||
subcategory = "gallery"
|
||||
pattern = r"(?:https?://)?(?:www\.)?(imx\.to/g/([^/?#]+))"
|
||||
test = ("https://imx.to/g/ozdy", {
|
||||
"pattern": ImxtoImageExtractor.pattern,
|
||||
"keyword": {"title": "untitled gallery"},
|
||||
"count": 40,
|
||||
})
|
||||
example = "https://imx.to/g/ID"
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.page_url).text
|
||||
@ -162,11 +131,7 @@ class AcidimgImageExtractor(ImagehostImageExtractor):
|
||||
"""Extractor for single images from acidimg.cc"""
|
||||
category = "acidimg"
|
||||
pattern = r"(?:https?://)?((?:www\.)?acidimg\.cc/img-([a-z0-9]+)\.html)"
|
||||
test = ("https://acidimg.cc/img-5acb6b9de4640.html", {
|
||||
"url": "f132a630006e8d84f52d59555191ed82b3b64c04",
|
||||
"keyword": "135347ab4345002fc013863c0d9419ba32d98f78",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})
|
||||
example = "https://acidimg.cc/img-abc123.html"
|
||||
_params = "simple"
|
||||
_encoding = "utf-8"
|
||||
|
||||
@ -189,26 +154,13 @@ class ImagevenueImageExtractor(ImagehostImageExtractor):
|
||||
category = "imagevenue"
|
||||
pattern = (r"(?:https?://)?((?:www|img\d+)\.imagevenue\.com"
|
||||
r"/([A-Z0-9]{8,10}|view/.*|img\.php\?.*))")
|
||||
test = (
|
||||
("https://www.imagevenue.com/ME13LS07", {
|
||||
"pattern": r"https://cdn-images\.imagevenue\.com"
|
||||
r"/10/ac/05/ME13LS07_o\.png",
|
||||
"keyword": "ae15d6e3b2095f019eee84cd896700cd34b09c36",
|
||||
"content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee",
|
||||
}),
|
||||
(("https://www.imagevenue.com/view/o?i=92518_13732377"
|
||||
"annakarina424200712535AM_122_486lo.jpg&h=img150&l=loc486"), {
|
||||
"url": "8bf0254e29250d8f5026c0105bbdda3ee3d84980",
|
||||
}),
|
||||
(("http://img28116.imagevenue.com/img.php"
|
||||
"?image=th_52709_test_122_64lo.jpg"), {
|
||||
"url": "f98e3091df7f48a05fb60fbd86f789fc5ec56331",
|
||||
}),
|
||||
)
|
||||
example = "https://www.imagevenue.com/ME123456789"
|
||||
|
||||
def get_info(self, page):
|
||||
pos = page.index('class="card-body')
|
||||
url, pos = text.extract(page, '<img src="', '"', pos)
|
||||
if url.endswith("/loader.svg"):
|
||||
url, pos = text.extract(page, '<img src="', '"', pos)
|
||||
filename, pos = text.extract(page, 'alt="', '"', pos)
|
||||
return url, text.unescape(filename)
|
||||
|
||||
@ -218,17 +170,7 @@ class ImagetwistImageExtractor(ImagehostImageExtractor):
|
||||
category = "imagetwist"
|
||||
pattern = (r"(?:https?://)?((?:www\.|phun\.)?"
|
||||
r"image(?:twist|haha)\.com/([a-z0-9]{12}))")
|
||||
test = (
|
||||
("https://imagetwist.com/f1i2s4vhvbrq/test.png", {
|
||||
"url": "8d5e168c0bee30211f821c6f3b2116e419d42671",
|
||||
"keyword": "d1060a4c2e3b73b83044e20681712c0ffdd6cfef",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
}),
|
||||
("https://www.imagetwist.com/f1i2s4vhvbrq/test.png"),
|
||||
("https://phun.imagetwist.com/f1i2s4vhvbrq/test.png"),
|
||||
("https://imagehaha.com/f1i2s4vhvbrq/test.png"),
|
||||
("https://www.imagehaha.com/f1i2s4vhvbrq/test.png"),
|
||||
)
|
||||
example = "https://imagetwist.com/123456abcdef/NAME.EXT"
|
||||
|
||||
@property
|
||||
@memcache(maxage=3*3600)
|
||||
@ -245,11 +187,7 @@ class ImgspiceImageExtractor(ImagehostImageExtractor):
|
||||
"""Extractor for single images from imgspice.com"""
|
||||
category = "imgspice"
|
||||
pattern = r"(?:https?://)?((?:www\.)?imgspice\.com/([^/?#]+))"
|
||||
test = ("https://imgspice.com/nwfwtpyog50y/test.png.html", {
|
||||
"url": "b8c30a8f51ee1012959a4cfd46197fabf14de984",
|
||||
"keyword": "100e310a19a2fa22d87e1bbc427ecb9f6501e0c0",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})
|
||||
example = "https://imgspice.com/ID/NAME.EXT.html"
|
||||
|
||||
def get_info(self, page):
|
||||
pos = page.find('id="imgpreview"')
|
||||
@ -265,11 +203,7 @@ class PixhostImageExtractor(ImagehostImageExtractor):
|
||||
category = "pixhost"
|
||||
pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
|
||||
r"/show/\d+/(\d+)_[^/?#]+)")
|
||||
test = ("http://pixhost.to/show/190/130327671_test-.png", {
|
||||
"url": "4e5470dcf6513944773044d40d883221bbc46cff",
|
||||
"keyword": "3bad6d59db42a5ebbd7842c2307e1c3ebd35e6b0",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})
|
||||
example = "https://pixhost.to/show/123/12345_NAME.EXT"
|
||||
_cookies = {"pixhostads": "1", "pixhosttest": "1"}
|
||||
|
||||
def get_info(self, page):
|
||||
@ -284,10 +218,7 @@ class PixhostGalleryExtractor(ImagehostImageExtractor):
|
||||
subcategory = "gallery"
|
||||
pattern = (r"(?:https?://)?((?:www\.)?pixhost\.(?:to|org)"
|
||||
r"/gallery/([^/?#]+))")
|
||||
test = ("https://pixhost.to/gallery/jSMFq", {
|
||||
"pattern": PixhostImageExtractor.pattern,
|
||||
"count": 3,
|
||||
})
|
||||
example = "https://pixhost.to/gallery/ID"
|
||||
|
||||
def items(self):
|
||||
page = text.extr(self.request(
|
||||
@ -300,13 +231,9 @@ class PixhostGalleryExtractor(ImagehostImageExtractor):
|
||||
class PostimgImageExtractor(ImagehostImageExtractor):
|
||||
"""Extractor for single images from postimages.org"""
|
||||
category = "postimg"
|
||||
pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
|
||||
r"/(?!gallery/)(?:image/)?([^/?#]+)/?)")
|
||||
test = ("https://postimg.cc/Wtn2b3hC", {
|
||||
"url": "72f3c8b1d6c6601a20ad58f35635494b4891a99e",
|
||||
"keyword": "2d05808d04e4e83e33200db83521af06e3147a84",
|
||||
"content": "cfaa8def53ed1a575e0c665c9d6d8cf2aac7a0ee",
|
||||
})
|
||||
pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
|
||||
r"\.(?:cc|org)/(?!gallery/)(?:image/)?([^/?#]+)/?)")
|
||||
example = "https://postimages.org/ID"
|
||||
|
||||
def get_info(self, page):
|
||||
pos = page.index(' id="download"')
|
||||
@ -319,12 +246,9 @@ class PostimgGalleryExtractor(ImagehostImageExtractor):
|
||||
"""Extractor for images galleries from postimages.org"""
|
||||
category = "postimg"
|
||||
subcategory = "gallery"
|
||||
pattern = (r"(?:https?://)?((?:www\.)?(?:postimg|pixxxels)\.(?:cc|org)"
|
||||
r"/(?:gallery/)([^/?#]+)/?)")
|
||||
test = ("https://postimg.cc/gallery/wxpDLgX", {
|
||||
"pattern": PostimgImageExtractor.pattern,
|
||||
"count": 22,
|
||||
})
|
||||
pattern = (r"(?:https?://)?((?:www\.)?(?:postim(?:ages|g)|pixxxels)"
|
||||
r"\.(?:cc|org)/gallery/([^/?#]+))")
|
||||
example = "https://postimages.org/gallery/ID"
|
||||
|
||||
def items(self):
|
||||
page = self.request(self.page_url).text
|
||||
@ -338,11 +262,7 @@ class TurboimagehostImageExtractor(ImagehostImageExtractor):
|
||||
category = "turboimagehost"
|
||||
pattern = (r"(?:https?://)?((?:www\.)?turboimagehost\.com"
|
||||
r"/p/(\d+)/[^/?#]+\.html)")
|
||||
test = ("https://www.turboimagehost.com/p/39078423/test--.png.html", {
|
||||
"url": "b94de43612318771ced924cb5085976f13b3b90e",
|
||||
"keyword": "704757ca8825f51cec516ec44c1e627c1f2058ca",
|
||||
"content": "f38b54b17cd7462e687b58d83f00fca88b1b105a",
|
||||
})
|
||||
example = "https://www.turboimagehost.com/p/12345/NAME.EXT.html"
|
||||
|
||||
def get_info(self, page):
|
||||
url = text.extract(page, 'src="', '"', page.index("<img "))[0]
|
||||
@ -353,10 +273,7 @@ class ViprImageExtractor(ImagehostImageExtractor):
|
||||
"""Extractor for single images from vipr.im"""
|
||||
category = "vipr"
|
||||
pattern = r"(?:https?://)?(vipr\.im/(\w+))"
|
||||
test = ("https://vipr.im/kcd5jcuhgs3v.html", {
|
||||
"url": "88f6a3ecbf3356a11ae0868b518c60800e070202",
|
||||
"keyword": "c432e8a1836b0d97045195b745731c2b1bb0e771",
|
||||
})
|
||||
example = "https://vipr.im/abc123.html"
|
||||
|
||||
def get_info(self, page):
|
||||
url = text.extr(page, '<img src="', '"')
|
||||
@ -367,11 +284,7 @@ class ImgclickImageExtractor(ImagehostImageExtractor):
|
||||
"""Extractor for single images from imgclick.net"""
|
||||
category = "imgclick"
|
||||
pattern = r"(?:https?://)?((?:www\.)?imgclick\.net/([^/?#]+))"
|
||||
test = ("http://imgclick.net/4tbrre1oxew9/test-_-_.png.html", {
|
||||
"url": "140dcb250a325f2d26b2d918c18b8ac6a2a0f6ab",
|
||||
"keyword": "6895256143eab955622fc149aa367777a8815ba3",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})
|
||||
example = "http://imgclick.net/abc123/NAME.EXT.html"
|
||||
_https = False
|
||||
_params = "complex"
|
||||
|
||||
@ -385,11 +298,7 @@ class FappicImageExtractor(ImagehostImageExtractor):
|
||||
"""Extractor for single images from fappic.com"""
|
||||
category = "fappic"
|
||||
pattern = r"(?:https?://)?((?:www\.)?fappic\.com/(\w+)/[^/?#]+)"
|
||||
test = ("https://www.fappic.com/98wxqcklyh8k/test.png", {
|
||||
"pattern": r"https://img\d+\.fappic\.com/img/\w+/test\.png",
|
||||
"keyword": "433b1d310b0ff12ad8a71ac7b9d8ba3f8cd1e898",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
})
|
||||
example = "https://fappic.com/abc123/NAME.EXT"
|
||||
|
||||
def get_info(self, page):
|
||||
url , pos = text.extract(page, '<a href="#"><img src="', '"')
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019 Mike Fährmann
|
||||
# Copyright 2019-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -114,27 +114,7 @@ class ImgbbAlbumExtractor(ImgbbExtractor):
|
||||
subcategory = "album"
|
||||
directory_fmt = ("{category}", "{user}", "{album_name} {album_id}")
|
||||
pattern = r"(?:https?://)?ibb\.co/album/([^/?#]+)/?(?:\?([^#]+))?"
|
||||
test = (
|
||||
("https://ibb.co/album/i5PggF", {
|
||||
"range": "1-80",
|
||||
"url": "70afec9fcc3a6de62a6b644b487d892d8d47cf1a",
|
||||
"keyword": "569e1d88ebdd27655387559cdf1cd526a3e1ab69",
|
||||
}),
|
||||
("https://ibb.co/album/i5PggF?sort=title_asc", {
|
||||
"range": "1-80",
|
||||
"url": "afdf5fc95d8e09d77e8f44312f3e9b843987bb5a",
|
||||
"keyword": "f090e14d0e5f7868595082b2c95da1309c84872d",
|
||||
}),
|
||||
# no user data (#471)
|
||||
("https://ibb.co/album/kYKpwF", {
|
||||
"url": "ac0abcfcb89f4df6adc2f7e4ff872f3b03ef1bc7",
|
||||
"keyword": {"user": ""},
|
||||
}),
|
||||
# private
|
||||
("https://ibb.co/album/hqgWrF", {
|
||||
"exception": exception.HttpError,
|
||||
}),
|
||||
)
|
||||
example = "https://ibb.co/album/ID"
|
||||
|
||||
def __init__(self, match):
|
||||
ImgbbExtractor.__init__(self, match)
|
||||
@ -169,10 +149,7 @@ class ImgbbUserExtractor(ImgbbExtractor):
|
||||
"""Extractor for user profiles in imgbb.com"""
|
||||
subcategory = "user"
|
||||
pattern = r"(?:https?://)?([\w-]+)\.imgbb\.com/?(?:\?([^#]+))?$"
|
||||
test = ("https://folkie.imgbb.com", {
|
||||
"range": "1-80",
|
||||
"pattern": r"https?://i\.ibb\.co/\w+/[^/?#]+",
|
||||
})
|
||||
example = "https://USER.imgbb.com"
|
||||
|
||||
def __init__(self, match):
|
||||
ImgbbExtractor.__init__(self, match)
|
||||
@ -196,19 +173,7 @@ class ImgbbUserExtractor(ImgbbExtractor):
|
||||
class ImgbbImageExtractor(ImgbbExtractor):
|
||||
subcategory = "image"
|
||||
pattern = r"(?:https?://)?ibb\.co/(?!album/)([^/?#]+)"
|
||||
test = ("https://ibb.co/fUqh5b", {
|
||||
"pattern": r"https://i\.ibb\.co/g3kvx80/Arundel-Ireeman-5\.jpg",
|
||||
"content": "c5a0965178a8b357acd8aa39660092918c63795e",
|
||||
"keyword": {
|
||||
"id" : "fUqh5b",
|
||||
"title" : "Arundel Ireeman 5",
|
||||
"url" : "https://i.ibb.co/g3kvx80/Arundel-Ireeman-5.jpg",
|
||||
"width" : 960,
|
||||
"height": 719,
|
||||
"user" : "folkie",
|
||||
"extension": "jpg",
|
||||
},
|
||||
})
|
||||
example = "https://ibb.co/ID"
|
||||
|
||||
def __init__(self, match):
|
||||
ImgbbExtractor.__init__(self, match)
|
||||
|
@ -1,12 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2014-2019 Mike Fährmann
|
||||
# Copyright 2014-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extract images from galleries at https://imgbox.com/"""
|
||||
"""Extractors for https://imgbox.com/"""
|
||||
|
||||
from .common import Extractor, Message, AsynchronousMixin
|
||||
from .. import text, exception
|
||||
@ -63,20 +63,7 @@ class ImgboxGalleryExtractor(AsynchronousMixin, ImgboxExtractor):
|
||||
filename_fmt = "{num:>03}-{filename}.{extension}"
|
||||
archive_fmt = "{gallery_key}_{image_key}"
|
||||
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/g/([A-Za-z0-9]{10})"
|
||||
test = (
|
||||
("https://imgbox.com/g/JaX5V5HX7g", {
|
||||
"url": "da4f15b161461119ee78841d4b8e8d054d95f906",
|
||||
"keyword": "4b1e62820ac2c6205b7ad0b6322cc8e00dbe1b0c",
|
||||
"content": "d20307dc8511ac24d688859c55abf2e2cc2dd3cc",
|
||||
}),
|
||||
("https://imgbox.com/g/cUGEkRbdZZ", {
|
||||
"url": "76506a3aab175c456910851f66227e90484ca9f7",
|
||||
"keyword": "fb0427b87983197849fb2887905e758f3e50cb6e",
|
||||
}),
|
||||
("https://imgbox.com/g/JaX5V5HX7h", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
)
|
||||
example = "https://imgbox.com/g/12345abcde"
|
||||
|
||||
def __init__(self, match):
|
||||
ImgboxExtractor.__init__(self, match)
|
||||
@ -106,16 +93,7 @@ class ImgboxImageExtractor(ImgboxExtractor):
|
||||
subcategory = "image"
|
||||
archive_fmt = "{image_key}"
|
||||
pattern = r"(?:https?://)?(?:www\.)?imgbox\.com/([A-Za-z0-9]{8})"
|
||||
test = (
|
||||
("https://imgbox.com/qHhw7lpG", {
|
||||
"url": "ee9cdea6c48ad0161c1b5f81f6b0c9110997038c",
|
||||
"keyword": "dfc72310026b45f3feb4f9cada20c79b2575e1af",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
}),
|
||||
("https://imgbox.com/qHhw7lpH", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
)
|
||||
example = "https://imgbox.com/1234abcd"
|
||||
|
||||
def __init__(self, match):
|
||||
ImgboxExtractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2015-2022 Mike Fährmann
|
||||
# Copyright 2015-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -17,24 +17,7 @@ class ImgthGalleryExtractor(GalleryExtractor):
|
||||
category = "imgth"
|
||||
root = "https://imgth.com"
|
||||
pattern = r"(?:https?://)?(?:www\.)?imgth\.com/gallery/(\d+)"
|
||||
test = (
|
||||
("https://imgth.com/gallery/37/wallpaper-anime", {
|
||||
"url": "4ae1d281ca2b48952cf5cca57e9914402ad72748",
|
||||
"pattern": r"https://imgth\.com/images/2009/11/25"
|
||||
r"/wallpaper-anime_\w+\.jpg",
|
||||
"keyword": {
|
||||
"count": 12,
|
||||
"date": "dt:2009-11-25 18:21:00",
|
||||
"extension": "jpg",
|
||||
"filename": r"re:wallpaper-anime_\w+",
|
||||
"gallery_id": 37,
|
||||
"num": int,
|
||||
"title": "Wallpaper anime",
|
||||
"user": "celebrities",
|
||||
},
|
||||
}),
|
||||
("https://www.imgth.com/gallery/37/wallpaper-anime"),
|
||||
)
|
||||
example = "https://imgth.com/gallery/123/TITLE"
|
||||
|
||||
def __init__(self, match):
|
||||
self.gallery_id = gid = match.group(1)
|
||||
|
@ -11,7 +11,6 @@
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.(?:com|io)"
|
||||
|
||||
|
||||
@ -65,69 +64,7 @@ class ImgurImageExtractor(ImgurExtractor):
|
||||
archive_fmt = "{id}"
|
||||
pattern = (BASE_PATTERN + r"/(?!gallery|search)"
|
||||
r"(?:r/\w+/)?(\w{7}|\w{5})[sbtmlh]?")
|
||||
test = (
|
||||
("https://imgur.com/21yMxCS", {
|
||||
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
|
||||
"content": "0c8768055e4e20e7c7259608b67799171b691140",
|
||||
"keyword": {
|
||||
"account_id" : 0,
|
||||
"comment_count" : int,
|
||||
"cover_id" : "21yMxCS",
|
||||
"date" : "dt:2016-11-10 14:24:35",
|
||||
"description" : "",
|
||||
"downvote_count": int,
|
||||
"duration" : 0,
|
||||
"ext" : "png",
|
||||
"favorite" : False,
|
||||
"favorite_count": 0,
|
||||
"has_sound" : False,
|
||||
"height" : 32,
|
||||
"id" : "21yMxCS",
|
||||
"image_count" : 1,
|
||||
"in_most_viral" : False,
|
||||
"is_ad" : False,
|
||||
"is_album" : False,
|
||||
"is_animated" : False,
|
||||
"is_looping" : False,
|
||||
"is_mature" : False,
|
||||
"is_pending" : False,
|
||||
"mime_type" : "image/png",
|
||||
"name" : "test-テスト",
|
||||
"point_count" : int,
|
||||
"privacy" : "",
|
||||
"score" : int,
|
||||
"size" : 182,
|
||||
"title" : "Test",
|
||||
"upvote_count" : int,
|
||||
"url" : "https://i.imgur.com/21yMxCS.png",
|
||||
"view_count" : int,
|
||||
"width" : 64,
|
||||
},
|
||||
}),
|
||||
("http://imgur.com/0gybAXR", { # gifv/mp4 video
|
||||
"url": "a2220eb265a55b0c95e0d3d721ec7665460e3fd7",
|
||||
"content": "a3c080e43f58f55243ab830569ba02309d59abfc",
|
||||
}),
|
||||
("https://imgur.com/XFfsmuC", { # missing title in API response (#467)
|
||||
"keyword": {"title": "Tears are a natural response to irritants"},
|
||||
}),
|
||||
("https://imgur.com/1Nily2P", { # animated png
|
||||
"pattern": "https://i.imgur.com/1Nily2P.png",
|
||||
}),
|
||||
("https://imgur.com/zzzzzzz", { # not found
|
||||
"exception": exception.HttpError,
|
||||
}),
|
||||
("https://m.imgur.com/r/Celebs/iHJ7tsM"),
|
||||
("https://www.imgur.com/21yMxCS"), # www
|
||||
("https://m.imgur.com/21yMxCS"), # mobile
|
||||
("https://imgur.com/zxaY6"), # 5 character key
|
||||
("https://imgur.io/zxaY6"), # .io
|
||||
("https://i.imgur.com/21yMxCS.png"), # direct link
|
||||
("https://i.imgur.io/21yMxCS.png"), # direct link .io
|
||||
("https://i.imgur.com/21yMxCSh.png"), # direct link thumbnail
|
||||
("https://i.imgur.com/zxaY6.gif"), # direct link (short)
|
||||
("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb)
|
||||
)
|
||||
example = "https://imgur.com/abcdefg"
|
||||
|
||||
def items(self):
|
||||
image = self.api.image(self.key)
|
||||
@ -152,71 +89,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
|
||||
filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}"
|
||||
archive_fmt = "{album[id]}_{id}"
|
||||
pattern = BASE_PATTERN + r"/a/(\w{7}|\w{5})"
|
||||
test = (
|
||||
("https://imgur.com/a/TcBmP", {
|
||||
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
|
||||
"keyword": {
|
||||
"album": {
|
||||
"account_id" : 0,
|
||||
"comment_count" : int,
|
||||
"cover_id" : "693j2Kr",
|
||||
"date" : "dt:2015-10-09 10:37:50",
|
||||
"description" : "",
|
||||
"downvote_count": 0,
|
||||
"favorite" : False,
|
||||
"favorite_count": 0,
|
||||
"id" : "TcBmP",
|
||||
"image_count" : 19,
|
||||
"in_most_viral" : False,
|
||||
"is_ad" : False,
|
||||
"is_album" : True,
|
||||
"is_mature" : False,
|
||||
"is_pending" : False,
|
||||
"privacy" : "private",
|
||||
"score" : int,
|
||||
"title" : "138",
|
||||
"upvote_count" : int,
|
||||
"url" : "https://imgur.com/a/TcBmP",
|
||||
"view_count" : int,
|
||||
"virality" : int,
|
||||
},
|
||||
"account_id" : 0,
|
||||
"count" : 19,
|
||||
"date" : "type:datetime",
|
||||
"description": "",
|
||||
"ext" : "jpg",
|
||||
"has_sound" : False,
|
||||
"height" : int,
|
||||
"id" : str,
|
||||
"is_animated": False,
|
||||
"is_looping" : False,
|
||||
"mime_type" : "image/jpeg",
|
||||
"name" : str,
|
||||
"num" : int,
|
||||
"size" : int,
|
||||
"title" : str,
|
||||
"type" : "image",
|
||||
"updated_at" : None,
|
||||
"url" : str,
|
||||
"width" : int,
|
||||
},
|
||||
}),
|
||||
("https://imgur.com/a/eD9CT", { # large album
|
||||
"url": "de748c181a04d18bef1de9d4f4866ef0a06d632b",
|
||||
}),
|
||||
("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash
|
||||
"url": "695ef0c950023362a0163ee5041796300db76674",
|
||||
}),
|
||||
("https://imgur.com/a/TcBmQ", {
|
||||
"exception": exception.HttpError,
|
||||
}),
|
||||
("https://imgur.com/a/pjOnJA0", { # empty, no 'media' (#2557)
|
||||
"count": 0,
|
||||
}),
|
||||
("https://www.imgur.com/a/TcBmP"), # www
|
||||
("https://imgur.io/a/TcBmP"), # .io
|
||||
("https://m.imgur.com/a/TcBmP"), # mobile
|
||||
)
|
||||
example = "https://imgur.com/a/abcde"
|
||||
|
||||
def items(self):
|
||||
album = self.api.album(self.key)
|
||||
@ -249,17 +122,7 @@ class ImgurGalleryExtractor(ImgurExtractor):
|
||||
"""Extractor for imgur galleries"""
|
||||
subcategory = "gallery"
|
||||
pattern = BASE_PATTERN + r"/(?:gallery|t/\w+)/(\w{7}|\w{5})"
|
||||
test = (
|
||||
("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380)
|
||||
"pattern": "https://imgur.com/zf2fIms",
|
||||
}),
|
||||
("https://imgur.com/gallery/eD9CT", {
|
||||
"pattern": "https://imgur.com/a/eD9CT",
|
||||
}),
|
||||
("https://imgur.com/t/unmuted/26sEhNr"),
|
||||
("https://imgur.com/t/cat/qSB8NbN"),
|
||||
("https://imgur.io/t/cat/qSB8NbN"), # .io
|
||||
)
|
||||
example = "https://imgur.com/gallery/abcde"
|
||||
|
||||
def items(self):
|
||||
if self.api.gallery(self.key)["is_album"]:
|
||||
@ -275,15 +138,7 @@ class ImgurUserExtractor(ImgurExtractor):
|
||||
"""Extractor for all images posted by a user"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/user/([^/?#]+)(?:/posts|/submitted)?/?$"
|
||||
test = (
|
||||
("https://imgur.com/user/Miguenzo", {
|
||||
"range": "1-100",
|
||||
"count": 100,
|
||||
"pattern": r"https://imgur\.com(/a)?/\w+$",
|
||||
}),
|
||||
("https://imgur.com/user/Miguenzo/posts"),
|
||||
("https://imgur.com/user/Miguenzo/submitted"),
|
||||
)
|
||||
example = "https://imgur.com/user/USER"
|
||||
|
||||
def items(self):
|
||||
return self._items_queue(self.api.account_submissions(self.key))
|
||||
@ -293,11 +148,7 @@ class ImgurFavoriteExtractor(ImgurExtractor):
|
||||
"""Extractor for a user's favorites"""
|
||||
subcategory = "favorite"
|
||||
pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/?$"
|
||||
test = ("https://imgur.com/user/Miguenzo/favorites", {
|
||||
"range": "1-100",
|
||||
"count": 100,
|
||||
"pattern": r"https://imgur\.com(/a)?/\w+$",
|
||||
})
|
||||
example = "https://imgur.com/user/USER/favorites"
|
||||
|
||||
def items(self):
|
||||
return self._items_queue(self.api.account_favorites(self.key))
|
||||
@ -307,16 +158,7 @@ class ImgurFavoriteFolderExtractor(ImgurExtractor):
|
||||
"""Extractor for a user's favorites folder"""
|
||||
subcategory = "favorite-folder"
|
||||
pattern = BASE_PATTERN + r"/user/([^/?#]+)/favorites/folder/(\d+)"
|
||||
test = (
|
||||
("https://imgur.com/user/mikf1/favorites/folder/11896757/public", {
|
||||
"pattern": r"https://imgur\.com(/a)?/\w+$",
|
||||
"count": 3,
|
||||
}),
|
||||
("https://imgur.com/user/mikf1/favorites/folder/11896741/private", {
|
||||
"pattern": r"https://imgur\.com(/a)?/\w+$",
|
||||
"count": 5,
|
||||
}),
|
||||
)
|
||||
example = "https://imgur.com/user/USER/favorites/folder/12345/TITLE"
|
||||
|
||||
def __init__(self, match):
|
||||
ImgurExtractor.__init__(self, match)
|
||||
@ -331,11 +173,7 @@ class ImgurSubredditExtractor(ImgurExtractor):
|
||||
"""Extractor for a subreddits's imgur links"""
|
||||
subcategory = "subreddit"
|
||||
pattern = BASE_PATTERN + r"/r/([^/?#]+)/?$"
|
||||
test = ("https://imgur.com/r/pics", {
|
||||
"range": "1-100",
|
||||
"count": 100,
|
||||
"pattern": r"https://imgur\.com(/a)?/\w+$",
|
||||
})
|
||||
example = "https://imgur.com/r/SUBREDDIT"
|
||||
|
||||
def items(self):
|
||||
return self._items_queue(self.api.gallery_subreddit(self.key))
|
||||
@ -345,11 +183,7 @@ class ImgurTagExtractor(ImgurExtractor):
|
||||
"""Extractor for imgur tag searches"""
|
||||
subcategory = "tag"
|
||||
pattern = BASE_PATTERN + r"/t/([^/?#]+)$"
|
||||
test = ("https://imgur.com/t/animals", {
|
||||
"range": "1-100",
|
||||
"count": 100,
|
||||
"pattern": r"https://imgur\.com(/a)?/\w+$",
|
||||
})
|
||||
example = "https://imgur.com/t/TAG"
|
||||
|
||||
def items(self):
|
||||
return self._items_queue(self.api.gallery_tag(self.key))
|
||||
@ -359,11 +193,7 @@ class ImgurSearchExtractor(ImgurExtractor):
|
||||
"""Extractor for imgur search results"""
|
||||
subcategory = "search"
|
||||
pattern = BASE_PATTERN + r"/search(?:/[^?#]+)?/?\?q=([^&#]+)"
|
||||
test = ("https://imgur.com/search?q=cute+cat", {
|
||||
"range": "1-100",
|
||||
"count": 100,
|
||||
"pattern": r"https://imgur\.com(/a)?/\w+$",
|
||||
})
|
||||
example = "https://imgur.com/search?q=UERY"
|
||||
|
||||
def items(self):
|
||||
key = text.unquote(self.key.replace("+", " "))
|
||||
@ -451,11 +281,7 @@ class ImgurAPI():
|
||||
params["client_id"] = self.client_id
|
||||
params["page"] = 0
|
||||
params["sort"] = "newest"
|
||||
|
||||
headers = {
|
||||
"Referer": "https://imgur.com/",
|
||||
"Origin": "https://imgur.com",
|
||||
}
|
||||
headers = {"Origin": "https://imgur.com"}
|
||||
|
||||
while True:
|
||||
data = self._call(endpoint, params, headers)["data"]
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2020-2022 Mike Fährmann
|
||||
# Copyright 2020-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -72,51 +72,7 @@ class InkbunnyUserExtractor(InkbunnyExtractor):
|
||||
"""Extractor for inkbunny user profiles"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/(?!s/)(gallery/|scraps/)?(\w+)(?:$|[/?#])"
|
||||
test = (
|
||||
("https://inkbunny.net/soina", {
|
||||
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
|
||||
r"/\d+/\d+_soina_.+",
|
||||
"range": "20-50",
|
||||
"keyword": {
|
||||
"date" : "type:datetime",
|
||||
"deleted" : bool,
|
||||
"file_id" : "re:[0-9]+",
|
||||
"filename" : r"re:[0-9]+_soina_\w+",
|
||||
"full_file_md5": "re:[0-9a-f]{32}",
|
||||
"mimetype" : str,
|
||||
"submission_id": "re:[0-9]+",
|
||||
"user_id" : "20969",
|
||||
"comments_count" : "re:[0-9]+",
|
||||
"deleted" : bool,
|
||||
"favorite" : bool,
|
||||
"favorites_count": "re:[0-9]+",
|
||||
"friends_only" : bool,
|
||||
"guest_block" : bool,
|
||||
"hidden" : bool,
|
||||
"pagecount" : "re:[0-9]+",
|
||||
"pools" : list,
|
||||
"pools_count" : int,
|
||||
"public" : bool,
|
||||
"rating_id" : "re:[0-9]+",
|
||||
"rating_name" : str,
|
||||
"ratings" : list,
|
||||
"scraps" : bool,
|
||||
"tags" : list,
|
||||
"title" : str,
|
||||
"type_name" : str,
|
||||
"username" : "soina",
|
||||
"views" : str,
|
||||
},
|
||||
}),
|
||||
("https://inkbunny.net/gallery/soina", {
|
||||
"range": "1-25",
|
||||
"keyword": {"scraps": False},
|
||||
}),
|
||||
("https://inkbunny.net/scraps/soina", {
|
||||
"range": "1-25",
|
||||
"keyword": {"scraps": True},
|
||||
}),
|
||||
)
|
||||
example = "https://inkbunny.net/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
kind, self.user = match.groups()
|
||||
@ -148,14 +104,7 @@ class InkbunnyPoolExtractor(InkbunnyExtractor):
|
||||
pattern = (BASE_PATTERN + r"/(?:"
|
||||
r"poolview_process\.php\?pool_id=(\d+)|"
|
||||
r"submissionsviewall\.php\?([^#]+&mode=pool&[^#]+))")
|
||||
test = (
|
||||
("https://inkbunny.net/poolview_process.php?pool_id=28985", {
|
||||
"count": 9,
|
||||
"keyword": {"pool_id": "28985"},
|
||||
}),
|
||||
("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
|
||||
"&mode=pool&pool_id=28985&page=1&orderby=pool_order&random=no"),
|
||||
)
|
||||
example = "https://inkbunny.net/poolview_process.php?pool_id=12345"
|
||||
|
||||
def __init__(self, match):
|
||||
InkbunnyExtractor.__init__(self, match)
|
||||
@ -185,16 +134,8 @@ class InkbunnyFavoriteExtractor(InkbunnyExtractor):
|
||||
pattern = (BASE_PATTERN + r"/(?:"
|
||||
r"userfavorites_process\.php\?favs_user_id=(\d+)|"
|
||||
r"submissionsviewall\.php\?([^#]+&mode=userfavs&[^#]+))")
|
||||
test = (
|
||||
("https://inkbunny.net/userfavorites_process.php?favs_user_id=20969", {
|
||||
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
|
||||
r"/\d+/\d+_\w+_.+",
|
||||
"range": "20-50",
|
||||
"keyword": {"favs_user_id": "20969"},
|
||||
}),
|
||||
("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
|
||||
"&mode=userfavs&random=no&orderby=fav_datetime&page=1&user_id=20969"),
|
||||
)
|
||||
example = ("https://inkbunny.net/userfavorites_process.php"
|
||||
"?favs_user_id=12345")
|
||||
|
||||
def __init__(self, match):
|
||||
InkbunnyExtractor.__init__(self, match)
|
||||
@ -225,26 +166,8 @@ class InkbunnySearchExtractor(InkbunnyExtractor):
|
||||
subcategory = "search"
|
||||
pattern = (BASE_PATTERN +
|
||||
r"/submissionsviewall\.php\?([^#]+&mode=search&[^#]+)")
|
||||
test = (("https://inkbunny.net/submissionsviewall.php?rid=ffffffffff"
|
||||
"&mode=search&page=1&orderby=create_datetime&text=cute"
|
||||
"&stringtype=and&keywords=yes&title=yes&description=no&artist="
|
||||
"&favsby=&type=&days=&keyword_id=&user_id=&random=&md5="), {
|
||||
"range": "1-10",
|
||||
"count": 10,
|
||||
"keyword": {
|
||||
"search": {
|
||||
"rid": "ffffffffff",
|
||||
"mode": "search",
|
||||
"page": "1",
|
||||
"orderby": "create_datetime",
|
||||
"text": "cute",
|
||||
"stringtype": "and",
|
||||
"keywords": "yes",
|
||||
"title": "yes",
|
||||
"description": "no",
|
||||
},
|
||||
},
|
||||
})
|
||||
example = ("https://inkbunny.net/submissionsviewall.php"
|
||||
"?text=TAG&mode=search&type=")
|
||||
|
||||
def __init__(self, match):
|
||||
InkbunnyExtractor.__init__(self, match)
|
||||
@ -279,15 +202,8 @@ class InkbunnyFollowingExtractor(InkbunnyExtractor):
|
||||
pattern = (BASE_PATTERN + r"/(?:"
|
||||
r"watchlist_process\.php\?mode=watching&user_id=(\d+)|"
|
||||
r"usersviewall\.php\?([^#]+&mode=watching&[^#]+))")
|
||||
test = (
|
||||
(("https://inkbunny.net/watchlist_process.php"
|
||||
"?mode=watching&user_id=20969"), {
|
||||
"pattern": InkbunnyUserExtractor.pattern,
|
||||
"count": ">= 90",
|
||||
}),
|
||||
("https://inkbunny.net/usersviewall.php?rid=ffffffffff"
|
||||
"&mode=watching&page=1&user_id=20969&orderby=added&namesonly="),
|
||||
)
|
||||
example = ("https://inkbunny.net/watchlist_process.php"
|
||||
"?mode=watching&user_id=12345")
|
||||
|
||||
def __init__(self, match):
|
||||
InkbunnyExtractor.__init__(self, match)
|
||||
@ -324,16 +240,7 @@ class InkbunnyPostExtractor(InkbunnyExtractor):
|
||||
"""Extractor for individual Inkbunny posts"""
|
||||
subcategory = "post"
|
||||
pattern = BASE_PATTERN + r"/s/(\d+)"
|
||||
test = (
|
||||
("https://inkbunny.net/s/1829715", {
|
||||
"pattern": r"https://[\w.]+\.metapix\.net/files/full"
|
||||
r"/2626/2626843_soina_dscn2296\.jpg",
|
||||
"content": "cf69d8dddf0822a12b4eef1f4b2258bd600b36c8",
|
||||
}),
|
||||
("https://inkbunny.net/s/2044094", {
|
||||
"count": 4,
|
||||
}),
|
||||
)
|
||||
example = "https://inkbunny.net/s/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
InkbunnyExtractor.__init__(self, match)
|
||||
|
@ -90,7 +90,9 @@ class InstagramExtractor(Extractor):
|
||||
file["_http_headers"] = video_headers
|
||||
text.nameext_from_url(url, file)
|
||||
yield Message.Url, url, file
|
||||
if not previews:
|
||||
if previews:
|
||||
file["media_id"] += "p"
|
||||
else:
|
||||
continue
|
||||
|
||||
url = file["display_url"]
|
||||
@ -396,11 +398,7 @@ class InstagramUserExtractor(InstagramExtractor):
|
||||
"""Extractor for an Instagram user profile"""
|
||||
subcategory = "user"
|
||||
pattern = USER_PATTERN + r"/?(?:$|[?#])"
|
||||
test = (
|
||||
("https://www.instagram.com/instagram/"),
|
||||
("https://www.instagram.com/instagram/?hl=en"),
|
||||
("https://www.instagram.com/id:25025320/"),
|
||||
)
|
||||
example = "https://www.instagram.com/USER/"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
@ -425,10 +423,7 @@ class InstagramPostsExtractor(InstagramExtractor):
|
||||
"""Extractor for an Instagram user's posts"""
|
||||
subcategory = "posts"
|
||||
pattern = USER_PATTERN + r"/posts"
|
||||
test = ("https://www.instagram.com/instagram/posts/", {
|
||||
"range": "1-16",
|
||||
"count": ">= 16",
|
||||
})
|
||||
example = "https://www.instagram.com/USER/posts/"
|
||||
|
||||
def posts(self):
|
||||
uid = self.api.user_id(self.item)
|
||||
@ -439,10 +434,7 @@ class InstagramReelsExtractor(InstagramExtractor):
|
||||
"""Extractor for an Instagram user's reels"""
|
||||
subcategory = "reels"
|
||||
pattern = USER_PATTERN + r"/reels"
|
||||
test = ("https://www.instagram.com/instagram/reels/", {
|
||||
"range": "40-60",
|
||||
"count": ">= 20",
|
||||
})
|
||||
example = "https://www.instagram.com/USER/reels/"
|
||||
|
||||
def posts(self):
|
||||
uid = self.api.user_id(self.item)
|
||||
@ -453,15 +445,7 @@ class InstagramTaggedExtractor(InstagramExtractor):
|
||||
"""Extractor for an Instagram user's tagged posts"""
|
||||
subcategory = "tagged"
|
||||
pattern = USER_PATTERN + r"/tagged"
|
||||
test = ("https://www.instagram.com/instagram/tagged/", {
|
||||
"range": "1-16",
|
||||
"count": ">= 16",
|
||||
"keyword": {
|
||||
"tagged_owner_id" : "25025320",
|
||||
"tagged_username" : "instagram",
|
||||
"tagged_full_name": "Instagram",
|
||||
},
|
||||
})
|
||||
example = "https://www.instagram.com/USER/tagged/"
|
||||
|
||||
def metadata(self):
|
||||
if self.item.startswith("id:"):
|
||||
@ -485,11 +469,7 @@ class InstagramGuideExtractor(InstagramExtractor):
|
||||
"""Extractor for an Instagram guide"""
|
||||
subcategory = "guide"
|
||||
pattern = USER_PATTERN + r"/guide/[^/?#]+/(\d+)"
|
||||
test = (("https://www.instagram.com/kadakaofficial/guide"
|
||||
"/knit-i-need-collection/18131821684305217/"), {
|
||||
"range": "1-16",
|
||||
"count": ">= 16",
|
||||
})
|
||||
example = "https://www.instagram.com/USER/guide/NAME/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
InstagramExtractor.__init__(self, match)
|
||||
@ -506,10 +486,7 @@ class InstagramSavedExtractor(InstagramExtractor):
|
||||
"""Extractor for an Instagram user's saved media"""
|
||||
subcategory = "saved"
|
||||
pattern = USER_PATTERN + r"/saved(?:/all-posts)?/?$"
|
||||
test = (
|
||||
("https://www.instagram.com/instagram/saved/"),
|
||||
("https://www.instagram.com/instagram/saved/all-posts/"),
|
||||
)
|
||||
example = "https://www.instagram.com/USER/saved/"
|
||||
|
||||
def posts(self):
|
||||
return self.api.user_saved()
|
||||
@ -519,9 +496,7 @@ class InstagramCollectionExtractor(InstagramExtractor):
|
||||
"""Extractor for Instagram collection"""
|
||||
subcategory = "collection"
|
||||
pattern = USER_PATTERN + r"/saved/([^/?#]+)/([^/?#]+)"
|
||||
test = (
|
||||
"https://www.instagram.com/instagram/saved/collection_name/123456789/",
|
||||
)
|
||||
example = "https://www.instagram.com/USER/saved/COLLECTION/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
InstagramExtractor.__init__(self, match)
|
||||
@ -543,14 +518,7 @@ class InstagramStoriesExtractor(InstagramExtractor):
|
||||
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
|
||||
r"/s(?:tories/(?:highlights/(\d+)|([^/?#]+)(?:/(\d+))?)"
|
||||
r"|/(aGlnaGxpZ2h0[^?#]+)(?:\?story_media_id=(\d+))?)")
|
||||
test = (
|
||||
("https://www.instagram.com/stories/instagram/"),
|
||||
("https://www.instagram.com/stories/highlights/18042509488170095/"),
|
||||
("https://instagram.com/stories/geekmig/2724343156064789461"),
|
||||
("https://www.instagram.com/s/aGlnaGxpZ2h0OjE4MDQyNTA5NDg4MTcwMDk1"),
|
||||
("https://www.instagram.com/s/aGlnaGxpZ2h0OjE4MDQyNTA5NDg4MTcwMDk1"
|
||||
"?story_media_id=2724343156064789461"),
|
||||
)
|
||||
example = "https://www.instagram.com/stories/USER/"
|
||||
|
||||
def __init__(self, match):
|
||||
h1, self.user, m1, h2, m2 = match.groups()
|
||||
@ -585,22 +553,33 @@ class InstagramHighlightsExtractor(InstagramExtractor):
|
||||
"""Extractor for an Instagram user's story highlights"""
|
||||
subcategory = "highlights"
|
||||
pattern = USER_PATTERN + r"/highlights"
|
||||
test = ("https://www.instagram.com/instagram/highlights",)
|
||||
example = "https://www.instagram.com/USER/highlights/"
|
||||
|
||||
def posts(self):
    """Return highlight media for the user referenced by ``self.item``.

    ``self.item`` is first resolved to a user ID through the API object;
    that ID is then passed on to ``highlights_media()``.
    """
    return self.api.highlights_media(self.api.user_id(self.item))
|
||||
|
||||
|
||||
class InstagramFollowingExtractor(InstagramExtractor):
    """Extractor for an Instagram user's followed users"""
    subcategory = "following"
    pattern = USER_PATTERN + r"/following"
    example = "https://www.instagram.com/USER/following/"

    def items(self):
        """Yield one ``Message.Queue`` entry per followed user.

        ``self.item`` is resolved to a user ID, the API's following list
        for that ID is iterated, and every returned user dict is tagged
        with ``InstagramUserExtractor`` before being queued together with
        a URL built from ``self.root`` and the user's ``username``.
        """
        user_id = self.api.user_id(self.item)

        for followed in self.api.user_following(user_id):
            # tag the entry so the queued URL is handled by the user extractor
            followed["_extractor"] = InstagramUserExtractor
            profile_url = "{}/{}".format(self.root, followed["username"])
            yield Message.Queue, profile_url, followed
|
||||
|
||||
|
||||
class InstagramTagExtractor(InstagramExtractor):
|
||||
"""Extractor for Instagram tags"""
|
||||
subcategory = "tag"
|
||||
directory_fmt = ("{category}", "{subcategory}", "{tag}")
|
||||
pattern = BASE_PATTERN + r"/explore/tags/([^/?#]+)"
|
||||
test = ("https://www.instagram.com/explore/tags/instagram/", {
|
||||
"range": "1-16",
|
||||
"count": ">= 16",
|
||||
})
|
||||
example = "https://www.instagram.com/explore/tags/TAG/"
|
||||
|
||||
def metadata(self):
    """Return a dict whose ``tag`` entry is the unquoted ``self.item``."""
    tag = text.unquote(self.item)
    return {"tag": tag}
|
||||
@ -613,10 +592,7 @@ class InstagramAvatarExtractor(InstagramExtractor):
|
||||
"""Extractor for an Instagram user's avatar"""
|
||||
subcategory = "avatar"
|
||||
pattern = USER_PATTERN + r"/avatar"
|
||||
test = ("https://www.instagram.com/instagram/avatar", {
|
||||
"pattern": r"https://instagram\.[\w.-]+\.fbcdn\.net/v/t51\.2885-19"
|
||||
r"/281440578_1088265838702675_6233856337905829714_n\.jpg",
|
||||
})
|
||||
example = "https://www.instagram.com/USER/avatar/"
|
||||
|
||||
def posts(self):
|
||||
if self._logged_in:
|
||||
@ -656,102 +632,7 @@ class InstagramPostExtractor(InstagramExtractor):
|
||||
subcategory = "post"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?instagram\.com"
|
||||
r"/(?:[^/?#]+/)?(?:p|tv|reel)/([^/?#]+)")
|
||||
test = (
|
||||
# GraphImage
|
||||
("https://www.instagram.com/p/BqvsDleB3lV/", {
|
||||
"pattern": r"https://[^/]+\.(cdninstagram\.com|fbcdn\.net)"
|
||||
r"/v(p/[0-9a-f]+/[0-9A-F]+)?/t51.2885-15/e35"
|
||||
r"/44877605_725955034447492_3123079845831750529_n.jpg",
|
||||
"keyword": {
|
||||
"date": "dt:2018-11-29 01:04:04",
|
||||
"description": str,
|
||||
"height": int,
|
||||
"likes": int,
|
||||
"location_id": "214424288",
|
||||
"location_slug": "hong-kong",
|
||||
"location_url": "re:/explore/locations/214424288/hong-kong/",
|
||||
"media_id": "1922949326347663701",
|
||||
"shortcode": "BqvsDleB3lV",
|
||||
"post_id": "1922949326347663701",
|
||||
"post_shortcode": "BqvsDleB3lV",
|
||||
"post_url": "https://www.instagram.com/p/BqvsDleB3lV/",
|
||||
"tags": ["#WHPsquares"],
|
||||
"typename": "GraphImage",
|
||||
"username": "instagram",
|
||||
"width": int,
|
||||
}
|
||||
}),
|
||||
# GraphSidecar
|
||||
("https://www.instagram.com/p/BoHk1haB5tM/", {
|
||||
"count": 5,
|
||||
"keyword": {
|
||||
"sidecar_media_id": "1875629777499953996",
|
||||
"sidecar_shortcode": "BoHk1haB5tM",
|
||||
"post_id": "1875629777499953996",
|
||||
"post_shortcode": "BoHk1haB5tM",
|
||||
"post_url": "https://www.instagram.com/p/BoHk1haB5tM/",
|
||||
"num": int,
|
||||
"likes": int,
|
||||
"username": "instagram",
|
||||
}
|
||||
}),
|
||||
# GraphVideo
|
||||
("https://www.instagram.com/p/Bqxp0VSBgJg/", {
|
||||
"pattern": r"/46840863_726311431074534_7805566102611403091_n\.mp4",
|
||||
"keyword": {
|
||||
"date": "dt:2018-11-29 19:23:58",
|
||||
"description": str,
|
||||
"height": int,
|
||||
"likes": int,
|
||||
"media_id": "1923502432034620000",
|
||||
"post_url": "https://www.instagram.com/p/Bqxp0VSBgJg/",
|
||||
"shortcode": "Bqxp0VSBgJg",
|
||||
"tags": ["#ASMR"],
|
||||
"typename": "GraphVideo",
|
||||
"username": "instagram",
|
||||
"width": int,
|
||||
}
|
||||
}),
|
||||
# GraphVideo (IGTV)
|
||||
("https://www.instagram.com/tv/BkQjCfsBIzi/", {
|
||||
"pattern": r"/10000000_597132547321814_702169244961988209_n\.mp4",
|
||||
"keyword": {
|
||||
"date": "dt:2018-06-20 19:51:32",
|
||||
"description": str,
|
||||
"height": int,
|
||||
"likes": int,
|
||||
"media_id": "1806097553666903266",
|
||||
"post_url": "https://www.instagram.com/p/BkQjCfsBIzi/",
|
||||
"shortcode": "BkQjCfsBIzi",
|
||||
"typename": "GraphVideo",
|
||||
"username": "instagram",
|
||||
"width": int,
|
||||
}
|
||||
}),
|
||||
# GraphSidecar with 2 embedded GraphVideo objects
|
||||
("https://www.instagram.com/p/BtOvDOfhvRr/", {
|
||||
"count": 2,
|
||||
"keyword": {
|
||||
"post_url": "https://www.instagram.com/p/BtOvDOfhvRr/",
|
||||
"sidecar_media_id": "1967717017113261163",
|
||||
"sidecar_shortcode": "BtOvDOfhvRr",
|
||||
"video_url": str,
|
||||
}
|
||||
}),
|
||||
# GraphImage with tagged user
|
||||
("https://www.instagram.com/p/B_2lf3qAd3y/", {
|
||||
"keyword": {
|
||||
"tagged_users": [{
|
||||
"id" : "1246468638",
|
||||
"username" : "kaaymbl",
|
||||
"full_name": "Call Me Kay",
|
||||
}]
|
||||
}
|
||||
}),
|
||||
# URL with username (#2085)
|
||||
("https://www.instagram.com/dm/p/CW042g7B9CY/"),
|
||||
("https://www.instagram.com/reel/CDg_6Y1pxWu/"),
|
||||
)
|
||||
example = "https://www.instagram.com/p/abcdefg/"
|
||||
|
||||
def posts(self):
    """Return the API's ``media()`` lookup result for ``self.item``."""
    identifier = self.item
    return self.api.media(identifier)
|
||||
@ -869,6 +750,11 @@ class InstagramRestAPI():
|
||||
params = {"count": 30}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def user_following(self, user_id):
    """Paginate over the accounts followed by 'user_id'.

    Builds the friendships/following endpoint for the given ID and hands
    it, together with a page size of 12, to ``_pagination_following()``.
    """
    return self._pagination_following(
        "/v1/friendships/{}/following/".format(user_id),
        {"count": 12},
    )
|
||||
|
||||
def user_saved(self):
|
||||
endpoint = "/v1/feed/saved/posts/"
|
||||
params = {"count": 50}
|
||||
@ -958,6 +844,20 @@ class InstagramRestAPI():
|
||||
return extr._update_cursor(None)
|
||||
params["max_id"] = extr._update_cursor(data["next_max_id"])
|
||||
|
||||
def _pagination_following(self, endpoint, params):
    """Yield users from 'endpoint', advancing ``max_id`` as an offset.

    The starting offset is the extractor's initial cursor parsed as an
    integer. After each page, the offset grows by ``params["count"]``
    and is reported back through the extractor's ``_update_cursor()``.
    A page holding fewer than ``params["count"]`` users ends the
    iteration and resets the cursor with ``None``.
    """
    extractor = self.extractor
    params["max_id"] = text.parse_int(extractor._init_cursor())

    while True:
        users = self._call(endpoint, params=params)["users"]
        yield from users

        if len(users) >= params["count"]:
            # full page: move the offset forward by one page size
            next_offset = params["max_id"] + params["count"]
            params["max_id"] = extractor._update_cursor(next_offset)
            continue

        # short page: nothing left to fetch
        return extractor._update_cursor(None)
|
||||
|
||||
|
||||
class InstagramGraphqlAPI():
|
||||
|
||||
|
@ -26,31 +26,7 @@ class IssuuPublicationExtractor(IssuuBase, GalleryExtractor):
|
||||
filename_fmt = "{num:>03}.{extension}"
|
||||
archive_fmt = "{document[publicationId]}_{num}"
|
||||
pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)"
|
||||
test = ("https://issuu.com/issuu/docs/motions-1-2019/", {
|
||||
"pattern": r"https://image.isu.pub/190916155301-\w+/jpg/page_\d+.jpg",
|
||||
"count" : 36,
|
||||
"keyword": {
|
||||
"document": {
|
||||
"access" : "PUBLIC",
|
||||
"contentRating" : {
|
||||
"isAdsafe" : True,
|
||||
"isExplicit": False,
|
||||
"isReviewed": True,
|
||||
},
|
||||
"date" : "dt:2019-09-16 00:00:00",
|
||||
"description" : "re:Motions, the brand new publication by I",
|
||||
"documentName" : "motions-1-2019",
|
||||
"downloadable" : False,
|
||||
"pageCount" : 36,
|
||||
"publicationId" : "d99ec95935f15091b040cb8060f05510",
|
||||
"title" : "Motions by Issuu - Issue 1",
|
||||
"username" : "issuu",
|
||||
},
|
||||
"extension": "jpg",
|
||||
"filename" : r"re:page_\d+",
|
||||
"num" : int,
|
||||
},
|
||||
})
|
||||
example = "https://issuu.com/issuu/docs/TITLE/"
|
||||
|
||||
def metadata(self, page):
|
||||
data = util.json_loads(text.rextract(
|
||||
@ -78,10 +54,7 @@ class IssuuUserExtractor(IssuuBase, Extractor):
|
||||
"""Extractor for all publications of a user/publisher"""
|
||||
subcategory = "user"
|
||||
pattern = r"(?:https?://)?issuu\.com/([^/?#]+)/?$"
|
||||
test = ("https://issuu.com/issuu", {
|
||||
"pattern": IssuuPublicationExtractor.pattern,
|
||||
"count" : "> 25",
|
||||
})
|
||||
example = "https://issuu.com/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2022 Mike Fährmann
|
||||
# Copyright 2022-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -63,12 +63,7 @@ class ItakuGalleryExtractor(ItakuExtractor):
|
||||
"""Extractor for posts from an itaku user gallery"""
|
||||
subcategory = "gallery"
|
||||
pattern = BASE_PATTERN + r"/profile/([^/?#]+)/gallery"
|
||||
test = ("https://itaku.ee/profile/piku/gallery", {
|
||||
"pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
|
||||
r"/[^/?#]+\.(jpg|png|gif)",
|
||||
"range": "1-10",
|
||||
"count": 10,
|
||||
})
|
||||
example = "https://itaku.ee/profile/USER/gallery"
|
||||
|
||||
def posts(self):
    """Return the API's ``galleries_images()`` result for ``self.item``."""
    username = self.item
    return self.api.galleries_images(username)
|
||||
@ -77,62 +72,7 @@ class ItakuGalleryExtractor(ItakuExtractor):
|
||||
class ItakuImageExtractor(ItakuExtractor):
|
||||
subcategory = "image"
|
||||
pattern = BASE_PATTERN + r"/images/(\d+)"
|
||||
test = (
|
||||
("https://itaku.ee/images/100471", {
|
||||
"pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_imgs"
|
||||
r"/220504_oUNIAFT\.png",
|
||||
"count": 1,
|
||||
"keyword": {
|
||||
"already_pinned": None,
|
||||
"blacklisted": {
|
||||
"blacklisted_tags": [],
|
||||
"is_blacklisted": False
|
||||
},
|
||||
"can_reshare": True,
|
||||
"date": "dt:2022-05-05 19:21:17",
|
||||
"date_added": "2022-05-05T19:21:17.674148Z",
|
||||
"date_edited": "2022-05-25T14:37:46.220612Z",
|
||||
"description": "sketch from drawpile",
|
||||
"extension": "png",
|
||||
"filename": "220504_oUNIAFT",
|
||||
"hotness_score": float,
|
||||
"id": 100471,
|
||||
"image": "https://d1wmr8tlk3viaj.cloudfront.net/gallery_imgs"
|
||||
"/220504_oUNIAFT.png",
|
||||
"image_xl": "https://d1wmr8tlk3viaj.cloudfront.net"
|
||||
"/gallery_imgs/220504_oUNIAFT/lg.jpg",
|
||||
"liked_by_you": False,
|
||||
"maturity_rating": "SFW",
|
||||
"num_comments": int,
|
||||
"num_likes": int,
|
||||
"num_reshares": int,
|
||||
"obj_tags": 136446,
|
||||
"owner": 16775,
|
||||
"owner_avatar": "https://d1wmr8tlk3viaj.cloudfront.net"
|
||||
"/profile_pics/av2022r_vKYVywc/md.jpg",
|
||||
"owner_displayname": "Piku",
|
||||
"owner_username": "piku",
|
||||
"reshared_by_you": False,
|
||||
"sections": ["Fanart/Miku"],
|
||||
"tags": list,
|
||||
"tags_character": ["hatsune_miku"],
|
||||
"tags_copyright": ["vocaloid"],
|
||||
"tags_general" : ["twintails", "green_hair", "flag",
|
||||
"gloves", "green_eyes", "female",
|
||||
"racing_miku"],
|
||||
"title": "Racing Miku 2022 Ver.",
|
||||
"too_mature": False,
|
||||
"uncompressed_filesize": "0.62",
|
||||
"video": None,
|
||||
"visibility": "PUBLIC",
|
||||
},
|
||||
}),
|
||||
# video
|
||||
("https://itaku.ee/images/19465", {
|
||||
"pattern": r"https://d1wmr8tlk3viaj\.cloudfront\.net/gallery_vids"
|
||||
r"/sleepy_af_OY5GHWw\.mp4",
|
||||
}),
|
||||
)
|
||||
example = "https://itaku.ee/images/12345"
|
||||
|
||||
def posts(self):
    """Return a 1-tuple holding the API's ``image()`` result for ``self.item``."""
    image = self.api.image(self.item)
    return (image,)
|
||||
@ -145,7 +85,6 @@ class ItakuAPI():
|
||||
self.root = extractor.root + "/api"
|
||||
self.headers = {
|
||||
"Accept": "application/json, text/plain, */*",
|
||||
"Referer": extractor.root + "/",
|
||||
}
|
||||
|
||||
def galleries_images(self, username, section=None):
|
||||
|
@ -21,28 +21,7 @@ class ItchioGameExtractor(Extractor):
|
||||
filename_fmt = "{game[title]} ({id}).{extension}"
|
||||
archive_fmt = "{id}"
|
||||
pattern = r"(?:https?://)?(\w+).itch\.io/([\w-]+)"
|
||||
test = (
|
||||
("https://sirtartarus.itch.io/a-craft-of-mine", {
|
||||
"pattern": r"https://\w+\.ssl\.hwcdn\.net/upload2"
|
||||
r"/game/1983311/7723751\?",
|
||||
"count": 1,
|
||||
"keyword": {
|
||||
"extension": "",
|
||||
"filename": "7723751",
|
||||
"game": {
|
||||
"id": 1983311,
|
||||
"noun": "game",
|
||||
"title": "A Craft Of Mine",
|
||||
"url": "https://sirtartarus.itch.io/a-craft-of-mine",
|
||||
},
|
||||
"user": {
|
||||
"id": 4060052,
|
||||
"name": "SirTartarus",
|
||||
"url": "https://sirtartarus.itch.io",
|
||||
},
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://USER.itch.io/GAME"
|
||||
|
||||
def __init__(self, match):
|
||||
self.user, self.slug = match.groups()
|
||||
|
@ -4,18 +4,18 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://jpeg.pet/"""
|
||||
"""Extractors for https://jpg1.su/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?jpe?g\.(?:pet|fish(?:ing)?|church)"
|
||||
BASE_PATTERN = r"(?:https?://)?jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)"
|
||||
|
||||
|
||||
class JpgfishExtractor(Extractor):
|
||||
"""Base class for jpgfish extractors"""
|
||||
category = "jpgfish"
|
||||
root = "https://jpeg.pet"
|
||||
root = "https://jpg1.su"
|
||||
directory_fmt = ("{category}", "{user}", "{album}",)
|
||||
archive_fmt = "{id}"
|
||||
|
||||
@ -35,28 +35,7 @@ class JpgfishImageExtractor(JpgfishExtractor):
|
||||
"""Extractor for jpgfish Images"""
|
||||
subcategory = "image"
|
||||
pattern = BASE_PATTERN + r"/img/((?:[^/?#]+\.)?(\w+))"
|
||||
test = (
|
||||
("https://jpeg.pet/img/funnymeme.LecXGS", {
|
||||
"pattern": r"https://simp3\.jpg\.church/images/funnymeme\.jpg",
|
||||
"content": "098e5e9b17ad634358426e0ffd1c93871474d13c",
|
||||
"keyword": {
|
||||
"album": "",
|
||||
"extension": "jpg",
|
||||
"filename": "funnymeme",
|
||||
"id": "LecXGS",
|
||||
"url": "https://simp3.jpg.church/images/funnymeme.jpg",
|
||||
"user": "exearco",
|
||||
},
|
||||
}),
|
||||
("https://jpg.church/img/auCruA", {
|
||||
"pattern": r"https://simp2\.jpg\.church/hannahowo_00457\.jpg",
|
||||
"keyword": {"album": "401-500"},
|
||||
}),
|
||||
("https://jpg.pet/img/funnymeme.LecXGS"),
|
||||
("https://jpg.fishing/img/funnymeme.LecXGS"),
|
||||
("https://jpg.fish/img/funnymeme.LecXGS"),
|
||||
("https://jpg.church/img/funnymeme.LecXGS"),
|
||||
)
|
||||
example = "https://jpg1.su/img/TITLE.ID"
|
||||
|
||||
def __init__(self, match):
|
||||
JpgfishExtractor.__init__(self, match)
|
||||
@ -83,21 +62,7 @@ class JpgfishAlbumExtractor(JpgfishExtractor):
|
||||
"""Extractor for jpgfish Albums"""
|
||||
subcategory = "album"
|
||||
pattern = BASE_PATTERN + r"/a(?:lbum)?/([^/?#]+)(/sub)?"
|
||||
test = (
|
||||
("https://jpeg.pet/album/CDilP/?sort=date_desc&page=1", {
|
||||
"count": 2,
|
||||
}),
|
||||
("https://jpg.fishing/a/gunggingnsk.N9OOI", {
|
||||
"count": 114,
|
||||
}),
|
||||
("https://jpg.fish/a/101-200.aNJ6A/", {
|
||||
"count": 100,
|
||||
}),
|
||||
("https://jpg.church/a/hannahowo.aNTdH/sub", {
|
||||
"count": 606,
|
||||
}),
|
||||
("https://jpg.pet/album/CDilP/?sort=date_desc&page=1"),
|
||||
)
|
||||
example = "https://jpg1.su/album/TITLE.ID"
|
||||
|
||||
def __init__(self, match):
|
||||
JpgfishExtractor.__init__(self, match)
|
||||
@ -121,18 +86,7 @@ class JpgfishUserExtractor(JpgfishExtractor):
|
||||
"""Extractor for jpgfish Users"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/(?!img|a(?:lbum)?)([^/?#]+)(/albums)?"
|
||||
test = (
|
||||
("https://jpeg.pet/exearco", {
|
||||
"count": 3,
|
||||
}),
|
||||
("https://jpg.church/exearco/albums", {
|
||||
"count": 1,
|
||||
}),
|
||||
("https://jpg.pet/exearco"),
|
||||
("https://jpg.fishing/exearco"),
|
||||
("https://jpg.fish/exearco"),
|
||||
("https://jpg.church/exearco"),
|
||||
)
|
||||
example = "https://jpg1.su/USER"
|
||||
|
||||
def __init__(self, match):
|
||||
JpgfishExtractor.__init__(self, match)
|
||||
|
@ -31,12 +31,7 @@ class JschanThreadExtractor(JschanExtractor):
|
||||
filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
|
||||
archive_fmt = "{board}_{postId}_{num}"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html"
|
||||
test = (
|
||||
("https://94chan.org/art/thread/25.html", {
|
||||
"pattern": r"https://94chan.org/file/[0-9a-f]{64}(\.\w+)?",
|
||||
"count": ">= 15"
|
||||
})
|
||||
)
|
||||
example = "https://94chan.org/a/thread/12345.html"
|
||||
|
||||
def __init__(self, match):
|
||||
JschanExtractor.__init__(self, match)
|
||||
@ -71,15 +66,7 @@ class JschanBoardExtractor(JschanExtractor):
|
||||
subcategory = "board"
|
||||
pattern = (BASE_PATTERN + r"/([^/?#]+)"
|
||||
r"(?:/index\.html|/catalog\.html|/\d+\.html|/?$)")
|
||||
test = (
|
||||
("https://94chan.org/art/", {
|
||||
"pattern": JschanThreadExtractor.pattern,
|
||||
"count": ">= 30"
|
||||
}),
|
||||
("https://94chan.org/art/2.html"),
|
||||
("https://94chan.org/art/catalog.html"),
|
||||
("https://94chan.org/art/index.html"),
|
||||
)
|
||||
example = "https://94chan.org/a/"
|
||||
|
||||
def __init__(self, match):
|
||||
JschanExtractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2020 Mike Fährmann
|
||||
# Copyright 2020-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -21,16 +21,7 @@ class KabeuchiUserExtractor(Extractor):
|
||||
archive_fmt = "{id}_{num}"
|
||||
root = "https://kabe-uchiroom.com"
|
||||
pattern = r"(?:https?://)?kabe-uchiroom\.com/mypage/?\?id=(\d+)"
|
||||
test = (
|
||||
("https://kabe-uchiroom.com/mypage/?id=919865303848255493", {
|
||||
"pattern": (r"https://kabe-uchiroom\.com/accounts/upfile/3/"
|
||||
r"919865303848255493/\w+\.jpe?g"),
|
||||
"count": ">= 24",
|
||||
}),
|
||||
("https://kabe-uchiroom.com/mypage/?id=123456789", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
)
|
||||
example = "https://kabe-uchiroom.com/mypage/?id=12345"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019 Mike Fährmann
|
||||
# Copyright 2019-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -20,28 +20,7 @@ class KeenspotComicExtractor(Extractor):
|
||||
filename_fmt = "{filename}.{extension}"
|
||||
archive_fmt = "{comic}_{filename}"
|
||||
pattern = r"(?:https?://)?(?!www\.|forums\.)([\w-]+)\.keenspot\.com(/.+)?"
|
||||
test = (
|
||||
("http://marksmen.keenspot.com/", { # link
|
||||
"range": "1-3",
|
||||
"url": "83bcf029103bf8bc865a1988afa4aaeb23709ba6",
|
||||
}),
|
||||
("http://barkercomic.keenspot.com/", { # id
|
||||
"range": "1-3",
|
||||
"url": "c4080926db18d00bac641fdd708393b7d61379e6",
|
||||
}),
|
||||
("http://crowscare.keenspot.com/", { # id v2
|
||||
"range": "1-3",
|
||||
"url": "a00e66a133dd39005777317da90cef921466fcaa"
|
||||
}),
|
||||
("http://supernovas.keenspot.com/", { # ks
|
||||
"range": "1-3",
|
||||
"url": "de21b12887ef31ff82edccbc09d112e3885c3aab"
|
||||
}),
|
||||
("http://twokinds.keenspot.com/comic/1066/", { # "random" access
|
||||
"range": "1-3",
|
||||
"url": "6a784e11370abfb343dcad9adbb7718f9b7be350",
|
||||
})
|
||||
)
|
||||
example = "http://COMIC.keenspot.com/"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -37,7 +37,6 @@ class KemonopartyExtractor(Extractor):
|
||||
Extractor.__init__(self, match)
|
||||
|
||||
def _init(self):
|
||||
self.session.headers["Referer"] = self.root + "/"
|
||||
self._prepare_ddosguard_cookies()
|
||||
self._find_inline = re.compile(
|
||||
r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
|
||||
@ -216,19 +215,7 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
|
||||
"""Extractor for all posts from a kemono.party user listing"""
|
||||
subcategory = "user"
|
||||
pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
|
||||
test = (
|
||||
("https://kemono.party/fanbox/user/6993449", {
|
||||
"range": "1-25",
|
||||
"count": 25,
|
||||
}),
|
||||
# 'max-posts' option, 'o' query parameter (#1674)
|
||||
("https://kemono.party/patreon/user/881792?o=150", {
|
||||
"options": (("max-posts", 25),),
|
||||
"count": "< 100",
|
||||
}),
|
||||
("https://kemono.su/subscribestar/user/alcorart"),
|
||||
("https://kemono.party/subscribestar/user/alcorart"),
|
||||
)
|
||||
example = "https://kemono.party/SERVICE/user/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
_, _, service, user_id, offset = match.groups()
|
||||
@ -256,87 +243,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
|
||||
"""Extractor for a single kemono.party post"""
|
||||
subcategory = "post"
|
||||
pattern = USER_PATTERN + r"/post/([^/?#]+)"
|
||||
test = (
|
||||
("https://kemono.party/fanbox/user/6993449/post/506575", {
|
||||
"pattern": r"https://kemono.party/data/21/0f"
|
||||
r"/210f35388e28bbcf756db18dd516e2d82ce75[0-9a-f]+\.jpg",
|
||||
"content": "900949cefc97ab8dc1979cc3664785aac5ba70dd",
|
||||
"keyword": {
|
||||
"added": "Wed, 06 May 2020 20:28:02 GMT",
|
||||
"content": str,
|
||||
"count": 1,
|
||||
"date": "dt:2019-08-11 02:09:04",
|
||||
"edited": None,
|
||||
"embed": dict,
|
||||
"extension": "jpeg",
|
||||
"filename": "P058kDFYus7DbqAkGlfWTlOr",
|
||||
"hash": "210f35388e28bbcf756db18dd516e2d8"
|
||||
"2ce758e0d32881eeee76d43e1716d382",
|
||||
"id": "506575",
|
||||
"num": 1,
|
||||
"published": "Sun, 11 Aug 2019 02:09:04 GMT",
|
||||
"service": "fanbox",
|
||||
"shared_file": False,
|
||||
"subcategory": "fanbox",
|
||||
"title": "c96取り置き",
|
||||
"type": "file",
|
||||
"user": "6993449",
|
||||
},
|
||||
}),
|
||||
# inline image (#1286)
|
||||
("https://kemono.party/fanbox/user/7356311/post/802343", {
|
||||
"pattern": r"https://kemono\.party/data/47/b5/47b5c014ecdcfabdf2c8"
|
||||
r"5eec53f1133a76336997ae8596f332e97d956a460ad2\.jpg",
|
||||
"keyword": {"hash": "47b5c014ecdcfabdf2c85eec53f1133a"
|
||||
"76336997ae8596f332e97d956a460ad2"},
|
||||
}),
|
||||
# kemono.party -> data.kemono.party
|
||||
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
|
||||
"pattern": r"https://kemono\.party/data/("
|
||||
r"a4/7b/a47bfe938d8c1682eef06e885927484cd8df1b.+\.jpg|"
|
||||
r"c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)",
|
||||
}),
|
||||
# username (#1548, #1652)
|
||||
("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", {
|
||||
"options": (("metadata", True),),
|
||||
"keyword": {"username": "Kudalyn's Creations"},
|
||||
}),
|
||||
# skip patreon duplicates
|
||||
("https://kemono.party/patreon/user/4158582/post/32099982", {
|
||||
"count": 2,
|
||||
}),
|
||||
# allow duplicates (#2440)
|
||||
("https://kemono.party/patreon/user/4158582/post/32099982", {
|
||||
"options": (("duplicates", True),),
|
||||
"count": 3,
|
||||
}),
|
||||
# DMs (#2008)
|
||||
("https://kemono.party/patreon/user/34134344/post/38129255", {
|
||||
"options": (("dms", True),),
|
||||
"keyword": {"dms": [{
|
||||
"body": r"re:Hi! Thank you very much for supporting the work I"
|
||||
r" did in May. Here's your reward pack! I hope you fin"
|
||||
r"d something you enjoy in it. :\)\n\nhttps://www.medi"
|
||||
r"afire.com/file/\w+/Set13_tier_2.zip/file",
|
||||
"date": "2021-07-31 02:47:51.327865",
|
||||
}]},
|
||||
}),
|
||||
# coomer.party (#2100)
|
||||
("https://coomer.party/onlyfans/user/alinity/post/125962203", {
|
||||
"pattern": r"https://coomer\.party/data/7d/3f/7d3fd9804583dc224968"
|
||||
r"c0591163ec91794552b04f00a6c2f42a15b68231d5a8\.jpg",
|
||||
}),
|
||||
# invalid file (#3510)
|
||||
("https://kemono.party/patreon/user/19623797/post/29035449", {
|
||||
"pattern": r"907ba78b4545338d3539683e63ecb51c"
|
||||
r"f51c10adc9dabd86e92bd52339f298b9\.txt",
|
||||
"content": "da39a3ee5e6b4b0d3255bfef95601890afd80709", # empty
|
||||
}),
|
||||
("https://kemono.su/subscribestar/user/alcorart/post/184330"),
|
||||
("https://kemono.party/subscribestar/user/alcorart/post/184330"),
|
||||
("https://www.kemono.party/subscribestar/user/alcorart/post/184330"),
|
||||
("https://beta.kemono.party/subscribestar/user/alcorart/post/184330"),
|
||||
)
|
||||
example = "https://kemono.party/SERVICE/user/12345/post/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
_, _, service, user_id, post_id = match.groups()
|
||||
@ -359,30 +266,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
|
||||
filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
|
||||
archive_fmt = "discord_{server}_{id}_{num}"
|
||||
pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
|
||||
test = (
|
||||
(("https://kemono.party/discord"
|
||||
"/server/488668827274444803#finish-work"), {
|
||||
"count": 4,
|
||||
"keyword": {"channel_name": "finish-work"},
|
||||
}),
|
||||
(("https://kemono.su/discord"
|
||||
"/server/256559665620451329/channel/462437519519383555#"), {
|
||||
"pattern": r"https://kemono\.su/data/("
|
||||
r"e3/77/e377e3525164559484ace2e64425b0cec1db08.*\.png|"
|
||||
r"51/45/51453640a5e0a4d23fbf57fb85390f9c5ec154.*\.gif)",
|
||||
"keyword": {"hash": "re:e377e3525164559484ace2e64425b0cec1db08"
|
||||
"|51453640a5e0a4d23fbf57fb85390f9c5ec154"},
|
||||
"count": ">= 2",
|
||||
}),
|
||||
# 'inline' files
|
||||
(("https://kemono.party/discord"
|
||||
"/server/315262215055736843/channel/315262215055736843#general"), {
|
||||
"pattern": r"https://cdn\.discordapp\.com/attachments/\d+/\d+/.+$",
|
||||
"options": (("image-filter", "type == 'inline'"),),
|
||||
"keyword": {"hash": ""},
|
||||
"range": "1-5",
|
||||
}),
|
||||
)
|
||||
example = "https://kemono.party/discord/server/12345#CHANNEL"
|
||||
|
||||
def __init__(self, match):
|
||||
KemonopartyExtractor.__init__(self, match)
|
||||
@ -461,16 +345,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
|
||||
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
|
||||
subcategory = "discord-server"
|
||||
pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
|
||||
test = (
|
||||
("https://kemono.party/discord/server/488668827274444803", {
|
||||
"pattern": KemonopartyDiscordExtractor.pattern,
|
||||
"count": 13,
|
||||
}),
|
||||
("https://kemono.su/discord/server/488668827274444803", {
|
||||
"pattern": KemonopartyDiscordExtractor.pattern,
|
||||
"count": 13,
|
||||
}),
|
||||
)
|
||||
example = "https://kemono.party/discord/server/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
KemonopartyExtractor.__init__(self, match)
|
||||
@ -492,23 +367,7 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
|
||||
"""Extractor for kemono.party favorites"""
|
||||
subcategory = "favorite"
|
||||
pattern = BASE_PATTERN + r"/favorites(?:/?\?([^#]+))?"
|
||||
test = (
|
||||
("https://kemono.party/favorites", {
|
||||
"pattern": KemonopartyUserExtractor.pattern,
|
||||
"url": "f4b5b796979bcba824af84206578c79101c7f0e1",
|
||||
"count": 3,
|
||||
}),
|
||||
("https://kemono.party/favorites?type=post", {
|
||||
"pattern": KemonopartyPostExtractor.pattern,
|
||||
"url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f",
|
||||
"count": 3,
|
||||
}),
|
||||
("https://kemono.su/favorites?type=post", {
|
||||
"pattern": KemonopartyPostExtractor.pattern,
|
||||
"url": "4be8e84cb384a907a8e7997baaf6287b451783b5",
|
||||
"count": 3,
|
||||
}),
|
||||
)
|
||||
example = "https://kemono.party/favorites"
|
||||
|
||||
def __init__(self, match):
|
||||
KemonopartyExtractor.__init__(self, match)
|
||||
@ -522,7 +381,7 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
|
||||
|
||||
if self.favorites == "artist":
|
||||
users = self.request(
|
||||
self.root + "/api/favorites?type=artist").json()
|
||||
self.root + "/api/v1/account/favorites?type=artist").json()
|
||||
for user in users:
|
||||
user["_extractor"] = KemonopartyUserExtractor
|
||||
url = "{}/{}/user/{}".format(
|
||||
@ -531,7 +390,7 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
|
||||
|
||||
elif self.favorites == "post":
|
||||
posts = self.request(
|
||||
self.root + "/api/favorites?type=post").json()
|
||||
self.root + "/api/v1/account/favorites?type=post").json()
|
||||
for post in posts:
|
||||
post["_extractor"] = KemonopartyPostExtractor
|
||||
url = "{}/{}/user/{}/post/{}".format(
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2016-2022 Mike Fährmann
|
||||
# Copyright 2016-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -16,30 +16,13 @@ class KhinsiderSoundtrackExtractor(AsynchronousMixin, Extractor):
|
||||
"""Extractor for soundtracks from khinsider.com"""
|
||||
category = "khinsider"
|
||||
subcategory = "soundtrack"
|
||||
root = "https://downloads.khinsider.com"
|
||||
directory_fmt = ("{category}", "{album[name]}")
|
||||
archive_fmt = "{filename}.{extension}"
|
||||
pattern = (r"(?:https?://)?downloads\.khinsider\.com"
|
||||
r"/game-soundtracks/album/([^/?#]+)")
|
||||
root = "https://downloads.khinsider.com"
|
||||
test = (("https://downloads.khinsider.com"
|
||||
"/game-soundtracks/album/horizon-riders-wii"), {
|
||||
"pattern": r"https?://vgm(site|downloads)\.com"
|
||||
r"/soundtracks/horizon-riders-wii/[^/]+"
|
||||
r"/Horizon%20Riders%20Wii%20-%20Full%20Soundtrack\.mp3",
|
||||
"keyword": {
|
||||
"album": {
|
||||
"count": 1,
|
||||
"date": "Sep 18th, 2016",
|
||||
"name": "Horizon Riders",
|
||||
"platform": "Wii",
|
||||
"size": 26214400,
|
||||
"type": "Gamerip",
|
||||
},
|
||||
"extension": "mp3",
|
||||
"filename": "Horizon Riders Wii - Full Soundtrack",
|
||||
},
|
||||
"count": 1,
|
||||
})
|
||||
example = ("https://downloads.khinsider.com"
|
||||
"/game-soundtracks/album/TITLE")
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2018-2022 Mike Fährmann
|
||||
# Copyright 2018-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -48,18 +48,7 @@ class KomikcastBase():
|
||||
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
|
||||
"""Extractor for manga-chapters from komikcast.site"""
|
||||
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
|
||||
test = (
|
||||
(("https://komikcast.site/chapter"
|
||||
"/apotheosis-chapter-02-2-bahasa-indonesia/"), {
|
||||
"url": "f6b43fbc027697749b3ea1c14931c83f878d7936",
|
||||
"keyword": "f3938e1aff9ad1f302f52447e9781b21f6da26d4",
|
||||
}),
|
||||
(("https://komikcast.me/chapter"
|
||||
"/soul-land-ii-chapter-300-1-bahasa-indonesia/"), {
|
||||
"url": "efd00a9bd95461272d51990d7bc54b79ff3ff2e6",
|
||||
"keyword": "cb646cfed3d45105bd645ab38b2e9f7d8c436436",
|
||||
}),
|
||||
)
|
||||
example = "https://komikcast.site/chapter/TITLE/"
|
||||
|
||||
def metadata(self, page):
|
||||
info = text.extr(page, "<title>", " - Komikcast<")
|
||||
@ -79,13 +68,7 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
|
||||
"""Extractor for manga from komikcast.site"""
|
||||
chapterclass = KomikcastChapterExtractor
|
||||
pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
|
||||
test = (
|
||||
("https://komikcast.site/komik/090-eko-to-issho/", {
|
||||
"url": "19d3d50d532e84be6280a3d61ff0fd0ca04dd6b4",
|
||||
"keyword": "837a7e96867344ff59d840771c04c20dc46c0ab1",
|
||||
}),
|
||||
("https://komikcast.me/tonari-no-kashiwagi-san/"),
|
||||
)
|
||||
example = "https://komikcast.site/komik/TITLE"
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
|
@ -48,19 +48,7 @@ class LensdumpBase():
|
||||
class LensdumpAlbumExtractor(LensdumpBase, GalleryExtractor):
|
||||
subcategory = "album"
|
||||
pattern = BASE_PATTERN + r"/(?:((?!\w+/albums|a/|i/)\w+)|a/(\w+))"
|
||||
test = (
|
||||
("https://lensdump.com/a/1IhJr", {
|
||||
"pattern": r"https://[abcd]\.l3n\.co/i/tq\w{4}\.png",
|
||||
"keyword": {
|
||||
"extension": "png",
|
||||
"name": str,
|
||||
"num": int,
|
||||
"title": str,
|
||||
"url": str,
|
||||
"width": int,
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://lensdump.com/a/ID"
|
||||
|
||||
def __init__(self, match):
|
||||
GalleryExtractor.__init__(self, match, match.string)
|
||||
@ -100,7 +88,7 @@ class LensdumpAlbumsExtractor(LensdumpBase, Extractor):
|
||||
"""Extractor for album list from lensdump.com"""
|
||||
subcategory = "albums"
|
||||
pattern = BASE_PATTERN + r"/\w+/albums"
|
||||
test = ("https://lensdump.com/vstar925/albums",)
|
||||
example = "https://lensdump.com/USER/albums"
|
||||
|
||||
def items(self):
|
||||
for node in self.nodes():
|
||||
@ -117,22 +105,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
|
||||
directory_fmt = ("{category}",)
|
||||
archive_fmt = "{id}"
|
||||
pattern = BASE_PATTERN + r"/i/(\w+)"
|
||||
test = (
|
||||
("https://lensdump.com/i/tyoAyM", {
|
||||
"pattern": r"https://c\.l3n\.co/i/tyoAyM\.webp",
|
||||
"content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46",
|
||||
"keyword": {
|
||||
"date": "dt:2022-08-01 08:24:28",
|
||||
"extension": "webp",
|
||||
"filename": "tyoAyM",
|
||||
"height": 400,
|
||||
"id": "tyoAyM",
|
||||
"title": "MYOBI clovis bookcaseset",
|
||||
"url": "https://c.l3n.co/i/tyoAyM.webp",
|
||||
"width": 620,
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://lensdump.com/i/ID"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -20,37 +20,7 @@ class LexicaSearchExtractor(Extractor):
|
||||
directory_fmt = ("{category}", "{search_tags}")
|
||||
archive_fmt = "{id}"
|
||||
pattern = r"(?:https?://)?lexica\.art/?\?q=([^&#]+)"
|
||||
test = (
|
||||
("https://lexica.art/?q=tree", {
|
||||
"pattern": r"https://lexica-serve-encoded-images2\.sharif\."
|
||||
r"workers.dev/full_jpg/[0-9a-f-]{36}$",
|
||||
"range": "1-80",
|
||||
"count": 80,
|
||||
"keyword": {
|
||||
"height": int,
|
||||
"id": str,
|
||||
"upscaled_height": int,
|
||||
"upscaled_width": int,
|
||||
"userid": str,
|
||||
"width": int,
|
||||
"prompt": {
|
||||
"c": int,
|
||||
"grid": bool,
|
||||
"height": int,
|
||||
"id": str,
|
||||
"images": list,
|
||||
"initImage": None,
|
||||
"initImageStrength": None,
|
||||
"model": "lexica-aperture-v2",
|
||||
"negativePrompt": str,
|
||||
"prompt": str,
|
||||
"seed": str,
|
||||
"timestamp": r"re:\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\dZ",
|
||||
"width": int,
|
||||
},
|
||||
},
|
||||
}),
|
||||
)
|
||||
example = "https://lexica.art/?q=QUERY"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -18,24 +18,7 @@ class LightroomGalleryExtractor(Extractor):
|
||||
filename_fmt = "{num:>04}_{id}.{extension}"
|
||||
archive_fmt = "{id}"
|
||||
pattern = r"(?:https?://)?lightroom\.adobe\.com/shares/([0-9a-f]+)"
|
||||
test = (
|
||||
(("https://lightroom.adobe.com/shares/"
|
||||
"0c9cce2033f24d24975423fe616368bf"), {
|
||||
"keyword": {
|
||||
"title": "Sterne und Nachtphotos",
|
||||
"user": "Christian Schrang",
|
||||
},
|
||||
"count": ">= 55",
|
||||
}),
|
||||
(("https://lightroom.adobe.com/shares/"
|
||||
"7ba68ad5a97e48608d2e6c57e6082813"), {
|
||||
"keyword": {
|
||||
"title": "HEBFC Snr/Res v Brighton",
|
||||
"user": "",
|
||||
},
|
||||
"count": ">= 180",
|
||||
}),
|
||||
)
|
||||
example = "https://lightroom.adobe.com/shares/0123456789abcdef"
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2019-2020 Mike Fährmann
|
||||
# Copyright 2019-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -84,32 +84,7 @@ class LivedoorBlogExtractor(LivedoorExtractor):
|
||||
"""Extractor for a user's blog on blog.livedoor.jp"""
|
||||
subcategory = "blog"
|
||||
pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/?(?:$|[?#])"
|
||||
test = (
|
||||
("http://blog.livedoor.jp/zatsu_ke/", {
|
||||
"range": "1-50",
|
||||
"count": 50,
|
||||
"archive": False,
|
||||
"pattern": r"https?://livedoor.blogimg.jp/\w+/imgs/\w/\w/\w+\.\w+",
|
||||
"keyword": {
|
||||
"post": {
|
||||
"categories" : tuple,
|
||||
"date" : "type:datetime",
|
||||
"description": str,
|
||||
"id" : int,
|
||||
"tags" : list,
|
||||
"title" : str,
|
||||
"user" : "zatsu_ke"
|
||||
},
|
||||
"filename": str,
|
||||
"hash" : r"re:\w{4,}",
|
||||
"num" : int,
|
||||
},
|
||||
}),
|
||||
("http://blog.livedoor.jp/uotapo/", {
|
||||
"range": "1-5",
|
||||
"count": 5,
|
||||
}),
|
||||
)
|
||||
example = "http://blog.livedoor.jp/USER/"
|
||||
|
||||
def posts(self):
|
||||
url = "{}/{}".format(self.root, self.user)
|
||||
@ -129,20 +104,7 @@ class LivedoorPostExtractor(LivedoorExtractor):
|
||||
"""Extractor for images from a blog post on blog.livedoor.jp"""
|
||||
subcategory = "post"
|
||||
pattern = r"(?:https?://)?blog\.livedoor\.jp/(\w+)/archives/(\d+)"
|
||||
test = (
|
||||
("http://blog.livedoor.jp/zatsu_ke/archives/51493859.html", {
|
||||
"url": "9ca3bbba62722c8155be79ad7fc47be409e4a7a2",
|
||||
"keyword": "1f5b558492e0734f638b760f70bfc0b65c5a97b9",
|
||||
}),
|
||||
("http://blog.livedoor.jp/amaumauma/archives/7835811.html", {
|
||||
"url": "204bbd6a9db4969c50e0923855aeede04f2e4a62",
|
||||
"keyword": "05821c7141360e6057ef2d382b046f28326a799d",
|
||||
}),
|
||||
("http://blog.livedoor.jp/uotapo/archives/1050616939.html", {
|
||||
"url": "4b5ab144b7309eb870d9c08f8853d1abee9946d2",
|
||||
"keyword": "84fbf6e4eef16675013d6333039a7cfcb22c2d50",
|
||||
}),
|
||||
)
|
||||
example = "http://blog.livedoor.jp/USER/archives/12345.html"
|
||||
|
||||
def __init__(self, match):
|
||||
LivedoorExtractor.__init__(self, match)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2021-2022 Mike Fährmann
|
||||
# Copyright 2021-2023 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
@ -30,17 +30,7 @@ BASE_PATTERN = LolisafeExtractor.update({
|
||||
class LolisafeAlbumExtractor(LolisafeExtractor):
|
||||
subcategory = "album"
|
||||
pattern = BASE_PATTERN + "/a/([^/?#]+)"
|
||||
test = (
|
||||
("https://xbunkr.com/a/TA0bu3F4", {
|
||||
"pattern": r"https://media\.xbunkr\.com/[^.]+\.\w+",
|
||||
"count": 861,
|
||||
"keyword": {
|
||||
"album_id": "TA0bu3F4",
|
||||
"album_name": "Hannahowo Onlyfans Photos",
|
||||
}
|
||||
}),
|
||||
("https://xbunkr.com/a/GNQc2I5d"),
|
||||
)
|
||||
example = "https://xbunkr.com/a/ID"
|
||||
|
||||
def __init__(self, match):
|
||||
LolisafeExtractor.__init__(self, match)
|
||||
|
@ -47,73 +47,7 @@ class LusciousAlbumExtractor(LusciousExtractor):
|
||||
archive_fmt = "{album[id]}_{id}"
|
||||
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
|
||||
r"/(?:albums|pictures/c/[^/?#]+/album)/[^/?#]+_(\d+)")
|
||||
test = (
|
||||
("https://luscious.net/albums/okinami-no-koigokoro_277031/", {
|
||||
"pattern": r"https://storage\.bhs\.cloud\.ovh\.net/v1/AUTH_\w+"
|
||||
r"/images/NTRshouldbeillegal/277031"
|
||||
r"/luscious_net_\d+_\d+\.jpg$",
|
||||
# "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3",
|
||||
"keyword": {
|
||||
"album": {
|
||||
"__typename" : "Album",
|
||||
"audiences" : list,
|
||||
"content" : "Hentai",
|
||||
"cover" : "re:https://\\w+.luscious.net/.+/277031/",
|
||||
"created" : 1479625853,
|
||||
"created_by" : "NTRshouldbeillegal",
|
||||
"date" : "dt:2016-11-20 07:10:53",
|
||||
"description" : "Enjoy.",
|
||||
"download_url": "re:/download/(r/)?824778/277031/",
|
||||
"genres" : list,
|
||||
"id" : 277031,
|
||||
"is_manga" : True,
|
||||
"labels" : list,
|
||||
"language" : "English",
|
||||
"like_status" : "none",
|
||||
"modified" : int,
|
||||
"permissions" : list,
|
||||
"rating" : float,
|
||||
"slug" : "okinami-no-koigokoro",
|
||||
"status" : None,
|
||||
"tags" : list,
|
||||
"title" : "Okinami no Koigokoro",
|
||||
"url" : "/albums/okinami-no-koigokoro_277031/",
|
||||
"marked_for_deletion": False,
|
||||
"marked_for_processing": False,
|
||||
"number_of_animated_pictures": 0,
|
||||
"number_of_favorites": int,
|
||||
"number_of_pictures": 18,
|
||||
},
|
||||
"aspect_ratio": r"re:\d+:\d+",
|
||||
"category" : "luscious",
|
||||
"created" : int,
|
||||
"date" : "type:datetime",
|
||||
"height" : int,
|
||||
"id" : int,
|
||||
"is_animated" : False,
|
||||
"like_status" : "none",
|
||||
"position" : int,
|
||||
"resolution" : r"re:\d+x\d+",
|
||||
"status" : None,
|
||||
"tags" : list,
|
||||
"thumbnail" : str,
|
||||
"title" : str,
|
||||
"width" : int,
|
||||
"number_of_comments": int,
|
||||
"number_of_favorites": int,
|
||||
},
|
||||
}),
|
||||
("https://luscious.net/albums/not-found_277035/", {
|
||||
"exception": exception.NotFoundError,
|
||||
}),
|
||||
("https://members.luscious.net/albums/login-required_323871/", {
|
||||
"count": 64,
|
||||
}),
|
||||
("https://www.luscious.net/albums/okinami_277031/"),
|
||||
("https://members.luscious.net/albums/okinami_277031/"),
|
||||
("https://luscious.net/pictures/c/video_game_manga/album"
|
||||
"/okinami-no-koigokoro_277031/sorted/position/id/16528978/@_1"),
|
||||
)
|
||||
example = "https://luscious.net/albums/TITLE_12345/"
|
||||
|
||||
def __init__(self, match):
|
||||
LusciousExtractor.__init__(self, match)
|
||||
@ -338,15 +272,7 @@ class LusciousSearchExtractor(LusciousExtractor):
|
||||
subcategory = "search"
|
||||
pattern = (r"(?:https?://)?(?:www\.|members\.)?luscious\.net"
|
||||
r"/albums/list/?(?:\?([^#]+))?")
|
||||
test = (
|
||||
("https://members.luscious.net/albums/list/"),
|
||||
("https://members.luscious.net/albums/list/"
|
||||
"?display=date_newest&language_ids=%2B1&tagged=+full_color&page=1", {
|
||||
"pattern": LusciousAlbumExtractor.pattern,
|
||||
"range": "41-60",
|
||||
"count": 20,
|
||||
}),
|
||||
)
|
||||
example = "https://luscious.net/albums/list/?tagged=TAG"
|
||||
|
||||
def __init__(self, match):
|
||||
LusciousExtractor.__init__(self, match)
|
||||
|
@ -40,22 +40,7 @@ class LynxchanThreadExtractor(LynxchanExtractor):
|
||||
filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
|
||||
archive_fmt = "{boardUri}_{postId}_{num}"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/res/(\d+)"
|
||||
test = (
|
||||
("https://bbw-chan.nl/bbwdraw/res/499.html", {
|
||||
"pattern": r"https://bbw-chan\.nl/\.media/[0-9a-f]{64}(\.\w+)?$",
|
||||
"count": ">= 352",
|
||||
}),
|
||||
("https://bbw-chan.nl/bbwdraw/res/489.html"),
|
||||
("https://kohlchan.net/a/res/4594.html", {
|
||||
"pattern": r"https://kohlchan\.net/\.media/[0-9a-f]{64}(\.\w+)?$",
|
||||
"count": ">= 80",
|
||||
}),
|
||||
("https://endchan.org/yuri/res/193483.html", {
|
||||
"pattern": r"https://endchan\.org/\.media/[^.]+(\.\w+)?$",
|
||||
"count" : ">= 19",
|
||||
}),
|
||||
("https://endchan.org/yuri/res/33621.html"),
|
||||
)
|
||||
example = "https://bbw-chan.nl/a/res/12345.html"
|
||||
|
||||
def __init__(self, match):
|
||||
LynxchanExtractor.__init__(self, match)
|
||||
@ -86,24 +71,7 @@ class LynxchanBoardExtractor(LynxchanExtractor):
|
||||
"""Extractor for LynxChan boards"""
|
||||
subcategory = "board"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/index|/catalog|/\d+|/?$)"
|
||||
test = (
|
||||
("https://bbw-chan.nl/bbwdraw/", {
|
||||
"pattern": LynxchanThreadExtractor.pattern,
|
||||
"count": ">= 148",
|
||||
}),
|
||||
("https://bbw-chan.nl/bbwdraw/2.html"),
|
||||
("https://kohlchan.net/a/", {
|
||||
"pattern": LynxchanThreadExtractor.pattern,
|
||||
"count": ">= 100",
|
||||
}),
|
||||
("https://kohlchan.net/a/2.html"),
|
||||
("https://kohlchan.net/a/catalog.html"),
|
||||
("https://endchan.org/yuri/", {
|
||||
"pattern": LynxchanThreadExtractor.pattern,
|
||||
"count" : ">= 9",
|
||||
}),
|
||||
("https://endchan.org/yuri/catalog.html"),
|
||||
)
|
||||
example = "https://bbw-chan.nl/a/"
|
||||
|
||||
def __init__(self, match):
|
||||
LynxchanExtractor.__init__(self, match)
|
||||
|
@ -98,25 +98,8 @@ class MangadexChapterExtractor(MangadexExtractor):
|
||||
"""Extractor for manga-chapters from mangadex.org"""
|
||||
subcategory = "chapter"
|
||||
pattern = BASE_PATTERN + r"/chapter/([0-9a-f-]+)"
|
||||
test = (
|
||||
("https://mangadex.org/chapter/f946ac53-0b71-4b5d-aeb2-7931b13c4aaa", {
|
||||
"keyword": "e86128a79ebe7201b648f1caa828496a2878dc8f",
|
||||
# "content": "50383a4c15124682057b197d40261641a98db514",
|
||||
}),
|
||||
# oneshot
|
||||
("https://mangadex.org/chapter/61a88817-9c29-4281-bdf1-77b3c1be9831", {
|
||||
"count": 64,
|
||||
"keyword": "d11ed057a919854696853362be35fc0ba7dded4c",
|
||||
}),
|
||||
# MANGA Plus (#1154)
|
||||
("https://mangadex.org/chapter/74149a55-e7c4-44ea-8a37-98e879c1096f", {
|
||||
"exception": exception.StopExtraction,
|
||||
}),
|
||||
# 'externalUrl', but still downloadable (#2503)
|
||||
("https://mangadex.org/chapter/364728a4-6909-4164-9eea-6b56354f7c78", {
|
||||
"count": 0, # 404
|
||||
}),
|
||||
)
|
||||
example = ("https://mangadex.org/chapter"
|
||||
"/01234567-89ab-cdef-0123-456789abcdef")
|
||||
|
||||
def items(self):
|
||||
try:
|
||||
@ -148,48 +131,8 @@ class MangadexMangaExtractor(MangadexExtractor):
|
||||
"""Extractor for manga from mangadex.org"""
|
||||
subcategory = "manga"
|
||||
pattern = BASE_PATTERN + r"/(?:title|manga)/(?!feed$)([0-9a-f-]+)"
|
||||
test = (
|
||||
("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", {
|
||||
"count": ">= 5",
|
||||
"keyword": {
|
||||
"manga" : "Souten no Koumori",
|
||||
"manga_id": "f90c4398-8aad-4f51-8a1f-024ca09fdcbc",
|
||||
"title" : "re:One[Ss]hot",
|
||||
"volume" : 0,
|
||||
"chapter" : 0,
|
||||
"chapter_minor": "",
|
||||
"chapter_id": str,
|
||||
"date" : "type:datetime",
|
||||
"lang" : str,
|
||||
"language": str,
|
||||
"artist" : ["Arakawa Hiromu"],
|
||||
"author" : ["Arakawa Hiromu"],
|
||||
"status" : "completed",
|
||||
"tags" : ["Oneshot", "Historical", "Action",
|
||||
"Martial Arts", "Drama", "Tragedy"],
|
||||
},
|
||||
}),
|
||||
# mutliple values for 'lang' (#4093)
|
||||
("https://mangadex.org/title/f90c4398-8aad-4f51-8a1f-024ca09fdcbc", {
|
||||
"options": (("lang", "fr,it"),),
|
||||
"count": 2,
|
||||
"keyword": {
|
||||
"manga" : "Souten no Koumori",
|
||||
"lang" : "re:fr|it",
|
||||
"language": "re:French|Italian",
|
||||
},
|
||||
}),
|
||||
("https://mangadex.cc/manga/d0c88e3b-ea64-4e07-9841-c1d2ac982f4a/", {
|
||||
"options": (("lang", "en"),),
|
||||
"count": ">= 100",
|
||||
}),
|
||||
("https://mangadex.org/title/7c1e2742-a086-4fd3-a3be-701fd6cf0be9", {
|
||||
"count": 1,
|
||||
}),
|
||||
("https://mangadex.org/title/584ef094-b2ab-40ce-962c-bce341fb9d10", {
|
||||
"count": ">= 20",
|
||||
})
|
||||
)
|
||||
example = ("https://mangadex.org/title"
|
||||
"/01234567-89ab-cdef-0123-456789abcdef")
|
||||
|
||||
def chapters(self):
|
||||
return self.api.manga_feed(self.uuid)
|
||||
@ -199,7 +142,7 @@ class MangadexFeedExtractor(MangadexExtractor):
|
||||
"""Extractor for chapters from your Followed Feed"""
|
||||
subcategory = "feed"
|
||||
pattern = BASE_PATTERN + r"/title/feed$()"
|
||||
test = ("https://mangadex.org/title/feed",)
|
||||
example = "https://mangadex.org/title/feed"
|
||||
|
||||
def chapters(self):
|
||||
return self.api.user_follows_manga_feed()
|
||||
|
@ -20,23 +20,13 @@ class MangafoxChapterExtractor(ChapterExtractor):
|
||||
root = "https://m.fanfox.net"
|
||||
pattern = BASE_PATTERN + \
|
||||
r"(/manga/[^/?#]+/((?:v([^/?#]+)/)?c(\d+)([^/?#]*)))"
|
||||
test = (
|
||||
("http://fanfox.net/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", {
|
||||
"keyword": "5661dab258d42d09d98f194f7172fb9851a49766",
|
||||
"content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
|
||||
}),
|
||||
("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/"),
|
||||
("http://fanfox.net/manga/black_clover/vTBD/c295/1.html"),
|
||||
)
|
||||
example = "https://fanfox.net/manga/TITLE/v01/c001/1.html"
|
||||
|
||||
def __init__(self, match):
|
||||
base, self.cstr, self.volume, self.chapter, self.minor = match.groups()
|
||||
self.urlbase = self.root + base
|
||||
ChapterExtractor.__init__(self, match, self.urlbase + "/1.html")
|
||||
|
||||
def _init(self):
|
||||
self.session.headers["Referer"] = self.root + "/"
|
||||
|
||||
def metadata(self, page):
|
||||
manga, pos = text.extract(page, "<title>", "</title>")
|
||||
count, pos = text.extract(
|
||||
@ -73,36 +63,7 @@ class MangafoxMangaExtractor(MangaExtractor):
|
||||
root = "https://m.fanfox.net"
|
||||
chapterclass = MangafoxChapterExtractor
|
||||
pattern = BASE_PATTERN + r"(/manga/[^/?#]+)/?$"
|
||||
test = (
|
||||
("https://fanfox.net/manga/kanojo_mo_kanojo", {
|
||||
"pattern": MangafoxChapterExtractor.pattern,
|
||||
"count": ">=60",
|
||||
"keyword": {
|
||||
"author": "HIROYUKI",
|
||||
"chapter": int,
|
||||
"chapter_minor": r"re:^(\.\d+)?$",
|
||||
"chapter_string": r"re:(v\d+/)?c\d+",
|
||||
"date": "type:datetime",
|
||||
"description": "High school boy Naoya gets a confession from M"
|
||||
"omi, a cute and friendly girl. However, Naoya "
|
||||
"already has a girlfriend, Seki... but Momi is "
|
||||
"too good a catch to let go. Momi and Nagoya's "
|
||||
"goal becomes clear: convince Seki to accept be"
|
||||
"ing an item with the two of them. Will she bud"
|
||||
"ge?",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga": "Kanojo mo Kanojo",
|
||||
"tags": ["Comedy", "Romance", "School Life", "Shounen"],
|
||||
"volume": int,
|
||||
},
|
||||
}),
|
||||
("https://mangafox.me/manga/shangri_la_frontier", {
|
||||
"pattern": MangafoxChapterExtractor.pattern,
|
||||
"count": ">=45",
|
||||
}),
|
||||
("https://m.fanfox.net/manga/sentai_daishikkaku"),
|
||||
)
|
||||
example = "https://fanfox.net/manga/TITLE"
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
|
@ -25,18 +25,7 @@ class MangahereChapterExtractor(MangahereBase, ChapterExtractor):
|
||||
"""Extractor for manga-chapters from mangahere.cc"""
|
||||
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]/manga/"
|
||||
r"([^/]+(?:/v0*(\d+))?/c([^/?#]+))")
|
||||
test = (
|
||||
("https://www.mangahere.cc/manga/dongguo_xiaojie/c004.2/", {
|
||||
"keyword": "7c98d7b50a47e6757b089aa875a53aa970cac66f",
|
||||
"content": "708d475f06893b88549cbd30df1e3f9428f2c884",
|
||||
}),
|
||||
# URLs without HTTP scheme (#1070)
|
||||
("https://www.mangahere.cc/manga/beastars/c196/1.html", {
|
||||
"pattern": "https://zjcdn.mangahere.org/.*",
|
||||
}),
|
||||
("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
|
||||
("http://m.mangahere.co/manga/dongguo_xiaojie/c003.2/"),
|
||||
)
|
||||
example = "https://www.mangahere.cc/manga/TITLE/c001/1.html"
|
||||
|
||||
def __init__(self, match):
|
||||
self.part, self.volume, self.chapter = match.groups()
|
||||
@ -95,24 +84,7 @@ class MangahereMangaExtractor(MangahereBase, MangaExtractor):
|
||||
chapterclass = MangahereChapterExtractor
|
||||
pattern = (r"(?:https?://)?(?:www\.|m\.)?mangahere\.c[co]"
|
||||
r"(/manga/[^/?#]+/?)(?:#.*)?$")
|
||||
test = (
|
||||
("https://www.mangahere.cc/manga/aria/", {
|
||||
"url": "9c2e54ec42e9a87ad53096c328b33c90750af3e4",
|
||||
"keyword": "71503c682c5d0c277a50409a8c5fd78e871e3d69",
|
||||
"count": 71,
|
||||
}),
|
||||
("https://www.mangahere.cc/manga/hiyokoi/#50", {
|
||||
"url": "654850570aa03825cd57e2ae2904af489602c523",
|
||||
"keyword": "c8084d89a9ea6cf40353093669f9601a39bf5ca2",
|
||||
}),
|
||||
# adult filter (#556)
|
||||
("http://www.mangahere.cc/manga/gunnm_mars_chronicle/", {
|
||||
"pattern": MangahereChapterExtractor.pattern,
|
||||
"count": ">= 50",
|
||||
}),
|
||||
("https://www.mangahere.co/manga/aria/"),
|
||||
("https://m.mangahere.co/manga/aria/"),
|
||||
)
|
||||
example = "https://www.mangahere.cc/manga/TITLE"
|
||||
|
||||
def _init(self):
|
||||
self.cookies.set("isAdult", "1", domain="www.mangahere.cc")
|
||||
|
@ -19,30 +19,18 @@ BASE_PATTERN = r"(?:https?://)?(?:ww[\dw]?\.)?mangakakalot\.tv"
|
||||
class MangakakalotBase():
|
||||
"""Base class for mangakakalot extractors"""
|
||||
category = "mangakakalot"
|
||||
root = "https://ww3.mangakakalot.tv"
|
||||
root = "https://ww6.mangakakalot.tv"
|
||||
|
||||
|
||||
class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
|
||||
"""Extractor for manga chapters from mangakakalot.tv"""
|
||||
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/chapter[_-][^/?#]+)"
|
||||
test = (
|
||||
("https://ww3.mangakakalot.tv/chapter/manga-jk986845/chapter-34.2", {
|
||||
"pattern": r"https://cm\.blazefast\.co"
|
||||
r"/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}\.jpg",
|
||||
"keyword": "0f1586ff52f0f9cbbb25306ae64ab718f8a6a633",
|
||||
"count": 9,
|
||||
}),
|
||||
("https://mangakakalot.tv/chapter"
|
||||
"/hatarakanai_futari_the_jobless_siblings/chapter_20.1"),
|
||||
)
|
||||
example = "https://ww6.mangakakalot.tv/chapter/manga-ID/chapter-01"
|
||||
|
||||
def __init__(self, match):
|
||||
self.path = match.group(1)
|
||||
ChapterExtractor.__init__(self, match, self.root + self.path)
|
||||
|
||||
def _init(self):
|
||||
self.session.headers['Referer'] = self.root + "/"
|
||||
|
||||
def metadata(self, page):
|
||||
_ , pos = text.extract(page, '<span itemprop="title">', '<')
|
||||
manga , pos = text.extract(page, '<span itemprop="title">', '<', pos)
|
||||
@ -78,13 +66,7 @@ class MangakakalotMangaExtractor(MangakakalotBase, MangaExtractor):
|
||||
"""Extractor for manga from mangakakalot.tv"""
|
||||
chapterclass = MangakakalotChapterExtractor
|
||||
pattern = BASE_PATTERN + r"(/manga/[^/?#]+)"
|
||||
test = (
|
||||
("https://ww3.mangakakalot.tv/manga/manga-jk986845", {
|
||||
"pattern": MangakakalotChapterExtractor.pattern,
|
||||
"count": ">= 30",
|
||||
}),
|
||||
("https://mangakakalot.tv/manga/lk921810"),
|
||||
)
|
||||
example = "https://ww6.mangakakalot.tv/manga/manga-ID"
|
||||
|
||||
def chapters(self, page):
|
||||
data = {"lang": "en", "language": "English"}
|
||||
|
@ -23,8 +23,6 @@ class ManganeloBase():
|
||||
super().__init__(match, "https://" + domain + path)
|
||||
|
||||
def _init(self):
|
||||
self.session.headers['Referer'] = self.root + "/"
|
||||
|
||||
if self._match_chapter is None:
|
||||
ManganeloBase._match_chapter = re.compile(
|
||||
r"(?:[Vv]ol\.?\s*(\d+)\s?)?"
|
||||
@ -55,27 +53,7 @@ class ManganeloBase():
|
||||
class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
|
||||
"""Extractor for manga chapters from manganelo.com"""
|
||||
pattern = BASE_PATTERN + r"(/(?:manga-\w+|chapter/\w+)/chapter[-_][^/?#]+)"
|
||||
test = (
|
||||
("https://chapmanganato.com/manga-gn983696/chapter-23", {
|
||||
"pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/03/23"
|
||||
r"/39/gn983696/vol_3_chapter_23_24_yen/\d+-[no]\.jpg",
|
||||
"keyword": "17faaea7f0fb8c2675a327bf3aa0bcd7a6311d68",
|
||||
"count": 25,
|
||||
}),
|
||||
("https://chapmanganelo.com/manga-ti107776/chapter-4", {
|
||||
"pattern": r"https://v\d+\.mkklcdnv6tempv5\.com/img/tab_17/01/92"
|
||||
r"/08/ti970565/chapter_4_caster/\d+-o\.jpg",
|
||||
"keyword": "06e01fa9b3fc9b5b954c0d4a98f0153b40922ded",
|
||||
"count": 45,
|
||||
}),
|
||||
("https://chapmanganato.com/manga-no991297/chapter-8", {
|
||||
"keyword": {"chapter": 8, "chapter_minor": "-1"},
|
||||
"count": 20,
|
||||
}),
|
||||
("https://readmanganato.com/manga-gn983696/chapter-23"),
|
||||
("https://manganelo.com/chapter/gamers/chapter_15"),
|
||||
("https://manganelo.com/chapter/gq921227/chapter_23"),
|
||||
)
|
||||
example = "https://chapmanganato.com/manga-ID/chapter-01"
|
||||
|
||||
def metadata(self, page):
|
||||
extr = text.extract_from(page)
|
||||
@ -104,19 +82,7 @@ class ManganeloMangaExtractor(ManganeloBase, MangaExtractor):
|
||||
"""Extractor for manga from manganelo.com"""
|
||||
chapterclass = ManganeloChapterExtractor
|
||||
pattern = BASE_PATTERN + r"(/(?:manga[-/]|read_)\w+)/?$"
|
||||
test = (
|
||||
("https://chapmanganato.com/manga-gn983696", {
|
||||
"pattern": ManganeloChapterExtractor.pattern,
|
||||
"count": ">= 25",
|
||||
}),
|
||||
("https://m.manganelo.com/manga-ti107776", {
|
||||
"pattern": ManganeloChapterExtractor.pattern,
|
||||
"count": ">= 12",
|
||||
}),
|
||||
("https://readmanganato.com/manga-gn983696"),
|
||||
("https://manganelo.com/manga/read_otome_no_teikoku"),
|
||||
("https://manganelo.com/manga/ol921234/"),
|
||||
)
|
||||
example = "https://manganato.com/manga-ID"
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
|
@ -35,39 +35,7 @@ class MangaparkBase():
|
||||
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
|
||||
"""Extractor for manga-chapters from mangapark.net"""
|
||||
pattern = BASE_PATTERN + r"/title/[^/?#]+/(\d+)"
|
||||
test = (
|
||||
("https://mangapark.net/title/114972-aria/6710214-en-ch.60.2", {
|
||||
"count": 70,
|
||||
"pattern": r"https://[\w-]+\.mpcdn\.org/comic/2002/e67"
|
||||
r"/61e29278a583b9227964076e/\d+_\d+_\d+_\d+\.jpeg"
|
||||
r"\?acc=[^&#]+&exp=\d+",
|
||||
"keyword": {
|
||||
"artist": [],
|
||||
"author": ["Amano Kozue"],
|
||||
"chapter": 60,
|
||||
"chapter_id": 6710214,
|
||||
"chapter_minor": ".2",
|
||||
"count": 70,
|
||||
"date": "dt:2022-01-15 09:25:03",
|
||||
"extension": "jpeg",
|
||||
"filename": str,
|
||||
"genre": ["adventure", "comedy", "drama", "sci_fi",
|
||||
"shounen", "slice_of_life"],
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga": "Aria",
|
||||
"manga_id": 114972,
|
||||
"page": int,
|
||||
"source": "Koala",
|
||||
"title": "Special Navigation - Aquaria Ii",
|
||||
"volume": 12,
|
||||
},
|
||||
}),
|
||||
("https://mangapark.com/title/114972-aria/6710214-en-ch.60.2"),
|
||||
("https://mangapark.org/title/114972-aria/6710214-en-ch.60.2"),
|
||||
("https://mangapark.io/title/114972-aria/6710214-en-ch.60.2"),
|
||||
("https://mangapark.me/title/114972-aria/6710214-en-ch.60.2"),
|
||||
)
|
||||
example = "https://mangapark.net/title/MANGA/12345-en-ch.01"
|
||||
|
||||
def __init__(self, match):
|
||||
self.root = text.root_from_url(match.group(0))
|
||||
@ -115,41 +83,7 @@ class MangaparkMangaExtractor(MangaparkBase, Extractor):
|
||||
"""Extractor for manga from mangapark.net"""
|
||||
subcategory = "manga"
|
||||
pattern = BASE_PATTERN + r"/title/(\d+)(?:-[^/?#]*)?/?$"
|
||||
test = (
|
||||
("https://mangapark.net/title/114972-aria", {
|
||||
"count": 141,
|
||||
"pattern": MangaparkChapterExtractor.pattern,
|
||||
"keyword": {
|
||||
"chapter": int,
|
||||
"chapter_id": int,
|
||||
"chapter_minor": str,
|
||||
"date": "type:datetime",
|
||||
"lang": "en",
|
||||
"language": "English",
|
||||
"manga_id": 114972,
|
||||
"source": "re:Horse|Koala",
|
||||
"source_id": int,
|
||||
"title": str,
|
||||
"volume": int,
|
||||
},
|
||||
}),
|
||||
# 'source' option
|
||||
("https://mangapark.net/title/114972-aria", {
|
||||
"options": (("source", "koala"),),
|
||||
"count": 70,
|
||||
"pattern": MangaparkChapterExtractor.pattern,
|
||||
"keyword": {
|
||||
"source": "Koala",
|
||||
"source_id": 15150116,
|
||||
},
|
||||
}),
|
||||
("https://mangapark.com/title/114972-"),
|
||||
("https://mangapark.com/title/114972"),
|
||||
("https://mangapark.com/title/114972-aria"),
|
||||
("https://mangapark.org/title/114972-aria"),
|
||||
("https://mangapark.io/title/114972-aria"),
|
||||
("https://mangapark.me/title/114972-aria"),
|
||||
)
|
||||
example = "https://mangapark.net/title/12345-MANGA"
|
||||
|
||||
def __init__(self, match):
|
||||
self.root = text.root_from_url(match.group(0))
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user