mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-21 18:22:30 +01:00
Merge branch 'mikf:master' into master
This commit is contained in:
commit
fbf4ef4f3a
32
CHANGELOG.md
32
CHANGELOG.md
@ -1,5 +1,37 @@
|
||||
# Changelog
|
||||
|
||||
## 1.26.6 - 2024-01-06
|
||||
### Extractors
|
||||
#### Additions
|
||||
- [batoto] add `chapter` and `manga` extractors ([#1434](https://github.com/mikf/gallery-dl/issues/1434), [#2111](https://github.com/mikf/gallery-dl/issues/2111), [#4979](https://github.com/mikf/gallery-dl/issues/4979))
|
||||
- [deviantart] add `avatar` and `background` extractors ([#4995](https://github.com/mikf/gallery-dl/issues/4995))
|
||||
- [poringa] add support ([#4675](https://github.com/mikf/gallery-dl/issues/4675), [#4962](https://github.com/mikf/gallery-dl/issues/4962))
|
||||
- [szurubooru] support `snootbooru.com` ([#5023](https://github.com/mikf/gallery-dl/issues/5023))
|
||||
- [zzup] add `gallery` extractor ([#4517](https://github.com/mikf/gallery-dl/issues/4517), [#4604](https://github.com/mikf/gallery-dl/issues/4604), [#4659](https://github.com/mikf/gallery-dl/issues/4659), [#4863](https://github.com/mikf/gallery-dl/issues/4863), [#5016](https://github.com/mikf/gallery-dl/issues/5016))
|
||||
#### Fixes
|
||||
- [gelbooru] fix `favorite` extractor ([#4903](https://github.com/mikf/gallery-dl/issues/4903))
|
||||
- [idolcomplex] fix extraction & update URL patterns ([#5002](https://github.com/mikf/gallery-dl/issues/5002))
|
||||
- [imagechest] fix loading more than 10 images in a gallery ([#4469](https://github.com/mikf/gallery-dl/issues/4469))
|
||||
- [jpgfish] update domain
|
||||
- [komikcast] fix `manga` extractor ([#5027](https://github.com/mikf/gallery-dl/issues/5027))
|
||||
- [komikcast] update domain ([#5027](https://github.com/mikf/gallery-dl/issues/5027))
|
||||
- [lynxchan] update `bbw-chan` domain ([#4970](https://github.com/mikf/gallery-dl/issues/4970))
|
||||
- [manganelo] fix extraction & recognize `.to` TLDs ([#5005](https://github.com/mikf/gallery-dl/issues/5005))
|
||||
- [paheal] restore `extension` metadata ([#4976](https://github.com/mikf/gallery-dl/issues/4976))
|
||||
- [rule34us] add fallback for `video-cdn1` videos ([#4985](https://github.com/mikf/gallery-dl/issues/4985))
|
||||
- [weibo] fix AttributeError in `user` extractor ([#5022](https://github.com/mikf/gallery-dl/issues/5022))
|
||||
#### Improvements
|
||||
- [gelbooru] show error for invalid API responses ([#4903](https://github.com/mikf/gallery-dl/issues/4903))
|
||||
- [rule34] recognize URLs with `www` subdomain ([#4984](https://github.com/mikf/gallery-dl/issues/4984))
|
||||
- [twitter] raise error for invalid `strategy` values ([#4953](https://github.com/mikf/gallery-dl/issues/4953))
|
||||
#### Metadata
|
||||
- [fanbox] add `metadata` option ([#4921](https://github.com/mikf/gallery-dl/issues/4921))
|
||||
- [nijie] add `count` metadata ([#146](https://github.com/mikf/gallery-dl/issues/146))
|
||||
- [pinterest] add `count` metadata ([#4981](https://github.com/mikf/gallery-dl/issues/4981))
|
||||
### Miscellaneous
|
||||
- fix and update zsh completion ([#4972](https://github.com/mikf/gallery-dl/issues/4972))
|
||||
- fix `--cookies-from-browser` macOS Firefox profile path
|
||||
|
||||
## 1.26.5 - 2023-12-23
|
||||
### Extractors
|
||||
#### Additions
|
||||
|
@ -72,9 +72,9 @@ Standalone Executable
|
||||
Prebuilt executable files with a Python interpreter and
|
||||
required Python packages included are available for
|
||||
|
||||
- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.exe>`__
|
||||
- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.exe>`__
|
||||
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
|
||||
- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.5/gallery-dl.bin>`__
|
||||
- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.6/gallery-dl.bin>`__
|
||||
|
||||
|
||||
Nightly Builds
|
||||
|
@ -627,6 +627,20 @@ Description
|
||||
`ssl.SSLContext.set_ciphers() <https://docs.python.org/3/library/ssl.html#ssl.SSLContext.set_ciphers>`__
|
||||
|
||||
|
||||
extractor.*.tls12
|
||||
-----------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
* ``true``
|
||||
* ``false`` for ``patreon``, ``pixiv:series``
|
||||
Description
|
||||
Allow selecting TLS 1.2 cipher suites.
|
||||
|
||||
Can be disabled to alter TLS fingerprints
|
||||
and potentially bypass Cloudflare blocks.
|
||||
|
||||
|
||||
extractor.*.keywords
|
||||
--------------------
|
||||
Type
|
||||
@ -1066,6 +1080,25 @@ Description
|
||||
after a colon ``:``, for example ``{date:%Y%m%d}``.
|
||||
|
||||
|
||||
extractor.*.write-pages
|
||||
-----------------------
|
||||
Type
|
||||
* ``bool``
|
||||
* ``string``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
During data extraction,
|
||||
write received HTTP request data
|
||||
to enumerated files in the current working directory.
|
||||
|
||||
Special values:
|
||||
|
||||
* ``"all"``: Include HTTP request and response headers. Hide ``Authorization``, ``Cookie``, and ``Set-Cookie`` values.
|
||||
* ``"ALL"``: Include all HTTP request and response headers.
|
||||
|
||||
|
||||
|
||||
Extractor-specific Options
|
||||
==========================
|
||||
|
||||
@ -1368,7 +1401,13 @@ Description
|
||||
when processing a user profile.
|
||||
|
||||
Possible values are
|
||||
``"gallery"``, ``"scraps"``, ``"journal"``, ``"favorite"``, ``"status"``.
|
||||
``"avatar"``,
|
||||
``"background"``,
|
||||
``"gallery"``,
|
||||
``"scraps"``,
|
||||
``"journal"``,
|
||||
``"favorite"``,
|
||||
``"status"``.
|
||||
|
||||
It is possible to use ``"all"`` instead of listing all values separately.
|
||||
|
||||
@ -1520,6 +1559,19 @@ Description
|
||||
Minimum wait time in seconds before API requests.
|
||||
|
||||
|
||||
extractor.deviantart.avatar.formats
|
||||
-----------------------------------
|
||||
Type
|
||||
``list`` of ``strings``
|
||||
Example
|
||||
``["original.jpg", "big.jpg", "big.gif", ".png"]``
|
||||
Description
|
||||
Avatar URL formats to return.
|
||||
|
||||
| Each format is parsed as ``SIZE.EXT``.
|
||||
| Leave ``SIZE`` empty to download the regular, small avatar format.
|
||||
|
||||
|
||||
extractor.[E621].metadata
|
||||
-------------------------
|
||||
Type
|
||||
@ -1530,7 +1582,7 @@ Default
|
||||
``false``
|
||||
Example
|
||||
* ``notes,pools``
|
||||
* ``["notes", "pools"``
|
||||
* ``["notes", "pools"]``
|
||||
Description
|
||||
Extract additional metadata (notes, pool metadata) if available.
|
||||
|
||||
@ -1672,6 +1724,21 @@ Description
|
||||
* ``false``: Ignore embeds.
|
||||
|
||||
|
||||
extractor.fanbox.metadata
|
||||
-------------------------
|
||||
Type
|
||||
* ``bool``
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``false``
|
||||
Example
|
||||
* ``user,plan``
|
||||
* ``["user", "plan"]``
|
||||
Description
|
||||
Extract ``plan`` and extended ``user`` metadata.
|
||||
|
||||
|
||||
extractor.flickr.access-token & .access-token-secret
|
||||
----------------------------------------------------
|
||||
Type
|
||||
@ -3022,6 +3089,176 @@ Description
|
||||
Download video files.
|
||||
|
||||
|
||||
extractor.steamgriddb.animated
|
||||
------------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Include animated assets when downloading from a list of assets.
|
||||
|
||||
|
||||
extractor.steamgriddb.epilepsy
|
||||
------------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Include assets tagged with epilepsy when downloading from a list of assets.
|
||||
|
||||
|
||||
extractor.steamgriddb.dimensions
|
||||
--------------------------------
|
||||
Type
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``"all"``
|
||||
Examples
|
||||
* ``"1024x512,512x512"``
|
||||
* ``["460x215", "920x430"]``
|
||||
Description
|
||||
Only include assets that are in the specified dimensions. ``all`` can be
|
||||
used to specify all dimensions. Valid values are:
|
||||
|
||||
* Grids: ``460x215``, ``920x430``, ``600x900``, ``342x482``, ``660x930``,
|
||||
``512x512``, ``1024x1024``
|
||||
* Heroes: ``1920x620``, ``3840x1240``, ``1600x650``
|
||||
* Logos: N/A (will be ignored)
|
||||
* Icons: ``8x8``, ``10x10``, ``14x14``, ``16x16``, ``20x20``, ``24x24``,
|
||||
``28x28``, ``32x32``, ``35x35``, ``40x40``, ``48x48``, ``54x54``,
|
||||
``56x56``, ``57x57``, ``60x60``, ``64x64``, ``72x72``, ``76x76``,
|
||||
``80x80``, ``90x90``, ``96x96``, ``100x100``, ``114x114``, ``120x120``,
|
||||
``128x128``, ``144x144``, ``150x150``, ``152x152``, ``160x160``,
|
||||
``180x180``, ``192x192``, ``194x194``, ``256x256``, ``310x310``,
|
||||
``512x512``, ``768x768``, ``1024x1024``
|
||||
|
||||
|
||||
extractor.steamgriddb.file-types
|
||||
--------------------------------
|
||||
Type
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``"all"``
|
||||
Examples
|
||||
* ``"png,jpeg"``
|
||||
* ``["jpeg", "webp"]``
|
||||
Description
|
||||
Only include assets that are in the specified file types. ``all`` can be
|
||||
used to specify all file types. Valid values are:
|
||||
|
||||
* Grids: ``png``, ``jpeg``, ``jpg``, ``webp``
|
||||
* Heroes: ``png``, ``jpeg``, ``jpg``, ``webp``
|
||||
* Logos: ``png``, ``webp``
|
||||
* Icons: ``png``, ``ico``
|
||||
|
||||
|
||||
extractor.steamgriddb.download-fake-png
|
||||
---------------------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Download fake PNGs alongside the real file.
|
||||
|
||||
|
||||
extractor.steamgriddb.humor
|
||||
---------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Include assets tagged with humor when downloading from a list of assets.
|
||||
|
||||
|
||||
extractor.steamgriddb.languages
|
||||
-------------------------------
|
||||
Type
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``"all"``
|
||||
Examples
|
||||
* ``"en,km"``
|
||||
* ``["fr", "it"]``
|
||||
Description
|
||||
Only include assets that are in the specified languages. ``all`` can be
|
||||
used to specify all languages. Valid values are `ISO 639-1 <https://en.wikipedia.org/wiki/ISO_639-1>`__
|
||||
language codes.
|
||||
|
||||
|
||||
extractor.steamgriddb.nsfw
|
||||
--------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Include assets tagged with adult content when downloading from a list of assets.
|
||||
|
||||
|
||||
extractor.steamgriddb.sort
|
||||
--------------------------
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``score_desc``
|
||||
Description
|
||||
Set the chosen sorting method when downloading from a list of assets. Can be one of:
|
||||
|
||||
* ``score_desc`` (Highest Score (Beta))
|
||||
* ``score_asc`` (Lowest Score (Beta))
|
||||
* ``score_old_desc`` (Highest Score (Old))
|
||||
* ``score_old_asc`` (Lowest Score (Old))
|
||||
* ``age_desc`` (Newest First)
|
||||
* ``age_asc`` (Oldest First)
|
||||
|
||||
|
||||
extractor.steamgriddb.static
|
||||
----------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Include static assets when downloading from a list of assets.
|
||||
|
||||
|
||||
extractor.steamgriddb.styles
|
||||
----------------------------
|
||||
Type
|
||||
* ``string``
|
||||
* ``list`` of ``strings``
|
||||
Default
|
||||
``all``
|
||||
Examples
|
||||
* ``white,black``
|
||||
* ``["no_logo", "white_logo"]``
|
||||
Description
|
||||
Only include assets that are in the specified styles. ``all`` can be used
|
||||
to specify all styles. Valid values are:
|
||||
|
||||
* Grids: ``alternate``, ``blurred``, ``no_logo``, ``material``, ``white_logo``
|
||||
* Heroes: ``alternate``, ``blurred``, ``material``
|
||||
* Logos: ``official``, ``white``, ``black``, ``custom``
|
||||
* Icons: ``official``, ``custom``
|
||||
|
||||
|
||||
extractor.steamgriddb.untagged
|
||||
------------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
Include untagged assets when downloading from a list of assets.
|
||||
|
||||
|
||||
extractor.[szurubooru].username & .token
|
||||
----------------------------------------
|
||||
Type
|
||||
@ -3442,7 +3679,7 @@ Description
|
||||
* ``"tweets"``: `/tweets <https://twitter.com/USER/tweets>`__ timeline + search
|
||||
* ``"media"``: `/media <https://twitter.com/USER/media>`__ timeline + search
|
||||
* ``"with_replies"``: `/with_replies <https://twitter.com/USER/with_replies>`__ timeline + search
|
||||
* ``"auto"``: ``"tweets"`` or ``"media"``, depending on `retweets <extractor.twitter.retweets_>`__, `replies <extractor.twitter.replies_>`__, and `text-tweets <extractor.twitter.text-tweets_>`__ settings
|
||||
* ``"auto"``: ``"tweets"`` or ``"media"``, depending on `retweets <extractor.twitter.retweets_>`__ and `text-tweets <extractor.twitter.text-tweets_>`__ settings
|
||||
|
||||
|
||||
extractor.twitter.text-tweets
|
||||
|
@ -176,16 +176,15 @@
|
||||
|
||||
"imgur":
|
||||
{
|
||||
"#": "use different directory and filename formats when coming from a reddit post",
|
||||
"directory":
|
||||
{
|
||||
"'_reddit' in locals()": []
|
||||
},
|
||||
"filename":
|
||||
{
|
||||
"'_reddit' in locals()": "{_reddit[id]} {id}.{extension}",
|
||||
"" : "{id}.{extension}"
|
||||
}
|
||||
"#": "general imgur settings",
|
||||
"filename": "{id}.{extension}"
|
||||
},
|
||||
|
||||
"reddit>imgur":
|
||||
{
|
||||
"#": "special settings for imgur URLs found in reddit posts",
|
||||
"directory": [],
|
||||
"filename": "{_reddit[id]} {_reddit[title]} {id}.{extension}"
|
||||
},
|
||||
|
||||
"tumblr":
|
||||
|
@ -13,6 +13,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody valign="top">
|
||||
<tr>
|
||||
<td>2ch</td>
|
||||
<td>https://2ch.hk/</td>
|
||||
<td>Boards, Threads</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>2chen</td>
|
||||
<td>https://sturdychan.help/</td>
|
||||
@ -97,6 +103,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Albums, Artwork Listings, Challenges, Followed Users, individual Images, Likes, Search Results, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>BATO.TO</td>
|
||||
<td>https://bato.to/</td>
|
||||
<td>Chapters, Manga</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>BBC</td>
|
||||
<td>https://bbc.co.uk/</td>
|
||||
@ -148,7 +160,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr>
|
||||
<td>DeviantArt</td>
|
||||
<td>https://www.deviantart.com/</td>
|
||||
<td>Collections, Deviations, Favorites, Folders, Followed Users, Galleries, Gallery Searches, Journals, Popular Images, Scraps, Search Results, Sta.sh, Status Updates, Tag Searches, User Profiles, Watches</td>
|
||||
<td>Avatars, Backgrounds, Collections, Deviations, Favorites, Folders, Followed Users, Galleries, Gallery Searches, Journals, Popular Images, Scraps, Search Results, Sta.sh, Status Updates, Tag Searches, User Profiles, Watches</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -253,6 +265,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Folders</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>HatenaBlog</td>
|
||||
<td>https://hatenablog.com</td>
|
||||
<td>Archive, Individual Posts, Home Feed, Search Results</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>HBrowse</td>
|
||||
<td>https://www.hbrowse.com/</td>
|
||||
@ -447,7 +465,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Komikcast</td>
|
||||
<td>https://komikcast.site/</td>
|
||||
<td>https://komikcast.lol/</td>
|
||||
<td>Chapters, Manga</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
@ -496,7 +514,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr>
|
||||
<td>MangaDex</td>
|
||||
<td>https://mangadex.org/</td>
|
||||
<td>Chapters, Followed Feed, Manga</td>
|
||||
<td>Chapters, Followed Feed, Lists, Manga</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -589,12 +607,6 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Albums</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Nudecollect</td>
|
||||
<td>https://nudecollect.com/</td>
|
||||
<td>Albums, individual Images</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Patreon</td>
|
||||
<td>https://www.patreon.com/</td>
|
||||
@ -679,6 +691,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Posts, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Poringa</td>
|
||||
<td>http://www.poringa.net/</td>
|
||||
<td>Posts Images, Search Results, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Porn Image</td>
|
||||
<td>https://porn-images-xxx.com/</td>
|
||||
@ -805,6 +823,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Presentations</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>SteamGridDB</td>
|
||||
<td>https://www.steamgriddb.com</td>
|
||||
<td>Individual Assets, Grids, Heroes, Icons, Logos</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>SubscribeStar</td>
|
||||
<td>https://www.subscribestar.com/</td>
|
||||
@ -997,6 +1021,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>individual Images, Tag Searches</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Zzup</td>
|
||||
<td>https://zzup.com/</td>
|
||||
<td>Galleries</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>かべうち</td>
|
||||
<td>https://kabe-uchiroom.com/</td>
|
||||
@ -1031,7 +1061,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
</tr>
|
||||
<tr>
|
||||
<td>JPG Fish</td>
|
||||
<td>https://jpg2.su/</td>
|
||||
<td>https://jpg4.su/</td>
|
||||
<td>Albums, individual Images, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
@ -1403,6 +1433,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Snootbooru</td>
|
||||
<td>https://snootbooru.com/</td>
|
||||
<td>Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td colspan="4"><strong>URL Shorteners</strong></td>
|
||||
@ -1442,6 +1478,64 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td colspan="4"><strong>Wikimedia Instances</strong></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wikipedia</td>
|
||||
<td>https://www.wikipedia.org/</td>
|
||||
<td>Articles, Categories</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wiktionary</td>
|
||||
<td>https://www.wiktionary.org/</td>
|
||||
<td>Articles, Categories</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wikiquote</td>
|
||||
<td>https://www.wikiquote.org/</td>
|
||||
<td>Articles, Categories</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wikibooks</td>
|
||||
<td>https://www.wikibooks.org/</td>
|
||||
<td>Articles, Categories</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wikisource</td>
|
||||
<td>https://www.wikisource.org/</td>
|
||||
<td>Articles, Categories</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wikinews</td>
|
||||
<td>https://www.wikinews.org/</td>
|
||||
<td>Articles, Categories</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wikiversity</td>
|
||||
<td>https://www.wikiversity.org/</td>
|
||||
<td>Articles, Categories</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wikispecies</td>
|
||||
<td>https://species.wikimedia.org/</td>
|
||||
<td>Articles, Categories</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wikimedia Commons</td>
|
||||
<td>https://commons.wikimedia.org/</td>
|
||||
<td>Articles, Categories</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td colspan="4"><strong>Moebooru and MyImouto</strong></td>
|
||||
</tr>
|
||||
|
@ -45,7 +45,7 @@ def main():
|
||||
elif filename.startswith("\\f"):
|
||||
filename = "\f" + filename[2:]
|
||||
config.set((), "filename", filename)
|
||||
if args.directory:
|
||||
if args.directory is not None:
|
||||
config.set((), "base-directory", args.directory)
|
||||
config.set((), "directory", ())
|
||||
if args.postprocessors:
|
||||
|
@ -215,9 +215,11 @@ def _firefox_cookies_database(profile=None, container=None):
|
||||
|
||||
def _firefox_browser_directory():
|
||||
if sys.platform in ("win32", "cygwin"):
|
||||
return os.path.expandvars(r"%APPDATA%\Mozilla\Firefox\Profiles")
|
||||
return os.path.expandvars(
|
||||
r"%APPDATA%\Mozilla\Firefox\Profiles")
|
||||
if sys.platform == "darwin":
|
||||
return os.path.expanduser("~/Library/Application Support/Firefox")
|
||||
return os.path.expanduser(
|
||||
"~/Library/Application Support/Firefox/Profiles")
|
||||
return os.path.expanduser("~/.mozilla/firefox")
|
||||
|
||||
|
||||
|
91
gallery_dl/extractor/2ch.py
Normal file
91
gallery_dl/extractor/2ch.py
Normal file
@ -0,0 +1,91 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://2ch.hk/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
|
||||
|
||||
class _2chThreadExtractor(Extractor):
    """Extractor for the files attached to the posts of one 2ch thread"""
    category = "2ch"
    subcategory = "thread"
    root = "https://2ch.hk"
    directory_fmt = ("{category}", "{board}", "{thread} {title}")
    filename_fmt = "{tim}{filename:? //}.{extension}"
    archive_fmt = "{board}_{thread}_{tim}"
    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/res/(\d+)"
    example = "https://2ch.hk/a/res/12345.html"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # group 1: board name, group 2: thread number
        self.board, self.thread = match.groups()

    def items(self):
        """Yield one Directory message, then one Url message per file"""
        api_url = "{}/{}/res/{}.json".format(
            self.root, self.board, self.thread)
        posts = self.request(api_url).json()["threads"][0]["posts"]

        # the thread title comes from the first post:
        # its subject, or its HTML-stripped comment when no subject is set
        first = posts[0]
        title = first.get("subject")
        if not title:
            title = text.remove_html(first["comment"])

        thread = {
            "board" : self.board,
            "thread": self.thread,
            "title" : text.unescape(title)[:50],
        }
        yield Message.Directory, thread

        for post in posts:
            files = post.get("files")
            if not files:
                continue

            # move the poster name out of the way so it does not
            # overwrite each file's own 'name' entry during update()
            post["post_name"] = post["name"]
            post["date"] = text.parse_timestamp(post["timestamp"])
            post.pop("files")
            post.pop("name")

            for file in files:
                for source in (thread, post):
                    file.update(source)

                file["filename"] = file["fullname"].rpartition(".")[0]
                tim, _, extension = file["name"].rpartition(".")
                file["tim"] = tim
                file["extension"] = extension

                yield Message.Url, self.root + file["path"], file
|
||||
|
||||
|
||||
class _2chBoardExtractor(Extractor):
    """Extractor for the threads listed on a 2ch board"""
    category = "2ch"
    subcategory = "board"
    root = "https://2ch.hk"
    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/?$"
    example = "https://2ch.hk/a/"

    def __init__(self, match):
        Extractor.__init__(self, match)
        self.board = match.group(1)

    def items(self):
        """Yield a Queue message for every thread of every board page"""

        def queue_threads(listing):
            # tag the listing so queued URLs get handled
            # by the thread extractor
            listing["_extractor"] = _2chThreadExtractor
            for entry in listing["threads"]:
                thread_url = "{}/{}/res/{}.html".format(
                    self.root, self.board, entry["thread_num"])
                yield Message.Queue, thread_url, listing

        # index page
        index_url = "{}/{}/index.json".format(self.root, self.board)
        index = self.request(index_url).json()
        yield from queue_threads(index)

        # pages 1..n (the first entry of 'pages' is skipped)
        for number in util.advance(index["pages"], 1):
            page_url = "{}/{}/{}.json".format(self.root, self.board, number)
            yield from queue_threads(self.request(page_url).json())
|
@ -10,6 +10,7 @@ import sys
|
||||
import re
|
||||
|
||||
modules = [
|
||||
"2ch",
|
||||
"2chan",
|
||||
"2chen",
|
||||
"35photo",
|
||||
@ -24,6 +25,7 @@ modules = [
|
||||
"architizer",
|
||||
"artstation",
|
||||
"aryion",
|
||||
"batoto",
|
||||
"bbc",
|
||||
"behance",
|
||||
"blogger",
|
||||
@ -52,6 +54,7 @@ modules = [
|
||||
"gelbooru_v01",
|
||||
"gelbooru_v02",
|
||||
"gofile",
|
||||
"hatenablog",
|
||||
"hbrowse",
|
||||
"hentai2read",
|
||||
"hentaicosplays",
|
||||
@ -107,7 +110,6 @@ modules = [
|
||||
"nitter",
|
||||
"nozomi",
|
||||
"nsfwalbum",
|
||||
"nudecollect",
|
||||
"paheal",
|
||||
"patreon",
|
||||
"philomena",
|
||||
@ -122,6 +124,7 @@ modules = [
|
||||
"pixnet",
|
||||
"plurk",
|
||||
"poipiku",
|
||||
"poringa",
|
||||
"pornhub",
|
||||
"pornpics",
|
||||
"postmill",
|
||||
@ -144,6 +147,7 @@ modules = [
|
||||
"smugmug",
|
||||
"soundgasm",
|
||||
"speakerdeck",
|
||||
"steamgriddb",
|
||||
"subscribestar",
|
||||
"szurubooru",
|
||||
"tapas",
|
||||
@ -174,9 +178,11 @@ modules = [
|
||||
"weibo",
|
||||
"wikiart",
|
||||
"wikifeet",
|
||||
"wikimedia",
|
||||
"xhamster",
|
||||
"xvideos",
|
||||
"zerochan",
|
||||
"zzup",
|
||||
"booru",
|
||||
"moebooru",
|
||||
"foolfuuka",
|
||||
|
123
gallery_dl/extractor/batoto.py
Normal file
123
gallery_dl/extractor/batoto.py
Normal file
@ -0,0 +1,123 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://bato.to/"""
|
||||
|
||||
from .common import Extractor, ChapterExtractor, MangaExtractor
|
||||
from .. import text, exception
|
||||
import re
|
||||
|
||||
BASE_PATTERN = (r"(?:https?://)?(?:"
|
||||
r"(?:ba|d|h|m|w)to\.to|"
|
||||
r"(?:(?:manga|read)toto|batocomic|[xz]bato)\.(?:com|net|org)|"
|
||||
r"comiko\.(?:net|org)|"
|
||||
r"bat(?:otoo|o?two)\.com)")
|
||||
|
||||
|
||||
class BatotoBase():
    """Common attributes and request handling for batoto extractors"""
    category = "batoto"
    root = "https://bato.to"

    def request(self, url, **kwargs):
        """Call Extractor.request() with 'encoding' forced to utf-8"""
        kwargs.update(encoding="utf-8")
        return Extractor.request(self, url, **kwargs)
|
||||
|
||||
|
||||
class BatotoChapterExtractor(BatotoBase, ChapterExtractor):
    """Extractor for bato.to manga chapters"""
    pattern = BASE_PATTERN + r"/(?:title/[^/?#]+|chapter)/(\d+)"
    example = "https://bato.to/title/12345-MANGA/54321"

    def __init__(self, match):
        # keep whichever mirror domain the input URL used
        self.root = text.root_from_url(match.group(0))
        self.chapter_id = match.group(1)
        url = "{}/title/0/{}".format(self.root, self.chapter_id)
        ChapterExtractor.__init__(self, match, url)

    def metadata(self, page):
        """Return a dict of manga and chapter metadata parsed from 'page'"""
        extr = text.extract_from(page)

        # The <title> is expected to hold three " - "-separated fields:
        # manga name, chapter info, and a trailing field that is discarded.
        # maxsplit=2 yields at most three fields, so a " - " inside the
        # manga name stays in the first field instead of producing a
        # fourth one and failing the 3-name unpacking with ValueError.
        manga, info, _ = extr("<title>", "<").rsplit(" - ", 2)
        manga_id = text.extr(
            extr('rel="canonical" href="', '"'), "/title/", "/")

        # optional "Volume N " prefix, then a word and the chapter number;
        # anything after the number is kept as 'chapter_minor'
        match = re.match(
            r"(?:Volume\s+(\d+) )?"
            r"\w+\s+(\d+)(.*)", info)
        if match:
            volume, chapter, minor = match.groups()
            # chapter title: text after " : " in the selected <option>
            title = text.remove_html(extr(
                "selected>", "</option")).partition(" : ")[2]
        else:
            # unrecognized info: use it verbatim as the title
            volume = chapter = 0
            minor = ""
            title = info

        return {
            "manga"        : text.unescape(manga),
            "manga_id"     : text.parse_int(manga_id),
            "title"        : text.unescape(title),
            "volume"       : text.parse_int(volume),
            "chapter"      : text.parse_int(chapter),
            "chapter_minor": minor,
            "chapter_id"   : text.parse_int(self.chapter_id),
            # the last three characters are dropped before parsing
            # (presumably millisecond digits -- TODO confirm)
            "date"         : text.parse_timestamp(extr(' time="', '"')[:-3]),
        }

    def images(self, page):
        """Return a list of (url, None) tuples for the chapter's images"""
        images_container = text.extr(page, 'pageOpts', ':[0,0]}"')
        images_container = text.unescape(images_container)
        # URLs are delimited by literal backslash-quote sequences
        return [
            (url, None)
            for url in text.extract_iter(images_container, r"\"", r"\"")
        ]
|
||||
|
||||
|
||||
class BatotoMangaExtractor(BatotoBase, MangaExtractor):
    """Extractor for all chapters of a bato.to manga"""
    reverse = False
    chapterclass = BatotoChapterExtractor
    pattern = (BASE_PATTERN +
               r"/(?:title/(\d+)[^/?#]*|series/(\d+)(?:/[^/?#]*)?)/?$")
    example = "https://bato.to/title/12345-MANGA/"

    def __init__(self, match):
        self.root = text.root_from_url(match.group(0))

        # group 1 is set for /title/ URLs, group 2 for /series/ URLs
        manga_id = match.group(1)
        if not manga_id:
            manga_id = match.group(2)
        self.manga_id = manga_id

        url = "{}/title/{}".format(self.root, self.manga_id)
        MangaExtractor.__init__(self, match, url)

    def chapters(self, page):
        """Return a list of (chapter URL, metadata dict) tuples"""
        extr = text.extract_from(page)

        # an alert box on the page aborts extraction with its message
        alert = extr(' class="alert alert-warning">', "</div><")
        if alert:
            raise exception.StopExtraction("'%s'", text.remove_html(alert))

        manga_id = text.parse_int(self.manga_id)
        page_title = extr("<title>", "<")
        common = {
            "manga_id": manga_id,
            "manga"   : text.unescape(page_title.rpartition(" - ")[0]),
        }

        # move the read position to this marker before collecting links
        extr('<div data-hk="0-0-0-0"', "")

        chapters = []
        # extr() returns "" once no further chapter link is found
        for href in iter(lambda: extr('<a href="/title/', '"'), ""):
            number, dot, minor = href.rpartition("-ch_")[2].partition(".")

            entry = dict(
                common,
                chapter=text.parse_int(number),
                chapter_minor=dot + minor,
                date=text.parse_datetime(
                    extr('time="', '"'), "%Y-%m-%dT%H:%M:%S.%fZ"),
            )
            chapter_url = "{}/title/{}".format(self.root, href)
            chapters.append((chapter_url, entry))
        return chapters
|
@ -35,7 +35,7 @@ class CheveretoExtractor(BaseExtractor):
|
||||
|
||||
BASE_PATTERN = CheveretoExtractor.update({
|
||||
"jpgfish": {
|
||||
"root": "https://jpg2.su",
|
||||
"root": "https://jpg4.su",
|
||||
"pattern": r"jpe?g\d?\.(?:su|pet|fish(?:ing)?|church)",
|
||||
},
|
||||
"pixl": {
|
||||
|
@ -526,12 +526,15 @@ class Extractor():
|
||||
if include == "all":
|
||||
include = extractors
|
||||
elif isinstance(include, str):
|
||||
include = include.split(",")
|
||||
include = include.replace(" ", "").split(",")
|
||||
|
||||
result = [(Message.Version, 1)]
|
||||
for category in include:
|
||||
if category in extractors:
|
||||
try:
|
||||
extr, url = extractors[category]
|
||||
except KeyError:
|
||||
self.log.warning("Invalid include '%s'", category)
|
||||
else:
|
||||
result.append((Message.Queue, url, {"_extractor": extr}))
|
||||
return iter(result)
|
||||
|
||||
|
@ -38,7 +38,7 @@ class DeviantartExtractor(Extractor):
|
||||
|
||||
def __init__(self, match):
|
||||
Extractor.__init__(self, match)
|
||||
self.user = match.group(1) or match.group(2)
|
||||
self.user = (match.group(1) or match.group(2) or "").lower()
|
||||
self.offset = 0
|
||||
|
||||
def _init(self):
|
||||
@ -104,7 +104,6 @@ class DeviantartExtractor(Extractor):
|
||||
raise exception.StopExtraction()
|
||||
else:
|
||||
self.subcategory = "group-" + self.subcategory
|
||||
self.user = self.user.lower()
|
||||
self.group = True
|
||||
|
||||
for deviation in self.deviations():
|
||||
@ -453,9 +452,11 @@ class DeviantartExtractor(Extractor):
|
||||
return None
|
||||
|
||||
dev = self.api.deviation(deviation["deviationid"], False)
|
||||
folder = dev["premium_folder_data"]
|
||||
folder = deviation["premium_folder_data"]
|
||||
username = dev["author"]["username"]
|
||||
has_access = folder["has_access"]
|
||||
|
||||
# premium_folder_data is no longer present when user has access (#5063)
|
||||
has_access = ("premium_folder_data" not in dev) or folder["has_access"]
|
||||
|
||||
if not has_access and folder["type"] == "watchers" and \
|
||||
self.config("auto-watch"):
|
||||
@ -513,11 +514,13 @@ class DeviantartUserExtractor(DeviantartExtractor):
|
||||
def items(self):
|
||||
base = "{}/{}/".format(self.root, self.user)
|
||||
return self._dispatch_extractors((
|
||||
(DeviantartGalleryExtractor , base + "gallery"),
|
||||
(DeviantartScrapsExtractor , base + "gallery/scraps"),
|
||||
(DeviantartJournalExtractor , base + "posts"),
|
||||
(DeviantartStatusExtractor , base + "posts/statuses"),
|
||||
(DeviantartFavoriteExtractor, base + "favourites"),
|
||||
(DeviantartAvatarExtractor , base + "avatar"),
|
||||
(DeviantartBackgroundExtractor, base + "banner"),
|
||||
(DeviantartGalleryExtractor , base + "gallery"),
|
||||
(DeviantartScrapsExtractor , base + "gallery/scraps"),
|
||||
(DeviantartJournalExtractor , base + "posts"),
|
||||
(DeviantartStatusExtractor , base + "posts/statuses"),
|
||||
(DeviantartFavoriteExtractor , base + "favourites"),
|
||||
), ("gallery",))
|
||||
|
||||
|
||||
@ -538,6 +541,70 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
|
||||
return self._folder_urls(folders, "gallery", DeviantartFolderExtractor)
|
||||
|
||||
|
||||
class DeviantartAvatarExtractor(DeviantartExtractor):
    """Extractor for the avatar image(s) of a DeviantArt user"""
    subcategory = "avatar"
    archive_fmt = "a_{_username}_{index}"
    pattern = BASE_PATTERN + r"/avatar"
    example = "https://www.deviantart.com/USER/avatar/"

    def deviations(self):
        """Return deviation-like dicts wrapping the user's avatar URL(s)

        Without a 'formats' option, a single entry pointing at the
        'avatars-big' variant of the profile icon is returned. Otherwise
        one entry per configured format is built.
        """
        name = self.user.lower()
        profile = self.api.user_profile(name)
        if not profile:
            return ()

        user = profile["user"]
        icon = user["usericon"]
        # the part after the last '?' of the icon URL is used as index
        index = icon.rpartition("?")[2]

        formats = self.config("formats")
        if not formats:
            big = icon.replace("/avatars/", "/avatars-big/", 1)
            return (self._make_deviation(big, user, index, ""),)

        if isinstance(formats, str):
            formats = formats.replace(" ", "").split(",")

        template = "https://a.deviantart.net/avatars{}/{}/{}/{}.{}?{}"
        results = []
        for spec in formats:
            # "NAME.EXT" -> ("NAME", "EXT"); a bare "EXT" leaves NAME empty
            variant, _, ext = spec.rpartition(".")
            suffix = "-" + variant if variant else variant
            url = template.format(
                suffix, name[0], name[1], name, ext, index)
            results.append(self._make_deviation(url, user, index, suffix))
        return results

    def _make_deviation(self, url, user, index, fmt):
        """Build a deviation-like dict for the avatar file at 'url'"""
        return {
            "author": user,
            "category": "avatar",
            "index": text.parse_int(index),
            "is_deleted": False,
            "is_downloadable": False,
            "published_time": 0,
            "title": "avatar" + fmt,
            "stats": {"comments": 0},
            "content": {"src": url},
        }
|
||||
|
||||
|
||||
class DeviantartBackgroundExtractor(DeviantartExtractor):
    """Extractor for the banner (background) deviation of a user profile"""
    subcategory = "background"
    archive_fmt = "b_{index}"
    pattern = BASE_PATTERN + r"/ba(?:nner|ckground)"
    example = "https://www.deviantart.com/USER/banner/"

    def deviations(self):
        """Return a 1-tuple with the profile's cover deviation, or ()"""
        try:
            profile = self.api.user_profile(self.user.lower())
            cover = profile["cover_deviation"]["cover_deviation"]
        except Exception:
            # best effort: any failure while fetching or reading the
            # profile is treated as "no banner"
            return ()
        return (cover,)
|
||||
|
||||
|
||||
class DeviantartFolderExtractor(DeviantartExtractor):
|
||||
"""Extractor for deviations inside an artist's gallery folder"""
|
||||
subcategory = "folder"
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
from ..cache import memcache
|
||||
import re
|
||||
|
||||
BASE_PATTERN = (
|
||||
@ -27,8 +28,20 @@ class FanboxExtractor(Extractor):
|
||||
_warning = True
|
||||
|
||||
def _init(self):
|
||||
self.headers = {"Origin": self.root}
|
||||
self.embeds = self.config("embeds", True)
|
||||
|
||||
includes = self.config("metadata")
|
||||
if includes:
|
||||
if isinstance(includes, str):
|
||||
includes = includes.split(",")
|
||||
elif not isinstance(includes, (list, tuple)):
|
||||
includes = ("user", "plan")
|
||||
self._meta_user = ("user" in includes)
|
||||
self._meta_plan = ("plan" in includes)
|
||||
else:
|
||||
self._meta_user = self._meta_plan = False
|
||||
|
||||
if self._warning:
|
||||
if not self.cookies_check(("FANBOXSESSID",)):
|
||||
self.log.warning("no 'FANBOXSESSID' cookie set")
|
||||
@ -43,11 +56,9 @@ class FanboxExtractor(Extractor):
|
||||
"""Return all relevant post objects"""
|
||||
|
||||
def _pagination(self, url):
|
||||
headers = {"Origin": self.root}
|
||||
|
||||
while url:
|
||||
url = text.ensure_http_scheme(url)
|
||||
body = self.request(url, headers=headers).json()["body"]
|
||||
body = self.request(url, headers=self.headers).json()["body"]
|
||||
for item in body["items"]:
|
||||
try:
|
||||
yield self._get_post_data(item["id"])
|
||||
@ -58,9 +69,8 @@ class FanboxExtractor(Extractor):
|
||||
|
||||
def _get_post_data(self, post_id):
|
||||
"""Fetch and process post data"""
|
||||
headers = {"Origin": self.root}
|
||||
url = "https://api.fanbox.cc/post.info?postId="+post_id
|
||||
post = self.request(url, headers=headers).json()["body"]
|
||||
post = self.request(url, headers=self.headers).json()["body"]
|
||||
|
||||
content_body = post.pop("body", None)
|
||||
if content_body:
|
||||
@ -98,8 +108,47 @@ class FanboxExtractor(Extractor):
|
||||
post["text"] = content_body.get("text") if content_body else None
|
||||
post["isCoverImage"] = False
|
||||
|
||||
if self._meta_user:
|
||||
post["user"] = self._get_user_data(post["creatorId"])
|
||||
if self._meta_plan:
|
||||
plans = self._get_plan_data(post["creatorId"])
|
||||
post["plan"] = plans[post["feeRequired"]]
|
||||
|
||||
return content_body, post
|
||||
|
||||
@memcache(keyarg=1)
def _get_user_data(self, creator_id):
    """Fetch creator info for 'creator_id' from the 'creator.get' endpoint

    The nested "user" object of the response body is merged into the
    body itself, and the resulting flat dict is returned.
    """
    response = self.request(
        "https://api.fanbox.cc/creator.get",
        params={"creatorId": creator_id},
        headers=self.headers,
    )
    user = response.json()["body"]

    # lift the fields of the nested "user" object to the top level
    nested = user.pop("user")
    user.update(nested)
    return user
|
||||
|
||||
@memcache(keyarg=1)
def _get_plan_data(self, creator_id):
    """Return the plans of 'creator_id' as a dict keyed by their fee

    An empty placeholder plan is always present under fee 0; plans
    returned by the 'plan.listCreator' endpoint are added under their
    own "fee" value with their "user" entry removed.
    """
    response = self.request(
        "https://api.fanbox.cc/plan.listCreator",
        params={"creatorId": creator_id},
        headers=self.headers,
    )
    data = response.json()

    free_plan = {
        "id": "",
        "title": "",
        "fee": 0,
        "description": "",
        "coverImageUrl": "",
        "creatorId": creator_id,
        "hasAdultContent": None,
        "paymentMethod": None,
    }
    plans = {0: free_plan}

    for plan in data["body"]:
        del plan["user"]
        plans[plan["fee"]] = plan
    return plans
|
||||
|
||||
def _get_urls_from_post(self, content_body, post):
|
||||
num = 0
|
||||
cover_image = post.get("coverImageUrl")
|
||||
|
@ -22,7 +22,7 @@ class FuskatorGalleryExtractor(GalleryExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
self.gallery_hash = match.group(1)
|
||||
url = "{}/thumbs/{}/".format(self.root, self.gallery_hash)
|
||||
url = "{}/thumbs/{}/index.html".format(self.root, self.gallery_hash)
|
||||
GalleryExtractor.__init__(self, match, url)
|
||||
|
||||
def metadata(self, page):
|
||||
@ -50,15 +50,16 @@ class FuskatorGalleryExtractor(GalleryExtractor):
|
||||
"gallery_id" : text.parse_int(gallery_id),
|
||||
"gallery_hash": self.gallery_hash,
|
||||
"title" : text.unescape(title[:-15]),
|
||||
"views" : data["hits"],
|
||||
"score" : data["rating"],
|
||||
"tags" : data["tags"].split(","),
|
||||
"count" : len(data["images"]),
|
||||
"views" : data.get("hits"),
|
||||
"score" : data.get("rating"),
|
||||
"tags" : (data.get("tags") or "").split(","),
|
||||
}
|
||||
|
||||
def images(self, page):
|
||||
for image in self.data["images"]:
|
||||
yield "https:" + image["imageUrl"], image
|
||||
return [
|
||||
("https:" + image["imageUrl"], image)
|
||||
for image in self.data["images"]
|
||||
]
|
||||
|
||||
|
||||
class FuskatorSearchExtractor(Extractor):
|
||||
|
@ -23,7 +23,7 @@ class GelbooruBase():
|
||||
root = "https://gelbooru.com"
|
||||
offset = 0
|
||||
|
||||
def _api_request(self, params, key="post"):
|
||||
def _api_request(self, params, key="post", log=False):
|
||||
if "s" not in params:
|
||||
params["s"] = "post"
|
||||
params["api_key"] = self.api_key
|
||||
@ -32,10 +32,14 @@ class GelbooruBase():
|
||||
url = self.root + "/index.php?page=dapi&q=index&json=1"
|
||||
data = self.request(url, params=params).json()
|
||||
|
||||
if key not in data:
|
||||
return ()
|
||||
try:
|
||||
posts = data[key]
|
||||
except KeyError:
|
||||
if log:
|
||||
self.log.error("Incomplete API response (missing '%s')", key)
|
||||
self.log.debug("%s", data)
|
||||
return []
|
||||
|
||||
posts = data[key]
|
||||
if not isinstance(posts, list):
|
||||
return (posts,)
|
||||
return posts
|
||||
@ -165,15 +169,16 @@ class GelbooruFavoriteExtractor(GelbooruBase,
|
||||
"id" : self.favorite_id,
|
||||
"limit": "1",
|
||||
}
|
||||
count = self._api_request(params, "@attributes")[0]["count"]
|
||||
|
||||
count = self._api_request(params, "@attributes", True)[0]["count"]
|
||||
if count <= self.offset:
|
||||
return
|
||||
pnum, last = divmod(count + 1, self.per_page)
|
||||
|
||||
if self.offset >= last:
|
||||
pnum, last = divmod(count-1, self.per_page)
|
||||
if self.offset > last:
|
||||
# page number change
|
||||
self.offset -= last
|
||||
diff, self.offset = divmod(self.offset, self.per_page)
|
||||
diff, self.offset = divmod(self.offset-1, self.per_page)
|
||||
pnum -= diff + 1
|
||||
skip = self.offset
|
||||
|
||||
@ -182,9 +187,9 @@ class GelbooruFavoriteExtractor(GelbooruBase,
|
||||
params["limit"] = self.per_page
|
||||
|
||||
while True:
|
||||
favs = self._api_request(params, "favorite")
|
||||
|
||||
favs = self._api_request(params, "favorite", True)
|
||||
favs.reverse()
|
||||
|
||||
if skip:
|
||||
favs = favs[skip:]
|
||||
skip = 0
|
||||
|
@ -168,7 +168,7 @@ INSTANCES = {
|
||||
},
|
||||
"rule34": {
|
||||
"root": "https://rule34.xxx",
|
||||
"pattern": r"rule34\.xxx",
|
||||
"pattern": r"(?:www\.)?rule34\.xxx",
|
||||
"api_root": "https://api.rule34.xxx",
|
||||
},
|
||||
"safebooru": {
|
||||
|
167
gallery_dl/extractor/hatenablog.py
Normal file
167
gallery_dl/extractor/hatenablog.py
Normal file
@ -0,0 +1,167 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://hatenablog.com"""
|
||||
|
||||
import re
|
||||
from .common import Extractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
# Matches either an explicit "hatenablog:"-prefixed URL on an arbitrary
# host (group 1) or a blog on one of the listed Hatena domains (group 2).
BASE_PATTERN = (
    r"(?:"
    r"hatenablog:https?://([^/?#]+)"
    r"|"
    r"(?:https?://)?"
    r"([\w-]+\.(?:hatenablog\.(?:com|jp)|hatenadiary\.com|hateblo\.jp))"
    r")"
)
# Optional query string (captured without the leading '?'), followed by
# an optional fragment that is matched but not captured.
QUERY_RE = r"(?:\?([^#]*))?(?:#.*)?$"
|
||||
|
||||
|
||||
class HatenablogExtractor(Extractor):
    """Functionality shared by all HatenaBlog extractors"""
    category = "hatenablog"
    directory_fmt = ("{category}", "{domain}")
    filename_fmt = "{category}_{domain}_{entry}_{num:>02}.{extension}"
    archive_fmt = "{filename}"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # group 1: host of a "hatenablog:" URL, group 2: Hatena-domain host
        self.domain = match.group(1) or match.group(2)

    def _init(self):
        # captures the attribute string of every <img> tag
        self._find_img = re.compile(r'<img +([^>]+)').finditer

    def _handle_article(self, article: str):
        """Yield a Directory message and one Url message per image

        'article' is the HTML of a single article. Only <img> tags whose
        attributes contain class="hatena-fotolife" are considered.
        """
        # the extraction calls are stateful and must stay in this order
        extr = text.extract_from(article)
        date = text.parse_datetime(extr('<time datetime="', '"'))
        entry_link = text.unescape(extr('<a href="', '"'))
        title = text.unescape(extr('>', '<'))
        content = extr(
            '<div class="entry-content hatenablog-entry">', '</div>')

        images = [
            text.unescape(text.extr(attrs, 'src="', '"'))
            for attrs in (m.group(1) for m in self._find_img(content))
            if 'class="hatena-fotolife"' in attrs
        ]

        data = {
            "domain": self.domain,
            "date": date,
            "entry": entry_link.partition("/entry/")[2],
            "title": title,
            "count": len(images),
        }
        yield Message.Directory, data

        # the same 'data' dict is updated and passed along for each image
        for num, url in enumerate(images, 1):
            data["num"] = num
            yield Message.Url, url, text.nameext_from_url(url, data)
|
||||
|
||||
|
||||
class HatenablogEntriesExtractor(HatenablogExtractor):
    """Base class for extractors that walk through pages of entries"""
    # query parameter names (besides "page") kept from the input URL
    allowed_parameters = ()

    def __init__(self, match):
        HatenablogExtractor.__init__(self, match)
        self.path = match.group(3)

        query = {}
        for key, value in text.parse_query(match.group(4)).items():
            if self._acceptable_query(key):
                query[key] = value
        self.query = query

    def _init(self):
        HatenablogExtractor._init(self)
        # finds the link inside the "pager-next" element
        self._find_pager_url = re.compile(
            r' class="pager-next">\s*<a href="([^"]+)').search

    def items(self):
        """Process every listing page, following "pager-next" links"""
        url = "https://" + self.domain + self.path
        query = self.query

        while url:
            page = self.request(url, params=query).text
            extr = text.extract_from(page)

            # archive pages and full-article pages need different handling
            if "page-archive" in extr('<body ', '>'):
                handler = self._handle_partial_articles
            else:
                handler = self._handle_full_articles
            yield from handler(extr)

            match = self._find_pager_url(page)
            if match is None:
                return
            url = text.unescape(match.group(1))
            # query parameters are only sent with the first request
            query = None

    def _handle_partial_articles(self, extr):
        """Queue the entry URL of each "archive-entry" section"""
        while True:
            section = extr('<section class="archive-entry', '</section>')
            if not section:
                return

            href = text.extr(
                section, '<a class="entry-title-link" href="', '"')
            yield (
                Message.Queue,
                "hatenablog:" + text.unescape(href),
                {"_extractor": HatenablogEntryExtractor},
            )

    def _handle_full_articles(self, extr):
        """Process each <article> not marked with "no-entry" """
        while True:
            attributes = extr('<article ', '>')
            if not attributes:
                return
            if "no-entry" not in attributes:
                yield from self._handle_article(extr('', '</article>'))

    def _acceptable_query(self, key):
        """Return whether query parameter 'key' should be kept"""
        if key == "page":
            return True
        return key in self.allowed_parameters
|
||||
|
||||
|
||||
class HatenablogEntryExtractor(HatenablogExtractor):
    """Extractor for the images of one blog entry"""
    subcategory = "entry"
    pattern = BASE_PATTERN + r"/entry/([^?#]+)" + QUERY_RE
    example = "https://BLOG.hatenablog.com/entry/PATH"

    def __init__(self, match):
        HatenablogExtractor.__init__(self, match)
        # everything after "/entry/" up to the query string or fragment
        self.path = match.group(3)

    def items(self):
        """Request the entry page and process its first regular article"""
        url = "https://" + self.domain + "/entry/" + self.path
        extr = text.extract_from(self.request(url).text)

        # skip <article> tags whose attributes contain "no-entry"
        while "no-entry" in extr('<article ', '>'):
            pass
        return self._handle_article(extr('', '</article>'))
|
||||
|
||||
|
||||
class HatenablogHomeExtractor(HatenablogEntriesExtractor):
    """Extractor for the entries on a blog's home page"""
    subcategory = "home"
    example = "https://BLOG.hatenablog.com"
    # the path group is either empty or a single "/"
    pattern = BASE_PATTERN + r"(/?)" + QUERY_RE
|
||||
|
||||
|
||||
class HatenablogArchiveExtractor(HatenablogEntriesExtractor):
    """Extractor for the entries on a blog's archive pages"""
    subcategory = "archive"
    example = "https://BLOG.hatenablog.com/archive/2024"
    # "/archive", optionally followed by up to three numeric path
    # segments or by "/category/NAME"
    pattern = (
        BASE_PATTERN +
        r"(/archive"
        r"(?:/\d+(?:/\d+(?:/\d+)?)?|/category/[^?#]+)?"
        r")" +
        QUERY_RE
    )
|
||||
|
||||
|
||||
class HatenablogSearchExtractor(HatenablogEntriesExtractor):
    """Extractor for the entries found by a blog's search"""
    subcategory = "search"
    # keep the "q" parameter of the input URL in addition to "page"
    allowed_parameters = ("q",)
    example = "https://BLOG.hatenablog.com/search?q=QUERY"
    pattern = BASE_PATTERN + r"(/search)" + QUERY_RE
|
@ -34,8 +34,11 @@ class IdolcomplexExtractor(SankakuExtractor):
|
||||
self.start_post = 0
|
||||
|
||||
def _init(self):
|
||||
self.find_pids = re.compile(
|
||||
r" href=[\"#]/\w\w/posts/([0-9a-f]+)"
|
||||
).findall
|
||||
self.find_tags = re.compile(
|
||||
r'tag-type-([^"]+)">\s*<div [^>]+>\s*<a href="/\?tags=([^"]+)'
|
||||
r'tag-type-([^"]+)">\s*<a [^>]*?href="/[^?]*\?tags=([^"]+)'
|
||||
).findall
|
||||
|
||||
def items(self):
|
||||
@ -149,8 +152,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
||||
subcategory = "tag"
|
||||
directory_fmt = ("{category}", "{search_tags}")
|
||||
archive_fmt = "t_{search_tags}_{id}"
|
||||
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/\?([^#]*)"
|
||||
example = "https://idol.sankakucomplex.com/?tags=TAGS"
|
||||
pattern = BASE_PATTERN + r"/(?:posts/?)?\?([^#]*)"
|
||||
example = "https://idol.sankakucomplex.com/en/posts?tags=TAGS"
|
||||
per_page = 20
|
||||
|
||||
def __init__(self, match):
|
||||
@ -196,7 +199,8 @@ class IdolcomplexTagExtractor(IdolcomplexExtractor):
|
||||
page = self.request(self.root, params=params, retries=10).text
|
||||
pos = ((page.find('id="more-popular-posts-link"') + 1) or
|
||||
(page.find('<span class="thumb') + 1))
|
||||
yield from text.extract_iter(page, ' href="/posts/', '"', pos)
|
||||
|
||||
yield from self.find_pids(page, pos)
|
||||
|
||||
next_url = text.extract(page, 'next-page-url="', '"', pos)[0]
|
||||
if not next_url:
|
||||
@ -218,7 +222,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
|
||||
subcategory = "pool"
|
||||
directory_fmt = ("{category}", "pool", "{pool}")
|
||||
archive_fmt = "p_{pool}_{id}"
|
||||
pattern = r"(?:https?://)?idol\.sankakucomplex\.com/pools?/show/(\d+)"
|
||||
pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
|
||||
example = "https://idol.sankakucomplex.com/pools/show/12345"
|
||||
per_page = 24
|
||||
|
||||
@ -242,8 +246,7 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
|
||||
while True:
|
||||
page = self.request(url, params=params, retries=10).text
|
||||
pos = page.find('id="pool-show"') + 1
|
||||
post_ids = list(text.extract_iter(
|
||||
page, ' href="/posts/', '"', pos))
|
||||
post_ids = self.find_pids(page, pos)
|
||||
|
||||
yield from post_ids
|
||||
if len(post_ids) < self.per_page:
|
||||
|
@ -9,9 +9,10 @@
|
||||
"""Extractors for https://kemono.party/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache, memcache
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|beta\.)?(kemono|coomer)\.(party|su)"
|
||||
@ -37,10 +38,14 @@ class KemonopartyExtractor(Extractor):
|
||||
Extractor.__init__(self, match)
|
||||
|
||||
def _init(self):
|
||||
self.revisions = self.config("revisions")
|
||||
self._prepare_ddosguard_cookies()
|
||||
self._find_inline = re.compile(
|
||||
r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'
|
||||
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
|
||||
self._json_dumps = json.JSONEncoder(
|
||||
ensure_ascii=False, check_circular=False,
|
||||
sort_keys=True, separators=(",", ":")).encode
|
||||
|
||||
def items(self):
|
||||
find_hash = re.compile(HASH_PATTERN).match
|
||||
@ -223,11 +228,23 @@ class KemonopartyExtractor(Extractor):
|
||||
|
||||
idx = len(revs)
|
||||
for rev in revs:
|
||||
rev["revision_hash"] = self._revision_hash(rev)
|
||||
rev["revision_index"] = idx
|
||||
idx -= 1
|
||||
|
||||
return revs
|
||||
|
||||
def _revision_hash(self, revision):
    """Return the SHA-1 digest of the serialized content of 'revision'

    The keys "revision_id", "added", "next" and "prev" as well as the
    "name" of the file and of every attachment are left out of the
    serialized data.

    'revision' itself is not modified. It must provide a "file" dict
    and an "attachments" list of dicts.
    """
    rev = revision.copy()
    rev.pop("revision_id", None)
    rev.pop("added", None)
    rev.pop("next", None)
    rev.pop("prev", None)

    # dict.copy() is shallow: copy the nested containers before removing
    # "name", so the caller's post keeps its file and attachment names
    rev["file"] = rev["file"].copy()
    rev["file"].pop("name", None)
    rev["attachments"] = [a.copy() for a in rev["attachments"]]
    for a in rev["attachments"]:
        a.pop("name", None)

    return util.sha1(self._json_dumps(rev))
|
||||
|
||||
|
||||
def _validate(response):
|
||||
return (response.headers["content-length"] != "9" or
|
||||
@ -252,13 +269,13 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
|
||||
url = self.api_url
|
||||
params = text.parse_query(self.query)
|
||||
params["o"] = text.parse_int(params.get("o"))
|
||||
revisions = self.config("revisions")
|
||||
|
||||
while True:
|
||||
posts = self.request(url, params=params).json()
|
||||
|
||||
if revisions:
|
||||
if self.revisions:
|
||||
for post in posts:
|
||||
post["revision_hash"] = self._revision_hash(post)
|
||||
post["revision_id"] = 0
|
||||
post_url = "{}/post/{}".format(self.api_url, post["id"])
|
||||
try:
|
||||
@ -296,7 +313,8 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
|
||||
def posts(self):
|
||||
if not self.revision:
|
||||
post = self.request(self.api_url).json()
|
||||
if self.config("revisions"):
|
||||
if self.revisions:
|
||||
post["revision_hash"] = self._revision_hash(post)
|
||||
post["revision_id"] = 0
|
||||
try:
|
||||
revs = self._post_revisions(self.api_url)
|
||||
|
@ -6,19 +6,19 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://komikcast.site/"""
|
||||
"""Extractors for https://komikcast.lol/"""
|
||||
|
||||
from .common import ChapterExtractor, MangaExtractor
|
||||
from .. import text
|
||||
import re
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:site|me|com)"
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?komikcast\.(?:lol|site|me|com)"
|
||||
|
||||
|
||||
class KomikcastBase():
|
||||
"""Base class for komikcast extractors"""
|
||||
category = "komikcast"
|
||||
root = "https://komikcast.site"
|
||||
root = "https://komikcast.lol"
|
||||
|
||||
@staticmethod
|
||||
def parse_chapter_string(chapter_string, data=None):
|
||||
@ -46,9 +46,9 @@ class KomikcastBase():
|
||||
|
||||
|
||||
class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
|
||||
"""Extractor for manga-chapters from komikcast.site"""
|
||||
"""Extractor for manga-chapters from komikcast.lol"""
|
||||
pattern = BASE_PATTERN + r"(/chapter/[^/?#]+/)"
|
||||
example = "https://komikcast.site/chapter/TITLE/"
|
||||
example = "https://komikcast.lol/chapter/TITLE/"
|
||||
|
||||
def metadata(self, page):
|
||||
info = text.extr(page, "<title>", " - Komikcast<")
|
||||
@ -65,10 +65,10 @@ class KomikcastChapterExtractor(KomikcastBase, ChapterExtractor):
|
||||
|
||||
|
||||
class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
|
||||
"""Extractor for manga from komikcast.site"""
|
||||
"""Extractor for manga from komikcast.lol"""
|
||||
chapterclass = KomikcastChapterExtractor
|
||||
pattern = BASE_PATTERN + r"(/(?:komik/)?[^/?#]+)/?$"
|
||||
example = "https://komikcast.site/komik/TITLE"
|
||||
example = "https://komikcast.lol/komik/TITLE"
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
@ -76,8 +76,10 @@ class KomikcastMangaExtractor(KomikcastBase, MangaExtractor):
|
||||
|
||||
for item in text.extract_iter(
|
||||
page, '<a class="chapter-link-item" href="', '</a'):
|
||||
url, _, chapter_string = item.rpartition('">Chapter ')
|
||||
self.parse_chapter_string(chapter_string, data)
|
||||
url, _, chapter = item.rpartition('">Chapter')
|
||||
chapter, sep, minor = chapter.strip().partition(".")
|
||||
data["chapter"] = text.parse_int(chapter)
|
||||
data["chapter_minor"] = sep + minor
|
||||
results.append((url, data.copy()))
|
||||
return results
|
||||
|
||||
|
@ -148,6 +148,31 @@ class MangadexFeedExtractor(MangadexExtractor):
|
||||
return self.api.user_follows_manga_feed()
|
||||
|
||||
|
||||
class MangadexListExtractor(MangadexExtractor):
    """Extractor for mangadex lists

    A list URL with "?tab=feed" yields the chapters of the list's feed;
    any other list URL queues the manga titles contained in the list.
    """
    subcategory = "list"
    pattern = (BASE_PATTERN +
               r"/list/([0-9a-f-]+)(?:/[^/?#]*)?(?:\?tab=(\w+))?")
    example = ("https://mangadex.org/list"
               "/01234567-89ab-cdef-0123-456789abcdef/NAME")

    def __init__(self, match):
        MangadexExtractor.__init__(self, match)
        # group 2 is the value of the optional "tab" query parameter
        if match.group(2) == "feed":
            # keep the inherited items(), which is presumably driven by
            # chapters() - confirm against MangadexExtractor
            self.subcategory = "list-feed"
        else:
            # titles view: queue each manga of the list instead
            self.items = self._items_titles

    def chapters(self):
        """Return the chapters of this list's feed"""
        return self.api.list_feed(self.uuid)

    def _items_titles(self):
        """Yield a Queue message for every manga referenced by the list"""
        data = {"_extractor": MangadexMangaExtractor}
        for item in self.api.list(self.uuid)["relationships"]:
            if item["type"] == "manga":
                url = "{}/title/{}".format(self.root, item["id"])
                yield Message.Queue, url, data
|
||||
|
||||
|
||||
class MangadexAPI():
|
||||
"""Interface for the MangaDex API v5
|
||||
|
||||
@ -173,6 +198,12 @@ class MangadexAPI():
|
||||
params = {"includes[]": ("scanlation_group",)}
|
||||
return self._call("/chapter/" + uuid, params)["data"]
|
||||
|
||||
def list(self, uuid):
    """Return the "data" object of the API response for list 'uuid'"""
    response = self._call("/list/" + uuid)
    return response["data"]
|
||||
|
||||
def list_feed(self, uuid):
    """Return the paginated results of the feed endpoint of list 'uuid'"""
    endpoint = "/list/" + uuid + "/feed"
    return self._pagination(endpoint)
|
||||
|
||||
@memcache(keyarg=1)
|
||||
def manga(self, uuid):
|
||||
params = {"includes[]": ("artist", "author")}
|
||||
|
@ -10,7 +10,11 @@ from .common import ChapterExtractor, MangaExtractor
|
||||
from .. import text
|
||||
import re
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?((?:chap|read|www\.|m\.)?mangan(?:at|el)o\.com)"
|
||||
BASE_PATTERN = (
|
||||
r"(?:https?://)?"
|
||||
r"((?:chap|read|www\.|m\.)?mangan(?:at|el)o"
|
||||
r"\.(?:to|com))"
|
||||
)
|
||||
|
||||
|
||||
class ManganeloBase():
|
||||
@ -67,10 +71,11 @@ class ManganeloChapterExtractor(ManganeloBase, ChapterExtractor):
|
||||
|
||||
def images(self, page):
|
||||
page = text.extr(
|
||||
page, 'class="container-chapter-reader', '\n<div')
|
||||
page, 'class="container-chapter-reader', 'class="container')
|
||||
return [
|
||||
(url, None)
|
||||
for url in text.extract_iter(page, '<img src="', '"')
|
||||
if not url.endswith("/gohome.png")
|
||||
] or [
|
||||
(url, None)
|
||||
for url in text.extract_iter(
|
||||
|
@ -55,9 +55,12 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
||||
else:
|
||||
data["user_id"] = data["artist_id"]
|
||||
data["user_name"] = data["artist_name"]
|
||||
yield Message.Directory, data
|
||||
|
||||
for num, url in enumerate(self._extract_images(image_id, page)):
|
||||
urls = list(self._extract_images(image_id, page))
|
||||
data["count"] = len(urls)
|
||||
|
||||
yield Message.Directory, data
|
||||
for num, url in enumerate(urls):
|
||||
image = text.nameext_from_url(url, {
|
||||
"num": num,
|
||||
"url": "https:" + url,
|
||||
@ -113,7 +116,8 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
|
||||
yield from text.extract_iter(
|
||||
page, 'href="javascript:void(0);"><img src="', '"')
|
||||
else:
|
||||
yield text.extr(page, 'itemprop="image" src="', '"')
|
||||
pos = page.find('id="view-center"') + 1
|
||||
yield text.extract(page, 'itemprop="image" src="', '"', pos)[0]
|
||||
|
||||
@staticmethod
|
||||
def _extract_user_name(page):
|
||||
|
@ -1,87 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://nudecollect.com/"""
|
||||
|
||||
from .common import GalleryExtractor
|
||||
from .. import text
|
||||
|
||||
|
||||
class NudecollectExtractor(GalleryExtractor):
|
||||
"""Base class for Nudecollect extractors"""
|
||||
category = "nudecollect"
|
||||
directory_fmt = ("{category}", "{title}")
|
||||
filename_fmt = "{slug}_{num:>03}.{extension}"
|
||||
archive_fmt = "{slug}_{num}"
|
||||
root = "https://www.nudecollect.com"
|
||||
|
||||
def request(self, url, **kwargs):
|
||||
kwargs["allow_redirects"] = False
|
||||
return GalleryExtractor.request(self, url, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def get_title(page):
|
||||
return text.unescape(text.extr(page, "<title>", "</title>"))[31:]
|
||||
|
||||
@staticmethod
|
||||
def get_image(page):
|
||||
return text.extr(page, '<img src="', '"')
|
||||
|
||||
|
||||
class NudecollectImageExtractor(NudecollectExtractor):
|
||||
"""Extractor for individual images from nudecollect.com"""
|
||||
subcategory = "image"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
|
||||
r"(/content/([^/?#]+)/image-(\d+)-pics-(\d+)"
|
||||
r"-mirror-(\d+)\.html)")
|
||||
example = ("https://www.nudecollect.com/content/12345_TITLE"
|
||||
"/image-1-pics-108-mirror-1.html")
|
||||
|
||||
def __init__(self, match):
|
||||
NudecollectExtractor.__init__(self, match)
|
||||
_, self.slug, self.num, self.count, self.mirror = match.groups()
|
||||
|
||||
def metadata(self, page):
|
||||
return {
|
||||
"slug" : self.slug,
|
||||
"title" : self.get_title(page),
|
||||
"count" : text.parse_int(self.count),
|
||||
"mirror": text.parse_int(self.mirror),
|
||||
}
|
||||
|
||||
def images(self, page):
|
||||
return ((self.get_image(page), {"num": text.parse_int(self.num)}),)
|
||||
|
||||
|
||||
class NudecollectAlbumExtractor(NudecollectExtractor):
|
||||
"""Extractor for image albums on nudecollect.com"""
|
||||
subcategory = "album"
|
||||
pattern = (r"(?:https?://)?(?:www\.)?nudecollect\.com"
|
||||
r"/content/([^/?#]+)/(?:index-mirror-(\d+)-(\d+)"
|
||||
r"|page-\d+-pics-(\d+)-mirror-(\d+))\.html")
|
||||
example = ("https://www.nudecollect.com/content/12345_TITLE"
|
||||
"/index-mirror-01-123.html")
|
||||
|
||||
def __init__(self, match):
|
||||
self.slug = match.group(1)
|
||||
self.mirror = match.group(2) or match.group(5)
|
||||
self.count = text.parse_int(match.group(3) or match.group(4))
|
||||
url = "{}/content/{}/image-1-pics-{}-mirror-{}.html".format(
|
||||
self.root, self.slug, self.count, self.mirror)
|
||||
NudecollectExtractor.__init__(self, match, url)
|
||||
|
||||
def metadata(self, page):
|
||||
return {
|
||||
"slug" : self.slug,
|
||||
"title" : self.get_title(page),
|
||||
"mirror": text.parse_int(self.mirror),
|
||||
}
|
||||
|
||||
def images(self, page):
|
||||
url = self.get_image(page)
|
||||
p1, _, p2 = url.partition("/image0")
|
||||
ufmt = p1 + "/image{:>05}" + p2[4:]
|
||||
return [(ufmt.format(num), None) for num in range(1, self.count + 1)]
|
@ -52,19 +52,24 @@ class PatreonExtractor(Extractor):
|
||||
post["hash"] = fhash
|
||||
post["type"] = kind
|
||||
post["num"] += 1
|
||||
yield Message.Url, url, text.nameext_from_url(name, post)
|
||||
text.nameext_from_url(name, post)
|
||||
if text.ext_from_url(url) == "m3u8":
|
||||
url = "ytdl:" + url
|
||||
post["extension"] = "mp4"
|
||||
yield Message.Url, url, post
|
||||
else:
|
||||
self.log.debug("skipping %s (%s %s)", url, fhash, kind)
|
||||
|
||||
@staticmethod
|
||||
def _postfile(post):
|
||||
def _postfile(self, post):
|
||||
postfile = post.get("post_file")
|
||||
if postfile:
|
||||
return (("postfile", postfile["url"], postfile["name"]),)
|
||||
url = postfile["url"]
|
||||
name = postfile.get("name") or self._filename(url) or url
|
||||
return (("postfile", url, name),)
|
||||
return ()
|
||||
|
||||
def _images(self, post):
|
||||
for image in post["images"]:
|
||||
for image in post.get("images") or ():
|
||||
url = image.get("download_url")
|
||||
if url:
|
||||
name = image.get("file_name") or self._filename(url) or url
|
||||
@ -80,7 +85,7 @@ class PatreonExtractor(Extractor):
|
||||
return ()
|
||||
|
||||
def _attachments(self, post):
|
||||
for attachment in post["attachments"]:
|
||||
for attachment in post.get("attachments") or ():
|
||||
url = self.request(
|
||||
attachment["url"], method="HEAD",
|
||||
allow_redirects=False, fatal=False,
|
||||
|
@ -47,6 +47,7 @@ class PinterestExtractor(Extractor):
|
||||
|
||||
carousel_data = pin.get("carousel_data")
|
||||
if carousel_data:
|
||||
pin["count"] = len(carousel_data["carousel_slots"])
|
||||
for num, slot in enumerate(carousel_data["carousel_slots"], 1):
|
||||
slot["media_id"] = slot.pop("id")
|
||||
pin.update(slot)
|
||||
@ -65,7 +66,7 @@ class PinterestExtractor(Extractor):
|
||||
|
||||
if videos or media.get("duration") is None:
|
||||
pin.update(media)
|
||||
pin["num"] = 0
|
||||
pin["num"] = pin["count"] = 1
|
||||
pin["media_id"] = ""
|
||||
|
||||
url = media["url"]
|
||||
|
@ -826,9 +826,9 @@ class PixivAppAPI():
|
||||
|
||||
extractor.session.headers.update({
|
||||
"App-OS" : "ios",
|
||||
"App-OS-Version": "13.1.2",
|
||||
"App-Version" : "7.7.6",
|
||||
"User-Agent" : "PixivIOSApp/7.7.6 (iOS 13.1.2; iPhone11,8)",
|
||||
"App-OS-Version": "16.7.2",
|
||||
"App-Version" : "7.19.1",
|
||||
"User-Agent" : "PixivIOSApp/7.19.1 (iOS 16.7.2; iPhone12,8)",
|
||||
"Referer" : "https://app-api.pixiv.net/",
|
||||
})
|
||||
|
||||
|
138
gallery_dl/extractor/poringa.py
Normal file
138
gallery_dl/extractor/poringa.py
Normal file
@ -0,0 +1,138 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for http://www.poringa.net/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
from ..cache import cache
|
||||
import itertools
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?poringa\.net"
|
||||
|
||||
|
||||
class PoringaExtractor(Extractor):
    """Base class for poringa extractors"""
    category = "poringa"
    directory_fmt = ("{category}", "{user}", "{post_id}")
    filename_fmt = "{post_id}_{title}_{num:>03}_{filename}.{extension}"
    archive_fmt = "{post_id}_{num}"
    root = "http://www.poringa.net"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # first capture group of the subclass pattern (post ID / user / query)
        self.item = match.group(1)
        # True until the cached cookies have been loaded into the session
        self.__cookies = True

    def items(self):
        """Yield a Directory message and one Url message per image
        for every post ID returned by posts()"""
        for post_id in self.posts():
            url = "{}/posts/imagenes/{}".format(self.root, post_id)

            try:
                response = self.request(url)
            except exception.HttpError as exc:
                self.log.warning(
                    "Unable to fetch posts for '%s' (%s)", post_id, exc)
                continue

            # being sent to the login page is treated as a private post
            if "/registro-login?" in response.url:
                self.log.warning("Private post '%s'", post_id)
                continue

            page = response.text
            title, pos = text.extract(
                page, 'property="og:title" content="', '"')

            try:
                pos = page.index('<div class="main-info', pos)
                user, pos = text.extract(
                    page, 'href="http://www.poringa.net/', '"', pos)
            except ValueError:
                user = None

            # fall back to a generic name when no author could be found
            if not user:
                user = "poringa"

            data = {
                "post_id"      : post_id,
                "title"        : text.unescape(title),
                "user"         : text.unquote(user),
                "_http_headers": {"Referer": url},
            }

            main_post = text.extr(
                page, 'property="dc:content" role="main">', '</div>')
            urls = list(text.extract_iter(
                main_post, '<img class="imagen" border="0" src="', '"'))
            data["count"] = len(urls)

            yield Message.Directory, data
            for data["num"], url in enumerate(urls, 1):
                yield Message.Url, url, text.nameext_from_url(url, data)

    def posts(self):
        """Return an iterable of post IDs; overridden by subclasses"""
        return ()

    def request(self, url, **kwargs):
        """Send a request, retrying while the 'Please wait' page is served

        Cached cookies are loaded into the session on the first call, and
        any cookies set by a response are written back to the cache.

        Raises exception.StopExtraction when the interstitial page is still
        returned after 5 attempts. Previously the method fell off the end
        and returned None, which crashed callers with an AttributeError.
        """
        if self.__cookies:
            self.__cookies = False
            self.cookies_update(_cookie_cache())

        for _ in range(5):
            response = Extractor.request(self, url, **kwargs)
            if response.cookies:
                _cookie_cache.update("", response.cookies)
            # only the first 600 bytes are searched for the marker title
            if response.content.find(
                    b"<title>Please wait a few moments</title>", 0, 600) < 0:
                return response
            self.sleep(5.0, "check")

        raise exception.StopExtraction(
            "Unable to get past the 'Please wait' page for '%s'", url)

    def _pagination(self, url, params):
        """Yield the post IDs found on consecutive result pages of 'url'

        'params' is updated in place with the current page number 'p'.
        """
        for params["p"] in itertools.count(1):
            page = self.request(url, params=params).text

            posts_ids = PoringaPostExtractor.pattern.findall(page)
            # drop duplicates while keeping first-seen order
            posts_ids = list(dict.fromkeys(posts_ids))
            yield from posts_ids

            # fewer than 19 unique IDs is taken to mean the last page
            if len(posts_ids) < 19:
                return
|
||||
|
||||
|
||||
class PoringaPostExtractor(PoringaExtractor):
    """Extractor for posts on poringa.net"""
    subcategory = "post"
    example = "http://www.poringa.net/posts/imagenes/12345/TITLE.html"
    pattern = BASE_PATTERN + r"/posts/imagenes/(\d+)"

    def posts(self):
        """Return a 1-tuple holding the post ID captured from the URL"""
        post_id = self.item
        return (post_id,)
|
||||
|
||||
|
||||
class PoringaUserExtractor(PoringaExtractor):
    # Matches a bare "/NAME" path and feeds NAME to the site search.
    subcategory = "user"
    example = "http://www.poringa.net/USER"
    pattern = BASE_PATTERN + r"/(\w+)$"

    def posts(self):
        """Return the paginated search results for the captured name"""
        search_url = "{}/buscar/".format(self.root)
        return self._pagination(search_url, {"q": self.item})
|
||||
|
||||
|
||||
class PoringaSearchExtractor(PoringaExtractor):
    # Matches "/buscar/?q=QUERY" URLs and captures QUERY.
    subcategory = "search"
    example = "http://www.poringa.net/buscar/?q=QUERY"
    pattern = BASE_PATTERN + r"/buscar/\?&?q=([^&#]+)"

    def posts(self):
        """Return the paginated search results for the captured query"""
        search_url = "{}/buscar/".format(self.root)
        return self._pagination(search_url, {"q": self.item})
|
||||
|
||||
|
||||
@cache()
def _cookie_cache():
    """Initial (empty) value of the cached cookies

    PoringaExtractor.request() replaces it through _cookie_cache.update().
    """
    no_cookies = ()
    return no_cookies
|
@ -38,7 +38,11 @@ class Rule34usExtractor(BooruExtractor):
|
||||
"height" : extr(' x ', 'h'),
|
||||
"file_url": extr(' src="', '"'),
|
||||
}
|
||||
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
|
||||
|
||||
url = post["file_url"]
|
||||
if "//video-cdn1." in url:
|
||||
post["_fallback"] = (url.replace("//video-cdn1.", "//video."),)
|
||||
post["md5"] = url.rpartition("/")[2].partition(".")[0]
|
||||
|
||||
tags = collections.defaultdict(list)
|
||||
for tag_type, tag_name in self._find_tags(page):
|
||||
|
211
gallery_dl/extractor/steamgriddb.py
Normal file
211
gallery_dl/extractor/steamgriddb.py
Normal file
@ -0,0 +1,211 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://www.steamgriddb.com"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
|
||||
|
||||
# URL prefix shared by the 'pattern' of every SteamGridDB extractor
BASE_PATTERN = r"(?:https?://)?(?:www\.)?steamgriddb\.com"
# Values accepted for the "languages" option (validated by config_list())
LANGUAGE_CODES = (
    "aa", "ab", "ae", "af", "ak", "am", "an", "ar", "as", "av", "ay", "az",
    "ba", "be", "bg", "bh", "bi", "bm", "bn", "bo", "br", "bs", "ca", "ce",
    "ch", "co", "cr", "cs", "cu", "cv", "cy", "da", "de", "dv", "dz", "ee",
    "el", "en", "eo", "es", "et", "eu", "fa", "ff", "fi", "fj", "fo", "fr",
    "fy", "ga", "gd", "gl", "gn", "gu", "gv", "ha", "he", "hi", "ho", "hr",
    "ht", "hu", "hy", "hz", "ia", "id", "ie", "ig", "ii", "ik", "io", "is",
    "it", "iu", "ja", "jv", "ka", "kg", "ki", "kj", "kk", "kl", "km", "kn",
    "ko", "kr", "ks", "ku", "kv", "kw", "ky", "la", "lb", "lg", "li", "ln",
    "lo", "lt", "lu", "lv", "mg", "mh", "mi", "mk", "ml", "mn", "mr", "ms",
    "mt", "my", "na", "nb", "nd", "ne", "ng", "nl", "nn", "no", "nr", "nv",
    "ny", "oc", "oj", "om", "or", "os", "pa", "pi", "pl", "ps", "pt", "qu",
    "rm", "rn", "ro", "ru", "rw", "sa", "sc", "sd", "se", "sg", "si", "sk",
    "sl", "sm", "sn", "so", "sq", "sr", "ss", "st", "su", "sv", "sw", "ta",
    "te", "tg", "th", "ti", "tk", "tl", "tn", "to", "tr", "ts", "tt", "tw",
    "ty", "ug", "uk", "ur", "uz", "ve", "vi", "vo", "wa", "wo", "xh", "yi",
    "yo", "za", "zh", "zu",
)
# Maps a "file-types" option value to the string sent as "mime" in the
# asset search request; "all" is passed through unchanged
FILE_EXT_TO_MIME = {
    "png": "image/png",
    "jpeg": "image/jpeg",
    "jpg": "image/jpeg",
    "webp": "image/webp",
    "ico": "image/vnd.microsoft.icon",
    "all": "all",
}
|
||||
|
||||
|
||||
class SteamgriddbExtractor(Extractor):
    """Base class for SteamGridDB"""
    category = "steamgriddb"
    root = "https://www.steamgriddb.com"
    directory_fmt = ("{category}", "{subcategory}", "{game[id]}")
    filename_fmt = "{game[id]}_{id}_{num:>02}.{extension}"
    archive_fmt = "{filename}"

    def _init(self):
        """Set the 'userprefs' cookie sent with every request"""
        prefs = {"userprefs": "%7B%22adult%22%3Afalse%7D"}
        self.cookies_update(prefs)

    def items(self):
        """Yield a Directory message and 1-2 Url messages per asset

        The second URL is the asset's 'fake_png' entry; it is included
        when the 'download-fake-png' option is enabled (the default) and
        the asset has a non-empty 'fake_png' value.
        """
        with_fake_png = self.config("download-fake-png", True)

        for asset in self.assets():
            urls = (asset["url"],)
            if with_fake_png and asset.get("fake_png"):
                urls += (asset["fake_png"],)

            asset["count"] = len(urls)
            yield Message.Directory, asset

            for num, url in enumerate(urls, 1):
                asset["num"] = num
                yield Message.Url, url, text.nameext_from_url(url, asset)

    def _call(self, endpoint, **kwargs):
        """Request an API endpoint and return the 'data' part of its reply

        Raises exception.StopExtraction with the reported error when the
        reply's 'success' field is false.
        """
        response = self.request(self.root + endpoint, **kwargs)
        result = response.json()
        if result["success"]:
            return result["data"]
        raise exception.StopExtraction(result["error"])
|
||||
|
||||
|
||||
class SteamgriddbAssetsExtractor(SteamgriddbExtractor):
    """Base class for extracting a list of assets"""

    def __init__(self, match):
        SteamgriddbExtractor.__init__(self, match)
        list_type = match.group(1)
        list_id = int(match.group(2))
        # exactly one of game_id / collection_id is set, the other is None
        self.game_id = list_id if list_type == "game" else None
        self.collection_id = list_id if list_type == "collection" else None
        # page number from the URL (1-based); defaults to the first page
        self.page = int(match.group(3) or 1)

    def assets(self):
        """Yield every asset of the game or collection, page by page

        Filter values are read from the extractor options and validated
        before the first request is sent.

        Raises exception.StopExtraction for an invalid option value.
        """
        limit = 48
        # The search request uses a zero-based page index. The original
        # min() ignored the page from the URL and could even yield -1.
        page = max(self.page - 1, 0)

        sort = self.config("sort", "score_desc")
        if sort not in ("score_desc", "score_asc", "score_old_desc",
                        "score_old_asc", "age_desc", "age_asc"):
            raise exception.StopExtraction("Invalid sort '%s'", sort)

        payload = {
            "static"  : self.config("static", True),
            "animated": self.config("animated", True),
            "humor"   : self.config("humor", True),
            "nsfw"    : self.config("nsfw", True),
            "epilepsy": self.config("epilepsy", True),
            "untagged": self.config("untagged", True),

            "asset_type": self.asset_type,
            "limit": limit,
            "order": sort,
        }
        # subclasses set valid_dimensions to None when the asset type
        # has no dimension filter
        if self.valid_dimensions:
            payload["dimensions"] = self.config_list(
                "dimensions", "dimension", self.valid_dimensions)
        payload["styles"] = self.config_list(
            "styles", "style", self.valid_styles)
        payload["languages"] = self.config_list(
            "languages", "language", LANGUAGE_CODES)
        file_types = self.config_list(
            "file-types", "file type", self.valid_file_types)
        payload["mime"] = [FILE_EXT_TO_MIME[i] for i in file_types]

        if self.game_id:
            payload["game_id"] = [self.game_id]
        else:
            payload["collection_id"] = self.collection_id

        while True:
            payload["page"] = page

            data = self._call(
                "/api/public/search/assets", method="POST", json=payload)
            for asset in data["assets"]:
                # fill in the game info from the reply when an asset
                # does not carry its own
                if not asset.get("game"):
                    asset["game"] = data["game"]
                yield asset

            # pages 0..page have been fetched, i.e. limit * (page + 1)
            # results; comparing against limit * page cost one extra request
            if data["total"] <= limit * (page + 1):
                break
            page += 1

    def config_list(self, key, type_name, valid_values):
        """Return the list value of option 'key' after validating it

        A string value is split on commas. A missing option or a value
        containing "all" results in ["all"].

        Raises exception.StopExtraction, naming 'type_name', for any entry
        not in 'valid_values'.
        """
        value = self.config(key)
        if isinstance(value, str):
            value = value.split(",")

        if value is None or "all" in value:
            return ["all"]

        for i in value:
            if i not in valid_values:
                raise exception.StopExtraction("Invalid %s '%s'", type_name, i)

        return value
|
||||
|
||||
|
||||
class SteamgriddbAssetExtractor(SteamgriddbExtractor):
    """Extractor for a single asset"""
    subcategory = "asset"
    example = "https://www.steamgriddb.com/grid/1234"
    pattern = BASE_PATTERN + r"/(grid|hero|logo|icon)/(\d+)"

    def __init__(self, match):
        SteamgriddbExtractor.__init__(self, match)
        # both values stay strings; they are only used to build the API path
        self.asset_id = match.group(2)
        self.asset_type = match.group(1)

    def assets(self):
        """Return a 1-tuple with the requested asset's API data"""
        endpoint = "/api/public/asset/{}/{}".format(
            self.asset_type, self.asset_id)
        data = self._call(endpoint)
        return (data["asset"],)
|
||||
|
||||
|
||||
class SteamgriddbGridsExtractor(SteamgriddbAssetsExtractor):
    # Lists the "grid" assets of a game or collection.
    asset_type = "grid"
    subcategory = "grids"
    example = "https://www.steamgriddb.com/game/1234/grids"
    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/grids(?:/(\d+))?"

    # accepted values for the dimensions / styles / file-types options
    valid_file_types = ("png", "jpeg", "jpg", "webp")
    valid_styles = (
        "alternate", "blurred", "no_logo", "material", "white_logo",
    )
    valid_dimensions = (
        "460x215", "920x430", "600x900", "342x482", "660x930",
        "512x512", "1024x1024",
    )
|
||||
|
||||
|
||||
class SteamgriddbHeroesExtractor(SteamgriddbAssetsExtractor):
    # Lists the "hero" assets of a game or collection.
    asset_type = "hero"
    subcategory = "heroes"
    example = "https://www.steamgriddb.com/game/1234/heroes"
    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/heroes(?:/(\d+))?"

    # accepted values for the dimensions / styles / file-types options
    valid_file_types = ("png", "jpeg", "jpg", "webp")
    valid_styles = ("alternate", "blurred", "material")
    valid_dimensions = ("1920x620", "3840x1240", "1600x650")
|
||||
|
||||
|
||||
class SteamgriddbLogosExtractor(SteamgriddbAssetsExtractor):
    # Lists the "logo" assets of a game or collection.
    asset_type = "logo"
    subcategory = "logos"
    example = "https://www.steamgriddb.com/game/1234/logos"
    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/logos(?:/(\d+))?"

    # accepted values for the styles / file-types options;
    # None means no "dimensions" filter is sent for this asset type
    valid_file_types = ("png", "webp")
    valid_styles = ("official", "white", "black", "custom")
    valid_dimensions = None
|
||||
|
||||
|
||||
class SteamgriddbIconsExtractor(SteamgriddbAssetsExtractor):
    # Lists the "icon" assets of a game or collection.
    asset_type = "icon"
    subcategory = "icons"
    example = "https://www.steamgriddb.com/game/1234/icons"
    pattern = BASE_PATTERN + r"/(game|collection)/(\d+)/icons(?:/(\d+))?"

    # accepted values for the dimensions / styles / file-types options;
    # icon dimensions are square, so each size N becomes "NxN"
    valid_file_types = ("png", "ico")
    valid_styles = ("official", "custom")
    valid_dimensions = list(map("{0}x{0}".format, (
        8, 10, 14, 16, 20, 24, 28, 32, 35, 40, 48, 54, 56, 57, 60, 64, 72,
        76, 80, 90, 96, 100, 114, 120, 128, 144, 150, 152, 160, 180, 192,
        194, 256, 310, 512, 768, 1024,
    )))
|
@ -87,6 +87,10 @@ BASE_PATTERN = SzurubooruExtractor.update({
|
||||
"root": "https://booru.bcbnsfw.space",
|
||||
"pattern": r"booru\.bcbnsfw\.space",
|
||||
},
|
||||
"snootbooru": {
|
||||
"root": "https://snootbooru.com",
|
||||
"pattern": r"snootbooru\.com",
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
|
@ -546,15 +546,17 @@ class TwitterTimelineExtractor(TwitterExtractor):
|
||||
def _select_tweet_source(self):
|
||||
strategy = self.config("strategy")
|
||||
if strategy is None or strategy == "auto":
|
||||
if self.retweets or self.replies or self.textonly:
|
||||
if self.retweets or self.textonly:
|
||||
return self.api.user_tweets
|
||||
else:
|
||||
return self.api.user_media
|
||||
if strategy == "tweets":
|
||||
return self.api.user_tweets
|
||||
if strategy == "media":
|
||||
return self.api.user_media
|
||||
if strategy == "with_replies":
|
||||
return self.api.user_tweets_and_replies
|
||||
return self.api.user_media
|
||||
raise exception.StopExtraction("Invalid strategy '%s'", strategy)
|
||||
|
||||
|
||||
class TwitterTweetsExtractor(TwitterExtractor):
|
||||
|
@ -10,6 +10,7 @@
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, exception
|
||||
import re
|
||||
|
||||
BASE_PATTERN = r"(?:https://)?(?:www\.|m\.)?vk\.com"
|
||||
|
||||
@ -24,6 +25,7 @@ class VkExtractor(Extractor):
|
||||
request_interval = (0.5, 1.5)
|
||||
|
||||
def items(self):
|
||||
sub = re.compile(r"/imp[fg]/").sub
|
||||
sizes = "wzyxrqpo"
|
||||
|
||||
data = self.metadata()
|
||||
@ -40,11 +42,15 @@ class VkExtractor(Extractor):
|
||||
continue
|
||||
|
||||
try:
|
||||
photo["url"] = photo[size + "src"]
|
||||
url = photo[size + "src"]
|
||||
except KeyError:
|
||||
self.log.warning("no photo URL found (%s)", photo.get("id"))
|
||||
continue
|
||||
|
||||
photo["url"] = sub("/", url.partition("?")[0])
|
||||
# photo["url"] = url
|
||||
photo["_fallback"] = (url,)
|
||||
|
||||
try:
|
||||
_, photo["width"], photo["height"] = photo[size]
|
||||
except ValueError:
|
||||
|
@ -87,23 +87,31 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
|
||||
self.episode_no = params.get("episode_no")
|
||||
|
||||
def metadata(self, page):
|
||||
keywords, pos = text.extract(
|
||||
page, '<meta name="keywords" content="', '"')
|
||||
title, pos = text.extract(
|
||||
page, '<meta property="og:title" content="', '"', pos)
|
||||
descr, pos = text.extract(
|
||||
page, '<meta property="og:description" content="', '"', pos)
|
||||
extr = text.extract_from(page)
|
||||
keywords = extr('<meta name="keywords" content="', '"').split(", ")
|
||||
title = extr('<meta property="og:title" content="', '"')
|
||||
descr = extr('<meta property="og:description" content="', '"')
|
||||
|
||||
if extr('<div class="author_area"', '\n'):
|
||||
username = extr('/creator/', '"')
|
||||
author_name = extr('<span>', '</span>')
|
||||
else:
|
||||
username = author_name = ""
|
||||
|
||||
return {
|
||||
"genre" : self.genre,
|
||||
"comic" : self.comic,
|
||||
"title_no" : self.title_no,
|
||||
"episode_no" : self.episode_no,
|
||||
"title" : text.unescape(title),
|
||||
"episode" : keywords.split(", ")[1],
|
||||
"description": text.unescape(descr),
|
||||
"lang" : self.lang,
|
||||
"language" : util.code_to_language(self.lang),
|
||||
"genre" : self.genre,
|
||||
"comic" : self.comic,
|
||||
"title_no" : self.title_no,
|
||||
"episode_no" : self.episode_no,
|
||||
"title" : text.unescape(title),
|
||||
"episode" : keywords[1],
|
||||
"comic_name" : text.unescape(keywords[0]),
|
||||
"episode_name": text.unescape(keywords[2]),
|
||||
"username" : username,
|
||||
"author_name" : text.unescape(author_name),
|
||||
"description" : text.unescape(descr),
|
||||
"lang" : self.lang,
|
||||
"language" : util.code_to_language(self.lang),
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
|
@ -225,9 +225,6 @@ class WeiboUserExtractor(WeiboExtractor):
|
||||
pattern = USER_PATTERN + r"(?:$|#)"
|
||||
example = "https://weibo.com/USER"
|
||||
|
||||
def initialize(self):
|
||||
pass
|
||||
|
||||
def items(self):
|
||||
base = "{}/u/{}?tabtype=".format(self.root, self._user_id())
|
||||
return self._dispatch_extractors((
|
||||
|
144
gallery_dl/extractor/wikimedia.py
Normal file
144
gallery_dl/extractor/wikimedia.py
Normal file
@ -0,0 +1,144 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2022 Ailothaen
|
||||
# Copyright 2024 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for Wikimedia and Wikipedia"""
|
||||
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
class WikimediaExtractor(BaseExtractor):
    """Base class for wikimedia extractors"""
    basecategory = "wikimedia"
    directory_fmt = ("{category}", "{page}")
    archive_fmt = "{sha1}"
    request_interval = (1.0, 2.0)

    def __init__(self, match):
        BaseExtractor.__init__(self, match)
        # the page title is the last capture group that took part
        # in the match
        self.title = match.group(match.lastindex)

    def items(self):
        """Yield a Directory and a Url message for every file of the query

        Result pages without an 'imageinfo' entry are skipped instead of
        aborting the whole extraction with a KeyError/IndexError.
        """
        for info in self._pagination(self.params):
            try:
                image = info["imageinfo"][0]
            except LookupError:
                self.log.debug("Missing 'imageinfo' for %s", info)
                continue

            # turn the lists of {"name": ..., "value": ...} entries into
            # plain dicts; a missing or null list becomes an empty dict
            image["metadata"] = {
                m["name"]: m["value"]
                for m in image.get("metadata") or ()}
            image["commonmetadata"] = {
                m["name"]: m["value"]
                for m in image.get("commonmetadata") or ()}

            # drop everything up to the first ":" of the canonical title
            # and split the rest at its last "."
            filename = image["canonicaltitle"]
            image["filename"], _, image["extension"] = \
                filename.partition(":")[2].rpartition(".")
            image["date"] = text.parse_datetime(
                image["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
            image["page"] = self.title

            yield Message.Directory, image
            yield Message.Url, image["url"], image

    def _pagination(self, params):
        """Yield the 'pages' entries of a query and all its continuations

        'params' is modified in place: 'action' and 'format' are set and
        the continuation values of each response are merged into it.

        https://www.mediawiki.org/wiki/API:Query
        https://opendata.stackexchange.com/questions/13381
        """

        url = self.root + "/w/api.php"
        params["action"] = "query"
        params["format"] = "json"

        while True:
            data = self.request(url, params=params).json()

            # a response may lack query results; its 'continue' entry is
            # still followed when present
            try:
                pages = data["query"]["pages"]
            except KeyError:
                pass
            else:
                yield from pages.values()

            # no 'continue' entry means this was the last batch
            try:
                continuation = data["continue"]
            except KeyError:
                break
            params.update(continuation)
|
||||
|
||||
|
||||
# Registers the supported sites with WikimediaExtractor and keeps the
# pattern returned by update() for use in the subclass patterns below.
# NOTE(review): entries with "root": None presumably take their root URL
# from the matched domain (any "<subdomain>.<site>.org") - confirm against
# BaseExtractor.
BASE_PATTERN = WikimediaExtractor.update({
    "wikipedia": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikipedia\.org",
    },
    "wiktionary": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wiktionary\.org",
    },
    "wikiquote": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikiquote\.org",
    },
    "wikibooks": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikibooks\.org",
    },
    "wikisource": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikisource\.org",
    },
    "wikinews": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikinews\.org",
    },
    "wikiversity": {
        "root": None,
        "pattern": r"[a-z]{2,}\.wikiversity\.org",
    },
    # the two sites below live on fixed wikimedia.org subdomains
    "wikispecies": {
        "root": "https://species.wikimedia.org",
        "pattern": r"species\.wikimedia\.org",
    },
    "wikimediacommons": {
        "root": "https://commons.wikimedia.org",
        "pattern": r"commons\.wikimedia\.org",
    },
})
|
||||
|
||||
|
||||
class WikimediaArticleExtractor(WikimediaExtractor):
    """Extractor for wikimedia articles"""
    subcategory = "article"
    example = "https://en.wikipedia.org/wiki/TITLE"
    pattern = BASE_PATTERN + r"/wiki/(?!Category:)([^/?#]+)"

    def _init(self):
        """Build the API query: 'images' generator for the page title"""
        image_props = "|".join((
            "timestamp", "user", "userid", "comment", "canonicaltitle",
            "url", "size", "sha1", "mime", "metadata", "commonmetadata",
            "extmetadata", "bitdepth",
        ))
        self.params = {
            "generator": "images",
            "titles"   : self.title,
            "prop"     : "imageinfo",
            "iiprop"   : image_props,
        }
|
||||
|
||||
|
||||
class WikimediaCategoryExtractor(WikimediaExtractor):
    # Handles "/wiki/Category:NAME" URLs.
    subcategory = "category"
    example = "https://commons.wikimedia.org/wiki/Category:NAME"
    pattern = BASE_PATTERN + r"/wiki/(Category:[^/?#]+)"

    def _init(self):
        """Build the API query: file-type 'categorymembers' of the title"""
        image_props = "|".join((
            "timestamp", "user", "userid", "comment", "canonicaltitle",
            "url", "size", "sha1", "mime", "metadata", "commonmetadata",
            "extmetadata", "bitdepth",
        ))
        self.params = {
            "generator": "categorymembers",
            "gcmtitle" : self.title,
            "gcmtype"  : "file",
            "prop"     : "imageinfo",
            "iiprop"   : image_props,
        }
|
40
gallery_dl/extractor/zzup.py
Normal file
40
gallery_dl/extractor/zzup.py
Normal file
@ -0,0 +1,40 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from .common import GalleryExtractor
|
||||
from .. import text
|
||||
|
||||
|
||||
class ZzupGalleryExtractor(GalleryExtractor):
    # Extractor for zzup.com galleries ("index" or "page-N" gallery URLs).
    category = "zzup"
    directory_fmt = ("{category}", "{title}")
    filename_fmt = "{slug}_{num:>03}.{extension}"
    archive_fmt = "{slug}_{num}"
    root = "https://zzup.com"
    pattern = (r"(?:https?://)?(?:www\.)?zzup\.com(/content"
               r"/[\w=]+/([^/?#]+)/[\w=]+)/(?:index|page-\d+)\.html")
    example = "https://zzup.com/content/xyz=/12345_TITLE/123=/index.html"

    def __init__(self, match):
        # group(1) already starts with "/content", so it is appended to
        # the root without another "/" (the original produced "//content")
        url = "{}{}/index.html".format(self.root, match.group(1))
        GalleryExtractor.__init__(self, match, url)
        self.slug = match.group(2)

    def metadata(self, page):
        """Return the gallery slug and the title taken from <title>"""
        return {
            "slug" : self.slug,
            # the last 11 characters of the page title are dropped
            # (presumably a fixed site suffix - confirm)
            "title": text.unescape(text.extr(
                page, "<title>", "</title>"))[:-11],
        }

    def images(self, page):
        """Return a list of (url, None) tuples, one per gallery image

        The image count is read from the viewer link on the gallery page.
        The URLs are derived from the first link on the viewer page by
        replacing its image number with 00001..count.
        """
        path = text.extr(page, 'class="picbox"><a target="_blank" href="', '"')
        count = text.parse_int(text.extr(path, "-pics-", "-mirror"))
        page = self.request(self.root + path).text
        url = self.root + text.extr(page, '\n<a href="', '"')

        prefix, _, rest = url.partition("/image0")
        # skip the remaining 4 digits of the 5-digit image number
        suffix = rest[4:]
        # format only the number, so "{" or "}" in the page-derived URL
        # parts cannot be taken for format fields
        return [
            ("{}/image{:>05}{}".format(prefix, num, suffix), None)
            for num in range(1, count + 1)
        ]
|
@ -6,4 +6,4 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
__version__ = "1.26.6-dev"
|
||||
__version__ = "1.26.7-dev"
|
||||
|
@ -41,6 +41,10 @@ case "${2,,}" in
|
||||
call git push "$USER" HEAD:"$BRANCH"
|
||||
;;
|
||||
|
||||
"pf"|"push-force")
|
||||
call git push --force "$USER" HEAD:"$BRANCH"
|
||||
;;
|
||||
|
||||
"d"|"delete")
|
||||
call git switch master
|
||||
call git branch -D "$USER-$BRANCH"
|
||||
|
@ -32,6 +32,7 @@ CATEGORY_MAP = {
|
||||
"atfbooru" : "ATFBooru",
|
||||
"b4k" : "arch.b4k.co",
|
||||
"baraag" : "baraag",
|
||||
"batoto" : "BATO.TO",
|
||||
"bbc" : "BBC",
|
||||
"comicvine" : "Comic Vine",
|
||||
"coomerparty" : "Coomer",
|
||||
@ -49,6 +50,7 @@ CATEGORY_MAP = {
|
||||
"fanbox" : "pixivFANBOX",
|
||||
"fashionnova" : "Fashion Nova",
|
||||
"furaffinity" : "Fur Affinity",
|
||||
"hatenablog" : "HatenaBlog",
|
||||
"hbrowse" : "HBrowse",
|
||||
"hentai2read" : "Hentai2Read",
|
||||
"hentaicosplays" : "Hentai Cosplay",
|
||||
@ -101,7 +103,6 @@ CATEGORY_MAP = {
|
||||
"pornimagesxxx" : "Porn Image",
|
||||
"pornpics" : "PornPics.com",
|
||||
"pornreactor" : "PornReactor",
|
||||
"postmill" : "Postmill",
|
||||
"readcomiconline": "Read Comic Online",
|
||||
"rbt" : "RebeccaBlackTech",
|
||||
"redgifs" : "RedGIFs",
|
||||
@ -120,6 +121,7 @@ CATEGORY_MAP = {
|
||||
"slideshare" : "SlideShare",
|
||||
"smugmug" : "SmugMug",
|
||||
"speakerdeck" : "Speaker Deck",
|
||||
"steamgriddb" : "SteamGridDB",
|
||||
"subscribestar" : "SubscribeStar",
|
||||
"tbib" : "The Big ImageBoard",
|
||||
"tcbscans" : "TCB Scans",
|
||||
@ -137,6 +139,7 @@ CATEGORY_MAP = {
|
||||
"webmshare" : "webmshare",
|
||||
"webtoons" : "Webtoon",
|
||||
"wikiart" : "WikiArt.org",
|
||||
"wikimediacommons": "Wikimedia Commons",
|
||||
"xbunkr" : "xBunkr",
|
||||
"xhamster" : "xHamster",
|
||||
"xvideos" : "XVideos",
|
||||
@ -188,6 +191,11 @@ SUBCATEGORY_MAP = {
|
||||
"fapello": {
|
||||
"path": "Videos, Trending Posts, Popular Videos, Top Models",
|
||||
},
|
||||
"hatenablog": {
|
||||
"archive": "Archive",
|
||||
"entry" : "Individual Posts",
|
||||
"home" : "Home Feed",
|
||||
},
|
||||
"hentaifoundry": {
|
||||
"story": "",
|
||||
},
|
||||
@ -234,6 +242,9 @@ SUBCATEGORY_MAP = {
|
||||
"sketch": "Sketch",
|
||||
"work": "individual Images",
|
||||
},
|
||||
"poringa": {
|
||||
"post": "Posts Images",
|
||||
},
|
||||
"pornhub": {
|
||||
"gifs": "",
|
||||
},
|
||||
@ -258,6 +269,9 @@ SUBCATEGORY_MAP = {
|
||||
"smugmug": {
|
||||
"path": "Images from Users and Folders",
|
||||
},
|
||||
"steamgriddb": {
|
||||
"asset": "Individual Assets",
|
||||
},
|
||||
"tumblr": {
|
||||
"day": "Days",
|
||||
},
|
||||
|
@ -1,7 +1,8 @@
|
||||
[flake8]
|
||||
exclude = .git,__pycache__,build,dist,archive,results
|
||||
exclude = .git,__pycache__,build,dist,archive
|
||||
ignore = E203,E226,W504
|
||||
per-file-ignores =
|
||||
setup.py: E501
|
||||
gallery_dl/extractor/500px.py: E501
|
||||
gallery_dl/extractor/mangapark.py: E501
|
||||
test/results/*.py: E122,E241,E402,E501
|
||||
|
64
test/results/2ch.py
Normal file
64
test/results/2ch.py
Normal file
@ -0,0 +1,64 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
# "2ch" is not a valid Python identifier, so the extractor module cannot be
# loaded with a regular import statement; __import__() and getattr() are
# used instead.
gallery_dl = __import__("gallery_dl.extractor.2ch")
_2ch = getattr(gallery_dl.extractor, "2ch")


# Each entry describes one test URL: "#"-prefixed keys configure the test
# itself, all other keys are expected metadata values or types.
__tests__ = (
{
    "#url"     : "https://2ch.hk/a/res/6202876.html",
    "#category": ("", "2ch", "thread"),
    "#class"   : _2ch._2chThreadExtractor,
    "#pattern" : r"https://2ch\.hk/a/src/6202876/\d+\.\w+",
    "#count"   : range(450, 1000),

    "banned"   : 0,
    "board"    : "a",
    "closed"   : 0,
    "comment"  : str,
    "date"     : "type:datetime",
    "displayname": str,
    "email"    : "",
    "endless"  : 1,
    "extension": str,
    "filename" : str,
    "fullname" : str,
    "height"   : int,
    "lasthit"  : 1705273977,
    "md5"      : r"re:[0-9a-f]{32}",
    "name"     : r"re:\d+\.\w+",
    "num"      : int,
    "number"   : range(1, 1000),
    "op"       : 0,
    "parent"   : int,
    "path"     : r"re:/a/src/6202876/\d+\.\w+",
    "post_name": "Аноним",
    "size"     : int,
    "sticky"   : 0,
    "subject"  : str,
    "thread"   : "6202876",
    "thumbnail": str,
    "tim"      : r"re:\d+",
    "timestamp": int,
    "title"    : "MP4/WEBM",
    "tn_height": int,
    "tn_width" : int,
    "trip"     : "",
    "type"     : int,
    "views"    : int,
    "width"    : int,
},

{
    "#url"     : "https://2ch.hk/a/",
    "#category": ("", "2ch", "board"),
    "#class"   : _2ch._2chBoardExtractor,
    "#pattern" : _2ch._2chThreadExtractor.pattern,
    "#count"   : range(200, 300),
},

)
|
@ -12,7 +12,7 @@ __tests__ = (
|
||||
"#url" : "https://archive.4plebs.org/tg/thread/54059290",
|
||||
"#category": ("foolfuuka", "4plebs", "thread"),
|
||||
"#class" : foolfuuka.FoolfuukaThreadExtractor,
|
||||
"#pattern" : "https://i\.4pcdn\.org/tg/1[34]\d{11}\.(jpg|png|gif)",
|
||||
"#pattern" : r"https://i\.4pcdn\.org/tg/1[34]\d{11}\.(jpg|png|gif)",
|
||||
"#count" : 30,
|
||||
},
|
||||
|
||||
|
@ -5,7 +5,6 @@
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import functools
|
||||
|
||||
__directory__ = os.path.dirname(__file__)
|
||||
@ -13,12 +12,8 @@ __directory__ = os.path.dirname(__file__)
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def tests(name):
|
||||
try:
|
||||
module = __import__(name, globals(), None, (), 1)
|
||||
return module.__tests__
|
||||
except Exception as exc:
|
||||
print(exc)
|
||||
return ()
|
||||
module = __import__(name, globals(), None, (), 1)
|
||||
return module.__tests__
|
||||
|
||||
|
||||
def all():
|
||||
|
240
test/results/batoto.py
Normal file
240
test/results/batoto.py
Normal file
@ -0,0 +1,240 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import batoto
|
||||
from gallery_dl import exception
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://bato.to/title/86408-i-shall-master-this-family-official/1681030-ch_8",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
"#count" : 66,
|
||||
|
||||
"chapter" : 8,
|
||||
"chapter_id" : 1681030,
|
||||
"chapter_minor": "",
|
||||
"count" : 66,
|
||||
"date" : "dt:2021-05-15 18:51:37",
|
||||
"extension" : "webp",
|
||||
"filename" : str,
|
||||
"manga" : "I Shall Master this Family! [Official]",
|
||||
"manga_id" : 86408,
|
||||
"page" : range(1, 66),
|
||||
"title" : "Observing",
|
||||
"volume" : 0,
|
||||
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bato.to/title/104929-86-eighty-six-official/1943513-vol_1-ch_5",
|
||||
"#comment" : "volume (vol) in url",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
"#count" : 7,
|
||||
|
||||
"manga" : "86--EIGHTY-SIX (Official)",
|
||||
"title" : "The Spearhead Squadron's Power",
|
||||
"volume" : 1,
|
||||
"chapter": 5,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bato.to/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bato.to/chapter/1681030",
|
||||
"#comment" : "v2 URL",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official",
|
||||
"#category": ("", "batoto", "manga"),
|
||||
"#class" : batoto.BatotoMangaExtractor,
|
||||
"#count" : ">= 21",
|
||||
|
||||
"chapter" : int,
|
||||
"chapter_minor": str,
|
||||
"date" : "type:datetime",
|
||||
"manga" : "Futsutsuka na Akujo de wa Gozaimasu ga - Suuguu Chouso Torikae Den (Official)",
|
||||
"manga_id" : 113742,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bato.to/title/104929-86-eighty-six-official",
|
||||
"#comment" : "Manga with number in name",
|
||||
"#category": ("", "batoto", "manga"),
|
||||
"#class" : batoto.BatotoMangaExtractor,
|
||||
"#count" : ">= 18",
|
||||
|
||||
"manga": "86--EIGHTY-SIX (Official)",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bato.to/title/140046-the-grand-duke-s-fox-princess-mgchan",
|
||||
"#comment" : "Non-English translation (Indonesian)",
|
||||
"#category": ("", "batoto", "manga"),
|
||||
"#class" : batoto.BatotoMangaExtractor,
|
||||
"#count" : ">= 29",
|
||||
|
||||
"manga": "The Grand Duke’s Fox Princess ⎝⎝MGCHAN⎠⎠",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bato.to/title/134270-removed",
|
||||
"#comment" : "Deleted/removed manga",
|
||||
"#category": ("", "batoto", "manga"),
|
||||
"#class" : batoto.BatotoMangaExtractor,
|
||||
"#exception": exception.StopExtraction,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bato.to/title/86408-i-shall-master-this-family-official",
|
||||
"#category": ("", "batoto", "manga"),
|
||||
"#class" : batoto.BatotoMangaExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://bato.to/series/86408/i-shall-master-this-family-official",
|
||||
"#comment" : "v2 URL",
|
||||
"#category": ("", "batoto", "manga"),
|
||||
"#class" : batoto.BatotoMangaExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://dto.to/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://hto.to/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://mto.to/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://wto.to/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://mangatoto.com/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://mangatoto.net/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://mangatoto.org/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://batocomic.com/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://batocomic.net/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://batocomic.org/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://readtoto.com/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://readtoto.net/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://readtoto.org/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://xbato.com/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://xbato.net/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://xbato.org/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://zbato.com/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://zbato.net/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://zbato.org/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://comiko.net/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://comiko.org/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://batotoo.com/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://batotwo.com/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
{
|
||||
"#url" : "https://battwo.com/title/86408/1681030",
|
||||
"#category": ("", "batoto", "chapter"),
|
||||
"#class" : batoto.BatotoChapterExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -43,7 +43,6 @@ __tests__ = (
|
||||
"extension": "jpg",
|
||||
"filename" : "Icy-Moonrise---For-Web",
|
||||
"num" : 1,
|
||||
"num" : int,
|
||||
"url" : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",
|
||||
},
|
||||
|
||||
|
@ -9,11 +9,18 @@ from gallery_dl.extractor import kemonoparty
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://coomer.party/onlyfans/user/alinity/post/125962203",
|
||||
"#comment" : "coomer.party (#2100)",
|
||||
"#url" : "https://coomer.su/onlyfans/user/alinity/post/125962203",
|
||||
"#comment" : "coomer (#2100)",
|
||||
"#category": ("", "coomerparty", "onlyfans"),
|
||||
"#class" : kemonoparty.KemonopartyPostExtractor,
|
||||
"#pattern" : r"https://coomer\.party/data/7d/3f/7d3fd9804583dc224968c0591163ec91794552b04f00a6c2f42a15b68231d5a8\.jpg",
|
||||
"#urls" : "https://coomer.su/data/7d/3f/7d3fd9804583dc224968c0591163ec91794552b04f00a6c2f42a15b68231d5a8.jpg",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://coomer.party/onlyfans/user/alinity/post/125962203",
|
||||
"#category": ("", "coomerparty", "onlyfans"),
|
||||
"#class" : kemonoparty.KemonopartyPostExtractor,
|
||||
"#urls" : "https://coomer.party/data/7d/3f/7d3fd9804583dc224968c0591163ec91794552b04f00a6c2f42a15b68231d5a8.jpg",
|
||||
},
|
||||
|
||||
)
|
||||
|
@ -14,7 +14,7 @@ __tests__ = (
|
||||
"#url" : "https://www.deviantart.com/shimoda7",
|
||||
"#category": ("", "deviantart", "user"),
|
||||
"#class" : deviantart.DeviantartUserExtractor,
|
||||
"#pattern" : "/shimoda7/gallery$",
|
||||
"#urls" : "https://www.deviantart.com/shimoda7/gallery",
|
||||
},
|
||||
|
||||
{
|
||||
@ -22,8 +22,15 @@ __tests__ = (
|
||||
"#category": ("", "deviantart", "user"),
|
||||
"#class" : deviantart.DeviantartUserExtractor,
|
||||
"#options" : {"include": "all"},
|
||||
"#pattern" : "/shimoda7/(gallery(/scraps)?|posts(/statuses)?|favourites)$",
|
||||
"#count" : 5,
|
||||
"#urls" : (
|
||||
"https://www.deviantart.com/shimoda7/avatar",
|
||||
"https://www.deviantart.com/shimoda7/banner",
|
||||
"https://www.deviantart.com/shimoda7/gallery",
|
||||
"https://www.deviantart.com/shimoda7/gallery/scraps",
|
||||
"https://www.deviantart.com/shimoda7/posts",
|
||||
"https://www.deviantart.com/shimoda7/posts/statuses",
|
||||
"https://www.deviantart.com/shimoda7/favourites",
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
@ -195,6 +202,108 @@ __tests__ = (
|
||||
"#class" : deviantart.DeviantartGalleryExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://deviantart.com/shimoda7/avatar",
|
||||
"#category": ("", "deviantart", "avatar"),
|
||||
"#class" : deviantart.DeviantartAvatarExtractor,
|
||||
"#urls" : "https://a.deviantart.net/avatars-big/s/h/shimoda7.jpg?4",
|
||||
"#sha1_content": "abf2cc79b842315f2e54bfdd93bf794a0f612b6f",
|
||||
|
||||
"author" : {
|
||||
"type" : "regular",
|
||||
"usericon": "https://a.deviantart.net/avatars/s/h/shimoda7.jpg?4",
|
||||
"userid" : "9AE51FC7-0278-806C-3FFF-F4961ABF9E2B",
|
||||
"username": "shimoda7",
|
||||
},
|
||||
"content" : {
|
||||
"src": "https://a.deviantart.net/avatars-big/s/h/shimoda7.jpg?4"
|
||||
},
|
||||
"da_category" : "avatar",
|
||||
"date" : "dt:1970-01-01 00:00:00",
|
||||
"extension" : "jpg",
|
||||
"filename" : "avatar_by_shimoda7-d4",
|
||||
"index" : 4,
|
||||
"index_base36" : "4",
|
||||
"is_deleted" : False,
|
||||
"is_downloadable": False,
|
||||
"is_original" : True,
|
||||
"published_time" : 0,
|
||||
"target" : {
|
||||
"extension": "jpg",
|
||||
"filename" : "avatar_by_shimoda7-d4",
|
||||
"src" : "https://a.deviantart.net/avatars-big/s/h/shimoda7.jpg?4"
|
||||
},
|
||||
"title" : "avatar",
|
||||
"username" : "shimoda7",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://deviantart.com/shimoda7/avatar",
|
||||
"#comment" : "'formats' option",
|
||||
"#category": ("", "deviantart", "avatar"),
|
||||
"#class" : deviantart.DeviantartAvatarExtractor,
|
||||
"#archive" : False,
|
||||
"#options" : {"formats": ["original.jpg", "big.jpg", "big.png", "big.gif"]},
|
||||
"#urls" : (
|
||||
"https://a.deviantart.net/avatars-original/s/h/shimoda7.jpg?4",
|
||||
"https://a.deviantart.net/avatars-big/s/h/shimoda7.jpg?4",
|
||||
"https://a.deviantart.net/avatars-big/s/h/shimoda7.png?4",
|
||||
"https://a.deviantart.net/avatars-big/s/h/shimoda7.gif?4",
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://deviantart.com/gdldev/banner",
|
||||
"#category": ("", "deviantart", "background"),
|
||||
"#class" : deviantart.DeviantartBackgroundExtractor,
|
||||
"#pattern" : r"https://wixmp-\w+\.wixmp\.com/f/b042e0ae-a7ff-420b-a41a-b35503427360/dgntyqc-3deebb65-04b4-4085-992a-aa0c0e7e225d\.png\?token=ey[\w.-]+$",
|
||||
"#sha1_content": "980eaa76ce515f1b6bef60dfadf26a5bbe9c583f",
|
||||
|
||||
"allows_comments" : True,
|
||||
"author" : {
|
||||
"type" : "regular",
|
||||
"usericon": "https://a.deviantart.net/avatars/g/d/gdldev.jpg?2",
|
||||
"userid" : "1A12BA26-33C2-AA0A-7678-0B6DFBA7AC8E",
|
||||
"username": "gdldev"
|
||||
},
|
||||
"category_path" : "",
|
||||
"content" : {
|
||||
"filename" : "banner_by_gdldev_dgntyqc.png",
|
||||
"filesize" : 84510,
|
||||
"height" : 4000,
|
||||
"src" : r"re:https://wixmp-\w+\.wixmp\.com/f/b042e0ae-a7ff-420b-a41a-b35503427360/dgntyqc-3deebb65-04b4-4085-992a-aa0c0e7e225d\.png\?token=ey[\w.-]+$",
|
||||
"transparency": False,
|
||||
"width" : 6400
|
||||
},
|
||||
"da_category" : "Uncategorized",
|
||||
"date" : "dt:2024-01-02 21:16:06",
|
||||
"deviationid" : "8C8D6B28-766A-DE21-7F7D-CE055C3BD50A",
|
||||
"download_filesize": 84510,
|
||||
"extension" : "png",
|
||||
"filename" : "banner_by_gdldev-dgntyqc",
|
||||
"index" : 1007488020,
|
||||
"index_base36" : "gntyqc",
|
||||
"is_blocked" : False,
|
||||
"is_deleted" : False,
|
||||
"is_downloadable" : True,
|
||||
"is_favourited" : False,
|
||||
"is_mature" : False,
|
||||
"is_original" : True,
|
||||
"is_published" : False,
|
||||
"preview" : dict,
|
||||
"printid" : None,
|
||||
"published_time" : 1704230166,
|
||||
"stats" : {
|
||||
"comments" : 0,
|
||||
"favourites": 0,
|
||||
},
|
||||
"target" : dict,
|
||||
"thumbs" : list,
|
||||
"title" : "Banner",
|
||||
"url" : "https://sta.sh/0198jippkeys",
|
||||
"username" : "gdldev",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous",
|
||||
"#comment" : "user",
|
||||
|
@ -86,6 +86,43 @@ __tests__ = (
|
||||
"content": r"re:(?s)^Greetings from FANBOX.\n \nAs of Monday, September 5th, 2022, we are happy to announce the start of the FANBOX hashtag event #MySetupTour ! \nAbout the event\nTo join this event .+ \nPlease check this page for further details regarding the Privacy & Terms.\nhttps://fanbox.pixiv.help/.+/10184952456601\n\n\nThank you for your continued support of FANBOX.$",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://official-en.fanbox.cc/posts/7022572",
|
||||
"#comment" : "'plan' and 'user' metadata (#4921)",
|
||||
"#category": ("", "fanbox", "post"),
|
||||
"#class" : fanbox.FanboxPostExtractor,
|
||||
"#options" : {"metadata": True},
|
||||
|
||||
"plan": {
|
||||
"coverImageUrl" : "",
|
||||
"creatorId" : "official-en",
|
||||
"description" : "",
|
||||
"fee" : 0,
|
||||
"hasAdultContent": None,
|
||||
"id" : "",
|
||||
"paymentMethod" : None,
|
||||
"title" : "",
|
||||
},
|
||||
"user": {
|
||||
"coverImageUrl" : "https://pixiv.pximg.net/c/1620x580_90_a2_g5/fanbox/public/images/creator/74349833/cover/n9mX8q4tUXHXXj7sK1RPWyUu.jpeg",
|
||||
"creatorId" : "official-en",
|
||||
"description" : "This is the official English pixivFANBOX account! \n(official Japanese account: https://official.fanbox.cc/ )\n\npixivFANBOX is a subscription service for building a reliable fan community where creators can nurture creative lifestyles together with their fans.\nFollowers can be notified of the updates from their favorite creators they are following. Supporters can enjoy closer communication with creators through exclusive content and their latest information.\n",
|
||||
"hasAdultContent" : False,
|
||||
"hasBoothShop" : False,
|
||||
"iconUrl" : "https://pixiv.pximg.net/c/160x160_90_a2_g5/fanbox/public/images/user/74349833/icon/oJH0OoGoSixLrJXlnneNvC95.jpeg",
|
||||
"isAcceptingRequest": False,
|
||||
"isFollowed" : False,
|
||||
"isStopped" : False,
|
||||
"isSupported" : False,
|
||||
"name" : "pixivFANBOX English",
|
||||
"profileItems" : [],
|
||||
"profileLinks" : [
|
||||
"https://twitter.com/pixivfanbox",
|
||||
],
|
||||
"userId" : "74349833",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://mochirong.fanbox.cc/posts/3746116",
|
||||
"#comment" : "imageMap file order (#2718)",
|
||||
|
144
test/results/hatenablog.py
Normal file
144
test/results/hatenablog.py
Normal file
@ -0,0 +1,144 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import hatenablog
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://cosmiclatte.hatenablog.com/entry/2020/05/28/003227",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
"#count" : 20,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://moko0908.hatenablog.jp/entry/2023/12/31/083846",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://p-shirokuma.hatenadiary.com/entry/20231227/1703685600",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://urakatahero.hateblo.jp/entry/2ndlife",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "hatenablog:https://blog.hyouhon.com/entry/2023/12/22/133549",
|
||||
"#category": ("", "hatenablog", "entry"),
|
||||
"#class" : hatenablog.HatenablogEntryExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://cetriolo.hatenablog.com",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
"#range" : "1-7",
|
||||
"#count" : 7,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://moko0908.hatenablog.jp/",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://p-shirokuma.hatenadiary.com/",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://urakatahero.hateblo.jp/",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "hatenablog:https://blog.hyouhon.com/",
|
||||
"#category": ("", "hatenablog", "home"),
|
||||
"#class" : hatenablog.HatenablogHomeExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : ("https://8saki.hatenablog.com/archive/category/%E3%82%BB%E3"
|
||||
"%83%AB%E3%83%95%E3%82%B8%E3%82%A7%E3%83%AB%E3%83%8D%E3%82"
|
||||
"%A4%E3%83%AB"),
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
"#range" : "1-30",
|
||||
"#count" : 30,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://moko0908.hatenablog.jp/archive/2023",
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
"#count" : 13,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://p-shirokuma.hatenadiary.com/archive/2023/01",
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
"#count" : 5,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://urakatahero.hateblo.jp/archive",
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
"#range" : "1-30",
|
||||
"#count" : 30,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "hatenablog:https://blog.hyouhon.com/archive/2024/01/01",
|
||||
"#category": ("", "hatenablog", "archive"),
|
||||
"#class" : hatenablog.HatenablogArchiveExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "hatenablog:https://blog.hyouhon.com/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
"#range" : "1-30",
|
||||
"#count" : 30,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://cosmiclatte.hatenablog.com/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://moko0908.hatenablog.jp/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://p-shirokuma.hatenadiary.com/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://urakatahero.hateblo.jp/search?q=a",
|
||||
"#category": ("", "hatenablog", "search"),
|
||||
"#class" : hatenablog.HatenablogSearchExtractor,
|
||||
},
|
||||
|
||||
)
|
@ -83,6 +83,7 @@ __tests__ = (
|
||||
|
||||
"artist_id" : 58000,
|
||||
"artist_name": "のえるわ",
|
||||
"count" : 1,
|
||||
"date" : "dt:2018-01-29 14:25:39",
|
||||
"description": "前回とシチュがまるかぶり \r\n竿野郎は塗るのだるかった",
|
||||
"extension" : "png",
|
||||
@ -113,9 +114,11 @@ __tests__ = (
|
||||
|
||||
"artist_id" : 58000,
|
||||
"artist_name": "のえるわ",
|
||||
"count" : 4,
|
||||
"date" : "dt:2018-02-04 14:47:24",
|
||||
"description": "ノエル「そんなことしなくても、言ってくれたら咥えるのに・・・♡」",
|
||||
"image_id" : 8716,
|
||||
"num" : range(0, 3),
|
||||
"tags" : [
|
||||
"男の娘",
|
||||
"フェラ",
|
||||
|
@ -9,7 +9,7 @@ from gallery_dl.extractor import idolcomplex
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos",
|
||||
"#url" : "https://idol.sankakucomplex.com/en/posts?tags=lyumos",
|
||||
"#category": ("booru", "idolcomplex", "tag"),
|
||||
"#class" : idolcomplex.IdolcomplexTagExtractor,
|
||||
"#pattern" : r"https://i[sv]\.sankakucomplex\.com/data/[^/]{2}/[^/]{2}/[^/]{32}\.\w+\?e=\d+&m=[^&#]+",
|
||||
@ -17,6 +17,24 @@ __tests__ = (
|
||||
"#count" : 5,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/posts/?tags=lyumos",
|
||||
"#category": ("booru", "idolcomplex", "tag"),
|
||||
"#class" : idolcomplex.IdolcomplexTagExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/en/?tags=lyumos",
|
||||
"#category": ("booru", "idolcomplex", "tag"),
|
||||
"#class" : idolcomplex.IdolcomplexTagExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos",
|
||||
"#category": ("booru", "idolcomplex", "tag"),
|
||||
"#class" : idolcomplex.IdolcomplexTagExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/?tags=lyumos+wreath&page=3&next=694215",
|
||||
"#category": ("booru", "idolcomplex", "tag"),
|
||||
@ -30,6 +48,12 @@ __tests__ = (
|
||||
"#count" : 3,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/en/pools/show/145",
|
||||
"#category": ("booru", "idolcomplex", "pool"),
|
||||
"#class" : idolcomplex.IdolcomplexPoolExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/pool/show/145",
|
||||
"#category": ("booru", "idolcomplex", "pool"),
|
||||
@ -37,7 +61,7 @@ __tests__ = (
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/en/posts/show/509eccbba54a43cea6b275a65b93c51d",
|
||||
"#url" : "https://idol.sankakucomplex.com/en/posts/509eccbba54a43cea6b275a65b93c51d",
|
||||
"#category": ("booru", "idolcomplex", "post"),
|
||||
"#class" : idolcomplex.IdolcomplexPostExtractor,
|
||||
"#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
|
||||
@ -45,7 +69,7 @@ __tests__ = (
|
||||
"created_at" : "2017-11-24 17:01:27.696",
|
||||
"date" : "dt:2017-11-24 17:01:27",
|
||||
"extension" : "jpg",
|
||||
"file_url" : r"re:https://is\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
|
||||
"file_url" : r"re:https://i[sv]\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
|
||||
"filename" : "509eccbba54a43cea6b275a65b93c51d",
|
||||
"height" : 683,
|
||||
"id" : 694215,
|
||||
@ -62,6 +86,12 @@ __tests__ = (
|
||||
"width" : 1024,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/en/posts/show/509eccbba54a43cea6b275a65b93c51d",
|
||||
"#category": ("booru", "idolcomplex", "post"),
|
||||
"#class" : idolcomplex.IdolcomplexPostExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://idol.sankakucomplex.com/posts/509eccbba54a43cea6b275a65b93c51d",
|
||||
"#category": ("booru", "idolcomplex", "post"),
|
||||
|
@ -21,7 +21,7 @@ __tests__ = (
|
||||
"album_id" : "i5PggF",
|
||||
"album_name" : "British Scrap Book",
|
||||
"extension" : "jpg",
|
||||
"id" : "re:^\w{7}$",
|
||||
"id" : r"re:^\w{7}$",
|
||||
"title" : str,
|
||||
"url" : r"re:https://i\.ibb\.co/\w{7}/[\w-]+\.jpg",
|
||||
"user" : "folkie",
|
||||
|
@ -9,7 +9,7 @@ from gallery_dl.extractor import chevereto
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://jpg2.su/img/funnymeme.LecXGS",
|
||||
"#url" : "https://jpg4.su/img/funnymeme.LecXGS",
|
||||
"#category": ("chevereto", "jpgfish", "image"),
|
||||
"#class" : chevereto.CheveretoImageExtractor,
|
||||
"#urls" : "https://simp3.jpg.church/images/funnymeme.jpg",
|
||||
|
@ -177,6 +177,7 @@ __tests__ = (
|
||||
|
||||
"revision_id": 142470,
|
||||
"revision_index": 2,
|
||||
"revision_hash": "e0e93281495e151b11636c156e52bfe9234c2a40",
|
||||
},
|
||||
|
||||
{
|
||||
@ -190,6 +191,7 @@ __tests__ = (
|
||||
|
||||
"revision_id": range(134996, 3052965),
|
||||
"revision_index": range(1, 9),
|
||||
"revision_hash": r"re:^[0-9a-f]{40}$",
|
||||
},
|
||||
|
||||
|
||||
@ -297,6 +299,7 @@ __tests__ = (
|
||||
"#category": ("", "kemonoparty", "favorite"),
|
||||
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
|
||||
"#pattern" : kemonoparty.KemonopartyUserExtractor.pattern,
|
||||
"#auth" : True,
|
||||
"#count" : 3,
|
||||
"#sha1_url": "f4b5b796979bcba824af84206578c79101c7f0e1",
|
||||
},
|
||||
@ -306,6 +309,7 @@ __tests__ = (
|
||||
"#category": ("", "kemonoparty", "favorite"),
|
||||
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
|
||||
"#pattern" : kemonoparty.KemonopartyPostExtractor.pattern,
|
||||
"#auth" : True,
|
||||
"#count" : 3,
|
||||
"#sha1_url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f",
|
||||
},
|
||||
@ -315,6 +319,7 @@ __tests__ = (
|
||||
"#category": ("", "kemonoparty", "favorite"),
|
||||
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
|
||||
"#pattern" : kemonoparty.KemonopartyPostExtractor.pattern,
|
||||
"#auth" : True,
|
||||
"#count" : 3,
|
||||
"#sha1_url": "4be8e84cb384a907a8e7997baaf6287b451783b5",
|
||||
},
|
||||
|
@ -8,19 +8,48 @@ from gallery_dl.extractor import komikcast
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://komikcast.lol/chapter/apotheosis-chapter-02-2-bahasa-indonesia/",
|
||||
"#category": ("", "komikcast", "chapter"),
|
||||
"#class" : komikcast.KomikcastChapterExtractor,
|
||||
"#pattern" : r"https://svr\d+\.imgkc\d+\.my\.id/wp-content/img/A/Apotheosis/002-2/\d{3}\.jpg",
|
||||
"#count" : 18,
|
||||
|
||||
"chapter" : 2,
|
||||
"chapter_minor": ".2",
|
||||
"count" : 18,
|
||||
"extension": "jpg",
|
||||
"filename" : r"re:0\d{2}",
|
||||
"lang" : "id",
|
||||
"language" : "Indonesian",
|
||||
"manga" : "Apotheosis",
|
||||
"page" : range(1, 18),
|
||||
"title" : "",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://komikcast.site/chapter/apotheosis-chapter-02-2-bahasa-indonesia/",
|
||||
"#category": ("", "komikcast", "chapter"),
|
||||
"#class" : komikcast.KomikcastChapterExtractor,
|
||||
"#sha1_url" : "f6b43fbc027697749b3ea1c14931c83f878d7936",
|
||||
"#sha1_metadata": "f3938e1aff9ad1f302f52447e9781b21f6da26d4",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://komikcast.me/chapter/apotheosis-chapter-02-2-bahasa-indonesia/",
|
||||
"#category": ("", "komikcast", "chapter"),
|
||||
"#class" : komikcast.KomikcastChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://komikcast.com/chapter/apotheosis-chapter-02-2-bahasa-indonesia/",
|
||||
"#category": ("", "komikcast", "chapter"),
|
||||
"#class" : komikcast.KomikcastChapterExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://komikcast.me/chapter/soul-land-ii-chapter-300-1-bahasa-indonesia/",
|
||||
"#category": ("", "komikcast", "chapter"),
|
||||
"#class" : komikcast.KomikcastChapterExtractor,
|
||||
"#sha1_url" : "efd00a9bd95461272d51990d7bc54b79ff3ff2e6",
|
||||
"#sha1_url" : "f2674e31b41a7f009f2f292652be2aefb6612d3f",
|
||||
"#sha1_metadata": "cb646cfed3d45105bd645ab38b2e9f7d8c436436",
|
||||
},
|
||||
|
||||
@ -28,8 +57,22 @@ __tests__ = (
|
||||
"#url" : "https://komikcast.site/komik/090-eko-to-issho/",
|
||||
"#category": ("", "komikcast", "manga"),
|
||||
"#class" : komikcast.KomikcastMangaExtractor,
|
||||
"#sha1_url" : "19d3d50d532e84be6280a3d61ff0fd0ca04dd6b4",
|
||||
"#sha1_metadata": "837a7e96867344ff59d840771c04c20dc46c0ab1",
|
||||
"#pattern" : komikcast.KomikcastChapterExtractor.pattern,
|
||||
"#count" : 12,
|
||||
|
||||
"author" : "Asakura Maru",
|
||||
"chapter": range(1, 12),
|
||||
"chapter_minor": "",
|
||||
"genres" : [
|
||||
"Comedy",
|
||||
"Drama",
|
||||
"Romance",
|
||||
"School Life",
|
||||
"Sci-Fi",
|
||||
"Shounen"
|
||||
],
|
||||
"manga" : "090 Eko to Issho",
|
||||
"type" : "Manga",
|
||||
},
|
||||
|
||||
{
|
||||
|
@ -113,4 +113,31 @@ __tests__ = (
|
||||
"#class" : mangadex.MangadexFeedExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://mangadex.org/list/3a0982c5-65aa-4de2-8a4a-2175be7383ab/test",
|
||||
"#category": ("", "mangadex", "list"),
|
||||
"#class" : mangadex.MangadexListExtractor,
|
||||
"#urls" : (
|
||||
"https://mangadex.org/title/cba4e5d6-67a0-47a0-b37a-c06e9bf25d93",
|
||||
"https://mangadex.org/title/cad76ec6-ca22-42f6-96f8-eca164da6545",
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://mangadex.org/list/3a0982c5-65aa-4de2-8a4a-2175be7383ab/test?tab=titles",
|
||||
"#category": ("", "mangadex", "list"),
|
||||
"#class" : mangadex.MangadexListExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://mangadex.org/list/3a0982c5-65aa-4de2-8a4a-2175be7383ab/test?tab=feed",
|
||||
"#category": ("", "mangadex", "list-feed"),
|
||||
"#class" : mangadex.MangadexListExtractor,
|
||||
"#urls" : (
|
||||
"https://mangadex.org/chapter/c765d6d5-5712-4360-be0b-0c8e0914fc94",
|
||||
"https://mangadex.org/chapter/fa8a695d-260f-4dcc-95a3-1f30e66d6571",
|
||||
"https://mangadex.org/chapter/788766b9-41c6-422e-97ba-552f03ba9655",
|
||||
),
|
||||
},
|
||||
|
||||
)
|
||||
|
@ -31,12 +31,13 @@ __tests__ = (
|
||||
|
||||
"artist_id" : 44,
|
||||
"artist_name": "ED",
|
||||
"count" : 1,
|
||||
"date" : datetime.datetime,
|
||||
"description": str,
|
||||
"extension" : "jpg",
|
||||
"filename" : str,
|
||||
"image_id" : int,
|
||||
"num" : int,
|
||||
"num" : 0,
|
||||
"tags" : list,
|
||||
"title" : str,
|
||||
"url" : r"re:https://pic.nijie.net/\d+/nijie/.*jpg$",
|
||||
@ -102,11 +103,12 @@ __tests__ = (
|
||||
"#class" : nijie.NijieImageExtractor,
|
||||
"#urls" : "https://pic.nijie.net/06/nijie/14/44/44/illust/0_0_28e8c02d921bee33_9222d3.jpg",
|
||||
"#sha1_url" : "3d654e890212ba823c9647754767336aebc0a743",
|
||||
"#sha1_metadata": "41da5d0e178b04f01fe72460185df52fadc3c91b",
|
||||
"#sha1_metadata": "58e716bcb03b431cae901178c198c787908e1c0c",
|
||||
"#sha1_content" : "d85e3ea896ed5e4da0bca2390ad310a4df716ca6",
|
||||
|
||||
"artist_id" : 44,
|
||||
"artist_name": "ED",
|
||||
"count" : 1,
|
||||
"date" : "dt:2014-01-18 19:58:21",
|
||||
"description": "租絵にてお邪魔いたし候\r\n是非ともこの”おっぱい”をご高覧賜りたく馳せ参じた次第\r\n長文にて失礼仕る\r\n\r\nまず全景でありますが、首を右に傾けてみて頂きたい\r\nこの絵図は茶碗を眺めていた私が思わぬ美しさにて昇天したときのものを、筆をとり、したためたものである(トレースではない)\r\n筆は疾風の如く走り、半刻過ぎには私好みの”おっぱい”になっていたのである!\r\n次に細部をみて頂きたい\r\n絵図を正面から見直して頂くと、なんとはんなりと美しいお椀型をしたおっぱいであろうか 右手から緩やかに生まれる曲線は左手に進むにつれて、穏やかな歪みを含み流れる これは所謂轆轤目であるが三重の紐でおっぱいをぐるぐると巻きつけた情景そのままであり、この歪みから茶碗の均整は崩れ、たぷんたぷんのおっぱいの重量感を醸し出している!\r\nさらに左手に進めば梅花皮(カイラギ)を孕んだ高大が現れる 今回は点線にて表現するが、その姿は乳首から母乳が噴出するが如く 或は精子をぶっかけられたが如く 白くとろっとした釉薬の凝固が素晴しい景色をつくりだしているのである!\r\n最後には極めつけ、すくっと螺旋を帯びながらそそり立つ兜巾(ときん)!この情景はまさしく乳首である! 全体をふんわりと盛り上げさせる乳輪にちょこっと存在する乳頭はぺろりと舌で確かめ勃起させたくなる風情がある!\r\n\r\nこれを”おっぱい”と呼ばずなんと呼ぼうや!?\r\n\r\n興奮のあまり失礼致した\r\n御免",
|
||||
"extension" : "jpg",
|
||||
@ -133,6 +135,7 @@ __tests__ = (
|
||||
|
||||
"artist_id" : 49509,
|
||||
"artist_name": "黒川 竜",
|
||||
"count" : 4,
|
||||
"date" : "dt:2023-12-02 04:19:29",
|
||||
"description": "【DLサイトコム】ウィンターセール 30%OFF\r\n期間:2024年2月14日まで\r\n【toloveるドリンク】\r\nhttps://www.dlsite.com/maniax/work/=/product_id/RJ042727.html\r\n【toloveるドリンク2】\r\nhttps://www.dlsite.com/maniax/work/=/product_id/RJ043289.html\r\n【クランクランBIG】\r\nhttps://www.dlsite.com/maniax/work/=/product_id/RJ043564.html",
|
||||
"image_id" : 594044,
|
||||
@ -154,6 +157,14 @@ __tests__ = (
|
||||
"user_name" : "黒川 竜",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://nijie.info/view.php?id=37078",
|
||||
"#comment" : "'view_side_dojin' thumbnails (#5049)",
|
||||
"#category": ("Nijie", "nijie", "image"),
|
||||
"#class" : nijie.NijieImageExtractor,
|
||||
"#urls" : "https://pic.nijie.net/03/nijie/13/98/498/illust/0_0_703023d18ca8d058_bca943.jpg",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://nijie.info/view.php?id=70724",
|
||||
"#category": ("Nijie", "nijie", "image"),
|
||||
|
@ -41,10 +41,8 @@ __tests__ = (
|
||||
"#category": ("nitter", "nitter.1d4.us", "tweet"),
|
||||
"#class" : nitter.NitterTweetExtractor,
|
||||
|
||||
"content": r"""re:Gear up for #PokemonSwordShieldEX with special Mystery Gifts!
|
||||
|
||||
You’ll be able to receive four Galarian form Pokémon with Hidden Abilities, plus some very useful items. It’s our \(Mystery\) Gift to you, Trainers!
|
||||
|
||||
"content": r"""re:Gear up for #PokemonSwordShieldEX with special Mystery Gifts! \n
|
||||
You’ll be able to receive four Galarian form Pokémon with Hidden Abilities, plus some very useful items. It’s our \(Mystery\) Gift to you, Trainers! \n
|
||||
❓🎁➡️ """,
|
||||
},
|
||||
|
||||
|
@ -1,56 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import nudecollect
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.nudecollect.com/content/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/image-4-pics-108-mirror-43.html",
|
||||
"#category": ("", "nudecollect", "image"),
|
||||
"#class" : nudecollect.NudecollectImageExtractor,
|
||||
"#pattern" : r"https://mirror\d+\.nudecollect\.com/showimage/nudecollect-8769086487/image00004-5896498214-43-9689595623/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/9879560327/nudecollect\.com\.jpg",
|
||||
|
||||
"slug" : "20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust",
|
||||
"title" : "20201220 Teenpornstorage Patritcy Vanessa Lesbian Lust",
|
||||
"num" : 4,
|
||||
"count" : 108,
|
||||
"mirror": 43,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.nudecollect.com/content/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/image-10-pics-108-mirror-43.html",
|
||||
"#category": ("", "nudecollect", "image"),
|
||||
"#class" : nudecollect.NudecollectImageExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.nudecollect.com/content/20170219_TheWhiteBoxxx_Caprice_Tracy_Loves_Hot_ass_fingering_and_sensual_lesbian_sex_with_alluring_Czech_babes_x125_1080px/index-mirror-67-125.html",
|
||||
"#category": ("", "nudecollect", "album"),
|
||||
"#class" : nudecollect.NudecollectAlbumExtractor,
|
||||
"#pattern" : r"https://mirror\d+\.nudecollect\.com/showimage/nudecollect-8769086487/image00\d\d\d-5896498214-67-9689595623/20170219_TheWhiteBoxxx_Caprice_Tracy_Loves_Hot_ass_fingering_and_sensual_lesbian_sex_with_alluring_Czech_babes_x125_1080px/9879560327/nudecollect\.com\.jpg",
|
||||
"#count" : 125,
|
||||
|
||||
"slug" : "20170219_TheWhiteBoxxx_Caprice_Tracy_Loves_Hot_ass_fingering_and_sensual_lesbian_sex_with_alluring_Czech_babes_x125_1080px",
|
||||
"title" : "20170219 TheWhiteBoxxx Caprice Tracy Loves Hot ass fingering and sensual lesbian sex with alluring Czech babes x125 1080px",
|
||||
"num" : int,
|
||||
"mirror": 67,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.nudecollect.com/content/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/page-1-pics-108-mirror-43.html",
|
||||
"#category": ("", "nudecollect", "album"),
|
||||
"#class" : nudecollect.NudecollectAlbumExtractor,
|
||||
"#pattern" : r"https://mirror\d+\.nudecollect\.com/showimage/nudecollect-8769086487/image00\d\d\d-5896498214-43-9689595623/20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust/9879560327/nudecollect\.com\.jpg",
|
||||
"#count" : 108,
|
||||
|
||||
"slug" : "20201220_Teenpornstorage_Patritcy_Vanessa_Lesbian_Lust",
|
||||
"title" : "20201220 Teenpornstorage Patritcy Vanessa Lesbian Lust",
|
||||
"num" : int,
|
||||
"mirror": 43,
|
||||
},
|
||||
|
||||
)
|
@ -12,7 +12,7 @@ __tests__ = (
|
||||
"#url" : "https://rule34.paheal.net/post/list/Ayane_Suzuki/1",
|
||||
"#category": ("shimmie2", "paheal", "tag"),
|
||||
"#class" : paheal.PahealTagExtractor,
|
||||
"#pattern" : "https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20|https://r34i\.paheal-cdn\.net/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}$",
|
||||
"#pattern" : r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20|https://r34i\.paheal-cdn\.net/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}$",
|
||||
"#count" : range(70, 200),
|
||||
|
||||
"date" : "type:datetime",
|
||||
|
@ -103,6 +103,14 @@ __tests__ = (
|
||||
"tags": ["AWMedia"],
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.patreon.com/posts/meu8-94714289",
|
||||
"#category": ("", "patreon", "post"),
|
||||
"#class" : patreon.PatreonPostExtractor,
|
||||
"#range" : "2",
|
||||
"#pattern" : r"ytdl:https://stream\.mux\.com/NLrxTLdxyGStpOgapJAtB8uPGAaokEcj8YovML00y2DY\.m3u8\?token=ey.+",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.patreon.com/posts/not-found-123",
|
||||
"#category": ("", "patreon", "post"),
|
||||
|
@ -71,58 +71,58 @@ __tests__ = (
|
||||
"#pattern" : r"https://img2\.pillowfort\.social/posts/c8e834bc09e6_Brandee\.png",
|
||||
"#count" : 1,
|
||||
|
||||
"avatar_frame" : None,
|
||||
"avatar_id" : None,
|
||||
"avatar_url" : "https://img3.pillowfort.social/avatars/000/037/139/original/437.jpg?1545015697",
|
||||
"b2_lg_url" : "https://img2.pillowfort.social/posts/c8e834bc09e6_Brandee.png",
|
||||
"b2_sm_url" : "https://img2.pillowfort.social/posts/c8e834bc09e6_Brandee_small.png",
|
||||
"cached_tag_list": "art, digital art, mermaid, mermaids, underwater, seaweed, illustration, speed paint",
|
||||
"col" : 0,
|
||||
"comm_screening_status": "not_applicable",
|
||||
"commentable" : True,
|
||||
"comments_count": 0,
|
||||
"community_id" : None,
|
||||
"concealed_comment_warning": None,
|
||||
"content" : "<p>Sea Bed</p>",
|
||||
"created_at" : r"re:2020-02-.+",
|
||||
"currentuser_default_avatar_url": None,
|
||||
"currentuser_multi_avi": None,
|
||||
"date" : "dt:2020-02-29 17:09:03",
|
||||
"deleted" : None,
|
||||
"deleted_at" : None,
|
||||
"deleted_by_mod": None,
|
||||
"deleted_for_flag_id": None,
|
||||
"embed_code" : None,
|
||||
"extension" : "png",
|
||||
"filename" : "Brandee",
|
||||
"hash" : "c8e834bc09e6",
|
||||
"id" : 720167,
|
||||
"last_activity" : r"re:2020-02-.+",
|
||||
"last_activity_elapsed": r"re:\d+ months",
|
||||
"last_edited_at": None,
|
||||
"likes_count" : 8,
|
||||
"media_type" : "picture",
|
||||
"nsfw" : False,
|
||||
"num" : 1,
|
||||
"original_post_id": None,
|
||||
"original_post_user_id": None,
|
||||
"pic_row_last" : 1,
|
||||
"picture_content_type": None,
|
||||
"picture_file_name": None,
|
||||
"picture_file_size": None,
|
||||
"picture_updated_at": None,
|
||||
"post_id" : 1124584,
|
||||
"post_type" : "picture",
|
||||
"privacy" : "public",
|
||||
"reblog_copy_info": [],
|
||||
"rebloggable" : True,
|
||||
"reblogged_from_post_id": None,
|
||||
"reblogged_from_user_id": None,
|
||||
"reblogs_count" : int,
|
||||
"row" : 1,
|
||||
"small_image_url": None,
|
||||
"tag_list" : None,
|
||||
"tags" : [
|
||||
"avatar_frame" : None,
|
||||
"avatar_id" : None,
|
||||
"avatar_url" : "https://img3.pillowfort.social/avatars/000/037/139/original/437.jpg?1545015697",
|
||||
"b2_lg_url" : "https://img2.pillowfort.social/posts/c8e834bc09e6_Brandee.png",
|
||||
"b2_sm_url" : "https://img2.pillowfort.social/posts/c8e834bc09e6_Brandee_small.png",
|
||||
"cached_tag_list": "art, digital art, mermaid, mermaids, underwater, seaweed, illustration, speed paint",
|
||||
"col" : 0,
|
||||
"comm_screening_status": "not_applicable",
|
||||
"commentable" : True,
|
||||
"comments_count": 0,
|
||||
"community_id" : None,
|
||||
"concealed_comment_warning": None,
|
||||
"content" : "<p>Sea Bed</p>",
|
||||
"created_at" : r"re:2020-02-.+",
|
||||
"currentuser_default_avatar_url": None,
|
||||
"currentuser_multi_avi": None,
|
||||
"date" : "dt:2020-02-29 17:09:03",
|
||||
"deleted" : None,
|
||||
"deleted_at" : None,
|
||||
"deleted_by_mod": None,
|
||||
"deleted_for_flag_id": None,
|
||||
"embed_code" : None,
|
||||
"extension" : "png",
|
||||
"filename" : "Brandee",
|
||||
"hash" : "c8e834bc09e6",
|
||||
"id" : 720167,
|
||||
"last_activity" : r"re:2020-02-.+",
|
||||
"last_activity_elapsed": r"re:\d+ months",
|
||||
"last_edited_at": None,
|
||||
"likes_count" : 8,
|
||||
"media_type" : "picture",
|
||||
"nsfw" : False,
|
||||
"num" : 1,
|
||||
"original_post_id": None,
|
||||
"original_post_user_id": None,
|
||||
"pic_row_last" : 1,
|
||||
"picture_content_type": None,
|
||||
"picture_file_name": None,
|
||||
"picture_file_size": None,
|
||||
"picture_updated_at": None,
|
||||
"post_id" : 1124584,
|
||||
"post_type" : "picture",
|
||||
"privacy" : "public",
|
||||
"reblog_copy_info": [],
|
||||
"rebloggable" : True,
|
||||
"reblogged_from_post_id": None,
|
||||
"reblogged_from_user_id": None,
|
||||
"reblogs_count" : int,
|
||||
"row" : 1,
|
||||
"small_image_url": None,
|
||||
"tag_list" : None,
|
||||
"tags" : [
|
||||
"art",
|
||||
"digital art",
|
||||
"mermaid",
|
||||
@ -130,16 +130,16 @@ __tests__ = (
|
||||
"underwater",
|
||||
"seaweed",
|
||||
"illustration",
|
||||
"speed paint"
|
||||
],
|
||||
"time_elapsed" : r"re:\d+ months",
|
||||
"timestamp" : str,
|
||||
"title" : "",
|
||||
"updated_at" : r"re:2020-02-.+",
|
||||
"url" : "",
|
||||
"user_concealed": None,
|
||||
"user_id" : 37201,
|
||||
"username" : "Maclanahan",
|
||||
"speed paint",
|
||||
],
|
||||
"time_elapsed" : r"re:\d+ months",
|
||||
"timestamp" : str,
|
||||
"title" : "",
|
||||
"updated_at" : r"re:2020-02-.+",
|
||||
"url" : "",
|
||||
"user_concealed": None,
|
||||
"user_id" : 37201,
|
||||
"username" : "Maclanahan",
|
||||
},
|
||||
|
||||
{
|
||||
|
54
test/results/poringa.py
Normal file
54
test/results/poringa.py
Normal file
@ -0,0 +1,54 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import poringa
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "http://www.poringa.net/posts/imagenes/3051081/Turrita-alto-ojete.html",
|
||||
"#category": ("", "poringa", "post"),
|
||||
"#class" : poringa.PoringaPostExtractor,
|
||||
"#count" : 26,
|
||||
|
||||
"count" : 26,
|
||||
"num" : range(1, 26),
|
||||
"post_id" : "3051081",
|
||||
"title" : "turrita alto ojete...",
|
||||
"user" : "vipower1top",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "http://www.poringa.net/posts/imagenes/3095554/Otra-culona-de-instagram.html",
|
||||
"#category": ("", "poringa", "post"),
|
||||
"#class" : poringa.PoringaPostExtractor,
|
||||
"#count" : 15,
|
||||
|
||||
"count" : 15,
|
||||
"num" : range(1, 15),
|
||||
"post_id" : "3095554",
|
||||
"title" : "Otra culona de instagram",
|
||||
"user" : "Expectro007",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "http://www.poringa.net/Expectro007",
|
||||
"#category": ("", "poringa", "user"),
|
||||
"#class" : poringa.PoringaUserExtractor,
|
||||
"#pattern" : r"https?://img-\d+\.poringa\.net/poringa/img/././././././Expectro007/\w{3}\.(jpg|png|gif)",
|
||||
"#count" : range(500, 600),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "http://www.poringa.net/buscar/?&q=yuslopez",
|
||||
"#category": ("", "poringa", "search"),
|
||||
"#class" : poringa.PoringaSearchExtractor,
|
||||
"#pattern" : r"https?://img-\d+\.poringa\.net/poringa/img/././././././\w+/\w{3}\.(jpg|png|gif)",
|
||||
"#range" : "1-50",
|
||||
"#count" : 50,
|
||||
},
|
||||
|
||||
)
|
@ -21,7 +21,7 @@ __tests__ = (
|
||||
"#category": ("postmill", "raddle.me", "forum"),
|
||||
"#class" : postmill.PostmillForumExtractor,
|
||||
"#count" : 1,
|
||||
"#pattern" : "^https://raddle\.me/f/traa/156646/click-here-to-go-to-f-traaaaaaannnnnnnnnns$",
|
||||
"#pattern" : r"^https://raddle\.me/f/traa/156646/click-here-to-go-to-f-traaaaaaannnnnnnnnns$",
|
||||
},
|
||||
|
||||
{
|
||||
@ -97,7 +97,7 @@ __tests__ = (
|
||||
"#comment" : "Link + text post (with text disabled)",
|
||||
"#category": ("postmill", "raddle.me", "post"),
|
||||
"#class" : postmill.PostmillPostExtractor,
|
||||
"#pattern" : "^https://fantasyanime\.com/anime/neo-tokyo-dub$",
|
||||
"#pattern" : r"^https://fantasyanime\.com/anime/neo-tokyo-dub$",
|
||||
"#count" : 1,
|
||||
},
|
||||
|
||||
|
@ -34,6 +34,13 @@ __tests__ = (
|
||||
"#count" : 3,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.rule34.xxx/index.php?page=post&s=view&id=863",
|
||||
"#comment" : "www subdomain",
|
||||
"#category": ("gelbooru_v02", "rule34", "post"),
|
||||
"#class" : gelbooru_v02.GelbooruV02PostExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://rule34.xxx/index.php?page=post&s=view&id=863",
|
||||
"#category": ("gelbooru_v02", "rule34", "post"),
|
||||
|
79
test/results/snootbooru.py
Normal file
79
test/results/snootbooru.py
Normal file
@ -0,0 +1,79 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import szurubooru
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://snootbooru.com/posts/query=sport",
|
||||
"#category": ("szurubooru", "snootbooru", "tag"),
|
||||
"#class" : szurubooru.SzurubooruTagExtractor,
|
||||
"#pattern" : r"https://snootbooru\.com/data/posts/\d+_[0-9a-f]{16}\.\w+",
|
||||
"#count" : range(35, 50),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://snootbooru.com/post/14511",
|
||||
"#category": ("szurubooru", "snootbooru", "post"),
|
||||
"#class" : szurubooru.SzurubooruPostExtractor,
|
||||
"#urls" : "https://snootbooru.com/data/posts/14511_e753313112755da6.png",
|
||||
"#sha1_content": "e69e61e61c5372514808480aae3a8e355c9cd6fb",
|
||||
|
||||
"canvasHeight" : 1000,
|
||||
"canvasWidth" : 1414,
|
||||
"checksum" : "e69e61e61c5372514808480aae3a8e355c9cd6fb",
|
||||
"checksumMD5" : "f4f4ddfcbdf367f466ede0980acb3d7d",
|
||||
"commentCount" : int,
|
||||
"comments" : list,
|
||||
"contentUrl" : "data/posts/14511_e753313112755da6.png",
|
||||
"creationTime" : "2023-12-02T01:11:01.433664Z",
|
||||
"date" : "dt:2023-12-02 01:11:01",
|
||||
"extension" : "png",
|
||||
"favoriteCount": int,
|
||||
"favoritedBy" : list,
|
||||
"featureCount" : int,
|
||||
"fileSize" : 270639,
|
||||
"filename" : "14511_e753313112755da6",
|
||||
"flags" : [],
|
||||
"hasCustomThumbnail": False,
|
||||
"id" : 14511,
|
||||
"lastEditTime" : "2023-12-02T01:12:09.500217Z",
|
||||
"lastFeatureTime": None,
|
||||
"mimeType" : "image/png",
|
||||
"noteCount" : 0,
|
||||
"notes" : [],
|
||||
"ownFavorite" : False,
|
||||
"ownScore" : 0,
|
||||
"pools" : [],
|
||||
"relationCount": 0,
|
||||
"relations" : [],
|
||||
"safety" : "safe",
|
||||
"score" : 0,
|
||||
"source" : None,
|
||||
"tagCount" : 3,
|
||||
"tags" : [
|
||||
"transparent",
|
||||
"sport",
|
||||
"text",
|
||||
],
|
||||
"tags_default" : [
|
||||
"sport",
|
||||
"text"
|
||||
],
|
||||
"tags_type" : [
|
||||
"transparent"
|
||||
],
|
||||
"thumbnailUrl" : "data/generated-thumbnails/14511_e753313112755da6.jpg",
|
||||
"type" : "image",
|
||||
"user" : {
|
||||
"avatarUrl": "data/avatars/komp.png",
|
||||
"name": "komp"
|
||||
},
|
||||
"version" : 2,
|
||||
},
|
||||
|
||||
)
|
124
test/results/steamgriddb.py
Normal file
124
test/results/steamgriddb.py
Normal file
@ -0,0 +1,124 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import steamgriddb
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/grid/368023",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
"#urls" : ("https://cdn2.steamgriddb.com/grid/"
|
||||
"82fee171d62c044898d99ba0fddeb203.png"),
|
||||
"#count" : 1,
|
||||
"#sha1_content": "0bffaccae6f35f9fab529684a5b158d1cec4186b",
|
||||
|
||||
"game": {
|
||||
"id" : 5259324,
|
||||
"name": "Helltaker",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/grid/132605",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
"#count" : 2,
|
||||
"#sha1_url" : "4ff9158c008a1f01921d7553bcabf5e6204cdc79",
|
||||
"#sha1_content": "bc16c5eebf71463abdb33cfbf4b45a2fe092a2b2",
|
||||
|
||||
"game": {
|
||||
"id" : 5247997,
|
||||
"name": "OMORI",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/grid/132605",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
"#options" : {"download-fake-png": False},
|
||||
"#count" : 1,
|
||||
"#sha1_url" : "f6819c593ff65f15864796fb89581f05d21adddb",
|
||||
"#sha1_content": "0d9e6114dd8bb9699182fbb7c6bd9064d8b0b6cd",
|
||||
|
||||
"game": {
|
||||
"id" : 5247997,
|
||||
"name": "OMORI",
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/hero/61104",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/logo/9610",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/icon/173",
|
||||
"#category": ("", "steamgriddb", "asset"),
|
||||
"#class" : steamgriddb.SteamgriddbAssetExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5259324/grids",
|
||||
"#category": ("", "steamgriddb", "grids"),
|
||||
"#class" : steamgriddb.SteamgriddbGridsExtractor,
|
||||
"#range" : "1-10",
|
||||
"#count" : 10,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5259324/grids",
|
||||
"#category": ("", "steamgriddb", "grids"),
|
||||
"#class" : steamgriddb.SteamgriddbGridsExtractor,
|
||||
"#options" : {"humor": False, "epilepsy": False, "untagged": False},
|
||||
"#range" : "1-33",
|
||||
"#count" : 33,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5331605/heroes",
|
||||
"#category": ("", "steamgriddb", "heroes"),
|
||||
"#class" : steamgriddb.SteamgriddbHeroesExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5255394/logos",
|
||||
"#category": ("", "steamgriddb", "logos"),
|
||||
"#class" : steamgriddb.SteamgriddbLogosExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/game/5279790/icons",
|
||||
"#category": ("", "steamgriddb", "icons"),
|
||||
"#class" : steamgriddb.SteamgriddbIconsExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/collection/332/grids",
|
||||
"#category": ("", "steamgriddb", "grids"),
|
||||
"#class" : steamgriddb.SteamgriddbGridsExtractor,
|
||||
"#range" : "1-10",
|
||||
"#count" : 10,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.steamgriddb.com/collection/332/heroes",
|
||||
"#category": ("", "steamgriddb", "heroes"),
|
||||
"#class" : steamgriddb.SteamgriddbHeroesExtractor,
|
||||
"#options" : {"animated": False},
|
||||
"#count" : 0,
|
||||
},
|
||||
|
||||
)
|
@ -81,7 +81,7 @@ __tests__ = (
|
||||
"full" : "https://images.unsplash.com/photo-1601823984263-b87b59798b70?crop=entropy&cs=srgb&fm=jpg&ixid=M3wxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNzAwODY2NDE4fA&ixlib=rb-4.0.3&q=85",
|
||||
"raw" : "https://images.unsplash.com/photo-1601823984263-b87b59798b70?ixid=M3wxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNzAwODY2NDE4fA&ixlib=rb-4.0.3",
|
||||
"regular" : "https://images.unsplash.com/photo-1601823984263-b87b59798b70?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3wxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNzAwODY2NDE4fA&ixlib=rb-4.0.3&q=80&w=1080",
|
||||
"small" : "https://images.unsplash.com/photo-1601823984263-b87b59798b70?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3wxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNzAwODY2NDE4fA&ixlib=rb-4.0.3&q=80&w=400",
|
||||
"small" : "https://images.unsplash.com/photo-1601823984263-b87b59798b70?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3wxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNzAwODY2NDE4fA&ixlib=rb-4.0.3&q=80&w=400",
|
||||
"small_s3": "https://s3.us-west-2.amazonaws.com/images.unsplash.com/small/photo-1601823984263-b87b59798b70",
|
||||
"thumb" : "https://images.unsplash.com/photo-1601823984263-b87b59798b70?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3wxMjA3fDB8MXxhbGx8fHx8fHx8fHwxNzAwODY2NDE4fA&ixlib=rb-4.0.3&q=80&w=200",
|
||||
},
|
||||
|
@ -20,6 +20,22 @@ __tests__ = (
|
||||
"42055e44659f6ffc410b3fb6557346dfbb993df3",
|
||||
"49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9",
|
||||
],
|
||||
|
||||
"author_name" : "Chris McCoy",
|
||||
"comic" : "safely-endangered",
|
||||
"comic_name" : "Safely Endangered",
|
||||
"count" : 5,
|
||||
"description" : "Silly comics for silly people.",
|
||||
"episode" : "572",
|
||||
"episode_name": "Ep. 572 - Earth",
|
||||
"episode_no" : "572",
|
||||
"genre" : "comedy",
|
||||
"lang" : "en",
|
||||
"language" : "English",
|
||||
"num" : range(1, 5),
|
||||
"title" : "Safely Endangered - Ep. 572 - Earth",
|
||||
"title_no" : "352",
|
||||
"username" : "safelyendangered",
|
||||
},
|
||||
|
||||
{
|
||||
@ -37,6 +53,18 @@ __tests__ = (
|
||||
"title_no" : "312584",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.webtoons.com/en/canvas/i-want-to-be-a-cute-anime-girl/209-the-storys-story/viewer?title_no=349416&episode_no=214",
|
||||
"#category": ("", "webtoons", "episode"),
|
||||
"#class" : webtoons.WebtoonsEpisodeExtractor,
|
||||
"#count" : 4,
|
||||
|
||||
"comic_name" : "I want to be a cute anime girl",
|
||||
"episode_name": "209 - The story's story",
|
||||
"username" : "m9huj",
|
||||
"author_name" : "Azul Crescent",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.webtoons.com/en/comedy/live-with-yourself/list?title_no=919",
|
||||
"#comment" : "english",
|
||||
|
@ -13,7 +13,35 @@ __tests__ = (
|
||||
"#url" : "https://weibo.com/1758989602",
|
||||
"#category": ("", "weibo", "user"),
|
||||
"#class" : weibo.WeiboUserExtractor,
|
||||
"#pattern" : r"^https://weibo\.com/u/1758989602\?tabtype=feed$",
|
||||
"#urls" : "https://weibo.com/u/1758989602?tabtype=feed",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://weibo.com/1758989602",
|
||||
"#category": ("", "weibo", "user"),
|
||||
"#class" : weibo.WeiboUserExtractor,
|
||||
"#options" : {"include": "all"},
|
||||
"#urls" : (
|
||||
"https://weibo.com/u/1758989602?tabtype=home",
|
||||
"https://weibo.com/u/1758989602?tabtype=feed",
|
||||
"https://weibo.com/u/1758989602?tabtype=video",
|
||||
"https://weibo.com/u/1758989602?tabtype=newVideo",
|
||||
"https://weibo.com/u/1758989602?tabtype=album",
|
||||
),
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://weibo.com/zhouyuxi77",
|
||||
"#category": ("", "weibo", "user"),
|
||||
"#class" : weibo.WeiboUserExtractor,
|
||||
"#urls" : "https://weibo.com/u/7488709788?tabtype=feed",
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://www.weibo.com/n/周于希Sally",
|
||||
"#category": ("", "weibo", "user"),
|
||||
"#class" : weibo.WeiboUserExtractor,
|
||||
"#urls" : "https://weibo.com/u/7488709788?tabtype=feed",
|
||||
},
|
||||
|
||||
{
|
||||
@ -69,9 +97,11 @@ __tests__ = (
|
||||
"#class" : weibo.WeiboFeedExtractor,
|
||||
"#range" : "1",
|
||||
|
||||
"status": {"user": {
|
||||
"id" : 7488709788,
|
||||
}},
|
||||
"status": {
|
||||
"user": {
|
||||
"id": 7488709788,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
@ -80,9 +110,12 @@ __tests__ = (
|
||||
"#class" : weibo.WeiboFeedExtractor,
|
||||
"#range" : "1",
|
||||
|
||||
"status": {"user": {
|
||||
"id" : 7488709788,
|
||||
}},
|
||||
|
||||
"status": {
|
||||
"user": {
|
||||
"id": 7488709788,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
{
|
||||
|
@ -12,7 +12,7 @@ __tests__ = (
|
||||
"#url" : "https://www.wikiart.org/en/thomas-cole",
|
||||
"#category": ("", "wikiart", "artist"),
|
||||
"#class" : wikiart.WikiartArtistExtractor,
|
||||
"#pattern" : "https://uploads\d+\.wikiart\.org/(\d+/)?images/thomas-cole/[\w()-]+\.(jpg|png)",
|
||||
"#pattern" : r"https://uploads\d+\.wikiart\.org/(\d+/)?images/thomas-cole/[\w()-]+\.(jpg|png)",
|
||||
"#count" : "> 100",
|
||||
|
||||
"albums" : None,
|
||||
|
23
test/results/wikibooks.py
Normal file
23
test/results/wikibooks.py
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikibooks.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikibooks", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikibooks.org/wiki/Category:Title",
|
||||
"#category": ("wikimedia", "wikibooks", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
23
test/results/wikimediacommons.py
Normal file
23
test/results/wikimediacommons.py
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://commons.wikimedia.org/wiki/File:Starr-050516-1367-Pimenta_dioica-flowers-Maunaloa-Molokai_(24762757525).jpg",
|
||||
"#category": ("wikimedia", "wikimediacommons", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://commons.wikimedia.org/wiki/Category:Network_maps_of_the_Paris_Metro",
|
||||
"#category": ("wikimedia", "wikimediacommons", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
23
test/results/wikinews.py
Normal file
23
test/results/wikinews.py
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikinews.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikinews", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikinews.org/wiki/Category:Title",
|
||||
"#category": ("wikimedia", "wikinews", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
53
test/results/wikipedia.py
Normal file
53
test/results/wikipedia.py
Normal file
@ -0,0 +1,53 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikipedia.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikipedia", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikipedia.org/wiki/Athena",
|
||||
"#category": ("wikimedia", "wikipedia", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
"#pattern" : r"https://upload.wikimedia.org/wikipedia/.+",
|
||||
"#count" : range(50, 100),
|
||||
|
||||
"bitdepth" : int,
|
||||
"canonicaltitle": str,
|
||||
"comment" : str,
|
||||
"commonmetadata": dict,
|
||||
"date" : "type:datetime",
|
||||
"descriptionshorturl": str,
|
||||
"descriptionurl": str,
|
||||
"extension" : str,
|
||||
"extmetadata" : dict,
|
||||
"filename" : str,
|
||||
"height" : int,
|
||||
"metadata" : dict,
|
||||
"mime" : r"re:image/\w+",
|
||||
"page" : "Athena",
|
||||
"sha1" : r"re:^[0-9a-f]{40}$",
|
||||
"size" : int,
|
||||
"timestamp" : str,
|
||||
"url" : str,
|
||||
"user" : str,
|
||||
"userid" : int,
|
||||
"width" : int,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikipedia.org/wiki/Category:Physics",
|
||||
"#category": ("wikimedia", "wikipedia", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
23
test/results/wikiquote.py
Normal file
23
test/results/wikiquote.py
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikiquote.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikiquote", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikiquote.org/wiki/Category:Title",
|
||||
"#category": ("wikimedia", "wikiquote", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
23
test/results/wikisource.py
Normal file
23
test/results/wikisource.py
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
__tests__ = (
|
||||
{
|
||||
"#url" : "https://www.wikisource.org/wiki/Title",
|
||||
"#category": ("wikimedia", "wikisource", "article"),
|
||||
"#class" : wikimedia.WikimediaArticleExtractor,
|
||||
},
|
||||
|
||||
{
|
||||
"#url" : "https://en.wikisource.org/wiki/Category:Title",
|
||||
"#category": ("wikimedia", "wikisource", "category"),
|
||||
"#class" : wikimedia.WikimediaCategoryExtractor,
|
||||
},
|
||||
|
||||
)
|
25
test/results/wikispecies.py
Normal file
25
test/results/wikispecies.py
Normal file
@ -0,0 +1,25 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
# Result descriptions for Wikispecies URLs. Each entry names the URL under
# test together with the category triple and extractor class expected for it.
__tests__ = (
    # article page; additionally lists "#urls" and "#sha1_content" expectations
    {
        "#url": "https://species.wikimedia.org/wiki/Geranospiza",
        "#category": ("wikimedia", "wikispecies", "article"),
        "#class": wikimedia.WikimediaArticleExtractor,
        "#urls": "https://upload.wikimedia.org/wikipedia/commons/0/01/Geranospiza_caerulescens.jpg",
        "#sha1_content": "3a17c14b15489928e4154f826af1c42afb5a523e",
    },
    # category page
    {
        "#url": "https://species.wikimedia.org/wiki/Category:Names",
        "#category": ("wikimedia", "wikispecies", "category"),
        "#class": wikimedia.WikimediaCategoryExtractor,
    },
)
|
23
test/results/wikiversity.py
Normal file
23
test/results/wikiversity.py
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
# Result descriptions for Wikiversity URLs. Each entry names the URL under
# test together with the category triple and extractor class expected for it.
__tests__ = (
    # article page
    {
        "#url": "https://www.wikiversity.org/wiki/Title",
        "#category": ("wikimedia", "wikiversity", "article"),
        "#class": wikimedia.WikimediaArticleExtractor,
    },
    # category page
    {
        "#url": "https://en.wikiversity.org/wiki/Category:Title",
        "#category": ("wikimedia", "wikiversity", "category"),
        "#class": wikimedia.WikimediaCategoryExtractor,
    },
)
|
23
test/results/wiktionary.py
Normal file
23
test/results/wiktionary.py
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import wikimedia
|
||||
|
||||
|
||||
# Result descriptions for Wiktionary URLs. Each entry names the URL under
# test together with the category triple and extractor class expected for it.
__tests__ = (
    # article page
    {
        "#url": "https://www.wiktionary.org/wiki/Word",
        "#category": ("wikimedia", "wiktionary", "article"),
        "#class": wikimedia.WikimediaArticleExtractor,
    },
    # category page
    {
        "#url": "https://en.wiktionary.org/wiki/Category:Words",
        "#category": ("wikimedia", "wiktionary", "category"),
        "#class": wikimedia.WikimediaCategoryExtractor,
    },
)
|
30
test/results/zzup.py
Normal file
30
test/results/zzup.py
Normal file
@ -0,0 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import zzup
|
||||
|
||||
|
||||
# Result descriptions for zzup gallery URLs. Each entry names the URL under
# test, the expected category triple and extractor class, and a regular
# expression ("#pattern") describing the expected file URLs.
#
# Every literal dot inside the patterns is escaped: an unescaped "." matches
# any character, which would make the expectations looser than intended.
__tests__ = (
    {
        "#url": "https://zzup.com/content/NjM=/MetArt_20080206_viki_c_sensazioni_by_ingret/OTE=/index.html",
        "#category": ("", "zzup", "gallery"),
        "#class": zzup.ZzupGalleryExtractor,
        "#pattern": r"https://zzup\.com/MjAxNjc3OTIyMjE5Nzk=/showimage/zzup-8769086487/image00\d\d\d-5896498214-1-9689595623/MetArt-20080206_viki_c_sensazioni_by_ingret/9879560327/zzup\.com\.jpg",

        # keys without a "#" prefix -- presumably expected metadata values
        "slug": "MetArt_20080206_viki_c_sensazioni_by_ingret",
        "title": "MetArt 20080206 viki c sensazioni by ingret",
        "num": int,
        "count": 135,
    },
    {
        "#url": "https://zzup.com/content/MTc2MDYxMw==/Courtesan/NDA=/page-1.html",
        "#category": ("", "zzup", "gallery"),
        "#class": zzup.ZzupGalleryExtractor,
        "#pattern": r"https://zzup\.com/MjAxNjc3OTIyMjE5Nzk=/showimage/zzup-8769086487/image000\d\d-5896498214-40-9689595623/Courtesan/9879560327/zzup\.com\.jpg",
    },
)
|
@ -28,6 +28,25 @@ BROKEN = {
|
||||
"photobucket",
|
||||
}
|
||||
|
||||
# Baseline configuration values for the test suite. setup_test_config()
# copies this mapping into the active configuration, and load_test_config()
# merges the contents of an optional JSON file into it.
CONFIG = {
    "cache": {"file": None},
    "downloader": {
        "adjust-extensions": False,
        "part": False,
    },
}
|
||||
|
||||
# Extractor categories whose results are treated as requiring authentication
# whenever a result does not provide an explicit "#auth" value.
AUTH = {
    "pixiv", "nijie", "horne",
    "seiga", "instagram", "twitter",
}
|
||||
|
||||
|
||||
class TestExtractorResults(unittest.TestCase):
|
||||
|
||||
@ -66,6 +85,18 @@ class TestExtractorResults(unittest.TestCase):
|
||||
for key, value in result["#options"].items():
|
||||
key = key.split(".")
|
||||
config.set(key[:-1], key[-1], value)
|
||||
|
||||
requires_auth = result.get("#auth")
|
||||
if requires_auth is None:
|
||||
requires_auth = (result["#category"][1] in AUTH)
|
||||
if requires_auth:
|
||||
extr = result["#class"].from_url(result["#url"])
|
||||
if not any(extr.config(key) for key in (
|
||||
"username", "cookies", "api-key", "client-id")):
|
||||
msg = "no auth"
|
||||
self._skipped.append((result["#url"], msg))
|
||||
self.skipTest(msg)
|
||||
|
||||
if "#range" in result:
|
||||
config.set((), "image-range" , result["#range"])
|
||||
config.set((), "chapter-range", result["#range"])
|
||||
@ -348,56 +379,21 @@ class TestFormatter(formatter.StringFormatter):
|
||||
|
||||
|
||||
def setup_test_config():
|
||||
name = "gallerydl"
|
||||
email = "gallerydl@openaliasbox.org"
|
||||
email2 = "gallerydl@protonmail.com"
|
||||
config._config.update(CONFIG)
|
||||
|
||||
config.clear()
|
||||
config.set(("cache",), "file", None)
|
||||
config.set(("downloader",), "part", False)
|
||||
config.set(("downloader",), "adjust-extensions", False)
|
||||
config.set(("extractor" ,), "timeout" , 60)
|
||||
config.set(("extractor" ,), "username", name)
|
||||
config.set(("extractor" ,), "password", name)
|
||||
|
||||
config.set(("extractor", "nijie") , "username", email)
|
||||
config.set(("extractor", "seiga") , "username", email)
|
||||
config.set(("extractor", "horne") , "username", email2)
|
||||
config.set(("extractor", "pinterest") , "username", email2)
|
||||
config.set(("extractor", "pinterest") , "username", None) # login broken
|
||||
|
||||
config.set(("extractor", "newgrounds"), "username", "d1618111")
|
||||
config.set(("extractor", "newgrounds"), "password", "d1618111")
|
||||
|
||||
config.set(("extractor", "mangoxo") , "username", "LiQiang3")
|
||||
config.set(("extractor", "mangoxo") , "password", "5zbQF10_5u25259Ma")
|
||||
|
||||
for category in ("danbooru", "atfbooru", "aibooru", "booruvar",
|
||||
"e621", "e926", "e6ai",
|
||||
"instagram", "twitter", "subscribestar", "deviantart",
|
||||
"inkbunny", "tapas", "pillowfort", "mangadex",
|
||||
"vipergirls"):
|
||||
config.set(("extractor", category), "username", None)
|
||||
|
||||
config.set(("extractor", "mastodon.social"), "access-token",
|
||||
"Blf9gVqG7GytDTfVMiyYQjwVMQaNACgf3Ds3IxxVDUQ")
|
||||
|
||||
config.set(("extractor", "nana"), "favkey",
|
||||
"9237ddb82019558ea7d179e805100805"
|
||||
"ea6aa1c53ca6885cd4c179f9fb22ead2")
|
||||
|
||||
config.set(("extractor", "deviantart"), "client-id", "7777")
|
||||
config.set(("extractor", "deviantart"), "client-secret",
|
||||
"ff14994c744d9208e5caeec7aab4a026")
|
||||
|
||||
config.set(("extractor", "tumblr"), "api-key",
|
||||
"0cXoHfIqVzMQcc3HESZSNsVlulGxEXGDTTZCDrRrjaa0jmuTc6")
|
||||
config.set(("extractor", "tumblr"), "api-secret",
|
||||
"6wxAK2HwrXdedn7VIoZWxGqVhZ8JdYKDLjiQjL46MLqGuEtyVj")
|
||||
config.set(("extractor", "tumblr"), "access-token",
|
||||
"N613fPV6tOZQnyn0ERTuoEZn0mEqG8m2K8M3ClSJdEHZJuqFdG")
|
||||
config.set(("extractor", "tumblr"), "access-token-secret",
|
||||
"sgOA7ZTT4FBXdOGGVV331sSp0jHYp4yMDRslbhaQf7CaS71i4O")
|
||||
def load_test_config():
    """Merge the optional 'archive/config.json' file into CONFIG.

    The file is looked up in an 'archive' directory located next to the
    directory that contains this module.  A missing file is silently
    ignored; any other error while reading or parsing it terminates the
    process with a descriptive message.
    """
    # Build the path before entering the 'try' block so the error message
    # below can always refer to it.
    path = os.path.join(
        os.path.dirname(os.path.dirname(__file__)),
        "archive", "config.json")

    try:
        # JSON documents are UTF-8 encoded; do not depend on the locale's
        # default encoding.
        with open(path, encoding="utf-8") as fp:
            CONFIG.update(json.load(fp))
    except FileNotFoundError:
        # the config file is optional
        pass
    except Exception as exc:
        sys.exit("Error when loading {}: {}: {}".format(
            path, exc.__class__.__name__, exc))
|
||||
|
||||
|
||||
def generate_tests():
|
||||
@ -442,10 +438,12 @@ def generate_tests():
|
||||
enum[name] += 1
|
||||
|
||||
method = _generate_method(result)
|
||||
method.__doc__ = result["#url"]
|
||||
method.__name__ = "test_{}_{}".format(name, enum[name])
|
||||
setattr(TestExtractorResults, method.__name__, method)
|
||||
|
||||
|
||||
# Test methods are generated at import time, so they exist regardless of
# how this module gets loaded.
generate_tests()

if __name__ == "__main__":
    # Only a direct invocation reads the optional config file before
    # handing control to unittest.
    load_test_config()
    unittest.main(warnings="ignore")
|
||||
|
Loading…
Reference in New Issue
Block a user