
Merge branch 'mikf:master' into feature/patreonPostComments

Krystian Owoc 2024-05-17 17:11:13 +02:00 committed by GitHub
commit e6c948f425
121 changed files with 2649 additions and 777 deletions

View File

@ -1,33 +1,47 @@
name: docker
name: Docker Images
on:
workflow_dispatch:
push:
branches:
- master
tags:
- v[0-9]+.[0-9]+.[0-9]+
permissions:
packages: write
concurrency:
group: docker
cancel-in-progress: false
jobs:
docker:
build:
runs-on: ubuntu-latest
# on release commits, run only for tag event
if: ${{ ! startsWith( github.event.head_commit.message , 'release version ' ) || startsWith( github.ref , 'refs/tags/v' ) }}
steps:
- uses: actions/checkout@v4
# https://github.com/docker/setup-buildx-action
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
# https://github.com/docker/login-action
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
- uses: docker/metadata-action@v5
id: metadata
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GHCR_TOKEN }}
images: |
mikf123/gallery-dl
ghcr.io/mikf/gallery-dl
tags: |
type=ref,event=tag
type=raw,value=dev
type=sha,format=long,prefix=
type=raw,priority=500,value={{date 'YYYY.MM.DD'}}
- uses: docker/setup-qemu-action@v3
- uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v3
@ -35,23 +49,17 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
# https://github.com/docker/metadata-action
- name: Generate Docker tags
uses: docker/metadata-action@v5
id: metadata
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
images: |
mikf123/gallery-dl
ghcr.io/mikf/gallery-dl
tags: |
type=sha,format=long,prefix=
type=ref,event=tag
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GHCR_TOKEN }}
# https://github.com/docker/build-push-action
- name: Build image
uses: docker/build-push-action@v5
- uses: docker/build-push-action@v5
with:
context: .
push: true
tags: ${{ steps.metadata.outputs.tags }}
labels: ${{ steps.metadata.outputs.labels }}
platforms: linux/amd64
platforms: linux/amd64,linux/arm64

View File

@ -1,10 +1,15 @@
name: executables
name: Executables
on:
workflow_dispatch:
push:
branches:
- master
tags-ignore:
- "*"
env:
DATE_FORMAT: "%Y.%m.%d"
jobs:
build:
@ -31,19 +36,58 @@ jobs:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} ${{ matrix.architecture }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
architecture: ${{ matrix.architecture }}
- name: Date
run: echo "DATE=$(date '+${{ env.DATE_FORMAT }}')" >> "$GITHUB_ENV"
- name: Update Version
# use Python since its behavior is consistent across operating systems
shell: python
run: |
import re
path = "./gallery_dl/version.py"
with open(path) as fp:
content = fp.read()
content = re.sub(
r'\b(__version__ = "[^"]+)',
r"\1:${{ env.DATE }}",
content)
with open(path, "w") as fp:
fp.write(content)
- name: Build executable
run: |
pip install requests requests[socks] yt-dlp pyyaml ${{ matrix.python-packages }} pyinstaller
python scripts/pyinstaller.py
python ./scripts/pyinstaller.py --os '${{ matrix.os }}' --arch '${{ matrix.architecture }}'
- name: Upload executable
uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: gallery-dl-${{ matrix.os }}-${{ matrix.architecture }}-${{ matrix.python-version }}
path: |
dist
name: executable-${{ matrix.os }}-${{ matrix.architecture }}-${{ matrix.python-version }}
path: dist/*
retention-days: 1
compression-level: 0
release:
needs: build
runs-on: ubuntu-latest
steps:
- uses: actions/download-artifact@v4
- name: Date
run: echo "DATE=$(date '+${{ env.DATE_FORMAT }}')" >> "$GITHUB_ENV"
- uses: ncipollo/release-action@v1
with:
owner: gdl-org
repo: builds
tag: ${{ env.DATE }}
artifacts: "executable-*/*"
allowUpdates: true
makeLatest: true
token: ${{ secrets.REPO_TOKEN }}

.github/workflows/pages.yml (new file, 56 lines, vendored)
View File

@ -0,0 +1,56 @@
name: GitHub Pages
on:
workflow_dispatch:
push:
branches:
- master
paths:
- docs/**
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: pages
cancel-in-progress: false
jobs:
dispatch:
runs-on: ubuntu-latest
steps:
- name: Dispatch to gdl-org/docs
run: >
curl -L
-X POST
-H "Accept: application/vnd.github+json"
-H "Authorization: Bearer ${{ secrets.REPO_TOKEN }}"
-H "X-GitHub-Api-Version: 2022-11-28"
https://api.github.com/repos/gdl-org/docs/actions/workflows/pages.yml/dispatches
-d '{"ref":"master"}'
deploy:
runs-on: ubuntu-latest
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- uses: actions/checkout@v4
- uses: actions/configure-pages@v4
- name: Copy static files
run: |
mkdir --parents -- ./_site
cp --archive --target-directory=./_site -- \
./docs/oauth-redirect.html
- uses: actions/upload-pages-artifact@v3
- uses: actions/deploy-pages@v4
id: deployment

View File

@ -1,23 +0,0 @@
name: Dispatch GitHub Pages Build
on:
workflow_dispatch:
push:
branches:
- "master"
paths:
- "docs/**"
jobs:
dispatch:
runs-on: ubuntu-latest
steps:
- name: dispatch
run: >
curl -L
-X POST
-H "Accept: application/vnd.github+json"
-H "Authorization: Bearer ${{ secrets.DISPATCH_TOKEN }}"
-H "X-GitHub-Api-Version: 2022-11-28"
https://api.github.com/repos/gdl-org/docs/actions/workflows/pages.yml/dispatches
-d '{"ref":"master"}'

View File

@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.9"]
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.9"]
steps:
- uses: actions/checkout@v4
@ -26,7 +26,7 @@ jobs:
if [[ "$(find ./gallery_dl -type f -not -perm 644)" ]]; then exit 1; fi
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

View File

@ -1,5 +1,89 @@
# Changelog
## 1.26.9 - 2024-03-23
### Extractors
#### Additions
- [artstation] support video clips ([#2566](https://github.com/mikf/gallery-dl/issues/2566), [#3309](https://github.com/mikf/gallery-dl/issues/3309), [#3911](https://github.com/mikf/gallery-dl/issues/3911))
- [artstation] support collections ([#146](https://github.com/mikf/gallery-dl/issues/146))
- [deviantart] recognize `deviantart.com/stash/…` URLs
- [idolcomplex] support new pool URLs
- [lensdump] recognize direct image links ([#5293](https://github.com/mikf/gallery-dl/issues/5293))
- [skeb] add extractor for followed users ([#5290](https://github.com/mikf/gallery-dl/issues/5290))
- [twitter] add `quotes` extractor ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
- [wikimedia] support `azurlane.koumakan.jp` ([#5256](https://github.com/mikf/gallery-dl/issues/5256))
- [xvideos] support `/channels/` URLs ([#5244](https://github.com/mikf/gallery-dl/issues/5244))
#### Fixes
- [artstation] fix handling usernames with dashes in domain names ([#5224](https://github.com/mikf/gallery-dl/issues/5224))
- [bluesky] fix not spawning child extractors for followed users ([#5246](https://github.com/mikf/gallery-dl/issues/5246))
- [deviantart] handle CloudFront blocks ([#5363](https://github.com/mikf/gallery-dl/issues/5363))
- [deviantart:avatar] fix `index` for URLs without `?` ([#5276](https://github.com/mikf/gallery-dl/issues/5276))
- [deviantart:stash] fix `index` values ([#5335](https://github.com/mikf/gallery-dl/issues/5335))
- [gofile] fix extraction
- [hiperdex] update URL patterns & fix `manga` metadata ([#5340](https://github.com/mikf/gallery-dl/issues/5340))
- [idolcomplex] fix metadata extraction
- [imagefap] fix folder extraction ([#5333](https://github.com/mikf/gallery-dl/issues/5333))
- [instagram] make accessing `like_count` non-fatal ([#5218](https://github.com/mikf/gallery-dl/issues/5218))
- [mastodon] fix handling null `moved` account field ([#5321](https://github.com/mikf/gallery-dl/issues/5321))
- [naver] fix EUC-KR encoding issue in old image URLs ([#5126](https://github.com/mikf/gallery-dl/issues/5126))
- [nijie] increase default delay between requests ([#5221](https://github.com/mikf/gallery-dl/issues/5221))
- [nitter] ignore invalid Tweets ([#5253](https://github.com/mikf/gallery-dl/issues/5253))
- [pixiv:novel] fix text extraction ([#5285](https://github.com/mikf/gallery-dl/issues/5285), [#5309](https://github.com/mikf/gallery-dl/issues/5309))
- [skeb] retry 429 responses containing a `request_key` cookie ([#5210](https://github.com/mikf/gallery-dl/issues/5210))
- [warosu] fix crash for threads with deleted posts ([#5289](https://github.com/mikf/gallery-dl/issues/5289))
- [weibo] fix retweets ([#2825](https://github.com/mikf/gallery-dl/issues/2825), [#3874](https://github.com/mikf/gallery-dl/issues/3874), [#5263](https://github.com/mikf/gallery-dl/issues/5263))
- [weibo] fix `livephoto` filename extensions ([#5287](https://github.com/mikf/gallery-dl/issues/5287))
- [xvideos] fix galleries with more than 500 images ([#5244](https://github.com/mikf/gallery-dl/issues/5244))
#### Improvements
- [bluesky] improve API error messages
- [bluesky] handle posts with different `embed` structure
- [deviantart:avatar] ignore default avatars ([#5276](https://github.com/mikf/gallery-dl/issues/5276))
- [fapello] download full-sized images ([#5349](https://github.com/mikf/gallery-dl/issues/5349))
- [gelbooru:favorite] automatically detect returned post order ([#5220](https://github.com/mikf/gallery-dl/issues/5220))
- [imgur] fail downloads when redirected to `removed.png` ([#5308](https://github.com/mikf/gallery-dl/issues/5308))
- [instagram] raise proper error for missing `reels_media` ([#5257](https://github.com/mikf/gallery-dl/issues/5257))
- [instagram] change `posts are private` exception to a warning ([#5322](https://github.com/mikf/gallery-dl/issues/5322))
- [reddit] improve preview fallback formats ([#5296](https://github.com/mikf/gallery-dl/issues/5296), [#5315](https://github.com/mikf/gallery-dl/issues/5315))
- [steamgriddb] raise exception for deleted assets
- [twitter] handle "account is temporarily locked" errors ([#5300](https://github.com/mikf/gallery-dl/issues/5300))
- [weibo] rework pagination logic ([#4168](https://github.com/mikf/gallery-dl/issues/4168))
- [zerochan] fetch more posts by using the API ([#3669](https://github.com/mikf/gallery-dl/issues/3669))
#### Metadata
- [bluesky] add `instance` metadata field ([#4438](https://github.com/mikf/gallery-dl/issues/4438))
- [gelbooru:favorite] add `date_favorited` metadata field
- [imagefap] extract `folder` metadata ([#5270](https://github.com/mikf/gallery-dl/issues/5270))
- [instagram] default `likes` to `0` ([#5323](https://github.com/mikf/gallery-dl/issues/5323))
- [kemonoparty] add `revision_count` metadata field ([#5334](https://github.com/mikf/gallery-dl/issues/5334))
- [naver] unescape post `title` and `description`
- [pornhub:gif] extract `viewkey` and `timestamp` metadata ([#4463](https://github.com/mikf/gallery-dl/issues/4463))
- [redgifs] make `date` available for directories ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
- [subscribestar] fix `date` metadata
- [twitter] add `birdwatch` metadata field ([#5317](https://github.com/mikf/gallery-dl/issues/5317))
- [twitter] add `protected` metadata field ([#5327](https://github.com/mikf/gallery-dl/issues/5327))
- [warosu] fix `board_name` metadata
#### Options
- [bluesky] add `reposts` option ([#4438](https://github.com/mikf/gallery-dl/issues/4438), [#5248](https://github.com/mikf/gallery-dl/issues/5248))
- [deviantart] add `comments-avatars` option ([#4995](https://github.com/mikf/gallery-dl/issues/4995))
- [deviantart] extend `metadata` option ([#5175](https://github.com/mikf/gallery-dl/issues/5175))
- [flickr] add `contexts` option ([#5324](https://github.com/mikf/gallery-dl/issues/5324))
- [gelbooru:favorite] add `order-posts` option ([#5220](https://github.com/mikf/gallery-dl/issues/5220))
- [kemonoparty] add `order-revisions` option ([#5334](https://github.com/mikf/gallery-dl/issues/5334))
- [vipergirls] add `like` option ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
- [vipergirls] add `domain` option ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
### Downloaders
- [http] add MIME type and signature for `.mov` files ([#5287](https://github.com/mikf/gallery-dl/issues/5287))
### Docker
- build images from source instead of PyPI package
- build `linux/arm64` images ([#5227](https://github.com/mikf/gallery-dl/issues/5227))
- build images on every push to master
- tag images as `YYYY.MM.DD`
- tag the most recent build from master as `dev`
- tag the most recent release build as `latest`
- reduce image size ([#5097](https://github.com/mikf/gallery-dl/issues/5097))
### Miscellaneous
- [formatter] fix local DST datetime offsets for `:O`
- build Linux executable on Ubuntu 22.04 LTS ([#4184](https://github.com/mikf/gallery-dl/issues/4184))
- automatically create directories for logging files ([#5249](https://github.com/mikf/gallery-dl/issues/5249))
## 1.26.8 - 2024-02-17
### Extractors
#### Additions

View File

@ -1,7 +1,21 @@
FROM python:alpine
RUN python3 -m pip install --no-cache-dir -U pip && \
python3 -m pip install --no-cache-dir -U gallery-dl yt-dlp
RUN apk update && \
apk add --no-cache ffmpeg && \
rm -rf /var/cache/apk/*
ENV LANG=C.UTF-8
RUN : \
&& apk --no-interactive update \
&& apk --no-cache --no-interactive add ffmpeg \
&& rm -rf /var/cache/apk \
&& :
RUN : \
&& python3 -B -m pip --no-cache-dir --no-input --disable-pip-version-check install -U \
pip \
&& python3 -B -m pip --no-cache-dir --no-input --disable-pip-version-check install -U \
https://github.com/mikf/gallery-dl/archive/refs/heads/master.tar.gz \
yt-dlp \
&& rm -rf /root/.cache/pip \
&& find /usr/local/lib/python3.*/site-packages/setuptools -name __pycache__ -exec rm -rf {} + \
&& find /usr/local/lib/python3.*/site-packages/wheel -name __pycache__ -exec rm -rf {} + \
&& :
ENTRYPOINT [ "gallery-dl" ]

View File

@ -7,8 +7,8 @@ to download image galleries and collections
from several image hosting sites
(see `Supported Sites <docs/supportedsites.md>`__).
It is a cross-platform tool
with many `configuration options <docs/configuration.rst>`__
and powerful `filenaming capabilities <docs/formatting.md>`__.
with many `configuration options <https://gdl-org.github.io/docs/configuration.html>`__
and powerful `filenaming capabilities <https://gdl-org.github.io/docs/formatting.html>`__.
|pypi| |build|
@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for
- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.exe>`__
- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.bin>`__
- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.bin>`__
Nightly Builds
@ -234,7 +234,7 @@ Documentation
-------------
A list of all available configuration options and their descriptions
can be found in `<docs/configuration.rst>`__.
can be found at `<https://gdl-org.github.io/docs/configuration.html>`__.
| For a default configuration file with available options set to their
default values, see `<docs/gallery-dl.conf>`__.
@ -330,7 +330,7 @@ CAPTCHA or similar, or has not been implemented yet, you can use the
cookies from a browser login session and input them into *gallery-dl*.
This can be done via the
`cookies <docs/configuration.rst#extractorcookies>`__
`cookies <https://gdl-org.github.io/docs/configuration.html#extractor-cookies>`__
option in your configuration file by specifying
- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon

View File

@ -0,0 +1,20 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
{% seo %}
<link rel="stylesheet" href="{{ "/assets/css/style.css?v=" | append: site.github.build_revision | relative_url }}">
<script src="links.js"></script>
</head>
<body>
<div class="container-lg px-3 my-5 markdown-body">
{{ content }}
</div>
</body>
</html>

View File

@ -337,6 +337,15 @@ Description
filename extension (``file.1.ext``, ``file.2.ext``, etc.)
extractor.*.skip-filter
-----------------------
Type
``string``
Description
Python expression controlling which skipped files to count towards
``"abort"`` / ``"terminate"`` / ``"exit"``.
extractor.*.sleep
-----------------
Type
@ -358,12 +367,39 @@ Description
i.e. before starting a new extractor.
extractor.*.sleep-429
---------------------
Type
|Duration|_
Default
``60``
Description
Number of seconds to sleep when receiving a `429 Too Many Requests`
response before `retrying <extractor.*.retries_>`__ the request.
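
For illustration, a snippet that raises this delay and the retry count globally might look like the following (a sketch assuming the usual ``gallery-dl.conf`` structure; the values are arbitrary examples):

.. code:: json

    {
        "extractor": {
            "sleep-429": "300-600",
            "retries": 5
        }
    }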
extractor.*.sleep-request
-------------------------
Type
|Duration|_
Default
``0``
* ``"0.5-1.5"``
``[Danbooru]``, ``[E621]``, ``[foolfuuka]:search``, ``itaku``,
``newgrounds``, ``[philomena]``, ``pixiv:novel``, ``plurk``,
``poipiku``, ``pornpics``, ``soundgasm``, ``urlgalleries``,
``vk``, ``zerochan``
* ``"1.0-2.0"``
``flickr``, ``weibo``, ``[wikimedia]``
* ``"2.0-4.0"``
``behance``, ``imagefap``, ``[Nijie]``
* ``"3.0-6.0"``
``exhentai``, ``idolcomplex``, ``[reactor]``, ``readcomiconline``
* ``"6.0-6.1"``
``twibooru``
* ``"6.0-12.0"``
``instagram``
* ``0``
otherwise
Description
Minimal time interval in seconds between each HTTP request
during data extraction.
@ -382,6 +418,7 @@ Description
Specifying username and password is required for
* ``nijie``
* ``horne``
and optional for
@ -389,8 +426,12 @@ Description
* ``aryion``
* ``atfbooru`` (*)
* ``bluesky``
* ``booruvar`` (*)
* ``coomerparty``
* ``danbooru`` (*)
* ``deviantart``
* ``e621`` (*)
* ``e6ai`` (*)
* ``e926`` (*)
* ``exhentai``
* ``idolcomplex``
@ -401,7 +442,6 @@ Description
* ``mangoxo``
* ``pillowfort``
* ``sankaku``
* ``seisoparty``
* ``subscribestar``
* ``tapas``
* ``tsumino``
@ -417,7 +457,7 @@ Description
the API key found in your user profile, not the actual account password.
Note: Leave the ``password`` value empty or undefined
to get prompted for a password when performing a login
to be prompted for a password when performing a login
(see `getpass() <https://docs.python.org/3/library/getpass.html#getpass.getpass>`__).
@ -557,8 +597,8 @@ extractor.*.browser
Type
``string``
Default
* ``"firefox"`` for ``patreon``, ``mangapark``, and ``mangasee``
* ``null`` everywhere else
* ``"firefox"``: ``artstation``, ``mangasee``, ``patreon``, ``pixiv:series``, ``twitter``
* ``null``: otherwise
Example
* ``"chrome:macos"``
Description
@ -633,8 +673,8 @@ extractor.*.tls12
Type
``bool``
Default
* ``true``
* ``false`` for ``patreon``, ``pixiv:series``
* ``false``: ``patreon``, ``pixiv:series``
* ``true``: otherwise
Description
Allow selecting TLS 1.2 cipher suites.
@ -813,6 +853,22 @@ Description
An alternative `format string`_ to build archive IDs with.
extractor.*.archive-mode
------------------------
Type
``string``
Default
``"file"``
Description
Controls when to write `archive IDs <extractor.*.archive-format_>`__
to the archive database.
* ``"file"``: Write IDs immediately
after completing or skipping a file download.
* ``"memory"``: Keep IDs in memory
and only write them after successful job completion.
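
As an illustration, a sketch of a configuration combining this with the ``archive`` option (the path is only a placeholder):

.. code:: json

    {
        "extractor": {
            "archive": "~/gallery-dl/archive.sqlite3",
            "archive-mode": "memory"
        }
    }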
extractor.*.archive-prefix
--------------------------
Type
@ -836,6 +892,65 @@ Description
for available ``PRAGMA`` statements and further details.
extractor.*.actions
-------------------
Type
* ``object`` (`pattern` -> `action`)
* ``list`` of ``lists`` with 2 ``strings`` as elements
Example
.. code:: json
{
"error" : "status |= 1",
"warning:(?i)unable to .+": "exit 127",
"info:Logging in as .+" : "level = debug"
}
.. code:: json
[
["error" , "status |= 1" ],
["warning:(?i)unable to .+", "exit 127" ],
["info:Logging in as .+" , "level = debug"]
]
Description
Perform an ``action`` when logging a message matched by ``pattern``.
``pattern`` is parsed as severity level (``debug``, ``info``, ``warning``, ``error``, or integer value)
followed by an optional `Python Regular Expression <https://docs.python.org/3/library/re.html#regular-expression-syntax>`__
separated by a colon ``:``.
Using ``*`` as `level` or leaving it empty
matches logging messages of all levels
(e.g. ``*:<re>`` or ``:<re>``).
``action`` is parsed as action type
followed by (optional) arguments.
Supported Action Types:
``status``:
| Modify job exit status.
| Expected syntax is ``<operator> <value>`` (e.g. ``= 100``).
Supported operators are
``=`` (assignment),
``&`` (bitwise AND),
``|`` (bitwise OR),
``^`` (bitwise XOR).
``level``:
| Modify severity level of the current logging message.
| Can be one of ``debug``, ``info``, ``warning``, ``error`` or an integer value.
``print``:
Write argument to stdout.
``restart``:
Restart the current extractor run.
``wait``:
Stop execution until Enter is pressed.
``exit``:
Exit the program with the given argument as exit status.
extractor.*.postprocessors
--------------------------
Type
@ -1872,6 +1987,20 @@ Description
from `linking your Flickr account to gallery-dl <OAuth_>`__.
extractor.flickr.contexts
-------------------------
Type
``bool``
Default
``false``
Description
For each photo, return the albums and pools it belongs to
as ``set`` and ``pool`` metadata.
Note: This requires 1 additional API call per photo.
See `flickr.photos.getAllContexts <https://www.flickr.com/services/api/flickr.photos.getAllContexts.html>`__ for details.
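
A minimal sketch enabling this together with ``exif`` (both add one extra API call per photo):

.. code:: json

    {
        "extractor": {
            "flickr": {
                "contexts": true,
                "exif": true
            }
        }
    }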
extractor.flickr.exif
---------------------
Type
@ -1879,9 +2008,11 @@ Type
Default
``false``
Description
Fetch `exif` and `camera` metadata for each photo.
For each photo, return its EXIF/TIFF/GPS tags
as ``exif`` and ``camera`` metadata.
Note: This requires 1 additional API call per photo.
See `flickr.photos.getExif <https://www.flickr.com/services/api/flickr.photos.getExif.html>`__ for details.
extractor.flickr.metadata
@ -1901,7 +2032,7 @@ Description
It is possible to specify a custom list of metadata includes.
See `the extras parameter <https://www.flickr.com/services/api/flickr.people.getPhotos.html>`__
in `Flickr API docs <https://www.flickr.com/services/api/>`__
in `Flickr's API docs <https://www.flickr.com/services/api/>`__
for possible field names.
@ -2001,6 +2132,20 @@ Description
page.
extractor.gelbooru.favorite.order-posts
---------------------------------------
Type
``string``
Default
``"desc"``
Description
Controls the order in which favorited posts are returned.
* ``"asc"``: Ascending favorite date order (oldest first)
* ``"desc"``: Descending favorite date order (newest first)
* ``"reverse"``: Same as ``"asc"``
extractor.generic.enabled
-------------------------
Type
@ -2287,6 +2432,16 @@ Description
Extract a user's direct messages as ``dms`` metadata.
extractor.kemonoparty.announcements
-----------------------------------
Type
``bool``
Default
``false``
Description
Extract a user's announcements as ``announcements`` metadata.
extractor.kemonoparty.favorites
-------------------------------
Type
@ -2346,6 +2501,22 @@ Description
Note: This requires 1 additional HTTP request per post.
extractor.kemonoparty.order-revisions
-------------------------------------
Type
``string``
Default
``"desc"``
Description
Controls the order in which
`revisions <extractor.kemonoparty.revisions_>`__
are returned.
* ``"asc"``: Ascending order (oldest first)
* ``"desc"``: Descending order (newest first)
* ``"reverse"``: Same as ``"asc"``
extractor.khinsider.format
--------------------------
Type
@ -2470,6 +2641,16 @@ Description
user IDs.
extractor.[mastodon].cards
--------------------------
Type
``bool``
Default
``false``
Description
Fetch media from cards.
extractor.[mastodon].reblogs
----------------------------
Type
@ -2829,14 +3010,24 @@ Description
`gppt <https://github.com/eggplants/get-pixivpy-token>`__.
extractor.pixiv.embeds
----------------------
extractor.pixiv.novel.covers
----------------------------
Type
``bool``
Default
``false``
Description
Download images embedded in novels.
Download cover images.
extractor.pixiv.novel.embeds
----------------------------
Type
``bool``
Default
``false``
Description
Download embedded images.
extractor.pixiv.novel.full-series
@ -3286,7 +3477,7 @@ Examples
* ``["jpeg", "webp"]``
Description
Only include assets that are in the specified file types. ``all`` can be
used to specifiy all file types. Valid values are:
used to specify all file types. Valid values are:
* Grids: ``png``, ``jpeg``, ``jpg``, ``webp``
* Heroes: ``png``, ``jpeg``, ``jpg``, ``webp``
@ -3326,7 +3517,7 @@ Examples
* ``["fr", "it"]``
Description
Only include assets that are in the specified languages. ``all`` can be
used to specifiy all languages. Valid values are `ISO 639-1 <https://en.wikipedia.org/wiki/ISO_639-1>`__
used to specify all languages. Valid values are `ISO 639-1 <https://en.wikipedia.org/wiki/ISO_639-1>`__
language codes.
@ -3771,6 +3962,32 @@ Description
* ``"wait"``: Wait until rate limit reset
extractor.twitter.relogin
-------------------------
Type
``bool``
Default
``true``
Description
| When receiving a "Could not authenticate you" error while logged in with
`username & password <extractor.*.username & .password_>`__,
| refresh the current login session and
try to continue from where it left off.
extractor.twitter.locked
------------------------
Type
``string``
Default
``"abort"``
Description
Selects how to handle "account is temporarily locked" errors.
* ``"abort"``: Raise an error and stop extraction
* ``"wait"``: Wait until the account is unlocked and retry
extractor.twitter.replies
-------------------------
Type
@ -3909,6 +4126,31 @@ Description
``"raw"``, ``"full"``, ``"regular"``, ``"small"``, and ``"thumb"``.
extractor.vipergirls.domain
---------------------------
Type
``string``
Default
``"vipergirls.to"``
Description
Specifies the domain used by ``vipergirls`` extractors.
For example ``"viper.click"`` if the main domain is blocked or to bypass Cloudflare,
extractor.vipergirls.like
-------------------------
Type
``bool``
Default
``false``
Description
Automatically `like` posts after downloading their images.
Note: Requires `login <extractor.*.username & .password_>`__
or `cookies <extractor.*.cookies_>`__.
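
A combined sketch using the alternative domain from the example above and automatic likes (``like`` requires login credentials or cookies):

.. code:: json

    {
        "extractor": {
            "vipergirls": {
                "domain": "viper.click",
                "like": true
            }
        }
    }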
extractor.vsco.videos
---------------------
Type
@ -4039,7 +4281,7 @@ extractor.weibo.retweets
Type
``bool``
Default
``true``
``false``
Description
Fetch media from retweeted posts.
@ -4714,10 +4956,33 @@ output.colors
Type
``object`` (`key` -> `ANSI color`)
Default
``{"success": "1;32", "skip": "2"}``
.. code:: json
{
"success": "1;32",
"skip" : "2",
"debug" : "0;37",
"info" : "1;37",
"warning": "1;33",
"error" : "1;31"
}
Description
Controls the `ANSI colors <https://gist.github.com/fnky/458719343aabd01cfb17a3a4f7296797#colors--graphics-mode>`__
used with |mode: color|__ for successfully downloaded or skipped files.
Controls the
`ANSI colors <https://gist.github.com/fnky/458719343aabd01cfb17a3a4f7296797#colors--graphics-mode>`__
used for various outputs.
Output for |mode: color|__
* ``success``: successfully downloaded files
* ``skip``: skipped files
Logging Messages:
* ``debug``: debug logging messages
* ``info``: info logging messages
* ``warning``: warning logging messages
* ``error``: error logging messages
.. __: `output.mode`_
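
For example, a sketch of a complete ``colors`` object that uses non-bold warning and error colors (the values are standard ANSI SGR codes):

.. code:: json

    {
        "output": {
            "colors": {
                "success": "1;32",
                "skip"   : "2",
                "debug"  : "0;37",
                "info"   : "1;37",
                "warning": "0;33",
                "error"  : "0;31"
            }
        }
    }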
@ -4727,7 +4992,7 @@ output.ansi
Type
``bool``
Default
``false``
``true``
Description
| On Windows, enable ANSI escape sequences and colored output
| by setting the ``ENABLE_VIRTUAL_TERMINAL_PROCESSING`` flag for stdout and stderr.
@ -5784,7 +6049,7 @@ How To
* choose a name
* select "installed app"
* set ``http://localhost:6414/`` as "redirect uri"
* solve the "I'm not a rebot" reCATCHA if needed
* solve the "I'm not a robot" reCAPTCHA if needed
* click "create app"
* copy the client id (third line, under your application's name and
@ -5932,7 +6197,7 @@ Description
* format
* General format string for logging messages
or a dictionary with format strings for each loglevel.
or an ``object`` with format strings for each loglevel.
In addition to the default
`LogRecord attributes <https://docs.python.org/3/library/logging.html#logrecord-attributes>`__,

docs/links.js (new file, 44 lines)
View File

@ -0,0 +1,44 @@
"use strict";
function add_header_links()
{
let style = document.createElement("style");
style.id = "headerlinks"
document.head.appendChild(style);
style.sheet.insertRule(
"a.headerlink {" +
" visibility: hidden;" +
" text-decoration: none;" +
" font-size: 0.8em;" +
" padding: 0 4px 0 4px;" +
"}");
style.sheet.insertRule(
":hover > a.headerlink {" +
" visibility: visible;" +
"}");
let headers = document.querySelectorAll("h2, h3, h4, h5, h6");
for (let i = 0, len = headers.length; i < len; ++i)
{
let header = headers[i];
let id = header.id || header.parentNode.id;
if (!id)
continue;
let link = document.createElement("a");
link.href = "#" + id;
link.className = "headerlink";
link.textContent = "¶";
header.appendChild(link);
}
}
if (document.readyState !== "loading") {
add_header_links();
} else {
document.addEventListener("DOMContentLoaded", add_header_links);
}

View File

@ -29,6 +29,7 @@
## Output Options:
-q, --quiet Activate quiet mode
-w, --warning Print only warnings and errors
-v, --verbose Print various debugging information
-g, --get-urls Print URLs instead of downloading
-G, --resolve-urls Print URLs instead of downloading; resolve
@ -48,12 +49,12 @@
extractors but cannot be handled, to FILE
--write-pages Write downloaded intermediary pages to files in
the current directory to debug problems
--no-colors Do not emit ANSI color codes in output
## Downloader Options:
-r, --limit-rate RATE Maximum download rate (e.g. 500k or 2.5M)
-R, --retries N Maximum number of retries for failed HTTP
requests or -1 for infinite retries (default:
4)
requests or -1 for infinite retries (default: 4)
--http-timeout SECONDS Timeout for HTTP connections (default: 30.0)
--sleep SECONDS Number of seconds to wait before each download.
This can be either a constant value or a range

View File

@ -790,7 +790,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>Skeb</td>
<td>https://skeb.jp/</td>
<td>Followed Users, Posts, Search Results, User Profiles</td>
<td>Followed Creators, Followed Users, Posts, Search Results, User Profiles</td>
<td></td>
</tr>
<tr>
@ -838,7 +838,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>Tapas</td>
<td>https://tapas.io/</td>
<td>Episodes, Series</td>
<td>Creators, Episodes, Series</td>
<td>Supported</td>
</tr>
<tr>
@ -898,7 +898,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>Twitter</td>
<td>https://twitter.com/</td>
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles</td>
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
<td>Supported</td>
</tr>
<tr>
@ -940,14 +940,14 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>VSCO</td>
<td>https://vsco.co/</td>
<td>Collections, individual Images, Spaces, User Profiles</td>
<td>Avatars, Collections, individual Images, Spaces, User Profiles</td>
<td></td>
</tr>
<tr>
<td>Wallhaven</td>
<td>https://wallhaven.cc/</td>
<td>Collections, individual Images, Search Results, User Profiles</td>
<td><a href="configuration.rst#extractorwallhavenapi-key">API Key</a></td>
<td><a href="https://gdl-org.github.io/docs/configuration.html#extractor-wallhaven-api-key">API Key</a></td>
</tr>
<tr>
<td>Wallpaper Cave</td>
@ -965,7 +965,7 @@ Consider all listed sites to potentially be NSFW.
<td>Weasyl</td>
<td>https://www.weasyl.com/</td>
<td>Favorites, Folders, Journals, Submissions</td>
<td><a href="configuration.rst#extractorweasylapi-key">API Key</a></td>
<td><a href="https://gdl-org.github.io/docs/configuration.html#extractor-weasyl-api-key">API Key</a></td>
</tr>
<tr>
<td>webmshare</td>
@ -1103,7 +1103,7 @@ Consider all listed sites to potentially be NSFW.
<td>Booruvar</td>
<td>https://booru.borvar.art/</td>
<td>Pools, Popular Images, Posts, Tag Searches</td>
<td></td>
<td>Supported</td>
</tr>
<tr>
@ -1125,7 +1125,7 @@ Consider all listed sites to potentially be NSFW.
<td>e6AI</td>
<td>https://e6ai.net/</td>
<td>Favorites, Pools, Popular Images, Posts, Tag Searches</td>
<td></td>
<td>Supported</td>
</tr>
<tr>
@ -1319,7 +1319,7 @@ Consider all listed sites to potentially be NSFW.
<td>Derpibooru</td>
<td>https://derpibooru.org/</td>
<td>Galleries, Posts, Search Results</td>
<td><a href="configuration.rst#extractorderpibooruapi-key">API Key</a></td>
<td><a href="https://gdl-org.github.io/docs/configuration.html#extractor-derpibooru-api-key">API Key</a></td>
</tr>
<tr>
<td>Ponybooru</td>
@ -1331,7 +1331,7 @@ Consider all listed sites to potentially be NSFW.
<td>Furbooru</td>
<td>https://furbooru.org/</td>
<td>Galleries, Posts, Search Results</td>
<td></td>
<td>API Key</td>
</tr>
<tr>
@ -1499,6 +1499,12 @@ Consider all listed sites to potentially be NSFW.
<td>Articles</td>
<td></td>
</tr>
<tr>
<td>wiki.gg</td>
<td>https://www.wiki.gg/</td>
<td>Articles</td>
<td></td>
</tr>
<tr>
<td>Super Mario Wiki</td>
<td>https://www.mariowiki.com/</td>
@ -1616,19 +1622,19 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>mastodon.social</td>
<td>https://mastodon.social/</td>
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
<td>Bookmarks, Favorites, Followed Users, Hashtags, Lists, Images from Statuses, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>
<tr>
<td>Pawoo</td>
<td>https://pawoo.net/</td>
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
<td>Bookmarks, Favorites, Followed Users, Hashtags, Lists, Images from Statuses, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>
<tr>
<td>baraag</td>
<td>https://baraag.net/</td>
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
<td>Bookmarks, Favorites, Followed Users, Hashtags, Lists, Images from Statuses, User Profiles</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>

View File

@ -38,6 +38,11 @@ def main():
except ImportError:
import toml
config.load(args.configs_toml, strict=True, loads=toml.loads)
if not args.colors:
output.ANSI = False
config.set((), "colors", False)
if util.WINDOWS:
config.set(("output",), "ansi", False)
if args.filename:
filename = args.filename
if filename == "/O":
@ -86,7 +91,7 @@ def main():
signal.signal(signal_num, signal.SIG_IGN)
# enable ANSI escape sequences on Windows
if util.WINDOWS and config.get(("output",), "ansi"):
if util.WINDOWS and config.get(("output",), "ansi", output.COLORS):
from ctypes import windll, wintypes, byref
kernel32 = windll.kernel32
mode = wintypes.DWORD()
@ -113,7 +118,7 @@ def main():
# loglevels
output.configure_logging(args.loglevel)
if args.loglevel >= logging.ERROR:
if args.loglevel >= logging.WARNING:
config.set(("output",), "mode", "null")
config.set(("downloader",), "progress", None)
elif args.loglevel <= logging.DEBUG:

gallery_dl/archive.py (new file, 98 lines)
View File

@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
# Copyright 2024 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Download Archives"""
import os
import sqlite3
from . import formatter
class DownloadArchive():
def __init__(self, path, format_string, pragma=None,
cache_key="_archive_key"):
try:
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
except sqlite3.OperationalError:
os.makedirs(os.path.dirname(path))
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
con.isolation_level = None
self.keygen = formatter.parse(format_string).format_map
self.connection = con
self.close = con.close
self.cursor = cursor = con.cursor()
self._cache_key = cache_key
if pragma:
for stmt in pragma:
cursor.execute("PRAGMA " + stmt)
try:
cursor.execute("CREATE TABLE IF NOT EXISTS archive "
"(entry TEXT PRIMARY KEY) WITHOUT ROWID")
except sqlite3.OperationalError:
# fallback for missing WITHOUT ROWID support (#553)
cursor.execute("CREATE TABLE IF NOT EXISTS archive "
"(entry TEXT PRIMARY KEY)")
def add(self, kwdict):
"""Add item described by 'kwdict' to archive"""
key = kwdict.get(self._cache_key) or self.keygen(kwdict)
self.cursor.execute(
"INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))
def check(self, kwdict):
"""Return True if the item described by 'kwdict' exists in archive"""
key = kwdict[self._cache_key] = self.keygen(kwdict)
self.cursor.execute(
"SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
return self.cursor.fetchone()
def finalize(self):
pass
class DownloadArchiveMemory(DownloadArchive):
def __init__(self, path, format_string, pragma=None,
cache_key="_archive_key"):
DownloadArchive.__init__(self, path, format_string, pragma, cache_key)
self.keys = set()
def add(self, kwdict):
self.keys.add(
kwdict.get(self._cache_key) or
self.keygen(kwdict))
def check(self, kwdict):
key = kwdict[self._cache_key] = self.keygen(kwdict)
if key in self.keys:
return True
self.cursor.execute(
"SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
return self.cursor.fetchone()
def finalize(self):
if not self.keys:
return
cursor = self.cursor
with self.connection:
try:
cursor.execute("BEGIN")
except sqlite3.OperationalError:
pass
stmt = "INSERT OR IGNORE INTO archive (entry) VALUES (?)"
if len(self.keys) < 100:
for key in self.keys:
cursor.execute(stmt, (key,))
else:
cursor.executemany(stmt, ((key,) for key in self.keys))

View File

@ -10,7 +10,6 @@
# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/cookies.py
import binascii
import contextlib
import ctypes
import logging
import os
@ -147,7 +146,8 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None,
set_cookie(Cookie(
0, name, value, None, False,
domain, bool(domain), domain.startswith("."),
path, bool(path), secure, expires, False, None, None, {},
path, bool(path), secure, expires or None, False,
None, None, {},
))
if failed_cookies > 0:
@ -682,7 +682,8 @@ def _get_gnome_keyring_password(browser_keyring_name):
# lists all keys and presumably searches for its key in the list.
# It appears that we must do the same.
# https://github.com/jaraco/keyring/issues/556
with contextlib.closing(secretstorage.dbus_init()) as con:
con = secretstorage.dbus_init()
try:
col = secretstorage.get_default_collection(con)
label = browser_keyring_name + " Safe Storage"
for item in col.get_all_items():
@ -691,6 +692,8 @@ def _get_gnome_keyring_password(browser_keyring_name):
else:
_log_error("Failed to read from GNOME keyring")
return b""
finally:
con.close()
def _get_linux_keyring_password(browser_keyring_name, keyring):
@ -857,7 +860,7 @@ class DatabaseConnection():
def Popen_communicate(*args):
proc = subprocess.Popen(
proc = util.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
try:
stdout, stderr = proc.communicate()
@ -999,6 +1002,12 @@ def _decrypt_windows_dpapi(ciphertext):
def _find_most_recently_used_file(root, filename):
# if the provided root points to an exact profile path
# check if it contains the wanted filename
first_choice = os.path.join(root, filename)
if os.path.exists(first_choice):
return first_choice
# if there are multiple browser profiles, take the most recently used one
paths = []
for curr_root, dirs, files in os.walk(root):

View File

@ -98,6 +98,8 @@ class HttpDownloader(DownloaderBase):
metadata = self.metadata
kwdict = pathfmt.kwdict
expected_status = kwdict.get(
"_http_expected_status", ())
adjust_extension = kwdict.get(
"_http_adjust_extension", self.adjust_extension)
@ -151,7 +153,7 @@ class HttpDownloader(DownloaderBase):
# check response
code = response.status_code
if code == 200: # OK
if code == 200 or code in expected_status: # OK
offset = 0
size = response.headers.get("Content-Length")
elif code == 206: # Partial Content
@ -399,6 +401,9 @@ MIME_TYPES = {
"video/webm": "webm",
"video/ogg" : "ogg",
"video/mp4" : "mp4",
"video/m4v" : "m4v",
"video/x-m4v": "m4v",
"video/quicktime": "mov",
"audio/wav" : "wav",
"audio/x-wav": "wav",
@ -440,7 +445,9 @@ SIGNATURE_CHECKS = {
"cur" : lambda s: s[0:4] == b"\x00\x00\x02\x00",
"psd" : lambda s: s[0:4] == b"8BPS",
"mp4" : lambda s: (s[4:8] == b"ftyp" and s[8:11] in (
b"mp4", b"avc", b"iso", b"M4V")),
b"mp4", b"avc", b"iso")),
"m4v" : lambda s: s[4:11] == b"ftypM4V",
"mov" : lambda s: s[4:12] == b"ftypqt ",
"webm": lambda s: s[0:4] == b"\x1A\x45\xDF\xA3",
"ogg" : lambda s: s[0:4] == b"OggS",
"wav" : lambda s: (s[0:4] == b"RIFF" and

View File

@ -26,6 +26,9 @@ class _8chanExtractor(Extractor):
self.root = "https://8chan." + match.group(1)
Extractor.__init__(self, match)
def _init(self):
self.cookies.set("TOS", "1", domain=self.root.rpartition("/")[2])
@memcache()
def cookies_prepare(self):
# fetch captcha cookies

View File

@ -40,6 +40,7 @@ class BlueskyExtractor(Extractor):
self.api = BlueskyAPI(self)
self._user = self._user_did = None
self.instance = self.root.partition("://")[2]
def items(self):
for post in self.posts():
@ -81,6 +82,7 @@ class BlueskyExtractor(Extractor):
if self._metadata_user:
post["user"] = self._user or post["author"]
post["instance"] = self.instance
post["post_id"] = pid
post["count"] = len(images)
post["date"] = text.parse_datetime(
@ -315,7 +317,7 @@ class BlueskyAPI():
def get_author_feed(self, actor, filter="posts_and_author_threads"):
endpoint = "app.bsky.feed.getAuthorFeed"
params = {
"actor" : self._did_from_actor(actor),
"actor" : self._did_from_actor(actor, True),
"filter": filter,
"limit" : "100",
}
@ -325,7 +327,7 @@ class BlueskyAPI():
endpoint = "app.bsky.feed.getFeed"
params = {
"feed" : "at://{}/app.bsky.feed.generator/{}".format(
self._did_from_actor(actor, False), feed),
self._did_from_actor(actor), feed),
"limit": "100",
}
return self._pagination(endpoint, params)
@ -342,7 +344,7 @@ class BlueskyAPI():
endpoint = "app.bsky.feed.getListFeed"
params = {
"list" : "at://{}/app.bsky.graph.list/{}".format(
self._did_from_actor(actor, False), list),
self._did_from_actor(actor), list),
"limit": "100",
}
return self._pagination(endpoint, params)
@ -389,7 +391,7 @@ class BlueskyAPI():
}
return self._pagination(endpoint, params, "posts")
def _did_from_actor(self, actor, user_did=True):
def _did_from_actor(self, actor, user_did=False):
if actor.startswith("did:"):
did = actor
else:

View File

@ -54,7 +54,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
"album_id" : self.album_id,
"album_name" : text.unescape(info[0]),
"album_size" : size[1:-1],
"description": text.unescape(info[2]) if len(info) > 2 else "",
"count" : len(urls),
}

View File

@ -14,6 +14,7 @@ import ssl
import time
import netrc
import queue
import getpass
import logging
import datetime
import requests
@ -21,6 +22,7 @@ import threading
from requests.adapters import HTTPAdapter
from .message import Message
from .. import config, text, util, cache, exception
urllib3 = requests.packages.urllib3
class Extractor():
@ -45,6 +47,8 @@ class Extractor():
def __init__(self, match):
self.log = logging.getLogger(self.category)
self.url = match.string
self.match = match
self.groups = match.groups()
self._cfgpath = ("extractor", self.category, self.subcategory)
self._parentdir = ""
@ -168,22 +172,25 @@ class Extractor():
requests.exceptions.ChunkedEncodingError,
requests.exceptions.ContentDecodingError) as exc:
msg = exc
code = 0
except (requests.exceptions.RequestException) as exc:
raise exception.HttpError(exc)
else:
code = response.status_code
if self._write_pages:
self._dump_response(response)
if 200 <= code < 400 or fatal is None and \
(400 <= code < 500) or not fatal and \
(400 <= code < 429 or 431 <= code < 500):
if (
code < 400 or
code < 500 and (not fatal and code != 429 or fatal is None)
):
if encoding:
response.encoding = encoding
return response
if notfound and code == 404:
raise exception.NotFoundError(notfound)
msg = "'{} {}' for '{}'".format(code, response.reason, url)
msg = "'{} {}' for '{}'".format(
code, response.reason, response.url)
server = response.headers.get("Server")
if server and server.startswith("cloudflare") and \
code in (403, 503):
@ -194,7 +201,10 @@ class Extractor():
if b'name="captcha-bypass"' in content:
self.log.warning("Cloudflare CAPTCHA")
break
if code not in retry_codes and code < 500:
if code == 429 and self._interval_429:
pass
elif code not in retry_codes and code < 500:
break
finally:
@ -203,15 +213,25 @@ class Extractor():
self.log.debug("%s (%s/%s)", msg, tries, retries+1)
if tries > retries:
break
self.sleep(
max(tries, self._interval()) if self._interval else tries,
"retry")
seconds = tries
if self._interval:
s = self._interval()
if seconds < s:
seconds = s
if code == 429 and self._interval_429:
s = self._interval_429()
if seconds < s:
seconds = s
self.wait(seconds=seconds, reason="429 Too Many Requests")
else:
self.sleep(seconds, "retry")
tries += 1
raise exception.HttpError(msg, response)
def wait(self, seconds=None, until=None, adjust=1.0,
reason="rate limit reset"):
reason="rate limit"):
now = time.time()
if seconds:
@ -234,7 +254,7 @@ class Extractor():
if reason:
t = datetime.datetime.fromtimestamp(until).time()
isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second)
self.log.info("Waiting until %s for %s.", isotime, reason)
self.log.info("Waiting until %s (%s)", isotime, reason)
time.sleep(seconds)
def sleep(self, seconds, reason):
@ -242,6 +262,15 @@ class Extractor():
seconds, reason)
time.sleep(seconds)
def input(self, prompt, echo=True):
if echo:
try:
return input(prompt)
except (EOFError, OSError):
return None
else:
return getpass.getpass(prompt)
def _get_auth_info(self):
"""Return authentication information as (username, password) tuple"""
username = self.config("username")
@ -274,6 +303,9 @@ class Extractor():
self.config("sleep-request", self.request_interval),
self.request_interval_min,
)
self._interval_429 = util.build_duration_func(
self.config("sleep-429", 60),
)
if self._retries < 0:
self._retries = float("inf")
@ -433,9 +465,11 @@ class Extractor():
if not path:
return
path_tmp = path + ".tmp"
try:
with open(path, "w") as fp:
with open(path_tmp, "w") as fp:
util.cookiestxt_store(fp, self.cookies)
os.replace(path_tmp, path)
except OSError as exc:
self.log.warning("cookies: %s", exc)
@ -593,7 +627,7 @@ class GalleryExtractor(Extractor):
def __init__(self, match, url=None):
Extractor.__init__(self, match)
self.gallery_url = self.root + match.group(1) if url is None else url
self.gallery_url = self.root + self.groups[0] if url is None else url
def items(self):
self.login()
@ -668,7 +702,7 @@ class MangaExtractor(Extractor):
def __init__(self, match, url=None):
Extractor.__init__(self, match)
self.manga_url = url or self.root + match.group(1)
self.manga_url = self.root + self.groups[0] if url is None else url
if self.config("chapter-reverse", False):
self.reverse = not self.reverse
@ -730,17 +764,18 @@ class BaseExtractor(Extractor):
instances = ()
def __init__(self, match):
if not self.category:
self._init_category(match)
Extractor.__init__(self, match)
if not self.category:
self._init_category()
self._cfgpath = ("extractor", self.category, self.subcategory)
def _init_category(self, match):
for index, group in enumerate(match.groups()):
def _init_category(self):
for index, group in enumerate(self.groups):
if group is not None:
if index:
self.category, self.root, info = self.instances[index-1]
if not self.root:
self.root = text.root_from_url(match.group(0))
self.root = text.root_from_url(self.match.group(0))
self.config_instance = info.get
else:
self.root = group
@ -800,12 +835,9 @@ def _build_requests_adapter(ssl_options, ssl_ciphers, source_address):
pass
if ssl_options or ssl_ciphers:
ssl_context = ssl.create_default_context()
if ssl_options:
ssl_context.options |= ssl_options
if ssl_ciphers:
ssl_context.set_ecdh_curve("prime256v1")
ssl_context.set_ciphers(ssl_ciphers)
ssl_context = urllib3.connection.create_urllib3_context(
options=ssl_options or None, ciphers=ssl_ciphers)
ssl_context.check_hostname = False
else:
ssl_context = None
@ -925,8 +957,6 @@ SSL_CIPHERS = {
}
urllib3 = requests.packages.urllib3
# detect brotli support
try:
BROTLI = urllib3.response.brotli is not None

View File

@ -18,12 +18,12 @@ import binascii
import time
import re
BASE_PATTERN = (
r"(?:https?://)?(?:"
r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)|"
r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com)"
)
DEFAULT_AVATAR = "https://a.deviantart.net/avatars/default.gif"
class DeviantartExtractor(Extractor):
@ -84,6 +84,16 @@ class DeviantartExtractor(Extractor):
else:
self.commit_journal = None
def request(self, url, **kwargs):
if "fatal" not in kwargs:
kwargs["fatal"] = False
while True:
response = Extractor.request(self, url, **kwargs)
if response.status_code != 403 or \
b"Request blocked." not in response.content:
return response
self.wait(seconds=300, reason="CloudFront block")
def skip(self, num):
self.offset += num
return num
@ -177,6 +187,10 @@ class DeviantartExtractor(Extractor):
for comment in deviation["comments"]:
user = comment["user"]
name = user["username"].lower()
if user["usericon"] == DEFAULT_AVATAR:
self.log.debug(
"Skipping avatar of '%s' (default)", name)
continue
_user_details.update(name, user)
url = "{}/{}/avatar/".format(self.root, name)
@ -209,7 +223,9 @@ class DeviantartExtractor(Extractor):
"""Adjust the contents of a Deviation-object"""
if "index" not in deviation:
try:
if deviation["url"].startswith("https://sta.sh"):
if deviation["url"].startswith((
"https://www.deviantart.com/stash/", "https://sta.sh",
)):
filename = deviation["content"]["src"].split("/")[5]
deviation["index_base36"] = filename.partition("-")[0][1:]
deviation["index"] = id_from_base36(
@ -456,18 +472,12 @@ class DeviantartExtractor(Extractor):
def _limited_request(self, url, **kwargs):
"""Limits HTTP requests to one every 2 seconds"""
kwargs["fatal"] = None
diff = time.time() - DeviantartExtractor._last_request
if diff < 2.0:
self.sleep(2.0 - diff, "request")
while True:
response = self.request(url, **kwargs)
if response.status_code != 403 or \
b"Request blocked." not in response.content:
DeviantartExtractor._last_request = time.time()
return response
self.wait(seconds=180)
def _fetch_premium(self, deviation):
try:
@ -585,7 +595,13 @@ class DeviantartAvatarExtractor(DeviantartExtractor):
return ()
icon = user["usericon"]
index = icon.rpartition("?")[2]
if icon == DEFAULT_AVATAR:
self.log.debug("Skipping avatar of '%s' (default)", name)
return ()
_, sep, index = icon.rpartition("?")
if not sep:
index = "0"
formats = self.config("formats")
if not formats:
@ -668,7 +684,8 @@ class DeviantartStashExtractor(DeviantartExtractor):
"""Extractor for sta.sh-ed deviations"""
subcategory = "stash"
archive_fmt = "{index}.{extension}"
pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.sh)"
r"/([a-z0-9]+)")
example = "https://sta.sh/abcde"
skip = Extractor.skip
@ -689,7 +706,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
if uuid:
deviation = self.api.deviation(uuid)
deviation["index"] = text.parse_int(text.extr(
page, 'gmi-deviationid="', '"'))
page, '\\"deviationId\\":', ','))
yield deviation
return
@ -1405,9 +1422,14 @@ class DeviantartOAuthAPI():
self.authenticate(None if public else self.refresh_token_key)
kwargs["headers"] = self.headers
response = self.extractor.request(url, **kwargs)
data = response.json()
status = response.status_code
try:
data = response.json()
except ValueError:
self.log.error("Unable to parse API response")
data = {}
status = response.status_code
if 200 <= status < 400:
if self.delay > self.delay_min:
self.delay -= 1
@ -1435,9 +1457,8 @@ class DeviantartOAuthAPI():
self.log.info(
"Register your own OAuth application and use its "
"credentials to prevent this error: "
"https://github.com/mikf/gallery-dl/blob/master/do"
"cs/configuration.rst#extractordeviantartclient-id"
"--client-secret")
"https://gdl-org.github.io/docs/configuration.html"
"#extractor-deviantart-client-id-client-secret")
else:
if log:
self.log.error(msg)

View File

@ -50,7 +50,7 @@ class ExhentaiExtractor(Extractor):
def request(self, url, **kwargs):
response = Extractor.request(self, url, **kwargs)
if response.history and response.headers.get("Content-Length") == "0":
if "Cache-Control" not in response.headers and not response.content:
self.log.info("blank page")
raise exception.AuthorizationError()
return response
@ -95,7 +95,11 @@ class ExhentaiExtractor(Extractor):
self.cookies.clear()
response = self.request(url, method="POST", headers=headers, data=data)
if b"You are now logged in as:" not in response.content:
content = response.content
if b"You are now logged in as:" not in content:
if b"The captcha was not entered correctly" in content:
raise exception.AuthenticationError(
"CAPTCHA required. Use cookies instead.")
raise exception.AuthenticationError()
# collect more cookies
@ -437,7 +441,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
raise exception.AuthorizationError()
if page.startswith(("Key missing", "Gallery not found")):
raise exception.NotFoundError("gallery")
if "hentai.org/mpv/" in page:
if page.count("hentai.org/mpv/") > 1:
self.log.warning("Enabled Multi-Page Viewer is not supported")
return page

View File

@ -42,7 +42,8 @@ class FapelloPostExtractor(Extractor):
"type" : "video" if 'type="video' in page else "photo",
"thumbnail": text.extr(page, 'poster="', '"'),
}
url = text.extr(page, 'src="', '"')
url = text.extr(page, 'src="', '"').replace(
".md", "").replace(".th", "")
yield Message.Directory, data
yield Message.Url, url, text.nameext_from_url(url, data)

View File

@ -77,6 +77,8 @@ class FlickrImageExtractor(FlickrExtractor):
photo = self.api.photos_getInfo(self.item_id)
if self.api.exif:
photo.update(self.api.photos_getExif(self.item_id))
if self.api.contexts:
photo.update(self.api.photos_getAllContexts(self.item_id))
if photo["media"] == "video" and self.api.videos:
self.api._extract_video(photo)
@ -268,6 +270,8 @@ class FlickrAPI(oauth.OAuth1API):
self.exif = extractor.config("exif", False)
self.videos = extractor.config("videos", True)
self.contexts = extractor.config("contexts", False)
self.maxsize = extractor.config("size-max")
if isinstance(self.maxsize, str):
for fmt, fmtname, fmtwidth in self.FORMATS:
@ -311,6 +315,13 @@ class FlickrAPI(oauth.OAuth1API):
params = {"user_id": user_id}
return self._pagination("people.getPhotos", params)
def photos_getAllContexts(self, photo_id):
"""Returns all visible sets and pools the photo belongs to."""
params = {"photo_id": photo_id}
data = self._call("photos.getAllContexts", params)
del data["stat"]
return data
def photos_getExif(self, photo_id):
"""Retrieves a list of EXIF/TIFF/GPS tags for a given photo."""
params = {"photo_id": photo_id}
@ -444,6 +455,8 @@ class FlickrAPI(oauth.OAuth1API):
if self.exif:
photo.update(self.photos_getExif(photo["id"]))
if self.contexts:
photo.update(self.photos_getAllContexts(photo["id"]))
photo["id"] = text.parse_int(photo["id"])
if "owner" in photo:

View File

@ -117,8 +117,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.board = match.group(match.lastindex-1)
self.thread = match.group(match.lastindex)
self.board = self.groups[-2]
self.thread = self.groups[-1]
self.data = None
def metadata(self):
@ -140,20 +140,22 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$"
example = "https://archived.moe/a/"
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.board = match.group(match.lastindex)
self.board = self.groups[-2]
self.page = self.groups[-1]
def items(self):
index_base = "{}/_/api/chan/index/?board={}&page=".format(
self.root, self.board)
thread_base = "{}/{}/thread/".format(self.root, self.board)
for page in itertools.count(1):
with self.request(index_base + format(page)) as response:
page = self.page
for pnum in itertools.count(text.parse_int(page, 1)):
with self.request(index_base + format(pnum)) as response:
try:
threads = response.json()
except ValueError:
@ -167,6 +169,9 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
thread["_extractor"] = FoolfuukaThreadExtractor
yield Message.Queue, thread["url"], thread
if page:
return
class FoolfuukaSearchExtractor(FoolfuukaExtractor):
"""Base extractor for search results on FoolFuuka based boards/archives"""
@ -179,17 +184,16 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.params = params = {}
args = match.group(match.lastindex).split("/")
key = None
for arg in args:
key = None
for arg in self.groups[-1].split("/"):
if key:
params[key] = text.unescape(arg)
key = None
else:
key = arg
board = match.group(match.lastindex-1)
board = self.groups[-2]
if board != "_":
params["boards"] = board

View File

@ -11,7 +11,7 @@
from .common import Extractor, Message
from .. import text, util
BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?furaffinity\.net"
BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?(?:f[ux]|f?xfu)raffinity\.net"
class FuraffinityExtractor(Extractor):

View File

@ -32,6 +32,9 @@ class GelbooruBase():
url = self.root + "/index.php?page=dapi&q=index&json=1"
data = self.request(url, params=params).json()
if not key:
return data
try:
posts = data[key]
except KeyError:
@ -48,19 +51,44 @@ class GelbooruBase():
params["pid"] = self.page_start
params["limit"] = self.per_page
limit = self.per_page // 2
pid = False
if "tags" in params:
tags = params["tags"].split()
op = "<"
id = False
for tag in tags:
if tag.startswith("sort:"):
if tag == "sort:id:asc":
op = ">"
elif tag == "sort:id" or tag.startswith("sort:id:"):
op = "<"
else:
pid = True
elif tag.startswith("id:"):
id = True
if not pid:
if id:
tag = "id:" + op
tags = [t for t in tags if not t.startswith(tag)]
tags = "{} id:{}".format(" ".join(tags), op)
while True:
posts = self._api_request(params)
for post in posts:
yield post
yield from posts
if len(posts) < limit:
return
if pid:
params["pid"] += 1
else:
if "pid" in params:
del params["pid"]
params["tags"] = "{} id:<{}".format(self.tags, post["id"])
params["tags"] = tags + str(posts[-1]["id"])
def _pagination_html(self, params):
url = self.root + "/index.php"
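
A standalone sketch of the tag rewriting this hunk performs (the helper name and example searches are made up here): a "sort:id:asc" tag flips the id comparison to ">", any other "sort:" tag forces plain pid paging, and otherwise an "id:<" / "id:>" cursor is appended so the next request can continue from the last post's id.

def build_cursor_tags(tag_string):
    # mirrors the decision logic in the hunk above;
    # returns (use_pid_paging, cursor_tag_prefix)
    tags = tag_string.split()
    op = "<"
    pid = has_id = False
    for tag in tags:
        if tag.startswith("sort:"):
            if tag == "sort:id:asc":
                op = ">"                  # oldest-first searches page forward by id
            elif tag == "sort:id" or tag.startswith("sort:id:"):
                op = "<"
            else:
                pid = True                # other sort orders need plain pid paging
        elif tag.startswith("id:"):
            has_id = True
    if pid:
        return True, None
    if has_id:
        # drop an existing id:< / id:> filter so the cursor can replace it
        tags = [t for t in tags if not t.startswith("id:" + op)]
    return False, "{} id:{}".format(" ".join(tags), op)

# hypothetical searches
print(build_cursor_tags("1girl sort:id:asc"))   # (False, '1girl sort:id:asc id:>')
print(build_cursor_tags("1girl sort:score"))    # (True, None)
print(build_cursor_tags("1girl"))               # (False, '1girl id:<')

Each follow-up request then appends the id of the last received post to that prefix, as in the "params["tags"] = tags + str(posts[-1]["id"])" line above.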
@ -167,13 +195,61 @@ class GelbooruFavoriteExtractor(GelbooruBase,
params = {
"s" : "favorite",
"id" : self.favorite_id,
"limit": "1",
"limit": "2",
}
data = self._api_request(params, None, True)
count = self._api_request(params, "@attributes", True)[0]["count"]
if count <= self.offset:
count = data["@attributes"]["count"]
self.log.debug("API reports %s favorite entries", count)
favs = data["favorite"]
try:
order = 1 if favs[0]["id"] < favs[1]["id"] else -1
except LookupError as exc:
self.log.debug(
"Error when determining API favorite order (%s: %s)",
exc.__class__.__name__, exc)
order = -1
else:
self.log.debug("API yields favorites in %sscending order",
"a" if order > 0 else "de")
order_favs = self.config("order-posts")
if order_favs and order_favs[0] in ("r", "a"):
self.log.debug("Returning them in reverse")
order = -order
if order < 0:
return self._pagination(params, count)
return self._pagination_reverse(params, count)
def _pagination(self, params, count):
if self.offset:
pnum, skip = divmod(self.offset, self.per_page)
else:
pnum = skip = 0
params["pid"] = pnum
params["limit"] = self.per_page
while True:
favs = self._api_request(params, "favorite")
if not favs:
return
if skip:
favs = favs[skip:]
skip = 0
for fav in favs:
for post in self._api_request({"id": fav["favorite"]}):
post["date_favorited"] = text.parse_timestamp(fav["added"])
yield post
params["pid"] += 1
def _pagination_reverse(self, params, count):
pnum, last = divmod(count-1, self.per_page)
if self.offset > last:
# page number change
@ -182,12 +258,11 @@ class GelbooruFavoriteExtractor(GelbooruBase,
pnum -= diff + 1
skip = self.offset
# paginate over them in reverse
params["pid"] = pnum
params["limit"] = self.per_page
while True:
favs = self._api_request(params, "favorite", True)
favs = self._api_request(params, "favorite")
favs.reverse()
if skip:
@ -195,7 +270,9 @@ class GelbooruFavoriteExtractor(GelbooruBase,
skip = 0
for fav in favs:
yield from self._api_request({"id": fav["favorite"]})
for post in self._api_request({"id": fav["favorite"]}):
post["date_favorited"] = text.parse_timestamp(fav["added"])
yield post
params["pid"] -= 1
if params["pid"] < 0:
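
In plain terms, the favorites change above requests two entries (limit=2), compares their ids to detect whether the API currently returns favorites ascending or descending, and lets an "order-posts" value starting with "r" or "a" flip that direction; a descending result walks pages forward via _pagination, an ascending one walks them backward via _pagination_reverse. A minimal sketch of just the direction decision, with hypothetical ids:

def favorite_direction(first_id, second_id, order_posts=None):
    # mirrors the order detection above: +1 = ascending, -1 = descending
    order = 1 if first_id < second_id else -1
    if order_posts and order_posts[0] in ("r", "a"):   # e.g. "reverse" or "asc"
        order = -order
    return order

print(favorite_direction(101, 205))                          # 1  -> ascending
print(favorite_direction(205, 101, order_posts="reverse"))   # 1  -> flipped back to ascending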

View File

@ -41,9 +41,13 @@ class GofileFolderExtractor(Extractor):
folder = self._get_content(self.content_id, password)
yield Message.Directory, folder
try:
contents = folder.pop("children")
except KeyError:
raise exception.AuthorizationError("Password required")
num = 0
contents = folder.pop("contents")
for content_id in folder["childs"]:
for content_id in folder["childrenIds"]:
content = contents[content_id]
content["folder"] = folder
@ -67,31 +71,32 @@ class GofileFolderExtractor(Extractor):
@memcache()
def _create_account(self):
self.log.debug("Creating temporary account")
return self._api_request("createAccount")["token"]
return self._api_request("accounts", method="POST")["token"]
@cache(maxage=86400)
def _get_website_token(self):
self.log.debug("Fetching website token")
page = self.request(self.root + "/dist/js/alljs.js").text
return text.extr(page, 'fetchData.wt = "', '"')
return text.extr(page, 'wt: "', '"')
def _get_content(self, content_id, password=None):
headers = {"Authorization": "Bearer " + self.api_token}
params = {"wt": self.website_token}
if password is not None:
password = hashlib.sha256(password.encode()).hexdigest()
return self._api_request("getContent", {
"contentId" : content_id,
"token" : self.api_token,
"wt" : self.website_token,
"password" : password,
})
params["password"] = hashlib.sha256(password.encode()).hexdigest()
return self._api_request("contents/" + content_id, params, headers)
def _api_request(self, endpoint, params=None):
def _api_request(self, endpoint, params=None, headers=None, method="GET"):
response = self.request(
"https://api.gofile.io/" + endpoint, params=params).json()
"https://api.gofile.io/" + endpoint,
method=method, params=params, headers=headers,
).json()
if response["status"] != "ok":
if response["status"] == "error-notFound":
raise exception.NotFoundError("content")
if response["status"] == "error-passwordRequired":
raise exception.AuthorizationError("Password required")
raise exception.StopExtraction(
"%s failed (Status: %s)", endpoint, response["status"])

View File

@ -25,7 +25,7 @@ class HiperdexBase():
@memcache(keyarg=1)
def manga_data(self, manga, page=None):
if not page:
url = "{}/manga/{}/".format(self.root, manga)
url = "{}/mangas/{}/".format(self.root, manga)
page = self.request(url).text
extr = text.extract_from(page)
@ -33,7 +33,7 @@ class HiperdexBase():
"url" : text.unescape(extr(
'property="og:url" content="', '"')),
"manga" : text.unescape(extr(
'"headline": "', '"')),
' property="name" title="', '"')),
"score" : text.parse_float(extr(
'id="averagerate">', '<')),
"author" : text.remove_html(extr(
@ -68,8 +68,8 @@ class HiperdexBase():
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
"""Extractor for manga chapters from hiperdex.com"""
pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))"
example = "https://hiperdex.com/mangas/MANGA/CHAPTER/"
def __init__(self, match):
root, path, self.manga, self.chapter = match.groups()
@ -90,8 +90,8 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
"""Extractor for manga from hiperdex.com"""
chapterclass = HiperdexChapterExtractor
pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
example = "https://hiperdex.com/manga/MANGA/"
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$"
example = "https://hiperdex.com/mangas/MANGA/"
def __init__(self, match):
root, path, self.manga = match.groups()

View File

@ -23,6 +23,7 @@ class HotleakExtractor(Extractor):
def items(self):
for post in self.posts():
post["_http_expected_status"] = (404,)
yield Message.Directory, post
yield Message.Url, post["url"], post

View File

@ -101,9 +101,8 @@ class IdolcomplexExtractor(SankakuExtractor):
page = self.request(url, retries=10).text
extr = text.extract_from(page)
pid_alnum = extr('/posts/', '"')
vavg = extr('itemprop="ratingValue">', "<")
vcnt = extr('itemprop="reviewCount">', "<")
vavg = extr('id="rating"', "</ul>")
vcnt = extr('>Votes</strong>:', "<")
pid = extr(">Post ID:", "<")
created = extr(' title="', '"')
@ -120,10 +119,10 @@ class IdolcomplexExtractor(SankakuExtractor):
rating = extr(">Rating:", "<br")
data = {
"id" : text.parse_int(pid),
"id_alnum" : pid_alnum,
"id" : pid.strip(),
"md5" : file_url.rpartition("/")[2].partition(".")[0],
"vote_average": text.parse_float(vavg),
"vote_average": (1.0 * vavg.count('class="star-full"') +
0.5 * vavg.count('class="star-half"')),
"vote_count" : text.parse_int(vcnt),
"created_at" : created,
"date" : text.parse_datetime(
@ -222,8 +221,8 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
subcategory = "pool"
directory_fmt = ("{category}", "pool", "{pool}")
archive_fmt = "p_{pool}_{id}"
pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
example = "https://idol.sankakucomplex.com/pools/show/12345"
pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)"
example = "https://idol.sankakucomplex.com/pools/0123456789abcdef"
per_page = 24
def __init__(self, match):

View File

@ -161,11 +161,12 @@ class ImagefapFolderExtractor(ImagefapExtractor):
self.user = user or profile
def items(self):
for gallery_id, name in self.galleries(self.folder_id):
for gallery_id, name, folder in self.galleries(self.folder_id):
url = "{}/gallery/{}".format(self.root, gallery_id)
data = {
"gallery_id": gallery_id,
"title" : text.unescape(name),
"folder" : text.unescape(folder),
"_extractor": ImagefapGalleryExtractor,
}
yield Message.Queue, url, data
@ -173,6 +174,7 @@ class ImagefapFolderExtractor(ImagefapExtractor):
def galleries(self, folder_id):
"""Yield gallery IDs and titles of a folder"""
if folder_id == "-1":
folder_name = "Uncategorized"
if self._id:
url = "{}/usergallery.php?userid={}&folderid=-1".format(
self.root, self.user)
@ -180,23 +182,28 @@ class ImagefapFolderExtractor(ImagefapExtractor):
url = "{}/profile/{}/galleries?folderid=-1".format(
self.root, self.user)
else:
folder_name = None
url = "{}/organizer/{}/".format(self.root, folder_id)
params = {"page": 0}
while True:
extr = text.extract_from(self.request(url, params=params).text)
if not folder_name:
folder_name = extr("class'blk_galleries'><b>", "</b>")
while True:
cnt = 0
while True:
gid = extr('<a href="/gallery/', '"')
gid = extr(' id="gid-', '"')
if not gid:
break
yield gid, extr("<b>", "<")
yield gid, extr("<b>", "<"), folder_name
cnt += 1
if cnt < 20:
break
params["page"] += 1
extr = text.extract_from(self.request(url, params=params).text)
class ImagefapUserExtractor(ImagefapExtractor):

View File

@ -39,10 +39,15 @@ class ImgurExtractor(Extractor):
image["url"] = url = "https://i.imgur.com/{}.{}".format(
image["id"], image["ext"])
image["date"] = text.parse_datetime(image["created_at"])
image["_http_validate"] = self._validate
text.nameext_from_url(url, image)
return url
def _validate(self, response):
return (not response.history or
not response.url.endswith("/removed.png"))
def _items_queue(self, items):
album_ex = ImgurAlbumExtractor
image_ex = ImgurImageExtractor

View File

@ -330,15 +330,18 @@ class InkbunnyAPI():
def _call(self, endpoint, params):
url = "https://inkbunny.net/api_" + endpoint + ".php"
params["sid"] = self.session_id
while True:
data = self.extractor.request(url, params=params).json()
if "error_code" in data:
if "error_code" not in data:
return data
if str(data["error_code"]) == "2":
self.authenticate(invalidate=True)
return self._call(endpoint, params)
raise exception.StopExtraction(data.get("error_message"))
continue
return data
raise exception.StopExtraction(data.get("error_message"))
def _pagination_search(self, params):
params["page"] = 1

View File

@ -165,7 +165,7 @@ class InstagramExtractor(Extractor):
data = {
"post_id" : post["pk"],
"post_shortcode": post["code"],
"likes": post.get("like_count"),
"likes": post.get("like_count", 0),
"pinned": post.get("timeline_pinned_user_ids", ()),
"date": text.parse_timestamp(post.get("taken_at")),
}
@ -736,7 +736,7 @@ class InstagramRestAPI():
not user["followed_by_viewer"]:
name = user["username"]
s = "" if name.endswith("s") else "s"
raise exception.StopExtraction("%s'%s posts are private", name, s)
self.extractor.log.warning("%s'%s posts are private", name, s)
self.extractor._assign_user(user)
return user["id"]

View File

@ -41,6 +41,9 @@ class KemonopartyExtractor(Extractor):
self.revisions = self.config("revisions")
if self.revisions:
self.revisions_unique = (self.revisions == "unique")
order = self.config("order-revisions")
self.revisions_reverse = order[0] in ("r", "a") if order else False
self._prepare_ddosguard_cookies()
self._find_inline = re.compile(
r'src="(?:https?://(?:kemono|coomer)\.(?:su|party))?(/inline/[^"]+'
@ -54,7 +57,7 @@ class KemonopartyExtractor(Extractor):
generators = self._build_file_generators(self.config("files"))
duplicates = self.config("duplicates")
comments = self.config("comments")
username = dms = None
username = dms = announcements = None
# prevent files from being sent with gzip compression
headers = {"Accept-Encoding": "identity"}
@ -65,6 +68,8 @@ class KemonopartyExtractor(Extractor):
'<meta name="artist_name" content="', '"')[0])
if self.config("dms"):
dms = True
if self.config("announcements"):
announcements = True
posts = self.posts()
max_posts = self.config("max-posts")
@ -77,7 +82,7 @@ class KemonopartyExtractor(Extractor):
self.root, post["service"], post["user"], post["id"])
post["_http_headers"] = headers
post["date"] = self._parse_datetime(
post["published"] or post["added"])
post.get("published") or post.get("added") or "")
if username:
post["username"] = username
@ -85,8 +90,12 @@ class KemonopartyExtractor(Extractor):
post["comments"] = self._extract_comments(post)
if dms is not None:
if dms is True:
dms = self._extract_dms(post)
dms = self._extract_cards(post, "dms")
post["dms"] = dms
if announcements is not None:
if announcements is True:
announcements = self._extract_cards(post, "announcements")
post["announcements"] = announcements
files = []
hashes = set()
@ -153,7 +162,7 @@ class KemonopartyExtractor(Extractor):
def _file(self, post):
file = post["file"]
if not file:
if not file or "path" not in file:
return ()
file["type"] = "file"
return (file,)
@ -197,21 +206,21 @@ class KemonopartyExtractor(Extractor):
})
return comments
def _extract_dms(self, post):
url = "{}/{}/user/{}/dms".format(
self.root, post["service"], post["user"])
def _extract_cards(self, post, type):
url = "{}/{}/user/{}/{}".format(
self.root, post["service"], post["user"], type)
page = self.request(url).text
dms = []
for dm in text.extract_iter(page, "<article", "</article>"):
footer = text.extr(dm, "<footer", "</footer>")
dms.append({
cards = []
for card in text.extract_iter(page, "<article", "</article>"):
footer = text.extr(card, "<footer", "</footer>")
cards.append({
"body": text.unescape(text.extr(
dm, "<pre>", "</pre></",
card, "<pre>", "</pre></",
).strip()),
"date": text.extr(footer, 'Published: ', '\n'),
"date": text.extr(footer, ': ', '\n'),
})
return dms
return cards
def _parse_datetime(self, date_string):
if len(date_string) > 19:
@ -232,6 +241,7 @@ class KemonopartyExtractor(Extractor):
except exception.HttpError:
post["revision_hash"] = self._revision_hash(post)
post["revision_index"] = 1
post["revision_count"] = 1
return (post,)
revs.insert(0, post)
@ -247,22 +257,30 @@ class KemonopartyExtractor(Extractor):
uniq.append(rev)
revs = uniq
idx = len(revs)
cnt = idx = len(revs)
for rev in revs:
rev["revision_index"] = idx
rev["revision_count"] = cnt
idx -= 1
if self.revisions_reverse:
revs.reverse()
return revs
def _revisions_all(self, url):
revs = self.request(url + "/revisions").json()
idx = len(revs)
cnt = idx = len(revs)
for rev in revs:
rev["revision_hash"] = self._revision_hash(rev)
rev["revision_index"] = idx
rev["revision_count"] = cnt
idx -= 1
if self.revisions_reverse:
revs.reverse()
return revs
def _revision_hash(self, revision):
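
Both revision helpers above now record revision_count alongside revision_index, numbering from newest (highest index) down to oldest (1), and an "order-revisions" value starting with "r" or "a" reverses the returned list. A minimal sketch of that numbering using placeholder revision dicts:

def number_revisions(revs, order_revisions=None):
    # newest revision first in 'revs'; index counts down, count stays constant
    cnt = idx = len(revs)
    for rev in revs:
        rev["revision_index"] = idx
        rev["revision_count"] = cnt
        idx -= 1
    if order_revisions and order_revisions[0] in ("r", "a"):
        revs.reverse()      # oldest first instead
    return revs

revs = [{"id": 3}, {"id": 2}, {"id": 1}]        # placeholder revisions, newest first
print(number_revisions(revs))
# [{'id': 3, 'revision_index': 3, 'revision_count': 3},
#  {'id': 2, 'revision_index': 2, 'revision_count': 3},
#  {'id': 1, 'revision_index': 1, 'revision_count': 3}]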
@ -482,7 +500,8 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
self.favorites = (text.parse_query(match.group(3)).get("type") or
self.params = text.parse_query(match.group(3))
self.favorites = (self.params.get("type") or
self.config("favorites") or
"artist")
@ -490,9 +509,17 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
self._prepare_ddosguard_cookies()
self.login()
sort = self.params.get("sort")
order = self.params.get("order") or "desc"
if self.favorites == "artist":
users = self.request(
self.root + "/api/v1/account/favorites?type=artist").json()
if not sort:
sort = "updated"
users.sort(key=lambda x: x[sort], reverse=(order == "desc"))
for user in users:
user["_extractor"] = KemonopartyUserExtractor
url = "{}/{}/user/{}".format(
@ -502,6 +529,11 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
elif self.favorites == "post":
posts = self.request(
self.root + "/api/v1/account/favorites?type=post").json()
if not sort:
sort = "faved_seq"
posts.sort(key=lambda x: x[sort], reverse=(order == "desc"))
for post in posts:
post["_extractor"] = KemonopartyPostExtractor
url = "{}/{}/user/{}/post/{}".format(

View File

@ -104,7 +104,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
filename_fmt = "{category}_{id}{title:?_//}.{extension}"
directory_fmt = ("{category}",)
archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/i/(\w+)"
pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)"
example = "https://lensdump.com/i/ID"
def __init__(self, match):

View File

@ -29,6 +29,7 @@ class MastodonExtractor(BaseExtractor):
self.instance = self.root.partition("://")[2]
self.reblogs = self.config("reblogs", False)
self.replies = self.config("replies", True)
self.cards = self.config("cards", False)
def items(self):
for status in self.statuses():
@ -48,6 +49,17 @@ class MastodonExtractor(BaseExtractor):
if status["reblog"]:
attachments.extend(status["reblog"]["media_attachments"])
if self.cards:
card = status.get("card")
if card:
url = card.get("image")
if url:
card["weburl"] = card.get("url")
card["url"] = url
card["id"] = "card" + "".join(
url.split("/")[6:-2]).lstrip("0")
attachments.append(card)
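
With the new "cards" option enabled, a status's preview card becomes a pseudo-attachment: its "image" URL is used as the download URL and an "id" is derived from the numeric path segments of that URL. A worked example with a hypothetical preview-card URL:

# hypothetical card image URL in a .../preview_cards/images/<segments>/original/... layout
url = ("https://files.mastodon.social/cache/preview_cards"
       "/images/109/756/914/086/529/032/original/abcdef.png")

card_id = "card" + "".join(url.split("/")[6:-2]).lstrip("0")
print(card_id)   # card109756914086529032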
status["instance"] = self.instance
acct = status["account"]["acct"]
status["instance_remote"] = \
@ -70,7 +82,11 @@ class MastodonExtractor(BaseExtractor):
def _check_moved(self, account):
self._check_moved = None
if "moved" in account:
# Certain fediverse software (such as Iceshrimp and Sharkey) has a
# null account "moved" field instead of omitting it outright.
# To handle this, check whether the "moved" value is truthy
# instead of only checking that the key exists.

if account.get("moved"):
self.log.warning("Account '%s' moved to '%s'",
account["acct"], account["moved"]["acct"])
@ -116,6 +132,7 @@ class MastodonUserExtractor(MastodonExtractor):
api.account_id_by_username(self.item),
only_media=(
not self.reblogs and
not self.cards and
not self.config("text-posts", False)
),
exclude_replies=not self.replies,
@ -132,6 +149,36 @@ class MastodonBookmarkExtractor(MastodonExtractor):
return MastodonAPI(self).account_bookmarks()
class MastodonFavoriteExtractor(MastodonExtractor):
"""Extractor for mastodon favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/favourites"
example = "https://mastodon.social/favourites"
def statuses(self):
return MastodonAPI(self).account_favorites()
class MastodonListExtractor(MastodonExtractor):
"""Extractor for mastodon lists"""
subcategory = "list"
pattern = BASE_PATTERN + r"/lists/(\w+)"
example = "https://mastodon.social/lists/12345"
def statuses(self):
return MastodonAPI(self).timelines_list(self.item)
class MastodonHashtagExtractor(MastodonExtractor):
"""Extractor for mastodon hashtags"""
subcategory = "hashtag"
pattern = BASE_PATTERN + r"/tags/(\w+)"
example = "https://mastodon.social/tags/NAME"
def statuses(self):
return MastodonAPI(self).timelines_tag(self.item)
class MastodonFollowingExtractor(MastodonExtractor):
"""Extractor for followed mastodon users"""
subcategory = "following"
@ -201,37 +248,55 @@ class MastodonAPI():
raise exception.NotFoundError("account")
def account_bookmarks(self):
"""Statuses the user has bookmarked"""
endpoint = "/v1/bookmarks"
return self._pagination(endpoint, None)
def account_favorites(self):
"""Statuses the user has favourited"""
endpoint = "/v1/favourites"
return self._pagination(endpoint, None)
def account_following(self, account_id):
"""Accounts which the given account is following"""
endpoint = "/v1/accounts/{}/following".format(account_id)
return self._pagination(endpoint, None)
def account_lookup(self, username):
"""Quickly lookup a username to see if it is available"""
endpoint = "/v1/accounts/lookup"
params = {"acct": username}
return self._call(endpoint, params).json()
def account_search(self, query, limit=40):
"""Search for accounts"""
"""Search for matching accounts by username or display name"""
endpoint = "/v1/accounts/search"
params = {"q": query, "limit": limit}
return self._call(endpoint, params).json()
def account_statuses(self, account_id, only_media=True,
exclude_replies=False):
"""Fetch an account's statuses"""
"""Statuses posted to the given account"""
endpoint = "/v1/accounts/{}/statuses".format(account_id)
params = {"only_media" : "1" if only_media else "0",
"exclude_replies": "1" if exclude_replies else "0"}
params = {"only_media" : "true" if only_media else "false",
"exclude_replies": "true" if exclude_replies else "false"}
return self._pagination(endpoint, params)
def status(self, status_id):
"""Fetch a status"""
"""Obtain information about a status"""
endpoint = "/v1/statuses/" + status_id
return self._call(endpoint).json()
def timelines_list(self, list_id):
"""View statuses in the given list timeline"""
endpoint = "/v1/timelines/list/" + list_id
return self._pagination(endpoint, None)
def timelines_tag(self, hashtag):
"""View public statuses containing the given hashtag"""
endpoint = "/v1/timelines/tag/" + hashtag
return self._pagination(endpoint, None)
def _call(self, endpoint, params=None):
if endpoint.startswith("http"):
url = endpoint

View File

@ -26,7 +26,8 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
"{post[date]:%Y-%m-%d} {post[title]}")
archive_fmt = "{blog[id]}_{post[num]}_{num}"
pattern = (r"(?:https?://)?blog\.naver\.com/"
r"(?:PostView\.nhn\?blogId=(\w+)&logNo=(\d+)|(\w+)/(\d+)/?$)")
r"(?:PostView\.n(?:aver|hn)\?blogId=(\w+)&logNo=(\d+)|"
r"(\w+)/(\d+)/?$)")
example = "https://blog.naver.com/BLOGID/12345"
def __init__(self, match):
@ -46,8 +47,10 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
extr = text.extract_from(page)
data = {
"post": {
"title" : extr('"og:title" content="', '"'),
"description": extr('"og:description" content="', '"'),
"title" : text.unescape(extr(
'"og:title" content="', '"')),
"description": text.unescape(extr(
'"og:description" content="', '"')).replace("&nbsp;", " "),
"num" : text.parse_int(self.post_id),
},
"blog": {
@ -62,10 +65,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
return data
def images(self, page):
return [
(url.replace("://post", "://blog", 1).partition("?")[0], None)
for url in text.extract_iter(page, 'data-lazy-src="', '"')
]
results = []
for url in text.extract_iter(page, 'data-lazy-src="', '"'):
url = url.replace("://post", "://blog", 1).partition("?")[0]
if "\ufffd" in text.unquote(url):
url = text.unquote(url, encoding="EUC-KR")
results.append((url, None))
return results
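
Filenames in older Naver posts can be percent-encoded EUC-KR rather than UTF-8; decoding such an escape with the default codec yields U+FFFD replacement characters, which the new check uses as the signal to re-decode the URL as EUC-KR. A small standalone illustration with a hypothetical image URL ("한글" is C7 D1 B1 DB in EUC-KR):

from urllib.parse import unquote

# hypothetical blog image URL whose filename is EUC-KR percent-encoded
url = "https://blogfiles.pstatic.net/example/%C7%D1%B1%DB.jpg"

if "\ufffd" in unquote(url):                 # not valid UTF-8 -> replacement characters
    decoded = unquote(url, encoding="EUC-KR")
else:
    decoded = unquote(url)

print(decoded)   # https://blogfiles.pstatic.net/example/한글.jpg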
class NaverBlogExtractor(NaverBase, Extractor):
@ -73,7 +79,8 @@ class NaverBlogExtractor(NaverBase, Extractor):
subcategory = "blog"
categorytransfer = True
pattern = (r"(?:https?://)?blog\.naver\.com/"
r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)")
r"(?:PostList\.n(?:aver|hn)\?(?:[^&#]+&)*blogId=([^&#]+)|"
r"(\w+)/?$)")
example = "https://blog.naver.com/BLOGID"
def __init__(self, match):
@ -81,12 +88,11 @@ class NaverBlogExtractor(NaverBase, Extractor):
self.blog_id = match.group(1) or match.group(2)
def items(self):
# fetch first post number
url = "{}/PostList.nhn?blogId={}".format(self.root, self.blog_id)
post_num = text.extract(
post_num = text.extr(
self.request(url).text, 'gnFirstLogNo = "', '"',
)[0]
)
# setup params for API calls
url = "{}/PostViewBottomTitleListAsync.nhn".format(self.root)

View File

@ -110,7 +110,7 @@ class OAuthBase(Extractor):
# get a request token
params = {"oauth_callback": self.redirect_uri}
data = self.session.get(request_token_url, params=params).text
data = self.request(request_token_url, params=params).text
data = text.parse_query(data)
self.session.auth.token_secret = data["oauth_token_secret"]
@ -120,7 +120,7 @@ class OAuthBase(Extractor):
data = self.open(authorize_url, params)
# exchange the request token for an access token
data = self.session.get(access_token_url, params=data).text
data = self.request(access_token_url, params=data).text
data = text.parse_query(data)
token = data["oauth_token"]
token_secret = data["oauth_token_secret"]
@ -189,7 +189,8 @@ class OAuthBase(Extractor):
data["client_id"] = client_id
data["client_secret"] = client_secret
data = self.session.post(token_url, data=data, auth=auth).json()
data = self.request(
token_url, method="POST", data=data, auth=auth).json()
# check token response
if "error" in data:
@ -386,7 +387,7 @@ class OAuthMastodon(OAuthBase):
"redirect_uris": self.redirect_uri,
"scopes": "read",
}
data = self.session.post(url, data=data).json()
data = self.request(url, method="POST", data=data).json()
if "client_id" not in data or "client_secret" not in data:
raise exception.StopExtraction(
@ -441,7 +442,8 @@ class OAuthPixiv(OAuthBase):
"redirect_uri" : "https://app-api.pixiv.net"
"/web/v1/users/auth/pixiv/callback",
}
data = self.session.post(url, headers=headers, data=data).json()
data = self.request(
url, method="POST", headers=headers, data=data).json()
if "error" in data:
stdout_write("\n{}\n".format(data))

View File

@ -104,7 +104,8 @@ class PixivExtractor(Extractor):
elif work["page_count"] == 1:
url = meta_single_page["original_image_url"]
if url == url_sanity:
self.log.debug("Skipping 'sanity_level' warning (%s)",
self.log.warning(
"Unable to download work %s ('sanity_level' warning)",
work["id"])
continue
work["date_url"] = self._date_from_url(url)
@ -619,6 +620,7 @@ class PixivNovelExtractor(PixivExtractor):
meta_user = self.config("metadata")
meta_bookmark = self.config("metadata-bookmark")
embeds = self.config("embeds")
covers = self.config("covers")
if embeds:
headers = {
@ -650,7 +652,7 @@ class PixivNovelExtractor(PixivExtractor):
yield Message.Directory, novel
try:
content = self.api.novel_text(novel["id"])["novel_text"]
content = self.api.novel_webview(novel["id"])["text"]
except Exception:
self.log.warning("Unable to download novel %s", novel["id"])
continue
@ -658,12 +660,25 @@ class PixivNovelExtractor(PixivExtractor):
novel["extension"] = "txt"
yield Message.Url, "text:" + content, novel
if covers:
path = novel["image_urls"]["large"].partition("/img/")[2]
url = ("https://i.pximg.net/novel-cover-original/img/" +
path.rpartition(".")[0].replace("_master1200", ""))
novel["date_url"] = self._date_from_url(url)
novel["num"] += 1
novel["suffix"] = "_p{:02}".format(novel["num"])
novel["_fallback"] = (url + ".png",)
url_jpg = url + ".jpg"
text.nameext_from_url(url_jpg, novel)
yield Message.Url, url_jpg, novel
del novel["_fallback"]
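
With the new "covers" option enabled, the original cover URL is rebuilt from the API's "large" thumbnail URL: everything after "/img/" is kept, the "_master1200" suffix and extension are dropped, and the result is requested as .jpg with a .png fallback. A worked example using a hypothetical thumbnail URL in that shape:

# hypothetical "large" cover thumbnail returned by the API
large = ("https://i.pximg.net/c/600x600/novel-cover-master"
         "/img/2024/05/17/12/00/00/12345_abcdef_master1200.jpg")

path = large.partition("/img/")[2]
url = ("https://i.pximg.net/novel-cover-original/img/" +
       path.rpartition(".")[0].replace("_master1200", ""))

print(url + ".jpg")   # primary candidate
print(url + ".png")   # fallback if the .jpg does not exist
# https://i.pximg.net/novel-cover-original/img/2024/05/17/12/00/00/12345_abcdef.jpg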
if embeds:
desktop = False
illusts = {}
for marker in text.extract_iter(content, "[", "]"):
if marker.startswith("[jumpuri:If you would like to "):
if marker.startswith("uploadedimage:"):
desktop = True
elif marker.startswith("pixivimage:"):
illusts[marker[11:].partition("-")[0]] = None
@ -918,6 +933,15 @@ class PixivAppAPI():
params = {"novel_id": novel_id}
return self._call("/v1/novel/text", params)
def novel_webview(self, novel_id):
params = {"id": novel_id, "viewer_version": "20221031_ai"}
return self._call(
"/webview/v2/novel", params, self._novel_webview_parse)
def _novel_webview_parse(self, response):
return util.json_loads(text.extr(
response.text, "novel: ", ",\n"))
def search_illust(self, word, sort=None, target=None, duration=None,
date_start=None, date_end=None):
params = {"word": word, "search_target": target,
@ -962,12 +986,16 @@ class PixivAppAPI():
params = {"illust_id": illust_id}
return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
def _call(self, endpoint, params=None):
def _call(self, endpoint, params=None, parse=None):
url = "https://app-api.pixiv.net" + endpoint
while True:
self.login()
response = self.extractor.request(url, params=params, fatal=False)
if parse:
data = parse(response)
else:
data = response.json()
if "error" not in data:

View File

@ -23,6 +23,10 @@ class PoipikuExtractor(Extractor):
archive_fmt = "{post_id}_{num}"
request_interval = (0.5, 1.5)
def _init(self):
self.cookies.set(
"POIPIKU_CONTENTS_VIEW_MODE", "1", domain="poipiku.com")
def items(self):
password = self.config("password", "")

View File

@ -143,6 +143,9 @@ class PornhubGifExtractor(PornhubExtractor):
"url" : extr('"contentUrl": "', '"'),
"date" : text.parse_datetime(
extr('"uploadDate": "', '"'), "%Y-%m-%d"),
"viewkey" : extr('From this video: '
'<a href="/view_video.php?viewkey=', '"'),
"timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),
"user" : text.remove_html(extr("Created by:", "</div>")),
}

View File

@ -35,10 +35,7 @@ class ReadcomiconlineBase():
self.log.warning(
"Redirect to \n%s\nVisit this URL in your browser, solve "
"the CAPTCHA, and press ENTER to continue", response.url)
try:
input()
except (EOFError, OSError):
pass
self.input()
else:
raise exception.StopExtraction(
"Redirect to \n%s\nVisit this URL in your browser and "

View File

@ -74,8 +74,8 @@ class RedditExtractor(Extractor):
yield Message.Url, url, submission
elif "gallery_data" in media:
for submission["num"], url in enumerate(
self._extract_gallery(media), 1):
for url in self._extract_gallery(media):
submission["num"] += 1
text.nameext_from_url(url, submission)
yield Message.Url, url, submission
@ -99,7 +99,10 @@ class RedditExtractor(Extractor):
urls.append((url, submission))
for comment in comments:
html = comment["body_html"] or ""
if ' href="' in html:
href = (' href="' in html)
media = ("media_metadata" in comment)
if media or href:
comment["date"] = text.parse_timestamp(
comment["created_utc"])
if submission:
@ -107,6 +110,14 @@ class RedditExtractor(Extractor):
data["comment"] = comment
else:
data = comment
if media:
for embed in self._extract_embed(comment):
submission["num"] += 1
text.nameext_from_url(embed, submission)
yield Message.Url, embed, submission
if href:
for url in text.extract_iter(html, ' href="', '"'):
urls.append((url, data))
@ -118,6 +129,7 @@ class RedditExtractor(Extractor):
if url.startswith((
"https://www.reddit.com/message/compose",
"https://reddit.com/message/compose",
"https://preview.redd.it/",
)):
continue
@ -172,6 +184,27 @@ class RedditExtractor(Extractor):
submission["id"], item["media_id"])
self.log.debug(src)
def _extract_embed(self, submission):
meta = submission["media_metadata"]
if not meta:
return
for mid, data in meta.items():
if data["status"] != "valid" or "s" not in data:
self.log.warning(
"embed %s: skipping item %s (status: %s)",
submission["id"], mid, data.get("status"))
continue
src = data["s"]
url = src.get("u") or src.get("gif") or src.get("mp4")
if url:
yield url.partition("?")[0].replace("/preview.", "/i.", 1)
else:
self.log.error(
"embed %s: unable to fetch download URL for item %s",
submission["id"], mid)
self.log.debug(src)
def _extract_video_ytdl(self, submission):
return "https://www.reddit.com" + submission["permalink"]
@ -191,6 +224,8 @@ class RedditExtractor(Extractor):
try:
if "reddit_video_preview" in post["preview"]:
video = post["preview"]["reddit_video_preview"]
if "fallback_url" in video:
yield video["fallback_url"]
if "dash_url" in video:
yield "ytdl:" + video["dash_url"]
if "hls_url" in video:
@ -200,6 +235,12 @@ class RedditExtractor(Extractor):
try:
for image in post["preview"]["images"]:
variants = image.get("variants")
if variants:
if "gif" in variants:
yield variants["gif"]["source"]["url"]
if "mp4" in variants:
yield variants["mp4"]["source"]["url"]
yield image["source"]["url"]
except Exception as exc:
self.log.debug("%s: %s", exc.__class__.__name__, exc)
@ -446,14 +487,14 @@ class RedditAPI():
remaining = response.headers.get("x-ratelimit-remaining")
if remaining and float(remaining) < 2:
if self._warn_429:
self._warn_429 = False
self.log.warning("API rate limit exceeded")
if self._warn_429 and self.client_id == self.CLIENT_ID:
self.log.info(
"Register your own OAuth application and use its "
"credentials to prevent this error: "
"https://github.com/mikf/gallery-dl/blob/master"
"/docs/configuration.rst"
"#extractorredditclient-id--user-agent")
"https://gdl-org.github.io/docs/configuration.html"
"#extractor-reddit-client-id-user-agent")
self._warn_429 = False
self.extractor.wait(
seconds=response.headers["x-ratelimit-reset"])
continue

View File

@ -26,10 +26,10 @@ class SkebExtractor(Extractor):
def _init(self):
self.thumbnails = self.config("thumbnails", False)
self.article = self.config("article", False)
self.headers = {
"Accept" : "application/json, text/plain, */*",
"Authorization": "Bearer null",
}
self.headers = {"Accept": "application/json, text/plain, */*"}
if "Authorization" not in self.session.headers:
self.headers["Authorization"] = "Bearer null"
def request(self, url, **kwargs):
while True:
@ -55,6 +55,12 @@ class SkebExtractor(Extractor):
url = file["file_url"]
yield Message.Url, url, text.nameext_from_url(url, post)
def _items_users(self):
base = self.root + "/@"
for user in self.users():
user["_extractor"] = SkebUserExtractor
yield Message.Queue, base + user["screen_name"], user
def posts(self):
"""Return post number"""
@ -83,6 +89,20 @@ class SkebExtractor(Extractor):
return
params["offset"] += 30
def _pagination_users(self, endpoint, params):
url = "{}/api{}".format(self.root, endpoint)
params["offset"] = 0
params["limit"] = 90
while True:
data = self.request(
url, params=params, headers=self.headers).json()
yield from data
if len(data) < params["limit"]:
return
params["offset"] += params["limit"]
def _get_post_data(self, user_name, post_num):
url = "{}/api/users/{}/works/{}".format(
self.root, user_name, post_num)
@ -256,22 +276,23 @@ class SkebFollowingExtractor(SkebExtractor):
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators"
example = "https://skeb.jp/@USER/following_creators"
def items(self):
for user in self.users():
url = "{}/@{}".format(self.root, user["screen_name"])
user["_extractor"] = SkebUserExtractor
yield Message.Queue, url, user
items = SkebExtractor._items_users
def users(self):
url = "{}/api/users/{}/following_creators".format(
self.root, self.user_name)
params = {"sort": "date", "offset": 0, "limit": 90}
endpoint = "/users/{}/following_creators".format(self.user_name)
params = {"sort": "date"}
return self._pagination_users(endpoint, params)
while True:
data = self.request(
url, params=params, headers=self.headers).json()
yield from data
if len(data) < params["limit"]:
return
params["offset"] += params["limit"]
class SkebFollowingUsersExtractor(SkebExtractor):
"""Extractor for your followed users"""
subcategory = "following-users"
pattern = r"(?:https?://)?skeb\.jp/following_users()"
example = "https://skeb.jp/following_users"
items = SkebExtractor._items_users
def users(self):
endpoint = "/following_users"
params = {}
return self._pagination_users(endpoint, params)

View File

@ -163,6 +163,9 @@ class SteamgriddbAssetExtractor(SteamgriddbExtractor):
def assets(self):
endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id
asset = self._call(endpoint)["asset"]
if asset is None:
raise exception.NotFoundError("asset ({}:{})".format(
self.asset_type, self.asset_id))
return (asset,)

View File

@ -175,7 +175,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
"author_id" : text.parse_int(extr('data-user-id="', '"')),
"author_nick": text.unescape(extr('alt="', '"')),
"date" : self._parse_datetime(extr(
'class="section-subtitle">', '<')),
'<span class="star_link-types">', '<')),
"content" : (extr(
'<div class="post-content', '<div class="post-uploads')
.partition(">")[2]),

View File

@ -151,3 +151,18 @@ class TapasEpisodeExtractor(TapasExtractor):
def episode_ids(self):
return (self.episode_id,)
class TapasCreatorExtractor(TapasExtractor):
subcategory = "creator"
pattern = BASE_PATTERN + r"/(?!series|episode)([^/?#]+)"
example = "https://tapas.io/CREATOR"
def items(self):
url = "{}/{}/series".format(self.root, self.groups[0])
page = self.request(url).text
page = text.extr(page, '<ul class="content-list-wrap', "</ul>")
data = {"_extractor": TapasSeriesExtractor}
for path in text.extract_iter(page, ' href="', '"'):
yield Message.Queue, self.root + path, data

View File

@ -447,9 +447,9 @@ class TumblrAPI(oauth.OAuth1API):
if api_key == self.API_KEY:
self.log.info(
"Register your own OAuth application and use its "
"credentials to prevent this error: https://githu"
"b.com/mikf/gallery-dl/blob/master/docs/configurat"
"ion.rst#extractortumblrapi-key--api-secret")
"credentials to prevent this error: "
"https://gdl-org.github.io/docs/configuration.html"
"#extractor-tumblr-api-key-api-secret")
if self.extractor.config("ratelimit") == "wait":
self.extractor.wait(seconds=reset)

View File

@ -12,11 +12,12 @@ from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache, memcache
import itertools
import random
import json
import re
BASE_PATTERN = (r"(?:https?://)?(?:www\.|mobile\.)?"
r"(?:(?:[fv]x)?twitter|(?:fixup)?x)\.com")
r"(?:(?:[fv]x)?twitter|(?:fix(?:up|v))?x)\.com")
class TwitterExtractor(Extractor):
@ -243,8 +244,8 @@ class TwitterExtractor(Extractor):
# collect URLs from entities
for url in tweet["entities"].get("urls") or ():
url = url["expanded_url"]
if "//twitpic.com/" not in url or "/photos/" in url:
url = url.get("expanded_url") or url.get("url") or ""
if not url or "//twitpic.com/" not in url or "/photos/" in url:
continue
if url.startswith("http:"):
url = "https" + url[4:]
@ -336,10 +337,20 @@ class TwitterExtractor(Extractor):
urls = entities.get("urls")
if urls:
for url in urls:
try:
content = content.replace(url["url"], url["expanded_url"])
except KeyError:
pass
txt, _, tco = content.rpartition(" ")
tdata["content"] = txt if tco.startswith("https://t.co/") else content
if "birdwatch_pivot" in tweet:
try:
tdata["birdwatch"] = \
tweet["birdwatch_pivot"]["subtitle"]["text"]
except KeyError:
self.log.debug("Unable to extract 'birdwatch' note from %s",
tweet["birdwatch_pivot"])
if "in_reply_to_screen_name" in legacy:
tdata["reply_to"] = legacy["in_reply_to_screen_name"]
if "quoted_by" in legacy:
@ -380,6 +391,7 @@ class TwitterExtractor(Extractor):
"date" : text.parse_datetime(
uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
"verified" : uget("verified", False),
"protected" : uget("protected", False),
"profile_banner" : uget("profile_banner_url", ""),
"profile_image" : uget(
"profile_image_url_https", "").replace("_normal.", "."),
@ -395,7 +407,10 @@ class TwitterExtractor(Extractor):
urls = entities["description"].get("urls")
if urls:
for url in urls:
try:
descr = descr.replace(url["url"], url["expanded_url"])
except KeyError:
pass
udata["description"] = descr
if "url" in entities:
@ -731,9 +746,10 @@ class TwitterEventExtractor(TwitterExtractor):
class TwitterTweetExtractor(TwitterExtractor):
"""Extractor for images from individual tweets"""
"""Extractor for individual tweets"""
subcategory = "tweet"
pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
pattern = (BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
r"/?(?:$|\?|#|photo/)")
example = "https://twitter.com/USER/status/12345"
def __init__(self, match):
@ -810,6 +826,18 @@ class TwitterTweetExtractor(TwitterExtractor):
return itertools.chain(buffer, tweets)
class TwitterQuotesExtractor(TwitterExtractor):
"""Extractor for quotes of a Tweet"""
subcategory = "quotes"
pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
example = "https://twitter.com/USER/status/12345/quotes"
def items(self):
url = "{}/search?q=quoted_tweet_id:{}".format(self.root, self.user)
data = {"_extractor": TwitterSearchExtractor}
yield Message.Queue, url, data
class TwitterAvatarExtractor(TwitterExtractor):
subcategory = "avatar"
filename_fmt = "avatar {date}.{extension}"
@ -882,6 +910,7 @@ class TwitterAPI():
def __init__(self, extractor):
self.extractor = extractor
self.log = extractor.log
self.root = "https://twitter.com/i/api"
self._nsfw_warning = True
@ -1244,7 +1273,7 @@ class TwitterAPI():
@cache(maxage=3600)
def _guest_token(self):
endpoint = "/1.1/guest/activate.json"
self.extractor.log.info("Requesting guest token")
self.log.info("Requesting guest token")
return str(self._call(
endpoint, None, "POST", False, "https://api.twitter.com",
)["guest_token"])
@ -1272,45 +1301,72 @@ class TwitterAPI():
if csrf_token:
self.headers["x-csrf-token"] = csrf_token
if response.status_code < 400:
remaining = int(response.headers.get("x-rate-limit-remaining", 6))
if remaining < 6 and remaining <= random.randrange(1, 6):
self._handle_ratelimit(response)
continue
try:
data = response.json()
if not data.get("errors") or not any(
(e.get("message") or "").lower().startswith("timeout")
for e in data["errors"]):
return data # success or non-timeout errors
except ValueError:
data = {"errors": ({"message": response.text},)}
msg = data["errors"][0].get("message") or "Unspecified"
self.extractor.log.debug("Internal Twitter error: '%s'", msg)
errors = data.get("errors")
if not errors:
return data
retry = False
for error in errors:
msg = error.get("message") or "Unspecified"
self.log.debug("API error: '%s'", msg)
if "this account is temporarily locked" in msg:
msg = "Account temporarily locked"
if self.extractor.config("locked") != "wait":
raise exception.AuthorizationError(msg)
self.log.warning(msg)
self.extractor.input("Press ENTER to retry.")
retry = True
elif "Could not authenticate you" in msg:
if not self.extractor.config("relogin", True):
continue
username, password = self.extractor._get_auth_info()
if not username:
continue
_login_impl.invalidate(username)
self.extractor.cookies_update(
_login_impl(self.extractor, username, password))
self.__init__(self.extractor)
retry = True
elif msg.lower().startswith("timeout"):
retry = True
if retry:
if self.headers["x-twitter-auth-type"]:
self.extractor.log.debug("Retrying API request")
continue # retry
self.log.debug("Retrying API request")
continue
else:
# fall through to "Login Required"
response.status_code = 404
if response.status_code == 429:
# rate limit exceeded
if self.extractor.config("ratelimit") == "abort":
raise exception.StopExtraction("Rate limit exceeded")
until = response.headers.get("x-rate-limit-reset")
seconds = None if until else 60
self.extractor.wait(until=until, seconds=seconds)
continue
if response.status_code in (403, 404) and \
if response.status_code < 400:
return data
elif response.status_code in (403, 404) and \
not self.headers["x-twitter-auth-type"]:
raise exception.AuthorizationError("Login required")
elif response.status_code == 429:
self._handle_ratelimit(response)
continue
# error
try:
data = response.json()
errors = ", ".join(e["message"] for e in data["errors"])
except ValueError:
errors = response.text
errors = ", ".join(e["message"] for e in errors)
except Exception:
errors = data.get("errors", "")
pass
raise exception.StopExtraction(
"%s %s (%s)", response.status_code, response.reason, errors)
@ -1374,7 +1430,7 @@ class TwitterAPI():
try:
tweet = tweets[tweet_id]
except KeyError:
self.extractor.log.debug("Skipping %s (deleted)", tweet_id)
self.log.debug("Skipping %s (deleted)", tweet_id)
continue
if "retweeted_status_id_str" in tweet:
@ -1606,8 +1662,10 @@ class TwitterAPI():
variables["cursor"] = cursor
def _pagination_users(self, endpoint, variables, path=None):
params = {"variables": None,
"features" : self._json_dumps(self.features_pagination)}
params = {
"variables": None,
"features" : self._json_dumps(self.features_pagination),
}
while True:
cursor = entry = None
@ -1644,6 +1702,13 @@ class TwitterAPI():
return
variables["cursor"] = cursor
def _handle_ratelimit(self, response):
if self.extractor.config("ratelimit") == "abort":
raise exception.StopExtraction("Rate limit exceeded")
until = response.headers.get("x-rate-limit-reset")
self.extractor.wait(until=until, seconds=None if until else 60)
def _process_tombstone(self, entry, tombstone):
text = (tombstone.get("richText") or tombstone["text"])["text"]
tweet_id = entry["entryId"].rpartition("-")[2]
@ -1651,30 +1716,30 @@ class TwitterAPI():
if text.startswith("Age-restricted"):
if self._nsfw_warning:
self._nsfw_warning = False
self.extractor.log.warning('"%s"', text)
self.log.warning('"%s"', text)
self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
self.log.debug("Skipping %s ('%s')", tweet_id, text)
@cache(maxage=365*86400, keyarg=1)
def _login_impl(extr, username, password):
import re
import random
def process(data, params=None):
response = extr.request(
url, params=params, headers=headers, json=data,
method="POST", fatal=None)
if re.fullmatch(r"[\w.%+-]+@[\w.-]+\.\w{2,}", username):
extr.log.warning(
"Login with email is no longer possible. "
"You need to provide your username or phone number instead.")
def process(response):
try:
data = response.json()
except ValueError:
data = {"errors": ({"message": "Invalid response"},)}
else:
if response.status_code < 400:
return data["flow_token"]
try:
return (data["flow_token"],
data["subtasks"][0]["subtask_id"])
except LookupError:
pass
errors = []
for error in data.get("errors") or ():
@ -1683,9 +1748,13 @@ def _login_impl(extr, username, password):
extr.log.debug(response.text)
raise exception.AuthenticationError(", ".join(errors))
extr.cookies.clear()
cookies = extr.cookies
cookies.clear()
api = TwitterAPI(extr)
api._authenticate_guest()
url = "https://api.twitter.com/1.1/onboarding/task.json"
params = {"flow_name": "login"}
headers = api.headers
extr.log.info("Logging in as %s", username)
@ -1742,31 +1811,18 @@ def _login_impl(extr, username, password):
"web_modal": 1,
},
}
url = "https://api.twitter.com/1.1/onboarding/task.json?flow_name=login"
response = extr.request(url, method="POST", headers=headers, json=data)
flow_token, subtask = process(data, params)
while not cookies.get("auth_token"):
if subtask == "LoginJsInstrumentationSubtask":
data = {
"flow_token": process(response),
"subtask_inputs": [
{
"subtask_id": "LoginJsInstrumentationSubtask",
"js_instrumentation": {
"response": "{}",
"link": "next_link",
},
},
],
}
url = "https://api.twitter.com/1.1/onboarding/task.json"
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
# username
elif subtask == "LoginEnterUserIdentifierSSO":
data = {
"flow_token": process(response),
"subtask_inputs": [
{
"subtask_id": "LoginEnterUserIdentifierSSO",
"settings_list": {
"setting_responses": [
{
@ -1778,48 +1834,61 @@ def _login_impl(extr, username, password):
],
"link": "next_link",
},
},
],
}
# url = "https://api.twitter.com/1.1/onboarding/task.json"
extr.sleep(random.uniform(2.0, 4.0), "login (username)")
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
# password
elif subtask == "LoginEnterPassword":
data = {
"flow_token": process(response),
"subtask_inputs": [
{
"subtask_id": "LoginEnterPassword",
"enter_password": {
"password": password,
"link": "next_link",
},
},
],
}
# url = "https://api.twitter.com/1.1/onboarding/task.json"
extr.sleep(random.uniform(2.0, 4.0), "login (password)")
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
# account duplication check ?
elif subtask == "LoginEnterAlternateIdentifierSubtask":
alt = extr.input(
"Alternate Identifier (username, email, phone number): ")
data = {
"enter_text": {
"text": alt,
"link": "next_link",
},
}
elif subtask == "LoginTwoFactorAuthChallenge":
data = {
"enter_text": {
"text": extr.input("2FA Token: "),
"link": "next_link",
},
}
elif subtask == "LoginAcid":
data = {
"enter_text": {
"text": extr.input("Email Verification Code: "),
"link": "next_link",
},
}
elif subtask == "AccountDuplicationCheck":
data = {
"flow_token": process(response),
"subtask_inputs": [
{
"subtask_id": "AccountDuplicationCheck",
"check_logged_in_account": {
"link": "AccountDuplicationCheck_false",
},
},
],
}
# url = "https://api.twitter.com/1.1/onboarding/task.json"
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=None)
process(response)
elif subtask == "ArkoseLogin":
raise exception.AuthenticationError("Login requires CAPTCHA")
elif subtask == "DenyLoginSubtask":
raise exception.AuthenticationError("Login rejected as suspicious")
elif subtask == "ArkoseLogin":
raise exception.AuthenticationError("No auth token cookie")
else:
raise exception.StopExtraction("Unrecognized subtask %s", subtask)
inputs = {"subtask_id": subtask}
inputs.update(data)
data = {
"flow_token": flow_token,
"subtask_inputs": [inputs],
}
extr.sleep(random.uniform(1.0, 3.0), "login ({})".format(subtask))
flow_token, subtask = process(data)
return {
cookie.name: cookie.value

View File

@ -26,17 +26,39 @@ class VipergirlsExtractor(Extractor):
cookies_domain = ".vipergirls.to"
cookies_names = ("vg_userid", "vg_password")
def _init(self):
domain = self.config("domain")
if domain:
self.root = text.ensure_http_scheme(domain)
def items(self):
self.login()
posts = self.posts()
for post in self.posts():
like = self.config("like")
if like:
user_hash = posts[0].get("hash")
if len(user_hash) < 16:
self.log.warning("Login required to like posts")
like = False
posts = posts.iter("post")
if self.page:
util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
for post in posts:
data = post.attrib
data["thread_id"] = self.thread_id
yield Message.Directory, data
image = None
for image in post:
yield Message.Queue, image.attrib["main_url"], data
if image is not None and like:
self.like(post, user_hash)
def login(self):
if self.cookies_check(self.cookies_names):
return
@ -64,6 +86,17 @@ class VipergirlsExtractor(Extractor):
return {cookie.name: cookie.value
for cookie in response.cookies}
def like(self, post, user_hash):
url = self.root + "/post_thanks.php"
params = {
"do" : "post_thanks_add",
"p" : post.get("id"),
"securitytoken": user_hash,
}
with self.request(url, params=params, allow_redirects=False):
pass
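
With the new "like" option enabled (and a logged-in account, since the per-user "hash" attribute from the API response serves as the security token), every post that yielded at least one image gets a thanks request. A minimal sketch of that request with placeholder values; the endpoint and parameters are taken from the method above, while the root URL and session cookies are assumptions:

import requests

ROOT = "https://vipergirls.to"          # default root; overridable via the new "domain" option
session = requests.Session()            # assumed to already carry vg_userid/vg_password cookies

def like_post(post_id, user_hash):
    # fire-and-forget "thanks" request; redirects are not followed
    params = {
        "do": "post_thanks_add",
        "p": post_id,
        "securitytoken": user_hash,     # the >=16-character hash of a logged-in user
    }
    session.get(ROOT + "/post_thanks.php", params=params, allow_redirects=False)

like_post("1234567", "0123456789abcdef")    # placeholder post id and hash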
class VipergirlsThreadExtractor(VipergirlsExtractor):
"""Extractor for vipergirls threads"""
@ -77,12 +110,7 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
def posts(self):
url = "{}/vr.php?t={}".format(self.root, self.thread_id)
root = ElementTree.fromstring(self.request(url).text)
posts = root.iter("post")
if self.page:
util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
return posts
return ElementTree.fromstring(self.request(url).text)
class VipergirlsPostExtractor(VipergirlsExtractor):
@ -95,8 +123,8 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
def __init__(self, match):
VipergirlsExtractor.__init__(self, match)
self.thread_id, self.post_id = match.groups()
self.page = 0
def posts(self):
url = "{}/vr.php?p={}".format(self.root, self.post_id)
root = ElementTree.fromstring(self.request(url).text)
return root.iter("post")
return ElementTree.fromstring(self.request(url).text)

View File

@ -46,6 +46,8 @@ class VscoExtractor(Extractor):
url = "https://image-{}.vsco.co/{}".format(cdn, path)
elif cdn.isdecimal():
url = "https://image.vsco.co/" + base
elif img["responsive_url"].startswith("http"):
url = img["responsive_url"]
else:
url = "https://" + img["responsive_url"]
@ -238,6 +240,34 @@ class VscoSpacesExtractor(VscoExtractor):
yield Message.Queue, url, space
class VscoAvatarExtractor(VscoExtractor):
"""Extractor for vsco.co user avatars"""
subcategory = "avatar"
pattern = USER_PATTERN + r"/avatar"
example = "https://vsco.co/USER/avatar"
def images(self):
url = "{}/{}/gallery".format(self.root, self.user)
page = self.request(url).text
piid = text.extr(page, '"profileImageId":"', '"')
url = "https://im.vsco.co/" + piid
# needs GET request, since HEAD does not redirect to full URL
response = self.request(url, allow_redirects=False)
return ({
"_id" : piid,
"is_video" : False,
"grid_name" : "",
"upload_date" : 0,
"responsive_url": response.headers["Location"],
"video_url" : "",
"image_meta" : None,
"width" : 0,
"height" : 0,
},)
class VscoImageExtractor(VscoExtractor):
"""Extractor for individual images on vsco.co"""
subcategory = "image"

View File

@ -50,7 +50,7 @@ class WarosuThreadExtractor(Extractor):
title = text.unescape(text.extr(page, "class=filetitle>", "<"))
return {
"board" : self.board,
"board_name": boardname.rpartition(" - ")[2],
"board_name": boardname.split(" - ")[1],
"thread" : self.thread,
"title" : title,
}
@ -64,8 +64,7 @@ class WarosuThreadExtractor(Extractor):
def parse(self, post):
"""Build post object by extracting data from an HTML post"""
data = self._extract_post(post)
if "<span> File:" in post:
self._extract_image(post, data)
if "<span> File:" in post and self._extract_image(post, data):
part = data["image"].rpartition("/")[2]
data["tim"], _, data["extension"] = part.partition(".")
data["ext"] = "." + data["extension"]
@ -91,6 +90,11 @@ class WarosuThreadExtractor(Extractor):
"", "<").rstrip().rpartition(".")[0])
extr("<br>", "")
data["image"] = url = extr("<a href=", ">")
url = extr("<a href=", ">")
if url:
if url[0] == "/":
data["image"] = self.root + url
else:
data["image"] = url
return True
return False

View File

@ -30,9 +30,9 @@ class WeiboExtractor(Extractor):
self._prefix, self.user = match.groups()
def _init(self):
self.retweets = self.config("retweets", True)
self.videos = self.config("videos", True)
self.livephoto = self.config("livephoto", True)
self.retweets = self.config("retweets", False)
self.videos = self.config("videos", True)
self.gifs = self.config("gifs", True)
self.gifs_video = (self.gifs == "video")
@ -59,15 +59,25 @@ class WeiboExtractor(Extractor):
for status in self.statuses():
if "ori_mid" in status and not self.retweets:
self.log.debug("Skipping %s (快转 retweet)", status["id"])
continue
if "retweeted_status" in status:
if not self.retweets:
self.log.debug("Skipping %s (retweet)", status["id"])
continue
# videos of the original post are in status
# images of the original post are in status["retweeted_status"]
files = []
if self.retweets and "retweeted_status" in status:
if original_retweets:
status = status["retweeted_status"]
self._extract_status(status, files)
else:
self._extract_status(status, files)
self._extract_status(status["retweeted_status"], files)
if original_retweets:
status = status["retweeted_status"]
else:
files = []
self._extract_status(status, files)
status["date"] = text.parse_datetime(
@ -118,7 +128,7 @@ class WeiboExtractor(Extractor):
append(pic["largest"].copy())
file = {"url": pic["video"]}
file["filehame"], _, file["extension"] = \
file["filename"], _, file["extension"] = \
pic["video"].rpartition("%2F")[2].rpartition(".")
append(file)
@ -176,22 +186,33 @@ class WeiboExtractor(Extractor):
data = data["data"]
statuses = data["list"]
if not statuses:
return
yield from statuses
if "next_cursor" in data: # videos, newvideo
if data["next_cursor"] == -1:
# videos, newvideo
cursor = data.get("next_cursor")
if cursor:
if cursor == -1:
return
params["cursor"] = data["next_cursor"]
elif "page" in params: # home, article
params["page"] += 1
elif data["since_id"]: # album
params["cursor"] = cursor
continue
# album
since_id = data.get("since_id")
if since_id:
params["sinceid"] = data["since_id"]
else: # feed, last album page
continue
# home, article
if "page" in params:
if not statuses:
return
params["page"] += 1
continue
# feed, last album page
try:
params["since_id"] = statuses[-1]["id"] - 1
except KeyError:
except LookupError:
return
def _sina_visitor_system(self, response):

View File

@ -27,9 +27,9 @@ class WikimediaExtractor(BaseExtractor):
if self.category == "wikimedia":
self.category = self.root.split(".")[-2]
elif self.category == "fandom":
self.category = \
"fandom-" + self.root.partition(".")[0].rpartition("/")[2]
elif self.category in ("fandom", "wikigg"):
self.category = "{}-{}".format(
self.category, self.root.partition(".")[0].rpartition("/")[2])
if path.startswith("wiki/"):
path = path[5:]
@ -69,14 +69,18 @@ class WikimediaExtractor(BaseExtractor):
def items(self):
for info in self._pagination(self.params):
try:
image = info["imageinfo"][0]
except LookupError:
self.log.debug("Missing 'imageinfo' for %s", info)
continue
image["metadata"] = {
m["name"]: m["value"]
for m in image["metadata"]}
for m in image["metadata"] or ()}
image["commonmetadata"] = {
m["name"]: m["value"]
for m in image["commonmetadata"]}
for m in image["commonmetadata"] or ()}
filename = image["canonicaltitle"]
image["filename"], _, image["extension"] = \
@ -148,6 +152,10 @@ BASE_PATTERN = WikimediaExtractor.update({
"root": None,
"pattern": r"[\w-]+\.fandom\.com",
},
"wikigg": {
"root": None,
"pattern": r"\w+\.wiki\.gg",
},
"mariowiki": {
"root": "https://www.mariowiki.com",
"pattern": r"(?:www\.)?mariowiki\.com",

View File

@ -243,13 +243,12 @@ class TemplateFStringFormatter(FStringFormatter):
def parse_field_name(field_name):
if field_name[0] == "'":
return "_lit", (operator.itemgetter(field_name[1:-1]),)
first, rest = _string.formatter_field_name_split(field_name)
funcs = []
if first[0] == "'":
funcs.append(operator.itemgetter(first[1:-1]))
first = "_lit"
for is_attr, key in rest:
if is_attr:
func = operator.attrgetter
@ -375,15 +374,15 @@ def _parse_offset(format_spec, default):
fmt = _build_format_func(format_spec, default)
if not offset or offset == "local":
is_dst = time.daylight and time.localtime().tm_isdst > 0
offset = -(time.altzone if is_dst else time.timezone)
def off(dt):
local = time.localtime(util.datetime_to_timestamp(dt))
return fmt(dt + datetime.timedelta(0, local.tm_gmtoff))
else:
hours, _, minutes = offset.partition(":")
offset = 3600 * int(hours)
if minutes:
offset += 60 * (int(minutes) if offset > 0 else -int(minutes))
offset = datetime.timedelta(seconds=offset)
offset = datetime.timedelta(0, offset)
def off(obj):
return fmt(obj + offset)
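
The else branch above turns an explicit offset such as "+05:30" into a timedelta; the same arithmetic as a stand-alone sketch (hypothetical helper name, not part of the module):

import datetime

def parse_utc_offset(offset):
    # "+05:30" -> 5 h 30 min ahead of UTC; "-07:00" -> 7 h behind
    hours, _, minutes = offset.partition(":")
    seconds = 3600 * int(hours)
    if minutes:
        seconds += 60 * (int(minutes) if seconds > 0 else -int(minutes))
    return datetime.timedelta(0, seconds)

# parse_utc_offset("+05:30") == datetime.timedelta(seconds=19800)
# parse_utc_offset("-07:00") == datetime.timedelta(seconds=-25200)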

View File

@ -11,10 +11,23 @@ import errno
import logging
import functools
import collections
from . import extractor, downloader, postprocessor
from . import config, text, util, path, formatter, output, exception, version
from . import (
extractor,
downloader,
postprocessor,
archive,
config,
exception,
formatter,
output,
path,
text,
util,
version,
)
from .extractor.message import Message
from .output import stdout_write
stdout_write = output.stdout_write
class Job():
@ -423,6 +436,8 @@ class DownloadJob(Job):
def handle_finalize(self):
if self.archive:
if not self.status:
self.archive.finalize()
self.archive.close()
pathfmt = self.pathfmt
@ -453,9 +468,12 @@ class DownloadJob(Job):
for callback in self.hooks["skip"]:
callback(pathfmt)
if self._skipexc:
if not self._skipftr or self._skipftr(pathfmt.kwdict):
self._skipcnt += 1
if self._skipcnt >= self._skipmax:
raise self._skipexc()
else:
self._skipcnt = 0
def download(self, url):
"""Download 'url'"""
@ -507,23 +525,28 @@ class DownloadJob(Job):
# monkey-patch method to do nothing and always return True
self.download = pathfmt.fix_extension
archive = cfg("archive")
if archive:
archive = util.expand_path(archive)
archive_path = cfg("archive")
if archive_path:
archive_path = util.expand_path(archive_path)
archive_format = (cfg("archive-prefix", extr.category) +
cfg("archive-format", extr.archive_fmt))
archive_pragma = (cfg("archive-pragma"))
try:
if "{" in archive:
archive = formatter.parse(archive).format_map(kwdict)
self.archive = util.DownloadArchive(
archive, archive_format, archive_pragma)
if "{" in archive_path:
archive_path = formatter.parse(
archive_path).format_map(kwdict)
if cfg("archive-mode") == "memory":
archive_cls = archive.DownloadArchiveMemory
else:
archive_cls = archive.DownloadArchive
self.archive = archive_cls(
archive_path, archive_format, archive_pragma)
except Exception as exc:
extr.log.warning(
"Failed to open download archive at '%s' (%s: %s)",
archive, exc.__class__.__name__, exc)
archive_path, exc.__class__.__name__, exc)
else:
extr.log.debug("Using download archive '%s'", archive)
extr.log.debug("Using download archive '%s'", archive_path)
skip = cfg("skip", True)
if skip:
@ -539,6 +562,12 @@ class DownloadJob(Job):
elif skip == "exit":
self._skipexc = SystemExit
self._skipmax = text.parse_int(smax)
skip_filter = cfg("skip-filter")
if skip_filter:
self._skipftr = util.compile_expression(skip_filter)
else:
self._skipftr = None
else:
# monkey-patch methods to always return False
pathfmt.exists = lambda x=None: False
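
Taken together, skip, skip-filter, and the skip counter behave roughly like the stand-alone sketch below; compile_expression mirrors the util helper shown later in this diff, and the sample kwdicts are made up for illustration:

import functools

def compile_expression(expr, name="<expr>", globals=None):
    code_object = compile(expr, name, "eval")
    return functools.partial(eval, code_object, globals or {})

skip_filter = compile_expression("extension == 'gif'")  # "skip-filter" option

skipcnt, skipmax = 0, 3                                  # e.g. skip = "exit:3"
for kwdict in ({"extension": "gif"}, {"extension": "jpg"}, {"extension": "gif"}):
    if skip_filter(kwdict):
        skipcnt += 1            # only skips matching the filter count
        if skipcnt >= skipmax:
            raise SystemExit()  # the configured skip action
    else:
        skipcnt = 0             # a non-matching skip resets the counter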

View File

@ -249,6 +249,12 @@ def build_parser():
action="store_const", const=logging.ERROR,
help="Activate quiet mode",
)
output.add_argument(
"-w", "--warning",
dest="loglevel",
action="store_const", const=logging.WARNING,
help="Print only warnings and errors",
)
output.add_argument(
"-v", "--verbose",
dest="loglevel",
@ -319,6 +325,11 @@ def build_parser():
help=("Write downloaded intermediary pages to files "
"in the current directory to debug problems"),
)
output.add_argument(
"--no-colors",
dest="colors", action="store_false",
help=("Do not emit ANSI color codes in output"),
)
downloader = parser.add_argument_group("Downloader Options")
downloader.add_argument(

View File

@ -15,12 +15,40 @@ import unicodedata
from . import config, util, formatter
# --------------------------------------------------------------------
# Globals
COLORS = not os.environ.get("NO_COLOR")
COLORS_DEFAULT = {
"success": "1;32",
"skip" : "2",
"debug" : "0;37",
"info" : "1;37",
"warning": "1;33",
"error" : "1;31",
} if COLORS else {}
if util.WINDOWS:
ANSI = COLORS and os.environ.get("TERM") == "ANSI"
OFFSET = 1
CHAR_SKIP = "# "
CHAR_SUCCESS = "* "
CHAR_ELLIPSIES = "..."
else:
ANSI = COLORS
OFFSET = 0
CHAR_SKIP = "# "
CHAR_SUCCESS = ""
CHAR_ELLIPSIES = ""
# --------------------------------------------------------------------
# Logging
LOG_FORMAT = "[{name}][{levelname}] {message}"
LOG_FORMAT_DATE = "%Y-%m-%d %H:%M:%S"
LOG_LEVEL = logging.INFO
LOG_LEVELS = ("debug", "info", "warning", "error")
class Logger(logging.Logger):
@ -129,7 +157,7 @@ class Formatter(logging.Formatter):
def __init__(self, fmt, datefmt):
if isinstance(fmt, dict):
for key in ("debug", "info", "warning", "error"):
for key in LOG_LEVELS:
value = fmt[key] if key in fmt else LOG_FORMAT
fmt[key] = (formatter.parse(value).format_map,
"{asctime" in value)
@ -187,16 +215,36 @@ def configure_logging(loglevel):
# stream logging handler
handler = root.handlers[0]
opts = config.interpolate(("output",), "log")
colors = config.interpolate(("output",), "colors")
if colors is None:
colors = COLORS_DEFAULT
if colors and not opts:
opts = LOG_FORMAT
if opts:
if isinstance(opts, str):
opts = {"format": opts}
if handler.level == LOG_LEVEL and "level" in opts:
handler.setLevel(opts["level"])
if "format" in opts or "format-date" in opts:
logfmt = opts
opts = {}
elif "format" in opts:
logfmt = opts["format"]
else:
logfmt = LOG_FORMAT
if not isinstance(logfmt, dict) and colors:
ansifmt = "\033[{}m{}\033[0m".format
lf = {}
for level in LOG_LEVELS:
c = colors.get(level)
lf[level] = ansifmt(c, logfmt) if c else logfmt
logfmt = lf
handler.setFormatter(Formatter(
opts.get("format", LOG_FORMAT),
opts.get("format-date", LOG_FORMAT_DATE),
))
logfmt, opts.get("format-date", LOG_FORMAT_DATE)))
if "level" in opts and handler.level == LOG_LEVEL:
handler.setLevel(opts["level"])
if minlevel > handler.level:
minlevel = handler.level
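
When a plain format string and a color mapping are both in play, the code above expands the string into a per-level dict; a compact sketch of that construction (COLORS_DEFAULT excerpted from the globals further up):

LOG_FORMAT = "[{name}][{levelname}] {message}"
COLORS_DEFAULT = {"warning": "1;33", "error": "1;31"}   # excerpt

ansifmt = "\033[{}m{}\033[0m".format
logfmt = {}
for level in ("debug", "info", "warning", "error"):
    c = COLORS_DEFAULT.get(level)
    # wrap the whole line in the level's ANSI sequence, if one is defined
    logfmt[level] = ansifmt(c, LOG_FORMAT) if c else LOG_FORMAT

# logfmt["error"] == "\033[1;31m[{name}][{levelname}] {message}\033[0m"
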
@ -307,10 +355,13 @@ def select():
mode = config.get(("output",), "mode")
if mode is None or mode == "auto":
if hasattr(sys.stdout, "isatty") and sys.stdout.isatty():
try:
if sys.stdout.isatty():
output = ColorOutput() if ANSI else TerminalOutput()
else:
output = PipeOutput()
except Exception:
output = PipeOutput()
elif isinstance(mode, dict):
output = CustomOutput(mode)
else:
@ -388,7 +439,10 @@ class ColorOutput(TerminalOutput):
def __init__(self):
TerminalOutput.__init__(self)
colors = config.get(("output",), "colors") or {}
colors = config.interpolate(("output",), "colors")
if colors is None:
colors = COLORS_DEFAULT
self.color_skip = "\033[{}m".format(
colors.get("skip", "2"))
self.color_success = "\r\033[{}m".format(
@ -514,17 +568,3 @@ def shorten_string_eaw(txt, limit, sep="…", cache=EAWCache()):
right -= 1
return txt[:left] + sep + txt[right+1:]
if util.WINDOWS:
ANSI = os.environ.get("TERM") == "ANSI"
OFFSET = 1
CHAR_SKIP = "# "
CHAR_SUCCESS = "* "
CHAR_ELLIPSIES = "..."
else:
ANSI = True
OFFSET = 0
CHAR_SKIP = "# "
CHAR_SUCCESS = ""
CHAR_ELLIPSIES = ""

View File

@ -8,7 +8,7 @@
"""Common classes and constants used by postprocessor modules."""
from .. import util, formatter
from .. import util, formatter, archive
class PostProcessor():
@ -22,30 +22,31 @@ class PostProcessor():
return self.__class__.__name__
def _init_archive(self, job, options, prefix=None):
archive = options.get("archive")
if archive:
archive_path = options.get("archive")
if archive_path:
extr = job.extractor
archive = util.expand_path(archive)
archive_path = util.expand_path(archive_path)
if not prefix:
prefix = "_" + self.name.upper() + "_"
archive_format = (
options.get("archive-prefix", extr.category) +
options.get("archive-format", prefix + extr.archive_fmt))
try:
if "{" in archive:
archive = formatter.parse(archive).format_map(
if "{" in archive_path:
archive_path = formatter.parse(archive_path).format_map(
job.pathfmt.kwdict)
self.archive = util.DownloadArchive(
archive, archive_format,
self.archive = archive.DownloadArchive(
archive_path, archive_format,
options.get("archive-pragma"),
"_archive_" + self.name)
except Exception as exc:
self.log.warning(
"Failed to open %s archive at '%s' (%s: %s)",
self.name, archive, exc.__class__.__name__, exc)
self.name, archive_path, exc.__class__.__name__, exc)
else:
self.log.debug("Using %s archive '%s'", self.name, archive)
self.log.debug(
"Using %s archive '%s'", self.name, archive_path)
return True
else:
self.archive = None
return False

View File

@ -10,7 +10,6 @@
from .common import PostProcessor
from .. import util, formatter
import subprocess
import os
import re
@ -80,14 +79,14 @@ class ExecPP(PostProcessor):
def _exec(self, args, shell):
self.log.debug("Running '%s'", args)
retcode = subprocess.Popen(args, shell=shell).wait()
retcode = util.Popen(args, shell=shell).wait()
if retcode:
self.log.warning("'%s' returned with non-zero exit status (%d)",
args, retcode)
def _exec_async(self, args, shell):
self.log.debug("Running '%s'", args)
subprocess.Popen(args, shell=shell)
util.Popen(args, shell=shell)
def _replace(self, match):
name = match.group(1)

View File

@ -33,6 +33,9 @@ class MtimePP(PostProcessor):
def run(self, pathfmt):
mtime = self._get(pathfmt.kwdict)
if mtime is None:
return
pathfmt.kwdict["_mtime"] = (
util.datetime_to_timestamp(mtime)
if isinstance(mtime, datetime) else

View File

@ -155,7 +155,9 @@ class UgoiraPP(PostProcessor):
self.log.error("Unable to invoke FFmpeg (%s: %s)",
exc.__class__.__name__, exc)
pathfmt.realpath = pathfmt.temppath
except Exception:
except Exception as exc:
print()
self.log.error("%s: %s", exc.__class__.__name__, exc)
pathfmt.realpath = pathfmt.temppath
else:
if self.mtime:
@ -171,7 +173,7 @@ class UgoiraPP(PostProcessor):
def _exec(self, args):
self.log.debug(args)
out = None if self.output else subprocess.DEVNULL
retcode = subprocess.Popen(args, stdout=out, stderr=out).wait()
retcode = util.Popen(args, stdout=out, stderr=out).wait()
if retcode:
print()
self.log.error("Non-zero exit status when running %s (%s)",

View File

@ -73,7 +73,7 @@ def filename_from_url(url):
"""Extract the last part of an URL to use as a filename"""
try:
return url.partition("?")[0].rpartition("/")[2]
except (TypeError, AttributeError):
except Exception:
return ""
@ -122,7 +122,7 @@ def extract(txt, begin, end, pos=0):
first = txt.index(begin, pos) + len(begin)
last = txt.index(end, first)
return txt[first:last], last+len(end)
except (ValueError, TypeError, AttributeError):
except Exception:
return None, pos
@ -131,7 +131,7 @@ def extr(txt, begin, end, default=""):
try:
first = txt.index(begin) + len(begin)
return txt[first:txt.index(end, first)]
except (ValueError, TypeError, AttributeError):
except Exception:
return default
@ -141,7 +141,7 @@ def rextract(txt, begin, end, pos=-1):
first = txt.rindex(begin, 0, pos)
last = txt.index(end, first + lbeg)
return txt[first + lbeg:last], first
except (ValueError, TypeError, AttributeError):
except Exception:
return None, pos
@ -167,7 +167,7 @@ def extract_iter(txt, begin, end, pos=0):
last = index(end, first)
pos = last + lend
yield txt[first:last]
except (ValueError, TypeError, AttributeError):
except Exception:
return
@ -180,7 +180,7 @@ def extract_from(txt, pos=0, default=""):
last = index(end, first)
pos = last + len(end)
return txt[first:last]
except (ValueError, TypeError, AttributeError):
except Exception:
return default
return extr
@ -200,7 +200,7 @@ def parse_bytes(value, default=0, suffixes="bkmgtp"):
"""Convert a bytes-amount ("500k", "2.5M", ...) to int"""
try:
last = value[-1].lower()
except (TypeError, LookupError):
except Exception:
return default
if last in suffixes:
@ -221,7 +221,7 @@ def parse_int(value, default=0):
return default
try:
return int(value)
except (ValueError, TypeError):
except Exception:
return default
@ -231,7 +231,7 @@ def parse_float(value, default=0.0):
return default
try:
return float(value)
except (ValueError, TypeError):
except Exception:
return default
@ -242,7 +242,7 @@ def parse_query(qs):
for key, value in urllib.parse.parse_qsl(qs):
if key not in result:
result[key] = value
except AttributeError:
except Exception:
pass
return result
@ -251,7 +251,7 @@ def parse_timestamp(ts, default=None):
"""Create a datetime object from a unix timestamp"""
try:
return datetime.datetime.utcfromtimestamp(int(ts))
except (TypeError, ValueError, OverflowError):
except Exception:
return default

View File

@ -16,7 +16,6 @@ import time
import random
import getpass
import hashlib
import sqlite3
import binascii
import datetime
import functools
@ -339,7 +338,7 @@ def extract_headers(response):
@functools.lru_cache(maxsize=None)
def git_head():
try:
out, err = subprocess.Popen(
out, err = Popen(
("git", "rev-parse", "--short", "HEAD"),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
@ -579,6 +578,33 @@ GLOBALS = {
}
if EXECUTABLE and hasattr(sys, "_MEIPASS"):
# https://github.com/pyinstaller/pyinstaller/blob/develop/doc
# /runtime-information.rst#ld_library_path--libpath-considerations
_popen_env = os.environ.copy()
orig = _popen_env.get("LD_LIBRARY_PATH_ORIG")
if orig is None:
_popen_env.pop("LD_LIBRARY_PATH", None)
else:
_popen_env["LD_LIBRARY_PATH"] = orig
orig = _popen_env.get("DYLD_LIBRARY_PATH_ORIG")
if orig is None:
_popen_env.pop("DYLD_LIBRARY_PATH", None)
else:
_popen_env["DYLD_LIBRARY_PATH"] = orig
del orig
class Popen(subprocess.Popen):
def __init__(self, args, **kwargs):
kwargs["env"] = _popen_env
subprocess.Popen.__init__(self, args, **kwargs)
else:
Popen = subprocess.Popen
def compile_expression(expr, name="<expr>", globals=None):
code_object = compile(expr, name, "eval")
return functools.partial(eval, code_object, globals or GLOBALS)
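
The Popen wrapper above is a drop-in replacement for subprocess.Popen that, inside a PyInstaller bundle, restores the original (DY)LD_LIBRARY_PATH before spawning the child; usage stays unchanged, as in the git_head() call further up (this snippet assumes git is available on PATH):

import subprocess
from gallery_dl import util

proc = util.Popen(
    ("git", "rev-parse", "--short", "HEAD"),
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
out, err = proc.communicate()
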
@ -825,46 +851,3 @@ class FilterPredicate():
raise
except Exception as exc:
raise exception.FilterError(exc)
class DownloadArchive():
def __init__(self, path, format_string, pragma=None,
cache_key="_archive_key"):
try:
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
except sqlite3.OperationalError:
os.makedirs(os.path.dirname(path))
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
con.isolation_level = None
from . import formatter
self.keygen = formatter.parse(format_string).format_map
self.close = con.close
self.cursor = cursor = con.cursor()
self._cache_key = cache_key
if pragma:
for stmt in pragma:
cursor.execute("PRAGMA " + stmt)
try:
cursor.execute("CREATE TABLE IF NOT EXISTS archive "
"(entry TEXT PRIMARY KEY) WITHOUT ROWID")
except sqlite3.OperationalError:
# fallback for missing WITHOUT ROWID support (#553)
cursor.execute("CREATE TABLE IF NOT EXISTS archive "
"(entry TEXT PRIMARY KEY)")
def check(self, kwdict):
"""Return True if the item described by 'kwdict' exists in archive"""
key = kwdict[self._cache_key] = self.keygen(kwdict)
self.cursor.execute(
"SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
return self.cursor.fetchone()
def add(self, kwdict):
"""Add item described by 'kwdict' to archive"""
key = kwdict.get(self._cache_key) or self.keygen(kwdict)
self.cursor.execute(
"INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))

View File

@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
__version__ = "1.26.9-dev"
__version__ = "1.27.0-dev"

3
pyproject.toml Normal file
View File

@ -0,0 +1,3 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

View File

@ -4,16 +4,37 @@
"""Build a standalone executable using PyInstaller"""
import PyInstaller.__main__
import argparse
import util
import os
import sys
def main():
parser = argparse.ArgumentParser()
parser.add_argument("-o", "--os")
parser.add_argument("-a", "--arch")
parser.add_argument("-e", "--extension")
args = parser.parse_args()
name = "gallery-dl"
if args.os:
name = "{}_{}".format(name, args.os.partition("-")[0].lower())
if args.arch == "x86":
name += "_x86"
if args.extension:
name = "{}.{}".format(name, args.extension.lower())
PyInstaller.__main__.run([
"--onefile",
"--console",
"--name", "gallery-dl." + ("exe" if os.name == "nt" else "bin"),
"--name", name,
"--additional-hooks-dir", util.path("scripts"),
"--distpath", util.path("dist"),
"--workpath", util.path("build"),
"--specpath", util.path("build"),
util.path("gallery_dl", "__main__.py"),
])
if __name__ == "__main__":
sys.exit(main())
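
The --os/--arch/--extension flags only shape the output filename; illustrative invocations (the runner labels are assumed values, chosen here for the example):

# python scripts/pyinstaller.py --os ubuntu-22.04
#   -> dist/gallery-dl_ubuntu
# python scripts/pyinstaller.py --os windows-2019 --arch x86 --extension exe
#   -> dist/gallery-dl_windows_x86.exe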

View File

@ -44,40 +44,52 @@ update-dev() {
build-python() {
cd "${ROOTDIR}"
echo Building bdist_wheel and sdist
echo Building sdist and wheel
python setup.py bdist_wheel sdist
python -m build
}
build-linux() {
cd "${ROOTDIR}"
echo Building Linux executable
VENV_PATH="/tmp/venv"
VENV_PYTHON="${VENV_PATH}/bin/python"
rm -rf "${VENV_PATH}"
python -m virtualenv "${VENV_PATH}"
$VENV_PYTHON -m pip install requests requests[socks] yt-dlp pyyaml secretstorage pyinstaller
$VENV_PYTHON ./scripts/pyinstaller.py
build-vm 'ubuntu22.04' 'gallery-dl.bin'
}
build-windows() {
cd "${ROOTDIR}/dist"
cd "${ROOTDIR}"
echo Building Windows executable
# remove old executable
rm -f "gallery-dl.exe"
build-vm 'windows7_x86_sp1' 'gallery-dl.exe'
}
# build windows exe in vm
ln -fs "${ROOTDIR}" /tmp/
vmstart "windows7_x86_sp1" &
build-vm() {
VMNAME="$1"
BINNAME="$2"
TMPPATH="/tmp/gallery-dl/dist/$BINNAME"
# launch VM
vmstart "$VMNAME" &
disown
while [ ! -e "gallery-dl.exe" ] ; do
# copy source files
mkdir -p /tmp/gallery-dl
cp -a -t /tmp/gallery-dl -- \
./gallery_dl ./scripts ./data ./setup.py ./README.rst
# remove old executable
rm -f "./dist/$BINNAME"
# wait for new executable
while [ ! -e "$TMPPATH" ] ; do
sleep 5
done
sleep 2
# move
mv "$TMPPATH" "./dist/$BINNAME"
rm -r /tmp/gallery-dl
}
sign() {
@ -100,6 +112,14 @@ changelog() {
-e "s*\([( ]\)#\([0-9]\+\)*\1[#\2](https://github.com/mikf/gallery-dl/issues/\2)*g" \
-e "s*^## \w\+\$*## ${NEWVERSION} - $(date +%Y-%m-%d)*" \
"${CHANGELOG}"
mv "${CHANGELOG}" "${CHANGELOG}.orig"
# - remove all but the latest entries
sed -n \
-e '/^## /,/^$/ { /^$/q; p }' \
"${CHANGELOG}.orig" \
> "${CHANGELOG}"
}
supportedsites() {
@ -117,6 +137,7 @@ upload-git() {
cd "${ROOTDIR}"
echo Pushing changes to github
mv "${CHANGELOG}.orig" "${CHANGELOG}" || true
git add "gallery_dl/version.py" "${README}" "${CHANGELOG}"
git commit -S -m "release version ${NEWVERSION}"
git tag -s -m "version ${NEWVERSION}" "v${NEWVERSION}"

View File

@ -143,6 +143,7 @@ CATEGORY_MAP = {
"webmshare" : "webmshare",
"webtoons" : "Webtoon",
"wikiart" : "WikiArt.org",
"wikigg" : "wiki.gg",
"wikimediacommons": "Wikimedia Commons",
"xbunkr" : "xBunkr",
"xhamster" : "xHamster",
@ -273,6 +274,10 @@ SUBCATEGORY_MAP = {
"sexcom": {
"pins": "User Pins",
},
"skeb": {
"following" : "Followed Creators",
"following-users": "Followed Users",
},
"smugmug": {
"path": "Images from Users and Folders",
},
@ -337,12 +342,12 @@ URL_MAP = {
_OAUTH = '<a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a>'
_COOKIES = '<a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a>'
_APIKEY_DB = \
'<a href="configuration.rst#extractorderpibooruapi-key">API Key</a>'
_APIKEY_WH = \
'<a href="configuration.rst#extractorwallhavenapi-key">API Key</a>'
_APIKEY_WY = \
'<a href="configuration.rst#extractorweasylapi-key">API Key</a>'
_APIKEY_DB = ('<a href="https://gdl-org.github.io/docs/configuration.html'
'#extractor-derpibooru-api-key">API Key</a>')
_APIKEY_WH = ('<a href="https://gdl-org.github.io/docs/configuration.html'
'#extractor-wallhaven-api-key">API Key</a>')
_APIKEY_WY = ('<a href="https://gdl-org.github.io/docs/configuration.html'
'#extractor-weasyl-api-key">API Key</a>')
AUTH_MAP = {
"aibooru" : "Supported",
@ -350,11 +355,13 @@ AUTH_MAP = {
"atfbooru" : "Supported",
"baraag" : _OAUTH,
"bluesky" : "Supported",
"booruvar" : "Supported",
"coomerparty" : "Supported",
"danbooru" : "Supported",
"derpibooru" : _APIKEY_DB,
"deviantart" : _OAUTH,
"e621" : "Supported",
"e6ai" : "Supported",
"e926" : "Supported",
"e-hentai" : "Supported",
"exhentai" : "Supported",
@ -362,6 +369,7 @@ AUTH_MAP = {
"fantia" : _COOKIES,
"flickr" : _OAUTH,
"furaffinity" : _COOKIES,
"furbooru" : "API Key",
"horne" : "Required",
"idolcomplex" : "Supported",
"imgbb" : "Supported",
@ -382,7 +390,6 @@ AUTH_MAP = {
"reddit" : _OAUTH,
"sankaku" : "Supported",
"seiga" : _COOKIES,
"seisoparty" : "Supported",
"smugmug" : _OAUTH,
"subscribestar" : "Supported",
"tapas" : "Supported",

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# This is the maintainence launcher for the snap, make necessary runtime environment changes to make the snap work here. You may also insert security confinement/deprecation/obsoletion notice of the snap here.
# This is the maintenance launcher for the snap, make necessary runtime environment changes to make the snap work here. You may also insert security confinement/deprecation/obsoletion notice of the snap here.
set \
-o errexit \

View File

@ -37,7 +37,7 @@ plugs:
# Network access
network:
# For network service for recieving OAuth callback tokens
# For network service for receiving OAuth callback tokens
network-bind:
# Configuration access

View File

@ -73,7 +73,7 @@ __tests__ = (
"#category": ("", "8chan", "board"),
"#class" : _8chan._8chanBoardExtractor,
"#pattern" : _8chan._8chanThreadExtractor.pattern,
"#count" : 27,
"#count" : range(24, 28),
},
{

View File

@ -14,4 +14,12 @@ __tests__ = (
"#class" : wikimedia.WikimediaArticleExtractor,
},
{
"#url" : "https://azurlane.koumakan.jp/wiki/Louisville/Gallery",
"#comment" : "entries with missing 'imageinfo' (#5384)",
"#category": ("wikimedia", "azurlanewiki", "article"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#count" : "> 10",
},
)

View File

@ -12,7 +12,7 @@ __tests__ = (
"#url" : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html",
"#category": ("blogger", "blogspot", "post"),
"#class" : blogger.BloggerPostExtractor,
"#urls" : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",
"#urls" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjH9WkPvLJq2moxKtyt3ieJZWSDFQwOi3PHRdlHVHEQHRwy-d86Jg6HWSMhxaa6EgvlXq-zDMmKM4kIPn27eJ9Hepk2X9e9HQhqwMfrT8RYTnFe65uexw7KSk5FdWHxRVp5crz3p_qph3Bj/s0/Icy-Moonrise---For-Web.jpg",
"blog": {
"date" : "dt:2010-11-21 18:19:42",
@ -43,7 +43,7 @@ __tests__ = (
"extension": "jpg",
"filename" : "Icy-Moonrise---For-Web",
"num" : 1,
"url" : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",
"url" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjH9WkPvLJq2moxKtyt3ieJZWSDFQwOi3PHRdlHVHEQHRwy-d86Jg6HWSMhxaa6EgvlXq-zDMmKM4kIPn27eJ9Hepk2X9e9HQhqwMfrT8RYTnFe65uexw7KSk5FdWHxRVp5crz3p_qph3Bj/s0/Icy-Moonrise---For-Web.jpg",
},
{
@ -59,7 +59,7 @@ __tests__ = (
"#comment" : "new image domain (#2204)",
"#category": ("blogger", "blogspot", "post"),
"#class" : blogger.BloggerPostExtractor,
"#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$",
"#pattern" : r"https://blogger\.googleusercontent\.com/img/.+=s0$",
"#count" : 8,
},
@ -67,7 +67,7 @@ __tests__ = (
"#url" : "https://julianbphotography.blogspot.com/",
"#category": ("blogger", "blogspot", "blog"),
"#class" : blogger.BloggerBlogExtractor,
"#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
"#pattern" : r"https://blogger\.googleusercontent\.com/img/.+/s0/",
"#range" : "1-25",
"#count" : 25,
},

View File

@ -133,6 +133,7 @@ __tests__ = (
"filename" : "bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri",
"height" : 630,
"indexedAt" : "2023-12-22T18:58:32.715Z",
"instance" : "bsky.app",
"labels" : [],
"likeCount" : int,
"num" : 1,
@ -153,7 +154,7 @@ __tests__ = (
"followersCount": int,
"followsCount" : int,
"handle" : "bsky.app",
"indexedAt" : "2023-12-22T18:54:12.339Z",
"indexedAt" : "2024-01-20T05:04:41.904Z",
"labels" : [],
"postsCount" : int,
},

View File

@ -13,13 +13,12 @@ __tests__ = (
"#category": ("lolisafe", "bunkr", "album"),
"#class" : bunkr.BunkrAlbumExtractor,
"#urls" : "https://i-burger.bunkr.ru/test-テスト-\"&>-QjgneIQv.png",
"#sha1_content": "f38b54b17cd7462e687b58d83f00fca88b1b105a",
"#sha1_content": "961b25d85b5f5bd18cbe3e847ac55925f14d0286",
"album_id" : "Lktg9Keq",
"album_name" : "test テスト \"&>",
"album_size" : "182 B",
"count" : 1,
"description": "",
"extension" : "png",
"file" : "https://i-burger.bunkr.ru/test-テスト-\"&>-QjgneIQv.png",
"filename" : "test-テスト-\"&>-QjgneIQv",
@ -43,7 +42,6 @@ __tests__ = (
"album_name" : "test2",
"album_size" : "561.6 KB",
"count" : 2,
"description": "",
"filename" : r"re:video-gLn1hgpw|image-sZrQUeOx",
"id" : r"re:gLn1hgpw|sZrQUeOx",
"name" : r"re:video|image",

View File

@ -15,12 +15,32 @@ __tests__ = (
"#sha1_url": "e7d624aded15a069194e38dc731ec23217a422fb",
},
{
"#url" : "https://desuarchive.org/a",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},
{
"#url" : "https://desuarchive.org/a/",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},
{
"#url" : "https://desuarchive.org/a/2",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},
{
"#url" : "https://desuarchive.org/a/page/2",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
"#pattern" : foolfuuka.FoolfuukaThreadExtractor.pattern,
"#count" : 10,
},
{
"#url" : "https://desuarchive.org/_/search/text/test/",
"#category": ("foolfuuka", "desuarchive", "search"),

View File

@ -252,6 +252,14 @@ __tests__ = (
),
},
{
"#url" : "https://deviantart.com/h3813067/avatar",
"#comment" : "default avatar (#5276)",
"#category": ("", "deviantart", "avatar"),
"#class" : deviantart.DeviantartAvatarExtractor,
"#count" : 0,
},
{
"#url" : "https://deviantart.com/gdldev/banner",
"#category": ("", "deviantart", "background"),
@ -300,7 +308,7 @@ __tests__ = (
"target" : dict,
"thumbs" : list,
"title" : "Banner",
"url" : "https://sta.sh/0198jippkeys",
"url" : "https://www.deviantart.com/stash/0198jippkeys",
"username" : "gdldev",
},
@ -352,13 +360,38 @@ __tests__ = (
"#class" : deviantart.DeviantartFolderExtractor,
},
{
"#url" : "https://www.deviantart.com/stash/022c83odnaxc",
"#category": ("", "deviantart", "stash"),
"#class" : deviantart.DeviantartStashExtractor,
"#pattern" : r"https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/940f2d05-c5eb-4917-8192-7eb6a2d508c6/dcvdmbc-e506cdcf-3208-4c20-85ab-0bfa8a7bcb16.png\?token=ey.+",
"#count" : 1,
"#sha1_content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
"content": {
"filename": "01_by_justatest235723_dcvdmbc.png",
"filesize": 380,
"width" : 128,
"height" : 128,
"src" : r"re:https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/940f2d05-c5eb-4917-8192-7eb6a2d508c6/dcvdmbc-e506cdcf-3208-4c20-85ab-0bfa8a7bcb16.png\?token=ey.+",
},
"da_category" : "Uncategorized",
"date" : "dt:2018-12-26 14:49:27",
"deviationid" : "A4A6AD52-8857-46EE-ABFE-86D49D4FF9D0",
"download_filesize": 380,
"extension" : "png",
"filename" : "01_by_justatest235723-dcvdmbc",
"index" : 778297656,
"index_base36" : "cvdmbc",
"published_time": 1545835767,
"title" : "01",
"url" : "https://www.deviantart.com/stash/022c83odnaxc",
},
{
"#url" : "https://sta.sh/022c83odnaxc",
"#category": ("", "deviantart", "stash"),
"#class" : deviantart.DeviantartStashExtractor,
"#pattern" : r"https://wixmp-[^.]+\.wixmp\.com/f/.+/.+\.png\?token=.+",
"#count" : 1,
"#sha1_content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
},
{
@ -556,7 +589,7 @@ __tests__ = (
"index" : int,
"index_base36": r"re:^[0-9a-z]+$",
"url" : r"re:^https://sta.sh",
"url" : r"re:^https://www.deviantart.com/stash/\w+",
},
{

View File

@ -83,6 +83,15 @@ __tests__ = (
"width" : 728,
},
{
"#url" : "https://hearthstone.fandom.com/wiki/Flame_Juggler",
"#comment" : "empty 'metadata'",
"#category": ("wikimedia", "fandom-hearthstone", "article"),
"#class" : wikimedia.WikimediaArticleExtractor,
"metadata" : {},
},
{
"#url" : "https://projectsekai.fandom.com/wiki/Project_SEKAI_Wiki",
"#category": ("wikimedia", "fandom-projectsekai", "article"),

View File

@ -121,6 +121,24 @@ __tests__ = (
"#class" : furaffinity.FuraffinityPostExtractor,
},
{
"#url" : "https://fxfuraffinity.net/view/21835115/",
"#category": ("", "furaffinity", "post"),
"#class" : furaffinity.FuraffinityPostExtractor,
},
{
"#url" : "https://xfuraffinity.net/view/21835115/",
"#category": ("", "furaffinity", "post"),
"#class" : furaffinity.FuraffinityPostExtractor,
},
{
"#url" : "https://fxraffinity.net/view/21835115/",
"#category": ("", "furaffinity", "post"),
"#class" : furaffinity.FuraffinityPostExtractor,
},
{
"#url" : "https://sfw.furaffinity.net/view/21835115/",
"#category": ("", "furaffinity", "post"),

View File

@ -39,6 +39,22 @@ __tests__ = (
"#sha1_url": "845a61aa1f90fb4ced841e8b7e62098be2e967bf",
},
{
"#url" : "https://gelbooru.com/index.php?page=post&s=list&tags=id:>=67800+id:<=68000",
"#comment" : "meta tags (#5478)",
"#category": ("booru", "gelbooru", "tag"),
"#class" : gelbooru.GelbooruTagExtractor,
"#count" : 187,
},
{
"#url" : "https://gelbooru.com/index.php?page=post&s=list&tags=id:>=67800+id:<=68000+sort:id:asc",
"#comment" : "meta + sort tags (#5478)",
"#category": ("booru", "gelbooru", "tag"),
"#class" : gelbooru.GelbooruTagExtractor,
"#count" : 187,
},
{
"#url" : "https://gelbooru.com/index.php?page=pool&s=show&id=761",
"#category": ("booru", "gelbooru", "pool"),
@ -47,10 +63,30 @@ __tests__ = (
},
{
"#url" : "https://gelbooru.com/index.php?page=favorites&s=view&id=279415",
"#url" : "https://gelbooru.com/index.php?page=favorites&s=view&id=1435674",
"#category": ("booru", "gelbooru", "favorite"),
"#class" : gelbooru.GelbooruFavoriteExtractor,
"#count" : 3,
"#urls" : (
"https://img3.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
"https://img3.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
"https://img3.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
"https://img3.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
"https://img3.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
),
},
{
"#url" : "https://gelbooru.com/index.php?page=favorites&s=view&id=1435674",
"#category": ("booru", "gelbooru", "favorite"),
"#class" : gelbooru.GelbooruFavoriteExtractor,
"#options" : {"order-posts": "reverse"},
"#urls" : (
"https://img3.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
"https://img3.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
"https://img3.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
"https://img3.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
"https://img3.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
),
},
{

View File

@ -29,10 +29,11 @@ __tests__ = (
},
{
"#url" : "https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps",
"#url" : "https://www.hentai-foundry.com/pictures/user/Ethevian/scraps",
"#category": ("", "hentaifoundry", "scraps"),
"#class" : hentaifoundry.HentaifoundryScrapsExtractor,
"#sha1_url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7",
"#pattern" : r"https://pictures\.hentai-foundry\.com/e/Ethevian/.+",
"#count" : ">= 10",
},
{

View File

@ -9,7 +9,7 @@ from gallery_dl.extractor import hiperdex
__tests__ = (
{
"#url" : "https://hiperdex.com/manga/domestic-na-kanojo/154-5/",
"#url" : "https://hiperdex.com/mangas/domestic-na-kanojo/154-5/",
"#category": ("", "hiperdex", "chapter"),
"#class" : hiperdex.HiperdexChapterExtractor,
"#pattern" : r"https://(1st)?hiperdex\d?.(com|net|info)/wp-content/uploads/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp",
@ -27,6 +27,12 @@ __tests__ = (
"type" : "Manga",
},
{
"#url" : "https://hiperdex.com/manga/domestic-na-kanojo/154-5/",
"#category": ("", "hiperdex", "chapter"),
"#class" : hiperdex.HiperdexChapterExtractor,
},
{
"#url" : "https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/",
"#category": ("", "hiperdex", "chapter"),

View File

@ -5,6 +5,7 @@
# published by the Free Software Foundation.
from gallery_dl.extractor import hitomi
from gallery_dl import exception
__tests__ = (
@ -47,9 +48,7 @@ __tests__ = (
"#comment" : "gallery with 'broken' redirect",
"#category": ("", "hitomi", "gallery"),
"#class" : hitomi.HitomiGalleryExtractor,
"#options" : {"format": "original"},
"#pattern" : r"https://[a-c]b\.hitomi\.la/images/\d+/\d+/[0-9a-f]{64}\.jpg",
"#count" : 10,
"#exception": exception.NotFoundError,
},
{

View File

@ -42,7 +42,7 @@ __tests__ = (
},
{
"#url" : "https://idol.sankakucomplex.com/pools/show/145",
"#url" : "https://idol.sankakucomplex.com/en/pools/e9PMwnwRBK3",
"#category": ("booru", "idolcomplex", "pool"),
"#class" : idolcomplex.IdolcomplexPoolExtractor,
"#count" : 3,
@ -72,16 +72,16 @@ __tests__ = (
"file_url" : r"re:https://i[sv]\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
"filename" : "509eccbba54a43cea6b275a65b93c51d",
"height" : 683,
"id" : 694215,
"id_alnum" : "vkr36qdOaZ4",
"id" : "vkr36qdOaZ4", # legacy ID: 694215
"md5" : "509eccbba54a43cea6b275a65b93c51d",
"rating" : "g",
"tags" : "lyumos the_witcher shani_(the_witcher) 1girl green_eyes non-asian redhead waistcoat wreath cosplay 3:2_aspect_ratio",
"tags_character": "shani_(the_witcher)",
"tags_copyright": "the_witcher",
"tags_general" : "1girl green_eyes non-asian redhead waistcoat wreath",
"tags_genre" : "cosplay",
"tags_idol" : "lyumos",
"tags_medium" : "cosplay 3:2_aspect_ratio",
"tags_medium" : "3:2_aspect_ratio",
"vote_average" : range(4, 5),
"vote_count" : range(25, 40),
"width" : 1024,
@ -111,8 +111,7 @@ __tests__ = (
"#class" : idolcomplex.IdolcomplexPostExtractor,
"#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
"id" : 694215,
"id_alnum" : "vkr36qdOaZ4",
"id" : "vkr36qdOaZ4", # legacy ID: 694215
"tags_character": "shani_(the_witcher)",
"tags_copyright": "the_witcher",
"tags_idol" : str,

View File

@ -120,11 +120,25 @@ __tests__ = (
"#sha1_url": "37822523e6e4a56feb9dea35653760c86b44ff89",
},
{
"#url" : "https://www.imagefap.com/organizer/613950/Grace-Stout",
"#category": ("", "imagefap", "folder"),
"#class" : imagefap.ImagefapFolderExtractor,
"#pattern" : imagefap.ImagefapGalleryExtractor.pattern,
"#count" : 31,
"title": r"re:Grace Stout .+",
},
{
"#url" : "https://www.imagefap.com/usergallery.php?userid=1981976&folderid=409758",
"#category": ("", "imagefap", "folder"),
"#class" : imagefap.ImagefapFolderExtractor,
"#sha1_url": "37822523e6e4a56feb9dea35653760c86b44ff89",
"#urls" : "https://www.imagefap.com/gallery/7876223",
"folder" : "Softcore",
"gallery_id": "7876223",
"title" : "Kelsi Monroe in lingerie",
},
{
@ -140,6 +154,8 @@ __tests__ = (
"#class" : imagefap.ImagefapFolderExtractor,
"#pattern" : imagefap.ImagefapGalleryExtractor.pattern,
"#range" : "1-40",
"folder": "Uncategorized",
},
{

View File

@ -89,11 +89,10 @@ __tests__ = (
},
{
"#url" : "https://kemono.party/gumroad/user/trylsc/post/IURjT",
"#comment" : "kemono.party -> data.kemono.party",
"#url" : "https://kemono.su/gumroad/user/3101696181060/post/tOWyf",
"#category": ("", "kemonoparty", "gumroad"),
"#class" : kemonoparty.KemonopartyPostExtractor,
"#pattern" : r"https://kemono\.party/data/(a4/7b/a47bfe938d8c1682eef06e885927484cd8df1b.+\.jpg|c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)",
"#urls" : "https://kemono.su/data/6f/13/6f1394b19516396ea520254350662c254bbea30c1e111fd4b0f042c61c426d07.zip",
},
{
@ -136,6 +135,19 @@ __tests__ = (
}],
},
{
"#url" : "https://kemono.su/patreon/user/3161935/post/68231671",
"#comment" : "announcements",
"#category": ("", "kemonoparty", "patreon"),
"#class" : kemonoparty.KemonopartyPostExtractor,
"#options" : {"announcements": True},
"announcements": [{
"body": "<div><strong>Thank you so much for the support!</strong><strong><br></strong>This Patreon is more of a tip jar for supporting what I make. I have to clarify that there are <strong>no exclusive Patreon animations</strong> because all are released for the public. You will get earlier access to WIPs. Direct downloads to my works are also available for $5 and $10 Tiers.</div>",
"date": "2023-02",
}],
},
{
"#url" : "https://kemono.su/patreon/user/19623797/post/29035449",
"#comment" : "invalid file (#3510)",
@ -195,6 +207,7 @@ __tests__ = (
"hash" : "88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86",
"revision_id" : 142470,
"revision_index": 2,
"revision_count": 9,
"revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40",
},
@ -210,6 +223,7 @@ __tests__ = (
"hash" : "88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86",
"revision_id" : 0,
"revision_index": 1,
"revision_count": 1,
"revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40",
},
@ -224,6 +238,7 @@ __tests__ = (
"revision_id": range(134996, 3052965),
"revision_index": range(1, 9),
"revision_count": 9,
"revision_hash": "e0e93281495e151b11636c156e52bfe9234c2a40",
},
@ -246,6 +261,16 @@ __tests__ = (
"published": "2022-07-29T21:12:11.483000",
},
{
"#url" : "https://kemono.su/gumroad/user/3267960360326/post/jwwag",
"#comment" : "empty 'file' with no 'path' (#5368)",
"#category": ("", "kemonoparty", "gumroad"),
"#class" : kemonoparty.KemonopartyPostExtractor,
"#count" : 8,
"type" : "attachment",
},
{
"#url" : "https://kemono.su/discord/server/488668827274444803#608504710906904576",
"#category": ("", "kemonoparty", "discord"),
@ -340,8 +365,24 @@ __tests__ = (
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
"#pattern" : kemonoparty.KemonopartyUserExtractor.pattern,
"#auth" : True,
"#count" : 3,
"#sha1_url": "902c656c8002a3257ef9e255cb69bca1937373d4",
"#urls" : (
"https://kemono.su/patreon/user/881792",
"https://kemono.su/fanbox/user/6993449",
"https://kemono.su/subscribestar/user/alcorart",
),
},
{
"#url" : "https://kemono.su/favorites?type=artist&sort=faved_seq&order=asc",
"#category": ("", "kemonoparty", "favorite"),
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
"#pattern" : kemonoparty.KemonopartyUserExtractor.pattern,
"#auth" : True,
"#urls" : (
"https://kemono.su/fanbox/user/6993449",
"https://kemono.su/patreon/user/881792",
"https://kemono.su/subscribestar/user/alcorart",
),
},
{
@ -350,8 +391,24 @@ __tests__ = (
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
"#pattern" : kemonoparty.KemonopartyPostExtractor.pattern,
"#auth" : True,
"#count" : 3,
"#sha1_url": "4be8e84cb384a907a8e7997baaf6287b451783b5",
"#urls" : (
"https://kemono.su/subscribestar/user/alcorart/post/184329",
"https://kemono.su/fanbox/user/6993449/post/23913",
"https://kemono.su/patreon/user/881792/post/4769638",
),
},
{
"#url" : "https://kemono.su/favorites?type=post&sort=published&order=asc",
"#category": ("", "kemonoparty", "favorite"),
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
"#pattern" : kemonoparty.KemonopartyPostExtractor.pattern,
"#auth" : True,
"#urls" : (
"https://kemono.su/patreon/user/881792/post/4769638",
"https://kemono.su/fanbox/user/6993449/post/23913",
"https://kemono.su/subscribestar/user/alcorart/post/184329",
),
},
)

View File

@ -32,7 +32,7 @@ __tests__ = (
"#url" : "https://lensdump.com/i/tyoAyM",
"#category": ("", "lensdump", "image"),
"#class" : lensdump.LensdumpImageExtractor,
"#pattern" : r"https://c\.l3n\.co/i/tyoAyM\.webp",
"#urls" : "https://c.l3n.co/i/tyoAyM.webp",
"#sha1_content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46",
"date" : "dt:2022-08-01 08:24:28",
@ -45,4 +45,32 @@ __tests__ = (
"width" : 620,
},
{
"#url" : "https://c.l3n.co/i/tyoAyM.webp",
"#category": ("", "lensdump", "image"),
"#class" : lensdump.LensdumpImageExtractor,
"#urls" : "https://c.l3n.co/i/tyoAyM.webp",
"date" : "dt:2022-08-01 08:24:28",
"extension": "webp",
"filename" : "tyoAyM",
"height" : 400,
"id" : "tyoAyM",
"title" : "MYOBI clovis bookcaseset",
"url" : "https://c.l3n.co/i/tyoAyM.webp",
"width" : 620,
},
{
"#url" : "https://i.lensdump.com/i/tyoAyM",
"#category": ("", "lensdump", "image"),
"#class" : lensdump.LensdumpImageExtractor,
},
{
"#url" : "https://i3.lensdump.com/i/tyoAyM",
"#category": ("", "lensdump", "image"),
"#class" : lensdump.LensdumpImageExtractor,
},
)

View File

@ -18,4 +18,15 @@ __tests__ = (
"instance_remote": None,
},
{
"#url" : "mastodon:https://wanderingwires.net/@quarc/9qppkxzyd1ee3i9p",
"#comment" : "null moved account",
"#category": ("mastodon", "wanderingwires.net", "status"),
"#class" : mastodon.MastodonStatusExtractor,
"#urls" : "https://s3.wanderingwires.net/null/4377e826-72ab-4659-885c-fa12945eb207.png",
"instance": "wanderingwires.net",
"instance_remote": None,
},
)

View File

@ -74,6 +74,33 @@ __tests__ = (
"#url" : "https://mastodon.social/bookmarks",
"#category": ("mastodon", "mastodon.social", "bookmark"),
"#class" : mastodon.MastodonBookmarkExtractor,
"#auth" : True,
"#urls" : "https://files.mastodon.social/media_attachments/files/111/331/603/082/304/823/original/e12cde371c88c1b0.png",
},
{
"#url" : "https://mastodon.social/favourites",
"#category": ("mastodon", "mastodon.social", "favorite"),
"#class" : mastodon.MastodonFavoriteExtractor,
"#auth" : True,
"#urls" : "https://files.mastodon.social/media_attachments/files/111/331/603/082/304/823/original/e12cde371c88c1b0.png",
},
{
"#url" : "https://mastodon.social/lists/92653",
"#category": ("mastodon", "mastodon.social", "list"),
"#class" : mastodon.MastodonListExtractor,
"#auth" : True,
"#pattern" : r"https://files\.mastodon\.social/media_attachments/files/(\d+/){3,}original/\w+",
"#range" : "1-10",
},
{
"#url" : "https://mastodon.social/tags/mastodon",
"#category": ("mastodon", "mastodon.social", "hashtag"),
"#class" : mastodon.MastodonHashtagExtractor,
"#pattern" : r"https://files\.mastodon\.social/media_attachments/files/(\d+/){3,}original/\w+",
"#range" : "1-10",
},
{
@ -82,9 +109,9 @@ __tests__ = (
"#class" : mastodon.MastodonFollowingExtractor,
"#extractor": False,
"#urls" : (
"https://mastodon.ie/@RustyBertrand",
"https://ravenation.club/@soundwarrior20",
"https://mastodon.social/@0x4f",
"https://mastodon.social/@RustyBertrand",
"https://mastodon.social/@christianselig",
"https://saturation.social/@clive",
"https://mastodon.social/@sjvn",
@ -137,4 +164,36 @@ __tests__ = (
"num" : int,
},
{
"#url" : "https://mastodon.social/@technewsbot@assortedflotsam.com/112360601113258881",
"#comment" : "card image",
"#category": ("mastodon", "mastodon.social", "status"),
"#class" : mastodon.MastodonStatusExtractor,
"#options" : {"cards": True},
"#urls" : "https://files.mastodon.social/cache/preview_cards/images/095/900/335/original/83f0b4a793c84123.jpg",
"media": {
"author_name" : "Tom Warren",
"author_url" : "https://www.theverge.com/authors/tom-warren",
"blurhash" : "UHBDWMCjVGM0k,XjnPM#0h+vkpb^RkjYSh$*",
"description" : "Microsofts big Xbox games showcase will take place on June 9th. It will include more games than last year and a special Call of Duty Direct will follow.",
"embed_url" : "",
"height" : 628,
"html" : "",
"id" : "card95900335",
"image" : "https://files.mastodon.social/cache/preview_cards/images/095/900/335/original/83f0b4a793c84123.jpg",
"image_description": "The Xbox showcase illustration",
"language" : "en",
"provider_name": "The Verge",
"provider_url": "",
"published_at": "2024-04-30T14:15:30.341Z",
"title" : "The Xbox games showcase airs June 9th, followed by a Call of Duty Direct",
"type" : "link",
"url" : "https://files.mastodon.social/cache/preview_cards/images/095/900/335/original/83f0b4a793c84123.jpg",
"weburl" : "https://www.theverge.com/2024/4/30/24145262/xbox-games-showcase-summer-2024-call-of-duty-direct",
"width" : 1200,
},
},
)

View File

@ -21,7 +21,7 @@ __tests__ = (
"#url" : "https://misskey.design/@blooddj@pawoo.net",
"#category": ("misskey", "misskey.design", "user"),
"#class" : misskey.MisskeyUserExtractor,
"#count" : 7,
"#count" : "> 30",
},
{

View File

@ -12,7 +12,7 @@ __tests__ = (
"#url" : "https://myhentaigallery.com/g/16247",
"#category": ("", "myhentaigallery", "gallery"),
"#class" : myhentaigallery.MyhentaigalleryGalleryExtractor,
"#pattern" : r"https://images\.myhentaicomics\.com/m\w\w/images/[^/]+/original/\d+\.jpg",
"#pattern" : r"https://(cdn|images)\.myhentaicomics\.com/m\w\w/images/[^/]+/original/\d+\.jpg",
"artist" : list,
"count" : 11,

View File

@ -24,6 +24,39 @@ __tests__ = (
"#sha1_metadata": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e",
},
{
"#url" : "https://blog.naver.com/PostView.nhn?blogId=rlfqjxm0&logNo=70161391809",
"#comment" : "filenames in EUC-KR encoding (#5126)",
"#category": ("", "naver", "post"),
"#class" : naver.NaverPostExtractor,
"#urls": (
"https://blogfiles.pstatic.net/20130305_23/ping9303_1362411028002Dpz9z_PNG/1_사본.png",
"https://blogfiles.pstatic.net/20130305_46/rlfqjxm0_1362473322580x33zi_PNG/오마갓합작.png",
),
"blog": {
"id" : "rlfqjxm0",
"num" : 43030507,
"user": "에나",
},
"post": {
"date" : "dt:2013-03-05 17:48:00",
"description": " ◈ PROMOTER :핑수 ˚ 아담 EDITOR핑수 넵:이크:핑수...",
"num" : 70161391809,
"title" : "[공유] { 합작} OH, MY GOD! ~ 아 또 무슨 종말을 한다 그래~",
},
"count" : 2,
"num" : range(1, 2),
"filename" : r"re:1_사본|오마갓합작",
"extension": "png",
},
{
"#url" : "https://blog.naver.com/PostView.naver?blogId=rlfqjxm0&logNo=221430673006",
"#category": ("", "naver", "post"),
"#class" : naver.NaverPostExtractor,
},
{
"#url" : "https://blog.naver.com/gukjung",
"#category": ("", "naver", "blog"),
@ -42,4 +75,10 @@ __tests__ = (
"#count" : 12,
},
{
"#url" : "https://blog.naver.com/PostList.naver?blogId=gukjung",
"#category": ("", "naver", "blog"),
"#class" : naver.NaverBlogExtractor,
},
)

View File

@ -109,7 +109,7 @@ __tests__ = (
"#category": ("", "naverwebtoon", "comic"),
"#class" : naverwebtoon.NaverwebtoonComicExtractor,
"#pattern" : naverwebtoon.NaverwebtoonEpisodeExtractor.pattern,
"#count" : 25,
"#count" : 24,
},
{

View File

@ -15,11 +15,11 @@ __tests__ = (
},
{
"#url" : "https://www.omgmiamiswimwear.com/products/la-medusa-maxi-dress",
"#url" : "https://www.omgmiamiswimwear.com/products/snatch-me-waist-belt",
"#category": ("shopify", "omgmiamiswimwear", "product"),
"#class" : shopify.ShopifyProductExtractor,
"#pattern" : r"https://cdn\.shopify\.com/s/files/1/1819/6171/",
"#count" : 5,
"#count" : 3,
},
)

View File

@ -163,6 +163,14 @@ __tests__ = (
"#count" : ">= 10",
},
{
"#url" : "https://www.pixiv.net/artworks/966412",
"#comment" : "limit_sanity_level_360.png (#4327, #5180)",
"#category": ("", "pixiv", "work"),
"#class" : pixiv.PixivWorkExtractor,
"#count" : 0,
},
{
"#url" : "https://www.pixiv.net/en/artworks/966412",
"#category": ("", "pixiv", "work"),
@ -459,11 +467,14 @@ __tests__ = (
{
"#url" : "https://www.pixiv.net/novel/show.php?id=16422450",
"#comment" : "embeds",
"#comment" : "embeds // covers (#5373)",
"#category": ("", "pixiv", "novel"),
"#class" : pixiv.PixivNovelExtractor,
"#options" : {"embeds": True},
"#count" : 3,
"#options" : {
"embeds": True,
"covers": True,
},
"#count" : 4,
},
{

View File

@ -62,9 +62,11 @@ __tests__ = (
"hardcore sex",
"babes 18 year",
],
"timestamp": "5:07",
"title" : "Intense sloppy blowjob of Danika Mori",
"url" : "https://el.phncdn.com/pics/gifs/043/726/891/43726891a.webm",
"user" : "Danika Mori",
"viewkey" : "64367c8c78a4a",
},
{

Some files were not shown because too many files have changed in this diff