mirror of https://github.com/mikf/gallery-dl.git
synced 2024-11-22 02:32:33 +01:00

commit e6c948f425: Merge branch 'mikf:master' into feature/patreonPostComments

.github/workflows/docker.yml (vendored, 60)
@@ -1,33 +1,47 @@

name: docker
name: Docker Images

on:
workflow_dispatch:
push:
branches:
- master
tags:
- v[0-9]+.[0-9]+.[0-9]+

permissions:
packages: write

concurrency:
group: docker
cancel-in-progress: false

jobs:
docker:
build:

runs-on: ubuntu-latest

# on release commits, run only for tag event
if: ${{ ! startsWith( github.event.head_commit.message , 'release version ' ) || startsWith( github.ref , 'refs/tags/v' ) }}

steps:
- uses: actions/checkout@v4

# https://github.com/docker/setup-buildx-action
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

# https://github.com/docker/login-action
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
- uses: docker/metadata-action@v5
id: metadata
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GHCR_TOKEN }}
images: |
mikf123/gallery-dl
ghcr.io/mikf/gallery-dl
tags: |
type=ref,event=tag
type=raw,value=dev
type=sha,format=long,prefix=
type=raw,priority=500,value={{date 'YYYY.MM.DD'}}

- uses: docker/setup-qemu-action@v3

- uses: docker/setup-buildx-action@v3

- name: Login to DockerHub
uses: docker/login-action@v3

@@ -35,23 +49,17 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

# https://github.com/docker/metadata-action
- name: Generate Docker tags
uses: docker/metadata-action@v5
id: metadata
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
images: |
mikf123/gallery-dl
ghcr.io/mikf/gallery-dl
tags: |
type=sha,format=long,prefix=
type=ref,event=tag
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GHCR_TOKEN }}

# https://github.com/docker/build-push-action
- name: Build image
uses: docker/build-push-action@v5
- uses: docker/build-push-action@v5
with:
context: .
push: true
tags: ${{ steps.metadata.outputs.tags }}
labels: ${{ steps.metadata.outputs.labels }}
platforms: linux/amd64
platforms: linux/amd64,linux/arm64
.github/workflows/executables.yml (vendored, 60)

@@ -1,10 +1,15 @@

name: executables
name: Executables

on:
workflow_dispatch:
push:
branches:
- master
tags-ignore:
- "*"

env:
DATE_FORMAT: "%Y.%m.%d"

jobs:
build:

@@ -31,19 +36,58 @@ jobs:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }} ${{ matrix.architecture }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
architecture: ${{ matrix.architecture }}

- name: Date
run: echo "DATE=$(date '+${{ env.DATE_FORMAT }}')" >> "$GITHUB_ENV"

- name: Update Version
# use Python since its behavior is consistent across operating systems
shell: python
run: |
import re
path = "./gallery_dl/version.py"
with open(path) as fp:
content = fp.read()
content = re.sub(
r'\b(__version__ = "[^"]+)',
r"\1:${{ env.DATE }}",
content)
with open(path, "w") as fp:
fp.write(content)

- name: Build executable
run: |
pip install requests requests[socks] yt-dlp pyyaml ${{ matrix.python-packages }} pyinstaller
python scripts/pyinstaller.py
python ./scripts/pyinstaller.py --os '${{ matrix.os }}' --arch '${{ matrix.architecture }}'

- name: Upload executable
uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: gallery-dl-${{ matrix.os }}-${{ matrix.architecture }}-${{ matrix.python-version }}
path: |
dist
name: executable-${{ matrix.os }}-${{ matrix.architecture }}-${{ matrix.python-version }}
path: dist/*
retention-days: 1
compression-level: 0

release:

needs: build
runs-on: ubuntu-latest

steps:
- uses: actions/download-artifact@v4

- name: Date
run: echo "DATE=$(date '+${{ env.DATE_FORMAT }}')" >> "$GITHUB_ENV"

- uses: ncipollo/release-action@v1
with:
owner: gdl-org
repo: builds
tag: ${{ env.DATE }}
artifacts: "executable-*/*"
allowUpdates: true
makeLatest: true
token: ${{ secrets.REPO_TOKEN }}
.github/workflows/pages.yml (vendored, new file, 56)

@@ -0,0 +1,56 @@

name: GitHub Pages

on:
workflow_dispatch:
push:
branches:
- master
paths:
- docs/**

permissions:
contents: read
pages: write
id-token: write

concurrency:
group: pages
cancel-in-progress: false

jobs:
dispatch:

runs-on: ubuntu-latest

steps:
- name: Dispatch to gdl-org/docs
run: >
curl -L
-X POST
-H "Accept: application/vnd.github+json"
-H "Authorization: Bearer ${{ secrets.REPO_TOKEN }}"
-H "X-GitHub-Api-Version: 2022-11-28"
https://api.github.com/repos/gdl-org/docs/actions/workflows/pages.yml/dispatches
-d '{"ref":"master"}'

deploy:

runs-on: ubuntu-latest

environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}

steps:
- uses: actions/checkout@v4
- uses: actions/configure-pages@v4

- name: Copy static files
run: |
mkdir --parents -- ./_site
cp --archive --target-directory=./_site -- \
./docs/oauth-redirect.html

- uses: actions/upload-pages-artifact@v3
- uses: actions/deploy-pages@v4
id: deployment
.github/workflows/pages_dispatch.yml (vendored, 23)

@@ -1,23 +0,0 @@

name: Dispatch GitHub Pages Build

on:
workflow_dispatch:
push:
branches:
- "master"
paths:
- "docs/**"

jobs:
dispatch:
runs-on: ubuntu-latest
steps:
- name: dispatch
run: >
curl -L
-X POST
-H "Accept: application/vnd.github+json"
-H "Authorization: Bearer ${{ secrets.DISPATCH_TOKEN }}"
-H "X-GitHub-Api-Version: 2022-11-28"
https://api.github.com/repos/gdl-org/docs/actions/workflows/pages.yml/dispatches
-d '{"ref":"master"}'
.github/workflows/tests.yml (vendored, 4)

@@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.9"]
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.9"]

steps:
- uses: actions/checkout@v4

@@ -26,7 +26,7 @@ jobs:
if [[ "$(find ./gallery_dl -type f -not -perm 644)" ]]; then exit 1; fi

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
CHANGELOG.md (84)

@@ -1,5 +1,89 @@
|
||||
# Changelog
|
||||
|
||||
## 1.26.9 - 2024-03-23
|
||||
### Extractors
|
||||
#### Additions
|
||||
- [artstation] support video clips ([#2566](https://github.com/mikf/gallery-dl/issues/2566), [#3309](https://github.com/mikf/gallery-dl/issues/3309), [#3911](https://github.com/mikf/gallery-dl/issues/3911))
|
||||
- [artstation] support collections ([#146](https://github.com/mikf/gallery-dl/issues/146))
|
||||
- [deviantart] recognize `deviantart.com/stash/…` URLs
|
||||
- [idolcomplex] support new pool URLs
|
||||
- [lensdump] recognize direct image links ([#5293](https://github.com/mikf/gallery-dl/issues/5293))
|
||||
- [skeb] add extractor for followed users ([#5290](https://github.com/mikf/gallery-dl/issues/5290))
|
||||
- [twitter] add `quotes` extractor ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
|
||||
- [wikimedia] support `azurlane.koumakan.jp` ([#5256](https://github.com/mikf/gallery-dl/issues/5256))
|
||||
- [xvideos] support `/channels/` URLs ([#5244](https://github.com/mikf/gallery-dl/issues/5244))
|
||||
#### Fixes
|
||||
- [artstation] fix handling usernames with dashes in domain names ([#5224](https://github.com/mikf/gallery-dl/issues/5224))
|
||||
- [bluesky] fix not spawning child extractors for followed users ([#5246](https://github.com/mikf/gallery-dl/issues/5246))
|
||||
- [deviantart] handle CloudFront blocks ([#5363](https://github.com/mikf/gallery-dl/issues/5363))
|
||||
- [deviantart:avatar] fix `index` for URLs without `?` ([#5276](https://github.com/mikf/gallery-dl/issues/5276))
|
||||
- [deviantart:stash] fix `index` values ([#5335](https://github.com/mikf/gallery-dl/issues/5335))
|
||||
- [gofile] fix extraction
|
||||
- [hiperdex] update URL patterns & fix `manga` metadata ([#5340](https://github.com/mikf/gallery-dl/issues/5340))
|
||||
- [idolcomplex] fix metadata extraction
|
||||
- [imagefap] fix folder extraction ([#5333](https://github.com/mikf/gallery-dl/issues/5333))
|
||||
- [instagram] make accessing `like_count` non-fatal ([#5218](https://github.com/mikf/gallery-dl/issues/5218))
|
||||
- [mastodon] fix handling null `moved` account field ([#5321](https://github.com/mikf/gallery-dl/issues/5321))
|
||||
- [naver] fix EUC-KR encoding issue in old image URLs ([#5126](https://github.com/mikf/gallery-dl/issues/5126))
|
||||
- [nijie] increase default delay between requests ([#5221](https://github.com/mikf/gallery-dl/issues/5221))
|
||||
- [nitter] ignore invalid Tweets ([#5253](https://github.com/mikf/gallery-dl/issues/5253))
|
||||
- [pixiv:novel] fix text extraction ([#5285](https://github.com/mikf/gallery-dl/issues/5285), [#5309](https://github.com/mikf/gallery-dl/issues/5309))
|
||||
- [skeb] retry 429 responses containing a `request_key` cookie ([#5210](https://github.com/mikf/gallery-dl/issues/5210))
|
||||
- [warosu] fix crash for threads with deleted posts ([#5289](https://github.com/mikf/gallery-dl/issues/5289))
|
||||
- [weibo] fix retweets ([#2825](https://github.com/mikf/gallery-dl/issues/2825), [#3874](https://github.com/mikf/gallery-dl/issues/3874), [#5263](https://github.com/mikf/gallery-dl/issues/5263))
|
||||
- [weibo] fix `livephoto` filename extensions ([#5287](https://github.com/mikf/gallery-dl/issues/5287))
|
||||
- [xvideos] fix galleries with more than 500 images ([#5244](https://github.com/mikf/gallery-dl/issues/5244))
|
||||
#### Improvements
|
||||
- [bluesky] improve API error messages
|
||||
- [bluesky] handle posts with different `embed` structure
|
||||
- [deviantart:avatar] ignore default avatars ([#5276](https://github.com/mikf/gallery-dl/issues/5276))
|
||||
- [fapello] download full-sized images ([#5349](https://github.com/mikf/gallery-dl/issues/5349))
|
||||
- [gelbooru:favorite] automatically detect returned post order ([#5220](https://github.com/mikf/gallery-dl/issues/5220))
|
||||
- [imgur] fail downloads when redirected to `removed.png` ([#5308](https://github.com/mikf/gallery-dl/issues/5308))
|
||||
- [instagram] raise proper error for missing `reels_media` ([#5257](https://github.com/mikf/gallery-dl/issues/5257))
|
||||
- [instagram] change `posts are private` exception to a warning ([#5322](https://github.com/mikf/gallery-dl/issues/5322))
|
||||
- [reddit] improve preview fallback formats ([#5296](https://github.com/mikf/gallery-dl/issues/5296), [#5315](https://github.com/mikf/gallery-dl/issues/5315))
|
||||
- [steamgriddb] raise exception for deleted assets
|
||||
- [twitter] handle "account is temporarily locked" errors ([#5300](https://github.com/mikf/gallery-dl/issues/5300))
|
||||
- [weibo] rework pagination logic ([#4168](https://github.com/mikf/gallery-dl/issues/4168))
|
||||
- [zerochan] fetch more posts by using the API ([#3669](https://github.com/mikf/gallery-dl/issues/3669))
|
||||
#### Metadata
|
||||
- [bluesky] add `instance` metadata field ([#4438](https://github.com/mikf/gallery-dl/issues/4438))
|
||||
- [gelbooru:favorite] add `date_favorited` metadata field
|
||||
- [imagefap] extract `folder` metadata ([#5270](https://github.com/mikf/gallery-dl/issues/5270))
|
||||
- [instagram] default `likes` to `0` ([#5323](https://github.com/mikf/gallery-dl/issues/5323))
|
||||
- [kemonoparty] add `revision_count` metadata field ([#5334](https://github.com/mikf/gallery-dl/issues/5334))
|
||||
- [naver] unescape post `title` and `description`
|
||||
- [pornhub:gif] extract `viewkey` and `timestamp` metadata ([#4463](https://github.com/mikf/gallery-dl/issues/4463))
|
||||
- [redgifs] make `date` available for directories ([#5262](https://github.com/mikf/gallery-dl/issues/5262))
|
||||
- [subscribestar] fix `date` metadata
|
||||
- [twitter] add `birdwatch` metadata field ([#5317](https://github.com/mikf/gallery-dl/issues/5317))
|
||||
- [twitter] add `protected` metadata field ([#5327](https://github.com/mikf/gallery-dl/issues/5327))
|
||||
- [warosu] fix `board_name` metadata
|
||||
#### Options
|
||||
- [bluesky] add `reposts` option ([#4438](https://github.com/mikf/gallery-dl/issues/4438), [#5248](https://github.com/mikf/gallery-dl/issues/5248))
|
||||
- [deviantart] add `comments-avatars` option ([#4995](https://github.com/mikf/gallery-dl/issues/4995))
|
||||
- [deviantart] extend `metadata` option ([#5175](https://github.com/mikf/gallery-dl/issues/5175))
|
||||
- [flickr] add `contexts` option ([#5324](https://github.com/mikf/gallery-dl/issues/5324))
|
||||
- [gelbooru:favorite] add `order-posts` option ([#5220](https://github.com/mikf/gallery-dl/issues/5220))
|
||||
- [kemonoparty] add `order-revisions` option ([#5334](https://github.com/mikf/gallery-dl/issues/5334))
|
||||
- [vipergirls] add `like` option ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
|
||||
- [vipergirls] add `domain` option ([#4166](https://github.com/mikf/gallery-dl/issues/4166))
|
||||
### Downloaders
|
||||
- [http] add MIME type and signature for `.mov` files ([#5287](https://github.com/mikf/gallery-dl/issues/5287))
|
||||
### Docker
|
||||
- build images from source instead of PyPI package
|
||||
- build `linux/arm64` images ([#5227](https://github.com/mikf/gallery-dl/issues/5227))
|
||||
- build images on every push to master
|
||||
- tag images as `YYYY.MM.DD`
|
||||
- tag the most recent build from master as `dev`
|
||||
- tag the most recent release build as `latest`
|
||||
- reduce image size ([#5097](https://github.com/mikf/gallery-dl/issues/5097))
|
||||
### Miscellaneous
|
||||
- [formatter] fix local DST datetime offsets for `:O`
|
||||
- build Linux executable on Ubuntu 22.04 LTS ([#4184](https://github.com/mikf/gallery-dl/issues/4184))
|
||||
- automatically create directories for logging files ([#5249](https://github.com/mikf/gallery-dl/issues/5249))
|
||||
|
||||
## 1.26.8 - 2024-02-17
|
||||
### Extractors
|
||||
#### Additions
|
||||
|
Dockerfile (24)

@@ -1,7 +1,21 @@

FROM python:alpine
RUN python3 -m pip install --no-cache-dir -U pip && \
python3 -m pip install --no-cache-dir -U gallery-dl yt-dlp
RUN apk update && \
apk add --no-cache ffmpeg && \
rm -rf /var/cache/apk/*
ENV LANG=C.UTF-8

RUN : \
&& apk --no-interactive update \
&& apk --no-cache --no-interactive add ffmpeg \
&& rm -rf /var/cache/apk \
&& :

RUN : \
&& python3 -B -m pip --no-cache-dir --no-input --disable-pip-version-check install -U \
pip \
&& python3 -B -m pip --no-cache-dir --no-input --disable-pip-version-check install -U \
https://github.com/mikf/gallery-dl/archive/refs/heads/master.tar.gz \
yt-dlp \
&& rm -rf /root/.cache/pip \
&& find /usr/local/lib/python3.*/site-packages/setuptools -name __pycache__ -exec rm -rf {} + \
&& find /usr/local/lib/python3.*/site-packages/wheel -name __pycache__ -exec rm -rf {} + \
&& :

ENTRYPOINT [ "gallery-dl" ]
README.rst (12)

@@ -7,8 +7,8 @@ to download image galleries and collections
from several image hosting sites
(see `Supported Sites <docs/supportedsites.md>`__).
It is a cross-platform tool
with many `configuration options <docs/configuration.rst>`__
and powerful `filenaming capabilities <docs/formatting.md>`__.
with many `configuration options <https://gdl-org.github.io/docs/configuration.html>`__
and powerful `filenaming capabilities <https://gdl-org.github.io/docs/formatting.html>`__.

|pypi| |build|

@@ -72,9 +72,9 @@ Standalone Executable
Prebuilt executable files with a Python interpreter and
required Python packages included are available for

- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.exe>`__
- `Windows <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.exe>`__
(Requires `Microsoft Visual C++ Redistributable Package (x86) <https://aka.ms/vs/17/release/vc_redist.x86.exe>`__)
- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.8/gallery-dl.bin>`__
- `Linux <https://github.com/mikf/gallery-dl/releases/download/v1.26.9/gallery-dl.bin>`__

Nightly Builds

@@ -234,7 +234,7 @@ Documentation
-------------

A list of all available configuration options and their descriptions
can be found in `<docs/configuration.rst>`__.
can be found at `<https://gdl-org.github.io/docs/configuration.html>`__.

| For a default configuration file with available options set to their
default values, see `<docs/gallery-dl.conf>`__.

@@ -330,7 +330,7 @@ CAPTCHA or similar, or has not been implemented yet, you can use the
cookies from a browser login session and input them into *gallery-dl*.

This can be done via the
`cookies <docs/configuration.rst#extractorcookies>`__
`cookies <https://gdl-org.github.io/docs/configuration.html#extractor-cookies>`__
option in your configuration file by specifying

- | the path to a Mozilla/Netscape format cookies.txt file exported by a browser addon
docs/_layouts/default.html (new file, 20)

@@ -0,0 +1,20 @@

<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">

{% seo %}

<link rel="stylesheet" href="{{ "/assets/css/style.css?v=" | append: site.github.build_revision | relative_url }}">
<script src="links.js"></script>
</head>
<body>
<div class="container-lg px-3 my-5 markdown-body">

{{ content }}

</div>
</body>
</html>
docs/configuration.rst

@@ -337,6 +337,15 @@ Description
|
||||
filename extension (``file.1.ext``, ``file.2.ext``, etc.)
|
||||
|
||||
|
||||
extractor.*.skip-filter
|
||||
-----------------------
|
||||
Type
|
||||
``string``
|
||||
Description
|
||||
Python expression controlling which skipped files to count towards
|
||||
``"abort"`` / ``"terminate"`` / ``"exit"``.
|
||||
|
||||
|
||||
extractor.*.sleep
|
||||
-----------------
|
||||
Type
|
||||
@ -358,12 +367,39 @@ Description
|
||||
i.e. before starting a new extractor.
|
||||
|
||||
|
||||
extractor.*.sleep-429
|
||||
---------------------
|
||||
Type
|
||||
|Duration|_
|
||||
Default
|
||||
``60``
|
||||
Description
|
||||
Number of seconds to sleep when receiving a `429 Too Many Requests`
|
||||
response before `retrying <extractor.*.retries_>`__ the request.
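For example, to raise this delay to five minutes inside the usual top-level
``extractor`` block of a configuration file:

.. code:: json

    {
        "extractor": {
            "sleep-429": 300
        }
    }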
|
||||
|
||||
|
||||
extractor.*.sleep-request
|
||||
-------------------------
|
||||
Type
|
||||
|Duration|_
|
||||
Default
|
||||
``0``
|
||||
* ``"0.5-1.5"``
|
||||
``[Danbooru]``, ``[E621]``, ``[foolfuuka]:search``, ``itaku``,
|
||||
``newgrounds``, ``[philomena]``, ``pixiv:novel``, ``plurk``,
|
||||
``poipiku`` , ``pornpics``, ``soundgasm``, ``urlgalleries``,
|
||||
``vk``, ``zerochan``
|
||||
* ``"1.0-2.0"``
|
||||
``flickr``, ``weibo``, ``[wikimedia]``
|
||||
* ``"2.0-4.0"``
|
||||
``behance``, ``imagefap``, ``[Nijie]``
|
||||
* ``"3.0-6.0"``
|
||||
``exhentai``, ``idolcomplex``, ``[reactor]``, ``readcomiconline``
|
||||
* ``"6.0-6.1"``
|
||||
``twibooru``
|
||||
* ``"6.0-12.0"``
|
||||
``instagram``
|
||||
* ``0``
|
||||
otherwise
|
||||
Description
|
||||
Minimal time interval in seconds between each HTTP request
|
||||
during data extraction.
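To override one of the listed defaults for a single site, place the option
under that extractor's section, e.g. using one of the range values shown above:

.. code:: json

    {
        "extractor": {
            "instagram": {
                "sleep-request": "6.0-12.0"
            }
        }
    }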
|
||||
@ -382,6 +418,7 @@ Description
|
||||
Specifying username and password is required for
|
||||
|
||||
* ``nijie``
|
||||
* ``horne``
|
||||
|
||||
and optional for
|
||||
|
||||
@ -389,8 +426,12 @@ Description
|
||||
* ``aryion``
|
||||
* ``atfbooru`` (*)
|
||||
* ``bluesky``
|
||||
* ``booruvar`` (*)
|
||||
* ``coomerparty``
|
||||
* ``danbooru`` (*)
|
||||
* ``deviantart``
|
||||
* ``e621`` (*)
|
||||
* ``e6ai`` (*)
|
||||
* ``e926`` (*)
|
||||
* ``exhentai``
|
||||
* ``idolcomplex``
|
||||
@ -401,7 +442,6 @@ Description
|
||||
* ``mangoxo``
|
||||
* ``pillowfort``
|
||||
* ``sankaku``
|
||||
* ``seisoparty``
|
||||
* ``subscribestar``
|
||||
* ``tapas``
|
||||
* ``tsumino``
|
||||
@ -417,7 +457,7 @@ Description
|
||||
the API key found in your user profile, not the actual account password.
|
||||
|
||||
Note: Leave the ``password`` value empty or undefined
|
||||
to get prompted for a passeword when performing a login
|
||||
to be prompted for a passeword when performing a login
|
||||
(see `getpass() <https://docs.python.org/3/library/getpass.html#getpass.getpass>`__).
|
||||
|
||||
|
||||
@ -557,8 +597,8 @@ extractor.*.browser
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
* ``"firefox"`` for ``patreon``, ``mangapark``, and ``mangasee``
|
||||
* ``null`` everywhere else
|
||||
* ``"firefox"``: ``artstation``, ``mangasee``, ``patreon``, ``pixiv:series``, ``twitter``
|
||||
* ``null``: otherwise
|
||||
Example
|
||||
* ``"chrome:macos"``
|
||||
Description
|
||||
@ -633,8 +673,8 @@ extractor.*.tls12
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
* ``true``
|
||||
* ``false`` for ``patreon``, ``pixiv:series``
|
||||
* ``false``: ``patreon``, ``pixiv:series``
|
||||
* ``true``: otherwise
|
||||
Description
|
||||
Allow selecting TLS 1.2 cipher suites.
|
||||
|
||||
@ -813,6 +853,22 @@ Description
|
||||
An alternative `format string`_ to build archive IDs with.
|
||||
|
||||
|
||||
extractor.*.archive-mode
|
||||
------------------------
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``"file"``
|
||||
Description
|
||||
Controls when to write `archive IDs <extractor.*.archive-format_>`__
|
||||
to the archive database.
|
||||
|
||||
* ``"file"``: Write IDs immediately
|
||||
after completing or skipping a file download.
|
||||
* ``"memory"``: Keep IDs in memory
|
||||
and only write them after successful job completion.
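A minimal sketch that keeps archive IDs in memory until the job finishes
(the archive path below is only a placeholder):

.. code:: json

    {
        "extractor": {
            "archive": "~/gallery-dl/archive.sqlite3",
            "archive-mode": "memory"
        }
    }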
|
||||
|
||||
|
||||
extractor.*.archive-prefix
|
||||
--------------------------
|
||||
Type
|
||||
@ -836,6 +892,65 @@ Description
|
||||
for available ``PRAGMA`` statements and further details.
|
||||
|
||||
|
||||
extractor.*.actions
|
||||
-------------------
|
||||
Type
|
||||
* ``object`` (`pattern` -> `action`)
|
||||
* ``list`` of ``lists`` with 2 ``strings`` as elements
|
||||
Example
|
||||
.. code:: json
|
||||
|
||||
{
|
||||
"error" : "status |= 1",
|
||||
"warning:(?i)unable to .+": "exit 127",
|
||||
"info:Logging in as .+" : "level = debug"
|
||||
}
|
||||
|
||||
.. code:: json
|
||||
|
||||
[
|
||||
["error" , "status |= 1" ],
|
||||
["warning:(?i)unable to .+", "exit 127" ],
|
||||
["info:Logging in as .+" , "level = debug"]
|
||||
]
|
||||
|
||||
Description
|
||||
Perform an ``action`` when logging a message matched by ``pattern``.
|
||||
|
||||
``pattern`` is parsed as severity level (``debug``, ``info``, ``warning``, ``error``, or integer value)
|
||||
followed by an optional `Python Regular Expression <https://docs.python.org/3/library/re.html#regular-expression-syntax>`__
|
||||
separated by a colon ``:``.
|
||||
Using ``*`` as `level` or leaving it empty
|
||||
matches logging messages of all levels
|
||||
(e.g. ``*:<re>`` or ``:<re>``).
|
||||
|
||||
``action`` is parsed as action type
|
||||
followed by (optional) arguments.
|
||||
|
||||
Supported Action Types:
|
||||
|
||||
``status``:
|
||||
| Modify job exit status.
|
||||
| Expected syntax is ``<operator> <value>`` (e.g. ``= 100``).
|
||||
|
||||
Supported operators are
|
||||
``=`` (assignment),
|
||||
``&`` (bitwise AND),
|
||||
``|`` (bitwise OR),
|
||||
``^`` (bitwise XOR).
|
||||
``level``:
|
||||
| Modify severity level of the current logging message.
|
||||
| Can be one of ``debug``, ``info``, ``warning``, ``error`` or an integer value.
|
||||
``print``
|
||||
Write argument to stdout.
|
||||
``restart``:
|
||||
Restart the current extractor run.
|
||||
``wait``:
|
||||
Stop execution until Enter is pressed.
|
||||
``exit``:
|
||||
Exit the program with the given argument as exit status.
|
||||
|
||||
|
||||
extractor.*.postprocessors
|
||||
--------------------------
|
||||
Type
|
||||
@ -1872,6 +1987,20 @@ Description
|
||||
from `linking your Flickr account to gallery-dl <OAuth_>`__.
|
||||
|
||||
|
||||
extractor.flickr.contexts
|
||||
-------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
For each photo, return the albums and pools it belongs to
|
||||
as ``set`` and ``pool`` metadata.
|
||||
|
||||
Note: This requires 1 additional API call per photo.
|
||||
See `flickr.photos.getAllContexts <https://www.flickr.com/services/api/flickr.photos.getAllContexts.html>`__ for details.
|
||||
|
||||
|
||||
extractor.flickr.exif
|
||||
---------------------
|
||||
Type
|
||||
@ -1879,9 +2008,11 @@ Type
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Fetch `exif` and `camera` metadata for each photo.
|
||||
For each photo, return its EXIF/TIFF/GPS tags
|
||||
as ``exif`` and ``camera`` metadata.
|
||||
|
||||
Note: This requires 1 additional API call per photo.
|
||||
See `flickr.photos.getExif <https://www.flickr.com/services/api/flickr.photos.getExif.html>`__ for details.
|
||||
|
||||
|
||||
extractor.flickr.metadata
|
||||
@ -1901,7 +2032,7 @@ Description
|
||||
|
||||
It is possible to specify a custom list of metadata includes.
|
||||
See `the extras parameter <https://www.flickr.com/services/api/flickr.people.getPhotos.html>`__
|
||||
in `Flickr API docs <https://www.flickr.com/services/api/>`__
|
||||
in `Flickr's API docs <https://www.flickr.com/services/api/>`__
|
||||
for possible field names.
|
||||
|
||||
|
||||
@ -2001,6 +2132,20 @@ Description
|
||||
page.
|
||||
|
||||
|
||||
extractor.gelbooru.favorite.order-posts
|
||||
---------------------------------------
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``"desc"``
|
||||
Description
|
||||
Controls the order in which favorited posts are returned.
|
||||
|
||||
* ``"asc"``: Ascending favorite date order (oldest first)
|
||||
* ``"desc"``: Descending favorite date order (newest first)
|
||||
* ``"reverse"``: Same as ``"asc"``
|
||||
|
||||
|
||||
extractor.generic.enabled
|
||||
-------------------------
|
||||
Type
|
||||
@ -2287,6 +2432,16 @@ Description
|
||||
Extract a user's direct messages as ``dms`` metadata.
|
||||
|
||||
|
||||
extractor.kemonoparty.announcements
|
||||
-----------------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Extract a user's announcements as ``announcements`` metadata.
|
||||
|
||||
|
||||
extractor.kemonoparty.favorites
|
||||
-------------------------------
|
||||
Type
|
||||
@ -2346,6 +2501,22 @@ Description
|
||||
Note: This requires 1 additional HTTP request per post.
|
||||
|
||||
|
||||
extractor.kemonoparty.order-revisions
|
||||
-------------------------------------
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``"desc"``
|
||||
Description
|
||||
Controls the order in which
|
||||
`revisions <extractor.kemonoparty.revisions_>`__
|
||||
are returned.
|
||||
|
||||
* ``"asc"``: Ascending order (oldest first)
|
||||
* ``"desc"``: Descending order (newest first)
|
||||
* ``"reverse"``: Same as ``"asc"``
|
||||
|
||||
|
||||
extractor.khinsider.format
|
||||
--------------------------
|
||||
Type
|
||||
@ -2470,6 +2641,16 @@ Description
|
||||
user IDs.
|
||||
|
||||
|
||||
extractor.[mastodon].cards
|
||||
--------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Fetch media from cards.
|
||||
|
||||
|
||||
extractor.[mastodon].reblogs
|
||||
----------------------------
|
||||
Type
|
||||
@ -2829,14 +3010,24 @@ Description
|
||||
`gppt <https://github.com/eggplants/get-pixivpy-token>`__.
|
||||
|
||||
|
||||
extractor.pixiv.embeds
|
||||
----------------------
|
||||
extractor.pixiv.novel.covers
|
||||
----------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Download images embedded in novels.
|
||||
Download cover images.
|
||||
|
||||
|
||||
extractor.pixiv.novel.embeds
|
||||
----------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Download embedded images.
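Assuming these ``novel`` options nest under ``pixiv`` as their names suggest,
both downloads can be enabled with:

.. code:: json

    {
        "extractor": {
            "pixiv": {
                "novel": {
                    "covers": true,
                    "embeds": true
                }
            }
        }
    }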
|
||||
|
||||
|
||||
extractor.pixiv.novel.full-series
|
||||
@ -3286,7 +3477,7 @@ Examples
|
||||
* ``["jpeg", "webp"]``
|
||||
Description
|
||||
Only include assets that are in the specified file types. ``all`` can be
|
||||
used to specifiy all file types. Valid values are:
|
||||
used to specify all file types. Valid values are:
|
||||
|
||||
* Grids: ``png``, ``jpeg``, ``jpg``, ``webp``
|
||||
* Heroes: ``png``, ``jpeg``, ``jpg``, ``webp``
|
||||
@ -3326,7 +3517,7 @@ Examples
|
||||
* ``["fr", "it"]``
|
||||
Description
|
||||
Only include assets that are in the specified languages. ``all`` can be
|
||||
used to specifiy all languages. Valid values are `ISO 639-1 <https://en.wikipedia.org/wiki/ISO_639-1>`__
|
||||
used to specify all languages. Valid values are `ISO 639-1 <https://en.wikipedia.org/wiki/ISO_639-1>`__
|
||||
language codes.
|
||||
|
||||
|
||||
@ -3771,6 +3962,32 @@ Description
|
||||
* ``"wait"``: Wait until rate limit reset
|
||||
|
||||
|
||||
extractor.twitter.relogin
|
||||
-------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
Description
|
||||
| When receiving a "Could not authenticate you" error while logged in with
|
||||
`username & passeword <extractor.*.username & .password_>`__,
|
||||
| refresh the current login session and
|
||||
try to continue from where it left off.
|
||||
|
||||
|
||||
extractor.twitter.locked
|
||||
------------------------
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``"abort"``
|
||||
Description
|
||||
Selects how to handle "account is temporarily locked" errors.
|
||||
|
||||
* ``"abort"``: Raise an error and stop extraction
|
||||
* ``"wait"``: Wait until the account is unlocked and retry
|
||||
|
||||
|
||||
extractor.twitter.replies
|
||||
-------------------------
|
||||
Type
|
||||
@ -3909,6 +4126,31 @@ Description
|
||||
``"raw"``, ``"full"``, ``"regular"``, ``"small"``, and ``"thumb"``.
|
||||
|
||||
|
||||
extractor.vipergirls.domain
|
||||
---------------------------
|
||||
Type
|
||||
``string``
|
||||
Default
|
||||
``"vipergirls.to"``
|
||||
Description
|
||||
Specifies the domain used by ``vipergirls`` extractors.
|
||||
|
||||
For example ``"viper.click"`` if the main domain is blocked or to bypass Cloudflare,
|
||||
|
||||
|
||||
extractor.vipergirls.like
|
||||
-------------------------
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
Description
|
||||
Automatically `like` posts after downloading their images.
|
||||
|
||||
Note: Requires `login <extractor.*.username & .password_>`__
|
||||
or `cookies <extractor.*.cookies_>`__
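A sketch combining both options with the login data they rely on (the
credential values are placeholders):

.. code:: json

    {
        "extractor": {
            "vipergirls": {
                "domain": "viper.click",
                "like": true,
                "username": "your-username",
                "password": "your-password"
            }
        }
    }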
|
||||
|
||||
|
||||
extractor.vsco.videos
|
||||
---------------------
|
||||
Type
|
||||
@ -4039,7 +4281,7 @@ extractor.weibo.retweets
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``true``
|
||||
``false``
|
||||
Description
|
||||
Fetch media from retweeted posts.
|
||||
|
||||
@ -4714,10 +4956,33 @@ output.colors
|
||||
Type
|
||||
``object`` (`key` -> `ANSI color`)
|
||||
Default
|
||||
``{"success": "1;32", "skip": "2"}``
|
||||
.. code:: json
|
||||
|
||||
{
|
||||
"success": "1;32",
|
||||
"skip" : "2",
|
||||
"debug" : "0;37",
|
||||
"info" : "1;37",
|
||||
"warning": "1;33",
|
||||
"error" : "1;31"
|
||||
}
|
||||
|
||||
Description
|
||||
Controls the `ANSI colors <https://gist.github.com/fnky/458719343aabd01cfb17a3a4f7296797#colors--graphics-mode>`__
|
||||
used with |mode: color|__ for successfully downloaded or skipped files.
|
||||
Controls the
|
||||
`ANSI colors <https://gist.github.com/fnky/458719343aabd01cfb17a3a4f7296797#colors--graphics-mode>`__
|
||||
used for various outputs.
|
||||
|
||||
Output for |mode: color|__
|
||||
|
||||
* ``success``: successfully downloaded files
|
||||
* ``skip``: skipped files
|
||||
|
||||
Logging Messages:
|
||||
|
||||
* ``debug``: debug logging messages
|
||||
* ``info``: info logging messages
|
||||
* ``warning``: warning logging messages
|
||||
* ``error``: error logging messages
|
||||
|
||||
.. __: `output.mode`_
|
||||
|
||||
@ -4727,7 +4992,7 @@ output.ansi
|
||||
Type
|
||||
``bool``
|
||||
Default
|
||||
``false``
|
||||
``true``
|
||||
Description
|
||||
| On Windows, enable ANSI escape sequences and colored output
|
||||
| by setting the ``ENABLE_VIRTUAL_TERMINAL_PROCESSING`` flag for stdout and stderr.
|
||||
@ -5784,7 +6049,7 @@ How To
|
||||
* choose a name
|
||||
* select "installed app"
|
||||
* set ``http://localhost:6414/`` as "redirect uri"
|
||||
* solve the "I'm not a rebot" reCATCHA if needed
|
||||
* solve the "I'm not a robot" reCAPTCHA if needed
|
||||
* click "create app"
|
||||
|
||||
* copy the client id (third line, under your application's name and
|
||||
@ -5932,7 +6197,7 @@ Description
|
||||
|
||||
* format
|
||||
* General format string for logging messages
|
||||
or a dictionary with format strings for each loglevel.
|
||||
or an ``object`` with format strings for each loglevel.
|
||||
|
||||
In addition to the default
|
||||
`LogRecord attributes <https://docs.python.org/3/library/logging.html#logrecord-attributes>`__,
|
||||
|
docs/links.js (new file, 44)

@@ -0,0 +1,44 @@

"use strict";


function add_header_links()
{
    let style = document.createElement("style");
    style.id = "headerlinks"
    document.head.appendChild(style);
    style.sheet.insertRule(
        "a.headerlink {" +
        " visibility: hidden;" +
        " text-decoration: none;" +
        " font-size: 0.8em;" +
        " padding: 0 4px 0 4px;" +
        "}");
    style.sheet.insertRule(
        ":hover > a.headerlink {" +
        " visibility: visible;" +
        "}");

    let headers = document.querySelectorAll("h2, h3, h4, h5, h6");
    for (let i = 0, len = headers.length; i < len; ++i)
    {
        let header = headers[i];

        let id = header.id || header.parentNode.id;
        if (!id)
            continue;

        let link = document.createElement("a");
        link.href = "#" + id;
        link.className = "headerlink";
        link.textContent = "¶";

        header.appendChild(link);
    }
}


if (document.readyState !== "loading") {
    add_header_links();
} else {
    document.addEventListener("DOMContentLoaded", add_header_links);
}
@@ -29,6 +29,7 @@

## Output Options:
-q, --quiet Activate quiet mode
-w, --warning Print only warnings and errors
-v, --verbose Print various debugging information
-g, --get-urls Print URLs instead of downloading
-G, --resolve-urls Print URLs instead of downloading; resolve

@@ -48,12 +49,12 @@
extractors but cannot be handled, to FILE
--write-pages Write downloaded intermediary pages to files in
the current directory to debug problems
--no-colors Do not emit ANSI color codes in output

## Downloader Options:
-r, --limit-rate RATE Maximum download rate (e.g. 500k or 2.5M)
-R, --retries N Maximum number of retries for failed HTTP
requests or -1 for infinite retries (default:
4)
requests or -1 for infinite retries (default: 4)
--http-timeout SECONDS Timeout for HTTP connections (default: 30.0)
--sleep SECONDS Number of seconds to wait before each download.
This can be either a constant value or a range
docs/supportedsites.md

@@ -790,7 +790,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr>
|
||||
<td>Skeb</td>
|
||||
<td>https://skeb.jp/</td>
|
||||
<td>Followed Users, Posts, Search Results, User Profiles</td>
|
||||
<td>Followed Creators, Followed Users, Posts, Search Results, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -838,7 +838,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr>
|
||||
<td>Tapas</td>
|
||||
<td>https://tapas.io/</td>
|
||||
<td>Episodes, Series</td>
|
||||
<td>Creators, Episodes, Series</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -898,7 +898,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr>
|
||||
<td>Twitter</td>
|
||||
<td>https://twitter.com/</td>
|
||||
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Search Results, Timelines, Tweets, User Profiles</td>
|
||||
<td>Avatars, Backgrounds, Bookmarks, Communities, Events, Followed Users, Hashtags, individual Images, Likes, Lists, List Members, Media Timelines, Quotes, Search Results, Timelines, Tweets, User Profiles</td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@ -940,14 +940,14 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr>
|
||||
<td>VSCO</td>
|
||||
<td>https://vsco.co/</td>
|
||||
<td>Collections, individual Images, Spaces, User Profiles</td>
|
||||
<td>Avatars, Collections, individual Images, Spaces, User Profiles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wallhaven</td>
|
||||
<td>https://wallhaven.cc/</td>
|
||||
<td>Collections, individual Images, Search Results, User Profiles</td>
|
||||
<td><a href="configuration.rst#extractorwallhavenapi-key">API Key</a></td>
|
||||
<td><a href="https://gdl-org.github.io/docs/configuration.html#extractor-wallhaven-api-key">API Key</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Wallpaper Cave</td>
|
||||
@ -965,7 +965,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Weasyl</td>
|
||||
<td>https://www.weasyl.com/</td>
|
||||
<td>Favorites, Folders, Journals, Submissions</td>
|
||||
<td><a href="configuration.rst#extractorweasylapi-key">API Key</a></td>
|
||||
<td><a href="https://gdl-org.github.io/docs/configuration.html#extractor-weasyl-api-key">API Key</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>webmshare</td>
|
||||
@ -1103,7 +1103,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Booruvar</td>
|
||||
<td>https://booru.borvar.art/</td>
|
||||
<td>Pools, Popular Images, Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
@ -1125,7 +1125,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>e6AI</td>
|
||||
<td>https://e6ai.net/</td>
|
||||
<td>Favorites, Pools, Popular Images, Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
<td>Supported</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
@ -1319,7 +1319,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Derpibooru</td>
|
||||
<td>https://derpibooru.org/</td>
|
||||
<td>Galleries, Posts, Search Results</td>
|
||||
<td><a href="configuration.rst#extractorderpibooruapi-key">API Key</a></td>
|
||||
<td><a href="https://gdl-org.github.io/docs/configuration.html#extractor-derpibooru-api-key">API Key</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Ponybooru</td>
|
||||
@ -1331,7 +1331,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Furbooru</td>
|
||||
<td>https://furbooru.org/</td>
|
||||
<td>Galleries, Posts, Search Results</td>
|
||||
<td></td>
|
||||
<td>API Key</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
@ -1499,6 +1499,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Articles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>wiki.gg</td>
|
||||
<td>https://www.wiki.gg/</td>
|
||||
<td>Articles</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Super Mario Wiki</td>
|
||||
<td>https://www.mariowiki.com/</td>
|
||||
@ -1616,19 +1622,19 @@ Consider all listed sites to potentially be NSFW.
|
||||
<tr>
|
||||
<td>mastodon.social</td>
|
||||
<td>https://mastodon.social/</td>
|
||||
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
|
||||
<td>Bookmarks, Favorites, Followed Users, Hashtags, Lists, Images from Statuses, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Pawoo</td>
|
||||
<td>https://pawoo.net/</td>
|
||||
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
|
||||
<td>Bookmarks, Favorites, Followed Users, Hashtags, Lists, Images from Statuses, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>baraag</td>
|
||||
<td>https://baraag.net/</td>
|
||||
<td>Bookmarks, Followed Users, Images from Statuses, User Profiles</td>
|
||||
<td>Bookmarks, Favorites, Followed Users, Hashtags, Lists, Images from Statuses, User Profiles</td>
|
||||
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
|
||||
</tr>
|
||||
|
||||
|
@ -38,6 +38,11 @@ def main():
|
||||
except ImportError:
|
||||
import toml
|
||||
config.load(args.configs_toml, strict=True, loads=toml.loads)
|
||||
if not args.colors:
|
||||
output.ANSI = False
|
||||
config.set((), "colors", False)
|
||||
if util.WINDOWS:
|
||||
config.set(("output",), "ansi", False)
|
||||
if args.filename:
|
||||
filename = args.filename
|
||||
if filename == "/O":
|
||||
@ -86,7 +91,7 @@ def main():
|
||||
signal.signal(signal_num, signal.SIG_IGN)
|
||||
|
||||
# enable ANSI escape sequences on Windows
|
||||
if util.WINDOWS and config.get(("output",), "ansi"):
|
||||
if util.WINDOWS and config.get(("output",), "ansi", output.COLORS):
|
||||
from ctypes import windll, wintypes, byref
|
||||
kernel32 = windll.kernel32
|
||||
mode = wintypes.DWORD()
|
||||
@ -113,7 +118,7 @@ def main():
|
||||
|
||||
# loglevels
|
||||
output.configure_logging(args.loglevel)
|
||||
if args.loglevel >= logging.ERROR:
|
||||
if args.loglevel >= logging.WARNING:
|
||||
config.set(("output",), "mode", "null")
|
||||
config.set(("downloader",), "progress", None)
|
||||
elif args.loglevel <= logging.DEBUG:
|
||||
|
gallery_dl/archive.py (new file, 98)

@@ -0,0 +1,98 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Download Archives"""
|
||||
|
||||
import os
|
||||
import sqlite3
|
||||
from . import formatter
|
||||
|
||||
|
||||
class DownloadArchive():
|
||||
|
||||
def __init__(self, path, format_string, pragma=None,
|
||||
cache_key="_archive_key"):
|
||||
try:
|
||||
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
|
||||
except sqlite3.OperationalError:
|
||||
os.makedirs(os.path.dirname(path))
|
||||
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
|
||||
con.isolation_level = None
|
||||
|
||||
self.keygen = formatter.parse(format_string).format_map
|
||||
self.connection = con
|
||||
self.close = con.close
|
||||
self.cursor = cursor = con.cursor()
|
||||
self._cache_key = cache_key
|
||||
|
||||
if pragma:
|
||||
for stmt in pragma:
|
||||
cursor.execute("PRAGMA " + stmt)
|
||||
|
||||
try:
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS archive "
|
||||
"(entry TEXT PRIMARY KEY) WITHOUT ROWID")
|
||||
except sqlite3.OperationalError:
|
||||
# fallback for missing WITHOUT ROWID support (#553)
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS archive "
|
||||
"(entry TEXT PRIMARY KEY)")
|
||||
|
||||
def add(self, kwdict):
|
||||
"""Add item described by 'kwdict' to archive"""
|
||||
key = kwdict.get(self._cache_key) or self.keygen(kwdict)
|
||||
self.cursor.execute(
|
||||
"INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))
|
||||
|
||||
def check(self, kwdict):
|
||||
"""Return True if the item described by 'kwdict' exists in archive"""
|
||||
key = kwdict[self._cache_key] = self.keygen(kwdict)
|
||||
self.cursor.execute(
|
||||
"SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
|
||||
return self.cursor.fetchone()
|
||||
|
||||
def finalize(self):
|
||||
pass
|
||||
|
||||
|
||||
class DownloadArchiveMemory(DownloadArchive):
|
||||
|
||||
def __init__(self, path, format_string, pragma=None,
|
||||
cache_key="_archive_key"):
|
||||
DownloadArchive.__init__(self, path, format_string, pragma, cache_key)
|
||||
self.keys = set()
|
||||
|
||||
def add(self, kwdict):
|
||||
self.keys.add(
|
||||
kwdict.get(self._cache_key) or
|
||||
self.keygen(kwdict))
|
||||
|
||||
def check(self, kwdict):
|
||||
key = kwdict[self._cache_key] = self.keygen(kwdict)
|
||||
if key in self.keys:
|
||||
return True
|
||||
self.cursor.execute(
|
||||
"SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
|
||||
return self.cursor.fetchone()
|
||||
|
||||
def finalize(self):
|
||||
if not self.keys:
|
||||
return
|
||||
|
||||
cursor = self.cursor
|
||||
with self.connection:
|
||||
try:
|
||||
cursor.execute("BEGIN")
|
||||
except sqlite3.OperationalError:
|
||||
pass
|
||||
|
||||
stmt = "INSERT OR IGNORE INTO archive (entry) VALUES (?)"
|
||||
if len(self.keys) < 100:
|
||||
for key in self.keys:
|
||||
cursor.execute(stmt, (key,))
|
||||
else:
|
||||
cursor.executemany(stmt, ((key,) for key in self.keys))
|
@ -10,7 +10,6 @@
|
||||
# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/cookies.py
|
||||
|
||||
import binascii
|
||||
import contextlib
|
||||
import ctypes
|
||||
import logging
|
||||
import os
|
||||
@ -147,7 +146,8 @@ def load_cookies_chrome(cookiejar, browser_name, profile=None,
|
||||
set_cookie(Cookie(
|
||||
0, name, value, None, False,
|
||||
domain, bool(domain), domain.startswith("."),
|
||||
path, bool(path), secure, expires, False, None, None, {},
|
||||
path, bool(path), secure, expires or None, False,
|
||||
None, None, {},
|
||||
))
|
||||
|
||||
if failed_cookies > 0:
|
||||
@ -682,7 +682,8 @@ def _get_gnome_keyring_password(browser_keyring_name):
|
||||
# lists all keys and presumably searches for its key in the list.
|
||||
# It appears that we must do the same.
|
||||
# https://github.com/jaraco/keyring/issues/556
|
||||
with contextlib.closing(secretstorage.dbus_init()) as con:
|
||||
con = secretstorage.dbus_init()
|
||||
try:
|
||||
col = secretstorage.get_default_collection(con)
|
||||
label = browser_keyring_name + " Safe Storage"
|
||||
for item in col.get_all_items():
|
||||
@ -691,6 +692,8 @@ def _get_gnome_keyring_password(browser_keyring_name):
|
||||
else:
|
||||
_log_error("Failed to read from GNOME keyring")
|
||||
return b""
|
||||
finally:
|
||||
con.close()
|
||||
|
||||
|
||||
def _get_linux_keyring_password(browser_keyring_name, keyring):
|
||||
@ -857,7 +860,7 @@ class DatabaseConnection():
|
||||
|
||||
|
||||
def Popen_communicate(*args):
|
||||
proc = subprocess.Popen(
|
||||
proc = util.Popen(
|
||||
args, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
|
||||
try:
|
||||
stdout, stderr = proc.communicate()
|
||||
@ -999,6 +1002,12 @@ def _decrypt_windows_dpapi(ciphertext):
|
||||
|
||||
|
||||
def _find_most_recently_used_file(root, filename):
|
||||
# if the provided root points to an exact profile path
|
||||
# check if it contains the wanted filename
|
||||
first_choice = os.path.join(root, filename)
|
||||
if os.path.exists(first_choice):
|
||||
return first_choice
|
||||
|
||||
# if there are multiple browser profiles, take the most recently used one
|
||||
paths = []
|
||||
for curr_root, dirs, files in os.walk(root):
|
||||
|
@ -98,6 +98,8 @@ class HttpDownloader(DownloaderBase):
|
||||
|
||||
metadata = self.metadata
|
||||
kwdict = pathfmt.kwdict
|
||||
expected_status = kwdict.get(
|
||||
"_http_expected_status", ())
|
||||
adjust_extension = kwdict.get(
|
||||
"_http_adjust_extension", self.adjust_extension)
|
||||
|
||||
@ -151,7 +153,7 @@ class HttpDownloader(DownloaderBase):
|
||||
|
||||
# check response
|
||||
code = response.status_code
|
||||
if code == 200: # OK
|
||||
if code == 200 or code in expected_status: # OK
|
||||
offset = 0
|
||||
size = response.headers.get("Content-Length")
|
||||
elif code == 206: # Partial Content
|
||||
@ -399,6 +401,9 @@ MIME_TYPES = {
|
||||
"video/webm": "webm",
|
||||
"video/ogg" : "ogg",
|
||||
"video/mp4" : "mp4",
|
||||
"video/m4v" : "m4v",
|
||||
"video/x-m4v": "m4v",
|
||||
"video/quicktime": "mov",
|
||||
|
||||
"audio/wav" : "wav",
|
||||
"audio/x-wav": "wav",
|
||||
@ -440,7 +445,9 @@ SIGNATURE_CHECKS = {
|
||||
"cur" : lambda s: s[0:4] == b"\x00\x00\x02\x00",
|
||||
"psd" : lambda s: s[0:4] == b"8BPS",
|
||||
"mp4" : lambda s: (s[4:8] == b"ftyp" and s[8:11] in (
|
||||
b"mp4", b"avc", b"iso", b"M4V")),
|
||||
b"mp4", b"avc", b"iso")),
|
||||
"m4v" : lambda s: s[4:11] == b"ftypM4V",
|
||||
"mov" : lambda s: s[4:12] == b"ftypqt ",
|
||||
"webm": lambda s: s[0:4] == b"\x1A\x45\xDF\xA3",
|
||||
"ogg" : lambda s: s[0:4] == b"OggS",
|
||||
"wav" : lambda s: (s[0:4] == b"RIFF" and
|
||||
|
@ -26,6 +26,9 @@ class _8chanExtractor(Extractor):
|
||||
self.root = "https://8chan." + match.group(1)
|
||||
Extractor.__init__(self, match)
|
||||
|
||||
def _init(self):
|
||||
self.cookies.set("TOS", "1", domain=self.root.rpartition("/")[2])
|
||||
|
||||
@memcache()
|
||||
def cookies_prepare(self):
|
||||
# fetch captcha cookies
|
||||
|
@ -40,6 +40,7 @@ class BlueskyExtractor(Extractor):
|
||||
|
||||
self.api = BlueskyAPI(self)
|
||||
self._user = self._user_did = None
|
||||
self.instance = self.root.partition("://")[2]
|
||||
|
||||
def items(self):
|
||||
for post in self.posts():
|
||||
@ -81,6 +82,7 @@ class BlueskyExtractor(Extractor):
|
||||
if self._metadata_user:
|
||||
post["user"] = self._user or post["author"]
|
||||
|
||||
post["instance"] = self.instance
|
||||
post["post_id"] = pid
|
||||
post["count"] = len(images)
|
||||
post["date"] = text.parse_datetime(
|
||||
@ -315,7 +317,7 @@ class BlueskyAPI():
|
||||
def get_author_feed(self, actor, filter="posts_and_author_threads"):
|
||||
endpoint = "app.bsky.feed.getAuthorFeed"
|
||||
params = {
|
||||
"actor" : self._did_from_actor(actor),
|
||||
"actor" : self._did_from_actor(actor, True),
|
||||
"filter": filter,
|
||||
"limit" : "100",
|
||||
}
|
||||
@ -325,7 +327,7 @@ class BlueskyAPI():
|
||||
endpoint = "app.bsky.feed.getFeed"
|
||||
params = {
|
||||
"feed" : "at://{}/app.bsky.feed.generator/{}".format(
|
||||
self._did_from_actor(actor, False), feed),
|
||||
self._did_from_actor(actor), feed),
|
||||
"limit": "100",
|
||||
}
|
||||
return self._pagination(endpoint, params)
|
||||
@ -342,7 +344,7 @@ class BlueskyAPI():
|
||||
endpoint = "app.bsky.feed.getListFeed"
|
||||
params = {
|
||||
"list" : "at://{}/app.bsky.graph.list/{}".format(
|
||||
self._did_from_actor(actor, False), list),
|
||||
self._did_from_actor(actor), list),
|
||||
"limit": "100",
|
||||
}
|
||||
return self._pagination(endpoint, params)
|
||||
@ -389,7 +391,7 @@ class BlueskyAPI():
|
||||
}
|
||||
return self._pagination(endpoint, params, "posts")
|
||||
|
||||
def _did_from_actor(self, actor, user_did=True):
|
||||
def _did_from_actor(self, actor, user_did=False):
|
||||
if actor.startswith("did:"):
|
||||
did = actor
|
||||
else:
|
||||
|
@ -54,7 +54,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
|
||||
"album_id" : self.album_id,
|
||||
"album_name" : text.unescape(info[0]),
|
||||
"album_size" : size[1:-1],
|
||||
"description": text.unescape(info[2]) if len(info) > 2 else "",
|
||||
"count" : len(urls),
|
||||
}
|
||||
|
||||
|
@ -14,6 +14,7 @@ import ssl
|
||||
import time
|
||||
import netrc
|
||||
import queue
|
||||
import getpass
|
||||
import logging
|
||||
import datetime
|
||||
import requests
|
||||
@ -21,6 +22,7 @@ import threading
|
||||
from requests.adapters import HTTPAdapter
|
||||
from .message import Message
|
||||
from .. import config, text, util, cache, exception
|
||||
urllib3 = requests.packages.urllib3
|
||||
|
||||
|
||||
class Extractor():
|
||||
@ -45,6 +47,8 @@ class Extractor():
|
||||
def __init__(self, match):
|
||||
self.log = logging.getLogger(self.category)
|
||||
self.url = match.string
|
||||
self.match = match
|
||||
self.groups = match.groups()
|
||||
self._cfgpath = ("extractor", self.category, self.subcategory)
|
||||
self._parentdir = ""
|
||||
|
||||
@ -168,22 +172,25 @@ class Extractor():
|
||||
requests.exceptions.ChunkedEncodingError,
|
||||
requests.exceptions.ContentDecodingError) as exc:
|
||||
msg = exc
|
||||
code = 0
|
||||
except (requests.exceptions.RequestException) as exc:
|
||||
raise exception.HttpError(exc)
|
||||
else:
|
||||
code = response.status_code
|
||||
if self._write_pages:
|
||||
self._dump_response(response)
|
||||
if 200 <= code < 400 or fatal is None and \
|
||||
(400 <= code < 500) or not fatal and \
|
||||
(400 <= code < 429 or 431 <= code < 500):
|
||||
if (
|
||||
code < 400 or
|
||||
code < 500 and (not fatal and code != 429 or fatal is None)
|
||||
):
|
||||
if encoding:
|
||||
response.encoding = encoding
|
||||
return response
|
||||
if notfound and code == 404:
|
||||
raise exception.NotFoundError(notfound)
|
||||
|
||||
msg = "'{} {}' for '{}'".format(code, response.reason, url)
|
||||
msg = "'{} {}' for '{}'".format(
|
||||
code, response.reason, response.url)
|
||||
server = response.headers.get("Server")
|
||||
if server and server.startswith("cloudflare") and \
|
||||
code in (403, 503):
|
||||
@ -194,7 +201,10 @@ class Extractor():
|
||||
if b'name="captcha-bypass"' in content:
|
||||
self.log.warning("Cloudflare CAPTCHA")
|
||||
break
|
||||
if code not in retry_codes and code < 500:
|
||||
|
||||
if code == 429 and self._interval_429:
|
||||
pass
|
||||
elif code not in retry_codes and code < 500:
|
||||
break
|
||||
|
||||
finally:
|
||||
@ -203,15 +213,25 @@ class Extractor():
|
||||
self.log.debug("%s (%s/%s)", msg, tries, retries+1)
|
||||
if tries > retries:
|
||||
break
|
||||
self.sleep(
|
||||
max(tries, self._interval()) if self._interval else tries,
|
||||
"retry")
|
||||
|
||||
seconds = tries
|
||||
if self._interval:
|
||||
s = self._interval()
|
||||
if seconds < s:
|
||||
seconds = s
|
||||
if code == 429 and self._interval_429:
|
||||
s = self._interval_429()
|
||||
if seconds < s:
|
||||
seconds = s
|
||||
self.wait(seconds=seconds, reason="429 Too Many Requests")
|
||||
else:
|
||||
self.sleep(seconds, "retry")
|
||||
tries += 1
|
||||
|
||||
raise exception.HttpError(msg, response)
|
||||
|
||||
def wait(self, seconds=None, until=None, adjust=1.0,
|
||||
reason="rate limit reset"):
|
||||
reason="rate limit"):
|
||||
now = time.time()
|
||||
|
||||
if seconds:
|
||||
@ -234,7 +254,7 @@ class Extractor():
        if reason:
            t = datetime.datetime.fromtimestamp(until).time()
            isotime = "{:02}:{:02}:{:02}".format(t.hour, t.minute, t.second)
            self.log.info("Waiting until %s for %s.", isotime, reason)
            self.log.info("Waiting until %s (%s)", isotime, reason)
        time.sleep(seconds)

    def sleep(self, seconds, reason):

@ -242,6 +262,15 @@ class Extractor():
            seconds, reason)
        time.sleep(seconds)

    def input(self, prompt, echo=True):
        if echo:
            try:
                return input(prompt)
            except (EOFError, OSError):
                return None
        else:
            return getpass.getpass(prompt)

    def _get_auth_info(self):
        """Return authentication information as (username, password) tuple"""
        username = self.config("username")

@ -274,6 +303,9 @@ class Extractor():
            self.config("sleep-request", self.request_interval),
            self.request_interval_min,
        )
        self._interval_429 = util.build_duration_func(
            self.config("sleep-429", 60),
        )

        if self._retries < 0:
            self._retries = float("inf")
@ -433,9 +465,11 @@ class Extractor():
        if not path:
            return

        path_tmp = path + ".tmp"
        try:
            with open(path, "w") as fp:
            with open(path_tmp, "w") as fp:
                util.cookiestxt_store(fp, self.cookies)
            os.replace(path_tmp, path)
        except OSError as exc:
            self.log.warning("cookies: %s", exc)
@ -593,7 +627,7 @@ class GalleryExtractor(Extractor):

    def __init__(self, match, url=None):
        Extractor.__init__(self, match)
        self.gallery_url = self.root + match.group(1) if url is None else url
        self.gallery_url = self.root + self.groups[0] if url is None else url

    def items(self):
        self.login()

@ -668,7 +702,7 @@ class MangaExtractor(Extractor):

    def __init__(self, match, url=None):
        Extractor.__init__(self, match)
        self.manga_url = url or self.root + match.group(1)
        self.manga_url = self.root + self.groups[0] if url is None else url

        if self.config("chapter-reverse", False):
            self.reverse = not self.reverse
@ -730,17 +764,18 @@ class BaseExtractor(Extractor):
    instances = ()

    def __init__(self, match):
        if not self.category:
            self._init_category(match)
        Extractor.__init__(self, match)
        if not self.category:
            self._init_category()
            self._cfgpath = ("extractor", self.category, self.subcategory)

    def _init_category(self, match):
        for index, group in enumerate(match.groups()):
    def _init_category(self):
        for index, group in enumerate(self.groups):
            if group is not None:
                if index:
                    self.category, self.root, info = self.instances[index-1]
                    if not self.root:
                        self.root = text.root_from_url(match.group(0))
                        self.root = text.root_from_url(self.match.group(0))
                    self.config_instance = info.get
                else:
                    self.root = group
@ -800,12 +835,9 @@ def _build_requests_adapter(ssl_options, ssl_ciphers, source_address):
        pass

    if ssl_options or ssl_ciphers:
        ssl_context = ssl.create_default_context()
        if ssl_options:
            ssl_context.options |= ssl_options
        if ssl_ciphers:
            ssl_context.set_ecdh_curve("prime256v1")
            ssl_context.set_ciphers(ssl_ciphers)
        ssl_context = urllib3.connection.create_urllib3_context(
            options=ssl_options or None, ciphers=ssl_ciphers)
        ssl_context.check_hostname = False
    else:
        ssl_context = None
@ -925,8 +957,6 @@ SSL_CIPHERS = {
}


urllib3 = requests.packages.urllib3

# detect brotli support
try:
    BROTLI = urllib3.response.brotli is not None
@ -18,12 +18,12 @@ import binascii
import time
import re


BASE_PATTERN = (
    r"(?:https?://)?(?:"
    r"(?:www\.)?(?:fx)?deviantart\.com/(?!watch/)([\w-]+)|"
    r"(?!www\.)([\w-]+)\.(?:fx)?deviantart\.com)"
)
DEFAULT_AVATAR = "https://a.deviantart.net/avatars/default.gif"


class DeviantartExtractor(Extractor):
@ -84,6 +84,16 @@ class DeviantartExtractor(Extractor):
        else:
            self.commit_journal = None

    def request(self, url, **kwargs):
        if "fatal" not in kwargs:
            kwargs["fatal"] = False
        while True:
            response = Extractor.request(self, url, **kwargs)
            if response.status_code != 403 or \
                    b"Request blocked." not in response.content:
                return response
            self.wait(seconds=300, reason="CloudFront block")

    def skip(self, num):
        self.offset += num
        return num
@ -177,6 +187,10 @@ class DeviantartExtractor(Extractor):
            for comment in deviation["comments"]:
                user = comment["user"]
                name = user["username"].lower()
                if user["usericon"] == DEFAULT_AVATAR:
                    self.log.debug(
                        "Skipping avatar of '%s' (default)", name)
                    continue
                _user_details.update(name, user)

                url = "{}/{}/avatar/".format(self.root, name)
@ -209,7 +223,9 @@ class DeviantartExtractor(Extractor):
        """Adjust the contents of a Deviation-object"""
        if "index" not in deviation:
            try:
                if deviation["url"].startswith("https://sta.sh"):
                if deviation["url"].startswith((
                        "https://www.deviantart.com/stash/", "https://sta.sh",
                )):
                    filename = deviation["content"]["src"].split("/")[5]
                    deviation["index_base36"] = filename.partition("-")[0][1:]
                    deviation["index"] = id_from_base36(
@ -456,18 +472,12 @@ class DeviantartExtractor(Extractor):

    def _limited_request(self, url, **kwargs):
        """Limits HTTP requests to one every 2 seconds"""
        kwargs["fatal"] = None
        diff = time.time() - DeviantartExtractor._last_request
        if diff < 2.0:
            self.sleep(2.0 - diff, "request")

        while True:
            response = self.request(url, **kwargs)
            if response.status_code != 403 or \
                    b"Request blocked." not in response.content:
                DeviantartExtractor._last_request = time.time()
                return response
            self.wait(seconds=180)
        response = self.request(url, **kwargs)
        DeviantartExtractor._last_request = time.time()
        return response

    def _fetch_premium(self, deviation):
        try:
@ -585,7 +595,13 @@ class DeviantartAvatarExtractor(DeviantartExtractor):
            return ()

        icon = user["usericon"]
        index = icon.rpartition("?")[2]
        if icon == DEFAULT_AVATAR:
            self.log.debug("Skipping avatar of '%s' (default)", name)
            return ()

        _, sep, index = icon.rpartition("?")
        if not sep:
            index = "0"

        formats = self.config("formats")
        if not formats:
@ -668,7 +684,8 @@ class DeviantartStashExtractor(DeviantartExtractor):
    """Extractor for sta.sh-ed deviations"""
    subcategory = "stash"
    archive_fmt = "{index}.{extension}"
    pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
    pattern = (r"(?:https?://)?(?:(?:www\.)?deviantart\.com/stash|sta\.sh)"
               r"/([a-z0-9]+)")
    example = "https://sta.sh/abcde"

    skip = Extractor.skip
@ -689,7 +706,7 @@ class DeviantartStashExtractor(DeviantartExtractor):
        if uuid:
            deviation = self.api.deviation(uuid)
            deviation["index"] = text.parse_int(text.extr(
                page, 'gmi-deviationid="', '"'))
                page, '\\"deviationId\\":', ','))
            yield deviation
            return
@ -1405,9 +1422,14 @@ class DeviantartOAuthAPI():
        self.authenticate(None if public else self.refresh_token_key)
        kwargs["headers"] = self.headers
        response = self.extractor.request(url, **kwargs)
        data = response.json()
        status = response.status_code

        try:
            data = response.json()
        except ValueError:
            self.log.error("Unable to parse API response")
            data = {}

        status = response.status_code
        if 200 <= status < 400:
            if self.delay > self.delay_min:
                self.delay -= 1
@ -1435,9 +1457,8 @@ class DeviantartOAuthAPI():
                self.log.info(
                    "Register your own OAuth application and use its "
                    "credentials to prevent this error: "
                    "https://github.com/mikf/gallery-dl/blob/master/do"
                    "cs/configuration.rst#extractordeviantartclient-id"
                    "--client-secret")
                    "https://gdl-org.github.io/docs/configuration.html"
                    "#extractor-deviantart-client-id-client-secret")
        else:
            if log:
                self.log.error(msg)
@ -50,7 +50,7 @@ class ExhentaiExtractor(Extractor):

    def request(self, url, **kwargs):
        response = Extractor.request(self, url, **kwargs)
        if response.history and response.headers.get("Content-Length") == "0":
        if "Cache-Control" not in response.headers and not response.content:
            self.log.info("blank page")
            raise exception.AuthorizationError()
        return response

@ -95,7 +95,11 @@ class ExhentaiExtractor(Extractor):
        self.cookies.clear()

        response = self.request(url, method="POST", headers=headers, data=data)
        if b"You are now logged in as:" not in response.content:
        content = response.content
        if b"You are now logged in as:" not in content:
            if b"The captcha was not entered correctly" in content:
                raise exception.AuthenticationError(
                    "CAPTCHA required. Use cookies instead.")
            raise exception.AuthenticationError()

        # collect more cookies

@ -437,7 +441,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
            raise exception.AuthorizationError()
        if page.startswith(("Key missing", "Gallery not found")):
            raise exception.NotFoundError("gallery")
        if "hentai.org/mpv/" in page:
        if page.count("hentai.org/mpv/") > 1:
            self.log.warning("Enabled Multi-Page Viewer is not supported")
        return page
@ -42,7 +42,8 @@ class FapelloPostExtractor(Extractor):
            "type" : "video" if 'type="video' in page else "photo",
            "thumbnail": text.extr(page, 'poster="', '"'),
        }
        url = text.extr(page, 'src="', '"')
        url = text.extr(page, 'src="', '"').replace(
            ".md", "").replace(".th", "")
        yield Message.Directory, data
        yield Message.Url, url, text.nameext_from_url(url, data)
@ -77,6 +77,8 @@ class FlickrImageExtractor(FlickrExtractor):
|
||||
photo = self.api.photos_getInfo(self.item_id)
|
||||
if self.api.exif:
|
||||
photo.update(self.api.photos_getExif(self.item_id))
|
||||
if self.api.contexts:
|
||||
photo.update(self.api.photos_getAllContexts(self.item_id))
|
||||
|
||||
if photo["media"] == "video" and self.api.videos:
|
||||
self.api._extract_video(photo)
|
||||
@ -268,6 +270,8 @@ class FlickrAPI(oauth.OAuth1API):
|
||||
|
||||
self.exif = extractor.config("exif", False)
|
||||
self.videos = extractor.config("videos", True)
|
||||
self.contexts = extractor.config("contexts", False)
|
||||
|
||||
self.maxsize = extractor.config("size-max")
|
||||
if isinstance(self.maxsize, str):
|
||||
for fmt, fmtname, fmtwidth in self.FORMATS:
|
||||
@ -311,6 +315,13 @@ class FlickrAPI(oauth.OAuth1API):
|
||||
params = {"user_id": user_id}
|
||||
return self._pagination("people.getPhotos", params)
|
||||
|
||||
def photos_getAllContexts(self, photo_id):
|
||||
"""Returns all visible sets and pools the photo belongs to."""
|
||||
params = {"photo_id": photo_id}
|
||||
data = self._call("photos.getAllContexts", params)
|
||||
del data["stat"]
|
||||
return data
|
||||
|
||||
def photos_getExif(self, photo_id):
|
||||
"""Retrieves a list of EXIF/TIFF/GPS tags for a given photo."""
|
||||
params = {"photo_id": photo_id}
|
||||
@ -444,6 +455,8 @@ class FlickrAPI(oauth.OAuth1API):
|
||||
|
||||
if self.exif:
|
||||
photo.update(self.photos_getExif(photo["id"]))
|
||||
if self.contexts:
|
||||
photo.update(self.photos_getAllContexts(photo["id"]))
|
||||
photo["id"] = text.parse_int(photo["id"])
|
||||
|
||||
if "owner" in photo:
|
||||
|
@ -117,8 +117,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
self.board = match.group(match.lastindex-1)
|
||||
self.thread = match.group(match.lastindex)
|
||||
self.board = self.groups[-2]
|
||||
self.thread = self.groups[-1]
|
||||
self.data = None
|
||||
|
||||
def metadata(self):
|
||||
@ -140,20 +140,22 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):
|
||||
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
||||
"""Base extractor for FoolFuuka based boards/archives"""
|
||||
subcategory = "board"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$"
|
||||
example = "https://archived.moe/a/"
|
||||
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
self.board = match.group(match.lastindex)
|
||||
self.board = self.groups[-2]
|
||||
self.page = self.groups[-1]
|
||||
|
||||
def items(self):
|
||||
index_base = "{}/_/api/chan/index/?board={}&page=".format(
|
||||
self.root, self.board)
|
||||
thread_base = "{}/{}/thread/".format(self.root, self.board)
|
||||
|
||||
for page in itertools.count(1):
|
||||
with self.request(index_base + format(page)) as response:
|
||||
page = self.page
|
||||
for pnum in itertools.count(text.parse_int(page, 1)):
|
||||
with self.request(index_base + format(pnum)) as response:
|
||||
try:
|
||||
threads = response.json()
|
||||
except ValueError:
|
||||
@ -167,6 +169,9 @@ class FoolfuukaBoardExtractor(FoolfuukaExtractor):
|
||||
thread["_extractor"] = FoolfuukaThreadExtractor
|
||||
yield Message.Queue, thread["url"], thread
|
||||
|
||||
if page:
|
||||
return
|
||||
|
||||
|
||||
class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||
"""Base extractor for search results on FoolFuuka based boards/archives"""
|
||||
@ -179,17 +184,16 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
|
||||
def __init__(self, match):
|
||||
FoolfuukaExtractor.__init__(self, match)
|
||||
self.params = params = {}
|
||||
args = match.group(match.lastindex).split("/")
|
||||
key = None
|
||||
|
||||
for arg in args:
|
||||
key = None
|
||||
for arg in self.groups[-1].split("/"):
|
||||
if key:
|
||||
params[key] = text.unescape(arg)
|
||||
key = None
|
||||
else:
|
||||
key = arg
|
||||
|
||||
board = match.group(match.lastindex-1)
|
||||
board = self.groups[-2]
|
||||
if board != "_":
|
||||
params["boards"] = board
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?furaffinity\.net"
|
||||
BASE_PATTERN = r"(?:https?://)?(?:www\.|sfw\.)?(?:f[ux]|f?xfu)raffinity\.net"
|
||||
|
||||
|
||||
class FuraffinityExtractor(Extractor):
|
||||
|
@ -32,6 +32,9 @@ class GelbooruBase():
|
||||
url = self.root + "/index.php?page=dapi&q=index&json=1"
|
||||
data = self.request(url, params=params).json()
|
||||
|
||||
if not key:
|
||||
return data
|
||||
|
||||
try:
|
||||
posts = data[key]
|
||||
except KeyError:
|
||||
@ -48,19 +51,44 @@ class GelbooruBase():
|
||||
params["pid"] = self.page_start
|
||||
params["limit"] = self.per_page
|
||||
limit = self.per_page // 2
|
||||
pid = False
|
||||
|
||||
if "tags" in params:
|
||||
tags = params["tags"].split()
|
||||
op = "<"
|
||||
id = False
|
||||
|
||||
for tag in tags:
|
||||
if tag.startswith("sort:"):
|
||||
if tag == "sort:id:asc":
|
||||
op = ">"
|
||||
elif tag == "sort:id" or tag.startswith("sort:id:"):
|
||||
op = "<"
|
||||
else:
|
||||
pid = True
|
||||
elif tag.startswith("id:"):
|
||||
id = True
|
||||
|
||||
if not pid:
|
||||
if id:
|
||||
tag = "id:" + op
|
||||
tags = [t for t in tags if not t.startswith(tag)]
|
||||
tags = "{} id:{}".format(" ".join(tags), op)
|
||||
|
||||
while True:
|
||||
posts = self._api_request(params)
|
||||
|
||||
for post in posts:
|
||||
yield post
|
||||
yield from posts
|
||||
|
||||
if len(posts) < limit:
|
||||
return
|
||||
|
||||
if "pid" in params:
|
||||
del params["pid"]
|
||||
params["tags"] = "{} id:<{}".format(self.tags, post["id"])
|
||||
if pid:
|
||||
params["pid"] += 1
|
||||
else:
|
||||
if "pid" in params:
|
||||
del params["pid"]
|
||||
params["tags"] = tags + str(posts[-1]["id"])
|
||||
|
||||
def _pagination_html(self, params):
|
||||
url = self.root + "/index.php"
|
||||
@ -167,13 +195,61 @@ class GelbooruFavoriteExtractor(GelbooruBase,
|
||||
params = {
|
||||
"s" : "favorite",
|
||||
"id" : self.favorite_id,
|
||||
"limit": "1",
|
||||
"limit": "2",
|
||||
}
|
||||
data = self._api_request(params, None, True)
|
||||
|
||||
count = self._api_request(params, "@attributes", True)[0]["count"]
|
||||
if count <= self.offset:
|
||||
return
|
||||
count = data["@attributes"]["count"]
|
||||
self.log.debug("API reports %s favorite entries", count)
|
||||
|
||||
favs = data["favorite"]
|
||||
try:
|
||||
order = 1 if favs[0]["id"] < favs[1]["id"] else -1
|
||||
except LookupError as exc:
|
||||
self.log.debug(
|
||||
"Error when determining API favorite order (%s: %s)",
|
||||
exc.__class__.__name__, exc)
|
||||
order = -1
|
||||
else:
|
||||
self.log.debug("API yields favorites in %sscending order",
|
||||
"a" if order > 0 else "de")
|
||||
|
||||
order_favs = self.config("order-posts")
|
||||
if order_favs and order_favs[0] in ("r", "a"):
|
||||
self.log.debug("Returning them in reverse")
|
||||
order = -order
|
||||
|
||||
if order < 0:
|
||||
return self._pagination(params, count)
|
||||
return self._pagination_reverse(params, count)
|
||||
|
||||
def _pagination(self, params, count):
|
||||
if self.offset:
|
||||
pnum, skip = divmod(self.offset, self.per_page)
|
||||
else:
|
||||
pnum = skip = 0
|
||||
|
||||
params["pid"] = pnum
|
||||
params["limit"] = self.per_page
|
||||
|
||||
while True:
|
||||
favs = self._api_request(params, "favorite")
|
||||
|
||||
if not favs:
|
||||
return
|
||||
|
||||
if skip:
|
||||
favs = favs[skip:]
|
||||
skip = 0
|
||||
|
||||
for fav in favs:
|
||||
for post in self._api_request({"id": fav["favorite"]}):
|
||||
post["date_favorited"] = text.parse_timestamp(fav["added"])
|
||||
yield post
|
||||
|
||||
params["pid"] += 1
|
||||
|
||||
def _pagination_reverse(self, params, count):
|
||||
pnum, last = divmod(count-1, self.per_page)
|
||||
if self.offset > last:
|
||||
# page number change
|
||||
@ -182,12 +258,11 @@ class GelbooruFavoriteExtractor(GelbooruBase,
|
||||
pnum -= diff + 1
|
||||
skip = self.offset
|
||||
|
||||
# paginate over them in reverse
|
||||
params["pid"] = pnum
|
||||
params["limit"] = self.per_page
|
||||
|
||||
while True:
|
||||
favs = self._api_request(params, "favorite", True)
|
||||
favs = self._api_request(params, "favorite")
|
||||
favs.reverse()
|
||||
|
||||
if skip:
|
||||
@ -195,7 +270,9 @@ class GelbooruFavoriteExtractor(GelbooruBase,
|
||||
skip = 0
|
||||
|
||||
for fav in favs:
|
||||
yield from self._api_request({"id": fav["favorite"]})
|
||||
for post in self._api_request({"id": fav["favorite"]}):
|
||||
post["date_favorited"] = text.parse_timestamp(fav["added"])
|
||||
yield post
|
||||
|
||||
params["pid"] -= 1
|
||||
if params["pid"] < 0:
|
||||
|
@ -41,9 +41,13 @@ class GofileFolderExtractor(Extractor):
|
||||
folder = self._get_content(self.content_id, password)
|
||||
yield Message.Directory, folder
|
||||
|
||||
try:
|
||||
contents = folder.pop("children")
|
||||
except KeyError:
|
||||
raise exception.AuthorizationError("Password required")
|
||||
|
||||
num = 0
|
||||
contents = folder.pop("contents")
|
||||
for content_id in folder["childs"]:
|
||||
for content_id in folder["childrenIds"]:
|
||||
content = contents[content_id]
|
||||
content["folder"] = folder
|
||||
|
||||
@ -67,31 +71,32 @@ class GofileFolderExtractor(Extractor):
|
||||
@memcache()
|
||||
def _create_account(self):
|
||||
self.log.debug("Creating temporary account")
|
||||
return self._api_request("createAccount")["token"]
|
||||
return self._api_request("accounts", method="POST")["token"]
|
||||
|
||||
@cache(maxage=86400)
|
||||
def _get_website_token(self):
|
||||
self.log.debug("Fetching website token")
|
||||
page = self.request(self.root + "/dist/js/alljs.js").text
|
||||
return text.extr(page, 'fetchData.wt = "', '"')
|
||||
return text.extr(page, 'wt: "', '"')
|
||||
|
||||
def _get_content(self, content_id, password=None):
|
||||
headers = {"Authorization": "Bearer " + self.api_token}
|
||||
params = {"wt": self.website_token}
|
||||
if password is not None:
|
||||
password = hashlib.sha256(password.encode()).hexdigest()
|
||||
return self._api_request("getContent", {
|
||||
"contentId" : content_id,
|
||||
"token" : self.api_token,
|
||||
"wt" : self.website_token,
|
||||
"password" : password,
|
||||
})
|
||||
params["password"] = hashlib.sha256(password.encode()).hexdigest()
|
||||
return self._api_request("contents/" + content_id, params, headers)
|
||||
|
||||
def _api_request(self, endpoint, params=None):
|
||||
def _api_request(self, endpoint, params=None, headers=None, method="GET"):
|
||||
response = self.request(
|
||||
"https://api.gofile.io/" + endpoint, params=params).json()
|
||||
"https://api.gofile.io/" + endpoint,
|
||||
method=method, params=params, headers=headers,
|
||||
).json()
|
||||
|
||||
if response["status"] != "ok":
|
||||
if response["status"] == "error-notFound":
|
||||
raise exception.NotFoundError("content")
|
||||
if response["status"] == "error-passwordRequired":
|
||||
raise exception.AuthorizationError("Password required")
|
||||
raise exception.StopExtraction(
|
||||
"%s failed (Status: %s)", endpoint, response["status"])
|
||||
|
||||
|
@ -25,7 +25,7 @@ class HiperdexBase():
|
||||
@memcache(keyarg=1)
|
||||
def manga_data(self, manga, page=None):
|
||||
if not page:
|
||||
url = "{}/manga/{}/".format(self.root, manga)
|
||||
url = "{}/mangas/{}/".format(self.root, manga)
|
||||
page = self.request(url).text
|
||||
extr = text.extract_from(page)
|
||||
|
||||
@ -33,7 +33,7 @@ class HiperdexBase():
|
||||
"url" : text.unescape(extr(
|
||||
'property="og:url" content="', '"')),
|
||||
"manga" : text.unescape(extr(
|
||||
'"headline": "', '"')),
|
||||
' property="name" title="', '"')),
|
||||
"score" : text.parse_float(extr(
|
||||
'id="averagerate">', '<')),
|
||||
"author" : text.remove_html(extr(
|
||||
@ -68,8 +68,8 @@ class HiperdexBase():
|
||||
|
||||
class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
|
||||
"""Extractor for manga chapters from hiperdex.com"""
|
||||
pattern = BASE_PATTERN + r"(/manga/([^/?#]+)/([^/?#]+))"
|
||||
example = "https://hiperdex.com/manga/MANGA/CHAPTER/"
|
||||
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+)/([^/?#]+))"
|
||||
example = "https://hiperdex.com/mangas/MANGA/CHAPTER/"
|
||||
|
||||
def __init__(self, match):
|
||||
root, path, self.manga, self.chapter = match.groups()
|
||||
@ -90,8 +90,8 @@ class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
|
||||
class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
|
||||
"""Extractor for manga from hiperdex.com"""
|
||||
chapterclass = HiperdexChapterExtractor
|
||||
pattern = BASE_PATTERN + r"(/manga/([^/?#]+))/?$"
|
||||
example = "https://hiperdex.com/manga/MANGA/"
|
||||
pattern = BASE_PATTERN + r"(/mangas?/([^/?#]+))/?$"
|
||||
example = "https://hiperdex.com/mangas/MANGA/"
|
||||
|
||||
def __init__(self, match):
|
||||
root, path, self.manga = match.groups()
|
||||
|
@ -23,6 +23,7 @@ class HotleakExtractor(Extractor):
|
||||
|
||||
def items(self):
|
||||
for post in self.posts():
|
||||
post["_http_expected_status"] = (404,)
|
||||
yield Message.Directory, post
|
||||
yield Message.Url, post["url"], post
|
||||
|
||||
|
@ -101,9 +101,8 @@ class IdolcomplexExtractor(SankakuExtractor):
|
||||
page = self.request(url, retries=10).text
|
||||
extr = text.extract_from(page)
|
||||
|
||||
pid_alnum = extr('/posts/', '"')
|
||||
vavg = extr('itemprop="ratingValue">', "<")
|
||||
vcnt = extr('itemprop="reviewCount">', "<")
|
||||
vavg = extr('id="rating"', "</ul>")
|
||||
vcnt = extr('>Votes</strong>:', "<")
|
||||
pid = extr(">Post ID:", "<")
|
||||
created = extr(' title="', '"')
|
||||
|
||||
@ -120,10 +119,10 @@ class IdolcomplexExtractor(SankakuExtractor):
|
||||
rating = extr(">Rating:", "<br")
|
||||
|
||||
data = {
|
||||
"id" : text.parse_int(pid),
|
||||
"id_alnum" : pid_alnum,
|
||||
"id" : pid.strip(),
|
||||
"md5" : file_url.rpartition("/")[2].partition(".")[0],
|
||||
"vote_average": text.parse_float(vavg),
|
||||
"vote_average": (1.0 * vavg.count('class="star-full"') +
|
||||
0.5 * vavg.count('class="star-half"')),
|
||||
"vote_count" : text.parse_int(vcnt),
|
||||
"created_at" : created,
|
||||
"date" : text.parse_datetime(
|
||||
@ -222,8 +221,8 @@ class IdolcomplexPoolExtractor(IdolcomplexExtractor):
|
||||
subcategory = "pool"
|
||||
directory_fmt = ("{category}", "pool", "{pool}")
|
||||
archive_fmt = "p_{pool}_{id}"
|
||||
pattern = BASE_PATTERN + r"/pools?/show/(\d+)"
|
||||
example = "https://idol.sankakucomplex.com/pools/show/12345"
|
||||
pattern = BASE_PATTERN + r"/pools?/(?:show/)?(\w+)"
|
||||
example = "https://idol.sankakucomplex.com/pools/0123456789abcdef"
|
||||
per_page = 24
|
||||
|
||||
def __init__(self, match):
|
||||
|
@ -161,11 +161,12 @@ class ImagefapFolderExtractor(ImagefapExtractor):
|
||||
self.user = user or profile
|
||||
|
||||
def items(self):
|
||||
for gallery_id, name in self.galleries(self.folder_id):
|
||||
for gallery_id, name, folder in self.galleries(self.folder_id):
|
||||
url = "{}/gallery/{}".format(self.root, gallery_id)
|
||||
data = {
|
||||
"gallery_id": gallery_id,
|
||||
"title" : text.unescape(name),
|
||||
"folder" : text.unescape(folder),
|
||||
"_extractor": ImagefapGalleryExtractor,
|
||||
}
|
||||
yield Message.Queue, url, data
|
||||
@ -173,6 +174,7 @@ class ImagefapFolderExtractor(ImagefapExtractor):
|
||||
def galleries(self, folder_id):
|
||||
"""Yield gallery IDs and titles of a folder"""
|
||||
if folder_id == "-1":
|
||||
folder_name = "Uncategorized"
|
||||
if self._id:
|
||||
url = "{}/usergallery.php?userid={}&folderid=-1".format(
|
||||
self.root, self.user)
|
||||
@ -180,23 +182,28 @@ class ImagefapFolderExtractor(ImagefapExtractor):
|
||||
url = "{}/profile/{}/galleries?folderid=-1".format(
|
||||
self.root, self.user)
|
||||
else:
|
||||
folder_name = None
|
||||
url = "{}/organizer/{}/".format(self.root, folder_id)
|
||||
|
||||
params = {"page": 0}
|
||||
extr = text.extract_from(self.request(url, params=params).text)
|
||||
if not folder_name:
|
||||
folder_name = extr("class'blk_galleries'><b>", "</b>")
|
||||
|
||||
while True:
|
||||
extr = text.extract_from(self.request(url, params=params).text)
|
||||
cnt = 0
|
||||
|
||||
while True:
|
||||
gid = extr('<a href="/gallery/', '"')
|
||||
gid = extr(' id="gid-', '"')
|
||||
if not gid:
|
||||
break
|
||||
yield gid, extr("<b>", "<")
|
||||
yield gid, extr("<b>", "<"), folder_name
|
||||
cnt += 1
|
||||
|
||||
if cnt < 20:
|
||||
break
|
||||
params["page"] += 1
|
||||
extr = text.extract_from(self.request(url, params=params).text)
|
||||
|
||||
|
||||
class ImagefapUserExtractor(ImagefapExtractor):
|
||||
|
@ -39,10 +39,15 @@ class ImgurExtractor(Extractor):
|
||||
image["url"] = url = "https://i.imgur.com/{}.{}".format(
|
||||
image["id"], image["ext"])
|
||||
image["date"] = text.parse_datetime(image["created_at"])
|
||||
image["_http_validate"] = self._validate
|
||||
text.nameext_from_url(url, image)
|
||||
|
||||
return url
|
||||
|
||||
def _validate(self, response):
|
||||
return (not response.history or
|
||||
not response.url.endswith("/removed.png"))
|
||||
|
||||
def _items_queue(self, items):
|
||||
album_ex = ImgurAlbumExtractor
|
||||
image_ex = ImgurImageExtractor
|
||||
|
@ -330,15 +330,18 @@ class InkbunnyAPI():
|
||||
def _call(self, endpoint, params):
|
||||
url = "https://inkbunny.net/api_" + endpoint + ".php"
|
||||
params["sid"] = self.session_id
|
||||
data = self.extractor.request(url, params=params).json()
|
||||
|
||||
if "error_code" in data:
|
||||
while True:
|
||||
data = self.extractor.request(url, params=params).json()
|
||||
|
||||
if "error_code" not in data:
|
||||
return data
|
||||
|
||||
if str(data["error_code"]) == "2":
|
||||
self.authenticate(invalidate=True)
|
||||
return self._call(endpoint, params)
|
||||
raise exception.StopExtraction(data.get("error_message"))
|
||||
continue
|
||||
|
||||
return data
|
||||
raise exception.StopExtraction(data.get("error_message"))
|
||||
|
||||
def _pagination_search(self, params):
|
||||
params["page"] = 1
|
||||
|
@ -165,7 +165,7 @@ class InstagramExtractor(Extractor):
|
||||
data = {
|
||||
"post_id" : post["pk"],
|
||||
"post_shortcode": post["code"],
|
||||
"likes": post.get("like_count"),
|
||||
"likes": post.get("like_count", 0),
|
||||
"pinned": post.get("timeline_pinned_user_ids", ()),
|
||||
"date": text.parse_timestamp(post.get("taken_at")),
|
||||
}
|
||||
@ -736,7 +736,7 @@ class InstagramRestAPI():
|
||||
not user["followed_by_viewer"]:
|
||||
name = user["username"]
|
||||
s = "" if name.endswith("s") else "s"
|
||||
raise exception.StopExtraction("%s'%s posts are private", name, s)
|
||||
self.extractor.log.warning("%s'%s posts are private", name, s)
|
||||
self.extractor._assign_user(user)
|
||||
return user["id"]
|
||||
|
||||
|
@ -41,6 +41,9 @@ class KemonopartyExtractor(Extractor):
|
||||
self.revisions = self.config("revisions")
|
||||
if self.revisions:
|
||||
self.revisions_unique = (self.revisions == "unique")
|
||||
order = self.config("order-revisions")
|
||||
self.revisions_reverse = order[0] in ("r", "a") if order else False
|
||||
|
||||
self._prepare_ddosguard_cookies()
|
||||
self._find_inline = re.compile(
|
||||
r'src="(?:https?://(?:kemono|coomer)\.(?:su|party))?(/inline/[^"]+'
|
||||
@ -54,7 +57,7 @@ class KemonopartyExtractor(Extractor):
|
||||
generators = self._build_file_generators(self.config("files"))
|
||||
duplicates = self.config("duplicates")
|
||||
comments = self.config("comments")
|
||||
username = dms = None
|
||||
username = dms = announcements = None
|
||||
|
||||
# prevent files from being sent with gzip compression
|
||||
headers = {"Accept-Encoding": "identity"}
|
||||
@ -65,6 +68,8 @@ class KemonopartyExtractor(Extractor):
|
||||
'<meta name="artist_name" content="', '"')[0])
|
||||
if self.config("dms"):
|
||||
dms = True
|
||||
if self.config("announcements"):
|
||||
announcements = True
|
||||
|
||||
posts = self.posts()
|
||||
max_posts = self.config("max-posts")
|
||||
@ -77,7 +82,7 @@ class KemonopartyExtractor(Extractor):
|
||||
self.root, post["service"], post["user"], post["id"])
|
||||
post["_http_headers"] = headers
|
||||
post["date"] = self._parse_datetime(
|
||||
post["published"] or post["added"])
|
||||
post.get("published") or post.get("added") or "")
|
||||
|
||||
if username:
|
||||
post["username"] = username
|
||||
@ -85,8 +90,12 @@ class KemonopartyExtractor(Extractor):
|
||||
post["comments"] = self._extract_comments(post)
|
||||
if dms is not None:
|
||||
if dms is True:
|
||||
dms = self._extract_dms(post)
|
||||
dms = self._extract_cards(post, "dms")
|
||||
post["dms"] = dms
|
||||
if announcements is not None:
|
||||
if announcements is True:
|
||||
announcements = self._extract_cards(post, "announcements")
|
||||
post["announcements"] = announcements
|
||||
|
||||
files = []
|
||||
hashes = set()
|
||||
@ -153,7 +162,7 @@ class KemonopartyExtractor(Extractor):
|
||||
|
||||
def _file(self, post):
|
||||
file = post["file"]
|
||||
if not file:
|
||||
if not file or "path" not in file:
|
||||
return ()
|
||||
file["type"] = "file"
|
||||
return (file,)
|
||||
@ -197,21 +206,21 @@ class KemonopartyExtractor(Extractor):
|
||||
})
|
||||
return comments
|
||||
|
||||
def _extract_dms(self, post):
|
||||
url = "{}/{}/user/{}/dms".format(
|
||||
self.root, post["service"], post["user"])
|
||||
def _extract_cards(self, post, type):
|
||||
url = "{}/{}/user/{}/{}".format(
|
||||
self.root, post["service"], post["user"], type)
|
||||
page = self.request(url).text
|
||||
|
||||
dms = []
|
||||
for dm in text.extract_iter(page, "<article", "</article>"):
|
||||
footer = text.extr(dm, "<footer", "</footer>")
|
||||
dms.append({
|
||||
cards = []
|
||||
for card in text.extract_iter(page, "<article", "</article>"):
|
||||
footer = text.extr(card, "<footer", "</footer>")
|
||||
cards.append({
|
||||
"body": text.unescape(text.extr(
|
||||
dm, "<pre>", "</pre></",
|
||||
card, "<pre>", "</pre></",
|
||||
).strip()),
|
||||
"date": text.extr(footer, 'Published: ', '\n'),
|
||||
"date": text.extr(footer, ': ', '\n'),
|
||||
})
|
||||
return dms
|
||||
return cards
|
||||
|
||||
def _parse_datetime(self, date_string):
|
||||
if len(date_string) > 19:
|
||||
@ -232,6 +241,7 @@ class KemonopartyExtractor(Extractor):
|
||||
except exception.HttpError:
|
||||
post["revision_hash"] = self._revision_hash(post)
|
||||
post["revision_index"] = 1
|
||||
post["revision_count"] = 1
|
||||
return (post,)
|
||||
revs.insert(0, post)
|
||||
|
||||
@ -247,22 +257,30 @@ class KemonopartyExtractor(Extractor):
|
||||
uniq.append(rev)
|
||||
revs = uniq
|
||||
|
||||
idx = len(revs)
|
||||
cnt = idx = len(revs)
|
||||
for rev in revs:
|
||||
rev["revision_index"] = idx
|
||||
rev["revision_count"] = cnt
|
||||
idx -= 1
|
||||
|
||||
if self.revisions_reverse:
|
||||
revs.reverse()
|
||||
|
||||
return revs
|
||||
|
||||
def _revisions_all(self, url):
|
||||
revs = self.request(url + "/revisions").json()
|
||||
|
||||
idx = len(revs)
|
||||
cnt = idx = len(revs)
|
||||
for rev in revs:
|
||||
rev["revision_hash"] = self._revision_hash(rev)
|
||||
rev["revision_index"] = idx
|
||||
rev["revision_count"] = cnt
|
||||
idx -= 1
|
||||
|
||||
if self.revisions_reverse:
|
||||
revs.reverse()
|
||||
|
||||
return revs
|
||||
|
||||
def _revision_hash(self, revision):
|
||||
@ -482,7 +500,8 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
|
||||
|
||||
def __init__(self, match):
|
||||
KemonopartyExtractor.__init__(self, match)
|
||||
self.favorites = (text.parse_query(match.group(3)).get("type") or
|
||||
self.params = text.parse_query(match.group(3))
|
||||
self.favorites = (self.params.get("type") or
|
||||
self.config("favorites") or
|
||||
"artist")
|
||||
|
||||
@ -490,9 +509,17 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
|
||||
self._prepare_ddosguard_cookies()
|
||||
self.login()
|
||||
|
||||
sort = self.params.get("sort")
|
||||
order = self.params.get("order") or "desc"
|
||||
|
||||
if self.favorites == "artist":
|
||||
users = self.request(
|
||||
self.root + "/api/v1/account/favorites?type=artist").json()
|
||||
|
||||
if not sort:
|
||||
sort = "updated"
|
||||
users.sort(key=lambda x: x[sort], reverse=(order == "desc"))
|
||||
|
||||
for user in users:
|
||||
user["_extractor"] = KemonopartyUserExtractor
|
||||
url = "{}/{}/user/{}".format(
|
||||
@ -502,6 +529,11 @@ class KemonopartyFavoriteExtractor(KemonopartyExtractor):
|
||||
elif self.favorites == "post":
|
||||
posts = self.request(
|
||||
self.root + "/api/v1/account/favorites?type=post").json()
|
||||
|
||||
if not sort:
|
||||
sort = "faved_seq"
|
||||
posts.sort(key=lambda x: x[sort], reverse=(order == "desc"))
|
||||
|
||||
for post in posts:
|
||||
post["_extractor"] = KemonopartyPostExtractor
|
||||
url = "{}/{}/user/{}/post/{}".format(
|
||||
|
@ -104,7 +104,7 @@ class LensdumpImageExtractor(LensdumpBase, Extractor):
|
||||
filename_fmt = "{category}_{id}{title:?_//}.{extension}"
|
||||
directory_fmt = ("{category}",)
|
||||
archive_fmt = "{id}"
|
||||
pattern = BASE_PATTERN + r"/i/(\w+)"
|
||||
pattern = r"(?:https?://)?(?:(?:i\d?\.)?lensdump\.com|\w\.l3n\.co)/i/(\w+)"
|
||||
example = "https://lensdump.com/i/ID"
|
||||
|
||||
def __init__(self, match):
|
||||
|
@ -29,6 +29,7 @@ class MastodonExtractor(BaseExtractor):
|
||||
self.instance = self.root.partition("://")[2]
|
||||
self.reblogs = self.config("reblogs", False)
|
||||
self.replies = self.config("replies", True)
|
||||
self.cards = self.config("cards", False)
|
||||
|
||||
def items(self):
|
||||
for status in self.statuses():
|
||||
@ -48,6 +49,17 @@ class MastodonExtractor(BaseExtractor):
|
||||
if status["reblog"]:
|
||||
attachments.extend(status["reblog"]["media_attachments"])
|
||||
|
||||
if self.cards:
|
||||
card = status.get("card")
|
||||
if card:
|
||||
url = card.get("image")
|
||||
if url:
|
||||
card["weburl"] = card.get("url")
|
||||
card["url"] = url
|
||||
card["id"] = "card" + "".join(
|
||||
url.split("/")[6:-2]).lstrip("0")
|
||||
attachments.append(card)
|
||||
|
||||
status["instance"] = self.instance
|
||||
acct = status["account"]["acct"]
|
||||
status["instance_remote"] = \
|
||||
@ -70,7 +82,11 @@ class MastodonExtractor(BaseExtractor):
|
||||
|
||||
def _check_moved(self, account):
|
||||
self._check_moved = None
|
||||
if "moved" in account:
|
||||
# Certain fediverse software (such as Iceshrimp and Sharkey) have a
|
||||
# null account "moved" field instead of not having it outright.
|
||||
# To handle this, check if the "moved" value is truthy instead
|
||||
# if only it exists.
|
||||
if account.get("moved"):
|
||||
self.log.warning("Account '%s' moved to '%s'",
|
||||
account["acct"], account["moved"]["acct"])
|
||||
|
||||
@ -116,6 +132,7 @@ class MastodonUserExtractor(MastodonExtractor):
|
||||
api.account_id_by_username(self.item),
|
||||
only_media=(
|
||||
not self.reblogs and
|
||||
not self.cards and
|
||||
not self.config("text-posts", False)
|
||||
),
|
||||
exclude_replies=not self.replies,
|
||||
@ -132,6 +149,36 @@ class MastodonBookmarkExtractor(MastodonExtractor):
|
||||
return MastodonAPI(self).account_bookmarks()
|
||||
|
||||
|
||||
class MastodonFavoriteExtractor(MastodonExtractor):
|
||||
"""Extractor for mastodon favorites"""
|
||||
subcategory = "favorite"
|
||||
pattern = BASE_PATTERN + r"/favourites"
|
||||
example = "https://mastodon.social/favourites"
|
||||
|
||||
def statuses(self):
|
||||
return MastodonAPI(self).account_favorites()
|
||||
|
||||
|
||||
class MastodonListExtractor(MastodonExtractor):
|
||||
"""Extractor for mastodon lists"""
|
||||
subcategory = "list"
|
||||
pattern = BASE_PATTERN + r"/lists/(\w+)"
|
||||
example = "https://mastodon.social/lists/12345"
|
||||
|
||||
def statuses(self):
|
||||
return MastodonAPI(self).timelines_list(self.item)
|
||||
|
||||
|
||||
class MastodonHashtagExtractor(MastodonExtractor):
|
||||
"""Extractor for mastodon hashtags"""
|
||||
subcategory = "hashtag"
|
||||
pattern = BASE_PATTERN + r"/tags/(\w+)"
|
||||
example = "https://mastodon.social/tags/NAME"
|
||||
|
||||
def statuses(self):
|
||||
return MastodonAPI(self).timelines_tag(self.item)
|
||||
|
||||
|
||||
class MastodonFollowingExtractor(MastodonExtractor):
|
||||
"""Extractor for followed mastodon users"""
|
||||
subcategory = "following"
|
||||
@ -201,37 +248,55 @@ class MastodonAPI():
|
||||
raise exception.NotFoundError("account")
|
||||
|
||||
def account_bookmarks(self):
|
||||
"""Statuses the user has bookmarked"""
|
||||
endpoint = "/v1/bookmarks"
|
||||
return self._pagination(endpoint, None)
|
||||
|
||||
def account_favorites(self):
|
||||
"""Statuses the user has favourited"""
|
||||
endpoint = "/v1/favourites"
|
||||
return self._pagination(endpoint, None)
|
||||
|
||||
def account_following(self, account_id):
|
||||
"""Accounts which the given account is following"""
|
||||
endpoint = "/v1/accounts/{}/following".format(account_id)
|
||||
return self._pagination(endpoint, None)
|
||||
|
||||
def account_lookup(self, username):
|
||||
"""Quickly lookup a username to see if it is available"""
|
||||
endpoint = "/v1/accounts/lookup"
|
||||
params = {"acct": username}
|
||||
return self._call(endpoint, params).json()
|
||||
|
||||
def account_search(self, query, limit=40):
|
||||
"""Search for accounts"""
|
||||
"""Search for matching accounts by username or display name"""
|
||||
endpoint = "/v1/accounts/search"
|
||||
params = {"q": query, "limit": limit}
|
||||
return self._call(endpoint, params).json()
|
||||
|
||||
def account_statuses(self, account_id, only_media=True,
|
||||
exclude_replies=False):
|
||||
"""Fetch an account's statuses"""
|
||||
"""Statuses posted to the given account"""
|
||||
endpoint = "/v1/accounts/{}/statuses".format(account_id)
|
||||
params = {"only_media" : "1" if only_media else "0",
|
||||
"exclude_replies": "1" if exclude_replies else "0"}
|
||||
params = {"only_media" : "true" if only_media else "false",
|
||||
"exclude_replies": "true" if exclude_replies else "false"}
|
||||
return self._pagination(endpoint, params)
|
||||
|
||||
def status(self, status_id):
|
||||
"""Fetch a status"""
|
||||
"""Obtain information about a status"""
|
||||
endpoint = "/v1/statuses/" + status_id
|
||||
return self._call(endpoint).json()
|
||||
|
||||
def timelines_list(self, list_id):
|
||||
"""View statuses in the given list timeline"""
|
||||
endpoint = "/v1/timelines/list/" + list_id
|
||||
return self._pagination(endpoint, None)
|
||||
|
||||
def timelines_tag(self, hashtag):
|
||||
"""View public statuses containing the given hashtag"""
|
||||
endpoint = "/v1/timelines/tag/" + hashtag
|
||||
return self._pagination(endpoint, None)
|
||||
|
||||
def _call(self, endpoint, params=None):
|
||||
if endpoint.startswith("http"):
|
||||
url = endpoint
|
||||
|
@ -26,7 +26,8 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
|
||||
"{post[date]:%Y-%m-%d} {post[title]}")
|
||||
archive_fmt = "{blog[id]}_{post[num]}_{num}"
|
||||
pattern = (r"(?:https?://)?blog\.naver\.com/"
|
||||
r"(?:PostView\.nhn\?blogId=(\w+)&logNo=(\d+)|(\w+)/(\d+)/?$)")
|
||||
r"(?:PostView\.n(?:aver|hn)\?blogId=(\w+)&logNo=(\d+)|"
|
||||
r"(\w+)/(\d+)/?$)")
|
||||
example = "https://blog.naver.com/BLOGID/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
@ -46,8 +47,10 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
|
||||
extr = text.extract_from(page)
|
||||
data = {
|
||||
"post": {
|
||||
"title" : extr('"og:title" content="', '"'),
|
||||
"description": extr('"og:description" content="', '"'),
|
||||
"title" : text.unescape(extr(
|
||||
'"og:title" content="', '"')),
|
||||
"description": text.unescape(extr(
|
||||
'"og:description" content="', '"')).replace(" ", " "),
|
||||
"num" : text.parse_int(self.post_id),
|
||||
},
|
||||
"blog": {
|
||||
@ -62,10 +65,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
|
||||
return data
|
||||
|
||||
def images(self, page):
|
||||
return [
|
||||
(url.replace("://post", "://blog", 1).partition("?")[0], None)
|
||||
for url in text.extract_iter(page, 'data-lazy-src="', '"')
|
||||
]
|
||||
results = []
|
||||
for url in text.extract_iter(page, 'data-lazy-src="', '"'):
|
||||
url = url.replace("://post", "://blog", 1).partition("?")[0]
|
||||
if "\ufffd" in text.unquote(url):
|
||||
url = text.unquote(url, encoding="EUC-KR")
|
||||
results.append((url, None))
|
||||
return results
|
||||
|
||||
|
||||
class NaverBlogExtractor(NaverBase, Extractor):
|
||||
@ -73,7 +79,8 @@ class NaverBlogExtractor(NaverBase, Extractor):
|
||||
subcategory = "blog"
|
||||
categorytransfer = True
|
||||
pattern = (r"(?:https?://)?blog\.naver\.com/"
|
||||
r"(?:PostList.nhn\?(?:[^&#]+&)*blogId=([^&#]+)|(\w+)/?$)")
|
||||
r"(?:PostList\.n(?:aver|hn)\?(?:[^&#]+&)*blogId=([^&#]+)|"
|
||||
r"(\w+)/?$)")
|
||||
example = "https://blog.naver.com/BLOGID"
|
||||
|
||||
def __init__(self, match):
|
||||
@ -81,12 +88,11 @@ class NaverBlogExtractor(NaverBase, Extractor):
|
||||
self.blog_id = match.group(1) or match.group(2)
|
||||
|
||||
def items(self):
|
||||
|
||||
# fetch first post number
|
||||
url = "{}/PostList.nhn?blogId={}".format(self.root, self.blog_id)
|
||||
post_num = text.extract(
|
||||
post_num = text.extr(
|
||||
self.request(url).text, 'gnFirstLogNo = "', '"',
|
||||
)[0]
|
||||
)
|
||||
|
||||
# setup params for API calls
|
||||
url = "{}/PostViewBottomTitleListAsync.nhn".format(self.root)
|
||||
|
@ -110,7 +110,7 @@ class OAuthBase(Extractor):
|
||||
|
||||
# get a request token
|
||||
params = {"oauth_callback": self.redirect_uri}
|
||||
data = self.session.get(request_token_url, params=params).text
|
||||
data = self.request(request_token_url, params=params).text
|
||||
|
||||
data = text.parse_query(data)
|
||||
self.session.auth.token_secret = data["oauth_token_secret"]
|
||||
@ -120,7 +120,7 @@ class OAuthBase(Extractor):
|
||||
data = self.open(authorize_url, params)
|
||||
|
||||
# exchange the request token for an access token
|
||||
data = self.session.get(access_token_url, params=data).text
|
||||
data = self.request(access_token_url, params=data).text
|
||||
data = text.parse_query(data)
|
||||
token = data["oauth_token"]
|
||||
token_secret = data["oauth_token_secret"]
|
||||
@ -189,7 +189,8 @@ class OAuthBase(Extractor):
|
||||
data["client_id"] = client_id
|
||||
data["client_secret"] = client_secret
|
||||
|
||||
data = self.session.post(token_url, data=data, auth=auth).json()
|
||||
data = self.request(
|
||||
token_url, method="POST", data=data, auth=auth).json()
|
||||
|
||||
# check token response
|
||||
if "error" in data:
|
||||
@ -386,7 +387,7 @@ class OAuthMastodon(OAuthBase):
|
||||
"redirect_uris": self.redirect_uri,
|
||||
"scopes": "read",
|
||||
}
|
||||
data = self.session.post(url, data=data).json()
|
||||
data = self.request(url, method="POST", data=data).json()
|
||||
|
||||
if "client_id" not in data or "client_secret" not in data:
|
||||
raise exception.StopExtraction(
|
||||
@ -441,7 +442,8 @@ class OAuthPixiv(OAuthBase):
|
||||
"redirect_uri" : "https://app-api.pixiv.net"
|
||||
"/web/v1/users/auth/pixiv/callback",
|
||||
}
|
||||
data = self.session.post(url, headers=headers, data=data).json()
|
||||
data = self.request(
|
||||
url, method="POST", headers=headers, data=data).json()
|
||||
|
||||
if "error" in data:
|
||||
stdout_write("\n{}\n".format(data))
|
||||
|
@ -104,8 +104,9 @@ class PixivExtractor(Extractor):
|
||||
elif work["page_count"] == 1:
|
||||
url = meta_single_page["original_image_url"]
|
||||
if url == url_sanity:
|
||||
self.log.debug("Skipping 'sanity_level' warning (%s)",
|
||||
work["id"])
|
||||
self.log.warning(
|
||||
"Unable to download work %s ('sanity_level' warning)",
|
||||
work["id"])
|
||||
continue
|
||||
work["date_url"] = self._date_from_url(url)
|
||||
yield Message.Url, url, text.nameext_from_url(url, work)
|
||||
@ -619,6 +620,7 @@ class PixivNovelExtractor(PixivExtractor):
|
||||
meta_user = self.config("metadata")
|
||||
meta_bookmark = self.config("metadata-bookmark")
|
||||
embeds = self.config("embeds")
|
||||
covers = self.config("covers")
|
||||
|
||||
if embeds:
|
||||
headers = {
|
||||
@ -650,7 +652,7 @@ class PixivNovelExtractor(PixivExtractor):
|
||||
yield Message.Directory, novel
|
||||
|
||||
try:
|
||||
content = self.api.novel_text(novel["id"])["novel_text"]
|
||||
content = self.api.novel_webview(novel["id"])["text"]
|
||||
except Exception:
|
||||
self.log.warning("Unable to download novel %s", novel["id"])
|
||||
continue
|
||||
@ -658,12 +660,25 @@ class PixivNovelExtractor(PixivExtractor):
|
||||
novel["extension"] = "txt"
|
||||
yield Message.Url, "text:" + content, novel
|
||||
|
||||
if covers:
|
||||
path = novel["image_urls"]["large"].partition("/img/")[2]
|
||||
url = ("https://i.pximg.net/novel-cover-original/img/" +
|
||||
path.rpartition(".")[0].replace("_master1200", ""))
|
||||
novel["date_url"] = self._date_from_url(url)
|
||||
novel["num"] += 1
|
||||
novel["suffix"] = "_p{:02}".format(novel["num"])
|
||||
novel["_fallback"] = (url + ".png",)
|
||||
url_jpg = url + ".jpg"
|
||||
text.nameext_from_url(url_jpg, novel)
|
||||
yield Message.Url, url_jpg, novel
|
||||
del novel["_fallback"]
|
||||
|
||||
if embeds:
|
||||
desktop = False
|
||||
illusts = {}
|
||||
|
||||
for marker in text.extract_iter(content, "[", "]"):
|
||||
if marker.startswith("[jumpuri:If you would like to "):
|
||||
if marker.startswith("uploadedimage:"):
|
||||
desktop = True
|
||||
elif marker.startswith("pixivimage:"):
|
||||
illusts[marker[11:].partition("-")[0]] = None
|
||||
@ -918,6 +933,15 @@ class PixivAppAPI():
|
||||
params = {"novel_id": novel_id}
|
||||
return self._call("/v1/novel/text", params)
|
||||
|
||||
def novel_webview(self, novel_id):
|
||||
params = {"id": novel_id, "viewer_version": "20221031_ai"}
|
||||
return self._call(
|
||||
"/webview/v2/novel", params, self._novel_webview_parse)
|
||||
|
||||
def _novel_webview_parse(self, response):
|
||||
return util.json_loads(text.extr(
|
||||
response.text, "novel: ", ",\n"))
|
||||
|
||||
def search_illust(self, word, sort=None, target=None, duration=None,
|
||||
date_start=None, date_end=None):
|
||||
params = {"word": word, "search_target": target,
|
||||
@ -962,13 +986,17 @@ class PixivAppAPI():
|
||||
params = {"illust_id": illust_id}
|
||||
return self._call("/v1/ugoira/metadata", params)["ugoira_metadata"]
|
||||
|
||||
def _call(self, endpoint, params=None):
|
||||
def _call(self, endpoint, params=None, parse=None):
|
||||
url = "https://app-api.pixiv.net" + endpoint
|
||||
|
||||
while True:
|
||||
self.login()
|
||||
response = self.extractor.request(url, params=params, fatal=False)
|
||||
data = response.json()
|
||||
|
||||
if parse:
|
||||
data = parse(response)
|
||||
else:
|
||||
data = response.json()
|
||||
|
||||
if "error" not in data:
|
||||
return data
|
||||
|
@ -23,6 +23,10 @@ class PoipikuExtractor(Extractor):
|
||||
archive_fmt = "{post_id}_{num}"
|
||||
request_interval = (0.5, 1.5)
|
||||
|
||||
def _init(self):
|
||||
self.cookies.set(
|
||||
"POIPIKU_CONTENTS_VIEW_MODE", "1", domain="poipiku.com")
|
||||
|
||||
def items(self):
|
||||
password = self.config("password", "")
|
||||
|
||||
|
@ -143,6 +143,9 @@ class PornhubGifExtractor(PornhubExtractor):
|
||||
"url" : extr('"contentUrl": "', '"'),
|
||||
"date" : text.parse_datetime(
|
||||
extr('"uploadDate": "', '"'), "%Y-%m-%d"),
|
||||
"viewkey" : extr('From this video: '
|
||||
'<a href="/view_video.php?viewkey=', '"'),
|
||||
"timestamp": extr('lass="directLink tstamp" rel="nofollow">', '<'),
|
||||
"user" : text.remove_html(extr("Created by:", "</div>")),
|
||||
}
|
||||
|
||||
|
@ -35,10 +35,7 @@ class ReadcomiconlineBase():
|
||||
self.log.warning(
|
||||
"Redirect to \n%s\nVisit this URL in your browser, solve "
|
||||
"the CAPTCHA, and press ENTER to continue", response.url)
|
||||
try:
|
||||
input()
|
||||
except (EOFError, OSError):
|
||||
pass
|
||||
self.input()
|
||||
else:
|
||||
raise exception.StopExtraction(
|
||||
"Redirect to \n%s\nVisit this URL in your browser and "
|
||||
|
@ -74,8 +74,8 @@ class RedditExtractor(Extractor):
|
||||
yield Message.Url, url, submission
|
||||
|
||||
elif "gallery_data" in media:
|
||||
for submission["num"], url in enumerate(
|
||||
self._extract_gallery(media), 1):
|
||||
for url in self._extract_gallery(media):
|
||||
submission["num"] += 1
|
||||
text.nameext_from_url(url, submission)
|
||||
yield Message.Url, url, submission
|
||||
|
||||
@ -99,7 +99,10 @@ class RedditExtractor(Extractor):
|
||||
urls.append((url, submission))
|
||||
for comment in comments:
|
||||
html = comment["body_html"] or ""
|
||||
if ' href="' in html:
|
||||
href = (' href="' in html)
|
||||
media = ("media_metadata" in comment)
|
||||
|
||||
if media or href:
|
||||
comment["date"] = text.parse_timestamp(
|
||||
comment["created_utc"])
|
||||
if submission:
|
||||
@ -107,6 +110,14 @@ class RedditExtractor(Extractor):
|
||||
data["comment"] = comment
|
||||
else:
|
||||
data = comment
|
||||
|
||||
if media:
|
||||
for embed in self._extract_embed(comment):
|
||||
submission["num"] += 1
|
||||
text.nameext_from_url(embed, submission)
|
||||
yield Message.Url, embed, submission
|
||||
|
||||
if href:
|
||||
for url in text.extract_iter(html, ' href="', '"'):
|
||||
urls.append((url, data))
|
||||
|
||||
@ -118,6 +129,7 @@ class RedditExtractor(Extractor):
|
||||
if url.startswith((
|
||||
"https://www.reddit.com/message/compose",
|
||||
"https://reddit.com/message/compose",
|
||||
"https://preview.redd.it/",
|
||||
)):
|
||||
continue
|
||||
|
||||
@ -172,6 +184,27 @@ class RedditExtractor(Extractor):
|
||||
submission["id"], item["media_id"])
|
||||
self.log.debug(src)
|
||||
|
||||
def _extract_embed(self, submission):
|
||||
meta = submission["media_metadata"]
|
||||
if not meta:
|
||||
return
|
||||
|
||||
for mid, data in meta.items():
|
||||
if data["status"] != "valid" or "s" not in data:
|
||||
self.log.warning(
|
||||
"embed %s: skipping item %s (status: %s)",
|
||||
submission["id"], mid, data.get("status"))
|
||||
continue
|
||||
src = data["s"]
|
||||
url = src.get("u") or src.get("gif") or src.get("mp4")
|
||||
if url:
|
||||
yield url.partition("?")[0].replace("/preview.", "/i.", 1)
|
||||
else:
|
||||
self.log.error(
|
||||
"embed %s: unable to fetch download URL for item %s",
|
||||
submission["id"], mid)
|
||||
self.log.debug(src)
|
||||
|
||||
def _extract_video_ytdl(self, submission):
|
||||
return "https://www.reddit.com" + submission["permalink"]
|
||||
|
||||
@ -191,6 +224,8 @@ class RedditExtractor(Extractor):
|
||||
try:
|
||||
if "reddit_video_preview" in post["preview"]:
|
||||
video = post["preview"]["reddit_video_preview"]
|
||||
if "fallback_url" in video:
|
||||
yield video["fallback_url"]
|
||||
if "dash_url" in video:
|
||||
yield "ytdl:" + video["dash_url"]
|
||||
if "hls_url" in video:
|
||||
@ -200,6 +235,12 @@ class RedditExtractor(Extractor):
|
||||
|
||||
try:
|
||||
for image in post["preview"]["images"]:
|
||||
variants = image.get("variants")
|
||||
if variants:
|
||||
if "gif" in variants:
|
||||
yield variants["gif"]["source"]["url"]
|
||||
if "mp4" in variants:
|
||||
yield variants["mp4"]["source"]["url"]
|
||||
yield image["source"]["url"]
|
||||
except Exception as exc:
|
||||
self.log.debug("%s: %s", exc.__class__.__name__, exc)
|
||||
@ -446,14 +487,14 @@ class RedditAPI():
|
||||
|
||||
remaining = response.headers.get("x-ratelimit-remaining")
|
||||
if remaining and float(remaining) < 2:
|
||||
if self._warn_429:
|
||||
self._warn_429 = False
|
||||
self.log.warning("API rate limit exceeded")
|
||||
if self._warn_429 and self.client_id == self.CLIENT_ID:
|
||||
self.log.info(
|
||||
"Register your own OAuth application and use its "
|
||||
"credentials to prevent this error: "
|
||||
"https://github.com/mikf/gallery-dl/blob/master"
|
||||
"/docs/configuration.rst"
|
||||
"#extractorredditclient-id--user-agent")
|
||||
"https://gdl-org.github.io/docs/configuration.html"
|
||||
"#extractor-reddit-client-id-user-agent")
|
||||
self._warn_429 = False
|
||||
self.extractor.wait(
|
||||
seconds=response.headers["x-ratelimit-reset"])
|
||||
continue
|
||||
|
@ -26,10 +26,10 @@ class SkebExtractor(Extractor):
|
||||
def _init(self):
|
||||
self.thumbnails = self.config("thumbnails", False)
|
||||
self.article = self.config("article", False)
|
||||
self.headers = {
|
||||
"Accept" : "application/json, text/plain, */*",
|
||||
"Authorization": "Bearer null",
|
||||
}
|
||||
self.headers = {"Accept": "application/json, text/plain, */*"}
|
||||
|
||||
if "Authorization" not in self.session.headers:
|
||||
self.headers["Authorization"] = "Bearer null"
|
||||
|
||||
def request(self, url, **kwargs):
|
||||
while True:
|
||||
@ -55,6 +55,12 @@ class SkebExtractor(Extractor):
|
||||
url = file["file_url"]
|
||||
yield Message.Url, url, text.nameext_from_url(url, post)
|
||||
|
||||
def _items_users(self):
|
||||
base = self.root + "/@"
|
||||
for user in self.users():
|
||||
user["_extractor"] = SkebUserExtractor
|
||||
yield Message.Queue, base + user["screen_name"], user
|
||||
|
||||
def posts(self):
|
||||
"""Return post number"""
|
||||
|
||||
@ -83,6 +89,20 @@ class SkebExtractor(Extractor):
|
||||
return
|
||||
params["offset"] += 30
|
||||
|
||||
def _pagination_users(self, endpoint, params):
|
||||
url = "{}/api{}".format(self.root, endpoint)
|
||||
params["offset"] = 0
|
||||
params["limit"] = 90
|
||||
|
||||
while True:
|
||||
data = self.request(
|
||||
url, params=params, headers=self.headers).json()
|
||||
yield from data
|
||||
|
||||
if len(data) < params["limit"]:
|
||||
return
|
||||
params["offset"] += params["limit"]
|
||||
|
||||
def _get_post_data(self, user_name, post_num):
|
||||
url = "{}/api/users/{}/works/{}".format(
|
||||
self.root, user_name, post_num)
|
||||
@ -256,22 +276,23 @@ class SkebFollowingExtractor(SkebExtractor):
|
||||
pattern = r"(?:https?://)?skeb\.jp/@([^/?#]+)/following_creators"
|
||||
example = "https://skeb.jp/@USER/following_creators"
|
||||
|
||||
def items(self):
|
||||
for user in self.users():
|
||||
url = "{}/@{}".format(self.root, user["screen_name"])
|
||||
user["_extractor"] = SkebUserExtractor
|
||||
yield Message.Queue, url, user
|
||||
items = SkebExtractor._items_users
|
||||
|
||||
def users(self):
|
||||
url = "{}/api/users/{}/following_creators".format(
|
||||
self.root, self.user_name)
|
||||
params = {"sort": "date", "offset": 0, "limit": 90}
|
||||
endpoint = "/users/{}/following_creators".format(self.user_name)
|
||||
params = {"sort": "date"}
|
||||
return self._pagination_users(endpoint, params)
|
||||
|
||||
while True:
|
||||
data = self.request(
|
||||
url, params=params, headers=self.headers).json()
|
||||
yield from data
|
||||
|
||||
if len(data) < params["limit"]:
|
||||
return
|
||||
params["offset"] += params["limit"]
|
||||
class SkebFollowingUsersExtractor(SkebExtractor):
|
||||
"""Extractor for your followed users"""
|
||||
subcategory = "following-users"
|
||||
pattern = r"(?:https?://)?skeb\.jp/following_users()"
|
||||
example = "https://skeb.jp/following_users"
|
||||
|
||||
items = SkebExtractor._items_users
|
||||
|
||||
def users(self):
|
||||
endpoint = "/following_users"
|
||||
params = {}
|
||||
return self._pagination_users(endpoint, params)
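Both Skeb "following" extractors now delegate to the shared _pagination_users() helper shown earlier, which walks an offset/limit-paginated JSON endpoint. A self-contained sketch of that pattern, assuming the requests library; the endpoint shape and the page size of 90 come from the diff, the function name is illustrative:

    import requests

    def paginate_users(root, endpoint, params=None, headers=None, limit=90):
        """Yield items from '<root>/api<endpoint>' until a page comes back
        with fewer than 'limit' entries."""
        url = "{}/api{}".format(root, endpoint)
        params = dict(params or {}, offset=0, limit=limit)
        while True:
            data = requests.get(url, params=params, headers=headers).json()
            yield from data
            if len(data) < params["limit"]:
                return
            params["offset"] += params["limit"]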
|
||||
|
@ -163,6 +163,9 @@ class SteamgriddbAssetExtractor(SteamgriddbExtractor):
|
||||
def assets(self):
|
||||
endpoint = "/api/public/asset/" + self.asset_type + "/" + self.asset_id
|
||||
asset = self._call(endpoint)["asset"]
|
||||
if asset is None:
|
||||
raise exception.NotFoundError("asset ({}:{})".format(
|
||||
self.asset_type, self.asset_id))
|
||||
return (asset,)
|
||||
|
||||
|
||||
|
@ -175,7 +175,7 @@ class SubscribestarPostExtractor(SubscribestarExtractor):
|
||||
"author_id" : text.parse_int(extr('data-user-id="', '"')),
|
||||
"author_nick": text.unescape(extr('alt="', '"')),
|
||||
"date" : self._parse_datetime(extr(
|
||||
'class="section-subtitle">', '<')),
|
||||
'<span class="star_link-types">', '<')),
|
||||
"content" : (extr(
|
||||
'<div class="post-content', '<div class="post-uploads')
|
||||
.partition(">")[2]),
|
||||
|
@ -151,3 +151,18 @@ class TapasEpisodeExtractor(TapasExtractor):
|
||||
|
||||
def episode_ids(self):
|
||||
return (self.episode_id,)
|
||||
|
||||
|
||||
class TapasCreatorExtractor(TapasExtractor):
|
||||
subcategory = "creator"
|
||||
pattern = BASE_PATTERN + r"/(?!series|episode)([^/?#]+)"
|
||||
example = "https://tapas.io/CREATOR"
|
||||
|
||||
def items(self):
|
||||
url = "{}/{}/series".format(self.root, self.groups[0])
|
||||
page = self.request(url).text
|
||||
page = text.extr(page, '<ul class="content-list-wrap', "</ul>")
|
||||
|
||||
data = {"_extractor": TapasSeriesExtractor}
|
||||
for path in text.extract_iter(page, ' href="', '"'):
|
||||
yield Message.Queue, self.root + path, data
|
||||
|
@ -447,9 +447,9 @@ class TumblrAPI(oauth.OAuth1API):
|
||||
if api_key == self.API_KEY:
|
||||
self.log.info(
|
||||
"Register your own OAuth application and use its "
|
||||
"credentials to prevent this error: https://githu"
|
||||
"b.com/mikf/gallery-dl/blob/master/docs/configurat"
|
||||
"ion.rst#extractortumblrapi-key--api-secret")
|
||||
"credentials to prevent this error: "
|
||||
"https://gdl-org.github.io/docs/configuration.html"
|
||||
"#extractor-tumblr-api-key-api-secret")
|
||||
|
||||
if self.extractor.config("ratelimit") == "wait":
|
||||
self.extractor.wait(seconds=reset)
|
||||
|
@ -12,11 +12,12 @@ from .common import Extractor, Message
|
||||
from .. import text, util, exception
|
||||
from ..cache import cache, memcache
|
||||
import itertools
|
||||
import random
|
||||
import json
|
||||
import re
|
||||
|
||||
BASE_PATTERN = (r"(?:https?://)?(?:www\.|mobile\.)?"
|
||||
r"(?:(?:[fv]x)?twitter|(?:fixup)?x)\.com")
|
||||
r"(?:(?:[fv]x)?twitter|(?:fix(?:up|v))?x)\.com")
|
||||
|
||||
|
||||
class TwitterExtractor(Extractor):
|
||||
@ -243,8 +244,8 @@ class TwitterExtractor(Extractor):
|
||||
|
||||
# collect URLs from entities
|
||||
for url in tweet["entities"].get("urls") or ():
|
||||
url = url["expanded_url"]
|
||||
if "//twitpic.com/" not in url or "/photos/" in url:
|
||||
url = url.get("expanded_url") or url.get("url") or ""
|
||||
if not url or "//twitpic.com/" not in url or "/photos/" in url:
|
||||
continue
|
||||
if url.startswith("http:"):
|
||||
url = "https" + url[4:]
|
||||
@ -336,10 +337,20 @@ class TwitterExtractor(Extractor):
|
||||
urls = entities.get("urls")
|
||||
if urls:
|
||||
for url in urls:
|
||||
content = content.replace(url["url"], url["expanded_url"])
|
||||
try:
|
||||
content = content.replace(url["url"], url["expanded_url"])
|
||||
except KeyError:
|
||||
pass
|
||||
txt, _, tco = content.rpartition(" ")
|
||||
tdata["content"] = txt if tco.startswith("https://t.co/") else content
|
||||
|
||||
if "birdwatch_pivot" in tweet:
|
||||
try:
|
||||
tdata["birdwatch"] = \
|
||||
tweet["birdwatch_pivot"]["subtitle"]["text"]
|
||||
except KeyError:
|
||||
self.log.debug("Unable to extract 'birdwatch' note from %s",
|
||||
tweet["birdwatch_pivot"])
|
||||
if "in_reply_to_screen_name" in legacy:
|
||||
tdata["reply_to"] = legacy["in_reply_to_screen_name"]
|
||||
if "quoted_by" in legacy:
|
||||
@ -380,6 +391,7 @@ class TwitterExtractor(Extractor):
|
||||
"date" : text.parse_datetime(
|
||||
uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
|
||||
"verified" : uget("verified", False),
|
||||
"protected" : uget("protected", False),
|
||||
"profile_banner" : uget("profile_banner_url", ""),
|
||||
"profile_image" : uget(
|
||||
"profile_image_url_https", "").replace("_normal.", "."),
|
||||
@ -395,7 +407,10 @@ class TwitterExtractor(Extractor):
|
||||
urls = entities["description"].get("urls")
|
||||
if urls:
|
||||
for url in urls:
|
||||
descr = descr.replace(url["url"], url["expanded_url"])
|
||||
try:
|
||||
descr = descr.replace(url["url"], url["expanded_url"])
|
||||
except KeyError:
|
||||
pass
|
||||
udata["description"] = descr
|
||||
|
||||
if "url" in entities:
|
||||
@ -731,9 +746,10 @@ class TwitterEventExtractor(TwitterExtractor):
|
||||
|
||||
|
||||
class TwitterTweetExtractor(TwitterExtractor):
|
||||
"""Extractor for images from individual tweets"""
|
||||
"""Extractor for individual tweets"""
|
||||
subcategory = "tweet"
|
||||
pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
|
||||
pattern = (BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
|
||||
r"/?(?:$|\?|#|photo/)")
|
||||
example = "https://twitter.com/USER/status/12345"
|
||||
|
||||
def __init__(self, match):
|
||||
@ -810,6 +826,18 @@ class TwitterTweetExtractor(TwitterExtractor):
|
||||
return itertools.chain(buffer, tweets)
|
||||
|
||||
|
||||
class TwitterQuotesExtractor(TwitterExtractor):
|
||||
"""Extractor for quotes of a Tweet"""
|
||||
subcategory = "quotes"
|
||||
pattern = BASE_PATTERN + r"/(?:[^/?#]+|i/web)/status/(\d+)/quotes"
|
||||
example = "https://twitter.com/USER/status/12345/quotes"
|
||||
|
||||
def items(self):
|
||||
url = "{}/search?q=quoted_tweet_id:{}".format(self.root, self.user)
|
||||
data = {"_extractor": TwitterSearchExtractor}
|
||||
yield Message.Queue, url, data
|
||||
|
||||
|
||||
class TwitterAvatarExtractor(TwitterExtractor):
|
||||
subcategory = "avatar"
|
||||
filename_fmt = "avatar {date}.{extension}"
|
||||
@ -882,6 +910,7 @@ class TwitterAPI():
|
||||
|
||||
def __init__(self, extractor):
|
||||
self.extractor = extractor
|
||||
self.log = extractor.log
|
||||
|
||||
self.root = "https://twitter.com/i/api"
|
||||
self._nsfw_warning = True
|
||||
@ -1244,7 +1273,7 @@ class TwitterAPI():
|
||||
@cache(maxage=3600)
|
||||
def _guest_token(self):
|
||||
endpoint = "/1.1/guest/activate.json"
|
||||
self.extractor.log.info("Requesting guest token")
|
||||
self.log.info("Requesting guest token")
|
||||
return str(self._call(
|
||||
endpoint, None, "POST", False, "https://api.twitter.com",
|
||||
)["guest_token"])
|
||||
@ -1272,45 +1301,72 @@ class TwitterAPI():
|
||||
if csrf_token:
|
||||
self.headers["x-csrf-token"] = csrf_token
|
||||
|
||||
if response.status_code < 400:
|
||||
data = response.json()
|
||||
if not data.get("errors") or not any(
|
||||
(e.get("message") or "").lower().startswith("timeout")
|
||||
for e in data["errors"]):
|
||||
return data # success or non-timeout errors
|
||||
|
||||
msg = data["errors"][0].get("message") or "Unspecified"
|
||||
self.extractor.log.debug("Internal Twitter error: '%s'", msg)
|
||||
|
||||
if self.headers["x-twitter-auth-type"]:
|
||||
self.extractor.log.debug("Retrying API request")
|
||||
continue # retry
|
||||
|
||||
# fall through to "Login Required"
|
||||
response.status_code = 404
|
||||
|
||||
if response.status_code == 429:
|
||||
# rate limit exceeded
|
||||
if self.extractor.config("ratelimit") == "abort":
|
||||
raise exception.StopExtraction("Rate limit exceeded")
|
||||
|
||||
until = response.headers.get("x-rate-limit-reset")
|
||||
seconds = None if until else 60
|
||||
self.extractor.wait(until=until, seconds=seconds)
|
||||
remaining = int(response.headers.get("x-rate-limit-remaining", 6))
|
||||
if remaining < 6 and remaining <= random.randrange(1, 6):
|
||||
self._handle_ratelimit(response)
|
||||
continue
|
||||
|
||||
if response.status_code in (403, 404) and \
|
||||
try:
|
||||
data = response.json()
|
||||
except ValueError:
|
||||
data = {"errors": ({"message": response.text},)}
|
||||
|
||||
errors = data.get("errors")
|
||||
if not errors:
|
||||
return data
|
||||
|
||||
retry = False
|
||||
for error in errors:
|
||||
msg = error.get("message") or "Unspecified"
|
||||
self.log.debug("API error: '%s'", msg)
|
||||
|
||||
if "this account is temporarily locked" in msg:
|
||||
msg = "Account temporarily locked"
|
||||
if self.extractor.config("locked") != "wait":
|
||||
raise exception.AuthorizationError(msg)
|
||||
self.log.warning(msg)
|
||||
self.extractor.input("Press ENTER to retry.")
|
||||
retry = True
|
||||
|
||||
elif "Could not authenticate you" in msg:
|
||||
if not self.extractor.config("relogin", True):
|
||||
continue
|
||||
|
||||
username, password = self.extractor._get_auth_info()
|
||||
if not username:
|
||||
continue
|
||||
|
||||
_login_impl.invalidate(username)
|
||||
self.extractor.cookies_update(
|
||||
_login_impl(self.extractor, username, password))
|
||||
self.__init__(self.extractor)
|
||||
retry = True
|
||||
|
||||
elif msg.lower().startswith("timeout"):
|
||||
retry = True
|
||||
|
||||
if retry:
|
||||
if self.headers["x-twitter-auth-type"]:
|
||||
self.log.debug("Retrying API request")
|
||||
continue
|
||||
else:
|
||||
# fall through to "Login Required"
|
||||
response.status_code = 404
|
||||
|
||||
if response.status_code < 400:
|
||||
return data
|
||||
elif response.status_code in (403, 404) and \
|
||||
not self.headers["x-twitter-auth-type"]:
|
||||
raise exception.AuthorizationError("Login required")
|
||||
elif response.status_code == 429:
|
||||
self._handle_ratelimit(response)
|
||||
continue
|
||||
|
||||
# error
|
||||
try:
|
||||
data = response.json()
|
||||
errors = ", ".join(e["message"] for e in data["errors"])
|
||||
except ValueError:
|
||||
errors = response.text
|
||||
errors = ", ".join(e["message"] for e in errors)
|
||||
except Exception:
|
||||
errors = data.get("errors", "")
|
||||
pass
|
||||
|
||||
raise exception.StopExtraction(
|
||||
"%s %s (%s)", response.status_code, response.reason, errors)
|
||||
@ -1374,7 +1430,7 @@ class TwitterAPI():
|
||||
try:
|
||||
tweet = tweets[tweet_id]
|
||||
except KeyError:
|
||||
self.extractor.log.debug("Skipping %s (deleted)", tweet_id)
|
||||
self.log.debug("Skipping %s (deleted)", tweet_id)
|
||||
continue
|
||||
|
||||
if "retweeted_status_id_str" in tweet:
|
||||
@ -1606,8 +1662,10 @@ class TwitterAPI():
|
||||
variables["cursor"] = cursor
|
||||
|
||||
def _pagination_users(self, endpoint, variables, path=None):
|
||||
params = {"variables": None,
|
||||
"features" : self._json_dumps(self.features_pagination)}
|
||||
params = {
|
||||
"variables": None,
|
||||
"features" : self._json_dumps(self.features_pagination),
|
||||
}
|
||||
|
||||
while True:
|
||||
cursor = entry = None
|
||||
@ -1644,6 +1702,13 @@ class TwitterAPI():
|
||||
return
|
||||
variables["cursor"] = cursor
|
||||
|
||||
def _handle_ratelimit(self, response):
if self.extractor.config("ratelimit") == "abort":
raise exception.StopExtraction("Rate limit exceeded")

until = response.headers.get("x-rate-limit-reset")
self.extractor.wait(until=until, seconds=None if until else 60)
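The 429 handling is now factored into _handle_ratelimit(), which either aborts or waits until the epoch timestamp from "x-rate-limit-reset". A standalone sketch of the same idea, using time.sleep() in place of the extractor's wait():

    import time

    def handle_ratelimit(response, abort=False):
        """Sleep until the 'x-rate-limit-reset' epoch timestamp; fall back
        to 60 seconds when the header is missing."""
        if abort:
            raise RuntimeError("Rate limit exceeded")
        until = response.headers.get("x-rate-limit-reset")
        if until:
            time.sleep(max(0.0, int(until) - time.time()))
        else:
            time.sleep(60)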
|
||||
|
||||
def _process_tombstone(self, entry, tombstone):
|
||||
text = (tombstone.get("richText") or tombstone["text"])["text"]
|
||||
tweet_id = entry["entryId"].rpartition("-")[2]
|
||||
@ -1651,30 +1716,30 @@ class TwitterAPI():
|
||||
if text.startswith("Age-restricted"):
|
||||
if self._nsfw_warning:
|
||||
self._nsfw_warning = False
|
||||
self.extractor.log.warning('"%s"', text)
|
||||
self.log.warning('"%s"', text)
|
||||
|
||||
self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
|
||||
self.log.debug("Skipping %s ('%s')", tweet_id, text)
|
||||
|
||||
|
||||
@cache(maxage=365*86400, keyarg=1)
|
||||
def _login_impl(extr, username, password):
|
||||
|
||||
import re
|
||||
import random
|
||||
def process(data, params=None):
|
||||
response = extr.request(
|
||||
url, params=params, headers=headers, json=data,
|
||||
method="POST", fatal=None)
|
||||
|
||||
if re.fullmatch(r"[\w.%+-]+@[\w.-]+\.\w{2,}", username):
|
||||
extr.log.warning(
|
||||
"Login with email is no longer possible. "
|
||||
"You need to provide your username or phone number instead.")
|
||||
|
||||
def process(response):
|
||||
try:
|
||||
data = response.json()
|
||||
except ValueError:
|
||||
data = {"errors": ({"message": "Invalid response"},)}
|
||||
else:
|
||||
if response.status_code < 400:
|
||||
return data["flow_token"]
|
||||
try:
|
||||
return (data["flow_token"],
|
||||
data["subtasks"][0]["subtask_id"])
|
||||
except LookupError:
|
||||
pass
|
||||
|
||||
errors = []
|
||||
for error in data.get("errors") or ():
|
||||
@ -1683,9 +1748,13 @@ def _login_impl(extr, username, password):
|
||||
extr.log.debug(response.text)
|
||||
raise exception.AuthenticationError(", ".join(errors))
|
||||
|
||||
extr.cookies.clear()
|
||||
cookies = extr.cookies
|
||||
cookies.clear()
|
||||
api = TwitterAPI(extr)
|
||||
api._authenticate_guest()
|
||||
|
||||
url = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||
params = {"flow_name": "login"}
|
||||
headers = api.headers
|
||||
|
||||
extr.log.info("Logging in as %s", username)
|
||||
@ -1742,31 +1811,18 @@ def _login_impl(extr, username, password):
|
||||
"web_modal": 1,
|
||||
},
|
||||
}
|
||||
url = "https://api.twitter.com/1.1/onboarding/task.json?flow_name=login"
|
||||
response = extr.request(url, method="POST", headers=headers, json=data)
|
||||
|
||||
data = {
|
||||
"flow_token": process(response),
|
||||
"subtask_inputs": [
|
||||
{
|
||||
"subtask_id": "LoginJsInstrumentationSubtask",
|
||||
flow_token, subtask = process(data, params)
|
||||
while not cookies.get("auth_token"):
|
||||
if subtask == "LoginJsInstrumentationSubtask":
|
||||
data = {
|
||||
"js_instrumentation": {
|
||||
"response": "{}",
|
||||
"link": "next_link",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
url = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||
response = extr.request(
|
||||
url, method="POST", headers=headers, json=data, fatal=None)
|
||||
|
||||
# username
|
||||
data = {
|
||||
"flow_token": process(response),
|
||||
"subtask_inputs": [
|
||||
{
|
||||
"subtask_id": "LoginEnterUserIdentifierSSO",
|
||||
}
|
||||
elif subtask == "LoginEnterUserIdentifierSSO":
|
||||
data = {
|
||||
"settings_list": {
|
||||
"setting_responses": [
|
||||
{
|
||||
@ -1778,48 +1834,61 @@ def _login_impl(extr, username, password):
|
||||
],
|
||||
"link": "next_link",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
# url = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||
extr.sleep(random.uniform(2.0, 4.0), "login (username)")
|
||||
response = extr.request(
|
||||
url, method="POST", headers=headers, json=data, fatal=None)
|
||||
|
||||
# password
|
||||
data = {
|
||||
"flow_token": process(response),
|
||||
"subtask_inputs": [
|
||||
{
|
||||
"subtask_id": "LoginEnterPassword",
|
||||
}
|
||||
elif subtask == "LoginEnterPassword":
|
||||
data = {
|
||||
"enter_password": {
|
||||
"password": password,
|
||||
"link": "next_link",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
# url = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||
extr.sleep(random.uniform(2.0, 4.0), "login (password)")
|
||||
response = extr.request(
|
||||
url, method="POST", headers=headers, json=data, fatal=None)
|
||||
|
||||
# account duplication check ?
|
||||
data = {
|
||||
"flow_token": process(response),
|
||||
"subtask_inputs": [
|
||||
{
|
||||
"subtask_id": "AccountDuplicationCheck",
|
||||
}
|
||||
elif subtask == "LoginEnterAlternateIdentifierSubtask":
|
||||
alt = extr.input(
|
||||
"Alternate Identifier (username, email, phone number): ")
|
||||
data = {
|
||||
"enter_text": {
|
||||
"text": alt,
|
||||
"link": "next_link",
|
||||
},
|
||||
}
|
||||
elif subtask == "LoginTwoFactorAuthChallenge":
|
||||
data = {
|
||||
"enter_text": {
|
||||
"text": extr.input("2FA Token: "),
|
||||
"link": "next_link",
|
||||
},
|
||||
}
|
||||
elif subtask == "LoginAcid":
|
||||
data = {
|
||||
"enter_text": {
|
||||
"text": extr.input("Email Verification Code: "),
|
||||
"link": "next_link",
|
||||
},
|
||||
}
|
||||
elif subtask == "AccountDuplicationCheck":
|
||||
data = {
|
||||
"check_logged_in_account": {
|
||||
"link": "AccountDuplicationCheck_false",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
# url = "https://api.twitter.com/1.1/onboarding/task.json"
|
||||
response = extr.request(
|
||||
url, method="POST", headers=headers, json=data, fatal=None)
|
||||
process(response)
|
||||
}
|
||||
elif subtask == "ArkoseLogin":
|
||||
raise exception.AuthenticationError("Login requires CAPTCHA")
|
||||
elif subtask == "DenyLoginSubtask":
|
||||
raise exception.AuthenticationError("Login rejected as suspicious")
|
||||
elif subtask == "ArkoseLogin":
|
||||
raise exception.AuthenticationError("No auth token cookie")
|
||||
else:
|
||||
raise exception.StopExtraction("Unrecognized subtask %s", subtask)
|
||||
|
||||
inputs = {"subtask_id": subtask}
|
||||
inputs.update(data)
|
||||
data = {
|
||||
"flow_token": flow_token,
|
||||
"subtask_inputs": [inputs],
|
||||
}
|
||||
|
||||
extr.sleep(random.uniform(1.0, 3.0), "login ({})".format(subtask))
|
||||
flow_token, subtask = process(data)
|
||||
|
||||
return {
|
||||
cookie.name: cookie.value
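The login rewrite replaces the fixed request sequence with a loop that keeps answering whatever subtask the onboarding flow returns until an "auth_token" cookie appears. A generic sketch of that state-machine shape; "submit_step", "handlers", and "done" are illustrative names, not part of gallery-dl:

    def run_login_flow(submit_step, handlers, start_data, done):
        """Drive a flow-token/subtask loop.

        submit_step(payload) -> (flow_token, subtask_id)
        handlers[subtask_id]() -> dict of inputs for that subtask
        done() -> True once the flow produced the wanted credentials
        """
        flow_token, subtask = submit_step(start_data)
        while not done():
            try:
                build_inputs = handlers[subtask]
            except KeyError:
                raise RuntimeError("Unrecognized subtask " + subtask)
            inputs = {"subtask_id": subtask}
            inputs.update(build_inputs())
            flow_token, subtask = submit_step(
                {"flow_token": flow_token, "subtask_inputs": [inputs]})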
|
||||
|
@ -26,17 +26,39 @@ class VipergirlsExtractor(Extractor):
|
||||
cookies_domain = ".vipergirls.to"
|
||||
cookies_names = ("vg_userid", "vg_password")
|
||||
|
||||
def _init(self):
|
||||
domain = self.config("domain")
|
||||
if domain:
|
||||
self.root = text.ensure_http_scheme(domain)
|
||||
|
||||
def items(self):
|
||||
self.login()
|
||||
posts = self.posts()
|
||||
|
||||
for post in self.posts():
|
||||
like = self.config("like")
|
||||
if like:
|
||||
user_hash = posts[0].get("hash")
|
||||
if len(user_hash) < 16:
|
||||
self.log.warning("Login required to like posts")
|
||||
like = False
|
||||
|
||||
posts = posts.iter("post")
|
||||
if self.page:
|
||||
util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
|
||||
|
||||
for post in posts:
|
||||
data = post.attrib
|
||||
data["thread_id"] = self.thread_id
|
||||
|
||||
yield Message.Directory, data
|
||||
|
||||
image = None
|
||||
for image in post:
|
||||
yield Message.Queue, image.attrib["main_url"], data
|
||||
|
||||
if image is not None and like:
|
||||
self.like(post, user_hash)
|
||||
|
||||
def login(self):
|
||||
if self.cookies_check(self.cookies_names):
|
||||
return
|
||||
@ -64,6 +86,17 @@ class VipergirlsExtractor(Extractor):
|
||||
return {cookie.name: cookie.value
|
||||
for cookie in response.cookies}
|
||||
|
||||
def like(self, post, user_hash):
|
||||
url = self.root + "/post_thanks.php"
|
||||
params = {
|
||||
"do" : "post_thanks_add",
|
||||
"p" : post.get("id"),
|
||||
"securitytoken": user_hash,
|
||||
}
|
||||
|
||||
with self.request(url, params=params, allow_redirects=False):
|
||||
pass
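The new like() helper simply hits vBulletin's "post_thanks_add" endpoint for each downloaded post. A minimal sketch, assuming the requests library; the parameter names come from the diff:

    import requests

    def like_post(root, post_id, security_token):
        """Send a 'thanks' for one post; the response body is irrelevant."""
        params = {
            "do"           : "post_thanks_add",
            "p"            : post_id,
            "securitytoken": security_token,
        }
        requests.get(root + "/post_thanks.php",
                     params=params, allow_redirects=False)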
|
||||
|
||||
|
||||
class VipergirlsThreadExtractor(VipergirlsExtractor):
|
||||
"""Extractor for vipergirls threads"""
|
||||
@ -77,12 +110,7 @@ class VipergirlsThreadExtractor(VipergirlsExtractor):
|
||||
|
||||
def posts(self):
|
||||
url = "{}/vr.php?t={}".format(self.root, self.thread_id)
|
||||
root = ElementTree.fromstring(self.request(url).text)
|
||||
posts = root.iter("post")
|
||||
|
||||
if self.page:
|
||||
util.advance(posts, (text.parse_int(self.page[5:]) - 1) * 15)
|
||||
return posts
|
||||
return ElementTree.fromstring(self.request(url).text)
|
||||
|
||||
|
||||
class VipergirlsPostExtractor(VipergirlsExtractor):
|
||||
@ -95,8 +123,8 @@ class VipergirlsPostExtractor(VipergirlsExtractor):
|
||||
def __init__(self, match):
|
||||
VipergirlsExtractor.__init__(self, match)
|
||||
self.thread_id, self.post_id = match.groups()
|
||||
self.page = 0
|
||||
|
||||
def posts(self):
|
||||
url = "{}/vr.php?p={}".format(self.root, self.post_id)
|
||||
root = ElementTree.fromstring(self.request(url).text)
|
||||
return root.iter("post")
|
||||
return ElementTree.fromstring(self.request(url).text)
|
||||
|
@ -46,6 +46,8 @@ class VscoExtractor(Extractor):
|
||||
url = "https://image-{}.vsco.co/{}".format(cdn, path)
|
||||
elif cdn.isdecimal():
|
||||
url = "https://image.vsco.co/" + base
|
||||
elif img["responsive_url"].startswith("http"):
|
||||
url = img["responsive_url"]
|
||||
else:
|
||||
url = "https://" + img["responsive_url"]
|
||||
|
||||
@ -238,6 +240,34 @@ class VscoSpacesExtractor(VscoExtractor):
|
||||
yield Message.Queue, url, space
|
||||
|
||||
|
||||
class VscoAvatarExtractor(VscoExtractor):
|
||||
"""Extractor for vsco.co user avatars"""
|
||||
subcategory = "avatar"
|
||||
pattern = USER_PATTERN + r"/avatar"
|
||||
example = "https://vsco.co/USER/avatar"
|
||||
|
||||
def images(self):
|
||||
url = "{}/{}/gallery".format(self.root, self.user)
|
||||
page = self.request(url).text
|
||||
piid = text.extr(page, '"profileImageId":"', '"')
|
||||
|
||||
url = "https://im.vsco.co/" + piid
|
||||
# needs GET request, since HEAD does not redirect to full URL
|
||||
response = self.request(url, allow_redirects=False)
|
||||
|
||||
return ({
|
||||
"_id" : piid,
|
||||
"is_video" : False,
|
||||
"grid_name" : "",
|
||||
"upload_date" : 0,
|
||||
"responsive_url": response.headers["Location"],
|
||||
"video_url" : "",
|
||||
"image_meta" : None,
|
||||
"width" : 0,
|
||||
"height" : 0,
|
||||
},)
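The new avatar extractor resolves the profile image by scraping "profileImageId" from the gallery page and following the im.vsco.co redirect manually; as the comment above notes, a HEAD request would not redirect to the full URL. A standalone sketch, assuming the requests library:

    import requests

    def vsco_avatar_url(user):
        """Return the full CDN URL of a VSCO user's avatar."""
        page = requests.get("https://vsco.co/{}/gallery".format(user)).text
        piid = page.partition('"profileImageId":"')[2].partition('"')[0]
        response = requests.get("https://im.vsco.co/" + piid,
                                allow_redirects=False)
        return response.headers["Location"]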
|
||||
|
||||
|
||||
class VscoImageExtractor(VscoExtractor):
|
||||
"""Extractor for individual images on vsco.co"""
|
||||
subcategory = "image"
|
||||
|
@ -50,7 +50,7 @@ class WarosuThreadExtractor(Extractor):
|
||||
title = text.unescape(text.extr(page, "class=filetitle>", "<"))
|
||||
return {
|
||||
"board" : self.board,
|
||||
"board_name": boardname.rpartition(" - ")[2],
|
||||
"board_name": boardname.split(" - ")[1],
|
||||
"thread" : self.thread,
|
||||
"title" : title,
|
||||
}
|
||||
@ -64,8 +64,7 @@ class WarosuThreadExtractor(Extractor):
|
||||
def parse(self, post):
|
||||
"""Build post object by extracting data from an HTML post"""
|
||||
data = self._extract_post(post)
|
||||
if "<span> File:" in post:
|
||||
self._extract_image(post, data)
|
||||
if "<span> File:" in post and self._extract_image(post, data):
|
||||
part = data["image"].rpartition("/")[2]
|
||||
data["tim"], _, data["extension"] = part.partition(".")
|
||||
data["ext"] = "." + data["extension"]
|
||||
@ -91,6 +90,11 @@ class WarosuThreadExtractor(Extractor):
|
||||
"", "<").rstrip().rpartition(".")[0])
|
||||
extr("<br>", "")
|
||||
|
||||
data["image"] = url = extr("<a href=", ">")
|
||||
if url[0] == "/":
|
||||
data["image"] = self.root + url
|
||||
url = extr("<a href=", ">")
|
||||
if url:
|
||||
if url[0] == "/":
|
||||
data["image"] = self.root + url
|
||||
else:
|
||||
data["image"] = url
|
||||
return True
|
||||
return False
|
||||
|
@ -30,9 +30,9 @@ class WeiboExtractor(Extractor):
|
||||
self._prefix, self.user = match.groups()
|
||||
|
||||
def _init(self):
|
||||
self.retweets = self.config("retweets", True)
|
||||
self.videos = self.config("videos", True)
|
||||
self.livephoto = self.config("livephoto", True)
|
||||
self.retweets = self.config("retweets", False)
|
||||
self.videos = self.config("videos", True)
|
||||
self.gifs = self.config("gifs", True)
|
||||
self.gifs_video = (self.gifs == "video")
|
||||
|
||||
@ -59,15 +59,25 @@ class WeiboExtractor(Extractor):
|
||||
|
||||
for status in self.statuses():
|
||||
|
||||
files = []
|
||||
if self.retweets and "retweeted_status" in status:
|
||||
if "ori_mid" in status and not self.retweets:
|
||||
self.log.debug("Skipping %s (快转 retweet)", status["id"])
|
||||
continue
|
||||
|
||||
if "retweeted_status" in status:
|
||||
if not self.retweets:
|
||||
self.log.debug("Skipping %s (retweet)", status["id"])
|
||||
continue
|
||||
|
||||
# videos of the original post are in status
|
||||
# images of the original post are in status["retweeted_status"]
|
||||
files = []
|
||||
self._extract_status(status, files)
|
||||
self._extract_status(status["retweeted_status"], files)
|
||||
|
||||
if original_retweets:
|
||||
status = status["retweeted_status"]
|
||||
self._extract_status(status, files)
|
||||
else:
|
||||
self._extract_status(status, files)
|
||||
self._extract_status(status["retweeted_status"], files)
|
||||
else:
|
||||
files = []
|
||||
self._extract_status(status, files)
|
||||
|
||||
status["date"] = text.parse_datetime(
|
||||
@ -118,7 +128,7 @@ class WeiboExtractor(Extractor):
|
||||
append(pic["largest"].copy())
|
||||
|
||||
file = {"url": pic["video"]}
|
||||
file["filehame"], _, file["extension"] = \
|
||||
file["filename"], _, file["extension"] = \
|
||||
pic["video"].rpartition("%2F")[2].rpartition(".")
|
||||
append(file)
|
||||
|
||||
@ -176,23 +186,34 @@ class WeiboExtractor(Extractor):
|
||||
|
||||
data = data["data"]
|
||||
statuses = data["list"]
|
||||
if not statuses:
|
||||
return
|
||||
yield from statuses
|
||||
|
||||
if "next_cursor" in data: # videos, newvideo
|
||||
if data["next_cursor"] == -1:
|
||||
# videos, newvideo
|
||||
cursor = data.get("next_cursor")
|
||||
if cursor:
|
||||
if cursor == -1:
|
||||
return
|
||||
params["cursor"] = data["next_cursor"]
|
||||
elif "page" in params: # home, article
|
||||
params["page"] += 1
|
||||
elif data["since_id"]: # album
|
||||
params["cursor"] = cursor
|
||||
continue
|
||||
|
||||
# album
|
||||
since_id = data.get("since_id")
|
||||
if since_id:
|
||||
params["sinceid"] = data["since_id"]
|
||||
else: # feed, last album page
|
||||
try:
|
||||
params["since_id"] = statuses[-1]["id"] - 1
|
||||
except KeyError:
|
||||
continue
|
||||
|
||||
# home, article
|
||||
if "page" in params:
|
||||
if not statuses:
|
||||
return
|
||||
params["page"] += 1
|
||||
continue
|
||||
|
||||
# feed, last album page
|
||||
try:
|
||||
params["since_id"] = statuses[-1]["id"] - 1
|
||||
except LookupError:
|
||||
return
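The pagination rewrite picks the next request parameters in a fixed order: a "next_cursor" (video endpoints), then "since_id" (albums), then a page counter (home/article), and finally a since_id derived from the last status (feed). A sketch of that decision as a standalone function, returning None when pagination should stop; the function name is illustrative:

    def next_weibo_params(data, statuses, params):
        """Update 'params' in place for the next page, or return None."""
        cursor = data.get("next_cursor")
        if cursor:                      # videos, newvideo
            if cursor == -1:
                return None
            params["cursor"] = cursor
            return params

        since_id = data.get("since_id")
        if since_id:                    # album
            params["sinceid"] = since_id
            return params

        if "page" in params:            # home, article
            if not statuses:
                return None
            params["page"] += 1
            return params

        try:                            # feed, last album page
            params["since_id"] = statuses[-1]["id"] - 1
        except LookupError:
            return None
        return params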
|
||||
|
||||
def _sina_visitor_system(self, response):
|
||||
self.log.info("Sina Visitor System")
|
||||
|
@ -27,9 +27,9 @@ class WikimediaExtractor(BaseExtractor):
|
||||
|
||||
if self.category == "wikimedia":
|
||||
self.category = self.root.split(".")[-2]
|
||||
elif self.category == "fandom":
|
||||
self.category = \
|
||||
"fandom-" + self.root.partition(".")[0].rpartition("/")[2]
|
||||
elif self.category in ("fandom", "wikigg"):
|
||||
self.category = "{}-{}".format(
|
||||
self.category, self.root.partition(".")[0].rpartition("/")[2])
|
||||
|
||||
if path.startswith("wiki/"):
|
||||
path = path[5:]
|
||||
@ -69,14 +69,18 @@ class WikimediaExtractor(BaseExtractor):
|
||||
|
||||
def items(self):
|
||||
for info in self._pagination(self.params):
|
||||
image = info["imageinfo"][0]
|
||||
try:
|
||||
image = info["imageinfo"][0]
|
||||
except LookupError:
|
||||
self.log.debug("Missing 'imageinfo' for %s", info)
|
||||
continue
|
||||
|
||||
image["metadata"] = {
|
||||
m["name"]: m["value"]
|
||||
for m in image["metadata"]}
|
||||
for m in image["metadata"] or ()}
|
||||
image["commonmetadata"] = {
|
||||
m["name"]: m["value"]
|
||||
for m in image["commonmetadata"]}
|
||||
for m in image["commonmetadata"] or ()}
|
||||
|
||||
filename = image["canonicaltitle"]
|
||||
image["filename"], _, image["extension"] = \
|
||||
@ -148,6 +152,10 @@ BASE_PATTERN = WikimediaExtractor.update({
|
||||
"root": None,
|
||||
"pattern": r"[\w-]+\.fandom\.com",
|
||||
},
|
||||
"wikigg": {
|
||||
"root": None,
|
||||
"pattern": r"\w+\.wiki\.gg",
|
||||
},
|
||||
"mariowiki": {
|
||||
"root": "https://www.mariowiki.com",
|
||||
"pattern": r"(?:www\.)?mariowiki\.com",
|
||||
|
@ -243,13 +243,12 @@ class TemplateFStringFormatter(FStringFormatter):
|
||||
|
||||
|
||||
def parse_field_name(field_name):
|
||||
if field_name[0] == "'":
|
||||
return "_lit", (operator.itemgetter(field_name[1:-1]),)
|
||||
|
||||
first, rest = _string.formatter_field_name_split(field_name)
|
||||
funcs = []
|
||||
|
||||
if first[0] == "'":
|
||||
funcs.append(operator.itemgetter(first[1:-1]))
|
||||
first = "_lit"
|
||||
|
||||
for is_attr, key in rest:
|
||||
if is_attr:
|
||||
func = operator.attrgetter
|
||||
@ -375,18 +374,18 @@ def _parse_offset(format_spec, default):
|
||||
fmt = _build_format_func(format_spec, default)
|
||||
|
||||
if not offset or offset == "local":
|
||||
is_dst = time.daylight and time.localtime().tm_isdst > 0
|
||||
offset = -(time.altzone if is_dst else time.timezone)
|
||||
def off(dt):
|
||||
local = time.localtime(util.datetime_to_timestamp(dt))
|
||||
return fmt(dt + datetime.timedelta(0, local.tm_gmtoff))
|
||||
else:
|
||||
hours, _, minutes = offset.partition(":")
|
||||
offset = 3600 * int(hours)
|
||||
if minutes:
|
||||
offset += 60 * (int(minutes) if offset > 0 else -int(minutes))
|
||||
offset = datetime.timedelta(0, offset)
|
||||
|
||||
offset = datetime.timedelta(seconds=offset)
|
||||
|
||||
def off(obj):
|
||||
return fmt(obj + offset)
|
||||
def off(obj):
|
||||
return fmt(obj + offset)
|
||||
return off
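The offset parser now handles "local" by converting each datetime through the local timezone instead of caching a single offset at startup, so DST changes inside a date range stay correct. A rough standalone equivalent; util.datetime_to_timestamp() is approximated here with a plain UTC conversion:

    import datetime
    import time

    def make_offset_formatter(offset, fmt="%Y-%m-%d %H:%M:%S"):
        """Return a callable that shifts a naive UTC datetime before formatting."""
        if not offset or offset == "local":
            def off(dt):
                ts = dt.replace(tzinfo=datetime.timezone.utc).timestamp()
                local = time.localtime(ts)
                return (dt + datetime.timedelta(
                    seconds=local.tm_gmtoff)).strftime(fmt)
            return off

        hours, _, minutes = offset.partition(":")
        seconds = 3600 * int(hours)
        if minutes:
            seconds += 60 * (int(minutes) if seconds > 0 else -int(minutes))
        delta = datetime.timedelta(seconds=seconds)
        return lambda dt: (dt + delta).strftime(fmt)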
|
||||
|
||||
|
||||
|
@ -11,10 +11,23 @@ import errno
|
||||
import logging
|
||||
import functools
|
||||
import collections
|
||||
from . import extractor, downloader, postprocessor
|
||||
from . import config, text, util, path, formatter, output, exception, version
|
||||
|
||||
from . import (
|
||||
extractor,
|
||||
downloader,
|
||||
postprocessor,
|
||||
archive,
|
||||
config,
|
||||
exception,
|
||||
formatter,
|
||||
output,
|
||||
path,
|
||||
text,
|
||||
util,
|
||||
version,
|
||||
)
|
||||
from .extractor.message import Message
|
||||
from .output import stdout_write
|
||||
stdout_write = output.stdout_write
|
||||
|
||||
|
||||
class Job():
|
||||
@ -423,6 +436,8 @@ class DownloadJob(Job):
|
||||
|
||||
def handle_finalize(self):
|
||||
if self.archive:
|
||||
if not self.status:
|
||||
self.archive.finalize()
|
||||
self.archive.close()
|
||||
|
||||
pathfmt = self.pathfmt
|
||||
@ -453,9 +468,12 @@ class DownloadJob(Job):
|
||||
for callback in self.hooks["skip"]:
|
||||
callback(pathfmt)
|
||||
if self._skipexc:
|
||||
self._skipcnt += 1
|
||||
if self._skipcnt >= self._skipmax:
|
||||
raise self._skipexc()
|
||||
if not self._skipftr or self._skipftr(pathfmt.kwdict):
|
||||
self._skipcnt += 1
|
||||
if self._skipcnt >= self._skipmax:
|
||||
raise self._skipexc()
|
||||
else:
|
||||
self._skipcnt = 0
|
||||
|
||||
def download(self, url):
|
||||
"""Download 'url'"""
|
||||
@ -507,23 +525,28 @@ class DownloadJob(Job):
|
||||
# monkey-patch method to do nothing and always return True
|
||||
self.download = pathfmt.fix_extension
|
||||
|
||||
archive = cfg("archive")
|
||||
if archive:
|
||||
archive = util.expand_path(archive)
|
||||
archive_path = cfg("archive")
|
||||
if archive_path:
|
||||
archive_path = util.expand_path(archive_path)
|
||||
archive_format = (cfg("archive-prefix", extr.category) +
|
||||
cfg("archive-format", extr.archive_fmt))
|
||||
archive_pragma = (cfg("archive-pragma"))
|
||||
try:
|
||||
if "{" in archive:
|
||||
archive = formatter.parse(archive).format_map(kwdict)
|
||||
self.archive = util.DownloadArchive(
|
||||
archive, archive_format, archive_pragma)
|
||||
if "{" in archive_path:
|
||||
archive_path = formatter.parse(
|
||||
archive_path).format_map(kwdict)
|
||||
if cfg("archive-mode") == "memory":
|
||||
archive_cls = archive.DownloadArchiveMemory
|
||||
else:
|
||||
archive_cls = archive.DownloadArchive
|
||||
self.archive = archive_cls(
|
||||
archive_path, archive_format, archive_pragma)
|
||||
except Exception as exc:
|
||||
extr.log.warning(
|
||||
"Failed to open download archive at '%s' (%s: %s)",
|
||||
archive, exc.__class__.__name__, exc)
|
||||
archive_path, exc.__class__.__name__, exc)
|
||||
else:
|
||||
extr.log.debug("Using download archive '%s'", archive)
|
||||
extr.log.debug("Using download archive '%s'", archive_path)
|
||||
|
||||
skip = cfg("skip", True)
|
||||
if skip:
|
||||
@ -539,6 +562,12 @@ class DownloadJob(Job):
|
||||
elif skip == "exit":
|
||||
self._skipexc = SystemExit
|
||||
self._skipmax = text.parse_int(smax)
|
||||
|
||||
skip_filter = cfg("skip-filter")
|
||||
if skip_filter:
|
||||
self._skipftr = util.compile_expression(skip_filter)
|
||||
else:
|
||||
self._skipftr = None
|
||||
else:
|
||||
# monkey-patch methods to always return False
|
||||
pathfmt.exists = lambda x=None: False
|
||||
|
@ -249,6 +249,12 @@ def build_parser():
|
||||
action="store_const", const=logging.ERROR,
|
||||
help="Activate quiet mode",
|
||||
)
|
||||
output.add_argument(
|
||||
"-w", "--warning",
|
||||
dest="loglevel",
|
||||
action="store_const", const=logging.WARNING,
|
||||
help="Print only warnings and errors",
|
||||
)
|
||||
output.add_argument(
|
||||
"-v", "--verbose",
|
||||
dest="loglevel",
|
||||
@ -319,6 +325,11 @@ def build_parser():
|
||||
help=("Write downloaded intermediary pages to files "
|
||||
"in the current directory to debug problems"),
|
||||
)
|
||||
output.add_argument(
|
||||
"--no-colors",
|
||||
dest="colors", action="store_false",
|
||||
help=("Do not emit ANSI color codes in output"),
|
||||
)
|
||||
|
||||
downloader = parser.add_argument_group("Downloader Options")
|
||||
downloader.add_argument(
|
||||
|
@ -15,12 +15,40 @@ import unicodedata
|
||||
from . import config, util, formatter
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# Globals
|
||||
|
||||
COLORS = not os.environ.get("NO_COLOR")
|
||||
COLORS_DEFAULT = {
|
||||
"success": "1;32",
|
||||
"skip" : "2",
|
||||
"debug" : "0;37",
|
||||
"info" : "1;37",
|
||||
"warning": "1;33",
|
||||
"error" : "1;31",
|
||||
} if COLORS else {}
|
||||
|
||||
if util.WINDOWS:
|
||||
ANSI = COLORS and os.environ.get("TERM") == "ANSI"
|
||||
OFFSET = 1
|
||||
CHAR_SKIP = "# "
|
||||
CHAR_SUCCESS = "* "
|
||||
CHAR_ELLIPSIES = "..."
|
||||
else:
|
||||
ANSI = COLORS
|
||||
OFFSET = 0
|
||||
CHAR_SKIP = "# "
|
||||
CHAR_SUCCESS = "✔ "
|
||||
CHAR_ELLIPSIES = "…"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
# Logging
|
||||
|
||||
LOG_FORMAT = "[{name}][{levelname}] {message}"
|
||||
LOG_FORMAT_DATE = "%Y-%m-%d %H:%M:%S"
|
||||
LOG_LEVEL = logging.INFO
|
||||
LOG_LEVELS = ("debug", "info", "warning", "error")
|
||||
|
||||
|
||||
class Logger(logging.Logger):
|
||||
@ -129,7 +157,7 @@ class Formatter(logging.Formatter):
|
||||
|
||||
def __init__(self, fmt, datefmt):
|
||||
if isinstance(fmt, dict):
|
||||
for key in ("debug", "info", "warning", "error"):
|
||||
for key in LOG_LEVELS:
|
||||
value = fmt[key] if key in fmt else LOG_FORMAT
|
||||
fmt[key] = (formatter.parse(value).format_map,
|
||||
"{asctime" in value)
|
||||
@ -187,16 +215,36 @@ def configure_logging(loglevel):
|
||||
# stream logging handler
|
||||
handler = root.handlers[0]
|
||||
opts = config.interpolate(("output",), "log")
|
||||
|
||||
colors = config.interpolate(("output",), "colors")
|
||||
if colors is None:
|
||||
colors = COLORS_DEFAULT
|
||||
if colors and not opts:
|
||||
opts = LOG_FORMAT
|
||||
|
||||
if opts:
|
||||
if isinstance(opts, str):
|
||||
opts = {"format": opts}
|
||||
if handler.level == LOG_LEVEL and "level" in opts:
|
||||
logfmt = opts
|
||||
opts = {}
|
||||
elif "format" in opts:
|
||||
logfmt = opts["format"]
|
||||
else:
|
||||
logfmt = LOG_FORMAT
|
||||
|
||||
if not isinstance(logfmt, dict) and colors:
|
||||
ansifmt = "\033[{}m{}\033[0m".format
|
||||
lf = {}
|
||||
for level in LOG_LEVELS:
|
||||
c = colors.get(level)
|
||||
lf[level] = ansifmt(c, logfmt) if c else logfmt
|
||||
logfmt = lf
|
||||
|
||||
handler.setFormatter(Formatter(
|
||||
logfmt, opts.get("format-date", LOG_FORMAT_DATE)))
|
||||
|
||||
if "level" in opts and handler.level == LOG_LEVEL:
|
||||
handler.setLevel(opts["level"])
|
||||
if "format" in opts or "format-date" in opts:
|
||||
handler.setFormatter(Formatter(
|
||||
opts.get("format", LOG_FORMAT),
|
||||
opts.get("format-date", LOG_FORMAT_DATE),
|
||||
))
|
||||
|
||||
if minlevel > handler.level:
|
||||
minlevel = handler.level
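configure_logging() now derives per-level format strings by wrapping the base format in each level's ANSI SGR code when colors are enabled. A compact sketch of that wrapping step; the color codes mirror COLORS_DEFAULT above:

    def colorize_formats(base_format, colors):
        """Map every log level to a color-wrapped copy of 'base_format'."""
        ansifmt = "\033[{}m{}\033[0m".format
        return {level: ansifmt(code, base_format) if code else base_format
                for level, code in colors.items()}

    # example
    formats = colorize_formats(
        "[{name}][{levelname}] {message}",
        {"debug": "0;37", "info": "1;37", "warning": "1;33", "error": "1;31"})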
|
||||
|
||||
@ -307,9 +355,12 @@ def select():
|
||||
mode = config.get(("output",), "mode")
|
||||
|
||||
if mode is None or mode == "auto":
|
||||
if hasattr(sys.stdout, "isatty") and sys.stdout.isatty():
|
||||
output = ColorOutput() if ANSI else TerminalOutput()
|
||||
else:
|
||||
try:
|
||||
if sys.stdout.isatty():
|
||||
output = ColorOutput() if ANSI else TerminalOutput()
|
||||
else:
|
||||
output = PipeOutput()
|
||||
except Exception:
|
||||
output = PipeOutput()
|
||||
elif isinstance(mode, dict):
|
||||
output = CustomOutput(mode)
|
||||
@ -388,7 +439,10 @@ class ColorOutput(TerminalOutput):
|
||||
def __init__(self):
|
||||
TerminalOutput.__init__(self)
|
||||
|
||||
colors = config.get(("output",), "colors") or {}
|
||||
colors = config.interpolate(("output",), "colors")
|
||||
if colors is None:
|
||||
colors = COLORS_DEFAULT
|
||||
|
||||
self.color_skip = "\033[{}m".format(
|
||||
colors.get("skip", "2"))
|
||||
self.color_success = "\r\033[{}m".format(
|
||||
@ -514,17 +568,3 @@ def shorten_string_eaw(txt, limit, sep="…", cache=EAWCache()):
|
||||
right -= 1
|
||||
|
||||
return txt[:left] + sep + txt[right+1:]
|
||||
|
||||
|
||||
if util.WINDOWS:
|
||||
ANSI = os.environ.get("TERM") == "ANSI"
|
||||
OFFSET = 1
|
||||
CHAR_SKIP = "# "
|
||||
CHAR_SUCCESS = "* "
|
||||
CHAR_ELLIPSIES = "..."
|
||||
else:
|
||||
ANSI = True
|
||||
OFFSET = 0
|
||||
CHAR_SKIP = "# "
|
||||
CHAR_SUCCESS = "✔ "
|
||||
CHAR_ELLIPSIES = "…"
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
"""Common classes and constants used by postprocessor modules."""
|
||||
|
||||
from .. import util, formatter
|
||||
from .. import util, formatter, archive
|
||||
|
||||
|
||||
class PostProcessor():
|
||||
@ -22,30 +22,31 @@ class PostProcessor():
|
||||
return self.__class__.__name__
|
||||
|
||||
def _init_archive(self, job, options, prefix=None):
|
||||
archive = options.get("archive")
|
||||
if archive:
|
||||
archive_path = options.get("archive")
|
||||
if archive_path:
|
||||
extr = job.extractor
|
||||
archive = util.expand_path(archive)
|
||||
archive_path = util.expand_path(archive_path)
|
||||
if not prefix:
|
||||
prefix = "_" + self.name.upper() + "_"
|
||||
archive_format = (
|
||||
options.get("archive-prefix", extr.category) +
|
||||
options.get("archive-format", prefix + extr.archive_fmt))
|
||||
try:
|
||||
if "{" in archive:
|
||||
archive = formatter.parse(archive).format_map(
|
||||
if "{" in archive_path:
|
||||
archive_path = formatter.parse(archive_path).format_map(
|
||||
job.pathfmt.kwdict)
|
||||
self.archive = util.DownloadArchive(
|
||||
archive, archive_format,
|
||||
self.archive = archive.DownloadArchive(
|
||||
archive_path, archive_format,
|
||||
options.get("archive-pragma"),
|
||||
"_archive_" + self.name)
|
||||
except Exception as exc:
|
||||
self.log.warning(
|
||||
"Failed to open %s archive at '%s' (%s: %s)",
|
||||
self.name, archive, exc.__class__.__name__, exc)
|
||||
self.name, archive_path, exc.__class__.__name__, exc)
|
||||
else:
|
||||
self.log.debug("Using %s archive '%s'", self.name, archive)
|
||||
self.log.debug(
|
||||
"Using %s archive '%s'", self.name, archive_path)
|
||||
return True
|
||||
else:
|
||||
self.archive = None
|
||||
|
||||
self.archive = None
|
||||
return False
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
from .common import PostProcessor
|
||||
from .. import util, formatter
|
||||
import subprocess
|
||||
import os
|
||||
import re
|
||||
|
||||
@ -80,14 +79,14 @@ class ExecPP(PostProcessor):
|
||||
|
||||
def _exec(self, args, shell):
|
||||
self.log.debug("Running '%s'", args)
|
||||
retcode = subprocess.Popen(args, shell=shell).wait()
|
||||
retcode = util.Popen(args, shell=shell).wait()
|
||||
if retcode:
|
||||
self.log.warning("'%s' returned with non-zero exit status (%d)",
|
||||
args, retcode)
|
||||
|
||||
def _exec_async(self, args, shell):
|
||||
self.log.debug("Running '%s'", args)
|
||||
subprocess.Popen(args, shell=shell)
|
||||
util.Popen(args, shell=shell)
|
||||
|
||||
def _replace(self, match):
|
||||
name = match.group(1)
|
||||
|
@ -33,6 +33,9 @@ class MtimePP(PostProcessor):
|
||||
|
||||
def run(self, pathfmt):
|
||||
mtime = self._get(pathfmt.kwdict)
|
||||
if mtime is None:
|
||||
return
|
||||
|
||||
pathfmt.kwdict["_mtime"] = (
|
||||
util.datetime_to_timestamp(mtime)
|
||||
if isinstance(mtime, datetime) else
|
||||
|
@ -155,7 +155,9 @@ class UgoiraPP(PostProcessor):
|
||||
self.log.error("Unable to invoke FFmpeg (%s: %s)",
|
||||
exc.__class__.__name__, exc)
|
||||
pathfmt.realpath = pathfmt.temppath
|
||||
except Exception:
|
||||
except Exception as exc:
|
||||
print()
|
||||
self.log.error("%s: %s", exc.__class__.__name__, exc)
|
||||
pathfmt.realpath = pathfmt.temppath
|
||||
else:
|
||||
if self.mtime:
|
||||
@ -171,7 +173,7 @@ class UgoiraPP(PostProcessor):
|
||||
def _exec(self, args):
|
||||
self.log.debug(args)
|
||||
out = None if self.output else subprocess.DEVNULL
|
||||
retcode = subprocess.Popen(args, stdout=out, stderr=out).wait()
|
||||
retcode = util.Popen(args, stdout=out, stderr=out).wait()
|
||||
if retcode:
|
||||
print()
|
||||
self.log.error("Non-zero exit status when running %s (%s)",
|
||||
|
@ -73,7 +73,7 @@ def filename_from_url(url):
|
||||
"""Extract the last part of an URL to use as a filename"""
|
||||
try:
|
||||
return url.partition("?")[0].rpartition("/")[2]
|
||||
except (TypeError, AttributeError):
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
@ -122,7 +122,7 @@ def extract(txt, begin, end, pos=0):
|
||||
first = txt.index(begin, pos) + len(begin)
|
||||
last = txt.index(end, first)
|
||||
return txt[first:last], last+len(end)
|
||||
except (ValueError, TypeError, AttributeError):
|
||||
except Exception:
|
||||
return None, pos
|
||||
|
||||
|
||||
@ -131,7 +131,7 @@ def extr(txt, begin, end, default=""):
|
||||
try:
|
||||
first = txt.index(begin) + len(begin)
|
||||
return txt[first:txt.index(end, first)]
|
||||
except (ValueError, TypeError, AttributeError):
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
|
||||
@ -141,7 +141,7 @@ def rextract(txt, begin, end, pos=-1):
|
||||
first = txt.rindex(begin, 0, pos)
|
||||
last = txt.index(end, first + lbeg)
|
||||
return txt[first + lbeg:last], first
|
||||
except (ValueError, TypeError, AttributeError):
|
||||
except Exception:
|
||||
return None, pos
|
||||
|
||||
|
||||
@ -167,7 +167,7 @@ def extract_iter(txt, begin, end, pos=0):
|
||||
last = index(end, first)
|
||||
pos = last + lend
|
||||
yield txt[first:last]
|
||||
except (ValueError, TypeError, AttributeError):
|
||||
except Exception:
|
||||
return
|
||||
|
||||
|
||||
@ -180,7 +180,7 @@ def extract_from(txt, pos=0, default=""):
|
||||
last = index(end, first)
|
||||
pos = last + len(end)
|
||||
return txt[first:last]
|
||||
except (ValueError, TypeError, AttributeError):
|
||||
except Exception:
|
||||
return default
|
||||
return extr
|
||||
|
||||
@ -200,7 +200,7 @@ def parse_bytes(value, default=0, suffixes="bkmgtp"):
|
||||
"""Convert a bytes-amount ("500k", "2.5M", ...) to int"""
|
||||
try:
|
||||
last = value[-1].lower()
|
||||
except (TypeError, LookupError):
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
if last in suffixes:
|
||||
@ -221,7 +221,7 @@ def parse_int(value, default=0):
|
||||
return default
|
||||
try:
|
||||
return int(value)
|
||||
except (ValueError, TypeError):
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
|
||||
@ -231,7 +231,7 @@ def parse_float(value, default=0.0):
|
||||
return default
|
||||
try:
|
||||
return float(value)
|
||||
except (ValueError, TypeError):
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
|
||||
@ -242,7 +242,7 @@ def parse_query(qs):
|
||||
for key, value in urllib.parse.parse_qsl(qs):
|
||||
if key not in result:
|
||||
result[key] = value
|
||||
except AttributeError:
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
|
||||
@ -251,7 +251,7 @@ def parse_timestamp(ts, default=None):
|
||||
"""Create a datetime object from a unix timestamp"""
|
||||
try:
|
||||
return datetime.datetime.utcfromtimestamp(int(ts))
|
||||
except (TypeError, ValueError, OverflowError):
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
|
||||
|
@ -16,7 +16,6 @@ import time
|
||||
import random
|
||||
import getpass
|
||||
import hashlib
|
||||
import sqlite3
|
||||
import binascii
|
||||
import datetime
|
||||
import functools
|
||||
@ -339,7 +338,7 @@ def extract_headers(response):
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def git_head():
|
||||
try:
|
||||
out, err = subprocess.Popen(
|
||||
out, err = Popen(
|
||||
("git", "rev-parse", "--short", "HEAD"),
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
@ -579,6 +578,33 @@ GLOBALS = {
|
||||
}
|
||||
|
||||
|
||||
if EXECUTABLE and hasattr(sys, "_MEIPASS"):
|
||||
# https://github.com/pyinstaller/pyinstaller/blob/develop/doc
|
||||
# /runtime-information.rst#ld_library_path--libpath-considerations
|
||||
_popen_env = os.environ.copy()
|
||||
|
||||
orig = _popen_env.get("LD_LIBRARY_PATH_ORIG")
|
||||
if orig is None:
|
||||
_popen_env.pop("LD_LIBRARY_PATH", None)
|
||||
else:
|
||||
_popen_env["LD_LIBRARY_PATH"] = orig
|
||||
|
||||
orig = _popen_env.get("DYLD_LIBRARY_PATH_ORIG")
|
||||
if orig is None:
|
||||
_popen_env.pop("DYLD_LIBRARY_PATH", None)
|
||||
else:
|
||||
_popen_env["DYLD_LIBRARY_PATH"] = orig
|
||||
|
||||
del orig
|
||||
|
||||
class Popen(subprocess.Popen):
|
||||
def __init__(self, args, **kwargs):
|
||||
kwargs["env"] = _popen_env
|
||||
subprocess.Popen.__init__(self, args, **kwargs)
|
||||
else:
|
||||
Popen = subprocess.Popen
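util.Popen exists because PyInstaller onefile builds prepend their bundle directory to LD_LIBRARY_PATH/DYLD_LIBRARY_PATH, which can break external tools such as FFmpeg launched from the frozen executable; the wrapper restores the original values before spawning. A standalone sketch of the same environment fix-up:

    import os
    import subprocess
    import sys

    def clean_popen_env():
        """Undo PyInstaller's library-path override for child processes."""
        env = os.environ.copy()
        if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):
            for name in ("LD_LIBRARY_PATH", "DYLD_LIBRARY_PATH"):
                orig = env.get(name + "_ORIG")
                if orig is None:
                    env.pop(name, None)
                else:
                    env[name] = orig
        return env

    # usage: subprocess.Popen(["ffmpeg", "-version"], env=clean_popen_env())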
|
||||
|
||||
|
||||
def compile_expression(expr, name="<expr>", globals=None):
|
||||
code_object = compile(expr, name, "eval")
|
||||
return functools.partial(eval, code_object, globals or GLOBALS)
|
||||
@ -825,46 +851,3 @@ class FilterPredicate():
|
||||
raise
|
||||
except Exception as exc:
|
||||
raise exception.FilterError(exc)
|
||||
|
||||
|
||||
class DownloadArchive():
|
||||
|
||||
def __init__(self, path, format_string, pragma=None,
|
||||
cache_key="_archive_key"):
|
||||
try:
|
||||
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
|
||||
except sqlite3.OperationalError:
|
||||
os.makedirs(os.path.dirname(path))
|
||||
con = sqlite3.connect(path, timeout=60, check_same_thread=False)
|
||||
con.isolation_level = None
|
||||
|
||||
from . import formatter
|
||||
self.keygen = formatter.parse(format_string).format_map
|
||||
self.close = con.close
|
||||
self.cursor = cursor = con.cursor()
|
||||
self._cache_key = cache_key
|
||||
|
||||
if pragma:
|
||||
for stmt in pragma:
|
||||
cursor.execute("PRAGMA " + stmt)
|
||||
|
||||
try:
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS archive "
|
||||
"(entry TEXT PRIMARY KEY) WITHOUT ROWID")
|
||||
except sqlite3.OperationalError:
|
||||
# fallback for missing WITHOUT ROWID support (#553)
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS archive "
|
||||
"(entry TEXT PRIMARY KEY)")
|
||||
|
||||
def check(self, kwdict):
|
||||
"""Return True if the item described by 'kwdict' exists in archive"""
|
||||
key = kwdict[self._cache_key] = self.keygen(kwdict)
|
||||
self.cursor.execute(
|
||||
"SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
|
||||
return self.cursor.fetchone()
|
||||
|
||||
def add(self, kwdict):
|
||||
"""Add item described by 'kwdict' to archive"""
|
||||
key = kwdict.get(self._cache_key) or self.keygen(kwdict)
|
||||
self.cursor.execute(
|
||||
"INSERT OR IGNORE INTO archive (entry) VALUES (?)", (key,))
|
||||
|
@ -6,4 +6,4 @@
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
__version__ = "1.26.9-dev"
|
||||
__version__ = "1.27.0-dev"
|
||||
|
3
pyproject.toml
Normal file
@ -0,0 +1,3 @@
|
||||
[build-system]
|
||||
requires = ["setuptools"]
|
||||
build-backend = "setuptools.build_meta"
|
@ -4,16 +4,37 @@
|
||||
"""Build a standalone executable using PyInstaller"""
|
||||
|
||||
import PyInstaller.__main__
|
||||
import argparse
|
||||
import util
|
||||
import os
|
||||
import sys
|
||||
|
||||
PyInstaller.__main__.run([
|
||||
"--onefile",
|
||||
"--console",
|
||||
"--name", "gallery-dl." + ("exe" if os.name == "nt" else "bin"),
|
||||
"--additional-hooks-dir", util.path("scripts"),
|
||||
"--distpath", util.path("dist"),
|
||||
"--workpath", util.path("build"),
|
||||
"--specpath", util.path("build"),
|
||||
util.path("gallery_dl", "__main__.py"),
|
||||
])
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-o", "--os")
|
||||
parser.add_argument("-a", "--arch")
|
||||
parser.add_argument("-e", "--extension")
|
||||
args = parser.parse_args()
|
||||
|
||||
name = "gallery-dl"
|
||||
if args.os:
|
||||
name = "{}_{}".format(name, args.os.partition("-")[0].lower())
|
||||
if args.arch == "x86":
|
||||
name += "_x86"
|
||||
if args.extension:
|
||||
name = "{}.{}".format(name, args.extension.lower())
|
||||
|
||||
PyInstaller.__main__.run([
|
||||
"--onefile",
|
||||
"--console",
|
||||
"--name", name,
|
||||
"--additional-hooks-dir", util.path("scripts"),
|
||||
"--distpath", util.path("dist"),
|
||||
"--workpath", util.path("build"),
|
||||
"--specpath", util.path("build"),
|
||||
util.path("gallery_dl", "__main__.py"),
|
||||
])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
@ -44,40 +44,52 @@ update-dev() {
|
||||
|
||||
build-python() {
|
||||
cd "${ROOTDIR}"
|
||||
echo Building bdist_wheel and sdist
|
||||
echo Building sdist and wheel
|
||||
|
||||
python setup.py bdist_wheel sdist
|
||||
python -m build
|
||||
}
|
||||
|
||||
build-linux() {
|
||||
cd "${ROOTDIR}"
|
||||
echo Building Linux executable
|
||||
|
||||
VENV_PATH="/tmp/venv"
|
||||
VENV_PYTHON="${VENV_PATH}/bin/python"
|
||||
|
||||
rm -rf "${VENV_PATH}"
|
||||
python -m virtualenv "${VENV_PATH}"
|
||||
|
||||
$VENV_PYTHON -m pip install requests requests[socks] yt-dlp pyyaml secretstorage pyinstaller
|
||||
$VENV_PYTHON ./scripts/pyinstaller.py
|
||||
build-vm 'ubuntu22.04' 'gallery-dl.bin'
|
||||
}
|
||||
|
||||
build-windows() {
|
||||
cd "${ROOTDIR}/dist"
|
||||
cd "${ROOTDIR}"
|
||||
echo Building Windows executable
|
||||
|
||||
# remove old executable
|
||||
rm -f "gallery-dl.exe"
|
||||
build-vm 'windows7_x86_sp1' 'gallery-dl.exe'
|
||||
}
|
||||
|
||||
# build windows exe in vm
|
||||
ln -fs "${ROOTDIR}" /tmp/
|
||||
vmstart "windows7_x86_sp1" &
|
||||
build-vm() {
|
||||
VMNAME="$1"
|
||||
BINNAME="$2"
|
||||
TMPPATH="/tmp/gallery-dl/dist/$BINNAME"
|
||||
|
||||
# launch VM
|
||||
vmstart "$VMNAME" &
|
||||
disown
|
||||
while [ ! -e "gallery-dl.exe" ] ; do
|
||||
|
||||
# copy source files
|
||||
mkdir -p /tmp/gallery-dl
|
||||
cp -a -t /tmp/gallery-dl -- \
|
||||
./gallery_dl ./scripts ./data ./setup.py ./README.rst
|
||||
|
||||
# remove old executable
|
||||
rm -f "./dist/$BINNAME"
|
||||
|
||||
# wait for new executable
|
||||
while [ ! -e "$TMPPATH" ] ; do
|
||||
sleep 5
|
||||
done
|
||||
sleep 2
|
||||
|
||||
# move
|
||||
mv "$TMPPATH" "./dist/$BINNAME"
|
||||
|
||||
rm -r /tmp/gallery-dl
|
||||
}
|
||||
|
||||
sign() {
|
||||
@ -100,6 +112,14 @@ changelog() {
|
||||
-e "s*\([( ]\)#\([0-9]\+\)*\1[#\2](https://github.com/mikf/gallery-dl/issues/\2)*g" \
|
||||
-e "s*^## \w\+\$*## ${NEWVERSION} - $(date +%Y-%m-%d)*" \
|
||||
"${CHANGELOG}"
|
||||
|
||||
mv "${CHANGELOG}" "${CHANGELOG}.orig"
|
||||
|
||||
# - remove all but the latest entries
|
||||
sed -n \
|
||||
-e '/^## /,/^$/ { /^$/q; p }' \
|
||||
"${CHANGELOG}.orig" \
|
||||
> "${CHANGELOG}"
|
||||
}
|
||||
|
||||
supportedsites() {
|
||||
@ -117,6 +137,7 @@ upload-git() {
|
||||
cd "${ROOTDIR}"
|
||||
echo Pushing changes to github
|
||||
|
||||
mv "${CHANGELOG}.orig" "${CHANGELOG}" || true
|
||||
git add "gallery_dl/version.py" "${README}" "${CHANGELOG}"
|
||||
git commit -S -m "release version ${NEWVERSION}"
|
||||
git tag -s -m "version ${NEWVERSION}" "v${NEWVERSION}"
|
||||
|
@ -143,6 +143,7 @@ CATEGORY_MAP = {
|
||||
"webmshare" : "webmshare",
|
||||
"webtoons" : "Webtoon",
|
||||
"wikiart" : "WikiArt.org",
|
||||
"wikigg" : "wiki.gg",
|
||||
"wikimediacommons": "Wikimedia Commons",
|
||||
"xbunkr" : "xBunkr",
|
||||
"xhamster" : "xHamster",
|
||||
@ -273,6 +274,10 @@ SUBCATEGORY_MAP = {
|
||||
"sexcom": {
|
||||
"pins": "User Pins",
|
||||
},
|
||||
"skeb": {
|
||||
"following" : "Followed Creators",
|
||||
"following-users": "Followed Users",
|
||||
},
|
||||
"smugmug": {
|
||||
"path": "Images from Users and Folders",
|
||||
},
|
||||
@ -337,12 +342,12 @@ URL_MAP = {
|
||||
|
||||
_OAUTH = '<a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a>'
|
||||
_COOKIES = '<a href="https://github.com/mikf/gallery-dl#cookies">Cookies</a>'
|
||||
_APIKEY_DB = \
|
||||
'<a href="configuration.rst#extractorderpibooruapi-key">API Key</a>'
|
||||
_APIKEY_WH = \
|
||||
'<a href="configuration.rst#extractorwallhavenapi-key">API Key</a>'
|
||||
_APIKEY_WY = \
|
||||
'<a href="configuration.rst#extractorweasylapi-key">API Key</a>'
|
||||
_APIKEY_DB = ('<a href="https://gdl-org.github.io/docs/configuration.html'
|
||||
'#extractor-derpibooru-api-key">API Key</a>')
|
||||
_APIKEY_WH = ('<a href="https://gdl-org.github.io/docs/configuration.html'
|
||||
'#extractor-wallhaven-api-key">API Key</a>')
|
||||
_APIKEY_WY = ('<a href="https://gdl-org.github.io/docs/configuration.html'
|
||||
'#extractor-weasyl-api-key">API Key</a>')
|
||||
|
||||
AUTH_MAP = {
|
||||
"aibooru" : "Supported",
|
||||
@ -350,11 +355,13 @@ AUTH_MAP = {
|
||||
"atfbooru" : "Supported",
|
||||
"baraag" : _OAUTH,
|
||||
"bluesky" : "Supported",
|
||||
"booruvar" : "Supported",
|
||||
"coomerparty" : "Supported",
|
||||
"danbooru" : "Supported",
|
||||
"derpibooru" : _APIKEY_DB,
|
||||
"deviantart" : _OAUTH,
|
||||
"e621" : "Supported",
|
||||
"e6ai" : "Supported",
|
||||
"e926" : "Supported",
|
||||
"e-hentai" : "Supported",
|
||||
"exhentai" : "Supported",
|
||||
@ -362,6 +369,7 @@ AUTH_MAP = {
|
||||
"fantia" : _COOKIES,
|
||||
"flickr" : _OAUTH,
|
||||
"furaffinity" : _COOKIES,
|
||||
"furbooru" : "API Key",
|
||||
"horne" : "Required",
|
||||
"idolcomplex" : "Supported",
|
||||
"imgbb" : "Supported",
|
||||
@ -382,7 +390,6 @@ AUTH_MAP = {
|
||||
"reddit" : _OAUTH,
|
||||
"sankaku" : "Supported",
|
||||
"seiga" : _COOKIES,
|
||||
"seisoparty" : "Supported",
|
||||
"smugmug" : _OAUTH,
|
||||
"subscribestar" : "Supported",
|
||||
"tapas" : "Supported",

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# This is the maintainence launcher for the snap, make necessary runtime environment changes to make the snap work here. You may also insert security confinement/deprecation/obsoletion notice of the snap here.
# This is the maintenance launcher for the snap, make necessary runtime environment changes to make the snap work here. You may also insert security confinement/deprecation/obsoletion notice of the snap here.

set \
-o errexit \

@ -37,7 +37,7 @@ plugs:
# Network access
network:

# For network service for recieving OAuth callback tokens
# For network service for receiving OAuth callback tokens
network-bind:

# Configuration access

@ -73,7 +73,7 @@ __tests__ = (
"#category": ("", "8chan", "board"),
"#class" : _8chan._8chanBoardExtractor,
"#pattern" : _8chan._8chanThreadExtractor.pattern,
"#count" : 27,
"#count" : range(24, 28),
},

{

@ -14,4 +14,12 @@ __tests__ = (
"#class" : wikimedia.WikimediaArticleExtractor,
},

{
"#url" : "https://azurlane.koumakan.jp/wiki/Louisville/Gallery",
"#comment" : "entries with missing 'imageinfo' (#5384)",
"#category": ("wikimedia", "azurlanewiki", "article"),
"#class" : wikimedia.WikimediaArticleExtractor,
"#count" : "> 10",
},

)

@ -12,7 +12,7 @@ __tests__ = (
"#url" : "https://julianbphotography.blogspot.com/2010/12/moon-rise.html",
"#category": ("blogger", "blogspot", "post"),
"#class" : blogger.BloggerPostExtractor,
"#urls" : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",
"#urls" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjH9WkPvLJq2moxKtyt3ieJZWSDFQwOi3PHRdlHVHEQHRwy-d86Jg6HWSMhxaa6EgvlXq-zDMmKM4kIPn27eJ9Hepk2X9e9HQhqwMfrT8RYTnFe65uexw7KSk5FdWHxRVp5crz3p_qph3Bj/s0/Icy-Moonrise---For-Web.jpg",

"blog": {
"date" : "dt:2010-11-21 18:19:42",

@ -43,7 +43,7 @@ __tests__ = (
"extension": "jpg",
"filename" : "Icy-Moonrise---For-Web",
"num" : 1,
"url" : "https://3.bp.blogspot.com/-zlJddJtJOUo/Tt4WooTPNtI/AAAAAAAABG8/dGT2cGp2E7Y/s0/Icy-Moonrise---For-Web.jpg",
"url" : "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjH9WkPvLJq2moxKtyt3ieJZWSDFQwOi3PHRdlHVHEQHRwy-d86Jg6HWSMhxaa6EgvlXq-zDMmKM4kIPn27eJ9Hepk2X9e9HQhqwMfrT8RYTnFe65uexw7KSk5FdWHxRVp5crz3p_qph3Bj/s0/Icy-Moonrise---For-Web.jpg",
},

{

@ -59,7 +59,7 @@ __tests__ = (
"#comment" : "new image domain (#2204)",
"#category": ("blogger", "blogspot", "post"),
"#class" : blogger.BloggerPostExtractor,
"#pattern" : "https://blogger.googleusercontent.com/img/a/.+=s0$",
"#pattern" : r"https://blogger\.googleusercontent\.com/img/.+=s0$",
"#count" : 8,
},

@ -67,7 +67,7 @@ __tests__ = (
"#url" : "https://julianbphotography.blogspot.com/",
"#category": ("blogger", "blogspot", "blog"),
"#class" : blogger.BloggerBlogExtractor,
"#pattern" : r"https://\d\.bp\.blogspot\.com/.*/s0/[^.]+\.jpg",
"#pattern" : r"https://blogger\.googleusercontent\.com/img/.+/s0/",
"#range" : "1-25",
"#count" : 25,
},

@ -133,6 +133,7 @@ __tests__ = (
"filename" : "bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri",
"height" : 630,
"indexedAt" : "2023-12-22T18:58:32.715Z",
"instance" : "bsky.app",
"labels" : [],
"likeCount" : int,
"num" : 1,

@ -153,7 +154,7 @@ __tests__ = (
"followersCount": int,
"followsCount" : int,
"handle" : "bsky.app",
"indexedAt" : "2023-12-22T18:54:12.339Z",
"indexedAt" : "2024-01-20T05:04:41.904Z",
"labels" : [],
"postsCount" : int,
},

@ -13,13 +13,12 @@ __tests__ = (
"#category": ("lolisafe", "bunkr", "album"),
"#class" : bunkr.BunkrAlbumExtractor,
"#urls" : "https://i-burger.bunkr.ru/test-テスト-\"&>-QjgneIQv.png",
"#sha1_content": "f38b54b17cd7462e687b58d83f00fca88b1b105a",
"#sha1_content": "961b25d85b5f5bd18cbe3e847ac55925f14d0286",

"album_id" : "Lktg9Keq",
"album_name" : "test テスト \"&>",
"album_size" : "182 B",
"count" : 1,
"description": "",
"extension" : "png",
"file" : "https://i-burger.bunkr.ru/test-テスト-\"&>-QjgneIQv.png",
"filename" : "test-テスト-\"&>-QjgneIQv",

@ -43,7 +42,6 @@ __tests__ = (
"album_name" : "test2",
"album_size" : "561.6 KB",
"count" : 2,
"description": "",
"filename" : r"re:video-gLn1hgpw|image-sZrQUeOx",
"id" : r"re:gLn1hgpw|sZrQUeOx",
"name" : r"re:video|image",

@ -15,12 +15,32 @@ __tests__ = (
"#sha1_url": "e7d624aded15a069194e38dc731ec23217a422fb",
},

{
"#url" : "https://desuarchive.org/a",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},

{
"#url" : "https://desuarchive.org/a/",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},

{
"#url" : "https://desuarchive.org/a/2",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},

{
"#url" : "https://desuarchive.org/a/page/2",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
"#pattern" : foolfuuka.FoolfuukaThreadExtractor.pattern,
"#count" : 10,
},

{
"#url" : "https://desuarchive.org/_/search/text/test/",
"#category": ("foolfuuka", "desuarchive", "search"),

@ -252,6 +252,14 @@ __tests__ = (
),
},

{
"#url" : "https://deviantart.com/h3813067/avatar",
"#comment" : "default avatar (#5276)",
"#category": ("", "deviantart", "avatar"),
"#class" : deviantart.DeviantartAvatarExtractor,
"#count" : 0,
},

{
"#url" : "https://deviantart.com/gdldev/banner",
"#category": ("", "deviantart", "background"),

@ -300,7 +308,7 @@ __tests__ = (
"target" : dict,
"thumbs" : list,
"title" : "Banner",
"url" : "https://sta.sh/0198jippkeys",
"url" : "https://www.deviantart.com/stash/0198jippkeys",
"username" : "gdldev",
},

@ -352,13 +360,38 @@ __tests__ = (
"#class" : deviantart.DeviantartFolderExtractor,
},

{
"#url" : "https://www.deviantart.com/stash/022c83odnaxc",
"#category": ("", "deviantart", "stash"),
"#class" : deviantart.DeviantartStashExtractor,
"#pattern" : r"https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/940f2d05-c5eb-4917-8192-7eb6a2d508c6/dcvdmbc-e506cdcf-3208-4c20-85ab-0bfa8a7bcb16.png\?token=ey.+",
"#count" : 1,
"#sha1_content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",

"content": {
"filename": "01_by_justatest235723_dcvdmbc.png",
"filesize": 380,
"width" : 128,
"height" : 128,
"src" : r"re:https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/940f2d05-c5eb-4917-8192-7eb6a2d508c6/dcvdmbc-e506cdcf-3208-4c20-85ab-0bfa8a7bcb16.png\?token=ey.+",
},
"da_category" : "Uncategorized",
"date" : "dt:2018-12-26 14:49:27",
"deviationid" : "A4A6AD52-8857-46EE-ABFE-86D49D4FF9D0",
"download_filesize": 380,
"extension" : "png",
"filename" : "01_by_justatest235723-dcvdmbc",
"index" : 778297656,
"index_base36" : "cvdmbc",
"published_time": 1545835767,
"title" : "01",
"url" : "https://www.deviantart.com/stash/022c83odnaxc",
},

{
"#url" : "https://sta.sh/022c83odnaxc",
"#category": ("", "deviantart", "stash"),
"#class" : deviantart.DeviantartStashExtractor,
"#pattern" : r"https://wixmp-[^.]+\.wixmp\.com/f/.+/.+\.png\?token=.+",
"#count" : 1,
"#sha1_content": "057eb2f2861f6c8a96876b13cca1a4b7a408c11f",
},

{

@ -556,7 +589,7 @@ __tests__ = (

"index" : int,
"index_base36": r"re:^[0-9a-z]+$",
"url" : r"re:^https://sta.sh",
"url" : r"re:^https://www.deviantart.com/stash/\w+",
},

{

@ -83,6 +83,15 @@ __tests__ = (
"width" : 728,
},

{
"#url" : "https://hearthstone.fandom.com/wiki/Flame_Juggler",
"#comment" : "empty 'metadata'",
"#category": ("wikimedia", "fandom-hearthstone", "article"),
"#class" : wikimedia.WikimediaArticleExtractor,

"metadata" : {},
},

{
"#url" : "https://projectsekai.fandom.com/wiki/Project_SEKAI_Wiki",
"#category": ("wikimedia", "fandom-projectsekai", "article"),

@ -121,6 +121,24 @@ __tests__ = (
"#class" : furaffinity.FuraffinityPostExtractor,
},

{
"#url" : "https://fxfuraffinity.net/view/21835115/",
"#category": ("", "furaffinity", "post"),
"#class" : furaffinity.FuraffinityPostExtractor,
},

{
"#url" : "https://xfuraffinity.net/view/21835115/",
"#category": ("", "furaffinity", "post"),
"#class" : furaffinity.FuraffinityPostExtractor,
},

{
"#url" : "https://fxraffinity.net/view/21835115/",
"#category": ("", "furaffinity", "post"),
"#class" : furaffinity.FuraffinityPostExtractor,
},

{
"#url" : "https://sfw.furaffinity.net/view/21835115/",
"#category": ("", "furaffinity", "post"),

@ -39,6 +39,22 @@ __tests__ = (
"#sha1_url": "845a61aa1f90fb4ced841e8b7e62098be2e967bf",
},

{
"#url" : "https://gelbooru.com/index.php?page=post&s=list&tags=id:>=67800+id:<=68000",
"#comment" : "meta tags (#5478)",
"#category": ("booru", "gelbooru", "tag"),
"#class" : gelbooru.GelbooruTagExtractor,
"#count" : 187,
},

{
"#url" : "https://gelbooru.com/index.php?page=post&s=list&tags=id:>=67800+id:<=68000+sort:id:asc",
"#comment" : "meta + sort tags (#5478)",
"#category": ("booru", "gelbooru", "tag"),
"#class" : gelbooru.GelbooruTagExtractor,
"#count" : 187,
},

{
"#url" : "https://gelbooru.com/index.php?page=pool&s=show&id=761",
"#category": ("booru", "gelbooru", "pool"),

@ -47,10 +63,30 @@ __tests__ = (
},

{
"#url" : "https://gelbooru.com/index.php?page=favorites&s=view&id=279415",
"#url" : "https://gelbooru.com/index.php?page=favorites&s=view&id=1435674",
"#category": ("booru", "gelbooru", "favorite"),
"#class" : gelbooru.GelbooruFavoriteExtractor,
"#count" : 3,
"#urls" : (
"https://img3.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
"https://img3.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
"https://img3.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
"https://img3.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
"https://img3.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
),
},

{
"#url" : "https://gelbooru.com/index.php?page=favorites&s=view&id=1435674",
"#category": ("booru", "gelbooru", "favorite"),
"#class" : gelbooru.GelbooruFavoriteExtractor,
"#options" : {"order-posts": "reverse"},
"#urls" : (
"https://img3.gelbooru.com/images/e6/6d/e66d8883c184f5d3b2591dfcdf0d007c.jpg",
"https://img3.gelbooru.com/images/c1/fe/c1fe59c0bc8ce955dd353544b1015d0c.jpg",
"https://img3.gelbooru.com/images/c8/26/c826f3cb90d9aaca8d0632a96bf4abe8.jpg",
"https://img3.gelbooru.com/images/4c/2d/4c2da867ed643acdadd8105177dcdaf0.png",
"https://img3.gelbooru.com/images/5d/30/5d30fc056ed8668616b3c440df9bac89.jpg",
),
},
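
The second favorites test above exercises the new "order-posts" option. A hedged sketch of the equivalent user configuration, written as a Python literal mirroring the JSON config file; the exact option scope is an assumption:

options = {
    "extractor": {
        "gelbooru": {
            "favorite": {
                # assumed scope; reverses the order in which favorited posts are yielded
                "order-posts": "reverse",
            },
        },
    },
}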

{

@ -29,10 +29,11 @@ __tests__ = (
},

{
"#url" : "https://www.hentai-foundry.com/pictures/user/Evulchibi/scraps",
"#url" : "https://www.hentai-foundry.com/pictures/user/Ethevian/scraps",
"#category": ("", "hentaifoundry", "scraps"),
"#class" : hentaifoundry.HentaifoundryScrapsExtractor,
"#sha1_url": "7cd9c6ec6258c4ab8c44991f7731be82337492a7",
"#pattern" : r"https://pictures\.hentai-foundry\.com/e/Ethevian/.+",
"#count" : ">= 10",
},

{

@ -9,7 +9,7 @@ from gallery_dl.extractor import hiperdex

__tests__ = (
{
"#url" : "https://hiperdex.com/manga/domestic-na-kanojo/154-5/",
"#url" : "https://hiperdex.com/mangas/domestic-na-kanojo/154-5/",
"#category": ("", "hiperdex", "chapter"),
"#class" : hiperdex.HiperdexChapterExtractor,
"#pattern" : r"https://(1st)?hiperdex\d?.(com|net|info)/wp-content/uploads/WP-manga/data/manga_\w+/[0-9a-f]{32}/\d+\.webp",

@ -27,6 +27,12 @@ __tests__ = (
"type" : "Manga",
},

{
"#url" : "https://hiperdex.com/manga/domestic-na-kanojo/154-5/",
"#category": ("", "hiperdex", "chapter"),
"#class" : hiperdex.HiperdexChapterExtractor,
},

{
"#url" : "https://1sthiperdex.com/manga/domestic-na-kanojo/154-5/",
"#category": ("", "hiperdex", "chapter"),

@ -5,6 +5,7 @@
# published by the Free Software Foundation.

from gallery_dl.extractor import hitomi
from gallery_dl import exception


__tests__ = (

@ -47,9 +48,7 @@ __tests__ = (
"#comment" : "gallery with 'broken' redirect",
"#category": ("", "hitomi", "gallery"),
"#class" : hitomi.HitomiGalleryExtractor,
"#options" : {"format": "original"},
"#pattern" : r"https://[a-c]b\.hitomi\.la/images/\d+/\d+/[0-9a-f]{64}\.jpg",
"#count" : 10,
"#exception": exception.NotFoundError,
},

{

@ -42,7 +42,7 @@ __tests__ = (
},

{
"#url" : "https://idol.sankakucomplex.com/pools/show/145",
"#url" : "https://idol.sankakucomplex.com/en/pools/e9PMwnwRBK3",
"#category": ("booru", "idolcomplex", "pool"),
"#class" : idolcomplex.IdolcomplexPoolExtractor,
"#count" : 3,

@ -72,16 +72,16 @@ __tests__ = (
"file_url" : r"re:https://i[sv]\.sankakucomplex\.com/data/50/9e/509eccbba54a43cea6b275a65b93c51d\.jpg\?",
"filename" : "509eccbba54a43cea6b275a65b93c51d",
"height" : 683,
"id" : 694215,
"id_alnum" : "vkr36qdOaZ4",
"id" : "vkr36qdOaZ4", # legacy ID: 694215
"md5" : "509eccbba54a43cea6b275a65b93c51d",
"rating" : "g",
"tags" : "lyumos the_witcher shani_(the_witcher) 1girl green_eyes non-asian redhead waistcoat wreath cosplay 3:2_aspect_ratio",
"tags_character": "shani_(the_witcher)",
"tags_copyright": "the_witcher",
"tags_general" : "1girl green_eyes non-asian redhead waistcoat wreath",
"tags_genre" : "cosplay",
"tags_idol" : "lyumos",
"tags_medium" : "cosplay 3:2_aspect_ratio",
"tags_medium" : "3:2_aspect_ratio",
"vote_average" : range(4, 5),
"vote_count" : range(25, 40),
"width" : 1024,

@ -111,8 +111,7 @@ __tests__ = (
"#class" : idolcomplex.IdolcomplexPostExtractor,
"#sha1_content": "694ec2491240787d75bf5d0c75d0082b53a85afd",

"id" : 694215,
"id_alnum" : "vkr36qdOaZ4",
"id" : "vkr36qdOaZ4", # legacy ID: 694215
"tags_character": "shani_(the_witcher)",
"tags_copyright": "the_witcher",
"tags_idol" : str,

@ -120,11 +120,25 @@ __tests__ = (
"#sha1_url": "37822523e6e4a56feb9dea35653760c86b44ff89",
},

{
"#url" : "https://www.imagefap.com/organizer/613950/Grace-Stout",
"#category": ("", "imagefap", "folder"),
"#class" : imagefap.ImagefapFolderExtractor,
"#pattern" : imagefap.ImagefapGalleryExtractor.pattern,
"#count" : 31,

"title": r"re:Grace Stout .+",
},

{
"#url" : "https://www.imagefap.com/usergallery.php?userid=1981976&folderid=409758",
"#category": ("", "imagefap", "folder"),
"#class" : imagefap.ImagefapFolderExtractor,
"#sha1_url": "37822523e6e4a56feb9dea35653760c86b44ff89",
"#urls" : "https://www.imagefap.com/gallery/7876223",

"folder" : "Softcore",
"gallery_id": "7876223",
"title" : "Kelsi Monroe in lingerie",
},

{

@ -140,6 +154,8 @@ __tests__ = (
"#class" : imagefap.ImagefapFolderExtractor,
"#pattern" : imagefap.ImagefapGalleryExtractor.pattern,
"#range" : "1-40",

"folder": "Uncategorized",
},

{

@ -89,11 +89,10 @@ __tests__ = (
},

{
"#url" : "https://kemono.party/gumroad/user/trylsc/post/IURjT",
"#comment" : "kemono.party -> data.kemono.party",
"#url" : "https://kemono.su/gumroad/user/3101696181060/post/tOWyf",
"#category": ("", "kemonoparty", "gumroad"),
"#class" : kemonoparty.KemonopartyPostExtractor,
"#pattern" : r"https://kemono\.party/data/(a4/7b/a47bfe938d8c1682eef06e885927484cd8df1b.+\.jpg|c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)",
"#urls" : "https://kemono.su/data/6f/13/6f1394b19516396ea520254350662c254bbea30c1e111fd4b0f042c61c426d07.zip",
},

{

@ -136,6 +135,19 @@ __tests__ = (
}],
},

{
"#url" : "https://kemono.su/patreon/user/3161935/post/68231671",
"#comment" : "announcements",
"#category": ("", "kemonoparty", "patreon"),
"#class" : kemonoparty.KemonopartyPostExtractor,
"#options" : {"announcements": True},

"announcements": [{
"body": "<div><strong>Thank you so much for the support!</strong><strong><br></strong>This Patreon is more of a tip jar for supporting what I make. I have to clarify that there are <strong>no exclusive Patreon animations</strong> because all are released for the public. You will get earlier access to WIPs. Direct downloads to my works are also available for $5 and $10 Tiers.</div>",
"date": "2023-02",
}],
},
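
Announcements are only fetched when the option shown in "#options" above is enabled. A hedged configuration sketch (the scope under extractor.kemonoparty is an assumption, shown as a Python literal mirroring the JSON config):

options = {
    "extractor": {
        "kemonoparty": {
            # assumed scope; adds creator announcements to the post metadata
            "announcements": True,
        },
    },
}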

{
"#url" : "https://kemono.su/patreon/user/19623797/post/29035449",
"#comment" : "invalid file (#3510)",

@ -195,6 +207,7 @@ __tests__ = (
"hash" : "88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86",
"revision_id" : 142470,
"revision_index": 2,
"revision_count": 9,
"revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40",
},

@ -210,6 +223,7 @@ __tests__ = (
"hash" : "88521f71822dfa2f42df3beba319ea4fceda2a2d6dc59da0276a75238f743f86",
"revision_id" : 0,
"revision_index": 1,
"revision_count": 1,
"revision_hash" : "e0e93281495e151b11636c156e52bfe9234c2a40",
},

@ -224,6 +238,7 @@ __tests__ = (

"revision_id": range(134996, 3052965),
"revision_index": range(1, 9),
"revision_count": 9,
"revision_hash": "e0e93281495e151b11636c156e52bfe9234c2a40",
},

@ -246,6 +261,16 @@ __tests__ = (
"published": "2022-07-29T21:12:11.483000",
},

{
"#url" : "https://kemono.su/gumroad/user/3267960360326/post/jwwag",
"#comment" : "empty 'file' with no 'path' (#5368)",
"#category": ("", "kemonoparty", "gumroad"),
"#class" : kemonoparty.KemonopartyPostExtractor,
"#count" : 8,

"type" : "attachment",
},

{
"#url" : "https://kemono.su/discord/server/488668827274444803#608504710906904576",
"#category": ("", "kemonoparty", "discord"),

@ -340,8 +365,24 @@ __tests__ = (
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
"#pattern" : kemonoparty.KemonopartyUserExtractor.pattern,
"#auth" : True,
"#count" : 3,
"#sha1_url": "902c656c8002a3257ef9e255cb69bca1937373d4",
"#urls" : (
"https://kemono.su/patreon/user/881792",
"https://kemono.su/fanbox/user/6993449",
"https://kemono.su/subscribestar/user/alcorart",
),
},

{
"#url" : "https://kemono.su/favorites?type=artist&sort=faved_seq&order=asc",
"#category": ("", "kemonoparty", "favorite"),
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
"#pattern" : kemonoparty.KemonopartyUserExtractor.pattern,
"#auth" : True,
"#urls" : (
"https://kemono.su/fanbox/user/6993449",
"https://kemono.su/patreon/user/881792",
"https://kemono.su/subscribestar/user/alcorart",
),
},

{

@ -350,8 +391,24 @@ __tests__ = (
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
"#pattern" : kemonoparty.KemonopartyPostExtractor.pattern,
"#auth" : True,
"#count" : 3,
"#sha1_url": "4be8e84cb384a907a8e7997baaf6287b451783b5",
"#urls" : (
"https://kemono.su/subscribestar/user/alcorart/post/184329",
"https://kemono.su/fanbox/user/6993449/post/23913",
"https://kemono.su/patreon/user/881792/post/4769638",
),
},

{
"#url" : "https://kemono.su/favorites?type=post&sort=published&order=asc",
"#category": ("", "kemonoparty", "favorite"),
"#class" : kemonoparty.KemonopartyFavoriteExtractor,
"#pattern" : kemonoparty.KemonopartyPostExtractor.pattern,
"#auth" : True,
"#urls" : (
"https://kemono.su/patreon/user/881792/post/4769638",
"https://kemono.su/fanbox/user/6993449/post/23913",
"https://kemono.su/subscribestar/user/alcorart/post/184329",
),
},

)
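
The new favorite tests above request a fixed ordering through URL query parameters. A small illustrative sketch, not extractor code, of reading those parameters from the last test's URL:

from urllib.parse import parse_qs, urlparse

url = "https://kemono.su/favorites?type=post&sort=published&order=asc"
params = parse_qs(urlparse(url).query)
print(params["type"], params["sort"], params["order"])
# ['post'] ['published'] ['asc']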

@ -32,7 +32,7 @@ __tests__ = (
"#url" : "https://lensdump.com/i/tyoAyM",
"#category": ("", "lensdump", "image"),
"#class" : lensdump.LensdumpImageExtractor,
"#pattern" : r"https://c\.l3n\.co/i/tyoAyM\.webp",
"#urls" : "https://c.l3n.co/i/tyoAyM.webp",
"#sha1_content": "1aa749ed2c0cf679ec8e1df60068edaf3875de46",

"date" : "dt:2022-08-01 08:24:28",

@ -45,4 +45,32 @@ __tests__ = (
"width" : 620,
},

{
"#url" : "https://c.l3n.co/i/tyoAyM.webp",
"#category": ("", "lensdump", "image"),
"#class" : lensdump.LensdumpImageExtractor,
"#urls" : "https://c.l3n.co/i/tyoAyM.webp",

"date" : "dt:2022-08-01 08:24:28",
"extension": "webp",
"filename" : "tyoAyM",
"height" : 400,
"id" : "tyoAyM",
"title" : "MYOBI clovis bookcaseset",
"url" : "https://c.l3n.co/i/tyoAyM.webp",
"width" : 620,
},

{
"#url" : "https://i.lensdump.com/i/tyoAyM",
"#category": ("", "lensdump", "image"),
"#class" : lensdump.LensdumpImageExtractor,
},

{
"#url" : "https://i3.lensdump.com/i/tyoAyM",
"#category": ("", "lensdump", "image"),
"#class" : lensdump.LensdumpImageExtractor,
},

)

@ -18,4 +18,15 @@ __tests__ = (
"instance_remote": None,
},

{
"#url" : "mastodon:https://wanderingwires.net/@quarc/9qppkxzyd1ee3i9p",
"#comment" : "null moved account",
"#category": ("mastodon", "wanderingwires.net", "status"),
"#class" : mastodon.MastodonStatusExtractor,
"#urls" : "https://s3.wanderingwires.net/null/4377e826-72ab-4659-885c-fa12945eb207.png",

"instance": "wanderingwires.net",
"instance_remote": None,
},

)

@ -74,6 +74,33 @@ __tests__ = (
"#url" : "https://mastodon.social/bookmarks",
"#category": ("mastodon", "mastodon.social", "bookmark"),
"#class" : mastodon.MastodonBookmarkExtractor,
"#auth" : True,
"#urls" : "https://files.mastodon.social/media_attachments/files/111/331/603/082/304/823/original/e12cde371c88c1b0.png",
},

{
"#url" : "https://mastodon.social/favourites",
"#category": ("mastodon", "mastodon.social", "favorite"),
"#class" : mastodon.MastodonFavoriteExtractor,
"#auth" : True,
"#urls" : "https://files.mastodon.social/media_attachments/files/111/331/603/082/304/823/original/e12cde371c88c1b0.png",
},

{
"#url" : "https://mastodon.social/lists/92653",
"#category": ("mastodon", "mastodon.social", "list"),
"#class" : mastodon.MastodonListExtractor,
"#auth" : True,
"#pattern" : r"https://files\.mastodon\.social/media_attachments/files/(\d+/){3,}original/\w+",
"#range" : "1-10",
},

{
"#url" : "https://mastodon.social/tags/mastodon",
"#category": ("mastodon", "mastodon.social", "hashtag"),
"#class" : mastodon.MastodonHashtagExtractor,
"#pattern" : r"https://files\.mastodon\.social/media_attachments/files/(\d+/){3,}original/\w+",
"#range" : "1-10",
},

{

@ -82,9 +109,9 @@ __tests__ = (
"#class" : mastodon.MastodonFollowingExtractor,
"#extractor": False,
"#urls" : (
"https://mastodon.ie/@RustyBertrand",
"https://ravenation.club/@soundwarrior20",
"https://mastodon.social/@0x4f",
"https://mastodon.social/@RustyBertrand",
"https://mastodon.social/@christianselig",
"https://saturation.social/@clive",
"https://mastodon.social/@sjvn",

@ -137,4 +164,36 @@ __tests__ = (
"num" : int,
},

{
"#url" : "https://mastodon.social/@technewsbot@assortedflotsam.com/112360601113258881",
"#comment" : "card image",
"#category": ("mastodon", "mastodon.social", "status"),
"#class" : mastodon.MastodonStatusExtractor,
"#options" : {"cards": True},
"#urls" : "https://files.mastodon.social/cache/preview_cards/images/095/900/335/original/83f0b4a793c84123.jpg",

"media": {
"author_name" : "Tom Warren",
"author_url" : "https://www.theverge.com/authors/tom-warren",
"blurhash" : "UHBDWMCjVGM0k,XjnPM#0h+vkpb^RkjYSh$*",
"description" : "Microsoft’s big Xbox games showcase will take place on June 9th. It will include more games than last year and a special Call of Duty Direct will follow.",
"embed_url" : "",
"height" : 628,
"html" : "",
"id" : "card95900335",
"image" : "https://files.mastodon.social/cache/preview_cards/images/095/900/335/original/83f0b4a793c84123.jpg",
"image_description": "The Xbox showcase illustration",
"language" : "en",
"provider_name": "The Verge",
"provider_url": "",
"published_at": "2024-04-30T14:15:30.341Z",
"title" : "The Xbox games showcase airs June 9th, followed by a Call of Duty Direct",
"type" : "link",
"url" : "https://files.mastodon.social/cache/preview_cards/images/095/900/335/original/83f0b4a793c84123.jpg",
"weburl" : "https://www.theverge.com/2024/4/30/24145262/xbox-games-showcase-summer-2024-call-of-duty-direct",
"width" : 1200,
},
},

)

@ -21,7 +21,7 @@ __tests__ = (
"#url" : "https://misskey.design/@blooddj@pawoo.net",
"#category": ("misskey", "misskey.design", "user"),
"#class" : misskey.MisskeyUserExtractor,
"#count" : 7,
"#count" : "> 30",
},

{

@ -12,7 +12,7 @@ __tests__ = (
"#url" : "https://myhentaigallery.com/g/16247",
"#category": ("", "myhentaigallery", "gallery"),
"#class" : myhentaigallery.MyhentaigalleryGalleryExtractor,
"#pattern" : r"https://images\.myhentaicomics\.com/m\w\w/images/[^/]+/original/\d+\.jpg",
"#pattern" : r"https://(cdn|images)\.myhentaicomics\.com/m\w\w/images/[^/]+/original/\d+\.jpg",

"artist" : list,
"count" : 11,

@ -24,6 +24,39 @@ __tests__ = (
"#sha1_metadata": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e",
},

{
"#url" : "https://blog.naver.com/PostView.nhn?blogId=rlfqjxm0&logNo=70161391809",
"#comment" : "filenames in EUC-KR encoding (#5126)",
"#category": ("", "naver", "post"),
"#class" : naver.NaverPostExtractor,
"#urls": (
"https://blogfiles.pstatic.net/20130305_23/ping9303_1362411028002Dpz9z_PNG/1_사본.png",
"https://blogfiles.pstatic.net/20130305_46/rlfqjxm0_1362473322580x33zi_PNG/오마갓합작.png",
),

"blog": {
"id" : "rlfqjxm0",
"num" : 43030507,
"user": "에나",
},
"post": {
"date" : "dt:2013-03-05 17:48:00",
"description": " ◈ PROMOTER :핑수 ˚ 아담 EDITOR:핑수 넵:이크:핑수...",
"num" : 70161391809,
"title" : "[공유] { 합작} OH, MY GOD! ~ 아 또 무슨 종말을 한다 그래~",
},
"count" : 2,
"num" : range(1, 2),
"filename" : r"re:1_사본|오마갓합작",
"extension": "png",
},
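
The comment "filenames in EUC-KR encoding (#5126)" refers to names that arrive as percent-encoded EUC-KR bytes rather than UTF-8. A small illustrative sketch, not the extractor's actual code, round-tripping one of the filenames from the test above:

from urllib.parse import quote, unquote

name = "1_사본"
encoded = quote(name, encoding="euc-kr")            # percent-encoded EUC-KR bytes
assert unquote(encoded, encoding="euc-kr") == name  # decoding as UTF-8 instead would garble the name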

{
"#url" : "https://blog.naver.com/PostView.naver?blogId=rlfqjxm0&logNo=221430673006",
"#category": ("", "naver", "post"),
"#class" : naver.NaverPostExtractor,
},

{
"#url" : "https://blog.naver.com/gukjung",
"#category": ("", "naver", "blog"),

@ -42,4 +75,10 @@ __tests__ = (
"#count" : 12,
},

{
"#url" : "https://blog.naver.com/PostList.naver?blogId=gukjung",
"#category": ("", "naver", "blog"),
"#class" : naver.NaverBlogExtractor,
},

)

@ -109,7 +109,7 @@ __tests__ = (
"#category": ("", "naverwebtoon", "comic"),
"#class" : naverwebtoon.NaverwebtoonComicExtractor,
"#pattern" : naverwebtoon.NaverwebtoonEpisodeExtractor.pattern,
"#count" : 25,
"#count" : 24,
},

{

@ -15,11 +15,11 @@ __tests__ = (
},

{
"#url" : "https://www.omgmiamiswimwear.com/products/la-medusa-maxi-dress",
"#url" : "https://www.omgmiamiswimwear.com/products/snatch-me-waist-belt",
"#category": ("shopify", "omgmiamiswimwear", "product"),
"#class" : shopify.ShopifyProductExtractor,
"#pattern" : r"https://cdn\.shopify\.com/s/files/1/1819/6171/",
"#count" : 5,
"#count" : 3,
},

)

@ -163,6 +163,14 @@ __tests__ = (
"#count" : ">= 10",
},

{
"#url" : "https://www.pixiv.net/artworks/966412",
"#comment" : "limit_sanity_level_360.png (#4327, #5180)",
"#category": ("", "pixiv", "work"),
"#class" : pixiv.PixivWorkExtractor,
"#count" : 0,
},

{
"#url" : "https://www.pixiv.net/en/artworks/966412",
"#category": ("", "pixiv", "work"),

@ -459,11 +467,14 @@ __tests__ = (

{
"#url" : "https://www.pixiv.net/novel/show.php?id=16422450",
"#comment" : "embeds",
"#comment" : "embeds // covers (#5373)",
"#category": ("", "pixiv", "novel"),
"#class" : pixiv.PixivNovelExtractor,
"#options" : {"embeds": True},
"#count" : 3,
"#options" : {
"embeds": True,
"covers": True,
},
"#count" : 4,
},
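
Both options used by the novel test above can also be set persistently. A hedged configuration sketch (the extractor.pixiv.novel scope is an assumption, shown as a Python literal mirroring the JSON config):

options = {
    "extractor": {
        "pixiv": {
            "novel": {
                # assumed scope; mirrors the test's "#options"
                "embeds": True,
                "covers": True,
            },
        },
    },
}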

{

@ -62,9 +62,11 @@ __tests__ = (
"hardcore sex",
"babes 18 year",
],
"timestamp": "5:07",
"title" : "Intense sloppy blowjob of Danika Mori",
"url" : "https://el.phncdn.com/pics/gifs/043/726/891/43726891a.webm",
"user" : "Danika Mori",
"viewkey" : "64367c8c78a4a",
},

{