mirror of
https://github.com/mikf/gallery-dl.git
synced 2024-11-24 19:52:32 +01:00
parent
65d7cccaf9
commit
aa6d00613f
@ -92,7 +92,7 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Agnph</td>
|
||||
<td>AGNPH</td>
|
||||
<td>https://agn.ph/</td>
|
||||
<td>Posts, Tag Searches</td>
|
||||
<td></td>
|
||||
@ -145,6 +145,12 @@ Consider all listed sites to potentially be NSFW.
|
||||
<td>Albums, Files</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Ci-en</td>
|
||||
<td>https://ci-en.net/</td>
|
||||
<td>Articles, Creators, Followed Users, Recent Images</td>
|
||||
<td></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Comic Vine</td>
|
||||
<td>https://comicvine.gamespot.com/</td>
|
||||
|
@ -34,6 +34,7 @@ modules = [
|
||||
"bunkr",
|
||||
"catbox",
|
||||
"chevereto",
|
||||
"cien",
|
||||
"comicvine",
|
||||
"cyberdrop",
|
||||
"danbooru",
|
||||
|
168
gallery_dl/extractor/cien.py
Normal file
168
gallery_dl/extractor/cien.py
Normal file
@ -0,0 +1,168 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extractors for https://ci-en.net/"""
|
||||
|
||||
from .common import Extractor, Message
|
||||
from .. import text, util
|
||||
|
||||
BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)"
|
||||
|
||||
|
||||
class CienExtractor(Extractor):
    """Base class for ci-en extractors"""
    category = "cien"
    root = "https://ci-en.net"

    def __init__(self, match):
        # BASE_PATTERN accepts more than one domain, so the root is taken
        # from the URL that actually matched instead of the class default.
        self.root = text.root_from_url(match.group(0))
        Extractor.__init__(self, match)

    def _pagination_articles(self, url, params):
        """Yield a Queue message for each article card of a paged listing

        'params' is modified in place: its "page" entry is normalized to an
        integer (defaulting to 1) and incremented for every further page.
        Iteration ends at the first page without a ' rel="next"' marker.
        """
        queue_data = {"_extractor": CienArticleExtractor}
        params["page"] = text.parse_int(params.get("page"), 1)

        while True:
            html = self.request(url, params=params).text

            cards = text.extract_iter(
                html, ' class="c-cardCase-item', '</div>')
            for card in cards:
                yield Message.Queue, text.extr(card, ' href="', '"'), \
                    queue_data

            if ' rel="next"' in html:
                params["page"] += 1
            else:
                return
|
||||
|
||||
|
||||
class CienArticleExtractor(CienExtractor):
    """Extractor for the files of a single ci-en article"""
    subcategory = "article"
    filename_fmt = "{num:>02} {filename}.{extension}"
    directory_fmt = ("{category}", "{author[name]}", "{post_id} {name}")
    archive_fmt = "{post_id}_{num}"
    pattern = BASE_PATTERN + r"/creator/(\d+)/article/(\d+)"
    example = "https://ci-en.net/creator/123/article/12345"

    def items(self):
        """Yield one Directory message and one Url message per file

        Post metadata is the first object of the page's
        'application/ld+json' script block, extended with "post_id",
        "count" and "date".
        """
        url = "{}/creator/{}/article/{}".format(
            self.root, self.groups[0], self.groups[1])
        page = self.request(url, notfound="article").text

        post = util.json_loads(text.extr(
            page, '<script type="application/ld+json">', '</script>'))[0]

        # Prefer the article body from the metadata; fall back to the
        # whole page when it is missing or empty.
        files = self._extract_files(post.get("articleBody") or page)

        post["post_id"] = text.parse_int(self.groups[1])
        post["count"] = len(files)
        post["date"] = text.parse_datetime(post["datePublished"])

        # Remove both keys independently. A shared try/except around two
        # 'del' statements would skip "sameAs" whenever "publisher" is
        # absent and would also hide unrelated errors.
        post.pop("publisher", None)
        post.pop("sameAs", None)

        yield Message.Directory, post
        # NOTE(review): 'post' is reused for every file, so keys set by an
        # earlier file (e.g. "width"/"height") stay present for later
        # files that do not provide them - confirm this is intended.
        for post["num"], file in enumerate(files, 1):
            post.update(file)
            if "extension" not in file:
                # Image entries carry no name info; derive it from the URL
                text.nameext_from_url(file["url"], post)
            yield Message.Url, file["url"], post

    def _extract_files(self, page):
        """Return a list of file dicts found in 'page'

        Three kinds of entries are collected, in this order:
        images ("type": "image"), videos ("type": "video") and
        downloads ("type": "attachment"). Every dict has a "url" key.
        """
        files = []

        for image in text.extract_iter(
                page, 'class="file-player-image"', "</figure>"):
            # 'data-size' is split at "x" into width and height
            size = text.extr(image, ' data-size="', '"')
            w, _, h = size.partition("x")

            files.append({
                "url"   : text.extr(image, ' data-raw="', '"'),
                "width" : text.parse_int(w),
                "height": text.parse_int(h),
                "type"  : "image",
            })

        for video in text.extract_iter(
                page, "<vue-file-player", "</vue-file-player>"):
            path = text.extr(video, ' base-path="', '"')
            name = text.extr(video, ' file-name="', '"')
            auth = text.extr(video, ' auth-key="', '"')

            file = text.nameext_from_url(name)
            file["url"] = "{}video-web.mp4?{}".format(path, auth)
            file["type"] = "video"
            files.append(file)

        for download in text.extract_iter(
                page, 'class="downloadBlock', "</div>"):
            name = text.extr(download, "<p>", "<")

            # Everything after the last space is dropped (presumably a
            # trailing size label - TODO confirm). Without any space,
            # rpartition() yields an empty head, so keep the full label
            # instead of losing the file name entirely.
            stem = name.rpartition(" ")[0] or name

            file = text.nameext_from_url(stem)
            file["url"] = text.extr(download, ' href="', '"')
            file["type"] = "attachment"
            files.append(file)

        return files

    def _extract_galleries(self, page):
        """Unfinished: request gallery image data for 'page'

        The API response is currently discarded and nothing is ever
        appended to the result, so this always returns an empty list.
        """
        # TODO
        files = []

        for gallery in text.extract_iter(
                page, "<vue-image-gallery", "</vue-image-gallery>"):

            url = "https://ci-en.dlsite.com/api/creator/gallery/images"
            params = {
                "hash"      : text.extr(gallery, ' hash="', '"'),
                "gallery_id": text.extr(gallery, ' gallery-id="', '"'),
                "time"      : text.extr(gallery, ' time="', '"'),
            }
            self.request(url, params=params)

        return files
|
||||
|
||||
|
||||
class CienCreatorExtractor(CienExtractor):
    """Extractor for the article listing of a ci-en creator"""
    subcategory = "creator"
    pattern = BASE_PATTERN + r"/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$"
    example = "https://ci-en.net/creator/123"

    def items(self):
        """Queue every article of the creator via the paged listing"""
        creator_id = self.groups[0]
        query = self.groups[1]

        listing_url = "{}/creator/{}/article".format(self.root, creator_id)
        # Query parameters from the input URL are kept; "mode" is always
        # overridden with "list".
        params = text.parse_query(query)
        params["mode"] = "list"
        return self._pagination_articles(listing_url, params)
|
||||
|
||||
|
||||
class CienRecentExtractor(CienExtractor):
    """Extractor for the articles listed on /mypage/recent"""
    subcategory = "recent"
    pattern = BASE_PATTERN + r"/mypage/recent(?:\?([^#]+))?"
    example = "https://ci-en.net/mypage/recent"

    def items(self):
        """Queue every article of the paged /mypage/recent listing"""
        endpoint = self.root + "/mypage/recent"
        # The optional query string of the input URL becomes the initial
        # request parameters.
        query = text.parse_query(self.groups[0])
        return self._pagination_articles(endpoint, query)
|
||||
|
||||
|
||||
class CienFollowingExtractor(CienExtractor):
    """Extractor for the creators listed on /mypage/subscription"""
    subcategory = "following"
    pattern = BASE_PATTERN + r"/mypage/subscription(/following)?"
    example = "https://ci-en.net/mypage/subscription"

    def items(self):
        """Yield a Queue message for every listed subscription entry"""
        # The optional "/following" path segment is forwarded as matched
        suffix = self.groups[0] or ""
        listing = self.request(
            self.root + "/mypage/subscription" + suffix).text
        queue_data = {"_extractor": CienCreatorExtractor}

        entries = text.extract_iter(
            listing, 'class="c-grid-subscriptionInfo', '</figure>')
        for entry in entries:
            creator_url = text.extr(entry, ' href="', '"')
            yield Message.Queue, creator_url, queue_data
|
@ -24,6 +24,7 @@ CATEGORY_MAP = {
|
||||
"2chan" : "Futaba Channel",
|
||||
"35photo" : "35PHOTO",
|
||||
"adultempire" : "Adult Empire",
|
||||
"agnph" : "AGNPH",
|
||||
"allgirlbooru" : "All girl",
|
||||
"archivedmoe" : "Archived.Moe",
|
||||
"archiveofsins" : "Archive of Sins",
|
||||
@ -35,6 +36,7 @@ CATEGORY_MAP = {
|
||||
"baraag" : "baraag",
|
||||
"batoto" : "BATO.TO",
|
||||
"bbc" : "BBC",
|
||||
"cien" : "Ci-en",
|
||||
"comicvine" : "Comic Vine",
|
||||
"coomerparty" : "Coomer",
|
||||
"deltaporno" : "DeltaPorno",
|
||||
|
92
test/results/cien.py
Normal file
92
test/results/cien.py
Normal file
@ -0,0 +1,92 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from gallery_dl.extractor import cien
|
||||
|
||||
|
||||
__tests__ = (
# Article containing a single video file, with full metadata
{
    "#url"     : "https://ci-en.net/creator/7491/article/1194568",
    "#category": ("", "cien", "article"),
    "#class"   : cien.CienArticleExtractor,
    "#pattern" : r"https://media\.ci-en\.jp/private/attachment/creator/00007491/c0c212a93027c8863bdb40668071c1525a4567f94baca13c17989045e5a3d81d/video-web\.mp4\?px-time=.+",

    "author": {
        "@type" : "Person",
        "image" : "https://media.ci-en.jp/public/icon/creator/00007491/9601a2a224245156335aaa839fa408d52c32c87dae5787fc03f455b7fd1d3488/image-200-c.jpg",
        "name"  : "やかろ",
        "url"   : "https://ci-en.net/creator/7491",
        "sameAs": [
            "https://pokapoka0802.wixsite.com/tunousaginoie82",
            "https://www.freem.ne.jp/brand/6001",
            "https://store.steampowered.com/search/?developer=%E3%83%84%E3%83%8E%E3%82%A6%E3%82%B5%E3%82%AE%E3%81%AE%E5%AE%B6",
            "https://plicy.net/User/87381",
            "https://twitter.com/pokapoka0802",
        ],
    },
    "articleBody": str,
    "count"        : 1,
    "date"         : "dt:2024-07-21 15:36:00",
    "dateModified" : "2024-07-22T03:28:40+09:00",
    "datePublished": "2024-07-22T00:36:00+09:00",
    "description": "お知らせ 今回は雨のピリオードの解説をしたいと思うのですが、その前にいくつかお知らせがあります。 電話を使って謎を解いていくフリーゲーム 電話を通して、様々なキャラクターを会話をしていく、ノベルゲーム……",
    "extension"    : "mp4",
    "filename"     : "無題の動画 (1)",
    "headline"     : "角兎図書館「雨のピリオード」No,16",
    "image"        : "https://media.ci-en.jp/public/article_cover/creator/00007491/cb4062e8d885ab93e0d0fb3133265a7ad1056c906fd4ab81da509220620901e1/image-1280-c.jpg",
    "keywords"     : "お知らせ,角兎図書館",
    "mainEntityOfPage": "https://ci-en.net/creator/7491/article/1194568",
    "name"         : "角兎図書館「雨のピリオード」No,16",
    "num"          : 1,
    "post_id"      : 1194568,
    "type"         : "video",
    "url"          : str,
},

# Article on the ci-en.dlsite.com domain; third file is a zip attachment
{
    "#url"     : "https://ci-en.dlsite.com/creator/25509/article/1172460",
    "#category": ("", "cien", "article"),
    "#class"   : cien.CienArticleExtractor,
    "#range"   : "3",
    "#pattern" : r"https://media\.ci-en\.jp/private/attachment/creator/00025509/7fd3c039d2277ba9541e82592aca6f6751f6c268404038ccbf1112bcf2f93357/upload/.+\.zip\?px-time=.+",

    "filename" : "VP 1.05.4 Tim-v9 ENG rec v3",
    "extension": "zip",
    "type"     : "attachment",
},

# Creator page: queues article URLs across more than one listing page
{
    "#url"     : "https://ci-en.net/creator/11962",
    "#category": ("", "cien", "creator"),
    "#class"   : cien.CienCreatorExtractor,
    "#pattern" : cien.CienArticleExtractor.pattern,
    "#count"   : "> 25",
},

# Recent articles (requires authentication)
{
    "#url"     : "https://ci-en.net/mypage/recent",
    "#category": ("", "cien", "recent"),
    "#class"   : cien.CienRecentExtractor,
    "#auth"    : True,
},

# Followed creators (requires authentication)
{
    "#url"     : "https://ci-en.net/mypage/subscription/following",
    "#category": ("", "cien", "following"),
    "#class"   : cien.CienFollowingExtractor,
    "#pattern" : cien.CienCreatorExtractor.pattern,
    "#count"   : "> 3",
    "#auth"    : True,
},

# Subscription overview without the "/following" suffix
{
    "#url"     : "https://ci-en.net/mypage/subscription",
    "#category": ("", "cien", "following"),
    "#class"   : cien.CienFollowingExtractor,
    "#auth"    : True,
},

)
|
Loading…
Reference in New Issue
Block a user