diff --git a/docs/supportedsites.md b/docs/supportedsites.md index a18beca9..5611d34a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -92,7 +92,7 @@ Consider all listed sites to potentially be NSFW. - Agnph + AGNPH https://agn.ph/ Posts, Tag Searches @@ -145,6 +145,12 @@ Consider all listed sites to potentially be NSFW. Albums, Files + + Ci-en + https://ci-en.net/ + Articles, Creators, Followed Users, Recent Images + + Comic Vine https://comicvine.gamespot.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 0a5d3bc2..0ada702a 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -34,6 +34,7 @@ modules = [ "bunkr", "catbox", "chevereto", + "cien", "comicvine", "cyberdrop", "danbooru", diff --git a/gallery_dl/extractor/cien.py b/gallery_dl/extractor/cien.py new file mode 100644 index 00000000..400eddbe --- /dev/null +++ b/gallery_dl/extractor/cien.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- + +# Copyright 2024 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://ci-en.net/""" + +from .common import Extractor, Message +from .. import text, util + +BASE_PATTERN = r"(?:https?://)?ci-en\.(?:net|dlsite\.com)" + + +class CienExtractor(Extractor): + category = "cien" + root = "https://ci-en.net" + + def __init__(self, match): + self.root = text.root_from_url(match.group(0)) + Extractor.__init__(self, match) + + def _pagination_articles(self, url, params): + data = {"_extractor": CienArticleExtractor} + params["page"] = text.parse_int(params.get("page"), 1) + + while True: + page = self.request(url, params=params).text + + for card in text.extract_iter( + page, ' class="c-cardCase-item', ''): + article_url = text.extr(card, ' href="', '"') + yield Message.Queue, article_url, data + + if ' rel="next"' not in page: + return + params["page"] += 1 + + +class CienArticleExtractor(CienExtractor): + subcategory = "article" + filename_fmt = "{num:>02} {filename}.{extension}" + directory_fmt = ("{category}", "{author[name]}", "{post_id} {name}") + archive_fmt = "{post_id}_{num}" + pattern = BASE_PATTERN + r"/creator/(\d+)/article/(\d+)" + example = "https://ci-en.net/creator/123/article/12345" + + def items(self): + url = "{}/creator/{}/article/{}".format( + self.root, self.groups[0], self.groups[1]) + page = self.request(url, notfound="article").text + + post = util.json_loads(text.extr( + page, ''))[0] + + files = self._extract_files(post.get("articleBody") or page) + + post["post_id"] = text.parse_int(self.groups[1]) + post["count"] = len(files) + post["date"] = text.parse_datetime(post["datePublished"]) + + try: + del post["publisher"] + del post["sameAs"] + except Exception: + pass + + yield Message.Directory, post + for post["num"], file in enumerate(files, 1): + post.update(file) + if "extension" not in file: + text.nameext_from_url(file["url"], post) + yield Message.Url, file["url"], post + + def _extract_files(self, page): + files = [] + + for image in text.extract_iter( + page, 'class="file-player-image"', ""): + size = text.extr(image, ' data-size="', '"') + w, _, h = size.partition("x") + + files.append({ + "url" : text.extr(image, ' data-raw="', '"'), + "width" : text.parse_int(w), + "height": text.parse_int(h), + "type" : "image", + }) + + for video in text.extract_iter( + page, ""): + path = text.extr(video, ' base-path="', '"') + name = text.extr(video, ' file-name="', '"') + auth = text.extr(video, ' auth-key="', '"') + + file = text.nameext_from_url(name) + file["url"] = "{}video-web.mp4?{}".format(path, auth) + file["type"] = "video" + files.append(file) + + for download in text.extract_iter( + page, 'class="downloadBlock', ""): + name = text.extr(download, "

", "<") + + file = text.nameext_from_url(name.rpartition(" ")[0]) + file["url"] = text.extr(download, ' href="', '"') + file["type"] = "attachment" + files.append(file) + + return files + + def _extract_galleries(self, page): + # TODO + files = [] + + for gallery in text.extract_iter( + page, ""): + + url = "https://ci-en.dlsite.com/api/creator/gallery/images" + params = { + "hash" : text.extr(gallery, ' hash="', '"'), + "gallery_id": text.extr(gallery, ' gallery-id="', '"'), + "time" : text.extr(gallery, ' time="', '"'), + } + self.request(url, params=params) + + return files + + +class CienCreatorExtractor(CienExtractor): + subcategory = "creator" + pattern = BASE_PATTERN + r"/creator/(\d+)(?:/article(?:\?([^#]+))?)?/?$" + example = "https://ci-en.net/creator/123" + + def items(self): + url = "{}/creator/{}/article".format(self.root, self.groups[0]) + params = text.parse_query(self.groups[1]) + params["mode"] = "list" + return self._pagination_articles(url, params) + + +class CienRecentExtractor(CienExtractor): + subcategory = "recent" + pattern = BASE_PATTERN + r"/mypage/recent(?:\?([^#]+))?" + example = "https://ci-en.net/mypage/recent" + + def items(self): + url = self.root + "/mypage/recent" + params = text.parse_query(self.groups[0]) + return self._pagination_articles(url, params) + + +class CienFollowingExtractor(CienExtractor): + subcategory = "following" + pattern = BASE_PATTERN + r"/mypage/subscription(/following)?" + example = "https://ci-en.net/mypage/subscription" + + def items(self): + url = self.root + "/mypage/subscription" + (self.groups[0] or "") + page = self.request(url).text + data = {"_extractor": CienCreatorExtractor} + + for subscription in text.extract_iter( + page, 'class="c-grid-subscriptionInfo', ''): + url = text.extr(subscription, ' href="', '"') + yield Message.Queue, url, data diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 3ab86f62..57ab5d3f 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -24,6 +24,7 @@ CATEGORY_MAP = { "2chan" : "Futaba Channel", "35photo" : "35PHOTO", "adultempire" : "Adult Empire", + "agnph" : "AGNPH", "allgirlbooru" : "All girl", "archivedmoe" : "Archived.Moe", "archiveofsins" : "Archive of Sins", @@ -35,6 +36,7 @@ CATEGORY_MAP = { "baraag" : "baraag", "batoto" : "BATO.TO", "bbc" : "BBC", + "cien" : "Ci-en", "comicvine" : "Comic Vine", "coomerparty" : "Coomer", "deltaporno" : "DeltaPorno", diff --git a/test/results/cien.py b/test/results/cien.py new file mode 100644 index 00000000..d3bf875f --- /dev/null +++ b/test/results/cien.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import cien + + +__tests__ = ( +{ + "#url" : "https://ci-en.net/creator/7491/article/1194568", + "#category": ("", "cien", "article"), + "#class" : cien.CienArticleExtractor, + "#pattern" : r"https://media\.ci-en\.jp/private/attachment/creator/00007491/c0c212a93027c8863bdb40668071c1525a4567f94baca13c17989045e5a3d81d/video-web\.mp4\?px-time=.+", + + "author": { + "@type" : "Person", + "image" : "https://media.ci-en.jp/public/icon/creator/00007491/9601a2a224245156335aaa839fa408d52c32c87dae5787fc03f455b7fd1d3488/image-200-c.jpg", + "name" : "やかろ", + "url" : "https://ci-en.net/creator/7491", + "sameAs": [ + "https://pokapoka0802.wixsite.com/tunousaginoie82", + "https://www.freem.ne.jp/brand/6001", + "https://store.steampowered.com/search/?developer=%E3%83%84%E3%83%8E%E3%82%A6%E3%82%B5%E3%82%AE%E3%81%AE%E5%AE%B6", + "https://plicy.net/User/87381", + "https://twitter.com/pokapoka0802", + ], + }, + "articleBody": str, + "count" : 1, + "date" : "dt:2024-07-21 15:36:00", + "dateModified" : "2024-07-22T03:28:40+09:00", + "datePublished": "2024-07-22T00:36:00+09:00", + "description": "お知らせ 今回は雨のピリオードの解説をしたいと思うのですが、その前にいくつかお知らせがあります。 電話を使って謎を解いていくフリーゲーム 電話を通して、様々なキャラクターを会話をしていく、ノベルゲーム……", + "extension" : "mp4", + "filename" : "無題の動画 (1)", + "headline" : "角兎図書館「雨のピリオード」No,16", + "image" : "https://media.ci-en.jp/public/article_cover/creator/00007491/cb4062e8d885ab93e0d0fb3133265a7ad1056c906fd4ab81da509220620901e1/image-1280-c.jpg", + "keywords" : "お知らせ,角兎図書館", + "mainEntityOfPage": "https://ci-en.net/creator/7491/article/1194568", + "name" : "角兎図書館「雨のピリオード」No,16", + "num" : 1, + "post_id" : 1194568, + "type" : "video", + "url" : str, +}, + +{ + "#url" : "https://ci-en.dlsite.com/creator/25509/article/1172460", + "#category": ("", "cien", "article"), + "#class" : cien.CienArticleExtractor, + "#range" : "3", + "#pattern" : r"https://media\.ci-en\.jp/private/attachment/creator/00025509/7fd3c039d2277ba9541e82592aca6f6751f6c268404038ccbf1112bcf2f93357/upload/.+\.zip\?px-time=.+", + + "filename" : "VP 1.05.4 Tim-v9 ENG rec v3", + "extension": "zip", + "type" : "attachment", +}, + +{ + "#url" : "https://ci-en.net/creator/11962", + "#category": ("", "cien", "creator"), + "#class" : cien.CienCreatorExtractor, + "#pattern" : cien.CienArticleExtractor.pattern, + "#count" : "> 25", +}, + +{ + "#url" : "https://ci-en.net/mypage/recent", + "#category": ("", "cien", "recent"), + "#class" : cien.CienRecentExtractor, + "#auth" : True, +}, + +{ + "#url" : "https://ci-en.net/mypage/subscription/following", + "#category": ("", "cien", "following"), + "#class" : cien.CienFollowingExtractor, + "#pattern" : cien.CienCreatorExtractor.pattern, + "#count" : "> 3", + "#auth" : True, +}, + +{ + "#url" : "https://ci-en.net/mypage/subscription", + "#category": ("", "cien", "following"), + "#class" : cien.CienFollowingExtractor, + "#auth" : True, +}, + +)