# -*- coding: utf-8 -*-

# Copyright 2019-2020 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://www.sex.com/"""

from .common import Extractor, Message
from .. import text


class SexcomExtractor(Extractor):
    """Base class for sexcom extractors"""
    category = "sexcom"
    # One-element tuple, matching the tuple form used by every subclass
    # (a bare parenthesised string is just a string).
    directory_fmt = ("{category}",)
    filename_fmt = "{pin_id}{title:? //}.{extension}"
    archive_fmt = "{pin_id}"
    root = "https://www.sex.com"

    def items(self):
        """Yield the version and directory messages, then one URL per pin.

        Every URL returned by pins() is passed through _parse_pin();
        pins that could not be parsed (falsy result) are skipped.
        """
        yield Message.Version, 1
        yield Message.Directory, self.metadata()
        for pin in map(self._parse_pin, self.pins()):
            if pin:
                yield Message.Url, pin["url"], pin

    def metadata(self):
        """Return directory-level metadata; empty unless overridden."""
        return {}

    def pins(self):
        """Return an iterable of pin page URLs; empty unless overridden."""
        return ()

    def _pagination(self, url):
        """Yield pin page URLs from 'url' and all follow-up pages.

        Stops as soon as a page contains no link to a next page.
        """
        # NOTE(review): the HTML markers used below ('<a class=...',
        # 'id="pagenum"', '</div>') were reconstructed; the reviewed
        # source only contained empty strings where the markup
        # delimiters belong. Verify them against the site's markup.
        while True:
            extr = text.extract_from(self.request(url).text)

            # collect every pin link on the current page
            while True:
                href = extr('<a class="image_wrapper" href="', '"')
                if not href:
                    break
                yield self.root + href

            # locate the link to the next page inside the pager element
            pager = extr('id="pagenum"', '</div>')
            url = text.extract(pager, ' href="', '"')[0]
            if not url:
                return
            url = text.urljoin(self.root, url)

    def _parse_pin(self, url):
        """Fetch the pin page at 'url' and return its metadata as a dict.

        Returns None (after logging a warning) if the page request
        answers with an HTTP status >= 400 or if no media URL can be
        found for an embedded video.
        """
        response = self.request(url, fatal=False)
        if response.status_code >= 400:
            self.log.warning('Unable to fetch %s ("%s %s")',
                             url, response.status_code, response.reason)
            return None
        extr = text.extract_from(response.text)
        data = {}

        # NOTE(review): the markers '<h1>', '</div>' and '<iframe', '>'
        # below were reconstructed; the reviewed source had lost them.
        # 'extr' is called in a fixed sequence - keep the statement
        # order when editing.
        data["_http_headers"] = {"Referer": url}
        data["thumbnail"] = extr('itemprop="thumbnail" content="', '"')
        data["type"] = extr('<h1>', '<').rstrip(" -").strip().lower()
        data["title"] = text.unescape(extr('itemprop="name">', '<'))
        data["repins"] = text.parse_int(text.extract(
            extr('"btn-group"', '</div>'), '"btn btn-primary">', '<')[0])
        data["likes"] = text.parse_int(text.extract(
            extr('"btn-group"', '</div>'), '"btn btn-default">', '<')[0])
        data["pin_id"] = text.parse_int(extr('data-id="', '"'))

        if data["type"] == "video":
            info = extr("player.updateSrc(", ");")

            if info:
                # video served by the site's own player
                path = text.extract(info, "src: '", "'")[0]
                data["filename"] = path.rpartition("/")[2]
                data["extension"] = "mp4"
                if "'HD'" in info:
                    path += "/hd"
                data["url"] = self.root + path
            else:
                # embedded third-party video; emitted as a 'ytdl:' URL
                embed = text.extract(
                    extr('<iframe', '>'), ' src="', '"')[0]
                if not embed:
                    # avoid '"ytdl:" + None' raising a TypeError
                    self.log.warning(
                        "Unable to find a video source in %s", url)
                    return None
                data["extension"] = None
                data["url"] = "ytdl:" + embed
        else:
            data["url"] = text.unescape(extr(' src="', '"').partition("?")[0])
            text.nameext_from_url(data["url"], data)

        data["uploader"] = extr('itemprop="author">', '<')
        data["date"] = text.parse_datetime(extr('datetime="', '"'))
        data["tags"] = text.split_html(extr('class="tags"> Tags', '</div>'))
        data["comments"] = text.parse_int(extr('Comments (', ')'))

        return data


class SexcomPinExtractor(SexcomExtractor):
    """Extractor for a pinned image or video on www.sex.com"""
    subcategory = "pin"
    directory_fmt = ("{category}",)
    # the negative lookahead leaves '...#related' URLs to
    # SexcomRelatedPinExtractor
    pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+)(?!.*#related$)"
    test = (
        # picture
        ("https://www.sex.com/pin/21241874-sexy-ecchi-girls-166/", {
            "pattern": "https://cdn.sex.com/images/.+/2014/08/26/7637609.jpg",
            "content": "ebe1814dadfebf15d11c6af4f6afb1a50d6c2a1c",
            "keyword": {
                "comments" : int,
                "date"     : "dt:2014-10-19 15:45:44",
                "extension": "jpg",
                "filename" : "7637609",
                "likes"    : int,
                "pin_id"   : 21241874,
                "repins"   : int,
                "tags"     : list,
                "thumbnail": str,
                "title"    : "Sexy Ecchi Girls 166",
                "type"     : "picture",
                "uploader" : "mangazeta",
                "url"      : str,
            },
        }),
        # gif
        ("https://www.sex.com/pin/11465040-big-titted-hentai-gif/", {
            "pattern": "https://cdn.sex.com/images/.+/2014/01/26/4829951.gif",
            "content": "af6726d74d11d819e1c885fe5303f711862eae96",
        }),
        # video
        ("https://www.sex.com/pin/55748341/", {
            "pattern": "https://www.sex.com/video/stream/776229/hd",
            "content": "e1a5834869163e2c4d1ca2677f5b7b367cf8cfff",
        }),
        # pornhub embed
        ("https://www.sex.com/pin/55847384-very-nicely-animated/", {
            "pattern": "ytdl:https://www.pornhub.com/embed/ph56ef24b6750f2",
        }),
    )

    def __init__(self, match):
        SexcomExtractor.__init__(self, match)
        self.pin_id = match.group(1)

    def pins(self):
        """Return a one-element tuple holding this pin's page URL."""
        return ("{}/pin/{}/".format(self.root, self.pin_id),)


class SexcomRelatedPinExtractor(SexcomPinExtractor):
    """Extractor for related pins on www.sex.com"""
    subcategory = "related-pin"
    directory_fmt = ("{category}", "related {original_pin[pin_id]}")
    pattern = r"(?:https?://)?(?:www\.)?sex\.com/pin/(\d+).*#related$"
    test = ("https://www.sex.com/pin/21241874/#related", {
        "count": ">= 20",
    })

    def metadata(self):
        """Return the parsed original pin under the 'original_pin' key."""
        pin = self._parse_pin(SexcomPinExtractor.pins(self)[0])
        return {"original_pin": pin}

    def pins(self):
        """Yield the URLs of all pins related to the original pin."""
        url = "{}/pin/related?pinId={}&limit=24&offset=0".format(
            self.root, self.pin_id)
        return self._pagination(url)


class SexcomBoardExtractor(SexcomExtractor):
    """Extractor for pins from a board on www.sex.com"""
    subcategory = "board"
    directory_fmt = ("{category}", "{user}", "{board}")
    pattern = (r"(?:https?://)?(?:www\.)?sex\.com/user"
               r"/([^/?&#]+)/(?!(?:following|pins|repins|likes)/)([^/?&#]+)")
    test = ("https://www.sex.com/user/ronin17/exciting-hentai/", {
        "count": ">= 15",
    })

    def __init__(self, match):
        SexcomExtractor.__init__(self, match)
        self.user, self.board = match.groups()

    def metadata(self):
        """Return the unquoted user and board names."""
        return {
            "user" : text.unquote(self.user),
            "board": text.unquote(self.board),
        }

    def pins(self):
        """Yield the URLs of all pins on the board."""
        url = "{}/user/{}/{}/".format(self.root, self.user, self.board)
        return self._pagination(url)


class SexcomSearchExtractor(SexcomExtractor):
    """Extractor for search results on www.sex.com"""
    subcategory = "search"
    directory_fmt = ("{category}", "search", "{search[query]}")
    # groups: 1 = full path incl. query string, 2/4 = media type,
    #         3 = term taken from the path, 5 = raw query string
    pattern = (r"(?:https?://)?(?:www\.)?sex\.com/((?:"
               r"(pic|gif|video)s/([^/?&#]+)|search/(pic|gif|video)s"
               r")/?(?:\?([^#]+))?)")
    test = (
        ("https://www.sex.com/search/pics?query=ecchi", {
            "range": "1-10",
            "count": 10,
        }),
        ("https://www.sex.com/videos/hentai/", {
            "range": "1-10",
            "count": 10,
        }),
    )

    def __init__(self, match):
        SexcomExtractor.__init__(self, match)
        self.path = match.group(1)

        self.search = text.parse_query(match.group(5))
        self.search["type"] = match.group(2) or match.group(4)
        if "query" not in self.search:
            # '/pics/<term>/' style URLs carry the term in the path
            self.search["query"] = match.group(3) or ""

    def metadata(self):
        """Return the search parameters under the 'search' key."""
        return {"search": self.search}

    def pins(self):
        """Yield the URLs of all pins in the search results."""
        url = "{}/{}".format(self.root, self.path)
        return self._pagination(url)