diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 928681b2..a81c67b1 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -46,6 +46,7 @@ modules = [
     "spectrumnexus",
     "turboimagehost",
     "yandere",
+    "generic",
 ]
 
 def find(url):
diff --git a/gallery_dl/extractor/generic.py b/gallery_dl/extractor/generic.py
new file mode 100644
index 00000000..7700cff6
--- /dev/null
+++ b/gallery_dl/extractor/generic.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2015 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Generic extractor"""
+
+import re
+from .common import Extractor, Message
+
+class GenericExtractor(Extractor):
+    """Queue every http(s) URL found on the page given as 'generic:<url>'"""
+
+    category = "generic"
+    pattern = ["generic:(.+)"]
+
+    def __init__(self, match):
+        Extractor.__init__(self)
+        # the capture group is everything after the 'generic:' prefix
+        self.url = match.group(1)
+
+    def items(self):
+        """Yield Message.Version, then a Message.Queue for each URL found"""
+        page = self.request(self.url).text
+        yield Message.Version, 1
+        # A URL ends at the first whitespace character, quote or angle
+        # bracket; matching only up to a literal space would run a URL on
+        # across a following newline, tab or HTML tag.
+        for match in re.finditer(r"https?://[^\s\"'<>]+", page):
+            yield Message.Queue, match.group(0)