1
0
mirror of https://github.com/instaloader/instaloader.git synced 2024-08-18 12:49:38 +02:00

Download pictures with #hashtag

Instaloader is now capable of downloading all pictures associated with
one #hashtag with:
instaloader #hashtag

This implements the feature requested with #18.
This commit is contained in:
Alexander Graf 2017-04-17 12:10:43 +02:00
parent a7d1c5bbb0
commit 3e1360160d
2 changed files with 80 additions and 12 deletions

View File

@ -51,6 +51,12 @@ To later update your local copy of that profile, you may run
When ``--fast-update`` is given, Instaloader terminates when arriving at When ``--fast-update`` is given, Instaloader terminates when arriving at
the first already-downloaded picture. the first already-downloaded picture.
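You may also download the most recent pictures tagged with a given hashtag
(quote the argument, since most shells treat an unquoted ``#`` as the start of a comment):
::
instaloader "#hashtag"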
Instaloader can also be used to **download private profiles**. To do so, Instaloader can also be used to **download private profiles**. To do so,
invoke it with invoke it with
@ -117,6 +123,12 @@ You could also download your last 20 liked pics with
instaloader.download_feed_pics(session, max_count=20, fast_update=True, instaloader.download_feed_pics(session, max_count=20, fast_update=True,
filter_func=lambda node: not node["likes"]["viewer_has_liked"]) filter_func=lambda node: not node["likes"]["viewer_has_liked"])
To download the last 20 pictures with hashtag #cat, do
.. code:: python
instaloader.download_hashtag('cat', session=instaloader.get_anonymous_session(), max_count=20)
Each Instagram profile has its own unique ID which stays unmodified even Each Instagram profile has its own unique ID which stays unmodified even
if a user changes his/her username. To get said ID, given the profile's if a user changes his/her username. To get said ID, given the profile's
name, you may call name, you may call

View File

@ -64,9 +64,10 @@ def _log(*msg, sep='', end='\n', flush=False, quiet=False):
print(*msg, sep=sep, end=end, flush=flush) print(*msg, sep=sep, end=end, flush=flush)
def get_json(name: str, session: requests.Session, max_id: int = 0, sleep: bool = True) -> Optional[Dict[str, Any]]: def get_json(name: str, session: requests.Session,
max_id: Optional[int] = None, sleep: bool = True) -> Optional[Dict[str, Any]]:
"""Return JSON of a profile""" """Return JSON of a profile"""
if max_id == 0: if not max_id:
resp = session.get('https://www.instagram.com/'+name) resp = session.get('https://www.instagram.com/'+name)
else: else:
resp = session.get('https://www.instagram.com/'+name, params={'max_id': max_id}) resp = session.get('https://www.instagram.com/'+name, params={'max_id': max_id})
@ -462,7 +463,7 @@ def download_node(node: Dict[str, Any], session: requests.Session, name: str,
:param quiet: Suppress output :param quiet: Suppress output
:return: True if something was downloaded, False otherwise, i.e. file was already there :return: True if something was downloaded, False otherwise, i.e. file was already there
""" """
if node['__typename'] == 'GraphSidecar': if '__typename' in node and node['__typename'] == 'GraphSidecar':
sidecar_data = session.get('https://www.instagram.com/p/' + node['code'] + '/', params={'__a': 1}).json() sidecar_data = session.get('https://www.instagram.com/p/' + node['code'] + '/', params={'__a': 1}).json()
edge_number = 1 edge_number = 1
downloaded = False downloaded = False
@ -538,6 +539,56 @@ def download_feed_pics(session: requests.Session, max_count: int = None, fast_up
sleep=sleep) sleep=sleep)
def get_hashtag_json(hashtag: str, session: requests.Session,
                     max_id: Optional[int] = None, sleep: bool = True) -> Optional[Dict[str, Any]]:
    """Return JSON of a #hashtag.

    Thin wrapper around get_json(): builds the 'explore/tags/<hashtag>/' path
    and forwards session, max_id and sleep unchanged.

    :param hashtag: Hashtag to look up, without leading '#'
    :param session: Session passed through to get_json()
    :param max_id: Passed through to get_json() as max_id
    :param sleep: Passed through to get_json() as sleep
    :return: Whatever get_json() returns for the hashtag path
    """
    tag_path = 'explore/tags/{0}/'.format(hashtag)
    return get_json(tag_path, session, max_id, sleep)
def download_hashtag(hashtag: str, session: requests.Session,
                     max_count: Optional[int] = None,
                     filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
                     fast_update: bool = False, download_videos: bool = True, geotags: bool = False,
                     shorter_output: bool = False, sleep: bool = True, quiet: bool = False) -> None:
    """Download pictures of one hashtag.

    To download the last 30 pictures with hashtag #cat, do

    >>> download_hashtag('cat', session=get_anonymous_session(), max_count=30)

    :param hashtag: Hashtag to download, without leading '#'
    :param session: Session used for all requests (the example above passes an anonymous session)
    :param max_count: Maximum count of pictures to process; pictures skipped by filter_func count as well
    :param filter_func: function(node), which returns True if given picture should not be downloaded
    :param fast_update: If true, abort when first already-downloaded picture is encountered
    :param download_videos: True, if videos should be downloaded
    :param geotags: Download geotags
    :param shorter_output: Shorten log output by not printing captions
    :param sleep: Sleep between requests to instagram server
    :param quiet: Suppress output
    """
    data = get_hashtag_json(hashtag, session, sleep=sleep)
    count = 1
    while data:
        # Look the media container up once per page instead of re-walking the JSON.
        media = data['entry_data']['TagPage'][0]['tag']['media']
        for node in media['nodes']:
            if max_count is not None and count > max_count:
                return
            _log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True, quiet=quiet)
            # The counter advances before filtering, so skipped nodes use up max_count too.
            count += 1
            if filter_func is not None and filter_func(node):
                _log('<skipped>', quiet=quiet)
                continue
            downloaded = download_node(node, session, '#{0}'.format(hashtag),
                                       download_videos=download_videos, geotags=geotags, sleep=sleep,
                                       shorter_output=shorter_output, quiet=quiet)
            if fast_update and not downloaded:
                return
        page_info = media['page_info']
        if not page_info['has_next_page']:
            break
        if max_count is not None and count > max_count:
            # Limit was reached on the last node of this page: do not request
            # another page just to return on its first node.
            return
        data = get_hashtag_json(hashtag, session, sleep=sleep, max_id=page_info['end_cursor'])
def check_id(profile: str, session: requests.Session, json_data: Dict[str, Any], quiet: bool = False) -> str: def check_id(profile: str, session: requests.Session, json_data: Dict[str, Any], quiet: bool = False) -> str:
""" """
Consult locally stored ID of profile with given name, check whether ID matches and whether name Consult locally stored ID of profile with given name, check whether ID matches and whether name
@ -575,9 +626,10 @@ def check_id(profile: str, session: requests.Session, json_data: Dict[str, Any],
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile)) raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
def download(name: str, session: requests.Session, profile_pic_only: bool = False, download_videos: bool = True, def download(name: str, session: requests.Session,
geotags: bool = False, profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
fast_update: bool = False, shorter_output: bool = False, sleep: bool = True, quiet: bool = False) -> None: fast_update: bool = False, shorter_output: bool = False, sleep: bool = True,
quiet: bool = False) -> None:
"""Download one profile""" """Download one profile"""
# pylint:disable=too-many-branches,too-many-locals # pylint:disable=too-many-branches,too-many-locals
# Get profile main page json # Get profile main page json
@ -667,7 +719,12 @@ def download_profiles(profilelist: List[str], username: Optional[str] = None, pa
try: try:
# Generate set of targets # Generate set of targets
for pentry in profilelist: for pentry in profilelist:
if pentry[0] == '@' and username is not None: if pentry[0] == '#':
_log("Retrieving pictures with hashtag {0}".format(pentry), quiet=quiet)
download_hashtag(hashtag=pentry[1:], session=session, fast_update=fast_update,
download_videos=download_videos, geotags=geotags, shorter_output=shorter_output,
sleep=sleep, quiet=quiet)
elif pentry[0] == '@' and username is not None:
_log("Retrieving followees of %s..." % pentry[1:], quiet=quiet) _log("Retrieving followees of %s..." % pentry[1:], quiet=quiet)
followees = get_followees(pentry[1:], session) followees = get_followees(pentry[1:], session)
targets.update([followee['username'] for followee in followees]) targets.update([followee['username'] for followee in followees])
@ -684,9 +741,7 @@ def download_profiles(profilelist: List[str], username: Optional[str] = None, pa
shorter_output=shorter_output, sleep=sleep, quiet=quiet) shorter_output=shorter_output, sleep=sleep, quiet=quiet)
else: else:
targets.add(pentry) targets.add(pentry)
if len(targets) == 0: if len(targets) > 1:
_log("No profiles to download given.", quiet=quiet)
elif len(targets) > 1:
_log("Downloading %i profiles..." % len(targets), quiet=quiet) _log("Downloading %i profiles..." % len(targets), quiet=quiet)
# Iterate through targets list and download them # Iterate through targets list and download them
for target in targets: for target in targets:
@ -716,8 +771,9 @@ def download_profiles(profilelist: List[str], username: Optional[str] = None, pa
def main(): def main():
parser = ArgumentParser(description=__doc__, parser = ArgumentParser(description=__doc__,
epilog="Report issues at https://github.com/Thammus/instaloader/issues.") epilog="Report issues at https://github.com/Thammus/instaloader/issues.")
parser.add_argument('profile', nargs='*', parser.add_argument('profile', nargs='*', metavar='profile|#hashtag',
help='Name of profile to download; If --login is given: @<profile> to download all followees of ' help='Name of profile or #hashtag to download. '
'Alternatively, if --login is given: @<profile> to download all followees of '
'<profile>; or the special targets :feed-all or :feed-liked to ' '<profile>; or the special targets :feed-all or :feed-liked to '
'download pictures from your feed (using ' 'download pictures from your feed (using '
'--fast-update is recommended).') '--fast-update is recommended).')