diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000..7fec515 --- /dev/null +++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000..3e12315 --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,181 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from instaloader.instaloader import Instaloader, Post\n", + "from instaloader.nodeiterator import NodeIterator\n", + "import pickle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "L = Instaloader(download_pictures=False, download_videos=False, download_video_thumbnails=False, download_geotags = False, download_comments = False, compress_json=True, save_metadata = True, post_metadata_txt_pattern=\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "LOC = '309580832976358'\n", + "post_iterator = NodeIterator(L.context, \n", + " \"ac38b90f0f3981c42092016a37c59bf7\", \n", + " lambda d: d['data']['location']['edge_location_to_media'], \n", + " lambda n: instaloader.Post(L.context, n),\n", + " {'id': LOC},\n", + " f\"https://www.instagram.com/explore/locations/{LOC}/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "HASHTAG = 'bodypositive'\n", + "post_iterator = NodeIterator(\n", + " L.context, \"9b498c08113f1e09617a1703c22b2f32\",\n", + " lambda d: d['data']['hashtag']['edge_hashtag_to_media'],\n", + " lambda n: Post(L.context, n),\n", + " {'tag_name': HASHTAG},\n", + " f\"https://www.instagram.com/explore/tags/{HASHTAG}/\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " for post in post_iterator:\n", + " L.download_post(post, target = HASHTAG)\n", + "except Exception as err:\n", + " print(err)\n", + " iteratorfreeze = post_iterator.freeze()\n", + " with open(f'{HASHTAG}.pkl', 'wb') as resumefile:\n", + " pickle.dump(iteratorfreeze, resumefile)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(f'{HASHTAG}.pkl', 'rb') as resumefile:\n", + " iteratorfreeze = pickle.load(resumefile)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteratorfreeze" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "a = [1,2,3]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'int' object is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_2027462/3346660367.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: 'int' object is not iterable" + ] + } + ], + "source": [ + "a.extend(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'stylegan2/models/finger_masks_asd/model_4.pt'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_853173/2129044928.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mweights\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'stylegan2/models/finger_masks_asd/model_4.pt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, **pickle_load_args)\u001b[0m\n\u001b[1;32m 577\u001b[0m \u001b[0mpickle_load_args\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'encoding'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'utf-8'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 578\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 579\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mopened_file\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 580\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_zipfile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopened_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 581\u001b[0m \u001b[0;31m# The zipfile reader is going to advance the current file position.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_open_file_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m_is_path\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 230\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 231\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'w'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/torch/serialization.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0m_open_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_opener\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 211\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_open_file\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 212\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 213\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__exit__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'stylegan2/models/finger_masks_asd/model_4.pt'" + ] + } + ], + "source": [ + "weights = torch.load('stylegan2/models/finger_masks_asd/model_4.pt')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/app.log b/app.log index 02c95aa..85d9b3b 100644 --- a/app.log +++ b/app.log @@ -57,3 +57,23 @@ IndexError: list index out of range 2021-08-27 14:23:17,347 - JSON Query to graphql/query: Redirected to login page. Use --login. [retrying; skip with ^C] 2021-08-27 14:23:45,522 - [skipped by user] 2021-08-27 14:23:45,523 - im_vladapetrova: JSON Query to graphql/query: Redirected to login page. Use --login. +2021-09-14 11:32:16,910 - JSON Query to graphql/query: Redirected to login page. Use --login. [retrying; skip with ^C] +2021-09-14 11:35:21,538 - JSON Query to graphql/query: Redirected to login page. Use --login. [retrying; skip with ^C] +2021-09-14 11:37:19,106 - [skipped by user] +2021-09-14 16:39:53,918 - JSON Query to graphql/query: Redirected to login page. Use --login. [retrying; skip with ^C] +2021-09-14 16:40:11,029 - [skipped by user] +2021-09-22 10:43:38,102 - JSON Query to explore/tags/bodypositive/: Redirected to login page. Use --login. [retrying; skip with ^C] +2021-09-22 10:43:43,013 - [skipped by user] +2021-09-23 12:19:07,896 - JSON Query to graphql/query: HTTP error code 500. [retrying; skip with ^C] +2021-09-23 12:19:08,828 - JSON Query to graphql/query: HTTP error code 500. [retrying; skip with ^C] +2021-09-28 15:12:45,171 - Error occurred during loading data. Trying to use cache server https://fake-useragent.herokuapp.com/browsers/0.1.11 +Traceback (most recent call last): + File "/home/grisha/anaconda3/lib/python3.8/site-packages/fake_useragent/utils.py", line 154, in load + for item in get_browsers(verify_ssl=verify_ssl): + File "/home/grisha/anaconda3/lib/python3.8/site-packages/fake_useragent/utils.py", line 99, in get_browsers + html = html.split('')[1] +IndexError: list index out of range +2021-09-28 15:13:51,075 - JSON Query to im_vladapetrova/feed/: Expecting value: line 1 column 1 (char 0) [retrying; skip with ^C] +2021-09-28 15:13:53,448 - JSON Query to im_vladapetrova/feed/: Expecting value: line 1 column 1 (char 0) [retrying; skip with ^C] +2021-09-28 15:15:13,242 - JSON Query to im_vladapetrova/feed/: Expecting value: line 1 column 1 (char 0) [retrying; skip with ^C] +2021-09-28 15:15:15,011 - JSON Query to im_vladapetrova/feed/: Expecting value: line 1 column 1 (char 0) [retrying; skip with ^C] diff --git a/instaloader.py b/instaloader.py index e40e428..3473110 100644 --- a/instaloader.py +++ b/instaloader.py @@ -11,8 +11,8 @@ from instaloader.instaloader import Instaloader from instaloader.instaloadercontext import default_user_agent import pandas as pd -from tqdm import tqdm -from multiprocessing import Process +from queue import Empty, Queue +from threading import Thread import os from instaloader.utils import get_profile_struct @@ -72,7 +72,7 @@ def delete_row(df, row_idx): for i in range(args.proxy_index + 1): proxy_objects.append(ProxyRotator(api_key = api_key, idx = i)) else: - proxy_objects.append(None) + proxy_objects = [None] loaders = [] for proxy in proxy_objects: @@ -122,10 +122,26 @@ def delete_row(df, row_idx): else: ids.append({'id': 'nan', 'username': item}) +print(len(ids)) ids_container = iter(ids) flag = True total_index = 0 +done = False + +def produce(queue): + global ids_container + while True: + try: + item = next(ids_container) + print('Producer thread target: {}'.format(item)) + queue.put(item) + except StopIteration: + break + except KeyboardInterrupt: + break + + def subprocess(loader, target): global total_index, df total_index += 1 @@ -133,8 +149,7 @@ def subprocess(loader, target): try: if args.task not in ['scrape_hashtag', 'scrape_location']: target = get_profile_struct(loader, target) - - func(loader, target) + func(loader, target, max_count = 3) total_index += 1 if not (df is None): df.loc[target['idx'], 'downloaded'] = True @@ -153,18 +168,80 @@ def subprocess(loader, target): raise err +def subprocess(loader, queue): + global total_index, df, done + total_index += 1 + print('Current total index is {}.'.format(total_index)) + while not done: + try: + target = queue.get(timeout = 1.) + print('Consumer thread target: {}'.format(target)) + if args.task not in ['scrape_hashtag', 'scrape_location']: + target = get_profile_struct(loader, target) + func(loader, target, max_count = 3) + total_index += 1 + if not (df is None): + df.loc[target['idx'], 'downloaded'] = True + if (total_index % 100 == 0): + df.to_csv(args.csv_path, sep = ';', index = None) + queue.task_done() + except (QueryReturnedNotFoundException, ProfileNotExistsException): + if not (df is None): + df = delete_row(df, target['idx']) + pass + except KeyboardInterrupt: + if not (df is None): + df.to_csv(args.csv_path, sep = ';', index = None) + raise + except Empty: + done = True + pass + except Exception as err: + print(err) + raise err + +# num_processes = 1 if (not args.use_proxy) else (args.proxy_index + 1) +num_processes = len(loaders) + +q = Queue(len(loaders)) +producer = Thread(target = produce, args = (q,)) +producer.start() +# if num_processes == 1: +# loader = loaders[0] +# del loaders +# for item in ids_container: +# subprocess(loader, item) +# else: + # processes = [None]*len(loaders) +processes = [] +flag = True +for i in range(len(loaders)): + consumer = Thread(target = subprocess, args = (loaders[i], q)) + consumer.start() + processes.append(consumer) +producer.join() +for consumer in processes: + consumer.join() + # processes[i].join() + # while flag: + # try: + # for i, process in enumerate(processes): + # time.sleep(0.1) + # if process.is_alive(): + # continue + # else: + # process.kill() + # processes[i] = Thread(subprocess, args = (item, next(ids_container))) + # processes[i].start() + # # processes[i].join() + + + + # except KeyboardInterrupt: + # print('KeyboardInterrupt! breaking the loop') + # flag = False -num_processes = 1 if (not args.use_proxy) else (args.proxy_index + 1) -if num_processes == 1: - loader = loaders[0] - del loaders - for item in ids_container: - subprocess(loader, item) -else: - processes = [] - for item in loaders: - processes.append(Process(subprocess, args = (item, next(ids_container)))) print('Ready!') diff --git a/instaloader/__pycache__/__init__.cpython-38.pyc b/instaloader/__pycache__/__init__.cpython-38.pyc index 76d7e2a..e0b671d 100644 Binary files a/instaloader/__pycache__/__init__.cpython-38.pyc and b/instaloader/__pycache__/__init__.cpython-38.pyc differ diff --git a/instaloader/__pycache__/exceptions.cpython-38.pyc b/instaloader/__pycache__/exceptions.cpython-38.pyc index 40131ee..ce50935 100644 Binary files a/instaloader/__pycache__/exceptions.cpython-38.pyc and b/instaloader/__pycache__/exceptions.cpython-38.pyc differ diff --git a/instaloader/__pycache__/instaloader.cpython-38.pyc b/instaloader/__pycache__/instaloader.cpython-38.pyc index b6997e0..b566a05 100644 Binary files a/instaloader/__pycache__/instaloader.cpython-38.pyc and b/instaloader/__pycache__/instaloader.cpython-38.pyc differ diff --git a/instaloader/__pycache__/instaloadercontext.cpython-38.pyc b/instaloader/__pycache__/instaloadercontext.cpython-38.pyc index 78aa90e..188e5d9 100644 Binary files a/instaloader/__pycache__/instaloadercontext.cpython-38.pyc and b/instaloader/__pycache__/instaloadercontext.cpython-38.pyc differ diff --git a/instaloader/__pycache__/nodeiterator.cpython-38.pyc b/instaloader/__pycache__/nodeiterator.cpython-38.pyc index 19359cd..5568cef 100644 Binary files a/instaloader/__pycache__/nodeiterator.cpython-38.pyc and b/instaloader/__pycache__/nodeiterator.cpython-38.pyc differ diff --git a/instaloader/__pycache__/proxyrotator.cpython-38.pyc b/instaloader/__pycache__/proxyrotator.cpython-38.pyc index d489e8f..2c2f191 100644 Binary files a/instaloader/__pycache__/proxyrotator.cpython-38.pyc and b/instaloader/__pycache__/proxyrotator.cpython-38.pyc differ diff --git a/instaloader/__pycache__/structures.cpython-38.pyc b/instaloader/__pycache__/structures.cpython-38.pyc index 8e2d371..209070d 100644 Binary files a/instaloader/__pycache__/structures.cpython-38.pyc and b/instaloader/__pycache__/structures.cpython-38.pyc differ diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 6b75347..ecc1062 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -944,13 +944,19 @@ def get_location_posts(self, location: str) -> Iterator[Post]: """ has_next_page = True end_cursor = None + # location_hash = "ac38b90f0f3981c42092016a37c59bf7" while has_next_page: if end_cursor: params = {'__a': 1, 'max_id': end_cursor} else: params = {'__a': 1} + # params['query_hash'] = location_hash + # params['id'] = str(location) location_data = self.context.get_json('explore/locations/{0}/'.format(location), params)['graphql']['location']['edge_location_to_media'] + # location_data = self.context.get_json('graphql/query', + # params)['graphql']['location']['edge_location_to_media'] + yield from (Post(self.context, edge['node']) for edge in location_data['edges']) has_next_page = location_data['page_info']['has_next_page'] end_cursor = location_data['page_info']['end_cursor'] diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 38f115c..7e6cf6c 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -39,13 +39,9 @@ def copy_session(session: requests.Session, request_timeout: Optional[float] = N return new -# def default_user_agent() -> str: -# return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \ -# '(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36' - def default_user_agent() -> str: - #print('FUNCTION EXECUTED:{}'.format(inspect.currentframe().f_code.co_name)) - return 'Mozilla/5.0 (Linux; Android 8.1.0; motorola one Build/OPKS28.63-18-3; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.80 Mobile Safari/537.36 Instagram 72.0.0.21.98 Android (27/8.1.0; 320dpi; 720x1362; motorola; motorola one; deen_sprout; qcom; pt_BR; 132081645)' + return 'Mozilla/5.0 (X11; Linux i686; rv:92.0) Gecko/20100101 Firefox/92.0' + class InstaloaderContext: @@ -85,8 +81,7 @@ def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional self.proxy_dict = self.proxyrotator.get_proxy_url_format() self.total_requests = 0 self.request_batch = np.random.randint(10, 25)*6 - print('Current request batch limit: {}'.format(self.request_batch)) - # self.user_agent = user_agent if user_agent is not None else default_user_agent() + self.user_agent = user_agent if user_agent is not None else default_user_agent() self.request_timeout = request_timeout self._session = self.get_anonymous_session() self.username = None @@ -418,8 +413,6 @@ def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram self.do_sleep() print('Current request batch limit: {}'.format(self.request_batch)) resp = sess.get('https://{0}/{1}'.format(host, path), params=params, allow_redirects=False) - self.request_batch -= 1 - while resp.is_redirect: redirect_url = resp.headers['location'] diff --git a/instaloader/scrape_location.py b/instaloader/scrape_location.py index bda1f3b..efb3fb0 100644 --- a/instaloader/scrape_location.py +++ b/instaloader/scrape_location.py @@ -8,13 +8,17 @@ import time import pandas as pd from operator import itemgetter +from .utils import login def _main(instaloader: Instaloader, loc: Dict, + username: Optional[str] = None, + password: Optional[str] = None, max_count: Optional[int] = 1000000) -> None: flag = True loc = str(loc['id']) + login(instaloader, username, password) while flag: try: instaloader.context.log("Scraping location: {}".format(loc)) diff --git a/instaloader/structures.py b/instaloader/structures.py index 8c71db0..41b668f 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -1310,6 +1310,8 @@ def _query(self, params): #print('FUNCTION EXECUTED:{}'.format(inspect.currentframe().f_code.co_name)) return self._context.get_json("explore/tags/{0}/".format(self.name), params)["graphql"]["hashtag"] + # data = self._context.graphql_query("ded47faa9a1aaded10161a2ff32abb6b", params)['data']['reels_media'][0]['items'] + def _obtain_metadata(self): #print('FUNCTION EXECUTED:{}'.format(inspect.currentframe().f_code.co_name)) diff --git a/instaloader/utils.py b/instaloader/utils.py index a1b1fc3..48a2200 100644 --- a/instaloader/utils.py +++ b/instaloader/utils.py @@ -3,7 +3,7 @@ from operator import itemgetter from typing import Dict from .instaloader import Instaloader -from .exceptions import QueryReturnedNotFoundException, ProfileNotExistsException +from .exceptions import QueryReturnedNotFoundException, ProfileNotExistsException, TwoFactorAuthRequiredException, BadCredentialsException from .structures import Profile @@ -35,13 +35,13 @@ def login(loader: Instaloader, username: str, password: str): if not re.match(r"^[A-Za-z0-9._]+$", username): loader.context.error( "Warning: Parameter \"{}\" for --login is not a valid username.".format(username)) - try: - loader.load_session_from_file(username, sessionfile) - except FileNotFoundError as err: - if sessionfile is not None: - print(err, file=sys.stderr) - loader.context.log("Session file does not exist yet - Logging in.") - if not loader.context.is_logged_in or username != instaloader.test_login(): + # try: + # loader.load_session_from_file(username, sessionfile) + # except FileNotFoundError as err: + # if sessionfile is not None: + # print(err, file=sys.stderr) + # loader.context.log("Session file does not exist yet - Logging in.") + if not loader.context.is_logged_in or username != loader.test_login(): if password is not None: try: loader.login(username, password)