From 2ef13505633ada7a7048202e2189ba597652478c Mon Sep 17 00:00:00 2001
From: Sultan Iman
Date: Fri, 1 Mar 2024 11:31:23 +0100
Subject: [PATCH] Return self from __call__

---
 sources/scraping/runner.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/sources/scraping/runner.py b/sources/scraping/runner.py
index 10e7c24eb..25a5292f8 100644
--- a/sources/scraping/runner.py
+++ b/sources/scraping/runner.py
@@ -1,9 +1,10 @@
 import threading
 import typing as t
-
 import dlt
+
 from dlt.common import logger
 from pydispatch import dispatcher  # type: ignore
+from typing_extensions import Self
 
 from scrapy import signals, Item, Spider  # type: ignore
 from scrapy.crawler import CrawlerProcess  # type: ignore
@@ -20,9 +21,6 @@ def __init__(self, pipeline_name: str, queue: ScrapingQueue[T]) -> None:
         self.queue = queue
         self.pipeline_name = pipeline_name
 
-    def __call__(self, crawler: CrawlerProcess) -> None:
-        self.crawler = crawler
-
     def on_item_scraped(self, item: Item) -> None:
         if not self.queue.is_closed:
             self.queue.put(item)
@@ -40,6 +38,10 @@ def on_engine_stopped(self) -> None:
         self.queue.join()
         self.queue.close()
 
+    def __call__(self, crawler: CrawlerProcess) -> Self:
+        self.crawler = crawler
+        return self
+
     def __enter__(self) -> None:
         # We want to receive on_item_scraped callback from
         # outside so we don't have to know about any queue instance.