Skip to content

Commit

Permalink
Return self from __call__
Browse files — browse the repository at this point in the history
  • Loading branch information
sultaniman committed Mar 1, 2024
1 parent b5f0f06 commit 2ef1350
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions sources/scraping/runner.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import threading
import typing as t

import dlt

from dlt.common import logger
from pydispatch import dispatcher # type: ignore
from typing_extensions import Self

from scrapy import signals, Item, Spider # type: ignore
from scrapy.crawler import CrawlerProcess # type: ignore
Expand All @@ -20,9 +21,6 @@ def __init__(self, pipeline_name: str, queue: ScrapingQueue[T]) -> None:
self.queue = queue
self.pipeline_name = pipeline_name

def __call__(self, crawler: CrawlerProcess) -> None:
self.crawler = crawler

def on_item_scraped(self, item: Item) -> None:
if not self.queue.is_closed:
self.queue.put(item)
Expand All @@ -40,6 +38,10 @@ def on_engine_stopped(self) -> None:
self.queue.join()
self.queue.close()

def __call__(self, crawler: CrawlerProcess) -> Self:
    """Attach *crawler* to this instance and return the instance.

    Returning ``self`` makes the call fluent, so the object can be
    configured and used in a single expression by the caller.
    """
    self.crawler = crawler
    return self

def __enter__(self) -> None:
# We want to receive on_item_scraped callback from
# outside so we don't have to know about any queue instance.
Expand Down

0 comments on commit 2ef1350

Please sign in to comment.