Skip to content

Commit

Permalink
Consolidate block fetch requests.
Browse files Browse the repository at this point in the history
  • Loading branch information
groutr committed Oct 22, 2024
1 parent 97a2168 commit 27af35d
Showing 1 changed file with 14 additions and 12 deletions.
26 changes: 14 additions & 12 deletions fsspec/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import os
import threading
import warnings
from itertools import groupby
from operator import itemgetter
from concurrent.futures import Future, ThreadPoolExecutor
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -161,21 +163,21 @@ def _fetch(self, start: int | None, end: int | None) -> bytes:
return b""
start_block = start // self.blocksize
end_block = end // self.blocksize
need = [i for i in range(start_block, end_block + 1) if i not in self.blocks]
hits = [i for i in range(start_block, end_block + 1) if i in self.blocks]
block_range = range(start_block, end_block + 1)
need = [i for i in block_range if i not in self.blocks]
self.miss_count += len(need)
self.hit_count += len(hits)
while need:
# TODO: not a for loop so we can consolidate blocks later to
# make fewer fetch calls; this could be parallel
i = need.pop(0)

sstart = i * self.blocksize
send = min(sstart + self.blocksize, self.size)
self.hit_count += len(self.blocks.intersection(block_range))

# Consolidate needed blocks.
# Algorithm adapted from Python 2.x itertools documentation
for _, _blocks in groupby(enumerate(need), key=lambda x: x[0] - x[1]):
_blocks = tuple(map(itemgetter(1), _blocks))
sstart = _blocks[0] * self.blocksize
send = min(_blocks[-1] * self.blocksize + self.blocksize, self.size)
self.total_requested_bytes += send - sstart
logger.debug(f"MMap get block #{i} ({sstart}-{send})")
logger.debug(f"MMap get blocks {_blocks[0]}-{_blocks[-1]} ({sstart}-{send}")
self.cache[sstart:send] = self.fetcher(sstart, send)
self.blocks.add(i)
self.blocks.update(_blocks)

return self.cache[start:end]

Expand Down

0 comments on commit 27af35d

Please sign in to comment.