Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor fix for header parsing and remote file transfer #28

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .codeclimate.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
version: 2
version: "2"
plugins:
phpcodesniffer:
enabled: true
Expand All @@ -14,4 +14,3 @@ exclude_patterns:
- ".circleci/"
- ".gitignore"
- "docs/"

2 changes: 1 addition & 1 deletion phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
</coverage>
<testsuites>
<testsuite name="all">
<directory suffix="Test.php" phpVersion="7.2" phpVersionOperator="&gt;=">test</directory>
<directory suffix="Test.php">test</directory>
</testsuite>
</testsuites>
</phpunit>
96 changes: 62 additions & 34 deletions src/Processor/Remote.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,85 @@

namespace FileFetcher\Processor;

use GuzzleHttp\Client;
use Procrastinator\Result;

class Remote extends ProcessorBase implements ProcessorInterface
class Remote extends AbstractChunkedProcessor
{
protected const HTTP_URL_REGEX = '%^(?:https?://)?(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\x{00a1}-' .
'\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)(?:\.(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)*(?:' .
'\.[a-z\x{00a1}-\x{ffff}]{2,6}))(?::\d+)?(?:[^\s]*)?$%iu';
// phpcs:disable
protected const HTTP_URL_REGEX = '%^(?:https?://)?(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)(?:\.(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)*(?:\.[a-z\x{00a1}-\x{ffff}]{2,6}))(?::\d+)?(?:[^\s]*)?$%iu';
// phpcs:enable

protected function getFileSize(string $filePath): int
{
$headers = $this->getHeaders($filePath);
// @todo Handle situation where the file size cannot be adequately
// determined.
return $headers['content-length'] ?? 0;
}

public function isServerCompatible(array $state): bool
{
return preg_match(self::HTTP_URL_REGEX, $state['source']) === 1;
}

public function setupState(array $state): array
protected function getChunk(string $filePath, int $start, int $end)
{
$state['destination'] = $this->getTemporaryFilePath($state);
$state['temporary'] = true;
$state['total_bytes'] = 0;

if (file_exists($state['destination'])) {
$state['total_bytes_copied'] = $this->getFilesize($state['destination']);
}

return $state;
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $filePath);
curl_setopt($ch, CURLOPT_RANGE, "{$start}-{$end}");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$result = $this->php->curl_exec($ch);
curl_close($ch);
return $result;
}

public function copy(array $state, Result $result, int $timeLimit = PHP_INT_MAX): array
protected function getHeaders($url)
{
$client = $this->getClient();
try {
$fout = fopen($state['destination'], "w");
$client->get($state['source'], ['sink' => $fout]);
$result->setStatus(Result::DONE);
} catch (\Exception $e) {
$result->setStatus(Result::ERROR);
$result->setError($e->getMessage());
}
$ch = curl_init();
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 20);
curl_setopt($ch, CURLOPT_HEADER, true);
curl_setopt($ch, CURLOPT_NOBODY, true);

$state['total_bytes_copied'] = $state['total_bytes'] = $this->getFilesize($state['destination']);
return ['state' => $state, 'result' => $result];
$headers = static::parseHeaders($this->php->curl_exec($ch));
curl_close($ch);
return $headers;
}

protected function getFileSize($path): int
/**
* Parse headers as array from header string.
*
* @param $string
* The headers.
*
* @return array
* The headers as an associative array. Header names are converted to
* lower case.
*/
public static function parseHeaders($string)
{
clearstatcache();
return filesize($path);
$headers = [];
$lines = explode(PHP_EOL, $string);
foreach ($lines as $line) {
$line = trim($line);
$keyvalue = self::getKeyValueFromLine($line);
$headers[strtolower($keyvalue['key'])] = $keyvalue['value'];
}
return $headers;
}

protected function getClient(): Client
private static function getKeyValueFromLine($line): array
{
return new Client();
$key = null;
$value = null;

$parts = explode(":", $line);
if (count($parts) > 1) {
$key = array_shift($parts);
$value = trim(implode(":", $parts));
} else {
$value = trim($line);
}

return ['key' => $key, 'value' => $value];
}
}
13 changes: 4 additions & 9 deletions test/Mock/FakeRemote.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,9 @@ protected function getClient(): Client

private function getMockHandler()
{
$mock = new MockHandler([
new Response(200, ['X-Foo' => 'Bar'], 'Hello, World'),
]);
return $mock;
}

protected function getFileSize($path): int
{
return 10;
$data = "";
$numberOfBytes = $end - $start;
$data = str_pad($data, $numberOfBytes, 'A');
return !empty(trim($data)) ? $data . PHP_EOL : false;
}
}
50 changes: 42 additions & 8 deletions test/Processor/RemoteTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
use FileFetcher\FileFetcher;
use FileFetcher\Processor\Remote;
use FileFetcherTests\Mock\FakeRemote;
use org\bovigo\vfs\vfsStream;
use MockChain\Chain;
use MockChain\Options;
use PHPUnit\Framework\TestCase;
use Procrastinator\Result;

Expand All @@ -18,19 +19,23 @@ class RemoteTest extends TestCase

public function testCopyAFileWithRemoteProcessor()
{
$config = [
"filePath" => 'http://notreal.blah/notacsv.csv',
"processors" => [FakeRemote::class]
];
$fetcher = FileFetcher::get("1", new Memory(), $config);

$this->assertNotSame(false, $fetcher = FileFetcher::get(
"1",
new Memory(),
[
"filePath" => 'http://notreal.blah/notacsv.csv',
"processors" => [FakeRemote::class]
]
));

$fetcher->setTimeLimit(1);

$result = $fetcher->run();
$state = $fetcher->getState();

$this->assertEquals(10, $state['total_bytes_copied']);
$this->assertEquals(Result::DONE, $result->getStatus());
$this->assertEquals(FakeRemote::class, $state['processor']);
$this->assertEquals('/tmp/notreal_blah_notacsv.csv', $state['destination']);
}

public function provideIsServerCompatible(): array
Expand Down Expand Up @@ -72,4 +77,33 @@ public function testCopyException()
$this->assertSame(Result::ERROR, $result->getStatus());
$this->assertStringContainsString('ailed to open stream', $result->getError());
}

public function provideFileSizeHeaders()
{
return [
[1, ['content-length' => 1]],
[0, ['content-length' => 0]],
'wrong_type' => [23, ['content-length' => '23']],
'wrong_type_null' => [0, ['content-length' => null]],
'no_header' => [0, []],
];
}

/**
* @covers \FileFetcher\Processor\Remote::getFileSize()
* @dataProvider provideFileSizeHeaders
*/
public function testGetFileSize($expected, $headers)
{
$remote = $this->getMockBuilder(Remote::class)
->onlyMethods(['getFileSize', 'getHeaders'])
->getMock();
$remote->method('getHeaders')
->willReturn($headers);
$ref_getFileSize = (new \ReflectionClass(Remote::class))
->getMethod('getFileSize');
$ref_getFileSize->setAccessible(true);

$this->assertSame($expected, $ref_getFileSize->invokeArgs($remote, ['filepath']));
}
}