From a13b06725e55a0eddd806da299446d27a2d745b5 Mon Sep 17 00:00:00 2001 From: Jordi Boggiano Date: Mon, 18 Jan 2016 12:25:37 +0000 Subject: [PATCH] Add support for if-modified-since on lazy repos and turning packagist into a lazy provider repo --- .../Downloader/TransportException.php | 11 ++ .../Repository/ComposerRepository.php | 112 ++++++++++++++++-- src/Composer/Util/RemoteFilesystem.php | 62 ++++++++-- 3 files changed, 164 insertions(+), 21 deletions(-) diff --git a/src/Composer/Downloader/TransportException.php b/src/Composer/Downloader/TransportException.php index 2e4b42f01..c682df080 100644 --- a/src/Composer/Downloader/TransportException.php +++ b/src/Composer/Downloader/TransportException.php @@ -19,6 +19,7 @@ class TransportException extends \RuntimeException { protected $headers; protected $response; + protected $statusCode; public function setHeaders($headers) { @@ -39,4 +40,14 @@ class TransportException extends \RuntimeException { return $this->response; } + + public function setStatusCode($statusCode) + { + $this->statusCode = $statusCode; + } + + public function getStatusCode() + { + return $this->statusCode; + } } diff --git a/src/Composer/Repository/ComposerRepository.php b/src/Composer/Repository/ComposerRepository.php index 1994876c5..6806d1c8a 100644 --- a/src/Composer/Repository/ComposerRepository.php +++ b/src/Composer/Repository/ComposerRepository.php @@ -26,6 +26,7 @@ use Composer\Util\RemoteFilesystem; use Composer\Plugin\PluginEvents; use Composer\Plugin\PreFileDownloadEvent; use Composer\EventDispatcher\EventDispatcher; +use Composer\Downloader\TransportException; use Composer\Semver\Constraint\ConstraintInterface; use Composer\Semver\Constraint\Constraint; @@ -285,10 +286,12 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito $this->loadProviderListings($this->loadRootServerFile()); } + $useLastModifiedCheck = false; if ($this->lazyProvidersUrl && !isset($this->providerListing[$name])) { $hash = null; $url = str_replace('%package%', $name, $this->lazyProvidersUrl); - $cacheKey = false; + $cacheKey = 'provider-'.strtr($name, '/', '$').'.json'; + $useLastModifiedCheck = true; } elseif ($this->providersUrl) { // package does not exist in this repo if (!isset($this->providerListing[$name])) { @@ -310,11 +313,36 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito $cacheKey = null; } - if ($cacheKey && $this->cache->sha256($cacheKey) === $hash) { - $packages = json_decode($this->cache->read($cacheKey), true); - } else { - // TODO check if we can do if-modified-since or etag header here and skip the listings - $packages = $this->fetchFile($url, $cacheKey, $hash); + $packages = null; + if ($cacheKey) { + if (!$useLastModifiedCheck && $hash && $this->cache->sha256($cacheKey) === $hash) { + $packages = json_decode($this->cache->read($cacheKey), true); + } elseif ($useLastModifiedCheck) { + if ($contents = $this->cache->read($cacheKey)) { + $contents = json_decode($contents, true); + if (isset($contents['last-modified'])) { + $response = $this->fetchFileIfLastModified($url, $cacheKey, $contents['last-modified']); + if (true === $response) { + $packages = $contents; + } elseif ($response) { + $packages = $response; + } + } + } + } + } + + if (!$packages) { + try { + $packages = $this->fetchFile($url, $cacheKey, $hash, $useLastModifiedCheck); + } catch (TransportException $e) { + // 404s are acceptable for lazy provider repos + if ($e->getStatusCode() === 404 && $this->lazyProvidersUrl) { + $packages = array('packages' => array()); + } else { + throw $e; + } + } } $this->providers[$name] = array(); @@ -477,6 +505,14 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito $this->hasProviders = true; } + // force values for packagist + if (preg_match('{^https?://packagist.org/?$}i', $this->url) && !empty($this->repoConfig['force-lazy-providers'])) { + $this->url = 'https://packagist.org'; + $this->baseUrl = 'https://packagist.org'; + $this->lazyProvidersUrl = $this->canonicalizeUrl('https://packagist.org/p/%package%.json'); + $this->providersUrl = null; + } + return $this->rootData = $data; } @@ -590,7 +626,7 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito } } - protected function fetchFile($filename, $cacheKey = null, $sha256 = null) + protected function fetchFile($filename, $cacheKey = null, $sha256 = null, $storeLastModifiedTime = false) { if (null === $cacheKey) { $cacheKey = $filename; @@ -611,7 +647,8 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito } $hostname = parse_url($filename, PHP_URL_HOST) ?: $filename; - $json = $preFileDownloadEvent->getRemoteFilesystem()->getContents($hostname, $filename, false); + $rfs = $preFileDownloadEvent->getRemoteFilesystem(); + $json = $rfs->getContents($hostname, $filename, false); if ($sha256 && $sha256 !== hash('sha256', $json)) { if ($retries) { usleep(100000); @@ -622,13 +659,25 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito // TODO use scarier wording once we know for sure it doesn't do false positives anymore throw new RepositorySecurityException('The contents of '.$filename.' do not match its signature. This should indicate a man-in-the-middle attack. Try running composer again and report this if you think it is a mistake.'); } + $data = JsonFile::parseJson($json, $filename); if ($cacheKey) { + if ($storeLastModifiedTime) { + $lastModifiedDate = $rfs->findHeaderValue($rfs->getLastHeaders(), 'last-modified'); + if ($lastModifiedDate) { + $data['last-modified'] = $lastModifiedDate; + $json = json_encode($data); + } + } $this->cache->write($cacheKey, $json); } break; } catch (\Exception $e) { + if ($e instanceof TransportException && $e->getStatusCode() === 404) { + throw $e; + } + if ($retries) { usleep(100000); continue; @@ -655,4 +704,51 @@ class ComposerRepository extends ArrayRepository implements ConfigurableReposito return $data; } + + protected function fetchFileIfLastModified($filename, $cacheKey, $lastModifiedTime) + { + $retries = 3; + while ($retries--) { + try { + $preFileDownloadEvent = new PreFileDownloadEvent(PluginEvents::PRE_FILE_DOWNLOAD, $this->rfs, $filename); + if ($this->eventDispatcher) { + $this->eventDispatcher->dispatch($preFileDownloadEvent->getName(), $preFileDownloadEvent); + } + + $hostname = parse_url($filename, PHP_URL_HOST) ?: $filename; + $rfs = $preFileDownloadEvent->getRemoteFilesystem(); + $options = array('http' => array('header' => array('If-Modified-Since: '.$lastModifiedTime))); + $json = $rfs->getContents($hostname, $filename, false, $options); + if ($json === '' && $rfs->findStatusCode($rfs->getLastHeaders()) === 304) { + return true; + } + + $data = JsonFile::parseJson($json, $filename); + $lastModifiedDate = $rfs->findHeaderValue($rfs->getLastHeaders(), 'last-modified'); + if ($lastModifiedDate) { + $data['last-modified'] = $lastModifiedDate; + $json = json_encode($data); + } + $this->cache->write($cacheKey, $json); + + return $data; + } catch (\Exception $e) { + if ($e instanceof TransportException && $e->getStatusCode() === 404) { + throw $e; + } + + if ($retries) { + usleep(100000); + continue; + } + + if (!$this->degradedMode) { + $this->io->writeError(''.$e->getMessage().''); + $this->io->writeError(''.$this->url.' could not be fully loaded, package information was loaded from the local cache and may be out of date'); + } + $this->degradedMode = true; + return true; + } + } + } } diff --git a/src/Composer/Util/RemoteFilesystem.php b/src/Composer/Util/RemoteFilesystem.php index 31124772e..e4aa38fee 100644 --- a/src/Composer/Util/RemoteFilesystem.php +++ b/src/Composer/Util/RemoteFilesystem.php @@ -138,6 +138,45 @@ class RemoteFilesystem return $this->lastHeaders; } + /** + * @param array $headers array of returned headers like from getLastHeaders() + * @param string $name header name (case insensitive) + * @return string|null + */ + public function findHeaderValue(array $headers, $name) + { + $value = null; + foreach ($headers as $header) { + if (preg_match('{^'.$name.':\s*(.+?)\s*$}i', $header, $match)) { + $value = $match[1]; + } elseif (preg_match('{^HTTP/}i', $header)) { + // In case of redirects, http_response_headers contains the headers of all responses + // so we reset the flag when a new response is being parsed as we are only interested in the last response + $value = null; + } + } + + return $value; + } + + /** + * @param array $headers array of returned headers like from getLastHeaders() + * @return int|null + */ + public function findStatusCode(array $headers) + { + $value = null; + foreach ($headers as $header) { + if (preg_match('{^HTTP/\S+ (\d+)}i', $header, $match)) { + // In case of redirects, http_response_headers contains the headers of all responses + // so we can not return directly and need to keep iterating + $value = (int) $match[1]; + } + } + + return $value; + } + /** * Get file content or copy action. * @@ -224,6 +263,7 @@ class RemoteFilesystem } catch (\Exception $e) { if ($e instanceof TransportException && !empty($http_response_header[0])) { $e->setHeaders($http_response_header); + $e->setStatusCode($this->findStatusCode($http_response_header)); } if ($e instanceof TransportException && $result !== false) { $e->setResponse($result); @@ -248,13 +288,18 @@ class RemoteFilesystem throw $e; } + $statusCode = null; + if (!empty($http_response_header[0])) { + $statusCode = $this->findStatusCode($http_response_header); + } + // fail 4xx and 5xx responses and capture the response - if (!empty($http_response_header[0]) && preg_match('{^HTTP/\S+ ([45]\d\d)}i', $http_response_header[0], $match)) { - $errorCode = $match[1]; + if ($statusCode && $statusCode >= 400 && $statusCode <= 599) { if (!$this->retry) { - $e = new TransportException('The "'.$this->fileUrl.'" file could not be downloaded ('.$http_response_header[0].')', $errorCode); + $e = new TransportException('The "'.$this->fileUrl.'" file could not be downloaded ('.$http_response_header[0].')', $statusCode); $e->setHeaders($http_response_header); $e->setResponse($result); + $e->setStatusCode($statusCode); throw $e; } $result = false; @@ -266,16 +311,7 @@ class RemoteFilesystem // decode gzip if ($result && extension_loaded('zlib') && substr($fileUrl, 0, 4) === 'http') { - $decode = false; - foreach ($http_response_header as $header) { - if (preg_match('{^content-encoding: *gzip *$}i', $header)) { - $decode = true; - } elseif (preg_match('{^HTTP/}i', $header)) { - // In case of redirects, http_response_headers contains the headers of all responses - // so we reset the flag when a new response is being parsed as we are only interested in the last response - $decode = false; - } - } + $decode = 'gzip' === strtolower($this->findHeaderValue($http_response_header, 'content-encoding')); if ($decode) { try {