From b5d286e27b68298d03544eab199b9b1de961dee7 Mon Sep 17 00:00:00 2001 From: Rob Bast Date: Fri, 3 Jul 2015 10:15:17 +0200 Subject: [PATCH] apply a regex solution instead of tokenizer --- src/Composer/Util/SpdxLicense.php | 160 +++++++------------ tests/Composer/Test/Util/SpdxLicenseTest.php | 4 +- 2 files changed, 58 insertions(+), 106 deletions(-) diff --git a/src/Composer/Util/SpdxLicense.php b/src/Composer/Util/SpdxLicense.php index b9ffdc70d..d26c5e9b1 100644 --- a/src/Composer/Util/SpdxLicense.php +++ b/src/Composer/Util/SpdxLicense.php @@ -192,115 +192,69 @@ class SpdxLicense * @param string $license * * @return bool + * * @throws \RuntimeException */ private function isValidLicenseString($license) { - $tokens = array( - 'po' => '\(', - 'pc' => '\)', - 'op' => '(?:or|OR|and|AND)', - 'wi' => '(?:with|WITH)', - 'lix' => '(?:NONE|NOASSERTION)', - 'lir' => 'LicenseRef-\d+', - 'lic' => '[-_.a-zA-Z0-9]{3,}\+?', - 'ws' => '\s+', - '_' => '.', - ); - - $next = function () use ($license, $tokens) { - static $offset = 0; - - if ($offset >= strlen($license)) { - return null; - } - - foreach ($tokens as $name => $token) { - if (false === $r = preg_match('{' . $token . '}', $license, $matches, PREG_OFFSET_CAPTURE, $offset)) { - throw new \RuntimeException('Pattern for token %s failed (regex error).', $name); - } - if ($r === 0) { - continue; - } - if ($matches[0][1] !== $offset) { - continue; - } - $offset += strlen($matches[0][0]); - - return array($name, $matches[0][0]); - } + $licenses = array_map('preg_quote', array_keys($this->licenses)); + sort($licenses); + $licenses = array_reverse($licenses); + $licenses = implode('|', $licenses); + + $exceptions = array_map('preg_quote', array_keys($this->exceptions)); + sort($exceptions); + $exceptions = array_reverse($exceptions); + $exceptions = implode('|', $exceptions); + + $regex = "{ + (?(DEFINE) + # idstring: 1*( ALPHA / DIGIT / - / . 
) + (?<idstring>[\pL\pN\-\.]{1,}) + + # license-id: taken from list + (?<licenseid>${licenses}) + + # license-exception-id: taken from list + (?<licenseexceptionid>${exceptions}) + + # license-ref: [DocumentRef-1*(idstring):]LicenseRef-1*(idstring) + (?<licenseref>(?:DocumentRef-(?&idstring):)?LicenseRef-(?&idstring)) + + # simple-expresssion: license-id / license-id+ / license-ref + (?<simple_expression>(?&licenseid)\+? | (?&licenseid) | (?&licenseref)) + + # compound expression: 1*( + # simple-expression / + # simple-expression WITH license-exception-id / + # compound-expression AND compound-expression / + # compound-expression OR compound-expression + # ) / ( compound-expression ) ) + (?<compound_head> + (?&simple_expression) ( \s+ (?:with|WITH) \s+ (?&licenseexceptionid))? + | \( \s* (?&compound_expression) \s*\) + ) + (?<compound_expression> + (?&compound_head) (?: \s+ (?:and|AND|or|OR) \s+ (?&compound_expression))? + ) + + # license-expression: 1*1(simple-expression / compound-expression) + (?<license_expression>NONE | NOASSERTION | (?&compound_expression) | (?&simple_expression)) + ) # end of define + + ^(?&license_expression)$ + }x"; + + $match = preg_match($regex, $license); + + if (0 === $match) { + return false; + } - throw new \RuntimeException( - 'At least the last pattern needs to match, but it did not (dot-match-all is missing?).' 
- ); - }; - - $open = 0; - $with = false; - $require = true; - $lastop = null; - - while (list($token, $string) = $next()) { - switch ($token) { - case 'po': - if ($open || !$require || $with) { - return false; - } - $open = 1; - break; - case 'pc': - if ($open !== 1 || $require || !$lastop || $with) { - return false; - } - $open = 2; - break; - case 'op': - if ($require || !$open || $with) { - return false; - } - $lastop || $lastop = $string; - if ($lastop !== $string) { - return false; - } - $require = true; - break; - case 'wi': - $with = true; - break; - case 'lix': - if ($open || $with) { - return false; - } - goto lir; - case 'lic': - if ($with && $this->isValidExceptionIdentifier($string)) { - $require = true; - $with = false; - goto lir; - } - if ($with) { - return false; - } - if (!$this->isValidLicenseIdentifier(rtrim($string, '+'))) { - return false; - } - // Fall-through intended - case 'lir': - lir: - if (!$require) { - return false; - } - $require = false; - break; - case 'ws': - break; - case '_': - return false; - default: - throw new \RuntimeException(sprintf('Unparsed token: %s.', print_r($token, true))); - } + if (false === $match) { + throw new \RuntimeException('Regex failed to compile/run.'); } - return !($open % 2 || $require || $with); + return true; } } diff --git a/tests/Composer/Test/Util/SpdxLicenseTest.php b/tests/Composer/Test/Util/SpdxLicenseTest.php index ef6e7d45d..207e6ed80 100644 --- a/tests/Composer/Test/Util/SpdxLicenseTest.php +++ b/tests/Composer/Test/Util/SpdxLicenseTest.php @@ -39,6 +39,7 @@ class SpdxLicenseTest extends TestCase "GPL-2.0 with Autoconf-exception-2.0", "GPL-2.0 WITH Autoconf-exception-2.0", "GPL-2.0+ WITH Autoconf-exception-2.0", + "(GPL-3.0 and GPL-2.0 or GPL-3.0+)", ), $identifiers ); @@ -57,7 +58,6 @@ class SpdxLicenseTest extends TestCase array(array()), array("The system pwns you"), array("()"), - array("(MIT)"), array("(MIT"), array("MIT)"), array("MIT NONE"), @@ -66,8 +66,6 @@ class SpdxLicenseTest 
extends TestCase array("(MIT and MIT) MIT"), array(array("LGPL-2.0", "The system pwns you")), array("and GPL-3.0+"), - array("EUDatagrid and GPL-3.0+"), - array("(GPL-3.0 and GPL-2.0 or GPL-3.0+)"), array("(EUDatagrid and GPL-3.0+ and )"), array("(EUDatagrid xor GPL-3.0+)"), array("(MIT Or MIT)"),