From 69cb351434f2b72628803c4dee32971839a3ecee Mon Sep 17 00:00:00 2001 From: Thomas Honeyman Date: Mon, 16 Feb 2026 16:12:26 -0500 Subject: [PATCH 1/5] fix #753 with strict manifest SPDX parsing and lenient detected canonicalization --- .../licenses/ambiguous-gfdl/package.json | 5 + .../licenses/deprecated-agpl/package.json | 5 + app/src/App/API.purs | 97 ++++--- app/src/App/Legacy/Manifest.purs | 16 +- app/test/App/API.purs | 29 +- lib/src/License.js | 57 ++-- lib/src/License.purs | 267 +++++++++++++++++- lib/test/Registry.purs | 2 + lib/test/Registry/License.purs | 156 +++++----- 9 files changed, 467 insertions(+), 167 deletions(-) create mode 100644 app/fixtures/licenses/ambiguous-gfdl/package.json create mode 100644 app/fixtures/licenses/deprecated-agpl/package.json diff --git a/app/fixtures/licenses/ambiguous-gfdl/package.json b/app/fixtures/licenses/ambiguous-gfdl/package.json new file mode 100644 index 000000000..75d8c9065 --- /dev/null +++ b/app/fixtures/licenses/ambiguous-gfdl/package.json @@ -0,0 +1,5 @@ +{ + "name": "ambiguous-gfdl-fixture", + "version": "1.0.0", + "license": "GFDL-1.3" +} diff --git a/app/fixtures/licenses/deprecated-agpl/package.json b/app/fixtures/licenses/deprecated-agpl/package.json new file mode 100644 index 000000000..f1b3071bc --- /dev/null +++ b/app/fixtures/licenses/deprecated-agpl/package.json @@ -0,0 +1,5 @@ +{ + "name": "deprecated-agpl-fixture", + "version": "1.0.0", + "license": "AGPL-3.0" +} diff --git a/app/src/App/API.purs b/app/src/App/API.purs index b829ffd10..5724ae1c0 100644 --- a/app/src/App/API.purs +++ b/app/src/App/API.purs @@ -1308,24 +1308,28 @@ instance FsEncodable PursGraphCache where Exists.mkExists $ Cache.AsJson cacheKey codec next -- | Errors that can occur when validating license consistency -data LicenseValidationError = LicenseMismatch - { manifestLicense :: License - , detectedLicenses :: Array License - } +data LicenseValidationError + = LicenseMismatch { manifest :: License, detected :: Array License } + | LicenseParseError (Array { detected :: String, error :: String }) derive instance Eq LicenseValidationError printLicenseValidationError :: LicenseValidationError -> String printLicenseValidationError = case _ of - LicenseMismatch { manifestLicense, detectedLicenses } -> Array.fold + LicenseMismatch licenses -> Array.fold [ "License mismatch: The manifest specifies license '" - , License.print manifestLicense + , License.print licenses.manifest , "' but the following license(s) were detected in your repository: " - , String.joinWith ", " (map License.print detectedLicenses) + , String.joinWith ", " (map License.print licenses.detected) , ". Please ensure your manifest license accurately represents all licenses " , "in your repository. If multiple licenses apply, join them using SPDX " , "conjunctions (e.g., 'MIT AND Apache-2.0' or 'MIT OR Apache-2.0')." ] + LicenseParseError failures -> Array.fold + [ "License validation failed: one or more detected SPDX license expressions " + , "could not be canonicalized.\n" + , String.joinWith "\n" (failures <#> \{ detected, error } -> " - " <> detected <> " (" <> error <> ")") + ] -- | Validate that the license in the manifest is consistent with licenses -- | detected in the repository (LICENSE file, package.json, bower.json). @@ -1344,44 +1348,55 @@ validateLicense packageDir manifestLicense = do pure Nothing Right detectedStrings -> do let + parseDetectedLicense :: String -> Either String License + parseDetectedLicense detectedLicense = do + canonicalized <- License.canonicalizeDetected detectedLicense + License.parse canonicalized + + parsedDetectedLicenses :: Array (Tuple String (Either String License)) + parsedDetectedLicenses = + detectedStrings <#> \detectedLicense -> + Tuple detectedLicense (parseDetectedLicense detectedLicense) + + parseFailures :: Array { detected :: String, error :: String } + parseFailures = + parsedDetectedLicenses # Array.mapMaybe \(Tuple detectedLicense parsed) -> case parsed of + Left error -> Just { detected: detectedLicense, error } + Right _ -> Nothing + parsedLicenses :: Array License - parsedLicenses = Array.mapMaybe (hush <<< License.parse) detectedStrings + parsedLicenses = parsedDetectedLicenses # Array.mapMaybe \(Tuple _ parsed) -> hush parsed Log.debug $ "Detected licenses: " <> String.joinWith ", " detectedStrings - if Array.null parsedLicenses then do + if not (Array.null parseFailures) then do + Log.warn "Some detected licenses could not be canonicalized." + pure $ Just $ LicenseParseError parseFailures + else if Array.null parsedLicenses then do Log.debug "No licenses detected from repository files, nothing to validate." pure Nothing - else case License.extractIds manifestLicense of - Left err -> do - -- This shouldn't be possible (we have already validated the license) - -- as part of constructing the manifest - Log.warn $ "Could not extract license IDs from manifest: " <> err + else do + let + manifestIds = License.extractIds manifestLicense + manifestIdSet = Set.fromFoldable manifestIds + + -- A detected license is covered if all its IDs are in the manifest IDs + isCovered :: License -> Boolean + isCovered license = + License.extractIds license # Array.all \id -> + Set.member id manifestIdSet + + uncoveredLicenses :: Array License + uncoveredLicenses = Array.filter (not <<< isCovered) parsedLicenses + + if Array.null uncoveredLicenses then do + Log.debug "All detected licenses are covered by the manifest license." pure Nothing - Right manifestIds -> do - let - manifestIdSet = Set.fromFoldable manifestIds - - -- A detected license is covered if all its IDs are in the manifest IDs - isCovered :: License -> Boolean - isCovered license = case License.extractIds license of - Left _ -> false - Right ids -> Array.all (\id -> Set.member id manifestIdSet) ids - - uncoveredLicenses :: Array License - uncoveredLicenses = Array.filter (not <<< isCovered) parsedLicenses - - if Array.null uncoveredLicenses then do - Log.debug "All detected licenses are covered by the manifest license." - pure Nothing - else do - Log.warn $ Array.fold - [ "License mismatch detected: manifest has '" - , License.print manifestLicense - , "' but detected " - , String.joinWith ", " (map License.print parsedLicenses) - ] - pure $ Just $ LicenseMismatch - { manifestLicense - , detectedLicenses: uncoveredLicenses - } + else do + Log.warn $ Array.fold + [ "License mismatch detected: manifest has '" + , License.print manifestLicense + , "' but detected " + , String.joinWith ", " (map License.print parsedLicenses) + ] + pure $ Just $ LicenseMismatch { manifest: manifestLicense, detected: uncoveredLicenses } diff --git a/app/src/App/Legacy/Manifest.purs b/app/src/App/Legacy/Manifest.purs index 1ee576d65..ae841d120 100644 --- a/app/src/App/Legacy/Manifest.purs +++ b/app/src/App/Legacy/Manifest.purs @@ -15,6 +15,7 @@ import Registry.App.Prelude import Codec.JSON.DecodeError as CJ.DecodeError import Data.Array as Array +import Data.Array.NonEmpty as NonEmptyArray import Data.Codec as Codec import Data.Codec.JSON as CJ import Data.Codec.JSON.Common as CJ.Common @@ -74,9 +75,18 @@ bowerfileToPursJson :: Bowerfile -> Either String { license :: License, description :: Maybe String, dependencies :: Map PackageName Range } bowerfileToPursJson (Bowerfile { description, dependencies, license }) = do - parsedLicense <- case Array.mapMaybe (hush <<< License.parse) license of - [] -> Left "No valid SPDX license found in bower.json" - multiple -> Right $ License.joinWith License.And multiple + let + parsedLicenses = license <#> \rawLicense -> + lmap (\err -> " - " <> rawLicense <> ": " <> err) $ License.parse rawLicense + { fail: parseErrors, success: validLicenses } = partitionEithers parsedLicenses + + parsedLicense <- + if not (Array.null parseErrors) then do + Left $ "Invalid SPDX license(s) in bower.json:\n" <> String.joinWith "\n" parseErrors + else do + case NonEmptyArray.fromArray validLicenses of + Nothing -> Left "No valid SPDX license found in bower.json" + Just multiple -> Right $ License.joinWith License.And multiple parsedDeps <- parseDependencies dependencies diff --git a/app/test/App/API.purs b/app/test/App/API.purs index e76a0279d..8efc1ca1c 100644 --- a/app/test/App/API.purs +++ b/app/test/App/API.purs @@ -361,7 +361,10 @@ copySourceFiles = Spec.hoistSpec identity (\_ -> Assert.Run.runBaseEffects) $ Sp licenseValidation :: Spec.Spec Unit licenseValidation = do - let fixtures = Path.concat [ "app", "fixtures", "licenses", "halogen-hooks" ] + let + fixtures = Path.concat [ "app", "fixtures", "licenses", "halogen-hooks" ] + deprecatedFixture = Path.concat [ "app", "fixtures", "licenses", "deprecated-agpl" ] + ambiguousFixture = Path.concat [ "app", "fixtures", "licenses", "ambiguous-gfdl" ] Spec.describe "validateLicense" do Spec.it "Passes when manifest license covers all detected licenses" do @@ -375,9 +378,9 @@ licenseValidation = do let manifestLicense = unsafeLicense "MIT" result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense case result of - Just (LicenseMismatch { detectedLicenses }) -> + Just (LicenseMismatch { detected }) -> -- Should detect that Apache-2.0 is not covered - Assert.shouldContain (map License.print detectedLicenses) "Apache-2.0" + Assert.shouldContain (map License.print detected) "Apache-2.0" _ -> Assert.fail "Expected LicenseMismatch error" @@ -386,11 +389,11 @@ licenseValidation = do let manifestLicense = unsafeLicense "BSD-3-Clause" result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense case result of - Just (LicenseMismatch { manifestLicense: ml, detectedLicenses }) -> do + Just (LicenseMismatch { manifest: ml, detected }) -> do Assert.shouldEqual "BSD-3-Clause" (License.print ml) -- Both MIT and Apache-2.0 should be in the detected licenses - Assert.shouldContain (map License.print detectedLicenses) "MIT" - Assert.shouldContain (map License.print detectedLicenses) "Apache-2.0" + Assert.shouldContain (map License.print detected) "MIT" + Assert.shouldContain (map License.print detected) "Apache-2.0" _ -> Assert.fail "Expected LicenseMismatch error" @@ -400,5 +403,19 @@ licenseValidation = do result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense Assert.shouldEqual Nothing result + Spec.it "Canonicalizes deterministic deprecated detected licenses" do + let manifestLicense = unsafeLicense "AGPL-3.0-only" + result <- Assert.Run.runBaseEffects $ validateLicense deprecatedFixture manifestLicense + Assert.shouldEqual Nothing result + + Spec.it "Fails when detected licenses have ambiguous deprecated identifiers" do + let manifestLicense = unsafeLicense "GFDL-1.3-only" + result <- Assert.Run.runBaseEffects $ validateLicense ambiguousFixture manifestLicense + case result of + Just (LicenseParseError failures) -> + Assert.shouldContain (map _.detected failures) "GFDL-1.3" + _ -> + Assert.fail "Expected UncanonicalizableDetectedLicenses error" + unsafeLicense :: String -> License unsafeLicense str = unsafeFromRight $ License.parse str diff --git a/lib/src/License.js b/lib/src/License.js index 1faaccfe8..2d10e0768 100644 --- a/lib/src/License.js +++ b/lib/src/License.js @@ -1,41 +1,40 @@ import parse from "spdx-expression-parse"; +import currentIds from "spdx-license-ids/index.json" with { type: "json" }; +import deprecatedIds from "spdx-license-ids/deprecated.json" with { type: "json" }; -export const parseSPDXLicenseIdImpl = (onError, onSuccess, identifier) => { - try { - parse(identifier); - return onSuccess(identifier); - } catch (_) { - return onError(`Invalid SPDX identifier ${identifier}`); +export { currentIds, deprecatedIds }; + +const toTree = (onError, node, onLeaf, onAnd, onOr) => { + if (node.license != null) { + const plus = node.plus === true; + const exception = node.exception ?? ""; + return onLeaf(node.license)(plus)(exception); } -}; -// Extract all license IDs from a parsed SPDX expression AST. -// The AST structure from spdx-expression-parse is: -// - Simple: { license: 'MIT' } -// - With exception: { license: 'GPL-2.0', exception: 'Classpath-exception-2.0' } -// - Compound: { left: {...}, conjunction: 'and'|'or', right: {...} } -const extractLicenseIds = (ast) => { - const ids = new Set(); - - const walk = (node) => { - if (!node) return; - if (node.license) { - // Normalize to uppercase for case-insensitive comparison - ids.add(node.license.toUpperCase()); + if (node.left != null && node.right != null) { + const left = toTree(onError, node.left, onLeaf, onAnd, onOr); + const right = toTree(onError, node.right, onLeaf, onAnd, onOr); + + if (node.conjunction === "and") { + return onAnd(left)(right); } - if (node.left) walk(node.left); - if (node.right) walk(node.right); - }; - walk(ast); - return Array.from(ids); + if (node.conjunction === "or") { + return onOr(left)(right); + } + + return onError(`Unsupported SPDX conjunction '${String(node.conjunction)}'`); + } + + return onError("Unsupported SPDX AST node"); }; -export const extractLicenseIdsImpl = (onError, onSuccess, expression) => { +export const parseLicenseTreeImpl = (onError, onLeaf, onAnd, onOr, expression) => { try { const ast = parse(expression); - return onSuccess(extractLicenseIds(ast)); - } catch (_) { - return onError(`Invalid SPDX expression: ${expression}`); + return toTree(onError, ast, onLeaf, onAnd, onOr); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return onError(`Invalid SPDX expression '${expression}': ${message}`); } }; diff --git a/lib/src/License.purs b/lib/src/License.purs index 8e8d16783..8149ea7bd 100644 --- a/lib/src/License.purs +++ b/lib/src/License.purs @@ -8,6 +8,7 @@ module Registry.License ( License , SPDXConjunction(..) + , canonicalizeDetected , codec , extractIds , joinWith @@ -19,17 +20,24 @@ import Prelude import Codec.JSON.DecodeError as CJ.DecodeError import Control.Monad.Except (Except, except) +import Data.Array as Array +import Data.Array.NonEmpty (NonEmptyArray) +import Data.Array.NonEmpty as NonEmptyArray import Data.Bifunctor (lmap) import Data.Codec as Codec import Data.Codec.JSON as CJ import Data.Either (Either(..)) -import Data.Function.Uncurried (Fn3, runFn3) +import Data.Function.Uncurried (Fn5, runFn5) +import Data.Map as Map +import Data.Maybe (Maybe(..)) +import Data.Set as Set import Data.String as String +import Data.String.Pattern (Pattern(..)) +import Data.Tuple (Tuple(..)) import JSON (JSON) -import Safe.Coerce (coerce) -- | An SPDX license identifier such as 'MIT' or 'Apache-2.0'. -newtype License = License String +newtype License = License LicenseTree derive newtype instance Eq License @@ -43,23 +51,250 @@ codec = CJ.named "License" $ Codec.codec' decode encode encode :: License -> JSON encode = print >>> CJ.encode CJ.string --- | Print an SPDX license identifier as a string. -print :: License -> String -print (License license) = license +data LicenseTree + = Leaf CanonicalLicenseLeaf + | Branch SPDXConjunction LicenseTree LicenseTree + +derive instance Eq LicenseTree + +type CanonicalLicenseLeaf = + { identifier :: String + , exception :: Maybe String + } + +type ParsedLicenseLeaf = + { identifier :: String + , plus :: Boolean + , exception :: Maybe String + } -foreign import parseSPDXLicenseIdImpl :: forall r. Fn3 (String -> r) (String -> r) String r +data ParsedLicenseTree + = ParsedLeaf ParsedLicenseLeaf + | ParsedBranch SPDXConjunction ParsedLicenseTree ParsedLicenseTree + +derive instance Eq ParsedLicenseTree + +foreign import parseLicenseTreeImpl + :: forall r + . Fn5 + (String -> r) + (String -> Boolean -> String -> r) + (r -> r -> r) + (r -> r -> r) + String + r + +foreign import currentIds :: Array String +foreign import deprecatedIds :: Array String -- | Parse a string as a SPDX license identifier. parse :: String -> Either String License -parse = runFn3 parseSPDXLicenseIdImpl Left (Right <<< License) +parse input = do + parsedTree <- parseExpressionTree input + canonicalTree <- canonicalizeParsedTree canonicalizeStrictLeaf parsedTree + pure $ License canonicalTree + +-- | Canonicalize SPDX IDs detected from external tooling. This is lenient for +-- | deprecated SPDX IDs where the canonical replacement is unambiguous. +canonicalizeDetected :: String -> Either String String +canonicalizeDetected input = do + parsedTree <- parseExpressionTree input + canonicalTree <- canonicalizeParsedTree canonicalizeDetectedLeaf parsedTree + pure $ renderCanonicalTree canonicalTree + +parseExpressionTree :: String -> Either String ParsedLicenseTree +parseExpressionTree = + runFn5 parseLicenseTreeImpl Left onLeaf onAnd onOr + where + onLeaf :: String -> Boolean -> String -> Either String ParsedLicenseTree + onLeaf identifier plus exception = Right $ ParsedLeaf + { identifier + , plus + , exception: if String.null exception then Nothing else Just exception + } + + onAnd :: Either String ParsedLicenseTree -> Either String ParsedLicenseTree -> Either String ParsedLicenseTree + onAnd left right = ParsedBranch And <$> left <*> right + + onOr :: Either String ParsedLicenseTree -> Either String ParsedLicenseTree -> Either String ParsedLicenseTree + onOr left right = ParsedBranch Or <$> left <*> right + +canonicalizeParsedTree + :: (ParsedLicenseLeaf -> Either String CanonicalLicenseLeaf) + -> ParsedLicenseTree + -> Either String LicenseTree +canonicalizeParsedTree canonicalizeLeaf = case _ of + ParsedLeaf leaf -> + map Leaf $ canonicalizeLeaf leaf + ParsedBranch conjunction left right -> + Branch conjunction + <$> canonicalizeParsedTree canonicalizeLeaf left + <*> canonicalizeParsedTree canonicalizeLeaf right + +canonicalizeStrictLeaf :: ParsedLicenseLeaf -> Either String CanonicalLicenseLeaf +canonicalizeStrictLeaf rawLeaf = do + canonicalLeaf <- canonicalizeDetectedLeaf rawLeaf + if printParsedLeaf rawLeaf == printCanonicalLeaf canonicalLeaf then + Right canonicalLeaf + else + Left $ Array.fold + [ "Non-canonical SPDX identifier '" + , printParsedLeaf rawLeaf + , "'. Use '" + , printCanonicalLeaf canonicalLeaf + , "'" + ] + +canonicalizeDetectedLeaf :: ParsedLicenseLeaf -> Either String CanonicalLicenseLeaf +canonicalizeDetectedLeaf { identifier, plus, exception } = do + if isCurrent identifier then do + canonicalIdentifier <- if plus then canonicalizePlusIdentifier identifier else Right identifier + ensureCurrentIdentifier canonicalIdentifier identifier + pure { identifier: canonicalIdentifier, exception } + else if isDeprecated identifier then do + canonicalIdentifier <- canonicalizeDeprecatedIdentifier { identifier, plus } + pure { identifier: canonicalIdentifier, exception } + else do + Left $ "SPDX identifier '" <> identifier <> "' is not recognized in the current SPDX license list" + +canonicalizeVersionedIdentifier :: { base :: String, plus :: Boolean } -> String +canonicalizeVersionedIdentifier { base, plus } = if plus then base <> "-or-later" else base <> "-only" + +canonicalizeDeprecatedIdentifier :: { identifier :: String, plus :: Boolean } -> Either String String +canonicalizeDeprecatedIdentifier { identifier, plus } = do + let canonicalVersioned = canonicalizeVersionedIdentifier { base: identifier, plus } + if plus && isCurrent canonicalVersioned then + Right canonicalVersioned + else if Set.member identifier ambiguousDeprecatedIdentifiers then do + Left $ "Deprecated SPDX identifier '" <> identifier <> "' does not have an unambiguous canonical replacement" + else if isCurrent canonicalVersioned then + Right canonicalVersioned + else case Map.lookup identifier deprecatedIdentifierRenames of + Just replacement -> do + if plus then + Left $ "Deprecated SPDX identifier '" <> identifier <> "+' does not have an unambiguous canonical replacement" + else do + ensureCurrentIdentifier replacement identifier + Right replacement + Nothing -> + Left $ "Deprecated SPDX identifier '" <> identifier <> "' does not have an unambiguous canonical replacement" + +canonicalizePlusIdentifier :: String -> Either String String +canonicalizePlusIdentifier identifier = do + let fallback = identifier <> "-or-later" + let fromOnly = String.stripSuffix (Pattern "-only") identifier <#> (_ <> "-or-later") -foreign import extractLicenseIdsImpl :: forall r. Fn3 (String -> r) (Array String -> r) String r + case fromOnly of + Just candidate -> + if isCurrent candidate then + Right candidate + else if isCurrent fallback then + Right fallback + else + Left $ "Cannot canonicalize '+' for SPDX identifier '" <> identifier <> "'" + Nothing -> + if isCurrent fallback then + Right fallback + else + Left $ "Cannot canonicalize '+' for SPDX identifier '" <> identifier <> "'" + +ensureCurrentIdentifier :: String -> String -> Either String Unit +ensureCurrentIdentifier canonicalIdentifier sourceIdentifier = do + unless (isCurrent canonicalIdentifier) do + Left $ "SPDX identifier '" <> sourceIdentifier <> "' is not recognized in the current SPDX license list" + +isCurrent :: String -> Boolean +isCurrent identifier = + Set.member identifier spdxIdentifierSets.current + && not (Set.member identifier spdxIdentifierSets.deprecated) + +isDeprecated :: String -> Boolean +isDeprecated identifier = Set.member identifier spdxIdentifierSets.deprecated + +spdxIdentifierSets :: { current :: Set.Set String, deprecated :: Set.Set String } +spdxIdentifierSets = + { current: Set.fromFoldable currentIds + , deprecated: Set.fromFoldable deprecatedIds + } + +-- Deprecated identifiers that have deterministic, non-versioned renames. +deprecatedIdentifierRenames :: Map.Map String String +deprecatedIdentifierRenames = Map.fromFoldable + [ Tuple "BSD-2-Clause-NetBSD" "BSD-2-Clause" + , Tuple "StandardML-NJ" "SMLNJ" + , Tuple "bzip2-1.0.5" "bzip2-1.0.6" + ] + +ambiguousDeprecatedIdentifiers :: Set.Set String +ambiguousDeprecatedIdentifiers = Set.fromFoldable + [ "BSD-2-Clause-FreeBSD" + , "GFDL-1.1" + , "GFDL-1.2" + , "GFDL-1.3" + , "Net-SNMP" + , "Nunit" + ] + +-- | Print an SPDX license identifier as a string. +print :: License -> String +print (License tree) = renderCanonicalTree tree + +renderCanonicalTree :: LicenseTree -> String +renderCanonicalTree = go 0 + where + go :: Int -> LicenseTree -> String + go parentPrecedence = case _ of + Leaf leaf -> + printCanonicalLeaf leaf + Branch conjunction left right -> + if conjunctionPrecedence conjunction < parentPrecedence then + "(" <> renderBranch conjunction left right <> ")" + else + renderBranch conjunction left right + + renderBranch :: SPDXConjunction -> LicenseTree -> LicenseTree -> String + renderBranch conjunction left right = Array.fold + [ go (conjunctionPrecedence conjunction) left + , " " + , printConjunction conjunction + , " " + , go (conjunctionPrecedence conjunction) right + ] + +conjunctionPrecedence :: SPDXConjunction -> Int +conjunctionPrecedence = case _ of + Or -> 1 + And -> 2 + +printConjunction :: SPDXConjunction -> String +printConjunction = case _ of + And -> "AND" + Or -> "OR" + +printParsedLeaf :: ParsedLicenseLeaf -> String +printParsedLeaf { identifier, plus, exception } = case exception of + Nothing -> + if plus then identifier <> "+" else identifier + Just exceptionId -> + (if plus then identifier <> "+" else identifier) <> " WITH " <> exceptionId + +printCanonicalLeaf :: CanonicalLicenseLeaf -> String +printCanonicalLeaf { identifier, exception } = case exception of + Nothing -> + identifier + Just exceptionId -> + identifier <> " WITH " <> exceptionId -- | Extract all license identifiers from a SPDX expression. -- | Returns an array of uppercase license IDs for case-insensitive comparison. -- | For example, "MIT AND Apache-2.0" returns ["MIT", "APACHE-2.0"]. -extractIds :: License -> Either String (Array String) -extractIds (License expr) = runFn3 extractLicenseIdsImpl Left Right expr +extractIds :: License -> Array String +extractIds (License tree) = Array.nub (collectIds tree) + where + collectIds :: LicenseTree -> Array String + collectIds = case _ of + Leaf { identifier } -> [ String.toUpper identifier ] + Branch _ left right -> collectIds left <> collectIds right -- | A valid conjunction for SPDX license identifiers. AND means that both -- | licenses must be satisfied; OR means that at least one license must be @@ -70,7 +305,9 @@ derive instance Eq SPDXConjunction -- | Join multiple license identifiers together with the given SPDX conjunction -- | to create a new valid SPDX license identifier. -joinWith :: SPDXConjunction -> Array License -> License -joinWith = case _ of - And -> coerce <<< String.joinWith " AND " <<< coerce - Or -> coerce <<< String.joinWith " OR " <<< coerce +joinWith :: SPDXConjunction -> NonEmptyArray License -> License +joinWith conjunction licenses = case NonEmptyArray.uncons licenses of + { head, tail } -> Array.foldl join head tail + where + join :: License -> License -> License + join (License left) (License right) = License $ Branch conjunction left right diff --git a/lib/test/Registry.purs b/lib/test/Registry.purs index 5e28f12f4..5d2b20c6f 100644 --- a/lib/test/Registry.purs +++ b/lib/test/Registry.purs @@ -4,6 +4,7 @@ import Prelude import Effect (Effect) import Test.Registry.Internal as Test.Internal +import Test.Registry.License as Test.License import Test.Registry.Manifest as Test.Manifest import Test.Registry.ManifestIndex as Test.ManifestIndex import Test.Registry.Metadata as Test.Metadata @@ -31,6 +32,7 @@ main = runSpecAndExitProcess [ Spec.Reporter.consoleReporter ] do Spec.describe "PackageName" Test.PackageName.spec Spec.describe "Version" Test.Version.spec Spec.describe "Range" Test.Range.spec + Spec.describe "License" Test.License.spec Spec.describe "Manifest" Test.Manifest.spec Spec.describe "Metadata" Test.Metadata.spec Spec.describe "Package Set" Test.PackageSet.spec diff --git a/lib/test/Registry/License.purs b/lib/test/Registry/License.purs index 41e914a89..eb0ee7885 100644 --- a/lib/test/Registry/License.purs +++ b/lib/test/Registry/License.purs @@ -4,7 +4,9 @@ import Prelude import Data.Array as Array import Data.Either (Either(..)) +import Data.Foldable (for_) import Data.String as String +import Data.String.Pattern (Pattern(..)) import Registry.License as License import Registry.Test.Assert as Assert import Registry.Test.Utils as Utils @@ -12,107 +14,115 @@ import Test.Spec as Spec spec :: Spec.Spec Unit spec = do - Spec.it "Parses well-formed licenses" do - let { fail } = Utils.partitionEithers $ map License.parse valid + Spec.it "Parses canonical SPDX expressions" do + let { fail } = Utils.partitionEithers $ map License.parse canonical unless (Array.null fail) do Assert.fail $ String.joinWith "\n" - [ "Some well-formed licenses names were not parsed correctly:" + [ "Some canonical SPDX expressions were not parsed correctly:" , Array.foldMap (append "\n - ") fail ] - Spec.it "Fails to parse malformed licenses" do - let { success } = Utils.partitionEithers $ map License.parse invalid + Spec.it "Fails to parse malformed SPDX expressions" do + let { success } = Utils.partitionEithers $ map License.parse malformed unless (Array.null success) do Assert.fail $ String.joinWith "\n" - [ "Some malformed package names were not parsed correctly:" + [ "Some malformed SPDX expressions were parsed unexpectedly:" , Array.foldMap (append "\n - " <<< License.print) success ] - Spec.it "joinWith creates valid parseable license expressions" do + Spec.it "Strict parsing rejects deprecated SPDX identifiers" do let - licenses = [ License.parse "MIT", License.parse "Apache-2.0" ] - { fail, success } = Utils.partitionEithers licenses + rejected = + [ { input: "AGPL-3.0", expectedError: "AGPL-3.0-only" } + , { input: "GPL-2.0-with-classpath-exception", expectedError: "unambiguous canonical replacement" } + , { input: "GPL-2.0+", expectedError: "GPL-2.0-or-later" } + , { input: "GFDL-1.3", expectedError: "unambiguous canonical replacement" } + , { input: "GFDL-1.3+", expectedError: "GFDL-1.3-or-later" } + ] - unless (Array.null fail) do - Assert.fail "Failed to parse test licenses" + for_ rejected \{ input, expectedError } -> + case License.parse input of + Right parsed -> + Assert.fail $ "Expected strict parse to reject " <> input <> ", but parsed as " <> License.print parsed + Left err -> + unless (String.contains (Pattern expectedError) err) do + Assert.fail $ "Expected parse error for " <> input <> " to mention " <> expectedError <> ", but got: " <> err + Spec.it "Canonicalizes deterministic deprecated IDs in detected output" do let - joined = License.joinWith License.And success - reparsed = License.parse (License.print joined) - - case reparsed of - Left err -> Assert.fail $ "joinWith created unparseable expression: " <> License.print joined <> " - Error: " <> err - Right _ -> pure unit - - Spec.describe "extractIds" do - Spec.it "extracts single license ID" do - case License.parse "MIT" of - Left err -> Assert.fail err - Right license -> case License.extractIds license of - Left err -> Assert.fail err - Right ids -> Assert.shouldEqual [ "MIT" ] ids + cases = + [ { input: "AGPL-3.0", output: "AGPL-3.0-only" } + , { input: "GPL-2.0+", output: "GPL-2.0-or-later" } + , { input: "GFDL-1.3+", output: "GFDL-1.3-or-later" } + , { input: "BSD-2-Clause-NetBSD", output: "BSD-2-Clause" } + , { input: "StandardML-NJ", output: "SMLNJ" } + ] - Spec.it "extracts IDs from AND expression" do - case License.parse "MIT AND Apache-2.0" of - Left err -> Assert.fail err - Right license -> case License.extractIds license of - Left err -> Assert.fail err - Right ids -> do - Assert.shouldContain ids "MIT" - Assert.shouldContain ids "APACHE-2.0" + for_ cases \{ input, output } -> + case License.canonicalizeDetected input of + Left err -> + Assert.fail $ "Expected canonicalization to succeed for " <> input <> ", but failed with: " <> err + Right canonicalized -> + Assert.shouldEqual output canonicalized + + Spec.it "Fails to canonicalize ambiguous deprecated IDs in detected output" do + for_ [ "BSD-2-Clause-FreeBSD", "GFDL-1.3", "Net-SNMP", "Nunit", "GPL-2.0-with-classpath-exception" ] \input -> + case License.canonicalizeDetected input of + Right canonicalized -> + Assert.fail $ "Expected canonicalization to fail for " <> input <> ", but got " <> canonicalized + Left err -> + unless (String.contains (Pattern "unambiguous canonical replacement") err) do + Assert.fail $ "Expected ambiguous canonicalization error for " <> input <> ", but got: " <> err + + Spec.it "Prints canonical SPDX expressions" do + case License.parse "MIT AND (Apache-2.0 OR BSD-3-Clause)" of + Left err -> + Assert.fail err + Right parsed -> + Assert.shouldEqual "MIT AND (Apache-2.0 OR BSD-3-Clause)" (License.print parsed) + + Spec.it "joinWith creates valid parseable SPDX expressions" do + let + left = Utils.fromRight "Failed to parse MIT" (License.parse "MIT") + right = Utils.fromRight "Failed to parse Apache-2.0" (License.parse "Apache-2.0") + joined = License.joinWith License.And (Utils.unsafeNonEmptyArray [ left, right ]) - Spec.it "extracts IDs from OR expression" do - case License.parse "MIT OR BSD-3-Clause" of - Left err -> Assert.fail err - Right license -> case License.extractIds license of - Left err -> Assert.fail err - Right ids -> do - Assert.shouldContain ids "MIT" - Assert.shouldContain ids "BSD-3-CLAUSE" + case License.parse (License.print joined) of + Left err -> + Assert.fail $ "joinWith created an unparseable expression: " <> License.print joined <> " - Error: " <> err + Right _ -> + pure unit - Spec.it "extracts IDs from nested expression" do + Spec.describe "extractIds" do + Spec.it "Extracts canonical uppercase IDs from parsed expressions" do case License.parse "MIT AND (Apache-2.0 OR BSD-3-Clause)" of - Left err -> Assert.fail err - Right license -> case License.extractIds license of - Left err -> Assert.fail err - Right ids -> do - Assert.shouldContain ids "MIT" - Assert.shouldContain ids "APACHE-2.0" - Assert.shouldContain ids "BSD-3-CLAUSE" - - Spec.it "normalizes license IDs to uppercase" do - case License.parse "mit" of - Left err -> Assert.fail err - Right license -> case License.extractIds license of - Left err -> Assert.fail err - Right ids -> Assert.shouldEqual [ "MIT" ] ids - -valid :: Array String -valid = + Left err -> + Assert.fail err + Right parsed -> do + let ids = License.extractIds parsed + Assert.shouldContain ids "MIT" + Assert.shouldContain ids "APACHE-2.0" + Assert.shouldContain ids "BSD-3-CLAUSE" + +canonical :: Array String +canonical = [ "MIT" , "BSD-3-Clause" , "CC-BY-1.0" - , "APACHE-2.0" + , "Apache-2.0" , "LGPL-2.1-only" - - -- deprecated licenses are acceptable - , "GPL-3.0" - , "AGPL-1.0" - - -- conjunctions are understood - , "LGPL-2.1 OR BSD-3-CLAUSE AND MIT" - , "MIT AND (LGPL-2.1+ AND BSD-3-CLAUSE)" - - -- exceptions are understood - , "GPS-3.0 WITH GPL-3.0-linking-exception" + , "MIT AND Apache-2.0" + , "MIT AND (LGPL-2.1-only OR BSD-3-Clause)" + , "GPL-2.0-only WITH Classpath-exception-2.0" ] -invalid :: Array String -invalid = +malformed :: Array String +malformed = [ "Apache" , "Apache-2" , "Apache 2" , "BSD-3" , "MIT AND BSD-3" , "MIT AN BSD-3-Clause" + , "MIT OR (Apache-2.0" ] From 0187caa33d9e513fce4575d217159c838ce5dac8 Mon Sep 17 00:00:00 2001 From: Thomas Honeyman Date: Sun, 15 Mar 2026 11:24:49 -0400 Subject: [PATCH 2/5] Relax best-effort license parsing --- app/src/App/API.purs | 37 +++++++------------------------ app/src/App/Legacy/Manifest.purs | 14 +++++------- app/test/App/API.purs | 10 +++------ app/test/App/Legacy/Manifest.purs | 16 +++++++++++++ 4 files changed, 32 insertions(+), 45 deletions(-) diff --git a/app/src/App/API.purs b/app/src/App/API.purs index 5724ae1c0..263b37077 100644 --- a/app/src/App/API.purs +++ b/app/src/App/API.purs @@ -1308,9 +1308,7 @@ instance FsEncodable PursGraphCache where Exists.mkExists $ Cache.AsJson cacheKey codec next -- | Errors that can occur when validating license consistency -data LicenseValidationError - = LicenseMismatch { manifest :: License, detected :: Array License } - | LicenseParseError (Array { detected :: String, error :: String }) +data LicenseValidationError = LicenseMismatch { manifest :: License, detected :: Array License } derive instance Eq LicenseValidationError @@ -1325,11 +1323,6 @@ printLicenseValidationError = case _ of , "in your repository. If multiple licenses apply, join them using SPDX " , "conjunctions (e.g., 'MIT AND Apache-2.0' or 'MIT OR Apache-2.0')." ] - LicenseParseError failures -> Array.fold - [ "License validation failed: one or more detected SPDX license expressions " - , "could not be canonicalized.\n" - , String.joinWith "\n" (failures <#> \{ detected, error } -> " - " <> detected <> " (" <> error <> ")") - ] -- | Validate that the license in the manifest is consistent with licenses -- | detected in the repository (LICENSE file, package.json, bower.json). @@ -1348,31 +1341,17 @@ validateLicense packageDir manifestLicense = do pure Nothing Right detectedStrings -> do let - parseDetectedLicense :: String -> Either String License - parseDetectedLicense detectedLicense = do - canonicalized <- License.canonicalizeDetected detectedLicense - License.parse canonicalized - - parsedDetectedLicenses :: Array (Tuple String (Either String License)) - parsedDetectedLicenses = - detectedStrings <#> \detectedLicense -> - Tuple detectedLicense (parseDetectedLicense detectedLicense) - - parseFailures :: Array { detected :: String, error :: String } - parseFailures = - parsedDetectedLicenses # Array.mapMaybe \(Tuple detectedLicense parsed) -> case parsed of - Left error -> Just { detected: detectedLicense, error } - Right _ -> Nothing - + -- Best effort: keep detected licenses we can canonicalize and parse. parsedLicenses :: Array License - parsedLicenses = parsedDetectedLicenses # Array.mapMaybe \(Tuple _ parsed) -> hush parsed + parsedLicenses = + detectedStrings # Array.mapMaybe \detectedLicense -> + hush do + canonicalized <- License.canonicalizeDetected detectedLicense + License.parse canonicalized Log.debug $ "Detected licenses: " <> String.joinWith ", " detectedStrings - if not (Array.null parseFailures) then do - Log.warn "Some detected licenses could not be canonicalized." - pure $ Just $ LicenseParseError parseFailures - else if Array.null parsedLicenses then do + if Array.null parsedLicenses then do Log.debug "No licenses detected from repository files, nothing to validate." pure Nothing else do diff --git a/app/src/App/Legacy/Manifest.purs b/app/src/App/Legacy/Manifest.purs index ae841d120..bbff7254f 100644 --- a/app/src/App/Legacy/Manifest.purs +++ b/app/src/App/Legacy/Manifest.purs @@ -76,17 +76,13 @@ bowerfileToPursJson -> Either String { license :: License, description :: Maybe String, dependencies :: Map PackageName Range } bowerfileToPursJson (Bowerfile { description, dependencies, license }) = do let - parsedLicenses = license <#> \rawLicense -> - lmap (\err -> " - " <> rawLicense <> ": " <> err) $ License.parse rawLicense - { fail: parseErrors, success: validLicenses } = partitionEithers parsedLicenses + -- Best effort: keep any licenses that parse cleanly and drop the rest. + validLicenses = Array.mapMaybe (hush <<< License.parse) license parsedLicense <- - if not (Array.null parseErrors) then do - Left $ "Invalid SPDX license(s) in bower.json:\n" <> String.joinWith "\n" parseErrors - else do - case NonEmptyArray.fromArray validLicenses of - Nothing -> Left "No valid SPDX license found in bower.json" - Just multiple -> Right $ License.joinWith License.And multiple + case NonEmptyArray.fromArray validLicenses of + Nothing -> Left "No valid SPDX license found in bower.json" + Just multiple -> Right $ License.joinWith License.And multiple parsedDeps <- parseDependencies dependencies diff --git a/app/test/App/API.purs b/app/test/App/API.purs index 8efc1ca1c..fa199f6b3 100644 --- a/app/test/App/API.purs +++ b/app/test/App/API.purs @@ -408,14 +408,10 @@ licenseValidation = do result <- Assert.Run.runBaseEffects $ validateLicense deprecatedFixture manifestLicense Assert.shouldEqual Nothing result - Spec.it "Fails when detected licenses have ambiguous deprecated identifiers" do - let manifestLicense = unsafeLicense "GFDL-1.3-only" + Spec.it "Ignores ambiguous deprecated detected licenses during validation" do + let manifestLicense = unsafeLicense "MIT" result <- Assert.Run.runBaseEffects $ validateLicense ambiguousFixture manifestLicense - case result of - Just (LicenseParseError failures) -> - Assert.shouldContain (map _.detected failures) "GFDL-1.3" - _ -> - Assert.fail "Expected UncanonicalizableDetectedLicenses error" + Assert.shouldEqual Nothing result unsafeLicense :: String -> License unsafeLicense str = unsafeFromRight $ License.parse str diff --git a/app/test/App/Legacy/Manifest.purs b/app/test/App/Legacy/Manifest.purs index aed0bc2e1..c7e58a850 100644 --- a/app/test/App/Legacy/Manifest.purs +++ b/app/test/App/Legacy/Manifest.purs @@ -6,6 +6,7 @@ import Codec.JSON.DecodeError as CJ.DecodeError import Data.Array as Array import Data.Codec.JSON as CJ import Registry.App.Legacy.Manifest as Legacy.Manifest +import Registry.License as License import Registry.Manifest (Manifest(..)) import Registry.Test.Assert as Assert import Test.Spec (Spec) @@ -134,6 +135,21 @@ bowerfileToPursJsonSpec = do Left err -> Assert.fail $ "Failed to convert bowerfile:\n" <> err Right _ -> pure unit + Spec.it "Drops invalid SPDX identifiers when valid licenses remain" do + let + input = + """ + { "license": [ "MIT", "AGPL-3.0" ] + , "dependencies": { "purescript-prelude": "^6.0.0" } + } + """ + case parseJson Legacy.Manifest.bowerfileCodec input of + Left err -> Assert.fail $ "Failed to parse bowerfile:\n" <> CJ.DecodeError.print err + Right bowerfile -> case Legacy.Manifest.bowerfileToPursJson bowerfile of + Left err -> Assert.fail $ "Failed to convert bowerfile:\n" <> err + Right result -> + Assert.shouldEqual "MIT" (License.print result.license) + Spec.describe "Rejects invalid Bowerfiles" do Spec.it "Fails on missing license" do let From ec28727c8537d41214e49c8c7dc771f48fd767ab Mon Sep 17 00:00:00 2001 From: Thomas Honeyman Date: Sun, 15 Mar 2026 13:28:34 -0400 Subject: [PATCH 3/5] fix SPDX validation boundaries Keep historical manifests backward-compatible while requiring canonical SPDX for new publishes. --- SPEC.md | 2 +- app/src/App/API.purs | 39 +++++++--- app/src/App/Legacy/Manifest.purs | 4 +- app/src/App/Manifest/SpagoYaml.purs | 10 ++- app/test/App/API.purs | 55 ++++++++++++-- app/test/App/Manifest/SpagoYaml.purs | 19 +++++ lib/src/License.purs | 109 +++++++++++++++++---------- lib/src/Manifest.purs | 3 +- lib/test/Registry/License.purs | 66 ++++++++++------ lib/test/Registry/Manifest.purs | 42 +++++++++++ lib/test/Registry/ManifestIndex.purs | 41 ++++++++++ 11 files changed, 306 insertions(+), 84 deletions(-) diff --git a/SPEC.md b/SPEC.md index 4eca751dd..419e91ed0 100644 --- a/SPEC.md +++ b/SPEC.md @@ -361,7 +361,7 @@ Alternately, this can be written without an id: **[Source](./lib/src/Registry/License.purs)** **[Spec](./types/v1/License.dhall)** -All packages in the registry must have a license that grants permission for redistribution of the source code. Concretely, the registry requires that all packages use an SPDX license and specify an [SPDX license identifier](https://spdx.dev/ids/). `AND` and `OR` conjunctions are allowed, and licenses can contain exceptions using the `WITH` preposition. The SPDX specification describes [how licenses can be combined and exceptions applied](https://spdx.dev/ids#how). +All packages in the registry must have a license that grants permission for redistribution of the source code. Concretely, the registry requires that all packages use an SPDX license and specify an [SPDX license identifier](https://spdx.dev/ids/). `AND` and `OR` conjunctions are allowed, and licenses can contain exceptions using the `WITH` preposition. The SPDX specification describes [how licenses can be combined and exceptions applied](https://spdx.dev/ids#how). Newly submitted manifests must use current canonical SPDX identifiers, but registry readers should remain backward-compatible with historical stored manifests that still use deprecated SPDX spellings. A `License` is represented as a string, which must be a valid SPDX identifier. For example: diff --git a/app/src/App/API.purs b/app/src/App/API.purs index 263b37077..bdd91d39f 100644 --- a/app/src/App/API.purs +++ b/app/src/App/API.purs @@ -129,6 +129,20 @@ derive instance Eq ManifestOrigin -- | A parsed manifest along with which format it originated from. type ParsedManifest = { manifest :: Manifest, origin :: ManifestOrigin } +type ManifestLicenseField = { license :: String } + +manifestLicenseFieldCodec :: CJ.Codec ManifestLicenseField +manifestLicenseFieldCodec = CJ.named "ManifestLicenseField" $ CJ.Record.object + { license: CJ.string + } + +validateCanonicalManifestLicense :: String -> Either String Unit +validateCanonicalManifestLicense input = do + { license } <- case parseJson manifestLicenseFieldCodec input of + Left err -> Left $ CJ.DecodeError.print err + Right decoded -> Right decoded + void $ License.parseCanonical license + -- | Effect row for package set updates. Authentication is done at the API -- | boundary, so we don't need GITHUB_EVENT_ENV effects here. type PackageSetUpdateEffects r = (REGISTRY + PACKAGE_SETS + LOG + EXCEPT String + r) @@ -436,13 +450,17 @@ parseSourceManifest { packageDir, name, version, ref, location } = do Left error -> do Log.error $ "Manifest does not typecheck: " <> error Except.throw $ "Found a valid purs.json file in the package source, but it does not typecheck." - Right _ -> case parseJson Manifest.codec string of + Right _ -> case validateCanonicalManifestLicense string of Left err -> do - Log.error $ "Failed to parse manifest: " <> CJ.DecodeError.print err - Except.throw $ "Found a purs.json file in the package source, but it could not be decoded." - Right m -> do - Log.debug $ "Read a valid purs.json manifest from the package source:\n" <> stringifyJson Manifest.codec m - pure m + Log.error $ "Manifest license is not canonical: " <> err + Except.throw $ "Found a purs.json file in the package source, but its license field is not canonical." + Right _ -> case parseJson Manifest.codec string of + Left err -> do + Log.error $ "Failed to parse manifest: " <> CJ.DecodeError.print err + Except.throw $ "Found a purs.json file in the package source, but it could not be decoded." + Right m -> do + Log.debug $ "Read a valid purs.json manifest from the package source:\n" <> stringifyJson Manifest.codec m + pure m FromSpagoYaml -> do let spagoYamlPath = Path.concat [ packageDir, "spago.yaml" ] @@ -1341,13 +1359,12 @@ validateLicense packageDir manifestLicense = do pure Nothing Right detectedStrings -> do let - -- Best effort: keep detected licenses we can canonicalize and parse. + -- Best effort: keep detected licenses that parse, which canonicalizes + -- deprecated IDs when possible and preserves recognized ambiguous + -- deprecated IDs for validation. parsedLicenses :: Array License parsedLicenses = - detectedStrings # Array.mapMaybe \detectedLicense -> - hush do - canonicalized <- License.canonicalizeDetected detectedLicense - License.parse canonicalized + detectedStrings # Array.mapMaybe (hush <<< License.parse) Log.debug $ "Detected licenses: " <> String.joinWith ", " detectedStrings diff --git a/app/src/App/Legacy/Manifest.purs b/app/src/App/Legacy/Manifest.purs index bbff7254f..0785c3e41 100644 --- a/app/src/App/Legacy/Manifest.purs +++ b/app/src/App/Legacy/Manifest.purs @@ -77,7 +77,7 @@ bowerfileToPursJson bowerfileToPursJson (Bowerfile { description, dependencies, license }) = do let -- Best effort: keep any licenses that parse cleanly and drop the rest. - validLicenses = Array.mapMaybe (hush <<< License.parse) license + validLicenses = Array.mapMaybe (hush <<< License.parseCanonical) license parsedLicense <- case NonEmptyArray.fromArray validLicenses of @@ -140,7 +140,7 @@ spagoDhallToPursJson spagoDhallToPursJson (SpagoDhallJson { license, dependencies, packages }) = do parsedLicense <- case license of Nothing -> Left "No license found in spago.dhall" - Just lic -> case License.parse (NonEmptyString.toString lic) of + Just lic -> case License.parseCanonical (NonEmptyString.toString lic) of Left _ -> Left $ "Invalid SPDX license in spago.dhall: " <> NonEmptyString.toString lic Right l -> Right l diff --git a/app/src/App/Manifest/SpagoYaml.purs b/app/src/App/Manifest/SpagoYaml.purs index f07a0f1b9..03712e435 100644 --- a/app/src/App/Manifest/SpagoYaml.purs +++ b/app/src/App/Manifest/SpagoYaml.purs @@ -88,15 +88,23 @@ type PublishConfig = } publishConfigCodec :: CJ.Codec PublishConfig +-- | Publish metadata is authored input, so it must use canonical SPDX +-- | identifiers even though stored manifests remain backward-compatible. publishConfigCodec = CJ.named "PublishConfig" $ CJ.Record.object { version: Version.codec - , license: License.codec + , license: canonicalLicenseCodec , location: CJ.Record.optional Location.codec , include: CJ.Record.optional (CJ.array CJ.string) , exclude: CJ.Record.optional (CJ.array CJ.string) , owners: CJ.Record.optional (CJ.Common.nonEmptyArray Owner.codec) } +canonicalLicenseCodec :: CJ.Codec License +canonicalLicenseCodec = CJ.named "CanonicalLicense" $ Codec.codec' decode encode + where + encode = CJ.encode CJ.string <<< License.print + decode = Codec.decode CJ.string >=> (License.parseCanonical >>> lmap CJ.DecodeError.basic >>> except) + dependenciesCodec :: CJ.Codec (Map PackageName (Maybe SpagoRange)) dependenciesCodec = Profunctor.dimap toJsonRep fromJsonRep $ CJ.array dependencyCodec where diff --git a/app/test/App/API.purs b/app/test/App/API.purs index fa199f6b3..4476f7c53 100644 --- a/app/test/App/API.purs +++ b/app/test/App/API.purs @@ -8,6 +8,7 @@ import Data.Map as Map import Data.Set as Set import Data.String as String import Data.String.NonEmpty as NonEmptyString +import Data.String.Pattern (Pattern(..)) import Effect.Aff as Aff import Effect.Class.Console as Console import Effect.Ref as Ref @@ -27,6 +28,7 @@ import Registry.Foreign.FSExtra as FS.Extra import Registry.Foreign.FastGlob as FastGlob import Registry.Foreign.Tmp as Tmp import Registry.License as License +import Registry.Location (Location(..)) import Registry.PackageName as PackageName import Registry.Test.Assert as Assert import Registry.Test.Assert.Run as Assert.Run @@ -52,6 +54,9 @@ spec = do Spec.describe "Verifies build plans" do checkBuildPlanToResolutions + Spec.describe "Parses source manifests" do + parseSourceManifestSpec + Spec.describe "Validates licenses match" do licenseValidation @@ -222,6 +227,38 @@ checkBuildPlanToResolutions = do path = Path.concat [ installedResolutions, PackageName.print packageName <> "-" <> Version.print version ] pure $ Tuple bowerName { path, version } +parseSourceManifestSpec :: Spec.Spec Unit +parseSourceManifestSpec = do + Spec.it "Rejects deprecated SPDX identifiers in purs.json" do + resourceEnv <- liftEffect Env.lookupResourceEnv + Aff.bracket Tmp.mkTmpDir FS.Extra.remove \packageDir -> do + let + manifestPath = Path.concat [ packageDir, "purs.json" ] + args = + { packageDir + , name: Utils.unsafePackageName "registry-lib" + , version: Utils.unsafeVersion "0.0.1" + , ref: "v0.0.1" + , location: GitHub { owner: "purescript", repo: "registry-dev", subdir: Nothing } + } + + FS.Aff.writeTextFile UTF8 manifestPath + """{"name":"registry-lib","version":"0.0.1","license":"AGPL-3.0","location":{"githubOwner":"purescript","githubRepo":"registry-dev"},"ref":"v0.0.1","dependencies":{"prelude":">=6.0.0 <7.0.0"}}""" + + result <- + API.parseSourceManifest args + # Env.runResourceEnv resourceEnv + # Log.interpret (\(Log.Log _ _ next) -> pure next) + # Except.runExcept + # Run.runBaseAff' + + case result of + Left err -> + unless (String.contains (Pattern "license field is not canonical") err) do + Assert.fail $ "Expected a canonical license error, but got: " <> err + Right _ -> + Assert.fail "Expected parseSourceManifest to reject deprecated SPDX identifiers" + removeIgnoredTarballFiles :: Spec.Spec Unit removeIgnoredTarballFiles = Spec.before runBefore do Spec.it "Picks correct files when packaging a tarball" \{ tmp, writeDirectories, writeFiles } -> do @@ -403,15 +440,23 @@ licenseValidation = do result <- Assert.Run.runBaseEffects $ validateLicense fixtures manifestLicense Assert.shouldEqual Nothing result - Spec.it "Canonicalizes deterministic deprecated detected licenses" do - let manifestLicense = unsafeLicense "AGPL-3.0-only" + Spec.it "Canonicalizes deterministic deprecated detected licenses during validation" do + let manifestLicense = unsafeLicense "MIT" result <- Assert.Run.runBaseEffects $ validateLicense deprecatedFixture manifestLicense - Assert.shouldEqual Nothing result + case result of + Just (LicenseMismatch { detected }) -> + Assert.shouldContain (map License.print detected) "AGPL-3.0-only" + _ -> + Assert.fail "Expected LicenseMismatch error" - Spec.it "Ignores ambiguous deprecated detected licenses during validation" do + Spec.it "Preserves ambiguous deprecated detected licenses during validation" do let manifestLicense = unsafeLicense "MIT" result <- Assert.Run.runBaseEffects $ validateLicense ambiguousFixture manifestLicense - Assert.shouldEqual Nothing result + case result of + Just (LicenseMismatch { detected }) -> + Assert.shouldContain (map License.print detected) "GFDL-1.3" + _ -> + Assert.fail "Expected LicenseMismatch error" unsafeLicense :: String -> License unsafeLicense str = unsafeFromRight $ License.parse str diff --git a/app/test/App/Manifest/SpagoYaml.purs b/app/test/App/Manifest/SpagoYaml.purs index f67546e2a..eaece039b 100644 --- a/app/test/App/Manifest/SpagoYaml.purs +++ b/app/test/App/Manifest/SpagoYaml.purs @@ -26,6 +26,25 @@ spec = do Left err -> Assert.fail $ path <> " failed: " <> err Right _ -> pure unit + Spec.it "Rejects deprecated SPDX identifiers in publish config" do + let + input = + """ + package: + name: registry-lib + publish: + version: 0.0.1 + license: AGPL-3.0 + location: + githubOwner: purescript + githubRepo: registry-dev + subdir: lib + dependencies: + - prelude: ">=1.0.0 <2.0.0" + """ + + parseYaml SpagoYaml.spagoYamlCodec input `Assert.shouldSatisfy` isLeft + Spec.describe "parseSpagoRange" do Spec.it "parses unbounded range '*'" do SpagoYaml.parseSpagoRange "*" `Assert.shouldEqual` Right Unbounded diff --git a/lib/src/License.purs b/lib/src/License.purs index 8149ea7bd..d929cfc91 100644 --- a/lib/src/License.purs +++ b/lib/src/License.purs @@ -5,14 +5,19 @@ -- | This module relies on the 'spdx-expression-parse' NPM library, which you -- | must install if you are using parsing code from this module. Please see the -- | package.json file for exact versions. +-- | +-- | `parse` accepts canonical SPDX expressions and historical deprecated SPDX +-- | expressions, canonicalizing them when SPDX provides a deterministic +-- | replacement and otherwise preserving recognized deprecated identifiers. +-- | Use `parseCanonical` when validating new user-authored manifest input. module Registry.License ( License , SPDXConjunction(..) - , canonicalizeDetected , codec , extractIds , joinWith , parse + , parseCanonical , print ) where @@ -41,23 +46,27 @@ newtype License = License LicenseTree derive newtype instance Eq License --- | A codec for encoding and decoding a `License` as JSON +-- | A codec for encoding and decoding a `License` as JSON. +-- | This decoder is backward-compatible with historical manifest data. codec :: CJ.Codec License -codec = CJ.named "License" $ Codec.codec' decode encode +codec = licenseCodec parse + +licenseCodec :: (String -> Either String License) -> CJ.Codec License +licenseCodec parseLicense = CJ.named "License" $ Codec.codec' decode encode where decode :: JSON -> Except CJ.DecodeError License - decode = except <<< lmap CJ.DecodeError.basic <<< parse <=< Codec.decode CJ.string + decode = except <<< lmap CJ.DecodeError.basic <<< parseLicense <=< Codec.decode CJ.string encode :: License -> JSON encode = print >>> CJ.encode CJ.string data LicenseTree - = Leaf CanonicalLicenseLeaf + = Leaf LicenseLeaf | Branch SPDXConjunction LicenseTree LicenseTree derive instance Eq LicenseTree -type CanonicalLicenseLeaf = +type LicenseLeaf = { identifier :: String , exception :: Maybe String } @@ -88,19 +97,22 @@ foreign import currentIds :: Array String foreign import deprecatedIds :: Array String -- | Parse a string as a SPDX license identifier. +-- | This is backward-compatible with historical registry manifests and accepts +-- | recognized deprecated SPDX identifiers, canonicalizing them when possible +-- | and otherwise preserving them as written. parse :: String -> Either String License parse input = do parsedTree <- parseExpressionTree input - canonicalTree <- canonicalizeParsedTree canonicalizeStrictLeaf parsedTree + canonicalTree <- canonicalizeParsedTree canonicalizeLenientLeaf parsedTree pure $ License canonicalTree --- | Canonicalize SPDX IDs detected from external tooling. This is lenient for --- | deprecated SPDX IDs where the canonical replacement is unambiguous. -canonicalizeDetected :: String -> Either String String -canonicalizeDetected input = do +-- | Parse a string as a canonical SPDX license identifier. +-- | This is intended for validating newly authored manifest input. +parseCanonical :: String -> Either String License +parseCanonical input = do parsedTree <- parseExpressionTree input - canonicalTree <- canonicalizeParsedTree canonicalizeDetectedLeaf parsedTree - pure $ renderCanonicalTree canonicalTree + canonicalTree <- canonicalizeParsedTree canonicalizeStrictLeaf parsedTree + pure $ License canonicalTree parseExpressionTree :: String -> Either String ParsedLicenseTree parseExpressionTree = @@ -120,7 +132,7 @@ parseExpressionTree = onOr left right = ParsedBranch Or <$> left <*> right canonicalizeParsedTree - :: (ParsedLicenseLeaf -> Either String CanonicalLicenseLeaf) + :: (ParsedLicenseLeaf -> Either String LicenseLeaf) -> ParsedLicenseTree -> Either String LicenseTree canonicalizeParsedTree canonicalizeLeaf = case _ of @@ -131,50 +143,60 @@ canonicalizeParsedTree canonicalizeLeaf = case _ of <$> canonicalizeParsedTree canonicalizeLeaf left <*> canonicalizeParsedTree canonicalizeLeaf right -canonicalizeStrictLeaf :: ParsedLicenseLeaf -> Either String CanonicalLicenseLeaf +canonicalizeStrictLeaf :: ParsedLicenseLeaf -> Either String LicenseLeaf canonicalizeStrictLeaf rawLeaf = do - canonicalLeaf <- canonicalizeDetectedLeaf rawLeaf - if printParsedLeaf rawLeaf == printCanonicalLeaf canonicalLeaf then + canonicalLeaf <- canonicalizeCanonicalLeaf rawLeaf + if printParsedLeaf rawLeaf == printLicenseLeaf canonicalLeaf then Right canonicalLeaf else Left $ Array.fold [ "Non-canonical SPDX identifier '" , printParsedLeaf rawLeaf , "'. Use '" - , printCanonicalLeaf canonicalLeaf + , printLicenseLeaf canonicalLeaf , "'" ] -canonicalizeDetectedLeaf :: ParsedLicenseLeaf -> Either String CanonicalLicenseLeaf -canonicalizeDetectedLeaf { identifier, plus, exception } = do +canonicalizeCanonicalLeaf :: ParsedLicenseLeaf -> Either String LicenseLeaf +canonicalizeCanonicalLeaf { identifier, plus, exception } = do if isCurrent identifier then do canonicalIdentifier <- if plus then canonicalizePlusIdentifier identifier else Right identifier ensureCurrentIdentifier canonicalIdentifier identifier pure { identifier: canonicalIdentifier, exception } else if isDeprecated identifier then do - canonicalIdentifier <- canonicalizeDeprecatedIdentifier { identifier, plus } - pure { identifier: canonicalIdentifier, exception } + canonicalizeDeprecatedLeaf { identifier, plus, exception } else do Left $ "SPDX identifier '" <> identifier <> "' is not recognized in the current SPDX license list" +canonicalizeLenientLeaf :: ParsedLicenseLeaf -> Either String LicenseLeaf +canonicalizeLenientLeaf rawLeaf = + case canonicalizeCanonicalLeaf rawLeaf of + Right canonicalLeaf -> + Right canonicalLeaf + Left err + | isDeprecated rawLeaf.identifier && not rawLeaf.plus -> + Right { identifier: rawLeaf.identifier, exception: rawLeaf.exception } + | otherwise -> + Left err + canonicalizeVersionedIdentifier :: { base :: String, plus :: Boolean } -> String canonicalizeVersionedIdentifier { base, plus } = if plus then base <> "-or-later" else base <> "-only" -canonicalizeDeprecatedIdentifier :: { identifier :: String, plus :: Boolean } -> Either String String -canonicalizeDeprecatedIdentifier { identifier, plus } = do +canonicalizeDeprecatedLeaf :: ParsedLicenseLeaf -> Either String LicenseLeaf +canonicalizeDeprecatedLeaf { identifier, plus, exception } = do let canonicalVersioned = canonicalizeVersionedIdentifier { base: identifier, plus } if plus && isCurrent canonicalVersioned then - Right canonicalVersioned + pure { identifier: canonicalVersioned, exception } else if Set.member identifier ambiguousDeprecatedIdentifiers then do Left $ "Deprecated SPDX identifier '" <> identifier <> "' does not have an unambiguous canonical replacement" else if isCurrent canonicalVersioned then - Right canonicalVersioned - else case Map.lookup identifier deprecatedIdentifierRenames of + pure { identifier: canonicalVersioned, exception } + else case Map.lookup identifier deprecatedIdentifierReplacements of Just replacement -> do if plus then Left $ "Deprecated SPDX identifier '" <> identifier <> "+' does not have an unambiguous canonical replacement" else do - ensureCurrentIdentifier replacement identifier + ensureCurrentIdentifier replacement.identifier identifier Right replacement Nothing -> Left $ "Deprecated SPDX identifier '" <> identifier <> "' does not have an unambiguous canonical replacement" @@ -217,12 +239,19 @@ spdxIdentifierSets = , deprecated: Set.fromFoldable deprecatedIds } --- Deprecated identifiers that have deterministic, non-versioned renames. -deprecatedIdentifierRenames :: Map.Map String String -deprecatedIdentifierRenames = Map.fromFoldable - [ Tuple "BSD-2-Clause-NetBSD" "BSD-2-Clause" - , Tuple "StandardML-NJ" "SMLNJ" - , Tuple "bzip2-1.0.5" "bzip2-1.0.6" +-- Deprecated identifiers that have deterministic canonical replacements. +deprecatedIdentifierReplacements :: Map.Map String LicenseLeaf +deprecatedIdentifierReplacements = Map.fromFoldable + [ Tuple "BSD-2-Clause-NetBSD" { identifier: "BSD-2-Clause", exception: Nothing } + , Tuple "StandardML-NJ" { identifier: "SMLNJ", exception: Nothing } + , Tuple "bzip2-1.0.5" { identifier: "bzip2-1.0.6", exception: Nothing } + , Tuple "GPL-2.0-with-GCC-exception" { identifier: "GPL-2.0-only", exception: Just "GCC-exception-2.0" } + , Tuple "GPL-2.0-with-autoconf-exception" { identifier: "GPL-2.0-only", exception: Just "Autoconf-exception-2.0" } + , Tuple "GPL-2.0-with-bison-exception" { identifier: "GPL-2.0-only", exception: Just "Bison-exception-2.2" } + , Tuple "GPL-2.0-with-classpath-exception" { identifier: "GPL-2.0-only", exception: Just "Classpath-exception-2.0" } + , Tuple "GPL-2.0-with-font-exception" { identifier: "GPL-2.0-only", exception: Just "Font-exception-2.0" } + , Tuple "GPL-3.0-with-GCC-exception" { identifier: "GPL-3.0-only", exception: Just "GCC-exception-3.1" } + , Tuple "GPL-3.0-with-autoconf-exception" { identifier: "GPL-3.0-only", exception: Just "Autoconf-exception-3.0" } ] ambiguousDeprecatedIdentifiers :: Set.Set String @@ -237,15 +266,15 @@ ambiguousDeprecatedIdentifiers = Set.fromFoldable -- | Print an SPDX license identifier as a string. print :: License -> String -print (License tree) = renderCanonicalTree tree +print (License tree) = renderLicenseTree tree -renderCanonicalTree :: LicenseTree -> String -renderCanonicalTree = go 0 +renderLicenseTree :: LicenseTree -> String +renderLicenseTree = go 0 where go :: Int -> LicenseTree -> String go parentPrecedence = case _ of Leaf leaf -> - printCanonicalLeaf leaf + printLicenseLeaf leaf Branch conjunction left right -> if conjunctionPrecedence conjunction < parentPrecedence then "(" <> renderBranch conjunction left right <> ")" @@ -278,8 +307,8 @@ printParsedLeaf { identifier, plus, exception } = case exception of Just exceptionId -> (if plus then identifier <> "+" else identifier) <> " WITH " <> exceptionId -printCanonicalLeaf :: CanonicalLicenseLeaf -> String -printCanonicalLeaf { identifier, exception } = case exception of +printLicenseLeaf :: LicenseLeaf -> String +printLicenseLeaf { identifier, exception } = case exception of Nothing -> identifier Just exceptionId -> diff --git a/lib/src/Manifest.purs b/lib/src/Manifest.purs index 49bb62f2c..0412ae346 100644 --- a/lib/src/Manifest.purs +++ b/lib/src/Manifest.purs @@ -70,7 +70,8 @@ instance Ord Manifest where -- | A codec for encoding and decoding a `Manifest` as JSON. Represented as a -- | JSON object. The implementation uses explicitly ordered keys instead of --- | record sugar. +-- | record sugar. This decoder remains backward-compatible with historical +-- | manifests stored in the registry and manifest index. codec :: CJ.Codec Manifest codec = Profunctor.wrapIso Manifest $ CJ.named "Manifest" $ CJ.object $ CJ.recordProp @"name" PackageName.codec diff --git a/lib/test/Registry/License.purs b/lib/test/Registry/License.purs index eb0ee7885..a789f3f38 100644 --- a/lib/test/Registry/License.purs +++ b/lib/test/Registry/License.purs @@ -30,49 +30,69 @@ spec = do , Array.foldMap (append "\n - " <<< License.print) success ] - Spec.it "Strict parsing rejects deprecated SPDX identifiers" do + Spec.it "Parses and canonicalizes deterministic deprecated SPDX identifiers" do + let + cases = + [ { input: "AGPL-3.0", output: "AGPL-3.0-only" } + , { input: "AGPL-3.0+", output: "AGPL-3.0-or-later" } + , { input: "LGPL-2.1", output: "LGPL-2.1-only" } + , { input: "LGPL-3.0", output: "LGPL-3.0-only" } + , { input: "LGPL-3.0+", output: "LGPL-3.0-or-later" } + , { input: "GPL-3.0", output: "GPL-3.0-only" } + , { input: "GPL-2.0-with-classpath-exception", output: "GPL-2.0-only WITH Classpath-exception-2.0" } + , { input: "GPL-2.0+", output: "GPL-2.0-or-later" } + , { input: "GPL-3.0 AND MIT", output: "GPL-3.0-only AND MIT" } + , { input: "LGPL-2.1 AND LGPL-2.1-only", output: "LGPL-2.1-only AND LGPL-2.1-only" } + , { input: "GFDL-1.3+", output: "GFDL-1.3-or-later" } + , { input: "BSD-2-Clause-NetBSD", output: "BSD-2-Clause" } + , { input: "StandardML-NJ", output: "SMLNJ" } + ] + + for_ cases \{ input, output } -> + case License.parse input of + Left err -> + Assert.fail $ "Expected parse to succeed for " <> input <> ", but failed with: " <> err + Right parsed -> + Assert.shouldEqual output (License.print parsed) + + Spec.it "Canonical parsing rejects deprecated SPDX identifiers" do let rejected = [ { input: "AGPL-3.0", expectedError: "AGPL-3.0-only" } - , { input: "GPL-2.0-with-classpath-exception", expectedError: "unambiguous canonical replacement" } + , { input: "AGPL-3.0+", expectedError: "AGPL-3.0-or-later" } + , { input: "LGPL-2.1", expectedError: "LGPL-2.1-only" } + , { input: "LGPL-3.0", expectedError: "LGPL-3.0-only" } + , { input: "LGPL-3.0+", expectedError: "LGPL-3.0-or-later" } + , { input: "GPL-3.0", expectedError: "GPL-3.0-only" } + , { input: "GPL-2.0-with-classpath-exception", expectedError: "GPL-2.0-only WITH Classpath-exception-2.0" } , { input: "GPL-2.0+", expectedError: "GPL-2.0-or-later" } + , { input: "GPL-3.0 AND MIT", expectedError: "GPL-3.0-only" } + , { input: "LGPL-2.1 AND LGPL-2.1-only", expectedError: "LGPL-2.1-only" } , { input: "GFDL-1.3", expectedError: "unambiguous canonical replacement" } , { input: "GFDL-1.3+", expectedError: "GFDL-1.3-or-later" } ] for_ rejected \{ input, expectedError } -> - case License.parse input of + case License.parseCanonical input of Right parsed -> - Assert.fail $ "Expected strict parse to reject " <> input <> ", but parsed as " <> License.print parsed + Assert.fail $ "Expected canonical parse to reject " <> input <> ", but parsed as " <> License.print parsed Left err -> unless (String.contains (Pattern expectedError) err) do Assert.fail $ "Expected parse error for " <> input <> " to mention " <> expectedError <> ", but got: " <> err - Spec.it "Canonicalizes deterministic deprecated IDs in detected output" do + Spec.it "Parses ambiguous deprecated SPDX identifiers without canonicalization" do let cases = - [ { input: "AGPL-3.0", output: "AGPL-3.0-only" } - , { input: "GPL-2.0+", output: "GPL-2.0-or-later" } - , { input: "GFDL-1.3+", output: "GFDL-1.3-or-later" } - , { input: "BSD-2-Clause-NetBSD", output: "BSD-2-Clause" } - , { input: "StandardML-NJ", output: "SMLNJ" } + [ { input: "GFDL-1.3", output: "GFDL-1.3" } + , { input: "Net-SNMP", output: "Net-SNMP" } ] for_ cases \{ input, output } -> - case License.canonicalizeDetected input of - Left err -> - Assert.fail $ "Expected canonicalization to succeed for " <> input <> ", but failed with: " <> err - Right canonicalized -> - Assert.shouldEqual output canonicalized - - Spec.it "Fails to canonicalize ambiguous deprecated IDs in detected output" do - for_ [ "BSD-2-Clause-FreeBSD", "GFDL-1.3", "Net-SNMP", "Nunit", "GPL-2.0-with-classpath-exception" ] \input -> - case License.canonicalizeDetected input of - Right canonicalized -> - Assert.fail $ "Expected canonicalization to fail for " <> input <> ", but got " <> canonicalized + case License.parse input of Left err -> - unless (String.contains (Pattern "unambiguous canonical replacement") err) do - Assert.fail $ "Expected ambiguous canonicalization error for " <> input <> ", but got: " <> err + Assert.fail $ "Expected parse to succeed for " <> input <> ", but failed with: " <> err + Right parsed -> + Assert.shouldEqual output (License.print parsed) Spec.it "Prints canonical SPDX expressions" do case License.parse "MIT AND (Apache-2.0 OR BSD-3-Clause)" of diff --git a/lib/test/Registry/Manifest.purs b/lib/test/Registry/Manifest.purs index fcbc072f7..172d1a390 100644 --- a/lib/test/Registry/Manifest.purs +++ b/lib/test/Registry/Manifest.purs @@ -2,8 +2,14 @@ module Test.Registry.Manifest (spec) where import Prelude +import Codec.JSON.DecodeError as CJ.DecodeError +import Data.Codec.JSON as CJ +import Data.Either (Either(..)) +import Data.Foldable (for_) import Data.String as String +import Data.String.Pattern (Pattern(..), Replacement(..)) import Data.Traversable (for) +import JSON as JSON import Node.Encoding (Encoding(..)) import Node.FS.Aff as FS.Aff import Node.Path as Path @@ -21,3 +27,39 @@ spec = do rawManifest <- FS.Aff.readTextFile UTF8 $ Path.concat [ manifestFixturesPath, path ] pure { label: path, value: String.trim rawManifest } Assert.shouldRoundTrip "Manifest" Manifest.codec fixtures + + Spec.it "Decodes historical manifests with deprecated SPDX identifiers" do + for_ historicalManifests \{ label, input } -> + case JSON.parse input of + Left err -> + Assert.fail $ "Failed to parse test JSON for " <> label <> ": " <> err + Right json -> + case CJ.decode Manifest.codec json of + Left err -> + Assert.fail $ "Failed to decode historical manifest for " <> label <> ": " <> CJ.DecodeError.print err + Right _ -> + pure unit + +historicalManifests :: Array { label :: String, input :: String } +historicalManifests = + [ { label: "AGPL-3.0", input: historicalManifest "jarilo" "1.0.1" "AGPL-3.0" } + , { label: "LGPL-3.0", input: historicalManifest "matrices" "5.0.0" "LGPL-3.0" } + , { label: "LGPL-3.0+", input: historicalManifest "test-unit" "17.0.0" "LGPL-3.0+" } + , { label: "GPL-3.0 AND MIT", input: historicalManifest "nano-id" "1.1.0" "GPL-3.0 AND MIT" } + , { label: "LGPL-2.1 AND LGPL-2.1-only", input: historicalManifest "bookhound" "0.1.1" "LGPL-2.1 AND LGPL-2.1-only" } + ] + +historicalManifest :: String -> String -> String -> String +historicalManifest name version license = + historicalManifestTemplate + # replace "__LICENSE__" license + # replace "__VERSION__" version + # replace "__NAME__" name + +historicalManifestTemplate :: String +historicalManifestTemplate = + """{"name":"__NAME__","version":"__VERSION__","license":"__LICENSE__","location":{"githubOwner":"purescript","githubRepo":"purescript-__NAME__"},"ref":"v__VERSION__","dependencies":{"prelude":">=6.0.0 <7.0.0"}}""" + +replace :: String -> String -> String -> String +replace pattern replacement = + String.replaceAll (Pattern pattern) (Replacement replacement) diff --git a/lib/test/Registry/ManifestIndex.purs b/lib/test/Registry/ManifestIndex.purs index 1fb7e13a6..bf1a1e16e 100644 --- a/lib/test/Registry/ManifestIndex.purs +++ b/lib/test/Registry/ManifestIndex.purs @@ -7,6 +7,7 @@ import Data.Array as Array import Data.Codec.JSON as CJ import Data.Codec.JSON.Record as CJ.Record import Data.Either (Either(..)) +import Data.Foldable (for_) import Data.Int as Int import Data.List as List import Data.Map (Map) @@ -18,6 +19,7 @@ import Data.Profunctor as Profunctor import Data.Set as Set import Data.Set.NonEmpty as NonEmptySet import Data.String as String +import Data.String.Pattern (Pattern(..), Replacement(..)) import Data.Tuple (Tuple(..)) import Data.Tuple.Nested ((/\)) import Effect.Exception (Error) @@ -45,6 +47,14 @@ spec = do let parsedContext = ManifestIndex.parseEntry contextEntry contextEntry `Assert.shouldEqualRight` map (ManifestIndex.printEntry <<< NonEmptySet.fromFoldable1) parsedContext + Spec.it "Parses historical manifest-index entries with deprecated SPDX identifiers" do + for_ historicalEntries \{ label, input, expected } -> do + case map (ManifestIndex.printEntry <<< NonEmptySet.fromFoldable1) (ManifestIndex.parseEntry input) of + Left err -> + Assert.fail $ "Failed to parse historical manifest-index entry for " <> label <> ": " <> err + Right printed -> + Assert.shouldEqual expected printed + Spec.it "Produces correct entry file paths" do let entries = @@ -156,6 +166,37 @@ contextEntry = {"name":"context","version":"0.0.3","license":"MIT","location":{"githubOwner":"Fresheyeball","githubRepo":"purescript-owner"},"ref":"v0.0.3","dependencies":{}} """ +historicalEntries :: Array { label :: String, input :: String, expected :: String } +historicalEntries = + [ historicalEntry "AGPL-3.0" "jarilo" "1.0.1" "AGPL-3.0" "AGPL-3.0-only" + , historicalEntry "LGPL-3.0" "matrices" "5.0.0" "LGPL-3.0" "LGPL-3.0-only" + , historicalEntry "LGPL-3.0+" "test-unit" "17.0.0" "LGPL-3.0+" "LGPL-3.0-or-later" + , historicalEntry "GPL-3.0 AND MIT" "nano-id" "1.1.0" "GPL-3.0 AND MIT" "GPL-3.0-only AND MIT" + , historicalEntry "LGPL-2.1 AND LGPL-2.1-only" "bookhound" "0.1.1" "LGPL-2.1 AND LGPL-2.1-only" "LGPL-2.1-only AND LGPL-2.1-only" + ] + +historicalEntry :: String -> String -> String -> String -> String -> { label :: String, input :: String, expected :: String } +historicalEntry label name version historical canonical = + { label + , input: historicalLine historical name version + , expected: historicalLine canonical name version + } + where + historicalLine license packageName packageVersion = + ( historicalManifestLineTemplate + # replace "__LICENSE__" license + # replace "__VERSION__" packageVersion + # replace "__NAME__" packageName + ) <> "\n" + +historicalManifestLineTemplate :: String +historicalManifestLineTemplate = + """{"name":"__NAME__","version":"__VERSION__","license":"__LICENSE__","location":{"githubOwner":"purescript","githubRepo":"purescript-__NAME__"},"ref":"v__VERSION__","dependencies":{"prelude":">=6.0.0 <7.0.0"}}""" + +replace :: String -> String -> String -> String +replace pattern replacement = + String.replaceAll (Pattern pattern) (Replacement replacement) + testIndex :: forall m . MonadThrow Error m From 0f2d20acfaeb92a2b02baf4abc543bf445ce43f3 Mon Sep 17 00:00:00 2001 From: Thomas Honeyman Date: Mon, 16 Mar 2026 10:17:39 -0400 Subject: [PATCH 4/5] de-duplicate tests, add 2 missing license mappings --- lib/src/License.purs | 2 ++ lib/test/Registry/License.purs | 4 +++ lib/test/Registry/Manifest.purs | 39 ++++++++++++++++---------- lib/test/Registry/ManifestIndex.purs | 41 ---------------------------- 4 files changed, 30 insertions(+), 56 deletions(-) diff --git a/lib/src/License.purs b/lib/src/License.purs index d929cfc91..35d90e3b8 100644 --- a/lib/src/License.purs +++ b/lib/src/License.purs @@ -245,6 +245,7 @@ deprecatedIdentifierReplacements = Map.fromFoldable [ Tuple "BSD-2-Clause-NetBSD" { identifier: "BSD-2-Clause", exception: Nothing } , Tuple "StandardML-NJ" { identifier: "SMLNJ", exception: Nothing } , Tuple "bzip2-1.0.5" { identifier: "bzip2-1.0.6", exception: Nothing } + , Tuple "eCos-2.0" { identifier: "GPL-2.0-or-later", exception: Just "eCos-exception-2.0" } , Tuple "GPL-2.0-with-GCC-exception" { identifier: "GPL-2.0-only", exception: Just "GCC-exception-2.0" } , Tuple "GPL-2.0-with-autoconf-exception" { identifier: "GPL-2.0-only", exception: Just "Autoconf-exception-2.0" } , Tuple "GPL-2.0-with-bison-exception" { identifier: "GPL-2.0-only", exception: Just "Bison-exception-2.2" } @@ -252,6 +253,7 @@ deprecatedIdentifierReplacements = Map.fromFoldable , Tuple "GPL-2.0-with-font-exception" { identifier: "GPL-2.0-only", exception: Just "Font-exception-2.0" } , Tuple "GPL-3.0-with-GCC-exception" { identifier: "GPL-3.0-only", exception: Just "GCC-exception-3.1" } , Tuple "GPL-3.0-with-autoconf-exception" { identifier: "GPL-3.0-only", exception: Just "Autoconf-exception-3.0" } + , Tuple "wxWindows" { identifier: "LGPL-2.0-or-later", exception: Just "WxWindows-exception-3.1" } ] ambiguousDeprecatedIdentifiers :: Set.Set String diff --git a/lib/test/Registry/License.purs b/lib/test/Registry/License.purs index a789f3f38..bfbb9464a 100644 --- a/lib/test/Registry/License.purs +++ b/lib/test/Registry/License.purs @@ -35,6 +35,7 @@ spec = do cases = [ { input: "AGPL-3.0", output: "AGPL-3.0-only" } , { input: "AGPL-3.0+", output: "AGPL-3.0-or-later" } + , { input: "eCos-2.0", output: "GPL-2.0-or-later WITH eCos-exception-2.0" } , { input: "LGPL-2.1", output: "LGPL-2.1-only" } , { input: "LGPL-3.0", output: "LGPL-3.0-only" } , { input: "LGPL-3.0+", output: "LGPL-3.0-or-later" } @@ -46,6 +47,7 @@ spec = do , { input: "GFDL-1.3+", output: "GFDL-1.3-or-later" } , { input: "BSD-2-Clause-NetBSD", output: "BSD-2-Clause" } , { input: "StandardML-NJ", output: "SMLNJ" } + , { input: "wxWindows", output: "LGPL-2.0-or-later WITH WxWindows-exception-3.1" } ] for_ cases \{ input, output } -> @@ -60,6 +62,7 @@ spec = do rejected = [ { input: "AGPL-3.0", expectedError: "AGPL-3.0-only" } , { input: "AGPL-3.0+", expectedError: "AGPL-3.0-or-later" } + , { input: "eCos-2.0", expectedError: "GPL-2.0-or-later WITH eCos-exception-2.0" } , { input: "LGPL-2.1", expectedError: "LGPL-2.1-only" } , { input: "LGPL-3.0", expectedError: "LGPL-3.0-only" } , { input: "LGPL-3.0+", expectedError: "LGPL-3.0-or-later" } @@ -70,6 +73,7 @@ spec = do , { input: "LGPL-2.1 AND LGPL-2.1-only", expectedError: "LGPL-2.1-only" } , { input: "GFDL-1.3", expectedError: "unambiguous canonical replacement" } , { input: "GFDL-1.3+", expectedError: "GFDL-1.3-or-later" } + , { input: "wxWindows", expectedError: "LGPL-2.0-or-later WITH WxWindows-exception-3.1" } ] for_ rejected \{ input, expectedError } -> diff --git a/lib/test/Registry/Manifest.purs b/lib/test/Registry/Manifest.purs index 172d1a390..ff3b6fdfd 100644 --- a/lib/test/Registry/Manifest.purs +++ b/lib/test/Registry/Manifest.purs @@ -28,8 +28,8 @@ spec = do pure { label: path, value: String.trim rawManifest } Assert.shouldRoundTrip "Manifest" Manifest.codec fixtures - Spec.it "Decodes historical manifests with deprecated SPDX identifiers" do - for_ historicalManifests \{ label, input } -> + Spec.it "Decodes and canonicalizes historical manifests with deprecated SPDX identifiers" do + for_ historicalManifests \{ label, input, expected } -> case JSON.parse input of Left err -> Assert.fail $ "Failed to parse test JSON for " <> label <> ": " <> err @@ -37,27 +37,36 @@ spec = do case CJ.decode Manifest.codec json of Left err -> Assert.fail $ "Failed to decode historical manifest for " <> label <> ": " <> CJ.DecodeError.print err - Right _ -> - pure unit + Right manifest -> + Assert.shouldEqual expected (JSON.print $ CJ.encode Manifest.codec manifest) -historicalManifests :: Array { label :: String, input :: String } +historicalManifests :: Array { label :: String, input :: String, expected :: String } historicalManifests = - [ { label: "AGPL-3.0", input: historicalManifest "jarilo" "1.0.1" "AGPL-3.0" } - , { label: "LGPL-3.0", input: historicalManifest "matrices" "5.0.0" "LGPL-3.0" } - , { label: "LGPL-3.0+", input: historicalManifest "test-unit" "17.0.0" "LGPL-3.0+" } - , { label: "GPL-3.0 AND MIT", input: historicalManifest "nano-id" "1.1.0" "GPL-3.0 AND MIT" } - , { label: "LGPL-2.1 AND LGPL-2.1-only", input: historicalManifest "bookhound" "0.1.1" "LGPL-2.1 AND LGPL-2.1-only" } + [ historicalManifest "AGPL-3.0" "jarilo" "1.0.1" "AGPL-3.0" "AGPL-3.0-only" + , historicalManifest "eCos-2.0" "ecos" "1.2.3" "eCos-2.0" "GPL-2.0-or-later WITH eCos-exception-2.0" + , historicalManifest "LGPL-3.0" "matrices" "5.0.0" "LGPL-3.0" "LGPL-3.0-only" + , historicalManifest "LGPL-3.0+" "test-unit" "17.0.0" "LGPL-3.0+" "LGPL-3.0-or-later" + , historicalManifest "GPL-3.0 AND MIT" "nano-id" "1.1.0" "GPL-3.0 AND MIT" "GPL-3.0-only AND MIT" + , historicalManifest "LGPL-2.1 AND LGPL-2.1-only" "bookhound" "0.1.1" "LGPL-2.1 AND LGPL-2.1-only" "LGPL-2.1-only AND LGPL-2.1-only" + , historicalManifest "wxWindows" "wx" "0.9.0" "wxWindows" "LGPL-2.0-or-later WITH WxWindows-exception-3.1" ] -historicalManifest :: String -> String -> String -> String -historicalManifest name version license = - historicalManifestTemplate +historicalManifest :: String -> String -> String -> String -> String -> { label :: String, input :: String, expected :: String } +historicalManifest label name version historical canonical = + { label + , input: manifestJson historical name version + , expected: manifestJson canonical name version + } + +manifestJson :: String -> String -> String -> String +manifestJson license name version = + manifestTemplate # replace "__LICENSE__" license # replace "__VERSION__" version # replace "__NAME__" name -historicalManifestTemplate :: String -historicalManifestTemplate = +manifestTemplate :: String +manifestTemplate = """{"name":"__NAME__","version":"__VERSION__","license":"__LICENSE__","location":{"githubOwner":"purescript","githubRepo":"purescript-__NAME__"},"ref":"v__VERSION__","dependencies":{"prelude":">=6.0.0 <7.0.0"}}""" replace :: String -> String -> String -> String diff --git a/lib/test/Registry/ManifestIndex.purs b/lib/test/Registry/ManifestIndex.purs index bf1a1e16e..1fb7e13a6 100644 --- a/lib/test/Registry/ManifestIndex.purs +++ b/lib/test/Registry/ManifestIndex.purs @@ -7,7 +7,6 @@ import Data.Array as Array import Data.Codec.JSON as CJ import Data.Codec.JSON.Record as CJ.Record import Data.Either (Either(..)) -import Data.Foldable (for_) import Data.Int as Int import Data.List as List import Data.Map (Map) @@ -19,7 +18,6 @@ import Data.Profunctor as Profunctor import Data.Set as Set import Data.Set.NonEmpty as NonEmptySet import Data.String as String -import Data.String.Pattern (Pattern(..), Replacement(..)) import Data.Tuple (Tuple(..)) import Data.Tuple.Nested ((/\)) import Effect.Exception (Error) @@ -47,14 +45,6 @@ spec = do let parsedContext = ManifestIndex.parseEntry contextEntry contextEntry `Assert.shouldEqualRight` map (ManifestIndex.printEntry <<< NonEmptySet.fromFoldable1) parsedContext - Spec.it "Parses historical manifest-index entries with deprecated SPDX identifiers" do - for_ historicalEntries \{ label, input, expected } -> do - case map (ManifestIndex.printEntry <<< NonEmptySet.fromFoldable1) (ManifestIndex.parseEntry input) of - Left err -> - Assert.fail $ "Failed to parse historical manifest-index entry for " <> label <> ": " <> err - Right printed -> - Assert.shouldEqual expected printed - Spec.it "Produces correct entry file paths" do let entries = @@ -166,37 +156,6 @@ contextEntry = {"name":"context","version":"0.0.3","license":"MIT","location":{"githubOwner":"Fresheyeball","githubRepo":"purescript-owner"},"ref":"v0.0.3","dependencies":{}} """ -historicalEntries :: Array { label :: String, input :: String, expected :: String } -historicalEntries = - [ historicalEntry "AGPL-3.0" "jarilo" "1.0.1" "AGPL-3.0" "AGPL-3.0-only" - , historicalEntry "LGPL-3.0" "matrices" "5.0.0" "LGPL-3.0" "LGPL-3.0-only" - , historicalEntry "LGPL-3.0+" "test-unit" "17.0.0" "LGPL-3.0+" "LGPL-3.0-or-later" - , historicalEntry "GPL-3.0 AND MIT" "nano-id" "1.1.0" "GPL-3.0 AND MIT" "GPL-3.0-only AND MIT" - , historicalEntry "LGPL-2.1 AND LGPL-2.1-only" "bookhound" "0.1.1" "LGPL-2.1 AND LGPL-2.1-only" "LGPL-2.1-only AND LGPL-2.1-only" - ] - -historicalEntry :: String -> String -> String -> String -> String -> { label :: String, input :: String, expected :: String } -historicalEntry label name version historical canonical = - { label - , input: historicalLine historical name version - , expected: historicalLine canonical name version - } - where - historicalLine license packageName packageVersion = - ( historicalManifestLineTemplate - # replace "__LICENSE__" license - # replace "__VERSION__" packageVersion - # replace "__NAME__" packageName - ) <> "\n" - -historicalManifestLineTemplate :: String -historicalManifestLineTemplate = - """{"name":"__NAME__","version":"__VERSION__","license":"__LICENSE__","location":{"githubOwner":"purescript","githubRepo":"purescript-__NAME__"},"ref":"v__VERSION__","dependencies":{"prelude":">=6.0.0 <7.0.0"}}""" - -replace :: String -> String -> String -> String -replace pattern replacement = - String.replaceAll (Pattern pattern) (Replacement replacement) - testIndex :: forall m . MonadThrow Error m From 887b0d612ba8f6bcd0c699a2e6a4675ed1288b67 Mon Sep 17 00:00:00 2001 From: Thomas Honeyman Date: Mon, 16 Mar 2026 10:47:22 -0400 Subject: [PATCH 5/5] move comment --- app/src/App/Manifest/SpagoYaml.purs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/src/App/Manifest/SpagoYaml.purs b/app/src/App/Manifest/SpagoYaml.purs index 03712e435..1a82bc6fc 100644 --- a/app/src/App/Manifest/SpagoYaml.purs +++ b/app/src/App/Manifest/SpagoYaml.purs @@ -88,10 +88,9 @@ type PublishConfig = } publishConfigCodec :: CJ.Codec PublishConfig --- | Publish metadata is authored input, so it must use canonical SPDX --- | identifiers even though stored manifests remain backward-compatible. publishConfigCodec = CJ.named "PublishConfig" $ CJ.Record.object { version: Version.codec + -- Publish metadata is authored input so it must use canonical SPDX identifiers , license: canonicalLicenseCodec , location: CJ.Record.optional Location.codec , include: CJ.Record.optional (CJ.array CJ.string)