Files
openshift-mcp-server/vendor/github.com/google/jsonschema-go/jsonschema/resolve.go
2025-09-25 16:40:49 +03:00

549 lines
18 KiB
Go

// Copyright 2025 The JSON Schema Go Project Authors. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.
// This file deals with preparing a schema for validation, including various checks,
// optimizations, and the resolution of cross-schema references.
package jsonschema
import (
"errors"
"fmt"
"net/url"
"reflect"
"regexp"
"strings"
)
// A Resolved consists of a [Schema] along with associated information needed to
// validate documents against it.
// A Resolved has passed the checks performed by [Schema.Resolve], and all its references
// (the $ref and $dynamicRef keywords) have been resolved to their referenced Schemas.
// Call [Schema.Resolve] to obtain a Resolved from a Schema.
type Resolved struct {
// The schema this Resolved was built for; returned by the Schema method.
root *Schema
// Map from fragmentless URI strings to the schemas they identify:
// the base URI the root was resolved against, plus every resolved $id.
resolvedURIs map[string]*Schema
// Map from schemas to additional info computed during resolution.
// Infos of remotely loaded schemas are merged in as references to them are resolved.
resolvedInfos map[*Schema]*resolvedInfo
}
// newResolved returns a Resolved rooted at s whose lookup maps are
// empty but non-nil, ready to be filled in by the resolution steps.
func newResolved(s *Schema) *Resolved {
	rs := new(Resolved)
	rs.root = s
	rs.resolvedURIs = make(map[string]*Schema)
	rs.resolvedInfos = make(map[*Schema]*resolvedInfo)
	return rs
}
// resolvedInfo holds information specific to a schema that is computed by [Schema.Resolve].
type resolvedInfo struct {
// The schema this info describes.
s *Schema
// The JSON Pointer path from the root schema to here ("root" for the root itself).
// Used in errors.
path string
// The schema's base schema.
// If the schema is the root or has an ID, its base is itself.
// Otherwise, its base is the innermost enclosing schema whose base
// is itself.
// Intuitively, a base schema is one that can be referred to with a
// fragmentless URI.
base *Schema
// The URI for the schema, if it is the root or has an ID.
// Otherwise nil.
// Invariants:
// s.base.uri != nil.
// s.base == s <=> s.uri != nil
uri *url.URL
// The schema to which Ref refers.
resolvedRef *Schema
// If the schema has a dynamic ref, exactly one of the next two fields
// will be non-zero after successful resolution.
// The schema to which the dynamic ref refers when it acts lexically.
resolvedDynamicRef *Schema
// The anchor to look up on the stack when the dynamic ref acts dynamically.
dynamicRefAnchor string
// The following fields are independent of arguments to Schema.Resolve,
// so they could live on the Schema. We put them here for simplicity.
// The set of required properties, built from the schema's Required list.
// Nil when the list is empty.
isRequired map[string]bool
// Compiled regexps.
// pattern is the compiled Pattern keyword; nil when the schema has no pattern.
pattern *regexp.Regexp
// patternProperties maps each compiled PatternProperties key to its subschema.
patternProperties map[*regexp.Regexp]*Schema
// Map from anchors to subschemas.
// Entries are recorded on the info of the base schema, and cover both
// $anchor and $dynamicAnchor names declared by schemas sharing that base.
anchors map[string]anchorInfo
}
// Schema reports the root schema from which r was produced.
// Callers must treat the returned schema as read-only.
func (r *Resolved) Schema() *Schema {
	return r.root
}
// schemaString returns a short string describing the schema, for use in messages.
// It prefers the schema's $id, then the path recorded for it during resolution,
// and falls back to a fixed placeholder when neither is available.
func (r *Resolved) schemaString(s *Schema) string {
	if s.ID != "" {
		return s.ID
	}
	// A schema with no entry in resolvedInfos yields a nil info; guard against
	// that so a descriptive helper never panics while building a message.
	if info := r.resolvedInfos[s]; info != nil && info.path != "" {
		return info.path
	}
	return "<anonymous schema>"
}
// A Loader reads and unmarshals the schema at uri, if any.
// During resolution it is called with the fragmentless URI of a reference
// that is found neither among the current schema's URIs nor among the
// schemas already loaded.
type Loader func(uri *url.URL) (*Schema, error)
// ResolveOptions are options for [Schema.Resolve].
type ResolveOptions struct {
// BaseURI is the URI relative to which the root schema should be resolved.
// If non-empty, must be an absolute URI (one that starts with a scheme).
// It is resolved (in the URI sense; see [url.URL.ResolveReference]) with root's
// $id property.
// If the resulting URI is not absolute, then the schema cannot contain
// relative URI references.
// A BaseURI that carries a fragment is rejected.
BaseURI string
// Loader loads schemas that are referred to by a $ref but are not under the
// root schema (remote references).
// If nil, resolving a remote reference will return an error.
Loader Loader
// ValidateDefaults determines whether to validate values of "default" keywords
// against their schemas.
// The [JSON Schema specification] does not require this, but it is recommended
// if defaults will be used.
//
// [JSON Schema specification]: https://json-schema.org/understanding-json-schema/reference/annotations
ValidateDefaults bool
}
// Resolve prepares the schema for validation: it checks the schema, assigns base
// URIs, and resolves every reference it contains.
// A nil opts is equivalent to a zero ResolveOptions.
// The schema must not be changed after Resolve is called.
// The same schema may be resolved multiple times.
func (root *Schema) Resolve(opts *ResolveOptions) (*Resolved, error) {
	// Preparing a schema for validation takes up to five steps:
	//  1. Load: read the schema from somewhere and unmarshal it.
	//     This schema (root) may have been loaded or created in memory, but other
	//     schemas that come into the picture in step 4 are loaded by the given loader.
	//  2. Check: validate the schema against a meta-schema and perform other
	//     well-formedness checks, precomputing some values along the way.
	//  3. Resolve URIs: determine the base URI of the root and all its subschemas,
	//     and resolve (in the URI sense) all identifiers and anchors with their
	//     bases. This yields a map from URIs to schemas within root.
	//  4. Resolve references: every ref is replaced with the schema it refers to.
	//  5. (Optional.) If opts.ValidateDefaults is true, validate the defaults.
	state := &resolver{loaded: map[string]*Resolved{}}
	if opts != nil {
		state.opts = *opts
	}
	if state.opts.Loader == nil {
		// With no loader supplied, any remote reference is an error.
		state.opts.Loader = func(*url.URL) (*Schema, error) {
			return nil, errors.New("cannot resolve remote schemas: no loader passed to Schema.Resolve")
		}
	}

	// An empty URL is a usable default: ResolveReference can still be called on it.
	baseURI := new(url.URL)
	if raw := state.opts.BaseURI; raw != "" {
		parsed, err := url.Parse(raw)
		if err != nil {
			return nil, fmt.Errorf("parsing base URI: %w", err)
		}
		baseURI = parsed
	}

	out, err := state.resolve(root, baseURI)
	if err != nil {
		return nil, err
	}
	if !state.opts.ValidateDefaults {
		return out, nil
	}
	if err := out.validateDefaults(); err != nil {
		return nil, err
	}
	// TODO: before we return, throw away anything we don't need for validation.
	return out, nil
}
// A resolver holds the state for resolution.
// One resolver is created per call to [Schema.Resolve] and shared by the
// recursive resolution of every remote schema reached from the root.
type resolver struct {
// A copy of the caller's options; Loader is always non-nil once Resolve has set up the resolver.
opts ResolveOptions
// A cache of loaded and partly resolved schemas. (They may not have had their
// refs resolved.) The cache ensures that the loader will never be called more
// than once with the same URI, and that reference cycles are handled properly.
// Keys are URI strings; a schema is entered under both the URI it was loaded
// from and the URI of its root.
loaded map[string]*Resolved
}
// resolve checks s, assigns URIs to it and its subschemas relative to baseURI,
// registers the result in the resolver's cache, and then resolves its references.
// baseURI must not carry a fragment.
func (r *resolver) resolve(s *Schema, baseURI *url.URL) (*Resolved, error) {
	if baseURI.Fragment != "" {
		return nil, fmt.Errorf("base URI %s must not have a fragment", baseURI)
	}

	rs := newResolved(s)
	err := s.check(rs.resolvedInfos)
	if err == nil {
		err = resolveURIs(rs, baseURI)
	}
	if err != nil {
		return nil, err
	}

	// Register the schema under the URI it was loaded from and under its
	// canonical name; the two differ when the schema declares an $id.
	// This has to happen before resolveRefs runs, otherwise a reference cycle
	// would recurse without bound.
	loadedFrom := baseURI.String()
	canonical := rs.resolvedInfos[s].uri.String()
	r.loaded[loadedFrom] = rs
	r.loaded[canonical] = rs

	if err := r.resolveRefs(rs); err != nil {
		return nil, err
	}
	return rs, nil
}
// check verifies root and all of its subschemas, filling infos as it goes.
// A structural problem is returned immediately; otherwise every local problem
// found in any subschema is collected and returned as one joined error.
func (root *Schema) check(infos map[*Schema]*resolvedInfo) error {
	// Structure comes first and fails fast: the remaining checks can panic
	// on a badly structured schema.
	if err := root.checkStructure(infos); err != nil {
		return err
	}

	var problems []error
	collect := func(e error) {
		problems = append(problems, e)
	}
	for sub := range root.all() {
		sub.checkLocal(collect, infos)
	}
	return errors.Join(problems...)
}
// checkStructure walks root and everything reachable through its schema-valued
// fields, confirming that no schema is nil and none is reached twice (that is,
// the schemas form a tree). Each schema visited gets an entry in infos recording
// its path, which makes later error messages more useful.
// infos must be empty on entry.
func (root *Schema) checkStructure(infos map[*Schema]*resolvedInfo) error {
	assert(len(infos) == 0, "non-empty infos")

	var walk func(val reflect.Value, ptr []byte) error
	walk = func(val reflect.Value, ptr []byte) error {
		// In messages the root is called "root"; every other schema is named
		// by its JSON Pointer from the root.
		location := "root"
		if len(ptr) > 0 {
			location = string(ptr)
		}

		sch := val.Interface().(*Schema)
		if sch == nil {
			return fmt.Errorf("jsonschema: schema at %s is nil", location)
		}
		if prev, seen := infos[sch]; seen {
			// Reaching a schema twice means the graph is not a tree. It has to
			// be one, because each schema needs a unique base, and a cycle
			// would also send Schema.all into infinite recursion.
			return fmt.Errorf("jsonschema: schemas at %s do not form a tree; %s appears more than once (also at %s)",
				root, prev.path, location)
		}
		infos[sch] = &resolvedInfo{s: sch, path: location}

		fields := val.Elem()
		for _, fi := range schemaFieldInfos {
			field := fields.FieldByIndex(fi.sf.Index)
			switch fi.sf.Type {
			case schemaType:
				// A single schema. Nil just means the keyword is absent.
				if field.IsNil() {
					continue
				}
				if err := walk(field, fmt.Appendf(ptr, "/%s", fi.jsonName)); err != nil {
					return err
				}

			case schemaSliceType:
				for i, n := 0, field.Len(); i < n; i++ {
					if err := walk(field.Index(i), fmt.Appendf(ptr, "/%s/%d", fi.jsonName, i)); err != nil {
						return err
					}
				}

			case schemaMapType:
				for entries := field.MapRange(); entries.Next(); {
					segment := escapeJSONPointerSegment(entries.Key().String())
					if err := walk(entries.Value(), fmt.Appendf(ptr, "/%s/%s", fi.jsonName, segment)); err != nil {
						return err
					}
				}
			}
		}
		return nil
	}

	return walk(reflect.ValueOf(root), make([]byte, 0, 256))
}
// checkLocal validates s on its own, without looking at any schema it refers to.
// Problems are passed to report rather than returned.
// Checking a regexp means compiling it, so the compiled forms, along with the
// set of required property names, are stored in the info for s for later use.
func (s *Schema) checkLocal(report func(error), infos map[*Schema]*resolvedInfo) {
	fail := func(format string, args ...any) {
		report(fmt.Errorf("jsonschema.Schema: %s: %s", s, fmt.Sprintf(format, args...)))
	}

	if s == nil {
		fail("nil subschema")
		return
	}
	if err := s.basicChecks(); err != nil {
		report(err)
		return
	}

	// TODO: validate the schema's properties,
	// ideally by jsonschema-validating it against the meta-schema.

	// A few properties exist only so that Schemas can round-trip and are not
	// validated; at present that is just $vocabulary.
	// The 2020-12 meta-schema is the one special case we can validate.
	if s.Vocabulary != nil && s.Schema != draft202012 {
		fail("cannot validate a schema with $vocabulary")
	}

	info := infos[s]

	// Compile the pattern keyword, if present.
	if s.Pattern != "" {
		if compiled, err := regexp.Compile(s.Pattern); err == nil {
			info.pattern = compiled
		} else {
			fail("pattern: %v", err)
		}
	}

	// Compile each patternProperties key; keys that fail are reported and skipped.
	if len(s.PatternProperties) > 0 {
		byRegexp := map[*regexp.Regexp]*Schema{}
		info.patternProperties = byRegexp
		for source, sub := range s.PatternProperties {
			compiled, err := regexp.Compile(source)
			if err == nil {
				byRegexp[compiled] = sub
			} else {
				fail("patternProperties[%q]: %v", source, err)
			}
		}
	}

	// Turn the required list into a set so that validating a struct does not
	// go quadratic.
	if len(s.Required) > 0 {
		required := map[string]bool{}
		for _, name := range s.Required {
			required[name] = true
		}
		info.isRequired = required
	}
}
// resolveURIs resolves the ids and anchors in all the schemas of root, relative
// to baseURI.
// See https://json-schema.org/draft/2020-12/json-schema-core#section-8.2, section
// 8.2.1.
//
// Every schema has a base URI and a parent base URI.
//
// The parent base URI is the base URI of the lexically enclosing schema, or for
// a root schema, the URI it was loaded from or the one supplied to [Schema.Resolve].
//
// If the schema has no $id property, the base URI of a schema is that of its parent.
// If the schema does have an $id, it must be a URI, possibly relative. The schema's
// base URI is the $id resolved (in the sense of [url.URL.ResolveReference]) against
// the parent base.
//
// As an example, consider this schema loaded from http://a.com/root.json (quotes omitted):
//
//	{
//	  allOf: [
//	    {$id: "sub1.json", minLength: 5},
//	    {$id: "http://b.com", minimum: 10},
//	    {not: {maximum: 20}}
//	  ]
//	}
//
// The base URIs are as follows. Schema locations are expressed in the JSON Pointer notation.
//
//	schema         base URI
//	root           http://a.com/root.json
//	allOf/0        http://a.com/sub1.json
//	allOf/1        http://b.com (absolute $id; doesn't matter that it's not under the loaded URI)
//	allOf/2        http://a.com/root.json (inherited from parent)
//	allOf/2/not    http://a.com/root.json (inherited from parent)
//
// resolveURIs returns an error if an $id fails to parse, carries a fragment, or
// does not resolve to an absolute URI, or if the same anchor name is declared
// more than once under one base schema.
func resolveURIs(rs *Resolved, baseURI *url.URL) error {
	var resolve func(s, base *Schema) error
	resolve = func(s, base *Schema) error {
		info := rs.resolvedInfos[s]
		baseInfo := rs.resolvedInfos[base]

		// ids are scoped to the root.
		if s.ID != "" {
			// A non-empty ID establishes a new base.
			idURI, err := url.Parse(s.ID)
			if err != nil {
				return err
			}
			if idURI.Fragment != "" {
				return fmt.Errorf("$id %s must not have a fragment", s.ID)
			}
			// The base URI for this schema is its $id resolved against the parent base.
			info.uri = baseInfo.uri.ResolveReference(idURI)
			if !info.uri.IsAbs() {
				return fmt.Errorf("$id %s does not resolve to an absolute URI (base is %q)", s.ID, baseInfo.uri)
			}
			rs.resolvedURIs[info.uri.String()] = s
			base = s // needed for anchors
			baseInfo = rs.resolvedInfos[base]
		}
		info.base = base

		// Anchors and dynamic anchors are URI fragments that are scoped to their base.
		// We treat them as keys in a map stored within the base schema's info.
		setAnchor := func(anchor string, dynamic bool) error {
			if anchor == "" {
				return nil
			}
			if _, ok := baseInfo.anchors[anchor]; ok {
				return fmt.Errorf("duplicate anchor %q in %s", anchor, baseInfo.uri)
			}
			if baseInfo.anchors == nil {
				baseInfo.anchors = map[string]anchorInfo{}
			}
			baseInfo.anchors[anchor] = anchorInfo{s, dynamic}
			return nil
		}
		// Propagate duplicate-anchor errors instead of dropping them: a reused
		// name would otherwise silently resolve to whichever schema was seen first.
		if err := setAnchor(s.Anchor, false); err != nil {
			return err
		}
		if err := setAnchor(s.DynamicAnchor, true); err != nil {
			return err
		}

		for c := range s.children() {
			if err := resolve(c, base); err != nil {
				return err
			}
		}
		return nil
	}

	// Set the root URI to the base for now. If the root has an $id, this will change.
	rs.resolvedInfos[rs.root].uri = baseURI
	// The original base, even if changed, is still a valid way to refer to the root.
	rs.resolvedURIs[baseURI.String()] = rs.root

	return resolve(rs.root, rs.root)
}
// resolveRefs visits every schema under rs.root and records, in that schema's
// info, what its $ref and $dynamicRef point to.
// A reference that is not satisfied within rs may name another schema document,
// which is then loaded and resolved as a side effect.
func (r *resolver) resolveRefs(rs *Resolved) error {
	for sch := range rs.root.all() {
		info := rs.resolvedInfos[sch]

		if ref := sch.Ref; ref != "" {
			// A $ref is always lexical, even when its fragment names a
			// dynamic anchor, so the reported fragment is ignored.
			target, _, err := r.resolveRef(rs, sch, ref)
			if err != nil {
				return err
			}
			info.resolvedRef = target
		}

		dynRef := sch.DynamicRef
		if dynRef == "" {
			continue
		}
		target, anchor, err := r.resolveRef(rs, sch, dynRef)
		if err != nil {
			return err
		}
		if anchor == "" {
			// The lexically referenced schema has no dynamic anchor, so the
			// dynamic ref behaves exactly like a lexical one.
			info.resolvedDynamicRef = target
		} else {
			// The fragment names a dynamic anchor; it has to be looked up
			// at validation time.
			info.dynamicRefAnchor = anchor
		}
	}
	return nil
}
// resolveRef resolves the reference ref, which is either s.Ref or s.DynamicRef.
//
// The reference is first URI-resolved against the URI of s's base schema. Its
// fragmentless part selects a schema document: one within rs, one already held
// in the resolver's cache, or one obtained from the loader and resolved
// recursively. The fragment is then looked up inside that document, either as
// an anchor name or as a JSON Pointer.
//
// It returns the referenced schema. dynamicFragment is the fragment if it names
// a dynamic anchor, and is empty otherwise.
func (r *resolver) resolveRef(rs *Resolved, s *Schema, ref string) (_ *Schema, dynamicFragment string, err error) {
	refURI, err := url.Parse(ref)
	if err != nil {
		return nil, "", err
	}
	// URI-resolve the ref against the current base URI to get a complete URI.
	base := rs.resolvedInfos[s].base
	refURI = rs.resolvedInfos[base].uri.ResolveReference(refURI)

	// The non-fragment part of a ref URI refers to the base URI of some schema.
	// This part is the same for dynamic refs too: their non-fragment part resolves
	// lexically.
	u := *refURI
	u.Fragment = ""
	fraglessRefURI := &u
	key := fraglessRefURI.String()

	// Look it up locally.
	referencedSchema := rs.resolvedURIs[key]
	if referencedSchema == nil {
		// The schema is remote. Maybe we've already loaded it.
		// We assume that the non-fragment part of refURI refers to a top-level schema
		// document. That is, we don't support the case exemplified by
		// http://foo.com/bar.json/baz, where the document is in bar.json and
		// the reference points to a subschema within it.
		// TODO: support that case.
		lrs := r.loaded[key]
		if lrs == nil {
			// Try to load the schema.
			ls, err := r.opts.Loader(fraglessRefURI)
			if err != nil {
				return nil, "", fmt.Errorf("loading %s: %w", fraglessRefURI, err)
			}
			lrs, err = r.resolve(ls, fraglessRefURI)
			if err != nil {
				return nil, "", err
			}
		}
		referencedSchema = lrs.root
		assert(referencedSchema != nil, "nil referenced schema")
		// Copy the resolvedInfos from lrs into rs, without overwriting
		// (hence we can't use maps.Insert).
		// This must happen for cached documents as well as freshly loaded ones:
		// a document first loaded on behalf of a different Resolved (shared or
		// cyclic remote references) has no infos in rs yet, and the anchor
		// lookup below needs them.
		for ls, li := range lrs.resolvedInfos {
			if rs.resolvedInfos[ls] == nil {
				rs.resolvedInfos[ls] = li
			}
		}
	}

	frag := refURI.Fragment
	// Look up frag in referencedSchema.
	// frag is either a JSON Pointer or the name of an anchor.
	// A JSON Pointer is either the empty string or begins with a '/',
	// whereas anchors are always non-empty strings that don't contain slashes.
	if frag != "" && !strings.HasPrefix(frag, "/") {
		resInfo := rs.resolvedInfos[referencedSchema]
		if resInfo == nil {
			// Should be unreachable after the merge above; report it rather
			// than dereference nil.
			return nil, "", fmt.Errorf("jsonschema: internal error: no resolution info for schema referenced by %q", ref)
		}
		info, found := resInfo.anchors[frag]
		if !found {
			return nil, "", fmt.Errorf("no anchor %q in %s", frag, s)
		}
		if info.dynamic {
			dynamicFragment = frag
		}
		return info.schema, dynamicFragment, nil
	}

	// frag is a JSON Pointer.
	target, err := dereferenceJSONPointer(referencedSchema, frag)
	return target, "", err
}