Prismatic schema: removing unanticipated keys

1.6k Views Asked by At

My API is receiving some JSON data from the client.

I would like to use Schema to perform validation and coercion on the data I receive, but with one additional requirement: if there is any map key that is not described in the schema, ignore and remove it instead of failing the validation (this is because my client may send me some "garbage" properties along with the ones I care about. I want to be tolerant to that.).

So in a nutshell, I would like to perform a "deep select-keys" on my input data using my schema, before validation/coercion.

Example of what I need:

(require '[schema.core :as sc])
(def MySchema
  "Shape of the payload the API cares about: an integer under :a, a nested
   map under :b (with :c required and :d optional), and a vector of maps
   under :e, each holding an instant under :f."
  (let [b-schema {:c sc/Str
                  (sc/optional-key :d) sc/Bool}
        e-item   {:f sc/Inst}]
    {:a sc/Int
     :b b-schema
     :e [e-item]}))

;; Desired behaviour. `sanitize-and-validate` is the function being asked for;
;; it is not defined anywhere in this snippet. Keys that MySchema does not
;; describe (:$$garbage-key, :garbage-key, :_garbage-key1) should be dropped at
;; every nesting level, including inside the maps of the :e vector.
(sanitize-and-validate
  MySchema
  {:a 2
   :b {:c "hello"
       :$$garbage-key 32}
   :e [{:f #inst "2015-07-23T12:29:51.822-00:00" :garbage-key 42}]
   :_garbage-key1 "woot"})
=> {:a 2
    :b {:c "hello"}
    :e [{:f #inst "2015-07-23T12:29:51.822-00:00"}]}

I haven't yet found a reliable way of doing this:

  1. I can't seem to do it in a custom transformation, because it seems a walker does not give you access to the keys.
  2. I haven't had any luck trying to walk the schema by hand, because it's hard to differentiate map schemas and scalar schemas in a generic way; also difficult to account for all the possible shapes a schema can have.

Is there an obvious way I'm not seeing?

Thanks!

4

There are 4 best solutions below

3
On BEST ANSWER

A third solution, credits to abp: use schema.coerce/coercer with a matcher that will remove unknown keys from maps.

(require '[schema.core :as s])
(require '[schema.coerce :as coerce])
(require '[schema.utils :as utils])

(defn filter-schema-keys
  "Returns `m` with every entry removed whose key the schema does not account for.

   A key is kept when `schema-keys` contains it, or when `extra-keys-walker`
   is non-nil and applying it to the key does not produce a schema error
   (as reported by `utils/error?`). Only keys are examined, never values.

   m                 - the map to prune
   schema-keys       - the explicitly declared keys, tested with `contains?`
   extra-keys-walker - function applied to keys not found in `schema-keys`,
                       or nil when there is none"
  [m schema-keys extra-keys-walker]
  (letfn [(declared? [k]
            (contains? schema-keys k))
          (accepted-as-extra? [k]
            (and extra-keys-walker
                 (not (utils/error? (extra-keys-walker k)))))
          (keep? [k]
            (or (declared? k) (accepted-as-extra? k)))]
    ;; Fold over the original entries, dropping rejected keys from a copy of `m`.
    (reduce-kv (fn [pruned k _]
                 (cond-> pruned
                   (not (keep? k)) (dissoc k)))
               m
               m)))

(defn map-filter-matcher
  "Coercion matcher that strips unanticipated keys from map inputs.

   Returns nil unless the schema `s` is a plain Clojure map (an instance of
   PersistentArrayMap or PersistentHashMap). For such a schema it returns a
   function that, given a map value, removes the keys that are neither
   explicitly declared in `s` nor accepted by its extra-keys schema; non-map
   values are passed through untouched. Returns nil as well when the schema
   declares no explicit keys and has no extra-keys schema.

   NOTE(review): relies on `schema.core/find-extra-keys-schema` (reached
   through its var, so presumably private) and on `schema.core/walker`;
   confirm both exist in the Schema version in use."
  [s]
  (let [plain-map? (or (instance? clojure.lang.PersistentArrayMap s)
                       (instance? clojure.lang.PersistentHashMap s))]
    (when plain-map?
      (let [catch-all-key    (#'s/find-extra-keys-schema s)
            catch-all-walker (when catch-all-key (s/walker catch-all-key))
            ;; nil (rather than an empty set) when only the catch-all key exists
            declared-keys    (some->> (keys (dissoc s catch-all-key))
                                      (mapv s/explicit-schema-key)
                                      set)]
        (when (or catch-all-walker (seq declared-keys))
          (fn [x]
            (if-not (map? x)
              x
              (filter-schema-keys x declared-keys catch-all-walker))))))))

This was described as the cleanest solution by the primary author of Schema, as it does not require any change to the schema itself to work. So it's probably the way to go.

Usage example:

;; Same input as in the question: garbage keys at the top level, inside :b,
;; and inside the maps of the :e vector.
(def data {:a 2
           :b {:c "hello"
               :$$garbage-key 32}
           :e [{:f #inst "2015-07-23T12:29:51.822-00:00" :garbage-key 42}]
           :_garbage-key1 "woot"})
;; Build a coercer from the schema and the key-filtering matcher, then apply
;; it to the data. MySchema is presumably the schema from the question.
((coerce/coercer MySchema map-filter-matcher) data)
;=> {:a 2, :b {:c "hello"}, :e [{:f #inst "2015-07-23T12:29:51.822-00:00"}]}
1
On

There is a schema tool for this called "select-schema". See https://github.com/metosin/schema-tools#select-schema

From the page:

Select Schema

Filtering out illegal schema keys (using coercion):

;; `Address` is a schema from the schema-tools README; its definition is not
;; shown here. `st` is presumably an alias for schema-tools.core.
;; The value comes first, the schema second.
(st/select-schema {:street "Keskustori 8"
                   :city "Tampere"
                   :description "Metosin HQ" ; disallowed-key
                   :country {:weather "-18" ; disallowed-key
                             :name "Finland"}}
                  Address)
; {:city "Tampere", :street "Keskustori 8", :country {:name "Finland"}}

Filtering out illegal schema map keys using coercion with additional Json-coercion - in a single sweep:

;; Schema with a single key whose value must be one of two keywords.
(s/defschema Beer {:beer (s/enum :ipa :apa)})

;; Input carries the enum value as a string, plus an undeclared :taste key.
(def ipa {:beer "ipa" :taste "good"})

;; Without a coercion matcher the string "ipa" is not turned into :ipa,
;; so the call fails with the error shown below.
(st/select-schema ipa Beer)
; clojure.lang.ExceptionInfo: Could not coerce value to schema: {:beer (not (#{:ipa :apa} "ipa"))}
;     data: {:type :schema.core/error,
;            :schema {:beer {:vs #{:ipa :apa}}},
;            :value {:beer "ipa", :taste "good"},
;            :error {:beer (not (#{:ipa :apa} "ipa"))}}

;; NOTE: here `sc` aliases schema.coerce, whereas the question uses `sc`
;; for schema.core.
(require '[schema.coerce :as sc])

;; With the JSON coercion matcher the string is coerced to a keyword and the
;; undeclared :taste key is removed in the same pass.
(st/select-schema ipa Beer sc/json-coercion-matcher)
; {:beer :ipa}
7
On

From the Schema README:

For the special case of keywords, you can omit the required-key, like {:foo s/Str :bar s/Keyword}. You can also provide specific optional keys, and combine specific keys with generic schemas for the remaining key-value mappings:

(def FancyMap
  "If foo is present, it must map to a Keyword.  Any number of additional
   String-String mappings are allowed as well."
  {(s/optional-key :foo) s/Keyword
    s/Str s/Str})

;; Only a String-String entry; :foo is optional, so it may be absent.
(s/validate FancyMap {"a" "b"})

;; :foo maps to a keyword, alongside two String-String entries.
(s/validate FancyMap {:foo :f "c" "d" "e" "f"})

So apart from your specific keys (which can be s/optional-key as in the example, or s/required-key, which seems to be what you need) you can have additional "relaxed" keys, something like:

;; Assumes schema.core is aliased as `s`, i.e. (require '[schema.core :as s]),
;; like the other snippets in this answer. The original form mixed the `sc/`
;; and `s/` aliases, which only compiles if both are required.
(def MySchema
  "The question's schema, relaxed so that the map under :b also accepts
   arbitrary extra entries through the generic `s/Any s/Any` mapping."
  {:a s/Int
   :b {:c s/Str
       (s/optional-key :d) s/Bool
       ;; catch-all entry: any other key with any value is allowed
       s/Any s/Any}
   :e [{:f s/Inst}]})

EDIT: Found a "hacky" way to do this by adding a :garbage metadata and discarding those entries in the walker:

(def Myschema
  "Variant of the schema in which the catch-all key of the top-level map and
   of the map under :b is `s/Any` tagged with {:garbage true} metadata, so
   those entries can be recognised later."
  (let [garbage-key (with-meta s/Any {:garbage true})]
    {:a s/Int
     :b {:c s/Str
         (s/optional-key :d) s/Bool
         garbage-key s/Any}
     :e [{:f s/Inst}]
     garbage-key s/Any}))

(defn garbage?
  "Returns the :garbage metadata value found on the :kspec of `s` when `s`
   is associative, and false when `s` is not associative."
  [s]
  (if (associative? s)
    (-> s :kspec meta :garbage)
    false))

(defn discard-garbage
  "Builds a walker for `schema` via `s/start-walker`. Each sub-schema is
   walked with its regular `s/walker`; when the sub-schema is flagged by
   `garbage?`, the walked value is discarded, a message is printed and nil
   is returned in its place.

   NOTE(review): `s/walker` and `s/start-walker` may not exist in newer
   Schema releases; confirm against the version in use."
  [schema]
  (letfn [(garbage-aware-walker [s]
            (let [walk (s/walker s)]
              (fn [x]
                ;; always walk first, exactly as a plain walker would
                (let [walked (walk x)]
                  (if-not (garbage? s)
                    walked
                    (do (println "found garbage" x)
                        nil))))))]
    (s/start-walker garbage-aware-walker schema)))

;; `data` is not defined in this snippet; presumably the sample input from the
;; question (the same map another answer binds to `data`).
((discard-garbage Myschema) data)
;=> {:a 2, :b {:c "hello"}, :e [{:f #inst "2015-07-23T12:29:51.822-00:00"}]}
0
On

Here's another approach (code below):

  1. Define a custom Garbage schema type, to be matched against the properties you want removed; if you want all unknown properties removed, you can use schema.core/Any as a key in your schema (credits to Colin Yates for telling me about this).
  2. As a coercion step, 'flag' all the values to be removed by coercing them to an instance of the garbage type.
  3. Traverse the data structure to strip off all the flags.

This has the advantage of making few assumptions about the internals of Schema (still in alpha at the time of writing), and at least 2 drawbacks:

  1. Assumes the data is a combination of Clojure maps and sequences (not really a problem in the case of JSON input)
  2. Adds another traversal of the data structure, which may not be optimal from a performance viewpoint.

(require '[schema.core :as s])
(require '[schema.coerce :as sco])
(require '[schema.utils :as scu])

;; Field-less marker type; an instance of it stands in for a value that must
;; be removed.
;; NOTE(review): `^:private` on a deftype name probably does not restrict the
;; visibility of the generated class; confirm.
(deftype ^:private GarbageType [])

;; The single marker instance that garbage values are replaced with.
(def ^:private garbage-const
  (new GarbageType))

(def Garbage
  "Schema for values that `cleaner` should remove: use it as the value
   schema of any entry to be discarded. It is the GarbageType class itself."
  GarbageType)

(defn garbage-flagging-matcher
  "Matcher for schema.coerce: for the `Garbage` schema, returns a function
   that replaces any value with `garbage-const`; for every other schema,
   returns `identity`."
  [schema]
  (if (= schema Garbage)
    (constantly garbage-const)
    identity))

(defn- garbage-flagger
  "Given a schema (expected to use `Garbage` as a sub-schema), returns a
   coercer built with `garbage-flagging-matcher`, i.e. a function that
   replaces the values matched by `Garbage` with `garbage-const`."
  [schema]
  (sco/coercer schema garbage-flagging-matcher))

(defn clean-garbage
  "Recursively removes every occurrence of `garbage-const` from `v`.

   - `garbage-const` itself becomes nil.
   - In a map, entries whose value is `garbage-const` are dropped; the
     remaining values are cleaned recursively (keys are left as they are).
   - In a vector or other sequential collection, `garbage-const` elements
     are dropped and the rest cleaned recursively; vectors stay vectors,
     other sequential collections come back as a fully realised seq.
   - Anything else is returned unchanged."
  [v]
  (let [garbage? (fn [x] (= garbage-const x))
        scrub    (fn [coll] (map clean-garbage (remove garbage? coll)))]
    (cond
      (garbage? v)
      nil

      (map? v)
      (reduce-kv (fn [acc k child]
                   (if (garbage? child)
                     (dissoc acc k)
                     (assoc acc k (clean-garbage child))))
                 v
                 v)

      (vector? v)
      (vec (scrub v))

      (sequential? v)
      (doall (scrub v))

      :else
      v)))

(defn cleaner
  "Given a schema that uses `Garbage` to match unwanted values, returns a
   one-argument function. That function first flags the unwanted values in
   its input with the schema's garbage flagger, then strips the flagged
   values with `clean-garbage`, e.g. removing map keys the schema does not
   anticipate."
  [schema]
  (let [flag-garbage (garbage-flagger schema)]
    (fn [data]
      (clean-garbage (flag-garbage data)))))

;; Example

(def MySchema
  "Example schema in which every map level (top level, the map under :b, and
   each map in the :e vector) routes undeclared keys to `Garbage` through an
   `s/Any` catch-all key."
  (let [b-schema {:c  s/Str
                  (s/optional-key :d) s/Bool
                  s/Any Garbage}
        e-item   {:f s/Inst
                  s/Any Garbage}]
    {:a s/Int
     :b b-schema
     :e [e-item]
     s/Any Garbage}))

;; Builds the cleaning function for MySchema and applies it to an input that
;; carries undeclared keys of several kinds (keyword, number, symbol, string)
;; at different nesting levels. The expected result follows the `=>`.
((cleaner MySchema) {:a 1
                       :garbage-key "hello"
                       :b {:c "Hellow world"
                           :d false
                           42432424 23/2}
                       :e [{:f #inst "2015-07-23T15:49:33.073-00:00"
                            'a-garbage-key "remove me!!"
                            "another garbage key" :remove-me!!}
                           {:f #inst "2015-07-23T15:53:33.073-00:00"}]})
  => {:a 1
      :b {:c "Hellow world"
          :d false}
      :e [{:f #inst "2015-07-23T15:49:33.073-00:00"}
          {:f #inst "2015-07-23T15:53:33.073-00:00"}]}