Suppose I have a pair of datastructures; one representing a type and the other a value:
data Schema = Leaf | PairOf Schema Schema | ListOf Schema
data ValueOf (schema :: Schema) where
LeafElem :: String -> ValueOf 'Leaf
PairElem :: ValueOf x -> ValueOf y -> ValueOf ('PairOf x y)
ListElem :: [ValueOf x] -> ValueOf ('ListOf x)
Now I want to write Arbitrary
instances for these so I can use them in a QuickCheck test.
The Schema
instance is straightforward:
instance Arbitrary Schema where
arbitrary = sized $ \s -> if s <= 1
then pure Leaf
else oneof
[ pure Leaf
, scale (`quot` 2) $ PairOf <$> arbitrary <*> arbitrary
, scale floorSqrt $ ListOf <$> arbitrary
]
shrink = \case
Leaf -> empty
PairOf x y -> asum
[ pure x
, pure y
, PairOf <$> shrink x <*> pure y
, PairOf <$> pure x <*> shrink y
]
ListOf x -> asum [pure x, ListOf <$> shrink x]
floorSqrt :: Int -> Int
floorSqrt = floor . sqrt . (fromIntegral :: Int -> Float)
The ValueOf
instance is a little more tricky, but with singletons
it's not too bad:
$(genSingletons [''Schema])
instance SingI schema => Arbitrary (ValueOf schema) where
arbitrary = case sing :: Sing schema of
SLeaf -> LeafElem <$> arbitrary
SPairOf (singInstance -> SingInstance) (singInstance -> SingInstance) ->
scale (`quot` 2) $ PairElem <$> arbitrary <*> arbitrary
SListOf (singInstance -> SingInstance) ->
scale floorSqrt $ ListElem <$> arbitrary
shrink = case sing :: Sing schema of
SLeaf -> \case
LeafElem x -> LeafElem <$> shrink x
SPairOf (singInstance -> SingInstance) (singInstance -> SingInstance) ->
\case
PairElem x y -> asum
[PairElem <$> shrink x <*> pure y, PairElem <$> pure x <*> shrink y]
SListOf (singInstance -> SingInstance) -> \case
ListElem xs -> ListElem <$> shrink xs
But what I actually want is an instance for both a type and a list of values of that type.
data SchemaAndValues = forall schema.
SchemaAndValues (SSchema schema) [ValueOf schema]
instance Arbitrary SchemaAndValues where
arbitrary = arbitrarySchemaAndValues
shrink = shrinkSchemaAndValues
The arbitrary
function is easy; just generate a schema and then generate some values.
arbitrarySchemaAndValues :: Gen SchemaAndValues
arbitrarySchemaAndValues = scale floorSqrt $ do
schema <- arbitrary
withSomeSing schema
$ \sschema -> SchemaAndValues sschema <$> withSingI sschema arbitrary
But for the shrink function I need a way to map a schema-shrink operation to a value-shrink operation. To this end, I define a Shrinker
type which contains both a shrunk schema and the function for shrinking values to match the new schema:
shrinkSchemaAndValues :: SchemaAndValues -> [SchemaAndValues]
shrinkSchemaAndValues (SchemaAndValues sschema values) = asum
[ do
Shrinker stoSchema valShrink <- shrinkers sschema
newValues <- traverse valShrink values
pure $ SchemaAndValues stoSchema newValues
, SchemaAndValues sschema <$> withSingI sschema shrink values
]
data Shrinker fromSchema = forall toSchema.
Shrinker (SSchema toSchema) (ValueOf fromSchema -> [ValueOf toSchema])
shrinkers :: SSchema schema -> [Shrinker schema]
shrinkers = \case
SLeaf -> empty
SPairOf sx sy -> asum
[ pure (Shrinker sx (\(PairElem x _) -> pure x))
, pure (Shrinker sy (\(PairElem _ y) -> pure y))
, do
Shrinker sx' xfn <- shrinkers sx
pure $ Shrinker (SPairOf sx' sy)
(\(PairElem x y) -> PairElem <$> xfn x <*> pure y)
, do
Shrinker sy' yfn <- shrinkers sy
pure $ Shrinker (SPairOf sx sy')
(\(PairElem x y) -> PairElem <$> pure x <*> yfn y)
]
SListOf sx -> asum
[ pure (Shrinker sx (\(ListElem xs) -> xs))
, do
Shrinker sx' xfn <- shrinkers sx
pure $ Shrinker (SListOf sx')
(\(ListElem xs) -> ListElem <$> traverse xfn xs)
]
But the problem with this approach is that the shrink-list can blow up exponentially because of the calls to traverse
in the list monad.
In particular, if I start with a small example such as
example :: SchemaAndValues
example = SchemaAndValues
(SListOf (SListOf SLeaf))
[ ListElem
[ ListElem [LeafElem "a", LeafElem "b", LeafElem "c"]
, ListElem [LeafElem "d", LeafElem "e", LeafElem "f", LeafElem "g"]
]
, ListElem
[ ListElem [LeafElem "h", LeafElem "i"]
, ListElem [LeafElem "j", LeafElem "k", LeafElem "l"]
, ListElem [LeafElem "m", LeafElem "n"]
]
, ListElem
[ ListElem [LeafElem "o", LeafElem "p", LeafElem "q"]
, ListElem [LeafElem "r", LeafElem "s", LeafElem "t"]
]
]
this will generate 1425 immediate shrinks.
How can I avoid this exponential blowup while still shrinking to small counterexamples?
Preamble:
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE ViewPatterns #-}
module Lib where
import Control.Applicative
import Data.Foldable
import Data.Singletons.TH
import Test.QuickCheck