How to use nested pydantic models for sqlalchemy in a flexible way

20.9k Views Asked by At
from fastapi import Depends, FastAPI, HTTPException, Body, Request
from sqlalchemy import create_engine, Boolean, Column, ForeignKey, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session, sessionmaker, relationship
from sqlalchemy.inspection import inspect
from typing import List, Optional
from pydantic import BaseModel
import json

SQLALCHEMY_DATABASE_URL = "sqlite:///./test.db"
engine = create_engine(
    SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
app = FastAPI()


# sqlalchemy models

class RootModel(Base):
    __tablename__ = "root_table"
    id = Column(Integer, primary_key=True, index=True)
    someRootText = Column(String)
    subData = relationship("SubModel", back_populates="rootData")


class SubModel(Base):
    __tablename__ = "sub_table"
    id = Column(Integer, primary_key=True, index=True)
    someSubText = Column(String)
    root_id = Column(Integer, ForeignKey("root_table.id"))
    rootData = relationship("RootModel", back_populates="subData")


# pydantic models/schemas
class SchemaSubBase(BaseModel):
    someSubText: str

    class Config:
        orm_mode = True


class SchemaSub(SchemaSubBase):
    id: int
    root_id: int

    class Config:
        orm_mode = True


class SchemaRootBase(BaseModel):
    someRootText: str
    subData: List[SchemaSubBase] = []

    class Config:
        orm_mode = True


class SchemaRoot(SchemaRootBase):
    id: int

    class Config:
        orm_mode = True


class SchemaSimpleBase(BaseModel):
    someRootText: str

    class Config:
        orm_mode = True


class SchemaSimple(SchemaSimpleBase):
    id: int

    class Config:
        orm_mode = True


Base.metadata.create_all(bind=engine)


# database functions (CRUD)

def db_add_simple_data_pydantic(db: Session, root: SchemaRootBase):
    db_root = RootModel(**root.dict())
    db.add(db_root)
    db.commit()
    db.refresh(db_root)
    return db_root


def db_add_nested_data_pydantic_generic(db: Session, root: SchemaRootBase):

    # this fails:
    db_root = RootModel(**root.dict())
    db.add(db_root)
    db.commit()
    db.refresh(db_root)
    return db_root


def db_add_nested_data_pydantic(db: Session, root: SchemaRootBase):

    # start: hack: i have to manually generate the sqlalchemy model from the pydantic model
    root_dict = root.dict()
    sub_dicts = []

    # i have to remove the list form root dict in order to fix the error from above
    for key in list(root_dict):
        if isinstance(root_dict[key], list):
            sub_dicts = root_dict[key]
            del root_dict[key]

    # now i can do it
    db_root = RootModel(**root_dict)
    for sub_dict in sub_dicts:
        db_root.subData.append(SubModel(**sub_dict))

    # end: hack
    db.add(db_root)
    db.commit()
    db.refresh(db_root)
    return db_root


def db_add_nested_data_nopydantic(db: Session, root):
    print(root)
    sub_dicts = root.pop("subData")
    print(sub_dicts)
    db_root = RootModel(**root)

    for sub_dict in sub_dicts:
        db_root.subData.append(SubModel(**sub_dict))
    db.add(db_root)
    db.commit()
    db.refresh(db_root)

    # problem
    """
    if I would now "return db_root", the answer would be of this:
    {
        "someRootText": "string",
        "id": 24
    }

    and not containing "subData"
    therefore I have to do the following.
    Why?

    """
    from sqlalchemy.orm import joinedload

    db_root = (
        db.query(RootModel)
            .options(joinedload(RootModel.subData))
            .filter(RootModel.id == db_root.id)
            .all()
    )[0]
    return db_root


# Dependency
def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()


@app.post("/addNestedModel_pydantic_generic", response_model=SchemaRootBase)
def addSipleModel_pydantic_generic(root: SchemaRootBase, db: Session = Depends(get_db)):
    data = db_add_simple_data_pydantic(db=db, root=root)
    return data


@app.post("/addSimpleModel_pydantic", response_model=SchemaSimpleBase)
def add_simple_data_pydantic(root: SchemaSimpleBase, db: Session = Depends(get_db)):
    data = db_add_simple_data_pydantic(db=db, root=root)
    return data


@app.post("/addNestedModel_nopydantic")
def add_nested_data_nopydantic(root=Body(...), db: Session = Depends(get_db)):
    data = db_add_nested_data_nopydantic(db=db, root=root)
    return data


@app.post("/addNestedModel_pydantic", response_model=SchemaRootBase)
def add_nested_data_pydantic(root: SchemaRootBase, db: Session = Depends(get_db)):
    data = db_add_nested_data_pydantic(db=db, root=root)
    return data

Description

My Question is:

How to make nested sqlalchemy models from nested pydantic models (or python dicts) in a generic way and write them to the database in "one shot".

My example model is called RootModel and has a list of submodels called "sub models" in subData key.

Please see above for pydantic and sqlalchemy definitions.

Example: The user provides a nested json string:

{
  "someRootText": "string",
  "subData": [
    {
      "someSubText": "string"
    }
  ]
}

Open the browser and call the endpoint /docs. You can play around with all endpoints and POST the json string from above.

/addNestedModel_pydantic_generic

When you call the endpoint /addNestedModel_pydantic_generic it will fail, because sqlalchemy cannot create the nested model from pydantic nested model directly: AttributeError: 'dict' object has no attribute '_sa_instance_state'

​/addSimpleModel_pydantic

With a non-nested model it works.

The remaining endpoints are showing "hacks" to solve the problem of nested models.

/addNestedModel_pydantic

In this endpoint is generate the root model and andd the submodels with a loop in a non-generic way with pydantic models.

/addNestedModel_pydantic

In this endpoint is generate the root model and andd the submodels with a loop in a non-generic way with python dicts.

My solutions are only hacks, I want a generic way to create nested sqlalchemy models either from pydantic (preferred) or from a python dict.

Environment

  • OS: Windows,
  • FastAPI Version : 0.61.1
  • Python version: Python 3.8.5
  • sqlalchemy: 1.3.19
  • pydantic : 1.6.1
4

There are 4 best solutions below

2
On

I haven't found a nice built-in way to do this within pydantic/SQLAlchemy. How I solved it: I gave every nested pydantic model a Meta class containing the corresponding SQLAlchemy model. Like so:

from pydantic import BaseModel
from models import ChildDBModel, ParentDBModel

class ChildModel(BaseModel):
    some_attribute: str = 'value'
    class Meta:
        orm_model = ChildDBModel

class ParentModel(BaseModel):
    child: SubModel

That allowed me to write a generic function that loops through the pydantic object and transforms submodels into SQLAlchemy models:

def is_pydantic(obj: object):
    """Checks whether an object is pydantic."""
    return type(obj).__class__.__name__ == "ModelMetaclass"


def parse_pydantic_schema(schema):
    """
        Iterates through pydantic schema and parses nested schemas
        to a dictionary containing SQLAlchemy models.
        Only works if nested schemas have specified the Meta.orm_model.
    """
    parsed_schema = dict(schema)
    for key, value in parsed_schema.items():
        try:
            if isinstance(value, list) and len(value):
                if is_pydantic(value[0]):
                    parsed_schema[key] = [schema.Meta.orm_model(**schema.dict()) for schema in value]
            else:
                if is_pydantic(value):
                    parsed_schema[key] = value.Meta.orm_model(**value.dict())
        except AttributeError:
            raise AttributeError("Found nested Pydantic model but Meta.orm_model was not specified.")
    return parsed_schema

The parse_pydantic_schema function returns a dictionary representation of the pydantic model where submodels are substituted by the corresponding SQLAlchemy model specified in Meta.orm_model. You can use this return value to create the parent SQLAlchemy model in one go:

parsed_schema = parse_pydantic_schema(parent_model)  # parent_model is an instance of pydantic ParentModel 
new_db_model = ParentDBModel(**parsed_schema)
# do your db actions/commit here

If you want you can even extend this to also automatically create the parent model, but that requires you to also specify the Meta.orm_model for all pydantic models.

0
On

Nice function @dann, for more than two level of nesting you can use this recursive function:


def pydantic_to_sqlalchemy_model(schema):
    """
    Iterates through pydantic schema and parses nested schemas
    to a dictionary containing SQLAlchemy models.
    Only works if nested schemas have specified the Meta.orm_model.
    """
    parsed_schema = dict(schema)
    for key, value in parsed_schema.items():
        try:
            if isinstance(value, list) and len(value) and is_pydantic(value[0]):
                parsed_schema[key] = [
                    item.Meta.orm_model(**pydantic_to_sqlalchemy_model(item))
                    for item in value
                ]
            elif is_pydantic(value):
                parsed_schema[key] = value.Meta.orm_model(
                    **pydantic_to_sqlalchemy_model(value)
                )
        except AttributeError:
            raise AttributeError(
                f"Found nested Pydantic model in {schema.__class__} but Meta.orm_model was not specified."
            )
    return parsed_schema

Use it sparingly! If you have a cyclical nesting, it will loop forever.

And then call your data transformer like this:

def create_parent(db: Session, parent: Parent_pydantic_schema):
    db_parent = Parent_model(**pydantic_to_sqlalchemy_model(parent))
    db.add(db_parent)
    db.commit()
    return db_parent
0
On

To get rid of having to specify orm_model in Meta class we can access the orm model by investigating the attributes of parent orm model's class. For example class method from_dto can be added to Base class which would act as reverse of from_orm method in pydantic models.

from sqlalchemy.orm import DeclarativeBase
from pydantic import BaseModel


def is_pydantic(obj: object):
    """Checks whether an object is pydantic."""
    return type(obj).__class__.__name__ == "ModelMetaclass"

class Base(DeclarativeBase):
    ...

    @classmethod
    def from_dto(cls, dto:BaseModel):
        obj = cls()
        properties = dict(dto)
        for key, value in properties.items():
            try:       
                if is_pydantic(value):
                    value = getattr(cls, key).property.mapper.class_.from_dto(value)
                setattr(obj, key, value)
            except AttributeError as e:
                raise AttributeError(e)
        return obj

Then we can have convert between orm and pydantic models easily.

pydantic_model = PydanticModel(...)
orm_model = DBModel.from_dto(pydantic_model)
0
On

Using a validators is a lot simpler:

SQLAlchemy models.py:

class ChildModel(Base):
    __tablename__ = "Child"
    name: str = Column(Unicode(255), nullable=False, primary_key=True)


class ParentModel(Base):
    __tablename__ = "Parent"
    some_attribute: str = Column(Unicode(255))
    children = relationship("Child", lazy="joined", cascade="all, delete-orphan")

    @validates("children")
    def adjust_children(self, _, value) -> ChildModel:
        """Instantiate Child object if it is only plain string."""
        if value and isinstance(value, str):
            return ChildModel(some_attribute=value)
        return value

Pydantic schema.py:

class Parent(BaseModel):
    """Model used for parents."""

    some_attribute: str
    children: List[str] = Field(example=["foo", "bar"], default=[])

    @validator("children", pre=True)
    def adjust_children(cls, children):
        """Convert to plain string if it is a Child object."""
        if children and not isinstance(next(iter(children), None), str):
            return [child["name"] for child in children]
        return children