Insert into three-level joined tables using SQLAlchemy, ignoring duplicates on grandparent table

60 Views Asked by At

I am using SQLAlchemy to build a Postgres database. I am creating my schema like this:

import datetime
from typing import List, Optional

import sqlalchemy as sa
from sqlalchemy import orm, MetaData, create_engine, FetchedValue, ForeignKey, Column, Integer, Computed
from sqlalchemy.orm import relationship
from sqlalchemy.orm import Session
from sqlalchemy.orm import (
  mapped_column,
  DeclarativeBase,
  Mapped,
  MappedAsDataclass,
)
from sqlalchemy.dialects.postgresql import ARRAY, TEXT, JSONB

dbUrl = XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

metadata = MetaData()


class Base(MappedAsDataclass, DeclarativeBase):
  pass


engine = create_engine(dbUrl, echo=True)


class MediaType(Base):
  __tablename__ = 'media_types'
  media_type: Mapped[str] = mapped_column(init=False, primary_key=True)
  description: Mapped[str] = mapped_column(default=None, unique=True)
  file_formats: Mapped[list] = mapped_column(ARRAY(TEXT, dimensions=1),
                                             unique=False,
                                             default=None,
                                             nullable=True)

MediaType.__table__


class Collection(MediaType):
  __tablename__ = 'collections'
  collection_id: Mapped[int] = mapped_column(init=False, primary_key=True,autoincrement=True)
  media_type: Mapped[str] = mapped_column(ForeignKey("media_types.media_type"))
  collection_name: Mapped[str] = mapped_column(default=None, unique=True)
  description: Mapped[str] = mapped_column(default=None,
                                           unique=False,
                                           nullable=True)
  tags: Mapped[list] = mapped_column(ARRAY(TEXT, dimensions=1), default=None, nullable=True)
  date_added: Mapped[datetime.datetime] = mapped_column(default=None,
                                                        nullable=True)

  __mapper_args__ = {
    'polymorphic_identity': 'collections',
    'polymorphic_on': 'media_type',
    'eager_defaults': True,
  }


Collection.__table__


class Post(Collection):
  __tablename__ = 'posts'
  id: Mapped[int] = mapped_column(init=False, primary_key=True)
  collection_id: Mapped[int] = mapped_column(
    ForeignKey("collections.collection_id"), default=None)
  user: Mapped[str] = mapped_column(default=None, nullable=True)
  title: Mapped[str] = mapped_column(default=None, nullable=True)
  description: Mapped[str] = mapped_column(default=None, nullable=True)
  date_modified: Mapped[Optional[datetime.datetime]] = mapped_column(
    default=None, nullable=True)
  tags: Mapped[list] = mapped_column(ARRAY(TEXT, dimensions=1),
                                     default=None,
                                     nullable=True)
  views: Mapped[int] = mapped_column(default=0, nullable=True)
  social_media: Mapped[dict] = mapped_column(JSONB,
                                             default=None,
                                             nullable=True)
  date_added: Mapped[datetime.datetime] = mapped_column(default=None,
                                                        nullable=True)
  date_modified: Mapped[datetime.datetime] = mapped_column(default=None,
                                                           nullable=True)

  __mapper_args__ = {
    'polymorphic_identity': 'posts',
    'polymorphic_on': 'media_type',
    'eager_defaults': True,
  }


Post.__table__


class Link(Post):
  __tablename__ = 'links'
  id: Mapped[int] = mapped_column(ForeignKey('posts.id'),
                                  init=False,
                                  primary_key=True)
  url: Mapped[str] = mapped_column(default=None, unique=True)
  other_info: Mapped[str] = mapped_column(default=None, nullable=True)
  clicks: Mapped[int] = mapped_column(default=0, nullable=True)

  __mapper_args__ = {
    'polymorphic_identity': 'links',
    'eager_defaults': True,
  }


Link.__table__


class Image(Post):
  __tablename__ = 'image'
  id: Mapped[int] = mapped_column(ForeignKey('posts.id'),
                                  init=False,
                                  primary_key=True)
  filepath: Mapped[str] = mapped_column(default=None,
                                        unique=True,
                                        nullable=True)

  __mapper_args__ = {
    'polymorphic_identity': 'image',
    'eager_defaults': True,
  }


Image.__table__

Base.metadata.create_all(engine)

The idea here is that there are different types of post (such as a link or an image) which each have unique data attached to them. Thus each data type has its own table. Because all posts share some data (like a title, description, and date), those tables use joined table inheritance with the posts table. I also want to be able to store information about the media types, so I opted to create a table for media types instead of a enum data type. I went with a character primary key because there will never be more than a few rows and the names aren't going to change.

The issue I am having is that when I try to insert data into one of the grandchild tables, I am also attempting to insert the redundant type name into the gandparent table:

collection = newCollection("New collection", "description of collection", sesh)

link = Link(url="http://example.com",
            title="Example link",
            collection_id=collection,
            description="My example link",
            date_added=datetime.datetime.now(),
            media_type='link')
psycopg2.errors.UniqueViolation: duplicate key value violates unique constraint "media_types_pkey"
DETAIL:  Key (media_type)=(link) already exists.

I thought that maybe I could just leave out "media_type" as it should be implied by the collection, which already has a "media_type":

link = Link(url="http://example.com",
            title="Example link",
            collection_id=collection,
            description="My example link",
            date_added=datetime.datetime.now())
sesh.add(link)
sesh.commit()

But that doesn't work either:

Traceback (most recent call last):
  File "main.py", line 149, in <module>
    link = Link(url="http://example.com",
TypeError: __init__() missing 1 required positional argument: 'media_type'

As you can see from the schema, "media_type" is not in the actual Link object, but is required by the hierarchy (from the grandparent table). What I want is for it to not try to insert a new row to media_type every time I create a collection.

So how can "tell" the database (Postgres btw) to either not attempt this redundant insert, or skip it (but not the entire transaction!) when there it is inserting a duplicate?

Edit: I just noticed that before the error, I get this warning, which I suspect is relevant:

main.py:146: SAWarning: Flushing object <Collection at 0x7ffab9bbd5d0> with incompatible polymorphic identity 'link'; the object may not refresh and/or load correctly (this warning may be suppressed after 10 occurrences)
  session.flush()
1

There are 1 best solutions below

0
Logos Masters On

This worked. I didn't realize I had to use a different column name for media_type.

class MediaType(Base):
    __tablename__ = "media_types"
    media_type: Mapped[str] = mapped_column(init=True, primary_key=True)
    file_format: Mapped[str] = mapped_column(
        default=None, unique=True, nullable=True
    )


class Collection(Base):
    __tablename__ = "collections"
    collection_id: Mapped[int] = mapped_column(
        init=False, primary_key=True, autoincrement=True
    )
    collection_name: Mapped[str] = mapped_column(default=None, unique=True)
    collection_description: Mapped[str] = mapped_column(
        default=None, unique=False, nullable=True
    )
    media_type_id: Mapped[str] = mapped_column(
         "media_type",
        ForeignKey("media_types.media_type"), default=None
    )
    media_type: Mapped[MediaType] = relationship(init=False)

    __mapper_args__ = {
        "polymorphic_identity": "collections",
        "eager_defaults": True,
    }


class Post(Base):
    __tablename__ = "posts"
    id: Mapped[int] = mapped_column(init=False, primary_key=True)
    title: Mapped[str] = mapped_column(default=None, nullable=True)
    description: Mapped[str] = mapped_column(default=None, nullable=True)

    collection_id: Mapped[int] = mapped_column(
        ForeignKey("collections.collection_id"), init=False
    )
    collection: Mapped[Collection] = relationship(kw_only=True)

    media_type: Mapped[str] = mapped_column(
        ForeignKey("media_types.media_type"), init=False
    )
    media_type_reference: Mapped[MediaType] = relationship(init=False)

    __mapper_args__ = {
        "polymorphic_identity": "posts",
        "polymorphic_on": "media_type",
        "eager_defaults": True,
    }


class Link(Post):
    __tablename__ = "links"
    id: Mapped[int] = mapped_column(
        ForeignKey("posts.id"), init=False, primary_key=True
    )
    url: Mapped[str] = mapped_column(default=None, unique=True)
    other_info: Mapped[str] = mapped_column(default=None, nullable=True)

    __mapper_args__ = {
        "polymorphic_identity": "links",
        "eager_defaults": True,
    }

class Image(Post):
    __tablename__ = "images"
    id: Mapped[int] = mapped_column(
        ForeignKey("posts.id"), init=False, primary_key=True
    )
    filepath: Mapped[str] = mapped_column(default=None, unique=True)
    __mapper_args__ = {
        "polymorphic_identity": "images",
        "eager_defaults": True,
    }


Image.__table__

Base.metadata.create_all(engine)

sesh = Session(engine)


with Session(engine) as sesh:
    # pre populate all possible MediaTypes
    sesh.add_all(
        [
            MediaType(media_type=mt)
            for mt in ["collections", "posts", "links", "images"]
        ]
    )
    sesh.commit()


with Session(engine) as sesh:
    collection = Collection(
        collection_name="New collection",
        collection_description="My new collection of links",
        media_type_id="links"
    )

    new = Link(
        url="http://example.com",
        collection=collection,
        title="New Link",
        description="Description of link"
    )
    sesh.add(new)
    sesh.commit()

    new = Link(
        url="http://google.com",
        collection=collection,
        title="Google Search",
        description="Search the Internet"
    )
    sesh.add(new)
    sesh.commit()