Good Evening All,
I set out to do what I thought was a simple task. I want to create a seed that can be run multiple times and will only insert new values if they aren't found in the db already. Using flask-sqlalchemy.
My thought process since there are several of the models that require several steps in order to build out the correct models and relationships was to build a multi-step process comprised of dictionaries to hold instance declarations that I would need to reference in relationship attribute of other models and then build a commit_objects() function that would take one list or dictionary at a time, add the items to a session, commit them and then proceed to the next dictionary or list. Here is a sample of what I was trying to do:
def initial_seed():
# Step 1 - Objects with no dependencies
ref_objects = {}
ref_objects["tmro_cat_t"] = AssessmentCategory(name="Technology", abbreviation="T")
ref_objects["rl1"] = AssessmentScore(score=1)
ref_objects["rl_type_trl"] = AssessmentType(name="TRL", sort_order=2)
commit_objects(ref_objects, "Importing Reference Objects - Step 1", True)
#Step 2 - objects with dependencies from step 1
non_ref_objects_2 = [
AssessmentOption(assessment_score=ref_objects["rl1"], assessment_type=ref_objects["rl_type_trl"], name="Basic principles"),
ActivityType(name="Program", abbreviation="PGM"),
]
commit_objects(non_ref_objects_2, "Importing Non-Reference Objects - Step 2", True)
This would go on for 4ish more steps to get all of the dependent relationships and instances defined so that I could build out the models for an initial seed and then add to the lists or dictionaries as needed to introduce more data as required.
This seemed like a simple job. Pass the list or dictionary to a function, determine if the object already exists in the database, skip it if there is an exact match on all the attributes and add it if there isn't. Well one thin led to another and I still can't get this to 1) add new objects and commit to the session and 2) traverse the list multiple times and identify if an object is already in the database, and if there is to skip it.
Where I got with my code is below but I'm no longer entirely sure it's even close to the right direction:
def is_collection(attr):
return hasattr(attr.property, "direction") and attr.property.direction.name in ("ONETOMANY", "MANYTOMANY")
def record_exists(obj):
# Get all columns for the object's table
columns = [prop.key for prop in class_mapper(type(obj)).iterate_properties
if isinstance(prop, sqlalchemy.orm.ColumnProperty)]
# Create a filter for each column
filters = {column: getattr(obj, column) for column in columns}
# Check if a record with the same values already exists
existing_obj = db.session.query(type(obj)).filter_by(**filters).first()
return existing_obj is not None
def commit_objects(objects_list, commit_message, debug=False):
if isinstance(objects_list, dict):
objects_list = objects_list.values()
for obj in objects_list:
if record_exists(obj):
print("A record with the same values already exists. Skipping insertion.")
continue
try:
# Save a copy of the object's attributes before making it transient
inspector = inspect(type(obj)) # inspect the class, not the instance
obj_attributes = {attr.key: getattr(obj, attr.key) for attr in inspector.attrs if not attr.key.startswith('_')}
make_transient(obj)
# Reassign the object's attributes after making it transient
for key, value in obj_attributes.items():
setattr(obj, key, value)
existing_obj = None
if obj_attributes:
query = db.session.query(type(obj))
for key, value in obj_attributes.items():
attr = getattr(type(obj), key)
if key in [column.key for column in inspector.columns]:
query = query.filter(attr == value)
elif key in [relation.key for relation in inspector.relationships]:
if is_collection(attr):
related_obj = value
if related_obj is not None and isinstance(related_obj, list) and len(related_obj) > 0: # check if object is a list before calling len()
primary_key = inspector.primary_key[0].key
query = query.filter(attr.any(**{primary_key: getattr(related_obj[0], primary_key)}))
else:
related_obj = value
primary_key = inspector.primary_key[0].key
query = query.filter(attr == value)
else:
print(f"Attribute '{key}' not found for {type(obj)}")
continue
existing_obj = query.first()
if existing_obj:
for key, value in obj_attributes.items():
setattr(existing_obj, key, value)
if debug:
print(f"Updated {type(obj).__name__} {getattr(existing_obj, inspector.primary_key[0].key)}")
else:
db.session.add(obj)
if debug:
print(f"Added {type(obj).__name__} {getattr(obj, inspector.primary_key[0].key)}")
except Exception as e:
print(f"Failed to commit {type(obj).__name__}: {str(e)}")
db.session.rollback()
continue
try:
db.session.commit()
print(commit_message)
except Exception as e:
print(f"Failed to commit transaction: {str(e)}")
db.session.rollback()
Can someone show me the light at the end of the tunnel?