I have a chunk of Python script that I need to turn into a function so that I can automate the task. The script below generates a column using the Faker library, but because I have many categorical variables that need to be mirrored in a synthetic dataset, I need to wrap it in a reusable function:
import pandas as pd
from faker import Faker
from faker.providers import DynamicProvider
# Raw cell holding the year categories.
year = df.loc['categories', 'year']

# Drop quotes, commas and square brackets in a single pass, then split the
# remaining text on single spaces to get the individual category values.
list_year = year.translate(str.maketrans("", "", "',[]")).split(' ')

# Wrap the values in a dynamic provider and register it with Faker.
year_provider = DynamicProvider(provider_name="year_prov", elements=list_year)
faker.add_provider(year_provider)

# Use the newly registered provider as the generator for the "year" column.
gen.col("year").generation_method = getattr(faker, "year_prov")
Below is my function:
# Module-level column name and provider name.
col_name, provider_name = 'year', "year_prov"
def dynamic_var(dataframe, categories, col_name, provider_name=None):
    """Register a Faker dynamic provider built from one cell of *dataframe*.

    The cell at ``dataframe.loc[categories]`` is read as a string; quotes,
    commas and square brackets are removed and the remainder is split on
    whitespace to obtain the provider's elements.  (Assumes the cell holds
    the text form of a list such as ``"['2019', '2020']"`` -- confirm
    against your data.)

    The function uses the module-level ``faker`` and ``gen`` objects, which
    must already exist when it is called (``faker`` is presumably a
    ``Faker()`` instance -- verify in the calling script).

    Args:
        dataframe: DataFrame containing the category strings.
        categories: Key passed to ``dataframe.loc``, e.g. a
            ``(row_label, column_label)`` tuple.
        col_name: Name of the ``gen`` column whose generation method is set.
        provider_name: Name under which the provider is registered.  When
            omitted, ``f"{col_name}_prov"`` is used so that each column gets
            its own provider.

    Returns:
        The generation method assigned to ``gen.col(col_name)``.
    """
    if provider_name is None:
        provider_name = f"{col_name}_prov"

    raw_value = dataframe.loc[categories]

    # One pass removes the list punctuation; split() without an argument
    # avoids empty tokens for empty lists or repeated spaces.
    elements = raw_value.translate(str.maketrans("", "", "',[]")).split()

    faker.add_provider(
        DynamicProvider(provider_name=provider_name, elements=elements)
    )

    # The provider is exposed under a dynamic attribute name, so it has to be
    # looked up with getattr; ``faker.provider_name`` would look for an
    # attribute literally called "provider_name".
    gen.col(col_name).generation_method = getattr(faker, provider_name)
    return gen.col(col_name).generation_method
Then I call the function:
# Invoke dynamic_var on the ('categories', 'year_ashe') cell of df, passing
# 'year_col' as the column name.
dynamic_var(dataframe=df, categories=('categories', 'year_ashe'), col_name='year_col')
When I call the function, it outputs several errors. I need help fixing them.