I wrote a main function which uses a stochastic optimization algorithm (Particle Swarm Optimization) to found optimal solution for a ODE system. I would run 50 times to make sure the optimal can be found. At first, it operates normally, but now I found the calculation time would increase with iteration increases. It cost less than 300s for first ten calculations, but it would increase to 500s for final calculation. It seems that it would cost 3~5 seconds more for each calculation. I have followed the high performance tips to optimize my code but it doesn't work.
I am sorry I don't know quite well how to upload my code before, here is the code I wrote below. But in this code, the experimental data is not loaded, I may need to find a way to upload data. In main function, with the increase of i, the time cost is increasing for each calculation.
Oh, by the way, I found another interesting phenomenon. I changed the number of calculations and the calculation time changed again. For first 20 calculations in main loop, each calculation cost about 300s, and the memory useage fluctuates significantly. But something I don't know happend, it is speeding up. It cost 1/4 time less time for each calculation, which is about 80s. And the memory useage became a straight line like this:
I knew the Julia would do pre-heating for first run and then speed up. But this situation seems different. This situation looks like Julia run slowly for first 20 calculation, and then it found a good way to optimize the memory useage and speed up. Then the program just run at full speed.
using CSV, DataFrames
using BenchmarkTools
using DifferentialEquations
using Statistics
using Dates
using Base.Threads
using Suppressor
function uniform(dim::Int, lb::Array{Float64, 1}, ub::Array{Float64, 1})
    arr = rand(Float64, dim)
    @inbounds for i in 1:dim; arr[i] = arr[i] * (ub[i] - lb[i]) + lb[i] end
    return arr
end
mutable struct Problem
    cost_func
    dim::Int
    lb::Array{Float64,1}
    ub::Array{Float64,1}
end
mutable struct Particle
    position::Array{Float64,1}
    velocity::Array{Float64,1}
    cost::Float64
    best_position::Array{Float64,1}
    best_cost::Float64
end
mutable struct Gbest
    position::Array{Float64,1}
    cost::Float64
end
function PSO(problem, data_dict; max_iter=100,population=100,c1=1.4962,c2=1.4962,w=0.7298,wdamp=1.0)
    dim = problem.dim
    lb = problem.lb
    ub = problem.ub
    cost_func = problem.cost_func
    gbest, particles = initialize_particles(problem, population, data_dict)
    # main loop
    for iter in 1:max_iter
        @threads for i in 1:population
            particles[i].velocity .= w .* particles[i].velocity .+
                c1 .* rand(dim) .* (particles[i].best_position .- particles[i].position) .+
                c2 .* rand(dim) .* (gbest.position .- particles[i].position)
            particles[i].position .= particles[i].position .+ particles[i].velocity
            particles[i].position .= max.(particles[i].position, lb)
            particles[i].position .= min.(particles[i].position, ub)
            particles[i].cost = cost_func(particles[i].position,data_dict)
            if particles[i].cost < particles[i].best_cost
                particles[i].best_position = copy(particles[i].position)
                particles[i].best_cost = copy(particles[i].cost)
                if particles[i].best_cost < gbest.cost
                    gbest.position = copy(particles[i].best_position)
                    gbest.cost = copy(particles[i].best_cost)
                end
            end
        end
        w = w * wdamp
        if iter % 50 == 1
            println("Iteration " * string(iter) * ": Best Cost = " * string(gbest.cost))
            println("Best Position = " * string(gbest.position))
            println()
        end
    end
    gbest, particles
end
function initialize_particles(problem, population,data_dict)
    dim = problem.dim
    lb = problem.lb
    ub = problem.ub
    cost_func = problem.cost_func
    gbest_position = uniform(dim, lb, ub)
    gbest = Gbest(gbest_position, cost_func(gbest_position,data_dict))
    particles = []
    for i in 1:population
        position = uniform(dim, lb, ub)
        velocity = zeros(dim)
        cost = cost_func(position,data_dict)
        best_position = copy(position)
        best_cost = copy(cost)
        push!(particles, Particle(position, velocity, cost, best_position, best_cost))
        if best_cost < gbest.cost
            gbest.position = copy(best_position)
            gbest.cost = copy(best_cost)
        end
    end
    return gbest, particles
end
function get_dict_label(beta::Int)
    beta_str = lpad(beta,2,"0")
    T_label = "Temperature_" * beta_str
    M_label = "Mass_" * beta_str
    MLR_label = "MLR_" * beta_str
    return T_label, M_label, MLR_label
end
function get_error(x::Vector{Float64}, y::Vector{Float64})
    numerator = sum((x.-y).^2)
    denominator = var(x) * length(x)
    numerator/denominator
end
function central_diff(x::AbstractArray{Float64}, y::AbstractArray{Float64})
    # Central difference quotient
    dydx = Vector{Float64}(undef, length(x))
    dydx[2:end] .= diff(y) ./ diff(x)
    @views dydx[2:end-1] .= (dydx[2:end-1] .+ dydx[3:end])./2
    # Forward and Backward difference
    dydx[1] = (y[2]-y[1])/(x[2]-x[1])
    dydx[end] = (y[end]-y[end-1])/(x[end]-x[end-1])
    return dydx
end
function decomposition!(dm,m,p,T)
    # A-> residue + volitale
    # B-> residue + volatile
    beta,A1,E1,n1,k1,A2,E2,n2,k2,m1,m2 = p
    R = 8.314
    rxn1 = -m1 * exp(A1-E1/R/T) * max(m[1]/m1,0)^n1 / beta
    rxn2 = -m2 * exp(A2-E2/R/T) * max(m[2]/m2,0)^n2 / beta
    dm[1] = rxn1
    dm[2] = rxn2
    dm[3] = -k1 * rxn1 - k2 * rxn2
    dm[4] = dm[1] + dm[2] + dm[3]
end
function read_file(file_path)
    df = CSV.read(file_path, DataFrame)
    data_dict = Dict{String, Vector{Float64}}()
    for beta in 5:5:21
        T_label, M_label, MLR_label = get_dict_label(beta)
        T_data = collect(skipmissing(df[:, T_label]))
        M_data = collect(skipmissing(df[:, M_label]))
        T = T_data[T_data .< 780]
        M = M_data[T_data .< 780]
        data_dict[T_label] = T
        data_dict[M_label] = M
        data_dict[MLR_label] = central_diff(T, M)
    end
    return data_dict
end
function initial_condition(beta::Int64, ode_parameters::Array{Float64,1})
    m_FR_initial = ode_parameters[end]
    m_PVC_initial = 1 - m_FR_initial
    T_span = (300.0, 800.0) # temperature range
    p = [beta; ode_parameters; m_PVC_initial]
    m0 = [p[end-1], p[end], 0.0, 1.0] # initial mass
    return m0, T_span, p
end
function cost_func(ode_parameters, data_dict)
    total_error = 0.0
    for beta in 5:5:21
        T_label, M_label, MLR_label= get_dict_label(beta)
        T = data_dict[T_label]::Vector{Float64}
        M = data_dict[M_label]::Vector{Float64}
        MLR = data_dict[MLR_label]::Vector{Float64}
        m0, T_span, p = initial_condition(beta,ode_parameters)
        prob = ODEProblem(decomposition!,m0,T_span,p)
        sol = solve(prob, AutoVern9(Rodas5(autodiff=false)),saveat=T,abstol=1e-8,reltol=1e-8,maxiters=1e4)
        if sol.retcode != :Success
            # println(1)
            return Inf
        else
            M_sol = @view sol[end, :]
            MLR_sol = central_diff(T, M_sol)::Array{Float64,1}
            error1 = get_error(MLR, MLR_sol)::Float64
            error2 = get_error(M, M_sol)::Float64
            total_error += error1 + error2
        end
    end
    total_error
end
function main()
    flush(stdout)
    total_time = 0
    best_costs = []
    file_path = raw"F:\17-Fabric\17-Fabric (Smoothed) TG.csv"
    data_dict = read_file(file_path)
    dimension = 9
    lb = [5, 47450, 0.0, 0.0, 24.36, 148010, 0.0, 0.0, 1e-5]
    ub = [25.79, 167700, 5, 1, 58.95, 293890, 5, 1, 0.25]
    problem = Problem(cost_func,dimension,lb,ub)
    global_best_cost = Inf
    println("-"^100)
    println("Running PSO ...")
    population = 50
    max_iter = 1001
    println("The population is: ", population)
    println("Max iteration is:", max_iter)
    for i in 1:50 # The number of calculation
        start_time = Dates.now()
        println("Current iteration is: ", string(i))
        gbest, particles = PSO(problem, data_dict, max_iter=max_iter, population=population)
        if gbest.cost < global_best_cost
            global_best_cost = gbest.cost
            global_best_position = gbest.position
        end
        end_time = Dates.now()
        time_duration = round(end_time-start_time, Second)
        total_time += time_duration.value
        push!(best_costs, gbest.cost)
        println()
        println("The Best is:")
        println(gbest.cost)
        println(gbest.position)
        println("The calculation time is: " * string(time_duration))
        println()
        println("-"^50)
    end
    println('-'^100)
    println("Global best cost is: ", global_best_cost)
    println("Global best position is: ", global_best_position)
    println(total_time)
    best_costs
end
@suppress_err begin
    @time global best_costs = main()
end
So, what is the possible mechanism for this? Is there a way to avoid this problem? Because If I increase the population and max iterations of particles, the time increased would be extremely large and thus is unacceptable.
And what is the possible mechanism for speed up the program I mentioned above? How to trigger this mechanism?

                        
As the parameters of an ODE optimizes it can completely change its characteristics. Your equation could be getting more stiff and require different ODE solvers. There are many other related ways, but you can see how changing parameters could give such a performance issue. It's best to use methods like AutoTsit5(Rodas5()) and the like in such estimation cases because it's hard to know or guess what the performance will be like, and thus adaptiveness in the method choice can be crucial.