Upload a relatively big excel in Django and improve the write speed (Postgresql)

Viewed 103 times

I am uploading a 4000-row Excel file in Django (using django-excel as an external plugin), which takes around 17 seconds. This seems very slow compared with normal PostgreSQL write throughput, which should be about 600–700 queries per second. To maintain data integrity and to add a specific column, I need to insert one row at a time. The following is the code I am currently using.

def import_student(request):
    """Bulk-import students from an uploaded Excel file.

    GET  -> render the empty upload form.
    POST -> validate the form, then stream every spreadsheet row into the
    Student table inside a single atomic transaction. If any row fails
    validation (student_validate raises), the whole import rolls back.
    """
    this_tenant = request.user.tenant
    if request.method == "POST":
        form = UploadFileForm(request.POST, request.FILES)
        # NOTE(review): batch id=1 is hard-coded — presumably a placeholder;
        # confirm whether the batch should come from the form instead.
        batch_selected = Batch.objects.for_tenant(this_tenant).get(id=1)

        def choice_func(row):
            # Per-row initializer for django-excel: tags each row with a
            # monotonically increasing counter used to build the student key.
            choice_func.counter += 1
            return student_validate(row, this_tenant, choice_func.counter,
                                    batch_selected)

        choice_func.counter = 0

        if form.is_valid():
            try:
                # transaction.atomic() rolls back automatically when the
                # block raises; the original code additionally called
                # transaction.rollback() inside atomic(), which raises
                # TransactionManagementError in modern Django.
                with transaction.atomic():
                    request.FILES['file'].save_to_database(
                        model=Student,
                        initializer=choice_func,
                        mapdict=['first_name', 'last_name', 'dob', 'gender',
                                 'blood_group', 'contact', 'email_id',
                                 'local_id', 'address_line_1',
                                 'address_line_2', 'state', 'pincode',
                                 'batch', 'key', 'slug', 'tenant', 'user'])
                return redirect('student:student_list')
            except Exception:
                # Narrowed from a bare `except:` so system-exiting
                # exceptions (KeyboardInterrupt, SystemExit) propagate.
                # The atomic block has already rolled back by this point.
                return HttpResponse("Failed")
        else:
            return HttpResponseBadRequest()
    else:
        form = UploadFileForm()
    return render(request, 'upload_form.html', {'form': form})

And the student_validate function is as follows:

def student_validate(row, this_tenant, counter, batch):
    """Validate and enrich one spreadsheet row before insertion.

    Appends the generated key, slug and tenant to ``row``, overwrites the
    batch column (index 12), and returns the enriched row for django-excel
    to insert.

    Raises:
        ValueError: when the row is malformed. Raising (rather than the
        original behavior of returning an ``HttpResponse``, which
        django-excel would have tried to insert as row data) lets the
        caller's ``transaction.atomic()`` block roll the whole import back.

    Performance note: the last-key lookup below runs one query per row;
    for a 4k-row import that is ~4k extra round trips. Hoisting the
    lookup into the caller (computed once, then incremented in memory)
    would remove most of the 17-second import time.
    """
    prefix = "st"
    today_string = dt.date.today().strftime('%y%m%d')
    next_student_number = '{0:03d}'.format(counter)
    # Find today's highest existing key for this tenant so new keys
    # continue the sequence instead of restarting at 001.
    last_student = (Student.objects.filter(tenant=this_tenant)
                    .filter(key__contains=today_string)
                    .order_by('key')
                    .last())
    if last_student:
        last_student_number = int(last_student.key[8:])
        next_student_number = '{0:03d}'.format(last_student_number + counter)
    key = prefix + str(today_string) + str(next_student_number)
    slug = slugify(str(this_tenant) + " " + str(key))

    # Enrich the row in place: key, slug, tenant appended; batch replaced.
    row.append(key)
    row.append(slug)
    row.append(this_tenant)
    row[12] = batch

    # Validation — fail the whole import on any malformed row.
    if row[0] in (None, "") or row[1] in (None, ""):
        raise ValueError("There is error in uploaded excel")
    if row[3] not in ("M", "F", "O"):
        raise ValueError("There is error in uploaded excel")
    if row[2] not in (None, ""):
        # isinstance (not type ==) so datetime.datetime cells — the usual
        # type Excel readers produce for dates — are accepted as well.
        if not isinstance(row[2], dt.date):
            raise ValueError("There is error in uploaded excel")
    return row

Now, is there a way the write speed of importing an Excel file with about 4,000 rows can be improved?

0

There are 0 best solutions below