Reading CSV file , trimming/filtering the data then storing into database in C#

1k Views Asked by At

I am reading a csv using lumenwork csv reader in C# and putting the content of csv file in respective database table columns. The code is below and it works fine

private void ButtonClick(DeliverData context)
     {
        // Set culture for decimal cast
        CultureInfo currentCulture = CultureInfo.DefaultThreadCurrentCulture;
        CultureInfo usCulture = new CultureInfo("en-US");
        CultureInfo.DefaultThreadCurrentCulture = usCulture;
        DateTime start = DateTime.Now;

        Dictionary<Cycle, FileInfo> files = this.GetFilesFromFolder(context, this.FPath);



        // Initiate sql connection
        SqlConnection sqlConnection = new SqlConnection(context.Database.Connection.ConnectionString);
        this.sqlElementsNumber = true;

        try
        {
            CsvReader reader = null;
            sqlConnection.Open();

            // Create temp table
            SqlCommand createTempTable = new SqlCommand();
            createTempTable.CommandText = TempTables.DataTempTable;
            createTempTable.Connection = sqlConnection;
            createTempTable.ExecuteNonQuery();

            this.DeleteOldData(context, files);

            foreach (KeyValuePair<Cycle, FileInfo> file in files)
            {
                // Sql Bulk Copy Code
                reader = new CsvReader(new StreamReader(file.Value.FullName), false);
                this.currentFilePath = file.Value.FullName;

                this.bulkCopyElements = reader.ToArray().Length;
                reader.Dispose();
                reader = new CsvReader(new StreamReader(file.Value.FullName), false);


                reader.Columns = new List<Column>
                {
                    new Column { Name = "Sector", Type = typeof(string) },
                    new Column { Name = "Sub Sector", Type = typeof(string) },
                    new Column { Name = "Category", Type = typeof(string) },
                    new Column { Name = "Brand", Type = typeof(string) },
                    new Column { Name = "Property1", Type = typeof(string) },
                    new Column { Name = "Property2", Type = typeof(string) },
                    new Column { Name = "Property3", Type = typeof(string) },
                    new Column { Name = "Property4", Type = typeof(string) },
                    new Column { Name = "Property5", Type = typeof(string) }

                };

                reader.MoveTo(0);
                reader.UseColumnDefaults = true;



                using (var sbc = new SqlBulkCopy(sqlConnection))
                {
                    sbc.DestinationTableName = "Tablename";
                    sbc.BatchSize = 1000;
                    sbc.EnableStreaming = true;
                    sbc.NotifyAfter = 100;
                    sbc.SqlRowsCopied += new SqlRowsCopiedEventHandler(this.HandleBulkMessage);


                    sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Sector", "Sector"));
                    sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Sub Sector", "SubSector"));
                    sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Category", "Category"));
                    sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property1", "Property1"));
                    sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property2", "Property2"));
                    sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property3", "Property3"));
                    sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property4", "Property4"));
                    sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property5", "Property5"));

                    sbc.WriteToServer(reader);
                }

                reader.Dispose();
            }

            // start stored procedure
            SqlCommand cmd = new SqlCommand();
            SqlInfoMessageEventHandler handler = new SqlInfoMessageEventHandler(this.HandleSqlMessage);
            sqlConnection.FireInfoMessageEventOnUserErrors = true;
            sqlConnection.InfoMessage += handler;

            cmd.CommandText = "StoredprocedureName";
            cmd.CommandType = CommandType.StoredProcedure;
            cmd.CommandTimeout = 43200;
            cmd.Connection = sqlConnection;

            try
            {
                cmd.ExecuteNonQuery();
            }
            catch (SqlException e)
            {
                this.AddErrorMessage(e.Message, e);
            }

            sqlConnection.FireInfoMessageEventOnUserErrors = false;
            sqlConnection.InfoMessage -= handler;
            sqlConnection.Close();

            // reset culture
            CultureInfo.DefaultThreadCurrentCulture = currentCulture;
            Logger.Write(new CustomLogEntry(this.ErrorTitle, CustomLogEntry.LogPriority.High, "Time for File insert: " + DateTime.Now.Subtract(start).ToString(), this.ForecastPath));
        }
        catch (Exception e)
        {
            sqlConnection.Close();
            this.AddErrorMessage(e.Message, e);
        }
    }

CSV file looks like this

CSV file looks like this

Now I want to filter data of csv before entering the content in the database. For example I need to remove ‘ID’ keyword from all the columns and remove ‘PPTR*_’ from all the attribute columns. Wherever there is no data need to write 'nodata'

My question is what are the different ways to achieve this and How?

1

There are 1 best solutions below

2
On

If you want to update the data before entering it into the database and CSV file is not big, consider to load data into a datatable as

//CsvReader reader = null;
DataTable tblCSV = new DataTable("CSV");
...
reader = new CsvReader(new StreamReader(file.Value.FullName), false);
...
tblCSV.Load(reader); 

foreach(DataRow dr in table.Rows)  
{
   dr["Sector"] = dr["Sector"].ToString().Replace(" ID", ""); 
}

...
//sbc.WriteToServer(reader);
sbc.WriteToServer(tblCSV);

Note, it might be slow on large files and it might make sense to update the data after it loaded into the database, especially because you already have a code that runs after that. Add an update in the beginning of StoredprocedureName and that's it.

Example:

UPDATE Tablename SET Sector=Replace(Sector, ' ID', '')

If Tablename also includes other data that should not be updated again - either add a timestamp column that could help you to identify newly loaded data, or use another table, e.g. TablenameTemp, load data there, modify and move all to Tablename.

UPDATE

If StoredprocedureName is used for that single purpose only (not called from other code) then just add in the update query in the beginning of its code, for example

ALTER PROCEDURE StoredprocedureName
AS
BEGIN
UPDATE Tablename 
   SET Sector=Replace(Sector, ' ID', ''),
   [Product Property1]=Replace([Product Property1], 'PPTR1_', ''),
   [Product Property2]=Replace([Product Property2], 'PPTR2_', ''),
   ...

...rest of sql code

Note, Replace() will replace all occurencies, so make sure if it will work well or you would need more complex logic to get rid of specified strings. You can test sql directly in the database before you put it in to the stored procedure.