I am reading a csv using lumenwork csv reader in C# and putting the content of csv file in respective database table columns. The code is below and it works fine
private void ButtonClick(DeliverData context)
{
// Set culture for decimal cast
CultureInfo currentCulture = CultureInfo.DefaultThreadCurrentCulture;
CultureInfo usCulture = new CultureInfo("en-US");
CultureInfo.DefaultThreadCurrentCulture = usCulture;
DateTime start = DateTime.Now;
Dictionary<Cycle, FileInfo> files = this.GetFilesFromFolder(context, this.FPath);
// Initiate sql connection
SqlConnection sqlConnection = new SqlConnection(context.Database.Connection.ConnectionString);
this.sqlElementsNumber = true;
try
{
CsvReader reader = null;
sqlConnection.Open();
// Create temp table
SqlCommand createTempTable = new SqlCommand();
createTempTable.CommandText = TempTables.DataTempTable;
createTempTable.Connection = sqlConnection;
createTempTable.ExecuteNonQuery();
this.DeleteOldData(context, files);
foreach (KeyValuePair<Cycle, FileInfo> file in files)
{
// Sql Bulk Copy Code
reader = new CsvReader(new StreamReader(file.Value.FullName), false);
this.currentFilePath = file.Value.FullName;
this.bulkCopyElements = reader.ToArray().Length;
reader.Dispose();
reader = new CsvReader(new StreamReader(file.Value.FullName), false);
reader.Columns = new List<Column>
{
new Column { Name = "Sector", Type = typeof(string) },
new Column { Name = "Sub Sector", Type = typeof(string) },
new Column { Name = "Category", Type = typeof(string) },
new Column { Name = "Brand", Type = typeof(string) },
new Column { Name = "Property1", Type = typeof(string) },
new Column { Name = "Property2", Type = typeof(string) },
new Column { Name = "Property3", Type = typeof(string) },
new Column { Name = "Property4", Type = typeof(string) },
new Column { Name = "Property5", Type = typeof(string) }
};
reader.MoveTo(0);
reader.UseColumnDefaults = true;
using (var sbc = new SqlBulkCopy(sqlConnection))
{
sbc.DestinationTableName = "Tablename";
sbc.BatchSize = 1000;
sbc.EnableStreaming = true;
sbc.NotifyAfter = 100;
sbc.SqlRowsCopied += new SqlRowsCopiedEventHandler(this.HandleBulkMessage);
sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Sector", "Sector"));
sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Sub Sector", "SubSector"));
sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Category", "Category"));
sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property1", "Property1"));
sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property2", "Property2"));
sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property3", "Property3"));
sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property4", "Property4"));
sbc.ColumnMappings.Add(new SqlBulkCopyColumnMapping("Property5", "Property5"));
sbc.WriteToServer(reader);
}
reader.Dispose();
}
// start stored procedure
SqlCommand cmd = new SqlCommand();
SqlInfoMessageEventHandler handler = new SqlInfoMessageEventHandler(this.HandleSqlMessage);
sqlConnection.FireInfoMessageEventOnUserErrors = true;
sqlConnection.InfoMessage += handler;
cmd.CommandText = "StoredprocedureName";
cmd.CommandType = CommandType.StoredProcedure;
cmd.CommandTimeout = 43200;
cmd.Connection = sqlConnection;
try
{
cmd.ExecuteNonQuery();
}
catch (SqlException e)
{
this.AddErrorMessage(e.Message, e);
}
sqlConnection.FireInfoMessageEventOnUserErrors = false;
sqlConnection.InfoMessage -= handler;
sqlConnection.Close();
// reset culture
CultureInfo.DefaultThreadCurrentCulture = currentCulture;
Logger.Write(new CustomLogEntry(this.ErrorTitle, CustomLogEntry.LogPriority.High, "Time for File insert: " + DateTime.Now.Subtract(start).ToString(), this.ForecastPath));
}
catch (Exception e)
{
sqlConnection.Close();
this.AddErrorMessage(e.Message, e);
}
}
CSV file looks like this
Now I want to filter data of csv before entering the content in the database. For example I need to remove ‘ID’ keyword from all the columns and remove ‘PPTR*_’ from all the attribute columns. Wherever there is no data need to write 'nodata'
My question is what are the different ways to achieve this and How?
If you want to update the data before entering it into the database and CSV file is not big, consider to load data into a datatable as
Note, it might be slow on large files and it might make sense to update the data after it loaded into the database, especially because you already have a code that runs after that. Add an update in the beginning of
StoredprocedureName
and that's it.Example:
If Tablename also includes other data that should not be updated again - either add a timestamp column that could help you to identify newly loaded data, or use another table, e.g. TablenameTemp, load data there, modify and move all to Tablename.
UPDATE
If
StoredprocedureName
is used for that single purpose only (not called from other code) then just add in the update query in the beginning of its code, for exampleNote, Replace() will replace all occurencies, so make sure if it will work well or you would need more complex logic to get rid of specified strings. You can test sql directly in the database before you put it in to the stored procedure.