As a beginner, I am trying to import a file named Facebook_Ads_2.csv using pandas in a Jupyter notebook; the output should be as shown below.
But when I import it using the following lines of Python code:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
T = pd.read_csv('Facebook_Ads_2.csv')
I get the following error:
UnicodeDecodeError Traceback (most recent call last)
Cell In[4], line 1
----> 1 T = pd.read_csv('Facebook_Ads_2.csv')
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\io\parsers\readers.py:912, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
899 kwds_defaults = _refine_defaults_read(
900 dialect,
901 delimiter,
(...)
908 dtype_backend=dtype_backend,
909 )
910 kwds.update(kwds_defaults)
--> 912 return _read(filepath_or_buffer, kwds)
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\io\parsers\readers.py:577, in _read(filepath_or_buffer, kwds)
574 _validate_names(kwds.get("names", None))
576 # Create the parser.
--> 577 parser = TextFileReader(filepath_or_buffer, **kwds)
579 if chunksize or iterator:
580 return parser
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\io\parsers\readers.py:1407, in TextFileReader.__init__(self, f, engine, **kwds)
1404 self.options["has_index_names"] = kwds["has_index_names"]
1406 self.handles: IOHandles | None = None
-> 1407 self._engine = self._make_engine(f, self.engine)
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\io\parsers\readers.py:1679, in TextFileReader._make_engine(self, f, engine)
1676 raise ValueError(msg)
1678 try:
-> 1679 return mapping[engine](f, **self.options)
1680 except Exception:
1681 if self.handles is not None:
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\io\parsers\c_parser_wrapper.py:93, in CParserWrapper.__init__(self, src, **kwds)
90 if kwds["dtype_backend"] == "pyarrow":
91 # Fail here loudly instead of in cython after reading
92 import_optional_dependency("pyarrow")
---> 93 self._reader = parsers.TextReader(src, **kwds)
95 self.unnamed_cols = self._reader.unnamed_cols
97 # error: Cannot determine type of 'names'
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\_libs\parsers.pyx:548, in pandas._libs.parsers.TextReader.__cinit__()
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\_libs\parsers.pyx:637, in pandas._libs.parsers.TextReader._get_header()
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\_libs\parsers.pyx:848, in pandas._libs.parsers.TextReader._tokenize_rows()
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\_libs\parsers.pyx:859, in pandas._libs.parsers.TextReader._check_tokenize_status()
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\_libs\parsers.pyx:2017, in pandas._libs.parsers.raise_parser_error()
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xc5 in position 4001: invalid continuation byte
Any assistance, please?
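I assume this happens because one of the rows contains bytes that are not valid UTF-8 (the error reports byte 0xc5 at position 4001), and UTF-8 is the encoding read_csv() uses by default.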
Since pandas version 1.3.0, there is a handler for these types of errors.
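You can try passing the `encoding_errors` parameter to `read_csv()`, for example:
T = pd.read_csv('Facebook_Ads_2.csv', encoding_errors='ignore')
See what it returns, or try another way of handling encoding errors (for example `encoding_errors='replace'`).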
If your pandas version is older than 1.3.0, let me know and we can try to come up with a different solution to your problem.