[Solved] ParserError: NULL byte detected. This byte cannot be processed in Python‘s native csv library

ParserError: NULL byte detected. This byte cannot be processed in Python’s native csv library at the moment, so please pass in engine=’c’ instead



Error:

file_name = os.listdir(base_dir)[0]

col_list = [feature list]
col = col_list
#encoding
#data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding="GBK",usecols=range(len(col)))
    
data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'unicode_escape', engine ='python')


#data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'utf-8', engine ='python')

path = "D:\\test\\repo\\data.csv"

Solution:

engine =’c’

file_name = os.listdir(base_dir)[0]

#encoding
#data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding="GBK",usecols=range(len(col)))
    
data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'unicode_escape', engine ='c')


#data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'utf-8', engine ='python')

path = "D:\\test\\repo\\data.csv"

Full Error Messages:
—————————————————————————

Error                                     Traceback (most recent call last)
D:\anaconda\lib\site-packages\pandas\io\parsers.py in _next_iter_line(self, row_num)
2967             assert self.data is not None
-> 2968             return next(self.data)
2969         except csv.Error as e:
Error: line contains NULL byte
During handling of the above exception, another exception occurred:
ParserError                               Traceback (most recent call last)
<ipython-input-12-c5d0c651c50e> in <module>
85                    ]
86
---> 87     data = inference_process(data_dir)
88     #print(data.head())
89     f=open("break_model1.pkl",'rb')
<ipython-input-12-c5d0c651c50e> in inference_process(base_dir)
18     #encoding
19 #     data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding="GBK",usecols=range(len(col)))
---> 20     data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'unicode_escape', engine ='python')
21 #     data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'utf-8', engine ='python')
22
D:\anaconda\lib\site-packages\pandas\io\parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
608     kwds.update(kwds_defaults)
609
--> 610     return _read(filepath_or_buffer, kwds)
611
612
D:\anaconda\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
460
461     # Create the parser.
--> 462     parser = TextFileReader(filepath_or_buffer, **kwds)
463
464     if chunksize or iterator:
D:\anaconda\lib\site-packages\pandas\io\parsers.py in __init__(self, f, engine, **kwds)
817             self.options["has_index_names"] = kwds["has_index_names"]
818
--> 819         self._engine = self._make_engine(self.engine)
820
821     def close(self):
D:\anaconda\lib\site-packages\pandas\io\parsers.py in _make_engine(self, engine)
1048             )
1049         # error: Too many arguments for "ParserBase"
-> 1050         return mapping[engine](self.f, **self.options)  # type: ignore[call-arg]
1051
1052     def _failover_to_python(self):
D:\anaconda\lib\site-packages\pandas\io\parsers.py in __init__(self, f, **kwds)
2308                 self.num_original_columns,
2309                 self.unnamed_cols,
-> 2310             ) = self._infer_columns()
2311         except (TypeError, ValueError):
2312             self.close()
D:\anaconda\lib\site-packages\pandas\io\parsers.py in _infer_columns(self)
2615             for level, hr in enumerate(header):
2616                 try:
-> 2617                     line = self._buffered_line()
2618
2619                     while self.line_pos <= hr:
D:\anaconda\lib\site-packages\pandas\io\parsers.py in _buffered_line(self)
2809             return self.buf[0]
2810         else:
-> 2811             return self._next_line()
2812
2813     def _check_for_bom(self, first_row):
D:\anaconda\lib\site-packages\pandas\io\parsers.py in _next_line(self)
2906
2907             while True:
-> 2908                 orig_line = self._next_iter_line(row_num=self.pos + 1)
2909                 self.pos += 1
2910
D:\anaconda\lib\site-packages\pandas\io\parsers.py in _next_iter_line(self, row_num)
2989                     msg += ". " + reason
2990
-> 2991                 self._alert_malformed(msg, row_num)
2992             return None
2993
D:\anaconda\lib\site-packages\pandas\io\parsers.py in _alert_malformed(self, msg, row_num)
2946         """
2947         if self.error_bad_lines:
-> 2948             raise ParserError(msg)
2949         elif self.warn_bad_lines:
2950             base = f"Skipping line {row_num}: "
ParserError: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instea

Read More: