ParserError: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead
Code that raises the error (note engine='python'):
file_name = os.listdir(base_dir)[0]
col_list = [feature list]
col = col_list
#encoding
#data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding="GBK",usecols=range(len(col)))
data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'unicode_escape', engine ='python')
#data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'utf-8', engine ='python')
path = "D:\\test\\repo\\data.csv"
Solution:
Pass engine='c' to pd.read_csv instead of engine='python':
file_name = os.listdir(base_dir)[0]
#encoding
#data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding="GBK",usecols=range(len(col)))
data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'unicode_escape', engine ='c')
#data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'utf-8', engine ='python')
path = "D:\\test\\repo\\data.csv"
Full Error Messages:
—————————————————————————
Error Traceback (most recent call last)
D:\anaconda\lib\site-packages\pandas\io\parsers.py in _next_iter_line(self, row_num)
   2967 assert self.data is not None
-> 2968 return next(self.data)
   2969 except csv.Error as e:

Error: line contains NULL byte

During handling of the above exception, another exception occurred:

ParserError Traceback (most recent call last)
<ipython-input-12-c5d0c651c50e> in <module>
     85 ]
     86
---> 87 data = inference_process(data_dir)
     88 #print(data.head())
     89 f=open("break_model1.pkl",'rb')

<ipython-input-12-c5d0c651c50e> in inference_process(base_dir)
     18 #encoding
     19 # data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding="GBK",usecols=range(len(col)))
---> 20 data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'unicode_escape', engine ='python')
     21 # data = pd.read_csv("D:\\test\\repo\\data.csv",sep = ',',encoding = 'utf-8', engine ='python')
     22

D:\anaconda\lib\site-packages\pandas\io\parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    608 kwds.update(kwds_defaults)
    609
--> 610 return _read(filepath_or_buffer, kwds)
    611
    612

D:\anaconda\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    460
    461 # Create the parser.
--> 462 parser = TextFileReader(filepath_or_buffer, **kwds)
    463
    464 if chunksize or iterator:

D:\anaconda\lib\site-packages\pandas\io\parsers.py in __init__(self, f, engine, **kwds)
    817 self.options["has_index_names"] = kwds["has_index_names"]
    818
--> 819 self._engine = self._make_engine(self.engine)
    820
    821 def close(self):

D:\anaconda\lib\site-packages\pandas\io\parsers.py in _make_engine(self, engine)
   1048 )
   1049 # error: Too many arguments for "ParserBase"
-> 1050 return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
   1051
   1052 def _failover_to_python(self):

D:\anaconda\lib\site-packages\pandas\io\parsers.py in __init__(self, f, **kwds)
   2308 self.num_original_columns,
   2309 self.unnamed_cols,
-> 2310 ) = self._infer_columns()
   2311 except (TypeError, ValueError):
   2312 self.close()

D:\anaconda\lib\site-packages\pandas\io\parsers.py in _infer_columns(self)
   2615 for level, hr in enumerate(header):
   2616 try:
-> 2617 line = self._buffered_line()
   2618
   2619 while self.line_pos <= hr:

D:\anaconda\lib\site-packages\pandas\io\parsers.py in _buffered_line(self)
   2809 return self.buf[0]
   2810 else:
-> 2811 return self._next_line()
   2812
   2813 def _check_for_bom(self, first_row):

D:\anaconda\lib\site-packages\pandas\io\parsers.py in _next_line(self)
   2906
   2907 while True:
-> 2908 orig_line = self._next_iter_line(row_num=self.pos + 1)
   2909 self.pos += 1
   2910

D:\anaconda\lib\site-packages\pandas\io\parsers.py in _next_iter_line(self, row_num)
   2989 msg += ". " + reason
   2990
-> 2991 self._alert_malformed(msg, row_num)
   2992 return None
   2993

D:\anaconda\lib\site-packages\pandas\io\parsers.py in _alert_malformed(self, msg, row_num)
   2946 """
   2947 if self.error_bad_lines:
-> 2948 raise ParserError(msg)
   2949 elif self.warn_bad_lines:
   2950 base = f"Skipping line {row_num}: "

ParserError: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instea