Tag Archives: #DL data preprocessing

Image input data check: NaN/Inf values and non-existent files

import os
import cv2
import numpy as np
from tqdm import tqdm

# read txt file content to list
def ReadTxt_to_list(rootdir):
    """Return the lines of the text file at ``rootdir`` as a list of strings.

    Only trailing newline characters are removed from each line; any other
    leading or trailing whitespace is kept. Empty lines are returned as
    empty strings, and an empty file yields an empty list.
    """
    with open(rootdir, 'r') as handle:
        # Iterating the handle yields the same lines as repeated readline().
        return [raw.rstrip('\n') for raw in handle]

def check_exist_nan(lstFile):
    """Check every image referenced by a tab-separated list file.

    Each line of ``lstFile`` must contain exactly three tab-separated
    fields; the second field is the image path. Report files are written
    into the directory that contains ``lstFile``:

    * ``Lst_notExist.txt`` -- lines whose image path does not exist.
    * ``Lst_bad.txt`` -- lines whose image holds NaN/Inf values, and lines
      whose image could not be decoded (those get a tab and the word
      ``error`` appended).
    * ``Lst_new.txt`` -- the lines that passed every check; written only
      when at least one line was rejected.

    The number of missing and bad entries is printed at the end.

    Raises:
        AssertionError: if a line does not split into three fields.
    """
    rt = os.path.split(lstFile)[0]
    notNum = 0
    badNum = 0
    newLines = []

    # Context managers guarantee the report files are closed even when a
    # malformed line (or anything else) raises midway through the loop.
    with open(os.path.join(rt, 'Lst_notExist.txt'), 'w') as notExist, \
            open(os.path.join(rt, 'Lst_bad.txt'), 'w') as bad:
        lines = ReadTxt_to_list(lstFile)
        for line in tqdm(lines):
            info_ = line.split('\t')
            assert len(info_) == 3, \
                'expected 3 tab-separated fields, got %d: %r' % (len(info_), line)
            filePth = info_[1]

            if not os.path.exists(filePth):
                print('Not exist:', line)
                notExist.write(line + '\n')
                notNum += 1
                continue

            img = cv2.imread(filePth)
            if img is None:
                # cv2.imread signals an undecodable file by returning None;
                # test for it explicitly instead of relying on a bare
                # `except` to catch the resulting TypeError (which would
                # also swallow KeyboardInterrupt and I/O errors).
                print('Error:', line)
                badNum += 1
                bad.write(line + '\terror\n')
            elif not np.isfinite(img).all():
                # isfinite is False for NaN as well as +/-Inf, so a separate
                # isnan test is unnecessary.
                # NOTE(review): with its default flags cv2.imread presumably
                # decodes to 8-bit pixels, which cannot hold NaN/Inf --
                # confirm whether float images should be loaded with
                # cv2.IMREAD_UNCHANGED for this check to be meaningful.
                print('Nan/Inf:', line)
                badNum += 1
                bad.write(line + '\n')
            else:
                newLines.append(line)

    print('Not exist', notNum)
    print('Bad', badNum)

    # Only emit a cleaned list when something was actually filtered out.
    if len(lines) != len(newLines):
        with open(os.path.join(rt, 'Lst_new.txt'), 'w') as newLst:
            newLst.writelines(kept + '\n' for kept in newLines)

# Script entry point: validate the images listed in a fixed list file.
if __name__ == '__main__':
    # Tab-separated list file; reports are written next to it.
    list_path = '/home/img.lst'
    check_exist_nan(list_path)