Tag Archives: TFrecords Create Datas Error

[Solved] TFrecords Create Datas Error: Number of int64 values != expected. Values size: 1 but output shape: [3]

1. For fixed length label and feature

Generate tfrecord data:

Multi-label samples, where each label vector contains 5 values and each feature vector contains 6 values

import os
import tensorflow as tf
import numpy as np
# Destination file: "train.tfrecords" in the parent of the current working directory.
output_flie = os.path.dirname(os.getcwd())+"/train.tfrecords"

# Four samples. Every label row holds exactly 5 values and every feature row
# holds exactly 6 values; these fixed lengths are what the parsing code
# declares via tf.FixedLenFeature([5], ...) and tf.FixedLenFeature([6], ...).
labels = np.array([[1,0,0,1,0],[0,1,0,0,1],[0,0,0,0,1],[1,0,0,0,0]])
features = np.array([[0,0,0,0,0,0],[1,1,1,1,1,2],[1,1,1,0,0,2],[0,0,0,0,1,9]])

with tf.python_io.TFRecordWriter(output_flie) as writer:
    # Pair each label row with its feature row instead of indexing with a
    # hard-coded sample count, so the loop stays correct if samples are added.
    for label, feature in zip(labels, features):
        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)),
            'feature': tf.train.Feature(int64_list=tf.train.Int64List(value=feature))
        }))
        # One serialized tf.train.Example per record.
        writer.write(example.SerializeToString())

Parse tfrecord data:

import os
import tensorflow as tf
import numpy as np


def read_tf(output_flie, label_len=5, feature_len=6):
    """Build the graph ops that read and parse one record from a TFRecord file.

    Args:
        output_flie: Path of the TFRecord file to read.
        label_len: Number of int64 values expected under the 'label' key of
            every record. Defaults to 5.
        feature_len: Number of int64 values expected under the 'feature' key
            of every record. Defaults to 6.

    Returns:
        A ``(feature, label)`` tuple holding the parsed 'feature' and 'label'
        entries of a single example.

    Note:
        ``label_len`` and ``feature_len`` must equal the number of values that
        were written for each record. A mismatch fails at run time with
        ``InvalidArgumentError: ... Number of int64 values != expected``.
    """
    filename_queue = tf.train.string_input_producer([output_flie])
    reader = tf.TFRecordReader()
    # The record key returned by the reader is not needed.
    _, serialized_example = reader.read(filename_queue)
    result = tf.parse_single_example(serialized_example,
                                     features={
                                         'label': tf.FixedLenFeature([label_len], tf.int64),
                                         'feature': tf.FixedLenFeature([feature_len], tf.int64),
                                     })
    feature = result['feature']
    label = result['label']
    return feature, label


# Same file location the generation script writes to.
output_flie = str(os.path.dirname(os.getcwd())) + "/train.tfrecords"
feature, label = read_tf(output_flie)
# Group parsed examples into batches of 2.
imageBatch, labelBatch = tf.train.batch([feature, label], batch_size=2, capacity=3)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    # The input pipeline is queue based, so its runner threads must be started
    # before any batch can be fetched.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        print(1)
        images, labels = sess.run([imageBatch, labelBatch])
        print(images)
        print(labels)
    finally:
        # Always stop and join the queue-runner threads, even when sess.run
        # raises (e.g. on a label/feature shape mismatch), so they do not
        # outlive the session.
        coord.request_stop()
        coord.join(threads)

Output:

1
('----images: ', array([[0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 2]]))
('----labels:', array([[1, 0, 0, 1, 0],
       [0, 1, 0, 0, 1]]))

2. For variable length label and feature

Generate tfrecord

The records are generated in the same way as for fixed-length data; only the lengths of the label and feature lists vary from sample to sample

import os
import tensorflow as tf
import numpy as np
# Destination file: "hh.tfrecords" in the parent of the current working directory.
train_TFfile = os.path.dirname(os.getcwd())+"/hh.tfrecords"

# Five samples whose label and feature lists have differing lengths.
labels = [[1,2,3],[3,4],[5,2,6],[6,4,9],[9]]
features = [[2,5],[3],[5,8],[1,4],[5,9]]

# The context manager closes the writer even if building or writing an
# example raises, so the file handle is never leaked.
with tf.python_io.TFRecordWriter(train_TFfile) as writer:
    # Iterate over label/feature pairs directly rather than a hard-coded count.
    for label, feature in zip(labels, features):
        print(label)
        example = tf.train.Example(
            features=tf.train.Features(
                feature={'label': tf.train.Feature(int64_list=tf.train.Int64List(value=label)),
                         'feature': tf.train.Feature(int64_list=tf.train.Int64List(value=feature))}))
        writer.write(example.SerializeToString())

Parsing tfrecord

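The main change is that each variable-length key is declared with a variable-length feature type instead of tf.FixedLenFeature: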

tf.VarLenFeature(tf.int64)

(This section is unfinished; to be continued.)

Common errors:

When the number of label values written to the TFRecord differs from the shape declared for the label when parsing, an error like the following is reported:

Details of error reporting:

tensorflow.python.framework.errors_impl.InvalidArgumentError: Name: <unknown>, Key: label, Index: 0.  Number of int64 values != expected.  Values size: 1 but output shape: [3]

Here each stored label contains only 1 value ("Values size: 1"), but the parser was told to expect 3 values ("output shape: [3]").

Solution: make sure the length of the label written when generating the TFRecord is the same as the shape declared in tf.FixedLenFeature when parsing.