一般来说,可以尝试像这样的做法:
import tensorflow as tf
import numpy as np

# Three sources with different spatial resolutions, chained into one dataset.
ds_a = tf.data.Dataset.from_tensor_slices(np.random.random((32, 300, 300, 3)))
ds_b = tf.data.Dataset.from_tensor_slices(np.random.random((32, 224, 224, 3)))
ds_c = tf.data.Dataset.from_tensor_slices(np.random.random((32, 400, 400, 3)))
dataset = ds_a.concatenate(ds_b).concatenate(ds_c)

# NOTE: a shuffle buffer of 1 keeps same-shaped images together so that
# .batch() succeeds; a larger buffer would mix shapes inside one batch.
dataset = dataset.shuffle(1).repeat().batch(32, drop_remainder=True)


def resize_data(images):
    """Resize an entire batch of images to a fixed 180x180 resolution."""
    tf.print('Original shape -->', tf.shape(images))
    SIZE = (180, 180)
    return tf.image.resize(images, SIZE)


dataset = dataset.map(resize_data)

for images in dataset.take(3):
    tf.print('New shape -->', tf.shape(images))
Original shape --> [32 300 300 3]
New shape --> [32 180 180 3]
Original shape --> [32 224 224 3]
New shape --> [32 180 180 3]
Original shape --> [32 400 400 3]
New shape --> [32 180 180 3]
如果你愿意的话,也可以使用 tf.image.resize_with_crop_or_pad:
def resize_data(images):
    """Crop or zero-pad each batch to exactly 180x180 (no interpolation)."""
    tf.print('Original shape -->', tf.shape(images))
    target_h, target_w = 180, 180
    return tf.image.resize_with_crop_or_pad(images, target_h, target_w)


dataset = dataset.map(resize_data)

for images in dataset.take(3):
    tf.print('New shape -->', tf.shape(images))
注意:使用 repeat() 将创建一个无限的数据集。
更新 1
如果你想对每个批次使用一个随机尺寸,可以试试这样:
import tensorflow as tf
import numpy as np

ds_a = tf.data.Dataset.from_tensor_slices(np.random.random((32, 300, 300, 3)))
ds_b = tf.data.Dataset.from_tensor_slices(np.random.random((32, 224, 224, 3)))
ds_c = tf.data.Dataset.from_tensor_slices(np.random.random((32, 400, 400, 3)))
dataset = ds_a.concatenate(ds_b).concatenate(ds_c)

# Batch first so each batch is homogeneous, then shuffle whole batches
# (96 = total number of elements across the three sources).
dataset = dataset.batch(32, drop_remainder=True).shuffle(96)


def resize_data(images):
    """Resize every image in the batch to a single random size drawn per batch."""
    batch_size = tf.shape(images)[0]
    # Dynamically-sized TensorArray accumulates the resized images one by one.
    resized = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    # One random (height, width) pair shared by the whole batch.
    new_size = tf.random.uniform((2,), minval=300, maxval=500, dtype=tf.int32)
    for i in range(batch_size):
        resized = resized.write(resized.size(), tf.image.resize(images[i], new_size))
    return resized.stack()


dataset = dataset.map(resize_data)

for images in dataset:
    tf.print('New shape -->', tf.shape(images))
New shape --> [32 392 385 3]
New shape --> [32 468 459 3]
New shape --> [32 466 461 3]
更新2
一个非常灵活、适用于任何批次大小的选择如下:
import tensorflow as tf
import numpy as np

ds_a = tf.data.Dataset.from_tensor_slices(np.random.random((32, 300, 300, 3)))
ds_b = tf.data.Dataset.from_tensor_slices(np.random.random((32, 224, 224, 3)))
ds_c = tf.data.Dataset.from_tensor_slices(np.random.random((32, 400, 400, 3)))
dataset = ds_a.concatenate(ds_b).concatenate(ds_c)


def resize_and_batch(dataset, batch_size):
    """Eagerly resize the dataset in groups of `batch_size`, drawing one random
    (height, width) per group, and return the groups concatenated in order.

    Elements beyond the last full group are dropped.
    """
    n_groups = len(dataset) // batch_size
    sizes = [tf.random.uniform((2,), minval=300, maxval=500, dtype=tf.int32)
             for _ in range(n_groups)]
    final_dataset = None
    for i, size in zip(range(n_groups), sizes):
        start = i * batch_size
        # Materialize this group eagerly so every image shares `size`.
        group = [tf.image.resize(x, size)
                 for x in dataset.skip(start).take(batch_size)]
        piece = tf.data.Dataset.from_tensor_slices(group)
        final_dataset = piece if final_dataset is None else final_dataset.concatenate(piece)
    return final_dataset


batch_size = 10
ds = resize_and_batch(dataset, batch_size)
ds = ds.batch(batch_size).shuffle(len(ds))

for images in ds:
    tf.print('New shape -->', images.shape)
New shape --> TensorShape([10, 399, 348, 3])
New shape --> TensorShape([10, 356, 329, 3])
New shape --> TensorShape([10, 473, 373, 3])
New shape --> TensorShape([10, 489, 489, 3])
New shape --> TensorShape([10, 421, 335, 3])
New shape --> TensorShape([10, 447, 455, 3])
New shape --> TensorShape([10, 355, 382, 3])
New shape --> TensorShape([10, 310, 396, 3])
New shape --> TensorShape([10, 345, 356, 3])
INVALID_ARGUMENT: Cannot add tensor to the batch: number of elements does not match. Shapes are: [tensor]: [640,426,3], [batch]: [480,640,3]
即使我在 tf.image.resize(images, size) 中给定 size=(300, 300),当批次的大小为 (480, 640)、而接下来的图像大小不同(为 (640, 426))时,它仍无法把该图像加入这个批次。这意味着 .map() 函数不知何故没有被应用到每个批次上。有什么帮助或想法吗?