错误信息:
Error: tensorflow/stream_executor/cuda/cuda_dnn.cc:444] could not convert BatchDescriptor {count: 0 feature_map_count: 32 spatial: 149 149 value_min: 0.000000 value_max: 0.000000 layout: BatchDepthYX} to cudnn tensor descriptor: CUDNN_STATUS_BAD_PARAM
github上的解决方法
这个是由于keras源码的问题:
在keras.preprocessing.image.py中我们可以看到:
class Iterator(Sequence):
    """Base class for image data iterators.

    Every `Iterator` must implement the
    `_get_batches_of_transformed_samples` method.

    # Arguments
        n: Integer, total number of samples in the dataset to loop over.
        batch_size: Integer, size of a batch.
        shuffle: Boolean, whether to shuffle the data between epochs.
        seed: Random seeding for data shuffling.
    """

    def __init__(self, n, batch_size, shuffle, seed):
        self.n = n
        self.batch_size = batch_size
        self.seed = seed
        self.shuffle = shuffle
        self.batch_index = 0
        self.total_batches_seen = 0
        self.lock = threading.Lock()
        self.index_array = None
        # _flow_index is a generator function (defined elsewhere in the
        # class — not visible here); calling it only creates the generator,
        # its body does not run yet.
        self.index_generator = self._flow_index()

    def _set_index_array(self):
        # Fresh permutation when shuffling, identity ordering otherwise.
        if self.shuffle:
            self.index_array = np.random.permutation(self.n)
        else:
            self.index_array = np.arange(self.n)

    def __getitem__(self, idx):
        """Return batch number `idx` (Sequence protocol)."""
        if idx >= len(self):
            raise ValueError('Asked to retrieve element {idx}, '
                             'but the Sequence '
                             'has length {length}'.format(idx=idx,
                                                          length=len(self)))
        if self.seed is not None:
            # Deterministic, per-batch advancing seed.
            np.random.seed(self.seed + self.total_batches_seen)
        self.total_batches_seen += 1
        if self.index_array is None:
            self._set_index_array()
        start = self.batch_size * idx
        index_array = self.index_array[start:start + self.batch_size]
        return self._get_batches_of_transformed_samples(index_array)

    def __len__(self):
        # Ceiling division: a trailing, smaller-than-batch_size batch
        # still counts as one element of the Sequence.
        return -(-self.n // self.batch_size)
......
......
...
这里的 __len__ 是向上取整的,因此最后一个 batch 可能小于 batch_size(甚至在多 GPU 切分后为空)。
多 GPU 训练时,这个不完整的 batch 再按 GPU 数量切分,某些 GPU 会分到 0 个样本——对应上面错误信息中的 count: 0,从而触发 CUDNN_STATUS_BAD_PARAM。
因此解决方法是:
- 保证数据量大小 n 与 batch_size 满足:n % batch_size == 0,或 n % batch_size >= num_gpu(即余下的不完整 batch 至少能给每块 GPU 分到一个样本)
- 不改源码的话,保证数据量永远能被所有可能的 GPU 数量整除:
possible_num_gpus = [1, 2, 3, 4, 8]
lcm = lcmm(*possible_num_gpus)  # lcm == 24
assert iterator.n % lcm == 0
- 或者更改源码:
def __init__(self, n, batch_size, shuffle, seed):
self.n = n
self.batch_size = batch_size
self.seed = seed
self.shuffle = shuffle
self.batch_index = 0
self.total_batches_seen = 0
self.lock = threading.Lock()
self.index_array = None
self.index_generator = self._flow_index()
self.excess_elements = self.n % self.batch_size
def _set_index_array(self):
self.index_array = np.arange(self.n)
if self.shuffle:
# self.index_array = np.random.permutation(self.n)
self.index_array = np.random.permutation(self.n)[:self.n - self.excess_elements]
else:
self.index_array = self.index_array[: self.n - self.excess_elements]
def __len__(self):
#return (self.n + self.batch_size - 1) // self.batch_size # round up
return (self.n) // self.batch_size # round down
def _flow_index(self):
# Ensure self.batch_index is 0.
self.reset()
while 1:
if self.seed is not None:
np.random.seed(self.seed + self.total_batches_seen)
if self.batch_index == 0:
self._set_index_array()
current_index = (self.batch_index * self.batch_size) % self.n
if self.n > current_index + self.batch_size + self.excess_elements:
self.batch_index += 1
else:
self.batch_index = 0
self.total_batches_seen += 1
yield self.index_array[current_index:
current_index + self.batch_size]