The official tf.keras documentation recommends that a custom layer implement the `__init__`, `build`, and `call` methods:
- `build(input_shape)`: this is where you define your weights. Trainable weights should be added to the list `self.trainable_weights` here; in practice you create them with `self.add_weight(..., trainable=True)`, which registers them automatically.
- `call(x)`: this is the method that defines the layer's computation.
- `get_output_shape_for(input_shape)`: if your layer changes the shape of its input, specify the shape transformation here. (In Keras 2 this method is named `compute_output_shape`, which is what the later examples in this post use.)
```python
from keras import backend as K
from keras.engine.topology import Layer

class MyLayer(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # `add_weight` registers the weight in `self.trainable_weights`.
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[1], self.output_dim),
                                 initializer='random_uniform',
                                 trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure you call this somewhere!

    def call(self, x, mask=None):
        return K.dot(x, self.W)

    def get_output_shape_for(self, input_shape):
        # Named `compute_output_shape` in Keras 2.
        return (input_shape[0], self.output_dim)
```
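As a quick sanity check, the layer can be dropped into a functional-API model like any built-in layer. A minimal usage sketch (the input and output dimensions here are illustrative):

```python
from keras.layers import Input
from keras.models import Model

inputs = Input(shape=(16,))               # batch of 16-dimensional vectors
outputs = MyLayer(output_dim=8)(inputs)   # projects each vector to 8 dimensions
model = Model(inputs, outputs)
model.summary()                           # output shape should be (None, 8)
```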
For example, the anchor box layer used in object detection (taken from an SSD implementation) looks like this. Note that it depends on a helper function `convert_coordinates`; a sketch of that helper follows the class.
```python
import numpy as np
from keras import backend as K
from keras.engine.topology import Layer, InputSpec

class AnchorBoxes(Layer):
    def __init__(self,
                 img_height,
                 img_width,
                 this_scale,
                 next_scale,
                 aspect_ratios=[0.5, 1.0, 2.0],
                 two_boxes_for_ar1=True,
                 limit_boxes=True,
                 variances=[1.0, 1.0, 1.0, 1.0],
                 coords='centroids',
                 normalize_coords=False,
                 **kwargs):
        if K.backend() != 'tensorflow':
            raise TypeError("This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))
        if (this_scale < 0) or (next_scale < 0) or (this_scale > 1):
            raise ValueError("`this_scale` must be in [0, 1] and `next_scale` must be >0, but `this_scale` == {}, `next_scale` == {}".format(this_scale, next_scale))
        if len(variances) != 4:
            raise ValueError("4 variance values must be passed, but {} values were received.".format(len(variances)))
        variances = np.array(variances)
        if np.any(variances <= 0):
            raise ValueError("All variances must be >0, but the variances given are {}".format(variances))
        self.img_height = img_height
        self.img_width = img_width
        self.this_scale = this_scale
        self.next_scale = next_scale
        self.aspect_ratios = aspect_ratios
        self.two_boxes_for_ar1 = two_boxes_for_ar1
        self.limit_boxes = limit_boxes
        self.variances = variances
        self.coords = coords
        self.normalize_coords = normalize_coords
        # Compute the number of boxes per cell
        if (1 in aspect_ratios) & two_boxes_for_ar1:
            self.n_boxes = len(aspect_ratios) + 1
        else:
            self.n_boxes = len(aspect_ratios)
        super(AnchorBoxes, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        super(AnchorBoxes, self).build(input_shape)

    def call(self, x, mask=None):
        # Compute box width and height for each aspect ratio.
        # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
        self.aspect_ratios = np.sort(self.aspect_ratios)
        size = min(self.img_height, self.img_width)
        # Compute the box widths and heights for all aspect ratios
        wh_list = []
        for ar in self.aspect_ratios:
            if (ar == 1) & self.two_boxes_for_ar1:
                # Compute the regular default box for aspect ratio 1 and...
                w = self.this_scale * size * np.sqrt(ar)
                h = self.this_scale * size / np.sqrt(ar)
                wh_list.append((w, h))
                # ...also compute one slightly larger version using the geometric mean of this scale value and the next
                w = np.sqrt(self.this_scale * self.next_scale) * size * np.sqrt(ar)
                h = np.sqrt(self.this_scale * self.next_scale) * size / np.sqrt(ar)
                wh_list.append((w, h))
            else:
                w = self.this_scale * size * np.sqrt(ar)
                h = self.this_scale * size / np.sqrt(ar)
                wh_list.append((w, h))
        wh_list = np.array(wh_list)
        # We need the shape of the input tensor
        if K.image_dim_ordering() == 'tf':
            batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape
        else:  # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future
            batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape
        # Compute the grid of box center points. They are identical for all aspect ratios.
        cell_height = self.img_height / feature_map_height
        cell_width = self.img_width / feature_map_width
        cx = np.linspace(cell_width / 2, self.img_width - cell_width / 2, feature_map_width)
        cy = np.linspace(cell_height / 2, self.img_height - cell_height / 2, feature_map_height)
        cx_grid, cy_grid = np.meshgrid(cx, cy)
        cx_grid = np.expand_dims(cx_grid, -1)  # This is necessary for np.tile() to do what we want further down
        cy_grid = np.expand_dims(cy_grid, -1)  # This is necessary for np.tile() to do what we want further down
        # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
        # where the last dimension will contain `(cx, cy, w, h)`
        boxes_tensor = np.zeros((feature_map_height, feature_map_width, self.n_boxes, 4))
        boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes))  # Set cx
        boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes))  # Set cy
        boxes_tensor[:, :, :, 2] = wh_list[:, 0]  # Set w
        boxes_tensor[:, :, :, 3] = wh_list[:, 1]  # Set h
        # Convert `(cx, cy, w, h)` to `(xmin, xmax, ymin, ymax)`
        boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2minmax')
        # If `limit_boxes` is enabled, clip the coordinates to lie within the image boundaries
        if self.limit_boxes:
            x_coords = boxes_tensor[:, :, :, [0, 1]]
            x_coords[x_coords >= self.img_width] = self.img_width - 1
            x_coords[x_coords < 0] = 0
            boxes_tensor[:, :, :, [0, 1]] = x_coords
            y_coords = boxes_tensor[:, :, :, [2, 3]]
            y_coords[y_coords >= self.img_height] = self.img_height - 1
            y_coords[y_coords < 0] = 0
            boxes_tensor[:, :, :, [2, 3]] = y_coords
        # If `normalize_coords` is enabled, normalize the coordinates to be within [0, 1]
        if self.normalize_coords:
            boxes_tensor[:, :, :, :2] /= self.img_width
            boxes_tensor[:, :, :, 2:] /= self.img_height
        if self.coords == 'centroids':
            # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth
            # Convert `(xmin, xmax, ymin, ymax)` back to `(cx, cy, w, h)`
            boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='minmax2centroids')
        # Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
        # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
        variances_tensor = np.zeros_like(boxes_tensor)  # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
        variances_tensor += self.variances  # Long live broadcasting
        # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`
        boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)
        # Prepend one dimension to `boxes_tensor` to account for the batch size and tile it along that dimension.
        # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`
        boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
        boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))
        return boxes_tensor

    def compute_output_shape(self, input_shape):
        if K.image_dim_ordering() == 'tf':
            batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape
        else:  # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future
            batch_size, feature_map_channels, feature_map_height, feature_map_width = input_shape
        return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8)
```
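The class above calls `convert_coordinates`, which is defined in a separate utilities module of the SSD codebase this layer comes from. As a reference, here is a minimal sketch of that helper covering only the two conversion modes used above, assuming the `(xmin, xmax, ymin, ymax)` "minmax" layout that the class works with:

```python
import numpy as np

def convert_coordinates(tensor, start_index, conversion):
    """Minimal sketch: convert between `(cx, cy, w, h)` and `(xmin, xmax, ymin, ymax)`.

    Only the two conversion modes used by `AnchorBoxes` are implemented here.
    """
    ind = start_index
    out = np.copy(tensor).astype(np.float64)
    if conversion == 'centroids2minmax':
        out[..., ind]     = tensor[..., ind]     - tensor[..., ind + 2] / 2.0  # xmin = cx - w/2
        out[..., ind + 1] = tensor[..., ind]     + tensor[..., ind + 2] / 2.0  # xmax = cx + w/2
        out[..., ind + 2] = tensor[..., ind + 1] - tensor[..., ind + 3] / 2.0  # ymin = cy - h/2
        out[..., ind + 3] = tensor[..., ind + 1] + tensor[..., ind + 3] / 2.0  # ymax = cy + h/2
    elif conversion == 'minmax2centroids':
        out[..., ind]     = (tensor[..., ind] + tensor[..., ind + 1]) / 2.0      # cx
        out[..., ind + 1] = (tensor[..., ind + 2] + tensor[..., ind + 3]) / 2.0  # cy
        out[..., ind + 2] = tensor[..., ind + 1] - tensor[..., ind]              # w = xmax - xmin
        out[..., ind + 3] = tensor[..., ind + 3] - tensor[..., ind + 2]          # h = ymax - ymin
    else:
        raise ValueError("Unsupported conversion: {}".format(conversion))
    return out
```

In an SSD-style model the layer is then attached to each predictor feature map, e.g. `AnchorBoxes(img_height=300, img_width=300, this_scale=0.2, next_scale=0.37)(conv_feature_map)` (the scale values here are purely illustrative).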
An implementation of a custom ROI pooling layer:
```python
import tensorflow as tf
from keras.engine.topology import Layer

class ROIPoolingLayer(Layer):
    def __init__(self, pooled_height, pooled_width, **kwargs):
        self.pooled_height = pooled_height
        self.pooled_width = pooled_width
        super(ROIPoolingLayer, self).__init__(**kwargs)

    def compute_output_shape(self, input_shape):
        """ Returns the shape of the ROI layer output
        """
        feature_map_shape, rois_shape = input_shape
        assert feature_map_shape[0] == rois_shape[0]
        batch_size = feature_map_shape[0]
        n_rois = rois_shape[1]
        n_channels = feature_map_shape[3]
        return (batch_size, n_rois, self.pooled_height,
                self.pooled_width, n_channels)

    def call(self, x):
        """ Maps the input tensors `[feature_maps, rois]` of the ROI layer to its output
        """
        def curried_pool_rois(x):
            return ROIPoolingLayer._pool_rois(x[0], x[1],
                                              self.pooled_height,
                                              self.pooled_width)
        pooled_areas = tf.map_fn(curried_pool_rois, x, dtype=tf.float32)
        return pooled_areas

    @staticmethod
    def _pool_roi(feature_map, roi, pooled_height, pooled_width):
        """ Applies ROI Pooling to a single image and a single ROI
        """
        # Compute the region of interest; the ROI is given as relative
        # `(y_min, x_min, y_max, x_max)` coordinates in [0, 1]
        feature_map_height = int(feature_map.shape[0])
        feature_map_width = int(feature_map.shape[1])
        h_start = tf.cast(feature_map_height * roi[0], 'int32')
        w_start = tf.cast(feature_map_width * roi[1], 'int32')
        h_end = tf.cast(feature_map_height * roi[2], 'int32')
        w_end = tf.cast(feature_map_width * roi[3], 'int32')
        region = feature_map[h_start:h_end, w_start:w_end, :]
        # Divide the region into non-overlapping areas
        region_height = h_end - h_start
        region_width = w_end - w_start
        h_step = tf.cast(region_height / pooled_height, 'int32')
        w_step = tf.cast(region_width / pooled_width, 'int32')
        areas = [[(
                    i * h_step,
                    j * w_step,
                    (i + 1) * h_step if i + 1 < pooled_height else region_height,
                    (j + 1) * w_step if j + 1 < pooled_width else region_width
                  )
                  for j in range(pooled_width)]
                 for i in range(pooled_height)]
        # Take the maximum of each area and stack the result
        def pool_area(x):
            return tf.math.reduce_max(region[x[0]:x[2], x[1]:x[3], :], axis=[0, 1])
        pooled_features = tf.stack([[pool_area(x) for x in row] for row in areas])
        return pooled_features

    @staticmethod
    def _pool_rois(feature_map, rois, pooled_height, pooled_width):
        """ Applies ROI pooling for a single image and various ROIs
        """
        def curried_pool_roi(roi):
            return ROIPoolingLayer._pool_roi(feature_map, roi,
                                             pooled_height, pooled_width)
        pooled_areas = tf.map_fn(curried_pool_roi, rois, dtype=tf.float32)
        return pooled_areas
```
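A minimal smoke test in the TF1 / standalone-Keras setting this code targets (the feature-map size, ROI coordinates, and 7x7 pooling size are illustrative; ROIs are relative `(y_min, x_min, y_max, x_max)` coordinates in `[0, 1]`):

```python
import numpy as np
import tensorflow as tf

# Toy inputs: one 200x100 single-channel feature map and two ROIs
feature_maps_np = np.random.rand(1, 200, 100, 1).astype('float32')
rois_np = np.array([[[0.0, 0.0, 0.7, 0.7],
                     [0.7, 0.7, 1.0, 1.0]]], dtype='float32')

feature_maps = tf.placeholder(tf.float32, shape=(None, 200, 100, 1))
rois = tf.placeholder(tf.float32, shape=(None, 2, 4))
pooled = ROIPoolingLayer(pooled_height=7, pooled_width=7)([feature_maps, rois])

with tf.Session() as sess:
    out = sess.run(pooled, feed_dict={feature_maps: feature_maps_np,
                                      rois: rois_np})
print(out.shape)  # (1, 2, 7, 7, 1) == (batch, n_rois, pooled_h, pooled_w, channels)
```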