Spaces:

awacke1
/

Webcam-Object-Recognition-Yolo-n-Coco

Runtime error

App Files Files Community

Webcam-Object-Recognition-Yolo-n-Coco / custom_layers.py

awacke1

Upload 11 files

0e371d1 almost 3 years ago

raw

history blame contribute delete

11 kB

	import tensorflow as tf
	from tensorflow.keras import layers, initializers, models


	def conv(x, filters, kernel_size, downsampling=False, activation='leaky', batch_norm=True):
	    """
	    Conv2D building block: optional padding, convolution, optional batch norm, optional activation.

	    :param x: input tensor
	    :param filters: number of output filters of the convolution
	    :param kernel_size: kernel size passed straight to ``layers.Conv2D``
	    :param downsampling: when True, zero-pad one pixel on the top and left and
	        use a stride-2 'valid' convolution; otherwise use a stride-1 'same' convolution
	    :param activation: 'leaky' (LeakyReLU, alpha=0.1), 'mish', or any other value
	        (e.g. None) to apply no activation
	    :param batch_norm: when True, append BatchNormalization and drop the conv bias
	    :return: output tensor
	    """
	    if downsampling:
	        # Pad top & left only, so the stride-2 'valid' convolution below lines up.
	        x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(x)

	    # Kernel regularisation is not applied; the bias keeps the Keras default initialiser.
	    convolution = layers.Conv2D(
	        filters,
	        kernel_size,
	        strides=2 if downsampling else 1,
	        padding='valid' if downsampling else 'same',
	        use_bias=not batch_norm,  # a bias is redundant when batch norm follows
	        kernel_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01),
	    )
	    x = convolution(x)

	    if batch_norm:
	        x = layers.BatchNormalization()(x)

	    if activation == 'leaky':
	        return layers.LeakyReLU(alpha=0.1)(x)
	    if activation == 'mish':
	        # mish(x) = x * tanh(softplus(x))
	        return x * tf.math.tanh(tf.math.softplus(x))
	    # Any other value leaves the output linear.
	    return x


	def residual_block(x, filters1, filters2, activation='leaky'):
	    """
	    Residual unit: a 1x1 conv followed by a 3x3 conv, added back onto the input.

	    :param x: input tensor (also used as the shortcut branch)
	    :param filters1: number of filters of the 1x1 conv
	    :param filters2: number of filters of the 3x3 conv
	    :param activation: activation name forwarded to both convs (default: leaky relu)
	    :return: element-wise sum of the input and the conv branch
	    """
	    shortcut = x
	    branch = conv(shortcut, filters1, kernel_size=1, activation=activation)
	    branch = conv(branch, filters2, kernel_size=3, activation=activation)
	    merge = layers.Add()
	    return merge([shortcut, branch])


	def csp_block(x, residual_out, repeat, residual_bottleneck=False):
	    """
	    Cross Stage Partial (CSPNet) block.

	    The input is projected twice with 1x1 mish convs: one projection bypasses the
	    block, the other runs through ``repeat`` residual blocks and a closing 1x1
	    conv. Both branches are then concatenated.

	    :param x: input tensor
	    :param residual_out: number of filters of both projections, of the 3x3 conv in
	        each residual block, and of the closing 1x1 conv
	    :param repeat: number of residual blocks on the main branch
	    :param residual_bottleneck: when True, the 1x1 conv inside each residual block
	        uses ``residual_out // 2`` filters instead of ``residual_out``
	    :return: concatenation of [main branch, bypass branch]
	    """
	    inner_filters = residual_out // 2 if residual_bottleneck else residual_out

	    # The bypass projection is created first, then the main-branch projection.
	    bypass = conv(x, residual_out, 1, activation="mish")
	    main = conv(x, residual_out, 1, activation="mish")

	    for _ in range(repeat):
	        main = residual_block(main, inner_filters, residual_out, activation="mish")
	    main = conv(main, residual_out, 1, activation="mish")

	    return layers.Concatenate()([main, bypass])


	def darknet53(x):
	    """
	    Darknet-53 style backbone built from ``conv`` and ``residual_block``.

	    After a 32-filter stem, each stage applies one downsampling 3x3 conv followed
	    by a number of residual blocks whose 1x1 conv uses half the stage width.

	    :param x: input tensor
	    :return: (route_1, route_2, x) - the outputs of the 256-filter stage, the
	        512-filter stage and the final 1024-filter stage
	    """
	    # (stage width, residual repeats, keep output as a route)
	    stages = (
	        (64, 1, False),
	        (128, 2, False),
	        (256, 8, True),
	        (512, 8, True),
	        (1024, 4, False),
	    )

	    x = conv(x, 32, 3)
	    routes = []
	    for width, repeats, keep in stages:
	        x = conv(x, width, 3, downsampling=True)
	        for _ in range(repeats):
	            x = residual_block(x, width // 2, width)
	        if keep:
	            routes.append(x)

	    route_1, route_2 = routes
	    return route_1, route_2, x


	def cspdarknet53(input):
	    """
	    CSPDarknet-53 style backbone followed by a max-pooling pyramid.

	    :param input: input tensor of the model
	    :return: ``models.Model`` mapping ``input`` to [route0, route1, route2], where
	        route0 and route1 are taken after the third and fourth CSP stages and
	        route2 is the output of the pooling section at the end
	    """
	    # (csp width, residual repeats, bottleneck residuals, transition filters, keep as route)
	    stages = (
	        (64, 1, True, 64, False),
	        (64, 2, False, 128, False),
	        (128, 8, False, 256, True),
	        (256, 8, False, 512, True),
	        (512, 4, False, 1024, False),
	    )
	    final_stage = len(stages) - 1

	    x = conv(input, 32, 3)
	    x = conv(x, 64, 3, downsampling=True)

	    routes = []
	    for index, (width, repeats, bottleneck, transition, keep) in enumerate(stages):
	        x = csp_block(x, residual_out=width, repeat=repeats, residual_bottleneck=bottleneck)
	        x = conv(x, transition, 1, activation='mish')
	        if keep:
	            routes.append(x)
	        if index != final_stage:
	            # Every stage but the last ends with a downsampling conv of twice the width.
	            x = conv(x, transition * 2, 3, activation='mish', downsampling=True)
	    route0, route1 = routes

	    for n_filters, kernel in ((512, 1), (1024, 3), (512, 1)):
	        x = conv(x, n_filters, kernel)

	    # Stride-1 'same' max pools of several window sizes, concatenated with the
	    # unpooled features.
	    pooled = [
	        layers.MaxPooling2D(pool_size=window, strides=1, padding='same')(x)
	        for window in (13, 9, 5)
	    ]
	    x = layers.Concatenate()(pooled + [x])

	    x = conv(x, 512, 1)
	    x = conv(x, 1024, 3)
	    route2 = conv(x, 512, 1)
	    return models.Model(input, [route0, route1, route2])


	def yolov4_neck(x, num_classes):
	    """
	    Build the CSPDarknet-53 backbone on ``x`` and the feature-fusion neck on top of it.

	    The deepest backbone route is upsampled and merged with the shallower routes
	    (top-down), then the fused maps are downsampled and merged again (bottom-up).
	    Each of the three resulting scales gets a linear 1x1 prediction conv.

	    :param x: input tensor of the model
	    :param num_classes: number of classes; every prediction conv has
	        ``3 * (num_classes + 5)`` filters
	    :return: [conv_sbbox, conv_mbbox, conv_lbbox] raw prediction tensors, ordered
	        from the shallowest backbone route to the deepest
	    """
	    prediction_filters = 3 * (num_classes + 5)

	    def fuse(tensor, narrow):
	        # Five alternating 1x1 (narrow) / 3x3 (2 * narrow) convs.
	        wide = narrow * 2
	        for n_filters, kernel in ((narrow, 1), (wide, 3), (narrow, 1), (wide, 3), (narrow, 1)):
	            tensor = conv(tensor, n_filters, kernel)
	        return tensor

	    def predict(tensor, wide):
	        # 3x3 conv followed by a linear 1x1 conv without batch norm.
	        tensor = conv(tensor, wide, 3)
	        return conv(tensor, prediction_filters, 1, activation=None, batch_norm=False)

	    backbone = cspdarknet53(x)
	    shallow, middle, deep = backbone.output

	    # Top-down path: deep -> middle.
	    upsampled = conv(deep, 256, 1)
	    upsampled = layers.UpSampling2D()(upsampled)
	    lateral = conv(middle, 256, 1)
	    middle_fused = fuse(layers.Concatenate()([lateral, upsampled]), 256)

	    # Top-down path: middle -> shallow.
	    upsampled = conv(middle_fused, 128, 1)
	    upsampled = layers.UpSampling2D()(upsampled)
	    lateral = conv(shallow, 128, 1)
	    shallow_fused = fuse(layers.Concatenate()([lateral, upsampled]), 128)

	    conv_sbbox = predict(shallow_fused, 256)

	    # Bottom-up path: shallow -> middle.
	    downsampled = conv(shallow_fused, 256, 3, downsampling=True)
	    middle_out = fuse(layers.Concatenate()([downsampled, middle_fused]), 256)

	    conv_mbbox = predict(middle_out, 512)

	    # Bottom-up path: middle -> deep (merged with the untouched deepest backbone route).
	    downsampled = conv(middle_out, 512, 3, downsampling=True)
	    deep_out = fuse(layers.Concatenate()([downsampled, deep]), 512)

	    conv_lbbox = predict(deep_out, 1024)

	    return [conv_sbbox, conv_mbbox, conv_lbbox]


	def _static_grid_size(feature_map, default):
	    """Return the side of a square rank-4 static shape (batch, g, g, c), else ``default``."""
	    shape = getattr(feature_map, "shape", None)
	    if shape is None:
	        return default
	    try:
	        dims = tuple(shape)
	    except (TypeError, ValueError):
	        # e.g. a shape of unknown rank cannot be iterated
	        return default
	    if len(dims) != 4:
	        return default
	    height, width = dims[1], dims[2]
	    if height is None or width is None or height != width:
	        return default
	    return int(height)


	def yolov4_head(yolo_neck_outputs, classes, anchors, xyscale):
	    """
	    Decode the three raw neck outputs into boxes and probabilities.

	    The grid size of each scale is read from the static shape of the
	    corresponding neck output when that shape is a square, fully defined
	    rank-4 shape. Otherwise the sizes for a 416x416 input (52, 26, 13) are
	    used, which were previously hard-coded. Strides stay fixed at 8, 16, 32.

	    :param yolo_neck_outputs: sequence of three raw prediction tensors, ordered
	        from the stride-8 scale to the stride-32 scale
	    :param classes: number of classes
	    :param anchors: indexed as ``anchors[scale, :, :]``
	        (assumes an array of shape (3, 3, 2) - TODO confirm against caller)
	    :param xyscale: per-scale xy scaling factors, indexed ``xyscale[scale]``
	    :return: flat list of 12 tensors; for each scale in order:
	        bbox, object probability, class probabilities, pred_box
	    """
	    # (fallback grid size for a 416x416 input, stride) per scale
	    scales = ((52, 8), (26, 16), (13, 32))

	    decoded = []
	    for index, (default_grid, stride) in enumerate(scales):
	        raw = yolo_neck_outputs[index]
	        decoded.extend(get_boxes(raw,
	                                 anchors=anchors[index, :, :], classes=classes,
	                                 grid_size=_static_grid_size(raw, default_grid),
	                                 strides=stride,
	                                 xyscale=xyscale[index]))
	    return decoded


	def get_boxes(pred, anchors, classes, grid_size, strides, xyscale):
	    """
	    Decode one scale of raw predictions into corner boxes and probabilities.

	    :param pred: raw predictions; reshaped here to
	        (batch_size, grid_size, grid_size, 3, 5 + classes)
	    :param anchors: anchor sizes multiplied onto exp(wh)
	        (assumes they broadcast against (..., 3, 2) - TODO confirm)
	    :param classes: number of classes
	    :param grid_size: number of grid cells per side
	    :param strides: factor applied to the grid-relative box centres
	    :param xyscale: scale applied to the sigmoid xy offsets
	    :return: (pred_box_x1y1x2y2, obj_prob, class_prob, pred_box_xywh);
	        pred_box_x1y1x2y2 holds absolute xy values, pred_box_xywh holds the
	        sigmoid xy offsets concatenated with the raw wh predictions
	    """
	    batch_size = tf.shape(pred)[0]
	    pred = tf.reshape(pred, (batch_size, grid_size, grid_size, 3, 5 + classes))

	    # Last axis layout: xy (2), wh (2), objectness (1), class scores (classes).
	    raw_xy, raw_wh, raw_obj, raw_cls = tf.split(pred, (2, 2, 1, classes), axis=-1)

	    xy_offset = tf.sigmoid(raw_xy)   # (?, g, g, 3, 2)
	    obj_prob = tf.sigmoid(raw_obj)   # (?, g, g, 3, 1)
	    class_prob = tf.sigmoid(raw_cls)   # (?, g, g, 3, classes)
	    pred_box_xywh = tf.concat((xy_offset, raw_wh), axis=-1)   # (?, g, g, 3, 4)

	    # Per-cell (x, y) indices, with an extra axis to broadcast over the 3 anchors.
	    cell_xy = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))   # (g, g) (g, g)
	    cell_xy = tf.stack(cell_xy, axis=-1)   # (g, g, 2)
	    cell_xy = tf.expand_dims(cell_xy, axis=2)   # (g, g, 1, 2)
	    cell_xy = tf.cast(cell_xy, dtype=tf.float32)

	    # Rescale the offsets around the cell centre, add the cell index, scale by strides.
	    centre_xy = ((xy_offset * xyscale) - 0.5 * (xyscale - 1) + cell_xy) * strides   # (?, g, g, 3, 2)
	    size_wh = tf.exp(raw_wh) * anchors   # (?, g, g, 3, 2)

	    top_left = centre_xy - size_wh / 2   # (?, g, g, 3, 2)
	    bottom_right = centre_xy + size_wh / 2   # (?, g, g, 3, 2)
	    pred_box_x1y1x2y2 = tf.concat([top_left, bottom_right], axis=-1)   # (?, g, g, 3, 4)

	    return pred_box_x1y1x2y2, obj_prob, class_prob, pred_box_xywh


	def nms(model_ouputs, input_shape, num_class, iou_threshold=0.413, score_threshold=0.3):
	    """
	    Apply non-maximum suppression to the decoded model outputs.
	    ref: https://www.tensorflow.org/api_docs/python/tf/image/combined_non_max_suppression

	    :param model_ouputs: flat sequence read in groups of four; within each group
	        element 0 is the boxes, element 1 the object confidence and element 2 the
	        class probabilities (the fourth element is not used here)
	    :param input_shape: size of the input image; boxes are divided by
	        ``input_shape[0]`` only (assumes a square input - TODO confirm)
	    :param num_class: number of classes
	    :param iou_threshold: IoU threshold forwarded to the suppression op
	    :param score_threshold: score threshold forwarded to the suppression op
	    :return: nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
	    """
	    batch = tf.shape(model_ouputs[0])[0]

	    # Each list starts with an empty tensor so the concatenations below are
	    # well-defined whatever the number of output groups.
	    box_parts = [tf.zeros((batch, 0, 4))]
	    conf_parts = [tf.zeros((batch, 0, 1))]
	    class_parts = [tf.zeros((batch, 0, num_class))]

	    for start in range(0, len(model_ouputs), 4):
	        group_boxes = model_ouputs[start]
	        group_conf = model_ouputs[start + 1]
	        group_classes = model_ouputs[start + 2]
	        # Flatten all grid cells and anchors of this group into one axis.
	        box_parts.append(tf.reshape(group_boxes, (batch, -1, 4)))
	        conf_parts.append(tf.reshape(group_conf, (batch, -1, 1)))
	        class_parts.append(tf.reshape(group_classes, (batch, -1, num_class)))

	    boxes = tf.concat(box_parts, axis=1)
	    confidence = tf.concat(conf_parts, axis=1)
	    class_probabilities = tf.concat(class_parts, axis=1)

	    scores = confidence * class_probabilities
	    boxes = tf.expand_dims(boxes, axis=-2)   # one shared box per candidate for all classes
	    boxes = boxes / input_shape[0]   # box normalization: relative img size
	    print(f'nms iou: {iou_threshold} score: {score_threshold}')

	    # NOTE(review): the op documents boxes as [y1, x1, y2, x2]; get_boxes in this
	    # file emits x1y1x2y2 - confirm downstream code expects the input ordering.
	    nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = tf.image.combined_non_max_suppression(
	        boxes=boxes,
	        scores=scores,
	        max_output_size_per_class=100,
	        max_total_size=100,   # maximum number of boxes kept per image
	        iou_threshold=iou_threshold,
	        score_threshold=score_threshold,
	    )
	    # Shapes: [bs, max_detections, 4], [bs, max_detections], [bs, max_detections], [bs]
	    return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections