Hello,
I studied your code for a long time, but I still don't understand why your loss function includes things I haven't seen in the YOLO / YOLOv2 papers.
In particular, I noticed that you apply a sparse categorical cross-entropy from logits (tf.nn.sparse_softmax_cross_entropy_with_logits) to class_predicted, while I've seen other implementations that simply apply tf.math.softmax to it and then compute the squared error tf.math.square(predicted_logits - true_logits).
What do you think about this?
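To make sure I'm comparing the right things, here is a minimal sketch of the two class-loss variants I mean; the shapes, class count and tensor names are made up just for illustration:

```python
import tensorflow as tf

# made-up shapes: (batch, grid_h, grid_w, n_boxes, n_classes)
raw_class = tf.random.normal((2, 13, 13, 5, 20))                    # raw class scores from the net
true_logits = tf.one_hot(tf.zeros((2, 13, 13, 5), tf.int32), 20)    # one-hot class targets
true_object = tf.ones((2, 13, 13, 5))                                # objectness mask

# variant A: softmax then masked squared error (what the code below does)
predicted_logits = tf.nn.softmax(raw_class)
loss_a = tf.reduce_sum(true_object[..., None] * tf.math.square(true_logits - predicted_logits))

# variant B: sparse cross-entropy straight from the raw scores
class_ids = tf.argmax(true_logits, axis=-1)
ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=class_ids, logits=raw_class)
loss_b = tf.reduce_sum(true_object * ce)
```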
Another question: would you recommend using a loss function like the one in the following code?
```python
import numpy as np
import tensorflow as tf


class Loss:
    def __init__(self, anchors, lambda_coord=5, lambda_noobj=1):
        self.anchors = anchors
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        self.priors = self.make_priors()
        self.first_run = True
    def center_grid(self, input_tensor):
        '''
        Build the (xi, yi) grid used to fix x_center, y_center in the yolo
        prediction.

        parameters:
        -----------
        input_tensor : ndarray or tf.Tensor

        return : ndarray of shape (batch_size, wg, hg, n_boxs, 2)
        '''
        batch_size, wg, hg, n_boxs = input_tensor.shape[:4]
        xg = np.arange(wg)
        yg = np.arange(hg)
        # NOTE: the reshape below assumes a square grid (wg == hg)
        grid = np.meshgrid(xg, yg)
        grid_x = np.reshape(grid[0], (wg, hg, 1))
        grid_y = np.reshape(grid[1], (wg, hg, 1))
        grid = np.stack((grid_x, grid_y), -1)
        # tile the (wg, hg, 1, 2) grid over the batch and the anchor boxes
        return np.tile(grid, [batch_size, 1, 1, n_boxs, 1])
    def make_priors(self):
        '''
        Build the priors used to scale the anchor predictions.
        '''
        if len(self.anchors) % 2 != 0:
            raise ValueError('anchors must contain an even number of values')
        n_boxs = len(self.anchors) // 2
        return np.reshape(self.anchors, (1, 1, 1, n_boxs, 2))
    def min_max_boxes(self, tensor_xy, tensor_wh):
        '''
        Compute the mins and maxs of the boxes.
        '''
        half_wh = tensor_wh / 2.
        boxs_mins = tensor_xy - half_wh
        boxs_maxs = tensor_xy + half_wh
        return boxs_mins, boxs_maxs
    def compute_iou(self, tensor_xy_t, tensor_wh_t, tensor_xy_p, tensor_wh_p):
        '''
        Compute the IoU from the given xy and wh tensors.

        parameters:
        -----------
        *_t : true values
        *_p : predicted values

        return : IoU of each box (all at once)
        '''
        # intersection areas
        t_mins, t_maxs = self.min_max_boxes(tensor_xy_t, tensor_wh_t)
        p_mins, p_maxs = self.min_max_boxes(tensor_xy_p, tensor_wh_p)
        intersect_mins = tf.math.maximum(p_mins, t_mins)
        intersect_maxes = tf.math.minimum(p_maxs, t_maxs)
        intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
        # union areas
        true_areas = tensor_wh_t[..., 0] * tensor_wh_t[..., 1]
        pred_areas = tensor_wh_p[..., 0] * tensor_wh_p[..., 1]
        union_areas = pred_areas + true_areas - intersect_areas
        # compute IoU
        iou = tf.math.truediv(intersect_areas, union_areas)
        return iou
    def xy_loss(self, true_xy, predict_xy, true_object):
        '''
        Compute the x, y coordinate loss (L2 norm, as in the paper).
        '''
        square = tf.math.square(true_xy - predict_xy)
        masked_square = square * true_object[..., None]
        loss = tf.math.reduce_sum(masked_square, axis=[1, 2, 3, 4])
        return loss
    def wh_loss(self, true_wh, predict_wh, true_object):
        '''
        Compute the L2 norm of the square roots of the w, h box coordinates.
        '''
        squared_sqrt = tf.square(tf.sqrt(true_wh) - tf.sqrt(predict_wh))
        masked_squared_sqrt = true_object[..., None] * squared_sqrt
        loss = tf.math.reduce_sum(masked_squared_sqrt, axis=[1, 2, 3, 4])
        return loss
    def object_loss(self, iou, true_object, predict_object):
        '''
        Explanation:
        The net is an anchor-based bounding-box predictor, so the confidence
        target for the boxes responsible for an object is the IoU between
        the predicted box and the ground truth.
        '''
        square = tf.math.square(iou - predict_object)
        masked_square = true_object * square
        loss = tf.math.reduce_sum(masked_square, axis=[1, 2, 3])
        return loss
    def no_obj_loss(self, iou, true_object, predict_object):
        '''
        Explanation:
        Only penalize confidences whose best IoU is low (< 0.6) and that are
        not responsible for any object. Remember that (1 - true_object) is a
        mask with 0 where there is an object and 1 where there is none.
        '''
        # no-object mask: best IoU below 0.6 and no ground-truth object
        highest_iou = tf.math.reduce_max(iou, axis=-1)
        mask = (tf.cast(highest_iou < 0.6, dtype=tf.float32)[..., None]
                * (1 - true_object))
        # compute loss: the confidence target for no-object boxes is zero
        squared = tf.math.square(0 - predict_object)
        masked_square = mask * squared
        loss = tf.math.reduce_sum(masked_square, axis=[1, 2, 3])
        return loss
    def class_loss(self, true_logits, predict_logits, true_object):
        # TODO:
        # test whether sparse_softmax_cross_entropy_with_logits works well
        # here, e.g.
        # tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)
        # source: ( https://github.com/experiencor/keras-yolo2/blob/master/
        # Yolo%20Step-by-Step.ipynb )
        squared = tf.square(true_logits - predict_logits)
        masked_square = true_object[..., None] * squared
        loss = tf.math.reduce_sum(masked_square, axis=[1, 2, 3, 4])
        return loss
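    # Untested sketch of a cross-entropy alternative for class_loss; it assumes
    # true_logits is one-hot (so tf.argmax recovers the class ids) and that the
    # method receives the raw class scores, not the softmaxed ones:
    # def class_loss_ce(self, true_logits, raw_class_scores, true_object):
    #     class_ids = tf.argmax(true_logits, axis=-1)
    #     ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
    #         labels=class_ids, logits=raw_class_scores)
    #     return tf.math.reduce_sum(true_object * ce, axis=[1, 2, 3])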
    def compute_loss(self,
                     predict_xy, predict_wh, predict_object, predict_logits,
                     true_xy, true_wh, true_object, true_logits):
        iou = self.compute_iou(true_xy, true_wh, predict_xy, predict_wh)
        loss = (
            self.lambda_coord * self.xy_loss(true_xy, predict_xy, true_object)
            + self.lambda_coord * self.wh_loss(true_wh, predict_wh, true_object)
            + self.object_loss(iou, true_object, predict_object)
            + self.lambda_noobj * self.no_obj_loss(iou, true_object,
                                                   predict_object)
            + self.class_loss(true_logits, predict_logits, true_object))
        return loss
    def loss(self, y_pred, y_true):
        if y_pred.shape != y_true.shape:
            raise ValueError('input shape and output shape must be equal')
        if self.first_run:
            self.first_run = False
            self.wh_grid = self.center_grid(y_pred)
        # adjust the predicted x, y, w, h, confidence and class scores
        predict_xy = tf.nn.sigmoid(y_pred[..., :2]) + self.wh_grid
        predict_wh = tf.math.exp(y_pred[..., 2:4]) * self.priors
        predict_object = tf.nn.sigmoid(y_pred[..., 4])
        predict_logits = tf.nn.softmax(y_pred[..., 5:])
        # true x, y, w, h, objectness and class targets
        true_xy = y_true[..., 0:2]
        true_wh = y_true[..., 2:4]
        true_logits = y_true[..., 5:]
        true_object = y_true[..., 4]
        loss = self.compute_loss(predict_xy, predict_wh,
                                 predict_object, predict_logits,
                                 true_xy, true_wh,
                                 true_object, true_logits)
        return loss
```
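For context, this is roughly how I intend to use it; the anchor values, grid size and class count below are made up just for the example:

```python
import tensorflow as tf

# made-up example: 13x13 grid, 5 anchors, 20 classes
anchors = [0.57, 0.67, 1.87, 2.06, 3.33, 5.47, 7.88, 3.52, 9.77, 9.17]
loss_fn = Loss(anchors)

# fake network output and target, shape (batch, 13, 13, n_boxes, 5 + n_classes)
y_pred = tf.random.normal((4, 13, 13, 5, 25))
y_true = tf.zeros((4, 13, 13, 5, 25))

per_image_loss = loss_fn.loss(y_pred, y_true)  # shape (batch,)
total_loss = tf.reduce_mean(per_image_loss)
```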
I hope you can help me clear up this doubt that has been bothering me a lot. Thanks!