# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Forked from: https://github.com/rafaelpadilla/Object-Detection-Metrics
# Developed by: Rafael Padilla (rafael.padilla@smt.ufrj.br)

import glob
import os
import shutil
import sys
from collections import Counter
from enum import Enum

import numpy as np

# NOTE(review): cv2 (OpenCV) is only needed by the drawing helpers
# (add_bb_into_image / drawAllBoundingBoxes), so it is imported lazily there;
# computing the metrics does not require OpenCV to be installed.


class MethodAveragePrecision(Enum):
    """Interpolation method used when computing average precision.

    EveryPointInterpolation: all-point interpolation (official VOC toolkit).
    ElevenPointInterpolation: classic 11-point interpolation (VOC paper).
    """
    EveryPointInterpolation = 1
    ElevenPointInterpolation = 2


class CoordinatesType(Enum):
    """Whether box coordinates are relative to the image size or absolute."""
    Relative = 1
    Absolute = 2


class BBType(Enum):
    """Whether a bounding box is a ground truth or a detection."""
    GroundTruth = 1
    Detected = 2


class BBFormat(Enum):
    """Bounding-box format: (x, y, width, height) or (x1, y1, x2, y2)."""
    XYWH = 1
    XYX2Y2 = 2


def convertToRelativeValues(size, box):
    """Convert absolute corner coordinates to relative center/size values.

    Args:
        size: (width, height) of the image.
        box: (x1, x2, y1, y2) absolute corner coordinates.

    Returns:
        (x_center, y_center, width, height), each relative to the image size.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    cx = (box[1] + box[0]) / 2.0
    cy = (box[3] + box[2]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return cx * dw, cy * dh, w * dw, h * dh


def convertToAbsoluteValues(size, box):
    """Convert relative center/size coordinates to absolute corners.

    Args:
        size: (width, height) of the image.
        box: (x_center, y_center, width, height) relative to the image size.

    Returns:
        (xIn, yIn, xEnd, yEnd) absolute corners, clamped to image bounds.
    """
    xIn = round(((2 * float(box[0]) - float(box[2])) * size[0] / 2))
    yIn = round(((2 * float(box[1]) - float(box[3])) * size[1] / 2))
    xEnd = xIn + round(float(box[2]) * size[0])
    yEnd = yIn + round(float(box[3]) * size[1])
    if xIn < 0:
        xIn = 0
    if yIn < 0:
        yIn = 0
    if xEnd >= size[0]:
        xEnd = size[0] - 1
    if yEnd >= size[1]:
        yEnd = size[1] - 1
    return xIn, yIn, xEnd, yEnd


def add_bb_into_image(image, bb, color=(255, 0, 0), thickness=2, label=None):
    """Draw a bounding box (and an optional text label) onto an image.

    Args:
        image: BGR image array the box is drawn into (modified in place).
        bb: BoundingBox instance to draw.
        color: (R, G, B) color of the rectangle and label background.
        thickness: rectangle line thickness in pixels.
        label: optional text drawn at the top-left corner of the box.

    Returns:
        The same image with the box (and label) drawn.
    """
    import cv2  # local import: OpenCV is only required for visualization

    r = int(color[0])
    g = int(color[1])
    b = int(color[2])
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 0.5
    fontThickness = 1

    x1, y1, x2, y2 = bb.getAbsoluteBoundingBox(BBFormat.XYX2Y2)
    x1 = int(x1)
    y1 = int(y1)
    x2 = int(x2)
    y2 = int(y2)
    # cv2 expects BGR ordering, hence (b, g, r)
    cv2.rectangle(image, (x1, y1), (x2, y2), (b, g, r), thickness)
    # Add label
    if label is not None:
        # Get size of the text box
        (tw, th) = cv2.getTextSize(label, font, fontScale, fontThickness)[0]
        # Top-left coord of the textbox
        (xin_bb, yin_bb) = (x1 + thickness, y1 - th + int(12.5 * fontScale))
        # Checking position of the text top-left (outside or inside the bb)
        if yin_bb - th <= 0:  # if outside the image
            yin_bb = y1 + th  # put it inside the bb
        r_Xin = x1 - int(thickness / 2)
        r_Yin = y1 - th - int(thickness / 2)
        # Draw filled rectangle to put the text in it
        cv2.rectangle(image, (r_Xin, r_Yin - thickness),
                      (r_Xin + tw + thickness * 3,
                       r_Yin + th + int(12.5 * fontScale)), (b, g, r), -1)
        cv2.putText(image, label, (xin_bb, yin_bb), font, fontScale, (0, 0, 0),
                    fontThickness, cv2.LINE_AA)
    return image


class BoundingBox:
    """A single ground-truth or detected bounding box of one image."""

    def __init__(self,
                 imageName,
                 classId,
                 x,
                 y,
                 w,
                 h,
                 typeCoordinates=None,
                 imgSize=None,
                 bbType=None,
                 classConfidence=None,
                 format=None):
        """Constructor.

        Args:
            imageName: String representing the image name.
            classId: String value representing class id.
            x: Float value representing the X upper-left coordinate of the
                bounding box.
            y: Float value representing the Y upper-left coordinate of the
                bounding box.
            w: Float width of the bounding box (or X2 when format is
                BBFormat.XYX2Y2).
            h: Float height of the bounding box (or Y2 when format is
                BBFormat.XYX2Y2).
            typeCoordinates: (optional) Enum (Relative or Absolute)
                representing whether the coordinates (x, y, w, h) are
                absolute or relative to the image size. Default: Absolute.
            imgSize: (optional) 2D vector (width, height) => (int, int) with
                the size of the image. Required when typeCoordinates is
                Relative.
            bbType: (optional) Enum (GroundTruth or Detected). If it is a
                detection, classConfidence has to be informed.
            classConfidence: (optional) Float confidence of the detected
                class. Required when bbType is Detected.
            format: (optional) Enum (BBFormat.XYWH or BBFormat.XYX2Y2)
                indicating the format of the given coordinates.

        Raises:
            IOError: when a conditionally-required argument is missing.
        """
        self._imageName = imageName
        self._typeCoordinates = typeCoordinates
        if typeCoordinates == CoordinatesType.Relative and imgSize is None:
            raise IOError(
                'Parameter \'imgSize\' is required. It is necessary to inform the image size.'
            )
        if bbType == BBType.Detected and classConfidence is None:
            raise IOError(
                'For bbType=\'Detection\', it is necessary to inform the classConfidence value.'
            )
        self._classConfidence = classConfidence
        self._bbType = bbType
        self._classId = classId
        self._format = format

        # If relative coordinates, convert to absolute values
        # For relative coords: (x,y,w,h)=(X_center/img_width, Y_center/img_height)
        if typeCoordinates == CoordinatesType.Relative:
            # convertToAbsoluteValues returns corner coordinates, so _w/_h
            # temporarily hold xEnd/yEnd right after this call.
            (self._x, self._y, self._w, self._h) = convertToAbsoluteValues(
                imgSize, (x, y, w, h))
            self._width_img = imgSize[0]
            self._height_img = imgSize[1]
            if format == BBFormat.XYWH:
                self._x2 = self._w
                self._y2 = self._h
                self._w = self._x2 - self._x
                self._h = self._y2 - self._y
            else:
                raise IOError(
                    'For relative coordinates, the format must be XYWH (x,y,width,height)'
                )
        # For absolute coords: (x,y,w,h)=real bb coords
        else:
            self._x = x
            self._y = y
            if format == BBFormat.XYWH:
                self._w = w
                self._h = h
                self._x2 = self._x + self._w
                self._y2 = self._y + self._h
            else:  # format == BBFormat.XYX2Y2
                self._x2 = w
                self._y2 = h
                self._w = self._x2 - self._x
                self._h = self._y2 - self._y
        if imgSize is None:
            self._width_img = None
            self._height_img = None
        else:
            self._width_img = imgSize[0]
            self._height_img = imgSize[1]

    def getAbsoluteBoundingBox(self, format=None):
        """Return absolute coordinates in the requested format.

        Fixed: the original returned None when format was omitted; now the
        default (None) behaves as BBFormat.XYWH.
        """
        if format is None or format == BBFormat.XYWH:
            return self._x, self._y, self._w, self._h
        return self._x, self._y, self._x2, self._y2

    def getRelativeBoundingBox(self, imgSize=None):
        """Return the box as relative (x_center, y_center, w, h) values.

        Uses the given imgSize if provided, otherwise the size stored at
        construction time. Fixed: the original had the None-check inverted
        and dereferenced imgSize exactly when it was None.
        """
        if imgSize is None and self._width_img is None and self._height_img is None:
            raise IOError(
                'Parameter \'imgSize\' is required. It is necessary to inform the image size.'
            )
        if imgSize is not None:
            return convertToRelativeValues(
                (imgSize[0], imgSize[1]), (self._x, self._y, self._w, self._h))
        return convertToRelativeValues(
            (self._width_img, self._height_img),
            (self._x, self._y, self._w, self._h))

    def getImageName(self):
        return self._imageName

    def getConfidence(self):
        return self._classConfidence

    def getFormat(self):
        return self._format

    def getClassId(self):
        return self._classId

    def getImageSize(self):
        return self._width_img, self._height_img

    def getCoordinatesType(self):
        return self._typeCoordinates

    def getBBType(self):
        return self._bbType

    @staticmethod
    def compare(det1, det2):
        """Return True if two boxes match on class, confidence, coordinates
        and image size.

        Fixed: the original referenced the non-existent attribute
        det1.classConfidence and the typo method det2.classConfidenc()
        (AttributeError on every call), and compared each image size with
        itself instead of with the other box's size.
        """
        det1BB = det1.getAbsoluteBoundingBox(format=BBFormat.XYWH)
        det1ImgSize = det1.getImageSize()
        det2BB = det2.getAbsoluteBoundingBox(format=BBFormat.XYWH)
        det2ImgSize = det2.getImageSize()
        return det1.getClassId() == det2.getClassId() and \
            det1.getConfidence() == det2.getConfidence() and \
            det1BB == det2BB and \
            det1ImgSize == det2ImgSize

    @staticmethod
    def clone(boundingBox):
        """Return a deep copy (new BoundingBox) of the given box."""
        absBB = boundingBox.getAbsoluteBoundingBox(format=BBFormat.XYWH)
        newBoundingBox = BoundingBox(
            boundingBox.getImageName(),
            boundingBox.getClassId(),
            absBB[0],
            absBB[1],
            absBB[2],
            absBB[3],
            typeCoordinates=boundingBox.getCoordinatesType(),
            imgSize=boundingBox.getImageSize(),
            bbType=boundingBox.getBBType(),
            classConfidence=boundingBox.getConfidence(),
            format=BBFormat.XYWH)
        return newBoundingBox


class BoundingBoxes:
    """A mutable collection of BoundingBox objects with query helpers."""

    def __init__(self):
        self._boundingBoxes = []

    def addBoundingBox(self, bb):
        """Append a BoundingBox to the collection."""
        self._boundingBoxes.append(bb)

    def removeBoundingBox(self, _boundingBox):
        """Remove the first stored box that compares equal to _boundingBox.

        Fixed: the original did `del self._boundingBoxes[d]`, using the box
        object itself as a list index (TypeError).
        """
        for d in self._boundingBoxes:
            if BoundingBox.compare(d, _boundingBox):
                self._boundingBoxes.remove(d)
                return

    def removeAllBoundingBoxes(self):
        """Empty the collection."""
        self._boundingBoxes = []

    def getBoundingBoxes(self):
        return self._boundingBoxes

    def getBoundingBoxByClass(self, classId):
        """Return all boxes of the specified class."""
        return [d for d in self._boundingBoxes if d.getClassId() == classId]

    def getClasses(self):
        """Return the distinct class ids, in first-seen order."""
        classes = []
        for d in self._boundingBoxes:
            c = d.getClassId()
            if c not in classes:
                classes.append(c)
        return classes

    def getBoundingBoxesByType(self, bbType):
        """Return all boxes of the specified BBType."""
        return [d for d in self._boundingBoxes if d.getBBType() == bbType]

    def getBoundingBoxesByImageName(self, imageName):
        """Return all boxes belonging to the specified image."""
        return [
            d for d in self._boundingBoxes if d.getImageName() == imageName
        ]

    def count(self, bbType=None):
        """Return the number of boxes; restricted to bbType when given."""
        if bbType is None:  # Return all bounding boxes
            return len(self._boundingBoxes)
        return sum(1 for d in self._boundingBoxes if d.getBBType() == bbType)

    def clone(self):
        """Return a deep copy of the collection."""
        newBoundingBoxes = BoundingBoxes()
        for d in self._boundingBoxes:
            newBoundingBoxes.addBoundingBox(BoundingBox.clone(d))
        return newBoundingBoxes

    def drawAllBoundingBoxes(self, image, imageName):
        """Draw all boxes of one image: ground truths green, detections red."""
        for bb in self.getBoundingBoxesByImageName(imageName):
            if bb.getBBType() == BBType.GroundTruth:  # if ground truth
                image = add_bb_into_image(image, bb, color=(0, 255, 0))
            else:  # if detection
                image = add_bb_into_image(image, bb, color=(255, 0, 0))
        return image


class Evaluator:
    """Computes PASCAL VOC detection metrics from a BoundingBoxes collection."""

    def GetPascalVOCMetrics(self, boundingboxes, IOUThreshold=0.5,
                            method=None):
        """Get the metrics used by the VOC Pascal 2012 challenge.

        Args:
            boundingboxes: BoundingBoxes object with ground-truth and
                detected boxes.
            IOUThreshold: IOU threshold deciding whether a detection is a TP
                or FP (default 0.5).
            method: MethodAveragePrecision.EveryPointInterpolation uses the
                all-point interpolation of the official PASCAL VOC toolkit;
                any other value (including the default None) uses the
                11-point interpolation described in "The PASCAL Visual
                Object Classes (VOC) Challenge".

        Returns:
            A list of dictionaries, one per class, with keys:
            dict['class']: class of the current dictionary;
            dict['precision']: array with the precision values;
            dict['recall']: array with the recall values;
            dict['AP']: average precision;
            dict['interpolated precision']: interpolated precision values;
            dict['interpolated recall']: interpolated recall values;
            dict['total positives']: total number of ground truth positives;
            dict['total TP']: total number of True Positive detections;
            dict['total FP']: total number of False Positive detections.
        """
        ret = []
        # [imageName, class, confidence=1, (bb coordinates XYX2Y2)]
        groundTruths = []
        # [imageName, class, confidence, (bb coordinates XYX2Y2)]
        detections = []
        classes = []
        # Separate the boxes into GTs and detections and collect the classes
        for bb in boundingboxes.getBoundingBoxes():
            if bb.getBBType() == BBType.GroundTruth:
                groundTruths.append([
                    bb.getImageName(), bb.getClassId(), 1,
                    bb.getAbsoluteBoundingBox(BBFormat.XYX2Y2)
                ])
            else:
                detections.append([
                    bb.getImageName(), bb.getClassId(), bb.getConfidence(),
                    bb.getAbsoluteBoundingBox(BBFormat.XYX2Y2)
                ])
            if bb.getClassId() not in classes:
                classes.append(bb.getClassId())
        classes = sorted(classes)

        # Precision x Recall is obtained individually for each class
        for c in classes:
            # Detections of class c, sorted by decreasing confidence
            dects = sorted(
                [d for d in detections if d[1] == c],
                key=lambda conf: conf[2],
                reverse=True)
            # Ground truths of class c
            gts = [g for g in groundTruths if g[1] == c]
            npos = len(gts)
            TP = np.zeros(len(dects))
            FP = np.zeros(len(dects))
            # Per-image arrays flagging which GT boxes were already matched
            det = Counter(cc[0] for cc in gts)
            for key, val in det.items():
                det[key] = np.zeros(val)
            # Loop through detections
            for d in range(len(dects)):
                # Ground truths of the same image as this detection
                gt = [g for g in gts if g[0] == dects[d][0]]
                iouMax = sys.float_info.min
                jmax = 0  # safe default; only used when iouMax passes the threshold
                for j in range(len(gt)):
                    iou = Evaluator.iou(dects[d][3], gt[j][3])
                    if iou > iouMax:
                        iouMax = iou
                        jmax = j
                # Assign detection as TP / FP; each GT may be matched once
                if iouMax >= IOUThreshold:
                    if det[dects[d][0]][jmax] == 0:
                        TP[d] = 1  # count as true positive
                        det[dects[d][0]][jmax] = 1  # flag as already 'seen'
                    else:
                        FP[d] = 1  # GT already matched: false positive
                else:
                    FP[d] = 1  # count as false positive
            # compute precision, recall and average precision
            acc_FP = np.cumsum(FP)
            acc_TP = np.cumsum(TP)
            rec = acc_TP / npos
            prec = np.divide(acc_TP, (acc_FP + acc_TP))
            # Depending on the method, call the right implementation
            if method == MethodAveragePrecision.EveryPointInterpolation:
                [ap, mpre, mrec, ii] = Evaluator.CalculateAveragePrecision(
                    rec, prec)
            else:
                [ap, mpre, mrec, _] = Evaluator.ElevenPointInterpolatedAP(
                    rec, prec)
            ret.append({
                'class': c,
                'precision': prec,
                'recall': rec,
                'AP': ap,
                'interpolated precision': mpre,
                'interpolated recall': mrec,
                'total positives': npos,
                'total TP': np.sum(TP),
                'total FP': np.sum(FP)
            })
        return ret

    @staticmethod
    def CalculateAveragePrecision(rec, prec):
        """All-point interpolated AP (official PASCAL VOC toolkit method).

        Returns [ap, interpolated precision, interpolated recall, indices
        where recall changes].
        """
        mrec = [0]
        mrec.extend(rec)
        mrec.append(1)
        mpre = [0]
        mpre.extend(prec)
        mpre.append(0)
        # Make precision monotonically non-increasing from right to left
        for i in range(len(mpre) - 1, 0, -1):
            mpre[i - 1] = max(mpre[i - 1], mpre[i])
        # Indices where recall changes
        ii = []
        for i in range(len(mrec) - 1):
            if mrec[1:][i] != mrec[0:-1][i]:
                ii.append(i + 1)
        # AP = sum over recall steps of (step width) * (interpolated precision)
        ap = 0
        for i in ii:
            ap = ap + np.sum((mrec[i] - mrec[i - 1]) * mpre[i])
        return [ap, mpre[0:len(mpre) - 1], mrec[0:len(mpre) - 1], ii]

    @staticmethod
    def ElevenPointInterpolatedAP(rec, prec):
        """11-point interpolated AP, as in the original VOC paper.

        Returns [ap, rho values, recall values, None] (last slot kept for
        interface parity with CalculateAveragePrecision).
        """
        mrec = list(rec)
        mpre = list(prec)
        recallValues = list(np.linspace(0, 1, 11)[::-1])
        rhoInterp = []
        recallValid = []
        for r in recallValues:
            # Obtain all recall values higher or equal than r
            argGreaterRecalls = np.argwhere(np.array(mrec) >= r)
            pmax = 0
            # If there are recalls above r, take the max precision among them
            if argGreaterRecalls.size != 0:
                pmax = max(mpre[argGreaterRecalls.min():])
                recallValid.append(r)
            rhoInterp.append(pmax)
        # By definition AP = sum(max(precision whose recall is above r))/11
        ap = sum(rhoInterp) / 11
        # Generating values for the plot
        rvals = [recallValid[0]]
        rvals.extend(recallValid)
        rvals.append(0)
        pvals = [0]
        pvals.extend(rhoInterp)
        pvals.append(0)
        cc = []
        for i in range(len(rvals)):
            p = (rvals[i], pvals[i - 1])
            if p not in cc:
                cc.append(p)
            p = (rvals[i], pvals[i])
            if p not in cc:
                cc.append(p)
        recallValues = [i[0] for i in cc]
        rhoInterp = [i[1] for i in cc]
        return [ap, rhoInterp, recallValues, None]

    @staticmethod
    def _getAllIOUs(reference, detections):
        """For each detection, compute its IOU with the reference box.

        Returns (iou, reference, detection) triples sorted by IOU descending.
        """
        ret = []
        bbReference = reference.getAbsoluteBoundingBox(BBFormat.XYX2Y2)
        for d in detections:
            bb = d.getAbsoluteBoundingBox(BBFormat.XYX2Y2)
            iou = Evaluator.iou(bbReference, bb)
            ret.append((iou, reference, d))  # iou, reference, detection
        # sort by iou (from highest to lowest)
        return sorted(ret, key=lambda i: i[0], reverse=True)

    @staticmethod
    def iou(boxA, boxB):
        """Intersection-over-union of two (x1, y1, x2, y2) boxes in [0, 1]."""
        # if boxes dont intersect
        if Evaluator._boxesIntersect(boxA, boxB) is False:
            return 0
        interArea = Evaluator._getIntersectionArea(boxA, boxB)
        union = Evaluator._getUnionAreas(boxA, boxB, interArea=interArea)
        # intersection over union
        iou = interArea / union
        assert iou >= 0
        return iou

    @staticmethod
    def _boxesIntersect(boxA, boxB):
        """Return True if the two (x1, y1, x2, y2) boxes overlap at all."""
        if boxA[0] > boxB[2]:
            return False  # boxA is right of boxB
        if boxB[0] > boxA[2]:
            return False  # boxA is left of boxB
        if boxA[3] < boxB[1]:
            return False  # boxA is above boxB
        if boxA[1] > boxB[3]:
            return False  # boxA is below boxB
        return True

    @staticmethod
    def _getIntersectionArea(boxA, boxB):
        """Area of the overlap (inclusive pixel convention, hence the +1)."""
        xA = max(boxA[0], boxB[0])
        yA = max(boxA[1], boxB[1])
        xB = min(boxA[2], boxB[2])
        yB = min(boxA[3], boxB[3])
        return (xB - xA + 1) * (yB - yA + 1)

    @staticmethod
    def _getUnionAreas(boxA, boxB, interArea=None):
        """Union area of two boxes; reuses interArea when already computed."""
        area_A = Evaluator._getArea(boxA)
        area_B = Evaluator._getArea(boxB)
        if interArea is None:
            interArea = Evaluator._getIntersectionArea(boxA, boxB)
        return float(area_A + area_B - interArea)

    @staticmethod
    def _getArea(box):
        """Area of an (x1, y1, x2, y2) box (inclusive pixel convention)."""
        return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)


# Validate formats
def ValidateFormats(argFormat, argName, errors):
    """Map a CLI format string ('xywh'/'xyrb') to a BBFormat value.

    Appends an error message to `errors` on invalid input; None defaults to
    BBFormat.XYWH.
    """
    if argFormat == 'xywh':
        return BBFormat.XYWH
    elif argFormat == 'xyrb':
        return BBFormat.XYX2Y2
    elif argFormat is None:
        return BBFormat.XYWH  # default when nothing is passed
    else:
        errors.append(
            'argument %s: invalid value. It must be either \'xywh\' or \'xyrb\''
            % argName)


# Validate mandatory args
def ValidateMandatoryArgs(arg, argName, errors):
    """Record an error if a mandatory argument is missing; True otherwise."""
    if arg is None:
        errors.append('argument %s: required argument' % argName)
    else:
        return True


def ValidateImageSize(arg, argName, argInformed, errors):
    """Parse a '(width,height)' string into an (int, int) tuple.

    Appends an error to `errors` and returns None on any malformed input.
    Fixed: the original error message contained the garbled word
    'INdiaTEGER' instead of 'INTEGER'.
    """
    errorMsg = 'argument %s: required argument if %s is relative' % (
        argName, argInformed)
    ret = None
    if arg is None:
        errors.append(errorMsg)
    else:
        arg = arg.replace('(', '').replace(')', '')
        args = arg.split(',')
        if len(args) != 2:
            errors.append(
                '%s. It must be in the format \'width,height\' (e.g. \'600,400\')'
                % errorMsg)
        else:
            if not args[0].isdigit() or not args[1].isdigit():
                errors.append(
                    '%s. It must be in INTEGER the format \'width,height\' (e.g. \'600,400\')'
                    % errorMsg)
            else:
                ret = (int(args[0]), int(args[1]))
    return ret


# Validate coordinate types
def ValidateCoordinatesTypes(arg, argName, errors):
    """Map a CLI coordinates string ('abs'/'rel') to a CoordinatesType.

    Appends an error message to `errors` (and returns None) on invalid
    input; None defaults to Absolute.
    """
    if arg == 'abs':
        return CoordinatesType.Absolute
    elif arg == 'rel':
        return CoordinatesType.Relative
    elif arg is None:
        return CoordinatesType.Absolute  # default when nothing is passed
    errors.append(
        'argument %s: invalid value. It must be either \'rel\' or \'abs\'' %
        argName)


def getBoundingBoxes(directory,
                     isGT,
                     bbFormat,
                     coordType,
                     allBoundingBoxes=None,
                     allClasses=None,
                     imgSize=(0, 0)):
    """Read txt files containing bounding boxes (ground truth and detections).

    Each txt file is named after its image. Ground-truth lines look like
    "class x y w h"; detection lines look like "class confidence x y w h".

    NOTE(review): this changes the process working directory (os.chdir),
    matching the original implementation — callers should not rely on the
    cwd afterwards.

    Returns:
        (allBoundingBoxes, allClasses) accumulated across calls.
    """
    print(directory)
    if allBoundingBoxes is None:
        allBoundingBoxes = BoundingBoxes()
    if allClasses is None:
        allClasses = []
    # Read ground truths
    os.chdir(directory)
    files = sorted(glob.glob("*.txt"))
    for f in files:
        nameOfImage = f.replace(".txt", "")
        with open(f, "r") as fh1:
            for line in fh1:
                line = line.replace("\n", "")
                if line.replace(' ', '') == '':
                    continue
                splitLine = line.split(" ")
                if isGT:
                    idClass = (splitLine[0])  # class
                    x = float(splitLine[1])
                    y = float(splitLine[2])
                    w = float(splitLine[3])
                    h = float(splitLine[4])
                    bb = BoundingBox(
                        nameOfImage,
                        idClass,
                        x,
                        y,
                        w,
                        h,
                        coordType,
                        imgSize,
                        BBType.GroundTruth,
                        format=bbFormat)
                else:
                    idClass = (splitLine[0])  # class
                    confidence = float(splitLine[1])
                    x = float(splitLine[2])
                    y = float(splitLine[3])
                    w = float(splitLine[4])
                    h = float(splitLine[5])
                    bb = BoundingBox(
                        nameOfImage,
                        idClass,
                        x,
                        y,
                        w,
                        h,
                        coordType,
                        imgSize,
                        BBType.Detected,
                        confidence,
                        format=bbFormat)
                allBoundingBoxes.addBoundingBox(bb)
                if idClass not in allClasses:
                    allClasses.append(idClass)
    return allBoundingBoxes, allClasses


def get_mAP(gtFolder, detFolder, threshold=0.5, savePath=None):
    """Compute per-class AP and overall mAP from two folders of txt files.

    Args:
        gtFolder: folder with ground-truth files ("class x1 y1 x2 y2").
        detFolder: folder with detection files
            ("class confidence x1 y1 x2 y2").
        threshold: IOU threshold for a detection to count as a TP.
        savePath: optional results folder; it is cleared and re-created.

    Returns:
        List of strings: one 'AP: ...' entry per class with ground-truth
        positives, followed by the 'mAP: ...' entry.
    """
    gtFormat = 'xyrb'
    detFormat = 'xyrb'
    gtCoordinates = 'abs'
    detCoordinates = 'abs'
    gtFolder = os.path.join(os.path.abspath('.'), gtFolder)
    detFolder = os.path.join(os.path.abspath('.'), detFolder)
    iouThreshold = threshold

    # Arguments validation
    errors = []
    # Validate formats
    gtFormat = ValidateFormats(gtFormat, 'gtFormat', errors)
    detFormat = ValidateFormats(detFormat, '-detformat', errors)
    # Coordinates types
    gtCoordType = ValidateCoordinatesTypes(gtCoordinates, '-gtCoordinates',
                                           errors)
    detCoordType = ValidateCoordinatesTypes(detCoordinates, '-detCoordinates',
                                            errors)
    imgSize = (0, 0)

    # Create directory to save results. Fixed: the original called
    # shutil.rmtree(savePath) BEFORE checking savePath for None, raising
    # TypeError with the default argument.
    if savePath is not None:
        shutil.rmtree(savePath, ignore_errors=True)  # clear folder
        os.makedirs(savePath)

    # Get groundtruth boxes
    allBoundingBoxes, allClasses = getBoundingBoxes(
        gtFolder, True, gtFormat, gtCoordType, imgSize=imgSize)
    # Get detected boxes
    allBoundingBoxes, allClasses = getBoundingBoxes(
        detFolder, False, detFormat, detCoordType, allBoundingBoxes,
        allClasses, imgSize=imgSize)
    allClasses.sort()

    evaluator = Evaluator()
    acc_AP = 0
    validClasses = 0

    # Each entry of `detections` holds the metrics of one class
    detections = evaluator.GetPascalVOCMetrics(
        allBoundingBoxes,
        iouThreshold,
        method=MethodAveragePrecision.EveryPointInterpolation)

    AP_res = []
    for metricsPerClass in detections:
        # Get metric values per each class
        cl = metricsPerClass['class']
        ap = metricsPerClass['AP']
        totalPositives = metricsPerClass['total positives']
        # Classes with no ground-truth positives do not contribute to mAP
        if totalPositives > 0:
            validClasses = validClasses + 1
            acc_AP = acc_AP + ap
            ap_str = "{0:.2f}%".format(ap * 100)
            AP_res.append('AP: %s (%s)' % (ap_str, cl))
    # Guard against division by zero when no class had positives
    mAP = acc_AP / validClasses if validClasses else 0.0
    mAP_str = "{0:.2f}%".format(mAP * 100)
    AP_res.append('mAP: %s' % mAP_str)
    return AP_res