From 3c3737c66772ecc227accbdd2afa48ea0140520b Mon Sep 17 00:00:00 2001 From: jiangxt <1579525634@qq.com> Date: Mon, 7 Aug 2023 16:35:37 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8F=90=E4=BA=A4demo.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tool/demo.py | 89 +++++++++++++++++++++ tool/mediapipe_detection.py | 149 ++++++++++++++++++------------------ 2 files changed, 163 insertions(+), 75 deletions(-) create mode 100644 tool/demo.py diff --git a/tool/demo.py b/tool/demo.py new file mode 100644 index 0000000..b7227d6 --- /dev/null +++ b/tool/demo.py @@ -0,0 +1,89 @@ +from PP_TSMv2_infer import * +from mediapipe_detection import mediapipe_detect +import mediapipe as mp +import cv2 +mp_holistic = mp.solutions.holistic + + +def main(input_path,output_path, face_b, left_hand_b, right_hand_b): + + cap = cv2.VideoCapture(input_path) + config = 'D:/download/PaddleVideo1/output/output/pptsm_lcnet_k400_16frames_uniform.yaml' + model_file = 'D:/download/PaddleVideo1/output/output/ppTSMv2.pdmodel' # 推理模型存放地址 + params_file = 'D:/download/PaddleVideo1/output/output/ppTSMv2.pdiparams' # 推理模型参数存放地址 + batch_size = 1 # 输出推理模型 + infer,predictor = PP_TSMv2_predict().create_inference_model(config,model_file,params_file) + res = PP_TSMv2_predict().predict(config, input_path, batch_size, predictor,infer) + label = res["topk_class"] + if label == 0: + label = "Nodding!" + elif label == 1: + label = "not playing phone!" + elif label == 2: + label = "not sleep!" + elif label == 3: + label = "playing phone!" + elif label == 4: + label = "sleep!" + else: + pass + fps_video = cap.get(cv2.CAP_PROP_FPS) + frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + codec = cv2.VideoWriter_fourcc(*'XVID') + video_name = os.path.basename(input_path) + out = cv2.VideoWriter(output_path + "/" + video_name, codec, fps_video, (frame_width, frame_height)) + with mp_holistic.Holistic(model_complexity=2,min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic: + while True: + ret, frame = cap.read() + if not ret: + break + image, results = mediapipe_detect().mediapipe_detection(frame, holistic) + cv2.namedWindow("mediapipe_detections", cv2.WINDOW_AUTOSIZE) + if label == "Nodding!": + image, res = mediapipe_detect().get_bbox(image, results, face_b, left_hand_b, right_hand_b,label) + cv2.putText(image, "the person's head is " + label, (0, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), + 1) + elif label == "sleep!": + image, res = mediapipe_detect().get_bbox(image, results, face_b, left_hand_b, right_hand_b,label) + cv2.putText(image, "the person is " + label, (0, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), + 1) + elif label == "not sleep!": + image, res = mediapipe_detect().get_bbox(image, results, face_b, left_hand_b, right_hand_b,label) + cv2.putText(image, "the person is " + label, (0, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), + 1) + elif label == "playing phone!": + image, res = mediapipe_detect().get_bbox(image, results, face_b, left_hand_b, right_hand_b,label) + cv2.putText(image, "the person'hand is " + label, (0, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), + 1) + elif label == "not playing phone!": + image, res = mediapipe_detect().get_bbox(image, results, face_b, left_hand_b, right_hand_b,label) + cv2.putText(image, "the person'hand is " + label, (0, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), + 1) + + cv2.imshow("mediapipe_detections", image) + out.write(image) + if cv2.waitKey(10) & 0xFF == ord('q'): + break + out.write(image) + # print(res) + cap.release() + out.release() + cv2.destroyAllWindows() + + +if __name__=="__main__": + # input = 'D:/download/PaddleVideo1/output/output/after_1/0711-1_0_1.avi' + # input = 'D:/download/PaddleVideo1/output/output/after_1/0711-1_398_1.avi' + # input = 'D:/download/PaddleVideo1/output/output/after_1/0711-1_597_0.avi' #正例 + # input = 'D:/download/PaddleVideo1/output/output/after_1/0711-1_597_1.avi' + input = 'D:/download/PaddleVideo1/output/output/after_1/0711-1_796_0.avi' #正例,推理成功 + # input = 'D:/download/PaddleVideo1/output/output/after_1/0711-1_796_1.avi' + # input = 'D:/download/PaddleVideo1/output/output/after_1/0711-3_0_0.avi' + # input = 'D:/download/PaddleVideo1/output/output/after_1/0711-3_1400_0.avi' + + output = "D:/download/PaddleVideo1/output/output1" + face_b = 50 + left_hand_b = 7 + right_hand_b = 7 + main(input,output,face_b,left_hand_b,right_hand_b) \ No newline at end of file diff --git a/tool/mediapipe_detection.py b/tool/mediapipe_detection.py index cafacc6..25e1eb7 100644 --- a/tool/mediapipe_detection.py +++ b/tool/mediapipe_detection.py @@ -7,7 +7,6 @@ import numpy as np from mediapipe.framework.formats import landmark_pb2 import os - mp_holistic = mp.solutions.holistic _PRESENCE_THRESHOLD = 0.5 @@ -53,15 +52,15 @@ def _normalized_to_pixel_coordinates( x_px = min(math.floor(normalized_x * image_width), image_width - 1) y_px = min(math.floor(normalized_y * image_height), image_height - 1) # return print("转化的真实坐标:",x_px, y_px) - return x_px,y_px + return x_px, y_px def draw_landmarks( - image: np.ndarray, - landmark_list: landmark_pb2.NormalizedLandmarkList, - connections: Optional[List[Tuple[int, int]]] = None, - landmark_drawing_spec: Union[DrawingSpec,Mapping[int, DrawingSpec]] = DrawingSpec(color=RED_COLOR), - connection_drawing_spec: Union[DrawingSpec, Mapping[Tuple[int, int],DrawingSpec]] = DrawingSpec()): + image: np.ndarray, + landmark_list: landmark_pb2.NormalizedLandmarkList, + connections: Optional[List[Tuple[int, int]]] = None, + landmark_drawing_spec: Union[DrawingSpec, Mapping[int, DrawingSpec]] = DrawingSpec(color=RED_COLOR), + connection_drawing_spec: Union[DrawingSpec, Mapping[Tuple[int, int], DrawingSpec]] = DrawingSpec()): """ 主要是绘制关键点的连接图 image:输入的数据 @@ -81,7 +80,7 @@ def draw_landmarks( (landmark.HasField('presence') and landmark.presence < _PRESENCE_THRESHOLD)): continue - landmark_px = _normalized_to_pixel_coordinates(landmark.x, landmark.y, #将归一化坐标值转换为图像坐标值 + landmark_px = _normalized_to_pixel_coordinates(landmark.x, landmark.y, # 将归一化坐标值转换为图像坐标值 image_cols, image_rows) # print('图像像素坐标:',landmark_px) if landmark_px: @@ -90,7 +89,7 @@ def draw_landmarks( dot_list = [] if connections: # num_landmarks = len(landmark_list.landmark) - #connections:keypoint索引元组的列表,用于指定如何在图形中连接地标。 + # connections:keypoint索引元组的列表,用于指定如何在图形中连接地标。 # Draws the connections if the start and end landmarks are both visible. starts = [] @@ -139,7 +138,7 @@ def draw_landmarks( class mediapipe_detect: - def mediapipe_detection(self,image, model): + def mediapipe_detection(self, image, model): """ mediapipe检测模块 image:输入数据集 @@ -152,7 +151,7 @@ class mediapipe_detect: # image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR return image, results - def Drawing_bbox(self,result,bias): + def Drawing_bbox(self, result, bias): ''' 根据关键点坐标,获取最大外接矩形的坐标点 @@ -163,15 +162,14 @@ class mediapipe_detect: result = np.array(result) b = bias if result.any(): - - rect = cv2.boundingRect(result) #返回值, 左上角的坐标[x,y, w,h] + rect = cv2.boundingRect(result) # 返回值, 左上角的坐标[x,y, w,h] bbox = [[rect[0] - b, rect[1] - b], [rect[0] + rect[2] + b, rect[1] - b], - [rect[0] - b, rect[1] + rect[3] + b], [rect[0] + rect[2] + b, rect[1] + rect[3] + b]] #四个角的坐标 - + [rect[0] - b, rect[1] + rect[3] + b], [rect[0] + rect[2] + b, rect[1] + rect[3] + b]] # 四个角的坐标 + return bbox - - def get_bbox(self,image,results,face_b,left_hand_b,right_hand_b): + + def get_bbox(self, image, results, face_b, left_hand_b, right_hand_b, label): ''' 主要是根据关键点坐标,绘制矩形框 @@ -181,7 +179,7 @@ class mediapipe_detect: image.flags.writeable = True image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) - h,w,g = image.shape + h, w, g = image.shape # print("h:",h,"w:",w,"g:",g) """获取头部、手部关键点""" @@ -210,9 +208,15 @@ class mediapipe_detect: ) """根据关键点的坐标绘制最大外接矩形""" - fl_bbox = self.Drawing_bbox(face_location,face_b) - lh_bbox = self.Drawing_bbox(left_hand_location,left_hand_b) - rh_bbox = self.Drawing_bbox(right_hand_location,right_hand_b) + fl_bbox = self.Drawing_bbox(face_location, face_b) + lh_bbox = self.Drawing_bbox(left_hand_location, left_hand_b) + rh_bbox = self.Drawing_bbox(right_hand_location, right_hand_b) + + if label == "Nodding" or label == "sleep!" or label == "not playing phone!": + lh_bbox = None + rh_bbox = None + elif label == "not sleep!" or label == "playing phone!": + fl_bbox = None """调整头部检测框的大小""" if fl_bbox is not None: @@ -222,34 +226,34 @@ class mediapipe_detect: fl_bbox[0][0] = fl_bbox[0][0] + 30 fl_bbox[0][1] = fl_bbox[0][1] + 5 # print(fl_bbox) - for i in range(0,4): - for j in range(0,2): + for i in range(0, 4): + for j in range(0, 2): if fl_bbox[i][j] < 0: fl_bbox[i][j] = 0 elif fl_bbox[i][0] > w: fl_bbox[i][0] = w - elif fl_bbox[i][1] > h : + elif fl_bbox[i][1] > h: fl_bbox[i][1] = h else: pass - cv2.rectangle(image, fl_bbox[0], fl_bbox[3],DrawingSpec.color, DrawingSpec.thickness) + cv2.rectangle(image, fl_bbox[0], fl_bbox[3], DrawingSpec.color, DrawingSpec.thickness) if lh_bbox is not None: - for i in range(0,4): - for j in range(0,2): + for i in range(0, 4): + for j in range(0, 2): if lh_bbox[i][j] < 0: lh_bbox[i][j] = 0 elif lh_bbox[i][0] > w: lh_bbox[i][0] = w - elif lh_bbox[i][1] > h : + elif lh_bbox[i][1] > h: lh_bbox[i][1] = h else: pass - cv2.rectangle(image, lh_bbox[0], lh_bbox[3],DrawingSpec.color, DrawingSpec.thickness) + cv2.rectangle(image, lh_bbox[0], lh_bbox[3], DrawingSpec.color, DrawingSpec.thickness) if rh_bbox is not None: - for i in range(0,4): - for j in range(0,2): + for i in range(0, 4): + for j in range(0, 2): if rh_bbox[i][j] < 0: rh_bbox[i][j] = 0 elif rh_bbox[i][0] > w: @@ -258,56 +262,51 @@ class mediapipe_detect: rh_bbox[i][1] = h else: pass - cv2.rectangle(image, rh_bbox[0], rh_bbox[3],DrawingSpec.color, DrawingSpec.thickness) - - - res = {'face_bbox': fl_bbox, 'hand_bbox': [lh_bbox,rh_bbox]} - + cv2.rectangle(image, rh_bbox[0], rh_bbox[3], DrawingSpec.color, DrawingSpec.thickness) + + res = {'face_bbox': fl_bbox, 'hand_bbox': [lh_bbox, rh_bbox]} + + return image, res + + +def main(input_path, output_path, face_b, left_hand_b, right_hand_b): + cap = cv2.VideoCapture(input_path) + frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(cap.get(cv2.CAP_PROP_FPS)) + codec = cv2.VideoWriter_fourcc(*'XVID') + video_name = os.path.basename(input_path) + out = cv2.VideoWriter(output_path + "/" + video_name, codec, fps, (frame_width, frame_height)) + label = "" + with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic: + while True: + ret, frame = cap.read() + if not ret: + break + image, results = mediapipe_detect().mediapipe_detection(frame, holistic) + image, res = mediapipe_detect().get_bbox(image, results, face_b, left_hand_b, right_hand_b, label) + out.write(image) + cv2.namedWindow("mediapipe_detections", cv2.WINDOW_AUTOSIZE) + cv2.imshow("mediapipe_detections", image) + # print(res) + if cv2.waitKey(10) & 0xFF == ord('q'): + break + + cap.release() + out.release() + cv2.destroyAllWindows() - # print(res) - - return image,res - - -def main(input_path,output_path,face_b,left_hand_b,right_hand_b): - - cap = cv2.VideoCapture(input_path) - frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - fps = int(cap.get(cv2.CAP_PROP_FPS)) - codec = cv2.VideoWriter_fourcc(*'XVID') - video_name = os.path.basename(input_path) - out = cv2.VideoWriter(output_path +"/"+ video_name, codec, fps, (frame_width, frame_height)) - with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic: - while True: - ret, frame = cap.read() - if not ret: - break - image, results = mediapipe_detect().mediapipe_detection(frame,holistic) - image,res = mediapipe_detect().get_bbox(image,results,face_b,left_hand_b,right_hand_b) - out.write(image) - cv2.namedWindow("mediapipe_detections", cv2.WINDOW_AUTOSIZE) - cv2.imshow("mediapipe_detections", image) - # print(res) - if cv2.waitKey(10) & 0xFF == ord('q'): - break - - cap.release() - out.release() - cv2.destroyAllWindows() if __name__ == "__main__": - input = 'D:/download/PaddleVideo1/output/output/after_1/test02_0.avi' + # input = 'D:/download/PaddleVideo1/output/output/after_1/test02_0.avi' # input = 'D:/download/PaddleVideo1/output/output/after_1/0711-1_0_1.avi' # input = 'D:/download/PaddleVideo1/output/output/after_1/0711-3_1400_0.avi' # input = "C:/Users/Administrator/Pictures/video_seg_re_hand/test01_3.avi" # input = 'C:/Users/Administrator/Pictures/video3.0/sleep/0711-3_7_01_5.avi' - # input = " D:/download/PaddleVideo1/output/output/after_1/0711-1_199_0.avi" + input = " D:/download/PaddleVideo1/output/output/after_1/0711-1_199_0.avi" # input = 'D:/download/PaddleVideo1/output/output/after_1/test05_10750_1.avi' output = 'D:/download/PaddleVideo1/output/output/output' - face_b = 50 #头部标注框修正值 - left_hand_b = 7 #左手部分标注框修正值 - right_hand_b = 7 #右手部分标注框修正值 - main(input,output,face_b,left_hand_b,right_hand_b) - - + face_b = 50 # 头部标注框修正值 + left_hand_b = 7 # 左手部分标注框修正值 + right_hand_b = 7 # 右手部分标注框修正值 + main(input, output, face_b, left_hand_b, right_hand_b) \ No newline at end of file