from PP_TSMv2_infer import *
from mediapipe_detection import mediapipe_detect
import mediapipe as mp
import cv2
import os  # previously reached scope only via the wildcard import; make it explicit

mp_holistic = mp.solutions.holistic

# Maps the model's top-1 class index to a human-readable action label.
CLASS_LABELS = {
    0: "Nodding!",
    1: "not playing phone!",
    2: "not sleep!",
    3: "playing phone!",
    4: "sleep!",
}

# Per-label prefix for the text overlaid on each frame.
# (Strings kept byte-identical to the original putText calls.)
LABEL_PREFIXES = {
    "Nodding!": "the person's head is ",
    "sleep!": "the person is ",
    "not sleep!": "the person is ",
    "playing phone!": "the person'hand is ",
    "not playing phone!": "the person'hand is ",
}


def main(input_path, output_path, face_b, left_hand_b, right_hand_b):
    """Classify a video clip with PP-TSMv2, overlay MediaPipe detections and
    the predicted label on every frame, preview the result in a window, and
    write the annotated video to *output_path*.

    Args:
        input_path: path of the source video file.
        output_path: directory the annotated video is written to
            (same base filename as the input).
        face_b: bounding-box margin forwarded to get_bbox() for the face.
        left_hand_b: bounding-box margin for the left hand.
        right_hand_b: bounding-box margin for the right hand.
    """
    cap = cv2.VideoCapture(input_path)

    # Inference model location: config, model file and parameter file.
    config = 'D:/download/PaddleVideo1/output/output/pptsm_lcnet_k400_16frames_uniform.yaml'
    model_file = 'D:/download/PaddleVideo1/output/output/ppTSMv2.pdmodel'
    params_file = 'D:/download/PaddleVideo1/output/output/ppTSMv2.pdiparams'
    batch_size = 1

    # Build the predictor once and run inference over the whole clip.
    # (The original created two separate PP_TSMv2_predict instances.)
    pp_tsm = PP_TSMv2_predict()
    infer, predictor = pp_tsm.create_inference_model(config, model_file, params_file)
    res = pp_tsm.predict(config, input_path, batch_size, predictor, infer)

    # Unknown class indices fall through unchanged, mirroring the original
    # "else: pass" behaviour (no overlay branch will match them).
    label = CLASS_LABELS.get(res["topk_class"], res["topk_class"])

    fps_video = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    codec = cv2.VideoWriter_fourcc(*'XVID')
    video_name = os.path.basename(input_path)
    out = cv2.VideoWriter(os.path.join(output_path, video_name), codec,
                          fps_video, (frame_width, frame_height))

    with mp_holistic.Holistic(model_complexity=2,
                              min_detection_confidence=0.5,
                              min_tracking_confidence=0.5) as holistic:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            image, results = mediapipe_detect().mediapipe_detection(frame, holistic)
            cv2.namedWindow("mediapipe_detections", cv2.WINDOW_AUTOSIZE)

            # One lookup replaces the five duplicated elif branches.
            prefix = LABEL_PREFIXES.get(label)
            if prefix is not None:
                image, res = mediapipe_detect().get_bbox(
                    image, results, face_b, left_hand_b, right_hand_b, label)
                cv2.putText(image, prefix + label, (0, 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

            cv2.imshow("mediapipe_detections", image)
            # BUG FIX: the original called out.write(image) twice per
            # iteration (before and after the waitKey check), so every
            # frame appeared twice in the saved video.
            out.write(image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

    cap.release()
    out.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    # Sample clips used during development (kept for reference):
    # 'D:/download/PaddleVideo1/output/output/after_1/0711-1_0_1.avi'
    # 'D:/download/PaddleVideo1/output/output/after_1/0711-1_398_1.avi'
    # 'D:/download/PaddleVideo1/output/output/after_1/0711-1_597_0.avi'   # positive sample
    # 'D:/download/PaddleVideo1/output/output/after_1/0711-1_597_1.avi'
    # 'D:/download/PaddleVideo1/output/output/after_1/0711-1_796_1.avi'
    # 'D:/download/PaddleVideo1/output/output/after_1/0711-3_0_0.avi'
    # 'D:/download/PaddleVideo1/output/output/after_1/0711-3_1400_0.avi'
    # Renamed from `input`/`output` to avoid shadowing the builtin `input`.
    input_video = 'D:/download/PaddleVideo1/output/output/after_1/0711-1_796_0.avi'  # positive sample, inference succeeds
    output_dir = "D:/download/PaddleVideo1/output/output1"
    face_b = 50
    left_hand_b = 7
    right_hand_b = 7
    main(input_video, output_dir, face_b, left_hand_b, right_hand_b)