Head Pose Estimation from Facial Landmarks

mac · 2024-04-01

1. Introduction

This post documents head pose estimation from the landmarks detected by MTCNN. The idea is simple: MTCNN is a face detector that also outputs 5 landmarks per face, namely the left eye, right eye, nose tip, left mouth corner, and right mouth corner. Given these 5 2D landmarks in an image, OpenCV's solvePnP estimates the rotation and translation that project a 5-point 3D face template (defined in world coordinates) onto the 2D points. From the recovered transform, the head pose angles in the 2D image are computed: Yaw (turning the head: left positive, right negative), Pitch (nodding: up negative, down positive), and Roll (tilting the head sideways: left negative, right positive).
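For reference, the angle extraction used in the code below is the standard rotation-matrix → quaternion → Tait-Bryan conversion. With $r_{ij}$ denoting the entries of the 3×3 rotation matrix recovered from the solvePnP rotation vector via Rodrigues, the quaternion and angles are:

$$q_0 = \tfrac{1}{2}\sqrt{1 + r_{11} + r_{22} + r_{33}}, \qquad q_1 = \frac{r_{32} - r_{23}}{4 q_0}, \qquad q_2 = \frac{r_{13} - r_{31}}{4 q_0}, \qquad q_3 = \frac{r_{21} - r_{12}}{4 q_0}$$

$$\mathrm{yaw} = \arcsin\!\bigl(2 (q_0 q_2 + q_1 q_3)\bigr), \qquad \mathrm{pitch} = \mathrm{atan2}\!\bigl(2 (q_0 q_1 - q_2 q_3),\; q_0^2 - q_1^2 - q_2^2 + q_3^2\bigr), \qquad \mathrm{roll} = \mathrm{atan2}\!\bigl(2 (q_0 q_3 - q_1 q_2),\; q_0^2 + q_1^2 - q_2^2 - q_3^2\bigr)$$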

2. MTCNN-light

Many open-source implementations of MTCNN are available; the light version is used here. Its advantage is speed, at the cost of slightly lower accuracy. To expose the 5 landmarks, an overloaded function was added to the original src/mtcnn.cpp: void findFace(Mat &image, vector<struct Bbox> &resBox); (its use is shown in the code in Section 3).

3. Head pose estimation with MTCNN

The head pose estimation code below is adapted from https://blog.csdn.net/zzyy0929/article/details/78323363

#include "network.h" #include "mtcnn.h" #include <time.h> #include <iostream> #include <fstream> #include <vector> #include <map> #include "opencv2/opencv.hpp" void rot2Euler(cv::Mat faceImg,const cv::Mat& rotation3_3) { cv::resize(faceImg,faceImg,cv::Size(faceImg.cols*2,faceImg.rows*2)); double q0 = std::sqrt(1+rotation3_3.at<double>(1,1)+rotation3_3.at<double>(2,2)+rotation3_3.at<double>(3,3))/2; double q1 = (rotation3_3.at<double>(3,2)-rotation3_3.at<double>(2,3))/(4*q0); double q2 = (rotation3_3.at<double>(1,3)-rotation3_3.at<double>(3,1))/(4*q0); double q3 = (rotation3_3.at<double>(2,1)-rotation3_3.at<double>(1,2))/(4*q0); double yaw = std::asin( 2*(q0*q2 + q1*q3) ); double pitch = std::atan2(2*(q0*q1-q2*q3), q0*q0-q1*q1-q2*q2+q3*q3); double roll = std::atan2(2*(q0*q3-q1*q2), q0*q0+q1*q1-q2*q2-q3*q3); std::cout<<"yaw:"<<yaw<<" pitch:"<< pitch <<" roll:"<< roll<< std::endl; char ch[20]; sprintf(ch, "yaw:%0.4f", yaw); cv::putText(faceImg,ch,cv::Point(20,40), FONT_HERSHEY_SIMPLEX,1,Scalar(255,23,0),2,3); sprintf(ch, "pitch:%0.4f", pitch); cv::putText(faceImg,ch,cv::Point(20,80), FONT_HERSHEY_SIMPLEX,1,Scalar(255,23,0),2,3); sprintf(ch, "roll:%0.4f", roll); cv::putText(faceImg,ch,cv::Point(20,120), FONT_HERSHEY_SIMPLEX,1,Scalar(255,23,0),2,3); cv::imshow("faceImg",faceImg); } void headPosEstimate(const cv::Mat & faceImg , const std::vector<cv::Point2d>& facial5Pts ) { // 3D model points std::vector<cv::Point3f> model_points; model_points.push_back(cv::Point3d(-165.0f, 170.0f, -115.0f)); // Left eye model_points.push_back(cv::Point3d( 165.0f, 170.0f, -115.0f)); // Right eye model_points.push_back(cv::Point3d(0.0f, 0.0f, 0.0f)); // Nose tip model_points.push_back(cv::Point3d(-150.0f, -150.0f, -125.0f)); // Left Mouth corner model_points.push_back(cv::Point3d(150.0f, -150.0f, -125.0f)); // Right Mouth corner // Camera internals double focal_length = faceImg.cols; cv::Point2d center = cv::Point2d(faceImg.cols/2,faceImg.rows/2); cv::Mat camera_matrix =(cv::Mat_<double>(3,3) << focal_length, 0, center.x, 0,focal_length,center.y,0, 0,1); cv::Mat dist_coeffs = cv::Mat::zeros(4,1,cv::DataType<double>::type); cv::Mat rotation_vector; cv::Mat translation_vector; cv::solvePnP(model_points,facial5Pts , camera_matrix, dist_coeffs,rotation_vector, translation_vector); /*投影一条直线而已 std::vector<Point3d> nose_end_point3D; std::vector<Point2d> nose_end_point2D; nose_end_point3D.push_back(cv::Point3d(0,0,1000.0)); projectPoints(nose_end_point3D, rotation_vector, translation_vector,camera_matrix, dist_coeffs, nose_end_point2D); //std::cout << "Rotation Vector " << std::endl << rotation_vector << std::endl; //std::cout << "Translation Vector" << std::endl << translation_vector << std::endl; cv::Mat temp(faceImg); cv::line(temp ,facial5Pts[2], nose_end_point2D[0], cv::Scalar(255,0,0), 2); cv::imshow("vvvvvvvv" ,temp ); cv::waitKey(1); */ cv::Mat rotation3_3; cv::Rodrigues(rotation_vector,rotation3_3); rot2Euler(faceImg.clone(),rotation3_3); } void showheadPost(const cv::Mat& img, std::vector<struct Bbox>& resBox) { for( vector<struct Bbox>::iterator it=resBox.begin(); it!=resBox.end();it++){ if((*it).exist){ int it_x1 = (*it).y1; //这里需注意,(*it).y1表示第一个点的x坐标,(*it).x1表示y坐标 int it_y1 = (*it).x1; int it_x2 = (*it).y2; int it_y2 = (*it).x2; const cv::Mat faceImg = img(Rect(it_x1 ,it_y1 ,it_x2-it_x1 ,it_y2-it_y1 )); std::vector<Point2d> face5Pts; //脸部5个点的坐标,原点坐标为(0,0) for(int i=0 ;i<5 ; ++i ){ face5Pts.push_back(Point2f(*(it->ppoint+i)-it_x1 , *(it->ppoint+i+5)-it_y1 )); } headPosEstimate(faceImg , face5Pts); } } } int 
main() { cv::Mat img = cv::imread("ldh.jpg"); mtcnn find(img.rows, img.cols); std::vector<struct Bbox> resBox; find.findFace(img , resBox); showheadPost(img,resBox); waitKey(0); return 0; } 四、实验结果 载入两张图片实验,结果如下所示,可以评估侧脸程度,不过失败时会出现nan的计算
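These NaNs come from the angle conversion itself: when the estimated rotation matrix is badly off, the square root in the q0 formula can receive a non-positive argument, and the asin argument can drift outside [-1, 1]. A guarded version of the conversion, sketched in Python (the function name and the plain nested-list matrix input are illustrative, not part of the original code):

import math

def safe_quaternion_euler(R):
    # R is a 3x3 rotation matrix as nested lists or an array.
    trace = 1.0 + R[0][0] + R[1][1] + R[2][2]
    if trace <= 1e-9:
        # Degenerate fit: q0 would be ~0 or imaginary, so reject the pose
        return None
    q0 = math.sqrt(trace) / 2.0
    q1 = (R[2][1] - R[1][2]) / (4.0 * q0)
    q2 = (R[0][2] - R[2][0]) / (4.0 * q0)
    q3 = (R[1][0] - R[0][1]) / (4.0 * q0)
    s = max(-1.0, min(1.0, 2.0 * (q0 * q2 + q1 * q3)))  # clamp the asin input
    yaw = math.asin(s)
    pitch = math.atan2(2*(q0*q1 - q2*q3), q0*q0 - q1*q1 - q2*q2 + q3*q3)
    roll = math.atan2(2*(q0*q3 - q1*q2), q0*q0 + q1*q1 - q2*q2 - q3*q3)
    return yaw, pitch, roll

In the C++ code above, the equivalent guard is to check the argument of std::sqrt and clamp the argument of std::asin before calling them.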


Reference for computing the angles by hand in C++:

https://blog.csdn.net/Taily_Duan/article/details/60580694

Python versions based on MTCNN:

https://github.com/JuneoXIE/mtcnn-opencv_face_pose_estimation/blob/bedc3e7503190b2d468f64ef01a48d0e61ed3e3c/pose_estimate.py

https://github.com/JuneoXIE/Face_blur_detection/blob/9d3d350343b0726006d9fb98f699254158336512/mtcnn/pose_estimate.py

# encoding=utf8
import os
import numpy as np
import cv2
from test import *   # provides get_landmarks()
import math
import _pickle as pickle
# from PIL import Image, ImageDraw, ImageFont

data_dir = r'.../test_datasets/'
save_dir = r'.../results/'

def drawResult(imgpath, yaw, pitch, roll, save_dir):
    img = cv2.imread(imgpath)
    draw = img.copy()
    cv2.putText(draw, "Yaw:"+str(yaw), (20,40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0))
    cv2.putText(draw, "Pitch:"+str(pitch), (20,80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0))
    cv2.putText(draw, "Roll:"+str(roll), (20,120), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0))
    cv2.imwrite(save_dir+os.path.splitext(imgpath)[0]+'_pose_estimate1.jpg', draw)

def rot2Euler(imgpath, rotation_vector, save_dir):
    # calculate the rotation angle
    theta = cv2.norm(rotation_vector, cv2.NORM_L2)

    # transform to a quaternion
    w = math.cos(theta / 2)
    x = math.sin(theta / 2)*rotation_vector[0][0] / theta
    y = math.sin(theta / 2)*rotation_vector[1][0] / theta
    z = math.sin(theta / 2)*rotation_vector[2][0] / theta

    ysqr = y * y
    # pitch (x-axis rotation)
    t0 = 2.0 * (w * x + y * z)
    t1 = 1.0 - 2.0 * (x * x + ysqr)
    print('t0:{}, t1:{}'.format(t0, t1))
    pitch = math.atan2(t0, t1) - 0.8356857   # empirical offset from the original post

    # yaw (y-axis rotation)
    t2 = 2.0 * (w * y - z * x)
    if t2 > 1.0:
        t2 = 1.0
    if t2 < -1.0:
        t2 = -1.0
    yaw = math.asin(t2) + 0.005409           # empirical offset

    # roll (z-axis rotation)
    t3 = 2.0 * (w * z + x * y)
    t4 = 1.0 - 2.0 * (ysqr + z * z)
    roll = math.atan2(t3, t4) - 2.573345436  # empirical offset

    # unit conversion: radians to degrees
    pitch_degree = int((pitch/math.pi)*180)
    yaw_degree = int((yaw/math.pi)*180)
    roll_degree = int((roll/math.pi)*180)

    drawResult(imgpath, yaw, pitch, roll, save_dir)

    print("Radians:")
    print("Yaw:", yaw)
    print("Pitch:", pitch)
    print("Roll:", roll)

    print("Degrees:")
    img = cv2.imread(imgpath)
    draw = img.copy()
    if yaw_degree > 0:
        output_yaw = "face turns left:"+str(abs(yaw_degree))+" degrees"
        cv2.putText(draw, output_yaw, (20,40), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
        print(output_yaw)
    if yaw_degree < 0:
        output_yaw = "face turns right:"+str(abs(yaw_degree))+" degrees"
        cv2.putText(draw, output_yaw, (20,40), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
        print(output_yaw)
    if pitch_degree < 0:
        output_pitch = "face downwards:"+str(abs(pitch_degree))+" degrees"
        cv2.putText(draw, output_pitch, (20,80), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
        print(output_pitch)
    if pitch_degree > 0:
        output_pitch = "face upwards:"+str(abs(pitch_degree))+" degrees"
        cv2.putText(draw, output_pitch, (20,80), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
        print(output_pitch)
    if roll_degree < 0:
        output_roll = "face bends to the right:"+str(abs(roll_degree))+" degrees"
        cv2.putText(draw, output_roll, (20,120), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
        print(output_roll)
    if roll_degree > 0:
        output_roll = "face bends to the left:"+str(abs(roll_degree))+" degrees"
        cv2.putText(draw, output_roll, (20,120), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
        print(output_roll)
    if abs(yaw) < 0.00001 and abs(pitch) < 0.00001 and abs(roll) < 0.00001:
        cv2.putText(draw, "Initial status", (20,40), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
        print("Initial status")
    cv2.imwrite(save_dir+os.path.splitext(imgpath)[0]+'_pose_estimate2.jpg', draw)

def headPosEstimate(imgpath, landmarks, bbox, save_dir):
    # All input matrices to solvePnP must be double
    # 3D model points
    model_3d_points = np.array(([-165.0, 170.0, -115.0],   # Left eye
                                [165.0, 170.0, -115.0],    # Right eye
                                [0.0, 0.0, 0.0],           # Nose tip
                                [-150.0, -150.0, -125.0],  # Left mouth corner
                                [150.0, -150.0, -125.0]),  # Right mouth corner
                               dtype=np.double)
    # Convert (not reinterpret) the landmark array to double; the original
    # post assigned to landmarks.dtype, which reinterprets the raw bytes
    landmarks = landmarks.astype(np.double)

    # Camera internals
    img = cv2.imread(imgpath)
    img_size = img.shape
    focal_length = img_size[1]
    center = [img_size[1]/2, img_size[0]/2]
    camera_matrix = np.array(([focal_length, 0, center[0]],
                              [0, focal_length, center[1]],
                              [0, 0, 1]), dtype=np.double)
    dist_coeffs = np.array([0, 0, 0, 0], dtype=np.double)

    found, rotation_vector, translation_vector = cv2.solvePnP(model_3d_points, landmarks, camera_matrix, dist_coeffs)
    rot2Euler(imgpath, rotation_vector, save_dir)

# Test image paths
image_names = os.listdir(data_dir)
for index, image_name in enumerate(image_names):
    imgpath = data_dir + image_name
    bbox, landmarks = get_landmarks(imgpath)
    print("Image_index:", index)
    headPosEstimate(imgpath, landmarks, bbox, save_dir)
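One detail worth flagging: cv2.solvePnP expects the image points as an N×2 floating-point array, while MTCNN implementations often return the five landmarks as a flat [x1..x5, y1..y5] vector. A minimal sketch of the conversion (the flat layout and the sample values are assumptions about the detector's output, chosen for illustration):

import numpy as np

# Hypothetical flat landmark vector from an MTCNN-style detector: [x1..x5, y1..y5]
flat = np.array([120, 180, 150, 125, 175, 95, 95, 130, 160, 160], dtype=np.float32)

# Rebuild the (5, 2) array of (x, y) points and convert to double for solvePnP.
# astype() converts the values; assigning to .dtype would reinterpret raw bytes.
pts_2d = np.stack([flat[:5], flat[5:]], axis=1).astype(np.float64)
assert pts_2d.shape == (5, 2)
print(pts_2d)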

A dlib version using the 68-point landmarks:

https://github.com/JuneoXIE/dlib-opencv_face_pose_estimation/blob/master/pose_estimate_dlib.py

#!/usr/bin/env python
import os
import cv2
import numpy as np
import dlib
import time
import math

data_dir = r"...\test_datasets"
save_dir = r"...\results"
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(r"...\shape_predictor_68_face_landmarks.dat")
POINTS_NUM_LANDMARK = 68

# Pick the largest detected face
def _largest_face(dets):
    if len(dets) == 1:
        return 0
    face_areas = [(det.right()-det.left())*(det.bottom()-det.top()) for det in dets]
    largest_area = face_areas[0]
    largest_index = 0
    for index in range(1, len(dets)):
        if face_areas[index] > largest_area:
            largest_index = index
            largest_area = face_areas[index]
    print("largest_face index is {} in {} faces".format(largest_index, len(dets)))
    return largest_index

# Extract the point coordinates needed for pose estimation from dlib's landmark result
def get_image_points_from_landmark_shape(landmark_shape):
    if landmark_shape.num_parts != POINTS_NUM_LANDMARK:
        print("ERROR:landmark_shape.num_parts-{}".format(landmark_shape.num_parts))
        return -1, None
    # 2D image points. If you change the image, you need to change the vector
    image_points = np.array([
        (landmark_shape.part(17).x, landmark_shape.part(17).y),  # 17 left brow left corner
        (landmark_shape.part(21).x, landmark_shape.part(21).y),  # 21 left brow right corner
        (landmark_shape.part(22).x, landmark_shape.part(22).y),  # 22 right brow left corner
        (landmark_shape.part(26).x, landmark_shape.part(26).y),  # 26 right brow right corner
        (landmark_shape.part(36).x, landmark_shape.part(36).y),  # 36 left eye left corner
        (landmark_shape.part(39).x, landmark_shape.part(39).y),  # 39 left eye right corner
        (landmark_shape.part(42).x, landmark_shape.part(42).y),  # 42 right eye left corner
        (landmark_shape.part(45).x, landmark_shape.part(45).y),  # 45 right eye right corner
        (landmark_shape.part(31).x, landmark_shape.part(31).y),  # 31 nose left corner
        (landmark_shape.part(35).x, landmark_shape.part(35).y),  # 35 nose right corner
        (landmark_shape.part(48).x, landmark_shape.part(48).y),  # 48 mouth left corner
        (landmark_shape.part(54).x, landmark_shape.part(54).y),  # 54 mouth right corner
        (landmark_shape.part(57).x, landmark_shape.part(57).y),  # 57 mouth central bottom corner
        (landmark_shape.part(8).x, landmark_shape.part(8).y),    # 8 chin corner
    ], dtype="double")
    return 0, image_points

# Detect landmarks with dlib and return the points needed for pose estimation
def get_image_points(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # grayscale copy (the detector below is run on the color image)
    dets = detector(img, 0)
    if 0 == len(dets):
        print("ERROR: found no face")
        return -1, None
    largest_index = _largest_face(dets)
    face_rectangle = dets[largest_index]
    landmark_shape = predictor(img, face_rectangle)
    return get_image_points_from_landmark_shape(landmark_shape)

# Get the rotation and translation vectors
def get_pose_estimation(img_size, image_points):
    # 3D model points.
    model_points = np.array([
        (6.825897, 6.760612, 4.402142),    # 33 left brow left corner
        (1.330353, 7.122144, 6.903745),    # 29 left brow right corner
        (-1.330353, 7.122144, 6.903745),   # 34 right brow left corner
        (-6.825897, 6.760612, 4.402142),   # 38 right brow right corner
        (5.311432, 5.485328, 3.987654),    # 13 left eye left corner
        (1.789930, 5.393625, 4.413414),    # 17 left eye right corner
        (-1.789930, 5.393625, 4.413414),   # 25 right eye left corner
        (-5.311432, 5.485328, 3.987654),   # 21 right eye right corner
        (2.005628, 1.409845, 6.165652),    # 55 nose left corner
        (-2.005628, 1.409845, 6.165652),   # 49 nose right corner
        (2.774015, -2.080775, 5.048531),   # 43 mouth left corner
        (-2.774015, -2.080775, 5.048531),  # 39 mouth right corner
        (0.000000, -3.116408, 6.097667),   # 45 mouth central bottom corner
        (0.000000, -7.415691, 4.070434)    # 6 chin corner
    ])
    # Camera internals
    focal_length = img_size[1]
    center = (img_size[1]/2, img_size[0]/2)
    camera_matrix = np.array(
        [[focal_length, 0, center[0]],
         [0, focal_length, center[1]],
         [0, 0, 1]], dtype="double"
    )
    # Sample lens distortion coefficients (note: these are nonzero, so this
    # does NOT assume a distortion-free lens)
    dist_coeffs = np.array([7.0834633684407095e-002, 6.9140193737175351e-002, 0.0, 0.0, -1.3073460323689292e+000], dtype="double")
    (success, rotation_vector, translation_vector) = cv2.solvePnP(model_points, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
    # print("Rotation Vector:\n {}".format(rotation_vector))
    # print("Translation Vector:\n {}".format(translation_vector))
    return success, rotation_vector, translation_vector, camera_matrix, dist_coeffs

# Convert the rotation vector to Euler angles
def get_euler_angle(rotation_vector):
    # calculate the rotation angle
    theta = cv2.norm(rotation_vector, cv2.NORM_L2)
    # transform to a quaternion
    w = math.cos(theta / 2)
    x = math.sin(theta / 2)*rotation_vector[0][0] / theta
    y = math.sin(theta / 2)*rotation_vector[1][0] / theta
    z = math.sin(theta / 2)*rotation_vector[2][0] / theta
    ysqr = y * y
    # pitch (x-axis rotation)
    t0 = 2.0 * (w * x + y * z)
    t1 = 1.0 - 2.0 * (x * x + ysqr)
    # print('t0:{}, t1:{}'.format(t0, t1))
    pitch = math.atan2(t0, t1)
    # yaw (y-axis rotation)
    t2 = 2.0 * (w * y - z * x)
    if t2 > 1.0:
        t2 = 1.0
    if t2 < -1.0:
        t2 = -1.0
    yaw = math.asin(t2)
    # roll (z-axis rotation)
    t3 = 2.0 * (w * z + x * y)
    t4 = 1.0 - 2.0 * (ysqr + z * z)
    roll = math.atan2(t3, t4)
    print('pitch:{}, yaw:{}, roll:{}'.format(pitch, yaw, roll))
    # unit conversion: radians to degrees
    pitch_degree = int((pitch/math.pi)*180)
    yaw_degree = int((yaw/math.pi)*180)
    roll_degree = int((roll/math.pi)*180)
    return 0, pitch, yaw, roll, pitch_degree, yaw_degree, roll_degree

def get_pose_estimation_in_euler_angle(landmark_shape, im_size):
    try:
        ret, image_points = get_image_points_from_landmark_shape(landmark_shape)
        if ret != 0:
            print('get_image_points failed')
            return -1, None, None, None
        ret, rotation_vector, translation_vector, camera_matrix, dist_coeffs = get_pose_estimation(im_size, image_points)
        if not ret:
            print('get_pose_estimation failed')
            return -1, None, None, None
        # get_euler_angle returns 7 values; the original post unpacked only 4 here
        ret, pitch, yaw, roll, _, _, _ = get_euler_angle(rotation_vector)
        if ret != 0:
            print('get_euler_angle failed')
            return -1, None, None, None
        euler_angle_str = 'Pitch:{}, Yaw:{}, Roll:{}'.format(pitch, yaw, roll)
        print(euler_angle_str)
        return 0, pitch, yaw, roll
    except Exception as e:
        print('get_pose_estimation_in_euler_angle exception:{}'.format(e))
        return -1, None, None, None

if __name__ == '__main__':
    # Read images
    image_names = os.listdir(data_dir)
    for index, image_name in enumerate(image_names):
        print("Image:", image_name)
        imgpath = data_dir + '\\' + image_name
        im = cv2.imread(imgpath)
        size = im.shape
        if size[0] > 700:
            h = size[0] / 3
            w = size[1] / 3
            im = cv2.resize(im, (int(w), int(h)), interpolation=cv2.INTER_CUBIC)
            size = im.shape
        ret, image_points = get_image_points(im)
        if ret != 0:
            print('get_image_points failed')
            continue
        ret, rotation_vector, translation_vector, camera_matrix, dist_coeffs = get_pose_estimation(size, image_points)
        if not ret:
            print('get_pose_estimation failed')
            continue
        ret, pitch, yaw, roll, pitch_degree, yaw_degree, roll_degree = get_euler_angle(rotation_vector)
        draw = im.copy()
        # Yaw:
        if yaw_degree < 0:
            output_yaw = "face turns left:" + str(abs(yaw_degree)) + " degrees"
            # cv2.putText(draw, output_yaw, (20,40), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
            print(output_yaw)
        if yaw_degree == 0:
            print("face doesn't turn left or right")
        if yaw_degree > 0:
            output_yaw = "face turns right:" + str(abs(yaw_degree)) + " degrees"
            # cv2.putText(draw, output_yaw, (20,40), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
            print(output_yaw)
        # Pitch:
        if pitch_degree > 0:
            output_pitch = "face downwards:" + str(abs(pitch_degree)) + " degrees"
            # cv2.putText(draw, output_pitch, (20,80), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
            print(output_pitch)
        if pitch_degree == 0:
            print("face not downwards or upwards")
        if pitch_degree < 0:
            output_pitch = "face upwards:" + str(abs(pitch_degree)) + " degrees"
            # cv2.putText(draw, output_pitch, (20,80), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
            print(output_pitch)
        # Roll:
        if roll_degree < 0:
            output_roll = "face bends to the right:" + str(abs(roll_degree)) + " degrees"
            # cv2.putText(draw, output_roll, (20,120), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
            print(output_roll)
        if roll_degree == 0:
            print("face doesn't bend to the right or the left.")
        if roll_degree > 0:
            output_roll = "face bends to the left:" + str(abs(roll_degree)) + " degrees"
            # cv2.putText(draw, output_roll, (20,120), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
            print(output_roll)
        # Initial status:
        if abs(yaw) < 0.00001 and abs(pitch) < 0.00001 and abs(roll) < 0.00001:
            # cv2.putText(draw, "Initial status", (20,40), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,255,0))
            print("Initial status")
        # cv2.imwrite(save_dir + "\\" + os.path.splitext(imgpath)[0] + '_pose_estimate.jpg', draw)
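As a cross-check on the manual quaternion math, OpenCV can also decompose the rotation directly. A minimal sketch (the helper name is mine; cv2.RQDecomp3x3 returns the three Euler angles in degrees as its first output, and the angle order and sign conventions may differ from the formulas above):

import cv2
import numpy as np

def euler_from_rvec(rotation_vector):
    # Rodrigues converts the rotation vector from solvePnP into a 3x3 rotation matrix
    rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
    # RQDecomp3x3 decomposes the matrix and reports Euler angles in degrees
    euler_degrees, _, _, _, _, _ = cv2.RQDecomp3x3(rotation_matrix)
    return euler_degrees  # rotations about x, y, z under OpenCV's convention

# Example with a zero rotation vector (identity pose): all angles should be ~0
print(euler_from_rvec(np.zeros((3, 1), dtype=np.float64)))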

