人脸关键点检测和头部姿态估计数据集生成

mac2025-07-29 26

接上篇人脸关键点检测和头部姿态估计数据集整理

1. 借助于OpenFace生成人脸关键点和头部姿态，使用链接：https://github.com/TadasBaltrusaitis/OpenFace/wiki/Command-line-arguments

build/bin/FaceLandmarkImg.exe -fdir "images path"

具体使用可参考博客，每张人脸可得到一个csv文件，里面记录了人脸的很多信息，我们只需要68个关键点和头部姿态

2. 借助于人脸检测网络生成人脸检测结果，每张人脸可得到一个csv文件，里面记录了人脸的左上角和右下角坐标，具体使用可参考博客

3. 结合OpenFace和人脸检测结果生成数据集label标注文件，格式为：

face_path x1 y1 x2 y2 x3 y3 ...... x68 y68 pitch yaw roll

执行代码：Generate_labels.py

"""Generate the 300W label file: cropped face path, 68 landmarks and head pose.

Each output line has the form
``face_path x1 y1 ... x68 y68 pitch yaw roll``.
"""
import argparse
import csv
import math
import os
import sys
from shutil import copyfile

import numpy as np

# NOTE: cv2 is imported inside Generate_labels() so that the pure geometry
# helpers in this module can be imported without OpenCV being installed.

# Column layout of the OpenFace CSV rows read by this script: three pose
# values, then 68 landmark x coordinates, then 68 landmark y coordinates.
POSE_COL = 293
LANDMARK_X_COL = 296
NUM_LANDMARKS = 68
LANDMARK_Y_COL = LANDMARK_X_COL + NUM_LANDMARKS

# Minimum IoU with the ground-truth landmark box for a match to be accepted.
POSE_IOU_THRESHOLD = 0.5
DETECTION_IOU_THRESHOLD = 0.7


def isRotationMatrix(R):
    """Return True when ``R^T R`` equals the 3x3 identity to within 1e-6.

    Only orthogonality is checked; the determinant sign is not.
    """
    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    I = np.identity(3, dtype=R.dtype)
    n = np.linalg.norm(I - shouldBeIdentity)
    return n < 1e-6


def vec2matrix(rvec):
    """Convert a rotation vector (axis * angle) to a 3x3 rotation matrix.

    :param rvec: three values, e.g. a (3, 1) array or ``[[rx], [ry], [rz]]``
    :return: 3x3 numpy array (Rodrigues' formula)
    """
    rvec = np.asarray(rvec, dtype=float).reshape(3, 1)
    theta = np.linalg.norm(rvec)
    if theta < 1e-12:
        # Zero rotation: the axis is undefined, avoid dividing by zero.
        return np.eye(3)
    r = rvec / theta
    R_ = np.array([[0, -r[2][0], r[1][0]],
                   [r[2][0], 0, -r[0][0]],
                   [-r[1][0], r[0][0], 0]])
    return (np.cos(theta) * np.eye(3)
            + (1 - np.cos(theta)) * np.dot(r, r.T)
            + np.sin(theta) * R_)


def rotationMatrixToAngles(R):
    """Convert a rotation matrix to three Euler angles in degrees.

    :param R: 3x3 rotation matrix (asserted with isRotationMatrix)
    :return: numpy array ``[-x + 10, y, z]`` in degrees
    """
    assert (isRotationMatrix(R))
    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    singular = sy < 1e-6
    if not singular:
        x = math.atan2(R[2, 1], R[2, 2])
        y = math.atan2(-R[2, 0], sy)
        z = math.atan2(R[1, 0], R[0, 0])
    else:
        x = math.atan2(-R[1, 2], R[1, 1])
        y = math.atan2(-R[2, 0], sy)
        z = 0
    x = x * 180.0 / math.pi
    y = y * 180.0 / math.pi
    z = z * 180.0 / math.pi
    # NOTE(review): the sign flip and the +10 degree offset on the first angle
    # are kept from the original; their purpose is not evident here - confirm.
    return np.array([-1.0 * x + 10, y, z])


def compute_iou(rec1, rec2):
    """Compute the intersection-over-union of two rectangles.

    :param rec1: (y0, x0, y1, x1), i.e. (top, left, bottom, right)
    :param rec2: (y0, x0, y1, x1)
    :return: scalar IoU, 0 when the rectangles do not overlap
    """
    S_rec1 = (rec1[2] - rec1[0]) * (rec1[3] - rec1[1])
    S_rec2 = (rec2[2] - rec2[0]) * (rec2[3] - rec2[1])
    sum_area = S_rec1 + S_rec2

    # Edges of the intersection rectangle.
    left_line = max(rec1[1], rec2[1])
    right_line = min(rec1[3], rec2[3])
    top_line = max(rec1[0], rec2[0])
    bottom_line = min(rec1[2], rec2[2])

    if left_line >= right_line or top_line >= bottom_line:
        return 0
    intersect = (right_line - left_line) * (bottom_line - top_line)
    return (intersect / (sum_area - intersect)) * 1.0


def _bounding_box(points):
    """Return (xmin, ymin, xmax, ymax) of a list of [x, y] points."""
    xs = [p[0] for p in points]
    ys = [p[1] for p in points]
    return [min(xs), min(ys), max(xs), max(ys)]


def _read_openface_faces(csv_file_path):
    """Read an OpenFace CSV; return one (pose, landmark_box) pair per face row."""
    faces = []
    with open(csv_file_path, 'r') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row
        for item in reader:
            if len(item) < LANDMARK_Y_COL + NUM_LANDMARKS:
                continue  # blank or truncated row
            points = [[float(item[LANDMARK_X_COL + i]), float(item[LANDMARK_Y_COL + i])]
                      for i in range(NUM_LANDMARKS)]
            pose = item[POSE_COL:POSE_COL + 3]
            faces.append((pose, _bounding_box(points)))
    return faces


def _read_pts_landmarks(pts_file_path):
    """Read the 68 [x, y] points of a .pts file, or None if it holds fewer."""
    with open(pts_file_path, 'r') as fp_pts:
        # The three leading lines are skipped; the next 68 hold the points.
        lines = fp_pts.readlines()[3:3 + NUM_LANDMARKS]
    points = []
    for line in lines:
        fields = line.split()
        if len(fields) < 2:
            return None
        points.append([float(fields[0]), float(fields[1])])
    return points if len(points) == NUM_LANDMARKS else None


def _read_detections(txt_file_path):
    """Read detector boxes [xmin, ymin, xmax, ymax]; an absent file gives []."""
    if not os.path.exists(txt_file_path):
        return []
    boxes = []
    with open(txt_file_path, 'r') as fp_txt:
        for line in fp_txt:
            fields = line.split()
            if len(fields) >= 4:
                boxes.append([float(v) for v in fields[:4]])
    return boxes


def _best_match(reference, candidates, threshold):
    """Return the index of the candidate with the highest IoU >= threshold, else None.

    All boxes are given as (xmin, ymin, xmax, ymax).
    """
    ref = (reference[1], reference[0], reference[3], reference[2])
    best_index = None
    best_iou = 0
    for index, (xmin, ymin, xmax, ymax) in enumerate(candidates):
        iou = compute_iou(ref, (ymin, xmin, ymax, xmax))
        if iou >= threshold and iou > best_iou:
            best_index, best_iou = index, iou
    return best_index


def _square_crop_box(img_shape, det_box, pts_box):
    """Return the integer crop (xmin, ymin, xmax, ymax), clamped to the image.

    A matched detector box is widened to its own height; otherwise the
    landmark box is expanded to a square on its longer side.
    """
    img_height, img_width = img_shape[0], img_shape[1]
    if det_box is not None:
        xmin_txt, ymin_txt, xmax_txt, ymax_txt = det_box
        width = xmax_txt - xmin_txt
        height = ymax_txt - ymin_txt
        adjust_value = 1.0 * (height - width) / 2.0
        xmin = int(round(max(0, xmin_txt - adjust_value)))
        ymin = int(round(max(0, ymin_txt)))
        xmax = int(round(min(img_width, xmin + height)))
        ymax = int(round(min(img_height, ymax_txt)))
        return xmin, ymin, xmax, ymax

    xmin_pts, ymin_pts, xmax_pts, ymax_pts = pts_box
    width = xmax_pts - xmin_pts
    height = ymax_pts - ymin_pts
    long_size = max(width, height)
    adjust_value_x = 1.0 * (long_size - width) / 2.0
    adjust_value_y = 1.0 * (long_size - height) / 2.0
    xmin = int(round(max(0, xmin_pts - adjust_value_x)))
    xmax = int(round(min(img_width, xmin + long_size)))
    ymin = int(round(max(0, ymin_pts - adjust_value_y)))
    ymax = int(round(min(img_height, ymin + long_size)))
    return xmin, ymin, xmax, ymax


def Generate_labels(img_path, csv_path, txt_path, face_path, label_path):
    """Crop every annotated face and write ``300w_label.txt``.

    For each jpg/png image in ``img_path`` the ground-truth landmarks are read
    from the sibling ``.pts`` file, the head pose is taken from the OpenFace
    row whose landmark box best overlaps the ground truth (IoU >= 0.5), and
    the crop comes from the best overlapping detector box (IoU >= 0.7) or,
    failing that, from the landmark box itself. Images with a missing
    ``.csv``/``.pts`` file, an unreadable image or no matching pose are skipped.

    :param img_path: directory with the 300w images and their .pts files
    :param csv_path: directory with the OpenFace .csv results
    :param txt_path: directory with the face detector .txt results
    :param face_path: directory where the cropped faces are saved
    :param label_path: directory where 300w_label.txt is written
    :return: None
    """
    import cv2

    for directory in (face_path, label_path):
        if directory:
            os.makedirs(directory, exist_ok=True)

    with open(os.path.join(label_path, '300w_label.txt'), 'w') as fp_label:
        # Sorted so the label file has a deterministic order.
        for img_file in sorted(os.listdir(img_path)):
            print('now is dealing with ' + str(img_file))
            filename, extension = os.path.splitext(img_file)
            if extension[1:] not in ('jpg', 'png'):
                continue

            csv_file_path = os.path.join(csv_path, filename + '.csv')
            pts_file_path = os.path.join(img_path, filename + '.pts')
            if not os.path.exists(csv_file_path) or not os.path.exists(pts_file_path):
                continue

            img = cv2.imread(os.path.join(img_path, img_file))
            if img is None:
                continue

            landmarks = _read_pts_landmarks(pts_file_path)
            if landmarks is None:
                continue
            pts_box = _bounding_box(landmarks)

            # Head pose: OpenFace face that best matches the ground truth.
            faces = _read_openface_faces(csv_file_path)
            pose_index = _best_match(pts_box, [box for _, box in faces], POSE_IOU_THRESHOLD)
            if pose_index is None:
                continue
            face_pose = faces[pose_index][0]

            # Crop: best matching detector box, else the landmark box.
            detections = _read_detections(os.path.join(txt_path, filename + '.txt'))
            det_index = _best_match(pts_box, detections, DETECTION_IOU_THRESHOLD)
            det_box = detections[det_index] if det_index is not None else None
            xmin, ymin, xmax, ymax = _square_crop_box(img.shape, det_box, pts_box)
            print(xmax - xmin, ymax - ymin)
            if xmax <= xmin or ymax <= ymin:
                continue  # nothing left to crop after clamping

            img_face = img[ymin:ymax, xmin:xmax, :]
            face_file = os.path.join(face_path, filename + '.jpg')
            cv2.imwrite(face_file, img_face)

            # Landmarks are written relative to the crop origin.
            fields = [face_file]
            for point_x, point_y in landmarks:
                fields.append(str(point_x - xmin))
                fields.append(str(point_y - ymin))
            # The three pose values are converted from radians to degrees
            # directly; vec2matrix() + rotationMatrixToAngles() is the
            # alternative route that treats them as a rotation vector.
            fields.extend(str(float(angle) / math.pi * 180) for angle in face_pose)
            fp_label.write(' '.join(fields) + '\n')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--img_path", type=str, default='img_path', help="the path to the 300w dataset")
    parser.add_argument("--csv_path", type=str, default='csv_path', help="the path to the result of OpenFace")
    parser.add_argument("--txt_path", type=str, default='txt_path', help="the path to the face detect")
    parser.add_argument("--face_path", type=str, default='face_path_test', help="the path to the crop face saved")
    parser.add_argument("--label_path", type=str, default='label_path_test', help="the path to the label of 300w dataset")
    opt = parser.parse_args()
    Generate_labels(opt.img_path, opt.csv_path, opt.txt_path, opt.face_path, opt.label_path)

4.参考github：https://github.com/lsy17096535/face-landmark/blob/master/train/mainloop.py

生成hdf5时对数据集做的预处理操作如下所示：

image 人脸：
1. resize(img, (60, 60)).astype('f4')
2. cvtColor(BGR2GRAY).reshape(1, 60, 60)
3. m, s = cv2.meanStdDev(image)
4. image = (image - m) / (1.e-6 + s)

landmarks 关键点：
1. 乘以 scale 系数：rx = 60 / 人脸图像宽度，ry = 60 / 人脸图像高度
2. 归一化：除以 60

poses 头部姿态：
1. 归一化：除以 50

执行代码：Generate_hdf5.py

"""Convert the txt label file into an HDF5 dataset (image, landmarks, pose)."""
import sys,os
import cv2
import numpy as np
from shutil import copyfile

# NOTE(review): this puts 'caffe/python/python' on sys.path and caffe is not
# referenced anywhere in this script - confirm whether the import is needed.
caffe_root = 'caffe/python/'
sys.path.insert(0, caffe_root + 'python')
import caffe
import h5py
import argparse

IMAGE_SIZE = 60  # fixed size to all images
NUM_COORDS = 136  # 68 landmarks, stored as x1 y1 ... x68 y68
NUM_POSE = 3
POSE_NORMALIZE_FACTOR = 50


def Generate_hdf5(train_txt_file, train_h5_file, train_h5_list_file):
    """Generate an HDF5 file from the txt label file.

    Each image is resized to IMAGE_SIZE x IMAGE_SIZE, converted to grayscale
    and standardised to zero mean / unit standard deviation. Landmarks are
    rescaled to the resized image and divided by IMAGE_SIZE; pose values are
    divided by POSE_NORMALIZE_FACTOR.

    :param train_txt_file: label in txt format (imgpath x1 y1 x2 y2 ...... x68 y68 pitch yaw roll)
    :param train_h5_file: h5 file to be saved
    :param train_h5_list_file: list file in txt format to be saved
    :return: None
    :raises ValueError: when a label line has too few fields
    :raises IOError: when an image listed in the label file cannot be read
    """
    with open(train_txt_file, 'r') as T:
        # Blank lines carry no sample and are dropped.
        records = [line.split() for line in T if line.strip()]

    HD5Images = np.zeros([len(records), 1, IMAGE_SIZE, IMAGE_SIZE], dtype='float32')
    HD5Landmarks = np.zeros([len(records), NUM_COORDS], dtype='float32')
    HD5Poses = np.zeros([len(records), NUM_POSE], dtype='float32')

    for i, sp in enumerate(records):
        if len(sp) < 1 + NUM_COORDS + NUM_POSE:
            raise ValueError('label line %d has %d fields, expected %d'
                             % (i + 1, len(sp), 1 + NUM_COORDS + NUM_POSE))
        print(sp[0])
        img = cv2.imread(sp[0])
        if img is None:
            raise IOError('cannot read image: ' + sp[0])

        height, width = img.shape[0], img.shape[1]
        rx, ry = 1.0 * IMAGE_SIZE / width, 1.0 * IMAGE_SIZE / height

        # interpolation is passed by keyword so nothing lands in the
        # positional dst/fx slots of cv2.resize.
        res = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE), interpolation=cv2.INTER_CUBIC)
        gray = cv2.cvtColor(res.astype('f4'), cv2.COLOR_BGR2GRAY)
        # Statistics are taken on the 2-D image: a (1, H, W) array handed to
        # OpenCV is read as a W-channel image and gives per-channel values.
        m, s = cv2.meanStdDev(gray)
        HD5Images[i, 0] = (gray - m) / (1.e-6 + s)

        # x coordinates scale with rx, y coordinates with ry.
        coords = np.array([float(v) for v in sp[1:1 + NUM_COORDS]]).reshape(-1, 2)
        coords = coords * [rx, ry] / (1.0 * IMAGE_SIZE)
        HD5Landmarks[i, :] = coords.reshape(-1)

        pose = [float(v) for v in sp[1 + NUM_COORDS:1 + NUM_COORDS + NUM_POSE]]
        HD5Poses[i, :] = np.array(pose) / float(POSE_NORMALIZE_FACTOR)

    with h5py.File(train_h5_file, 'w') as H:
        H.create_dataset('data', data=HD5Images)
        H.create_dataset('label', data=HD5Landmarks)
        H.create_dataset('pose', data=HD5Poses)
    with open(train_h5_list_file, 'w') as L:
        L.write(train_h5_file)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--train_txt_file",type=str,default="label_path/300w_label.txt", help="path to label in txt format")
    parser.add_argument("--train_h5_file",type=str,default="test.h5",help="path to generated h5 file")
    parser.add_argument("--train_h5_list_file",type=str,default="test_h5_list.txt",help="path to generated he list file")
    opt=parser.parse_args()
    Generate_hdf5(opt.train_txt_file, opt.train_h5_file, opt.train_h5_list_file)

5. 可视化hdf5文件是否正确

执行代码：visual_h5.py

"""Draw the stored landmarks on samples of the HDF5 file for a visual check."""
import sys,os
import cv2
import h5py
import numpy as np

RESULT_DIR = '../data/300w_ori/result'
MAX_SAMPLES = 100

os.makedirs(RESULT_DIR, exist_ok=True)

with h5py.File('../data/300w_ori/train.h5','r') as f:
    data = f['data']
    label = f['label']
    pose = f['pose']
    num = data.shape[0]

    # Bounded by the dataset size so a small file does not raise.
    # NOTE(review): the loop starts at 1, so sample 0 is never drawn - this is
    # kept from the original; confirm whether it is intended.
    for i in range(1, min(num, MAX_SAMPLES)):
        img = np.array(data[i,0,:,:]*255)
        # Build the 8-bit BGR canvas directly (round + saturate) instead of
        # writing a JPEG and reading it back.
        gray = np.clip(np.rint(img), 0, 255).astype('uint8')
        image = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
        print(image.shape)

        landmark = np.array(label[i,:])
        print(landmark)

        # Landmarks are stored normalised; scale back to pixel coordinates.
        height, width = image.shape[0], image.shape[1]
        for j in range(len(landmark) // 2):
            x = int(round(float(landmark[2*j]) * width))
            y = int(round(float(landmark[2*j+1]) * height))
            cv2.circle(image,(x,y),1,(0,0,255),1)

        # Pose values are loaded for ad-hoc inspection; they are not drawn.
        Euler = np.array(pose[i,:])
        cv2.imwrite(os.path.join(RESULT_DIR,str(i)+'.jpg'), image)

最新回复(0)