From 0ca55268753a7649e32cea7d4abc9d374e4f5adb Mon Sep 17 00:00:00 2001
From: WC
Date: Mon, 11 Nov 2024 14:42:42 +0800
Subject: [PATCH] update many convertors 241111

---
 mmhuman3d/data/data_converters/__init__.py   |   2 +
 mmhuman3d/data/data_converters/arctic.py     | 472 ++++++++++++++++-
 mmhuman3d/data/data_converters/idea400.py    |  17 +-
 .../data/data_converters/interhand26m.py     |  31 +-
 mmhuman3d/data/data_converters/signavatar.py | 491 +++++++++---------
 5 files changed, 745 insertions(+), 268 deletions(-)

diff --git a/mmhuman3d/data/data_converters/__init__.py b/mmhuman3d/data/data_converters/__init__.py
index 238dd191..e3335e49 100644
--- a/mmhuman3d/data/data_converters/__init__.py
+++ b/mmhuman3d/data/data_converters/__init__.py
@@ -54,6 +54,7 @@
 from .rich import RichConverter
 from .sgnify import SgnifyConverter
 from .shapy import ShapyConverter
+from .signavatar import SignAvatarConverter
 from .sloper4d import Sloper4dConverter
 from .sminchisescu import ImarDatasetsConverter
 from .spin import SpinConverter
@@ -91,4 +92,5 @@
     'Crowd3dConverter', 'BedlamConverter','Pw3dConverter', 'DecoConverter',
     'Hi4dConverter', 'Idea400Converter', 'Pw3dBedlamConverter', 'RichConverter',
     'SynbodyWhacConverter', 'ArcticConverter', 'SynHandConverter',
+    'SignAvatarConverter',
 ]
diff --git a/mmhuman3d/data/data_converters/arctic.py b/mmhuman3d/data/data_converters/arctic.py
index 5be9329f..21946170 100644
--- a/mmhuman3d/data/data_converters/arctic.py
+++ b/mmhuman3d/data/data_converters/arctic.py
@@ -39,7 +39,7 @@ class ArcticConverter(BaseModeConverter):
         'p2_train', 'p2_val']
 
     def __init__(self, modes: List = []) -> None:
-        self.device = torch.device('cuda:0')
+        self.device = torch.device('cuda')
         self.misc_config = dict(
             bbox_body_scale=1.2,
             bbox_facehand_scale=1.0,
@@ -183,9 +183,12 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
             split_num = 4
         else:
             split_num = 1
-
-
-        for split_id in range(split_num):
+
+        # for split_id in range(split_num):
+        for split_id in [0]:
+
+            # set cuda
+            # os.environ['CUDA_VISIBLE_DEVICES'] = str(split_id)
 
             # group seq names
             size_b = len(seq_names) // split_num
@@ -223,6 +226,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
             # high level path
             raw_path = os.path.join(dataset_path, 'raw_seqs')
 
+            # seq_names_batch = seq_names_batch[:5]
+
             # group by sequence
             for seq in tqdm(seq_names_batch, desc=f'Split: {split_id+1} / {split_num}',
                             position=0, leave=False):
@@ -277,7 +282,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
 
                 data_len = data_params["K_ego"].shape[0]
 
-                for cidx in range(9):
+                for cidx in range(1, 9):
 
                     cid = str(cidx)
 
@@ -297,8 +302,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
                     if len(image_paths) == 0:
                         continue
 
-                    if cidx == 0:
-                        pdb.set_trace()
+                    # if cidx == 0:
+                    #     pdb.set_trace()
 
                     smplx_param_save = {}
                     for key in self.smplx_shape.keys():
@@ -441,9 +446,9 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
                         # save verts3d
                         verts3d_path = image_path.replace('jpg', 'npy').replace('images', 'vertices3d')
                         verts3dp = imgp.replace('jpg', 'npy').replace('images', 'vertices3d')
-                        # os.makedirs(os.path.dirname(verts3dp), exist_ok=True)
-                        # np.save(verts3dp, verts3d)
-                        # pdb.set_trace()
+                        os.makedirs(os.path.dirname(verts3dp), exist_ok=True)
+                        if not os.path.exists(verts3dp):
+                            np.save(verts3dp, verts3d)
 
                         # append image path
                         image_path_.append(image_path)
@@ -471,13 +476,16 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
                             meta_['is_valid'].append(is_valid)
                             meta_['right_hand_valid'].append(right_valid)
meta_['left_hand_valid'].append(left_valid) - - # pdb.set_trace() - - - - + + shape = np.concatenate(keypoints2d_, axis=0).reshape(-1, 144, 2).shape[0] + if len(image_path_) != shape: + pdb.set_trace() + if len(meta_['focal_length']) != shape: + pdb.set_trace() + if len(bboxs_['bbox_xywh']) != shape: + pdb.set_trace() + pdb.set_trace() # save keypoints 2d smplx keypoints2d = np.concatenate(keypoints2d_, axis=0).reshape(-1, 144, 2) keypoints2d_conf = np.ones([keypoints2d.shape[0], 144, 1]) @@ -526,10 +534,440 @@ def convert_by_mode(self, dataset_path: str, out_path: str, out_path, f'arctic_{mode}_{seed}_{"{:03d}".format(size_i)}_{split_id}.npz') human_data.dump(out_file) - + # def process_image(args): + # # Unpack arguments + # (image_path, dataset_path, data_2d, data_cam, data_params, seq_extrx, seq_intrx, + # ioi_offset, cidx, image_size_list, betas, sub_id, seq_name, random_ids, gender_seq, + # vtemplates_path, smplx_shape, misc_config) = args + + # def _check_valid(self, data_2d, data_cam, vidx, view_idx): + # assert ( + # vidx < data_2d["joints.right"].shape[0] + # ), "The requested camera id does not exist in annotation" + # is_valid = data_cam["is_valid"][vidx, view_idx] + # right_valid = data_cam["right_valid"][vidx, view_idx] + # left_valid = data_cam["left_valid"][vidx, view_idx] + # return vidx, is_valid, right_valid, left_valid + + # # Initialize device within subprocess + # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + # # Load or initialize your model within subprocess + # vtemplates = trimesh.load(vtemplates_path, force='mesh').vertices.reshape(1, 10475, 3) + # gendered_smplx = build_body_model( + # dict( + # type='SMPLX', + # keypoint_src='smplx', + # keypoint_dst='smplx', + # model_path='data/body_models/smplx', + # gender=gender_seq, + # v_template=vtemplates, + # num_betas=10, + # use_face_contour=True, + # flat_hand_mean=misc_config['flat_hand_mean'], + # use_pca=False, + # batch_size=1)).to(device) + + # # 局部变量用于存储结果 + # local_smplx_ = {} + # for key in smplx_shape.keys(): + # local_smplx_[key] = [] + # local_keypoints2d_, local_keypoints3d_ = [], [] + # local_bboxs_ = {} + # for bbox_name in [ + # 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', + # 'rhand_bbox_xywh' + # ]: + # local_bboxs_[bbox_name] = [] + # local_meta_ = {} + # for meta_name in ['principal_point', 'focal_length', 'height', 'width', 'RT', + # 'sequence_name', 'track_id', 'gender', 'is_valid', 'right_hand_valid', 'left_hand_valid']: + # local_meta_[meta_name] = [] + # local_image_path_ = [] + # local_vtemplate_path_ = [] + # local_verts3d_path_ = [] + + # cid = str(cidx) + # try: + # imgp = os.path.join(dataset_path, image_path) + + # # image idx and mocap idx (vidx) + # image_idx = image_path.split("/")[-1] + # image_id = image_idx.split(".")[0] + # vidx = int(image_id) - ioi_offset + + # # check image and hand validity + # vidx, is_valid, right_valid, left_valid = _check_valid( + # data_2d, data_cam, vidx, cidx) + + # # prepare intrinsics + # extrinsics = seq_extrx[vidx] + # intrinsics = seq_intrx[vidx] + + # # smplx params in world space + # smplx_param = {} + # for key in smplx_shape.keys(): + # if key != 'betas': + # smplx_param[key] = data_params[f'smplx_{key}'][vidx].reshape(smplx_shape[key]) + # smplx_param['betas'] = betas + + # # prepare smplx tensor + # smplx_param_tensor = {} + # for key in smplx_shape.keys(): + # smplx_param_tensor[key] = torch.tensor(smplx_param[key].reshape(smplx_shape[key]), + # dtype=torch.float).to(device) + + # # get output + # 
output_world = gendered_smplx(**smplx_param_tensor) + # kps3d = output_world['joints'].detach().cpu().numpy() + # pelvis_world = kps3d[:, get_keypoint_idx('pelvis', 'smplx'), :] + + # # transfrom to camera space + # global_orient_cam, transl_cam = transform_to_camera_frame( + # global_orient=smplx_param['global_orient'], + # transl=smplx_param['transl'], + # pelvis=pelvis_world, + # extrinsic=extrinsics) + + # smplx_param['global_orient'] = global_orient_cam + # smplx_param['transl'] = transl_cam + + # # prepare smplx tensor + # smplx_param_tensor = {} + # for key in smplx_shape.keys(): + # smplx_param_tensor[key] = torch.tensor(smplx_param[key].reshape(smplx_shape[key]), + # dtype=torch.float).to(device) + + # # get output + # output = gendered_smplx(**smplx_param_tensor, return_verts=True) + + # width, height = image_size_list[cidx] + # focal_length = [intrinsics[0, 0], intrinsics[1, 1]] + # principal_point = [intrinsics[0, 2], intrinsics[1, 2]] + + # camera = build_cameras( + # dict( + # type='PerspectiveCameras', + # convention='opencv', + # in_ndc=False, + # focal_length=focal_length, + # image_size=(width, height), + # principal_point=principal_point)).to(device) + + # # project 3d to 2d + # kps3d = output['joints'] + # verts3d = output['vertices'] + + # kps3d_c = kps3d.detach().cpu().numpy().squeeze() + # kps2d = camera.transform_points_screen(kps3d).detach().cpu().numpy().squeeze()[:, :2] + # verts3d = verts3d.detach().cpu().numpy().squeeze() + + # # get bbox from 2d keypoints + # bboxs = self._keypoints_to_scaled_bbox_bfh( + # kps2d, + # body_scale=self.misc_config['bbox_body_scale'], + # fh_scale=self.misc_config['bbox_facehand_scale']) + # for i, bbox_name in enumerate([ + # 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', + # 'rhand_bbox_xywh' + # ]): + # xmin, ymin, xmax, ymax, conf = bboxs[i] + # bbox = np.array([ + # max(0, xmin), + # max(0, ymin), + # min(width, xmax), + # min(height, ymax) + # ]) + # bbox_xywh = _xyxy2xywh(bbox) # list of len 4 + # bbox_xywh.append(conf) # (5,) + # local_bboxs_[bbox_name].append(bbox_xywh) + + # # save verts3d + # verts3d_path = image_path.replace('jpg', 'npy').replace('images', 'vertices3d') + # verts3dp = imgp.replace('jpg', 'npy').replace('images', 'vertices3d') + # os.makedirs(os.path.dirname(verts3dp), exist_ok=True) + # np.save(verts3dp, verts3d) + + # # append image path + # local_image_path_.append(image_path) + # local_verts3d_path_.append(verts3d_path) + # local_vtemplate_path_.append(vtemplates_path.replace(f'{dataset_path}/', '')) + + # # append keypoints2d and 3d + # local_keypoints2d_.append(kps2d) + # local_keypoints3d_.append(kps3d_c) + + # # add smplx params + # for key in smplx_param.keys(): + # local_smplx_[key].append(smplx_param[key]) + + # sequence_name = f'{sub_id}_{seq_name}_{cid}' + # # append meta + # local_meta_['principal_point'].append(principal_point) + # local_meta_['focal_length'].append(focal_length) + # local_meta_['height'].append(height) + # local_meta_['width'].append(width) + # local_meta_['sequence_name'].append(sequence_name) + # local_meta_['RT'].append(extrinsics) + # local_meta_['track_id'].append(random_ids[int(sub_id[1:])]) + # local_meta_['gender'].append(gender_seq) + # local_meta_['is_valid'].append(is_valid) + # local_meta_['right_hand_valid'].append(right_valid) + # local_meta_['left_hand_valid'].append(left_valid) + + # # 返回结果 + # return { + # 'smplx_': local_smplx_, + # 'keypoints2d_': local_keypoints2d_, + # 'keypoints3d_': local_keypoints3d_, + # 'bboxs_': local_bboxs_, + # 'meta_': 
local_meta_, + # 'image_path_': local_image_path_, + # 'vtemplate_path_': local_vtemplate_path_, + # 'verts3d_path_': local_verts3d_path_ + # } + # except Exception as e: + # print(f"Error processing image {image_path}: {e}") + # return None + + # def convert_by_mode(self, dataset_path: str, out_path: str, + # mode: str) -> dict: + + # import multiprocessing + # multiprocessing.set_start_method('spawn') + # # use HumanData to store all data + # human_data = HumanData() + + # # init seed and size + # seed, size = '241023', '999' + # random.seed(int(seed)) + # np.set_printoptions(suppress=True) + # random_ids = np.random.RandomState(seed=int(seed)).permutation(999999) + # used_id_num = 0 + + # # load split + # split_path = os.path.join(dataset_path, 'splits', mode + '.npy') + # split_info = np.load(split_path, allow_pickle=True).item() + + # image_names = split_info['imgnames'] + # image_names = [img.replace('./arctic_data/data/', '') for img in image_names] + # image_names = [img.replace('./arctic_data/', '') for img in image_names] + # data_dict = split_info['data_dict'] + # seq_names = list(data_dict.keys()) + + # # get size + # size_i = min(int(size), len(seq_names)) + + # # train 4 split, val 1 split + # if 'train' in mode: + # split_num = 4 + # else: + # split_num = 1 + + + # for split_id in range(split_num): + + # # group seq names + # size_b = len(seq_names) // split_num + # seq_names_batch = seq_names[split_id * size_b: (split_id + 1) * size_b] + + # # load meta + # meta_path = os.path.join(dataset_path, 'meta', 'misc.json') + # with open(meta_path, 'r') as f: + # metadata = json.load(f) + + # cam_params = {} + # for sub in metadata.keys(): + # cam_params[sub] = { # [8, 4, 4] and [8, 3, 3] + # "world2cam": np.array(metadata[sub]["world2cam"]), + # "intrinsics": np.array(metadata[sub]["intris_mat"]),} + + # smplx_ = {} + # for key in self.smplx_shape.keys(): + # smplx_[key] = [] + # keypoints2d_, keypoints3d_ = [], [] + # bboxs_ = {} + # for bbox_name in [ + # 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', + # 'rhand_bbox_xywh' + # ]: + # bboxs_[bbox_name] = [] + # meta_ = {} + # for meta_name in ['principal_point', 'focal_length', 'height', 'width', 'RT', + # 'sequence_name', 'track_id', 'gender', 'is_valid', 'right_hand_valid', 'left_hand_valid']: + # meta_[meta_name] = [] + # image_path_ = [] + # vtemplate_path_ = [] + # verts3d_path_ = [] + + # # high level path + # raw_path = os.path.join(dataset_path, 'raw_seqs') + + # # seq_names_batch = seq_names_batch[:5] + + # # group by sequence + # for seq in tqdm(seq_names_batch, desc=f'Split: {split_id+1} / {split_num}', position=0, leave=False): + + # seq_data = data_dict[seq] + # sub_id = seq.split('/')[0] + # seq_name = seq.split('/')[1] + + # # smplx idx = image idx - ioi_offset + # ioi_offset = metadata[sub_id]['ioi_offset'] + # gender_seq = metadata[sub_id]['gender'] + # image_size_list = metadata[sub_id]['image_size'] + + # betas = np.zeros((1, 10)) + + # # prepare vtemplates path + # vtemplates_path = os.path.join(dataset_path, 'meta', 'subject_vtemplates', f'{sub_id}.obj') + # vtemplates = trimesh.load(vtemplates_path, force='mesh').vertices.reshape(1, 10475, 3) + + # # build gendered smplx + # gendered_smplx = build_body_model( + # dict( + # type='SMPLX', + # keypoint_src='smplx', + # keypoint_dst='smplx', + # model_path='data/body_models/smplx', + # gender=gender_seq, + # v_template=vtemplates, + # num_betas=10, + # use_face_contour=True, + # flat_hand_mean=self.misc_config['flat_hand_mean'], + # use_pca=False, + # 
batch_size=1)).to(self.device) + + # # load params + # data_cam = seq_data["cam_coord"] + # data_2d = seq_data["2d"] + # data_bbox = seq_data["bbox"] + # data_params = seq_data["params"] + + # data_len = data_params["K_ego"].shape[0] + + # # 准备并行处理的参数列表 + # args_list = [] + # for cidx in range(1, 9): + # cid = str(cidx) + # # prepare intrinsics + # if cidx == 0: # ego space + # seq_extrx = data_params['world2ego'].copy() + # seq_intrx = data_params["K_ego"].copy() + # else: + # seq_extrx = np.array(cam_params[sub_id]['world2cam'])[cidx-1] + # seq_extrx = np.repeat(seq_extrx[np.newaxis, :], data_len, axis=0) + # seq_intrx = np.array(cam_params[sub_id]['intrinsics'])[cidx-1] + # seq_intrx = np.repeat(seq_intrx[np.newaxis, :], data_len, axis=0) + # # prepare images to load + # imgp_pattern = f'{seq}/{cid}' + # image_paths = [imgp for imgp in image_names if imgp_pattern in imgp] + + # if len(image_paths) == 0: + # continue + + # for image_path in image_paths: + # args = ( + # image_path, + # dataset_path, + # data_2d, + # data_cam, + # data_params, + # seq_extrx, + # seq_intrx, + # ioi_offset, + # cidx, + # image_size_list, + # betas, + # sub_id, + # seq_name, + # random_ids, + # gender_seq, + # vtemplates_path, + # self.smplx_shape, + # self.misc_config # Pass misc_config instead of self + # ) + # args_list.append(args) + + # import concurrent + # # Start multiprocessing within __main__ + # results = [] + # with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor: + # futures = [executor.submit(self.process_image, args) for args in args_list] + # for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc=f'Processing images', position=1): + # result = future.result() + # if result is not None: + # results.append(result) + + # # 合并结果 + # for res in results: + # # 合并 smplx_ + # for key in res['smplx_'].keys(): + # smplx_[key].extend(res['smplx_'][key]) + # # 合并 keypoints2d_ 和 keypoints3d_ + # keypoints2d_.extend(res['keypoints2d_']) + # keypoints3d_.extend(res['keypoints3d_']) + # # 合并 bboxs_ + # for bbox_name in res['bboxs_'].keys(): + # bboxs_[bbox_name].extend(res['bboxs_'][bbox_name]) + # # 合并 meta_ + # for meta_name in res['meta_'].keys(): + # meta_[meta_name].extend(res['meta_'][meta_name]) + # # 合并 image_path_, vtemplate_path_, verts3d_path_ + # image_path_.extend(res['image_path_']) + # vtemplate_path_.extend(res['vtemplate_path_']) + # verts3d_path_.extend(res['verts3d_path_']) + + # # save keypoints 2d smplx + # keypoints2d = np.concatenate(keypoints2d_, axis=0).reshape(-1, 144, 2) + # keypoints2d_conf = np.ones([keypoints2d.shape[0], 144, 1]) + # keypoints2d = np.concatenate([keypoints2d, keypoints2d_conf], axis=-1) + # keypoints2d, keypoints2d_mask = convert_kps( + # keypoints2d, src='smplx', dst='human_data') + # human_data['keypoints2d_smplx'] = keypoints2d + # human_data['keypoints2d_smplx_mask'] = keypoints2d_mask + + # # save keypoints 3d smplx + # keypoints3d = np.concatenate(keypoints3d_, axis=0).reshape(-1, 144, 3) + # keypoints3d_conf = np.ones([keypoints3d.shape[0], 144, 1]) + # keypoints3d = np.concatenate([keypoints3d, keypoints3d_conf], axis=-1) + # keypoints3d, keypoints3d_mask = convert_kps( + # keypoints3d, src='smplx', dst='human_data') + # human_data['keypoints3d_smplx'] = keypoints3d + # human_data['keypoints3d_smplx_mask'] = keypoints3d_mask + + # # save bbox + # for bbox_name in [ + # 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', + # 'rhand_bbox_xywh' + # ]: + # bbox_xywh_ = np.array(bboxs_[bbox_name]).reshape((-1, 
5)) + # human_data[bbox_name] = bbox_xywh_ + + # # save smplx + # for key in smplx_.keys(): + # smplx_[key] = np.concatenate( + # smplx_[key], axis=0).reshape(self.smplx_shape[key]) + # human_data['smplx'] = smplx_ + # # save image path + # human_data['image_path'] = image_path_ + # human_data['vtemplate_path'] = vtemplate_path_ + # human_data['vertices3d_path'] = verts3d_path_ + + # # save meta and misc + # human_data['config'] = 'arctic' + # human_data['misc'] = self.misc_config + # human_data['meta'] = meta_ + + # os.makedirs(out_path, exist_ok=True) + # out_file = os.path.join( + # # out_path, f'moyo_{self.misc_config["flat_hand_mean"]}.npz') + # out_path, f'arctic_{mode}_{seed}_{"{:03d}".format(size_i)}_{split_id}.npz') + + # human_data.dump(out_file) diff --git a/mmhuman3d/data/data_converters/idea400.py b/mmhuman3d/data/data_converters/idea400.py index af45721f..5dd28106 100644 --- a/mmhuman3d/data/data_converters/idea400.py +++ b/mmhuman3d/data/data_converters/idea400.py @@ -142,8 +142,11 @@ def convert_by_mode(self, dataset_path: str, out_path: str, slice_len = len(anno_ps) // slices for sl_id in range(slices): + # for sl_id in [4]: # use HumanData to store all data human_data = HumanData() + + recorded_tpose = 0 # initialize output for human_data smplx_ = {} @@ -183,7 +186,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str, # continue # image_folder = image_folder_ps[i] if not os.path.exists(image_folder): - print(f'Image folder {image_folder} does not exist!') + # print(f'Image folder {image_folder} does not exist!') continue # get height and width @@ -253,6 +256,11 @@ def convert_by_mode(self, dataset_path: str, out_path: str, # continue # iid = fid / 2 + 1 + # pdb.set_trace() + # if np.sum(smplx_param['body_pose'][fid]) < 1e-5: + # recorded_tpose += 1 + # continue + # get image path imgp = os.path.join(image_folder, f'{fid+1:06d}.png') image_path = imgp.replace(f'{dataset_path}/', '') @@ -262,8 +270,9 @@ def convert_by_mode(self, dataset_path: str, out_path: str, # filter out T pose # pdb.set_trace() - if np.sum(np.abs(smplx_param_tensor['body_pose'][fid].detach().cpu().numpy())) < 1: - print(f'Image {imgp} is T pose!') + if np.sum(np.abs(smplx_param_tensor['body_pose'][fid].detach().cpu().numpy())) < 1e-2: + # print(f'Image {imgp} is T pose!') + recorded_tpose += 1 continue # project kps3d @@ -378,5 +387,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str, out_file = os.path.join( out_path, f'idea400_{mode}_{seed}_{"{:05d}".format(size_i)}_{sl_id}.npz') human_data.dump(out_file) + + print(f'Find {recorded_tpose} T poses in slice {sl_id}') diff --git a/mmhuman3d/data/data_converters/interhand26m.py b/mmhuman3d/data/data_converters/interhand26m.py index 6a58e9af..07c6875b 100644 --- a/mmhuman3d/data/data_converters/interhand26m.py +++ b/mmhuman3d/data/data_converters/interhand26m.py @@ -186,6 +186,10 @@ def convert_by_mode(self, dataset_path: str, out_path: str, seed = '141017' size = 999999 + checked_inst = 0 + failed_inst = 0 + + # initialize smplx_ = {} for hand_type in ['left', 'right']: @@ -197,7 +201,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str, image_path_, keypoints2d_smplx_ = [], [] keypoints3d_smplx_ = [] meta_ = {} - for meta_key in ['principal_point', 'focal_length', 'right_hand_valid', 'left_hand_valid']: + for meta_key in ['principal_point', 'focal_length', 'right_hand_valid', 'left_hand_valid', 'R', 'T']: meta_[meta_key] = [] # save mano params for vis purpose mano_ = [] @@ -312,6 +316,17 @@ def convert_by_mode(self, 
dataset_path: str, out_path: str, keypoints2d_smplx_.append(j2d_orig) keypoints3d_smplx_.append(j3d_orig) + + for kps2d in j2d_orig: + if kps2d[0] < 0 or kps2d[0] > 334: + if kps2d[2] != 0: + failed_inst += 1 + break + if kps2d[1] < 0 or kps2d[1] > 512: + if kps2d[2] != 0: + failed_inst += 1 + break + checked_inst += 1 # append image_path_.append(image_path) @@ -341,13 +356,16 @@ def convert_by_mode(self, dataset_path: str, out_path: str, meta_['right_hand_valid'].append(right_hand_valid) meta_['left_hand_valid'].append(left_hand_valid) + + meta_['R'].append(R) + meta_['T'].append(T) # append mano params mano_.append(hand_param) - instance_num += 1 - if instance_num > 20000: - break + # instance_num += 1 + # if instance_num > 200: + # break # j2d_mano, _ = convert_kps(j2d_orig.reshape(1, -1, 3), src='interhand', dst='smplx') # j2d_mano = j2d_orig.reshape(-1, 3) @@ -421,6 +439,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str, size_i = min(len(seqs), int(size)) out_file = os.path.join( out_path, - f'interhand26m_{mode}_{fps_mode}_{seed}_{"{:06d}".format(size_i)}_sample.npz' + f'interhand26m_{mode}_{fps_mode}_{seed}_{"{:06d}".format(size_i)}.npz' ) - human_data.dump(out_file) + # human_data.dump(out_file) + print(f'Checked {checked_inst} instances, failed {failed_inst} instances') \ No newline at end of file diff --git a/mmhuman3d/data/data_converters/signavatar.py b/mmhuman3d/data/data_converters/signavatar.py index 80b87814..5d31d0d0 100644 --- a/mmhuman3d/data/data_converters/signavatar.py +++ b/mmhuman3d/data/data_converters/signavatar.py @@ -189,256 +189,263 @@ def convert_by_mode(self, dataset_path: str, out_path: str, print('Total sequences:', len(annot_files)) - # use HumanData to store all data - human_data = HumanData() - - # initialize output for human_data - smplx_ = {} - for key in self.smplx_shape.keys(): - smplx_[key] = [] - keypoints2d_, keypoints3d_ = [], [] - bboxs_ = {} - for bbox_name in [ - 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', - 'rhand_bbox_xywh' - ]: - bboxs_[bbox_name] = [] - meta_ = {} - for meta_name in ['principal_point', 'focal_length', 'height', 'width', 'gender', - 'sequence_name', 'left_hand_valid', 'right_hand_valid']: - meta_[meta_name] = [] - image_path_ = [] - - annot_files = annot_files[:size_i] + slice_num = 8 + slice_len = len(annot_files) // slice_num - # for annot_path in tqdm(annot_files, desc=f'Splitting {mode}', - # leave=False, position=0): - # vid_path = annot_path.replace('annotations', 'videos').replace('.pkl', '.mp4') - # self.split_video(vid_path) - - # from concurrent.futures import ThreadPoolExecutor, as_completed - # from tqdm import tqdm - - # # 使用线程池并行处理视频分割 - # with ThreadPoolExecutor(max_workers=16) as executor: - # futures = [ - # executor.submit(self.split_video, annot_path.replace('annotations', 'videos').replace('.pkl', '.mp4')) - # for annot_path in annot_files - # ] - - # # 使用 tqdm 追踪任务进度 - # for future in tqdm(as_completed(futures), desc=f'Splitting {mode}', leave=False, position=0, total=len(annot_files)): - # # try: - # future.result() # 捕获异常并确保进度条准确 - # # except Exception as e: - # # print(f"Error processing file: {e}") + for sid in range(slice_num): + print(f'Slice {sid+1}/{slice_num}') + # use HumanData to store all data + human_data = HumanData() - # test_seqs = ['_20g7MG8K1U_3-8-rgb_front', '_Dh512GX6d8_14-8-rgb_front', - # '00kppw3aqus_11-3-rgb_front'] - # annot_files = [f'{annot_base_folder}/{seq}.pkl' for seq in test_seqs] - - for annot_path in tqdm(annot_files, desc=f'Converting {mode}', - 
leave=False, position=0): - - # load annot pickle - annot_seq = np.load(annot_path, allow_pickle=True) - # for key in annot_seq.keys(): - # print(key, annot_seq[key].shape) - vid_path = annot_path.replace('annotations', 'videos').replace('.pkl', '.mp4') - frame_folder = vid_path.replace('.mp4', '').replace('videos', 'images') - - annot_len = annot_seq['smplx'].shape[0] - split_success = self.split_video(vid_path, annot_len) - if not split_success: - pdb.set_trace() - continue - - smplx_seq = annot_seq['smplx'].copy() - gender = 'neutral' - smplx_param = { - 'global_orient': smplx_seq[:, :3], - 'body_pose': smplx_seq[:, 3:66], - 'left_hand_pose': smplx_seq[:, 66:111], - 'right_hand_pose': smplx_seq[:, 111:156], - 'jaw_pose': smplx_seq[:, 156:159], - 'betas': smplx_seq[:, 159:169], - 'expression': smplx_seq[:, 169:179], - 'transl': smplx_seq[:, 179:182] - } + # initialize output for human_data + smplx_ = {} for key in self.smplx_shape.keys(): - if key in smplx_param.keys(): - smplx_param[key] = smplx_param[key].reshape(self.smplx_shape[key]) - else: - pad_shape = np.array(self.smplx_shape[key]) - pad_shape[0] = annot_len - pad_shape = tuple(pad_shape) - smplx_param[key] = np.zeros(pad_shape) + smplx_[key] = [] + keypoints2d_, keypoints3d_ = [], [] + bboxs_ = {} + for bbox_name in [ + 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', + 'rhand_bbox_xywh' + ]: + bboxs_[bbox_name] = [] + meta_ = {} + for meta_name in ['principal_point', 'focal_length', 'height', 'width', 'gender', + 'sequence_name', 'left_hand_valid', 'right_hand_valid']: + meta_[meta_name] = [] + image_path_ = [] + + # annot_files = annot_files[:size_i] + annot_files2process = annot_files[sid*slice_len:(sid+1)*slice_len] - # prepare smplx tensor - smplx_param_tensor = {} - for key in self.smplx_shape.keys(): - smplx_param_tensor[key] = torch.tensor(smplx_param[key].reshape(self.smplx_shape[key]), - dtype=torch.float).to(self.device) - - # ue2opencv = np.array([[-1.0, 0, 0, 0], - # [0, -1, 0, 0], - # [0, 0, 1, 0], - # [0, 0, 0, 1]]) - - # get output - output = gendered_smplx[gender](**smplx_param_tensor) - kps3d_c = output['joints'] - # kps3d_c = output['joints'].detach().cpu().numpy() - # pelvis_world = kps3d_c[:, get_keypoint_idx('pelvis', 'smplx'), :] - - # # transform to cam space - # global_orient, transl = batch_transform_to_camera_frame( - # global_orient=smplx_param['global_orient'].reshape(-1, 3), - # transl=smplx_param['transl'].reshape(-1, 3), - # pelvis=pelvis_world.reshape(-1, 3), - # extrinsic=ue2opencv) - # smplx_param['global_orient'] = global_orient - # smplx_param['transl'] = transl - - # # prepare smplx tensor - # smplx_param_tensor = {} - # for key in self.smplx_shape.keys(): - # smplx_param_tensor[key] = torch.tensor(smplx_param[key].reshape(self.smplx_shape[key]), - # dtype=torch.float).to(self.device) - - # get image size - img_path = os.path.join(frame_folder, '000001.jpg') - img = cv2.imread(img_path) - height, width, _ = img.shape - - for fid in tqdm(annot_seq['total_valid_index'], position=1, leave=False): - # get image path - img_p = os.path.join(frame_folder, f'{fid+1:06d}.jpg') - image_path = img_p.replace(dataset_path + '/', '') - if not os.path.exists(img_path): - pdb.set_trace() - - left_valid = annot_seq['left_valid'][fid].cpu().item() - right_valid = annot_seq['right_valid'][fid].cpu().item() - # smplx_valid = True if str(fid) in annot_seq['total_valid_index'] else False + # for annot_path in tqdm(annot_files, desc=f'Splitting {mode}', + # leave=False, position=0): + # vid_path = 
annot_path.replace('annotations', 'videos').replace('.pkl', '.mp4') + # self.split_video(vid_path) - focal_length = list(annot_seq['focal'][fid]) - principal_point = list(annot_seq['princpt'][fid]) + from concurrent.futures import ThreadPoolExecutor, as_completed + from tqdm import tqdm + + # 使用线程池并行处理视频分割 + # with ThreadPoolExecutor(max_workers=16) as executor: + # futures = [ + # executor.submit(self.split_video, annot_path.replace('annotations', 'videos').replace('.pkl', '.mp4')) + # for annot_path in annot_files + # ] + + # # 使用 tqdm 追踪任务进度 + # for future in tqdm(as_completed(futures), desc=f'Splitting {mode}', leave=False, position=0, total=len(annot_files)): + # # try: + # future.result() # 捕获异常并确保进度条准确 + # except Exception as e: + # print(f"Error processing file: {e}") + + # test_seqs = ['_20g7MG8K1U_3-8-rgb_front', '_Dh512GX6d8_14-8-rgb_front', + # '00kppw3aqus_11-3-rgb_front'] + # annot_files = [f'{annot_base_folder}/{seq}.pkl' for seq in test_seqs] - camera = build_cameras( - dict( - type='PerspectiveCameras', - convention='opencv', - in_ndc=False, - focal_length=focal_length, - image_size=(width, height), - principal_point=principal_point)).to(self.device) + for annot_path in tqdm(annot_files2process, desc=f'Converting {mode}', + leave=False, position=0): - # 3d -> 2d - kps2d = camera.transform_points_screen(kps3d_c[fid]).detach().cpu().numpy().squeeze()[:, :2] - kps3d = kps3d_c[fid].detach().cpu().numpy().squeeze() - - # test overlay - # img = cv2.imread(img_p) - # for kp in kps2d: - # cv2.circle(img, (int(kp[0]), int(kp[1])), 5, (0, 255, 0), -1) - # cv2.imwrite(f'{out_path}/{os.path.basename(frame_folder)}_{fid}.jpg', img) + # load annot pickle + annot_seq = np.load(annot_path, allow_pickle=True) + # for key in annot_seq.keys(): + # print(key, annot_seq[key].shape) + vid_path = annot_path.replace('annotations', 'videos').replace('.pkl', '.mp4') + frame_folder = vid_path.replace('.mp4', '').replace('videos', 'images') + + annot_len = annot_seq['smplx'].shape[0] + split_success = self.split_video(vid_path, annot_len) + if not split_success: + # pdb.set_trace() + continue - # get bbox from 2d keypoints - bboxs = self._keypoints_to_scaled_bbox_bfh( - kps2d, - body_scale=self.misc_config['bbox_body_scale'], - fh_scale=self.misc_config['bbox_facehand_scale']) - for i, bbox_name in enumerate([ - 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', - 'rhand_bbox_xywh' - ]): - xmin, ymin, xmax, ymax, conf = bboxs[i] - bbox = np.array([ - max(0, xmin), - max(0, ymin), - min(width, xmax), - min(height, ymax) - ]) - bbox_xywh = self._xyxy2xywh(bbox) # list of len 4 - bbox_xywh.append(conf) # (5,) - bboxs_[bbox_name].append(bbox_xywh) - - # append image path - image_path_.append(image_path) - - # append keypoints - keypoints2d_.append(kps2d) - keypoints3d_.append(kps3d) - - # append smplx + smplx_seq = annot_seq['smplx'].copy() + gender = 'neutral' + smplx_param = { + 'global_orient': smplx_seq[:, :3], + 'body_pose': smplx_seq[:, 3:66], + 'left_hand_pose': smplx_seq[:, 66:111], + 'right_hand_pose': smplx_seq[:, 111:156], + 'jaw_pose': smplx_seq[:, 156:159], + 'betas': smplx_seq[:, 159:169], + 'expression': smplx_seq[:, 169:179], + 'transl': smplx_seq[:, 179:182] + } for key in self.smplx_shape.keys(): - # try: - smplx_[key].append(smplx_param[key][fid]) - # except: - # pdb.set_trace() - - # append meta - meta_['principal_point'].append(principal_point) - meta_['focal_length'].append(focal_length) - meta_['height'].append(height) - meta_['width'].append(width) - meta_['gender'].append(gender) 
- meta_['sequence_name'].append(os.path.basename(frame_folder)) - meta_['left_hand_valid'].append(left_valid) - meta_['right_hand_valid'].append(right_valid) + if key in smplx_param.keys(): + smplx_param[key] = smplx_param[key].reshape(self.smplx_shape[key]) + else: + pad_shape = np.array(self.smplx_shape[key]) + pad_shape[0] = annot_len + pad_shape = tuple(pad_shape) + smplx_param[key] = np.zeros(pad_shape) + + # prepare smplx tensor + smplx_param_tensor = {} + for key in self.smplx_shape.keys(): + smplx_param_tensor[key] = torch.tensor(smplx_param[key].reshape(self.smplx_shape[key]), + dtype=torch.float).to(self.device) - # get size - size_i = len(annot_files) - - # save keypoints 2d smplx - keypoints2d = np.concatenate(keypoints2d_, axis=0).reshape(-1, 144, 2) - keypoints2d_conf = np.ones([keypoints2d.shape[0], 144, 1]) - keypoints2d = np.concatenate([keypoints2d, keypoints2d_conf], axis=-1) - keypoints2d, keypoints2d_mask = convert_kps( - keypoints2d, src='smplx', dst='human_data') - human_data['keypoints2d_smplx'] = keypoints2d - human_data['keypoints2d_smplx_mask'] = keypoints2d_mask - - # save keypoints 3d smplx - keypoints3d = np.concatenate(keypoints3d_, axis=0).reshape(-1, 144, 3) - keypoints3d_conf = np.ones([keypoints3d.shape[0], 144, 1]) - keypoints3d = np.concatenate([keypoints3d, keypoints3d_conf], axis=-1) - keypoints3d, keypoints3d_mask = convert_kps( - keypoints3d, src='smplx', dst='human_data') - human_data['keypoints3d_smplx'] = keypoints3d - human_data['keypoints3d_smplx_mask'] = keypoints3d_mask - - # pdb.set_trace() - # save bbox - for bbox_name in [ - 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', - 'rhand_bbox_xywh' - ]: - bbox_xywh_ = np.array(bboxs_[bbox_name]).reshape((-1, 5)) - human_data[bbox_name] = bbox_xywh_ - - # save smplx - for key in smplx_.keys(): - smplx_[key] = np.concatenate( - smplx_[key], axis=0).reshape(self.smplx_shape[key]) - - human_data['smplx'] = smplx_ - - # save image path - human_data['image_path'] = image_path_ - - # save contact - # human_data['contact'] = contact_ - - # save meta and misc - human_data['config'] = f'signavatar_{mode}' - human_data['misc'] = self.misc_config - human_data['meta'] = meta_ - - os.makedirs(out_path, exist_ok=True) - out_file = os.path.join( - # out_path, f'moyo_{self.misc_config["flat_hand_mean"]}.npz') - out_path, f'signavatar_{mode}_{seed}_{"{:05d}".format(size_i)}.npz') - human_data.dump(out_file) \ No newline at end of file + # ue2opencv = np.array([[-1.0, 0, 0, 0], + # [0, -1, 0, 0], + # [0, 0, 1, 0], + # [0, 0, 0, 1]]) + + # get output + output = gendered_smplx[gender](**smplx_param_tensor) + kps3d_c = output['joints'] + # kps3d_c = output['joints'].detach().cpu().numpy() + # pelvis_world = kps3d_c[:, get_keypoint_idx('pelvis', 'smplx'), :] + + # # transform to cam space + # global_orient, transl = batch_transform_to_camera_frame( + # global_orient=smplx_param['global_orient'].reshape(-1, 3), + # transl=smplx_param['transl'].reshape(-1, 3), + # pelvis=pelvis_world.reshape(-1, 3), + # extrinsic=ue2opencv) + + # smplx_param['global_orient'] = global_orient + # smplx_param['transl'] = transl + + # # prepare smplx tensor + # smplx_param_tensor = {} + # for key in self.smplx_shape.keys(): + # smplx_param_tensor[key] = torch.tensor(smplx_param[key].reshape(self.smplx_shape[key]), + # dtype=torch.float).to(self.device) + + # get image size + img_path = os.path.join(frame_folder, '000001.jpg') + img = cv2.imread(img_path) + height, width, _ = img.shape + + for fid in tqdm(annot_seq['total_valid_index'], 
position=1, leave=False): + # get image path + img_p = os.path.join(frame_folder, f'{fid+1:06d}.jpg') + image_path = img_p.replace(dataset_path + '/', '') + if not os.path.exists(img_path): + pdb.set_trace() + + left_valid = annot_seq['left_valid'][fid].cpu().item() + right_valid = annot_seq['right_valid'][fid].cpu().item() + # smplx_valid = True if str(fid) in annot_seq['total_valid_index'] else False + + focal_length = list(annot_seq['focal'][fid]) + principal_point = list(annot_seq['princpt'][fid]) + + camera = build_cameras( + dict( + type='PerspectiveCameras', + convention='opencv', + in_ndc=False, + focal_length=focal_length, + image_size=(width, height), + principal_point=principal_point)).to(self.device) + + # 3d -> 2d + kps2d = camera.transform_points_screen(kps3d_c[fid]).detach().cpu().numpy().squeeze()[:, :2] + kps3d = kps3d_c[fid].detach().cpu().numpy().squeeze() + + # test overlay + # img = cv2.imread(img_p) + # for kp in kps2d: + # cv2.circle(img, (int(kp[0]), int(kp[1])), 5, (0, 255, 0), -1) + # cv2.imwrite(f'{out_path}/{os.path.basename(frame_folder)}_{fid}.jpg', img) + + # get bbox from 2d keypoints + bboxs = self._keypoints_to_scaled_bbox_bfh( + kps2d, + body_scale=self.misc_config['bbox_body_scale'], + fh_scale=self.misc_config['bbox_facehand_scale']) + for i, bbox_name in enumerate([ + 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', + 'rhand_bbox_xywh' + ]): + xmin, ymin, xmax, ymax, conf = bboxs[i] + bbox = np.array([ + max(0, xmin), + max(0, ymin), + min(width, xmax), + min(height, ymax) + ]) + bbox_xywh = self._xyxy2xywh(bbox) # list of len 4 + bbox_xywh.append(conf) # (5,) + bboxs_[bbox_name].append(bbox_xywh) + + # append image path + image_path_.append(image_path) + + # append keypoints + keypoints2d_.append(kps2d) + keypoints3d_.append(kps3d) + + # append smplx + for key in self.smplx_shape.keys(): + # try: + smplx_[key].append(smplx_param[key][fid]) + # except: + # pdb.set_trace() + + # append meta + meta_['principal_point'].append(principal_point) + meta_['focal_length'].append(focal_length) + meta_['height'].append(height) + meta_['width'].append(width) + meta_['gender'].append(gender) + meta_['sequence_name'].append(os.path.basename(frame_folder)) + meta_['left_hand_valid'].append(left_valid) + meta_['right_hand_valid'].append(right_valid) + + # get size + size_i = len(annot_files) + + # save keypoints 2d smplx + keypoints2d = np.concatenate(keypoints2d_, axis=0).reshape(-1, 144, 2) + keypoints2d_conf = np.ones([keypoints2d.shape[0], 144, 1]) + keypoints2d = np.concatenate([keypoints2d, keypoints2d_conf], axis=-1) + keypoints2d, keypoints2d_mask = convert_kps( + keypoints2d, src='smplx', dst='human_data') + human_data['keypoints2d_smplx'] = keypoints2d + human_data['keypoints2d_smplx_mask'] = keypoints2d_mask + + # save keypoints 3d smplx + keypoints3d = np.concatenate(keypoints3d_, axis=0).reshape(-1, 144, 3) + keypoints3d_conf = np.ones([keypoints3d.shape[0], 144, 1]) + keypoints3d = np.concatenate([keypoints3d, keypoints3d_conf], axis=-1) + keypoints3d, keypoints3d_mask = convert_kps( + keypoints3d, src='smplx', dst='human_data') + human_data['keypoints3d_smplx'] = keypoints3d + human_data['keypoints3d_smplx_mask'] = keypoints3d_mask + + # pdb.set_trace() + # save bbox + for bbox_name in [ + 'bbox_xywh', 'face_bbox_xywh', 'lhand_bbox_xywh', + 'rhand_bbox_xywh' + ]: + bbox_xywh_ = np.array(bboxs_[bbox_name]).reshape((-1, 5)) + human_data[bbox_name] = bbox_xywh_ + + # save smplx + for key in smplx_.keys(): + smplx_[key] = np.concatenate( + smplx_[key], 
axis=0).reshape(self.smplx_shape[key])
+
+            human_data['smplx'] = smplx_
+
+            # save image path
+            human_data['image_path'] = image_path_
+
+            # save contact
+            # human_data['contact'] = contact_
+
+            # save meta and misc
+            human_data['config'] = f'signavatar_{mode}'
+            human_data['misc'] = self.misc_config
+            human_data['meta'] = meta_
+
+            os.makedirs(out_path, exist_ok=True)
+            out_file = os.path.join(
+                # out_path, f'moyo_{self.misc_config["flat_hand_mean"]}.npz')
+                out_path, f'signavatar_{mode}_{seed}_{"{:05d}".format(size_i)}_{sid}.npz')
+            human_data.dump(out_file)
\ No newline at end of file
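
Reviewer note: for a quick local smoke test of the newly exported converter, the snippet below is a minimal sketch. It relies only on what this patch shows (the `SignAvatarConverter` export in `__init__.py` and the `convert_by_mode(dataset_path, out_path, mode)` interface used by the converters); the constructor argument, the `'train'` mode name, and the paths are assumptions, not part of the patch.

```python
# Minimal sketch (not part of the patch): drive the new converter directly.
# Assumptions: a BaseModeConverter-style constructor taking `modes`, a 'train'
# mode, and local dataset/output paths -- adjust to the real setup.
from mmhuman3d.data.data_converters import SignAvatarConverter

converter = SignAvatarConverter(modes=['train'])
converter.convert_by_mode(
    dataset_path='data/datasets/signavatar',   # assumed dataset root
    out_path='data/preprocessed_datasets',     # assumed output dir
    mode='train')
# With the slicing introduced above (slice_num = 8), this should write one
# HumanData file per slice: signavatar_train_<seed>_<size>_<sid>.npz, sid in 0..7.
```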