From 6556cfd9180a63dadd45180885390ec784d01735 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 19:25:10 +0200 Subject: [PATCH 01/28] inference on the 3DHP dataset --- 3dhp_test/infer_3dhp.py | 446 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 446 insertions(+) create mode 100644 3dhp_test/infer_3dhp.py diff --git a/3dhp_test/infer_3dhp.py b/3dhp_test/infer_3dhp.py new file mode 100644 index 0000000..f5ae586 --- /dev/null +++ b/3dhp_test/infer_3dhp.py @@ -0,0 +1,446 @@ +""" +FMPose3D: monocular 3D Pose Estimation via Flow Matching + +Official implementation of the paper: +"FMPose3D: monocular 3D Pose Estimation via Flow Matching" +by Ti Wang, Xiaohang Yu, and Mackenzie Weygandt Mathis +Licensed under Apache 2.0 +""" + +import argparse +import importlib.util +import json +import logging +import os +import random +import sys +import time +from pathlib import Path + +import numpy as np +import torch +import torch.utils.data +from tqdm import tqdm + +ROOT = Path(__file__).resolve().parent +REPO_ROOT = ROOT.parent +for path in (ROOT, REPO_ROOT): + if str(path) not in sys.path: + sys.path.insert(0, str(path)) + +from fmpose3d.aggregation_methods import aggregation_RPEA_joint_level +from fmpose3d.models import get_model + +from lib.camera import camera_params_for_subject +from lib.dataset_3dhp import ThreeDHPTestDataset +from lib.utils import AccumLoss, define_actions_3dhp, define_error_list +import lib.eval_cal as eval_cal + + +def str2bool(value): + if isinstance(value, bool): + return value + value = value.lower() + if value in {"true", "1", "yes", "y"}: + return True + if value in {"false", "0", "no", "n"}: + return False + raise argparse.ArgumentTypeError(f"Expected boolean value, got {value}") + + +def parse_args(): + parser = argparse.ArgumentParser(description="Clean 3DHP inference with processed test npz.") + parser.add_argument("--dataset-path", type=Path, default=ROOT / "dataset" / "data_test_3dhp.npz") + parser.add_argument( + "--model-path", + type=Path, + default=None, + help="Optional path to a Model definition. Defaults to the package FMPose3D human model.", + ) + parser.add_argument("--model-type", default="fmpose3d_humans", type=str) + parser.add_argument("--saved-model-path", type=Path, default=ROOT / "pretrained" / "fmpose3d_h36m" / "FMpose3D_pretrained_weights.pth") + parser.add_argument("--results-dir", type=Path, default=ROOT / "results") + parser.add_argument("--folder-name", type=str, default="") + parser.add_argument("--gpu", default="0", type=str) + parser.add_argument("--workers", default=8, type=int) + parser.add_argument("--batch-size", default=1024, type=int) + parser.add_argument("--frames", default=1, type=int) + parser.add_argument("--layers", default=5, type=int) + parser.add_argument("--channel", default=512, type=int) + parser.add_argument("--d-hid", default=1024, type=int) + parser.add_argument("--token-dim", default=256, type=int) + parser.add_argument("--n-joints", default=17, type=int) + parser.add_argument("--dataset", default="3dhp_valid", type=str) + parser.add_argument("--actions", default="*", type=str) + parser.add_argument("--subjects-test", default="TS1,TS2,TS3,TS4,TS5,TS6", type=str) + parser.add_argument("--eval-sample-steps", default="2", type=str) + parser.add_argument("--num-hypothesis-list", default="1", type=str) + parser.add_argument("--topk", default=6, type=int) + parser.add_argument("--exp-temp", default=0.005, type=float) + parser.add_argument("--test-augmentation", default=True, type=str2bool) + parser.add_argument("--test-augmentation-flip-hypothesis", default=True, type=str2bool) + parser.add_argument("--max-batches", default=0, type=int, help="Smoke test limit. 0 means full evaluation.") + parser.add_argument("--manual-seed", default=1, type=int) + args = parser.parse_args() + + args.pad = (args.frames - 1) // 2 + args.root_joint = 0 + args.train = 0 + args.test = True + args.keypoints = "gt_17_univ" + args.joints_left = [4, 5, 6, 11, 12, 13] + args.joints_right = [1, 2, 3, 14, 15, 16] + args.kps_left = args.joints_left + args.kps_right = args.joints_right + return args + + +def configure_reproducibility(seed): + random.seed(seed) + torch.manual_seed(seed) + np.random.seed(seed) + torch.cuda.manual_seed_all(seed) + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + + +def load_model_class(model_path, model_type): + if model_path is None: + return get_model(model_type) + model_path = Path(model_path).resolve() + spec = importlib.util.spec_from_file_location(model_path.stem, model_path) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + return getattr(module, "Model") + + +def get_device(gpu): + if gpu in {"", "-1", "cpu", "none", "None"}: + return torch.device("cpu") + if torch.cuda.is_available(): + return torch.device("cuda") + print(f"CUDA is not available; running on CPU instead of GPU {gpu}.") + return torch.device("cpu") + + +def camera_tensor_for_subjects(subjects, device, dtype=torch.float32): + cam_params = [camera_params_for_subject(subject) for subject in subjects] + return torch.tensor(cam_params, dtype=dtype, device=device) + + +def print_error(data_type, action_error_sum, is_train): + if data_type == "h36m" or data_type.startswith("3dhp"): + return print_error_action(action_error_sum, is_train, data_type) + return 0, 0, 0, 0 + + +def print_error_action(action_error_sum, is_train, data_type): + mean_error_each = {"p1": 0.0, "p2": 0.0, "pck": 0.0, "auc": 0.0} + mean_error_all = {"p1": AccumLoss(), "p2": AccumLoss(), "pck": AccumLoss(), "auc": AccumLoss()} + + if not is_train: + if data_type.startswith("3dhp"): + print("{0:=^12} {1:=^10} {2:=^8} {3:=^8} {4:=^8}".format("Action", "p#1 mm", "p#2 mm", "PCK", "AUC")) + logging.info("{0:=^12} {1:=^10} {2:=^8} {3:=^8} {4:=^8}".format("Action", "p#1 mm", "p#2 mm", "PCK", "AUC")) + else: + print("{0:=^12} {1:=^10} {2:=^8}".format("Action", "p#1 mm", "p#2 mm")) + + for action in action_error_sum.keys(): + if not is_train: + print("{0:<12} ".format(action), end="") + + mean_error_each["p1"] = action_error_sum[action]["p1"].avg * 1000.0 + mean_error_all["p1"].update(mean_error_each["p1"], 1) + mean_error_each["p2"] = action_error_sum[action]["p2"].avg * 1000.0 + mean_error_all["p2"].update(mean_error_each["p2"], 1) + mean_error_each["pck"] = action_error_sum[action]["pck"].avg * 100.0 + mean_error_all["pck"].update(mean_error_each["pck"], 1) + mean_error_each["auc"] = action_error_sum[action]["auc"].avg * 100.0 + mean_error_all["auc"].update(mean_error_each["auc"], 1) + + if is_train == 0: + if data_type.startswith("3dhp"): + print( + "{0:>6.2f} {1:>10.2f} {2:>10.2f} {3:>10.2f}".format( + mean_error_each["p1"], + mean_error_each["p2"], + mean_error_each["pck"], + mean_error_each["auc"], + ) + ) + logging.info( + "{0:<12} {1:>6.2f} {2:>10.2f} {3:>10.2f} {4:>10.2f}".format( + action, + mean_error_each["p1"], + mean_error_each["p2"], + mean_error_each["pck"], + mean_error_each["auc"], + ) + ) + else: + print("{0:>6.2f} {1:>10.2f}".format(mean_error_each["p1"], mean_error_each["p2"])) + + if is_train == 0: + if data_type.startswith("3dhp"): + print( + "{0:<12} {1:>6.2f} {2:>10.2f} {3:>10.2f} {4:>10.2f}".format( + "Average", + mean_error_all["p1"].avg, + mean_error_all["p2"].avg, + mean_error_all["pck"].avg, + mean_error_all["auc"].avg, + ) + ) + logging.info( + "{0:<12} {1:>6.2f} {2:>10.2f} {3:>10.2f} {4:>10.2f}".format( + "Average", + mean_error_all["p1"].avg, + mean_error_all["p2"].avg, + mean_error_all["pck"].avg, + mean_error_all["auc"].avg, + ) + ) + else: + print("{0:<12} {1:>6.2f} {2:>10.2f}".format("Average", mean_error_all["p1"].avg, mean_error_all["p2"].avg)) + + if data_type.startswith("3dhp"): + return mean_error_all["p1"].avg, mean_error_all["p2"].avg, mean_error_all["pck"].avg, mean_error_all["auc"].avg + return mean_error_all["p1"].avg, mean_error_all["p2"].avg, 0, 0 + + +def test(actions, dataloader, model, args, hypothesis_num=1): + model.eval() + eval_steps = sorted({int(s) for s in str(args.eval_sample_steps).split(",") if str(s).strip()}) + action_error_sum_multi = {s: define_error_list(actions) for s in eval_steps} + + print(f"\n{'=' * 80}") + print(f"Testing with {hypothesis_num} hypothesis(es), eval_steps: {eval_steps}") + print(f"{'=' * 80}\n") + + for i, data in enumerate(tqdm(dataloader, 0)): + _, gt_3d, input_2d, _, action, subject, _ = data + input_2d = input_2d.contiguous().to(args.device, dtype=torch.float32) + gt_3d = gt_3d.contiguous().to(args.device, dtype=torch.float32) + + input_2d_nonflip = input_2d[:, 0] + input_2d_flip = input_2d[:, 1] if input_2d.size(1) > 1 else input_2d[:, 0] + + out_target = gt_3d.clone() + out_target[:, :, args.root_joint] = 0 + + def euler_sample(x2d, y_local, steps, model_3d): + dt = 1.0 / steps + for s in range(steps): + t_s = torch.full((gt_3d.size(0), 1, 1, 1), s * dt, device=gt_3d.device, dtype=gt_3d.dtype) + v_s = model_3d(x2d, y_local, t_s) + y_local = y_local + dt * v_s + return y_local + + if i == 0: + print(f"eval_steps: {eval_steps}, hypothesis_num: {hypothesis_num}") + + for s_keep in eval_steps: + list_hypothesis = [] + for _ in range(hypothesis_num): + y = torch.randn_like(gt_3d) + y_s = euler_sample(input_2d_nonflip, y, s_keep, model) + + if args.test_augmentation_flip_hypothesis: + y_flip = torch.randn_like(gt_3d) + y_flip[:, :, :, 0] *= -1 + y_flip[:, :, args.joints_left + args.joints_right, :] = y_flip[ + :, :, args.joints_right + args.joints_left, : + ] + y_flip_s = euler_sample(input_2d_flip, y_flip, s_keep, model) + y_flip_s[:, :, :, 0] *= -1 + y_flip_s[:, :, args.joints_left + args.joints_right, :] = y_flip_s[ + :, :, args.joints_right + args.joints_left, : + ] + y_flip_s_frame = y_flip_s[:, args.pad].unsqueeze(1) + y_flip_s_frame[:, :, 0, :] = 0 + list_hypothesis.append(y_flip_s_frame) + + y_s_frame = y_s[:, args.pad].unsqueeze(1) + y_s_frame[:, :, 0, :] = 0 + list_hypothesis.append(y_s_frame) + + cam_tensor = camera_tensor_for_subjects(subject, gt_3d.device, dtype=gt_3d.dtype) + output_3d_s = aggregation_RPEA_joint_level( + args, list_hypothesis, cam_tensor, input_2d_nonflip, gt_3d + ) + action_error_sum_multi[s_keep] = eval_cal.test_calculation( + output_3d_s, out_target, action, action_error_sum_multi[s_keep], args.dataset, subject + ) + + if args.max_batches and i + 1 >= args.max_batches: + break + + per_step_p1 = {} + per_step_p2 = {} + per_step_pck = {} + per_step_auc = {} + for s_keep in sorted(action_error_sum_multi.keys()): + p1_s, p2_s, pck_s, auc_s = print_error(args.dataset, action_error_sum_multi[s_keep], args.train) + per_step_p1[s_keep] = float(p1_s) + per_step_p2[s_keep] = float(p2_s) + per_step_pck[s_keep] = float(pck_s) + per_step_auc[s_keep] = float(auc_s) + + return per_step_p1, per_step_p2, per_step_pck, per_step_auc + + +def setup_logging(args): + if args.folder_name: + folder_name = args.folder_name + else: + folder_name = ( + f"s_{args.eval_sample_steps}_Top{args.topk}_exp_temp{args.exp_temp}_" + f"S{args.subjects_test}_h{args.num_hypothesis_list}_{time.strftime('%Y%m%d_%H%M%S')}" + ) + result_dir = args.results_dir / folder_name + result_dir.mkdir(parents=True, exist_ok=True) + log_path = result_dir / "train.log" + logging.basicConfig(filename=log_path, level=logging.INFO, format="%(message)s") + return result_dir, log_path + + +def main(): + args = parse_args() + if args.gpu not in {"", "-1", "cpu", "none", "None"}: + os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu + args.device = get_device(args.gpu) + configure_reproducibility(args.manual_seed) + + result_dir, log_path = setup_logging(args) + print(f"Results: {result_dir}") + print(f"Log: {log_path}") + + subjects = [s for s in args.subjects_test.split(",") if s] + dataset = ThreeDHPTestDataset( + args.dataset_path, + subjects=subjects, + test_augmentation=args.test_augmentation, + kps_left=args.kps_left, + kps_right=args.kps_right, + joints_left=args.joints_left, + joints_right=args.joints_right, + ) + print(f"Dataset: {args.dataset_path}") + print(f"Dataset summary: {dataset.summary()}") + logging.info(f"Dataset: {args.dataset_path}") + logging.info(f"Dataset summary: {dataset.summary()}") + + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=args.batch_size, + shuffle=False, + num_workers=int(args.workers), + pin_memory=args.device.type == "cuda", + ) + + model_cls = load_model_class(args.model_path, args.model_type) + model = model_cls(args).to(args.device) + + print(args.saved_model_path) + pre_dict = torch.load(args.saved_model_path, map_location=args.device, weights_only=True) + model_dict = model.state_dict() + state_dict = {k: v for k, v in pre_dict.items() if k in model_dict.keys()} + model_dict.update(state_dict) + model.load_state_dict(model_dict) + print("model loaded successfully!") + + actions = define_actions_3dhp(args.actions, train=False) + hypothesis_list = [int(x) for x in str(args.num_hypothesis_list).split(",") if str(x).strip()] + eval_steps_list = [int(s) for s in str(args.eval_sample_steps).split(",") if str(s).strip()] + + best_global_p1 = None + best_global_p2 = None + best_global_pck = None + best_global_auc = None + best_global_pair = None + all_metrics = {} + + for s_eval in eval_steps_list: + p1_by_hyp = {} + p2_by_hyp = {} + pck_by_hyp = {} + auc_by_hyp = {} + + for hypothesis_num in hypothesis_list: + print(f"\n{'=' * 80}") + print(f"Evaluating step {s_eval} with {hypothesis_num} hypotheses") + print(f"{'=' * 80}\n") + logging.info(f"Evaluating step {s_eval} with {hypothesis_num} hypotheses") + + with torch.no_grad(): + args_backup = args.eval_sample_steps + args.eval_sample_steps = str(s_eval) + p1_per_step, p2_per_step, pck_per_step, auc_per_step = test( + actions, dataloader, model, args, hypothesis_num=hypothesis_num + ) + args.eval_sample_steps = args_backup + + p1 = p1_per_step[int(s_eval)] + p2 = p2_per_step[int(s_eval)] + pck_s = pck_per_step[int(s_eval)] + auc_s = auc_per_step[int(s_eval)] + + p1_by_hyp[int(hypothesis_num)] = float(p1) + p2_by_hyp[int(hypothesis_num)] = float(p2) + pck_by_hyp[int(hypothesis_num)] = float(pck_s) + auc_by_hyp[int(hypothesis_num)] = float(auc_s) + + all_metrics[f"step_{s_eval}_hyp_{hypothesis_num}"] = { + "p1": float(p1), + "p2": float(p2), + "pck": float(pck_s), + "auc": float(auc_s), + } + + if best_global_p1 is None or float(p1) < best_global_p1: + best_global_p1 = float(p1) + best_global_p2 = float(p2) + best_global_pck = float(pck_s) + best_global_auc = float(auc_s) + best_global_pair = (int(s_eval), int(hypothesis_num)) + + hyp_sorted = sorted(p1_by_hyp.keys()) + hyp_strs = [ + f"h{h}_p1: {p1_by_hyp[h]:.4f}, h{h}_p2: {p2_by_hyp[h]:.4f}, " + f"h{h}_pck: {pck_by_hyp[h]:.4f}, h{h}_auc: {auc_by_hyp[h]:.4f}" + for h in hyp_sorted + ] + print("\n" + "=" * 80) + print(f"Step: {s_eval} | " + " | ".join(hyp_strs)) + print("=" * 80 + "\n") + logging.info(f"step: {s_eval} | " + " | ".join(hyp_strs)) + + if best_global_p1 is not None: + print("\n" + "=" * 80) + print( + f"BEST RESULT: step {best_global_pair[0]}, hyp {best_global_pair[1]}: " + f"p1: {best_global_p1:.4f}, p2: {best_global_p2:.4f}, " + f"pck: {best_global_pck:.4f}, auc: {best_global_auc:.4f}" + ) + print("=" * 80 + "\n") + logging.info( + f"BEST: step {best_global_pair[0]}, hyp {best_global_pair[1]}: " + f"p1: {best_global_p1:.4f}, p2: {best_global_p2:.4f}, " + f"pck: {best_global_pck:.4f}, auc: {best_global_auc:.4f}" + ) + all_metrics["best"] = { + "step": best_global_pair[0], + "hypothesis": best_global_pair[1], + "p1": best_global_p1, + "p2": best_global_p2, + "pck": best_global_pck, + "auc": best_global_auc, + } + + with open(result_dir / "metrics.json", "w", encoding="utf-8") as f: + json.dump(all_metrics, f, indent=2, sort_keys=True) + + +if __name__ == "__main__": + main() From a8da9424ea689119a72410bd2c2735c3593600d0 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 19:25:20 +0200 Subject: [PATCH 02/28] Add test script for 3DHP dataset inference --- 3dhp_test/test_3dhp.sh | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100755 3dhp_test/test_3dhp.sh diff --git a/3dhp_test/test_3dhp.sh b/3dhp_test/test_3dhp.sh new file mode 100755 index 0000000..03be54a --- /dev/null +++ b/3dhp_test/test_3dhp.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + +layers=5 +gpu_id=1 +eval_sample_steps=3 +batch_size=1024 +saved_model_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/FMpose3D_pretrained_weights.pth" + +num_hypothesis_list=1 +topk=6 +subjects_test=TS1,TS2,TS3,TS4,TS5,TS6 + +folder_name=s_${eval_sample_steps}_S${subjects_test}_h${num_hypothesis_list}_$(date +%Y%m%d_%H%M%S) + +python3 "${SCRIPT_DIR}/infer_3dhp.py" \ + --gpu "${gpu_id}" \ + --batch-size "${batch_size}" \ + --frames 1 \ + --layers "${layers}" \ + --channel 512 \ + --d-hid 1024 \ + --token-dim 256 \ + --eval-sample-steps "${eval_sample_steps}" \ + --dataset-path "${SCRIPT_DIR}/dataset/data_test_3dhp.npz" \ + --saved-model-path "${saved_model_path}" \ + --num-hypothesis-list "${num_hypothesis_list}" \ + --topk "${topk}" \ + --folder-name "${folder_name}" \ + --test-augmentation True \ + --test-augmentation-flip-hypothesis True \ + --subjects-test "${subjects_test}" \ + --results-dir "${SCRIPT_DIR}/results" \ + "$@" From 2c73d3ad530b8eed408dcb8db0ae516fef71363c Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 19:26:01 +0200 Subject: [PATCH 03/28] add lib --- 3dhp_test/lib/__init__.py | 0 3dhp_test/lib/camera.py | 82 +++++++++++++++++++ 3dhp_test/lib/dataset_3dhp.py | 113 ++++++++++++++++++++++++++ 3dhp_test/lib/eval_cal.py | 149 ++++++++++++++++++++++++++++++++++ 3dhp_test/lib/utils.py | 62 ++++++++++++++ 5 files changed, 406 insertions(+) create mode 100644 3dhp_test/lib/__init__.py create mode 100644 3dhp_test/lib/camera.py create mode 100644 3dhp_test/lib/dataset_3dhp.py create mode 100644 3dhp_test/lib/eval_cal.py create mode 100644 3dhp_test/lib/utils.py diff --git a/3dhp_test/lib/__init__.py b/3dhp_test/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/3dhp_test/lib/camera.py b/3dhp_test/lib/camera.py new file mode 100644 index 0000000..3367b1f --- /dev/null +++ b/3dhp_test/lib/camera.py @@ -0,0 +1,82 @@ +""" +FMPose3D: monocular 3D Pose Estimation via Flow Matching + +Official implementation of the paper: +"FMPose3D: monocular 3D Pose Estimation via Flow Matching" +by Ti Wang, Xiaohang Yu, and Mackenzie Weygandt Mathis +Licensed under Apache 2.0 +""" + +import numpy as np +import torch + + +def normalize_screen_coordinates(x, w, h): + assert x.shape[-1] == 2 + return x / w * 2 - [1, h / w] + + +def project_to_2d(x, camera_params): + """ + Project 3D points to normalized 2D using the same camera model as the + original FMPose 3DHP inference script. + """ + assert x.shape[-1] == 3 + assert len(camera_params.shape) == 2 + assert camera_params.shape[-1] == 9 + assert x.shape[0] == camera_params.shape[0] + + while len(camera_params.shape) < len(x.shape): + camera_params = camera_params.unsqueeze(1) + + f = camera_params[..., :2] + c = camera_params[..., 2:4] + k = camera_params[..., 4:7] + p = camera_params[..., 7:] + + xx = torch.clamp(x[..., :2] / x[..., 2:], min=-1, max=1) + r2 = torch.sum(xx[..., :2] ** 2, dim=len(xx.shape) - 1, keepdim=True) + + radial = 1 + torch.sum( + k * torch.cat((r2, r2 ** 2, r2 ** 3), dim=len(r2.shape) - 1), + dim=len(r2.shape) - 1, + keepdim=True, + ) + tan = torch.sum(p * xx, dim=len(xx.shape) - 1, keepdim=True) + xxx = xx * (radial + tan) + p * r2 + + return f * xxx + c + + +def resolution_for_subject(subject): + if subject in {"TS5", "TS6"}: + return 1920, 1080 + return 2048, 2048 + + +def camera_params_for_subject(subject): + # Official MPI-INF-3DHP test intrinsics, normalized with + # normalize_screen_coordinates(): x / width * 2 - [1, height / width]. + if subject in {"TS5", "TS6"}: + return [ + 1.7541495005289713, + 1.7541495005289713, + -0.021502558390299464, + 0.020459111531575536, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ] + return [ + 1.4650119543075562, + 1.4650119543075562, + -0.006945967674255371, + 0.018097639083862305, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + ] diff --git a/3dhp_test/lib/dataset_3dhp.py b/3dhp_test/lib/dataset_3dhp.py new file mode 100644 index 0000000..fd7d731 --- /dev/null +++ b/3dhp_test/lib/dataset_3dhp.py @@ -0,0 +1,113 @@ +""" +FMPose3D: monocular 3D Pose Estimation via Flow Matching + +Official implementation of the paper: +"FMPose3D: monocular 3D Pose Estimation via Flow Matching" +by Ti Wang, Xiaohang Yu, and Mackenzie Weygandt Mathis +Licensed under Apache 2.0 +""" + +import numpy as np +from torch.utils.data import Dataset + +from .camera import normalize_screen_coordinates, resolution_for_subject + + +TEST_NPZ_TO_FMPOSE_17 = np.array( + [14, 8, 9, 10, 11, 12, 13, 15, 1, 16, 0, 5, 6, 7, 2, 3, 4], + dtype=np.int64, +) + + +class ThreeDHPTestDataset(Dataset): + def __init__( + self, + dataset_path, + subjects, + test_augmentation=True, + kps_left=None, + kps_right=None, + joints_left=None, + joints_right=None, + ): + self.dataset_path = dataset_path + self.subjects = subjects + self.test_augmentation = bool(test_augmentation) + self.kps_left = kps_left or [4, 5, 6, 11, 12, 13] + self.kps_right = kps_right or [1, 2, 3, 14, 15, 16] + self.joints_left = joints_left or [4, 5, 6, 11, 12, 13] + self.joints_right = joints_right or [1, 2, 3, 14, 15, 16] + self._data = self._load() + self.pairs = self._build_pairs() + + def _load(self): + raw = np.load(self.dataset_path, allow_pickle=True)["data"].item() + out = {} + + for subject in self.subjects: + if subject not in raw: + raise KeyError(f"Subject {subject} is missing from {self.dataset_path}") + + anim = raw[subject] + valid = anim["valid"].astype(bool) + data_2d = anim["data_2d"][valid][:, TEST_NPZ_TO_FMPOSE_17, :] + data_3d = anim["data_3d"][valid][:, TEST_NPZ_TO_FMPOSE_17, :] / 1000.0 + + data_3d[:, 1:] -= data_3d[:, :1] + + width, height = resolution_for_subject(subject) + data_2d = normalize_screen_coordinates(data_2d, w=width, h=height) + + out[subject] = { + "positions_2d": data_2d, + "positions_3d": data_3d, + "valid_count": int(valid.sum()), + "original_count": int(valid.shape[0]), + } + + return out + + def _build_pairs(self): + pairs = [] + for subject in self.subjects: + n_frames = self._data[subject]["positions_2d"].shape[0] + for frame_idx in range(n_frames): + pairs.append((subject, frame_idx)) + return pairs + + def summary(self): + return { + subject: { + "valid_count": self._data[subject]["valid_count"], + "original_count": self._data[subject]["original_count"], + } + for subject in self.subjects + } + + def __len__(self): + return len(self.pairs) + + def __getitem__(self, index): + subject, frame_idx = self.pairs[index] + pose_2d = self._data[subject]["positions_2d"][frame_idx : frame_idx + 1].copy() + pose_3d = self._data[subject]["positions_3d"][frame_idx : frame_idx + 1].copy() + + input_2d = pose_2d[None, ...] + input_2d_gt = pose_2d[None, ...] + + if self.test_augmentation: + flip_2d = pose_2d.copy() + flip_2d[:, :, 0] *= -1 + flip_2d[:, self.kps_left + self.kps_right] = flip_2d[:, self.kps_right + self.kps_left] + input_2d = np.concatenate((input_2d, flip_2d[None, ...]), axis=0) + input_2d_gt = np.concatenate((input_2d_gt, flip_2d[None, ...]), axis=0) + + return ( + np.zeros(9, dtype=np.float32), + pose_3d, + input_2d.astype(np.float32), + input_2d_gt.astype(np.float32), + "Seq1", + subject, + 0, + ) diff --git a/3dhp_test/lib/eval_cal.py b/3dhp_test/lib/eval_cal.py new file mode 100644 index 0000000..f61490b --- /dev/null +++ b/3dhp_test/lib/eval_cal.py @@ -0,0 +1,149 @@ +""" +FMPose3D: monocular 3D Pose Estimation via Flow Matching + +Official implementation of the paper: +"FMPose3D: monocular 3D Pose Estimation via Flow Matching" +by Ti Wang, Xiaohang Yu, and Mackenzie Weygandt Mathis +Licensed under Apache 2.0 +""" + +import numpy as np +import torch + + +def mpjpe(predicted, target): + assert predicted.shape == target.shape + return torch.mean(torch.norm(predicted - target, dim=-1)) + + +def pck(predicted, target): + assert predicted.shape == target.shape + dis = torch.norm(predicted - target, dim=len(target.shape) - 1) + threshold = torch.tensor(0.150, dtype=dis.dtype, device=dis.device) + return (dis < threshold).float().mean() + + +def auc(predicted, target): + assert predicted.shape == target.shape + dis = torch.norm(predicted - target, dim=len(target.shape) - 1) + thresholds = torch.arange(0, 151, 5, dtype=dis.dtype, device=dis.device) / 1000.0 + threshold_shape = (-1,) + (1,) * dis.ndim + return (dis.unsqueeze(0) < thresholds.view(threshold_shape)).float().mean() + + +def test_calculation(predicted, target, action, error_sum, data_type, subject): + if data_type == "h36m" or data_type.startswith("3dhp"): + error_sum = mpjpe_by_action_p1(predicted, target, action, error_sum) + error_sum = mpjpe_by_action_p2(predicted, target, action, error_sum) + if data_type.startswith("3dhp"): + error_sum = mpjpe_by_action_pck(predicted, target, action, error_sum) + error_sum = mpjpe_by_action_auc(predicted, target, action, error_sum) + return error_sum + + +def _action_name(action): + end_index = action.find(" ") + if end_index != -1: + return action[:end_index] + return action + + +def mpjpe_by_action_p1(predicted, target, action, action_error_sum): + assert predicted.shape == target.shape + num = predicted.size(0) + dist = torch.mean(torch.norm(predicted - target, dim=len(target.shape) - 1), dim=len(target.shape) - 2) + + if len(set(list(action))) == 1: + action_name = _action_name(action[0]) + action_error_sum[action_name]["p1"].update(torch.mean(dist).item() * num, num) + else: + for i in range(num): + action_name = _action_name(action[i]) + action_error_sum[action_name]["p1"].update(dist[i].item(), 1) + return action_error_sum + + +def mpjpe_by_action_p2(predicted, target, action, action_error_sum): + assert predicted.shape == target.shape + num = predicted.size(0) + pred = predicted.detach().cpu().numpy().reshape(-1, predicted.shape[-2], predicted.shape[-1]) + gt = target.detach().cpu().numpy().reshape(-1, target.shape[-2], target.shape[-1]) + dist = p_mpjpe(pred, gt) + + if len(set(list(action))) == 1: + action_name = _action_name(action[0]) + action_error_sum[action_name]["p2"].update(np.mean(dist) * num, num) + else: + for i in range(num): + action_name = _action_name(action[i]) + action_error_sum[action_name]["p2"].update(np.mean(dist), 1) + return action_error_sum + + +def mpjpe_by_action_pck(predicted, target, action, action_error_sum): + assert predicted.shape == target.shape + num = predicted.size(0) + + if len(set(list(action))) == 1: + action_name = _action_name(action[0]) + action_error_sum[action_name]["pck"].update(pck(predicted, target).item() * num, num) + else: + for i in range(num): + action_name = _action_name(action[i]) + action_error_sum[action_name]["pck"].update(pck(predicted[i : i + 1], target[i : i + 1]).item(), 1) + return action_error_sum + + +def mpjpe_by_action_auc(predicted, target, action, action_error_sum): + assert predicted.shape == target.shape + num = predicted.size(0) + + if len(set(list(action))) == 1: + action_name = _action_name(action[0]) + action_error_sum[action_name]["auc"].update(auc(predicted, target).item() * num, num) + else: + for i in range(num): + action_name = _action_name(action[i]) + action_error_sum[action_name]["auc"].update(auc(predicted[i : i + 1], target[i : i + 1]).item(), 1) + return action_error_sum + + +def p_mpjpe(predicted, target): + assert predicted.shape == target.shape + + mu_x = np.mean(target, axis=1, keepdims=True) + mu_y = np.mean(predicted, axis=1, keepdims=True) + x0 = target - mu_x + y0 = predicted - mu_y + + norm_x = np.sqrt(np.sum(x0**2, axis=(1, 2), keepdims=True)) + norm_y = np.sqrt(np.sum(y0**2, axis=(1, 2), keepdims=True)) + x0 /= norm_x + y0 /= norm_y + + h = np.matmul(x0.transpose(0, 2, 1), y0) + u, s, vt = np.linalg.svd(h) + v = vt.transpose(0, 2, 1) + r = np.matmul(v, u.transpose(0, 2, 1)) + + sign_det_r = np.sign(np.expand_dims(np.linalg.det(r), axis=1)) + v[:, :, -1] *= sign_det_r + s[:, -1] *= sign_det_r.flatten() + r = np.matmul(v, u.transpose(0, 2, 1)) + + tr = np.expand_dims(np.sum(s, axis=1, keepdims=True), axis=2) + a = tr * norm_x / norm_y + t = mu_x - a * np.matmul(mu_y, r) + predicted_aligned = a * np.matmul(predicted, r) + t + + return np.mean(np.linalg.norm(predicted_aligned - target, axis=len(target.shape) - 1), axis=len(target.shape) - 2) + + +def p_mpjpe_action(predicted, target): + assert predicted.shape == target.shape + predicted_np = predicted.detach().cpu().numpy() + target_np = target.detach().cpu().numpy() + n, t, v, c = predicted_np.shape + predicted_np = predicted_np.reshape(n * t, v, c) + target_np = target_np.reshape(n * t, v, c) + return p_mpjpe(predicted_np, target_np).reshape(n, t) diff --git a/3dhp_test/lib/utils.py b/3dhp_test/lib/utils.py new file mode 100644 index 0000000..f2918ad --- /dev/null +++ b/3dhp_test/lib/utils.py @@ -0,0 +1,62 @@ +""" +FMPose3D: monocular 3D Pose Estimation via Flow Matching + +Official implementation of the paper: +"FMPose3D: monocular 3D Pose Estimation via Flow Matching" +by Ti Wang, Xiaohang Yu, and Mackenzie Weygandt Mathis +Licensed under Apache 2.0 +""" + +import hashlib + +import torch +from torch.autograd import Variable + + +class AccumLoss(object): + def __init__(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val + self.count += n + self.avg = self.sum / self.count + + +def get_variable(split, target): + out = [] + for item in target: + if split == "train": + out.append(Variable(item, requires_grad=False).contiguous().type(torch.cuda.FloatTensor)) + else: + out.append(Variable(item).contiguous().cuda().type(torch.cuda.FloatTensor)) + return out + + +def define_error_list(actions): + return { + action: { + "p1": AccumLoss(), + "p2": AccumLoss(), + "pck": AccumLoss(), + "auc": AccumLoss(), + } + for action in actions + } + + +def define_actions_3dhp(action="*", train=False): + if train: + return ["Seq1", "Seq2"] + return ["Seq1"] + + +def deterministic_random(min_value, max_value, data): + digest = hashlib.sha256(data.encode()).digest() + raw_value = int.from_bytes(digest[:4], byteorder="little", signed=False) + return int(raw_value / (2**32 - 1) * (max_value - min_value)) + min_value + From fe273491f236e79d4e0ea488539de19f89174c89 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 19:33:58 +0200 Subject: [PATCH 04/28] clean --- 3dhp_test/test_3dhp.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/3dhp_test/test_3dhp.sh b/3dhp_test/test_3dhp.sh index 03be54a..df1383b 100755 --- a/3dhp_test/test_3dhp.sh +++ b/3dhp_test/test_3dhp.sh @@ -11,7 +11,6 @@ batch_size=1024 saved_model_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/FMpose3D_pretrained_weights.pth" num_hypothesis_list=1 -topk=6 subjects_test=TS1,TS2,TS3,TS4,TS5,TS6 folder_name=s_${eval_sample_steps}_S${subjects_test}_h${num_hypothesis_list}_$(date +%Y%m%d_%H%M%S) @@ -28,7 +27,6 @@ python3 "${SCRIPT_DIR}/infer_3dhp.py" \ --dataset-path "${SCRIPT_DIR}/dataset/data_test_3dhp.npz" \ --saved-model-path "${saved_model_path}" \ --num-hypothesis-list "${num_hypothesis_list}" \ - --topk "${topk}" \ --folder-name "${folder_name}" \ --test-augmentation True \ --test-augmentation-flip-hypothesis True \ From dde13f4c2034e4a1d19080f28d4b90e40fdeee22 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 19:34:08 +0200 Subject: [PATCH 05/28] Remove unused ground truth input in 3DHP dataset class and simplify data augmentation logic. --- 3dhp_test/lib/dataset_3dhp.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/3dhp_test/lib/dataset_3dhp.py b/3dhp_test/lib/dataset_3dhp.py index fd7d731..04e2c70 100644 --- a/3dhp_test/lib/dataset_3dhp.py +++ b/3dhp_test/lib/dataset_3dhp.py @@ -93,20 +93,17 @@ def __getitem__(self, index): pose_3d = self._data[subject]["positions_3d"][frame_idx : frame_idx + 1].copy() input_2d = pose_2d[None, ...] - input_2d_gt = pose_2d[None, ...] if self.test_augmentation: flip_2d = pose_2d.copy() flip_2d[:, :, 0] *= -1 flip_2d[:, self.kps_left + self.kps_right] = flip_2d[:, self.kps_right + self.kps_left] input_2d = np.concatenate((input_2d, flip_2d[None, ...]), axis=0) - input_2d_gt = np.concatenate((input_2d_gt, flip_2d[None, ...]), axis=0) return ( np.zeros(9, dtype=np.float32), pose_3d, input_2d.astype(np.float32), - input_2d_gt.astype(np.float32), "Seq1", subject, 0, From 271f180203dc19b848211e2e35b0660d2aabf4a6 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 19:34:21 +0200 Subject: [PATCH 06/28] Remove unused p_mpjpe_action function from eval_cal.py to streamline code. --- 3dhp_test/lib/eval_cal.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/3dhp_test/lib/eval_cal.py b/3dhp_test/lib/eval_cal.py index f61490b..864daa9 100644 --- a/3dhp_test/lib/eval_cal.py +++ b/3dhp_test/lib/eval_cal.py @@ -137,13 +137,3 @@ def p_mpjpe(predicted, target): predicted_aligned = a * np.matmul(predicted, r) + t return np.mean(np.linalg.norm(predicted_aligned - target, axis=len(target.shape) - 1), axis=len(target.shape) - 2) - - -def p_mpjpe_action(predicted, target): - assert predicted.shape == target.shape - predicted_np = predicted.detach().cpu().numpy() - target_np = target.detach().cpu().numpy() - n, t, v, c = predicted_np.shape - predicted_np = predicted_np.reshape(n * t, v, c) - target_np = target_np.reshape(n * t, v, c) - return p_mpjpe(predicted_np, target_np).reshape(n, t) From a5f05dfa147e4ed61af1ae15d7cb7b0417f5dab7 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 19:34:43 +0200 Subject: [PATCH 07/28] Remove unused imports and functions from utils.py to clean up the codebase. --- 3dhp_test/lib/utils.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/3dhp_test/lib/utils.py b/3dhp_test/lib/utils.py index f2918ad..f88d822 100644 --- a/3dhp_test/lib/utils.py +++ b/3dhp_test/lib/utils.py @@ -7,11 +7,6 @@ Licensed under Apache 2.0 """ -import hashlib - -import torch -from torch.autograd import Variable - class AccumLoss(object): def __init__(self): @@ -27,16 +22,6 @@ def update(self, val, n=1): self.avg = self.sum / self.count -def get_variable(split, target): - out = [] - for item in target: - if split == "train": - out.append(Variable(item, requires_grad=False).contiguous().type(torch.cuda.FloatTensor)) - else: - out.append(Variable(item).contiguous().cuda().type(torch.cuda.FloatTensor)) - return out - - def define_error_list(actions): return { action: { @@ -53,10 +38,3 @@ def define_actions_3dhp(action="*", train=False): if train: return ["Seq1", "Seq2"] return ["Seq1"] - - -def deterministic_random(min_value, max_value, data): - digest = hashlib.sha256(data.encode()).digest() - raw_value = int.from_bytes(digest[:4], byteorder="little", signed=False) - return int(raw_value / (2**32 - 1) * (max_value - min_value)) + min_value - From 68b3594e5426c6773fb094d4fe7f2963a7b27e22 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 19:34:57 +0200 Subject: [PATCH 08/28] Refactor test function in infer_3dhp.py to remove unused variable from data unpacking. --- 3dhp_test/infer_3dhp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3dhp_test/infer_3dhp.py b/3dhp_test/infer_3dhp.py index f5ae586..13e9c92 100644 --- a/3dhp_test/infer_3dhp.py +++ b/3dhp_test/infer_3dhp.py @@ -219,7 +219,7 @@ def test(actions, dataloader, model, args, hypothesis_num=1): print(f"{'=' * 80}\n") for i, data in enumerate(tqdm(dataloader, 0)): - _, gt_3d, input_2d, _, action, subject, _ = data + _, gt_3d, input_2d, action, subject, _ = data input_2d = input_2d.contiguous().to(args.device, dtype=torch.float32) gt_3d = gt_3d.contiguous().to(args.device, dtype=torch.float32) From 172fc0ef507c88e8ab0679b18cda3e197e9af788 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 20:31:10 +0200 Subject: [PATCH 09/28] Add script to convert MPI-INF-3DHP test annotations to FMPose3D format --- 3dhp_test/dataset/prepare_3dhp_test_npz.py | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 3dhp_test/dataset/prepare_3dhp_test_npz.py diff --git a/3dhp_test/dataset/prepare_3dhp_test_npz.py b/3dhp_test/dataset/prepare_3dhp_test_npz.py new file mode 100644 index 0000000..7dd6428 --- /dev/null +++ b/3dhp_test/dataset/prepare_3dhp_test_npz.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +""" +FMPose3D: monocular 3D Pose Estimation via Flow Matching + +Official implementation of the paper: +"FMPose3D: monocular 3D Pose Estimation via Flow Matching" +by Ti Wang, Xiaohang Yu, and Mackenzie Weygandt Mathis +Licensed under Apache 2.0 +""" + +import argparse +from pathlib import Path + +import h5py +import numpy as np + + +DEFAULT_OUTPUT = Path(__file__).resolve().parent / "data_test_3dhp.npz" + + +def convert_test(test_root, output_path): + test_root = Path(test_root) + output_path = Path(output_path) + data_by_subject = {} + + for annot_path in sorted(test_root.glob("TS*/annot_data.mat")): + subject = annot_path.parent.name + print(f"loading {subject}...") + + with h5py.File(annot_path, "r") as data: + valid_frame = np.squeeze(data["valid_frame"][()]) + data_2d = np.squeeze(data["annot2"][()]) + data_3d = np.squeeze(data["univ_annot3"][()]) + + data_by_subject[subject] = { + "data_2d": data_2d, + "data_3d": data_3d, + "valid": valid_frame, + } + + if not data_by_subject: + raise FileNotFoundError(f"No test annot_data.mat files found under {test_root}") + + output_path.parent.mkdir(parents=True, exist_ok=True) + np.savez_compressed(output_path, data=data_by_subject) + print(f"saved {output_path}") + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Convert MPI-INF-3DHP test annotations to the FMPose3D 3DHP test npz." + ) + parser.add_argument( + "--test-root", + type=Path, + required=True, + help="Path containing TS1..TS6 folders from the official MPI-INF-3DHP test set.", + ) + parser.add_argument( + "--output", + type=Path, + default=DEFAULT_OUTPUT, + help="Output npz path consumed by 3dhp_test/infer_3dhp.py.", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + convert_test(args.test_root, args.output) From 4a06f50f4f994e10474aa79b020c9406ccb62c44 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 20:31:20 +0200 Subject: [PATCH 10/28] Add README for MPI-INF-3DHP test dataset with setup instructions and conversion details --- 3dhp_test/dataset/README.md | 75 +++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 3dhp_test/dataset/README.md diff --git a/3dhp_test/dataset/README.md b/3dhp_test/dataset/README.md new file mode 100644 index 0000000..b9194de --- /dev/null +++ b/3dhp_test/dataset/README.md @@ -0,0 +1,75 @@ +# MPI-INF-3DHP Test Data + +`3dhp_test/infer_3dhp.py` expects a processed MPI-INF-3DHP test file at: + +```bash +3dhp_test/dataset/data_test_3dhp.npz +``` + +The `.npz` file is not committed to this repository. Generate it from the official MPI-INF-3DHP test set annotations with `prepare_3dhp_test_npz.py`. + +## Get the official dataset + +Get MPI-INF-3DHP from the [official dataset website](https://vcai.mpi-inf.mpg.de/3dhp-dataset/) and follow its license and access instructions. The official package includes download scripts under `source/`; read the included `README.txt`, edit `source/conf.ig` as instructed, then run the test-set downloader: + +```bash +cd /path/to/mpi_inf_3dhp/source +bash get_testset.sh +``` + +After the script downloads and extracts the test set, set `${MPI_INF_3DHP_TEST_ROOT}` to the extracted test-set root. It should contain `TS1` through `TS6`: + +```text +/path/to/mpi_inf_3dhp/ + mpi_inf_3dhp_test_set/ + TS1/ + annot_data.mat + ... + TS2/ + annot_data.mat + ... + ... + TS6/ + annot_data.mat + ... +``` + +Verify that `TS1` through `TS6` each contain `annot_data.mat`: + +```bash +for subject in TS1 TS2 TS3 TS4 TS5 TS6; do + test -f "${MPI_INF_3DHP_TEST_ROOT}/${subject}/annot_data.mat" \ + && echo "${subject}: ok" \ + || echo "${subject}: missing annot_data.mat" +done +``` + +## Generate `data_test_3dhp.npz` + +Run from the repository root: + +```bash +python 3dhp_test/dataset/prepare_3dhp_test_npz.py \ + --test-root "${MPI_INF_3DHP_TEST_ROOT}" \ + --output 3dhp_test/dataset/data_test_3dhp.npz +``` + +It reads each `TS*/annot_data.mat` and writes a compressed npz with this schema: + +```text +data = { + "TS1": { + "data_2d": annot2, + "data_3d": univ_annot3, + "valid": valid_frame, + }, + ... + "TS6": ... +} +``` + +`ThreeDHPTestDataset` then applies the valid-frame mask, maps the 28-joint 3DHP layout to the 17-joint FMPose3D layout, converts 3D from millimeters to meters, root-centers joints 1-16 around joint 0, and normalizes the 2D coordinates. + +## Acknowledgement + +The MPI-INF-3DHP npz conversion is adapted from the preprocessing workflow in [P-STMO](https://github.com/paTRICK-swk/P-STMO). From 85bd8cc7f2f76eda4182d99adb5e934f1b9abd25 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 20:31:27 +0200 Subject: [PATCH 11/28] Update GPU ID in test script for 3DHP dataset to use GPU 0 --- 3dhp_test/test_3dhp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3dhp_test/test_3dhp.sh b/3dhp_test/test_3dhp.sh index df1383b..288763e 100755 --- a/3dhp_test/test_3dhp.sh +++ b/3dhp_test/test_3dhp.sh @@ -5,7 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" layers=5 -gpu_id=1 +gpu_id=0 eval_sample_steps=3 batch_size=1024 saved_model_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/FMpose3D_pretrained_weights.pth" From b9a6525fed72e6ea3f4d65f3ca1d6492ab49f00e Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 23:27:23 +0200 Subject: [PATCH 12/28] Add README for MPI-INF-3DHP test evaluation with detailed dataset preparation, model configuration, and conversion instructions --- 3dhp_test/{dataset => }/README.md | 40 +++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) rename 3dhp_test/{dataset => }/README.md (57%) diff --git a/3dhp_test/dataset/README.md b/3dhp_test/README.md similarity index 57% rename from 3dhp_test/dataset/README.md rename to 3dhp_test/README.md index b9194de..487f555 100644 --- a/3dhp_test/dataset/README.md +++ b/3dhp_test/README.md @@ -1,14 +1,18 @@ -# MPI-INF-3DHP Test Data +# MPI-INF-3DHP Test Evaluation -`3dhp_test/infer_3dhp.py` expects a processed MPI-INF-3DHP test file at: +This folder contains the MPI-INF-3DHP test-set evaluation entrypoint for FMPose3D. + +## Dataset Preparation + +`infer_3dhp.py` expects a processed MPI-INF-3DHP test file at: ```bash 3dhp_test/dataset/data_test_3dhp.npz ``` -The `.npz` file is not committed to this repository. Generate it from the official MPI-INF-3DHP test set annotations with `prepare_3dhp_test_npz.py`. +The `.npz` file is not committed to this repository. Generate it from the official MPI-INF-3DHP test set annotations with `dataset/prepare_3dhp_test_npz.py`. -## Get the official dataset +### Get the official dataset Get MPI-INF-3DHP from the [official dataset website](https://vcai.mpi-inf.mpg.de/3dhp-dataset/) and follow its license and access instructions. The official package includes download scripts under `source/`; read the included `README.txt`, edit `source/conf.ig` as instructed, then run the test-set downloader: @@ -44,7 +48,7 @@ for subject in TS1 TS2 TS3 TS4 TS5 TS6; do done ``` -## Generate `data_test_3dhp.npz` +### Generate `data_test_3dhp.npz` Run from the repository root: @@ -70,6 +74,32 @@ data = { `ThreeDHPTestDataset` then applies the valid-frame mask, maps the 28-joint 3DHP layout to the 17-joint FMPose3D layout, converts 3D from millimeters to meters, root-centers joints 1-16 around joint 0, and normalizes the 2D coordinates. +## Model and Weights + +By default, the 3DHP test uses the packaged human FMPose3D lifting model (`model_type=fmpose3d_humans`) and leaves `model_weights_path` empty so weights are downloaded automatically from Hugging Face Hub. + +`test_3dhp.sh` exposes the model and weights near the top of the script: + +```bash +model_type="fmpose3d_humans" +model_weights_path="" +model_path="" +``` + +To use local weights, set `model_weights_path` to your own checkpoint or to the human pretrained weights we provide on [Google Drive](https://drive.google.com/drive/folders/1aRZ6t_6IxSfM1nCTFOUXcYVaOk-5koGA?usp=sharing): + +```bash +model_weights_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/FMpose3D_pretrained_weights.pth" +``` + +To use a local model definition instead of the packaged registry model, set `model_path` to a Python file containing class `Model`: + +```bash +model_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/model_GAMLP.py" +``` + +When both `model_path` and `model_weights_path` are set, make sure the local model architecture matches the checkpoint. + ## Acknowledgement The MPI-INF-3DHP npz conversion is adapted from the preprocessing workflow in [P-STMO](https://github.com/paTRICK-swk/P-STMO). From e79043f7799d98cf731669f4e3739fd45946f04c Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 23:28:05 +0200 Subject: [PATCH 13/28] Update test script for 3DHP dataset to include model type, weights, and path configurations --- 3dhp_test/test_3dhp.sh | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/3dhp_test/test_3dhp.sh b/3dhp_test/test_3dhp.sh index 288763e..08a08d7 100755 --- a/3dhp_test/test_3dhp.sh +++ b/3dhp_test/test_3dhp.sh @@ -8,9 +8,20 @@ layers=5 gpu_id=0 eval_sample_steps=3 batch_size=1024 -saved_model_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/FMpose3D_pretrained_weights.pth" -num_hypothesis_list=1 +model_type="fmpose3d_humans" + +# By default, weights are automatically downloaded from Hugging Face Hub. +# To use local weights instead, uncomment the line below: +# model_weights_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/FMpose3D_pretrained_weights.pth" +model_weights_path="" + +# By default, use the packaged FMPose3D human model definition. To use a local +# model definition instead, uncomment the line below: +# model_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/model_GAMLP.py" +model_path="" + +num_hypothesis_list=10 subjects_test=TS1,TS2,TS3,TS4,TS5,TS6 folder_name=s_${eval_sample_steps}_S${subjects_test}_h${num_hypothesis_list}_$(date +%Y%m%d_%H%M%S) @@ -25,7 +36,9 @@ python3 "${SCRIPT_DIR}/infer_3dhp.py" \ --token-dim 256 \ --eval-sample-steps "${eval_sample_steps}" \ --dataset-path "${SCRIPT_DIR}/dataset/data_test_3dhp.npz" \ - --saved-model-path "${saved_model_path}" \ + --model-type "${model_type}" \ + --model-weights-path "${model_weights_path}" \ + --model-path "${model_path}" \ --num-hypothesis-list "${num_hypothesis_list}" \ --folder-name "${folder_name}" \ --test-augmentation True \ From b2e64d29308337bd5359b2d9dde7765c142a7048 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 23:28:14 +0200 Subject: [PATCH 14/28] Enhance infer_3dhp.py by adding model weights path argument and updating model loading logic to resolve weights path dynamically. --- 3dhp_test/infer_3dhp.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/3dhp_test/infer_3dhp.py b/3dhp_test/infer_3dhp.py index 13e9c92..b78dad0 100644 --- a/3dhp_test/infer_3dhp.py +++ b/3dhp_test/infer_3dhp.py @@ -30,6 +30,7 @@ from fmpose3d.aggregation_methods import aggregation_RPEA_joint_level from fmpose3d.models import get_model +from fmpose3d.utils.weights import resolve_weights_path from lib.camera import camera_params_for_subject from lib.dataset_3dhp import ThreeDHPTestDataset @@ -53,12 +54,17 @@ def parse_args(): parser.add_argument("--dataset-path", type=Path, default=ROOT / "dataset" / "data_test_3dhp.npz") parser.add_argument( "--model-path", - type=Path, - default=None, + type=str, + default="", help="Optional path to a Model definition. Defaults to the package FMPose3D human model.", ) parser.add_argument("--model-type", default="fmpose3d_humans", type=str) - parser.add_argument("--saved-model-path", type=Path, default=ROOT / "pretrained" / "fmpose3d_h36m" / "FMpose3D_pretrained_weights.pth") + parser.add_argument( + "--model-weights-path", + default="", + type=str, + help="Local checkpoint path. Empty downloads the weights for --model-type from Hugging Face.", + ) parser.add_argument("--results-dir", type=Path, default=ROOT / "results") parser.add_argument("--folder-name", type=str, default="") parser.add_argument("--gpu", default="0", type=str) @@ -105,7 +111,7 @@ def configure_reproducibility(seed): def load_model_class(model_path, model_type): - if model_path is None: + if not model_path: return get_model(model_type) model_path = Path(model_path).resolve() spec = importlib.util.spec_from_file_location(model_path.stem, model_path) @@ -342,8 +348,9 @@ def main(): model_cls = load_model_class(args.model_path, args.model_type) model = model_cls(args).to(args.device) - print(args.saved_model_path) - pre_dict = torch.load(args.saved_model_path, map_location=args.device, weights_only=True) + model_weights_path = resolve_weights_path(args.model_weights_path, args.model_type) + print(model_weights_path) + pre_dict = torch.load(model_weights_path, map_location=args.device, weights_only=True) model_dict = model.state_dict() state_dict = {k: v for k, v in pre_dict.items() if k in model_dict.keys()} model_dict.update(state_dict) From ad158c691d1c449124d7a55b0cb9816aae02839a Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 23:37:34 +0200 Subject: [PATCH 15/28] Update --- 3dhp_test/README.md | 54 ++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/3dhp_test/README.md b/3dhp_test/README.md index 487f555..5a44e90 100644 --- a/3dhp_test/README.md +++ b/3dhp_test/README.md @@ -1,6 +1,32 @@ # MPI-INF-3DHP Test Evaluation -This folder contains the MPI-INF-3DHP test-set evaluation entrypoint for FMPose3D. +This folder contains utilities for evaluating monocular 3D pose lifting models on the MPI-INF-3DHP test set. + +## Model and Weights + +By default, the 3DHP test uses the packaged human FMPose3D lifting model (`model_type=fmpose3d_humans`) and leaves `model_weights_path` empty so weights are downloaded automatically from Hugging Face Hub. + +`test_3dhp.sh` exposes the model and weights near the top of the script: + +```bash +model_type="fmpose3d_humans" +model_weights_path="" +model_path="" +``` + +To use local weights, set `model_weights_path` to your own checkpoint or to the human pretrained weights we provide on [Google Drive](https://drive.google.com/drive/folders/1aRZ6t_6IxSfM1nCTFOUXcYVaOk-5koGA?usp=sharing): + +```bash +model_weights_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/FMpose3D_pretrained_weights.pth" +``` + +To use a local model definition instead of the packaged registry model, set `model_path` to a Python file that defines `Model`: + +```bash +model_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/model_GAMLP.py" +``` + +When both `model_path` and `model_weights_path` are set, make sure the local model architecture matches the checkpoint. ## Dataset Preparation @@ -74,32 +100,6 @@ data = { `ThreeDHPTestDataset` then applies the valid-frame mask, maps the 28-joint 3DHP layout to the 17-joint FMPose3D layout, converts 3D from millimeters to meters, root-centers joints 1-16 around joint 0, and normalizes the 2D coordinates. -## Model and Weights - -By default, the 3DHP test uses the packaged human FMPose3D lifting model (`model_type=fmpose3d_humans`) and leaves `model_weights_path` empty so weights are downloaded automatically from Hugging Face Hub. - -`test_3dhp.sh` exposes the model and weights near the top of the script: - -```bash -model_type="fmpose3d_humans" -model_weights_path="" -model_path="" -``` - -To use local weights, set `model_weights_path` to your own checkpoint or to the human pretrained weights we provide on [Google Drive](https://drive.google.com/drive/folders/1aRZ6t_6IxSfM1nCTFOUXcYVaOk-5koGA?usp=sharing): - -```bash -model_weights_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/FMpose3D_pretrained_weights.pth" -``` - -To use a local model definition instead of the packaged registry model, set `model_path` to a Python file containing class `Model`: - -```bash -model_path="${SCRIPT_DIR}/pretrained/fmpose3d_h36m/model_GAMLP.py" -``` - -When both `model_path` and `model_weights_path` are set, make sure the local model architecture matches the checkpoint. - ## Acknowledgement The MPI-INF-3DHP npz conversion is adapted from the preprocessing workflow in [P-STMO](https://github.com/paTRICK-swk/P-STMO). From 9ef98479697532aecb1c28e859fee2436986f936 Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 23:38:40 +0200 Subject: [PATCH 16/28] Add .gitignore file for 3DHP test directory to exclude cache, results, and dataset files --- 3dhp_test/.gitignore | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 3dhp_test/.gitignore diff --git a/3dhp_test/.gitignore b/3dhp_test/.gitignore new file mode 100644 index 0000000..679ba8d --- /dev/null +++ b/3dhp_test/.gitignore @@ -0,0 +1,10 @@ +__pycache__/ +*.py[cod] + +results/ + +*.pth +*.pt +*.ckpt + +dataset/*.npz From ef0bd5c4dedd767b982001b77e5bc7a004081d2a Mon Sep 17 00:00:00 2001 From: ti Date: Sat, 13 Jun 2026 23:39:38 +0200 Subject: [PATCH 17/28] Update .gitignore to include exceptions for 3dhp_test/lib and its Python files, while maintaining exclusion for predictions directory. --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3f7f92b..6220907 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,8 @@ downloads/ eggs/ .eggs/ lib/ +!3dhp_test/lib/ +!3dhp_test/lib/*.py lib64/ parts/ sdist/ @@ -50,4 +52,4 @@ htmlcov/ pre_trained_models/ demo/predictions/ demo/images/ -**/predictions/ \ No newline at end of file +**/predictions/ From 318743725a2299a1928c1794ec3b1a2bb49fd14d Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 00:04:43 +0200 Subject: [PATCH 18/28] Fix action error sum update in mpjpe_by_action_p2 to use individual distance values --- 3dhp_test/lib/eval_cal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3dhp_test/lib/eval_cal.py b/3dhp_test/lib/eval_cal.py index 864daa9..e48f131 100644 --- a/3dhp_test/lib/eval_cal.py +++ b/3dhp_test/lib/eval_cal.py @@ -76,7 +76,7 @@ def mpjpe_by_action_p2(predicted, target, action, action_error_sum): else: for i in range(num): action_name = _action_name(action[i]) - action_error_sum[action_name]["p2"].update(np.mean(dist), 1) + action_error_sum[action_name]["p2"].update(float(dist[i]), 1) return action_error_sum From a3ccb98abf99f46c703e8d89ed44570d976b38f0 Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 09:44:35 +0200 Subject: [PATCH 19/28] Refactor tqdm usage in test function to remove unnecessary argument --- 3dhp_test/infer_3dhp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3dhp_test/infer_3dhp.py b/3dhp_test/infer_3dhp.py index b78dad0..1742efa 100644 --- a/3dhp_test/infer_3dhp.py +++ b/3dhp_test/infer_3dhp.py @@ -224,7 +224,7 @@ def test(actions, dataloader, model, args, hypothesis_num=1): print(f"Testing with {hypothesis_num} hypothesis(es), eval_steps: {eval_steps}") print(f"{'=' * 80}\n") - for i, data in enumerate(tqdm(dataloader, 0)): + for i, data in enumerate(tqdm(dataloader)): _, gt_3d, input_2d, action, subject, _ = data input_2d = input_2d.contiguous().to(args.device, dtype=torch.float32) gt_3d = gt_3d.contiguous().to(args.device, dtype=torch.float32) From 8a344b8eee8b8771268ab1181f68e1b6b3e7d08e Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 09:46:53 +0200 Subject: [PATCH 20/28] Enhance model loading error handling in load_model_class function --- 3dhp_test/infer_3dhp.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/3dhp_test/infer_3dhp.py b/3dhp_test/infer_3dhp.py index 1742efa..476b006 100644 --- a/3dhp_test/infer_3dhp.py +++ b/3dhp_test/infer_3dhp.py @@ -115,10 +115,13 @@ def load_model_class(model_path, model_type): return get_model(model_type) model_path = Path(model_path).resolve() spec = importlib.util.spec_from_file_location(model_path.stem, model_path) + if spec is None or spec.loader is None: + raise ImportError(f"Could not load model definition from {model_path}") module = importlib.util.module_from_spec(spec) - assert spec.loader is not None spec.loader.exec_module(module) - return getattr(module, "Model") + if not hasattr(module, "Model"): + raise AttributeError(f"Model definition file {model_path} does not define a Model class") + return module.Model def get_device(gpu): From 4a7bd6093d258464d153e0d784aa84b91627a408 Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 09:56:14 +0200 Subject: [PATCH 21/28] Add tests for 3DHP inference module and error calculation --- tests/test_3dhp_infer_model_loading.py | 95 ++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 tests/test_3dhp_infer_model_loading.py diff --git a/tests/test_3dhp_infer_model_loading.py b/tests/test_3dhp_infer_model_loading.py new file mode 100644 index 0000000..2a91072 --- /dev/null +++ b/tests/test_3dhp_infer_model_loading.py @@ -0,0 +1,95 @@ +""" +FMPose3D: monocular 3D Pose Estimation via Flow Matching + +Official implementation of the paper: +"FMPose3D: monocular 3D Pose Estimation via Flow Matching" +by Ti Wang, Xiaohang Yu, and Mackenzie Weygandt Mathis +Licensed under Apache 2.0 +""" + +import importlib.util +from pathlib import Path + +import torch + +from fmpose3d.models import get_model +from fmpose3d.utils.weights import resolve_weights_path + + +def load_infer_3dhp_module(): + module_path = Path(__file__).resolve().parents[1] / "3dhp_test" / "infer_3dhp.py" + spec = importlib.util.spec_from_file_location("infer_3dhp", module_path) + if spec is None or spec.loader is None: + raise ImportError(f"Could not load {module_path}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_default_3dhp_model_loads_from_registry(): + infer_3dhp = load_infer_3dhp_module() + + model_cls = infer_3dhp.load_model_class(None, "fmpose3d_humans") + + assert model_cls is get_model("fmpose3d_humans") + + +def test_camera_tensor_uses_each_sample_subject(): + infer_3dhp = load_infer_3dhp_module() + subjects = ["TS4", "TS5"] + + cam_tensor = infer_3dhp.camera_tensor_for_subjects(subjects, torch.device("cpu")) + + assert cam_tensor.shape == (2, 9) + assert not torch.equal(cam_tensor[0], cam_tensor[1]) + + +def test_3dhp_imports_common_weight_resolver(): + infer_3dhp = load_infer_3dhp_module() + + assert infer_3dhp.resolve_weights_path is resolve_weights_path + + +def test_p_mpjpe_mixed_actions_uses_per_sample_errors(): + infer_3dhp = load_infer_3dhp_module() + eval_cal = infer_3dhp.eval_cal + error_sum = infer_3dhp.define_error_list(["Walk", "Run"]) + + target = torch.tensor( + [ + [ + [ + [0.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + ] + ], + [ + [ + [0.0, 0.0, 0.0], + [1.0, 0.0, 0.0], + [0.0, 1.0, 0.0], + ] + ], + ], + dtype=torch.float32, + ) + predicted = target.clone() + predicted[1, 0, 1] = torch.tensor([1.4, 0.2, 0.3]) + predicted[1, 0, 2] = torch.tensor([-0.1, 0.7, 0.5]) + + expected = eval_cal.p_mpjpe( + predicted.numpy().reshape(-1, 3, 3), + target.numpy().reshape(-1, 3, 3), + ) + + eval_cal.mpjpe_by_action_p2( + predicted, + target, + ["Walk", "Run"], + error_sum, + ) + + assert error_sum["Walk"]["p2"].avg == float(expected[0]) + assert error_sum["Run"]["p2"].avg == float(expected[1]) + assert error_sum["Walk"]["p2"].avg != error_sum["Run"]["p2"].avg From 5e282b9eb855115ea3839e7e507500e8d6e3ad9b Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 09:57:03 +0200 Subject: [PATCH 22/28] Update .gitignore to exclude animal dataset and checkpoint directories --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 6220907..3ffcb78 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,9 @@ htmlcov/ *.pkl *.h5 *.ckpt +animals/checkpoint/ +animals/dataset/animal3d/ +animals/dataset/control_animal3dlatest/ # Excluded directories pre_trained_models/ From db70555ab26916c33e119f69cc7bf16b8d56af7e Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 10:09:28 +0200 Subject: [PATCH 23/28] Add load_model_weights function for improved model loading --- 3dhp_test/infer_3dhp.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/3dhp_test/infer_3dhp.py b/3dhp_test/infer_3dhp.py index 476b006..0165d1d 100644 --- a/3dhp_test/infer_3dhp.py +++ b/3dhp_test/infer_3dhp.py @@ -124,6 +124,14 @@ def load_model_class(model_path, model_type): return module.Model +def load_model_weights(model, model_weights_path, device): + pre_dict = torch.load(model_weights_path, map_location=device, weights_only=True) + try: + model.load_state_dict(pre_dict, strict=True) + except RuntimeError as exc: + raise RuntimeError(f"Checkpoint {model_weights_path} is incompatible with {model.__class__.__name__}") from exc + + def get_device(gpu): if gpu in {"", "-1", "cpu", "none", "None"}: return torch.device("cpu") @@ -353,11 +361,7 @@ def main(): model_weights_path = resolve_weights_path(args.model_weights_path, args.model_type) print(model_weights_path) - pre_dict = torch.load(model_weights_path, map_location=args.device, weights_only=True) - model_dict = model.state_dict() - state_dict = {k: v for k, v in pre_dict.items() if k in model_dict.keys()} - model_dict.update(state_dict) - model.load_state_dict(model_dict) + load_model_weights(model, model_weights_path, args.device) print("model loaded successfully!") actions = define_actions_3dhp(args.actions, train=False) From 5ae8326fea77d8a1be38ea68194c17480bac8969 Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 11:20:46 +0200 Subject: [PATCH 24/28] Add tests for handling partial checkpoints and action filtering in 3DHP module --- tests/test_3dhp_infer_model_loading.py | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/test_3dhp_infer_model_loading.py b/tests/test_3dhp_infer_model_loading.py index 2a91072..7047836 100644 --- a/tests/test_3dhp_infer_model_loading.py +++ b/tests/test_3dhp_infer_model_loading.py @@ -11,6 +11,7 @@ from pathlib import Path import torch +from torch import nn from fmpose3d.models import get_model from fmpose3d.utils.weights import resolve_weights_path @@ -50,6 +51,37 @@ def test_3dhp_imports_common_weight_resolver(): assert infer_3dhp.resolve_weights_path is resolve_weights_path +def test_3dhp_weight_loading_rejects_partial_checkpoints(tmp_path): + infer_3dhp = load_infer_3dhp_module() + model = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 1)) + checkpoint_path = tmp_path / "partial.pth" + torch.save({"0.weight": model.state_dict()["0.weight"]}, checkpoint_path) + + try: + infer_3dhp.load_model_weights(model, checkpoint_path, torch.device("cpu")) + except RuntimeError as exc: + assert "incompatible" in str(exc) + else: + raise AssertionError("partial checkpoint should fail strict model loading") + + +def test_3dhp_actions_argument_allows_only_default_bucket(): + infer_3dhp = load_infer_3dhp_module() + + assert infer_3dhp.define_actions_3dhp("*", train=False) == ["Seq1"] + + +def test_3dhp_actions_argument_rejects_action_filtering(): + infer_3dhp = load_infer_3dhp_module() + + try: + infer_3dhp.define_actions_3dhp("Seq1", train=False) + except ValueError as exc: + assert "do not include action labels" in str(exc) + else: + raise AssertionError("3DHP action filtering should fail") + + def test_p_mpjpe_mixed_actions_uses_per_sample_errors(): infer_3dhp = load_infer_3dhp_module() eval_cal = infer_3dhp.eval_cal From 7cd0cbb014ba0590a30125bd9f021f78a378783a Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 11:20:51 +0200 Subject: [PATCH 25/28] Refactor define_actions_3dhp function to improve error handling for action parameter --- 3dhp_test/lib/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/3dhp_test/lib/utils.py b/3dhp_test/lib/utils.py index f88d822..cd5aa78 100644 --- a/3dhp_test/lib/utils.py +++ b/3dhp_test/lib/utils.py @@ -35,6 +35,6 @@ def define_error_list(actions): def define_actions_3dhp(action="*", train=False): - if train: - return ["Seq1", "Seq2"] - return ["Seq1"] + if action != "*": + raise ValueError("MPI-INF-3DHP test annotations do not include action labels; use --actions '*'.") + return ["Seq1", "Seq2"] if train else ["Seq1"] From c58d8f9f8c37875a94dfb5c4a9eff6c544e0f85f Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 11:22:42 +0200 Subject: [PATCH 26/28] Add help text for actions argument in parse_args function to clarify usage --- 3dhp_test/infer_3dhp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3dhp_test/infer_3dhp.py b/3dhp_test/infer_3dhp.py index 0165d1d..bd74639 100644 --- a/3dhp_test/infer_3dhp.py +++ b/3dhp_test/infer_3dhp.py @@ -77,7 +77,7 @@ def parse_args(): parser.add_argument("--token-dim", default=256, type=int) parser.add_argument("--n-joints", default=17, type=int) parser.add_argument("--dataset", default="3dhp_valid", type=str) - parser.add_argument("--actions", default="*", type=str) + parser.add_argument("--actions", default="*", type=str, help="3DHP test annotations do not include action labels; only '*' is supported.") parser.add_argument("--subjects-test", default="TS1,TS2,TS3,TS4,TS5,TS6", type=str) parser.add_argument("--eval-sample-steps", default="2", type=str) parser.add_argument("--num-hypothesis-list", default="1", type=str) From 99e54e2febef7765cc8a1c7efdc44dfbb36a3a35 Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 11:30:12 +0200 Subject: [PATCH 27/28] Add h5py dependency to pyproject.toml for improved data handling --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index e7df467..a3021b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ dependencies = [ "numpy>=1.18.5,<2.0", "tqdm>=4.60.0", "scipy>=1.7.0", + "h5py>=3.0.0", "yacs>=0.1.8", "opencv-python>=4.5.0", "numba>=0.56.0", From 0a1bdbc45f1b8b7841758ca0680158f36653597d Mon Sep 17 00:00:00 2001 From: ti Date: Sun, 14 Jun 2026 11:33:21 +0200 Subject: [PATCH 28/28] Update parse_args function to accept argv parameter and add help text for frames argument --- 3dhp_test/infer_3dhp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/3dhp_test/infer_3dhp.py b/3dhp_test/infer_3dhp.py index bd74639..ac3a575 100644 --- a/3dhp_test/infer_3dhp.py +++ b/3dhp_test/infer_3dhp.py @@ -49,7 +49,7 @@ def str2bool(value): raise argparse.ArgumentTypeError(f"Expected boolean value, got {value}") -def parse_args(): +def parse_args(argv=None): parser = argparse.ArgumentParser(description="Clean 3DHP inference with processed test npz.") parser.add_argument("--dataset-path", type=Path, default=ROOT / "dataset" / "data_test_3dhp.npz") parser.add_argument( @@ -70,7 +70,7 @@ def parse_args(): parser.add_argument("--gpu", default="0", type=str) parser.add_argument("--workers", default=8, type=int) parser.add_argument("--batch-size", default=1024, type=int) - parser.add_argument("--frames", default=1, type=int) + parser.add_argument("--frames", default=1, type=int, choices=[1], help="3DHP test evaluation uses single-frame samples; only 1 is supported.") parser.add_argument("--layers", default=5, type=int) parser.add_argument("--channel", default=512, type=int) parser.add_argument("--d-hid", default=1024, type=int) @@ -87,7 +87,7 @@ def parse_args(): parser.add_argument("--test-augmentation-flip-hypothesis", default=True, type=str2bool) parser.add_argument("--max-batches", default=0, type=int, help="Smoke test limit. 0 means full evaluation.") parser.add_argument("--manual-seed", default=1, type=int) - args = parser.parse_args() + args = parser.parse_args(argv) args.pad = (args.frames - 1) // 2 args.root_joint = 0