xrt: Remove old_rgb hand tracking · matrixfurry.com/monado@1bba34e

-1

src/xrt/drivers/CMakeLists.txt

··· 293 293 aux_util 294 294 aux_math 295 295 t_ht_mercury 296 - t_ht_old_rgb 297 296 hand_async 298 297 ) 299 298

+2 -11

src/xrt/drivers/ht/ht_driver.c

··· 29 29 30 30 // Save me, Obi-Wan! 31 31 32 - #include "../../tracking/hand/old_rgb/rgb_interface.h" 33 32 #include "../../tracking/hand/mercury/hg_interface.h" 34 33 35 34 #ifdef XRT_BUILD_DRIVER_DEPTHAI ··· 240 239 int 241 240 ht_device_create(struct xrt_frame_context *xfctx, 242 241 struct t_stereo_camera_calibration *calib, 243 - enum t_hand_tracking_algorithm algorithm_choice, 244 242 struct t_camera_extra_info extra_camera_info, 245 243 struct xrt_slam_sinks **out_sinks, 246 244 struct xrt_device **out_device) ··· 251 249 252 250 struct t_hand_tracking_sync *sync = NULL; 253 251 254 - switch (algorithm_choice) { 255 - case HT_ALGORITHM_MERCURY: { 256 - sync = t_hand_tracking_sync_mercury_create(calib, extra_camera_info); 257 - } break; 258 - case HT_ALGORITHM_OLD_RGB: { 259 - //!@todo Either have this deal with the output space correctly, or have everything use LEFT_CAMERA 260 - sync = t_hand_tracking_sync_old_rgb_create(calib); 261 - } 262 - } 252 + sync = t_hand_tracking_sync_mercury_create(calib, extra_camera_info); 253 + 263 254 struct ht_device *htd = ht_device_create_common(calib, false, xfctx, sync); 264 255 265 256 HT_DEBUG(htd, "Hand Tracker initialized!");

-2

src/xrt/drivers/ht/ht_interface.h

··· 36 36 * 37 37 * @param xfctx Frame context to attach the tracker to 38 38 * @param calib Calibration struct for stereo camera 39 - * @param algorithm_choice Which algorithm to use for hand tracking 40 39 * @param out_sinks Sinks to stream camera data to 41 40 * @param out_device Newly created hand tracker "device" 42 41 * @return int 0 on success ··· 44 43 int 45 44 ht_device_create(struct xrt_frame_context *xfctx, 46 45 struct t_stereo_camera_calibration *calib, 47 - enum t_hand_tracking_algorithm algorithm_choice, 48 46 struct t_camera_extra_info extra_camera_info, 49 47 struct xrt_slam_sinks **out_sinks, 50 48 struct xrt_device **out_device);

+2 -3

src/xrt/drivers/rift_s/rift_s_tracker.c

··· 256 256 extra_camera_info.views[0].camera_orientation = CAMERA_ORIENTATION_90; 257 257 extra_camera_info.views[1].camera_orientation = CAMERA_ORIENTATION_90; 258 258 259 - int create_status = ht_device_create(xfctx, // 260 - t->stereo_calib, // 261 - HT_ALGORITHM_MERCURY, // 259 + int create_status = ht_device_create(xfctx, // 260 + t->stereo_calib, // 262 261 extra_camera_info, 263 262 &sinks, // 264 263 &device);

+4 -5

src/xrt/drivers/wmr/wmr_hmd.c

··· 1549 1549 extra_camera_info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE; 1550 1550 extra_camera_info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE; 1551 1551 1552 - int create_status = ht_device_create(&wh->tracking.xfctx, // 1553 - stereo_calib, // 1554 - HT_ALGORITHM_MERCURY, // 1555 - extra_camera_info, // 1556 - &sinks, // 1552 + int create_status = ht_device_create(&wh->tracking.xfctx, // 1553 + stereo_calib, // 1554 + extra_camera_info, // 1555 + &sinks, // 1557 1556 &device); 1558 1557 if (create_status != 0) { 1559 1558 return create_status;

-12

src/xrt/include/tracking/t_hand_tracking.h

··· 105 105 }; 106 106 107 107 /*! 108 - * @brief Which hand-tracking algorithm should we use? 109 - * 110 - * Never use HT_ALGORITHM_OLD_RGB. The tracking quality is extremely poor. 111 - * @ingroup xrt_iface 112 - */ 113 - enum t_hand_tracking_algorithm 114 - { 115 - HT_ALGORITHM_MERCURY, 116 - HT_ALGORITHM_OLD_RGB 117 - }; 118 - 119 - /*! 120 108 * Synchronously processes frames and returns two hands. 121 109 */ 122 110 struct t_hand_tracking_sync

-1

src/xrt/state_trackers/gui/gui_scene_hand_tracking_demo.c

··· 80 80 int create_status = ht_device_create( // 81 81 &usysd->xfctx, // 82 82 calib, // 83 - HT_ALGORITHM_MERCURY, // 84 83 extra_camera_info, // 85 84 &hand_sinks, // 86 85 &ht_dev); //

+2 -18

src/xrt/targets/common/target_builder_lighthouse.c

··· 59 59 DEBUG_GET_ONCE_BOOL_OPTION(vive_over_survive, "VIVE_OVER_SURVIVE", false) 60 60 DEBUG_GET_ONCE_BOOL_OPTION(vive_slam, "VIVE_SLAM", false) 61 61 DEBUG_GET_ONCE_TRISTATE_OPTION(lh_handtracking, "LH_HANDTRACKING") 62 - DEBUG_GET_ONCE_BOOL_OPTION(ht_use_old_rgb, "HT_USE_OLD_RGB", false) 63 62 64 63 #define LH_TRACE(...) U_LOG_IFL_T(debug_get_log_option_lh_log(), __VA_ARGS__) 65 64 #define LH_DEBUG(...) U_LOG_IFL_D(debug_get_log_option_lh_log(), __VA_ARGS__) ··· 220 219 info.views[0].boundary.circle.normalized_radius = 0.55; 221 220 info.views[1].boundary.circle.normalized_radius = 0.55; 222 221 223 - bool old_rgb = debug_get_bool_option_ht_use_old_rgb(); 224 - enum t_hand_tracking_algorithm ht_algorithm = old_rgb ? HT_ALGORITHM_OLD_RGB : HT_ALGORITHM_MERCURY; 225 - 226 222 struct xrt_device *ht_device = NULL; 227 223 int create_status = ht_device_create(&lhs->devices->xfctx, // 228 224 stereo_calib, // 229 - ht_algorithm, // 230 225 info, // 231 226 &sinks, // 232 227 &ht_device); ··· 419 414 struct xrt_frame_sink *entry_left_sink = NULL; 420 415 struct xrt_frame_sink *entry_right_sink = NULL; 421 416 struct xrt_frame_sink *entry_sbs_sink = NULL; 422 - bool old_rgb_ht = debug_get_bool_option_ht_use_old_rgb(); 423 417 424 - if (slam_enabled && hand_enabled && !old_rgb_ht) { 418 + if (slam_enabled && hand_enabled) { 425 419 u_sink_split_create(&lhs->devices->xfctx, slam_sinks->left, hand_sinks->left, &entry_left_sink); 426 420 u_sink_split_create(&lhs->devices->xfctx, slam_sinks->right, hand_sinks->right, &entry_right_sink); 427 421 u_sink_stereo_sbs_to_slam_sbs_create(&lhs->devices->xfctx, entry_left_sink, entry_right_sink, 428 422 &entry_sbs_sink); 429 423 u_sink_create_format_converter(&lhs->devices->xfctx, XRT_FORMAT_L8, entry_sbs_sink, &entry_sbs_sink); 430 - } else if (slam_enabled && hand_enabled && old_rgb_ht) { 431 - struct xrt_frame_sink *hand_sbs = NULL; 432 - struct xrt_frame_sink *slam_sbs = NULL; 433 - 
u_sink_stereo_sbs_to_slam_sbs_create(&lhs->devices->xfctx, hand_sinks->left, hand_sinks->right, 434 - &hand_sbs); 435 - u_sink_stereo_sbs_to_slam_sbs_create(&lhs->devices->xfctx, slam_sinks->left, slam_sinks->right, 436 - &slam_sbs); 437 - u_sink_create_format_converter(&lhs->devices->xfctx, XRT_FORMAT_L8, slam_sbs, &slam_sbs); 438 - u_sink_split_create(&lhs->devices->xfctx, slam_sbs, hand_sbs, &entry_sbs_sink); 439 424 } else if (slam_enabled) { 440 425 entry_left_sink = slam_sinks->left; 441 426 entry_right_sink = slam_sinks->right; ··· 443 428 &entry_sbs_sink); 444 429 u_sink_create_format_converter(&lhs->devices->xfctx, XRT_FORMAT_L8, entry_sbs_sink, &entry_sbs_sink); 445 430 } else if (hand_enabled) { 446 - enum xrt_format fmt = old_rgb_ht ? XRT_FORMAT_R8G8B8 : XRT_FORMAT_L8; 447 431 entry_left_sink = hand_sinks->left; 448 432 entry_right_sink = hand_sinks->right; 449 433 u_sink_stereo_sbs_to_slam_sbs_create(&lhs->devices->xfctx, entry_left_sink, entry_right_sink, 450 434 &entry_sbs_sink); 451 - u_sink_create_format_converter(&lhs->devices->xfctx, fmt, entry_sbs_sink, &entry_sbs_sink); 435 + u_sink_create_format_converter(&lhs->devices->xfctx, XRT_FORMAT_L8, entry_sbs_sink, &entry_sbs_sink); 452 436 } else { 453 437 LH_WARN("No visual trackers were set"); 454 438 return false;

+4 -5

src/xrt/targets/common/target_builder_north_star.c

··· 294 294 extra_camera_info.views[0].boundary_type = HT_IMAGE_BOUNDARY_NONE; 295 295 extra_camera_info.views[1].boundary_type = HT_IMAGE_BOUNDARY_NONE; 296 296 297 - int create_status = ht_device_create(&usysd->xfctx, // 298 - calib, // 299 - HT_ALGORITHM_MERCURY, // 300 - extra_camera_info, // 301 - &hand_sinks, // 297 + int create_status = ht_device_create(&usysd->xfctx, // 298 + calib, // 299 + extra_camera_info, // 300 + &hand_sinks, // 302 301 out_hand_device); 303 302 t_stereo_camera_calibration_reference(&calib, NULL); 304 303 if (create_status != 0) {

-1

src/xrt/tracking/hand/CMakeLists.txt

-31

src/xrt/tracking/hand/old_rgb/CMakeLists.txt

··· 1 - # Copyright 2019-2022, Collabora, Ltd. 2 - # SPDX-License-Identifier: BSL-1.0 3 - 4 - # Old RGB hand tracking library. 5 - add_library( 6 - t_ht_old_rgb STATIC 7 - rgb_hand_math.hpp 8 - rgb_image_math.hpp 9 - rgb_interface.h 10 - rgb_model.hpp 11 - rgb_nms.hpp 12 - rgb_sync.cpp 13 - rgb_sync.hpp 14 - ) 15 - target_link_libraries( 16 - t_ht_old_rgb 17 - PUBLIC aux-includes xrt-external-cjson 18 - PRIVATE 19 - aux_math 20 - aux_tracking 21 - aux_os 22 - aux_util 23 - ONNXRuntime::ONNXRuntime 24 - ${OpenCV_LIBRARIES} 25 - ) 26 - if(XRT_HAVE_OPENCV) 27 - target_include_directories( 28 - t_ht_old_rgb SYSTEM PRIVATE ${OpenCV_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR} 29 - ) 30 - target_link_libraries(t_ht_old_rgb PUBLIC ${OpenCV_LIBRARIES}) 31 - endif()

-90

src/xrt/tracking/hand/old_rgb/readme.md

··· 1 -  7 - 8 - # What is this? 9 - This is a driver to do optical hand tracking. The actual code mostly written by Moses Turner, with tons of help from Marcus Edel, Jakob Bornecrantz, Ryan Pavlik, and Christoph Haag. Jakob Bornecrantz and Marcus Edel are the main people who gathered training data for the initial Collabora models. 10 - 11 - In `main` it only works with Valve Index, although we've used a lot of Luxonis cameras in development. With additional work, it should work fine with devices like the T265, or PS4/PS5 cam, should there be enough interest for any of those. 12 - 13 - Under good lighting, I would say it's around as good as Oculus Quest 2's hand tracking. Not that I'm trying to make any claims; that's just what I honestly would tell somebody if they are wondering if it's worth testing out. 14 - 15 - 16 - # How to get started 17 - ## Get dependencies 18 - ### Get OpenCV 19 - Each distro has its own way to get OpenCV, and it can change at any time; there's no specific reason to trust this documentation over anything else. 20 - 21 - Having said that, on Ubuntu, it would look something like 22 - 23 - ``` 24 - sudo apt install libopencv-dev libopencv-contrib-dev 25 - ``` 26 - 27 - Or you could build it from source, or get it from one of the other 1000s of package managers. Whatever floats your boat. 28 - 29 - ### Get ONNXRuntime 30 - I followed the instructions here: https://onnxruntime.ai/docs/how-to/build/inferencing.html#linux 31 - 32 - then had to do 33 - ``` 34 - cd build/Linux/RelWithDebInfo/ 35 - sudo make install 36 - ``` 37 - 38 - ### Get the ML models 39 - Make sure you have git-lfs installed, then run ./scripts/get-ht-models.sh. Should work fine. 40 - 41 - ## Building the driver 42 - Once onnxruntime is installed, you should be able to build like normal with CMake or Meson. 43 - 44 - If it properly found everything, - CMake should say 45 - 46 - ``` 47 - -- Found ONNXRUNTIME: /usr/local/include/onnxruntime 48 - 49 - [...] 
50 - 51 - -- # DRIVER_HANDTRACKING: ON 52 - ``` 53 - 54 - and Meson should say 55 - 56 - ``` 57 - Run-time dependency libonnxruntime found: YES 1.8.2 58 - 59 - [...] 60 - 61 - Message: Configuration done! 62 - Message: drivers: [...] handtracking, [...] 63 - ``` 64 - 65 - ## Running the driver 66 - Currently, it's only set up to work on Valve Index. 67 - 68 - So, the two things you can do are 69 - * Use the `survive` driver with both controllers off - It should automagically start hand tracking upon not finding any controllers. 70 - * Use the `vive` driver with `VIVE_USE_HANDTRACKING=ON` and it should work the same as the survive driver. 71 - 72 - You can see if the driver is working with `openxr-simple-playground`, StereoKit, or any other app you know of. Poke me (Moses) if you find any other cool hand-tracking apps; I'm always looking for more! 73 - 74 - # Tips and tricks 75 - 76 - This tracking likes to be in a bright, evenly-lit room with multiple light sources. Turn all the lights on, see if you can find any lamps. If the ML models can see well, the tracking quality can get surprisingly nice. 77 - 78 - Sometimes, the tracking fails when it can see more than one hand. As the tracking gets better (we train better ML models and squash more bugs) this should happen less often or not at all. If it does, put one of your hands down, and it should resume tracking the remaining hand just fine. 79 - 80 - # Future improvements 81 - 82 - * Get more training data; train better ML models. 83 - * Improve the tracking math 84 - * Be smarter about keeping tracking lock on a hand 85 - * Try predicting the next bounding box based on the estimated keypoints of the last few frames instead of uncritically trusting the detection model, and not run the detection model *every single* frame. 
86 - * Instead of directly doing disparity on the observed keypoints, use a kinematic model of the hand and fit that to the 2D observations - this should get rid of a *lot* of jitter and make it look better to the end user if the ML models fail 87 - * Make something that also works with non-stereo (mono, trinocular, or N cameras) camera setups 88 - * Optionally run the ML models on GPU - currently, everything's CPU bound which could be sub-optimal under some circumstances 89 - * Write a lot of generic code so that you can run this on any stereo camera 90 - * More advanced prediction/interpolation code that doesn't care at all about the input frame cadence. One-euro filters are pretty good about this, but we can get better!

-416

src/xrt/tracking/hand/old_rgb/rgb_hand_math.hpp

··· 1 - #pragma once 2 - 3 - // Copyright 2021, Collabora, Ltd. 4 - // SPDX-License-Identifier: BSL-1.0 5 - /*! 6 - * @file 7 - * @brief Helper math to do things with 3D hands for the camera-based hand tracker 8 - * @author Moses Turner <moses@collabora.com> 9 - * @author Nick Klingensmith <programmerpichu@gmail.com> 10 - * @ingroup drv_ht 11 - */ 12 - 13 - #include "math/m_api.h" 14 - #include "math/m_vec3.h" 15 - 16 - #include "rgb_sync.hpp" 17 - #include "util/u_time.h" 18 - #include "xrt/xrt_defines.h" 19 - 20 - static constexpr int num_real_joints = 21; 21 - 22 - float 23 - sumOfHandJointDistances(const Hand3D &one, const Hand3D &two) 24 - { 25 - float dist = 0.0f; 26 - for (int i = 0; i < num_real_joints; i++) { 27 - dist += m_vec3_len(one.kps[i] - two.kps[i]); 28 - } 29 - return dist; 30 - } 31 - 32 - float 33 - errHandHistory(const HandHistory3D &history_hand, const Hand3D &present_hand) 34 - { 35 - // Remember we never have to deal with an empty hand. Can always read the last element. 36 - return sumOfHandJointDistances(history_hand.last_hands_unfiltered.back(), present_hand); 37 - } 38 - 39 - float 40 - errHandDisparity(const Hand2D &left_rays, const Hand2D &right_rays) 41 - { 42 - float error_y_diff = 0.0f; 43 - for (int i = 0; i < 21; i++) { 44 - float diff_y = fabsf(left_rays.kps[i].y - right_rays.kps[i].y); 45 - // Big question about what's the best loss function. Gut feeling was "I should be using sum of squared 46 - // errors" but I don't really know. Using just sum of errors for now. Ideally it'd also be not very 47 - // sensitive to one or two really bad outliers. 48 - error_y_diff += diff_y; 49 - } 50 - // U_LOG_E("stereo camera err is %f, y_disparity is %f", err_stereo_camera, error_y_diff); 51 - return error_y_diff; 52 - } 53 - 54 - void 55 - applyThumbIndexDrag(Hand3D *hand) 56 - { 57 - // TERRIBLE HACK. 58 - // Puts the thumb and pointer a bit closer together to be better at triggering XR clients' pinch detection. 
59 - static const float max_radius = 0.05; 60 - static const float min_radius = 0.00; 61 - 62 - // no min drag, min drag always 0. 63 - static const float max_drag = 0.85f; 64 - 65 - xrt_vec3 thumb = hand->kps[THMB_TIP]; 66 - xrt_vec3 index = hand->kps[INDX_TIP]; 67 - xrt_vec3 ttp = index - thumb; 68 - float length = m_vec3_len(ttp); 69 - if ((length > max_radius)) { 70 - return; 71 - } 72 - 73 - 74 - float amount = math_map_ranges(length, min_radius, max_radius, max_drag, 0.0f); 75 - 76 - hand->kps[THMB_TIP] = m_vec3_lerp(thumb, index, amount * 0.5f); 77 - hand->kps[INDX_TIP] = m_vec3_lerp(index, thumb, amount * 0.5f); 78 - } 79 - 80 - static inline xrt_vec3 81 - get_joint_position(struct xrt_hand_joint_set *set, xrt_hand_joint jt) 82 - { 83 - return set->values.hand_joint_set_default[jt].relation.pose.position; 84 - } 85 - 86 - template <size_t N> 87 - static inline void 88 - set_finger(struct xrt_hand_joint_set *set, 89 - const xrt_vec3 &pinky_to_index_prox, 90 - const std::array<xrt_hand_joint, N> &finger) 91 - { 92 - for (size_t i = 0; i < N - 1; i++) { 93 - // Don't do fingertips. (Fingertip would be index 4.) 94 - struct xrt_vec3 forwards = 95 - m_vec3_normalize(get_joint_position(set, finger[i + 1]) - get_joint_position(set, finger[i])); 96 - struct xrt_vec3 backwards = m_vec3_mul_scalar(forwards, -1.0f); 97 - 98 - struct xrt_vec3 left = m_vec3_orthonormalize(forwards, pinky_to_index_prox); 99 - // float dot = m_vec3_dot(backwards, left); 100 - // assert((m_vec3_dot(backwards,left) == 0.0f)); 101 - math_quat_from_plus_x_z(&left, &backwards, 102 - &set->values.hand_joint_set_default[finger[i]].relation.pose.orientation); 103 - } 104 - // Do fingertip! Per XR_EXT_hand_tracking, just copy the distal joint's orientation. Doing anything else 105 - // is wrong. 
106 - set->values.hand_joint_set_default[finger[N - 1]].relation.pose.orientation = 107 - set->values.hand_joint_set_default[finger[N - 2]].relation.pose.orientation; 108 - } 109 - 110 - void 111 - applyJointOrientations(struct xrt_hand_joint_set *set, bool is_right) 112 - { 113 - // The real rule to follow is that each joint's "X" axis is along the axis along which it can bend. 114 - // The nature of our estimation makes this a bit difficult, but these should work okay-ish under perfect 115 - // conditions 116 - if (set->is_active == false) { 117 - return; 118 - } 119 - 120 - auto gl = [&](xrt_hand_joint jt) { return get_joint_position(set, jt); }; 121 - 122 - xrt_vec3 pinky_prox = gl(XRT_HAND_JOINT_LITTLE_PROXIMAL); 123 - 124 - xrt_vec3 index_prox = gl(XRT_HAND_JOINT_INDEX_PROXIMAL); 125 - 126 - 127 - xrt_vec3 pinky_to_index_prox = m_vec3_normalize(index_prox - pinky_prox); 128 - if (is_right) { 129 - pinky_to_index_prox = m_vec3_mul_scalar(pinky_to_index_prox, -1.0f); 130 - } 131 - 132 - using Finger = std::array<xrt_hand_joint, 5>; 133 - static const std::array<Finger, 4> fingers_with_joints_in_them = {{ 134 - 135 - {XRT_HAND_JOINT_INDEX_METACARPAL, XRT_HAND_JOINT_INDEX_PROXIMAL, XRT_HAND_JOINT_INDEX_INTERMEDIATE, 136 - XRT_HAND_JOINT_INDEX_DISTAL, XRT_HAND_JOINT_INDEX_TIP}, 137 - 138 - {XRT_HAND_JOINT_MIDDLE_METACARPAL, XRT_HAND_JOINT_MIDDLE_PROXIMAL, XRT_HAND_JOINT_MIDDLE_INTERMEDIATE, 139 - XRT_HAND_JOINT_MIDDLE_DISTAL, XRT_HAND_JOINT_MIDDLE_TIP}, 140 - 141 - {XRT_HAND_JOINT_RING_METACARPAL, XRT_HAND_JOINT_RING_PROXIMAL, XRT_HAND_JOINT_RING_INTERMEDIATE, 142 - XRT_HAND_JOINT_RING_DISTAL, XRT_HAND_JOINT_RING_TIP}, 143 - 144 - {XRT_HAND_JOINT_LITTLE_METACARPAL, XRT_HAND_JOINT_LITTLE_PROXIMAL, XRT_HAND_JOINT_LITTLE_INTERMEDIATE, 145 - XRT_HAND_JOINT_LITTLE_DISTAL, XRT_HAND_JOINT_LITTLE_TIP}, 146 - 147 - }}; 148 - for (Finger const &finger : fingers_with_joints_in_them) { 149 - set_finger(set, pinky_to_index_prox, finger); 150 - } 151 - 152 - // wrist! 
153 - // Not the best but acceptable. Eventually, probably, do triangle of wrist pinky prox and index prox. 154 - set->values.hand_joint_set_default[XRT_HAND_JOINT_WRIST].relation.pose.orientation = 155 - set->values.hand_joint_set_default[XRT_HAND_JOINT_MIDDLE_METACARPAL].relation.pose.orientation; 156 - 157 - 158 - // palm! 159 - set->values.hand_joint_set_default[XRT_HAND_JOINT_PALM].relation.pose.orientation = 160 - set->values.hand_joint_set_default[XRT_HAND_JOINT_MIDDLE_METACARPAL].relation.pose.orientation; 161 - 162 - // thumb! 163 - // When I look at Ultraleap tracking, there's like, a "plane" made by the tip, distal and proximal (and kinda 164 - // MCP, but least squares fitting a plane is too hard for my baby brain) Normal to this plane is the +X, and 165 - // obviously forwards to the next joint is the -Z. 166 - xrt_vec3 thumb_prox_to_dist = gl(XRT_HAND_JOINT_THUMB_DISTAL) - gl(XRT_HAND_JOINT_THUMB_PROXIMAL); 167 - xrt_vec3 thumb_dist_to_tip = gl(XRT_HAND_JOINT_THUMB_TIP) - gl(XRT_HAND_JOINT_THUMB_DISTAL); 168 - xrt_vec3 plane_normal{}; 169 - if (!is_right) { 170 - math_vec3_cross(&thumb_prox_to_dist, &thumb_dist_to_tip, &plane_normal); 171 - } else { 172 - math_vec3_cross(&thumb_dist_to_tip, &thumb_prox_to_dist, &plane_normal); 173 - } 174 - constexpr std::array<enum xrt_hand_joint, 4> thumbs = {XRT_HAND_JOINT_THUMB_METACARPAL, 175 - XRT_HAND_JOINT_THUMB_PROXIMAL, 176 - XRT_HAND_JOINT_THUMB_DISTAL, XRT_HAND_JOINT_THUMB_TIP}; 177 - //! @todo this code isn't quite the same as set_finger, can we make it the same so we can use that? 
178 - for (int i = 0; i < 3; i++) { 179 - struct xrt_vec3 backwards = 180 - m_vec3_mul_scalar(m_vec3_normalize(gl(thumbs[i + 1]) - gl(thumbs[i])), -1.0f); 181 - 182 - struct xrt_vec3 left = m_vec3_orthonormalize(backwards, plane_normal); 183 - math_quat_from_plus_x_z(&left, &backwards, 184 - &set->values.hand_joint_set_default[thumbs[i]].relation.pose.orientation); 185 - } 186 - struct xrt_quat *tip = &set->values.hand_joint_set_default[XRT_HAND_JOINT_THUMB_TIP].relation.pose.orientation; 187 - struct xrt_quat *distal = 188 - &set->values.hand_joint_set_default[XRT_HAND_JOINT_THUMB_DISTAL].relation.pose.orientation; 189 - memcpy(tip, distal, sizeof(struct xrt_quat)); 190 - } 191 - 192 - float 193 - handednessJointSet(Hand3D *set) 194 - { 195 - // Guess if hand is left or right. 196 - // Left is negative, right is positive. 197 - 198 - 199 - // xrt_vec3 middle_mcp = gl(XRT_HAND_JOINT_MIDDLE_METACARPAL); 200 - 201 - xrt_vec3 pinky_prox = set->kps[LITL_PXM]; // gl(XRT_HAND_JOINT_LITTLE_PROXIMAL); 202 - 203 - xrt_vec3 index_prox = set->kps[INDX_PXM]; // gl(XRT_HAND_JOINT_INDEX_PROXIMAL); 204 - 205 - xrt_vec3 pinky_to_index_prox = m_vec3_normalize(index_prox - pinky_prox); 206 - 207 - float handedness = 0.0f; 208 - 209 - for (int i : {INDX_PXM, MIDL_PXM, RING_PXM, LITL_PXM}) { 210 - xrt_vec3 prox = set->kps[i]; 211 - xrt_vec3 intr = set->kps[i + 1]; 212 - xrt_vec3 dist = set->kps[i + 2]; 213 - xrt_vec3 tip = set->kps[i + 3]; 214 - 215 - xrt_vec3 prox_to_int = m_vec3_normalize(intr - prox); 216 - xrt_vec3 int_to_dist = m_vec3_normalize(dist - intr); 217 - xrt_vec3 dist_to_tip = m_vec3_normalize(tip - dist); 218 - 219 - xrt_vec3 checks[2]; 220 - 221 - math_vec3_cross(&prox_to_int, &int_to_dist, &checks[0]); 222 - math_vec3_cross(&int_to_dist, &dist_to_tip, &checks[1]); 223 - 224 - handedness += m_vec3_dot(m_vec3_normalize(pinky_to_index_prox), (checks[0])); 225 - handedness += m_vec3_dot(m_vec3_normalize(pinky_to_index_prox), (checks[1])); 226 - } 227 - set->handedness = 
handedness / (4 * 2); 228 - return set->handedness; 229 - } 230 - 231 - void 232 - handednessHandHistory3D(HandHistory3D *history) 233 - { 234 - 235 - float inter = handednessJointSet(&history->last_hands_unfiltered.back()); 236 - 237 - if ((fabsf(inter) > 0.3f) || (fabsf(history->handedness) < 0.3f)) { 238 - history->handedness += inter; 239 - } 240 - static const int max_handedness = 2.0f; 241 - if (history->handedness > max_handedness) { 242 - history->handedness = max_handedness; 243 - } else if (history->handedness < -max_handedness) { 244 - history->handedness = -max_handedness; 245 - } 246 - } 247 - 248 - void 249 - handEuroFiltersInit(HandHistory3D *history, double fc_min, double fc_min_d, double beta) 250 - { 251 - for (int i = 0; i < 21; i++) { 252 - m_filter_euro_vec3_init(&history->filters[i], fc_min, fc_min_d, beta); 253 - } 254 - } 255 - 256 - static double 257 - calc_smoothing_alpha(double Fc, double dt) 258 - { 259 - /* Calculate alpha = (1 / (1 + tau/dt)) where tau = 1.0 / (2 * pi * Fc), 260 - * this is a straight rearrangement with fewer divisions */ 261 - double r = 2.0 * M_PI * Fc * dt; 262 - return r / (r + 1.0); 263 - } 264 - 265 - static double 266 - exp_smooth(double alpha, double y, double prev_y) 267 - { 268 - return alpha * y + (1.0 - alpha) * prev_y; 269 - } 270 - 271 - void 272 - handEuroFiltersRun(struct HandTracking *htd, HandHistory3D *f, Hand3D *out_hand) 273 - { 274 - // Assume present hand is in element 0! 
275 - #if 0 276 - // float vals[4] = {0.5, 0.33, 0.1, 0.07}; 277 - float vals[4] = {0.9, 0.09, 0.009, 0.001}; 278 - auto m = f->last_hands_unfiltered.size() - 1; 279 - double ts_out = (vals[0] * (double)f->last_hands_unfiltered.get_at_age(std::min(m, 0))->timestamp) + 280 - (vals[1] * (double)f->last_hands_unfiltered.get_at_age(std::min(m, 1))->timestamp) + 281 - (vals[2] * (double)f->last_hands_unfiltered.get_at_age(std::min(m, 2))->timestamp) + 282 - (vals[3] * (double)f->last_hands_unfiltered.get_at_age(std::min(m, 3))->timestamp); 283 - out_hand->timestamp = (uint64_t)ts_out; 284 - 285 - for (int kp_idx = 0; kp_idx < 21; kp_idx++) { 286 - for (int hist_idx = 0; hist_idx < 4; hist_idx++) { 287 - float *in_y_arr = 288 - (float *)&f->last_hands_unfiltered.get_at_age(std::min(m, hist_idx))->kps[kp_idx]; 289 - float *out_y_arr = (float *)&out_hand->kps[kp_idx]; 290 - for (int i = 0; i < 3; i++) { 291 - out_y_arr[i] += in_y_arr[i] * vals[hist_idx]; 292 - } 293 - } 294 - } 295 - #elif 0 296 - for (int i = 0; i < 21; i++) { 297 - m_filter_euro_vec3_run(&f->filters[i], f->last_hands_unfiltered.back().timestamp, 298 - &f->last_hands_unfiltered.back().kps[i], &out_hand->kps[i]); 299 - } 300 - // conspicuously wrong! 
301 - out_hand->timestamp = f->last_hands_unfiltered.back().timestamp; 302 - #else 303 - 304 - if (!f->have_prev_hand) { 305 - f->last_hands_filtered.push_back(f->last_hands_unfiltered.back()); 306 - uint64_t ts = f->last_hands_unfiltered.back().timestamp; 307 - f->prev_ts_for_alpha = ts; 308 - f->first_ts = ts; 309 - f->prev_filtered_ts = ts; 310 - f->prev_dy = 0; 311 - f->have_prev_hand = true; 312 - *out_hand = f->last_hands_unfiltered.back(); 313 - } 314 - uint64_t ts = f->last_hands_unfiltered.back().timestamp; 315 - double dt, alpha_d; 316 - dt = (double)(ts - f->prev_ts_for_alpha) / U_TIME_1S_IN_NS; 317 - 318 - double abs_dy = 319 - (sumOfHandJointDistances(f->last_hands_unfiltered.back(), f->last_hands_filtered.back()) / 21.0f) * 0.7f; 320 - alpha_d = calc_smoothing_alpha(htd->dynamic_config.hand_fc_min_d.val, dt); 321 - 322 - double alpha, fc_cutoff; 323 - f->prev_dy = exp_smooth(alpha_d, abs_dy, f->prev_dy); 324 - 325 - fc_cutoff = htd->dynamic_config.hand_fc_min.val + htd->dynamic_config.hand_beta.val * f->prev_dy; 326 - alpha = calc_smoothing_alpha(fc_cutoff, dt); 327 - HT_DEBUG(htd, "dt is %f, abs_dy is %f, alpha is %f", dt, abs_dy, alpha); 328 - 329 - for (int i = 0; i < 21; i++) { 330 - out_hand->kps[i].x = 331 - exp_smooth(alpha, f->last_hands_unfiltered.back().kps[i].x, f->last_hands_filtered.back().kps[i].x); 332 - out_hand->kps[i].y = 333 - exp_smooth(alpha, f->last_hands_unfiltered.back().kps[i].y, f->last_hands_filtered.back().kps[i].y); 334 - out_hand->kps[i].z = 335 - exp_smooth(alpha, f->last_hands_unfiltered.back().kps[i].z, f->last_hands_filtered.back().kps[i].z); 336 - } 337 - double prev_ts_offset = (double)(f->prev_filtered_ts - f->first_ts); 338 - double current_ts_offset = (double)(ts - f->first_ts); 339 - double new_filtered_ts_offset = exp_smooth(alpha, current_ts_offset, prev_ts_offset); 340 - uint64_t new_filtered_ts = (uint64_t)(new_filtered_ts_offset) + f->first_ts; 341 - out_hand->timestamp = new_filtered_ts; 342 - 
f->prev_filtered_ts = out_hand->timestamp; 343 - f->prev_ts_for_alpha = ts; // NOT the filtered timestamp. NO. 344 - #endif 345 - } 346 - 347 - bool 348 - rejectTooFar(struct HandTracking *htd, Hand3D *hand) 349 - { 350 - static const float max_dist = 1.0f; // this sucks too - make it bigger if you can. 351 - const float max_dist_from_camera_sqrd = max_dist * max_dist; 352 - for (int i = 0; i < 21; i++) { 353 - xrt_vec3 pos = hand->kps[i]; 354 - float len = m_vec3_len_sqrd(pos); // Faster. 355 - if (len > max_dist_from_camera_sqrd) { 356 - goto reject; 357 - } 358 - } 359 - return true; 360 - 361 - reject: 362 - HT_TRACE(htd, "Rejected too far!"); 363 - return false; 364 - } 365 - 366 - bool 367 - rejectTooClose(struct HandTracking *htd, Hand3D *hand) 368 - { 369 - const float min_dist = 0.12f; // Be a bit aggressive here - it's nice to not let people see our tracking fail 370 - // when the hands are way too close 371 - const float min_dist_from_camera_sqrd = min_dist * min_dist; 372 - 373 - for (int i = 0; i < 21; i++) { 374 - xrt_vec3 pos = hand->kps[i]; 375 - float len = m_vec3_len_sqrd(pos); // Faster. 376 - if (len < min_dist_from_camera_sqrd) { 377 - goto reject; 378 - } 379 - if (pos.z > min_dist) { // remember negative-Z is forward! 380 - goto reject; 381 - } 382 - } 383 - return true; 384 - 385 - reject: 386 - HT_TRACE(htd, "Rejected too close!"); 387 - return false; 388 - } 389 - 390 - bool 391 - rejectTinyPalm(struct HandTracking *htd, Hand3D *hand) 392 - { 393 - // This one sucks, because some people really have tiny hands. If at some point you can stop using it, stop 394 - // using it. 
395 - // Weird scoping so that we can still do gotos 396 - 397 - { 398 - float len = m_vec3_len(hand->kps[WRIST] - hand->kps[INDX_PXM]); 399 - if ((len < 0.03f || len > 0.25f)) { 400 - goto reject; 401 - } 402 - } 403 - 404 - { 405 - float len = m_vec3_len(hand->kps[WRIST] - hand->kps[MIDL_PXM]); 406 - if (len < 0.03f || len > 0.25f) { 407 - goto reject; 408 - } 409 - } 410 - 411 - return true; 412 - 413 - reject: 414 - HT_TRACE(htd, "Rejected because too big or too small!"); 415 - return false; 416 - }

-254

src/xrt/tracking/hand/old_rgb/rgb_image_math.hpp

··· 1 - // Copyright 2021, Collabora, Ltd. 2 - // SPDX-License-Identifier: BSL-1.0 3 - /*! 4 - * @file 5 - * @brief Helper math to do things with images for the camera-based hand tracker 6 - * @author Moses Turner <moses@collabora.com> 7 - * @ingroup drv_ht 8 - */ 9 - #pragma once 10 - 11 - #include "math/m_vec2.h" 12 - #include "math/m_vec3.h" 13 - 14 - #include <opencv2/imgproc.hpp> 15 - #include <opencv2/core/mat.hpp> 16 - #include <opencv2/core/types.hpp> 17 - 18 - /*! 19 - * This is a template so that we can use xrt_vec3 or xrt_vec2. 20 - * Please don't use this for anything other than xrt_vec3 or xrt_vec2! 21 - */ 22 - template <typename T> 23 - T 24 - transformVecBy2x3(T in, cv::Matx23f warp_back) 25 - { 26 - T rr; 27 - rr.x = (in.x * warp_back(0, 0)) + (in.y * warp_back(0, 1)) + warp_back(0, 2); 28 - rr.y = (in.x * warp_back(1, 0)) + (in.y * warp_back(1, 1)) + warp_back(1, 2); 29 - return rr; 30 - } 31 - 32 - cv::Scalar 33 - hsv2rgb(float fH, float fS, float fV) 34 - { 35 - const float fC = fV * fS; // Chroma 36 - const float fHPrime = fmod(fH / 60.0, 6); 37 - const float fX = fC * (1 - fabs(fmod(fHPrime, 2) - 1)); 38 - const float fM = fV - fC; 39 - 40 - float fR, fG, fB; 41 - 42 - if (0 <= fHPrime && fHPrime < 1) { 43 - fR = fC; 44 - fG = fX; 45 - fB = 0; 46 - } else if (1 <= fHPrime && fHPrime < 2) { 47 - fR = fX; 48 - fG = fC; 49 - fB = 0; 50 - } else if (2 <= fHPrime && fHPrime < 3) { 51 - fR = 0; 52 - fG = fC; 53 - fB = fX; 54 - } else if (3 <= fHPrime && fHPrime < 4) { 55 - fR = 0; 56 - fG = fX; 57 - fB = fC; 58 - } else if (4 <= fHPrime && fHPrime < 5) { 59 - fR = fX; 60 - fG = 0; 61 - fB = fC; 62 - } else if (5 <= fHPrime && fHPrime < 6) { 63 - fR = fC; 64 - fG = 0; 65 - fB = fX; 66 - } else { 67 - fR = 0; 68 - fG = 0; 69 - fB = 0; 70 - } 71 - 72 - fR += fM; 73 - fG += fM; 74 - fB += fM; 75 - return {fR * 255.0f, fG * 255.0f, fB * 255.0f}; 76 - } 77 - 78 - struct xrt_vec3 79 - raycoord(struct ht_view *htv, struct xrt_vec3 model_out) 80 - { 81 - 
cv::Mat in_px_coords(1, 1, CV_32FC2); 82 - float *write_in; 83 - write_in = in_px_coords.ptr<float>(0); 84 - write_in[0] = model_out.x; 85 - write_in[1] = model_out.y; 86 - cv::Mat out_ray(1, 1, CV_32FC2); 87 - 88 - cv::fisheye::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion); 89 - 90 - 91 - float n_x = out_ray.at<float>(0, 0); 92 - float n_y = out_ray.at<float>(0, 1); 93 - 94 - 95 - struct xrt_vec3 n = {n_x, n_y, 1.0f}; 96 - 97 - cv::Matx33f R = htv->rotate_camera_to_stereo_camera; 98 - 99 - struct xrt_vec3 o = { 100 - (n.x * R(0, 0)) + (n.y * R(0, 1)) + (n.z * R(0, 2)), 101 - (n.x * R(1, 0)) + (n.y * R(1, 1)) + (n.z * R(1, 2)), 102 - (n.x * R(2, 0)) + (n.y * R(2, 1)) + (n.z * R(2, 2)), 103 - }; 104 - 105 - math_vec3_scalar_mul(1.0f / o.z, &o); 106 - return o; 107 - } 108 - 109 - cv::Matx23f 110 - blackbar(const cv::Mat &in, cv::Mat &out, xrt_size out_size) 111 - { 112 - #if 1 113 - // Easy to think about, always right, but pretty slow: 114 - // Get a matrix from the original to the scaled down / blackbar'd image, then get one that goes back. 115 - // Then just warpAffine() it. 116 - // Easy in programmer time - never have to worry about off by one, special cases. We can come back and optimize 117 - // later. 118 - 119 - // Do the black bars need to be on top and bottom, or on left and right? 
120 - float scale_down_w = (float)out_size.w / (float)in.cols; // 128/1280 = 0.1 121 - float scale_down_h = (float)out_size.h / (float)in.rows; // 128/800 = 0.16 122 - 123 - float scale_down = fmin(scale_down_w, scale_down_h); // 0.1 124 - 125 - float width_inside = (float)in.cols * scale_down; 126 - float height_inside = (float)in.rows * scale_down; 127 - 128 - float translate_x = (out_size.w - width_inside) / 2; // should be 0 for 1280x800 129 - float translate_y = (out_size.h - height_inside) / 2; // should be (1280-800)/2 = 240 130 - 131 - cv::Matx23f go; 132 - // clang-format off 133 - go(0,0) = scale_down; go(0,1) = 0.0f; go(0,2) = translate_x; 134 - go(1,0) = 0.0f; go(1,1) = scale_down; go(1,2) = translate_y; 135 - // clang-format on 136 - 137 - cv::warpAffine(in, out, go, cv::Size(out_size.w, out_size.h)); 138 - 139 - cv::Matx23f ret; 140 - 141 - // clang-format off 142 - ret(0,0) = 1.0f/scale_down; ret(0,1) = 0.0f; ret(0,2) = -translate_x/scale_down; 143 - ret(1,0) = 0.0f; ret(1,1) = 1.0f/scale_down; ret(1,2) = -translate_y/scale_down; 144 - // clang-format on 145 - 146 - return ret; 147 - #else 148 - // Fast, always wrong if the input isn't square. You'd end up using something like this, plus some 149 - // copyMakeBorder if you want to optimize. 150 - if (aspect_ratio_input == aspect_ratio_output) { 151 - cv::resize(in, out, {out_size.w, out_size.h}); 152 - cv::Matx23f ret; 153 - float scale_from_out_to_in = (float)in.cols / (float)out_size.w; 154 - // clang-format off 155 - ret(0,0) = scale_from_out_to_in; ret(0,1) = 0.0f; ret(0,2) = 0.0f; 156 - ret(1,0) = 0.0f; ret(1,1) = scale_from_out_to_in; ret(1,2) = 0.0f; 157 - // clang-format on 158 - cv::imshow("hi", out); 159 - cv::waitKey(1); 160 - return ret; 161 - } 162 - assert(!"Uh oh! 
Unimplemented!"); 163 - return {}; 164 - #endif 165 - } 166 - 167 - void 168 - handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type) 169 - { 170 - cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, intensity, intensity), type); 171 - } 172 - 173 - void 174 - centerAndRotationFromJoints(struct ht_view *htv, 175 - const xrt_vec2 *wrist, 176 - const xrt_vec2 *index, 177 - const xrt_vec2 *middle, 178 - const xrt_vec2 *little, 179 - xrt_vec2 *out_center, 180 - xrt_vec2 *out_wrist_to_middle) 181 - { 182 - // Close to what Mediapipe does, but slightly different - just uses the middle proximal instead of "estimating" 183 - // it from the pinky and index. 184 - // at the end of the day I should probably do that basis vector filtering thing to get a nicer middle metacarpal 185 - // from 6 keypoints (not thumb proximal) OR SHOULD I. because distortion. hmm 186 - 187 - // Feel free to look at the way MP does it, you can see it's different. 188 - // https://github.com/google/mediapipe/blob/master/mediapipe/modules/holistic_landmark/calculators/hand_detections_from_pose_to_rects_calculator.cc 189 - 190 - // struct xrt_vec2 hand_center = m_vec2_mul_scalar(middle, 0.5) + m_vec2_mul_scalar(index, 0.5*(2.0f/3.0f)) + 191 - // m_vec2_mul_scalar(little, 0.5f*((1.0f/3.0f))); // Middle proximal, straight-up. 
192 - // U_LOG_E("%f %f %f %f %f %f %f %f ", wrist.x, wrist.y, index.x, index.y, middle.x, middle.y, little.x, 193 - // little.y); 194 - *out_center = m_vec2_lerp(*middle, m_vec2_lerp(*index, *little, 1.0f / 3.0f), 0.25f); 195 - 196 - *out_wrist_to_middle = *out_center - *wrist; 197 - } 198 - 199 - struct DetectionModelOutput 200 - rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 center, xrt_vec2 wrist_to_middle, DetectionModelOutput *out) 201 - { 202 - float box_size = m_vec2_len(wrist_to_middle) * 2.0f * 1.73f; 203 - 204 - double rot = atan2(wrist_to_middle.x, wrist_to_middle.y) * (-180.0f / M_PI); 205 - 206 - out->rotation = rot; 207 - out->size = box_size; 208 - out->center = center; 209 - 210 - cv::RotatedRect rrect = 211 - cv::RotatedRect(cv::Point2f(out->center.x, out->center.y), cv::Size2f(out->size, out->size), out->rotation); 212 - 213 - 214 - cv::Point2f vertices[4]; 215 - rrect.points(vertices); 216 - if (htv->htd->debug_scribble && htv->htd->dynamic_config.scribble_bounding_box) { 217 - for (int i = 0; i < 4; i++) { 218 - cv::Scalar b = cv::Scalar(10, 30, 30); 219 - if (i == 3) { 220 - b = cv::Scalar(255, 255, 0); 221 - } 222 - cv::line(htv->debug_out_to_this, vertices[i], vertices[(i + 1) % 4], b, 2); 223 - } 224 - } 225 - // topright is 0. bottomright is 1. bottomleft is 2. topleft is 3. 226 - 227 - cv::Point2f src_tri[3] = {vertices[3], vertices[2], vertices[1]}; // top-left, bottom-left, bottom-right 228 - 229 - cv::Point2f dest_tri[3] = {cv::Point2f(0, 0), cv::Point2f(0, 224), cv::Point2f(224, 224)}; 230 - 231 - out->warp_there = getAffineTransform(src_tri, dest_tri); 232 - out->warp_back = getAffineTransform(dest_tri, src_tri); 233 - 234 - // out->wrist = wrist; 235 - 236 - return *out; 237 - } 238 - 239 - void 240 - planarize(const cv::Mat &input, uint8_t *output) 241 - { 242 - // output better be the right size, because we are not doing any bounds checking! 
243 - assert(input.isContinuous()); 244 - int lix = input.cols; 245 - int liy = input.rows; 246 - cv::Mat planes[3]; 247 - cv::split(input, planes); 248 - cv::Mat red = planes[0]; 249 - cv::Mat green = planes[1]; 250 - cv::Mat blue = planes[2]; 251 - memcpy(output, red.data, lix * liy); 252 - memcpy(output + (lix * liy), green.data, lix * liy); 253 - memcpy(output + (lix * liy * 2), blue.data, lix * liy); 254 - }

-29

src/xrt/tracking/hand/old_rgb/rgb_interface.h

··· 1 - // Copyright 2022, Collabora, Ltd. 2 - // SPDX-License-Identifier: BSL-1.0 3 - /*! 4 - * @file 5 - * @brief Public interface of old rgb hand tracking. 6 - * @author Jakob Bornecrantz <jakob@collabora.com> 7 - * @ingroup aux_tracking 8 - */ 9 - 10 - #include "tracking/t_tracking.h" 11 - #include "tracking/t_hand_tracking.h" 12 - 13 - #ifdef __cplusplus 14 - extern "C" { 15 - #endif 16 - 17 - 18 - /*! 19 - * Create a old style RGB hand tracking pipeline. 20 - * 21 - * @ingroup aux_tracking 22 - */ 23 - struct t_hand_tracking_sync * 24 - t_hand_tracking_sync_old_rgb_create(struct t_stereo_camera_calibration *calib); 25 - 26 - 27 - #ifdef __cplusplus 28 - } // extern "C" 29 - #endif

-646

src/xrt/tracking/hand/old_rgb/rgb_model.hpp

··· 1 - // Copyright 2021, Collabora, Ltd. 2 - // SPDX-License-Identifier: BSL-1.0 3 - /*! 4 - * @file 5 - * @brief Code to run machine learning models for camera-based hand tracker. 6 - * @author Moses Turner <moses@collabora.com> 7 - * @author Marcus Edel <marcus.edel@collabora.com> 8 - * @author Simon Zeni <simon@bl4ckb0ne.ca> 9 - * @ingroup drv_ht 10 - */ 11 - 12 - // Many C api things were stolen from here (MIT license): 13 - // https://github.com/microsoft/onnxruntime-inference-examples/blob/main/c_cxx/fns_candy_style_transfer/fns_candy_style_transfer.c 14 - #pragma once 15 - 16 - #include "rgb_sync.hpp" 17 - #include "rgb_image_math.hpp" 18 - #include "rgb_nms.hpp" 19 - 20 - #include <onnxruntime_c_api.h> 21 - 22 - #include <filesystem> 23 - #include <array> 24 - 25 - #undef HEAVY_SCRIBBLE 26 - 27 - // forward-declare 28 - struct OrtApi; 29 - struct OrtEnv; 30 - struct OrtMemoryInfo; 31 - struct OrtSession; 32 - struct OrtSessionOptions; 33 - struct OrtValue; 34 - 35 - namespace xrt::tracking::hand::old_rgb { 36 - 37 - 38 - // struct ht_device; 39 - 40 - class ht_model 41 - { 42 - HandTracking *device = nullptr; 43 - 44 - const OrtApi *api = nullptr; 45 - OrtEnv *env = nullptr; 46 - 47 - OrtMemoryInfo *palm_detection_meminfo = nullptr; 48 - OrtSession *palm_detection_session = nullptr; 49 - OrtValue *palm_detection_tensor = nullptr; 50 - std::array<float, 3 * 128 * 128> palm_detection_data; 51 - 52 - std::mutex hand_landmark_lock; 53 - OrtMemoryInfo *hand_landmark_meminfo = nullptr; 54 - OrtSession *hand_landmark_session = nullptr; 55 - OrtValue *hand_landmark_tensor = nullptr; 56 - std::array<float, 3 * 224 * 224> hand_landmark_data; 57 - 58 - void 59 - init_palm_detection(OrtSessionOptions *opts); 60 - void 61 - init_hand_landmark(OrtSessionOptions *opts); 62 - 63 - public: 64 - ht_model(struct HandTracking *htd); 65 - ~ht_model(); 66 - 67 - std::vector<Palm7KP> 68 - palm_detection(ht_view *htv, const cv::Mat &input); 69 - Hand2D 70 - hand_landmark(const 
cv::Mat input); 71 - }; 72 - 73 - 74 - /* 75 - * Anchors data taken from mediapipe's palm detection, used for single-shot detector model. 76 - * 77 - * See: 78 - * https://google.github.io/mediapipe/solutions/hands.html#palm-detection-model 79 - * https://github.com/google/mediapipe/blob/v0.8.8/mediapipe/calculators/tflite/ssd_anchors_calculator.cc#L101 80 - * https://github.com/google/mediapipe/blob/v0.8.8/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt#L60 81 - */ 82 - struct anchor 83 - { 84 - float x, y; 85 - }; 86 - 87 - static const struct anchor anchors[896]{ 88 - {0.031250, 0.031250}, {0.031250, 0.031250}, {0.093750, 0.031250}, {0.093750, 0.031250}, // 89 - {0.156250, 0.031250}, {0.156250, 0.031250}, {0.218750, 0.031250}, {0.218750, 0.031250}, // 90 - {0.281250, 0.031250}, {0.281250, 0.031250}, {0.343750, 0.031250}, {0.343750, 0.031250}, // 91 - {0.406250, 0.031250}, {0.406250, 0.031250}, {0.468750, 0.031250}, {0.468750, 0.031250}, // 92 - {0.531250, 0.031250}, {0.531250, 0.031250}, {0.593750, 0.031250}, {0.593750, 0.031250}, // 93 - {0.656250, 0.031250}, {0.656250, 0.031250}, {0.718750, 0.031250}, {0.718750, 0.031250}, // 94 - {0.781250, 0.031250}, {0.781250, 0.031250}, {0.843750, 0.031250}, {0.843750, 0.031250}, // 95 - {0.906250, 0.031250}, {0.906250, 0.031250}, {0.968750, 0.031250}, {0.968750, 0.031250}, // 96 - {0.031250, 0.093750}, {0.031250, 0.093750}, {0.093750, 0.093750}, {0.093750, 0.093750}, // 97 - {0.156250, 0.093750}, {0.156250, 0.093750}, {0.218750, 0.093750}, {0.218750, 0.093750}, // 98 - {0.281250, 0.093750}, {0.281250, 0.093750}, {0.343750, 0.093750}, {0.343750, 0.093750}, // 99 - {0.406250, 0.093750}, {0.406250, 0.093750}, {0.468750, 0.093750}, {0.468750, 0.093750}, // 100 - {0.531250, 0.093750}, {0.531250, 0.093750}, {0.593750, 0.093750}, {0.593750, 0.093750}, // 101 - {0.656250, 0.093750}, {0.656250, 0.093750}, {0.718750, 0.093750}, {0.718750, 0.093750}, // 102 - {0.781250, 0.093750}, {0.781250, 0.093750}, {0.843750, 
0.093750}, {0.843750, 0.093750}, // 103 - {0.906250, 0.093750}, {0.906250, 0.093750}, {0.968750, 0.093750}, {0.968750, 0.093750}, // 104 - {0.031250, 0.156250}, {0.031250, 0.156250}, {0.093750, 0.156250}, {0.093750, 0.156250}, // 105 - {0.156250, 0.156250}, {0.156250, 0.156250}, {0.218750, 0.156250}, {0.218750, 0.156250}, // 106 - {0.281250, 0.156250}, {0.281250, 0.156250}, {0.343750, 0.156250}, {0.343750, 0.156250}, // 107 - {0.406250, 0.156250}, {0.406250, 0.156250}, {0.468750, 0.156250}, {0.468750, 0.156250}, // 108 - {0.531250, 0.156250}, {0.531250, 0.156250}, {0.593750, 0.156250}, {0.593750, 0.156250}, // 109 - {0.656250, 0.156250}, {0.656250, 0.156250}, {0.718750, 0.156250}, {0.718750, 0.156250}, // 110 - {0.781250, 0.156250}, {0.781250, 0.156250}, {0.843750, 0.156250}, {0.843750, 0.156250}, // 111 - {0.906250, 0.156250}, {0.906250, 0.156250}, {0.968750, 0.156250}, {0.968750, 0.156250}, // 112 - {0.031250, 0.218750}, {0.031250, 0.218750}, {0.093750, 0.218750}, {0.093750, 0.218750}, // 113 - {0.156250, 0.218750}, {0.156250, 0.218750}, {0.218750, 0.218750}, {0.218750, 0.218750}, // 114 - {0.281250, 0.218750}, {0.281250, 0.218750}, {0.343750, 0.218750}, {0.343750, 0.218750}, // 115 - {0.406250, 0.218750}, {0.406250, 0.218750}, {0.468750, 0.218750}, {0.468750, 0.218750}, // 116 - {0.531250, 0.218750}, {0.531250, 0.218750}, {0.593750, 0.218750}, {0.593750, 0.218750}, // 117 - {0.656250, 0.218750}, {0.656250, 0.218750}, {0.718750, 0.218750}, {0.718750, 0.218750}, // 118 - {0.781250, 0.218750}, {0.781250, 0.218750}, {0.843750, 0.218750}, {0.843750, 0.218750}, // 119 - {0.906250, 0.218750}, {0.906250, 0.218750}, {0.968750, 0.218750}, {0.968750, 0.218750}, // 120 - {0.031250, 0.281250}, {0.031250, 0.281250}, {0.093750, 0.281250}, {0.093750, 0.281250}, // 121 - {0.156250, 0.281250}, {0.156250, 0.281250}, {0.218750, 0.281250}, {0.218750, 0.281250}, // 122 - {0.281250, 0.281250}, {0.281250, 0.281250}, {0.343750, 0.281250}, {0.343750, 0.281250}, // 123 - {0.406250, 
0.281250}, {0.406250, 0.281250}, {0.468750, 0.281250}, {0.468750, 0.281250}, // 124 - {0.531250, 0.281250}, {0.531250, 0.281250}, {0.593750, 0.281250}, {0.593750, 0.281250}, // 125 - {0.656250, 0.281250}, {0.656250, 0.281250}, {0.718750, 0.281250}, {0.718750, 0.281250}, // 126 - {0.781250, 0.281250}, {0.781250, 0.281250}, {0.843750, 0.281250}, {0.843750, 0.281250}, // 127 - {0.906250, 0.281250}, {0.906250, 0.281250}, {0.968750, 0.281250}, {0.968750, 0.281250}, // 128 - {0.031250, 0.343750}, {0.031250, 0.343750}, {0.093750, 0.343750}, {0.093750, 0.343750}, // 129 - {0.156250, 0.343750}, {0.156250, 0.343750}, {0.218750, 0.343750}, {0.218750, 0.343750}, // 130 - {0.281250, 0.343750}, {0.281250, 0.343750}, {0.343750, 0.343750}, {0.343750, 0.343750}, // 131 - {0.406250, 0.343750}, {0.406250, 0.343750}, {0.468750, 0.343750}, {0.468750, 0.343750}, // 132 - {0.531250, 0.343750}, {0.531250, 0.343750}, {0.593750, 0.343750}, {0.593750, 0.343750}, // 133 - {0.656250, 0.343750}, {0.656250, 0.343750}, {0.718750, 0.343750}, {0.718750, 0.343750}, // 134 - {0.781250, 0.343750}, {0.781250, 0.343750}, {0.843750, 0.343750}, {0.843750, 0.343750}, // 135 - {0.906250, 0.343750}, {0.906250, 0.343750}, {0.968750, 0.343750}, {0.968750, 0.343750}, // 136 - {0.031250, 0.406250}, {0.031250, 0.406250}, {0.093750, 0.406250}, {0.093750, 0.406250}, // 137 - {0.156250, 0.406250}, {0.156250, 0.406250}, {0.218750, 0.406250}, {0.218750, 0.406250}, // 138 - {0.281250, 0.406250}, {0.281250, 0.406250}, {0.343750, 0.406250}, {0.343750, 0.406250}, // 139 - {0.406250, 0.406250}, {0.406250, 0.406250}, {0.468750, 0.406250}, {0.468750, 0.406250}, // 140 - {0.531250, 0.406250}, {0.531250, 0.406250}, {0.593750, 0.406250}, {0.593750, 0.406250}, // 141 - {0.656250, 0.406250}, {0.656250, 0.406250}, {0.718750, 0.406250}, {0.718750, 0.406250}, // 142 - {0.781250, 0.406250}, {0.781250, 0.406250}, {0.843750, 0.406250}, {0.843750, 0.406250}, // 143 - {0.906250, 0.406250}, {0.906250, 0.406250}, {0.968750, 0.406250}, 
{0.968750, 0.406250}, // 144 - {0.031250, 0.468750}, {0.031250, 0.468750}, {0.093750, 0.468750}, {0.093750, 0.468750}, // 145 - {0.156250, 0.468750}, {0.156250, 0.468750}, {0.218750, 0.468750}, {0.218750, 0.468750}, // 146 - {0.281250, 0.468750}, {0.281250, 0.468750}, {0.343750, 0.468750}, {0.343750, 0.468750}, // 147 - {0.406250, 0.468750}, {0.406250, 0.468750}, {0.468750, 0.468750}, {0.468750, 0.468750}, // 148 - {0.531250, 0.468750}, {0.531250, 0.468750}, {0.593750, 0.468750}, {0.593750, 0.468750}, // 149 - {0.656250, 0.468750}, {0.656250, 0.468750}, {0.718750, 0.468750}, {0.718750, 0.468750}, // 150 - {0.781250, 0.468750}, {0.781250, 0.468750}, {0.843750, 0.468750}, {0.843750, 0.468750}, // 151 - {0.906250, 0.468750}, {0.906250, 0.468750}, {0.968750, 0.468750}, {0.968750, 0.468750}, // 152 - {0.031250, 0.531250}, {0.031250, 0.531250}, {0.093750, 0.531250}, {0.093750, 0.531250}, // 153 - {0.156250, 0.531250}, {0.156250, 0.531250}, {0.218750, 0.531250}, {0.218750, 0.531250}, // 154 - {0.281250, 0.531250}, {0.281250, 0.531250}, {0.343750, 0.531250}, {0.343750, 0.531250}, // 155 - {0.406250, 0.531250}, {0.406250, 0.531250}, {0.468750, 0.531250}, {0.468750, 0.531250}, // 156 - {0.531250, 0.531250}, {0.531250, 0.531250}, {0.593750, 0.531250}, {0.593750, 0.531250}, // 157 - {0.656250, 0.531250}, {0.656250, 0.531250}, {0.718750, 0.531250}, {0.718750, 0.531250}, // 158 - {0.781250, 0.531250}, {0.781250, 0.531250}, {0.843750, 0.531250}, {0.843750, 0.531250}, // 159 - {0.906250, 0.531250}, {0.906250, 0.531250}, {0.968750, 0.531250}, {0.968750, 0.531250}, // 160 - {0.031250, 0.593750}, {0.031250, 0.593750}, {0.093750, 0.593750}, {0.093750, 0.593750}, // 161 - {0.156250, 0.593750}, {0.156250, 0.593750}, {0.218750, 0.593750}, {0.218750, 0.593750}, // 162 - {0.281250, 0.593750}, {0.281250, 0.593750}, {0.343750, 0.593750}, {0.343750, 0.593750}, // 163 - {0.406250, 0.593750}, {0.406250, 0.593750}, {0.468750, 0.593750}, {0.468750, 0.593750}, // 164 - {0.531250, 0.593750}, 
{0.531250, 0.593750}, {0.593750, 0.593750}, {0.593750, 0.593750}, // 165 - {0.656250, 0.593750}, {0.656250, 0.593750}, {0.718750, 0.593750}, {0.718750, 0.593750}, // 166 - {0.781250, 0.593750}, {0.781250, 0.593750}, {0.843750, 0.593750}, {0.843750, 0.593750}, // 167 - {0.906250, 0.593750}, {0.906250, 0.593750}, {0.968750, 0.593750}, {0.968750, 0.593750}, // 168 - {0.031250, 0.656250}, {0.031250, 0.656250}, {0.093750, 0.656250}, {0.093750, 0.656250}, // 169 - {0.156250, 0.656250}, {0.156250, 0.656250}, {0.218750, 0.656250}, {0.218750, 0.656250}, // 170 - {0.281250, 0.656250}, {0.281250, 0.656250}, {0.343750, 0.656250}, {0.343750, 0.656250}, // 171 - {0.406250, 0.656250}, {0.406250, 0.656250}, {0.468750, 0.656250}, {0.468750, 0.656250}, // 172 - {0.531250, 0.656250}, {0.531250, 0.656250}, {0.593750, 0.656250}, {0.593750, 0.656250}, // 173 - {0.656250, 0.656250}, {0.656250, 0.656250}, {0.718750, 0.656250}, {0.718750, 0.656250}, // 174 - {0.781250, 0.656250}, {0.781250, 0.656250}, {0.843750, 0.656250}, {0.843750, 0.656250}, // 175 - {0.906250, 0.656250}, {0.906250, 0.656250}, {0.968750, 0.656250}, {0.968750, 0.656250}, // 176 - {0.031250, 0.718750}, {0.031250, 0.718750}, {0.093750, 0.718750}, {0.093750, 0.718750}, // 177 - {0.156250, 0.718750}, {0.156250, 0.718750}, {0.218750, 0.718750}, {0.218750, 0.718750}, // 178 - {0.281250, 0.718750}, {0.281250, 0.718750}, {0.343750, 0.718750}, {0.343750, 0.718750}, // 179 - {0.406250, 0.718750}, {0.406250, 0.718750}, {0.468750, 0.718750}, {0.468750, 0.718750}, // 180 - {0.531250, 0.718750}, {0.531250, 0.718750}, {0.593750, 0.718750}, {0.593750, 0.718750}, // 181 - {0.656250, 0.718750}, {0.656250, 0.718750}, {0.718750, 0.718750}, {0.718750, 0.718750}, // 182 - {0.781250, 0.718750}, {0.781250, 0.718750}, {0.843750, 0.718750}, {0.843750, 0.718750}, // 183 - {0.906250, 0.718750}, {0.906250, 0.718750}, {0.968750, 0.718750}, {0.968750, 0.718750}, // 184 - {0.031250, 0.781250}, {0.031250, 0.781250}, {0.093750, 0.781250}, {0.093750, 
0.781250}, // 185 - {0.156250, 0.781250}, {0.156250, 0.781250}, {0.218750, 0.781250}, {0.218750, 0.781250}, // 186 - {0.281250, 0.781250}, {0.281250, 0.781250}, {0.343750, 0.781250}, {0.343750, 0.781250}, // 187 - {0.406250, 0.781250}, {0.406250, 0.781250}, {0.468750, 0.781250}, {0.468750, 0.781250}, // 188 - {0.531250, 0.781250}, {0.531250, 0.781250}, {0.593750, 0.781250}, {0.593750, 0.781250}, // 189 - {0.656250, 0.781250}, {0.656250, 0.781250}, {0.718750, 0.781250}, {0.718750, 0.781250}, // 190 - {0.781250, 0.781250}, {0.781250, 0.781250}, {0.843750, 0.781250}, {0.843750, 0.781250}, // 191 - {0.906250, 0.781250}, {0.906250, 0.781250}, {0.968750, 0.781250}, {0.968750, 0.781250}, // 192 - {0.031250, 0.843750}, {0.031250, 0.843750}, {0.093750, 0.843750}, {0.093750, 0.843750}, // 193 - {0.156250, 0.843750}, {0.156250, 0.843750}, {0.218750, 0.843750}, {0.218750, 0.843750}, // 194 - {0.281250, 0.843750}, {0.281250, 0.843750}, {0.343750, 0.843750}, {0.343750, 0.843750}, // 195 - {0.406250, 0.843750}, {0.406250, 0.843750}, {0.468750, 0.843750}, {0.468750, 0.843750}, // 196 - {0.531250, 0.843750}, {0.531250, 0.843750}, {0.593750, 0.843750}, {0.593750, 0.843750}, // 197 - {0.656250, 0.843750}, {0.656250, 0.843750}, {0.718750, 0.843750}, {0.718750, 0.843750}, // 198 - {0.781250, 0.843750}, {0.781250, 0.843750}, {0.843750, 0.843750}, {0.843750, 0.843750}, // 199 - {0.906250, 0.843750}, {0.906250, 0.843750}, {0.968750, 0.843750}, {0.968750, 0.843750}, // 200 - {0.031250, 0.906250}, {0.031250, 0.906250}, {0.093750, 0.906250}, {0.093750, 0.906250}, // 201 - {0.156250, 0.906250}, {0.156250, 0.906250}, {0.218750, 0.906250}, {0.218750, 0.906250}, // 202 - {0.281250, 0.906250}, {0.281250, 0.906250}, {0.343750, 0.906250}, {0.343750, 0.906250}, // 203 - {0.406250, 0.906250}, {0.406250, 0.906250}, {0.468750, 0.906250}, {0.468750, 0.906250}, // 204 - {0.531250, 0.906250}, {0.531250, 0.906250}, {0.593750, 0.906250}, {0.593750, 0.906250}, // 205 - {0.656250, 0.906250}, {0.656250, 
0.906250}, {0.718750, 0.906250}, {0.718750, 0.906250}, // 206 - {0.781250, 0.906250}, {0.781250, 0.906250}, {0.843750, 0.906250}, {0.843750, 0.906250}, // 207 - {0.906250, 0.906250}, {0.906250, 0.906250}, {0.968750, 0.906250}, {0.968750, 0.906250}, // 208 - {0.031250, 0.968750}, {0.031250, 0.968750}, {0.093750, 0.968750}, {0.093750, 0.968750}, // 209 - {0.156250, 0.968750}, {0.156250, 0.968750}, {0.218750, 0.968750}, {0.218750, 0.968750}, // 210 - {0.281250, 0.968750}, {0.281250, 0.968750}, {0.343750, 0.968750}, {0.343750, 0.968750}, // 211 - {0.406250, 0.968750}, {0.406250, 0.968750}, {0.468750, 0.968750}, {0.468750, 0.968750}, // 212 - {0.531250, 0.968750}, {0.531250, 0.968750}, {0.593750, 0.968750}, {0.593750, 0.968750}, // 213 - {0.656250, 0.968750}, {0.656250, 0.968750}, {0.718750, 0.968750}, {0.718750, 0.968750}, // 214 - {0.781250, 0.968750}, {0.781250, 0.968750}, {0.843750, 0.968750}, {0.843750, 0.968750}, // 215 - {0.906250, 0.968750}, {0.906250, 0.968750}, {0.968750, 0.968750}, {0.968750, 0.968750}, // 216 - {0.062500, 0.062500}, {0.062500, 0.062500}, {0.062500, 0.062500}, {0.062500, 0.062500}, // 217 - {0.062500, 0.062500}, {0.062500, 0.062500}, {0.187500, 0.062500}, {0.187500, 0.062500}, // 218 - {0.187500, 0.062500}, {0.187500, 0.062500}, {0.187500, 0.062500}, {0.187500, 0.062500}, // 219 - {0.312500, 0.062500}, {0.312500, 0.062500}, {0.312500, 0.062500}, {0.312500, 0.062500}, // 220 - {0.312500, 0.062500}, {0.312500, 0.062500}, {0.437500, 0.062500}, {0.437500, 0.062500}, // 221 - {0.437500, 0.062500}, {0.437500, 0.062500}, {0.437500, 0.062500}, {0.437500, 0.062500}, // 222 - {0.562500, 0.062500}, {0.562500, 0.062500}, {0.562500, 0.062500}, {0.562500, 0.062500}, // 223 - {0.562500, 0.062500}, {0.562500, 0.062500}, {0.687500, 0.062500}, {0.687500, 0.062500}, // 224 - {0.687500, 0.062500}, {0.687500, 0.062500}, {0.687500, 0.062500}, {0.687500, 0.062500}, // 225 - {0.812500, 0.062500}, {0.812500, 0.062500}, {0.812500, 0.062500}, {0.812500, 0.062500}, // 
226 - {0.812500, 0.062500}, {0.812500, 0.062500}, {0.937500, 0.062500}, {0.937500, 0.062500}, // 227 - {0.937500, 0.062500}, {0.937500, 0.062500}, {0.937500, 0.062500}, {0.937500, 0.062500}, // 228 - {0.062500, 0.187500}, {0.062500, 0.187500}, {0.062500, 0.187500}, {0.062500, 0.187500}, // 229 - {0.062500, 0.187500}, {0.062500, 0.187500}, {0.187500, 0.187500}, {0.187500, 0.187500}, // 230 - {0.187500, 0.187500}, {0.187500, 0.187500}, {0.187500, 0.187500}, {0.187500, 0.187500}, // 231 - {0.312500, 0.187500}, {0.312500, 0.187500}, {0.312500, 0.187500}, {0.312500, 0.187500}, // 232 - {0.312500, 0.187500}, {0.312500, 0.187500}, {0.437500, 0.187500}, {0.437500, 0.187500}, // 233 - {0.437500, 0.187500}, {0.437500, 0.187500}, {0.437500, 0.187500}, {0.437500, 0.187500}, // 234 - {0.562500, 0.187500}, {0.562500, 0.187500}, {0.562500, 0.187500}, {0.562500, 0.187500}, // 235 - {0.562500, 0.187500}, {0.562500, 0.187500}, {0.687500, 0.187500}, {0.687500, 0.187500}, // 236 - {0.687500, 0.187500}, {0.687500, 0.187500}, {0.687500, 0.187500}, {0.687500, 0.187500}, // 237 - {0.812500, 0.187500}, {0.812500, 0.187500}, {0.812500, 0.187500}, {0.812500, 0.187500}, // 238 - {0.812500, 0.187500}, {0.812500, 0.187500}, {0.937500, 0.187500}, {0.937500, 0.187500}, // 239 - {0.937500, 0.187500}, {0.937500, 0.187500}, {0.937500, 0.187500}, {0.937500, 0.187500}, // 240 - {0.062500, 0.312500}, {0.062500, 0.312500}, {0.062500, 0.312500}, {0.062500, 0.312500}, // 241 - {0.062500, 0.312500}, {0.062500, 0.312500}, {0.187500, 0.312500}, {0.187500, 0.312500}, // 242 - {0.187500, 0.312500}, {0.187500, 0.312500}, {0.187500, 0.312500}, {0.187500, 0.312500}, // 243 - {0.312500, 0.312500}, {0.312500, 0.312500}, {0.312500, 0.312500}, {0.312500, 0.312500}, // 244 - {0.312500, 0.312500}, {0.312500, 0.312500}, {0.437500, 0.312500}, {0.437500, 0.312500}, // 245 - {0.437500, 0.312500}, {0.437500, 0.312500}, {0.437500, 0.312500}, {0.437500, 0.312500}, // 246 - {0.562500, 0.312500}, {0.562500, 0.312500}, 
{0.562500, 0.312500}, {0.562500, 0.312500}, // 247 - {0.562500, 0.312500}, {0.562500, 0.312500}, {0.687500, 0.312500}, {0.687500, 0.312500}, // 248 - {0.687500, 0.312500}, {0.687500, 0.312500}, {0.687500, 0.312500}, {0.687500, 0.312500}, // 249 - {0.812500, 0.312500}, {0.812500, 0.312500}, {0.812500, 0.312500}, {0.812500, 0.312500}, // 250 - {0.812500, 0.312500}, {0.812500, 0.312500}, {0.937500, 0.312500}, {0.937500, 0.312500}, // 251 - {0.937500, 0.312500}, {0.937500, 0.312500}, {0.937500, 0.312500}, {0.937500, 0.312500}, // 252 - {0.062500, 0.437500}, {0.062500, 0.437500}, {0.062500, 0.437500}, {0.062500, 0.437500}, // 253 - {0.062500, 0.437500}, {0.062500, 0.437500}, {0.187500, 0.437500}, {0.187500, 0.437500}, // 254 - {0.187500, 0.437500}, {0.187500, 0.437500}, {0.187500, 0.437500}, {0.187500, 0.437500}, // 255 - {0.312500, 0.437500}, {0.312500, 0.437500}, {0.312500, 0.437500}, {0.312500, 0.437500}, // 256 - {0.312500, 0.437500}, {0.312500, 0.437500}, {0.437500, 0.437500}, {0.437500, 0.437500}, // 257 - {0.437500, 0.437500}, {0.437500, 0.437500}, {0.437500, 0.437500}, {0.437500, 0.437500}, // 258 - {0.562500, 0.437500}, {0.562500, 0.437500}, {0.562500, 0.437500}, {0.562500, 0.437500}, // 259 - {0.562500, 0.437500}, {0.562500, 0.437500}, {0.687500, 0.437500}, {0.687500, 0.437500}, // 260 - {0.687500, 0.437500}, {0.687500, 0.437500}, {0.687500, 0.437500}, {0.687500, 0.437500}, // 261 - {0.812500, 0.437500}, {0.812500, 0.437500}, {0.812500, 0.437500}, {0.812500, 0.437500}, // 262 - {0.812500, 0.437500}, {0.812500, 0.437500}, {0.937500, 0.437500}, {0.937500, 0.437500}, // 263 - {0.937500, 0.437500}, {0.937500, 0.437500}, {0.937500, 0.437500}, {0.937500, 0.437500}, // 264 - {0.062500, 0.562500}, {0.062500, 0.562500}, {0.062500, 0.562500}, {0.062500, 0.562500}, // 265 - {0.062500, 0.562500}, {0.062500, 0.562500}, {0.187500, 0.562500}, {0.187500, 0.562500}, // 266 - {0.187500, 0.562500}, {0.187500, 0.562500}, {0.187500, 0.562500}, {0.187500, 0.562500}, // 267 - 
{0.312500, 0.562500}, {0.312500, 0.562500}, {0.312500, 0.562500}, {0.312500, 0.562500}, // 268 - {0.312500, 0.562500}, {0.312500, 0.562500}, {0.437500, 0.562500}, {0.437500, 0.562500}, // 269 - {0.437500, 0.562500}, {0.437500, 0.562500}, {0.437500, 0.562500}, {0.437500, 0.562500}, // 270 - {0.562500, 0.562500}, {0.562500, 0.562500}, {0.562500, 0.562500}, {0.562500, 0.562500}, // 271 - {0.562500, 0.562500}, {0.562500, 0.562500}, {0.687500, 0.562500}, {0.687500, 0.562500}, // 272 - {0.687500, 0.562500}, {0.687500, 0.562500}, {0.687500, 0.562500}, {0.687500, 0.562500}, // 273 - {0.812500, 0.562500}, {0.812500, 0.562500}, {0.812500, 0.562500}, {0.812500, 0.562500}, // 274 - {0.812500, 0.562500}, {0.812500, 0.562500}, {0.937500, 0.562500}, {0.937500, 0.562500}, // 275 - {0.937500, 0.562500}, {0.937500, 0.562500}, {0.937500, 0.562500}, {0.937500, 0.562500}, // 276 - {0.062500, 0.687500}, {0.062500, 0.687500}, {0.062500, 0.687500}, {0.062500, 0.687500}, // 277 - {0.062500, 0.687500}, {0.062500, 0.687500}, {0.187500, 0.687500}, {0.187500, 0.687500}, // 278 - {0.187500, 0.687500}, {0.187500, 0.687500}, {0.187500, 0.687500}, {0.187500, 0.687500}, // 279 - {0.312500, 0.687500}, {0.312500, 0.687500}, {0.312500, 0.687500}, {0.312500, 0.687500}, // 280 - {0.312500, 0.687500}, {0.312500, 0.687500}, {0.437500, 0.687500}, {0.437500, 0.687500}, // 281 - {0.437500, 0.687500}, {0.437500, 0.687500}, {0.437500, 0.687500}, {0.437500, 0.687500}, // 282 - {0.562500, 0.687500}, {0.562500, 0.687500}, {0.562500, 0.687500}, {0.562500, 0.687500}, // 283 - {0.562500, 0.687500}, {0.562500, 0.687500}, {0.687500, 0.687500}, {0.687500, 0.687500}, // 284 - {0.687500, 0.687500}, {0.687500, 0.687500}, {0.687500, 0.687500}, {0.687500, 0.687500}, // 285 - {0.812500, 0.687500}, {0.812500, 0.687500}, {0.812500, 0.687500}, {0.812500, 0.687500}, // 286 - {0.812500, 0.687500}, {0.812500, 0.687500}, {0.937500, 0.687500}, {0.937500, 0.687500}, // 287 - {0.937500, 0.687500}, {0.937500, 0.687500}, {0.937500, 
0.687500}, {0.937500, 0.687500}, // 288 - {0.062500, 0.812500}, {0.062500, 0.812500}, {0.062500, 0.812500}, {0.062500, 0.812500}, // 289 - {0.062500, 0.812500}, {0.062500, 0.812500}, {0.187500, 0.812500}, {0.187500, 0.812500}, // 290 - {0.187500, 0.812500}, {0.187500, 0.812500}, {0.187500, 0.812500}, {0.187500, 0.812500}, // 291 - {0.312500, 0.812500}, {0.312500, 0.812500}, {0.312500, 0.812500}, {0.312500, 0.812500}, // 292 - {0.312500, 0.812500}, {0.312500, 0.812500}, {0.437500, 0.812500}, {0.437500, 0.812500}, // 293 - {0.437500, 0.812500}, {0.437500, 0.812500}, {0.437500, 0.812500}, {0.437500, 0.812500}, // 294 - {0.562500, 0.812500}, {0.562500, 0.812500}, {0.562500, 0.812500}, {0.562500, 0.812500}, // 295 - {0.562500, 0.812500}, {0.562500, 0.812500}, {0.687500, 0.812500}, {0.687500, 0.812500}, // 296 - {0.687500, 0.812500}, {0.687500, 0.812500}, {0.687500, 0.812500}, {0.687500, 0.812500}, // 297 - {0.812500, 0.812500}, {0.812500, 0.812500}, {0.812500, 0.812500}, {0.812500, 0.812500}, // 298 - {0.812500, 0.812500}, {0.812500, 0.812500}, {0.937500, 0.812500}, {0.937500, 0.812500}, // 299 - {0.937500, 0.812500}, {0.937500, 0.812500}, {0.937500, 0.812500}, {0.937500, 0.812500}, // 300 - {0.062500, 0.937500}, {0.062500, 0.937500}, {0.062500, 0.937500}, {0.062500, 0.937500}, // 301 - {0.062500, 0.937500}, {0.062500, 0.937500}, {0.187500, 0.937500}, {0.187500, 0.937500}, // 302 - {0.187500, 0.937500}, {0.187500, 0.937500}, {0.187500, 0.937500}, {0.187500, 0.937500}, // 303 - {0.312500, 0.937500}, {0.312500, 0.937500}, {0.312500, 0.937500}, {0.312500, 0.937500}, // 304 - {0.312500, 0.937500}, {0.312500, 0.937500}, {0.437500, 0.937500}, {0.437500, 0.937500}, // 305 - {0.437500, 0.937500}, {0.437500, 0.937500}, {0.437500, 0.937500}, {0.437500, 0.937500}, // 306 - {0.562500, 0.937500}, {0.562500, 0.937500}, {0.562500, 0.937500}, {0.562500, 0.937500}, // 307 - {0.562500, 0.937500}, {0.562500, 0.937500}, {0.687500, 0.937500}, {0.687500, 0.937500}, // 308 - {0.687500, 
0.937500}, {0.687500, 0.937500}, {0.687500, 0.937500}, {0.687500, 0.937500}, // 309 - {0.812500, 0.937500}, {0.812500, 0.937500}, {0.812500, 0.937500}, {0.812500, 0.937500}, // 310 - {0.812500, 0.937500}, {0.812500, 0.937500}, {0.937500, 0.937500}, {0.937500, 0.937500}, // 311 - {0.937500, 0.937500}, {0.937500, 0.937500}, {0.937500, 0.937500}, {0.937500, 0.937500}, // 312 - }; 313 - 314 - #define ORT(expr) \ 315 - do { \ 316 - OrtStatus *status = this->api->expr; \ 317 - if (status != nullptr) { \ 318 - const char *msg = this->api->GetErrorMessage(status); \ 319 - HT_ERROR(this->device, "[%s:%d]: %s\n", __FILE__, __LINE__, msg); \ 320 - this->api->ReleaseStatus(status); \ 321 - assert(false); \ 322 - } \ 323 - } while (0) 324 - 325 - void 326 - ht_model::init_palm_detection(OrtSessionOptions *opts) 327 - { 328 - // Both models have slightly different shapes, preventing us to constexpr the input shape 329 - std::array<int64_t, 4> input_shape; 330 - std::array<std::string, 1> input_names; 331 - 332 - std::filesystem::path path = this->device->startup_config.model_slug; 333 - if (this->device->startup_config.keypoint_estimation_use_mediapipe) { 334 - path /= "palm_detection_MEDIAPIPE.onnx"; 335 - 336 - input_shape = {1, 3, 128, 128}; 337 - input_names = {"input"}; 338 - } else { 339 - path /= "palm_detection_COLLABORA.onnx"; 340 - 341 - input_shape = {1, 128, 128, 3}; 342 - input_names = {"input:0"}; 343 - } 344 - 345 - HT_DEBUG(this->device, "Loading palm detection model from file '%s'", path.c_str()); 346 - ORT(CreateSession(this->env, path.c_str(), opts, &this->palm_detection_session)); 347 - assert(this->palm_detection_session); 348 - 349 - constexpr size_t input_size = 3 * 128 * 128; 350 - 351 - ORT(CreateTensorWithDataAsOrtValue(this->palm_detection_meminfo, this->palm_detection_data.data(), 352 - input_size * sizeof(float), input_shape.data(), input_shape.size(), 353 - ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &this->palm_detection_tensor)); 354 - 355 - 
assert(this->palm_detection_tensor); 356 - int is_tensor; 357 - ORT(IsTensor(this->palm_detection_tensor, &is_tensor)); 358 - assert(is_tensor); 359 - } 360 - 361 - void 362 - ht_model::init_hand_landmark(OrtSessionOptions *opts) 363 - { 364 - std::filesystem::path path = this->device->startup_config.model_slug; 365 - if (this->device->startup_config.keypoint_estimation_use_mediapipe) { 366 - path /= "hand_landmark_MEDIAPIPE.onnx"; 367 - } else { 368 - path /= "hand_landmark_COLLABORA.onnx"; 369 - } 370 - 371 - HT_DEBUG(this->device, "Loading hand landmark model from file '%s'", path.c_str()); 372 - ORT(CreateSession(this->env, path.c_str(), opts, &this->hand_landmark_session)); 373 - assert(this->hand_landmark_session); 374 - 375 - constexpr size_t input_size = 3 * 224 * 224; 376 - 377 - constexpr std::array<int64_t, 4> input_shape = {1, 3, 224, 224}; 378 - ORT(CreateTensorWithDataAsOrtValue(this->hand_landmark_meminfo, this->hand_landmark_data.data(), 379 - input_size * sizeof(float), input_shape.data(), input_shape.size(), 380 - ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &this->hand_landmark_tensor)); 381 - 382 - assert(this->hand_landmark_tensor != nullptr); 383 - int is_tensor; 384 - ORT(IsTensor(hand_landmark_tensor, &is_tensor)); 385 - assert(is_tensor); 386 - } 387 - 388 - ht_model::ht_model(struct HandTracking *htd) : device(htd), api(OrtGetApiBase()->GetApi(ORT_API_VERSION)) 389 - { 390 - ORT(CreateEnv(ORT_LOGGING_LEVEL_WARNING, "monado_ht", &this->env)); 391 - 392 - ORT(CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &this->palm_detection_meminfo)); 393 - ORT(CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &this->hand_landmark_meminfo)); 394 - 395 - OrtSessionOptions *opts = nullptr; 396 - ORT(CreateSessionOptions(&opts)); 397 - 398 - // TODO review options, config for threads? 
399 - ORT(SetSessionGraphOptimizationLevel(opts, ORT_ENABLE_ALL)); 400 - ORT(SetIntraOpNumThreads(opts, 1)); 401 - 402 - this->init_palm_detection(opts); 403 - this->init_hand_landmark(opts); 404 - 405 - this->api->ReleaseSessionOptions(opts); 406 - } 407 - 408 - 409 - ht_model::~ht_model() 410 - { 411 - this->api->ReleaseMemoryInfo(this->palm_detection_meminfo); 412 - this->api->ReleaseSession(this->palm_detection_session); 413 - this->api->ReleaseValue(this->palm_detection_tensor); 414 - 415 - this->api->ReleaseMemoryInfo(this->hand_landmark_meminfo); 416 - this->api->ReleaseSession(this->hand_landmark_session); 417 - this->api->ReleaseValue(this->hand_landmark_tensor); 418 - 419 - this->api->ReleaseEnv(this->env); 420 - } 421 - 422 - std::vector<Palm7KP> 423 - ht_model::palm_detection(ht_view *htv, const cv::Mat &input) 424 - { 425 - // TODO use opencv to handle input preprocessing 426 - constexpr int hd_size = 128; 427 - constexpr size_t nb_planes = 3; 428 - constexpr size_t size = hd_size * hd_size * nb_planes; 429 - 430 - cv::Mat img; 431 - cv::Matx23f back_from_blackbar = blackbar(input, img, {hd_size, hd_size}); 432 - 433 - float scale_factor = back_from_blackbar(0, 0); // 960/128 434 - assert(img.isContinuous()); 435 - constexpr float mean = 128.0f; 436 - constexpr float std = 128.0f; 437 - 438 - if (htv->htd->startup_config.palm_detection_use_mediapipe) { 439 - std::vector<uint8_t> combined_planes(size); 440 - planarize(img, combined_planes.data()); 441 - for (size_t i = 0; i < size; i++) { 442 - float val = (float)combined_planes[i]; 443 - this->palm_detection_data[i] = (val - mean) / std; 444 - } 445 - } else { 446 - 447 - assert(img.isContinuous()); 448 - 449 - for (size_t i = 0; i < size; i++) { 450 - int val = img.data[i]; 451 - 452 - this->palm_detection_data[i] = (val - mean) / std; 453 - } 454 - } 455 - 456 - const char *input_names[1]; 457 - if (this->device->startup_config.keypoint_estimation_use_mediapipe) { 458 - input_names[0] = "input"; 459 
- } else { 460 - input_names[0] = "input:0"; 461 - } 462 - 463 - static const char *const output_names[] = {"classificators", "regressors"}; 464 - 465 - OrtValue *output_tensor[] = {nullptr, nullptr}; 466 - ORT(Run(this->palm_detection_session, nullptr, input_names, &this->palm_detection_tensor, 1, output_names, 2, 467 - output_tensor)); 468 - 469 - // TODO define types to handle data 470 - float *classificators = nullptr; 471 - float *regressors = nullptr; 472 - 473 - // Output is 896 floats 474 - ORT(GetTensorMutableData(output_tensor[0], (void **)&classificators)); 475 - 476 - // Output is 896 * 18 floats 477 - ORT(GetTensorMutableData(output_tensor[1], (void **)&regressors)); 478 - 479 - std::vector<NMSPalm> detections; 480 - for (size_t i = 0; i < 896; ++i) { 481 - const float score = 1.0 / (1.0 + exp(-classificators[i])); 482 - 483 - // Let a lot of detections in - they'll be slowly rejected later 484 - if (score <= this->device->dynamic_config.nms_threshold.val) { 485 - continue; 486 - } 487 - 488 - const struct anchor *anchor = &anchors[i]; 489 - 490 - // Boundary box. 
491 - NMSPalm det; 492 - 493 - float anchx = anchor->x * 128; 494 - float anchy = anchor->y * 128; 495 - 496 - float shiftx = regressors[i * 18]; 497 - float shifty = regressors[i * 18 + 1]; 498 - 499 - float w = regressors[i * 18 + 2]; 500 - float h = regressors[i * 18 + 3]; 501 - 502 - float cx = shiftx + anchx; 503 - float cy = shifty + anchy; 504 - 505 - struct xrt_vec2 *kps = det.keypoints; 506 - 507 - kps[0] = {regressors[i * 18 + 4], regressors[i * 18 + 5]}; 508 - kps[1] = {regressors[i * 18 + 6], regressors[i * 18 + 7]}; 509 - kps[2] = {regressors[i * 18 + 8], regressors[i * 18 + 9]}; 510 - kps[3] = {regressors[i * 18 + 10], regressors[i * 18 + 11]}; 511 - kps[4] = {regressors[i * 18 + 12], regressors[i * 18 + 13]}; 512 - kps[5] = {regressors[i * 18 + 14], regressors[i * 18 + 15]}; 513 - kps[6] = {regressors[i * 18 + 16], regressors[i * 18 + 17]}; 514 - 515 - 516 - for (int i = 0; i < 7; i++) { 517 - struct xrt_vec2 *b = &kps[i]; 518 - b->x += anchx; 519 - b->y += anchy; 520 - } 521 - 522 - det.bbox.w = w; 523 - det.bbox.h = h; 524 - det.bbox.cx = cx; 525 - det.bbox.cy = cy; 526 - det.confidence = score; 527 - detections.push_back(det); 528 - 529 - if (htv->htd->debug_scribble && (htv->htd->dynamic_config.scribble_raw_detections)) { 530 - xrt_vec2 center = transformVecBy2x3(xrt_vec2{cx, cy}, back_from_blackbar); 531 - 532 - float sz = det.bbox.w * scale_factor; 533 - 534 - cv::rectangle(htv->debug_out_to_this, 535 - {(int)(center.x - (sz / 2)), (int)(center.y - (sz / 2)), (int)sz, (int)sz}, 536 - hsv2rgb(0.0f, math_map_ranges(det.confidence, 0.0f, 1.0f, 1.5f, -0.1f), 537 - math_map_ranges(det.confidence, 0.0f, 1.0f, 0.2f, 1.4f)), 538 - 1); 539 - 540 - for (int i = 0; i < 7; i++) { 541 - handDot(htv->debug_out_to_this, transformVecBy2x3(kps[i], back_from_blackbar), 542 - det.confidence * 7, ((float)i) * (360.0f / 7.0f), det.confidence, 1); 543 - } 544 - } 545 - } 546 - 547 - this->api->ReleaseValue(output_tensor[0]); 548 - 
this->api->ReleaseValue(output_tensor[1]); 549 - 550 - std::vector<Palm7KP> output; 551 - if (detections.empty()) { 552 - return output; 553 - } 554 - 555 - std::vector<NMSPalm> nms_palms = filterBoxesWeightedAvg(detections, htv->htd->dynamic_config.nms_iou.val); 556 - 557 - for (const NMSPalm &cooler : nms_palms) { 558 - 559 - // Display box 560 - 561 - struct xrt_vec2 tl = {cooler.bbox.cx - cooler.bbox.w / 2, cooler.bbox.cy - cooler.bbox.h / 2}; 562 - struct xrt_vec2 bob = transformVecBy2x3(tl, back_from_blackbar); 563 - float sz = cooler.bbox.w * scale_factor; 564 - 565 - if (htv->htd->debug_scribble && htv->htd->dynamic_config.scribble_nms_detections) { 566 - cv::rectangle(htv->debug_out_to_this, {(int)bob.x, (int)bob.y, (int)sz, (int)sz}, 567 - hsv2rgb(180.0f, math_map_ranges(cooler.confidence, 0.0f, 1.0f, 0.8f, -0.1f), 568 - math_map_ranges(cooler.confidence, 0.0f, 1.0f, 0.2f, 1.4f)), 569 - 2); 570 - for (int i = 0; i < 7; i++) { 571 - handDot(htv->debug_out_to_this, 572 - transformVecBy2x3(cooler.keypoints[i], back_from_blackbar), 573 - cooler.confidence * 14, ((float)i) * (360.0f / 7.0f), cooler.confidence, 3); 574 - } 575 - } 576 - 577 - 578 - Palm7KP this_element; 579 - 580 - for (int i = 0; i < 7; i++) { 581 - struct xrt_vec2 b = cooler.keypoints[i]; 582 - this_element.kps[i] = transformVecBy2x3(b, back_from_blackbar); 583 - } 584 - this_element.confidence = cooler.confidence; 585 - 586 - output.push_back(this_element); 587 - } 588 - 589 - 590 - return output; 591 - } 592 - 593 - Hand2D 594 - ht_model::hand_landmark(const cv::Mat input) 595 - { 596 - std::scoped_lock lock(this->hand_landmark_lock); 597 - 598 - // TODO use opencv to handle input preprocessing 599 - constexpr size_t lix = 224; 600 - constexpr size_t liy = 224; 601 - constexpr size_t nb_planes = 3; 602 - cv::Mat planes[nb_planes]; 603 - 604 - constexpr size_t size = lix * liy * nb_planes; 605 - 606 - std::vector<uint8_t> combined_planes(size); 607 - planarize(input, combined_planes.data()); 
608 - 609 - // Normalize - supposedly, the keypoint estimator wants keypoints in [0,1] 610 - for (size_t i = 0; i < size; i++) { 611 - this->hand_landmark_data[i] = (float)combined_planes[i] / 255.0; 612 - } 613 - 614 - static const char *const input_names[] = {"input_1"}; 615 - static const char *const output_names[] = {"Identity", "Identity_1", "Identity_2"}; 616 - 617 - OrtValue *output_tensor[] = {nullptr, nullptr, nullptr}; 618 - ORT(Run(this->hand_landmark_session, nullptr, input_names, &this->hand_landmark_tensor, 1, output_names, 3, 619 - output_tensor)); 620 - 621 - Hand2D hand{}; 622 - 623 - float *landmarks = nullptr; 624 - 625 - // Should give a pointer to data that is freed on g_ort->ReleaseValue(output_tensor[0]);. 626 - ORT(GetTensorMutableData(output_tensor[0], (void **)&landmarks)); 627 - 628 - constexpr int stride = 3; 629 - for (size_t i = 0; i < 21; i++) { 630 - int rt = i * stride; 631 - float x = landmarks[rt]; 632 - float y = landmarks[rt + 1]; 633 - float z = landmarks[rt + 2]; 634 - hand.kps[i].x = x; 635 - hand.kps[i].y = y; 636 - hand.kps[i].z = z; 637 - } 638 - 639 - this->api->ReleaseValue(output_tensor[0]); 640 - this->api->ReleaseValue(output_tensor[1]); 641 - this->api->ReleaseValue(output_tensor[2]); 642 - 643 - return hand; 644 - } 645 - 646 - } // namespace xrt::tracking::hand::old_rgb

-114

src/xrt/tracking/hand/old_rgb/rgb_nms.hpp

··· 1 - // Copyright 2021-2022, Collabora, Ltd. 2 - // SPDX-License-Identifier: BSL-1.0 3 - /*! 4 - * @file 5 - * @brief Code to deal with bounding boxes for camera-based hand-tracking. 6 - * @author Moses Turner <moses@collabora.com> 7 - * @author Marcus Edel <marcus.edel@collabora.com> 8 - * @ingroup drv_ht 9 - */ 10 - 11 - #include "rgb_sync.hpp" 12 - #include <math.h> 13 - 14 - #include "util/u_box_iou.hpp" 15 - 16 - using namespace xrt::auxiliary::util::box_iou; 17 - struct NMSPalm 18 - { 19 - Box bbox; 20 - struct xrt_vec2 keypoints[7]; 21 - float confidence; 22 - }; 23 - 24 - 25 - 26 - static NMSPalm 27 - weightedAvgBoxes(const std::vector<NMSPalm> &detections) 28 - { 29 - float weight = 0.0f; // or, sum_confidences. 30 - float cx = 0.0f; 31 - float cy = 0.0f; 32 - float size = 0.0f; 33 - NMSPalm out = {}; 34 - 35 - for (const NMSPalm &detection : detections) { 36 - weight += detection.confidence; 37 - cx += detection.bbox.cx * detection.confidence; 38 - cy += detection.bbox.cy * detection.confidence; 39 - size += detection.bbox.w * .5 * detection.confidence; 40 - size += detection.bbox.h * .5 * detection.confidence; 41 - 42 - for (int i = 0; i < 7; i++) { 43 - out.keypoints[i].x += detection.keypoints[i].x * detection.confidence; 44 - out.keypoints[i].y += detection.keypoints[i].y * detection.confidence; 45 - } 46 - } 47 - cx /= weight; 48 - cy /= weight; 49 - size /= weight; 50 - for (int i = 0; i < 7; i++) { 51 - out.keypoints[i].x /= weight; 52 - out.keypoints[i].y /= weight; 53 - } 54 - 55 - 56 - float bare_confidence = weight / detections.size(); 57 - 58 - // desmos \frac{1}{1+e^{-.5x}}-.5 59 - 60 - float steep = 0.2; 61 - float cent = 0.5; 62 - 63 - float exp = detections.size(); 64 - 65 - float sigmoid_addendum = (1.0f / (1.0f + pow(M_E, (-steep * exp)))) - cent; 66 - 67 - float diff_bare_to_one = 1.0f - bare_confidence; 68 - 69 - out.confidence = bare_confidence + (sigmoid_addendum * diff_bare_to_one); 70 - 71 - // U_LOG_E("Bare %f num %f sig %f 
diff %f out %f", bare_confidence, exp, sigmoid_addendum, diff_bare_to_one, 72 - // out.confidence); 73 - 74 - out.bbox.cx = cx; 75 - out.bbox.cy = cy; 76 - out.bbox.w = size; 77 - out.bbox.h = size; 78 - return out; 79 - } 80 - 81 - std::vector<NMSPalm> 82 - filterBoxesWeightedAvg(const std::vector<NMSPalm> &detections, float min_iou) 83 - { 84 - std::vector<std::vector<NMSPalm>> overlaps; 85 - std::vector<NMSPalm> outs; 86 - 87 - // U_LOG_D("\n\nStarting filtering boxes. There are %zu boxes to look at.\n", detections.size()); 88 - for (const NMSPalm &detection : detections) { 89 - // U_LOG_D("Starting looking at one detection\n"); 90 - bool foundAHome = false; 91 - for (size_t i = 0; i < outs.size(); i++) { 92 - float iou = boxIOU(outs[i].bbox, detection.bbox); 93 - // U_LOG_D("IOU is %f\n", iou); 94 - // U_LOG_D("Outs box is %f %f %f %f", outs[i].bbox.cx, outs[i].bbox.cy, outs[i].bbox.w, 95 - // outs[i].bbox.h) 96 - if (iou > min_iou) { 97 - // This one intersects with the whole thing 98 - overlaps[i].push_back(detection); 99 - outs[i] = weightedAvgBoxes(overlaps[i]); 100 - foundAHome = true; 101 - break; 102 - } 103 - } 104 - if (!foundAHome) { 105 - // U_LOG_D("No home\n"); 106 - overlaps.push_back({detection}); 107 - outs.push_back({detection}); 108 - } else { 109 - // U_LOG_D("Found a home!\n"); 110 - } 111 - } 112 - // U_LOG_D("Sizeeeeeeeeeeeeeeeeeeeee is %zu\n", outs.size()); 113 - return outs; 114 - }

-1271

src/xrt/tracking/hand/old_rgb/rgb_sync.cpp

··· 1 - // Copyright 2022, Collabora, Ltd. 2 - // SPDX-License-Identifier: BSL-1.0 3 - /*! 4 - * @file 5 - * @brief Old RGB hand tracking main file. 6 - * @author Jakob Bornecrantz <jakob@collabora.com> 7 - * @ingroup aux_tracking 8 - */ 9 - 10 - #include "rgb_interface.h" 11 - #include "rgb_sync.hpp" 12 - #include "xrt/xrt_frame.h" 13 - 14 - 15 - using namespace xrt::tracking::hand::old_rgb; 16 - 17 - 18 - 19 - #include "xrt/xrt_defines.h" 20 - 21 - #include "math/m_vec2.h" 22 - #include "util/u_frame.h" 23 - #include "util/u_trace_marker.h" 24 - 25 - 26 - #include "templates/NaivePermutationSort.hpp" 27 - 28 - #include <future> 29 - 30 - 31 - // Copyright 2021, Collabora, Ltd. 32 - // SPDX-License-Identifier: BSL-1.0 33 - /*! 34 - * @file 35 - * @brief Camera based hand tracking driver code. 36 - * @author Moses Turner <moses@collabora.com> 37 - * @author Jakob Bornecrantz <jakob@collabora.com> 38 - * @ingroup drv_ht 39 - */ 40 - 41 - #if defined(EXPERIMENTAL_DATASET_RECORDING) 42 - #include "gstreamer/gst_pipeline.h" 43 - #include "gstreamer/gst_sink.h" 44 - #endif 45 - 46 - #include "xrt/xrt_defines.h" 47 - #include "xrt/xrt_frame.h" 48 - #include "xrt/xrt_frameserver.h" 49 - 50 - #include "os/os_time.h" 51 - #include "os/os_threading.h" 52 - 53 - #include "math/m_api.h" 54 - #include "math/m_eigen_interop.hpp" 55 - 56 - #include "util/u_device.h" 57 - #include "util/u_frame.h" 58 - #include "util/u_hand_tracking.h" 59 - #include "util/u_sink.h" 60 - #include "util/u_format.h" 61 - #include "util/u_logging.h" 62 - #include "util/u_time.h" 63 - #include "util/u_trace_marker.h" 64 - #include "util/u_time.h" 65 - #include "util/u_json.h" 66 - #include "util/u_config_json.h" 67 - 68 - #include "tracking/t_frame_cv_mat_wrapper.hpp" 69 - #include "tracking/t_calibration_opencv.hpp" 70 - 71 - #include "rgb_hand_math.hpp" 72 - #include "rgb_image_math.hpp" 73 - #include "rgb_model.hpp" 74 - 75 - #include <cjson/cJSON.h> 76 - #include <opencv2/core/mat.hpp> 77 - 
#include <opencv2/calib3d.hpp> 78 - 79 - #include <math.h> 80 - #include <float.h> 81 - #include <stdio.h> 82 - #include <unistd.h> 83 - #include <string.h> 84 - 85 - #include <cmath> 86 - 87 - #include <limits> 88 - #include <thread> 89 - #include <future> 90 - #include <fstream> 91 - #include <numeric> 92 - #include <sstream> 93 - #include <iostream> 94 - #include <exception> 95 - #include <algorithm> 96 - 97 - 98 - 99 - // Flags to tell state tracker that these are indeed valid joints 100 - static const enum xrt_space_relation_flags valid_flags_ht = (enum xrt_space_relation_flags)( 101 - XRT_SPACE_RELATION_ORIENTATION_VALID_BIT | XRT_SPACE_RELATION_ORIENTATION_TRACKED_BIT | 102 - XRT_SPACE_RELATION_POSITION_VALID_BIT | XRT_SPACE_RELATION_POSITION_TRACKED_BIT); 103 - 104 - 105 - static void 106 - htProcessJoint(struct HandTracking *htd, 107 - struct xrt_vec3 model_out, 108 - struct xrt_hand_joint_set *hand, 109 - enum xrt_hand_joint idx) 110 - { 111 - hand->values.hand_joint_set_default[idx].relation.relation_flags = valid_flags_ht; 112 - hand->values.hand_joint_set_default[idx].relation.pose.position.x = model_out.x; 113 - hand->values.hand_joint_set_default[idx].relation.pose.position.y = model_out.y; 114 - hand->values.hand_joint_set_default[idx].relation.pose.position.z = model_out.z; 115 - } 116 - 117 - static float 118 - errHistory2D(const HandHistory2DBBox &past, const Palm7KP &present) 119 - { 120 - if (!past.htAlgorithm_approves) { 121 - // U_LOG_E("Returning big number because htAlgorithm told me to!"); 122 - return 100000000000000000000000000000.0f; 123 - } 124 - float sum_of_lengths = m_vec2_len(past.wrist_unfiltered.back() - past.middle_unfiltered.back()) + 125 - m_vec2_len(present.kps[WRIST_7KP] - present.kps[MIDDLE_7KP]); 126 - 127 - float sum_of_distances = (m_vec2_len(past.wrist_unfiltered.back() - present.kps[WRIST_7KP]) + 128 - m_vec2_len(past.middle_unfiltered.back() - present.kps[MIDDLE_7KP])); 129 - 130 - 131 - float final = sum_of_distances 
/ sum_of_lengths; 132 - 133 - return final; 134 - } 135 - 136 - static std::vector<Hand2D> 137 - htImageToKeypoints(struct ht_view *htv) 138 - { 139 - struct HandTracking *htd = htv->htd; 140 - ht_model *htm = htv->htm; 141 - 142 - cv::Mat raw_input = htv->run_model_on_this; 143 - 144 - // Get a list of palms - drop confidences and ssd bounding boxes, just keypoints. 145 - 146 - 147 - std::vector<Palm7KP> hand_detections = htm->palm_detection(htv, raw_input); 148 - 149 - std::vector<bool> used_histories; 150 - std::vector<bool> used_detections; 151 - 152 - std::vector<size_t> history_indices; 153 - std::vector<size_t> detection_indices; 154 - std::vector<float> dontuse; 155 - 156 - 157 - // Strategy here is: We have a big list of palms. Match 'em up to previous palms. 158 - naive_sort_permutation_by_error<HandHistory2DBBox, Palm7KP>(htv->bbox_histories, hand_detections, 159 - 160 - // bools 161 - used_histories, used_detections, 162 - 163 - history_indices, detection_indices, dontuse, 164 - errHistory2D, 1.0f); 165 - 166 - // Here's the trick - we use the associated bbox_filter to get an output but *never commit* the noisy 128x128 167 - // detection; instead later on we commit the (hopefully) nicer palm and wrist from the 224x224 keypoint 168 - // estimation. 169 - 170 - // Add extra detections! 
171 - for (size_t i = 0; i < used_detections.size(); i++) { 172 - if ((used_detections[i] == false) && hand_detections[i].confidence > 0.65) { 173 - // Confidence to get in the door is 0.65, confidence to stay in is 0.3 174 - HandHistory2DBBox hist_new = {}; 175 - m_filter_euro_vec2_init(&hist_new.m_filter_center, FCMIN_BBOX_POSITION, FCMIN_D_BB0X_POSITION, 176 - BETA_BB0X_POSITION); 177 - m_filter_euro_vec2_init(&hist_new.m_filter_direction, FCMIN_BBOX_ORIENTATION, 178 - FCMIN_D_BB0X_ORIENTATION, BETA_BB0X_ORIENTATION); 179 - 180 - htv->bbox_histories.push_back(hist_new); 181 - history_indices.push_back(htv->bbox_histories.size() - 1); 182 - detection_indices.push_back(i); 183 - } 184 - } 185 - 186 - // Do the things for each active bbox history! 187 - for (size_t i = 0; i < history_indices.size(); i++) { 188 - HandHistory2DBBox *hist_of_interest = &htv->bbox_histories[history_indices[i]]; 189 - hist_of_interest->wrist_unfiltered.push_back(hand_detections[detection_indices[i]].kps[WRIST_7KP]); 190 - hist_of_interest->index_unfiltered.push_back(hand_detections[detection_indices[i]].kps[INDEX_7KP]); 191 - hist_of_interest->middle_unfiltered.push_back(hand_detections[detection_indices[i]].kps[MIDDLE_7KP]); 192 - hist_of_interest->pinky_unfiltered.push_back(hand_detections[detection_indices[i]].kps[LITTLE_7KP]); 193 - // Eh do the rest later 194 - } 195 - 196 - // Prune stale detections! (After we don't need {history,detection}_indices to be correct) 197 - int bob = 0; 198 - for (size_t i = 0; i < used_histories.size(); i++) { 199 - if (used_histories[i] == false) { 200 - // history never got assigned a present hand to it. treat it as stale delete it. 
201 - 202 - HT_TRACE(htv->htd, "Removing bbox from history!\n"); 203 - htv->bbox_histories.erase(htv->bbox_histories.begin() + i + bob); 204 - bob--; 205 - } 206 - } 207 - if (htv->bbox_histories.size() == 0) { 208 - return {}; // bail early 209 - } 210 - 211 - std::vector<std::future<Hand2D>> await_list_of_hand_in_bbox; //(htv->bbox_histories.size()); 212 - 213 - std::vector<DetectionModelOutput> blah(htv->bbox_histories.size()); 214 - 215 - std::vector<Hand2D> output; 216 - 217 - if (htv->bbox_histories.size() > 2) { 218 - HT_DEBUG(htd, "More than two hands (%zu) in 2D view %i", htv->bbox_histories.size(), htv->view); 219 - } 220 - 221 - for (size_t i = 0; i < htv->bbox_histories.size(); i++) { //(BBoxHistory * entry : htv->bbox_histories) { 222 - HandHistory2DBBox *entry = &htv->bbox_histories[i]; 223 - cv::Mat hand_rect = cv::Mat(224, 224, CV_8UC3); 224 - xrt_vec2 unfiltered_middle; 225 - xrt_vec2 unfiltered_direction; 226 - 227 - centerAndRotationFromJoints(htv, &entry->wrist_unfiltered.back(), &entry->index_unfiltered.back(), 228 - &entry->middle_unfiltered.back(), &entry->pinky_unfiltered.back(), 229 - &unfiltered_middle, &unfiltered_direction); 230 - 231 - xrt_vec2 filtered_middle; 232 - xrt_vec2 filtered_direction; 233 - 234 - m_filter_euro_vec2_run_no_commit(&entry->m_filter_center, htv->htd->current_frame_timestamp, 235 - &unfiltered_middle, &filtered_middle); 236 - m_filter_euro_vec2_run_no_commit(&entry->m_filter_direction, htv->htd->current_frame_timestamp, 237 - &unfiltered_direction, &filtered_direction); 238 - 239 - rotatedRectFromJoints(htv, filtered_middle, filtered_direction, &blah[i]); 240 - 241 - warpAffine(raw_input, hand_rect, blah[i].warp_there, hand_rect.size()); 242 - 243 - await_list_of_hand_in_bbox.push_back( 244 - std::async(std::launch::async, std::bind(&ht_model::hand_landmark, htm, hand_rect))); 245 - } 246 - 247 - for (size_t i = 0; i < htv->bbox_histories.size(); i++) { 248 - Hand2D in_bbox = await_list_of_hand_in_bbox[i].get(); 
249 - 250 - cv::Matx23f warp_back = blah[i].warp_back; 251 - 252 - Hand2D in_image_ray_coords; 253 - Hand2D in_image_px_coords; 254 - 255 - for (int i = 0; i < 21; i++) { 256 - struct xrt_vec3 vec = in_bbox.kps[i]; 257 - 258 - #if 1 259 - xrt_vec3 rr = transformVecBy2x3(vec, warp_back); 260 - rr.z = vec.z; 261 - #else 262 - xrt_vec3 rr; 263 - rr.x = (vec.x * warp_back(0, 0)) + (vec.y * warp_back(0, 1)) + warp_back(0, 2); 264 - rr.y = (vec.x * warp_back(1, 0)) + (vec.y * warp_back(1, 1)) + warp_back(1, 2); 265 - rr.z = vec.z; 266 - #endif 267 - in_image_px_coords.kps[i] = rr; 268 - 269 - in_image_ray_coords.kps[i] = raycoord(htv, rr); 270 - if (htd->debug_scribble && htd->dynamic_config.scribble_2d_keypoints) { 271 - handDot(htv->debug_out_to_this, {rr.x, rr.y}, fmax((-vec.z + 100 - 20) * .08, 2), 272 - ((float)i) / 21.0f, 0.95f, cv::FILLED); 273 - } 274 - } 275 - xrt_vec2 wrist_in_px_coords = {in_image_px_coords.kps[WRIST].x, in_image_px_coords.kps[WRIST].y}; 276 - xrt_vec2 index_in_px_coords = {in_image_px_coords.kps[INDX_PXM].x, in_image_px_coords.kps[INDX_PXM].y}; 277 - xrt_vec2 middle_in_px_coords = {in_image_px_coords.kps[MIDL_PXM].x, in_image_px_coords.kps[MIDL_PXM].y}; 278 - xrt_vec2 little_in_px_coords = {in_image_px_coords.kps[LITL_PXM].x, in_image_px_coords.kps[LITL_PXM].y}; 279 - xrt_vec2 dontuse; 280 - 281 - xrt_vec2 unfiltered_middle, unfiltered_direction; 282 - centerAndRotationFromJoints(htv, &wrist_in_px_coords, &index_in_px_coords, &middle_in_px_coords, 283 - &little_in_px_coords, &unfiltered_middle, &unfiltered_direction); 284 - 285 - m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_center, htv->htd->current_frame_timestamp, 286 - &unfiltered_middle, &dontuse); 287 - 288 - m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_direction, htv->htd->current_frame_timestamp, 289 - &unfiltered_direction, &dontuse); 290 - 291 - output.push_back(in_image_ray_coords); 292 - } 293 - return output; 294 - } 295 - 296 - #if 
defined(EXPERIMENTAL_DATASET_RECORDING) 297 - 298 - static void 299 - jsonAddJoint(cJSON *into_this, xrt_pose loc, const char *name) 300 - { 301 - 302 - cJSON *container = cJSON_CreateObject(); 303 - cJSON *joint_loc = cJSON_CreateArray(); 304 - cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.x)); 305 - cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.y)); 306 - cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.z)); 307 - 308 - cJSON_AddItemToObject(container, "position", joint_loc); 309 - 310 - cJSON *joint_rot = cJSON_CreateArray(); 311 - 312 - 313 - cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.x)); 314 - cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.y)); 315 - cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.z)); 316 - cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.w)); 317 - 318 - cJSON_AddItemToObject(container, "rotation_quat_xyzw", joint_rot); 319 - 320 - cJSON_AddItemToObject(into_this, name, container); 321 - } 322 - 323 - void 324 - jsonMaybeAddSomeHands(struct HandTracking *htd, bool err) 325 - { 326 - if (!htd->tracking_should_record_dataset) { 327 - return; 328 - } 329 - cJSON *j_this_frame = cJSON_CreateObject(); 330 - cJSON_AddItemToObject(j_this_frame, "seq_since_start", cJSON_CreateNumber(htd->gst.current_index)); 331 - cJSON_AddItemToObject(j_this_frame, "seq_src", cJSON_CreateNumber(htd->frame_for_process->source_sequence)); 332 - cJSON_AddItemToObject(j_this_frame, "ts", cJSON_CreateNumber(htd->gst.last_frame_ns)); 333 - 334 - cJSON *j_hands_in_frame = cJSON_AddArrayToObject(j_this_frame, "detected_hands"); 335 - if (!err) { 336 - for (size_t idx_hand = 0; idx_hand < htd->histories_3d.size(); idx_hand++) { 337 - cJSON *j_hand_in_frame = cJSON_CreateObject(); 338 - 339 - cJSON *j_uuid = cJSON_CreateNumber(htd->histories_3d[idx_hand].uuid); 340 - cJSON_AddItemToObject(j_hand_in_frame, "uuid", j_uuid); 341 - 342 - cJSON 
*j_handedness = cJSON_CreateNumber(htd->histories_3d[idx_hand].handedness); 343 - cJSON_AddItemToObject(j_hand_in_frame, "handedness", j_handedness); 344 - 345 - static const char *keys[21] = { 346 - "WRIST", 347 - 348 - "THMB_MCP", "THMB_PXM", "THMB_DST", "THMB_TIP", 349 - 350 - "INDX_PXM", "INDX_INT", "INDX_DST", "INDX_TIP", 351 - 352 - "MIDL_PXM", "MIDL_INT", "MIDL_DST", "MIDL_TIP", 353 - 354 - "RING_PXM", "RING_INT", "RING_DST", "RING_TIP", 355 - 356 - "LITL_PXM", "LITL_INT", "LITL_DST", "LITL_TIP", 357 - }; 358 - 359 - for (int idx_joint = 0; idx_joint < 21; idx_joint++) { 360 - // const char* key = keys[idx_joint]; 361 - cJSON *j_vec3 = cJSON_AddArrayToObject(j_hand_in_frame, keys[idx_joint]); 362 - cJSON_AddItemToArray( 363 - j_vec3, 364 - cJSON_CreateNumber( 365 - htd->histories_3d[idx_hand].last_hands_unfiltered.back().kps[idx_joint].x)); 366 - cJSON_AddItemToArray( 367 - j_vec3, 368 - cJSON_CreateNumber( 369 - htd->histories_3d[idx_hand].last_hands_unfiltered.back().kps[idx_joint].y)); 370 - cJSON_AddItemToArray( 371 - j_vec3, 372 - cJSON_CreateNumber( 373 - htd->histories_3d[idx_hand].last_hands_unfiltered.back().kps[idx_joint].z)); 374 - } 375 - 376 - 377 - cJSON_AddItemToArray(j_hands_in_frame, j_hand_in_frame); 378 - } 379 - } 380 - cJSON_AddItemToArray(htd->gst.output_array, j_this_frame); 381 - } 382 - 383 - #endif 384 - 385 - 386 - 387 - static void 388 - htJointDisparityMath(struct HandTracking *htd, Hand2D *hand_in_left, Hand2D *hand_in_right, Hand3D *out_hand) 389 - { 390 - for (int i = 0; i < 21; i++) { 391 - // Believe it or not, this is where the 3D stuff happens! 
392 - float t = htd->baseline / (hand_in_left->kps[i].x - hand_in_right->kps[i].x); 393 - 394 - out_hand->kps[i].z = -t; 395 - 396 - out_hand->kps[i].x = (hand_in_left->kps[i].x * t); 397 - out_hand->kps[i].y = -hand_in_left->kps[i].y * t; 398 - 399 - out_hand->kps[i].x += htd->baseline + (hand_in_right->kps[i].x * t); 400 - out_hand->kps[i].y += -hand_in_right->kps[i].y * t; 401 - 402 - out_hand->kps[i].x *= .5; 403 - out_hand->kps[i].y *= .5; 404 - } 405 - } 406 - int64_t last_frame, this_frame; 407 - 408 - DEBUG_GET_ONCE_LOG_OPTION(ht_log, "HT_LOG", U_LOGGING_WARN) 409 - 410 - /*! 411 - * Setup helper functions. 412 - */ 413 - 414 - static bool 415 - getCalibration(struct HandTracking *htd, t_stereo_camera_calibration *calibration) 416 - { 417 - xrt::auxiliary::tracking::StereoCameraCalibrationWrapper wrap(calibration); 418 - xrt_vec3 trans = {(float)wrap.camera_translation_mat(0, 0), (float)wrap.camera_translation_mat(1, 0), 419 - (float)wrap.camera_translation_mat(2, 0)}; 420 - htd->baseline = m_vec3_len(trans); 421 - 422 - #if 0 423 - std::cout << "\n\nTRANSLATION VECTOR IS\n" << wrap.camera_translation_mat; 424 - std::cout << "\n\nROTATION FROM LEFT TO RIGHT IS\n" << wrap.camera_rotation_mat << "\n"; 425 - #endif 426 - 427 - cv::Matx34d P1; 428 - cv::Matx34d P2; 429 - 430 - cv::Matx44d Q; 431 - 432 - // The only reason we're calling stereoRectify is because we want R1 and R2 for the 433 - cv::stereoRectify(wrap.view[0].intrinsics_mat, // cameraMatrix1 434 - wrap.view[0].distortion_mat, // distCoeffs1 435 - wrap.view[1].intrinsics_mat, // cameraMatrix2 436 - wrap.view[1].distortion_mat, // distCoeffs2 437 - wrap.view[0].image_size_pixels_cv, // imageSize* 438 - wrap.camera_rotation_mat, // R 439 - wrap.camera_translation_mat, // T 440 - htd->views[0].rotate_camera_to_stereo_camera, // R1 441 - htd->views[1].rotate_camera_to_stereo_camera, // R2 442 - P1, // P1 443 - P2, // P2 444 - Q, // Q 445 - 0, // flags 446 - -1.0f, // alpha 447 - cv::Size(), // 
newImageSize 448 - NULL, // validPixROI1 449 - NULL); // validPixROI2 450 - 451 - //* Good enough guess that view 0 and view 1 are the same size. 452 - 453 - for (int i = 0; i < 2; i++) { 454 - htd->views[i].cameraMatrix = wrap.view[i].intrinsics_mat; 455 - 456 - htd->views[i].distortion = wrap.view[i].distortion_fisheye_mat; 457 - } 458 - 459 - htd->one_view_size_px.w = wrap.view[0].image_size_pixels.w; 460 - htd->one_view_size_px.h = wrap.view[0].image_size_pixels.h; 461 - 462 - U_LOG_E("%d %d %p %p", htd->one_view_size_px.w, htd->one_view_size_px.h, (void *)&htd->one_view_size_px.w, 463 - (void *)&htd->one_view_size_px.h); 464 - 465 - 466 - 467 - cv::Matx33d rotate_stereo_camera_to_left_camera = htd->views[0].rotate_camera_to_stereo_camera.inv(); 468 - 469 - xrt_matrix_3x3 s; 470 - s.v[0] = rotate_stereo_camera_to_left_camera(0, 0); 471 - s.v[1] = rotate_stereo_camera_to_left_camera(0, 1); 472 - s.v[2] = rotate_stereo_camera_to_left_camera(0, 2); 473 - 474 - s.v[3] = rotate_stereo_camera_to_left_camera(1, 0); 475 - s.v[4] = rotate_stereo_camera_to_left_camera(1, 1); 476 - s.v[5] = rotate_stereo_camera_to_left_camera(1, 2); 477 - 478 - s.v[6] = rotate_stereo_camera_to_left_camera(2, 0); 479 - s.v[7] = rotate_stereo_camera_to_left_camera(2, 1); 480 - s.v[8] = rotate_stereo_camera_to_left_camera(2, 2); 481 - 482 - xrt_quat tmp; 483 - 484 - math_quat_from_matrix_3x3(&s, &tmp); 485 - 486 - // Weird that I have to invert this quat, right? I think at some point - like probably just before this - I must 487 - // have swapped row-major and col-major - remember, if you transpose a rotation matrix, you get its inverse. 488 - // Doesn't matter that I don't understand - non-inverted looks definitely wrong, inverted looks definitely 489 - // right. 
490 - math_quat_invert(&tmp, &htd->stereo_camera_to_left_camera); 491 - 492 - #if 0 493 - U_LOG_E("%f %f %f %f", htd->stereo_camera_to_left_camera.w, htd->stereo_camera_to_left_camera.x, 494 - htd->stereo_camera_to_left_camera.y, htd->stereo_camera_to_left_camera.z); 495 - #endif 496 - 497 - return true; 498 - } 499 - 500 - #if 0 501 - static void 502 - getStartupConfig(struct HandTracking *htd, const cJSON *startup_config) 503 - { 504 - const cJSON *palm_detection_type = u_json_get(startup_config, "palm_detection_model"); 505 - const cJSON *keypoint_estimation_type = u_json_get(startup_config, "keypoint_estimation_model"); 506 - const cJSON *uvc_wire_format = u_json_get(startup_config, "uvc_wire_format"); 507 - 508 - // IsString does its own null-checking 509 - if (cJSON_IsString(palm_detection_type)) { 510 - bool is_collabora = (strcmp(cJSON_GetStringValue(palm_detection_type), "collabora") == 0); 511 - bool is_mediapipe = (strcmp(cJSON_GetStringValue(palm_detection_type), "mediapipe") == 0); 512 - if (!is_collabora && !is_mediapipe) { 513 - HT_WARN(htd, "Unknown palm detection type %s - should be \"collabora\" or \"mediapipe\"", 514 - cJSON_GetStringValue(palm_detection_type)); 515 - } 516 - htd->startup_config.palm_detection_use_mediapipe = is_mediapipe; 517 - } 518 - 519 - if (cJSON_IsString(keypoint_estimation_type)) { 520 - bool is_collabora = (strcmp(cJSON_GetStringValue(keypoint_estimation_type), "collabora") == 0); 521 - bool is_mediapipe = (strcmp(cJSON_GetStringValue(keypoint_estimation_type), "mediapipe") == 0); 522 - if (!is_collabora && !is_mediapipe) { 523 - HT_WARN(htd, "Unknown keypoint estimation type %s - should be \"collabora\" or \"mediapipe\"", 524 - cJSON_GetStringValue(keypoint_estimation_type)); 525 - } 526 - htd->startup_config.keypoint_estimation_use_mediapipe = is_mediapipe; 527 - } 528 - 529 - if (cJSON_IsString(uvc_wire_format)) { 530 - bool is_yuv = (strcmp(cJSON_GetStringValue(uvc_wire_format), "yuv") == 0); 531 - bool is_mjpeg = 
(strcmp(cJSON_GetStringValue(uvc_wire_format), "mjpeg") == 0); 532 - if (!is_yuv && !is_mjpeg) { 533 - HT_WARN(htd, "Unknown wire format type %s - should be \"yuv\" or \"mjpeg\"", 534 - cJSON_GetStringValue(uvc_wire_format)); 535 - } 536 - if (is_yuv) { 537 - HT_DEBUG(htd, "Using YUYV422!"); 538 - htd->startup_config.desired_format = XRT_FORMAT_YUYV422; 539 - } else { 540 - HT_DEBUG(htd, "Using MJPEG!"); 541 - htd->startup_config.desired_format = XRT_FORMAT_MJPEG; 542 - } 543 - } 544 - } 545 - 546 - static void 547 - getUserConfig(struct HandTracking *htd) 548 - { 549 - // The game here is to avoid bugs + be paranoid, not to be fast. If you see something that seems "slow" - don't 550 - // fix it. Any of the tracking code is way stickier than this could ever be. 551 - 552 - struct u_config_json config_json = {}; 553 - 554 - u_config_json_open_or_create_main_file(&config_json); 555 - if (!config_json.file_loaded) { 556 - return; 557 - } 558 - 559 - cJSON *ht_config_json = cJSON_GetObjectItemCaseSensitive(config_json.root, "config_ht"); 560 - if (ht_config_json == NULL) { 561 - return; 562 - } 563 - 564 - // Don't get it twisted: initializing these to NULL is not cargo-culting. 565 - // Uninitialized values on the stack aren't guaranteed to be 0, so these could end up pointing to what we 566 - // *think* is a valid address but what is *not* one. 
567 - char *startup_config_string = NULL; 568 - char *dynamic_config_string = NULL; 569 - 570 - { 571 - const cJSON *startup_config_string_json = u_json_get(ht_config_json, "startup_config_index"); 572 - if (cJSON_IsString(startup_config_string_json)) { 573 - startup_config_string = cJSON_GetStringValue(startup_config_string_json); 574 - } 575 - 576 - const cJSON *dynamic_config_string_json = u_json_get(ht_config_json, "dynamic_config_index"); 577 - if (cJSON_IsString(dynamic_config_string_json)) { 578 - dynamic_config_string = cJSON_GetStringValue(dynamic_config_string_json); 579 - } 580 - } 581 - 582 - if (startup_config_string != NULL) { 583 - const cJSON *startup_config_obj = 584 - u_json_get(u_json_get(ht_config_json, "startup_configs"), startup_config_string); 585 - getStartupConfig(htd, startup_config_obj); 586 - } 587 - 588 - if (dynamic_config_string != NULL) { 589 - const cJSON *dynamic_config_obj = 590 - u_json_get(u_json_get(ht_config_json, "dynamic_configs"), dynamic_config_string); 591 - { 592 - ht_dynamic_config *hdc = &htd->dynamic_config; 593 - // Do the thing 594 - u_json_get_string_into_array(u_json_get(dynamic_config_obj, "name"), hdc->name, 64); 595 - 596 - u_json_get_float(u_json_get(dynamic_config_obj, "hand_fc_min"), &hdc->hand_fc_min.val); 597 - u_json_get_float(u_json_get(dynamic_config_obj, "hand_fc_min_d"), &hdc->hand_fc_min_d.val); 598 - u_json_get_float(u_json_get(dynamic_config_obj, "hand_beta"), &hdc->hand_beta.val); 599 - 600 - u_json_get_float(u_json_get(dynamic_config_obj, "nms_iou"), &hdc->nms_iou.val); 601 - u_json_get_float(u_json_get(dynamic_config_obj, "nms_threshold"), &hdc->nms_threshold.val); 602 - 603 - u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_nms_detections"), 604 - &hdc->scribble_nms_detections); 605 - u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_raw_detections"), 606 - &hdc->scribble_raw_detections); 607 - u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_2d_keypoints"), 608 - 
&hdc->scribble_2d_keypoints); 609 - u_json_get_bool(u_json_get(dynamic_config_obj, "scribble_bounding_box"), 610 - &hdc->scribble_bounding_box); 611 - 612 - char *dco_str = cJSON_Print(dynamic_config_obj); 613 - U_LOG_D("Config %s %s", dynamic_config_string, dco_str); 614 - free(dco_str); 615 - } 616 - } 617 - 618 - 619 - 620 - cJSON_Delete(config_json.root); 621 - return; 622 - } 623 - #endif 624 - 625 - static void 626 - userConfigSetDefaults(struct HandTracking *htd) 627 - { 628 - // Admit defeat: for now, Mediapipe's are still better than ours. 629 - htd->startup_config.palm_detection_use_mediapipe = true; 630 - htd->startup_config.keypoint_estimation_use_mediapipe = true; 631 - 632 - // Make sure you build DebugOptimized! 633 - htd->startup_config.desired_format = XRT_FORMAT_YUYV422; 634 - 635 - 636 - ht_dynamic_config *hdc = &htd->dynamic_config; 637 - 638 - hdc->scribble_nms_detections = true; 639 - hdc->scribble_raw_detections = false; 640 - hdc->scribble_2d_keypoints = true; 641 - hdc->scribble_bounding_box = false; 642 - 643 - hdc->hand_fc_min.min = 0.0f; 644 - hdc->hand_fc_min.max = 50.0f; 645 - hdc->hand_fc_min.step = 0.05f; 646 - hdc->hand_fc_min.val = FCMIN_HAND; 647 - 648 - hdc->hand_fc_min_d.min = 0.0f; 649 - hdc->hand_fc_min_d.max = 50.0f; 650 - hdc->hand_fc_min_d.step = 0.05f; 651 - hdc->hand_fc_min_d.val = FCMIN_D_HAND; 652 - 653 - 654 - hdc->hand_beta.min = 0.0f; 655 - hdc->hand_beta.max = 50.0f; 656 - hdc->hand_beta.step = 0.05f; 657 - hdc->hand_beta.val = BETA_HAND; 658 - 659 - hdc->max_vel.min = 0.0f; 660 - hdc->max_vel.max = 50.0f; 661 - hdc->max_vel.step = 0.05f; 662 - hdc->max_vel.val = 30.0f; // 30 m/s; about 108 kph. If your hand is going this fast, our tracking failing is the 663 - // least of your problems. 664 - 665 - hdc->max_acc.min = 0.0f; 666 - hdc->max_acc.max = 100.0f; 667 - hdc->max_acc.step = 0.1f; 668 - hdc->max_acc.val = 100.0f; // 100 m/s^2; about 10 Gs. Ditto. 
669 - 670 - hdc->nms_iou.min = 0.0f; 671 - hdc->nms_iou.max = 1.0f; 672 - hdc->nms_iou.step = 0.01f; 673 - 674 - 675 - hdc->nms_threshold.min = 0.0f; 676 - hdc->nms_threshold.max = 1.0f; 677 - hdc->nms_threshold.step = 0.01f; 678 - 679 - hdc->new_detection_threshold.min = 0.0f; 680 - hdc->new_detection_threshold.max = 1.0f; 681 - hdc->new_detection_threshold.step = 0.01f; 682 - 683 - 684 - hdc->nms_iou.val = 0.05f; 685 - hdc->nms_threshold.val = 0.3f; 686 - hdc->new_detection_threshold.val = 0.6f; 687 - } 688 - 689 - 690 - static void 691 - getModelsFolder(struct HandTracking *htd) 692 - { 693 - // Please bikeshed me on this! I don't know where is the best place to put this stuff! 694 - #if 0 695 - char exec_location[1024] = {}; 696 - readlink("/proc/self/exe", exec_location, 1024); 697 - 698 - HT_DEBUG(htd, "Exec at %s\n", exec_location); 699 - 700 - int end = 0; 701 - while (exec_location[end] != '\0') { 702 - HT_DEBUG(htd, "%d", end); 703 - end++; 704 - } 705 - 706 - while (exec_location[end] != '/' && end != 0) { 707 - HT_DEBUG(htd, "%d %c", end, exec_location[end]); 708 - exec_location[end] = '\0'; 709 - end--; 710 - } 711 - 712 - strcat(exec_location, "../share/monado/hand-tracking-models/"); 713 - strcpy(htd->startup_config.model_slug, exec_location); 714 - #else 715 - const char *xdg_home = getenv("XDG_CONFIG_HOME"); 716 - const char *home = getenv("HOME"); 717 - if (xdg_home != NULL) { 718 - strcpy(htd->startup_config.model_slug, xdg_home); 719 - } else if (home != NULL) { 720 - strcpy(htd->startup_config.model_slug, home); 721 - } else { 722 - assert(false); 723 - } 724 - strcat(htd->startup_config.model_slug, "/.local/share/monado/hand-tracking-models/"); 725 - #endif 726 - } 727 - 728 - 729 - 730 - static void 731 - htExitFrame(struct HandTracking *htd, 732 - bool err, 733 - struct xrt_hand_joint_set final_hands_ordered_by_handedness[2], 734 - uint64_t timestamp, 735 - struct xrt_hand_joint_set *out_left, 736 - struct xrt_hand_joint_set *out_right, 737 
- uint64_t *out_timestamp_ns) 738 - { 739 - 740 - os_mutex_lock(&htd->openxr_hand_data_mediator); 741 - *out_timestamp_ns = timestamp; 742 - 743 - if (err) { 744 - out_left->is_active = false; 745 - out_right->is_active = false; 746 - } else { 747 - *out_left = final_hands_ordered_by_handedness[0]; 748 - *out_right = final_hands_ordered_by_handedness[1]; 749 - 750 - 751 - HT_DEBUG(htd, "Adding ts %zu", htd->hands_for_openxr_timestamp); 752 - } 753 - os_mutex_unlock(&htd->openxr_hand_data_mediator); 754 - #ifdef EXPERIMENTAL_DATASET_RECORDING 755 - if (htd->tracking_should_record_dataset) { 756 - // Add nothing-entry to json file. 757 - jsonMaybeAddSomeHands(htd, err); 758 - htd->gst.current_index++; 759 - } 760 - #endif 761 - } 762 - 763 - /* 764 - * 765 - * Member functions. 766 - * 767 - */ 768 - 769 - HandTracking::HandTracking() 770 - { 771 - this->base.process = &HandTracking::cCallbackProcess; 772 - this->base.destroy = &HandTracking::cCallbackDestroy; 773 - } 774 - 775 - HandTracking::~HandTracking() 776 - { 777 - // 778 - } 779 - 780 - //!@todo vVERY BAD 781 - static void 782 - combine_frames_r8g8b8_hack(struct xrt_frame *l, struct xrt_frame *r, struct xrt_frame *f) 783 - { 784 - // SINK_TRACE_MARKER(); 785 - 786 - uint32_t height = l->height; 787 - 788 - for (uint32_t y = 0; y < height; y++) { 789 - uint8_t *dst = f->data + f->stride * y; 790 - uint8_t *src = l->data + l->stride * y; 791 - 792 - for (uint32_t x = 0; x < l->width * 3; x++) { 793 - *dst++ = *src++; 794 - } 795 - 796 - dst = f->data + f->stride * y + l->width * 3; 797 - src = r->data + r->stride * y; 798 - for (uint32_t x = 0; x < r->width * 3; x++) { 799 - *dst++ = *src++; 800 - } 801 - } 802 - } 803 - 804 - void 805 - HandTracking::cCallbackProcess(struct t_hand_tracking_sync *ht_sync, 806 - struct xrt_frame *left_frame, 807 - struct xrt_frame *right_frame, 808 - struct xrt_hand_joint_set *out_left_hand, 809 - struct xrt_hand_joint_set *out_right_hand, 810 - uint64_t *out_timestamp_ns) 811 
- { 812 - XRT_TRACE_MARKER(); 813 - 814 - HandTracking *htd = (struct HandTracking *)ht_sync; 815 - 816 - // U_LOG_E("htd is at %p", htd); 817 - 818 - htd->current_frame_timestamp = left_frame->timestamp; 819 - 820 - int64_t start, end; 821 - start = os_monotonic_get_ns(); 822 - 823 - 824 - /* 825 - * Setup views. 826 - */ 827 - 828 - assert(left_frame->width == right_frame->width); 829 - assert(left_frame->height == right_frame->height); 830 - 831 - const int full_height = left_frame->height; 832 - const int full_width = left_frame->width * 2; 833 - 834 - const int view_width = htd->one_view_size_px.w; 835 - const int view_height = htd->one_view_size_px.h; 836 - 837 - assert(full_height == view_height); 838 - 839 - const cv::Size full_size = cv::Size(full_width, full_height); 840 - const cv::Size view_size = cv::Size(view_width, view_height); 841 - const cv::Point view_offsets[2] = {cv::Point(0, 0), cv::Point(view_width, 0)}; 842 - 843 - // cv::Mat full_frame(full_size, CV_8UC3, htd->frame_for_process->data, htd->frame_for_process->stride); 844 - htd->views[0].run_model_on_this = cv::Mat(view_size, CV_8UC3, left_frame->data, left_frame->stride); 845 - htd->views[1].run_model_on_this = cv::Mat(view_size, CV_8UC3, right_frame->data, right_frame->stride); 846 - 847 - 848 - // Convenience 849 - uint64_t timestamp = left_frame->timestamp; 850 - 851 - htd->debug_scribble = u_sink_debug_is_active(&htd->debug_sink); 852 - 853 - cv::Mat debug_output = {}; 854 - xrt_frame *debug_frame = nullptr; 855 - 856 - 857 - if (htd->debug_scribble) { 858 - u_frame_create_one_off(XRT_FORMAT_R8G8B8, full_width, full_height, &debug_frame); 859 - combine_frames_r8g8b8_hack(left_frame, right_frame, debug_frame); 860 - 861 - debug_output = cv::Mat(full_size, CV_8UC3, debug_frame->data, debug_frame->stride); 862 - htd->views[0].debug_out_to_this = debug_output(cv::Rect(view_offsets[0], view_size)); 863 - htd->views[1].debug_out_to_this = debug_output(cv::Rect(view_offsets[1], view_size)); 
864 - } 865 - 866 - 867 - /* 868 - * Do the hand tracking! 869 - */ 870 - 871 - std::future<std::vector<Hand2D>> future_left = 872 - std::async(std::launch::async, htImageToKeypoints, &htd->views[0]); 873 - std::future<std::vector<Hand2D>> future_right = 874 - std::async(std::launch::async, htImageToKeypoints, &htd->views[1]); 875 - std::vector<Hand2D> hands_in_left_view = future_left.get(); 876 - std::vector<Hand2D> hands_in_right_view = future_right.get(); 877 - 878 - end = os_monotonic_get_ns(); 879 - 880 - 881 - this_frame = os_monotonic_get_ns(); 882 - 883 - double time_ms = (double)(end - start) / (double)U_TIME_1MS_IN_NS; 884 - double _1_time = 1 / (time_ms * 0.001); 885 - 886 - char t[64]; 887 - char t2[64]; 888 - sprintf(t, "% 8.2f ms", time_ms); 889 - sprintf(t2, "% 8.2f fps", _1_time); 890 - last_frame = this_frame; 891 - 892 - 893 - if (htd->debug_scribble) { 894 - cv::putText(debug_output, t, cv::Point(30, 60), cv::FONT_HERSHEY_SIMPLEX, 1.0f, cv::Scalar(0, 255, 0), 895 - 4); 896 - cv::putText(debug_output, t2, cv::Point(30, 100), cv::FONT_HERSHEY_SIMPLEX, 1.0f, cv::Scalar(0, 255, 0), 897 - 4); 898 - } else { 899 - HT_DEBUG(htd, "%s", t); 900 - HT_DEBUG(htd, "%s", t2); 901 - } 902 - 903 - 904 - 905 - if (htd->debug_scribble) { 906 - u_sink_debug_push_frame(&htd->debug_sink, debug_frame); 907 - xrt_frame_reference(&debug_frame, NULL); 908 - } 909 - 910 - // Bail early this frame if no hands were detected. 911 - // In the long run, this'll be a silly thing - we shouldn't always take the detection model's word for it 912 - // especially when part of the pipeline is an arbitrary confidence threshold. 
913 - if (hands_in_left_view.size() == 0 || hands_in_right_view.size() == 0) { 914 - htExitFrame(htd, true, NULL, timestamp, out_left_hand, out_right_hand, out_timestamp_ns); 915 - return; 916 - } 917 - 918 - std::vector<Hand3D> possible_3d_hands; 919 - 920 - // for every possible combination of hands in left view and hands in right view, 921 - for (size_t idx_l = 0; idx_l < hands_in_left_view.size(); idx_l++) { 922 - for (size_t idx_r = 0; idx_r < hands_in_right_view.size(); idx_r++) { 923 - Hand3D cur_hand = {}; 924 - 925 - Hand2D &left_2d = hands_in_left_view[idx_l]; 926 - Hand2D &right_2d = hands_in_right_view[idx_r]; 927 - 928 - // Calculate a 3D hand for this combination 929 - htJointDisparityMath(htd, &hands_in_left_view[idx_l], &hands_in_right_view[idx_r], &cur_hand); 930 - cur_hand.timestamp = timestamp; 931 - cur_hand.rejected_by_smush = false; 932 - 933 - cur_hand.idx_l = idx_l; 934 - cur_hand.idx_r = idx_r; 935 - 936 - // Calculate a y-disparity for this combination 937 - cur_hand.y_disparity_error = errHandDisparity(left_2d, right_2d); 938 - 939 - possible_3d_hands.push_back(cur_hand); 940 - } 941 - } 942 - 943 - HT_DEBUG(htd, "Starting with %zu hands!", possible_3d_hands.size()); 944 - 945 - // For each pair of 3D hands we just made 946 - for (size_t idx_one = 0; idx_one < possible_3d_hands.size(); idx_one++) { 947 - for (size_t idx_two = 0; idx_two < possible_3d_hands.size(); idx_two++) { 948 - if ((idx_one <= idx_two)) { 949 - continue; 950 - } 951 - 952 - // See if this pair is suspiciously close together. 953 - // If it is, then this pairing is wrong - this is what was causing the "hands smushing together" 954 - // issue - we weren't catching these reliably. 
955 - float errr = sumOfHandJointDistances(possible_3d_hands[idx_one], possible_3d_hands[idx_two]); 956 - HT_TRACE(htd, "%zu %zu is smush %f", idx_one, idx_two, errr); 957 - if (errr < 0.03f * 21.0f) { 958 - possible_3d_hands[idx_one].rejected_by_smush = true; 959 - possible_3d_hands[idx_two].rejected_by_smush = true; 960 - } 961 - } 962 - } 963 - 964 - std::vector<Hand3D> hands_unfiltered; 965 - 966 - for (Hand3D hand : possible_3d_hands) { 967 - // If none of these are false, then all our heuristics indicate this is a real hand, so we add it to our 968 - // list of real hands. 969 - bool selected = !hand.rejected_by_smush && // 970 - hand.y_disparity_error < 1.0f && // 971 - rejectTooClose(htd, &hand) && // 972 - rejectTooFar(htd, &hand) && // 973 - rejectTinyPalm(htd, &hand); 974 - if (selected) { 975 - HT_TRACE(htd, "Pushing back with y-error %f", hand.y_disparity_error); 976 - hands_unfiltered.push_back(hand); 977 - } 978 - } 979 - 980 - 981 - std::vector<bool> past_hands_taken; 982 - std::vector<bool> present_hands_taken; 983 - 984 - std::vector<size_t> past_indices; 985 - std::vector<size_t> present_indices; 986 - std::vector<float> flow_errors; 987 - 988 - 989 - float max_dist_between_frames = 1.0f; 990 - 991 - naive_sort_permutation_by_error<HandHistory3D, Hand3D>(htd->histories_3d, // past 992 - hands_unfiltered, // present 993 - 994 - 995 - // outputs 996 - past_hands_taken, present_hands_taken, past_indices, 997 - present_indices, flow_errors, errHandHistory, 998 - (max_dist_between_frames * 21.0f) 999 - 1000 - ); 1001 - 1002 - 1003 - for (size_t i = 0; i < past_indices.size(); i++) { 1004 - htd->histories_3d[past_indices[i]].last_hands_unfiltered.push_back( 1005 - hands_unfiltered[present_indices[i]]); 1006 - } 1007 - // The preceding may not do anything, because we'll start out with no hand histories! All the numbers of 1008 - // elements should be zero. 
1009 - 1010 - 1011 - for (size_t i = 0; i < present_hands_taken.size(); i++) { 1012 - if (present_hands_taken[i] == false) { 1013 - // if this hand never got assigned to a history 1014 - HandHistory3D history_new; 1015 - history_new.uuid = rand(); // Not a great uuid, huh? Good enough for us, this only has to be 1016 - // unique across say an hour period max. 1017 - handEuroFiltersInit(&history_new, FCMIN_HAND, FCMIN_D_HAND, BETA_HAND); 1018 - history_new.last_hands_unfiltered.push_back(hands_unfiltered[i]); 1019 - // history_new. 1020 - htd->histories_3d.push_back( 1021 - history_new); // Add something to the end - don't initialize any of it. 1022 - } 1023 - } 1024 - 1025 - int bob = 0; 1026 - for (size_t i = 0; i < past_hands_taken.size(); i++) { 1027 - if (past_hands_taken[i] == false) { 1028 - htd->histories_3d.erase(htd->histories_3d.begin() + i + bob); 1029 - bob--; 1030 - } 1031 - } 1032 - 1033 - if (htd->histories_3d.size() == 0) { 1034 - HT_DEBUG(htd, "Bailing"); 1035 - htExitFrame(htd, true, NULL, timestamp, out_left_hand, out_right_hand, out_timestamp_ns); 1036 - return; 1037 - } 1038 - 1039 - size_t num_hands = htd->histories_3d.size(); 1040 - // if (num_hands > 2) { 1041 - HT_DEBUG(htd, "Ending with %zu hands!", 1042 - num_hands); // this is quite bad, but rarely happens. 1043 - // } 1044 - 1045 - // Here, we go back to our bbox_histories and remove the histories for any bounding boxes that never turned into 1046 - // good hands. 1047 - 1048 - // Iterate over all hands we're keeping track of, compute their current handedness. 
1049 - std::vector<size_t> valid_2d_idxs[2]; 1050 - 1051 - 1052 - for (size_t i = 0; i < htd->histories_3d.size(); i++) { 1053 - // U_LOG_E("Valid hand %zu l_idx %i r_idx %i", i, htd->histories_3d[i].last_hands[0]->idx_l, 1054 - // htd->histories_3d[i].last_hands[0]->idx_r); 1055 - valid_2d_idxs[0].push_back(htd->histories_3d[i].last_hands_unfiltered.back().idx_l); 1056 - valid_2d_idxs[1].push_back(htd->histories_3d[i].last_hands_unfiltered.back().idx_r); 1057 - handednessHandHistory3D(&htd->histories_3d[i]); 1058 - } 1059 - 1060 - // Almost certainly not the cleanest way of doing this but leave me alone 1061 - // Per camera view 1062 - for (int view = 0; view < 2; view++) { 1063 - // Per entry in bbox_histories 1064 - for (size_t hist_idx = 0; hist_idx < htd->views[view].bbox_histories.size(); hist_idx++) { 1065 - // See if this entry in bbox_histories ever turned into a 3D hand. If not, we notify (in a very 1066 - // silly way) htImageToKeypoints that it should go away because it was an erroneous detection. 1067 - for (size_t valid_idx : valid_2d_idxs[view]) { 1068 - if (valid_idx == hist_idx) { 1069 - htd->views[view].bbox_histories[hist_idx].htAlgorithm_approves = true; 1070 - break; 1071 - } else { 1072 - htd->views[view].bbox_histories[hist_idx].htAlgorithm_approves = false; 1073 - } 1074 - } 1075 - } 1076 - } 1077 - 1078 - // Whoo! Okay, now we have some unfiltered hands in htd->histories_3d[i].last_hands[0]! Euro filter them! 
1079 - 1080 - std::vector<Hand3D> filtered_hands(num_hands); 1081 - 1082 - for (size_t hand_index = 0; hand_index < num_hands; hand_index++) { 1083 - handEuroFiltersRun(htd, &htd->histories_3d[hand_index], &filtered_hands[hand_index]); 1084 - htd->histories_3d[hand_index].last_hands_filtered.push_back(filtered_hands[hand_index]); 1085 - applyThumbIndexDrag(&filtered_hands[hand_index]); 1086 - filtered_hands[hand_index].handedness = htd->histories_3d[hand_index].handedness; 1087 - } 1088 - 1089 - std::vector<size_t> xr_indices; 1090 - std::vector<Hand3D *> hands_to_use; 1091 - 1092 - if (filtered_hands.size() == 1) { 1093 - if (filtered_hands[0].handedness < 0) { 1094 - // Left 1095 - xr_indices = {0}; 1096 - hands_to_use = {&filtered_hands[0]}; 1097 - } else { 1098 - xr_indices = {1}; 1099 - hands_to_use = {&filtered_hands[0]}; 1100 - } 1101 - } else { 1102 - // filtered_hands better be two for now. 1103 - if (filtered_hands[0].handedness < filtered_hands[1].handedness) { 1104 - xr_indices = {0, 1}; 1105 - hands_to_use = {&filtered_hands[0], &filtered_hands[1]}; 1106 - } else { 1107 - xr_indices = {1, 0}; 1108 - hands_to_use = {&filtered_hands[0], &filtered_hands[1]}; 1109 - } 1110 - } 1111 - 1112 - struct xrt_hand_joint_set final_hands_ordered_by_handedness[2]; 1113 - memset(&final_hands_ordered_by_handedness[0], 0, sizeof(xrt_hand_joint_set)); 1114 - memset(&final_hands_ordered_by_handedness[1], 0, sizeof(xrt_hand_joint_set)); 1115 - final_hands_ordered_by_handedness[0].is_active = false; 1116 - final_hands_ordered_by_handedness[1].is_active = false; 1117 - 1118 - for (size_t i = 0; (i < xr_indices.size()); i++) { 1119 - Hand3D *hand = hands_to_use[i]; 1120 - 1121 - struct xrt_hand_joint_set *put_in_set = &final_hands_ordered_by_handedness[xr_indices[i]]; 1122 - 1123 - xrt_vec3 wrist = hand->kps[0]; 1124 - 1125 - xrt_vec3 index_prox = hand->kps[5]; 1126 - xrt_vec3 middle_prox = hand->kps[9]; 1127 - xrt_vec3 ring_prox = hand->kps[13]; 1128 - xrt_vec3 pinky_prox = 
hand->kps[17]; 1129 - 1130 - xrt_vec3 middle_to_index = m_vec3_sub(index_prox, middle_prox); 1131 - xrt_vec3 middle_to_ring = m_vec3_sub(ring_prox, middle_prox); 1132 - xrt_vec3 middle_to_pinky = m_vec3_sub(pinky_prox, middle_prox); 1133 - 1134 - xrt_vec3 three_fourths_down_middle_mcp = 1135 - m_vec3_add(m_vec3_mul_scalar(wrist, 3.0f / 4.0f), m_vec3_mul_scalar(middle_prox, 1.0f / 4.0f)); 1136 - 1137 - xrt_vec3 middle_metacarpal = three_fourths_down_middle_mcp; 1138 - 1139 - float s = 0.6f; 1140 - 1141 - xrt_vec3 index_metacarpal = middle_metacarpal + m_vec3_mul_scalar(middle_to_index, s); 1142 - xrt_vec3 ring_metacarpal = middle_metacarpal + m_vec3_mul_scalar(middle_to_ring, s); 1143 - xrt_vec3 pinky_metacarpal = middle_metacarpal + m_vec3_mul_scalar(middle_to_pinky, s); 1144 - 1145 - float palm_ness = 0.33; 1146 - xrt_vec3 palm = 1147 - m_vec3_add(m_vec3_mul_scalar(wrist, palm_ness), m_vec3_mul_scalar(middle_prox, (1.0f - palm_ness))); 1148 - 1149 - 1150 - 1151 - htProcessJoint(htd, palm, put_in_set, XRT_HAND_JOINT_PALM); 1152 - 1153 - htProcessJoint(htd, hand->kps[0], put_in_set, XRT_HAND_JOINT_WRIST); 1154 - htProcessJoint(htd, hand->kps[1], put_in_set, XRT_HAND_JOINT_THUMB_METACARPAL); 1155 - htProcessJoint(htd, hand->kps[2], put_in_set, XRT_HAND_JOINT_THUMB_PROXIMAL); 1156 - htProcessJoint(htd, hand->kps[3], put_in_set, XRT_HAND_JOINT_THUMB_DISTAL); 1157 - htProcessJoint(htd, hand->kps[4], put_in_set, XRT_HAND_JOINT_THUMB_TIP); 1158 - 1159 - htProcessJoint(htd, index_metacarpal, put_in_set, XRT_HAND_JOINT_INDEX_METACARPAL); 1160 - htProcessJoint(htd, hand->kps[5], put_in_set, XRT_HAND_JOINT_INDEX_PROXIMAL); 1161 - htProcessJoint(htd, hand->kps[6], put_in_set, XRT_HAND_JOINT_INDEX_INTERMEDIATE); 1162 - htProcessJoint(htd, hand->kps[7], put_in_set, XRT_HAND_JOINT_INDEX_DISTAL); 1163 - htProcessJoint(htd, hand->kps[8], put_in_set, XRT_HAND_JOINT_INDEX_TIP); 1164 - 1165 - htProcessJoint(htd, middle_metacarpal, put_in_set, XRT_HAND_JOINT_MIDDLE_METACARPAL); 1166 - 
htProcessJoint(htd, hand->kps[9], put_in_set, XRT_HAND_JOINT_MIDDLE_PROXIMAL); 1167 - htProcessJoint(htd, hand->kps[10], put_in_set, XRT_HAND_JOINT_MIDDLE_INTERMEDIATE); 1168 - htProcessJoint(htd, hand->kps[11], put_in_set, XRT_HAND_JOINT_MIDDLE_DISTAL); 1169 - htProcessJoint(htd, hand->kps[12], put_in_set, XRT_HAND_JOINT_MIDDLE_TIP); 1170 - 1171 - htProcessJoint(htd, ring_metacarpal, put_in_set, XRT_HAND_JOINT_RING_METACARPAL); 1172 - htProcessJoint(htd, hand->kps[13], put_in_set, XRT_HAND_JOINT_RING_PROXIMAL); 1173 - htProcessJoint(htd, hand->kps[14], put_in_set, XRT_HAND_JOINT_RING_INTERMEDIATE); 1174 - htProcessJoint(htd, hand->kps[15], put_in_set, XRT_HAND_JOINT_RING_DISTAL); 1175 - htProcessJoint(htd, hand->kps[16], put_in_set, XRT_HAND_JOINT_RING_TIP); 1176 - 1177 - htProcessJoint(htd, pinky_metacarpal, put_in_set, XRT_HAND_JOINT_LITTLE_METACARPAL); 1178 - htProcessJoint(htd, hand->kps[17], put_in_set, XRT_HAND_JOINT_LITTLE_PROXIMAL); 1179 - htProcessJoint(htd, hand->kps[18], put_in_set, XRT_HAND_JOINT_LITTLE_INTERMEDIATE); 1180 - htProcessJoint(htd, hand->kps[19], put_in_set, XRT_HAND_JOINT_LITTLE_DISTAL); 1181 - htProcessJoint(htd, hand->kps[20], put_in_set, XRT_HAND_JOINT_LITTLE_TIP); 1182 - 1183 - put_in_set->is_active = true; 1184 - math_pose_identity(&put_in_set->hand_pose.pose); 1185 - 1186 - 1187 - put_in_set->hand_pose.pose.orientation = htd->stereo_camera_to_left_camera; 1188 - 1189 - put_in_set->hand_pose.relation_flags = valid_flags_ht; 1190 - 1191 - u_hand_joints_apply_joint_width(put_in_set); 1192 - applyJointOrientations(put_in_set, xr_indices[i]); 1193 - } 1194 - htExitFrame(htd, false, final_hands_ordered_by_handedness, filtered_hands[0].timestamp, out_left_hand, 1195 - out_right_hand, out_timestamp_ns); 1196 - } 1197 - 1198 - void 1199 - HandTracking::cCallbackDestroy(t_hand_tracking_sync *ht_sync) 1200 - { 1201 - auto ht_ptr = &HandTracking::fromC(ht_sync); 1202 - 1203 - u_sink_debug_destroy(&ht_ptr->debug_sink); 1204 - 1205 - delete 
ht_ptr->views[0].htm; 1206 - delete ht_ptr->views[1].htm; 1207 - delete ht_ptr; 1208 - } 1209 - 1210 - 1211 - /* 1212 - * 1213 - * 'Exported' functions. 1214 - * 1215 - */ 1216 - 1217 - extern "C" t_hand_tracking_sync * 1218 - t_hand_tracking_sync_old_rgb_create(struct t_stereo_camera_calibration *calib) 1219 - { 1220 - XRT_TRACE_MARKER(); 1221 - 1222 - auto htd = new HandTracking(); 1223 - 1224 - U_LOG_E("htd is at %p", (void *)htd); 1225 - 1226 - // Setup logging first. We like logging. 1227 - htd->log_level = debug_get_log_option_ht_log(); 1228 - 1229 - /* 1230 - * Get configuration 1231 - */ 1232 - 1233 - u_sink_debug_init(&htd->debug_sink); 1234 - assert(calib != NULL); 1235 - getCalibration(htd, calib); 1236 - // Set defaults - most people won't have a config json and it won't get past here. 1237 - userConfigSetDefaults(htd); 1238 - getModelsFolder(htd); 1239 - 1240 - 1241 - htd->views[0].htd = htd; 1242 - htd->views[1].htd = htd; // :) 1243 - 1244 - htd->views[0].htm = new ht_model(htd); 1245 - htd->views[1].htm = new ht_model(htd); 1246 - 1247 - htd->views[0].view = 0; 1248 - htd->views[1].view = 1; 1249 - 1250 - u_var_add_root(htd, "Camera-based Hand Tracker", true); 1251 - 1252 - u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_fc_min, "hand_fc_min"); 1253 - u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_fc_min_d, "hand_fc_min_d"); 1254 - u_var_add_draggable_f32(htd, &htd->dynamic_config.hand_beta, "hand_beta"); 1255 - u_var_add_draggable_f32(htd, &htd->dynamic_config.nms_iou, "nms_iou"); 1256 - u_var_add_draggable_f32(htd, &htd->dynamic_config.nms_threshold, "nms_threshold"); 1257 - u_var_add_draggable_f32(htd, &htd->dynamic_config.new_detection_threshold, "new_detection_threshold"); 1258 - 1259 - u_var_add_bool(htd, &htd->dynamic_config.scribble_raw_detections, "Scribble raw detections"); 1260 - u_var_add_bool(htd, &htd->dynamic_config.scribble_nms_detections, "Scribble NMS detections"); 1261 - u_var_add_bool(htd, 
&htd->dynamic_config.scribble_2d_keypoints, "Scribble 2D keypoints"); 1262 - u_var_add_bool(htd, &htd->dynamic_config.scribble_bounding_box, "Scribble bounding box"); 1263 - 1264 - u_var_add_sink_debug(htd, &htd->debug_sink, "i"); 1265 - 1266 - 1267 - HT_DEBUG(htd, "Hand Tracker initialized!"); 1268 - 1269 - 1270 - return &htd->base; 1271 - }

-305

src/xrt/tracking/hand/old_rgb/rgb_sync.hpp

··· 1 - // Copyright 2022, Collabora, Ltd. 2 - // SPDX-License-Identifier: BSL-1.0 3 - /*! 4 - * @file 5 - * @brief Old RGB hand tracking header. 6 - * @author Jakob Bornecrantz <jakob@collabora.com> 7 - * @author Moses Turner <moses@collabora.com> 8 - * @ingroup tracking 9 - */ 10 - 11 - #pragma once 12 - 13 - #include "tracking/t_hand_tracking.h" 14 - 15 - #include "os/os_threading.h" 16 - 17 - #include "xrt/xrt_device.h" 18 - #include "xrt/xrt_prober.h" 19 - #include "xrt/xrt_frame.h" 20 - #include "xrt/xrt_frameserver.h" 21 - 22 - #include "math/m_api.h" 23 - #include "math/m_vec3.h" 24 - #include "math/m_filter_one_euro.h" 25 - 26 - #include "util/u_var.h" 27 - #include "util/u_json.h" 28 - #include "util/u_sink.h" 29 - #include "util/u_debug.h" 30 - #include "util/u_device.h" 31 - 32 - #include "util/u_template_historybuf.hpp" 33 - 34 - #include <opencv2/opencv.hpp> 35 - 36 - #include <vector> 37 - namespace xrt::tracking::hand::old_rgb { 38 - 39 - using namespace xrt::auxiliary::util; 40 - 41 - #define HT_TRACE(htd, ...) U_LOG_IFL_T(htd->log_level, __VA_ARGS__) 42 - #define HT_DEBUG(htd, ...) U_LOG_IFL_D(htd->log_level, __VA_ARGS__) 43 - #define HT_INFO(htd, ...) U_LOG_IFL_I(htd->log_level, __VA_ARGS__) 44 - #define HT_WARN(htd, ...) U_LOG_IFL_W(htd->log_level, __VA_ARGS__) 45 - #define HT_ERROR(htd, ...) 
U_LOG_IFL_E(htd->log_level, __VA_ARGS__) 46 - 47 - #undef EXPERIMENTAL_DATASET_RECORDING 48 - 49 - #define FCMIN_BBOX_ORIENTATION 3.0f 50 - #define FCMIN_D_BB0X_ORIENTATION 10.0f 51 - #define BETA_BB0X_ORIENTATION 0.0f 52 - 53 - #define FCMIN_BBOX_POSITION 30.0f 54 - #define FCMIN_D_BB0X_POSITION 25.0f 55 - #define BETA_BB0X_POSITION 0.01f 56 - 57 - 58 - 59 - #define FCMIN_HAND 4.0f 60 - #define FCMIN_D_HAND 12.0f 61 - #define BETA_HAND 0.0083f 62 - 63 - class ht_model; 64 - 65 - enum HandJoint7Keypoint 66 - { 67 - WRIST_7KP = 0, 68 - INDEX_7KP = 1, 69 - MIDDLE_7KP = 2, 70 - RING_7KP = 3, 71 - LITTLE_7KP = 4, 72 - THUMB_METACARPAL_7KP = 5, 73 - THMB_PROXIMAL_7KP = 6 74 - }; 75 - 76 - enum HandJoint21Keypoint 77 - { 78 - WRIST = 0, 79 - 80 - THMB_MCP = 1, 81 - THMB_PXM = 2, 82 - THMB_DST = 3, 83 - THMB_TIP = 4, 84 - 85 - INDX_PXM = 5, 86 - INDX_INT = 6, 87 - INDX_DST = 7, 88 - INDX_TIP = 8, 89 - 90 - MIDL_PXM = 9, 91 - MIDL_INT = 10, 92 - MIDL_DST = 11, 93 - MIDL_TIP = 12, 94 - 95 - RING_PXM = 13, 96 - RING_INT = 14, 97 - RING_DST = 15, 98 - RING_TIP = 16, 99 - 100 - LITL_PXM = 17, 101 - LITL_INT = 18, 102 - LITL_DST = 19, 103 - LITL_TIP = 20 104 - }; 105 - 106 - struct Palm7KP 107 - { 108 - struct xrt_vec2 kps[7]; 109 - float confidence; // between 0 and 1 110 - }; 111 - 112 - struct DetectionModelOutput 113 - { 114 - float rotation; 115 - float size; 116 - xrt_vec2 center; 117 - Palm7KP palm; 118 - 119 - cv::Matx23f warp_there; 120 - cv::Matx23f warp_back; 121 - }; 122 - 123 - // To keep you on your toes. *Don't* think the 2D hand is the same as the 3D! 124 - struct Hand2D 125 - { 126 - struct xrt_vec3 kps[21]; 127 - // Third value is depth from ML model. Do not believe the depth. 128 - }; 129 - 130 - struct Hand3D 131 - { 132 - struct xrt_vec3 kps[21]; 133 - float y_disparity_error; 134 - float flow_error; 135 - int idx_l; 136 - int idx_r; 137 - bool rejected_by_smush; // init to false. 
138 - 139 - float handedness; 140 - uint64_t timestamp; 141 - }; 142 - 143 - 144 - struct HandHistory3D 145 - { 146 - // Index 0 is current frame, index 1 is last frame, index 2 is second to last frame. 147 - // No particular reason to keep the last 5 frames. we only really only use the current and last one. 148 - float handedness; 149 - bool have_prev_hand = false; 150 - double prev_dy; 151 - uint64_t prev_ts_for_alpha; // also in last_hands_unfiltered.back() but go away. 152 - 153 - uint64_t first_ts; 154 - uint64_t prev_filtered_ts; 155 - 156 - HistoryBuffer<Hand3D, 10> last_hands_unfiltered; 157 - HistoryBuffer<Hand3D, 10> last_hands_filtered; 158 - 159 - // Euro filter for 21kps. 160 - m_filter_euro_vec3 filters[21]; 161 - int uuid; 162 - }; 163 - 164 - struct HandHistory2DBBox 165 - { 166 - m_filter_euro_vec2 m_filter_center; 167 - m_filter_euro_vec2 m_filter_direction; 168 - 169 - HistoryBuffer<xrt_vec2, 50> wrist_unfiltered; 170 - HistoryBuffer<xrt_vec2, 50> index_unfiltered; 171 - HistoryBuffer<xrt_vec2, 50> middle_unfiltered; 172 - HistoryBuffer<xrt_vec2, 50> pinky_unfiltered; 173 - bool htAlgorithm_approves = false; 174 - }; 175 - 176 - // Forward declaration for ht_view 177 - struct HandTracking; 178 - 179 - struct ht_view 180 - { 181 - HandTracking *htd; 182 - ht_model *htm; 183 - int view; 184 - 185 - cv::Matx<double, 4, 1> distortion; 186 - cv::Matx<double, 3, 3> cameraMatrix; 187 - cv::Matx33d rotate_camera_to_stereo_camera; // R1 or R2 188 - 189 - cv::Mat run_model_on_this; 190 - cv::Mat debug_out_to_this; 191 - 192 - std::vector<HandHistory2DBBox> bbox_histories; 193 - }; 194 - 195 - struct ht_dynamic_config 196 - { 197 - char name[64]; 198 - struct u_var_draggable_f32 hand_fc_min; 199 - struct u_var_draggable_f32 hand_fc_min_d; 200 - struct u_var_draggable_f32 hand_beta; 201 - struct u_var_draggable_f32 max_vel; 202 - struct u_var_draggable_f32 max_acc; 203 - struct u_var_draggable_f32 nms_iou; 204 - struct u_var_draggable_f32 nms_threshold; 205 
- struct u_var_draggable_f32 new_detection_threshold; 206 - bool scribble_raw_detections; 207 - bool scribble_nms_detections; 208 - bool scribble_2d_keypoints; 209 - bool scribble_bounding_box; 210 - }; 211 - 212 - struct ht_startup_config 213 - { 214 - bool palm_detection_use_mediapipe = false; 215 - bool keypoint_estimation_use_mediapipe = false; 216 - enum xrt_format desired_format; 217 - char model_slug[1024]; 218 - }; 219 - 220 - /*! 221 - * Main class of old style RGB hand tracking. 222 - * 223 - * @ingroup aux_tracking 224 - */ 225 - struct HandTracking 226 - { 227 - public: 228 - // Base thing, has to be first. 229 - t_hand_tracking_sync base = {}; 230 - 231 - struct u_sink_debug debug_sink = {}; 232 - 233 - struct xrt_size one_view_size_px = {}; 234 - 235 - #if defined(EXPERIMENTAL_DATASET_RECORDING) 236 - struct 237 - { 238 - struct u_var_button start_json_record = {}; 239 - } gui = {}; 240 - 241 - struct 242 - { 243 - struct gstreamer_pipeline *gp = nullptr; 244 - struct gstreamer_sink *gs = nullptr; 245 - struct xrt_frame_sink *sink = nullptr; 246 - struct xrt_frame_context xfctx = {}; 247 - uint64_t offset_ns = {}; 248 - uint64_t last_frame_ns = {}; 249 - uint64_t current_index = {}; 250 - 251 - cJSON *output_root = nullptr; 252 - cJSON *output_array = nullptr; 253 - } gst = {}; 254 - #endif 255 - 256 - struct ht_view views[2] = {}; 257 - 258 - float baseline = {}; 259 - struct xrt_quat stereo_camera_to_left_camera = {}; 260 - 261 - uint64_t current_frame_timestamp = {}; 262 - 263 - std::vector<HandHistory3D> histories_3d = {}; 264 - 265 - struct os_mutex openxr_hand_data_mediator = {}; 266 - struct xrt_hand_joint_set hands_for_openxr[2] = {}; 267 - uint64_t hands_for_openxr_timestamp = {}; 268 - 269 - // Only change these when you have unlocked_between_frames, ie. when the hand tracker is between frames. 
270 - bool tracking_should_die = {}; 271 - bool tracking_should_record_dataset = {}; 272 - struct os_mutex unlocked_between_frames = {}; 273 - 274 - // Change this whenever you want 275 - volatile bool debug_scribble = true; 276 - 277 - struct ht_startup_config startup_config = {}; 278 - struct ht_dynamic_config dynamic_config = {}; 279 - 280 - enum u_logging_level log_level = U_LOGGING_INFO; 281 - 282 - public: 283 - explicit HandTracking(); 284 - ~HandTracking(); 285 - 286 - static inline HandTracking & 287 - fromC(t_hand_tracking_sync *ht_sync) 288 - { 289 - return *reinterpret_cast<HandTracking *>(ht_sync); 290 - } 291 - 292 - static void 293 - cCallbackProcess(struct t_hand_tracking_sync *ht_sync, 294 - struct xrt_frame *left_frame, 295 - struct xrt_frame *right_frame, 296 - struct xrt_hand_joint_set *out_left_hand, 297 - struct xrt_hand_joint_set *out_right_hand, 298 - uint64_t *out_timestamp_ns); 299 - 300 - static void 301 - cCallbackDestroy(t_hand_tracking_sync *ht_sync); 302 - }; 303 - 304 - 305 - } // namespace xrt::tracking::hand::old_rgb

-91

src/xrt/tracking/hand/old_rgb/templates/NaivePermutationSort.hpp

// Copyright 2021, Collabora, Ltd.
// SPDX-License-Identifier: BSL-1.0
/*!
 * @file
 * @brief Camera based hand tracking sorting implementation.
 * @author Moses Turner <moses@collabora.com>
 * @ingroup drv_ht
 */

#pragma once

#include <math.h>
#include <stddef.h>

#include <algorithm>
#include <iostream>
#include <limits>
#include <vector>
// Other thing: sort by speed? like, if our thing must have suddenly changed directions, add to error?
// Easy enough to do using more complicated structs.
// Like a past thing with position, velocity and timestamp - present thing with position and timestamp.

//! One candidate pairing: an index into each input list and the error of pairing them.
struct psort_atom_t
{
	size_t idx_1;
	size_t idx_2;
	float err;
};


/*!
 * Ordering predicate for candidate pairings: lower error sorts first.
 *
 * Marked inline because this header defines it; without that, including the
 * header from more than one translation unit produces duplicate definitions.
 */
inline bool
comp_err(psort_atom_t one, psort_atom_t two)
{
	return (one.err < two.err);
}


/*!
 * Greedily pair up elements of two lists, lowest error first.
 *
 * Every (in_1[i], in_2[j]) combination is scored with @p calc_error. The
 * candidates are sorted by ascending error and accepted in that order as long
 * as neither element has been used already and the error does not exceed
 * @p max_err. Each element therefore ends up in at most one pair.
 *
 * @param in_1          First input list, not modified.
 * @param in_2          Second input list, not modified.
 * @param used_1        Output, replaced: used_1[i] is true if in_1[i] was paired.
 * @param used_2        Output, replaced: used_2[j] is true if in_2[j] was paired.
 * @param out_indices_1 Output, appended to: in_1 index of each accepted pair.
 * @param out_indices_2 Output, appended to: in_2 index of each accepted pair.
 * @param out_errs      Output, appended to: error of each accepted pair.
 * @param calc_error    Scoring function; a negative return value means the
 *                      combination must not be considered at all.
 * @param max_err       Largest error that may still be accepted.
 *
 * The three out_* vectors are expected to be empty on entry; entries are
 * appended in order of ascending error and line up index-for-index.
 */
template <typename Tp_1, typename Tp_2>
void
naive_sort_permutation_by_error(
    // Inputs - shall be initialized with real data before calling. This function shall not modify them in any way.
    const std::vector<Tp_1> &in_1,
    const std::vector<Tp_2> &in_2,

    // Outputs - shall be uninitialized. This function shall initialize them to the right size and fill them with the
    // proper values.
    std::vector<bool> &used_1,
    std::vector<bool> &used_2,
    std::vector<size_t> &out_indices_1,
    std::vector<size_t> &out_indices_2,
    std::vector<float> &out_errs,

    float (*calc_error)(const Tp_1 &one, const Tp_2 &two),
    float max_err = std::numeric_limits<float>::max())
{
	const size_t count_1 = in_1.size();
	const size_t count_2 = in_2.size();

	used_1.assign(count_1, false);
	used_2.assign(count_2, false);

	// There can never be more pairs than elements in the shorter list.
	const size_t max_pairs = std::min(count_1, count_2);

	out_indices_1.reserve(max_pairs);
	out_indices_2.reserve(max_pairs);
	out_errs.reserve(max_pairs);

	std::vector<psort_atom_t> candidates;
	candidates.reserve(count_1 * count_2);

	for (size_t idx_1 = 0; idx_1 < count_1; idx_1++) {
		for (size_t idx_2 = 0; idx_2 < count_2; idx_2++) {
			const float err = calc_error(in_1[idx_1], in_2[idx_2]);
			// Negative error means the error calculator thought there was something so bad with
			// these that they shouldn't be considered at all. An error of exactly zero is a
			// perfect match and must be kept. NaN fails this comparison and is dropped too.
			if (err >= 0.0f) {
				candidates.push_back({idx_1, idx_2, err});
			}
		}
	}

	std::sort(candidates.begin(), candidates.end(), comp_err);

	for (const psort_atom_t &candidate : candidates) {
		if (candidate.err > max_err) {
			// Sorted ascending, so every remaining candidate is over the limit as well.
			break;
		}
		if (used_1[candidate.idx_1] || used_2[candidate.idx_2]) {
			continue;
		}
		used_1[candidate.idx_1] = true;
		used_2[candidate.idx_2] = true;

		out_indices_1.push_back(candidate.idx_1);
		out_indices_2.push_back(candidate.idx_2);

		out_errs.push_back(candidate.err);
	}
}

Configure Feed

Configure Feed