···29293030// Save me, Obi-Wan!
31313232-#include "../../tracking/hand/old_rgb/rgb_interface.h"
3332#include "../../tracking/hand/mercury/hg_interface.h"
34333534#ifdef XRT_BUILD_DRIVER_DEPTHAI
···240239int
241240ht_device_create(struct xrt_frame_context *xfctx,
242241 struct t_stereo_camera_calibration *calib,
243243- enum t_hand_tracking_algorithm algorithm_choice,
244242 struct t_camera_extra_info extra_camera_info,
245243 struct xrt_slam_sinks **out_sinks,
246244 struct xrt_device **out_device)
···251249252250 struct t_hand_tracking_sync *sync = NULL;
253251254254- switch (algorithm_choice) {
255255- case HT_ALGORITHM_MERCURY: {
256256- sync = t_hand_tracking_sync_mercury_create(calib, extra_camera_info);
257257- } break;
258258- case HT_ALGORITHM_OLD_RGB: {
259259- //!@todo Either have this deal with the output space correctly, or have everything use LEFT_CAMERA
260260- sync = t_hand_tracking_sync_old_rgb_create(calib);
261261- }
262262- }
252252+ sync = t_hand_tracking_sync_mercury_create(calib, extra_camera_info);
253253+263254 struct ht_device *htd = ht_device_create_common(calib, false, xfctx, sync);
264255265256 HT_DEBUG(htd, "Hand Tracker initialized!");
-2
src/xrt/drivers/ht/ht_interface.h
···3636 *
3737 * @param xfctx Frame context to attach the tracker to
3838 * @param calib Calibration struct for stereo camera
3939- * @param algorithm_choice Which algorithm to use for hand tracking
4039 * @param out_sinks Sinks to stream camera data to
4140 * @param out_device Newly created hand tracker "device"
4241 * @return int 0 on success
···4443int
4544ht_device_create(struct xrt_frame_context *xfctx,
4645 struct t_stereo_camera_calibration *calib,
4747- enum t_hand_tracking_algorithm algorithm_choice,
4846 struct t_camera_extra_info extra_camera_info,
4947 struct xrt_slam_sinks **out_sinks,
5048 struct xrt_device **out_device);
···11-<!--
22-Copyright 2021, Collabora, Ltd.
33-Authors:
44-Moses Turner <moses@collabora.com>
55-SPDX-License-Identifier: BSL-1.0
66--->
77-88-# What is this?
99-This is a driver to do optical hand tracking. The actual code was mostly written by Moses Turner, with tons of help from Marcus Edel, Jakob Bornecrantz, Ryan Pavlik, and Christoph Haag. Jakob Bornecrantz and Marcus Edel are the main people who gathered training data for the initial Collabora models.
1010-1111-In `main` it only works with Valve Index, although we've used a lot of Luxonis cameras in development. With additional work, it should work fine with devices like the T265, or PS4/PS5 cam, should there be enough interest for any of those.
1212-1313-Under good lighting, I would say it's around as good as Oculus Quest 2's hand tracking. Not that I'm trying to make any claims; that's just what I honestly would tell somebody if they are wondering if it's worth testing out.
1414-1515-1616-# How to get started
1717-## Get dependencies
1818-### Get OpenCV
1919-Each distro has its own way to get OpenCV, and it can change at any time; there's no specific reason to trust this documentation over anything else.
2020-2121-Having said that, on Ubuntu, it would look something like
2222-2323-```
2424-sudo apt install libopencv-dev libopencv-contrib-dev
2525-```
2626-2727-Or you could build it from source, or get it from one of the other 1000s of package managers. Whatever floats your boat.
2828-2929-### Get ONNXRuntime
3030-I followed the instructions here: https://onnxruntime.ai/docs/how-to/build/inferencing.html#linux
3131-3232-then had to do
3333-```
3434-cd build/Linux/RelWithDebInfo/
3535-sudo make install
3636-```
3737-3838-### Get the ML models
3939-Make sure you have git-lfs installed, then run ./scripts/get-ht-models.sh. Should work fine.
4040-4141-## Building the driver
4242-Once onnxruntime is installed, you should be able to build like normal with CMake or Meson.
4343-4444-If it properly found everything, CMake should say
4545-4646-```
4747--- Found ONNXRUNTIME: /usr/local/include/onnxruntime
4848-4949-[...]
5050-5151--- # DRIVER_HANDTRACKING: ON
5252-```
5353-5454-and Meson should say
5555-5656-```
5757-Run-time dependency libonnxruntime found: YES 1.8.2
5858-5959-[...]
6060-6161-Message: Configuration done!
6262-Message: drivers: [...] handtracking, [...]
6363-```
6464-6565-## Running the driver
6666-Currently, it's only set up to work on Valve Index.
6767-6868-So, the two things you can do are
6969-* Use the `survive` driver with both controllers off - It should automagically start hand tracking upon not finding any controllers.
7070-* Use the `vive` driver with `VIVE_USE_HANDTRACKING=ON` and it should work the same as the survive driver.
7171-7272-You can see if the driver is working with `openxr-simple-playground`, StereoKit, or any other app you know of. Poke me (Moses) if you find any other cool hand-tracking apps; I'm always looking for more!
7373-7474-# Tips and tricks
7575-7676-This tracking likes to be in a bright, evenly-lit room with multiple light sources. Turn all the lights on, see if you can find any lamps. If the ML models can see well, the tracking quality can get surprisingly nice.
7777-7878-Sometimes, the tracking fails when it can see more than one hand. As the tracking gets better (we train better ML models and squash more bugs) this should happen less often or not at all. If it does, put one of your hands down, and it should resume tracking the remaining hand just fine.
7979-8080-# Future improvements
8181-8282-* Get more training data; train better ML models.
8383-* Improve the tracking math
8484- * Be smarter about keeping tracking lock on a hand
8585- * Try predicting the next bounding box based on the estimated keypoints of the last few frames instead of uncritically trusting the detection model, and don't run the detection model *every single* frame.
8686- * Instead of directly doing disparity on the observed keypoints, use a kinematic model of the hand and fit that to the 2D observations - this should get rid of a *lot* of jitter and make it look better to the end user if the ML models fail
8787- * Make something that also works with non-stereo (mono, trinocular, or N cameras) camera setups
8888-* Optionally run the ML models on GPU - currently, everything's CPU bound which could be sub-optimal under some circumstances
8989-* Write a lot of generic code so that you can run this on any stereo camera
9090-* More advanced prediction/interpolation code that doesn't care at all about the input frame cadence. One-euro filters are pretty good about this, but we can get better!
-416
src/xrt/tracking/hand/old_rgb/rgb_hand_math.hpp
···11-#pragma once
22-33-// Copyright 2021, Collabora, Ltd.
44-// SPDX-License-Identifier: BSL-1.0
55-/*!
66- * @file
77- * @brief Helper math to do things with 3D hands for the camera-based hand tracker
88- * @author Moses Turner <moses@collabora.com>
99- * @author Nick Klingensmith <programmerpichu@gmail.com>
1010- * @ingroup drv_ht
1111- */
1212-1313-#include "math/m_api.h"
1414-#include "math/m_vec3.h"
1515-1616-#include "rgb_sync.hpp"
1717-#include "util/u_time.h"
1818-#include "xrt/xrt_defines.h"
1919-2020-static constexpr int num_real_joints = 21;
2121-2222-float
2323-sumOfHandJointDistances(const Hand3D &one, const Hand3D &two)
2424-{
2525- float dist = 0.0f;
2626- for (int i = 0; i < num_real_joints; i++) {
2727- dist += m_vec3_len(one.kps[i] - two.kps[i]);
2828- }
2929- return dist;
3030-}
3131-3232-float
3333-errHandHistory(const HandHistory3D &history_hand, const Hand3D &present_hand)
3434-{
3535- // Remember we never have to deal with an empty hand. Can always read the last element.
3636- return sumOfHandJointDistances(history_hand.last_hands_unfiltered.back(), present_hand);
3737-}
3838-3939-float
4040-errHandDisparity(const Hand2D &left_rays, const Hand2D &right_rays)
4141-{
4242- float error_y_diff = 0.0f;
4343- for (int i = 0; i < 21; i++) {
4444- float diff_y = fabsf(left_rays.kps[i].y - right_rays.kps[i].y);
4545- // Big question about what's the best loss function. Gut feeling was "I should be using sum of squared
4646- // errors" but I don't really know. Using just sum of errors for now. Ideally it'd also be not very
4747- // sensitive to one or two really bad outliers.
4848- error_y_diff += diff_y;
4949- }
5050- // U_LOG_E("stereo camera err is %f, y_disparity is %f", err_stereo_camera, error_y_diff);
5151- return error_y_diff;
5252-}
5353-5454-void
5555-applyThumbIndexDrag(Hand3D *hand)
5656-{
5757- // TERRIBLE HACK.
5858- // Puts the thumb and pointer a bit closer together to be better at triggering XR clients' pinch detection.
5959- static const float max_radius = 0.05;
6060- static const float min_radius = 0.00;
6161-6262- // no min drag, min drag always 0.
6363- static const float max_drag = 0.85f;
6464-6565- xrt_vec3 thumb = hand->kps[THMB_TIP];
6666- xrt_vec3 index = hand->kps[INDX_TIP];
6767- xrt_vec3 ttp = index - thumb;
6868- float length = m_vec3_len(ttp);
6969- if ((length > max_radius)) {
7070- return;
7171- }
7272-7373-7474- float amount = math_map_ranges(length, min_radius, max_radius, max_drag, 0.0f);
7575-7676- hand->kps[THMB_TIP] = m_vec3_lerp(thumb, index, amount * 0.5f);
7777- hand->kps[INDX_TIP] = m_vec3_lerp(index, thumb, amount * 0.5f);
7878-}
7979-8080-static inline xrt_vec3
8181-get_joint_position(struct xrt_hand_joint_set *set, xrt_hand_joint jt)
8282-{
8383- return set->values.hand_joint_set_default[jt].relation.pose.position;
8484-}
8585-8686-template <size_t N>
8787-static inline void
8888-set_finger(struct xrt_hand_joint_set *set,
8989- const xrt_vec3 &pinky_to_index_prox,
9090- const std::array<xrt_hand_joint, N> &finger)
9191-{
9292- for (size_t i = 0; i < N - 1; i++) {
9393- // Don't do fingertips. (Fingertip would be index 4.)
9494- struct xrt_vec3 forwards =
9595- m_vec3_normalize(get_joint_position(set, finger[i + 1]) - get_joint_position(set, finger[i]));
9696- struct xrt_vec3 backwards = m_vec3_mul_scalar(forwards, -1.0f);
9797-9898- struct xrt_vec3 left = m_vec3_orthonormalize(forwards, pinky_to_index_prox);
9999- // float dot = m_vec3_dot(backwards, left);
100100- // assert((m_vec3_dot(backwards,left) == 0.0f));
101101- math_quat_from_plus_x_z(&left, &backwards,
102102- &set->values.hand_joint_set_default[finger[i]].relation.pose.orientation);
103103- }
104104- // Do fingertip! Per XR_EXT_hand_tracking, just copy the distal joint's orientation. Doing anything else
105105- // is wrong.
106106- set->values.hand_joint_set_default[finger[N - 1]].relation.pose.orientation =
107107- set->values.hand_joint_set_default[finger[N - 2]].relation.pose.orientation;
108108-}
109109-110110-void
111111-applyJointOrientations(struct xrt_hand_joint_set *set, bool is_right)
112112-{
113113- // The real rule to follow is that each joint's "X" axis is along the axis along which it can bend.
114114- // The nature of our estimation makes this a bit difficult, but these should work okay-ish under perfect
115115- // conditions
116116- if (set->is_active == false) {
117117- return;
118118- }
119119-120120- auto gl = [&](xrt_hand_joint jt) { return get_joint_position(set, jt); };
121121-122122- xrt_vec3 pinky_prox = gl(XRT_HAND_JOINT_LITTLE_PROXIMAL);
123123-124124- xrt_vec3 index_prox = gl(XRT_HAND_JOINT_INDEX_PROXIMAL);
125125-126126-127127- xrt_vec3 pinky_to_index_prox = m_vec3_normalize(index_prox - pinky_prox);
128128- if (is_right) {
129129- pinky_to_index_prox = m_vec3_mul_scalar(pinky_to_index_prox, -1.0f);
130130- }
131131-132132- using Finger = std::array<xrt_hand_joint, 5>;
133133- static const std::array<Finger, 4> fingers_with_joints_in_them = {{
134134-135135- {XRT_HAND_JOINT_INDEX_METACARPAL, XRT_HAND_JOINT_INDEX_PROXIMAL, XRT_HAND_JOINT_INDEX_INTERMEDIATE,
136136- XRT_HAND_JOINT_INDEX_DISTAL, XRT_HAND_JOINT_INDEX_TIP},
137137-138138- {XRT_HAND_JOINT_MIDDLE_METACARPAL, XRT_HAND_JOINT_MIDDLE_PROXIMAL, XRT_HAND_JOINT_MIDDLE_INTERMEDIATE,
139139- XRT_HAND_JOINT_MIDDLE_DISTAL, XRT_HAND_JOINT_MIDDLE_TIP},
140140-141141- {XRT_HAND_JOINT_RING_METACARPAL, XRT_HAND_JOINT_RING_PROXIMAL, XRT_HAND_JOINT_RING_INTERMEDIATE,
142142- XRT_HAND_JOINT_RING_DISTAL, XRT_HAND_JOINT_RING_TIP},
143143-144144- {XRT_HAND_JOINT_LITTLE_METACARPAL, XRT_HAND_JOINT_LITTLE_PROXIMAL, XRT_HAND_JOINT_LITTLE_INTERMEDIATE,
145145- XRT_HAND_JOINT_LITTLE_DISTAL, XRT_HAND_JOINT_LITTLE_TIP},
146146-147147- }};
148148- for (Finger const &finger : fingers_with_joints_in_them) {
149149- set_finger(set, pinky_to_index_prox, finger);
150150- }
151151-152152- // wrist!
153153- // Not the best but acceptable. Eventually, probably, do triangle of wrist pinky prox and index prox.
154154- set->values.hand_joint_set_default[XRT_HAND_JOINT_WRIST].relation.pose.orientation =
155155- set->values.hand_joint_set_default[XRT_HAND_JOINT_MIDDLE_METACARPAL].relation.pose.orientation;
156156-157157-158158- // palm!
159159- set->values.hand_joint_set_default[XRT_HAND_JOINT_PALM].relation.pose.orientation =
160160- set->values.hand_joint_set_default[XRT_HAND_JOINT_MIDDLE_METACARPAL].relation.pose.orientation;
161161-162162- // thumb!
163163- // When I look at Ultraleap tracking, there's like, a "plane" made by the tip, distal and proximal (and kinda
164164- // MCP, but least squares fitting a plane is too hard for my baby brain) Normal to this plane is the +X, and
165165- // obviously forwards to the next joint is the -Z.
166166- xrt_vec3 thumb_prox_to_dist = gl(XRT_HAND_JOINT_THUMB_DISTAL) - gl(XRT_HAND_JOINT_THUMB_PROXIMAL);
167167- xrt_vec3 thumb_dist_to_tip = gl(XRT_HAND_JOINT_THUMB_TIP) - gl(XRT_HAND_JOINT_THUMB_DISTAL);
168168- xrt_vec3 plane_normal{};
169169- if (!is_right) {
170170- math_vec3_cross(&thumb_prox_to_dist, &thumb_dist_to_tip, &plane_normal);
171171- } else {
172172- math_vec3_cross(&thumb_dist_to_tip, &thumb_prox_to_dist, &plane_normal);
173173- }
174174- constexpr std::array<enum xrt_hand_joint, 4> thumbs = {XRT_HAND_JOINT_THUMB_METACARPAL,
175175- XRT_HAND_JOINT_THUMB_PROXIMAL,
176176- XRT_HAND_JOINT_THUMB_DISTAL, XRT_HAND_JOINT_THUMB_TIP};
177177- //! @todo this code isn't quite the same as set_finger, can we make it the same so we can use that?
178178- for (int i = 0; i < 3; i++) {
179179- struct xrt_vec3 backwards =
180180- m_vec3_mul_scalar(m_vec3_normalize(gl(thumbs[i + 1]) - gl(thumbs[i])), -1.0f);
181181-182182- struct xrt_vec3 left = m_vec3_orthonormalize(backwards, plane_normal);
183183- math_quat_from_plus_x_z(&left, &backwards,
184184- &set->values.hand_joint_set_default[thumbs[i]].relation.pose.orientation);
185185- }
186186- struct xrt_quat *tip = &set->values.hand_joint_set_default[XRT_HAND_JOINT_THUMB_TIP].relation.pose.orientation;
187187- struct xrt_quat *distal =
188188- &set->values.hand_joint_set_default[XRT_HAND_JOINT_THUMB_DISTAL].relation.pose.orientation;
189189- memcpy(tip, distal, sizeof(struct xrt_quat));
190190-}
191191-192192-float
193193-handednessJointSet(Hand3D *set)
194194-{
195195- // Guess if hand is left or right.
196196- // Left is negative, right is positive.
197197-198198-199199- // xrt_vec3 middle_mcp = gl(XRT_HAND_JOINT_MIDDLE_METACARPAL);
200200-201201- xrt_vec3 pinky_prox = set->kps[LITL_PXM]; // gl(XRT_HAND_JOINT_LITTLE_PROXIMAL);
202202-203203- xrt_vec3 index_prox = set->kps[INDX_PXM]; // gl(XRT_HAND_JOINT_INDEX_PROXIMAL);
204204-205205- xrt_vec3 pinky_to_index_prox = m_vec3_normalize(index_prox - pinky_prox);
206206-207207- float handedness = 0.0f;
208208-209209- for (int i : {INDX_PXM, MIDL_PXM, RING_PXM, LITL_PXM}) {
210210- xrt_vec3 prox = set->kps[i];
211211- xrt_vec3 intr = set->kps[i + 1];
212212- xrt_vec3 dist = set->kps[i + 2];
213213- xrt_vec3 tip = set->kps[i + 3];
214214-215215- xrt_vec3 prox_to_int = m_vec3_normalize(intr - prox);
216216- xrt_vec3 int_to_dist = m_vec3_normalize(dist - intr);
217217- xrt_vec3 dist_to_tip = m_vec3_normalize(tip - dist);
218218-219219- xrt_vec3 checks[2];
220220-221221- math_vec3_cross(&prox_to_int, &int_to_dist, &checks[0]);
222222- math_vec3_cross(&int_to_dist, &dist_to_tip, &checks[1]);
223223-224224- handedness += m_vec3_dot(m_vec3_normalize(pinky_to_index_prox), (checks[0]));
225225- handedness += m_vec3_dot(m_vec3_normalize(pinky_to_index_prox), (checks[1]));
226226- }
227227- set->handedness = handedness / (4 * 2);
228228- return set->handedness;
229229-}
230230-231231-void
232232-handednessHandHistory3D(HandHistory3D *history)
233233-{
234234-235235- float inter = handednessJointSet(&history->last_hands_unfiltered.back());
236236-237237- if ((fabsf(inter) > 0.3f) || (fabsf(history->handedness) < 0.3f)) {
238238- history->handedness += inter;
239239- }
240240- static const int max_handedness = 2.0f;
241241- if (history->handedness > max_handedness) {
242242- history->handedness = max_handedness;
243243- } else if (history->handedness < -max_handedness) {
244244- history->handedness = -max_handedness;
245245- }
246246-}
247247-248248-void
249249-handEuroFiltersInit(HandHistory3D *history, double fc_min, double fc_min_d, double beta)
250250-{
251251- for (int i = 0; i < 21; i++) {
252252- m_filter_euro_vec3_init(&history->filters[i], fc_min, fc_min_d, beta);
253253- }
254254-}
/*!
 * Smoothing factor of a first-order low-pass filter.
 *
 * Computes alpha = 1 / (1 + tau / dt) with tau = 1 / (2 * pi * Fc),
 * rearranged so that only one division is needed.
 *
 * @param Fc Cutoff frequency.
 * @param dt Time step.
 * @return alpha; 0 when either argument is 0.
 */
static double
calc_smoothing_alpha(double Fc, double dt)
{
	const double omega_dt = 2.0 * M_PI * Fc * dt;
	return omega_dt / (1.0 + omega_dt);
}
/*!
 * One step of exponential smoothing: blend the new sample @p y with the
 * previous output @p prev_y.
 *
 * @param alpha  Weight of the new sample; the previous output gets 1 - alpha.
 * @param y      New sample.
 * @param prev_y Previous smoothed value.
 * @return alpha * y + (1 - alpha) * prev_y
 */
static double
exp_smooth(double alpha, double y, double prev_y)
{
	const double keep = 1.0 - alpha;
	return alpha * y + keep * prev_y;
}
270270-271271-void
272272-handEuroFiltersRun(struct HandTracking *htd, HandHistory3D *f, Hand3D *out_hand)
273273-{
274274- // Assume present hand is in element 0!
275275-#if 0
276276- // float vals[4] = {0.5, 0.33, 0.1, 0.07};
277277- float vals[4] = {0.9, 0.09, 0.009, 0.001};
278278- auto m = f->last_hands_unfiltered.size() - 1;
279279- double ts_out = (vals[0] * (double)f->last_hands_unfiltered.get_at_age(std::min(m, 0))->timestamp) +
280280- (vals[1] * (double)f->last_hands_unfiltered.get_at_age(std::min(m, 1))->timestamp) +
281281- (vals[2] * (double)f->last_hands_unfiltered.get_at_age(std::min(m, 2))->timestamp) +
282282- (vals[3] * (double)f->last_hands_unfiltered.get_at_age(std::min(m, 3))->timestamp);
283283- out_hand->timestamp = (uint64_t)ts_out;
284284-285285- for (int kp_idx = 0; kp_idx < 21; kp_idx++) {
286286- for (int hist_idx = 0; hist_idx < 4; hist_idx++) {
287287- float *in_y_arr =
288288- (float *)&f->last_hands_unfiltered.get_at_age(std::min(m, hist_idx))->kps[kp_idx];
289289- float *out_y_arr = (float *)&out_hand->kps[kp_idx];
290290- for (int i = 0; i < 3; i++) {
291291- out_y_arr[i] += in_y_arr[i] * vals[hist_idx];
292292- }
293293- }
294294- }
295295-#elif 0
296296- for (int i = 0; i < 21; i++) {
297297- m_filter_euro_vec3_run(&f->filters[i], f->last_hands_unfiltered.back().timestamp,
298298- &f->last_hands_unfiltered.back().kps[i], &out_hand->kps[i]);
299299- }
300300- // conspicuously wrong!
301301- out_hand->timestamp = f->last_hands_unfiltered.back().timestamp;
302302-#else
303303-304304- if (!f->have_prev_hand) {
305305- f->last_hands_filtered.push_back(f->last_hands_unfiltered.back());
306306- uint64_t ts = f->last_hands_unfiltered.back().timestamp;
307307- f->prev_ts_for_alpha = ts;
308308- f->first_ts = ts;
309309- f->prev_filtered_ts = ts;
310310- f->prev_dy = 0;
311311- f->have_prev_hand = true;
312312- *out_hand = f->last_hands_unfiltered.back();
313313- }
314314- uint64_t ts = f->last_hands_unfiltered.back().timestamp;
315315- double dt, alpha_d;
316316- dt = (double)(ts - f->prev_ts_for_alpha) / U_TIME_1S_IN_NS;
317317-318318- double abs_dy =
319319- (sumOfHandJointDistances(f->last_hands_unfiltered.back(), f->last_hands_filtered.back()) / 21.0f) * 0.7f;
320320- alpha_d = calc_smoothing_alpha(htd->dynamic_config.hand_fc_min_d.val, dt);
321321-322322- double alpha, fc_cutoff;
323323- f->prev_dy = exp_smooth(alpha_d, abs_dy, f->prev_dy);
324324-325325- fc_cutoff = htd->dynamic_config.hand_fc_min.val + htd->dynamic_config.hand_beta.val * f->prev_dy;
326326- alpha = calc_smoothing_alpha(fc_cutoff, dt);
327327- HT_DEBUG(htd, "dt is %f, abs_dy is %f, alpha is %f", dt, abs_dy, alpha);
328328-329329- for (int i = 0; i < 21; i++) {
330330- out_hand->kps[i].x =
331331- exp_smooth(alpha, f->last_hands_unfiltered.back().kps[i].x, f->last_hands_filtered.back().kps[i].x);
332332- out_hand->kps[i].y =
333333- exp_smooth(alpha, f->last_hands_unfiltered.back().kps[i].y, f->last_hands_filtered.back().kps[i].y);
334334- out_hand->kps[i].z =
335335- exp_smooth(alpha, f->last_hands_unfiltered.back().kps[i].z, f->last_hands_filtered.back().kps[i].z);
336336- }
337337- double prev_ts_offset = (double)(f->prev_filtered_ts - f->first_ts);
338338- double current_ts_offset = (double)(ts - f->first_ts);
339339- double new_filtered_ts_offset = exp_smooth(alpha, current_ts_offset, prev_ts_offset);
340340- uint64_t new_filtered_ts = (uint64_t)(new_filtered_ts_offset) + f->first_ts;
341341- out_hand->timestamp = new_filtered_ts;
342342- f->prev_filtered_ts = out_hand->timestamp;
343343- f->prev_ts_for_alpha = ts; // NOT the filtered timestamp. NO.
344344-#endif
345345-}
346346-347347-bool
348348-rejectTooFar(struct HandTracking *htd, Hand3D *hand)
349349-{
350350- static const float max_dist = 1.0f; // this sucks too - make it bigger if you can.
351351- const float max_dist_from_camera_sqrd = max_dist * max_dist;
352352- for (int i = 0; i < 21; i++) {
353353- xrt_vec3 pos = hand->kps[i];
354354- float len = m_vec3_len_sqrd(pos); // Faster.
355355- if (len > max_dist_from_camera_sqrd) {
356356- goto reject;
357357- }
358358- }
359359- return true;
360360-361361-reject:
362362- HT_TRACE(htd, "Rejected too far!");
363363- return false;
364364-}
365365-366366-bool
367367-rejectTooClose(struct HandTracking *htd, Hand3D *hand)
368368-{
369369- const float min_dist = 0.12f; // Be a bit aggressive here - it's nice to not let people see our tracking fail
370370- // when the hands are way too close
371371- const float min_dist_from_camera_sqrd = min_dist * min_dist;
372372-373373- for (int i = 0; i < 21; i++) {
374374- xrt_vec3 pos = hand->kps[i];
375375- float len = m_vec3_len_sqrd(pos); // Faster.
376376- if (len < min_dist_from_camera_sqrd) {
377377- goto reject;
378378- }
379379- if (pos.z > min_dist) { // remember negative-Z is forward!
380380- goto reject;
381381- }
382382- }
383383- return true;
384384-385385-reject:
386386- HT_TRACE(htd, "Rejected too close!");
387387- return false;
388388-}
389389-390390-bool
391391-rejectTinyPalm(struct HandTracking *htd, Hand3D *hand)
392392-{
393393- // This one sucks, because some people really have tiny hands. If at some point you can stop using it, stop
394394- // using it.
395395- // Weird scoping so that we can still do gotos
396396-397397- {
398398- float len = m_vec3_len(hand->kps[WRIST] - hand->kps[INDX_PXM]);
399399- if ((len < 0.03f || len > 0.25f)) {
400400- goto reject;
401401- }
402402- }
403403-404404- {
405405- float len = m_vec3_len(hand->kps[WRIST] - hand->kps[MIDL_PXM]);
406406- if (len < 0.03f || len > 0.25f) {
407407- goto reject;
408408- }
409409- }
410410-411411- return true;
412412-413413-reject:
414414- HT_TRACE(htd, "Rejected because too big or too small!");
415415- return false;
416416-}
-254
src/xrt/tracking/hand/old_rgb/rgb_image_math.hpp
···11-// Copyright 2021, Collabora, Ltd.
22-// SPDX-License-Identifier: BSL-1.0
33-/*!
44- * @file
55- * @brief Helper math to do things with images for the camera-based hand tracker
66- * @author Moses Turner <moses@collabora.com>
77- * @ingroup drv_ht
88- */
99-#pragma once
1010-1111-#include "math/m_vec2.h"
1212-#include "math/m_vec3.h"
1313-1414-#include <opencv2/imgproc.hpp>
1515-#include <opencv2/core/mat.hpp>
1616-#include <opencv2/core/types.hpp>
1717-1818-/*!
1919- * This is a template so that we can use xrt_vec3 or xrt_vec2.
2020- * Please don't use this for anything other than xrt_vec3 or xrt_vec2!
2121- */
2222-template <typename T>
2323-T
2424-transformVecBy2x3(T in, cv::Matx23f warp_back)
2525-{
2626- T rr;
2727- rr.x = (in.x * warp_back(0, 0)) + (in.y * warp_back(0, 1)) + warp_back(0, 2);
2828- rr.y = (in.x * warp_back(1, 0)) + (in.y * warp_back(1, 1)) + warp_back(1, 2);
2929- return rr;
3030-}
3131-3232-cv::Scalar
3333-hsv2rgb(float fH, float fS, float fV)
3434-{
3535- const float fC = fV * fS; // Chroma
3636- const float fHPrime = fmod(fH / 60.0, 6);
3737- const float fX = fC * (1 - fabs(fmod(fHPrime, 2) - 1));
3838- const float fM = fV - fC;
3939-4040- float fR, fG, fB;
4141-4242- if (0 <= fHPrime && fHPrime < 1) {
4343- fR = fC;
4444- fG = fX;
4545- fB = 0;
4646- } else if (1 <= fHPrime && fHPrime < 2) {
4747- fR = fX;
4848- fG = fC;
4949- fB = 0;
5050- } else if (2 <= fHPrime && fHPrime < 3) {
5151- fR = 0;
5252- fG = fC;
5353- fB = fX;
5454- } else if (3 <= fHPrime && fHPrime < 4) {
5555- fR = 0;
5656- fG = fX;
5757- fB = fC;
5858- } else if (4 <= fHPrime && fHPrime < 5) {
5959- fR = fX;
6060- fG = 0;
6161- fB = fC;
6262- } else if (5 <= fHPrime && fHPrime < 6) {
6363- fR = fC;
6464- fG = 0;
6565- fB = fX;
6666- } else {
6767- fR = 0;
6868- fG = 0;
6969- fB = 0;
7070- }
7171-7272- fR += fM;
7373- fG += fM;
7474- fB += fM;
7575- return {fR * 255.0f, fG * 255.0f, fB * 255.0f};
7676-}
7777-7878-struct xrt_vec3
7979-raycoord(struct ht_view *htv, struct xrt_vec3 model_out)
8080-{
8181- cv::Mat in_px_coords(1, 1, CV_32FC2);
8282- float *write_in;
8383- write_in = in_px_coords.ptr<float>(0);
8484- write_in[0] = model_out.x;
8585- write_in[1] = model_out.y;
8686- cv::Mat out_ray(1, 1, CV_32FC2);
8787-8888- cv::fisheye::undistortPoints(in_px_coords, out_ray, htv->cameraMatrix, htv->distortion);
8989-9090-9191- float n_x = out_ray.at<float>(0, 0);
9292- float n_y = out_ray.at<float>(0, 1);
9393-9494-9595- struct xrt_vec3 n = {n_x, n_y, 1.0f};
9696-9797- cv::Matx33f R = htv->rotate_camera_to_stereo_camera;
9898-9999- struct xrt_vec3 o = {
100100- (n.x * R(0, 0)) + (n.y * R(0, 1)) + (n.z * R(0, 2)),
101101- (n.x * R(1, 0)) + (n.y * R(1, 1)) + (n.z * R(1, 2)),
102102- (n.x * R(2, 0)) + (n.y * R(2, 1)) + (n.z * R(2, 2)),
103103- };
104104-105105- math_vec3_scalar_mul(1.0f / o.z, &o);
106106- return o;
107107-}
108108-109109-cv::Matx23f
110110-blackbar(const cv::Mat &in, cv::Mat &out, xrt_size out_size)
111111-{
112112-#if 1
113113- // Easy to think about, always right, but pretty slow:
114114- // Get a matrix from the original to the scaled down / blackbar'd image, then get one that goes back.
115115- // Then just warpAffine() it.
116116- // Easy in programmer time - never have to worry about off by one, special cases. We can come back and optimize
117117- // later.
118118-119119- // Do the black bars need to be on top and bottom, or on left and right?
120120- float scale_down_w = (float)out_size.w / (float)in.cols; // 128/1280 = 0.1
121121- float scale_down_h = (float)out_size.h / (float)in.rows; // 128/800 = 0.16
122122-123123- float scale_down = fmin(scale_down_w, scale_down_h); // 0.1
124124-125125- float width_inside = (float)in.cols * scale_down;
126126- float height_inside = (float)in.rows * scale_down;
127127-128128- float translate_x = (out_size.w - width_inside) / 2; // should be 0 for 1280x800
129129- float translate_y = (out_size.h - height_inside) / 2; // should be (1280-800)/2 = 240
130130-131131- cv::Matx23f go;
132132- // clang-format off
133133- go(0,0) = scale_down; go(0,1) = 0.0f; go(0,2) = translate_x;
134134- go(1,0) = 0.0f; go(1,1) = scale_down; go(1,2) = translate_y;
135135- // clang-format on
136136-137137- cv::warpAffine(in, out, go, cv::Size(out_size.w, out_size.h));
138138-139139- cv::Matx23f ret;
140140-141141- // clang-format off
142142- ret(0,0) = 1.0f/scale_down; ret(0,1) = 0.0f; ret(0,2) = -translate_x/scale_down;
143143- ret(1,0) = 0.0f; ret(1,1) = 1.0f/scale_down; ret(1,2) = -translate_y/scale_down;
144144- // clang-format on
145145-146146- return ret;
147147-#else
148148- // Fast, always wrong if the input isn't square. You'd end up using something like this, plus some
149149- // copyMakeBorder if you want to optimize.
150150- if (aspect_ratio_input == aspect_ratio_output) {
151151- cv::resize(in, out, {out_size.w, out_size.h});
152152- cv::Matx23f ret;
153153- float scale_from_out_to_in = (float)in.cols / (float)out_size.w;
154154- // clang-format off
155155- ret(0,0) = scale_from_out_to_in; ret(0,1) = 0.0f; ret(0,2) = 0.0f;
156156- ret(1,0) = 0.0f; ret(1,1) = scale_from_out_to_in; ret(1,2) = 0.0f;
157157- // clang-format on
158158- cv::imshow("hi", out);
159159- cv::waitKey(1);
160160- return ret;
161161- }
162162- assert(!"Uh oh! Unimplemented!");
163163- return {};
164164-#endif
165165-}
166166-167167-void
168168-handDot(cv::Mat &mat, xrt_vec2 place, float radius, float hue, float intensity, int type)
169169-{
170170- cv::circle(mat, {(int)place.x, (int)place.y}, radius, hsv2rgb(hue * 360.0f, intensity, intensity), type);
171171-}
172172-173173-void
174174-centerAndRotationFromJoints(struct ht_view *htv,
175175- const xrt_vec2 *wrist,
176176- const xrt_vec2 *index,
177177- const xrt_vec2 *middle,
178178- const xrt_vec2 *little,
179179- xrt_vec2 *out_center,
180180- xrt_vec2 *out_wrist_to_middle)
181181-{
182182- // Close to what Mediapipe does, but slightly different - just uses the middle proximal instead of "estimating"
183183- // it from the pinky and index.
184184- // at the end of the day I should probably do that basis vector filtering thing to get a nicer middle metacarpal
185185- // from 6 keypoints (not thumb proximal) OR SHOULD I. because distortion. hmm
186186-187187- // Feel free to look at the way MP does it, you can see it's different.
188188- // https://github.com/google/mediapipe/blob/master/mediapipe/modules/holistic_landmark/calculators/hand_detections_from_pose_to_rects_calculator.cc
189189-190190- // struct xrt_vec2 hand_center = m_vec2_mul_scalar(middle, 0.5) + m_vec2_mul_scalar(index, 0.5*(2.0f/3.0f)) +
191191- // m_vec2_mul_scalar(little, 0.5f*((1.0f/3.0f))); // Middle proximal, straight-up.
192192- // U_LOG_E("%f %f %f %f %f %f %f %f ", wrist.x, wrist.y, index.x, index.y, middle.x, middle.y, little.x,
193193- // little.y);
194194- *out_center = m_vec2_lerp(*middle, m_vec2_lerp(*index, *little, 1.0f / 3.0f), 0.25f);
195195-196196- *out_wrist_to_middle = *out_center - *wrist;
197197-}
198198-199199-struct DetectionModelOutput
200200-rotatedRectFromJoints(struct ht_view *htv, xrt_vec2 center, xrt_vec2 wrist_to_middle, DetectionModelOutput *out)
201201-{
202202- float box_size = m_vec2_len(wrist_to_middle) * 2.0f * 1.73f;
203203-204204- double rot = atan2(wrist_to_middle.x, wrist_to_middle.y) * (-180.0f / M_PI);
205205-206206- out->rotation = rot;
207207- out->size = box_size;
208208- out->center = center;
209209-210210- cv::RotatedRect rrect =
211211- cv::RotatedRect(cv::Point2f(out->center.x, out->center.y), cv::Size2f(out->size, out->size), out->rotation);
212212-213213-214214- cv::Point2f vertices[4];
215215- rrect.points(vertices);
216216- if (htv->htd->debug_scribble && htv->htd->dynamic_config.scribble_bounding_box) {
217217- for (int i = 0; i < 4; i++) {
218218- cv::Scalar b = cv::Scalar(10, 30, 30);
219219- if (i == 3) {
220220- b = cv::Scalar(255, 255, 0);
221221- }
222222- cv::line(htv->debug_out_to_this, vertices[i], vertices[(i + 1) % 4], b, 2);
223223- }
224224- }
225225- // topright is 0. bottomright is 1. bottomleft is 2. topleft is 3.
226226-227227- cv::Point2f src_tri[3] = {vertices[3], vertices[2], vertices[1]}; // top-left, bottom-left, bottom-right
228228-229229- cv::Point2f dest_tri[3] = {cv::Point2f(0, 0), cv::Point2f(0, 224), cv::Point2f(224, 224)};
230230-231231- out->warp_there = getAffineTransform(src_tri, dest_tri);
232232- out->warp_back = getAffineTransform(dest_tri, src_tri);
233233-234234- // out->wrist = wrist;
235235-236236- return *out;
237237-}
238238-239239-void
240240-planarize(const cv::Mat &input, uint8_t *output)
241241-{
242242- // output better be the right size, because we are not doing any bounds checking!
243243- assert(input.isContinuous());
244244- int lix = input.cols;
245245- int liy = input.rows;
246246- cv::Mat planes[3];
247247- cv::split(input, planes);
248248- cv::Mat red = planes[0];
249249- cv::Mat green = planes[1];
250250- cv::Mat blue = planes[2];
251251- memcpy(output, red.data, lix * liy);
252252- memcpy(output + (lix * liy), green.data, lix * liy);
253253- memcpy(output + (lix * liy * 2), blue.data, lix * liy);
254254-}
-29
src/xrt/tracking/hand/old_rgb/rgb_interface.h
···11-// Copyright 2022, Collabora, Ltd.
22-// SPDX-License-Identifier: BSL-1.0
33-/*!
44- * @file
55- * @brief Public interface of old rgb hand tracking.
66- * @author Jakob Bornecrantz <jakob@collabora.com>
77- * @ingroup aux_tracking
88- */
99-1010-#include "tracking/t_tracking.h"
1111-#include "tracking/t_hand_tracking.h"
1212-1313-#ifdef __cplusplus
1414-extern "C" {
1515-#endif
1616-1717-1818-/*!
1919- * Create an old-style RGB hand tracking pipeline.
2020- *
2121- * @ingroup aux_tracking
2222- */
2323-struct t_hand_tracking_sync *
2424-t_hand_tracking_sync_old_rgb_create(struct t_stereo_camera_calibration *calib);
2525-2626-2727-#ifdef __cplusplus
2828-} // extern "C"
2929-#endif