rustdf/cluster/
utility.rs

1//! Utility functions for DIA clustering.
2//!
3//! This module provides various utility functions organized into logical sections:
4//!
5//! - **TOF Scale** - TOF axis binning and scaling
6//! - **Smoothing** - Gaussian smoothing and blur functions
7//! - **Marginal Building** - Build RT/IM marginals from frame data
8//! - **Peak Detection** - Find peaks in 1D traces
9//! - **Fitting** - Fit1D struct and moment-based fitting
10//! - **Stitching** - Stitch overlapping peaks across windows
11
12use mscore::timstof::frame::TimsFrame;
13use crate::cluster::peak::{FrameBinView, ImPeak1D, PeakId, RtPeak1D, RtTraceCtx};
14use std::hash::{Hash, Hasher};
15use rustc_hash::FxHasher;
16use serde::{Deserialize, Serialize};
17use std::sync::Arc;
18use rustc_hash::FxHashMap;
19
20/// Optional mobility callback: scan -> 1/K0
21pub type MobilityFn = Option<fn(scan: usize) -> f32>;
22
23// ==========================================================
24// Section 1: TOF Scale - TOF axis binning and scaling
25// ==========================================================
26
27#[derive(Clone, Debug)]
28pub struct TofScale {
29    /// Smallest TOF index we include in the grid (inclusive).
30    pub tof_min: i32,
31    /// Largest TOF index we include in the grid (inclusive).
32    pub tof_max: i32,
33    /// Step between bins in TOF units (usually 1, 2, 4, …).
34    pub tof_step: i32,
35    /// Bin edges and centers in TOF space.
36    pub edges: Vec<f32>,    // len = num_bins + 1
37    pub centers: Vec<f32>,  // len = num_bins
38}
39
40impl TofScale {
41    /// Build a TOF grid from a set of frames.
42    ///
43    /// - `tof_step = 1` => 1 bin per raw TOF index (max resolution).
44    /// - Larger steps downsample TOF to reduce rows.
45    pub fn build_from_frames(frames: &[TimsFrame], tof_step: i32) -> Option<Self> {
46        assert!(tof_step > 0);
47
48        let mut tof_min = i32::MAX;
49        let mut tof_max = i32::MIN;
50
51        for fr in frames {
52            // ADAPT THIS if your TimsFrame layout is different
53            let tofs: &[i32] = &fr.tof;
54            for &t in tofs {
55                if t < tof_min { tof_min = t; }
56                if t > tof_max { tof_max = t; }
57            }
58        }
59
60        if tof_min >= tof_max {
61            return None;
62        }
63
64        // Snap min/max to step
65        let tof_min_aligned = tof_min - (tof_min % tof_step);
66        let tof_max_aligned = tof_max - (tof_max % tof_step);
67
68        let num_bins = ((tof_max_aligned - tof_min_aligned) / tof_step + 1) as usize;
69        if num_bins == 0 {
70            return None;
71        }
72
73        let mut edges = Vec::with_capacity(num_bins + 1);
74        let mut centers = Vec::with_capacity(num_bins);
75
76        for i in 0..=num_bins {
77            let tof_edge = tof_min_aligned + (i as i32) * tof_step;
78            edges.push(tof_edge as f32);
79        }
80        for i in 0..num_bins {
81            let a = edges[i];
82            let b = edges[i + 1];
83            centers.push(0.5 * (a + b)); // arithmetic center in TOF space
84        }
85
86        Some(Self {
87            tof_min: tof_min_aligned,
88            tof_max: tof_max_aligned,
89            tof_step,
90            edges,
91            centers,
92        })
93    }
94
95    /// Backwards-compatible alias if you already referenced `build_from_tof`.
96    #[inline]
97    pub fn build_from_tof(frames: &[TimsFrame], tof_step: i32) -> Option<Self> {
98        Self::build_from_frames(frames, tof_step)
99    }
100
101    #[inline(always)]
102    pub fn num_bins(&self) -> usize { self.centers.len() }
103
104    /// Map TOF → bin index (usize). Returns None if outside range.
105    #[inline(always)]
106    pub fn index_of_tof(&self, tof: i32) -> Option<usize> {
107        if tof < self.tof_min || tof > self.tof_max {
108            return None;
109        }
110        let delta = tof - self.tof_min;
111        let idx = (delta / self.tof_step) as isize;
112        if idx < 0 {
113            None
114        } else {
115            let u = idx as usize;
116            if u >= self.num_bins() { None } else { Some(u) }
117        }
118    }
119
120    /// Return a bin-index range for a TOF window [a, b] (inclusive, clamped).
121    #[inline(always)]
122    pub fn index_range_for_tof_window(&self, a: i32, b: i32) -> (usize, usize) {
123        let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
124        if hi <= self.tof_min {
125            return (0, 0);
126        }
127        if lo >= self.tof_max {
128            let last = self.num_bins().saturating_sub(1);
129            return (last, last);
130        }
131        let lo_idx = self
132            .index_of_tof(lo.max(self.tof_min))
133            .unwrap_or(0);
134        let hi_idx = self
135            .index_of_tof(hi.min(self.tof_max))
136            .unwrap_or(self.num_bins().saturating_sub(1));
137        (lo_idx.min(hi_idx), hi_idx.max(lo_idx))
138    }
139
140    #[inline(always)]
141    pub fn center(&self, idx: usize) -> f32 {
142        self.centers[idx]
143    }
144
145    /// Continuous center for a *fractional* TOF-bin index mu.
146    /// Used by `tof_center_at` on the Py side.
147    #[inline]
148    pub fn center_at(&self, mu: f32) -> f32 {
149        let n = self.num_bins();
150        if n == 0 {
151            return 0.0;
152        }
153        if mu <= 0.0 {
154            return self.centers[0];
155        }
156        let last = (n - 1) as f32;
157        if mu >= last {
158            return self.centers[n - 1];
159        }
160        let i0 = mu.floor() as usize;
161        let i1 = (i0 + 1).min(n - 1);
162        let t = mu - (i0 as f32);
163        self.centers[i0] * (1.0 - t) + self.centers[i1] * t
164    }
165
166    /// Optional helper: get TOF edges (for debug / plotting).
167    #[inline(always)]
168    pub fn tof_bounds_for_row(&self, r: usize) -> (f32, f32) {
169        (self.edges[r], self.edges[r + 1])
170    }
171}
172
173// ==========================================================
174// Section 2: Smoothing - Gaussian smoothing and blur functions
175// ==========================================================
176
177/// Light 1D Gaussian smoothing on a vector (along scans).
178pub fn smooth_vector_gaussian(v: &mut [f32], sigma: f32, truncate: f32) {
179    if v.is_empty() || sigma <= 0.0 { return; }
180    let radius = (truncate * sigma).ceil() as i32;
181    let two_sigma2 = 2.0 * sigma * sigma;
182    let mut w = Vec::with_capacity((2*radius + 1) as usize);
183    for dx in -radius..=radius {
184        let x = dx as f32;
185        w.push((-x*x / two_sigma2).exp());
186    }
187    let sum: f32 = w.iter().copied().sum();
188    for v_ in &mut w { *v_ /= sum; }
189
190    let n = v.len();
191    let mut out = vec![0.0f32; n];
192    for i in 0..n {
193        let mut acc = 0.0f32;
194        let mut norm = 0.0f32;
195        for (k,&wk) in w.iter().enumerate() {
196            let di = i as isize + (k as isize - (w.len() as isize -1)/2);
197            if di >= 0 && (di as usize) < n {
198                acc += wk * v[di as usize];
199                norm += wk;
200            }
201        }
202        out[i] = if norm > 0.0 { acc / norm } else { 0.0 };
203    }
204    v.copy_from_slice(&out);
205}
206
207// ==========================================================
208// Section 3: Marginal Building - Build RT/IM marginals
209// ==========================================================
210
211/// Build IM marginal (absolute scan axis). We have to touch selected entries:
212pub fn build_im_marginal(
213    frames: &[FrameBinView],
214    rt_lo: usize, rt_hi: usize,
215    bin_lo: usize, bin_hi: usize,
216    im_lo: usize, im_hi: usize,
217) -> Vec<f32> {
218    let len = im_hi + 1 - im_lo;
219    let mut out = vec![0.0f32; len];
220
221    for fbv in &frames[rt_lo..=rt_hi] {
222        let ub = &fbv.unique_bins;
223        if ub.is_empty() { continue; }
224        let start = match ub.binary_search(&bin_lo) { Ok(i)=>i, Err(i)=>i.min(ub.len()) };
225        let mut i = start;
226        while i < ub.len() {
227            let b = ub[i];
228            if b > bin_hi { break; }
229            let lo = fbv.offsets[i]; let hi = fbv.offsets[i+1];
230            let scans = &fbv.scan_idx[lo..hi];
231            let ints  = &fbv.intensity[lo..hi];
232            for (s, val) in scans.iter().zip(ints.iter()) {
233                let s_abs = *s as usize;
234                if s_abs >= im_lo && s_abs <= im_hi {
235                    out[s_abs - im_lo] += *val;
236                }
237            }
238            i += 1;
239        }
240    }
241    out
242}
243
244// Key for accumulation
245#[derive(Clone, Copy, PartialEq, Eq, Hash)]
246struct Key { bin: usize, scan: u32 }
247
248pub fn gaussian_blur_tof_sparse(
249    fbv: &FrameBinView,
250    sigma_bins: f32,
251    truncate_k: f32,
252) -> FrameBinView {
253    if fbv.unique_bins.is_empty() { return fbv.clone(); }
254
255    let (deltas, weights) = gaussian_kernel_bins(sigma_bins, truncate_k);
256    let nnz = fbv.intensity.len();
257    let k = deltas.len();
258
259    // Reserve: heuristic ~ nnz * effective support (not all deltas hit valid bins)
260    let mut acc: FxHashMap<Key, f32> = FxHashMap::with_capacity_and_hasher(nnz.saturating_mul(k/2), Default::default());
261
262    // Optional: quick bounds to avoid branching
263    let bin_min = *fbv.unique_bins.first().unwrap();
264    let bin_max = *fbv.unique_bins.last().unwrap();
265
266    // Iterate bins in order
267    for (i, &bin) in fbv.unique_bins.iter().enumerate() {
268        let lo = fbv.offsets[i];
269        let hi = fbv.offsets[i + 1];
270        for j in lo..hi {
271            let scan = fbv.scan_idx[j];   // u32 absolute scan
272            let val  = fbv.intensity[j];
273            if val <= 0.0 { continue; }
274
275            // Spread to neighbors
276            for (d, &w) in deltas.iter().zip(weights.iter()) {
277                let b2 = if d.is_negative() {
278                    bin.saturating_sub((-d) as usize)
279                } else {
280                    bin.saturating_add(*d as usize)
281                };
282                if b2 < bin_min || b2 > bin_max { continue; }
283                let key = Key { bin: b2, scan };
284                *acc.entry(key).or_insert(0.0) += val * w;
285            }
286        }
287    }
288
289    // Compact back into FrameBinView: group by bin, then by insertion order build CSR
290    let mut items: Vec<(usize, u32, f32)> = Vec::with_capacity(acc.len());
291    for (k, v) in acc.into_iter() {
292        if v > 0.0 && k.bin >= bin_min && k.bin <= bin_max {
293            items.push((k.bin, k.scan, v));
294        }
295    }
296    items.sort_unstable_by(|a, b| a.0.cmp(&b.0).then(a.1.cmp(&b.1)));
297
298    let mut unique_bins = Vec::new();
299    let mut offsets = Vec::new();
300    let mut scan_idx = Vec::with_capacity(items.len());
301    let mut intensity = Vec::with_capacity(items.len());
302
303    offsets.push(0);
304    let mut cur_bin: Option<usize> = None;
305    for (bin, scan, val) in items.into_iter() {
306        if cur_bin.map_or(true, |cb| cb != bin) {
307            unique_bins.push(bin);
308            offsets.push(offsets.last().copied().unwrap());
309            cur_bin = Some(bin);
310        }
311        scan_idx.push(scan);
312        intensity.push(val);
313        *offsets.last_mut().unwrap() += 1;
314    }
315    // Guarantee at least one offset (if frame is empty after filtering)
316    if unique_bins.is_empty() {
317        return FrameBinView {
318            _frame_id: fbv._frame_id,
319            unique_bins: Vec::new(),
320            offsets: vec![0],
321            scan_idx: Vec::new(),
322            intensity: Vec::new(),
323        };
324    }
325
326    FrameBinView {
327        _frame_id: fbv._frame_id,
328        unique_bins,
329        offsets,
330        scan_idx,
331        intensity,
332    }
333}
334
335pub fn blur_tof_all_frames(
336    frames: &[FrameBinView],
337    sigma_bins: f32,
338    truncate_k: f32,
339) -> Vec<FrameBinView> {
340    use rayon::prelude::*;
341    frames.par_iter()
342        .map(|f| gaussian_blur_tof_sparse(f, sigma_bins, truncate_k))
343        .collect()
344}
345
346/// Build RT marginal (len = frames in [rt_lo..rt_hi]), using (bin,scan) window
347pub fn build_rt_marginal(
348    frames: &[FrameBinView],
349    rt_lo: usize, rt_hi: usize,
350    bin_lo: usize, bin_hi: usize,
351    im_lo: usize, im_hi: usize,
352) -> Vec<f32> {
353    let mut out = vec![0.0f32; rt_hi + 1 - rt_lo];
354    for (k, fbv) in frames[rt_lo..=rt_hi].iter().enumerate() {
355        out[k] = sum_frame_block(fbv, bin_lo, bin_hi, im_lo, im_hi);
356    }
357    out
358}
359
360// ==========================================================
361// Section 4: Peak Detection - Find peaks in 1D traces
362// ==========================================================
363
364pub fn find_im_peaks_row(
365    y_smoothed: &[f32],
366    y_raw: &[f32],
367    tof_row: usize,
368    tof_center: i32,
369    tof_bounds: (i32, i32),
370    rt_bounds: (usize, usize),
371    frame_id_bounds: (u32, u32),
372    window_group: Option<u32>,
373    mobility_of: MobilityFn,
374    min_prom: f32,
375    min_distance_scans: usize,
376    min_width_scans: usize,
377    scan_axis: &[usize],
378) -> Vec<ImPeak1D> {
379    let n = y_smoothed.len();
380    if n < 3 { return Vec::new(); }
381
382    let row_max = y_raw.iter().copied().fold(0.0f32, f32::max);
383    if row_max < min_prom { return Vec::new(); }
384
385    // 1) strict local maxima on smoothed
386    let mut cands = Vec::new();
387    for i in 1..n-1 {
388        let yi = y_smoothed[i];
389        if yi > y_smoothed[i-1] && yi >= y_smoothed[i+1] {
390            cands.push(i);
391        }
392    }
393
394    let mut peaks: Vec<ImPeak1D> = Vec::new();
395    for &i in &cands {
396        let apex = y_smoothed[i];
397
398        // 2) prominence baseline (walk to mins bounded by taller peaks)
399        let mut l = i; let mut left_min = apex;
400        while l > 0 { l -= 1; left_min = left_min.min(y_smoothed[l]); if y_smoothed[l] > apex { break; } }
401        let mut r = i; let mut right_min = apex;
402        while r + 1 < n { r += 1; right_min = right_min.min(y_smoothed[r]); if y_smoothed[r] > apex { break; } }
403
404        let baseline = left_min.max(right_min);
405        let prom = apex - baseline;
406        if prom < min_prom { continue; }
407
408        // 3) half-prom crossings (fractional)
409        let half = baseline + 0.5 * prom;
410
411        // left crossing
412        let mut wl = i;
413        while wl > 0 && y_smoothed[wl] > half { wl -= 1; }
414        let left_x = if wl < i && wl + 1 < n {
415            let y0 = y_smoothed[wl]; let y1 = y_smoothed[wl + 1];
416            wl as f32 + if y1 != y0 { (half - y0) / (y1 - y0) } else { 0.0 }
417        } else { wl as f32 };
418
419        // right crossing
420        let mut wr = i;
421        while wr + 1 < n && y_smoothed[wr] > half { wr += 1; }
422        let right_x = if wr > i && wr < n {
423            let y0 = y_smoothed[wr - 1]; let y1 = y_smoothed[wr];
424            (wr - 1) as f32 + if y1 != y0 { (half - y0) / (y1 - y0) } else { 0.0 }
425        } else { wr as f32 };
426
427        let width = (right_x - left_x).max(0.0);
428        let width_scans = width.round() as usize;
429        if width_scans < min_width_scans { continue; }
430
431        // 4) sub-scan apex offset
432        let sub = if i > 0 && i + 1 < n {
433            quad_subsample(y_smoothed[i - 1], y_smoothed[i], y_smoothed[i + 1]).clamp(-0.5, 0.5)
434        } else { 0.0 };
435
436        // 5) NMS by min_distance_scans (keep the taller)
437        if let Some(last) = peaks.last() {
438            if i.abs_diff(last.scan) < min_distance_scans {
439                if apex <= last.apex_smoothed { continue; }
440                // replace last
441                peaks.pop();
442            }
443        }
444
445        // 6) area on raw between fractional bounds
446        let left_idx  = left_x.floor().clamp(0.0, (n-1) as f32) as usize;
447        let right_idx = right_x.ceil().clamp(0.0, (n-1) as f32) as usize;
448        let area = trapezoid_area_fractional(y_raw, left_x.max(0.0), right_x.min((n-1) as f32));
449
450        let mobility = mobility_of.map(|f| f(i));
451
452        let scan_abs  = scan_axis[i];
453        let left_abs  = scan_axis[left_idx.min(scan_axis.len()-1)];
454        let right_abs = scan_axis[right_idx.min(scan_axis.len()-1)];
455
456        let mut peak = ImPeak1D {
457            tof_row,
458            tof_center,
459            tof_bounds,
460            rt_bounds,
461            frame_id_bounds,
462            window_group,
463            scan: i,
464            scan_sigma: None,
465            mobility,
466            apex_smoothed: apex,
467            apex_raw: y_raw[i],
468            prominence: prom,
469            left: left_idx,
470            right: right_idx,
471            left_x,
472            right_x,
473            width_scans,
474            area_raw: area,
475            subscan: sub,
476            id: 0, // to be filled later
477            scan_abs,
478            left_abs,
479            right_abs,
480        };
481
482        peak.id = im_peak_id(&peak);
483        peaks.push(peak);
484    }
485    peaks
486}
487
488#[inline(always)]
489pub fn lerp(a: f32, b: f32, t: f32) -> f32 { a + t * (b - a) }
490
491/// Safe integration of a piecewise-linear signal y over [x0, x1].
492/// x is in sample-index units, segments are [s, s+1] for s=0..n-2.
493/// Handles boundaries: no y[n] access, supports n==0/1.
494pub fn trapezoid_area_fractional(y: &[f32], mut x0: f32, mut x1: f32) -> f32 {
495    let n = y.len();
496    if n < 2 {
497        // With <2 samples, treat area as 0
498        return 0.0;
499    }
500
501    // Clamp to [0, n-1]
502    let max_x = (n - 1) as f32;
503    x0 = x0.clamp(0.0, max_x);
504    x1 = x1.clamp(0.0, max_x);
505    if x1 <= x0 { return 0.0; }
506
507    let i0 = x0.floor() as usize;
508    let i1 = x1.floor() as usize;
509
510    // Both inside same segment [i0, i0+1] (ensure i0 < n-1)
511    if i0 == i1 {
512        let i = i0.min(n - 2);
513        let t0 = x0 - i as f32;
514        let t1 = x1 - i as f32;
515        let y0 = lerp(y[i], y[i + 1], t0);
516        let y1 = lerp(y[i], y[i + 1], t1);
517        return 0.5 * (y0 + y1) * (t1 - t0);
518    }
519
520    // Work with segment indices s in [0..n-2]
521    let s0 = i0.min(n - 2);
522    let s1 = i1.min(n - 2);
523
524    let mut area = 0.0f32;
525
526    // Left partial: from x0 within segment s0 up to s0+1
527    let t0 = x0 - s0 as f32;              // in [0,1)
528    let yl0 = lerp(y[s0], y[s0 + 1], t0); // value at x0
529    let yl1 = y[s0 + 1];                  // value at segment end
530    area += 0.5 * (yl0 + yl1) * (1.0 - t0);
531
532    // Full interior segments: s in (s0+1 .. s1-1)
533    // Only if there is at least one full segment strictly between left/right partials
534    if s1 > s0 + 1 {
535        for s in (s0 + 1)..s1 {
536            area += 0.5 * (y[s] + y[s + 1]); // width = 1
537        }
538    }
539
540    // Right partial: only if x1 lies *inside* some segment (i1 <= n-2)
541    if i1 <= n - 2 {
542        let t1 = x1 - s1 as f32;             // in (0,1]
543        let yr0 = y[s1];
544        let yr1 = lerp(y[s1], y[s1 + 1], t1);
545        area += 0.5 * (yr0 + yr1) * t1;
546    } else {
547        // i1 == n-1 → x1 is exactly at the last node; no right partial to add.
548    }
549
550    area
551}
552
553#[inline(always)]
554pub fn quad_subsample(y0: f32, y1: f32, y2: f32) -> f32 {
555    let denom = y0 - 2.0*y1 + y2;
556    if denom.abs() < 1e-12 { 0.0 } else { 0.5 * (y0 - y2) / denom }
557}
558
559
560#[inline]
561pub fn im_peak_id(p: &ImPeak1D) -> PeakId {
562    use rustc_hash::FxHasher;
563    use std::hash::{Hash, Hasher};
564
565    // Deterministic over the identity-defining fields
566    let mut h = FxHasher::default();
567    p.window_group.hash(&mut h);
568    p.tof_row.hash(&mut h);
569    p.scan.hash(&mut h);
570    p.left.hash(&mut h);
571    p.right.hash(&mut h);
572    p.frame_id_bounds.hash(&mut h);
573
574    let u: u64 = h.finish();
575    // force into non-negative i64 space (avoids pandas/NumPy uint64 weirdness)
576    (u & 0x7FFF_FFFF_FFFF_FFFF) as i64
577}
578
579#[inline]
580pub fn rt_peak_id(r: &RtPeak1D) -> PeakId {
581    let mut h = FxHasher::default();
582    r.parent_im_id.hash(&mut h);
583    r.tof_row.hash(&mut h);
584    r.rt_idx.hash(&mut h);
585    r.frame_id_bounds.hash(&mut h);
586    r.window_group.hash(&mut h);
587    let u: u64 = h.finish();
588    // force into non-negative i64 space (avoids pandas/NumPy uint64 weirdness)
589    (u & 0x7FFF_FFFF_FFFF_FFFF) as i64
590}
591
592pub fn fallback_rt_peak_from_trace(
593    trace_raw: &[f32],
594    ctx: RtTraceCtx<'_>,
595    im_peak: &ImPeak1D,
596    frac: f32, // e.g., 0.10
597) -> Option<RtPeak1D> {
598    let n = trace_raw.len();
599    if n == 0 { return None; }
600
601    // apex on raw
602    let (mut i_max, mut y_max) = (0usize, 0.0f32);
603    for (i, &y) in trace_raw.iter().enumerate() {
604        if y > y_max { y_max = y; i_max = i; }
605    }
606    if y_max <= 0.0 { return None; }
607
608    // centroid (first moment) for a stable center estimate
609    let mut sum_y = 0.0f32;
610    let mut sum_ty = 0.0f32;
611    for (i, &y) in trace_raw.iter().enumerate() {
612        sum_y  += y;
613        sum_ty += (i as f32) * y;
614    }
615    let mu = if sum_y > 0.0 { sum_ty / sum_y } else { i_max as f32 };
616    let rt_idx = mu.floor().clamp(0.0, (n - 1) as f32) as usize;
617    let subframe = (mu - (rt_idx as f32)).clamp(-0.5, 0.5);
618
619    // fractional width at frac * apex
620    let thr = frac * y_max;
621    // left crossing
622    let mut l = i_max;
623    while l > 0 && trace_raw[l] > thr { l -= 1; }
624    let left_x = if l < i_max {
625        let y0 = trace_raw[l]; let y1 = trace_raw[l + 1];
626        l as f32 + if y1 != y0 { (thr - y0) / (y1 - y0) } else { 0.0 }
627    } else { l as f32 };
628
629    // right crossing
630    let mut r = i_max;
631    while r + 1 < n && trace_raw[r] > thr { r += 1; }
632    let right_x = if r > i_max && r < n {
633        let y0 = trace_raw[r - 1]; let y1 = trace_raw[r];
634        (r - 1) as f32 + if y1 != y0 { (thr - y0) / (y1 - y0) } else { 0.0 }
635    } else { r as f32 };
636
637    let width_frames = (right_x - left_x).max(0.0).round() as usize;
638    let area_raw = trapezoid_area_fractional(trace_raw, left_x.max(0.0), right_x.min((n - 1) as f32));
639
640    // “prominence” proxy: apex – min in support (conservative)
641    let base = trace_raw.iter().copied().fold(f32::INFINITY, f32::min);
642    let prom = (y_max - base).max(0.0);
643
644    // integer bounds & frame ids
645    let l_i = left_x.floor().clamp(0.0, n.saturating_sub(1) as f32) as usize;
646    let r_i = right_x.ceil().clamp(0.0, n.saturating_sub(1) as f32) as usize;
647    let rt_bounds_frames = (l_i.min(r_i), r_i.max(l_i));
648
649    let frame_id_bounds = if ctx.frame_ids_sorted.is_empty() {
650        im_peak.frame_id_bounds
651    } else {
652        let lo = ctx.frame_ids_sorted[rt_bounds_frames.0.min(ctx.frame_ids_sorted.len()-1)];
653        let hi = ctx.frame_ids_sorted[rt_bounds_frames.1.min(ctx.frame_ids_sorted.len()-1)];
654        (lo.min(hi), lo.max(hi))
655    };
656
657    let t = ctx.rt_times_sec;
658    let j0 = rt_idx.min(t.len() - 1);
659    let j1 = (j0 + 1).min(t.len() - 1);
660    let frac = (subframe + (rt_idx as f32) - j0 as f32).clamp(0.0, 1.0);
661    let rt_sec = Some((1.0 - frac) * t[j0] + frac * t[j1]);
662
663    let mut rp = RtPeak1D {
664        rt_idx,
665        rt_sec,
666        apex_smoothed: y_max, // no smoothing in fallback
667        apex_raw: y_max,
668        prominence: prom,
669        left_x,
670        right_x,
671        width_frames,
672        area_raw,
673        subframe,
674
675        rt_bounds_frames,
676        frame_id_bounds,
677        window_group: im_peak.window_group,
678
679        tof_row: im_peak.tof_row,
680        tof_center: im_peak.tof_center,
681        tof_bounds: im_peak.tof_bounds,
682
683        parent_im_id: Some(im_peak.id),
684        id: 0,
685    };
686    rp.id = rt_peak_id(&rp);
687    Some(rp)
688}
689
690/// Sum across a single frame in bin [bin_lo..bin_hi], scan [im_lo..im_hi]
691#[inline]
692fn sum_frame_block(fbv:&FrameBinView, bin_lo:usize, bin_hi:usize, im_lo:usize, im_hi:usize) -> f32 {
693    // identical approach as your sum_frame_bins_scans, but for a range
694    let ub = &fbv.unique_bins;
695    if ub.is_empty() || bin_lo > bin_hi { return 0.0; }
696    let start = match ub.binary_search(&bin_lo) { Ok(i)=>i, Err(i)=>i.min(ub.len()) };
697    let mut acc = 0.0f32;
698    let mut i = start;
699    while i < ub.len() {
700        let b = ub[i];
701        if b > bin_hi { break; }
702        let lo = fbv.offsets[i];
703        let hi = fbv.offsets[i+1];
704        let scans = &fbv.scan_idx[lo..hi];
705        let ints  = &fbv.intensity[lo..hi];
706        for (s, val) in scans.iter().zip(ints.iter()) {
707            let s = *s as usize;
708            if s >= im_lo && s <= im_hi { acc += *val; }
709        }
710        i += 1;
711    }
712    acc
713}
714
715/// Build a histogram over CSR bins (one value per bin in [bin_lo..bin_hi]).
716///
717/// Returns (hist, centers) where:
718///   - `hist[k]` = summed intensity for bin (bin_lo + k)
719///   - `centers[k]` = TOF center of that bin (scale.center)
720pub fn build_tof_hist(
721    frames: &[FrameBinView],
722    rt_lo: usize, rt_hi: usize,
723    bin_lo: usize, bin_hi: usize,
724    im_lo: usize, im_hi: usize,
725    scale: &TofScale,
726) -> (Vec<f32>, Vec<f32>) {
727    let r = bin_hi + 1 - bin_lo;
728    let mut hist = vec![0.0f32; r];
729    for fbv in &frames[rt_lo..=rt_hi] {
730        let ub = &fbv.unique_bins;
731        if ub.is_empty() { continue; }
732        let start = match ub.binary_search(&bin_lo) { Ok(i)=>i, Err(i)=>i.min(ub.len()) };
733        let mut i = start;
734        while i < ub.len() {
735            let b = ub[i];
736            if b > bin_hi { break; }
737            let lo = fbv.offsets[i]; let hi = fbv.offsets[i+1];
738            let scans = &fbv.scan_idx[lo..hi];
739            let ints  = &fbv.intensity[lo..hi];
740            let mut sum = 0.0f32;
741            for (s, val) in scans.iter().zip(ints.iter()) {
742                let s_abs = *s as usize;
743                if s_abs >= im_lo && s_abs <= im_hi { sum += *val; }
744            }
745            hist[b - bin_lo] += sum;
746            i += 1;
747        }
748    }
749    let centers = (bin_lo..=bin_hi).map(|i| scale.center(i)).collect::<Vec<_>>();
750    (hist, centers)
751}
752
753// ==========================================================
754// Section 5: Fitting - Fit1D and moment-based fitting
755// ==========================================================
756
757#[inline]
758pub fn quantile(values: &[f32], q: f32) -> f32 {
759    // Drop non-finite values to avoid NaN poisoning the order
760    let mut v: Vec<f32> = values.iter().copied().filter(|x| x.is_finite()).collect();
761    if v.is_empty() { return 0.0; }
762
763    let n = v.len();
764    let q = q.clamp(0.0, 1.0);
765    let idx = (q * (n.saturating_sub(1) as f32)).round() as usize;
766
767    // For floats, use the comparator form + total ordering
768    let (_left, nth, _right) = v.select_nth_unstable_by(idx, |a, b| a.total_cmp(b));
769    *nth
770}
771
772#[inline]
773pub fn quantile_mut(v: &mut [f32], q: f32) -> f32 {
774    if v.is_empty() { return 0.0; }
775    // Replace NaNs with -inf so they sort to the front (or filter them beforehand)
776    for x in v.iter_mut() {
777        if !x.is_finite() { *x = f32::NEG_INFINITY; }
778    }
779    let idx = (q.clamp(0.0, 1.0) * (v.len().saturating_sub(1) as f32)).round() as usize;
780    let (_l, nth, _r) = v.select_nth_unstable_by(idx, |a, b| a.total_cmp(b));
781    *nth
782}
783
784// --- Robust noise estimation for adaptive thresholds ---
785
786/// Compute robust noise estimate using MAD (Median Absolute Deviation).
787/// Returns noise in the same units as the input signal.
788///
789/// MAD is more robust to outliers (peaks) than standard deviation,
790/// making it suitable for estimating baseline noise in signals with peaks.
791#[inline]
792pub fn robust_noise_mad(signal: &[f32]) -> f32 {
793    if signal.len() < 3 {
794        return 0.0;
795    }
796
797    // Compute median
798    let mut sorted: Vec<f32> = signal.iter().copied().filter(|x| x.is_finite()).collect();
799    if sorted.is_empty() {
800        return 0.0;
801    }
802    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
803    let median = sorted[sorted.len() / 2];
804
805    // Compute MAD (Median Absolute Deviation)
806    let mut deviations: Vec<f32> = sorted.iter().map(|&x| (x - median).abs()).collect();
807    deviations.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
808    let mad = deviations[deviations.len() / 2];
809
810    // Convert MAD to standard deviation estimate (1.4826 for normal distribution)
811    mad * 1.4826
812}
813
814/// Compute noise from neighbor differences.
815/// Uses the median of absolute neighbor differences, which is robust to peaks.
816#[inline]
817pub fn robust_noise_neighbor_diff(y: &[f32]) -> f32 {
818    if y.len() < 2 {
819        return 0.0;
820    }
821    let mut diffs: Vec<f32> = y.windows(2)
822        .map(|w| (w[1] - w[0]).abs())
823        .filter(|&d| d.is_finite())
824        .collect();
825    if diffs.is_empty() {
826        return 0.0;
827    }
828    diffs.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
829    // Median of diffs, scaled to approximate standard deviation
830    diffs[diffs.len() / 2] / 1.4826
831}
832
833#[derive(Clone, Debug, Default, Serialize, Deserialize)]
834pub struct Fit1D {
835    pub mu: f32,
836    pub sigma: f32,
837    pub height: f32,
838    pub baseline: f32,
839    pub area: f32,
840    pub r2: f32,
841    pub n: usize,
842}
843
844pub fn fit1d_moment(y:&[f32], x: Option<&[f32]>) -> Fit1D {
845    let n = y.len();
846    if n == 0 { return Fit1D::default(); }
847
848    // --- Baseline via 10% quantile (guarded) ---
849    let mut ys = y.to_vec();
850    let mut b0 = quantile_mut(&mut ys, 0.10);
851    if !b0.is_finite() { b0 = 0.0; }
852
853    // --- Domain helpers ---
854    let (x_lo, x_hi) = if let Some(xx) = x {
855        if !xx.is_empty() {
856            let lo = *xx.first().unwrap_or(&0.0);
857            let hi = *xx.last().unwrap_or(&lo);
858            (if lo.is_finite(){lo}else{0.0}, if hi.is_finite(){hi}else{0.0})
859        } else { (0.0, 0.0) }
860    } else {
861        if n > 0 { (0.0, n.saturating_sub(1) as f32) } else { (0.0, 0.0) }
862    };
863
864    let mu_fallback: f32 = 0.5 * (x_lo + x_hi);
865
866    // --- Weighted moments (baseline-subtracted positives) + trapezoid area ---
867    let mut wsum=0.0f64;
868    let mut xsum=0.0f64;
869    let mut x2sum=0.0f64;
870
871    let mut y_max = f32::NEG_INFINITY;
872    let mut trap = 0.0f64;
873
874    for i in 0..n {
875        let xi_f32 = x.map(|xx| xx[i]).unwrap_or(i as f32);
876        let xi = xi_f32 as f64;
877        let yi_pos = (y[i] - b0).max(0.0) as f64;
878
879        if yi_pos > 0.0 {
880            wsum += yi_pos;
881            xsum += yi_pos * xi;
882            x2sum += yi_pos * xi * xi;
883        }
884
885        if y[i] > y_max { y_max = y[i]; }
886
887        // trapezoid area: handle both irregular and unit spacing
888        if i > 0 {
889            let y0 = (y[i-1] - b0).max(0.0) as f64;
890            let yi = yi_pos;
891            let dx = if let Some(xx) = x {
892                (xx[i] - xx[i-1]) as f64
893            } else {
894                1.0
895            };
896            trap += 0.5 * (y0 + yi) * dx;
897        }
898    }
899
900    // No positive mass → sane fallback
901    if wsum <= 0.0 {
902        let mut area = trap as f32;
903        if !area.is_finite() || area < 0.0 { area = 0.0; }
904
905        let mut mu = mu_fallback;
906        if !mu.is_finite() { mu = 0.0; }
907        // clamp mu to domain
908        mu = mu.clamp(x_lo, x_hi);
909
910        let mut height = (y_max - b0).max(0.0);
911        if !height.is_finite() || height < 0.0 { height = 0.0; }
912
913        let mut baseline = b0.max(0.0);
914        if !baseline.is_finite() || baseline < 0.0 { baseline = 0.0; }
915
916        return Fit1D { mu, sigma: 0.0, height, baseline, area, r2: 0.0, n };
917    }
918
919    let mut mu = (xsum/wsum) as f32;
920    if !mu.is_finite() { mu = mu_fallback; }
921    // clamp to domain
922    mu = mu.clamp(x_lo, x_hi);
923
924    let var = (x2sum/wsum - (mu as f64)*(mu as f64)).max(0.0) as f32;
925    let mut sigma = var.sqrt();
926    if !sigma.is_finite() || sigma < 0.0 { sigma = 0.0; }
927    // tiny epsilon floor to stabilize Gaussian basis
928    if sigma > 0.0 && sigma < 1e-6 { sigma = 1e-6; }
929
930    // --- 2×2 LS for (baseline b, height h) with Gaussian g(mu, sigma) ---
931    let mut s_gg=0.0f64; let mut s_g1=0.0f64; let s_11=n as f64;
932    let mut s_yg=0.0f64; let mut s_y1=0.0f64;
933
934    if sigma > 0.0 {
935        for i in 0..n {
936            let xi = x.map(|xx| xx[i]).unwrap_or(i as f32);
937            let z = (xi - mu) as f64 / (sigma as f64);
938            let g = (-0.5*z*z).exp();
939            let yi = y[i] as f64;
940            s_gg += g*g; s_g1 += g; s_yg += yi*g; s_y1 += yi;
941        }
942        let det = s_11*s_gg - s_g1*s_g1;
943        if det.abs() > 1e-12 {
944            let mut b = ( s_gg*s_y1 - s_g1*s_yg)/det;
945            let mut h = (-s_g1*s_y1 + s_11*s_yg)/det;
946
947            // physical clamp
948            if !b.is_finite() || b < 0.0 { b = 0.0; }
949            if !h.is_finite() || h < 0.0 { h = 0.0; }
950
951            let mut area = (h as f32)*sigma*std::f32::consts::TAU.sqrt();
952            if !area.is_finite() || area < 0.0 { area = 0.0; }
953
954            // --- Baseline-aware, clamped R^2 ---
955            // Null model is the fitted baseline b (better than raw mean for chromatographic/IM traces)
956            let baseline_ref = b as f32;
957            let mut ss_res=0.0f64;
958            let mut ss_tot=0.0f64;
959            for i in 0..n {
960                let xi = x.map(|xx| xx[i]).unwrap_or(i as f32);
961                let z = (xi - mu)/sigma;
962                let yhat = baseline_ref + (h as f32)*(-0.5*z*z).exp();
963                let e = (y[i]-yhat) as f64;
964                ss_res += e*e;
965                let d = (y[i]-baseline_ref) as f64;
966                ss_tot += d*d;
967            }
968            let mut r2 = if ss_tot > 1e-20 { (1.0 - ss_res/ss_tot) as f32 } else { 0.0 };
969            if !r2.is_finite() { r2 = 0.0; }
970            // clamp to [0,1]
971            r2 = r2.clamp(0.0, 1.0);
972
973            return Fit1D { mu, sigma, height: h as f32, baseline: b as f32, area, r2, n };
974        }
975    }
976
977    // --- Fallback: use moment height & sigma, Gaussian area or trapezoid ---
978    let mut height = (y_max - b0).max(0.0);
979    if !height.is_finite() || height < 0.0 { height = 0.0; }
980
981    let mut area = if sigma > 0.0 {
982        height * sigma * std::f32::consts::TAU.sqrt()
983    } else { trap as f32 };
984    if !area.is_finite() || area < 0.0 { area = 0.0; }
985
986    let mut baseline = b0.max(0.0);
987    if !baseline.is_finite() || baseline < 0.0 { baseline = 0.0; }
988
989    Fit1D { mu, sigma, height, baseline, area, r2: 0.0, n }
990}
991
992fn gaussian_kernel_bins(sigma_bins: f32, truncate_k: f32) -> (Vec<i32>, Vec<f32>) {
993    let sigma = sigma_bins.max(0.3);
994    let radius = (truncate_k * sigma).ceil() as i32;
995    let mut offs = Vec::with_capacity((2*radius + 1) as usize);
996    let mut w    = Vec::with_capacity(offs.capacity());
997    let two_s2 = 2.0 * sigma * sigma;
998    let mut sum = 0.0f32;
999    for d in -radius..=radius {
1000        let x = d as f32;
1001        let wd = (-x*x / two_s2).exp();
1002        offs.push(d);
1003        w.push(wd);
1004        sum += wd;
1005    }
1006    for wi in &mut w { *wi /= sum.max(1e-12); }
1007    (offs, w)
1008}
1009
1010fn interval_overlap_usize(a: (usize, usize), b: (usize, usize)) -> usize {
1011    let lo = a.0.max(b.0);
1012    let hi = a.1.min(b.1);
1013    if lo > hi {
1014        0
1015    } else {
1016        hi - lo + 1
1017    }
1018}
1019
1020fn jaccard_1d_usize(a: (usize, usize), b: (usize, usize)) -> f32 {
1021    let inter = interval_overlap_usize(a, b) as f32;
1022    if inter <= 0.0 {
1023        return 0.0;
1024    }
1025    let len_a = if a.1 >= a.0 { (a.1 - a.0 + 1) as f32 } else { 0.0 };
1026    let len_b = if b.1 >= b.0 { (b.1 - b.0 + 1) as f32 } else { 0.0 };
1027    let union = (len_a + len_b - inter).max(1.0);
1028    inter / union
1029}
1030
1031/// Fast compatibility check for deciding whether two ImPeak1D should be merged.
1032///
1033/// All checks are *conservative*: if any condition fails, we do NOT merge.
1034fn compatible_fast(a: &ImPeak1D, b: &ImPeak1D, params: &StitchParams) -> bool {
1035    // 1) Window group (unless crossing allowed)
1036    if !params.allow_cross_groups {
1037        if a.window_group != b.window_group {
1038            return false;
1039        }
1040    }
1041
1042    // 2) TOF row proximity (we now use max_mz_row_delta in tof_row space)
1043    if params.max_tof_row_delta > 0 {
1044        let d = if a.tof_row > b.tof_row {
1045            a.tof_row - b.tof_row
1046        } else {
1047            b.tof_row - a.tof_row
1048        };
1049        if d > params.max_tof_row_delta {
1050            return false;
1051        }
1052    }
1053
1054    // 3) RT overlap (rt_bounds are indices in a frame grid, inclusive)
1055    let rt_a = a.rt_bounds;
1056    let rt_b = b.rt_bounds;
1057    let rt_overlap = interval_overlap_usize(rt_a, rt_b);
1058    if rt_overlap < params.min_overlap_frames {
1059        return false;
1060    }
1061
1062    if params.jaccard_min > 0.0 {
1063        let j_rt = jaccard_1d_usize(rt_a, rt_b);
1064        if j_rt < params.jaccard_min {
1065            return false;
1066        }
1067    }
1068
1069    // 4) IM (scan) overlap; left/right are inclusive scan indices on the grid
1070    let im_a = (a.left, a.right);
1071    let im_b = (b.left, b.right);
1072    let im_overlap = interval_overlap_usize(im_a, im_b);
1073    if im_overlap < params.min_im_overlap_scans {
1074        return false;
1075    }
1076
1077    if params.im_jaccard_min > 0.0 {
1078        let j_im = jaccard_1d_usize(im_a, im_b);
1079        if j_im < params.im_jaccard_min {
1080            return false;
1081        }
1082    }
1083
1084    // 5) Mutual apex inside overlap band (in absolute scan coords)
1085    // 5) Mutual apex inside overlap band (in absolute scan coords)
1086    if params.require_mutual_apex_inside {
1087        let left_abs_overlap  = a.left_abs.max(b.left_abs);
1088        let right_abs_overlap = a.right_abs.min(b.right_abs);
1089        if left_abs_overlap > right_abs_overlap {
1090            return false;
1091        }
1092
1093        let lo_f = left_abs_overlap as f32;
1094        let hi_f = right_abs_overlap as f32;
1095
1096        // Use absolute scan center as apex; subscan is *fractional* around the local index,
1097        // not a global coordinate.
1098        let a_apex = a.scan_abs as f32;
1099        let b_apex = b.scan_abs as f32;
1100
1101        if !(a_apex >= lo_f && a_apex <= hi_f && b_apex >= lo_f && b_apex <= hi_f) {
1102            return false;
1103        }
1104    }
1105
1106    true
1107}
1108
1109fn merge_group(peaks: &[&ImPeak1D]) -> ImPeak1D {
1110    use std::cmp::{min, max};
1111
1112    assert!(!peaks.is_empty());
1113
1114    // --- initialize accumulators -----------------------------------------
1115    let mut rt_lo = usize::MAX;
1116    let mut rt_hi = 0usize;
1117
1118    let mut fid_lo = u32::MAX;
1119    let mut fid_hi = 0u32;
1120
1121    let mut tof_lo = i32::MAX;
1122    let mut tof_hi = i32::MIN;
1123
1124    let mut left = usize::MAX;
1125    let mut right = 0usize;
1126
1127    let mut left_abs = usize::MAX;
1128    let mut right_abs = 0usize;
1129
1130    let mut area_sum = 0.0f32;
1131
1132    // weighted centers
1133    let mut w_sum   = 0.0f32;
1134    let mut w_tof_row = 0.0f32;
1135    let mut w_tof_center = 0.0f32;
1136    let mut w_scan      = 0.0f32;
1137    let mut w_scan_abs  = 0.0f32;
1138    let mut w_subscan   = 0.0f32;
1139
1140    // apex-like
1141    let mut apex_smoothed_max = f32::MIN;
1142    let mut apex_raw_max      = f32::MIN;
1143    let mut prominence_max    = f32::MIN;
1144
1145    // scan_sigma aggregation (only over Some)
1146    let mut scan_sigma_w_sum = 0.0f32;
1147    let mut scan_sigma_w_val = 0.0f32;
1148
1149    // mobility: keep first non-None
1150    let mut mobility_agg: Option<f32> = None;
1151
1152    // window_group: if they differ, collapse to None
1153    let mut wg_agg = peaks[0].window_group;
1154    let mut id_min = peaks[0].id;
1155
1156    for p in peaks {
1157        let w = p.area_raw.max(1.0);
1158        w_sum += w;
1159        area_sum += p.area_raw;
1160
1161        // bounds
1162        let (rt0, rt1) = p.rt_bounds;
1163        rt_lo = min(rt_lo, rt0);
1164        rt_hi = max(rt_hi, rt1);
1165
1166        let (fid0, fid1) = p.frame_id_bounds;
1167        fid_lo = min(fid_lo, fid0);
1168        fid_hi = max(fid_hi, fid1);
1169
1170        let (tof0, tof1) = p.tof_bounds;
1171        tof_lo = min(tof_lo, tof0);
1172        tof_hi = max(tof_hi, tof1);
1173
1174        left     = min(left, p.left);
1175        right    = max(right, p.right);
1176        left_abs = min(left_abs, p.left_abs);
1177        right_abs= max(right_abs, p.right_abs);
1178
1179        // centers
1180        w_tof_row    += (p.tof_row as f32)    * w;
1181        w_tof_center += (p.tof_center as f32) * w;
1182        w_scan       += (p.scan as f32)       * w;
1183        w_scan_abs   += (p.scan_abs as f32)   * w;
1184        w_subscan    +=  p.subscan            * w;
1185
1186        // apex
1187        apex_smoothed_max = apex_smoothed_max.max(p.apex_smoothed);
1188        apex_raw_max      = apex_raw_max.max(p.apex_raw);
1189        prominence_max    = prominence_max.max(p.prominence);
1190
1191        // scan_sigma
1192        if let Some(s) = p.scan_sigma {
1193            scan_sigma_w_sum += w;
1194            scan_sigma_w_val += s * w;
1195        }
1196
1197        // mobility
1198        if mobility_agg.is_none() {
1199            mobility_agg = p.mobility;
1200        }
1201
1202        // window_group
1203        if wg_agg != p.window_group {
1204            wg_agg = None;
1205        }
1206
1207        // ID
1208        if p.id < id_min {
1209            id_min = p.id;
1210        }
1211    }
1212
1213    // --- finalize weighted centers ----------------------------------------
1214    let w_sum_safe = if w_sum > 0.0 { w_sum } else { 1.0 };
1215
1216    let tof_row = (w_tof_row / w_sum_safe).round().max(0.0) as usize;
1217    let tof_center = (w_tof_center / w_sum_safe).round() as i32;
1218    let scan = (w_scan / w_sum_safe).round().max(0.0) as usize;
1219    let scan_abs = (w_scan_abs / w_sum_safe).round().max(0.0) as usize;
1220    let subscan = w_subscan / w_sum_safe;
1221
1222    let width_scans = right_abs
1223        .saturating_sub(left_abs)
1224        .saturating_add(1);
1225
1226    let scan_sigma = if scan_sigma_w_sum > 0.0 {
1227        Some(scan_sigma_w_val / scan_sigma_w_sum)
1228    } else {
1229        None
1230    };
1231
1232    ImPeak1D {
1233        tof_row,
1234        tof_center,
1235        tof_bounds: (tof_lo, tof_hi),
1236        rt_bounds: (rt_lo, rt_hi),
1237        frame_id_bounds: (fid_lo, fid_hi),
1238        window_group: wg_agg,
1239
1240        scan,
1241        left,
1242        right,
1243
1244        scan_abs,
1245        left_abs,
1246        right_abs,
1247
1248        scan_sigma,
1249        mobility: mobility_agg,
1250        apex_smoothed: apex_smoothed_max,
1251        apex_raw: apex_raw_max,
1252        prominence: prominence_max,
1253        left_x: left_abs as f32,
1254        right_x: right_abs as f32,
1255        width_scans,
1256        area_raw: area_sum,
1257        subscan,
1258        id: id_min,
1259    }
1260}
1261
1262// ==========================================================
1263// Section 6: Stitching - Stitch overlapping peaks across windows
1264// ==========================================================
1265
1266pub struct StitchParams {
1267    pub min_overlap_frames: usize,
1268    pub max_scan_delta: usize,
1269    pub jaccard_min: f32,
1270    pub max_tof_row_delta: usize,  // now acts on tof_row
1271    pub allow_cross_groups: bool,
1272    pub min_im_overlap_scans: usize,
1273    pub im_jaccard_min: f32,
1274    pub require_mutual_apex_inside: bool,
1275}
1276
1277#[derive(Hash, Eq, PartialEq, Clone, Debug)]
1278struct KeyFlat {
1279    pub wg_norm: Option<u32>,
1280    pub scan_bin: usize,
1281    pub tof_bucket: usize,
1282}
1283
1284#[inline]
1285fn norm_wg(wg: Option<u32>, allow_cross: bool) -> Option<u32> {
1286    if allow_cross { None } else { wg }
1287}
1288
1289#[inline]
1290fn tof_bucket(tof_row: usize, delta: usize) -> usize {
1291    // Behaves like old mz_bucket() but in TOF-row space.
1292    // delta == 0 → each row stands alone; otherwise bucket size = delta+1.
1293    let w = delta.saturating_add(1);
1294    tof_row / w
1295}
1296
1297/// Core stitching logic on ImPeak1D in TOF space.
1298///
1299/// - Buckets by (normed window_group, scan_bin, tof_bucket)
1300/// - Runs an in-bucket fold using `compatible_fast` + `merge_into`
1301/// - Then a single consolidation pass across bucket boundaries
1302pub fn stitch_im_peaks_flat_unordered_impl(
1303    flat: Vec<Arc<ImPeak1D>>,
1304    params: &StitchParams,
1305) -> Vec<ImPeak1D> {
1306    if flat.is_empty() {
1307        return Vec::new();
1308    }
1309
1310    let mut buckets: FxHashMap<KeyFlat, Vec<Arc<ImPeak1D>>> = FxHashMap::default();
1311
1312    // 1) Collect into buckets
1313    for arc in flat.into_iter() {
1314        let r = arc.as_ref();
1315        let key = KeyFlat {
1316            wg_norm: norm_wg(r.window_group, params.allow_cross_groups),
1317            scan_bin: r.scan / params.max_scan_delta.max(1),
1318            tof_bucket: tof_bucket(r.tof_row, params.max_tof_row_delta),
1319        };
1320        buckets.entry(key).or_default().push(arc);
1321    }
1322
1323    // 2) In-bucket folding with deterministic order + group merge
1324    let mut stitched: Vec<ImPeak1D> = Vec::new();
1325
1326    for (_k, mut vec_arc) in buckets.into_iter() {
1327        vec_arc.sort_unstable_by(|a, b| {
1328            let aa = a.as_ref();
1329            let bb = b.as_ref();
1330            aa.scan
1331                .cmp(&bb.scan)
1332                .then_with(|| aa.rt_bounds.0.cmp(&bb.rt_bounds.0))
1333                .then_with(|| aa.tof_row.cmp(&bb.tof_row))
1334                .then_with(|| aa.id.cmp(&bb.id))
1335        });
1336
1337        let mut group: Vec<Arc<ImPeak1D>> = Vec::new();
1338
1339        // Track span inside the current group
1340        let mut cur_min_row: usize = usize::MAX;
1341        let mut cur_max_row: usize = 0;
1342        let mut cur_min_scan_abs: usize = usize::MAX;
1343        let mut cur_max_scan_abs: usize = 0;
1344
1345        for arc in vec_arc.into_iter() {
1346            let p = arc.as_ref();
1347
1348            if let Some(last) = group.last() {
1349                // hypothetical new spans if we add p
1350                let new_min_row = cur_min_row.min(p.tof_row);
1351                let new_max_row = cur_max_row.max(p.tof_row);
1352                let span_rows = new_max_row.saturating_sub(new_min_row);
1353
1354                let new_min_scan = cur_min_scan_abs.min(p.scan_abs);
1355                let new_max_scan = cur_max_scan_abs.max(p.scan_abs);
1356                let span_scans = new_max_scan.saturating_sub(new_min_scan);
1357
1358                let tof_span_ok = params.max_tof_row_delta == 0
1359                    || span_rows <= params.max_tof_row_delta;
1360                let scan_span_ok = params.max_scan_delta == 0
1361                    || span_scans <= params.max_scan_delta;
1362
1363                if compatible_fast(last.as_ref(), p, params) && tof_span_ok && scan_span_ok {
1364                    group.push(arc);
1365                    cur_min_row = new_min_row;
1366                    cur_max_row = new_max_row;
1367                    cur_min_scan_abs = new_min_scan;
1368                    cur_max_scan_abs = new_max_scan;
1369                } else {
1370                    // finalize previous group
1371                    let refs: Vec<&ImPeak1D> = group.iter().map(|a| a.as_ref()).collect();
1372                    stitched.push(merge_group(&refs));
1373
1374                    group.clear();
1375                    group.push(arc.clone());
1376                    cur_min_row = p.tof_row;
1377                    cur_max_row = p.tof_row;
1378                    cur_min_scan_abs = p.scan_abs;
1379                    cur_max_scan_abs = p.scan_abs;
1380                }
1381            } else {
1382                // first element in this bucket
1383                cur_min_row = p.tof_row;
1384                cur_max_row = p.tof_row;
1385                cur_min_scan_abs = p.scan_abs;
1386                cur_max_scan_abs = p.scan_abs;
1387                group.push(arc);
1388            }
1389        }
1390
1391        if !group.is_empty() {
1392            let refs: Vec<&ImPeak1D> = group.iter().map(|a| a.as_ref()).collect();
1393            stitched.push(merge_group(&refs));
1394        }
1395    }
1396
1397    // 3) Cross-bucket consolidation pass with the same span guards
1398    stitched.sort_unstable_by(|a, b| {
1399        a.window_group
1400            .cmp(&b.window_group)
1401            .then_with(|| a.tof_row.cmp(&b.tof_row))
1402            .then_with(|| a.scan.cmp(&b.scan))
1403            .then_with(|| a.rt_bounds.0.cmp(&b.rt_bounds.0))
1404            .then_with(|| a.id.cmp(&b.id))
1405    });
1406
1407    let mut final_out: Vec<ImPeak1D> = Vec::with_capacity(stitched.len());
1408    let mut group: Vec<ImPeak1D> = Vec::new();
1409    let mut cur_min_row: usize = usize::MAX;
1410    let mut cur_max_row: usize = 0;
1411    let mut cur_min_scan_abs: usize = usize::MAX;
1412    let mut cur_max_scan_abs: usize = 0;
1413
1414    for p in stitched.into_iter() {
1415        if let Some(last) = group.last() {
1416            let new_min_row = cur_min_row.min(p.tof_row);
1417            let new_max_row = cur_max_row.max(p.tof_row);
1418            let span_rows = new_max_row.saturating_sub(new_min_row);
1419
1420            let new_min_scan = cur_min_scan_abs.min(p.scan_abs);
1421            let new_max_scan = cur_max_scan_abs.max(p.scan_abs);
1422            let span_scans = new_max_scan.saturating_sub(new_min_scan);
1423
1424            let tof_span_ok = params.max_tof_row_delta == 0
1425                || span_rows <= params.max_tof_row_delta;
1426            let scan_span_ok = params.max_scan_delta == 0
1427                || span_scans <= params.max_scan_delta;
1428
1429            if compatible_fast(last, &p, params) && tof_span_ok && scan_span_ok {
1430                cur_min_row = new_min_row;
1431                cur_max_row = new_max_row;
1432                cur_min_scan_abs = new_min_scan;
1433                cur_max_scan_abs = new_max_scan;
1434                group.push(p);
1435            } else {
1436                let refs: Vec<&ImPeak1D> = group.iter().collect();
1437                final_out.push(merge_group(&refs));
1438
1439                group.clear();
1440                cur_min_row = p.tof_row;
1441                cur_max_row = p.tof_row;
1442                cur_min_scan_abs = p.scan_abs;
1443                cur_max_scan_abs = p.scan_abs;
1444                group.push(p);
1445            }
1446        } else {
1447            cur_min_row = p.tof_row;
1448            cur_max_row = p.tof_row;
1449            cur_min_scan_abs = p.scan_abs;
1450            cur_max_scan_abs = p.scan_abs;
1451            group.push(p);
1452        }
1453    }
1454
1455    if !group.is_empty() {
1456        let refs: Vec<&ImPeak1D> = group.iter().collect();
1457        final_out.push(merge_group(&refs));
1458    }
1459
1460    final_out
1461}
rustdf/cluster/utility.rs

rustdf/cluster/
utility.rs