rustdf/cluster/
cluster.rs

1use std::hash::{DefaultHasher, Hash, Hasher};
2use rayon::prelude::*;
3use rayon::ThreadPoolBuilder;
4use serde::{Deserialize, Serialize};
5use mscore::timstof::frame::TimsFrame;
6use mscore::timstof::slice::TimsSlice;
7use crate::cluster::peak::{FrameBinView, ImPeak1D, RtFrames, RtPeak1D};
8use crate::cluster::utility::{Fit1D, TofScale, fit1d_moment};
9use crate::data::handle::IndexConverter;
10
11fn compute_cluster_id_from_spec(spec: &ClusterSpec1D) -> u64 {
12    let mut h = DefaultHasher::new();
13
14    // Integer stuff is straightforward
15    spec.rt_lo.hash(&mut h);
16    spec.rt_hi.hash(&mut h);
17    spec.im_lo.hash(&mut h);
18    spec.im_hi.hash(&mut h);
19    spec.tof_win.0.hash(&mut h);
20    spec.tof_win.1.hash(&mut h);
21    spec.tof_hist_bins.hash(&mut h);
22    spec.ms_level.hash(&mut h);
23
24    // Options: encode presence + value
25    spec.window_group.unwrap_or(0).hash(&mut h);
26    spec.parent_im_id.unwrap_or(-1).hash(&mut h);
27    spec.parent_rt_id.unwrap_or(-1).hash(&mut h);
28
29    // im_prior_sigma is f32 – hash the bit pattern if present
30    let sigma_bits: u32 = spec
31        .im_prior_sigma
32        .map(|s| s.to_bits())
33        .unwrap_or(0u32);
34    sigma_bits.hash(&mut h);
35
36    h.finish()
37}
38
39#[derive(Copy, Clone, Debug)]
40pub struct ScanSlice {
41    pub scan: usize,
42    pub start: usize,
43    pub end: usize
44}
45
46pub fn build_scan_slices(fr: &TimsFrame) -> Vec<ScanSlice> {
47    let scv = &fr.scan;
48    let mut out = Vec::new();
49    if scv.is_empty() { return out; }
50    let mut s_cur = scv[0];
51    let mut i_start = 0usize;
52    for i in 1..scv.len() {
53        if scv[i] != s_cur {
54            if s_cur >= 0 {
55                out.push(ScanSlice { scan: s_cur as usize, start: i_start, end: i });
56            }
57            s_cur = scv[i];
58            i_start = i;
59        }
60    }
61    if s_cur >= 0 {
62        out.push(ScanSlice { scan: s_cur as usize, start: i_start, end: scv.len() });
63    }
64    out
65}
66
67pub struct RawAttachContext {
68    pub slice: TimsSlice,
69    pub scan_slices: Vec<Vec<ScanSlice>>,
70    pub frame_ids_local: Vec<u32>,
71    pub rt_axis_sec: Vec<f32>,
72}
73
74#[derive(Clone, Debug, Default)]
75pub struct Attach1DOptions {
76    pub attach_points: bool,
77    pub attach_axes: bool,
78    pub max_points: Option<usize>, // thinning cap
79}
80
81impl  Attach1DOptions {
82    pub fn default() -> Self {
83        Self {
84            attach_points: false,
85            attach_axes: false,
86            max_points: None,
87        }
88    }
89}
90
91#[derive(Clone, Debug)]
92pub struct Eval1DOpts {
93    /// Whether to do a 2nd-pass refine of the TOF/axis window around the argmax.
94    pub refine_tof_once: bool,
95    /// k for "fallback σ" in IM, and generic σ policies.
96    pub refine_k_sigma: f32,
97    /// Whether to attach RT / IM / axis centers into the result.
98    pub attach_axes: bool,
99    /// Whether to attach RT and/or IM traces (XICs).
100    pub attach_rt_trace: bool,
101    pub attach_im_trace: bool,
102    /// Raw point attachment options.
103    pub attach: Attach1DOptions,
104    pub compute_mz_from_tof: bool,
105
106    /// Padding to add around the final windows (in units).
107    pub pad_rt_frames: usize,
108    pub pad_im_scans: usize,
109    pub pad_tof_bins: usize,
110}
111
112impl Default for Eval1DOpts {
113    fn default() -> Self {
114        Self {
115            refine_tof_once: true,
116            refine_k_sigma: 3.0,
117            attach_axes: false,
118            attach_rt_trace: false,
119            attach_im_trace: false,
120            attach: Attach1DOptions::default(),
121            compute_mz_from_tof: false,
122            pad_rt_frames: 0,
123            pad_im_scans: 0,
124            pad_tof_bins: 0,
125        }
126    }
127}
128
129#[derive(Clone, Debug)]
130pub struct ClusterSpec1D {
131    // local (RT) indices inside the provided RtFrames slice (inclusive)
132    pub rt_lo: usize,
133    pub rt_hi: usize,
134    // absolute scan bounds (inclusive) in the frame (TIMS axis)
135    pub im_lo: usize,
136    pub im_hi: usize,
137    // bin-index window [lo, hi] on the TofScale axis (encoded as i32)
138    pub tof_win: (i32, i32),
139    // histogram resolution along the TOF-backed axis (currently a policy knob)
140    pub tof_hist_bins: usize,
141
142    // provenance (optional)
143    pub window_group: Option<u32>,
144    pub parent_im_id: Option<i64>,
145    pub parent_rt_id: Option<i64>,
146    pub ms_level: u8,
147
148    // prior σ in scan units, from detector/refiner
149    pub im_prior_sigma: Option<f32>,
150}
151
152#[derive(Clone, Debug, Default, Serialize, Deserialize)]
153pub struct RawPoints {
154    pub mz: Vec<f32>,      // kept for future use; not used in this module
155    pub rt: Vec<f32>,
156    pub im: Vec<f32>,
157    pub scan: Vec<u32>,
158    pub intensity: Vec<f32>,
159    pub tof: Vec<i32>,
160    pub frame: Vec<u32>,
161}
162
163// ==========================================================
164// ClusterResult1D sub-structs (Phase 1 refactoring)
165// ==========================================================
166
167/// Window boundaries for each axis (RT in frame indices, IM in scan indices, TOF in bin indices).
168#[derive(Clone, Debug, Default, Serialize, Deserialize)]
169pub struct Windows1D {
170    pub rt: (usize, usize),
171    pub im: (usize, usize),
172    pub tof: (usize, usize),
173    /// TOF index window (raw instrument indices, not bin indices).
174    pub tof_index: (i32, i32),
175    /// Optional m/z window (Da).
176    pub mz: Option<(f32, f32)>,
177}
178
179/// Gaussian fits for each axis.
180#[derive(Clone, Debug, Default, Serialize, Deserialize)]
181pub struct AxisFits {
182    pub rt: Fit1D,
183    pub im: Fit1D,
184    pub tof: Fit1D,
185    /// Optional m/z-domain fit (derived from TOF fit).
186    pub mz: Option<Fit1D>,
187}
188
189/// Heavy optional data (raw points, axes, traces).
190#[derive(Clone, Debug, Default, Serialize, Deserialize)]
191pub struct ClusterRawData {
192    pub raw_points: Option<RawPoints>,
193    pub rt_axis_sec: Option<Vec<f32>>,
194    pub im_axis_scans: Option<Vec<usize>>,
195    pub mz_axis_da: Option<Vec<f32>>,
196    #[serde(default)]
197    pub rt_trace: Option<Vec<f32>>,
198    #[serde(default)]
199    pub im_trace: Option<Vec<f32>>,
200}
201
202/// Cluster provenance/metadata.
203#[derive(Clone, Debug, Default, Serialize, Deserialize)]
204pub struct ClusterProvenance {
205    pub window_group: Option<u32>,
206    pub parent_im_id: Option<i64>,
207    pub parent_rt_id: Option<i64>,
208    pub ms_level: u8,
209}
210
211/// Intensity metrics for a cluster.
212#[derive(Clone, Debug, Default, Serialize, Deserialize)]
213pub struct IntensityMetrics {
214    pub raw_sum: f32,
215    pub volume_proxy: f32,
216}
217
218#[derive(Clone, Debug, Serialize, Deserialize)]
219pub struct ClusterResult1D {
220    pub cluster_id: u64,
221    pub rt_window: (usize, usize),
222    pub im_window: (usize, usize),
223    pub tof_window: (usize, usize),
224    pub tof_index_window: (i32, i32),
225
226    /// Optional m/z window (not set by this module).
227    pub mz_window: Option<(f32, f32)>,
228
229    pub rt_fit: Fit1D,
230    pub im_fit: Fit1D,
231    /// Fit on the third axis (TOF-backed / generic axis).
232    pub tof_fit: Fit1D,
233    /// Optional m/z-domain fit (not set by this module).
234    pub mz_fit: Option<Fit1D>,
235
236    pub raw_sum: f32,
237    pub volume_proxy: f32,
238
239    pub frame_ids_used: Vec<u32>,
240    pub window_group: Option<u32>,
241    pub parent_im_id: Option<i64>,
242    pub parent_rt_id: Option<i64>,
243    pub ms_level: u8,
244
245    pub rt_axis_sec: Option<Vec<f32>>,
246    pub im_axis_scans: Option<Vec<usize>>,
247    /// Optional m/z axis (not set by this module).
248    pub mz_axis_da: Option<Vec<f32>>,
249
250    /// Optional attachment of raw points (only set if requested elsewhere).
251    pub raw_points: Option<RawPoints>,
252
253    /// Optional RT XIC (intensities along rt_window).
254    #[serde(default)]
255    pub rt_trace: Option<Vec<f32>>,
256
257    /// Optional IM trace (intensities along im_window).
258    #[serde(default)]
259    pub im_trace: Option<Vec<f32>>,
260}
261
262impl ClusterResult1D {
263    pub fn merged_with(
264        &self,
265        other: &Self,
266        new_id: u64,
267        policy: &ClusterMergePolicy,
268    ) -> Self {
269        merge_clusters(self, other, new_id, policy)
270    }
271
272    // --- Grouped accessors (Phase 1 refactoring) ---
273
274    /// Get all windows as a grouped struct.
275    #[inline]
276    pub fn windows(&self) -> Windows1D {
277        Windows1D {
278            rt: self.rt_window,
279            im: self.im_window,
280            tof: self.tof_window,
281            tof_index: self.tof_index_window,
282            mz: self.mz_window,
283        }
284    }
285
286    /// Get all axis fits as a grouped struct.
287    #[inline]
288    pub fn fits(&self) -> AxisFits {
289        AxisFits {
290            rt: self.rt_fit.clone(),
291            im: self.im_fit.clone(),
292            tof: self.tof_fit.clone(),
293            mz: self.mz_fit.clone(),
294        }
295    }
296
297    /// Get raw data (axes, traces, raw points) as a grouped struct.
298    #[inline]
299    pub fn raw_data(&self) -> ClusterRawData {
300        ClusterRawData {
301            raw_points: self.raw_points.clone(),
302            rt_axis_sec: self.rt_axis_sec.clone(),
303            im_axis_scans: self.im_axis_scans.clone(),
304            mz_axis_da: self.mz_axis_da.clone(),
305            rt_trace: self.rt_trace.clone(),
306            im_trace: self.im_trace.clone(),
307        }
308    }
309
310    /// Get provenance/metadata as a grouped struct.
311    #[inline]
312    pub fn provenance(&self) -> ClusterProvenance {
313        ClusterProvenance {
314            window_group: self.window_group,
315            parent_im_id: self.parent_im_id,
316            parent_rt_id: self.parent_rt_id,
317            ms_level: self.ms_level,
318        }
319    }
320
321    /// Get intensity metrics as a grouped struct.
322    #[inline]
323    pub fn intensity(&self) -> IntensityMetrics {
324        IntensityMetrics {
325            raw_sum: self.raw_sum,
326            volume_proxy: self.volume_proxy,
327        }
328    }
329
330    /// Check if any raw data is attached (traces, axes, or raw points).
331    #[inline]
332    pub fn has_raw_data(&self) -> bool {
333        self.raw_points.is_some()
334            || self.rt_axis_sec.is_some()
335            || self.im_axis_scans.is_some()
336            || self.mz_axis_da.is_some()
337            || self.rt_trace.is_some()
338            || self.im_trace.is_some()
339    }
340
341    /// Check if this cluster has valid fits on all three main axes.
342    #[inline]
343    pub fn has_valid_fits(&self) -> bool {
344        self.rt_fit.sigma > 0.0 && self.im_fit.sigma > 0.0 && self.tof_fit.sigma > 0.0
345    }
346}
347
348// --- Sub-struct impl blocks ---
349
350impl Windows1D {
351    /// Check if two window sets overlap in all dimensions.
352    #[inline]
353    pub fn overlaps(&self, other: &Self) -> bool {
354        let rt_overlaps = self.rt.1 >= other.rt.0 && other.rt.1 >= self.rt.0;
355        let im_overlaps = self.im.1 >= other.im.0 && other.im.1 >= self.im.0;
356        let tof_overlaps = self.tof.1 >= other.tof.0 && other.tof.1 >= self.tof.0;
357        rt_overlaps && im_overlaps && tof_overlaps
358    }
359
360    /// Merge two window sets (union).
361    #[inline]
362    pub fn merge(&self, other: &Self) -> Self {
363        Self {
364            rt: (self.rt.0.min(other.rt.0), self.rt.1.max(other.rt.1)),
365            im: (self.im.0.min(other.im.0), self.im.1.max(other.im.1)),
366            tof: (self.tof.0.min(other.tof.0), self.tof.1.max(other.tof.1)),
367            tof_index: (
368                self.tof_index.0.min(other.tof_index.0),
369                self.tof_index.1.max(other.tof_index.1),
370            ),
371            mz: match (self.mz, other.mz) {
372                (Some((a, b)), Some((c, d))) => Some((a.min(c), b.max(d))),
373                (Some(x), None) | (None, Some(x)) => Some(x),
374                (None, None) => None,
375            },
376        }
377    }
378}
379
380impl AxisFits {
381    /// Check if the distance between two fit sets is within tolerances.
382    #[inline]
383    pub fn within_distance(&self, other: &Self, rt_tol: f32, im_tol: f32, tof_tol: f32) -> bool {
384        let drt = (self.rt.mu - other.rt.mu).abs();
385        let dim = (self.im.mu - other.im.mu).abs();
386        let dtof = (self.tof.mu - other.tof.mu).abs();
387        drt <= rt_tol && dim <= im_tol && dtof <= tof_tol
388    }
389}
390
391impl IntensityMetrics {
392    /// Create from raw sum, using raw_sum as volume_proxy.
393    #[inline]
394    pub fn from_raw_sum(raw_sum: f32) -> Self {
395        Self {
396            raw_sum,
397            volume_proxy: raw_sum,
398        }
399    }
400}
401
402pub fn decorate_with_mz_for_cluster<D: IndexConverter>(
403    ds: &D,
404    rt_frames: &RtFrames,
405    res: &mut ClusterResult1D,
406) {
407    let n_frames = rt_frames.frame_ids.len();
408    if n_frames == 0 {
409        return;
410    }
411
412    let scale = &*rt_frames.scale;
413
414    // ----------------------------------------------------
415    // 1) Pick a representative frame (mid of final rt_window)
416    // ----------------------------------------------------
417    let (mut rt_lo, mut rt_hi) = res.rt_window;
418    if rt_lo > rt_hi {
419        std::mem::swap(&mut rt_lo, &mut rt_hi);
420    }
421    if rt_lo >= n_frames {
422        return;
423    }
424    rt_hi = rt_hi.min(n_frames.saturating_sub(1));
425    if rt_lo > rt_hi {
426        return;
427    }
428
429    let mid_local = (rt_lo + rt_hi) / 2;
430    let frame_id = rt_frames.frame_ids[mid_local];
431
432    // ----------------------------------------------------
433    // 2) Final TOF-bin window (CSR bins) → TOF indices
434    // ----------------------------------------------------
435    let (mut bin_lo, mut bin_hi) = res.tof_window;
436    if bin_lo > bin_hi {
437        std::mem::swap(&mut bin_lo, &mut bin_hi);
438    }
439
440    let n_bins = scale.num_bins();
441    if n_bins == 0 {
442        return;
443    }
444    bin_lo = bin_lo.min(n_bins.saturating_sub(1));
445    bin_hi = bin_hi.min(n_bins.saturating_sub(1));
446    if bin_lo > bin_hi {
447        return;
448    }
449
450    let tof_lo_idx = scale.center(bin_lo).floor().max(0.0) as u32;
451    let tof_hi_idx = scale.center(bin_hi).ceil().max(0.0) as u32;
452
453    // ----------------------------------------------------
454    // 3) TOF → m/z for window endpoints
455    // ----------------------------------------------------
456    let tof_pair: Vec<u32> = vec![tof_lo_idx, tof_hi_idx];
457    let mz_pair = ds.tof_to_mz(frame_id, &tof_pair);
458    if mz_pair.len() != 2 {
459        return;
460    }
461
462    let mut mz_lo = mz_pair[0] as f32;
463    let mut mz_hi = mz_pair[1] as f32;
464    if !mz_lo.is_finite() || !mz_hi.is_finite() {
465        return;
466    }
467    if mz_lo > mz_hi {
468        std::mem::swap(&mut mz_lo, &mut mz_hi);
469    }
470
471    res.mz_window = Some((mz_lo, mz_hi));
472
473    // ----------------------------------------------------
474    // 4) Build an m/z axis (one point per TOF bin)
475    // ----------------------------------------------------
476    let tof_centers: Vec<u32> = (bin_lo..=bin_hi)
477        .map(|b| scale.center(b).round().max(0.0) as u32)
478        .collect();
479
480    let mz_axis_f64 = ds.tof_to_mz(frame_id, &tof_centers);
481    if mz_axis_f64.len() == tof_centers.len() && !mz_axis_f64.is_empty() {
482        let mz_axis_da: Vec<f32> = mz_axis_f64.iter().map(|&x| x as f32).collect();
483        res.mz_axis_da = Some(mz_axis_da.clone());
484
485        // ------------------------------------------------
486        // 5) Derive an m/z-domain Fit1D from the TOF fit
487        // ------------------------------------------------
488        if res.tof_fit.mu.is_finite() && res.tof_fit.sigma.is_finite() && res.tof_fit.sigma > 0.0 {
489            let tof_mu = res.tof_fit.mu.max(0.0).round() as u32;
490            let tof_minus = (res.tof_fit.mu - res.tof_fit.sigma).max(0.0).round() as u32;
491            let tof_plus = (res.tof_fit.mu + res.tof_fit.sigma).max(0.0).round() as u32;
492
493            let tof_triplet: Vec<u32> = vec![tof_mu, tof_minus, tof_plus];
494            let mz_triplet = ds.tof_to_mz(frame_id, &tof_triplet);
495            if mz_triplet.len() == 3 {
496                let mu_mz = mz_triplet[0] as f32;
497                let sigma_mz = ((mz_triplet[2] - mz_triplet[1]).abs() as f32 * 0.5).max(1e-6);
498
499                let mut mz_fit = res.tof_fit.clone();
500                mz_fit.mu = mu_mz;
501                mz_fit.sigma = sigma_mz;
502
503                res.mz_fit = Some(mz_fit);
504            }
505        }
506    }
507}
508// ==========================================================
509// Core CSR helpers (RT / IM / TOF axis)
510// ==========================================================
511
512#[inline]
513fn sum_frame_block(
514    fbv: &FrameBinView,
515    bin_lo: usize,
516    bin_hi: usize,
517    im_lo: usize,
518    im_hi: usize,
519) -> f32 {
520    let ub = &fbv.unique_bins;
521    if ub.is_empty() || bin_lo > bin_hi {
522        return 0.0;
523    }
524
525    let start = match ub.binary_search(&bin_lo) {
526        Ok(i) => i,
527        Err(i) => i.min(ub.len()),
528    };
529
530    let mut acc = 0.0f32;
531    let mut i = start;
532    while i < ub.len() {
533        let b = ub[i];
534        if b > bin_hi {
535            break;
536        }
537
538        let lo = fbv.offsets[i];
539        let hi = fbv.offsets[i + 1];
540        let scans = &fbv.scan_idx[lo..hi];
541        let ints = &fbv.intensity[lo..hi];
542
543        for (s, val) in scans.iter().zip(ints.iter()) {
544            let s = *s as usize;
545            if s >= im_lo && s <= im_hi {
546                acc += *val;
547            }
548        }
549
550        i += 1;
551    }
552
553    acc
554}
555
556/// Build RT marginal (len = frames in [rt_lo..rt_hi]), using (bin, scan) window.
557pub fn build_rt_marginal(
558    frames: &[FrameBinView],
559    rt_lo: usize,
560    rt_hi: usize,
561    bin_lo: usize,
562    bin_hi: usize,
563    im_lo: usize,
564    im_hi: usize,
565) -> Vec<f32> {
566    let mut out = vec![0.0f32; rt_hi + 1 - rt_lo];
567    for (k, fbv) in frames[rt_lo..=rt_hi].iter().enumerate() {
568        out[k] = sum_frame_block(fbv, bin_lo, bin_hi, im_lo, im_hi);
569    }
570    out
571}
572
573/// Build IM marginal (absolute scan axis).
574pub fn build_im_marginal(
575    frames: &[FrameBinView],
576    rt_lo: usize,
577    rt_hi: usize,
578    bin_lo: usize,
579    bin_hi: usize,
580    im_lo: usize,
581    im_hi: usize,
582) -> Vec<f32> {
583    let len = im_hi + 1 - im_lo;
584    let mut out = vec![0.0f32; len];
585
586    for fbv in &frames[rt_lo..=rt_hi] {
587        let ub = &fbv.unique_bins;
588        if ub.is_empty() {
589            continue;
590        }
591
592        let start = match ub.binary_search(&bin_lo) {
593            Ok(i) => i,
594            Err(i) => i.min(ub.len()),
595        };
596
597        let mut i = start;
598        while i < ub.len() {
599            let b = ub[i];
600            if b > bin_hi {
601                break;
602            }
603            let lo = fbv.offsets[i];
604            let hi = fbv.offsets[i + 1];
605            let scans = &fbv.scan_idx[lo..hi];
606            let ints = &fbv.intensity[lo..hi];
607
608            for (s, val) in scans.iter().zip(ints.iter()) {
609                let s_abs = *s as usize;
610                if s_abs >= im_lo && s_abs <= im_hi {
611                    out[s_abs - im_lo] += *val;
612                }
613            }
614
615            i += 1;
616        }
617    }
618
619    out
620}
621
622/// Build TOF/axis histogram over CSR bins [bin_lo..bin_hi].
623///
624/// The centers returned are whatever `TofScale::center(i)` yields
625/// (historically m/z, but here treated as a generic axis).
626pub fn build_tof_hist(
627    frames: &[FrameBinView],
628    rt_lo: usize,
629    rt_hi: usize,
630    bin_lo: usize,
631    bin_hi: usize,
632    im_lo: usize,
633    im_hi: usize,
634    scale: &TofScale,
635) -> (Vec<f32>, Vec<f32>) {
636    let r = bin_hi + 1 - bin_lo;
637    let mut hist = vec![0.0f32; r];
638
639    for fbv in &frames[rt_lo..=rt_hi] {
640        let ub = &fbv.unique_bins;
641        if ub.is_empty() {
642            continue;
643        }
644
645        let start = match ub.binary_search(&bin_lo) {
646            Ok(i) => i,
647            Err(i) => i.min(ub.len()),
648        };
649
650        let mut i = start;
651        while i < ub.len() {
652            let b = ub[i];
653            if b > bin_hi {
654                break;
655            }
656
657            let lo = fbv.offsets[i];
658            let hi = fbv.offsets[i + 1];
659
660            let scans = &fbv.scan_idx[lo..hi];
661            let ints = &fbv.intensity[lo..hi];
662
663            let mut sum = 0.0f32;
664            for (s, val) in scans.iter().zip(ints.iter()) {
665                let s_abs = *s as usize;
666                if s_abs >= im_lo && s_abs <= im_hi {
667                    sum += *val;
668                }
669            }
670
671            hist[b - bin_lo] += sum;
672            i += 1;
673        }
674    }
675
676    let centers = (bin_lo..=bin_hi).map(|i| scale.center(i)).collect::<Vec<_>>();
677    (hist, centers)
678}
679
680#[inline]
681fn sanitize_fit(
682    f: &mut Fit1D,
683    mu_bounds: Option<(f32, f32)>,
684    min_sigma: f32,
685    enforce_nonneg: bool,
686) {
687    // μ
688    if !f.mu.is_finite() {
689        if let Some((lo, hi)) = mu_bounds {
690            if lo.is_finite() && hi.is_finite() && lo <= hi {
691                f.mu = if hi > lo { 0.5 * (lo + hi) } else { lo };
692            } else {
693                f.mu = 0.0;
694            }
695        } else {
696            f.mu = 0.0;
697        }
698    }
699    // σ, height, baseline, area, r2
700    if !f.sigma.is_finite() {
701        f.sigma = 0.0;
702    }
703    if !f.height.is_finite() {
704        f.height = 0.0;
705    }
706    if !f.baseline.is_finite() {
707        f.baseline = 0.0;
708    }
709    if !f.area.is_finite() {
710        f.area = 0.0;
711    }
712    if !f.r2.is_finite() {
713        f.r2 = 0.0;
714    }
715
716    if let Some((lo, hi)) = mu_bounds {
717        if lo.is_finite() && hi.is_finite() && lo < hi {
718            if f.mu < lo {
719                f.mu = lo;
720            }
721            if f.mu > hi {
722                f.mu = hi;
723            }
724        }
725    }
726
727    if f.sigma < 0.0 {
728        f.sigma = 0.0;
729    }
730    if f.sigma > 0.0 && f.sigma < min_sigma {
731        f.sigma = min_sigma;
732    }
733
734    if enforce_nonneg {
735        if f.baseline < 0.0 {
736            f.baseline = 0.0;
737        }
738        if f.height < 0.0 {
739            f.height = 0.0;
740        }
741        if f.area < 0.0 {
742            f.area = 0.0;
743        }
744    }
745}
746
747#[inline]
748fn argmax_idx(v: &[f32]) -> usize {
749    let mut i_max = 0usize;
750    let mut best = f32::NEG_INFINITY;
751    for (i, &y) in v.iter().enumerate() {
752        if y > best {
753            best = y;
754            i_max = i;
755        }
756    }
757    i_max
758}
759
760// ==========================================================
761// Helpers for RT overlap and spec building
762// ==========================================================
763
764#[inline]
765fn rt_overlap((a0,a1):(usize, usize),(b0,b1):(usize,usize)) -> usize {
766    if a0 > a1 || b0 > b1 { return 0; }
767    let lo = a0.max(b0);
768    let hi = a1.min(b1);
769    hi.saturating_sub(lo).saturating_add(1)
770}
771
772/// Helper: derive a conservative RT window for an IM peak in local frame indices.
773/// Uses the IM peak’s stored `rt_bounds` on the same grid.
774fn rt_bounds_from_im(im: &ImPeak1D, n_frames: usize) -> (usize, usize) {
775    let (a,b) = im.rt_bounds; // (usize, usize) on the same grid as RtFrames
776    let mut lo = a.min(n_frames.saturating_sub(1));
777    let mut hi = b.min(n_frames.saturating_sub(1));
778    if lo > hi { std::mem::swap(&mut lo, &mut hi); }
779    (lo, hi)
780}
781
782/// Interpret `tof_win` as a bin-index range [lo..hi] encoded as i32.
783#[inline]
784pub fn bin_range_for_win(_scale: &TofScale, tof_win: (i32, i32)) -> (usize, usize) {
785    let (a, b) = tof_win;
786    let mut lo = a.min(b).max(0) as usize;
787    let mut hi = a.max(b).max(0) as usize;
788    if lo > hi {
789        std::mem::swap(&mut lo, &mut hi);
790    }
791    (lo, hi)
792}
793
794// ==========================================================
795// Main evaluator (grid-based, CSR/TofScale)
796// ==========================================================
797
798pub fn evaluate_spec_1d(
799    rt_frames: &RtFrames,
800    spec: &ClusterSpec1D,
801    opts: &Eval1DOpts,
802) -> ClusterResult1D {
803    debug_assert!(rt_frames.is_consistent());
804    let scale = &*rt_frames.scale;
805
806    let n_frames = rt_frames.frames.len();
807    let n_bins   = scale.num_bins();
808
809    // -------------------------------
810    // 0) Effective RT / IM / TOF windows with padding
811    // -------------------------------
812
813    // Start from spec RT window and clamp + pad in frame space
814    let mut rt_lo_eff = spec.rt_lo.min(n_frames.saturating_sub(1));
815    let mut rt_hi_eff = spec.rt_hi.min(n_frames.saturating_sub(1));
816    if rt_lo_eff > rt_hi_eff {
817        std::mem::swap(&mut rt_lo_eff, &mut rt_hi_eff);
818    }
819    if opts.pad_rt_frames > 0 && n_frames > 0 {
820        let pad = opts.pad_rt_frames;
821        rt_lo_eff = rt_lo_eff.saturating_sub(pad);
822        rt_hi_eff = (rt_hi_eff + pad).min(n_frames.saturating_sub(1));
823    }
824
825    // IM window – can pad without clamping to a global max (no bound known here).
826    let mut im_lo_eff = spec.im_lo;
827    let mut im_hi_eff = spec.im_hi;
828    if im_lo_eff > im_hi_eff {
829        std::mem::swap(&mut im_lo_eff, &mut im_hi_eff);
830    }
831    if opts.pad_im_scans > 0 {
832        let pad = opts.pad_im_scans;
833        im_lo_eff = im_lo_eff.saturating_sub(pad);
834        im_hi_eff = im_hi_eff.saturating_add(pad);
835    }
836
837    // TOF: start from spec.tof_win → CSR bin range, then pad and clamp to axis.
838    let (mut bin_lo0, mut bin_hi0) = bin_range_for_win(scale, spec.tof_win);
839    if opts.pad_tof_bins > 0 && n_bins > 0 {
840        let pad = opts.pad_tof_bins.min(n_bins.saturating_sub(1));
841        bin_lo0 = bin_lo0.saturating_sub(pad);
842        bin_hi0 = (bin_hi0 + pad).min(n_bins.saturating_sub(1));
843    }
844
845    // --- 1) first pass accumulation ---------------------------------------
846    let rt_marg1 = build_rt_marginal(
847        &rt_frames.frames,
848        rt_lo_eff,
849        rt_hi_eff,
850        bin_lo0,
851        bin_hi0,
852        im_lo_eff,
853        im_hi_eff,
854    );
855    let im_marg1 = build_im_marginal(
856        &rt_frames.frames,
857        rt_lo_eff,
858        rt_hi_eff,
859        bin_lo0,
860        bin_hi0,
861        im_lo_eff,
862        im_hi_eff,
863    );
864    let (axis_hist1, axis_centers1) = build_tof_hist(
865        &rt_frames.frames,
866        rt_lo_eff,
867        rt_hi_eff,
868        bin_lo0,
869        bin_hi0,
870        im_lo_eff,
871        im_hi_eff,
872        scale,
873    );
874
875    let rt_sum1: f32 = rt_marg1.iter().copied().sum();
876    let im_sum1: f32 = im_marg1.iter().copied().sum();
877    let ax_sum1: f32 = axis_hist1.iter().copied().sum();
878
879    let cluster_id = compute_cluster_id_from_spec(spec);
880
881    // ---- derive TOF index window from final bin window -----------------
882    let n_bins = scale.num_bins();
883    let mut bin_lo_idx = bin_lo0.min(n_bins.saturating_sub(1));
884    let mut bin_hi_idx = bin_hi0.min(n_bins.saturating_sub(1));
885    if bin_lo_idx > bin_hi_idx {
886        std::mem::swap(&mut bin_lo_idx, &mut bin_hi_idx);
887    }
888
889    // centers are in the same axis units as `fr.tof` (instrument TOF index)
890    let tof_idx_lo = scale.center(bin_lo_idx).floor().max(0.0) as i32;
891    let tof_idx_hi = scale.center(bin_hi_idx).ceil().max(0.0) as i32;
892
893    let tof_index_window = (tof_idx_lo, tof_idx_hi);
894
895    // Early exit: no signal in any axis
896    if rt_sum1 <= 0.0 || im_sum1 <= 0.0 || ax_sum1 <= 0.0 {
897        return ClusterResult1D {
898            cluster_id,
899            rt_window: (rt_lo_eff, rt_hi_eff),
900            im_window: (im_lo_eff, im_hi_eff),
901            tof_window: (bin_lo0, bin_hi0),
902            tof_index_window,
903
904            mz_window: None,
905            rt_fit: Fit1D::default(),
906            im_fit: Fit1D::default(),
907            tof_fit: Fit1D::default(),
908            mz_fit: None,
909
910            raw_sum: 0.0,
911            volume_proxy: 0.0,
912            frame_ids_used: rt_frames.frame_ids[rt_lo_eff..=rt_hi_eff].to_vec(),
913            window_group: spec.window_group,
914            parent_im_id: spec.parent_im_id,
915            parent_rt_id: spec.parent_rt_id,
916            ms_level: spec.ms_level,
917            rt_axis_sec: opts
918                .attach_axes
919                .then_some(rt_frames.rt_times[rt_lo_eff..=rt_hi_eff].to_vec()),
920            im_axis_scans: opts
921                .attach_axes
922                .then_some((im_lo_eff..=im_hi_eff).collect()),
923            mz_axis_da: None,
924            raw_points: None,
925            rt_trace: opts
926                .attach_rt_trace
927                .then_some(thin_f32_vec(&rt_marg1, None)),
928            im_trace: opts
929                .attach_im_trace
930                .then_some(thin_f32_vec(&im_marg1, None)),
931        };
932    }
933
934    // --- 2) argmax + optional refine of bin window ------------------------
935    let i_max1 = argmax_idx(&axis_hist1);
936
937    let (bin_lo, bin_hi, axis_centers2) = {
938        if opts.refine_tof_once {
939            let center_bin = bin_lo0 + i_max1; // local index into [bin_lo0..bin_hi0]
940            let half_span: usize = 4;
941
942            let bl = center_bin.saturating_sub(half_span).max(bin_lo0);
943            let bh = (center_bin + half_span).min(bin_hi0);
944
945            (bl, bh, (bl..=bh).map(|i| scale.center(i)).collect::<Vec<_>>())
946        } else {
947            (bin_lo0, bin_hi0, axis_centers1.clone())
948        }
949    };
950
951    // --- 3) re-accumulate in refined window -------------------------------
952    let rt_marg = build_rt_marginal(
953        &rt_frames.frames,
954        rt_lo_eff,
955        rt_hi_eff,
956        bin_lo,
957        bin_hi,
958        im_lo_eff,
959        im_hi_eff,
960    );
961    let im_marg = build_im_marginal(
962        &rt_frames.frames,
963        rt_lo_eff,
964        rt_hi_eff,
965        bin_lo,
966        bin_hi,
967        im_lo_eff,
968        im_hi_eff,
969    );
970    let (axis_hist, _axis_centers_final) = build_tof_hist(
971        &rt_frames.frames,
972        rt_lo_eff,
973        rt_hi_eff,
974        bin_lo,
975        bin_hi,
976        im_lo_eff,
977        im_hi_eff,
978        scale,
979    );
980
981    let rt_sum: f32 = rt_marg.iter().copied().sum();
982    let im_sum: f32 = im_marg.iter().copied().sum();
983    let ax_sum: f32 = axis_hist.iter().copied().sum();
984
985    let (use_bin_lo, use_bin_hi, use_rt_marg, use_im_marg, use_axis_hist, use_axis_centers) =
986        if rt_sum <= 0.0 || im_sum <= 0.0 || ax_sum <= 0.0 {
987            (bin_lo0, bin_hi0, &rt_marg1, &im_marg1, &axis_hist1, &axis_centers1)
988        } else {
989            (bin_lo, bin_hi, &rt_marg, &im_marg, &axis_hist, &axis_centers2)
990        };
991
992    // capture traces (possibly thinned)
993    let rt_trace_opt = if opts.attach_rt_trace {
994        Some(thin_f32_vec(use_rt_marg, None))
995    } else {
996        None
997    };
998    let im_trace_opt = if opts.attach_im_trace {
999        Some(thin_f32_vec(use_im_marg, None))
1000    } else {
1001        None
1002    };
1003
1004    // --- 4) final fits: RT/IM moments, axis argmax + FWHM σ ---------------
1005    // --- 4) final fits: RT/IM/TOF via moments -----------------------------
1006    let rt_times: Vec<f32> = rt_frames.rt_times[rt_lo_eff..=rt_hi_eff].to_vec();
1007    let im_axis: Vec<usize> = (im_lo_eff..=im_hi_eff).collect();
1008    let im_axis_f32: Vec<f32> = im_axis.iter().map(|&s| s as f32).collect();
1009
1010    // RT / IM moments as before
1011    let mut rt_fit = fit1d_moment(use_rt_marg, Some(&rt_times));
1012    let mut im_fit = fit1d_moment(use_im_marg, Some(&im_axis_f32));
1013
1014    // TOF / third axis: moment-based fit instead of FWHM/Parabola
1015    let axis_centers_f32: Vec<f32> = use_axis_centers.iter().copied().collect();
1016    let mut tof_fit = fit1d_moment(use_axis_hist, Some(&axis_centers_f32));
1017
1018    // bounds for μ clamping
1019    let rt_mu_bounds =
1020        if rt_lo_eff < rt_frames.rt_times.len() && rt_hi_eff < rt_frames.rt_times.len() {
1021            Some((rt_frames.rt_times[rt_lo_eff], rt_frames.rt_times[rt_hi_eff]))
1022        } else {
1023            None
1024        };
1025
1026    let axis_min = *use_axis_centers
1027        .first()
1028        .unwrap_or(&tof_fit.mu);
1029    let axis_max = *use_axis_centers
1030        .last()
1031        .unwrap_or(&tof_fit.mu);
1032    let axis_mu_bounds = Some((axis_min, axis_max));
1033    let im_mu_bounds = Some((im_lo_eff as f32, im_hi_eff as f32));
1034
1035    // sanitize + IM fallbacks (same as before)
1036    sanitize_fit(&mut tof_fit, axis_mu_bounds, 1e-6, true);
1037    sanitize_fit(&mut rt_fit, rt_mu_bounds, 1e-6, true);
1038    sanitize_fit(&mut im_fit, im_mu_bounds, 1e-3, true);
1039
1040    if im_fit.mu == 0.0 {
1041        let lo = im_lo_eff as f32;
1042        let hi = im_hi_eff as f32;
1043        im_fit.mu = if hi > lo { 0.5 * (lo + hi) } else { lo.max(1.0) };
1044    }
1045    if !im_fit.sigma.is_finite() || im_fit.sigma <= 0.0 {
1046        let k = opts.refine_k_sigma.max(1.0);
1047        let width = (im_hi_eff.saturating_sub(im_lo_eff) as f32) + 1.0;
1048        let from_window = ((width - 1.0) / (2.0 * k)).max(0.0);
1049        let prior = spec.im_prior_sigma.unwrap_or(0.0).max(0.0);
1050        let mut s = prior.max(from_window).max(1e-3);
1051        if !s.is_finite() {
1052            s = 1e-3;
1053        }
1054        im_fit.sigma = s;
1055    }
1056
1057    let raw_sum: f32 = use_rt_marg.iter().copied().sum();
1058    let volume_proxy = (rt_fit.area.max(0.0))
1059        * (im_fit.area.max(0.0))
1060        * (tof_fit.area.max(0.0));
1061
1062    // ---- derive TOF index window from final bin window -----------------
1063    let n_bins = scale.num_bins();
1064    let mut bin_lo_idx = use_bin_lo.min(n_bins.saturating_sub(1));
1065    let mut bin_hi_idx = use_bin_hi.min(n_bins.saturating_sub(1));
1066    if bin_lo_idx > bin_hi_idx {
1067        std::mem::swap(&mut bin_lo_idx, &mut bin_hi_idx);
1068    }
1069
1070    // centers are in the same axis units as `fr.tof` (instrument TOF index)
1071    let tof_idx_lo = scale.center(bin_lo_idx).floor().max(0.0) as i32;
1072    let tof_idx_hi = scale.center(bin_hi_idx).ceil().max(0.0) as i32;
1073
1074    let tof_index_window = (tof_idx_lo, tof_idx_hi);
1075
1076    ClusterResult1D {
1077        cluster_id,
1078        rt_window: (rt_lo_eff, rt_hi_eff),
1079        im_window: (im_lo_eff, im_hi_eff),
1080        tof_window: (use_bin_lo, use_bin_hi),
1081        tof_index_window,
1082
1083        mz_window: None,   // not set here
1084        rt_fit,
1085        im_fit,
1086        tof_fit,
1087        mz_fit: None,      // not set here
1088
1089        raw_sum,
1090        volume_proxy,
1091        frame_ids_used: rt_frames.frame_ids[rt_lo_eff..=rt_hi_eff].to_vec(),
1092        window_group: spec.window_group,
1093        parent_im_id: spec.parent_im_id,
1094        parent_rt_id: spec.parent_rt_id,
1095        ms_level: spec.ms_level,
1096        rt_axis_sec: opts.attach_axes.then_some(rt_times),
1097        im_axis_scans: opts.attach_axes.then_some(im_axis),
1098        mz_axis_da: None,  // not set here
1099        raw_points: None,
1100        rt_trace: rt_trace_opt,
1101        im_trace: im_trace_opt,
1102    }
1103}
1104
1105// ==========================================================
1106// Spec building (IM+RT → ClusterSpec1D)
1107// ==========================================================
1108
1109#[derive(Clone, Copy, Debug)]
1110pub struct BuildSpecOpts {
1111    /// Extra RT padding (in local frame indices) around the RT window
1112    /// derived from the RT peak / IM peak overlap.
1113    pub extra_rt_pad: usize,
1114
1115    /// Extra IM padding (in scan indices) around the IM window derived
1116    /// from the IM peak [left, right] scan range.
1117    pub extra_im_pad: usize,
1118
1119    /// Symmetric padding in *TOF bins* around the minimal TOF window
1120    /// implied by the IM peak.
1121    pub tof_bin_pad: usize,
1122
1123    /// Histogram resolution along the TOF-backed axis.
1124    pub tof_hist_bins: usize,
1125
1126    /// MS level that this spec is intended for (1 = MS1, 2 = MS2/DIA).
1127    pub ms_level: u8,
1128
1129    /// Minimum IM span in absolute scan units (e.g. 10).
1130    pub min_im_span: usize,
1131
1132    /// k for σ logic in IM: when we need a fallback σ_im, we ensure that
1133    /// the window roughly covers ±k·σ around the IM peak apex.
1134    pub im_k_sigma: f32,
1135}
1136
1137impl BuildSpecOpts {
1138    #[inline]
1139    pub fn with_ms_level(self, level: u8) -> Self {
1140        let mut o = self;
1141        o.ms_level = level;
1142        o
1143    }
1144
1145    #[inline]
1146    pub fn ms1_defaults() -> Self {
1147        BuildSpecOpts {
1148            extra_rt_pad: 0,
1149            extra_im_pad: 0,
1150            tof_bin_pad: 0,
1151            tof_hist_bins: 16,
1152            ms_level: 1,
1153            min_im_span: 10,
1154            im_k_sigma: 3.0,
1155        }
1156    }
1157
1158    #[inline]
1159    pub fn ms2_defaults() -> Self {
1160        BuildSpecOpts {
1161            extra_rt_pad: 0,
1162            extra_im_pad: 0,
1163            tof_bin_pad: 0,
1164            tof_hist_bins: 16,
1165            ms_level: 2,
1166            min_im_span: 10,
1167            im_k_sigma: 3.0,
1168        }
1169    }
1170}
1171
1172#[inline]
1173fn widen_scan_window(lo: usize, hi: usize, want_span: usize, center: usize) -> (usize, usize) {
1174    let cur_span = hi.saturating_sub(lo).saturating_add(1);
1175    if cur_span >= want_span { return (lo, hi); }
1176    let half = want_span / 2;
1177    let new_lo = center.saturating_sub(half);
1178    let new_hi = center.saturating_add(want_span - 1 - half);
1179    (new_lo.min(lo), new_hi.max(hi))  // keep original covered, but ensure ≥ want_span overall
1180}
1181
1182#[inline]
1183fn scans_from_sigma(k: f32, sigma: f32) -> usize {
1184    // cover ±kσ around the apex, +1 for inclusive range
1185    let k = k.max(0.0);
1186    let s = sigma.max(0.0);
1187    let span = (2.0 * k * s).ceil() as isize + 1;
1188    span.max(1) as usize
1189}
1190
1191/// Map a bin-index window from ImPeak1D (`tof_bounds`) to a clamped CSR bin range.
1192#[inline]
1193fn axis_bin_range_for_bounds(scale: &TofScale, bounds: (i32, i32)) -> (usize, usize) {
1194    let (mut lo, mut hi) = bounds;
1195    if lo > hi {
1196        std::mem::swap(&mut lo, &mut hi);
1197    }
1198
1199    let n_bins = scale.num_bins();
1200    if n_bins == 0 {
1201        return (0, 0);
1202    }
1203
1204    // Use the same logic as for traces: map axis window to a bin range
1205    let (mut bin_lo, mut bin_hi) = scale.index_range_for_tof_window(lo, hi);
1206
1207    // Clamp (defensive)
1208    bin_lo = bin_lo.min(n_bins.saturating_sub(1));
1209    bin_hi = bin_hi.min(n_bins.saturating_sub(1));
1210    if bin_lo > bin_hi {
1211        std::mem::swap(&mut bin_lo, &mut bin_hi);
1212    }
1213
1214    (bin_lo, bin_hi)
1215}
1216
1217pub fn make_spec_from_pair(
1218    im: &ImPeak1D,
1219    rt: &RtPeak1D,
1220    rt_frames: &RtFrames,
1221    opts: &BuildSpecOpts,
1222) -> ClusterSpec1D {
1223    let frames_len = rt_frames.frames.len();
1224    let scale      = &*rt_frames.scale;
1225
1226    // 1) RT window (local frame indices) + padding
1227    let (mut rt_lo, mut rt_hi) = rt.rt_bounds_frames;
1228    rt_lo = rt_lo
1229        .saturating_sub(opts.extra_rt_pad)
1230        .min(frames_len.saturating_sub(1));
1231    rt_hi = rt_hi
1232        .saturating_add(opts.extra_rt_pad)
1233        .min(frames_len.saturating_sub(1));
1234    if rt_lo > rt_hi {
1235        std::mem::swap(&mut rt_lo, &mut rt_hi);
1236    }
1237
1238    // 2) IM window (scan indices) with min span & prior σ
1239    let mut im_lo = im.left.saturating_sub(opts.extra_im_pad);
1240    let mut im_hi = im.right.saturating_add(opts.extra_im_pad);
1241    if im_lo > im_hi {
1242        std::mem::swap(&mut im_lo, &mut im_hi);
1243    }
1244
1245    let prior_sigma = im.scan_sigma.unwrap_or(0.0).max(0.0);
1246    let k_im = if opts.im_k_sigma.is_finite() {
1247        opts.im_k_sigma.max(0.0)
1248    } else {
1249        3.0
1250    };
1251
1252    let want_from_prior = if prior_sigma > 0.0 {
1253        scans_from_sigma(k_im, prior_sigma)
1254    } else {
1255        0
1256    };
1257    let want_from_measured = im.width_scans.max(2);
1258
1259    let mut want_span = opts.min_im_span.max(want_from_measured);
1260    if want_from_prior > 0 {
1261        want_span = want_span.max(want_from_prior);
1262    }
1263
1264    let (w_lo, w_hi) = widen_scan_window(im_lo, im_hi, want_span, im.scan);
1265    im_lo = w_lo;
1266    im_hi = w_hi;
1267
1268    // 3) TOF-bin window (no ppm, pad in bin space)
1269    let axis_bounds = im.tof_bounds; // bin-ish bounds from detector
1270    let (mut bin_lo, mut bin_hi) = axis_bin_range_for_bounds(scale, axis_bounds);
1271    if bin_lo > bin_hi {
1272        std::mem::swap(&mut bin_lo, &mut bin_hi);
1273    }
1274
1275    let n_bins = scale.edges.len().saturating_sub(1);
1276    if n_bins > 0 {
1277        let pad = opts.tof_bin_pad;
1278        if pad > 0 {
1279            bin_lo = bin_lo.saturating_sub(pad);
1280            bin_hi = (bin_hi + pad).min(n_bins - 1);
1281        }
1282    } else {
1283        bin_lo = 0;
1284        bin_hi = 0;
1285    }
1286
1287    let tof_win = (bin_lo as i32, bin_hi as i32);
1288
1289    debug_assert!(
1290        bin_lo <= bin_hi && bin_hi < scale.num_bins(),
1291        "axis_bin_range_for_bounds produced invalid bin range: {:?} -> ({}, {}) / n_bins={}",
1292        im.tof_bounds, bin_lo, bin_hi, scale.num_bins()
1293    );
1294
1295    ClusterSpec1D {
1296        rt_lo,
1297        rt_hi,
1298        im_lo,
1299        im_hi,
1300        tof_win,
1301        tof_hist_bins: opts.tof_hist_bins.max(16),
1302        window_group: im.window_group,
1303        parent_im_id: Some(im.id),
1304        parent_rt_id: Some(rt.id),
1305        ms_level: opts.ms_level,
1306        im_prior_sigma: if prior_sigma > 0.0 { Some(prior_sigma) } else { None },
1307    }
1308}
1309
1310// ==========================================================
1311// Parallel spec construction
1312// ==========================================================
1313
1314pub fn make_specs_from_im_and_rt_groups_threads(
1315    im_peaks: &[ImPeak1D],
1316    rt_groups: &[Vec<RtPeak1D>],
1317    rt_frames: &RtFrames,
1318    opts: &BuildSpecOpts,
1319    require_rt_overlap: bool,
1320    num_threads: usize,
1321) -> Vec<ClusterSpec1D> {
1322    assert_eq!(im_peaks.len(), rt_groups.len(), "rt_groups length mismatch");
1323    let n_frames = rt_frames.frames.len();
1324    if n_frames == 0 || im_peaks.is_empty() { return Vec::new(); }
1325
1326    let build = || {
1327        (0..im_peaks.len()).into_par_iter()
1328            .flat_map_iter(|i| {
1329                let im = &im_peaks[i];
1330                let rts = &rt_groups[i];
1331
1332                let mut out: Vec<ClusterSpec1D> = Vec::new();
1333                if rts.is_empty() { return out.into_iter(); }
1334
1335                let im_rt = rt_bounds_from_im(im, n_frames);
1336
1337                for rt in rts {
1338                    let mut rt_lo = rt.rt_bounds_frames.0.min(n_frames.saturating_sub(1));
1339                    let mut rt_hi = rt.rt_bounds_frames.1.min(n_frames.saturating_sub(1));
1340                    if rt_lo > rt_hi { std::mem::swap(&mut rt_lo, &mut rt_hi); }
1341
1342                    if require_rt_overlap && rt_overlap((rt_lo, rt_hi), im_rt) == 0 {
1343                        continue;
1344                    }
1345
1346                    let mut spec = make_spec_from_pair(im, rt, rt_frames, opts);
1347                    spec.rt_lo = spec.rt_lo.min(n_frames.saturating_sub(1));
1348                    spec.rt_hi = spec.rt_hi.min(n_frames.saturating_sub(1));
1349                    if spec.rt_lo > spec.rt_hi { std::mem::swap(&mut spec.rt_lo, &mut spec.rt_hi); }
1350                    if spec.im_lo > spec.im_hi { std::mem::swap(&mut spec.im_lo, &mut spec.im_hi); }
1351
1352                    out.push(spec);
1353                }
1354
1355                out.into_iter()
1356            })
1357            .collect::<Vec<_>>()
1358    };
1359
1360    if num_threads == 0 {
1361        build()
1362    } else {
1363        ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap().install(build)
1364    }
1365}
1366
1367// ==========================================================
1368// Helpers for axis padding + search
1369// ==========================================================
1370
1371#[inline]
1372fn cushion_hi_edge(scale: &TofScale, hi_edge: f32) -> f32 {
1373    // Generic: expand the upper edge by a small fraction of a bin width,
1374    // clamp to the axis max (last edge).
1375    let axis_max = scale
1376        .edges
1377        .last()
1378        .copied()
1379        .unwrap_or(hi_edge);
1380
1381    let n = scale.edges.len();
1382    let bin_width = if n >= 2 {
1383        let span = scale.edges[n - 1] - scale.edges[0];
1384        if span.is_finite() && span != 0.0 {
1385            (span / (n as f32 - 1.0)).abs()
1386        } else {
1387            0.0
1388        }
1389    } else {
1390        0.0
1391    };
1392
1393    let eps = 0.25 * bin_width; // small cushion, axis-agnostic
1394    (hi_edge + eps).min(axis_max)
1395}
1396
1397#[inline]
1398fn thin_stride(total: usize, cap: usize) -> usize {
1399    if cap == 0 || total <= cap {
1400        1
1401    } else {
1402        (total + cap - 1) / cap
1403    }
1404}
1405
1406// ==========================================================
1407// Context-based raw attachment (no I/O here)
1408// ==========================================================
1409
1410pub fn attach_raw_points_for_spec_1d_in_ctx(
1411    ctx: &RawAttachContext,
1412    scale: &TofScale,
1413    final_bin_lo: usize,
1414    final_bin_hi: usize,
1415    final_im_lo: usize,
1416    final_im_hi: usize,
1417    final_rt_lo: usize,
1418    final_rt_hi: usize,
1419    max_points: Option<usize>,
1420) -> RawPoints {
1421    let n_bins = scale.num_bins();
1422    if n_bins == 0 {
1423        return RawPoints::default();
1424    }
1425
1426    // Clamp bin indices
1427    let mut bin_lo = final_bin_lo.min(n_bins.saturating_sub(1));
1428    let mut bin_hi = final_bin_hi.min(n_bins.saturating_sub(1));
1429    if bin_lo > bin_hi {
1430        std::mem::swap(&mut bin_lo, &mut bin_hi);
1431    }
1432
1433    // Axis window from bin edges in TOF-scale units
1434    let axis_lo = scale.edges[bin_lo];
1435    let hi_edge_idx = (bin_hi + 1).min(scale.edges.len().saturating_sub(1));
1436    let axis_hi = cushion_hi_edge(scale, scale.edges[hi_edge_idx]);
1437
1438    // Reuse the pre-built slice and scan_slices
1439    let slice = &ctx.slice;
1440    let scan_slices = &ctx.scan_slices;
1441
1442    // Defensive clamp of RT window to slice length
1443    let n_frames = slice.frames.len();
1444    if n_frames == 0 {
1445        return RawPoints::default();
1446    }
1447    let rt_lo = final_rt_lo.min(n_frames.saturating_sub(1));
1448    let rt_hi = final_rt_hi.min(n_frames.saturating_sub(1));
1449    if rt_lo > rt_hi {
1450        return RawPoints::default();
1451    }
1452
1453    // 1) Count how many points fall into (RT, IM, TOF) window
1454    let mut total = 0usize;
1455    for fi in rt_lo..=rt_hi {
1456        let fr = &slice.frames[fi];
1457        let tofs = &fr.tof;             // <-- TOF index / axis
1458        let len_all = tofs.len();
1459
1460        for sl in &scan_slices[fi] {
1461            let s_abs = sl.scan;
1462            if s_abs < final_im_lo || s_abs > final_im_hi {
1463                continue;
1464            }
1465
1466            // Linear scan over this scan slice in TOF space
1467            let start = sl.start.min(len_all);
1468            let end   = sl.end.min(len_all);
1469            for idx in start..end {
1470                let tof_val = tofs[idx] as f32;
1471                if tof_val >= axis_lo && tof_val < axis_hi {
1472                    total += 1;
1473                }
1474            }
1475        }
1476    }
1477
1478    // If still empty → bail with empty container
1479    if total == 0 {
1480        return RawPoints::default();
1481    }
1482
1483    let stride = max_points
1484        .map(|cap| thin_stride(total, cap))
1485        .unwrap_or(1);
1486    let reserve = total / stride + 8;
1487
1488    let mut pts = RawPoints {
1489        mz: Vec::with_capacity(reserve),
1490        rt: Vec::with_capacity(reserve),
1491        im: Vec::with_capacity(reserve),
1492        scan: Vec::with_capacity(reserve),
1493        intensity: Vec::with_capacity(reserve),
1494        tof: Vec::with_capacity(reserve),
1495        frame: Vec::with_capacity(reserve),
1496    };
1497
1498    let rt_axis_sec = &ctx.rt_axis_sec[rt_lo..=rt_hi];
1499    let frame_ids_local = &ctx.frame_ids_local[rt_lo..=rt_hi];
1500
1501    // 2) Extract with thinning, TOF-based membership test
1502    let mut seen = 0usize;
1503    for fi in rt_lo..=rt_hi {
1504        let fr    = &slice.frames[fi];
1505        let mz    = &fr.ims_frame.mz;
1506        let it    = &fr.ims_frame.intensity;
1507        let ims   = &fr.ims_frame.mobility;
1508        let tofs  = &fr.tof;
1509
1510        let len_all = mz
1511            .len()
1512            .min(it.len())
1513            .min(ims.len())
1514            .min(tofs.len());
1515
1516        let rt_val   = rt_axis_sec[fi - rt_lo];
1517        let frame_id = frame_ids_local[fi - rt_lo];
1518
1519        for sl in &scan_slices[fi] {
1520            let s_abs = sl.scan;
1521            if s_abs < final_im_lo || s_abs > final_im_hi {
1522                continue;
1523            }
1524
1525            let start = sl.start.min(len_all);
1526            let end   = sl.end.min(len_all);
1527            if start >= end {
1528                continue;
1529            }
1530
1531            let mut idx = start;
1532            while idx < end {
1533                let tof_val = tofs[idx] as f32;
1534                if tof_val >= axis_lo && tof_val < axis_hi {
1535                    if stride == 1 || (seen % stride == 0) {
1536                        pts.mz.push(mz[idx] as f32);          // payload in m/z
1537                        pts.rt.push(rt_val);
1538                        pts.im.push(ims[idx] as f32);
1539                        pts.scan.push(s_abs as u32);
1540                        pts.intensity.push(it[idx] as f32);
1541                        pts.frame.push(frame_id);
1542                        pts.tof.push(tofs[idx]);             // keep raw TOF index
1543                    }
1544                    seen += 1;
1545                }
1546                idx += 1;
1547            }
1548        }
1549    }
1550
1551    pts
1552}
1553
1554#[inline]
1555fn thin_f32_vec(v: &[f32], cap: Option<usize>) -> Vec<f32> {
1556    let n = v.len();
1557    let stride = cap.map(|c| thin_stride(n, c)).unwrap_or(1);
1558    if stride <= 1 {
1559        return v.to_vec();
1560    }
1561    let mut out = Vec::with_capacity((n + stride - 1) / stride);
1562    let mut i = 0usize;
1563    while i < n {
1564        out.push(v[i]);
1565        i += stride;
1566    }
1567    out
1568}
1569
1570/// For a set of clusters that are all defined on the same RtFrames/grid
1571/// (i.e. same slice/context), extract RawPoints per cluster.
1572///
1573/// This is the "does the box actually give us the right raw points?" primitive.
1574pub fn extract_raw_points_for_clusters_in_ctx(
1575    ctx: &RawAttachContext,
1576    scale: &TofScale,
1577    clusters: &[ClusterResult1D],
1578    max_points: Option<usize>,
1579) -> Vec<RawPoints> {
1580    clusters
1581        .iter()
1582        .map(|c| {
1583            let (rt_lo, rt_hi) = c.rt_window;
1584            let (im_lo, im_hi) = c.im_window;
1585            let (tof_lo, tof_hi) = c.tof_window;
1586
1587            attach_raw_points_for_spec_1d_in_ctx(
1588                ctx,
1589                scale,
1590                tof_lo,
1591                tof_hi,
1592                im_lo,
1593                im_hi,
1594                rt_lo,
1595                rt_hi,
1596                max_points,
1597            )
1598        })
1599        .collect()
1600}
1601
1602#[derive(Clone, Debug)]
1603pub struct ClusterMergePolicy {
1604    pub require_same_ms_level: bool,
1605    pub require_same_window_group: bool,
1606    pub require_same_parents: bool,
1607
1608    /// If true, we keep axis/traces only if they’re bit-identical.
1609    /// Otherwise, we drop them (set to None) in the merged result.
1610    pub keep_axes_if_identical: bool,
1611    pub keep_traces_if_identical: bool,
1612}
1613
1614impl Default for ClusterMergePolicy {
1615    fn default() -> Self {
1616        Self {
1617            require_same_ms_level: true,
1618            require_same_window_group: true,
1619            require_same_parents: true,
1620            keep_axes_if_identical: true,
1621            keep_traces_if_identical: true,
1622        }
1623    }
1624}
1625
1626#[derive(Clone, Debug)]
1627pub struct ClusterMergeDistancePolicy {
1628    /// Max allowed difference in RT centers (same units as rt_fit.mu, usually frames).
1629    pub max_rt_center_delta: f32,
1630    /// Max allowed difference in IM centers (same units as im_fit.mu, usually scans).
1631    pub max_im_center_delta: f32,
1632    /// Max allowed difference in TOF centers (same units as tof_fit.mu, usually bins).
1633    pub max_tof_center_delta: f32,
1634    /// Optional m/z tolerance in Da for mz_fit (0.0 => ignore).
1635    pub max_mz_center_delta_da: f32,
1636}
1637
1638impl Default for ClusterMergeDistancePolicy {
1639    fn default() -> Self {
1640        Self {
1641            max_rt_center_delta: 0.0,
1642            max_im_center_delta: 0.0,
1643            max_tof_center_delta: 0.0,
1644            max_mz_center_delta_da: 0.0,
1645        }
1646    }
1647}
1648
1649/// Helper: sum intensities of a RawPoints object (used after dedup).
1650fn sum_intensity(raw: &RawPoints) -> f32 {
1651    raw.intensity
1652        .iter()
1653        .copied()
1654        .fold(0.0_f32, |acc, x| acc + x)
1655}
1656
1657fn merge_raw_points(a: &Option<RawPoints>, b: &Option<RawPoints>) -> Option<RawPoints> {
1658    match (a, b) {
1659        (None, None) => None,
1660        (Some(x), None) => Some(x.clone()),
1661        (None, Some(y)) => Some(y.clone()),
1662        (Some(x), Some(y)) => {
1663            // 1) Concatenate into a temporary buffer
1664            let total = x.mz.len() + y.mz.len();
1665
1666            let mut tmp = RawPoints {
1667                mz: Vec::with_capacity(total),
1668                rt: Vec::with_capacity(total),
1669                im: Vec::with_capacity(total),
1670                scan: Vec::with_capacity(total),
1671                intensity: Vec::with_capacity(total),
1672                tof: Vec::with_capacity(total),
1673                frame: Vec::with_capacity(total),
1674            };
1675
1676            tmp.mz.extend_from_slice(&x.mz);
1677            tmp.mz.extend_from_slice(&y.mz);
1678            tmp.rt.extend_from_slice(&x.rt);
1679            tmp.rt.extend_from_slice(&y.rt);
1680            tmp.im.extend_from_slice(&x.im);
1681            tmp.im.extend_from_slice(&y.im);
1682            tmp.scan.extend_from_slice(&x.scan);
1683            tmp.scan.extend_from_slice(&y.scan);
1684            tmp.intensity.extend_from_slice(&x.intensity);
1685            tmp.intensity.extend_from_slice(&y.intensity);
1686            tmp.tof.extend_from_slice(&x.tof);
1687            tmp.tof.extend_from_slice(&y.tof);
1688            tmp.frame.extend_from_slice(&x.frame);
1689            tmp.frame.extend_from_slice(&y.frame);
1690
1691            debug_assert_eq!(tmp.mz.len(), total);
1692            debug_assert_eq!(tmp.rt.len(), total);
1693            debug_assert_eq!(tmp.im.len(), total);
1694            debug_assert_eq!(tmp.scan.len(), total);
1695            debug_assert_eq!(tmp.intensity.len(), total);
1696            debug_assert_eq!(tmp.tof.len(), total);
1697            debug_assert_eq!(tmp.frame.len(), total);
1698
1699            // 2) Sort by (frame, scan, tof, mz_bits, original_index)
1700            use std::cmp::Ordering;
1701
1702            let n = total;
1703            let mut idx: Vec<usize> = (0..n).collect();
1704
1705            idx.sort_unstable_by(|&i, &j| {
1706                let fi = tmp.frame[i];
1707                let fj = tmp.frame[j];
1708                match fi.cmp(&fj) {
1709                    Ordering::Less => return Ordering::Less,
1710                    Ordering::Greater => return Ordering::Greater,
1711                    Ordering::Equal => {}
1712                }
1713
1714                let si = tmp.scan[i];
1715                let sj = tmp.scan[j];
1716                match si.cmp(&sj) {
1717                    Ordering::Less => return Ordering::Less,
1718                    Ordering::Greater => return Ordering::Greater,
1719                    Ordering::Equal => {}
1720                }
1721
1722                let ti = tmp.tof[i];
1723                let tj = tmp.tof[j];
1724                match ti.cmp(&tj) {
1725                    Ordering::Less => return Ordering::Less,
1726                    Ordering::Greater => return Ordering::Greater,
1727                    Ordering::Equal => {}
1728                }
1729
1730                // mz: use to_bits for a total order; avoids NaN weirdness
1731                let mi = tmp.mz[i].to_bits();
1732                let mj = tmp.mz[j].to_bits();
1733                match mi.cmp(&mj) {
1734                    Ordering::Less => return Ordering::Less,
1735                    Ordering::Greater => return Ordering::Greater,
1736                    Ordering::Equal => {}
1737                }
1738
1739                // final tie-breaker: original index → deterministic
1740                i.cmp(&j)
1741            });
1742
1743            // 3) Apply permutation with deduplication:
1744            //    only keep a point if its (frame, scan, tof, mz_bits) key is new.
1745            let mut out = RawPoints {
1746                mz: Vec::with_capacity(n),
1747                rt: Vec::with_capacity(n),
1748                im: Vec::with_capacity(n),
1749                scan: Vec::with_capacity(n),
1750                intensity: Vec::with_capacity(n),
1751                tof: Vec::with_capacity(n),
1752                frame: Vec::with_capacity(n),
1753            };
1754
1755            let mut last_key: Option<(u32, u32, i32, u32)> = None;
1756
1757            for k in idx {
1758                let key = (
1759                    tmp.frame[k],
1760                    tmp.scan[k],
1761                    tmp.tof[k],
1762                    tmp.mz[k].to_bits(),
1763                );
1764
1765                if Some(key) == last_key {
1766                    // exact duplicate of previous point → skip
1767                    continue;
1768                }
1769                last_key = Some(key);
1770
1771                out.mz.push(tmp.mz[k]);
1772                out.rt.push(tmp.rt[k]);
1773                out.im.push(tmp.im[k]);
1774                out.scan.push(tmp.scan[k]);
1775                out.intensity.push(tmp.intensity[k]);
1776                out.tof.push(tmp.tof[k]);
1777                out.frame.push(tmp.frame[k]);
1778            }
1779
1780            Some(out)
1781        }
1782    }
1783}
1784
1785fn merge_fit1d(a: &Fit1D, b: &Fit1D) -> Fit1D {
1786    // If one is basically empty, return the other
1787    if a.area <= 0.0 {
1788        return b.clone();
1789    }
1790    if b.area <= 0.0 {
1791        return a.clone();
1792    }
1793
1794    let a_area = a.area;
1795    let b_area = b.area;
1796    let total = a_area + b_area;
1797
1798    let mu = (a.mu * a_area + b.mu * b_area) / total;
1799
1800    let s1 = a.sigma * a.sigma + a.mu * a.mu;
1801    let s2 = b.sigma * b.sigma + b.mu * b.mu;
1802    let second_moment = (a_area * s1 + b_area * s2) / total;
1803    let var = (second_moment - mu * mu).max(0.0);
1804    let sigma = var.sqrt();
1805
1806    // height: keep max of the two (good enough approximation)
1807    let height = a.height.max(b.height);
1808
1809    let mut out = a.clone();
1810    out.mu = mu;
1811    out.sigma = sigma;
1812    out.area = total;
1813    out.height = height;
1814    out
1815}
1816
1817fn merge_opt_fit1d(a: &Option<Fit1D>, b: &Option<Fit1D>) -> Option<Fit1D> {
1818    match (a, b) {
1819        (None, None) => None,
1820        (Some(x), None) => Some(x.clone()),
1821        (None, Some(y)) => Some(y.clone()),
1822        (Some(x), Some(y)) => Some(merge_fit1d(x, y)),
1823    }
1824}
1825
1826fn merge_axis_vec<T: Clone + PartialEq>(
1827    a: &Option<Vec<T>>,
1828    b: &Option<Vec<T>>,
1829    keep_if_identical: bool,
1830) -> Option<Vec<T>> {
1831    match (a, b) {
1832        (None, None) => None,
1833        (Some(x), None) => Some(x.clone()),
1834        (None, Some(y)) => Some(y.clone()),
1835        (Some(x), Some(y)) => {
1836            if keep_if_identical && x == y {
1837                Some(x.clone())
1838            } else if keep_if_identical {
1839                // non-equal -> drop
1840                None
1841            } else {
1842                // Hook for future behavior (prefer longer, etc.). For now: drop.
1843                None
1844            }
1845        }
1846    }
1847}
1848
1849fn merge_trace_vec(
1850    a: &Option<Vec<f32>>,
1851    b: &Option<Vec<f32>>,
1852    keep_if_identical: bool,
1853) -> Option<Vec<f32>> {
1854    match (a, b) {
1855        (None, None) => None,
1856        (Some(x), None) => Some(x.clone()),
1857        (None, Some(y)) => Some(y.clone()),
1858        (Some(x), Some(y)) => {
1859            if keep_if_identical && x == y {
1860                Some(x.clone())
1861            } else if keep_if_identical {
1862                None
1863            } else {
1864                None
1865            }
1866        }
1867    }
1868}
1869
1870pub fn clusters_can_merge(
1871    a: &ClusterResult1D,
1872    b: &ClusterResult1D,
1873    policy: &ClusterMergePolicy,
1874) -> bool {
1875    if policy.require_same_ms_level && a.ms_level != b.ms_level {
1876        return false;
1877    }
1878    if policy.require_same_window_group && a.window_group != b.window_group {
1879        return false;
1880    }
1881    if policy.require_same_parents {
1882        if a.parent_im_id != b.parent_im_id || a.parent_rt_id != b.parent_rt_id {
1883            return false;
1884        }
1885    }
1886
1887    // Optional extra sanity: require overlapping RT/IM/TOF windows.
1888    let rt_overlaps = a.rt_window.1 >= b.rt_window.0 && b.rt_window.1 >= a.rt_window.0;
1889    let im_overlaps = a.im_window.1 >= b.im_window.0 && b.im_window.1 >= a.im_window.0;
1890    let tof_overlaps = a.tof_window.1 >= b.tof_window.0 && b.tof_window.1 >= a.tof_window.0;
1891
1892    rt_overlaps && im_overlaps && tof_overlaps
1893}
1894
1895pub fn merge_clusters(
1896    a: &ClusterResult1D,
1897    b: &ClusterResult1D,
1898    new_id: u64,
1899    policy: &ClusterMergePolicy,
1900) -> ClusterResult1D {
1901    debug_assert!(
1902        clusters_can_merge(a, b, policy),
1903        "merge_clusters called on incompatible clusters"
1904    );
1905
1906    let rt_window = (
1907        a.rt_window.0.min(b.rt_window.0),
1908        a.rt_window.1.max(b.rt_window.1),
1909    );
1910    let im_window = (
1911        a.im_window.0.min(b.im_window.0),
1912        a.im_window.1.max(b.im_window.1),
1913    );
1914    let tof_window = (
1915        a.tof_window.0.min(b.tof_window.0),
1916        a.tof_window.1.max(b.tof_window.1),
1917    );
1918    let tof_index_window = (
1919        a.tof_index_window.0.min(b.tof_index_window.0),
1920        a.tof_index_window.1.max(b.tof_index_window.1),
1921    );
1922
1923    let rt_fit = merge_fit1d(&a.rt_fit, &b.rt_fit);
1924    let im_fit = merge_fit1d(&a.im_fit, &b.im_fit);
1925    let tof_fit = merge_fit1d(&a.tof_fit, &b.tof_fit);
1926    let mz_fit = merge_opt_fit1d(&a.mz_fit, &b.mz_fit);
1927
1928    // merged + deduped raw points (if any)
1929    let raw_points = merge_raw_points(&a.raw_points, &b.raw_points);
1930
1931    // ---- intensity / volume logic --------------------------------------
1932    //
1933    // Case 1: at least one side has raw_points
1934    //   -> trust raw_points and recompute from them (handles partial overlap correctly).
1935    //
1936    // Case 2: neither side has raw_points
1937    //   -> we cannot dedup; use stats from the more intense cluster.
1938    let (raw_sum, volume_proxy) = if a.raw_points.is_some() || b.raw_points.is_some() {
1939        if let Some(ref rp) = raw_points {
1940            let s = sum_intensity(rp);
1941            // If volume_proxy is something else, you can adjust this later;
1942            // for now, tie it to the same recomputed intensity.
1943            (s, s)
1944        } else {
1945            // Should not really happen, but keep a safe fallback:
1946            // pick the more intense original cluster.
1947            if a.raw_sum >= b.raw_sum {
1948                (a.raw_sum, a.volume_proxy)
1949            } else {
1950                (b.raw_sum, b.volume_proxy)
1951            }
1952        }
1953    } else {
1954        // No raw points on either side: pick the more intense cluster’s stats.
1955        if a.raw_sum >= b.raw_sum {
1956            (a.raw_sum, a.volume_proxy)
1957        } else {
1958            (b.raw_sum, b.volume_proxy)
1959        }
1960    };
1961
1962    // frame_ids_used: sorted union
1963    let mut frame_ids_used = a.frame_ids_used.clone();
1964    frame_ids_used.extend_from_slice(&b.frame_ids_used);
1965    frame_ids_used.sort_unstable();
1966    frame_ids_used.dedup();
1967
1968    let window_group = a.window_group.or(b.window_group);
1969    let parent_im_id = a.parent_im_id.or(b.parent_im_id);
1970    let parent_rt_id = a.parent_rt_id.or(b.parent_rt_id);
1971    let ms_level = a.ms_level; // same as b if clusters_can_merge was true
1972
1973    let rt_axis_sec =
1974        merge_axis_vec(&a.rt_axis_sec, &b.rt_axis_sec, policy.keep_axes_if_identical);
1975    let im_axis_scans =
1976        merge_axis_vec(&a.im_axis_scans, &b.im_axis_scans, policy.keep_axes_if_identical);
1977    let mz_axis_da =
1978        merge_axis_vec(&a.mz_axis_da, &b.mz_axis_da, policy.keep_axes_if_identical);
1979
1980    let rt_trace = merge_trace_vec(&a.rt_trace, &b.rt_trace, policy.keep_traces_if_identical);
1981    let im_trace = merge_trace_vec(&a.im_trace, &b.im_trace, policy.keep_traces_if_identical);
1982
1983    ClusterResult1D {
1984        cluster_id: new_id,
1985        rt_window,
1986        im_window,
1987        tof_window,
1988        tof_index_window,
1989        mz_window: a.mz_window.or(b.mz_window),
1990
1991        rt_fit,
1992        im_fit,
1993        tof_fit,
1994        mz_fit,
1995
1996        raw_sum,
1997        volume_proxy,
1998
1999        frame_ids_used,
2000        window_group,
2001        parent_im_id,
2002        parent_rt_id,
2003        ms_level,
2004
2005        rt_axis_sec,
2006        im_axis_scans,
2007        mz_axis_da,
2008
2009        raw_points,
2010        rt_trace,
2011        im_trace,
2012    }
2013}
2014
2015pub fn merge_cluster_group(
2016    clusters: &[ClusterResult1D],
2017    new_id: u64,
2018    policy: &ClusterMergePolicy,
2019) -> Option<ClusterResult1D> {
2020    let mut it = clusters.iter();
2021    let first = it.next()?.clone();
2022    Some(it.fold(first, |acc, c| merge_clusters(&acc, c, new_id, policy)))
2023}
2024
2025// --- distance-based compatibility --------------------------------------------
2026
2027fn clusters_can_merge_with_distance(
2028    a: &ClusterResult1D,
2029    b: &ClusterResult1D,
2030    dist: &ClusterMergeDistancePolicy,
2031) -> bool {
2032    // 1) Cheap invariants: same MS level + same window group
2033    if a.ms_level != b.ms_level {
2034        return false;
2035    }
2036    if a.window_group != b.window_group {
2037        return false;
2038    }
2039
2040    // 2) Require overlapping windows as sanity check
2041    let rt_overlaps = a.rt_window.1 >= b.rt_window.0 && b.rt_window.1 >= a.rt_window.0;
2042    let im_overlaps = a.im_window.1 >= b.im_window.0 && b.im_window.1 >= a.im_window.0;
2043    let tof_overlaps = a.tof_window.1 >= b.tof_window.0 && b.tof_window.1 >= a.tof_window.0;
2044    if !(rt_overlaps && im_overlaps && tof_overlaps) {
2045        return false;
2046    }
2047
2048    // 3) Center-distance constraints (all interpreted in index units)
2049    let drt = (a.rt_fit.mu - b.rt_fit.mu).abs();
2050    if dist.max_rt_center_delta > 0.0 && drt > dist.max_rt_center_delta {
2051        return false;
2052    }
2053
2054    let dim = (a.im_fit.mu - b.im_fit.mu).abs();
2055    if dist.max_im_center_delta > 0.0 && dim > dist.max_im_center_delta {
2056        return false;
2057    }
2058
2059    let dtof = (a.tof_fit.mu - b.tof_fit.mu).abs();
2060    if dist.max_tof_center_delta > 0.0 && dtof > dist.max_tof_center_delta {
2061        return false;
2062    }
2063
2064    // Optional m/z center tolerance if both have mz_fit and a non-zero threshold
2065    if dist.max_mz_center_delta_da > 0.0 {
2066        if let (Some(mza), Some(mzb)) = (&a.mz_fit, &b.mz_fit) {
2067            let dmz = (mza.mu - mzb.mu).abs();
2068            if dmz > dist.max_mz_center_delta_da {
2069                return false;
2070            }
2071        }
2072    }
2073
2074    true
2075}
2076
2077/// Merge clusters that are close in RT/IM/TOF (and optionally m/z).
2078///
2079/// - Uses `clusters_can_merge_with_distance` to decide compatibility.
2080/// - Merges *chains* of mutually compatible clusters (A-B-C all close) into one.
2081/// - Keeps the cluster_id of the first cluster in each chain.
2082pub fn merge_clusters_by_distance(
2083    mut clusters: Vec<ClusterResult1D>,
2084    dist: &ClusterMergeDistancePolicy,
2085) -> Vec<ClusterResult1D> {
2086    if clusters.len() <= 1 {
2087        return clusters;
2088    }
2089
2090    // Order them so that "neighbors" in sort order are likely merge candidates.
2091    clusters.sort_unstable_by(|a, b| {
2092        a.ms_level
2093            .cmp(&b.ms_level)
2094            .then_with(|| a.window_group.cmp(&b.window_group))
2095            .then_with(|| {
2096                a.tof_fit
2097                    .mu
2098                    .partial_cmp(&b.tof_fit.mu)
2099                    .unwrap_or(std::cmp::Ordering::Equal)
2100            })
2101            .then_with(|| {
2102                a.rt_fit
2103                    .mu
2104                    .partial_cmp(&b.rt_fit.mu)
2105                    .unwrap_or(std::cmp::Ordering::Equal)
2106            })
2107            .then_with(|| {
2108                a.im_fit
2109                    .mu
2110                    .partial_cmp(&b.im_fit.mu)
2111                    .unwrap_or(std::cmp::Ordering::Equal)
2112            })
2113            .then_with(|| a.cluster_id.cmp(&b.cluster_id))
2114    });
2115
2116    // Internal merge policy: distance-based function already enforces ms_level/window_group,
2117    // and we explicitly do NOT want to require same parents here.
2118    let merge_policy = ClusterMergePolicy {
2119        require_same_ms_level: false,
2120        require_same_window_group: false,
2121        require_same_parents: false,
2122        keep_axes_if_identical: true,
2123        keep_traces_if_identical: true,
2124    };
2125
2126    let mut out: Vec<ClusterResult1D> = Vec::new();
2127    let mut group: Vec<ClusterResult1D> = Vec::new();
2128
2129    for c in clusters.into_iter() {
2130        if let Some(last) = group.last() {
2131            if clusters_can_merge_with_distance(last, &c, dist) {
2132                // same chain → keep extending
2133                group.push(c);
2134            } else {
2135                // finish previous chain
2136                let new_id = group[0].cluster_id; // keep id of first
2137                if let Some(merged) = merge_cluster_group(&group, new_id, &merge_policy) {
2138                    out.push(merged);
2139                }
2140                group.clear();
2141                group.push(c);
2142            }
2143        } else {
2144            group.push(c);
2145        }
2146    }
2147
2148    // flush final chain
2149    if !group.is_empty() {
2150        let new_id = group[0].cluster_id;
2151        if let Some(merged) = merge_cluster_group(&group, new_id, &merge_policy) {
2152            out.push(merged);
2153        }
2154    }
2155
2156    out
2157}
rustdf/cluster/cluster.rs

rustdf/cluster/
cluster.rs