rustdf/data/
dia.rs

1use std::sync::Arc;
2use crate::data::acquisition::AcquisitionMode;
3use rustc_hash::FxHashMap;
4use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader};
5use crate::data::meta::{
6    read_dia_ms_ms_info, read_dia_ms_ms_windows, read_global_meta_sql, read_meta_data_sql,
7    DiaMsMisInfo, DiaMsMsWindow, FrameMeta, GlobalMetaData,
8};
9use mscore::timstof::frame::{RawTimsFrame, TimsFrame};
10use mscore::timstof::slice::TimsSlice;
11use rand::prelude::IteratorRandom;
12use rayon::iter::IntoParallelRefIterator;
13use crate::cluster::peak::{build_frame_bin_view, build_tof_rt_grid_full, expand_many_im_peaks_along_rt, FrameBinView, ImPeak1D, RtExpandParams, RtFrames, TofRtGrid};
14use crate::cluster::utility::{TofScale};
15use crate::data::utility::merge_ranges;
16use rayon::prelude::*;
17use std::collections::{HashMap};
18use rayon::ThreadPoolBuilder;
19use crate::cluster::candidates::{build_pseudo_spectra_all_pairs, build_pseudo_spectra_end_to_end, build_pseudo_spectra_end_to_end_xic, PseudoBuildResult, ScoreOpts};
20use crate::cluster::cluster::{attach_raw_points_for_spec_1d_in_ctx, bin_range_for_win, build_scan_slices, decorate_with_mz_for_cluster, evaluate_spec_1d, make_specs_from_im_and_rt_groups_threads, BuildSpecOpts, ClusterResult1D, ClusterSpec1D, Eval1DOpts, RawAttachContext, RawPoints, ScanSlice};
21use crate::cluster::feature::SimpleFeature;
22use crate::cluster::pseudo::{PseudoSpecOpts};
23use crate::cluster::candidates::CandidateOpts;
24use crate::cluster::scoring::XicScoreOpts;
25
26// ==========================================================
27// Options structs for reducing parameter explosion (Phase 5)
28// ==========================================================
29
30/// Options for cluster extraction from IM peaks.
31#[derive(Clone, Debug)]
32pub struct ClusterExtractionOpts {
33    /// TOF step for grid binning (typically 1, 2, 4, 8).
34    pub tof_step: i32,
35    /// RT expansion parameters.
36    pub rt_params: RtExpandParams,
37    /// Spec building options (window padding, ms_level).
38    pub build_opts: BuildSpecOpts,
39    /// Evaluation options (refine, attach axes/traces).
40    pub eval_opts: Eval1DOpts,
41    /// Whether to require RT overlap for spec generation.
42    pub require_rt_overlap: bool,
43    /// Number of threads for parallel processing.
44    pub num_threads: usize,
45}
46
47impl Default for ClusterExtractionOpts {
48    fn default() -> Self {
49        Self {
50            tof_step: 8,
51            rt_params: RtExpandParams::default(),
52            build_opts: BuildSpecOpts::ms1_defaults(),
53            eval_opts: Eval1DOpts::default(),
54            require_rt_overlap: true,
55            num_threads: 4,
56        }
57    }
58}
59
60impl ClusterExtractionOpts {
61    /// Create options for MS1 (precursor) extraction.
62    pub fn for_ms1() -> Self {
63        Self {
64            build_opts: BuildSpecOpts::ms1_defaults(),
65            ..Self::default()
66        }
67    }
68
69    /// Create options for MS2 (fragment) extraction.
70    pub fn for_ms2() -> Self {
71        Self {
72            build_opts: BuildSpecOpts::ms2_defaults(),
73            ..Self::default()
74        }
75    }
76
77    /// Set the TOF step.
78    pub fn with_tof_step(mut self, step: i32) -> Self {
79        self.tof_step = step;
80        self
81    }
82
83    /// Set the number of threads.
84    pub fn with_threads(mut self, n: usize) -> Self {
85        self.num_threads = n;
86        self
87    }
88}
89
90/// Options for building pseudo spectra from clusters.
91#[derive(Clone, Debug)]
92pub struct PseudoSpectrumBuildOpts<'a> {
93    /// Candidate enumeration options.
94    pub cand_opts: &'a CandidateOpts,
95    /// Geometric scoring options.
96    pub score_opts: &'a ScoreOpts,
97    /// Pseudo spectrum construction options.
98    pub pseudo_opts: &'a PseudoSpecOpts,
99    /// Optional features for grouping.
100    pub features: Option<&'a [SimpleFeature]>,
101}
102
103#[derive(Clone, Debug)]
104pub struct ProgramSlice {
105    pub mz_lo: f64,
106    pub mz_hi: f64,
107    pub scan_lo: u32,
108    pub scan_hi: u32,
109}
110
111#[inline]
112fn ranges_overlap_u32(a: (u32, u32), b: (u32, u32)) -> bool {
113    let lo = a.0.max(b.0);
114    let hi = a.1.min(b.1);
115    hi >= lo
116}
117
118#[derive(Debug, Clone)]
119pub struct Ms2GroupProgram {
120    /// Raw per-row isolation windows (normalized, ascending, finite)
121    pub mz_windows: Vec<(f32, f32)>,
122    /// Raw per-row active scan ranges (normalized, inclusive)
123    pub scan_ranges: Vec<(u32, u32)>,
124    /// Union (convex hull) over all mz_windows for the group; None if no valid rows
125    pub mz_union: Option<(f32, f32)>,
126    /// Merged disjoint scan ranges (inclusive) over the group; empty = permissive fallback
127    pub scan_unions: Vec<(u32, u32)>,
128}
129
130#[derive(Debug, Clone)]
131pub struct DiaIndex {
132    /// MS2 frame_id -> window_group
133    pub frame_to_group: HashMap<u32, u32>,
134    /// window_group -> MS2 frame_ids (sorted by **time**)
135    pub group_to_frames: HashMap<u32, Vec<u32>>,
136    /// window_group -> list of (mz_lo, mz_hi) isolation ranges (normalized)
137    pub group_to_isolation: HashMap<u32, Vec<(f64, f64)>>,
138    /// window_group -> list of (scan_lo, scan_hi) active scan ranges (normalized, inclusive)
139    pub group_to_scan_ranges: HashMap<u32, Vec<(u32, u32)>>,
140    /// window_group -> convex m/z union (lo, hi)
141    pub group_to_mz_union: HashMap<u32, (f64, f64)>,
142    /// window_group -> merged disjoint scan unions (inclusive)
143    pub group_to_scan_unions: HashMap<u32, Vec<(u32, u32)>>,
144    /// frame_id -> time (seconds)
145    pub frame_time: HashMap<u32, f64>,
146    /// New: pre-materialized program slices per group.
147    pub group_to_slices: HashMap<u32, Arc<[ProgramSlice]>>,
148}
149
150impl DiaIndex {
151    pub fn new(meta: &[FrameMeta], info: &[DiaMsMisInfo], wins: &[DiaMsMsWindow]) -> Self {
152        // helpers
153        #[inline]
154        fn norm_f64_pair(a: f64, b: f64) -> Option<(f64, f64)> {
155            if !a.is_finite() || !b.is_finite() { return None; }
156            let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
157            if hi > lo { Some((lo, hi)) } else { None }
158        }
159        #[inline]
160        fn norm_u32_pair(a: u32, b: u32) -> Option<(u32, u32)> {
161            let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
162            Some((lo, hi))
163        }
164        fn merge_scan_ranges(mut ranges: Vec<(u32, u32)>) -> Vec<(u32, u32)> {
165            if ranges.is_empty() { return ranges; }
166            ranges.sort_unstable_by_key(|&(l, r)| (l, r));
167            let mut out: Vec<(u32,u32)> = Vec::with_capacity(ranges.len());
168            let mut cur = ranges[0];
169            for &(l, r) in &ranges[1..] {
170                if l <= cur.1.saturating_add(1) {
171                    if r > cur.1 { cur.1 = r; }
172                } else {
173                    out.push(cur);
174                    cur = (l, r);
175                }
176            }
177            out.push(cur);
178            out
179        }
180
181        // frame_id -> time
182        let mut frame_time: HashMap<u32, f64> = HashMap::new();
183        for m in meta {
184            frame_time.insert(m.id as u32, m.time);
185        }
186
187        // frame_id -> group; group -> frames
188        let mut frame_to_group: HashMap<u32, u32> = HashMap::new();
189        let mut group_to_frames: HashMap<u32, Vec<u32>> = HashMap::new();
190        for r in info {
191            let fid = r.frame_id;
192            frame_to_group.insert(fid, r.window_group);
193            group_to_frames.entry(r.window_group).or_default().push(fid);
194        }
195
196        // raw program rows
197        let mut group_to_isolation: HashMap<u32, Vec<(f64, f64)>> = HashMap::new();
198        let mut group_to_scan_ranges: HashMap<u32, Vec<(u32, u32)>> = HashMap::new();
199        for w in wins {
200            let half = 0.5 * w.isolation_width;
201            if half.is_finite() && half > 0.0 && w.isolation_mz.is_finite() {
202                let lo = w.isolation_mz - half;
203                let hi = w.isolation_mz + half;
204                if let Some(p) = norm_f64_pair(lo, hi) {
205                    group_to_isolation.entry(w.window_group).or_default().push(p);
206                }
207            }
208            if let Some(p) = norm_u32_pair(w.scan_num_begin, w.scan_num_end) {
209                group_to_scan_ranges.entry(w.window_group).or_default().push(p);
210            }
211        }
212
213        // unions + sort frames by time
214        let mut group_to_mz_union: HashMap<u32, (f64, f64)> = HashMap::new();
215        let mut group_to_scan_unions: HashMap<u32, Vec<(u32, u32)>> = HashMap::new();
216
217        for (g, frames) in group_to_frames.iter_mut() {
218            frames.sort_unstable_by(|&a, &b| {
219                let ta = frame_time.get(&a).copied().unwrap_or(f64::NAN);
220                let tb = frame_time.get(&b).copied().unwrap_or(f64::NAN);
221                ta.partial_cmp(&tb).unwrap_or(std::cmp::Ordering::Equal)
222            });
223
224            if let Some(v) = group_to_isolation.get(g) {
225                if !v.is_empty() {
226                    let lo = v.iter().map(|p| p.0).fold(f64::INFINITY, f64::min);
227                    let hi = v.iter().map(|p| p.1).fold(f64::NEG_INFINITY, f64::max);
228                    if lo.is_finite() && hi.is_finite() && hi > lo {
229                        group_to_mz_union.insert(*g, (lo, hi));
230                    }
231                }
232            }
233            let merged = merge_scan_ranges(group_to_scan_ranges.get(g).cloned().unwrap_or_default());
234            group_to_scan_unions.insert(*g, merged);
235        }
236
237        let mut group_to_slices: HashMap<u32, Arc<[ProgramSlice]>> = HashMap::new();
238
239        for (&g, iso_rows) in &group_to_isolation {
240            let scan_rows = group_to_scan_ranges.get(&g).cloned().unwrap_or_default();
241            let n = iso_rows.len().min(scan_rows.len());
242            let mut v = Vec::with_capacity(n);
243            for k in 0..n {
244                let (mz_lo, mz_hi)     = iso_rows[k];
245                let (scan_lo, scan_hi) = scan_rows[k];
246                v.push(ProgramSlice { mz_lo, mz_hi, scan_lo, scan_hi });
247            }
248            group_to_slices.insert(g, v.into());
249        }
250
251        DiaIndex {
252            frame_to_group,
253            group_to_frames,
254            group_to_isolation,
255            group_to_scan_ranges,
256            group_to_mz_union,
257            group_to_scan_unions,
258            frame_time,
259            group_to_slices,
260        }
261    }
262
263    pub fn program_slices_for_group(&self, g: u32) -> Vec<ProgramSlice> {
264        self.slices_for_group(g).to_vec()
265    }
266
267    /// Tiles in group `g` that could have selected this precursor.
268    ///
269    /// Conditions:
270    ///   1) Precursor IM window overlaps tile scan band.
271    ///   2) Precursor m/z lies inside tile's isolation m/z window.
272    /// Tiles in group `g` that could have selected this precursor.
273    ///
274    /// Conditions:
275    ///   1) Precursor **apex** IM scan lies inside tile scan band.
276    ///   2) Precursor m/z lies inside tile's isolation m/z window.
277    pub fn tiles_for_precursor_in_group(
278        &self,
279        g: u32,
280        prec_mz: f32,
281        im_apex: f32,
282    ) -> Vec<usize> {
283        let slices = self.slices_for_group(g);
284        let mut hits = Vec::new();
285
286        if !prec_mz.is_finite() || !im_apex.is_finite() {
287            return hits;
288        }
289
290        for (idx, s) in slices.iter().enumerate() {
291            // Precursor m/z inside isolation window
292            if prec_mz < s.mz_lo as f32 || prec_mz > s.mz_hi as f32 {
293                continue;
294            }
295
296            // Apex inside scan band (no margin for now, same semantics as your debug plot)
297            let scan_lo = s.scan_lo as f32;
298            let scan_hi = s.scan_hi as f32;
299            if im_apex < scan_lo || im_apex > scan_hi {
300                continue;
301            }
302
303            hits.push(idx);
304        }
305
306        hits
307    }
308
309    /// Tiles in group `g` that an MS2 fragment cluster could belong to.
310    ///
311    /// IMPORTANT:
312    ///   Fragment m/z is *not* constrained by isolation window (selection
313    ///   happened before fragmentation). We only check the IM/scan range.
314    pub fn tiles_for_fragment_in_group(
315        &self,
316        g: u32,
317        im_window: (usize, usize),
318    ) -> Vec<usize> {
319        let slices = self.program_slices_for_group(g);
320        let mut hits = Vec::new();
321
322        for (idx, s) in slices.iter().enumerate() {
323            let tile_scans = (s.scan_lo, s.scan_hi);
324
325            if ranges_overlap_u32(
326                (im_window.0 as u32, im_window.1 as u32),
327                tile_scans,
328            ) {
329                hits.push(idx);
330            }
331        }
332
333        hits
334    }
335
336    /// Convenience: materialize a program description for a group.
337    pub fn program_for_group(&self, g: u32) -> Ms2GroupProgram {
338        let mz_windows = self.group_to_isolation
339            .get(&g)
340            .map(|v| v.iter().map(|&(a,b)| (a as f32, b as f32)).collect())
341            .unwrap_or_else(Vec::new);
342
343        let scan_ranges = self.group_to_scan_ranges
344            .get(&g)
345            .cloned()
346            .unwrap_or_else(Vec::new);
347
348        let mz_union = self.group_to_mz_union
349            .get(&g)
350            .map(|&(a,b)| (a as f32, b as f32));
351
352        let scan_unions = self.group_to_scan_unions
353            .get(&g)
354            .cloned()
355            .unwrap_or_else(Vec::new);
356
357        Ms2GroupProgram { mz_windows, scan_ranges, mz_union, scan_unions }
358    }
359
360    pub fn groups_for_precursor(
361        &self,
362        prec_mz: f32,
363        im_apex: f32,
364    ) -> Vec<u32> {
365        if !prec_mz.is_finite() || !im_apex.is_finite() {
366            return Vec::new();
367        }
368
369        let mut out = Vec::new();
370        for (&g, &(mz_lo, mz_hi)) in &self.group_to_mz_union {
371            if prec_mz < mz_lo as f32 || prec_mz > mz_hi as f32 {
372                continue;
373            }
374
375            // optionally also check scan_unions
376            if let Some(unions) = self.group_to_scan_unions.get(&g) {
377
378                if !unions.iter().any(|&(lo, hi)| {
379                    (im_apex as u32) >= lo && (im_apex as u32) <= hi
380                }) {
381                    continue;
382                }
383            }
384
385            let tiles = self.tiles_for_precursor_in_group(g, prec_mz, im_apex);
386            if !tiles.is_empty() {
387                out.push(g);
388            }
389        }
390        out
391    }
392
393    /// Return the m/z isolation bounds (lo, hi) for a given tile index in a group.
394    ///
395    /// If the tile index is out of range for this group, returns (NaN, NaN).
396    /// This is used by the fragment query to decide whether a fragment's m/z
397    /// lies **inside** the precursor-selection band of the shared tile.
398    pub fn tile_mz_bounds(&self, g: u32, tile_idx: usize) -> (f32, f32) {
399        let slices = self.slices_for_group(g);
400        if tile_idx >= slices.len() {
401            return (f32::NAN, f32::NAN);
402        }
403        let s = &slices[tile_idx];
404        (s.mz_lo as f32, s.mz_hi as f32)
405    }
406
407    #[inline]
408    pub fn mz_bounds_for_window_group_core(&self, g: u32) -> Option<(f32, f32)> {
409        self.group_to_mz_union.get(&g).map(|&(a,b)| (a as f32, b as f32))
410    }
411    #[inline]
412    pub fn scan_unions_for_window_group_core(&self, g: u32) -> Option<Vec<(usize, usize)>> {
413        self.group_to_scan_unions.get(&g).map(|v| v.iter().map(|&(l,r)| (l as usize, r as usize)).collect())
414    }
415
416    fn slices_for_group(&self, g: u32) -> &[ProgramSlice] {
417        self.group_to_slices.get(&g).map(|a| &a[..]).unwrap_or(&[])
418    }
419
420}
421
422pub struct TimsDatasetDIA {
423    pub loader: TimsDataLoader,
424    pub global_meta_data: GlobalMetaData,
425    pub meta_data: Vec<FrameMeta>,
426    pub dia_ms_ms_info: Vec<DiaMsMisInfo>,
427    pub dia_ms_ms_windows: Vec<DiaMsMsWindow>,
428    pub dia_index: DiaIndex,
429}
430
431impl TimsDatasetDIA {
432    pub fn new(
433        bruker_lib_path: &str,
434        data_path: &str,
435        in_memory: bool,
436        use_bruker_sdk: bool,
437    ) -> Self {
438        // TODO: error handling
439        let global_meta_data = read_global_meta_sql(data_path).unwrap();
440        let meta_data = read_meta_data_sql(data_path).unwrap();
441        let dia_ms_mis_info = read_dia_ms_ms_info(data_path).unwrap();
442        let dia_ms_ms_windows = read_dia_ms_ms_windows(data_path).unwrap();
443
444        let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
445        let im_lower = global_meta_data.one_over_k0_range_lower;
446        let im_upper = global_meta_data.one_over_k0_range_upper;
447
448        let tof_max_index = global_meta_data.tof_max_index;
449        let mz_lower = global_meta_data.mz_acquisition_range_lower;
450        let mz_upper = global_meta_data.mz_acquisition_range_upper;
451
452        let loader = match in_memory {
453            true => TimsDataLoader::new_in_memory(
454                bruker_lib_path,
455                data_path,
456                use_bruker_sdk,
457                scan_max_index,
458                im_lower,
459                im_upper,
460                tof_max_index,
461                mz_lower,
462                mz_upper,
463            ),
464            false => TimsDataLoader::new_lazy(
465                bruker_lib_path,
466                data_path,
467                use_bruker_sdk,
468                scan_max_index,
469                im_lower,
470                im_upper,
471                tof_max_index,
472                mz_lower,
473                mz_upper,
474            ),
475        };
476
477        let dia_index = DiaIndex::new(&meta_data, &dia_ms_mis_info, &dia_ms_ms_windows);
478
479        TimsDatasetDIA {
480            loader,
481            global_meta_data,
482            meta_data,
483            dia_ms_ms_info: dia_ms_mis_info,
484            dia_ms_ms_windows,
485            dia_index,
486        }
487    }
488
489    /// Create a DIA dataset with regression-derived m/z calibration.
490    ///
491    /// This method uses externally-provided m/z calibration coefficients (e.g., from
492    /// linear regression on SDK data) instead of the simple boundary model.
493    pub fn new_with_mz_calibration(
494        data_path: &str,
495        in_memory: bool,
496        tof_intercept: f64,
497        tof_slope: f64,
498    ) -> Self {
499        let meta_data = read_meta_data_sql(data_path).unwrap();
500        let global_meta_data = read_global_meta_sql(data_path).unwrap();
501        let dia_ms_mis_info = read_dia_ms_ms_info(data_path).unwrap();
502        let dia_ms_ms_windows = read_dia_ms_ms_windows(data_path).unwrap();
503
504        let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
505        let im_lower = global_meta_data.one_over_k0_range_lower;
506        let im_upper = global_meta_data.one_over_k0_range_upper;
507
508        let loader = match in_memory {
509            true => TimsDataLoader::new_in_memory_with_mz_calibration(
510                data_path,
511                tof_intercept,
512                tof_slope,
513                im_lower,
514                im_upper,
515                scan_max_index,
516            ),
517            false => TimsDataLoader::new_lazy_with_mz_calibration(
518                data_path,
519                tof_intercept,
520                tof_slope,
521                im_lower,
522                im_upper,
523                scan_max_index,
524            ),
525        };
526
527        let dia_index = DiaIndex::new(&meta_data, &dia_ms_mis_info, &dia_ms_ms_windows);
528
529        TimsDatasetDIA {
530            loader,
531            global_meta_data,
532            meta_data,
533            dia_ms_ms_info: dia_ms_mis_info,
534            dia_ms_ms_windows,
535            dia_index,
536        }
537    }
538
539    pub fn program_for_group(&self, g: u32) -> Ms2GroupProgram {
540        self.dia_index.program_for_group(g)
541    }
542
543    /// Collect all program slices for a window group from DiaFrameMsMsWindows.
544    /// Isolation window: [isolation_mz - width/2, isolation_mz + width/2]
545    pub fn program_slices_for_group(&self, group: u32) -> Vec<ProgramSlice> {
546        self.dia_ms_ms_windows
547            .iter()
548            .filter(|w| w.window_group == group)
549            .map(|w| {
550                let half = 0.5 * w.isolation_width;
551                ProgramSlice {
552                    mz_lo: w.isolation_mz - half,
553                    mz_hi: w.isolation_mz + half,
554                    scan_lo: w.scan_num_begin,
555                    scan_hi: w.scan_num_end,
556                }
557            })
558            .collect()
559    }
560
561    pub fn sample_precursor_signal(
562        &self,
563        num_frames: usize,
564        max_intensity: f64,
565        take_probability: f64,
566    ) -> TimsFrame {
567        // get all precursor frames
568        let precursor_frames = self.meta_data.iter().filter(|x| x.ms_ms_type == 0);
569
570        // randomly sample num_frames
571        let mut rng = rand::thread_rng();
572        let mut sampled_frames: Vec<TimsFrame> = Vec::new();
573
574        // go through each frame and sample the data
575        for frame in precursor_frames.choose_multiple(&mut rng, num_frames) {
576            let frame_id = frame.id;
577            let frame_data = self
578                .loader
579                .get_frame(frame_id as u32)
580                .filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity, 0, i32::MAX)
581                .generate_random_sample(take_probability);
582            sampled_frames.push(frame_data);
583        }
584
585        // get the first frame
586        let mut sampled_frame = sampled_frames.remove(0);
587
588        // sum all the other frames to the first frame
589        for frame in sampled_frames {
590            sampled_frame = sampled_frame + frame;
591        }
592
593        sampled_frame
594    }
595
596    pub fn sample_fragment_signal(
597        &self,
598        num_frames: usize,
599        window_group: u32,
600        max_intensity: f64,
601        take_probability: f64,
602    ) -> TimsFrame {
603        // get all fragment frames, filter by window_group
604        let fragment_frames: Vec<u32> = self
605            .dia_ms_ms_info
606            .iter()
607            .filter(|x| x.window_group == window_group)
608            .map(|x| x.frame_id)
609            .collect();
610
611        // randomly sample num_frames
612        let mut rng = rand::thread_rng();
613        let mut sampled_frames: Vec<TimsFrame> = Vec::new();
614
615        // go through each frame and sample the data
616        for frame_id in fragment_frames
617            .into_iter()
618            .choose_multiple(&mut rng, num_frames)
619        {
620            let frame_data = self
621                .loader
622                .get_frame(frame_id)
623                .filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity, 0, i32::MAX)
624                .generate_random_sample(take_probability);
625            sampled_frames.push(frame_data);
626        }
627
628        // get the first frame
629        let mut sampled_frame = sampled_frames.remove(0);
630
631        // sum all the other frames to the first frame
632        for frame in sampled_frames {
633            sampled_frame = sampled_frame + frame;
634        }
635
636        sampled_frame
637    }
638
639    /// All DIA window_group IDs present in the file (sorted unique).
640    pub fn dia_window_groups(&self) -> Vec<u32> {
641        let mut gs: Vec<u32> = self
642            .dia_ms_ms_info
643            .iter()
644            .map(|x| x.window_group)
645            .collect();
646        gs.sort_unstable();
647        gs.dedup();
648        gs
649    }
650
651    pub fn window_groups_for_precursor(&self, prec_mz: f32, im_apex: f32) -> Vec<u32> {
652        self.dia_index.groups_for_precursor(prec_mz, im_apex)
653    }
654
655    /// Map frame_id -> (time, ms_ms_type) for quick lookups.
656    fn frame_time_map(&self) -> FxHashMap<u32, (f32, i64)> {
657        let mut m = FxHashMap::default();
658        for fm in &self.meta_data {
659            m.insert(fm.id as u32, (fm.time as f32, fm.ms_ms_type));
660        }
661        m
662    }
663
664    /// RT-sorted **fragment** frames for a given DIA group.
665    pub fn fragment_frame_ids_and_times_for_group_core(&self, window_group: u32) -> (Vec<u32>, Vec<f32>) {
666        let time_map = self.frame_time_map();
667        let mut rows: Vec<(u32, f32)> = self
668            .dia_ms_ms_info
669            .iter()
670            .filter(|x| x.window_group == window_group)
671            .filter_map(|x| {
672                time_map.get(&(x.frame_id)).map(|(t, _ms2)| (x.frame_id, *t))
673            })
674            .collect();
675
676        // Just in case: keep only MS2 frames (ms_ms_type != 0)
677        rows.retain(|(fid, _)| time_map.get(fid).map(|(_, ty)| *ty != 0).unwrap_or(false));
678
679        rows.sort_by(|a,b| a.1.partial_cmp(&b.1).unwrap());
680        let (ids, times): (Vec<_>, Vec<_>) = rows.into_iter().unzip();
681        (ids, times)
682    }
683
684    /// Merged, sorted **global scan** unions for this DIA group, from DiaFrameMsMsWindows.
685    /// Returns None if there are no window rows for this group.
686    pub fn scan_unions_for_window_group_core(&self, window_group: u32) -> Option<Vec<(usize, usize)>> {
687        let ranges: Vec<(usize, usize)> = self
688            .dia_ms_ms_windows
689            .iter()
690            .filter(|w| w.window_group == window_group)
691            .map(|w| {
692                let l = w.scan_num_begin as usize;
693                let r = w.scan_num_end as usize;
694                if l <= r { (l, r) } else { (r, l) }
695            })
696            .collect();
697        if ranges.is_empty() {
698            return None;
699        }
700        Some(merge_ranges(ranges))
701    }
702
703    /// m/z unions (min..max) for the group from DiaFrameMsMsWindows (wide clamp).
704    pub fn mz_bounds_for_window_group_core(&self, window_group: u32) -> Option<(f32, f32)> {
705        let mut lo = f32::INFINITY;
706        let mut hi = f32::NEG_INFINITY;
707        let mut hit = false;
708        for w in &self.dia_ms_ms_windows {
709            if w.window_group == window_group {
710                let c = w.isolation_mz as f32;
711                let half = 0.5f32 * (w.isolation_width as f32);
712                lo = lo.min(c - half);
713                hi = hi.max(c + half);
714                hit = true;
715            }
716        }
717        if hit && hi > lo && lo.is_finite() && hi.is_finite() {
718            Some((lo, hi))
719        } else {
720            None
721        }
722    }
723
724    /// RT-sorted FRAGMENT frames + times for a DIA group, converted into FrameBinView rows.
725    /// `tof_step` controls TOF granularity of CSR binning.
726    pub fn make_rt_frames_for_group(&self, window_group: u32, tof_step: i32) -> RtFrames {
727        assert!(tof_step > 0);
728
729        let (ids, times) = self.fragment_frame_ids_and_times_for_group_core(window_group);
730        assert!(
731            !ids.is_empty(),
732            "No MS2 frames for window_group {}",
733            window_group
734        );
735
736        let global_num_scans = self.meta_data
737            .iter()
738            .filter(|m| ids.binary_search(&(m.id as u32)).is_ok())
739            .map(|m| m.num_scans as usize)
740            .max()
741            .unwrap_or(0);
742
743        // TOF scale for this group
744        let scale = self.tof_scale_for_group(window_group, tof_step);
745
746        let frames: Vec<FrameBinView> = ids.par_iter()
747            .map(|&fid| build_frame_bin_view(self.get_frame(fid), &scale, global_num_scans))
748            .collect();
749
750        RtFrames {
751            frames,
752            frame_ids: ids,
753            rt_times: times,
754            scale: Arc::new(scale),
755        }
756    }
757
758    /// RT-sorted PRECURSOR frames (ms_ms_type == 0) into FrameBinView rows.
759    /// `tof_step` controls TOF granularity of CSR binning.
760    pub fn make_rt_frames_for_precursor(&self, tof_step: i32) -> RtFrames {
761        assert!(tof_step > 0);
762
763        let mut rows: Vec<(u32, f32, usize)> = self.meta_data
764            .iter()
765            .filter(|m| m.ms_ms_type == 0)
766            .map(|m| (m.id as u32, m.time as f32, m.num_scans as usize))
767            .collect();
768
769        assert!(!rows.is_empty(), "No precursor (MS1) frames found");
770        rows.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
771
772        let frame_ids: Vec<u32> = rows.iter().map(|r| r.0).collect();
773        let rt_times:  Vec<f32> = rows.iter().map(|r| r.1).collect();
774        let global_num_scans = rows.iter().map(|r| r.2).max().unwrap_or(1);
775
776        // Build TOF scale from precursor frames
777        let frames_for_scale: Vec<_> = frame_ids.iter().map(|&fid| self.get_frame(fid)).collect();
778        let scale = TofScale::build_from_tof(&frames_for_scale, tof_step)
779            .expect("make_rt_frames_for_precursor: failed to build TOF scale");
780
781        let frames: Vec<FrameBinView> = frame_ids.par_iter()
782            .map(|&fid| build_frame_bin_view(self.get_frame(fid), &scale, global_num_scans))
783            .collect();
784
785        RtFrames {
786            frames,
787            frame_ids,
788            rt_times,
789            scale: Arc::new(scale),
790        }
791    }
792
793    /// Build a TOF-based scale for a DIA group.
794    /// `tof_step = 1` → max TOF resolution; larger steps downsample.
795    pub fn tof_scale_for_group(&self, window_group: u32, tof_step: i32) -> TofScale {
796        assert!(tof_step > 0);
797
798        let (ids, _times) = self.fragment_frame_ids_and_times_for_group_core(window_group);
799        assert!(
800            !ids.is_empty(),
801            "tof_scale_for_group: no MS2 frames for window_group {}",
802            window_group
803        );
804
805        let frames: Vec<_> = ids.iter().map(|&fid| self.get_frame(fid)).collect();
806
807        TofScale::build_from_tof(&frames, tof_step)
808            .expect("tof_scale_for_group: failed to build TOF scale (empty or degenerate)")
809    }
810
811    /// Conservative: derive a TOF scale by scanning actual frame data.
812    pub fn tof_scale_from_frames_scan(
813        &self,
814        frame_ids: &[u32],
815        tof_step: i32,
816    ) -> Option<TofScale> {
817        assert!(tof_step > 0);
818        let frames: Vec<_> = frame_ids.iter().map(|&fid| self.get_frame(fid)).collect();
819        TofScale::build_from_tof(&frames, tof_step)
820    }
821
822    /// Internal helper: go from a set of IM peaks to evaluated 1D clusters
823    /// on a given RtFrames grid (precursor or DIA group).
824    fn clusters_for_im_peaks_on_rt_frames(
825        &self,
826        rt: RtFrames,
827        im_peaks: &[ImPeak1D],
828        rt_params: RtExpandParams,
829        build_opts: &BuildSpecOpts,
830        eval_opts: &Eval1DOpts,
831        require_rt_overlap: bool,
832        num_threads: usize,
833    ) -> Vec<ClusterResult1D> {
834        // 1) Expand IM peaks along RT on this grid
835        let rt_groups = expand_many_im_peaks_along_rt(
836            im_peaks,
837            &rt.frames,
838            rt.ctx(),
839            rt.scale.as_ref(),
840            rt_params,
841        );
842
843        // 2) Build 1D specs (RT × IM × TOF-window)
844        let specs: Vec<ClusterSpec1D> = make_specs_from_im_and_rt_groups_threads(
845            im_peaks,
846            &rt_groups,
847            &rt,
848            build_opts,
849            require_rt_overlap,
850            num_threads,
851        );
852
853        // 3) Evaluate all specs on this RtFrames grid
854        self.evaluate_specs_1d_threads(&rt, &specs, eval_opts, num_threads)
855    }
856
857    /// Evaluate a batch of 1D cluster specs on the given RtFrames.
858    /// - Runs spec evaluation (marginals + moment fits) in parallel.
859    /// - Optionally attaches raw points using the finalized **TOF-bin window**
860    ///   and the spec’s IM/RT windows.
861    /// - `num_threads == 0` => use rayon’s global pool.
862    pub fn evaluate_specs_1d_threads(
863        &self,
864        rt_frames: &RtFrames,
865        specs: &[ClusterSpec1D],
866        opts: &Eval1DOpts,
867        num_threads: usize,
868    ) -> Vec<ClusterResult1D> {
869        if specs.is_empty() {
870            return Vec::new();
871        }
872
873        let scale = &*rt_frames.scale;
874
875        let run = || {
876            // Pre-build raw attach context once if needed
877            let attach_ctx = if opts.attach.attach_points {
878                let frame_ids_local = rt_frames.frame_ids.clone();
879                let slice = self.get_slice(frame_ids_local.clone(), num_threads.max(1));
880                let scan_slices = slice
881                    .frames
882                    .iter()
883                    .map(|fr| build_scan_slices(fr))
884                    .collect::<Vec<_>>();
885                let rt_axis_sec = rt_frames.rt_times.clone();
886
887                Some(RawAttachContext {
888                    slice,
889                    scan_slices,
890                    frame_ids_local,
891                    rt_axis_sec,
892                })
893            } else {
894                None
895            };
896
897            specs
898                .par_iter()
899                .map(|spec| {
900                    // 1) core 1D evaluation (RT/IM/TOF fits etc.)
901                    let mut res = evaluate_spec_1d(rt_frames, spec, opts);
902
903                    // 2) decorate with m/z axis + mz_fit/mz_window
904                    //
905                    //    - uses the already-computed TOF window in `spec.tof_win`
906                    //    - does *not* depend on raw_points being attached
907                    //    - keeps the logic local and cheap
908                    decorate_with_mz_for_cluster(self, rt_frames, &mut res);(self, rt_frames, spec, scale, &mut res);
909
910                    // 3) optional raw point attachment (using shared context)
911                    if let Some(ref ctx) = attach_ctx {
912                        if opts.attach.attach_points && res.raw_sum > 0.0 && res.tof_fit.area > 0.0 {
913                            let (bin_lo, bin_hi) = bin_range_for_win(scale, spec.tof_win);
914                            let (im_lo, im_hi) = (spec.im_lo, spec.im_hi);
915                            let (rt_lo, rt_hi) = (spec.rt_lo, spec.rt_hi);
916
917                            let raw = attach_raw_points_for_spec_1d_in_ctx(
918                                ctx,
919                                scale,
920                                bin_lo,
921                                bin_hi,
922                                im_lo,
923                                im_hi,
924                                rt_lo,
925                                rt_hi,
926                                opts.attach.max_points,
927                            );
928                            res.raw_points = Some(raw);
929                        }
930                    }
931
932                    res
933                })
934                .collect::<Vec<_>>()
935        };
936
937        if num_threads == 0 {
938            run()
939        } else {
940            ThreadPoolBuilder::new()
941                .num_threads(num_threads)
942                .build()
943                .unwrap()
944                .install(run)
945        }
946    }
947
948    pub fn clusters_for_group(
949        &self,
950        window_group: u32,
951        tof_step: i32,
952        im_peaks: &[ImPeak1D],
953        rt_params: RtExpandParams,
954        build_opts: &BuildSpecOpts,
955        eval_opts: &Eval1DOpts,
956        require_rt_overlap: bool,
957        num_threads: usize,
958    ) -> Vec<ClusterResult1D> {
959        // Guard: all IM peaks must belong to this DIA group
960        debug_assert!(
961            im_peaks
962                .iter()
963                .all(|p| p.window_group == Some(window_group)),
964            "clusters_for_group: some IM peaks have wrong or missing window_group"
965        );
966
967        // Build RT+TOF grid for this DIA isolation window group.
968        let rt = self.make_rt_frames_for_group(window_group, tof_step);
969
970        // Force MS level 2 for DIA fragments
971        let build_opts_ms2 = build_opts.with_ms_level(2);
972
973        self.clusters_for_im_peaks_on_rt_frames(
974            rt,
975            im_peaks,
976            rt_params,
977            &build_opts_ms2,
978            eval_opts,
979            require_rt_overlap,
980            num_threads,
981        )
982    }
983
984    pub fn clusters_for_precursor(
985        &self,
986        tof_step: i32,
987        im_peaks: &[ImPeak1D],
988        rt_params: RtExpandParams,
989        build_opts: &BuildSpecOpts,
990        eval_opts: &Eval1DOpts,
991        require_rt_overlap: bool,
992        num_threads: usize,
993    ) -> Vec<ClusterResult1D> {
994        // Guard: all IM peaks must be MS1 (no window_group)
995        debug_assert!(
996            im_peaks.iter().all(|p| p.window_group.is_none()),
997            "clusters_for_precursor: IM peaks unexpectedly carry a window_group"
998        );
999
1000        // RT+TOF grid for precursor (MS1) frames
1001        let rt = self.make_rt_frames_for_precursor(tof_step);
1002
1003        // Force MS level 1 for precursor
1004        let build_opts_ms1 = build_opts.with_ms_level(1);
1005
1006        self.clusters_for_im_peaks_on_rt_frames(
1007            rt,
1008            im_peaks,
1009            rt_params,
1010            &build_opts_ms1,
1011            eval_opts,
1012            require_rt_overlap,
1013            num_threads,
1014        )
1015    }
1016
1017    /// End-to-end DIA → pseudo-DDA builder (geometric scoring).
1018    ///
1019    /// Thin wrapper around `build_pseudo_spectra_end_to_end`.
1020    pub fn build_pseudo_spectra_from_clusters_geom(
1021        &self,
1022        ms1: &[ClusterResult1D],
1023        ms2: &[ClusterResult1D],
1024        features: Option<&[SimpleFeature]>,
1025        cand_opts: &CandidateOpts,
1026        score_opts: &ScoreOpts,
1027        pseudo_opts: &PseudoSpecOpts,
1028    ) -> PseudoBuildResult {
1029        build_pseudo_spectra_end_to_end(
1030            self,
1031            ms1,
1032            ms2,
1033            features,
1034            cand_opts,
1035            score_opts,
1036            pseudo_opts,
1037        )
1038    }
1039
1040    /// End-to-end DIA → pseudo-DDA builder (XIC-based scoring).
1041    ///
1042    /// Thin wrapper around `build_pseudo_spectra_end_to_end_xic`.
1043    pub fn build_pseudo_spectra_from_clusters_xic(
1044        &self,
1045        ms1: &[ClusterResult1D],
1046        ms2: &[ClusterResult1D],
1047        features: Option<&[SimpleFeature]>,
1048        cand_opts: &CandidateOpts,
1049        xic_opts: &XicScoreOpts,
1050        pseudo_opts: &PseudoSpecOpts,
1051    ) -> PseudoBuildResult {
1052        build_pseudo_spectra_end_to_end_xic(
1053            self,
1054            ms1,
1055            ms2,
1056            features,
1057            cand_opts,
1058            xic_opts,
1059            pseudo_opts,
1060        )
1061    }
1062
1063    /// Naive "all pairs" builder stays as-is, still returning Vec<PseudoSpectrum>.
1064    pub fn build_pseudo_spectra_all_pairs_from_clusters(
1065        &self,
1066        ms1: &[ClusterResult1D],
1067        ms2: &[ClusterResult1D],
1068        features: Option<&[SimpleFeature]>,
1069        pseudo_opts: &PseudoSpecOpts,
1070    ) -> PseudoBuildResult {
1071        build_pseudo_spectra_all_pairs(self, ms1, ms2, features, pseudo_opts)
1072    }
1073
1074    // ==========================================================
1075    // Convenience methods using ClusterExtractionOpts (Phase 5)
1076    // ==========================================================
1077
1078    /// Extract clusters for a DIA window group using options struct.
1079    pub fn clusters_for_group_opts(
1080        &self,
1081        window_group: u32,
1082        im_peaks: &[ImPeak1D],
1083        opts: &ClusterExtractionOpts,
1084    ) -> Vec<ClusterResult1D> {
1085        self.clusters_for_group(
1086            window_group,
1087            opts.tof_step,
1088            im_peaks,
1089            opts.rt_params.clone(),
1090            &opts.build_opts,
1091            &opts.eval_opts,
1092            opts.require_rt_overlap,
1093            opts.num_threads,
1094        )
1095    }
1096
1097    /// Extract clusters for precursor (MS1) frames using options struct.
1098    pub fn clusters_for_precursor_opts(
1099        &self,
1100        im_peaks: &[ImPeak1D],
1101        opts: &ClusterExtractionOpts,
1102    ) -> Vec<ClusterResult1D> {
1103        self.clusters_for_precursor(
1104            opts.tof_step,
1105            im_peaks,
1106            opts.rt_params.clone(),
1107            &opts.build_opts,
1108            &opts.eval_opts,
1109            opts.require_rt_overlap,
1110            opts.num_threads,
1111        )
1112    }
1113
1114    /// Dense TOF×RT grid for all PRECURSOR (MS1) frames.
1115    /// `tof_step` controls TOF bin granularity (1 = full resolution).
1116    pub fn tof_rt_grid_precursor(&self, tof_step: i32) -> TofRtGrid {
1117        let rt = self.make_rt_frames_for_precursor(tof_step);
1118        build_tof_rt_grid_full(&rt, None)
1119    }
1120
1121    /// Dense TOF×RT grid for FRAGMENT frames in a DIA window group.
1122    /// `tof_step` controls TOF bin granularity (1 = full resolution).
1123    pub fn tof_rt_grid_for_group(&self, window_group: u32, tof_step: i32) -> TofRtGrid {
1124        let rt = self.make_rt_frames_for_group(window_group, tof_step);
1125        build_tof_rt_grid_full(&rt, Some(window_group))
1126    }
1127
1128    /// Debug helper: for a bunch of clusters that all live on the same
1129    /// DIA grid (either MS1 or one MS2 window_group), extract RawPoints
1130    /// per cluster.
1131    ///
1132    /// - `clusters`: precursor *or* fragment clusters.
1133    ///   All must have the same `ms_level`.
1134    /// - `window_group`: if you pass Some(g), we treat them as MS2 clusters
1135    ///   on that DIA group. If None and ms_level == 2, we try to infer g
1136    ///   from the first cluster’s `window_group`.
1137    /// - `tof_step`: must be the same as used during clustering.
1138    /// - `max_points`: optional thinning cap per cluster.
1139    /// - `num_threads`: used for `get_slice` in the raw extractor.
1140    pub fn debug_extract_raw_for_clusters(
1141        &self,
1142        clusters: &[ClusterResult1D],
1143        window_group: Option<u32>,
1144        tof_step: i32,
1145        max_points: Option<usize>,
1146        num_threads: usize,
1147    ) -> Vec<RawPoints> {
1148        if clusters.is_empty() {
1149            return Vec::new();
1150        }
1151
1152        // ---- 1. Basic sanity: consistent ms_level ---------------------------
1153        let ms_level = clusters[0].ms_level;
1154        debug_assert!(
1155            clusters.iter().all(|c| c.ms_level == ms_level),
1156            "debug_extract_raw_for_clusters: mixed ms_level in cluster list"
1157        );
1158
1159        // Optional filter by WG if caller wants to restrict.
1160        let clusters: Vec<ClusterResult1D> = clusters
1161            .iter()
1162            .filter(|c| match window_group {
1163                Some(g) => c.window_group == Some(g),
1164                None => true,
1165            })
1166            .cloned()
1167            .collect();
1168
1169        if clusters.is_empty() {
1170            return Vec::new();
1171        }
1172
1173        // ---- 2. Rebuild the RtFrames grid used for these clusters ----------
1174        //
1175        // For MS1: all clusters live on the precursor RtFrames grid.
1176        // For MS2: all clusters live on the RtFrames grid of a single window group.
1177        let rt_frames = if ms_level == 1 {
1178            // Precursor grid; we ignore window_group here.
1179            self.make_rt_frames_for_precursor(tof_step)
1180        } else {
1181            // Fragments: we need a valid DIA group.
1182            let g = match window_group {
1183                Some(g) => g,
1184                None => clusters[0]
1185                    .window_group
1186                    .expect("MS2 cluster without window_group; pass window_group explicitly"),
1187            };
1188            self.make_rt_frames_for_group(g, tof_step)
1189        };
1190
1191        // ---- 3. For each cluster, call the existing raw extractor ----------
1192        clusters
1193            .iter()
1194            .map(|c| {
1195                let (rt_lo, rt_hi) = c.rt_window;
1196                let (im_lo, im_hi) = c.im_window;
1197                let (tof_lo, tof_hi) = c.tof_window;
1198
1199                attach_raw_points_for_spec_1d_threads(
1200                    self,
1201                    &rt_frames,
1202                    tof_lo,
1203                    tof_hi,
1204                    im_lo,
1205                    im_hi,
1206                    rt_lo,
1207                    rt_hi,
1208                    max_points,
1209                    num_threads,
1210                )
1211            })
1212            .collect()
1213    }
1214}
1215
1216impl TimsData for TimsDatasetDIA {
1217    fn get_frame(&self, frame_id: u32) -> TimsFrame {
1218        self.loader.get_frame(frame_id)
1219    }
1220
1221    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
1222        self.loader.get_raw_frame(frame_id)
1223    }
1224
1225    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
1226        self.loader.get_slice(frame_ids, num_threads)
1227    }
1228    fn get_acquisition_mode(&self) -> AcquisitionMode {
1229        self.loader.get_acquisition_mode().clone()
1230    }
1231
1232    fn get_frame_count(&self) -> i32 {
1233        self.loader.get_frame_count()
1234    }
1235
1236    fn get_data_path(&self) -> &str {
1237        &self.loader.get_data_path()
1238    }
1239}
1240
1241impl IndexConverter for TimsDatasetDIA {
1242    fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
1243        self.loader
1244            .get_index_converter()
1245            .tof_to_mz(frame_id, tof_values)
1246    }
1247
1248    fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
1249        self.loader
1250            .get_index_converter()
1251            .mz_to_tof(frame_id, mz_values)
1252    }
1253
1254    fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
1255        self.loader
1256            .get_index_converter()
1257            .scan_to_inverse_mobility(frame_id, scan_values)
1258    }
1259
1260    fn inverse_mobility_to_scan(
1261        &self,
1262        frame_id: u32,
1263        inverse_mobility_values: &Vec<f64>,
1264    ) -> Vec<u32> {
1265        self.loader
1266            .get_index_converter()
1267            .inverse_mobility_to_scan(frame_id, inverse_mobility_values)
1268    }
1269}
1270
1271pub fn attach_raw_points_for_spec_1d_threads(
1272    ds: &TimsDatasetDIA,
1273    rt_frames: &RtFrames,
1274    final_bin_lo: usize,
1275    final_bin_hi: usize,
1276    final_im_lo: usize,
1277    final_im_hi: usize,
1278    final_rt_lo: usize,
1279    final_rt_hi: usize,
1280    max_points: Option<usize>,
1281    num_threads: usize,
1282) -> RawPoints {
1283    let scale = &*rt_frames.scale;
1284
1285    // Defensive clamp of bin indices to the valid bin range.
1286    let n_bins = scale.num_bins();
1287    if n_bins == 0 {
1288        return RawPoints::default();
1289    }
1290
1291    let mut bin_lo = final_bin_lo.min(n_bins.saturating_sub(1));
1292    let mut bin_hi = final_bin_hi.min(n_bins.saturating_sub(1));
1293    if bin_lo > bin_hi {
1294        std::mem::swap(&mut bin_lo, &mut bin_hi);
1295    }
1296
1297    // Axis window in *TOF-axis units*:
1298    //   [edges[bin_lo], cushion_hi_edge(edges[bin_hi+1])]
1299    let mut axis_lo = scale.edges[bin_lo];
1300    let hi_edge_idx = (bin_hi + 1).min(scale.edges.len().saturating_sub(1));
1301    let mut axis_hi = cushion_hi_edge(scale, scale.edges[hi_edge_idx]);
1302
1303    // RT frames and slice
1304    let frame_ids_local = rt_frames.frame_ids[final_rt_lo..=final_rt_hi].to_vec();
1305    let slice = ds.get_slice(frame_ids_local.clone(), num_threads.max(1));
1306    let scan_slices: Vec<Vec<ScanSlice>> =
1307        slice.frames.iter().map(|fr| build_scan_slices(fr)).collect();
1308
1309    // 1) Count – now using **TOF** as the search axis.
1310    let mut total = 0usize;
1311    for (fi, fr) in slice.frames.iter().enumerate() {
1312        let tofs = &fr.tof;               // raw TOF indices
1313        for sl in &scan_slices[fi] {
1314            if sl.scan < final_im_lo || sl.scan > final_im_hi {
1315                continue;
1316            }
1317            let l = lower_bound_tof(tofs, sl.start, sl.end, axis_lo);
1318            let r = upper_bound_tof(tofs, sl.start, sl.end, axis_hi);
1319            total += r.saturating_sub(l);
1320        }
1321    }
1322
1323    // 2) Last-chance widen: if empty, expand by ±1 bin in TOF space.
1324    if total == 0 {
1325        let lo_idx = bin_lo.saturating_sub(1);
1326        let hi_edge_idx_wide =
1327            (bin_hi + 2).min(n_bins).min(scale.edges.len().saturating_sub(1));
1328
1329        let try_lo = scale.edges[lo_idx];
1330        let try_hi = cushion_hi_edge(scale, scale.edges[hi_edge_idx_wide]);
1331
1332        let mut total2 = 0usize;
1333        for (fi, fr) in slice.frames.iter().enumerate() {
1334            let tofs = &fr.tof;
1335            for sl in &scan_slices[fi] {
1336                if sl.scan < final_im_lo || sl.scan > final_im_hi {
1337                    continue;
1338                }
1339                let l = lower_bound_tof(tofs, sl.start, sl.end, try_lo);
1340                let r = upper_bound_tof(tofs, sl.start, sl.end, try_hi);
1341                total2 += r.saturating_sub(l);
1342            }
1343        }
1344
1345        if total2 > 0 {
1346            total = total2;
1347            axis_lo = try_lo;
1348            axis_hi = try_hi;
1349        }
1350    }
1351
1352    // Still empty → bail with empty container
1353    if total == 0 {
1354        return RawPoints::default();
1355    }
1356
1357    let stride = max_points.map(|cap| thin_stride(total, cap)).unwrap_or(1);
1358    let reserve = total / stride + 8;
1359
1360    let mut pts = RawPoints {
1361        mz: Vec::with_capacity(reserve),
1362        rt: Vec::with_capacity(reserve),
1363        im: Vec::with_capacity(reserve),
1364        scan: Vec::with_capacity(reserve),
1365        intensity: Vec::with_capacity(reserve),
1366        tof: Vec::with_capacity(reserve),
1367        frame: Vec::with_capacity(reserve),
1368    };
1369
1370    let rt_axis_sec = rt_frames.rt_times[final_rt_lo..=final_rt_hi].to_vec();
1371
1372    // 3) Extraction with thinning – again use TOF range for selection
1373    let mut seen = 0usize;
1374    for (fi, fr) in slice.frames.iter().enumerate() {
1375        let mz   = &fr.ims_frame.mz;
1376        let it   = &fr.ims_frame.intensity;
1377        let ims  = &fr.ims_frame.mobility;
1378        let tofs = &fr.tof;
1379
1380        let len_all = mz.len().min(it.len()).min(ims.len()).min(tofs.len());
1381        let rt_val = rt_axis_sec[fi];
1382        let frame_id = frame_ids_local[fi];
1383
1384        for sl in &scan_slices[fi] {
1385            let s_abs = sl.scan;
1386            if s_abs < final_im_lo || s_abs > final_im_hi {
1387                continue;
1388            }
1389
1390            let mut l = lower_bound_tof(tofs, sl.start, sl.end, axis_lo);
1391            let mut r = upper_bound_tof(tofs, sl.start, sl.end, axis_hi);
1392            if r > len_all {
1393                r = len_all;
1394            }
1395            if l >= r {
1396                continue;
1397            }
1398
1399            while l < r {
1400                if stride == 1 || (seen % stride == 0) {
1401                    // Note: we still *store* m/z, but the selection is done in TOF.
1402                    pts.mz.push(mz[l] as f32);
1403                    pts.rt.push(rt_val);
1404                    pts.im.push(ims[l] as f32);
1405                    pts.scan.push(s_abs as u32);
1406                    pts.intensity.push(it[l] as f32);
1407                    pts.frame.push(frame_id);
1408                    pts.tof.push(tofs[l]);
1409                }
1410                seen += 1;
1411                l += 1;
1412            }
1413        }
1414    }
1415
1416    pts
1417}
1418
1419/// Binary search in TOF array [start, end) for lower bound.
1420#[inline]
1421fn lower_bound_tof(tofs: &[i32], start: usize, end: usize, x: f32) -> usize {
1422    let mut lo = start;
1423    let mut hi = end;
1424    let xf = x as f64;
1425    while lo < hi {
1426        let mid = (lo + hi) >> 1;
1427        if (tofs[mid] as f64) < xf {
1428            lo = mid + 1;
1429        } else {
1430            hi = mid;
1431        }
1432    }
1433    lo
1434}
1435
1436/// Binary search in TOF array [start, end) for upper bound.
1437#[inline]
1438fn upper_bound_tof(tofs: &[i32], start: usize, end: usize, x: f32) -> usize {
1439    let mut lo = start;
1440    let mut hi = end;
1441    let xf = x as f64;
1442    while lo < hi {
1443        let mid = (lo + hi) >> 1;
1444        if (tofs[mid] as f64) <= xf {
1445            lo = mid + 1;
1446        } else {
1447            hi = mid;
1448        }
1449    }
1450    lo
1451}
1452
1453// ----------------------------------------------------------------------
1454// Helpers for axis-based extraction (no ppm, no mz_min/mz_max needed)
1455// ----------------------------------------------------------------------
1456
1457/// Slightly "cushion" the high edge used for upper_bound search.
1458///
1459/// Semantics:
1460/// - `hi_edge` is typically `scale.edges[bin_hi + 1]` (the upper edge of the last bin).
1461/// - We nudge it outward by a tiny fraction of a *typical* bin width so values
1462///   that sit exactly on the edge aren't accidentally excluded.
1463/// - This is axis-generic: it works whether the axis is TOF or m/z-like.
1464#[inline]
1465fn cushion_hi_edge(scale: &TofScale, hi_edge: f32) -> f32 {
1466    let edges = &scale.edges;
1467    if edges.len() >= 2 {
1468        // Use a small fraction of the first bin width as epsilon.
1469        let bw = (edges[1] - edges[0]).abs().max(1e-6);
1470        hi_edge + 0.01 * bw
1471    } else {
1472        hi_edge
1473    }
1474}
1475
1476#[inline]
1477fn thin_stride(total: usize, cap: usize) -> usize {
1478    if cap == 0 || total <= cap {
1479        1
1480    } else {
1481        (total + cap - 1) / cap
1482    }
1483}