Skip to main content

rustdf/data/
dia.rs

1use std::sync::Arc;
2use crate::data::acquisition::AcquisitionMode;
3use rustc_hash::FxHashMap;
4use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader};
5use crate::data::meta::{
6    read_dia_ms_ms_info, read_dia_ms_ms_windows, read_global_meta_sql, read_meta_data_sql,
7    DiaMsMisInfo, DiaMsMsWindow, FrameMeta, GlobalMetaData,
8};
9use mscore::data::spectrum::MsType;
10use mscore::timstof::frame::{RawTimsFrame, TimsFrame};
11use mscore::timstof::slice::TimsSlice;
12use rand::prelude::IteratorRandom;
13use rayon::iter::IntoParallelRefIterator;
14use crate::cluster::peak::{build_frame_bin_view, build_tof_rt_grid_full, expand_many_im_peaks_along_rt, FrameBinView, ImPeak1D, RtExpandParams, RtFrames, TofRtGrid};
15use crate::cluster::utility::{TofScale};
16use crate::data::utility::merge_ranges;
17use rayon::prelude::*;
18use std::collections::{HashMap};
19use rayon::ThreadPoolBuilder;
20use crate::cluster::candidates::{build_pseudo_spectra_all_pairs, build_pseudo_spectra_end_to_end, build_pseudo_spectra_end_to_end_xic, PseudoBuildResult, ScoreOpts};
21use crate::cluster::cluster::{attach_raw_points_for_spec_1d_in_ctx, bin_range_for_win, build_scan_slices, decorate_with_mz_for_cluster, evaluate_spec_1d, make_specs_from_im_and_rt_groups_threads, BuildSpecOpts, ClusterResult1D, ClusterSpec1D, Eval1DOpts, RawAttachContext, RawPoints, ScanSlice};
22use crate::cluster::feature::SimpleFeature;
23use crate::cluster::pseudo::{PseudoSpecOpts};
24use crate::cluster::candidates::CandidateOpts;
25use crate::cluster::scoring::XicScoreOpts;
26
27// ==========================================================
28// Options structs for reducing parameter explosion (Phase 5)
29// ==========================================================
30
31/// Options for cluster extraction from IM peaks.
32#[derive(Clone, Debug)]
33pub struct ClusterExtractionOpts {
34    /// TOF step for grid binning (typically 1, 2, 4, 8).
35    pub tof_step: i32,
36    /// RT expansion parameters.
37    pub rt_params: RtExpandParams,
38    /// Spec building options (window padding, ms_level).
39    pub build_opts: BuildSpecOpts,
40    /// Evaluation options (refine, attach axes/traces).
41    pub eval_opts: Eval1DOpts,
42    /// Whether to require RT overlap for spec generation.
43    pub require_rt_overlap: bool,
44    /// Number of threads for parallel processing.
45    pub num_threads: usize,
46}
47
48impl Default for ClusterExtractionOpts {
49    fn default() -> Self {
50        Self {
51            tof_step: 8,
52            rt_params: RtExpandParams::default(),
53            build_opts: BuildSpecOpts::ms1_defaults(),
54            eval_opts: Eval1DOpts::default(),
55            require_rt_overlap: true,
56            num_threads: 4,
57        }
58    }
59}
60
61impl ClusterExtractionOpts {
62    /// Create options for MS1 (precursor) extraction.
63    pub fn for_ms1() -> Self {
64        Self {
65            build_opts: BuildSpecOpts::ms1_defaults(),
66            ..Self::default()
67        }
68    }
69
70    /// Create options for MS2 (fragment) extraction.
71    pub fn for_ms2() -> Self {
72        Self {
73            build_opts: BuildSpecOpts::ms2_defaults(),
74            ..Self::default()
75        }
76    }
77
78    /// Set the TOF step.
79    pub fn with_tof_step(mut self, step: i32) -> Self {
80        self.tof_step = step;
81        self
82    }
83
84    /// Set the number of threads.
85    pub fn with_threads(mut self, n: usize) -> Self {
86        self.num_threads = n;
87        self
88    }
89}
90
91/// Options for building pseudo spectra from clusters.
92#[derive(Clone, Debug)]
93pub struct PseudoSpectrumBuildOpts<'a> {
94    /// Candidate enumeration options.
95    pub cand_opts: &'a CandidateOpts,
96    /// Geometric scoring options.
97    pub score_opts: &'a ScoreOpts,
98    /// Pseudo spectrum construction options.
99    pub pseudo_opts: &'a PseudoSpecOpts,
100    /// Optional features for grouping.
101    pub features: Option<&'a [SimpleFeature]>,
102}
103
104#[derive(Clone, Debug)]
105pub struct ProgramSlice {
106    pub mz_lo: f64,
107    pub mz_hi: f64,
108    pub scan_lo: u32,
109    pub scan_hi: u32,
110}
111
112#[inline]
113fn ranges_overlap_u32(a: (u32, u32), b: (u32, u32)) -> bool {
114    let lo = a.0.max(b.0);
115    let hi = a.1.min(b.1);
116    hi >= lo
117}
118
119#[derive(Debug, Clone)]
120pub struct Ms2GroupProgram {
121    /// Raw per-row isolation windows (normalized, ascending, finite)
122    pub mz_windows: Vec<(f32, f32)>,
123    /// Raw per-row active scan ranges (normalized, inclusive)
124    pub scan_ranges: Vec<(u32, u32)>,
125    /// Union (convex hull) over all mz_windows for the group; None if no valid rows
126    pub mz_union: Option<(f32, f32)>,
127    /// Merged disjoint scan ranges (inclusive) over the group; empty = permissive fallback
128    pub scan_unions: Vec<(u32, u32)>,
129}
130
131#[derive(Debug, Clone)]
132pub struct DiaIndex {
133    /// MS2 frame_id -> window_group
134    pub frame_to_group: HashMap<u32, u32>,
135    /// window_group -> MS2 frame_ids (sorted by **time**)
136    pub group_to_frames: HashMap<u32, Vec<u32>>,
137    /// window_group -> list of (mz_lo, mz_hi) isolation ranges (normalized)
138    pub group_to_isolation: HashMap<u32, Vec<(f64, f64)>>,
139    /// window_group -> list of (scan_lo, scan_hi) active scan ranges (normalized, inclusive)
140    pub group_to_scan_ranges: HashMap<u32, Vec<(u32, u32)>>,
141    /// window_group -> convex m/z union (lo, hi)
142    pub group_to_mz_union: HashMap<u32, (f64, f64)>,
143    /// window_group -> merged disjoint scan unions (inclusive)
144    pub group_to_scan_unions: HashMap<u32, Vec<(u32, u32)>>,
145    /// frame_id -> time (seconds)
146    pub frame_time: HashMap<u32, f64>,
147    /// New: pre-materialized program slices per group.
148    pub group_to_slices: HashMap<u32, Arc<[ProgramSlice]>>,
149}
150
151impl DiaIndex {
152    pub fn new(meta: &[FrameMeta], info: &[DiaMsMisInfo], wins: &[DiaMsMsWindow]) -> Self {
153        // helpers
154        #[inline]
155        fn norm_f64_pair(a: f64, b: f64) -> Option<(f64, f64)> {
156            if !a.is_finite() || !b.is_finite() { return None; }
157            let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
158            if hi > lo { Some((lo, hi)) } else { None }
159        }
160        #[inline]
161        fn norm_u32_pair(a: u32, b: u32) -> Option<(u32, u32)> {
162            let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
163            Some((lo, hi))
164        }
165        fn merge_scan_ranges(mut ranges: Vec<(u32, u32)>) -> Vec<(u32, u32)> {
166            if ranges.is_empty() { return ranges; }
167            ranges.sort_unstable_by_key(|&(l, r)| (l, r));
168            let mut out: Vec<(u32,u32)> = Vec::with_capacity(ranges.len());
169            let mut cur = ranges[0];
170            for &(l, r) in &ranges[1..] {
171                if l <= cur.1.saturating_add(1) {
172                    if r > cur.1 { cur.1 = r; }
173                } else {
174                    out.push(cur);
175                    cur = (l, r);
176                }
177            }
178            out.push(cur);
179            out
180        }
181
182        // frame_id -> time
183        let mut frame_time: HashMap<u32, f64> = HashMap::new();
184        for m in meta {
185            frame_time.insert(m.id as u32, m.time);
186        }
187
188        // frame_id -> group; group -> frames
189        let mut frame_to_group: HashMap<u32, u32> = HashMap::new();
190        let mut group_to_frames: HashMap<u32, Vec<u32>> = HashMap::new();
191        for r in info {
192            let fid = r.frame_id;
193            frame_to_group.insert(fid, r.window_group);
194            group_to_frames.entry(r.window_group).or_default().push(fid);
195        }
196
197        // raw program rows
198        let mut group_to_isolation: HashMap<u32, Vec<(f64, f64)>> = HashMap::new();
199        let mut group_to_scan_ranges: HashMap<u32, Vec<(u32, u32)>> = HashMap::new();
200        for w in wins {
201            let half = 0.5 * w.isolation_width;
202            if half.is_finite() && half > 0.0 && w.isolation_mz.is_finite() {
203                let lo = w.isolation_mz - half;
204                let hi = w.isolation_mz + half;
205                if let Some(p) = norm_f64_pair(lo, hi) {
206                    group_to_isolation.entry(w.window_group).or_default().push(p);
207                }
208            }
209            if let Some(p) = norm_u32_pair(w.scan_num_begin, w.scan_num_end) {
210                group_to_scan_ranges.entry(w.window_group).or_default().push(p);
211            }
212        }
213
214        // unions + sort frames by time
215        let mut group_to_mz_union: HashMap<u32, (f64, f64)> = HashMap::new();
216        let mut group_to_scan_unions: HashMap<u32, Vec<(u32, u32)>> = HashMap::new();
217
218        for (g, frames) in group_to_frames.iter_mut() {
219            frames.sort_unstable_by(|&a, &b| {
220                let ta = frame_time.get(&a).copied().unwrap_or(f64::NAN);
221                let tb = frame_time.get(&b).copied().unwrap_or(f64::NAN);
222                ta.partial_cmp(&tb).unwrap_or(std::cmp::Ordering::Equal)
223            });
224
225            if let Some(v) = group_to_isolation.get(g) {
226                if !v.is_empty() {
227                    let lo = v.iter().map(|p| p.0).fold(f64::INFINITY, f64::min);
228                    let hi = v.iter().map(|p| p.1).fold(f64::NEG_INFINITY, f64::max);
229                    if lo.is_finite() && hi.is_finite() && hi > lo {
230                        group_to_mz_union.insert(*g, (lo, hi));
231                    }
232                }
233            }
234            let merged = merge_scan_ranges(group_to_scan_ranges.get(g).cloned().unwrap_or_default());
235            group_to_scan_unions.insert(*g, merged);
236        }
237
238        let mut group_to_slices: HashMap<u32, Arc<[ProgramSlice]>> = HashMap::new();
239
240        for (&g, iso_rows) in &group_to_isolation {
241            let scan_rows = group_to_scan_ranges.get(&g).cloned().unwrap_or_default();
242            let n = iso_rows.len().min(scan_rows.len());
243            let mut v = Vec::with_capacity(n);
244            for k in 0..n {
245                let (mz_lo, mz_hi)     = iso_rows[k];
246                let (scan_lo, scan_hi) = scan_rows[k];
247                v.push(ProgramSlice { mz_lo, mz_hi, scan_lo, scan_hi });
248            }
249            group_to_slices.insert(g, v.into());
250        }
251
252        DiaIndex {
253            frame_to_group,
254            group_to_frames,
255            group_to_isolation,
256            group_to_scan_ranges,
257            group_to_mz_union,
258            group_to_scan_unions,
259            frame_time,
260            group_to_slices,
261        }
262    }
263
264    pub fn program_slices_for_group(&self, g: u32) -> Vec<ProgramSlice> {
265        self.slices_for_group(g).to_vec()
266    }
267
268    /// Tiles in group `g` that could have selected this precursor.
269    ///
270    /// Conditions:
271    ///   1) Precursor IM window overlaps tile scan band.
272    ///   2) Precursor m/z lies inside tile's isolation m/z window.
273    /// Tiles in group `g` that could have selected this precursor.
274    ///
275    /// Conditions:
276    ///   1) Precursor **apex** IM scan lies inside tile scan band.
277    ///   2) Precursor m/z lies inside tile's isolation m/z window.
278    pub fn tiles_for_precursor_in_group(
279        &self,
280        g: u32,
281        prec_mz: f32,
282        im_apex: f32,
283    ) -> Vec<usize> {
284        let slices = self.slices_for_group(g);
285        let mut hits = Vec::new();
286
287        if !prec_mz.is_finite() || !im_apex.is_finite() {
288            return hits;
289        }
290
291        for (idx, s) in slices.iter().enumerate() {
292            // Precursor m/z inside isolation window
293            if prec_mz < s.mz_lo as f32 || prec_mz > s.mz_hi as f32 {
294                continue;
295            }
296
297            // Apex inside scan band (no margin for now, same semantics as your debug plot)
298            let scan_lo = s.scan_lo as f32;
299            let scan_hi = s.scan_hi as f32;
300            if im_apex < scan_lo || im_apex > scan_hi {
301                continue;
302            }
303
304            hits.push(idx);
305        }
306
307        hits
308    }
309
310    /// Tiles in group `g` that an MS2 fragment cluster could belong to.
311    ///
312    /// IMPORTANT:
313    ///   Fragment m/z is *not* constrained by isolation window (selection
314    ///   happened before fragmentation). We only check the IM/scan range.
315    pub fn tiles_for_fragment_in_group(
316        &self,
317        g: u32,
318        im_window: (usize, usize),
319    ) -> Vec<usize> {
320        let slices = self.program_slices_for_group(g);
321        let mut hits = Vec::new();
322
323        for (idx, s) in slices.iter().enumerate() {
324            let tile_scans = (s.scan_lo, s.scan_hi);
325
326            if ranges_overlap_u32(
327                (im_window.0 as u32, im_window.1 as u32),
328                tile_scans,
329            ) {
330                hits.push(idx);
331            }
332        }
333
334        hits
335    }
336
337    /// Convenience: materialize a program description for a group.
338    pub fn program_for_group(&self, g: u32) -> Ms2GroupProgram {
339        let mz_windows = self.group_to_isolation
340            .get(&g)
341            .map(|v| v.iter().map(|&(a,b)| (a as f32, b as f32)).collect())
342            .unwrap_or_else(Vec::new);
343
344        let scan_ranges = self.group_to_scan_ranges
345            .get(&g)
346            .cloned()
347            .unwrap_or_else(Vec::new);
348
349        let mz_union = self.group_to_mz_union
350            .get(&g)
351            .map(|&(a,b)| (a as f32, b as f32));
352
353        let scan_unions = self.group_to_scan_unions
354            .get(&g)
355            .cloned()
356            .unwrap_or_else(Vec::new);
357
358        Ms2GroupProgram { mz_windows, scan_ranges, mz_union, scan_unions }
359    }
360
361    pub fn groups_for_precursor(
362        &self,
363        prec_mz: f32,
364        im_apex: f32,
365    ) -> Vec<u32> {
366        if !prec_mz.is_finite() || !im_apex.is_finite() {
367            return Vec::new();
368        }
369
370        let mut out = Vec::new();
371        for (&g, &(mz_lo, mz_hi)) in &self.group_to_mz_union {
372            if prec_mz < mz_lo as f32 || prec_mz > mz_hi as f32 {
373                continue;
374            }
375
376            // optionally also check scan_unions
377            if let Some(unions) = self.group_to_scan_unions.get(&g) {
378
379                if !unions.iter().any(|&(lo, hi)| {
380                    (im_apex as u32) >= lo && (im_apex as u32) <= hi
381                }) {
382                    continue;
383                }
384            }
385
386            let tiles = self.tiles_for_precursor_in_group(g, prec_mz, im_apex);
387            if !tiles.is_empty() {
388                out.push(g);
389            }
390        }
391        out
392    }
393
394    /// Return the m/z isolation bounds (lo, hi) for a given tile index in a group.
395    ///
396    /// If the tile index is out of range for this group, returns (NaN, NaN).
397    /// This is used by the fragment query to decide whether a fragment's m/z
398    /// lies **inside** the precursor-selection band of the shared tile.
399    pub fn tile_mz_bounds(&self, g: u32, tile_idx: usize) -> (f32, f32) {
400        let slices = self.slices_for_group(g);
401        if tile_idx >= slices.len() {
402            return (f32::NAN, f32::NAN);
403        }
404        let s = &slices[tile_idx];
405        (s.mz_lo as f32, s.mz_hi as f32)
406    }
407
408    #[inline]
409    pub fn mz_bounds_for_window_group_core(&self, g: u32) -> Option<(f32, f32)> {
410        self.group_to_mz_union.get(&g).map(|&(a,b)| (a as f32, b as f32))
411    }
412    #[inline]
413    pub fn scan_unions_for_window_group_core(&self, g: u32) -> Option<Vec<(usize, usize)>> {
414        self.group_to_scan_unions.get(&g).map(|v| v.iter().map(|&(l,r)| (l as usize, r as usize)).collect())
415    }
416
417    fn slices_for_group(&self, g: u32) -> &[ProgramSlice] {
418        self.group_to_slices.get(&g).map(|a| &a[..]).unwrap_or(&[])
419    }
420
421}
422
423pub struct TimsDatasetDIA {
424    pub loader: TimsDataLoader,
425    pub global_meta_data: GlobalMetaData,
426    pub meta_data: Vec<FrameMeta>,
427    pub dia_ms_ms_info: Vec<DiaMsMisInfo>,
428    pub dia_ms_ms_windows: Vec<DiaMsMsWindow>,
429    pub dia_index: DiaIndex,
430}
431
432impl TimsDatasetDIA {
433    pub fn new(
434        bruker_lib_path: &str,
435        data_path: &str,
436        in_memory: bool,
437        use_bruker_sdk: bool,
438    ) -> Self {
439        // TODO: error handling
440        let global_meta_data = read_global_meta_sql(data_path).unwrap();
441        let meta_data = read_meta_data_sql(data_path).unwrap();
442        let dia_ms_mis_info = read_dia_ms_ms_info(data_path).unwrap();
443        let dia_ms_ms_windows = read_dia_ms_ms_windows(data_path).unwrap();
444
445        let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
446        let im_lower = global_meta_data.one_over_k0_range_lower;
447        let im_upper = global_meta_data.one_over_k0_range_upper;
448
449        let tof_max_index = global_meta_data.tof_max_index;
450        let mz_lower = global_meta_data.mz_acquisition_range_lower;
451        let mz_upper = global_meta_data.mz_acquisition_range_upper;
452
453        let loader = match in_memory {
454            true => TimsDataLoader::new_in_memory(
455                bruker_lib_path,
456                data_path,
457                use_bruker_sdk,
458                scan_max_index,
459                im_lower,
460                im_upper,
461                tof_max_index,
462                mz_lower,
463                mz_upper,
464            ),
465            false => TimsDataLoader::new_lazy(
466                bruker_lib_path,
467                data_path,
468                use_bruker_sdk,
469                scan_max_index,
470                im_lower,
471                im_upper,
472                tof_max_index,
473                mz_lower,
474                mz_upper,
475            ),
476        };
477
478        let dia_index = DiaIndex::new(&meta_data, &dia_ms_mis_info, &dia_ms_ms_windows);
479
480        TimsDatasetDIA {
481            loader,
482            global_meta_data,
483            meta_data,
484            dia_ms_ms_info: dia_ms_mis_info,
485            dia_ms_ms_windows,
486            dia_index,
487        }
488    }
489
490    /// Create a DIA dataset with regression-derived m/z calibration.
491    ///
492    /// This method uses externally-provided m/z calibration coefficients (e.g., from
493    /// linear regression on SDK data) instead of the simple boundary model.
494    pub fn new_with_mz_calibration(
495        data_path: &str,
496        in_memory: bool,
497        tof_intercept: f64,
498        tof_slope: f64,
499    ) -> Self {
500        let meta_data = read_meta_data_sql(data_path).unwrap();
501        let global_meta_data = read_global_meta_sql(data_path).unwrap();
502        let dia_ms_mis_info = read_dia_ms_ms_info(data_path).unwrap();
503        let dia_ms_ms_windows = read_dia_ms_ms_windows(data_path).unwrap();
504
505        let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
506        let im_lower = global_meta_data.one_over_k0_range_lower;
507        let im_upper = global_meta_data.one_over_k0_range_upper;
508
509        let loader = match in_memory {
510            true => TimsDataLoader::new_in_memory_with_mz_calibration(
511                data_path,
512                tof_intercept,
513                tof_slope,
514                im_lower,
515                im_upper,
516                scan_max_index,
517            ),
518            false => TimsDataLoader::new_lazy_with_mz_calibration(
519                data_path,
520                tof_intercept,
521                tof_slope,
522                im_lower,
523                im_upper,
524                scan_max_index,
525            ),
526        };
527
528        let dia_index = DiaIndex::new(&meta_data, &dia_ms_mis_info, &dia_ms_ms_windows);
529
530        TimsDatasetDIA {
531            loader,
532            global_meta_data,
533            meta_data,
534            dia_ms_ms_info: dia_ms_mis_info,
535            dia_ms_ms_windows,
536            dia_index,
537        }
538    }
539
540    pub fn program_for_group(&self, g: u32) -> Ms2GroupProgram {
541        self.dia_index.program_for_group(g)
542    }
543
544    /// Collect all program slices for a window group from DiaFrameMsMsWindows.
545    /// Isolation window: [isolation_mz - width/2, isolation_mz + width/2]
546    pub fn program_slices_for_group(&self, group: u32) -> Vec<ProgramSlice> {
547        self.dia_ms_ms_windows
548            .iter()
549            .filter(|w| w.window_group == group)
550            .map(|w| {
551                let half = 0.5 * w.isolation_width;
552                ProgramSlice {
553                    mz_lo: w.isolation_mz - half,
554                    mz_hi: w.isolation_mz + half,
555                    scan_lo: w.scan_num_begin,
556                    scan_hi: w.scan_num_end,
557                }
558            })
559            .collect()
560    }
561
562    /// Sample MS1 noise from `num_frames` random precursor frames in the
563    /// reference dataset, sum them into one combined TimsFrame, and stamp
564    /// the result's `ms_type` as `MsType::Precursor` so downstream
565    /// `simulated + noise` adds don't collapse the simulated frame's type
566    /// to `MsType::Unknown`.
567    ///
568    /// Empty-fallback handling
569    /// -----------------------
570    /// `loader.get_frame()` returns `TimsFrame::default()` (which is
571    /// `frame_id=0, ms_type=MsType::Unknown`) when the requested raw
572    /// frame has no scan data. If one of those Default-shaped frames is
573    /// the first element fed into the inner accumulator, every subsequent
574    /// `sampled_frame + frame` retains `frame_id=0, ms_type=Unknown` via
575    /// `TimsFrame::Add`'s correct "LHS frame_id wins; mismatched ms_type
576    /// → Unknown" semantics. That corrupted noise then leaks through the
577    /// outer simulator add. The fix here:
578    ///   1. Skip empty / Default-shaped candidates before they enter the
579    ///      accumulator (`scan.is_empty()` or `ms_type == Unknown`),
580    ///   2. Use a bounded retry pool so we don't hang on degenerate
581    ///      references (e.g. a near-empty blank gradient),
582    ///   3. Stamp the returned frame's `ms_type` explicitly — purely
583    ///      defensive, documents the post-condition.
584    pub fn sample_precursor_signal(
585        &self,
586        num_frames: usize,
587        max_intensity: f64,
588        take_probability: f64,
589    ) -> TimsFrame {
590        let target_type = MsType::Precursor;
591        let pool: Vec<&FrameMeta> = self
592            .meta_data
593            .iter()
594            .filter(|x| x.ms_ms_type == 0)
595            .collect();
596        let mut rng = rand::thread_rng();
597        let mut sampled_frames: Vec<TimsFrame> =
598            self.collect_typed_noise_samples(&pool, num_frames, max_intensity,
599                                             take_probability, &mut rng,
600                                             |meta| meta.id as u32);
601        Self::accumulate_or_typed_empty(&mut sampled_frames, target_type)
602    }
603
604    /// Sample MS2 noise restricted to a window_group from the reference
605    /// dataset. See `sample_precursor_signal` for the leak diagnosis and
606    /// the design notes; the only difference here is the candidate pool
607    /// (DIA-MS²-info filtered by window_group) and the type stamp
608    /// (`MsType::FragmentDia`).
609    pub fn sample_fragment_signal(
610        &self,
611        num_frames: usize,
612        window_group: u32,
613        max_intensity: f64,
614        take_probability: f64,
615    ) -> TimsFrame {
616        let target_type = MsType::FragmentDia;
617        let pool: Vec<u32> = self
618            .dia_ms_ms_info
619            .iter()
620            .filter(|x| x.window_group == window_group)
621            .map(|x| x.frame_id)
622            .collect();
623        let mut rng = rand::thread_rng();
624        let mut sampled_frames: Vec<TimsFrame> =
625            self.collect_typed_noise_samples(&pool, num_frames, max_intensity,
626                                             take_probability, &mut rng,
627                                             |&fid| fid);
628        Self::accumulate_or_typed_empty(&mut sampled_frames, target_type)
629    }
630
631    /// Internal: pull up to `num_frames` non-empty TimsFrames from `pool`
632    /// via the loader, with bounded retry to skip empty raws.
633    /// `key_fn` lifts a pool element to a frame_id.
634    fn collect_typed_noise_samples<T>(
635        &self,
636        pool: &[T],
637        num_frames: usize,
638        max_intensity: f64,
639        take_probability: f64,
640        rng: &mut impl rand::Rng,
641        key_fn: impl Fn(&T) -> u32,
642    ) -> Vec<TimsFrame> {
643        let mut out: Vec<TimsFrame> = Vec::with_capacity(num_frames);
644        if pool.is_empty() || num_frames == 0 {
645            return out;
646        }
647        let max_attempts = (num_frames * 8).max(16);
648        let mut attempts = 0usize;
649        use rand::seq::SliceRandom;
650        while out.len() < num_frames && attempts < max_attempts {
651            attempts += 1;
652            let Some(candidate_meta) = pool.choose(rng) else { break };
653            let candidate = self
654                .loader
655                .get_frame(key_fn(candidate_meta))
656                .filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0,
657                               max_intensity, 0, i32::MAX)
658                .generate_random_sample(take_probability);
659            // Skip the loader's empty-frame Default-fallback so the
660            // returned noise never carries MsType::Unknown into the
661            // outer simulator add.
662            if candidate.scan.is_empty() || candidate.ms_type == MsType::Unknown {
663                continue;
664            }
665            out.push(candidate);
666        }
667        out
668    }
669
670    /// Internal: fold a Vec of typed noise samples into one via Add, and
671    /// return a *typed* empty TimsFrame if the input was empty so callers
672    /// don't get Default::default()'s Unknown back.
673    fn accumulate_or_typed_empty(
674        samples: &mut Vec<TimsFrame>,
675        target_type: MsType,
676    ) -> TimsFrame {
677        if samples.is_empty() {
678            return TimsFrame::new(
679                0, target_type, 0.0,
680                vec![], vec![], vec![], vec![], vec![],
681            );
682        }
683        let mut acc = samples.remove(0);
684        for f in samples.drain(..) {
685            acc = acc + f;
686        }
687        // Belt-and-braces: if the inner Add chain somehow collapsed to
688        // Unknown despite the input filter, restore the type the caller
689        // explicitly asked for.
690        acc.ms_type = target_type;
691        acc
692    }
693
694    /// All DIA window_group IDs present in the file (sorted unique).
695    pub fn dia_window_groups(&self) -> Vec<u32> {
696        let mut gs: Vec<u32> = self
697            .dia_ms_ms_info
698            .iter()
699            .map(|x| x.window_group)
700            .collect();
701        gs.sort_unstable();
702        gs.dedup();
703        gs
704    }
705
706    pub fn window_groups_for_precursor(&self, prec_mz: f32, im_apex: f32) -> Vec<u32> {
707        self.dia_index.groups_for_precursor(prec_mz, im_apex)
708    }
709
710    /// Map frame_id -> (time, ms_ms_type) for quick lookups.
711    fn frame_time_map(&self) -> FxHashMap<u32, (f32, i64)> {
712        let mut m = FxHashMap::default();
713        for fm in &self.meta_data {
714            m.insert(fm.id as u32, (fm.time as f32, fm.ms_ms_type));
715        }
716        m
717    }
718
719    /// RT-sorted **fragment** frames for a given DIA group.
720    pub fn fragment_frame_ids_and_times_for_group_core(&self, window_group: u32) -> (Vec<u32>, Vec<f32>) {
721        let time_map = self.frame_time_map();
722        let mut rows: Vec<(u32, f32)> = self
723            .dia_ms_ms_info
724            .iter()
725            .filter(|x| x.window_group == window_group)
726            .filter_map(|x| {
727                time_map.get(&(x.frame_id)).map(|(t, _ms2)| (x.frame_id, *t))
728            })
729            .collect();
730
731        // Just in case: keep only MS2 frames (ms_ms_type != 0)
732        rows.retain(|(fid, _)| time_map.get(fid).map(|(_, ty)| *ty != 0).unwrap_or(false));
733
734        rows.sort_by(|a,b| a.1.partial_cmp(&b.1).unwrap());
735        let (ids, times): (Vec<_>, Vec<_>) = rows.into_iter().unzip();
736        (ids, times)
737    }
738
739    /// Merged, sorted **global scan** unions for this DIA group, from DiaFrameMsMsWindows.
740    /// Returns None if there are no window rows for this group.
741    pub fn scan_unions_for_window_group_core(&self, window_group: u32) -> Option<Vec<(usize, usize)>> {
742        let ranges: Vec<(usize, usize)> = self
743            .dia_ms_ms_windows
744            .iter()
745            .filter(|w| w.window_group == window_group)
746            .map(|w| {
747                let l = w.scan_num_begin as usize;
748                let r = w.scan_num_end as usize;
749                if l <= r { (l, r) } else { (r, l) }
750            })
751            .collect();
752        if ranges.is_empty() {
753            return None;
754        }
755        Some(merge_ranges(ranges))
756    }
757
758    /// m/z unions (min..max) for the group from DiaFrameMsMsWindows (wide clamp).
759    pub fn mz_bounds_for_window_group_core(&self, window_group: u32) -> Option<(f32, f32)> {
760        let mut lo = f32::INFINITY;
761        let mut hi = f32::NEG_INFINITY;
762        let mut hit = false;
763        for w in &self.dia_ms_ms_windows {
764            if w.window_group == window_group {
765                let c = w.isolation_mz as f32;
766                let half = 0.5f32 * (w.isolation_width as f32);
767                lo = lo.min(c - half);
768                hi = hi.max(c + half);
769                hit = true;
770            }
771        }
772        if hit && hi > lo && lo.is_finite() && hi.is_finite() {
773            Some((lo, hi))
774        } else {
775            None
776        }
777    }
778
779    /// RT-sorted FRAGMENT frames + times for a DIA group, converted into FrameBinView rows.
780    /// `tof_step` controls TOF granularity of CSR binning.
781    pub fn make_rt_frames_for_group(&self, window_group: u32, tof_step: i32) -> RtFrames {
782        assert!(tof_step > 0);
783
784        let (ids, times) = self.fragment_frame_ids_and_times_for_group_core(window_group);
785        assert!(
786            !ids.is_empty(),
787            "No MS2 frames for window_group {}",
788            window_group
789        );
790
791        let global_num_scans = self.meta_data
792            .iter()
793            .filter(|m| ids.binary_search(&(m.id as u32)).is_ok())
794            .map(|m| m.num_scans as usize)
795            .max()
796            .unwrap_or(0);
797
798        // TOF scale for this group
799        let scale = self.tof_scale_for_group(window_group, tof_step);
800
801        let frames: Vec<FrameBinView> = ids.par_iter()
802            .map(|&fid| build_frame_bin_view(self.get_frame(fid), &scale, global_num_scans))
803            .collect();
804
805        RtFrames {
806            frames,
807            frame_ids: ids,
808            rt_times: times,
809            scale: Arc::new(scale),
810        }
811    }
812
813    /// RT-sorted PRECURSOR frames (ms_ms_type == 0) into FrameBinView rows.
814    /// `tof_step` controls TOF granularity of CSR binning.
815    pub fn make_rt_frames_for_precursor(&self, tof_step: i32) -> RtFrames {
816        assert!(tof_step > 0);
817
818        let mut rows: Vec<(u32, f32, usize)> = self.meta_data
819            .iter()
820            .filter(|m| m.ms_ms_type == 0)
821            .map(|m| (m.id as u32, m.time as f32, m.num_scans as usize))
822            .collect();
823
824        assert!(!rows.is_empty(), "No precursor (MS1) frames found");
825        rows.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
826
827        let frame_ids: Vec<u32> = rows.iter().map(|r| r.0).collect();
828        let rt_times:  Vec<f32> = rows.iter().map(|r| r.1).collect();
829        let global_num_scans = rows.iter().map(|r| r.2).max().unwrap_or(1);
830
831        // Build TOF scale from precursor frames
832        let frames_for_scale: Vec<_> = frame_ids.iter().map(|&fid| self.get_frame(fid)).collect();
833        let scale = TofScale::build_from_tof(&frames_for_scale, tof_step)
834            .expect("make_rt_frames_for_precursor: failed to build TOF scale");
835
836        let frames: Vec<FrameBinView> = frame_ids.par_iter()
837            .map(|&fid| build_frame_bin_view(self.get_frame(fid), &scale, global_num_scans))
838            .collect();
839
840        RtFrames {
841            frames,
842            frame_ids,
843            rt_times,
844            scale: Arc::new(scale),
845        }
846    }
847
848    /// Build a TOF-based scale for a DIA group.
849    /// `tof_step = 1` → max TOF resolution; larger steps downsample.
850    pub fn tof_scale_for_group(&self, window_group: u32, tof_step: i32) -> TofScale {
851        assert!(tof_step > 0);
852
853        let (ids, _times) = self.fragment_frame_ids_and_times_for_group_core(window_group);
854        assert!(
855            !ids.is_empty(),
856            "tof_scale_for_group: no MS2 frames for window_group {}",
857            window_group
858        );
859
860        let frames: Vec<_> = ids.iter().map(|&fid| self.get_frame(fid)).collect();
861
862        TofScale::build_from_tof(&frames, tof_step)
863            .expect("tof_scale_for_group: failed to build TOF scale (empty or degenerate)")
864    }
865
866    /// Conservative: derive a TOF scale by scanning actual frame data.
867    pub fn tof_scale_from_frames_scan(
868        &self,
869        frame_ids: &[u32],
870        tof_step: i32,
871    ) -> Option<TofScale> {
872        assert!(tof_step > 0);
873        let frames: Vec<_> = frame_ids.iter().map(|&fid| self.get_frame(fid)).collect();
874        TofScale::build_from_tof(&frames, tof_step)
875    }
876
877    /// Internal helper: go from a set of IM peaks to evaluated 1D clusters
878    /// on a given RtFrames grid (precursor or DIA group).
879    fn clusters_for_im_peaks_on_rt_frames(
880        &self,
881        rt: RtFrames,
882        im_peaks: &[ImPeak1D],
883        rt_params: RtExpandParams,
884        build_opts: &BuildSpecOpts,
885        eval_opts: &Eval1DOpts,
886        require_rt_overlap: bool,
887        num_threads: usize,
888    ) -> Vec<ClusterResult1D> {
889        // 1) Expand IM peaks along RT on this grid
890        let rt_groups = expand_many_im_peaks_along_rt(
891            im_peaks,
892            &rt.frames,
893            rt.ctx(),
894            rt.scale.as_ref(),
895            rt_params,
896        );
897
898        // 2) Build 1D specs (RT × IM × TOF-window)
899        let specs: Vec<ClusterSpec1D> = make_specs_from_im_and_rt_groups_threads(
900            im_peaks,
901            &rt_groups,
902            &rt,
903            build_opts,
904            require_rt_overlap,
905            num_threads,
906        );
907
908        // 3) Evaluate all specs on this RtFrames grid
909        self.evaluate_specs_1d_threads(&rt, &specs, eval_opts, num_threads)
910    }
911
912    /// Evaluate a batch of 1D cluster specs on the given RtFrames.
913    /// - Runs spec evaluation (marginals + moment fits) in parallel.
914    /// - Optionally attaches raw points using the finalized **TOF-bin window**
915    ///   and the spec’s IM/RT windows.
916    /// - `num_threads == 0` => use rayon’s global pool.
917    pub fn evaluate_specs_1d_threads(
918        &self,
919        rt_frames: &RtFrames,
920        specs: &[ClusterSpec1D],
921        opts: &Eval1DOpts,
922        num_threads: usize,
923    ) -> Vec<ClusterResult1D> {
924        if specs.is_empty() {
925            return Vec::new();
926        }
927
928        let scale = &*rt_frames.scale;
929
930        let run = || {
931            // Pre-build raw attach context once if needed
932            let attach_ctx = if opts.attach.attach_points {
933                let frame_ids_local = rt_frames.frame_ids.clone();
934                let slice = self.get_slice(frame_ids_local.clone(), num_threads.max(1));
935                let scan_slices = slice
936                    .frames
937                    .iter()
938                    .map(|fr| build_scan_slices(fr))
939                    .collect::<Vec<_>>();
940                let rt_axis_sec = rt_frames.rt_times.clone();
941
942                Some(RawAttachContext {
943                    slice,
944                    scan_slices,
945                    frame_ids_local,
946                    rt_axis_sec,
947                })
948            } else {
949                None
950            };
951
952            specs
953                .par_iter()
954                .map(|spec| {
955                    // 1) core 1D evaluation (RT/IM/TOF fits etc.)
956                    let mut res = evaluate_spec_1d(rt_frames, spec, opts);
957
958                    // 2) decorate with m/z axis + mz_fit/mz_window
959                    //
960                    //    - uses the already-computed TOF window in `spec.tof_win`
961                    //    - does *not* depend on raw_points being attached
962                    //    - keeps the logic local and cheap
963                    decorate_with_mz_for_cluster(self, rt_frames, &mut res);(self, rt_frames, spec, scale, &mut res);
964
965                    // 3) optional raw point attachment (using shared context)
966                    if let Some(ref ctx) = attach_ctx {
967                        if opts.attach.attach_points && res.raw_sum > 0.0 && res.tof_fit.area > 0.0 {
968                            let (bin_lo, bin_hi) = bin_range_for_win(scale, spec.tof_win);
969                            let (im_lo, im_hi) = (spec.im_lo, spec.im_hi);
970                            let (rt_lo, rt_hi) = (spec.rt_lo, spec.rt_hi);
971
972                            let raw = attach_raw_points_for_spec_1d_in_ctx(
973                                ctx,
974                                scale,
975                                bin_lo,
976                                bin_hi,
977                                im_lo,
978                                im_hi,
979                                rt_lo,
980                                rt_hi,
981                                opts.attach.max_points,
982                            );
983                            res.raw_points = Some(raw);
984                        }
985                    }
986
987                    res
988                })
989                .collect::<Vec<_>>()
990        };
991
992        if num_threads == 0 {
993            run()
994        } else {
995            ThreadPoolBuilder::new()
996                .num_threads(num_threads)
997                .build()
998                .unwrap()
999                .install(run)
1000        }
1001    }
1002
1003    pub fn clusters_for_group(
1004        &self,
1005        window_group: u32,
1006        tof_step: i32,
1007        im_peaks: &[ImPeak1D],
1008        rt_params: RtExpandParams,
1009        build_opts: &BuildSpecOpts,
1010        eval_opts: &Eval1DOpts,
1011        require_rt_overlap: bool,
1012        num_threads: usize,
1013    ) -> Vec<ClusterResult1D> {
1014        // Guard: all IM peaks must belong to this DIA group
1015        debug_assert!(
1016            im_peaks
1017                .iter()
1018                .all(|p| p.window_group == Some(window_group)),
1019            "clusters_for_group: some IM peaks have wrong or missing window_group"
1020        );
1021
1022        // Build RT+TOF grid for this DIA isolation window group.
1023        let rt = self.make_rt_frames_for_group(window_group, tof_step);
1024
1025        // Force MS level 2 for DIA fragments
1026        let build_opts_ms2 = build_opts.with_ms_level(2);
1027
1028        self.clusters_for_im_peaks_on_rt_frames(
1029            rt,
1030            im_peaks,
1031            rt_params,
1032            &build_opts_ms2,
1033            eval_opts,
1034            require_rt_overlap,
1035            num_threads,
1036        )
1037    }
1038
1039    pub fn clusters_for_precursor(
1040        &self,
1041        tof_step: i32,
1042        im_peaks: &[ImPeak1D],
1043        rt_params: RtExpandParams,
1044        build_opts: &BuildSpecOpts,
1045        eval_opts: &Eval1DOpts,
1046        require_rt_overlap: bool,
1047        num_threads: usize,
1048    ) -> Vec<ClusterResult1D> {
1049        // Guard: all IM peaks must be MS1 (no window_group)
1050        debug_assert!(
1051            im_peaks.iter().all(|p| p.window_group.is_none()),
1052            "clusters_for_precursor: IM peaks unexpectedly carry a window_group"
1053        );
1054
1055        // RT+TOF grid for precursor (MS1) frames
1056        let rt = self.make_rt_frames_for_precursor(tof_step);
1057
1058        // Force MS level 1 for precursor
1059        let build_opts_ms1 = build_opts.with_ms_level(1);
1060
1061        self.clusters_for_im_peaks_on_rt_frames(
1062            rt,
1063            im_peaks,
1064            rt_params,
1065            &build_opts_ms1,
1066            eval_opts,
1067            require_rt_overlap,
1068            num_threads,
1069        )
1070    }
1071
1072    /// End-to-end DIA → pseudo-DDA builder (geometric scoring).
1073    ///
1074    /// Thin wrapper around `build_pseudo_spectra_end_to_end`.
1075    pub fn build_pseudo_spectra_from_clusters_geom(
1076        &self,
1077        ms1: &[ClusterResult1D],
1078        ms2: &[ClusterResult1D],
1079        features: Option<&[SimpleFeature]>,
1080        cand_opts: &CandidateOpts,
1081        score_opts: &ScoreOpts,
1082        pseudo_opts: &PseudoSpecOpts,
1083    ) -> PseudoBuildResult {
1084        build_pseudo_spectra_end_to_end(
1085            self,
1086            ms1,
1087            ms2,
1088            features,
1089            cand_opts,
1090            score_opts,
1091            pseudo_opts,
1092        )
1093    }
1094
1095    /// End-to-end DIA → pseudo-DDA builder (XIC-based scoring).
1096    ///
1097    /// Thin wrapper around `build_pseudo_spectra_end_to_end_xic`.
1098    pub fn build_pseudo_spectra_from_clusters_xic(
1099        &self,
1100        ms1: &[ClusterResult1D],
1101        ms2: &[ClusterResult1D],
1102        features: Option<&[SimpleFeature]>,
1103        cand_opts: &CandidateOpts,
1104        xic_opts: &XicScoreOpts,
1105        pseudo_opts: &PseudoSpecOpts,
1106    ) -> PseudoBuildResult {
1107        build_pseudo_spectra_end_to_end_xic(
1108            self,
1109            ms1,
1110            ms2,
1111            features,
1112            cand_opts,
1113            xic_opts,
1114            pseudo_opts,
1115        )
1116    }
1117
1118    /// Naive "all pairs" builder stays as-is, still returning Vec<PseudoSpectrum>.
1119    pub fn build_pseudo_spectra_all_pairs_from_clusters(
1120        &self,
1121        ms1: &[ClusterResult1D],
1122        ms2: &[ClusterResult1D],
1123        features: Option<&[SimpleFeature]>,
1124        pseudo_opts: &PseudoSpecOpts,
1125    ) -> PseudoBuildResult {
1126        build_pseudo_spectra_all_pairs(self, ms1, ms2, features, pseudo_opts)
1127    }
1128
1129    // ==========================================================
1130    // Convenience methods using ClusterExtractionOpts (Phase 5)
1131    // ==========================================================
1132
1133    /// Extract clusters for a DIA window group using options struct.
1134    pub fn clusters_for_group_opts(
1135        &self,
1136        window_group: u32,
1137        im_peaks: &[ImPeak1D],
1138        opts: &ClusterExtractionOpts,
1139    ) -> Vec<ClusterResult1D> {
1140        self.clusters_for_group(
1141            window_group,
1142            opts.tof_step,
1143            im_peaks,
1144            opts.rt_params.clone(),
1145            &opts.build_opts,
1146            &opts.eval_opts,
1147            opts.require_rt_overlap,
1148            opts.num_threads,
1149        )
1150    }
1151
1152    /// Extract clusters for precursor (MS1) frames using options struct.
1153    pub fn clusters_for_precursor_opts(
1154        &self,
1155        im_peaks: &[ImPeak1D],
1156        opts: &ClusterExtractionOpts,
1157    ) -> Vec<ClusterResult1D> {
1158        self.clusters_for_precursor(
1159            opts.tof_step,
1160            im_peaks,
1161            opts.rt_params.clone(),
1162            &opts.build_opts,
1163            &opts.eval_opts,
1164            opts.require_rt_overlap,
1165            opts.num_threads,
1166        )
1167    }
1168
1169    /// Dense TOF×RT grid for all PRECURSOR (MS1) frames.
1170    /// `tof_step` controls TOF bin granularity (1 = full resolution).
1171    pub fn tof_rt_grid_precursor(&self, tof_step: i32) -> TofRtGrid {
1172        let rt = self.make_rt_frames_for_precursor(tof_step);
1173        build_tof_rt_grid_full(&rt, None)
1174    }
1175
1176    /// Dense TOF×RT grid for FRAGMENT frames in a DIA window group.
1177    /// `tof_step` controls TOF bin granularity (1 = full resolution).
1178    pub fn tof_rt_grid_for_group(&self, window_group: u32, tof_step: i32) -> TofRtGrid {
1179        let rt = self.make_rt_frames_for_group(window_group, tof_step);
1180        build_tof_rt_grid_full(&rt, Some(window_group))
1181    }
1182
1183    /// Debug helper: for a bunch of clusters that all live on the same
1184    /// DIA grid (either MS1 or one MS2 window_group), extract RawPoints
1185    /// per cluster.
1186    ///
1187    /// - `clusters`: precursor *or* fragment clusters.
1188    ///   All must have the same `ms_level`.
1189    /// - `window_group`: if you pass Some(g), we treat them as MS2 clusters
1190    ///   on that DIA group. If None and ms_level == 2, we try to infer g
1191    ///   from the first cluster’s `window_group`.
1192    /// - `tof_step`: must be the same as used during clustering.
1193    /// - `max_points`: optional thinning cap per cluster.
1194    /// - `num_threads`: used for `get_slice` in the raw extractor.
1195    pub fn debug_extract_raw_for_clusters(
1196        &self,
1197        clusters: &[ClusterResult1D],
1198        window_group: Option<u32>,
1199        tof_step: i32,
1200        max_points: Option<usize>,
1201        num_threads: usize,
1202    ) -> Vec<RawPoints> {
1203        if clusters.is_empty() {
1204            return Vec::new();
1205        }
1206
1207        // ---- 1. Basic sanity: consistent ms_level ---------------------------
1208        let ms_level = clusters[0].ms_level;
1209        debug_assert!(
1210            clusters.iter().all(|c| c.ms_level == ms_level),
1211            "debug_extract_raw_for_clusters: mixed ms_level in cluster list"
1212        );
1213
1214        // Optional filter by WG if caller wants to restrict.
1215        let clusters: Vec<ClusterResult1D> = clusters
1216            .iter()
1217            .filter(|c| match window_group {
1218                Some(g) => c.window_group == Some(g),
1219                None => true,
1220            })
1221            .cloned()
1222            .collect();
1223
1224        if clusters.is_empty() {
1225            return Vec::new();
1226        }
1227
1228        // ---- 2. Rebuild the RtFrames grid used for these clusters ----------
1229        //
1230        // For MS1: all clusters live on the precursor RtFrames grid.
1231        // For MS2: all clusters live on the RtFrames grid of a single window group.
1232        let rt_frames = if ms_level == 1 {
1233            // Precursor grid; we ignore window_group here.
1234            self.make_rt_frames_for_precursor(tof_step)
1235        } else {
1236            // Fragments: we need a valid DIA group.
1237            let g = match window_group {
1238                Some(g) => g,
1239                None => clusters[0]
1240                    .window_group
1241                    .expect("MS2 cluster without window_group; pass window_group explicitly"),
1242            };
1243            self.make_rt_frames_for_group(g, tof_step)
1244        };
1245
1246        // ---- 3. For each cluster, call the existing raw extractor ----------
1247        clusters
1248            .iter()
1249            .map(|c| {
1250                let (rt_lo, rt_hi) = c.rt_window;
1251                let (im_lo, im_hi) = c.im_window;
1252                let (tof_lo, tof_hi) = c.tof_window;
1253
1254                attach_raw_points_for_spec_1d_threads(
1255                    self,
1256                    &rt_frames,
1257                    tof_lo,
1258                    tof_hi,
1259                    im_lo,
1260                    im_hi,
1261                    rt_lo,
1262                    rt_hi,
1263                    max_points,
1264                    num_threads,
1265                )
1266            })
1267            .collect()
1268    }
1269}
1270
1271impl TimsData for TimsDatasetDIA {
1272    fn get_frame(&self, frame_id: u32) -> TimsFrame {
1273        self.loader.get_frame(frame_id)
1274    }
1275
1276    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
1277        self.loader.get_raw_frame(frame_id)
1278    }
1279
1280    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
1281        self.loader.get_slice(frame_ids, num_threads)
1282    }
1283    fn get_acquisition_mode(&self) -> AcquisitionMode {
1284        self.loader.get_acquisition_mode().clone()
1285    }
1286
1287    fn get_frame_count(&self) -> i32 {
1288        self.loader.get_frame_count()
1289    }
1290
1291    fn get_data_path(&self) -> &str {
1292        &self.loader.get_data_path()
1293    }
1294}
1295
1296impl IndexConverter for TimsDatasetDIA {
1297    fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
1298        self.loader
1299            .get_index_converter()
1300            .tof_to_mz(frame_id, tof_values)
1301    }
1302
1303    fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
1304        self.loader
1305            .get_index_converter()
1306            .mz_to_tof(frame_id, mz_values)
1307    }
1308
1309    fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
1310        self.loader
1311            .get_index_converter()
1312            .scan_to_inverse_mobility(frame_id, scan_values)
1313    }
1314
1315    fn inverse_mobility_to_scan(
1316        &self,
1317        frame_id: u32,
1318        inverse_mobility_values: &Vec<f64>,
1319    ) -> Vec<u32> {
1320        self.loader
1321            .get_index_converter()
1322            .inverse_mobility_to_scan(frame_id, inverse_mobility_values)
1323    }
1324}
1325
1326pub fn attach_raw_points_for_spec_1d_threads(
1327    ds: &TimsDatasetDIA,
1328    rt_frames: &RtFrames,
1329    final_bin_lo: usize,
1330    final_bin_hi: usize,
1331    final_im_lo: usize,
1332    final_im_hi: usize,
1333    final_rt_lo: usize,
1334    final_rt_hi: usize,
1335    max_points: Option<usize>,
1336    num_threads: usize,
1337) -> RawPoints {
1338    let scale = &*rt_frames.scale;
1339
1340    // Defensive clamp of bin indices to the valid bin range.
1341    let n_bins = scale.num_bins();
1342    if n_bins == 0 {
1343        return RawPoints::default();
1344    }
1345
1346    let mut bin_lo = final_bin_lo.min(n_bins.saturating_sub(1));
1347    let mut bin_hi = final_bin_hi.min(n_bins.saturating_sub(1));
1348    if bin_lo > bin_hi {
1349        std::mem::swap(&mut bin_lo, &mut bin_hi);
1350    }
1351
1352    // Axis window in *TOF-axis units*:
1353    //   [edges[bin_lo], cushion_hi_edge(edges[bin_hi+1])]
1354    let mut axis_lo = scale.edges[bin_lo];
1355    let hi_edge_idx = (bin_hi + 1).min(scale.edges.len().saturating_sub(1));
1356    let mut axis_hi = cushion_hi_edge(scale, scale.edges[hi_edge_idx]);
1357
1358    // RT frames and slice
1359    let frame_ids_local = rt_frames.frame_ids[final_rt_lo..=final_rt_hi].to_vec();
1360    let slice = ds.get_slice(frame_ids_local.clone(), num_threads.max(1));
1361    let scan_slices: Vec<Vec<ScanSlice>> =
1362        slice.frames.iter().map(|fr| build_scan_slices(fr)).collect();
1363
1364    // 1) Count – now using **TOF** as the search axis.
1365    let mut total = 0usize;
1366    for (fi, fr) in slice.frames.iter().enumerate() {
1367        let tofs = &fr.tof;               // raw TOF indices
1368        for sl in &scan_slices[fi] {
1369            if sl.scan < final_im_lo || sl.scan > final_im_hi {
1370                continue;
1371            }
1372            let l = lower_bound_tof(tofs, sl.start, sl.end, axis_lo);
1373            let r = upper_bound_tof(tofs, sl.start, sl.end, axis_hi);
1374            total += r.saturating_sub(l);
1375        }
1376    }
1377
1378    // 2) Last-chance widen: if empty, expand by ±1 bin in TOF space.
1379    if total == 0 {
1380        let lo_idx = bin_lo.saturating_sub(1);
1381        let hi_edge_idx_wide =
1382            (bin_hi + 2).min(n_bins).min(scale.edges.len().saturating_sub(1));
1383
1384        let try_lo = scale.edges[lo_idx];
1385        let try_hi = cushion_hi_edge(scale, scale.edges[hi_edge_idx_wide]);
1386
1387        let mut total2 = 0usize;
1388        for (fi, fr) in slice.frames.iter().enumerate() {
1389            let tofs = &fr.tof;
1390            for sl in &scan_slices[fi] {
1391                if sl.scan < final_im_lo || sl.scan > final_im_hi {
1392                    continue;
1393                }
1394                let l = lower_bound_tof(tofs, sl.start, sl.end, try_lo);
1395                let r = upper_bound_tof(tofs, sl.start, sl.end, try_hi);
1396                total2 += r.saturating_sub(l);
1397            }
1398        }
1399
1400        if total2 > 0 {
1401            total = total2;
1402            axis_lo = try_lo;
1403            axis_hi = try_hi;
1404        }
1405    }
1406
1407    // Still empty → bail with empty container
1408    if total == 0 {
1409        return RawPoints::default();
1410    }
1411
1412    let stride = max_points.map(|cap| thin_stride(total, cap)).unwrap_or(1);
1413    let reserve = total / stride + 8;
1414
1415    let mut pts = RawPoints {
1416        mz: Vec::with_capacity(reserve),
1417        rt: Vec::with_capacity(reserve),
1418        im: Vec::with_capacity(reserve),
1419        scan: Vec::with_capacity(reserve),
1420        intensity: Vec::with_capacity(reserve),
1421        tof: Vec::with_capacity(reserve),
1422        frame: Vec::with_capacity(reserve),
1423    };
1424
1425    let rt_axis_sec = rt_frames.rt_times[final_rt_lo..=final_rt_hi].to_vec();
1426
1427    // 3) Extraction with thinning – again use TOF range for selection
1428    let mut seen = 0usize;
1429    for (fi, fr) in slice.frames.iter().enumerate() {
1430        let mz   = &fr.ims_frame.mz;
1431        let it   = &fr.ims_frame.intensity;
1432        let ims  = &fr.ims_frame.mobility;
1433        let tofs = &fr.tof;
1434
1435        let len_all = mz.len().min(it.len()).min(ims.len()).min(tofs.len());
1436        let rt_val = rt_axis_sec[fi];
1437        let frame_id = frame_ids_local[fi];
1438
1439        for sl in &scan_slices[fi] {
1440            let s_abs = sl.scan;
1441            if s_abs < final_im_lo || s_abs > final_im_hi {
1442                continue;
1443            }
1444
1445            let mut l = lower_bound_tof(tofs, sl.start, sl.end, axis_lo);
1446            let mut r = upper_bound_tof(tofs, sl.start, sl.end, axis_hi);
1447            if r > len_all {
1448                r = len_all;
1449            }
1450            if l >= r {
1451                continue;
1452            }
1453
1454            while l < r {
1455                if stride == 1 || (seen % stride == 0) {
1456                    // Note: we still *store* m/z, but the selection is done in TOF.
1457                    pts.mz.push(mz[l] as f32);
1458                    pts.rt.push(rt_val);
1459                    pts.im.push(ims[l] as f32);
1460                    pts.scan.push(s_abs as u32);
1461                    pts.intensity.push(it[l] as f32);
1462                    pts.frame.push(frame_id);
1463                    pts.tof.push(tofs[l]);
1464                }
1465                seen += 1;
1466                l += 1;
1467            }
1468        }
1469    }
1470
1471    pts
1472}
1473
1474/// Binary search in TOF array [start, end) for lower bound.
1475#[inline]
1476fn lower_bound_tof(tofs: &[i32], start: usize, end: usize, x: f32) -> usize {
1477    let mut lo = start;
1478    let mut hi = end;
1479    let xf = x as f64;
1480    while lo < hi {
1481        let mid = (lo + hi) >> 1;
1482        if (tofs[mid] as f64) < xf {
1483            lo = mid + 1;
1484        } else {
1485            hi = mid;
1486        }
1487    }
1488    lo
1489}
1490
1491/// Binary search in TOF array [start, end) for upper bound.
1492#[inline]
1493fn upper_bound_tof(tofs: &[i32], start: usize, end: usize, x: f32) -> usize {
1494    let mut lo = start;
1495    let mut hi = end;
1496    let xf = x as f64;
1497    while lo < hi {
1498        let mid = (lo + hi) >> 1;
1499        if (tofs[mid] as f64) <= xf {
1500            lo = mid + 1;
1501        } else {
1502            hi = mid;
1503        }
1504    }
1505    lo
1506}
1507
1508// ----------------------------------------------------------------------
1509// Helpers for axis-based extraction (no ppm, no mz_min/mz_max needed)
1510// ----------------------------------------------------------------------
1511
1512/// Slightly "cushion" the high edge used for upper_bound search.
1513///
1514/// Semantics:
1515/// - `hi_edge` is typically `scale.edges[bin_hi + 1]` (the upper edge of the last bin).
1516/// - We nudge it outward by a tiny fraction of a *typical* bin width so values
1517///   that sit exactly on the edge aren't accidentally excluded.
1518/// - This is axis-generic: it works whether the axis is TOF or m/z-like.
1519#[inline]
1520fn cushion_hi_edge(scale: &TofScale, hi_edge: f32) -> f32 {
1521    let edges = &scale.edges;
1522    if edges.len() >= 2 {
1523        // Use a small fraction of the first bin width as epsilon.
1524        let bw = (edges[1] - edges[0]).abs().max(1e-6);
1525        hi_edge + 0.01 * bw
1526    } else {
1527        hi_edge
1528    }
1529}
1530
1531#[inline]
1532fn thin_stride(total: usize, cap: usize) -> usize {
1533    if cap == 0 || total <= cap {
1534        1
1535    } else {
1536        (total + cap - 1) / cap
1537    }
1538}
1539
1540
1541#[cfg(test)]
1542mod tests {
1543    use super::*;
1544    use mscore::data::spectrum::MsType;
1545    use mscore::timstof::frame::TimsFrame;
1546
1547    fn make_frame(frame_id: i32, ms_type: MsType) -> TimsFrame {
1548        // minimal non-empty frame: one scan at scan=10, mobility=0.8, tof=500,
1549        // mz=500.0, intensity=10.0 — enough for Add to have something to merge.
1550        TimsFrame::new(
1551            frame_id,
1552            ms_type,
1553            0.0,
1554            vec![10],
1555            vec![0.8],
1556            vec![500],
1557            vec![500.0],
1558            vec![10.0],
1559        )
1560    }
1561
1562    #[test]
1563    fn accumulate_or_typed_empty_returns_typed_empty_when_pool_is_empty() {
1564        // The whole point of the fix: an empty sample pool must not
1565        // return TimsFrame::default() (frame_id=0, ms_type=Unknown). It
1566        // must return a typed empty so the caller's `frame + noise`
1567        // doesn't collapse the simulated frame's ms_type to Unknown.
1568        let mut samples: Vec<TimsFrame> = vec![];
1569        let noise = TimsDatasetDIA::accumulate_or_typed_empty(
1570            &mut samples, MsType::Precursor,
1571        );
1572        assert_eq!(noise.ms_type, MsType::Precursor,
1573                   "typed-empty must carry the requested MsType, not Unknown");
1574        assert!(noise.scan.is_empty());
1575        assert!(noise.ims_frame.mz.is_empty());
1576    }
1577
1578    #[test]
1579    fn accumulate_or_typed_empty_stamps_type_even_after_inner_unknown() {
1580        // Belt-and-braces: even if some path managed to insert an
1581        // MsType::Unknown sample into the accumulator (which the outer
1582        // filter should prevent), the final stamp must restore the
1583        // requested type so downstream `simulated + noise` doesn't see
1584        // a mismatched operand.
1585        let mut samples = vec![
1586            make_frame(1, MsType::Unknown),
1587            make_frame(2, MsType::FragmentDia),
1588        ];
1589        let noise = TimsDatasetDIA::accumulate_or_typed_empty(
1590            &mut samples, MsType::FragmentDia,
1591        );
1592        assert_eq!(noise.ms_type, MsType::FragmentDia);
1593    }
1594
1595    #[test]
1596    fn accumulate_preserves_type_on_clean_input() {
1597        let mut samples = vec![
1598            make_frame(1, MsType::FragmentDia),
1599            make_frame(2, MsType::FragmentDia),
1600            make_frame(3, MsType::FragmentDia),
1601        ];
1602        let noise = TimsDatasetDIA::accumulate_or_typed_empty(
1603            &mut samples, MsType::FragmentDia,
1604        );
1605        assert_eq!(noise.ms_type, MsType::FragmentDia);
1606        // Inputs were merged; the accumulator should hold a non-empty frame.
1607        assert!(!noise.scan.is_empty());
1608    }
1609
1610    #[test]
1611    fn typed_empty_frame_uses_correct_default_frame_id() {
1612        // Empty-pool fallback should use frame_id=0 — same as
1613        // TimsFrame::default() — but with the requested ms_type stamped.
1614        // This pairs with the writer-side Frames.Id uniqueness check
1615        // (packages/imspy-simulation/src/imspy_simulation/tdf.py):
1616        // empty noise frames must not silently pollute the Frames table.
1617        let mut samples: Vec<TimsFrame> = vec![];
1618        let noise = TimsDatasetDIA::accumulate_or_typed_empty(
1619            &mut samples, MsType::Precursor,
1620        );
1621        assert_eq!(noise.frame_id, 0);
1622        assert_eq!(noise.ims_frame.retention_time, 0.0);
1623    }
1624}