1use std::sync::Arc;
2use crate::data::acquisition::AcquisitionMode;
3use rustc_hash::FxHashMap;
4use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader};
5use crate::data::meta::{
6 read_dia_ms_ms_info, read_dia_ms_ms_windows, read_global_meta_sql, read_meta_data_sql,
7 DiaMsMisInfo, DiaMsMsWindow, FrameMeta, GlobalMetaData,
8};
9use mscore::timstof::frame::{RawTimsFrame, TimsFrame};
10use mscore::timstof::slice::TimsSlice;
11use rand::prelude::IteratorRandom;
12use rayon::iter::IntoParallelRefIterator;
13use crate::cluster::peak::{build_frame_bin_view, build_tof_rt_grid_full, expand_many_im_peaks_along_rt, FrameBinView, ImPeak1D, RtExpandParams, RtFrames, TofRtGrid};
14use crate::cluster::utility::{TofScale};
15use crate::data::utility::merge_ranges;
16use rayon::prelude::*;
17use std::collections::{HashMap};
18use rayon::ThreadPoolBuilder;
19use crate::cluster::candidates::{build_pseudo_spectra_all_pairs, build_pseudo_spectra_end_to_end, build_pseudo_spectra_end_to_end_xic, PseudoBuildResult, ScoreOpts};
20use crate::cluster::cluster::{attach_raw_points_for_spec_1d_in_ctx, bin_range_for_win, build_scan_slices, decorate_with_mz_for_cluster, evaluate_spec_1d, make_specs_from_im_and_rt_groups_threads, BuildSpecOpts, ClusterResult1D, ClusterSpec1D, Eval1DOpts, RawAttachContext, RawPoints, ScanSlice};
21use crate::cluster::feature::SimpleFeature;
22use crate::cluster::pseudo::{PseudoSpecOpts};
23use crate::cluster::candidates::CandidateOpts;
24use crate::cluster::scoring::XicScoreOpts;
25
26#[derive(Clone, Debug)]
32pub struct ClusterExtractionOpts {
33 pub tof_step: i32,
35 pub rt_params: RtExpandParams,
37 pub build_opts: BuildSpecOpts,
39 pub eval_opts: Eval1DOpts,
41 pub require_rt_overlap: bool,
43 pub num_threads: usize,
45}
46
47impl Default for ClusterExtractionOpts {
48 fn default() -> Self {
49 Self {
50 tof_step: 8,
51 rt_params: RtExpandParams::default(),
52 build_opts: BuildSpecOpts::ms1_defaults(),
53 eval_opts: Eval1DOpts::default(),
54 require_rt_overlap: true,
55 num_threads: 4,
56 }
57 }
58}
59
60impl ClusterExtractionOpts {
61 pub fn for_ms1() -> Self {
63 Self {
64 build_opts: BuildSpecOpts::ms1_defaults(),
65 ..Self::default()
66 }
67 }
68
69 pub fn for_ms2() -> Self {
71 Self {
72 build_opts: BuildSpecOpts::ms2_defaults(),
73 ..Self::default()
74 }
75 }
76
77 pub fn with_tof_step(mut self, step: i32) -> Self {
79 self.tof_step = step;
80 self
81 }
82
83 pub fn with_threads(mut self, n: usize) -> Self {
85 self.num_threads = n;
86 self
87 }
88}
89
90#[derive(Clone, Debug)]
92pub struct PseudoSpectrumBuildOpts<'a> {
93 pub cand_opts: &'a CandidateOpts,
95 pub score_opts: &'a ScoreOpts,
97 pub pseudo_opts: &'a PseudoSpecOpts,
99 pub features: Option<&'a [SimpleFeature]>,
101}
102
103#[derive(Clone, Debug)]
104pub struct ProgramSlice {
105 pub mz_lo: f64,
106 pub mz_hi: f64,
107 pub scan_lo: u32,
108 pub scan_hi: u32,
109}
110
111#[inline]
112fn ranges_overlap_u32(a: (u32, u32), b: (u32, u32)) -> bool {
113 let lo = a.0.max(b.0);
114 let hi = a.1.min(b.1);
115 hi >= lo
116}
117
118#[derive(Debug, Clone)]
119pub struct Ms2GroupProgram {
120 pub mz_windows: Vec<(f32, f32)>,
122 pub scan_ranges: Vec<(u32, u32)>,
124 pub mz_union: Option<(f32, f32)>,
126 pub scan_unions: Vec<(u32, u32)>,
128}
129
130#[derive(Debug, Clone)]
131pub struct DiaIndex {
132 pub frame_to_group: HashMap<u32, u32>,
134 pub group_to_frames: HashMap<u32, Vec<u32>>,
136 pub group_to_isolation: HashMap<u32, Vec<(f64, f64)>>,
138 pub group_to_scan_ranges: HashMap<u32, Vec<(u32, u32)>>,
140 pub group_to_mz_union: HashMap<u32, (f64, f64)>,
142 pub group_to_scan_unions: HashMap<u32, Vec<(u32, u32)>>,
144 pub frame_time: HashMap<u32, f64>,
146 pub group_to_slices: HashMap<u32, Arc<[ProgramSlice]>>,
148}
149
150impl DiaIndex {
151 pub fn new(meta: &[FrameMeta], info: &[DiaMsMisInfo], wins: &[DiaMsMsWindow]) -> Self {
152 #[inline]
154 fn norm_f64_pair(a: f64, b: f64) -> Option<(f64, f64)> {
155 if !a.is_finite() || !b.is_finite() { return None; }
156 let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
157 if hi > lo { Some((lo, hi)) } else { None }
158 }
159 #[inline]
160 fn norm_u32_pair(a: u32, b: u32) -> Option<(u32, u32)> {
161 let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
162 Some((lo, hi))
163 }
164 fn merge_scan_ranges(mut ranges: Vec<(u32, u32)>) -> Vec<(u32, u32)> {
165 if ranges.is_empty() { return ranges; }
166 ranges.sort_unstable_by_key(|&(l, r)| (l, r));
167 let mut out: Vec<(u32,u32)> = Vec::with_capacity(ranges.len());
168 let mut cur = ranges[0];
169 for &(l, r) in &ranges[1..] {
170 if l <= cur.1.saturating_add(1) {
171 if r > cur.1 { cur.1 = r; }
172 } else {
173 out.push(cur);
174 cur = (l, r);
175 }
176 }
177 out.push(cur);
178 out
179 }
180
181 let mut frame_time: HashMap<u32, f64> = HashMap::new();
183 for m in meta {
184 frame_time.insert(m.id as u32, m.time);
185 }
186
187 let mut frame_to_group: HashMap<u32, u32> = HashMap::new();
189 let mut group_to_frames: HashMap<u32, Vec<u32>> = HashMap::new();
190 for r in info {
191 let fid = r.frame_id;
192 frame_to_group.insert(fid, r.window_group);
193 group_to_frames.entry(r.window_group).or_default().push(fid);
194 }
195
196 let mut group_to_isolation: HashMap<u32, Vec<(f64, f64)>> = HashMap::new();
198 let mut group_to_scan_ranges: HashMap<u32, Vec<(u32, u32)>> = HashMap::new();
199 for w in wins {
200 let half = 0.5 * w.isolation_width;
201 if half.is_finite() && half > 0.0 && w.isolation_mz.is_finite() {
202 let lo = w.isolation_mz - half;
203 let hi = w.isolation_mz + half;
204 if let Some(p) = norm_f64_pair(lo, hi) {
205 group_to_isolation.entry(w.window_group).or_default().push(p);
206 }
207 }
208 if let Some(p) = norm_u32_pair(w.scan_num_begin, w.scan_num_end) {
209 group_to_scan_ranges.entry(w.window_group).or_default().push(p);
210 }
211 }
212
213 let mut group_to_mz_union: HashMap<u32, (f64, f64)> = HashMap::new();
215 let mut group_to_scan_unions: HashMap<u32, Vec<(u32, u32)>> = HashMap::new();
216
217 for (g, frames) in group_to_frames.iter_mut() {
218 frames.sort_unstable_by(|&a, &b| {
219 let ta = frame_time.get(&a).copied().unwrap_or(f64::NAN);
220 let tb = frame_time.get(&b).copied().unwrap_or(f64::NAN);
221 ta.partial_cmp(&tb).unwrap_or(std::cmp::Ordering::Equal)
222 });
223
224 if let Some(v) = group_to_isolation.get(g) {
225 if !v.is_empty() {
226 let lo = v.iter().map(|p| p.0).fold(f64::INFINITY, f64::min);
227 let hi = v.iter().map(|p| p.1).fold(f64::NEG_INFINITY, f64::max);
228 if lo.is_finite() && hi.is_finite() && hi > lo {
229 group_to_mz_union.insert(*g, (lo, hi));
230 }
231 }
232 }
233 let merged = merge_scan_ranges(group_to_scan_ranges.get(g).cloned().unwrap_or_default());
234 group_to_scan_unions.insert(*g, merged);
235 }
236
237 let mut group_to_slices: HashMap<u32, Arc<[ProgramSlice]>> = HashMap::new();
238
239 for (&g, iso_rows) in &group_to_isolation {
240 let scan_rows = group_to_scan_ranges.get(&g).cloned().unwrap_or_default();
241 let n = iso_rows.len().min(scan_rows.len());
242 let mut v = Vec::with_capacity(n);
243 for k in 0..n {
244 let (mz_lo, mz_hi) = iso_rows[k];
245 let (scan_lo, scan_hi) = scan_rows[k];
246 v.push(ProgramSlice { mz_lo, mz_hi, scan_lo, scan_hi });
247 }
248 group_to_slices.insert(g, v.into());
249 }
250
251 DiaIndex {
252 frame_to_group,
253 group_to_frames,
254 group_to_isolation,
255 group_to_scan_ranges,
256 group_to_mz_union,
257 group_to_scan_unions,
258 frame_time,
259 group_to_slices,
260 }
261 }
262
263 pub fn program_slices_for_group(&self, g: u32) -> Vec<ProgramSlice> {
264 self.slices_for_group(g).to_vec()
265 }
266
267 pub fn tiles_for_precursor_in_group(
278 &self,
279 g: u32,
280 prec_mz: f32,
281 im_apex: f32,
282 ) -> Vec<usize> {
283 let slices = self.slices_for_group(g);
284 let mut hits = Vec::new();
285
286 if !prec_mz.is_finite() || !im_apex.is_finite() {
287 return hits;
288 }
289
290 for (idx, s) in slices.iter().enumerate() {
291 if prec_mz < s.mz_lo as f32 || prec_mz > s.mz_hi as f32 {
293 continue;
294 }
295
296 let scan_lo = s.scan_lo as f32;
298 let scan_hi = s.scan_hi as f32;
299 if im_apex < scan_lo || im_apex > scan_hi {
300 continue;
301 }
302
303 hits.push(idx);
304 }
305
306 hits
307 }
308
309 pub fn tiles_for_fragment_in_group(
315 &self,
316 g: u32,
317 im_window: (usize, usize),
318 ) -> Vec<usize> {
319 let slices = self.program_slices_for_group(g);
320 let mut hits = Vec::new();
321
322 for (idx, s) in slices.iter().enumerate() {
323 let tile_scans = (s.scan_lo, s.scan_hi);
324
325 if ranges_overlap_u32(
326 (im_window.0 as u32, im_window.1 as u32),
327 tile_scans,
328 ) {
329 hits.push(idx);
330 }
331 }
332
333 hits
334 }
335
336 pub fn program_for_group(&self, g: u32) -> Ms2GroupProgram {
338 let mz_windows = self.group_to_isolation
339 .get(&g)
340 .map(|v| v.iter().map(|&(a,b)| (a as f32, b as f32)).collect())
341 .unwrap_or_else(Vec::new);
342
343 let scan_ranges = self.group_to_scan_ranges
344 .get(&g)
345 .cloned()
346 .unwrap_or_else(Vec::new);
347
348 let mz_union = self.group_to_mz_union
349 .get(&g)
350 .map(|&(a,b)| (a as f32, b as f32));
351
352 let scan_unions = self.group_to_scan_unions
353 .get(&g)
354 .cloned()
355 .unwrap_or_else(Vec::new);
356
357 Ms2GroupProgram { mz_windows, scan_ranges, mz_union, scan_unions }
358 }
359
360 pub fn groups_for_precursor(
361 &self,
362 prec_mz: f32,
363 im_apex: f32,
364 ) -> Vec<u32> {
365 if !prec_mz.is_finite() || !im_apex.is_finite() {
366 return Vec::new();
367 }
368
369 let mut out = Vec::new();
370 for (&g, &(mz_lo, mz_hi)) in &self.group_to_mz_union {
371 if prec_mz < mz_lo as f32 || prec_mz > mz_hi as f32 {
372 continue;
373 }
374
375 if let Some(unions) = self.group_to_scan_unions.get(&g) {
377
378 if !unions.iter().any(|&(lo, hi)| {
379 (im_apex as u32) >= lo && (im_apex as u32) <= hi
380 }) {
381 continue;
382 }
383 }
384
385 let tiles = self.tiles_for_precursor_in_group(g, prec_mz, im_apex);
386 if !tiles.is_empty() {
387 out.push(g);
388 }
389 }
390 out
391 }
392
393 pub fn tile_mz_bounds(&self, g: u32, tile_idx: usize) -> (f32, f32) {
399 let slices = self.slices_for_group(g);
400 if tile_idx >= slices.len() {
401 return (f32::NAN, f32::NAN);
402 }
403 let s = &slices[tile_idx];
404 (s.mz_lo as f32, s.mz_hi as f32)
405 }
406
407 #[inline]
408 pub fn mz_bounds_for_window_group_core(&self, g: u32) -> Option<(f32, f32)> {
409 self.group_to_mz_union.get(&g).map(|&(a,b)| (a as f32, b as f32))
410 }
411 #[inline]
412 pub fn scan_unions_for_window_group_core(&self, g: u32) -> Option<Vec<(usize, usize)>> {
413 self.group_to_scan_unions.get(&g).map(|v| v.iter().map(|&(l,r)| (l as usize, r as usize)).collect())
414 }
415
416 fn slices_for_group(&self, g: u32) -> &[ProgramSlice] {
417 self.group_to_slices.get(&g).map(|a| &a[..]).unwrap_or(&[])
418 }
419
420}
421
422pub struct TimsDatasetDIA {
423 pub loader: TimsDataLoader,
424 pub global_meta_data: GlobalMetaData,
425 pub meta_data: Vec<FrameMeta>,
426 pub dia_ms_ms_info: Vec<DiaMsMisInfo>,
427 pub dia_ms_ms_windows: Vec<DiaMsMsWindow>,
428 pub dia_index: DiaIndex,
429}
430
431impl TimsDatasetDIA {
432 pub fn new(
433 bruker_lib_path: &str,
434 data_path: &str,
435 in_memory: bool,
436 use_bruker_sdk: bool,
437 ) -> Self {
438 let global_meta_data = read_global_meta_sql(data_path).unwrap();
440 let meta_data = read_meta_data_sql(data_path).unwrap();
441 let dia_ms_mis_info = read_dia_ms_ms_info(data_path).unwrap();
442 let dia_ms_ms_windows = read_dia_ms_ms_windows(data_path).unwrap();
443
444 let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
445 let im_lower = global_meta_data.one_over_k0_range_lower;
446 let im_upper = global_meta_data.one_over_k0_range_upper;
447
448 let tof_max_index = global_meta_data.tof_max_index;
449 let mz_lower = global_meta_data.mz_acquisition_range_lower;
450 let mz_upper = global_meta_data.mz_acquisition_range_upper;
451
452 let loader = match in_memory {
453 true => TimsDataLoader::new_in_memory(
454 bruker_lib_path,
455 data_path,
456 use_bruker_sdk,
457 scan_max_index,
458 im_lower,
459 im_upper,
460 tof_max_index,
461 mz_lower,
462 mz_upper,
463 ),
464 false => TimsDataLoader::new_lazy(
465 bruker_lib_path,
466 data_path,
467 use_bruker_sdk,
468 scan_max_index,
469 im_lower,
470 im_upper,
471 tof_max_index,
472 mz_lower,
473 mz_upper,
474 ),
475 };
476
477 let dia_index = DiaIndex::new(&meta_data, &dia_ms_mis_info, &dia_ms_ms_windows);
478
479 TimsDatasetDIA {
480 loader,
481 global_meta_data,
482 meta_data,
483 dia_ms_ms_info: dia_ms_mis_info,
484 dia_ms_ms_windows,
485 dia_index,
486 }
487 }
488
489 pub fn new_with_mz_calibration(
494 data_path: &str,
495 in_memory: bool,
496 tof_intercept: f64,
497 tof_slope: f64,
498 ) -> Self {
499 let meta_data = read_meta_data_sql(data_path).unwrap();
500 let global_meta_data = read_global_meta_sql(data_path).unwrap();
501 let dia_ms_mis_info = read_dia_ms_ms_info(data_path).unwrap();
502 let dia_ms_ms_windows = read_dia_ms_ms_windows(data_path).unwrap();
503
504 let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
505 let im_lower = global_meta_data.one_over_k0_range_lower;
506 let im_upper = global_meta_data.one_over_k0_range_upper;
507
508 let loader = match in_memory {
509 true => TimsDataLoader::new_in_memory_with_mz_calibration(
510 data_path,
511 tof_intercept,
512 tof_slope,
513 im_lower,
514 im_upper,
515 scan_max_index,
516 ),
517 false => TimsDataLoader::new_lazy_with_mz_calibration(
518 data_path,
519 tof_intercept,
520 tof_slope,
521 im_lower,
522 im_upper,
523 scan_max_index,
524 ),
525 };
526
527 let dia_index = DiaIndex::new(&meta_data, &dia_ms_mis_info, &dia_ms_ms_windows);
528
529 TimsDatasetDIA {
530 loader,
531 global_meta_data,
532 meta_data,
533 dia_ms_ms_info: dia_ms_mis_info,
534 dia_ms_ms_windows,
535 dia_index,
536 }
537 }
538
539 pub fn program_for_group(&self, g: u32) -> Ms2GroupProgram {
540 self.dia_index.program_for_group(g)
541 }
542
543 pub fn program_slices_for_group(&self, group: u32) -> Vec<ProgramSlice> {
546 self.dia_ms_ms_windows
547 .iter()
548 .filter(|w| w.window_group == group)
549 .map(|w| {
550 let half = 0.5 * w.isolation_width;
551 ProgramSlice {
552 mz_lo: w.isolation_mz - half,
553 mz_hi: w.isolation_mz + half,
554 scan_lo: w.scan_num_begin,
555 scan_hi: w.scan_num_end,
556 }
557 })
558 .collect()
559 }
560
561 pub fn sample_precursor_signal(
562 &self,
563 num_frames: usize,
564 max_intensity: f64,
565 take_probability: f64,
566 ) -> TimsFrame {
567 let precursor_frames = self.meta_data.iter().filter(|x| x.ms_ms_type == 0);
569
570 let mut rng = rand::thread_rng();
572 let mut sampled_frames: Vec<TimsFrame> = Vec::new();
573
574 for frame in precursor_frames.choose_multiple(&mut rng, num_frames) {
576 let frame_id = frame.id;
577 let frame_data = self
578 .loader
579 .get_frame(frame_id as u32)
580 .filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity, 0, i32::MAX)
581 .generate_random_sample(take_probability);
582 sampled_frames.push(frame_data);
583 }
584
585 let mut sampled_frame = sampled_frames.remove(0);
587
588 for frame in sampled_frames {
590 sampled_frame = sampled_frame + frame;
591 }
592
593 sampled_frame
594 }
595
596 pub fn sample_fragment_signal(
597 &self,
598 num_frames: usize,
599 window_group: u32,
600 max_intensity: f64,
601 take_probability: f64,
602 ) -> TimsFrame {
603 let fragment_frames: Vec<u32> = self
605 .dia_ms_ms_info
606 .iter()
607 .filter(|x| x.window_group == window_group)
608 .map(|x| x.frame_id)
609 .collect();
610
611 let mut rng = rand::thread_rng();
613 let mut sampled_frames: Vec<TimsFrame> = Vec::new();
614
615 for frame_id in fragment_frames
617 .into_iter()
618 .choose_multiple(&mut rng, num_frames)
619 {
620 let frame_data = self
621 .loader
622 .get_frame(frame_id)
623 .filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity, 0, i32::MAX)
624 .generate_random_sample(take_probability);
625 sampled_frames.push(frame_data);
626 }
627
628 let mut sampled_frame = sampled_frames.remove(0);
630
631 for frame in sampled_frames {
633 sampled_frame = sampled_frame + frame;
634 }
635
636 sampled_frame
637 }
638
639 pub fn dia_window_groups(&self) -> Vec<u32> {
641 let mut gs: Vec<u32> = self
642 .dia_ms_ms_info
643 .iter()
644 .map(|x| x.window_group)
645 .collect();
646 gs.sort_unstable();
647 gs.dedup();
648 gs
649 }
650
651 pub fn window_groups_for_precursor(&self, prec_mz: f32, im_apex: f32) -> Vec<u32> {
652 self.dia_index.groups_for_precursor(prec_mz, im_apex)
653 }
654
655 fn frame_time_map(&self) -> FxHashMap<u32, (f32, i64)> {
657 let mut m = FxHashMap::default();
658 for fm in &self.meta_data {
659 m.insert(fm.id as u32, (fm.time as f32, fm.ms_ms_type));
660 }
661 m
662 }
663
664 pub fn fragment_frame_ids_and_times_for_group_core(&self, window_group: u32) -> (Vec<u32>, Vec<f32>) {
666 let time_map = self.frame_time_map();
667 let mut rows: Vec<(u32, f32)> = self
668 .dia_ms_ms_info
669 .iter()
670 .filter(|x| x.window_group == window_group)
671 .filter_map(|x| {
672 time_map.get(&(x.frame_id)).map(|(t, _ms2)| (x.frame_id, *t))
673 })
674 .collect();
675
676 rows.retain(|(fid, _)| time_map.get(fid).map(|(_, ty)| *ty != 0).unwrap_or(false));
678
679 rows.sort_by(|a,b| a.1.partial_cmp(&b.1).unwrap());
680 let (ids, times): (Vec<_>, Vec<_>) = rows.into_iter().unzip();
681 (ids, times)
682 }
683
684 pub fn scan_unions_for_window_group_core(&self, window_group: u32) -> Option<Vec<(usize, usize)>> {
687 let ranges: Vec<(usize, usize)> = self
688 .dia_ms_ms_windows
689 .iter()
690 .filter(|w| w.window_group == window_group)
691 .map(|w| {
692 let l = w.scan_num_begin as usize;
693 let r = w.scan_num_end as usize;
694 if l <= r { (l, r) } else { (r, l) }
695 })
696 .collect();
697 if ranges.is_empty() {
698 return None;
699 }
700 Some(merge_ranges(ranges))
701 }
702
703 pub fn mz_bounds_for_window_group_core(&self, window_group: u32) -> Option<(f32, f32)> {
705 let mut lo = f32::INFINITY;
706 let mut hi = f32::NEG_INFINITY;
707 let mut hit = false;
708 for w in &self.dia_ms_ms_windows {
709 if w.window_group == window_group {
710 let c = w.isolation_mz as f32;
711 let half = 0.5f32 * (w.isolation_width as f32);
712 lo = lo.min(c - half);
713 hi = hi.max(c + half);
714 hit = true;
715 }
716 }
717 if hit && hi > lo && lo.is_finite() && hi.is_finite() {
718 Some((lo, hi))
719 } else {
720 None
721 }
722 }
723
724 pub fn make_rt_frames_for_group(&self, window_group: u32, tof_step: i32) -> RtFrames {
727 assert!(tof_step > 0);
728
729 let (ids, times) = self.fragment_frame_ids_and_times_for_group_core(window_group);
730 assert!(
731 !ids.is_empty(),
732 "No MS2 frames for window_group {}",
733 window_group
734 );
735
736 let global_num_scans = self.meta_data
737 .iter()
738 .filter(|m| ids.binary_search(&(m.id as u32)).is_ok())
739 .map(|m| m.num_scans as usize)
740 .max()
741 .unwrap_or(0);
742
743 let scale = self.tof_scale_for_group(window_group, tof_step);
745
746 let frames: Vec<FrameBinView> = ids.par_iter()
747 .map(|&fid| build_frame_bin_view(self.get_frame(fid), &scale, global_num_scans))
748 .collect();
749
750 RtFrames {
751 frames,
752 frame_ids: ids,
753 rt_times: times,
754 scale: Arc::new(scale),
755 }
756 }
757
758 pub fn make_rt_frames_for_precursor(&self, tof_step: i32) -> RtFrames {
761 assert!(tof_step > 0);
762
763 let mut rows: Vec<(u32, f32, usize)> = self.meta_data
764 .iter()
765 .filter(|m| m.ms_ms_type == 0)
766 .map(|m| (m.id as u32, m.time as f32, m.num_scans as usize))
767 .collect();
768
769 assert!(!rows.is_empty(), "No precursor (MS1) frames found");
770 rows.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
771
772 let frame_ids: Vec<u32> = rows.iter().map(|r| r.0).collect();
773 let rt_times: Vec<f32> = rows.iter().map(|r| r.1).collect();
774 let global_num_scans = rows.iter().map(|r| r.2).max().unwrap_or(1);
775
776 let frames_for_scale: Vec<_> = frame_ids.iter().map(|&fid| self.get_frame(fid)).collect();
778 let scale = TofScale::build_from_tof(&frames_for_scale, tof_step)
779 .expect("make_rt_frames_for_precursor: failed to build TOF scale");
780
781 let frames: Vec<FrameBinView> = frame_ids.par_iter()
782 .map(|&fid| build_frame_bin_view(self.get_frame(fid), &scale, global_num_scans))
783 .collect();
784
785 RtFrames {
786 frames,
787 frame_ids,
788 rt_times,
789 scale: Arc::new(scale),
790 }
791 }
792
793 pub fn tof_scale_for_group(&self, window_group: u32, tof_step: i32) -> TofScale {
796 assert!(tof_step > 0);
797
798 let (ids, _times) = self.fragment_frame_ids_and_times_for_group_core(window_group);
799 assert!(
800 !ids.is_empty(),
801 "tof_scale_for_group: no MS2 frames for window_group {}",
802 window_group
803 );
804
805 let frames: Vec<_> = ids.iter().map(|&fid| self.get_frame(fid)).collect();
806
807 TofScale::build_from_tof(&frames, tof_step)
808 .expect("tof_scale_for_group: failed to build TOF scale (empty or degenerate)")
809 }
810
811 pub fn tof_scale_from_frames_scan(
813 &self,
814 frame_ids: &[u32],
815 tof_step: i32,
816 ) -> Option<TofScale> {
817 assert!(tof_step > 0);
818 let frames: Vec<_> = frame_ids.iter().map(|&fid| self.get_frame(fid)).collect();
819 TofScale::build_from_tof(&frames, tof_step)
820 }
821
822 fn clusters_for_im_peaks_on_rt_frames(
825 &self,
826 rt: RtFrames,
827 im_peaks: &[ImPeak1D],
828 rt_params: RtExpandParams,
829 build_opts: &BuildSpecOpts,
830 eval_opts: &Eval1DOpts,
831 require_rt_overlap: bool,
832 num_threads: usize,
833 ) -> Vec<ClusterResult1D> {
834 let rt_groups = expand_many_im_peaks_along_rt(
836 im_peaks,
837 &rt.frames,
838 rt.ctx(),
839 rt.scale.as_ref(),
840 rt_params,
841 );
842
843 let specs: Vec<ClusterSpec1D> = make_specs_from_im_and_rt_groups_threads(
845 im_peaks,
846 &rt_groups,
847 &rt,
848 build_opts,
849 require_rt_overlap,
850 num_threads,
851 );
852
853 self.evaluate_specs_1d_threads(&rt, &specs, eval_opts, num_threads)
855 }
856
857 pub fn evaluate_specs_1d_threads(
863 &self,
864 rt_frames: &RtFrames,
865 specs: &[ClusterSpec1D],
866 opts: &Eval1DOpts,
867 num_threads: usize,
868 ) -> Vec<ClusterResult1D> {
869 if specs.is_empty() {
870 return Vec::new();
871 }
872
873 let scale = &*rt_frames.scale;
874
875 let run = || {
876 let attach_ctx = if opts.attach.attach_points {
878 let frame_ids_local = rt_frames.frame_ids.clone();
879 let slice = self.get_slice(frame_ids_local.clone(), num_threads.max(1));
880 let scan_slices = slice
881 .frames
882 .iter()
883 .map(|fr| build_scan_slices(fr))
884 .collect::<Vec<_>>();
885 let rt_axis_sec = rt_frames.rt_times.clone();
886
887 Some(RawAttachContext {
888 slice,
889 scan_slices,
890 frame_ids_local,
891 rt_axis_sec,
892 })
893 } else {
894 None
895 };
896
897 specs
898 .par_iter()
899 .map(|spec| {
900 let mut res = evaluate_spec_1d(rt_frames, spec, opts);
902
903 decorate_with_mz_for_cluster(self, rt_frames, &mut res);(self, rt_frames, spec, scale, &mut res);
909
910 if let Some(ref ctx) = attach_ctx {
912 if opts.attach.attach_points && res.raw_sum > 0.0 && res.tof_fit.area > 0.0 {
913 let (bin_lo, bin_hi) = bin_range_for_win(scale, spec.tof_win);
914 let (im_lo, im_hi) = (spec.im_lo, spec.im_hi);
915 let (rt_lo, rt_hi) = (spec.rt_lo, spec.rt_hi);
916
917 let raw = attach_raw_points_for_spec_1d_in_ctx(
918 ctx,
919 scale,
920 bin_lo,
921 bin_hi,
922 im_lo,
923 im_hi,
924 rt_lo,
925 rt_hi,
926 opts.attach.max_points,
927 );
928 res.raw_points = Some(raw);
929 }
930 }
931
932 res
933 })
934 .collect::<Vec<_>>()
935 };
936
937 if num_threads == 0 {
938 run()
939 } else {
940 ThreadPoolBuilder::new()
941 .num_threads(num_threads)
942 .build()
943 .unwrap()
944 .install(run)
945 }
946 }
947
948 pub fn clusters_for_group(
949 &self,
950 window_group: u32,
951 tof_step: i32,
952 im_peaks: &[ImPeak1D],
953 rt_params: RtExpandParams,
954 build_opts: &BuildSpecOpts,
955 eval_opts: &Eval1DOpts,
956 require_rt_overlap: bool,
957 num_threads: usize,
958 ) -> Vec<ClusterResult1D> {
959 debug_assert!(
961 im_peaks
962 .iter()
963 .all(|p| p.window_group == Some(window_group)),
964 "clusters_for_group: some IM peaks have wrong or missing window_group"
965 );
966
967 let rt = self.make_rt_frames_for_group(window_group, tof_step);
969
970 let build_opts_ms2 = build_opts.with_ms_level(2);
972
973 self.clusters_for_im_peaks_on_rt_frames(
974 rt,
975 im_peaks,
976 rt_params,
977 &build_opts_ms2,
978 eval_opts,
979 require_rt_overlap,
980 num_threads,
981 )
982 }
983
984 pub fn clusters_for_precursor(
985 &self,
986 tof_step: i32,
987 im_peaks: &[ImPeak1D],
988 rt_params: RtExpandParams,
989 build_opts: &BuildSpecOpts,
990 eval_opts: &Eval1DOpts,
991 require_rt_overlap: bool,
992 num_threads: usize,
993 ) -> Vec<ClusterResult1D> {
994 debug_assert!(
996 im_peaks.iter().all(|p| p.window_group.is_none()),
997 "clusters_for_precursor: IM peaks unexpectedly carry a window_group"
998 );
999
1000 let rt = self.make_rt_frames_for_precursor(tof_step);
1002
1003 let build_opts_ms1 = build_opts.with_ms_level(1);
1005
1006 self.clusters_for_im_peaks_on_rt_frames(
1007 rt,
1008 im_peaks,
1009 rt_params,
1010 &build_opts_ms1,
1011 eval_opts,
1012 require_rt_overlap,
1013 num_threads,
1014 )
1015 }
1016
1017 pub fn build_pseudo_spectra_from_clusters_geom(
1021 &self,
1022 ms1: &[ClusterResult1D],
1023 ms2: &[ClusterResult1D],
1024 features: Option<&[SimpleFeature]>,
1025 cand_opts: &CandidateOpts,
1026 score_opts: &ScoreOpts,
1027 pseudo_opts: &PseudoSpecOpts,
1028 ) -> PseudoBuildResult {
1029 build_pseudo_spectra_end_to_end(
1030 self,
1031 ms1,
1032 ms2,
1033 features,
1034 cand_opts,
1035 score_opts,
1036 pseudo_opts,
1037 )
1038 }
1039
1040 pub fn build_pseudo_spectra_from_clusters_xic(
1044 &self,
1045 ms1: &[ClusterResult1D],
1046 ms2: &[ClusterResult1D],
1047 features: Option<&[SimpleFeature]>,
1048 cand_opts: &CandidateOpts,
1049 xic_opts: &XicScoreOpts,
1050 pseudo_opts: &PseudoSpecOpts,
1051 ) -> PseudoBuildResult {
1052 build_pseudo_spectra_end_to_end_xic(
1053 self,
1054 ms1,
1055 ms2,
1056 features,
1057 cand_opts,
1058 xic_opts,
1059 pseudo_opts,
1060 )
1061 }
1062
1063 pub fn build_pseudo_spectra_all_pairs_from_clusters(
1065 &self,
1066 ms1: &[ClusterResult1D],
1067 ms2: &[ClusterResult1D],
1068 features: Option<&[SimpleFeature]>,
1069 pseudo_opts: &PseudoSpecOpts,
1070 ) -> PseudoBuildResult {
1071 build_pseudo_spectra_all_pairs(self, ms1, ms2, features, pseudo_opts)
1072 }
1073
1074 pub fn clusters_for_group_opts(
1080 &self,
1081 window_group: u32,
1082 im_peaks: &[ImPeak1D],
1083 opts: &ClusterExtractionOpts,
1084 ) -> Vec<ClusterResult1D> {
1085 self.clusters_for_group(
1086 window_group,
1087 opts.tof_step,
1088 im_peaks,
1089 opts.rt_params.clone(),
1090 &opts.build_opts,
1091 &opts.eval_opts,
1092 opts.require_rt_overlap,
1093 opts.num_threads,
1094 )
1095 }
1096
1097 pub fn clusters_for_precursor_opts(
1099 &self,
1100 im_peaks: &[ImPeak1D],
1101 opts: &ClusterExtractionOpts,
1102 ) -> Vec<ClusterResult1D> {
1103 self.clusters_for_precursor(
1104 opts.tof_step,
1105 im_peaks,
1106 opts.rt_params.clone(),
1107 &opts.build_opts,
1108 &opts.eval_opts,
1109 opts.require_rt_overlap,
1110 opts.num_threads,
1111 )
1112 }
1113
1114 pub fn tof_rt_grid_precursor(&self, tof_step: i32) -> TofRtGrid {
1117 let rt = self.make_rt_frames_for_precursor(tof_step);
1118 build_tof_rt_grid_full(&rt, None)
1119 }
1120
1121 pub fn tof_rt_grid_for_group(&self, window_group: u32, tof_step: i32) -> TofRtGrid {
1124 let rt = self.make_rt_frames_for_group(window_group, tof_step);
1125 build_tof_rt_grid_full(&rt, Some(window_group))
1126 }
1127
1128 pub fn debug_extract_raw_for_clusters(
1141 &self,
1142 clusters: &[ClusterResult1D],
1143 window_group: Option<u32>,
1144 tof_step: i32,
1145 max_points: Option<usize>,
1146 num_threads: usize,
1147 ) -> Vec<RawPoints> {
1148 if clusters.is_empty() {
1149 return Vec::new();
1150 }
1151
1152 let ms_level = clusters[0].ms_level;
1154 debug_assert!(
1155 clusters.iter().all(|c| c.ms_level == ms_level),
1156 "debug_extract_raw_for_clusters: mixed ms_level in cluster list"
1157 );
1158
1159 let clusters: Vec<ClusterResult1D> = clusters
1161 .iter()
1162 .filter(|c| match window_group {
1163 Some(g) => c.window_group == Some(g),
1164 None => true,
1165 })
1166 .cloned()
1167 .collect();
1168
1169 if clusters.is_empty() {
1170 return Vec::new();
1171 }
1172
1173 let rt_frames = if ms_level == 1 {
1178 self.make_rt_frames_for_precursor(tof_step)
1180 } else {
1181 let g = match window_group {
1183 Some(g) => g,
1184 None => clusters[0]
1185 .window_group
1186 .expect("MS2 cluster without window_group; pass window_group explicitly"),
1187 };
1188 self.make_rt_frames_for_group(g, tof_step)
1189 };
1190
1191 clusters
1193 .iter()
1194 .map(|c| {
1195 let (rt_lo, rt_hi) = c.rt_window;
1196 let (im_lo, im_hi) = c.im_window;
1197 let (tof_lo, tof_hi) = c.tof_window;
1198
1199 attach_raw_points_for_spec_1d_threads(
1200 self,
1201 &rt_frames,
1202 tof_lo,
1203 tof_hi,
1204 im_lo,
1205 im_hi,
1206 rt_lo,
1207 rt_hi,
1208 max_points,
1209 num_threads,
1210 )
1211 })
1212 .collect()
1213 }
1214}
1215
1216impl TimsData for TimsDatasetDIA {
1217 fn get_frame(&self, frame_id: u32) -> TimsFrame {
1218 self.loader.get_frame(frame_id)
1219 }
1220
1221 fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
1222 self.loader.get_raw_frame(frame_id)
1223 }
1224
1225 fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
1226 self.loader.get_slice(frame_ids, num_threads)
1227 }
1228 fn get_acquisition_mode(&self) -> AcquisitionMode {
1229 self.loader.get_acquisition_mode().clone()
1230 }
1231
1232 fn get_frame_count(&self) -> i32 {
1233 self.loader.get_frame_count()
1234 }
1235
1236 fn get_data_path(&self) -> &str {
1237 &self.loader.get_data_path()
1238 }
1239}
1240
1241impl IndexConverter for TimsDatasetDIA {
1242 fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
1243 self.loader
1244 .get_index_converter()
1245 .tof_to_mz(frame_id, tof_values)
1246 }
1247
1248 fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
1249 self.loader
1250 .get_index_converter()
1251 .mz_to_tof(frame_id, mz_values)
1252 }
1253
1254 fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
1255 self.loader
1256 .get_index_converter()
1257 .scan_to_inverse_mobility(frame_id, scan_values)
1258 }
1259
1260 fn inverse_mobility_to_scan(
1261 &self,
1262 frame_id: u32,
1263 inverse_mobility_values: &Vec<f64>,
1264 ) -> Vec<u32> {
1265 self.loader
1266 .get_index_converter()
1267 .inverse_mobility_to_scan(frame_id, inverse_mobility_values)
1268 }
1269}
1270
1271pub fn attach_raw_points_for_spec_1d_threads(
1272 ds: &TimsDatasetDIA,
1273 rt_frames: &RtFrames,
1274 final_bin_lo: usize,
1275 final_bin_hi: usize,
1276 final_im_lo: usize,
1277 final_im_hi: usize,
1278 final_rt_lo: usize,
1279 final_rt_hi: usize,
1280 max_points: Option<usize>,
1281 num_threads: usize,
1282) -> RawPoints {
1283 let scale = &*rt_frames.scale;
1284
1285 let n_bins = scale.num_bins();
1287 if n_bins == 0 {
1288 return RawPoints::default();
1289 }
1290
1291 let mut bin_lo = final_bin_lo.min(n_bins.saturating_sub(1));
1292 let mut bin_hi = final_bin_hi.min(n_bins.saturating_sub(1));
1293 if bin_lo > bin_hi {
1294 std::mem::swap(&mut bin_lo, &mut bin_hi);
1295 }
1296
1297 let mut axis_lo = scale.edges[bin_lo];
1300 let hi_edge_idx = (bin_hi + 1).min(scale.edges.len().saturating_sub(1));
1301 let mut axis_hi = cushion_hi_edge(scale, scale.edges[hi_edge_idx]);
1302
1303 let frame_ids_local = rt_frames.frame_ids[final_rt_lo..=final_rt_hi].to_vec();
1305 let slice = ds.get_slice(frame_ids_local.clone(), num_threads.max(1));
1306 let scan_slices: Vec<Vec<ScanSlice>> =
1307 slice.frames.iter().map(|fr| build_scan_slices(fr)).collect();
1308
1309 let mut total = 0usize;
1311 for (fi, fr) in slice.frames.iter().enumerate() {
1312 let tofs = &fr.tof; for sl in &scan_slices[fi] {
1314 if sl.scan < final_im_lo || sl.scan > final_im_hi {
1315 continue;
1316 }
1317 let l = lower_bound_tof(tofs, sl.start, sl.end, axis_lo);
1318 let r = upper_bound_tof(tofs, sl.start, sl.end, axis_hi);
1319 total += r.saturating_sub(l);
1320 }
1321 }
1322
1323 if total == 0 {
1325 let lo_idx = bin_lo.saturating_sub(1);
1326 let hi_edge_idx_wide =
1327 (bin_hi + 2).min(n_bins).min(scale.edges.len().saturating_sub(1));
1328
1329 let try_lo = scale.edges[lo_idx];
1330 let try_hi = cushion_hi_edge(scale, scale.edges[hi_edge_idx_wide]);
1331
1332 let mut total2 = 0usize;
1333 for (fi, fr) in slice.frames.iter().enumerate() {
1334 let tofs = &fr.tof;
1335 for sl in &scan_slices[fi] {
1336 if sl.scan < final_im_lo || sl.scan > final_im_hi {
1337 continue;
1338 }
1339 let l = lower_bound_tof(tofs, sl.start, sl.end, try_lo);
1340 let r = upper_bound_tof(tofs, sl.start, sl.end, try_hi);
1341 total2 += r.saturating_sub(l);
1342 }
1343 }
1344
1345 if total2 > 0 {
1346 total = total2;
1347 axis_lo = try_lo;
1348 axis_hi = try_hi;
1349 }
1350 }
1351
1352 if total == 0 {
1354 return RawPoints::default();
1355 }
1356
1357 let stride = max_points.map(|cap| thin_stride(total, cap)).unwrap_or(1);
1358 let reserve = total / stride + 8;
1359
1360 let mut pts = RawPoints {
1361 mz: Vec::with_capacity(reserve),
1362 rt: Vec::with_capacity(reserve),
1363 im: Vec::with_capacity(reserve),
1364 scan: Vec::with_capacity(reserve),
1365 intensity: Vec::with_capacity(reserve),
1366 tof: Vec::with_capacity(reserve),
1367 frame: Vec::with_capacity(reserve),
1368 };
1369
1370 let rt_axis_sec = rt_frames.rt_times[final_rt_lo..=final_rt_hi].to_vec();
1371
1372 let mut seen = 0usize;
1374 for (fi, fr) in slice.frames.iter().enumerate() {
1375 let mz = &fr.ims_frame.mz;
1376 let it = &fr.ims_frame.intensity;
1377 let ims = &fr.ims_frame.mobility;
1378 let tofs = &fr.tof;
1379
1380 let len_all = mz.len().min(it.len()).min(ims.len()).min(tofs.len());
1381 let rt_val = rt_axis_sec[fi];
1382 let frame_id = frame_ids_local[fi];
1383
1384 for sl in &scan_slices[fi] {
1385 let s_abs = sl.scan;
1386 if s_abs < final_im_lo || s_abs > final_im_hi {
1387 continue;
1388 }
1389
1390 let mut l = lower_bound_tof(tofs, sl.start, sl.end, axis_lo);
1391 let mut r = upper_bound_tof(tofs, sl.start, sl.end, axis_hi);
1392 if r > len_all {
1393 r = len_all;
1394 }
1395 if l >= r {
1396 continue;
1397 }
1398
1399 while l < r {
1400 if stride == 1 || (seen % stride == 0) {
1401 pts.mz.push(mz[l] as f32);
1403 pts.rt.push(rt_val);
1404 pts.im.push(ims[l] as f32);
1405 pts.scan.push(s_abs as u32);
1406 pts.intensity.push(it[l] as f32);
1407 pts.frame.push(frame_id);
1408 pts.tof.push(tofs[l]);
1409 }
1410 seen += 1;
1411 l += 1;
1412 }
1413 }
1414 }
1415
1416 pts
1417}
1418
1419#[inline]
1421fn lower_bound_tof(tofs: &[i32], start: usize, end: usize, x: f32) -> usize {
1422 let mut lo = start;
1423 let mut hi = end;
1424 let xf = x as f64;
1425 while lo < hi {
1426 let mid = (lo + hi) >> 1;
1427 if (tofs[mid] as f64) < xf {
1428 lo = mid + 1;
1429 } else {
1430 hi = mid;
1431 }
1432 }
1433 lo
1434}
1435
1436#[inline]
1438fn upper_bound_tof(tofs: &[i32], start: usize, end: usize, x: f32) -> usize {
1439 let mut lo = start;
1440 let mut hi = end;
1441 let xf = x as f64;
1442 while lo < hi {
1443 let mid = (lo + hi) >> 1;
1444 if (tofs[mid] as f64) <= xf {
1445 lo = mid + 1;
1446 } else {
1447 hi = mid;
1448 }
1449 }
1450 lo
1451}
1452
1453#[inline]
1465fn cushion_hi_edge(scale: &TofScale, hi_edge: f32) -> f32 {
1466 let edges = &scale.edges;
1467 if edges.len() >= 2 {
1468 let bw = (edges[1] - edges[0]).abs().max(1e-6);
1470 hi_edge + 0.01 * bw
1471 } else {
1472 hi_edge
1473 }
1474}
1475
1476#[inline]
1477fn thin_stride(total: usize, cap: usize) -> usize {
1478 if cap == 0 || total <= cap {
1479 1
1480 } else {
1481 (total + cap - 1) / cap
1482 }
1483}