1use std::collections::HashMap;
2use std::io;
3use std::path::Path;
4use std::sync::Arc;
5use rayon::prelude::*;
6use crate::cluster::cluster::ClusterResult1D;
7use crate::cluster::feature::SimpleFeature;
8use crate::cluster::io::load_parquet;
9use crate::cluster::peak::ThresholdMode;
10use crate::cluster::pseudo::{PseudoSpecOpts, PseudoSpectrum, build_pseudo_spectra_from_pairs, cluster_mz_mu, PseudoFragment};
11use crate::cluster::scoring::{assign_ms2_to_best_ms1_by_xic, jaccard_time, ms1_to_ms2_map, query_precursor_scored, query_precursors_scored_par, MatchScoreMode, PrecursorLike, PrecursorSearchIndex, ScoredHit, XicScoreOpts};
12use crate::cluster::utility::robust_noise_mad;
13use crate::data::dia::{DiaIndex, TimsDatasetDIA};
/// Which scorer is used to pick the best MS1 precursor for each MS2 cluster.
pub enum AssignmentMode<'a> {
    /// Geometric scoring driven by [`ScoreOpts`] (RT/IM overlap features).
    Geometric(&'a ScoreOpts),
    /// XIC-based scoring driven by [`XicScoreOpts`].
    Xic(&'a XicScoreOpts),
}
22
/// Picks the best MS1 index for each MS2 cluster using geometric scoring.
///
/// Thin wrapper around `crate::cluster::scoring::best_ms1_for_each_ms2`.
/// The result is indexed by MS2 position; `None` means no winner was found
/// for that MS2 cluster among `pairs`.
pub fn best_ms1_for_each_ms2_geom(
    ms1: &[ClusterResult1D],
    ms2: &[ClusterResult1D],
    pairs: &[(usize, usize)],
    opts: &ScoreOpts,
) -> Vec<Option<usize>> {
    crate::cluster::scoring::best_ms1_for_each_ms2(ms1, ms2, pairs, opts)
}
32
33pub fn best_ms1_for_each_ms2_xic(
36 ms1: &[ClusterResult1D],
37 ms2: &[ClusterResult1D],
38 pairs: &[(usize, usize)],
39 opts: &XicScoreOpts,
40) -> Vec<Option<usize>> {
41 let triples = assign_ms2_to_best_ms1_by_xic(ms1, ms2, pairs, opts);
42 let mut best: Vec<Option<usize>> = vec![None; ms2.len()];
43
44 for (ms2_idx, ms1_idx, _s) in triples {
45 if ms2_idx < best.len() {
46 best[ms2_idx] = Some(ms1_idx);
47 }
48 }
49 best
50}
51
52pub fn best_ms1_for_each_ms2_any(
54 ms1: &[ClusterResult1D],
55 ms2: &[ClusterResult1D],
56 pairs: &[(usize, usize)],
57 mode: AssignmentMode<'_>,
58) -> Vec<Option<usize>> {
59 match mode {
60 AssignmentMode::Geometric(opts) => {
61 best_ms1_for_each_ms2_geom(ms1, ms2, pairs, opts)
62 }
63 AssignmentMode::Xic(opts) => {
64 best_ms1_for_each_ms2_xic(ms1, ms2, pairs, opts)
65 }
66 }
67}
68
/// Gates applied while enumerating candidate (MS1, MS2) pairs and while
/// building fragment indexes.
#[derive(Clone, Debug)]
pub struct CandidateOpts {
    // Minimum RT Jaccard overlap for a pair — presumably enforced by the
    // pair enumerator; TODO confirm against PrecursorSearchIndex.
    pub min_rt_jaccard: f32,
    // Symmetric padding (seconds) applied to MS2 time bounds before the
    // span checks (see build_with_storage).
    pub ms2_rt_guard_sec: f64,
    // Bucket width for the RT index — assumed seconds; TODO confirm.
    pub rt_bucket_width: f64,
    // When set, reject MS1 clusters whose RT span exceeds this (seconds).
    pub max_ms1_rt_span_sec: Option<f64>,
    // When set, reject MS2 clusters whose RT span exceeds this (seconds).
    pub max_ms2_rt_span_sec: Option<f64>,
    // Raw-intensity threshold: a fixed cutoff or a noise-scaled one
    // (resolved via ThresholdMode::effective against a MAD noise estimate).
    pub min_raw_sum: ThresholdMode,

    // When set, maximum allowed |RT apex difference| in seconds.
    pub max_rt_apex_delta_sec: Option<f32>,
    // When set, maximum allowed |IM apex difference| in scans.
    pub max_scan_apex_delta: Option<usize>,
    // Minimum number of overlapping IM scans between the two windows.
    pub min_im_overlap_scans: usize,

    // Drop fragments whose m/z lies inside the precursor's own isolation
    // tile (likely residual unfragmented precursor signal).
    pub reject_frag_inside_precursor_tile: bool,
}
105
impl Default for CandidateOpts {
    /// Moderately strict defaults: adaptive 3-sigma intensity gate, 60 s
    /// span caps, tight apex-delta gates, tile rejection off.
    fn default() -> Self {
        Self {
            min_rt_jaccard: 0.0,
            ms2_rt_guard_sec: 0.0,
            rt_bucket_width: 1.0,
            max_ms1_rt_span_sec: Some(60.0),
            max_ms2_rt_span_sec: Some(60.0),
            min_raw_sum: ThresholdMode::AdaptiveNoise(3.0),

            max_rt_apex_delta_sec: Some(2.0),
            max_scan_apex_delta: Some(6),
            min_im_overlap_scans: 1,

            reject_frag_inside_precursor_tile: false,
        }
    }
}
124
impl CandidateOpts {
    /// Returns `self` with a fixed raw-intensity threshold of `val`.
    pub fn with_fixed_raw_sum(mut self, val: f32) -> Self {
        self.min_raw_sum = ThresholdMode::Fixed(val);
        self
    }

    /// Returns `self` with a noise-adaptive raw-intensity threshold;
    /// `sigma_multiplier` scales the robust noise estimate.
    pub fn with_adaptive_raw_sum(mut self, sigma_multiplier: f32) -> Self {
        self.min_raw_sum = ThresholdMode::AdaptiveNoise(sigma_multiplier);
        self
    }

    /// Legacy defaults: identical to [`Default`] except the intensity gate
    /// is a fixed threshold of 1.0 instead of adaptive.
    pub fn legacy_defaults() -> Self {
        Self {
            min_raw_sum: ThresholdMode::Fixed(1.0),
            ..Default::default()
        }
    }
}
146
/// Raw geometric features for one candidate (MS1, MS2) pair, as computed
/// by `build_features`.
#[derive(Clone, Copy, Debug)]
pub struct PairFeatures {
    /// Jaccard overlap of the two ±3σ RT intervals, clamped to [0, 1].
    pub jacc_rt: f32,
    /// |RT apex difference| in seconds.
    pub rt_apex_delta_s: f32,
    /// |IM apex difference| in scans.
    pub im_apex_delta_scans: f32,
    /// Inclusive scan overlap between the two IM windows.
    pub im_overlap_scans: u32,
    /// Inclusive scan union of the two IM windows (clamped to ≥ 1).
    pub im_union_scans: u32,
    /// Raw summed intensity of the MS1 cluster.
    pub ms1_raw_sum: f32,
    /// Whether the pooled-sigma shape score could be computed.
    pub shape_ok: bool,
    /// RT apex delta in pooled-sigma units (0 when shape not computable).
    pub z_rt: f32,
    /// IM apex delta in pooled-sigma units (0 when shape not computable).
    pub z_im: f32,
    /// Gaussian shape-agreement score in (0, 1] (0 when not computable).
    pub s_shape: f32,
}
165
/// Weights and scales for the geometric pair scorer.
#[derive(Clone, Debug)]
pub struct ScoreOpts {
    // Weight of the RT Jaccard term — presumably combined linearly by the
    // scorer in crate::cluster::scoring; TODO confirm.
    pub w_jacc_rt: f32,
    // Weight of the Gaussian shape-agreement term.
    pub w_shape: f32,
    // Weight of the RT apex-distance term.
    pub w_rt_apex: f32,
    // Weight of the IM apex-distance term.
    pub w_im_apex: f32,
    // Weight of the IM window-overlap term.
    pub w_im_overlap: f32,
    // Weight of the MS1 raw-intensity term.
    pub w_ms1_intensity: f32,

    // Decay scale (seconds) for the RT apex-distance term.
    pub rt_apex_scale_s: f32,
    // Decay scale (scans) for the IM apex-distance term.
    pub im_apex_scale_scans: f32,

    // Neutral shape score used when shape cannot be computed — assumed;
    // TODO confirm against the scorer.
    pub shape_neutral: f32,

    // Lower clamp on RT sigma before pooling (see build_features).
    pub min_sigma_rt: f32,
    // Lower clamp on IM sigma before pooling (see build_features).
    pub min_sigma_im: f32,

    // Inner weight on z_rt² inside the shape exponent (build_features).
    pub w_shape_rt_inner: f32,
    // Inner weight on z_im² inside the shape exponent (build_features).
    pub w_shape_im_inner: f32,
}
198
199impl Default for ScoreOpts {
200 fn default() -> Self {
201 Self {
202 w_jacc_rt: 1.0,
203 w_shape: 1.0,
204 w_rt_apex: 0.75,
205 w_im_apex: 0.75,
206 w_im_overlap: 0.5,
207 w_ms1_intensity: 0.25,
208 rt_apex_scale_s: 0.75, im_apex_scale_scans: 3.0, shape_neutral: 0.6, min_sigma_rt: 0.05,
212 min_sigma_im: 0.5,
213 w_shape_rt_inner: 1.0,
214 w_shape_im_inner: 1.0,
215 }
216 }
217}
218
/// Computes the raw geometric features for one (MS1, MS2) candidate pair.
///
/// RT overlap uses ±3σ intervals of each Gaussian RT fit; the shape score
/// is a 2-D Gaussian of the sigma-normalized apex deltas, with sigmas
/// clamped from below by `opts.min_sigma_*`.
#[inline]
pub(crate) fn build_features(ms1: &ClusterResult1D, ms2: &ClusterResult1D, opts: &ScoreOpts) -> PairFeatures {
    // RT support intervals taken as mu ± 3 sigma of each fit (in f64 to
    // match jaccard_time's signature).
    let (rt1_lo, rt1_hi) = (
        ms1.rt_fit.mu as f64 - (ms1.rt_fit.sigma as f64) * 3.0,
        ms1.rt_fit.mu as f64 + (ms1.rt_fit.sigma as f64) * 3.0,
    );
    let (rt2_lo, rt2_hi) = (
        ms2.rt_fit.mu as f64 - (ms2.rt_fit.sigma as f64) * 3.0,
        ms2.rt_fit.mu as f64 + (ms2.rt_fit.sigma as f64) * 3.0,
    );
    let jacc_rt = jaccard_time(rt1_lo, rt1_hi, rt2_lo, rt2_hi).clamp(0.0, 1.0);

    let rt_apex_delta_s = (ms1.rt_fit.mu - ms2.rt_fit.mu).abs();
    let im_apex_delta_scans = (ms1.im_fit.mu - ms2.im_fit.mu).abs();

    let (im_inter, im_union) = im_overlap_and_union(ms1.im_window, ms2.im_window);

    // Clamp sigmas from below so near-degenerate fits cannot blow up the
    // normalized deltas.
    let s1_rt = ms1.rt_fit.sigma.max(opts.min_sigma_rt);
    let s2_rt = ms2.rt_fit.sigma.max(opts.min_sigma_rt);
    let s1_im = ms1.im_fit.sigma.max(opts.min_sigma_im);
    let s2_im = ms2.im_fit.sigma.max(opts.min_sigma_im);

    // Shape score: exp(-0.5 * (w_rt*z_rt² + w_im*z_im²)) over pooled
    // sigmas; left at the (false, 0, 0, 0) defaults if pooling fails.
    let (mut shape_ok, mut z_rt, mut z_im, mut s_shape) = (false, 0.0, 0.0, 0.0);
    if let (Some(sig_rt), Some(sig_im)) = (pooled_sigma(s1_rt, s2_rt), pooled_sigma(s1_im, s2_im)) {
        if sig_rt.is_finite() && sig_rt > 0.0 && sig_im.is_finite() && sig_im > 0.0 {
            z_rt = rt_apex_delta_s / sig_rt;
            z_im = im_apex_delta_scans / sig_im;
            let q = -0.5_f32 * (opts.w_shape_rt_inner * z_rt * z_rt
                + opts.w_shape_im_inner * z_im * z_im);
            s_shape = q.exp();
            shape_ok = s_shape.is_finite();
        }
    }

    PairFeatures {
        jacc_rt,
        rt_apex_delta_s,
        im_apex_delta_scans,
        im_overlap_scans: im_inter,
        im_union_scans: im_union,
        ms1_raw_sum: ms1.raw_sum,
        shape_ok,
        z_rt,
        z_im,
        s_shape,
    }
}
270
/// Intersection and union sizes (in scans) of two inclusive index windows.
///
/// Degenerate windows (`hi < lo`) contribute length 0; the union is
/// clamped to at least 1 so callers can safely divide by it.
#[inline]
fn im_overlap_and_union(a: (usize, usize), b: (usize, usize)) -> (u32, u32) {
    // Length of an inclusive window, or 0 when it is degenerate.
    let len = |w: (usize, usize)| -> u32 {
        if w.1 >= w.0 { (w.1 - w.0 + 1) as u32 } else { 0 }
    };
    let inter_lo = a.0.max(b.0);
    let inter_hi = a.1.min(b.1);
    let inter = if inter_hi >= inter_lo {
        (inter_hi - inter_lo + 1) as u32
    } else {
        0
    };
    let union = len(a) + len(b) - inter;
    (inter, union.max(1))
}
281
/// Quadrature-pooled sigma: `sqrt(s1² + s2²)`.
///
/// Returns `None` when the sum of squares is non-finite (NaN/overflow)
/// or not strictly positive.
#[inline]
fn pooled_sigma(s1: f32, s2: f32) -> Option<f32> {
    let sum_sq = s1 * s1 + s2 * s2;
    (sum_sq.is_finite() && sum_sq > 0.0).then(|| sum_sq.sqrt())
}
287
/// Exponential decay `exp(-delta / scale)`.
///
/// Returns 0.0 for non-finite inputs or a non-positive scale, so the
/// score term simply vanishes instead of propagating NaN/Inf.
#[inline]
pub(crate) fn exp_decay(delta: f32, scale: f32) -> f32 {
    let usable = delta.is_finite() && scale.is_finite() && scale > 0.0;
    if usable { (-delta / scale).exp() } else { 0.0 }
}
294
/// Numerically stable `log(1 + x)` for finite, non-negative `x`;
/// returns 0.0 otherwise (negative, NaN, or infinite input).
///
/// Uses `f64::ln_1p` instead of the hand-rolled `(1.0 + x).ln()`, which
/// loses precision for very small `x` (catastrophic cancellation in the
/// `1.0 + x` addition). Result is identical for all non-tiny inputs.
#[inline]
pub(crate) fn safe_log1p(x: f32) -> f32 {
    if x.is_finite() && x >= 0.0 {
        (x as f64).ln_1p() as f32
    } else {
        0.0
    }
}
299
/// Outcome of candidate enumeration plus (optional) best-precursor
/// assignment.
#[derive(Clone, Debug)]
pub struct AssignmentResult {
    // All surviving candidate pairs as (ms2_idx, ms1_idx), in enumeration
    // order (see the consumers in build_pseudo_spectra_all_pairs).
    pub pairs: Vec<(usize, usize)>,
    // Winning MS1 index per MS2 cluster; None = unassigned. All-pairs
    // builds leave every entry None.
    pub ms2_best_ms1: Vec<Option<usize>>,
    // For each MS1 cluster, the MS2 indices associated with it.
    pub ms1_to_ms2: Vec<Vec<usize>>,
}
323
/// Result of a pseudo-spectrum build: the assignment bookkeeping plus the
/// spectra themselves.
#[derive(Clone, Debug)]
pub struct PseudoBuildResult {
    // Pair/assignment tables that produced the spectra.
    pub assignment: AssignmentResult,
    // Built pseudo-spectra (empty when no pairs survived).
    pub pseudo_spectra: Vec<PseudoSpectrum>,
}
332
333pub fn build_pseudo_spectra_all_pairs(
346 ds: &TimsDatasetDIA,
347 ms1: &[ClusterResult1D],
348 ms2: &[ClusterResult1D],
349 features: Option<&[SimpleFeature]>,
350 pseudo_opts: &PseudoSpecOpts,
351) -> PseudoBuildResult {
352 let cand_opts = CandidateOpts {
354 min_rt_jaccard: 0.0,
355 ms2_rt_guard_sec: 0.0,
356 rt_bucket_width: 1.0,
357 max_ms1_rt_span_sec: None,
358 max_ms2_rt_span_sec: None,
359 min_raw_sum: ThresholdMode::Fixed(1.0), max_rt_apex_delta_sec: None,
362 max_scan_apex_delta: None,
363 min_im_overlap_scans: 1,
364 reject_frag_inside_precursor_tile: true,
365 };
366
367 let idx = PrecursorSearchIndex::build(ds, ms1, &cand_opts);
369 let pairs = idx.enumerate_pairs(ms1, ms2, &cand_opts); if pairs.is_empty() {
373 return PseudoBuildResult {
374 assignment: AssignmentResult {
375 pairs,
376 ms2_best_ms1: vec![None; ms2.len()],
377 ms1_to_ms2: vec![Vec::new(); ms1.len()],
378 },
379 pseudo_spectra: Vec::new(),
380 };
381 }
382
383 let empty_feats: &[SimpleFeature] = &[];
385 let feat_slice = features.unwrap_or(empty_feats);
386
387 let pseudo_spectra = build_pseudo_spectra_from_pairs(
388 ms1,
389 ms2,
390 feat_slice,
391 &pairs,
392 pseudo_opts,
393 );
394
395 let mut ms1_to_ms2: Vec<Vec<usize>> = vec![Vec::new(); ms1.len()];
400 for (ms2_idx, ms1_idx) in &pairs {
401 if *ms1_idx < ms1_to_ms2.len() {
402 ms1_to_ms2[*ms1_idx].push(*ms2_idx);
403 }
404 }
405
406 let ms2_best_ms1 = vec![None; ms2.len()];
407
408 let assignment = AssignmentResult {
409 pairs,
410 ms2_best_ms1,
411 ms1_to_ms2,
412 };
413
414 PseudoBuildResult {
415 assignment,
416 pseudo_spectra,
417 }
418}
419
/// End-to-end pseudo-spectrum construction using the geometric scorer.
///
/// Pipeline: build a precursor search index → enumerate candidate
/// (MS2, MS1) pairs → pick at most one MS1 winner per MS2 cluster via
/// geometric scoring → build pseudo-spectra from only the winning pairs.
/// The returned assignment records the full candidate list plus both
/// directions of the winner mapping.
pub fn build_pseudo_spectra_end_to_end(
    ds: &TimsDatasetDIA,
    ms1: &[ClusterResult1D],
    ms2: &[ClusterResult1D],
    features: Option<&[SimpleFeature]>,
    cand_opts: &CandidateOpts,
    score_opts: &ScoreOpts,
    pseudo_opts: &PseudoSpecOpts,
) -> PseudoBuildResult {
    let idx = PrecursorSearchIndex::build(ds, ms1, cand_opts);
    let pairs = idx.enumerate_pairs(ms1, ms2, cand_opts);
    // No candidates: return an empty but correctly-sized result.
    if pairs.is_empty() {
        return PseudoBuildResult {
            assignment: AssignmentResult {
                pairs,
                ms2_best_ms1: vec![None; ms2.len()],
                ms1_to_ms2: vec![Vec::new(); ms1.len()],
            },
            pseudo_spectra: Vec::new(),
        };
    }

    // Resolve at most one MS1 winner per MS2 cluster.
    let ms2_best_ms1 = best_ms1_for_each_ms2_geom(
        ms1,
        ms2,
        &pairs,
        score_opts,
    );
    let ms1_to_ms2 = ms1_to_ms2_map(
        ms1.len(),
        &ms2_best_ms1,
    );

    let assignment = AssignmentResult {
        pairs,
        ms2_best_ms1: ms2_best_ms1.clone(),
        ms1_to_ms2: ms1_to_ms2.clone(),
    };

    // Flatten the winner map back into (ms2, ms1) pairs for the spectrum
    // builder.
    let mut winning_pairs: Vec<(usize, usize)> = Vec::new();
    for (ms1_idx, js) in ms1_to_ms2.iter().enumerate() {
        for &ms2_idx in js {
            winning_pairs.push((ms2_idx, ms1_idx));
        }
    }

    // Debug-only invariant: each MS2 cluster may win at most one MS1
    // (guaranteed by construction via ms1_to_ms2_map; verified here).
    debug_assert!({
        use std::collections::HashSet;
        let mut seen = HashSet::new();
        for (ms2_idx, _) in &winning_pairs {
            if !seen.insert(ms2_idx) {
                panic!("Duplicated ms2");
            }
        }
        true
    });

    let empty_feats: &[SimpleFeature] = &[];
    let feat_slice = features.unwrap_or(empty_feats);

    let pseudo_spectra = build_pseudo_spectra_from_pairs(
        ms1,
        ms2,
        feat_slice,
        &winning_pairs,
        pseudo_opts,
    );

    PseudoBuildResult {
        assignment,
        pseudo_spectra,
    }
}
510
/// End-to-end pseudo-spectrum construction using the XIC-based scorer.
///
/// Identical pipeline to [`build_pseudo_spectra_end_to_end`] except the
/// per-MS2 winner is chosen with XIC scoring (`XicScoreOpts`) instead of
/// geometric scoring.
pub fn build_pseudo_spectra_end_to_end_xic(
    ds: &TimsDatasetDIA,
    ms1: &[ClusterResult1D],
    ms2: &[ClusterResult1D],
    features: Option<&[SimpleFeature]>,
    cand_opts: &CandidateOpts,
    xic_opts: &XicScoreOpts,
    pseudo_opts: &PseudoSpecOpts,
) -> PseudoBuildResult {
    let idx = PrecursorSearchIndex::build(ds, ms1, cand_opts);
    let pairs = idx.enumerate_pairs(ms1, ms2, cand_opts);
    // No candidates: return an empty but correctly-sized result.
    if pairs.is_empty() {
        return PseudoBuildResult {
            assignment: AssignmentResult {
                pairs,
                ms2_best_ms1: vec![None; ms2.len()],
                ms1_to_ms2: vec![Vec::new(); ms1.len()],
            },
            pseudo_spectra: Vec::new(),
        };
    }

    // Resolve at most one MS1 winner per MS2 cluster via XIC scoring.
    let ms2_best_ms1 = best_ms1_for_each_ms2_xic(
        ms1,
        ms2,
        &pairs,
        xic_opts,
    );
    let ms1_to_ms2 = ms1_to_ms2_map(
        ms1.len(),
        &ms2_best_ms1,
    );

    let assignment = AssignmentResult {
        pairs,
        ms2_best_ms1: ms2_best_ms1.clone(),
        ms1_to_ms2: ms1_to_ms2.clone(),
    };

    // Flatten the winner map back into (ms2, ms1) pairs for the spectrum
    // builder.
    let mut winning_pairs: Vec<(usize, usize)> = Vec::new();
    for (ms1_idx, js) in ms1_to_ms2.iter().enumerate() {
        for &ms2_idx in js {
            winning_pairs.push((ms2_idx, ms1_idx));
        }
    }

    // Debug-only invariant: each MS2 cluster may win at most one MS1.
    debug_assert!({
        use std::collections::HashSet;
        let mut seen = HashSet::new();
        for (ms2_idx, _) in &winning_pairs {
            if !seen.insert(ms2_idx) {
                panic!("Duplicated ms2");
            }
        }
        true
    });

    let empty_feats: &[SimpleFeature] = &[];
    let feat_slice = features.unwrap_or(empty_feats);

    let pseudo_spectra = build_pseudo_spectra_from_pairs(
        ms1,
        ms2,
        feat_slice,
        &winning_pairs,
        pseudo_opts,
    );

    PseudoBuildResult {
        assignment,
        pseudo_spectra,
    }
}
601
/// Column-oriented snapshot of per-fragment metadata; all vectors are
/// parallel (same length, indexed by fragment position).
#[derive(Clone, Debug, Default)]
pub struct FragmentMetadata {
    // IM apex per fragment (may be NaN when unknown).
    pub im_mu: Vec<f32>,
    // Inclusive IM scan windows per fragment.
    pub im_windows: Vec<(usize, usize)>,
    // Validity flags from the index's quality gates.
    pub keep: Vec<bool>,
    // Stable cluster ids per fragment.
    pub cluster_ids: Vec<u64>,
    // m/z apex per fragment (may be NaN when unknown).
    pub mz_mu: Vec<f32>,
}
620
621impl FragmentMetadata {
622 #[inline]
624 pub fn len(&self) -> usize {
625 self.cluster_ids.len()
626 }
627
628 #[inline]
630 pub fn is_empty(&self) -> bool {
631 self.cluster_ids.is_empty()
632 }
633
634 #[inline]
636 pub fn count_valid(&self) -> usize {
637 self.keep.iter().filter(|&&k| k).count()
638 }
639
640 #[inline]
642 pub fn get(&self, idx: usize) -> Option<FragmentMetadataEntry> {
643 if idx >= self.len() {
644 return None;
645 }
646 Some(FragmentMetadataEntry {
647 im_mu: self.im_mu[idx],
648 im_window: self.im_windows[idx],
649 keep: self.keep[idx],
650 cluster_id: self.cluster_ids[idx],
651 mz_mu: self.mz_mu[idx],
652 })
653 }
654}
655
/// Row view over one fragment's metadata (see [`FragmentMetadata::get`]).
#[derive(Clone, Copy, Debug)]
pub struct FragmentMetadataEntry {
    // IM apex (may be NaN).
    pub im_mu: f32,
    // Inclusive IM scan window.
    pub im_window: (usize, usize),
    // Validity flag from the index's quality gates.
    pub keep: bool,
    // Stable cluster id.
    pub cluster_id: u64,
    // m/z apex (may be NaN).
    pub mz_mu: f32,
}
665
/// Shared, cheaply-clonable fragment-cluster storage with an id lookup.
#[derive(Clone, Debug)]
pub struct FragmentStorage {
    // Immutable cluster storage; Arc<[T]> makes clones O(1).
    pub clusters: Arc<[ClusterResult1D]>,
    // cluster_id -> positional index into `clusters`.
    pub id_to_idx: HashMap<u64, usize>,
}
674
675impl FragmentStorage {
676 pub fn from_vec(clusters: Vec<ClusterResult1D>) -> Self {
678 let id_to_idx: HashMap<u64, usize> = clusters
679 .iter()
680 .enumerate()
681 .map(|(i, c)| (c.cluster_id, i))
682 .collect();
683 Self {
684 clusters: clusters.into(),
685 id_to_idx,
686 }
687 }
688
689 #[inline]
691 pub fn get(&self, idx: usize) -> Option<&ClusterResult1D> {
692 self.clusters.get(idx)
693 }
694
695 #[inline]
697 pub fn get_by_id(&self, id: u64) -> Option<&ClusterResult1D> {
698 self.id_to_idx.get(&id).and_then(|&i| self.clusters.get(i))
699 }
700
701 #[inline]
703 pub fn index_of(&self, id: u64) -> Option<usize> {
704 self.id_to_idx.get(&id).copied()
705 }
706
707 #[inline]
709 pub fn len(&self) -> usize {
710 self.clusters.len()
711 }
712
713 #[inline]
715 pub fn is_empty(&self) -> bool {
716 self.clusters.is_empty()
717 }
718
719 #[inline]
721 pub fn iter(&self) -> impl Iterator<Item = &ClusterResult1D> {
722 self.clusters.iter()
723 }
724}
725
/// Per-window-group lookup table: MS2 indices sorted by ascending RT apex.
/// `rt_apex[k]` is the apex time of `ms2_indices[k]` (parallel vectors),
/// enabling binary search over RT (see `query_precursor`).
#[derive(Clone, Debug)]
pub struct FragmentGroupIndex {
    // MS2 storage indices, ordered by RT apex.
    pub ms2_indices: Vec<usize>,
    // Sorted RT apex values, parallel to `ms2_indices`.
    pub rt_apex: Vec<f32>,
}
733
/// Gates applied when querying fragments for a precursor.
#[derive(Clone, Debug)]
pub struct FragmentQueryOpts {
    // When Some(dt > 0), only fragments with RT apex within ±dt seconds of
    // the precursor apex are considered; Some(dt <= 0) or None disables it.
    pub max_rt_apex_delta_sec: Option<f32>,
    // When set, maximum |IM apex difference| in scans; fragments with no
    // finite IM apex are rejected while this gate is active.
    pub max_scan_apex_delta: Option<usize>,
    // Minimum inclusive scan overlap of the two IM windows.
    pub min_im_overlap_scans: usize,
    // Require precursor and fragment scan windows to hit at least one
    // common isolation tile.
    pub require_tile_compat: bool,
    // Drop fragments whose m/z falls inside the precursor's isolation
    // tile (likely unfragmented precursor signal).
    pub reject_frag_inside_precursor_tile: bool,
}
748
impl Default for FragmentQueryOpts {
    /// Defaults mirroring [`CandidateOpts::default`]'s apex gates, with
    /// tile compatibility required and in-tile rejection off.
    fn default() -> Self {
        Self {
            max_rt_apex_delta_sec: Some(2.0),
            max_scan_apex_delta: Some(6),
            min_im_overlap_scans: 1,
            require_tile_compat: true,
            reject_frag_inside_precursor_tile: false,
        }
    }
}
760
/// Group-partitioned index over MS2 fragment clusters supporting fast
/// candidate lookup for a precursor: per-DIA-group RT binary search plus
/// IM/tile gating.
#[derive(Clone, Debug)]
pub struct FragmentIndex {
    // DIA acquisition metadata (frame times, window groups, tile slices).
    dia_index: Arc<DiaIndex>,

    // Shared, immutable storage of all MS2 clusters, in input order.
    ms2_storage: Arc<[ClusterResult1D]>,

    // Per-cluster IM apex: fit mean, or window midpoint fallback; NaN when
    // neither is available.
    ms2_im_mu: Vec<f32>,
    // Per-cluster inclusive IM scan windows.
    ms2_im_windows: Vec<(usize, usize)>,
    // Validity flags from the quality gates in `build_with_storage`.
    ms2_keep: Vec<bool>,
    // Per-cluster stable ids.
    ms2_cluster_ids: Vec<u64>,

    // Per-cluster m/z apex: fit value or mz_window midpoint; NaN otherwise.
    ms2_mz_mu: Vec<f32>,

    // Per window group: kept clusters sorted by RT apex.
    by_group: HashMap<u32, FragmentGroupIndex>,

    // cluster_id -> positional index into `ms2_storage`.
    id_to_idx: HashMap<u64, usize>,
}
785
786impl FragmentIndex {
    /// All stored MS2 clusters, in input order.
    #[inline]
    pub fn ms2_slice(&self) -> &[ClusterResult1D] {
        &self.ms2_storage
    }
791
    /// MS2 cluster at positional index `idx`, if in range.
    #[inline]
    pub fn get_ms2_by_index(&self, idx: usize) -> Option<&ClusterResult1D> {
        self.ms2_storage.get(idx)
    }
796
    /// MS2 cluster with stable id `cid`, if present.
    #[inline]
    pub fn get_ms2_by_cluster_id(&self, cid: u64) -> Option<&ClusterResult1D> {
        self.id_to_idx.get(&cid).and_then(|&i| self.ms2_storage.get(i))
    }
801
    /// Snapshot of the per-fragment metadata columns.
    ///
    /// Note: clones every column vector — O(n) allocation per call.
    #[inline]
    pub fn metadata(&self) -> FragmentMetadata {
        FragmentMetadata {
            im_mu: self.ms2_im_mu.clone(),
            im_windows: self.ms2_im_windows.clone(),
            keep: self.ms2_keep.clone(),
            cluster_ids: self.ms2_cluster_ids.clone(),
            mz_mu: self.ms2_mz_mu.clone(),
        }
    }
815
    /// Storage view sharing the underlying cluster data (cheap Arc clone;
    /// the id map is deep-copied).
    #[inline]
    pub fn storage(&self) -> FragmentStorage {
        FragmentStorage {
            clusters: Arc::clone(&self.ms2_storage),
            id_to_idx: self.id_to_idx.clone(),
        }
    }
824
    /// The DIA acquisition metadata this index was built against.
    #[inline]
    pub fn dia_index(&self) -> &Arc<DiaIndex> {
        &self.dia_index
    }
830
    /// Per-group RT-sorted lookup table, if the group has any kept
    /// fragments.
    #[inline]
    pub fn group_index(&self, group: u32) -> Option<&FragmentGroupIndex> {
        self.by_group.get(&group)
    }
836
    /// All window groups present in the index (unspecified order —
    /// HashMap key order).
    #[inline]
    pub fn groups(&self) -> Vec<u32> {
        self.by_group.keys().copied().collect()
    }
842
    /// Number of fragments that passed the quality gates (`keep == true`).
    #[inline]
    pub fn count_valid(&self) -> usize {
        self.ms2_keep.iter().filter(|&&k| k).count()
    }
848
    /// Total number of stored MS2 clusters (kept or not).
    #[inline]
    pub fn len(&self) -> usize {
        self.ms2_storage.len()
    }
854
    /// True when no MS2 clusters are stored.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.ms2_storage.is_empty()
    }
860
    /// Core constructor: derives per-cluster metadata, applies the quality
    /// gates from `opts`, and builds the per-group RT-sorted lookup tables.
    fn build_with_storage(
        dia_index: Arc<DiaIndex>,
        ms2_storage: Arc<[ClusterResult1D]>,
        opts: &CandidateOpts,
    ) -> Self {
        let ms2: &[ClusterResult1D] = &ms2_storage;
        let frame_time = &dia_index.frame_time;

        // Wall-clock (t_lo, t_hi) bounds per cluster, taken from the
        // frames the cluster actually used; falls back to scanning the
        // rt_window id range (assumes rt_window holds frame ids when the
        // primary source is empty — TODO confirm).
        let ms2_time_bounds: Vec<(f64, f64)> = ms2
            .par_iter()
            .map(|c| {
                let mut t_lo = f64::INFINITY;
                let mut t_hi = f64::NEG_INFINITY;

                if !c.frame_ids_used.is_empty() {
                    for &fid in &c.frame_ids_used {
                        if let Some(&t) = frame_time.get(&fid) {
                            if t < t_lo { t_lo = t; }
                            if t > t_hi { t_hi = t; }
                        }
                    }
                }

                if !t_lo.is_finite() || !t_hi.is_finite() {
                    let (rt_lo, rt_hi) = c.rt_window;
                    if rt_hi >= rt_lo {
                        for fid in rt_lo as u32..=rt_hi as u32 {
                            if let Some(&t) = frame_time.get(&fid) {
                                if t < t_lo { t_lo = t; }
                                if t > t_hi { t_hi = t; }
                            }
                        }
                    }
                }

                (t_lo, t_hi)
            })
            .collect();

        // Resolve the intensity threshold: adaptive thresholds are scaled
        // by a robust (MAD) noise estimate over MS2 raw sums only.
        let raw_sums: Vec<f32> = ms2
            .iter()
            .filter(|c| c.ms_level == 2)
            .map(|c| c.raw_sum)
            .collect();
        let raw_sum_noise = robust_noise_mad(&raw_sums);
        let effective_min_raw_sum = opts.min_raw_sum.effective(raw_sum_noise);

        // Keep mask: must be MS2, have a window group, pass the intensity
        // gate, and have sane guard-padded time bounds within the span cap.
        let ms2_keep: Vec<bool> = ms2
            .par_iter()
            .enumerate()
            .map(|(i, c)| {
                if c.ms_level != 2 {
                    return false;
                }
                if c.window_group.is_none() {
                    return false;
                }
                if c.raw_sum < effective_min_raw_sum {
                    return false;
                }

                let (mut t0, mut t1) = ms2_time_bounds[i];
                if t0.is_finite() {
                    t0 -= opts.ms2_rt_guard_sec;
                }
                if t1.is_finite() {
                    t1 += opts.ms2_rt_guard_sec;
                }

                if !(t0.is_finite() && t1.is_finite() && t1 > t0) {
                    return false;
                }

                if let Some(max_rt) = opts.max_ms2_rt_span_sec {
                    if (t1 - t0) > max_rt {
                        return false;
                    }
                }

                true
            })
            .collect();

        let ms2_im_windows: Vec<(usize, usize)> =
            ms2.iter().map(|c| c.im_window).collect();

        // IM apex: fitted mean when finite and positive, else the IM
        // window midpoint, else NaN.
        let ms2_im_mu: Vec<f32> = ms2
            .iter()
            .map(|c| {
                let mu = c.im_fit.mu;
                if mu.is_finite() && mu > 0.0 {
                    mu
                } else {
                    let (lo, hi) = c.im_window;
                    if hi > lo {
                        ((lo + hi) as f32) * 0.5
                    } else {
                        f32::NAN
                    }
                }
            })
            .collect();

        let ms2_cluster_ids: Vec<u64> =
            ms2.iter().map(|c| c.cluster_id).collect();

        // RT apex: fitted mean when usable, else the midpoint of the time
        // bounds computed above, else NaN (excluded from the group index).
        let ms2_rt_apex: Vec<f32> = ms2
            .iter()
            .enumerate()
            .map(|(i, c)| {
                let mu = c.rt_fit.mu;
                if mu.is_finite() && mu > 0.0 {
                    mu
                } else {
                    let (t0, t1) = ms2_time_bounds[i];
                    if t0.is_finite() && t1.is_finite() && t1 > t0 {
                        ((t0 + t1) * 0.5) as f32
                    } else {
                        f32::NAN
                    }
                }
            })
            .collect();

        // m/z apex: fitted value when usable, else the mz_window midpoint,
        // else NaN.
        let ms2_mz_mu: Vec<f32> = ms2
            .iter()
            .map(|c| {
                if let Some(m) = cluster_mz_mu(c) {
                    if m.is_finite() && m > 0.0 {
                        m
                    } else {
                        match c.mz_window {
                            Some((lo, hi)) if lo.is_finite() && hi.is_finite() && hi > lo => {
                                0.5 * (lo + hi) as f32
                            }
                            _ => f32::NAN,
                        }
                    }
                } else {
                    match c.mz_window {
                        Some((lo, hi)) if lo.is_finite() && hi.is_finite() && hi > lo => {
                            0.5 * (lo + hi) as f32
                        }
                        _ => f32::NAN,
                    }
                }
            })
            .collect();

        // Bucket kept clusters (with a finite RT apex) by window group.
        let mut by_group_raw: HashMap<u32, Vec<(f32, usize)>> = HashMap::new();

        for (j, c2) in ms2.iter().enumerate() {
            if !ms2_keep[j] {
                continue;
            }
            let g = match c2.window_group {
                Some(g) => g,
                None => continue,
            };

            let rt = ms2_rt_apex[j];
            if !rt.is_finite() {
                continue;
            }

            by_group_raw.entry(g).or_default().push((rt, j));
        }

        // Sort each group by RT apex so queries can binary-search; NaNs
        // cannot occur here (filtered above), so Equal fallback is inert.
        let mut by_group: HashMap<u32, FragmentGroupIndex> = HashMap::new();
        for (g, mut v) in by_group_raw {
            v.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
            let (rt_apex, ms2_indices): (Vec<f32>, Vec<usize>) =
                v.into_iter().map(|(rt, j)| (rt, j)).unzip();
            by_group.insert(g, FragmentGroupIndex { ms2_indices, rt_apex });
        }

        // Reverse lookup: stable cluster id -> storage index.
        let id_to_idx: HashMap<u64, usize> = ms2_cluster_ids
            .iter()
            .enumerate()
            .map(|(i, &cid)| (cid, i))
            .collect();

        FragmentIndex {
            dia_index,
            ms2_storage,
            ms2_im_mu,
            ms2_im_windows,
            ms2_keep,
            ms2_cluster_ids,
            ms2_mz_mu,
            by_group,
            id_to_idx,
        }
    }
1075
    /// Builds a fragment index from a single parquet file of clusters.
    ///
    /// # Errors
    /// Fails on a non-UTF-8 path or any error from `load_parquet`.
    pub fn from_parquet_file(
        dia_index: Arc<DiaIndex>,
        parquet_path: impl AsRef<Path>,
        opts: &CandidateOpts,
    ) -> io::Result<Self> {
        let path_str = parquet_path
            .as_ref()
            .to_str()
            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "non-UTF8 path"))?;

        let clusters = load_parquet(path_str)?;
        Ok(Self::from_owned(dia_index, clusters, opts))
    }
1089
1090 pub fn from_parquet_dir(
1092 dia_index: Arc<DiaIndex>,
1093 dir: impl AsRef<Path>,
1094 opts: &CandidateOpts,
1095 ) -> io::Result<Self> {
1096 let dir = dir.as_ref();
1097 let mut all: Vec<ClusterResult1D> = Vec::new();
1098
1099 for entry in std::fs::read_dir(dir)? {
1100 let entry = entry?;
1101 if !entry.file_type()?.is_file() {
1102 continue;
1103 }
1104 let path = entry.path();
1105 if path.extension().and_then(|s| s.to_str()) != Some("parquet") {
1106 continue;
1107 }
1108
1109 let path_str = path
1110 .to_str()
1111 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "non-UTF8 path"))?;
1112
1113 let mut part = load_parquet(path_str)?;
1114 all.append(&mut part);
1115 }
1116
1117 Ok(Self::from_owned(dia_index, all, opts))
1118 }
1119
    /// Builds a fragment index from borrowed clusters (copies them into
    /// shared storage).
    pub fn from_slice(
        dia_index: Arc<DiaIndex>,
        ms2: &[ClusterResult1D],
        opts: &CandidateOpts,
    ) -> Self {
        let storage: Arc<[ClusterResult1D]> = ms2.to_vec().into();
        Self::build_with_storage(dia_index, storage, opts)
    }
1129
    /// Builds a fragment index from owned clusters (moves them into shared
    /// storage without copying element data).
    pub fn from_owned(
        dia_index: Arc<DiaIndex>,
        ms2: Vec<ClusterResult1D>,
        opts: &CandidateOpts,
    ) -> Self {
        let storage: Arc<[ClusterResult1D]> = ms2.into();
        Self::build_with_storage(dia_index, storage, opts)
    }
1139
1140 fn precursor_rt_apex_sec(&self, prec: &ClusterResult1D) -> Option<f32> {
1141 let mu = prec.rt_fit.mu;
1143 if mu.is_finite() && mu > 0.0 {
1144 return Some(mu);
1145 }
1146
1147 let ft = &self.dia_index.frame_time;
1148
1149 let mut sum = 0.0f64;
1151 let mut n = 0usize;
1152 for fid in &prec.frame_ids_used {
1153 if let Some(&t) = ft.get(fid) {
1154 if t.is_finite() {
1155 sum += t;
1156 n += 1;
1157 }
1158 }
1159 }
1160 if n > 0 {
1161 return Some((sum / n as f64) as f32);
1162 }
1163
1164 let (rt_lo, rt_hi) = prec.rt_window;
1166 if rt_hi >= rt_lo {
1167 let mut sum = 0.0f64;
1168 let mut n = 0usize;
1169 for fid in rt_lo as u32..=rt_hi as u32 {
1170 if let Some(&t) = ft.get(&fid) {
1171 if t.is_finite() {
1172 sum += t;
1173 n += 1;
1174 }
1175 }
1176 }
1177 if n > 0 {
1178 return Some((sum / n as f64) as f32);
1179 }
1180 }
1181
1182 None
1183 }
1184
    /// Returns the cluster ids of MS2 fragments compatible with `prec`.
    ///
    /// Filters applied per candidate, in order: DIA window-group
    /// membership, RT apex proximity (binary search over each group's
    /// sorted apexes), IM window overlap, IM apex distance, and optional
    /// isolation-tile compatibility / in-tile rejection. The result is
    /// sorted and deduplicated (a fragment can be reachable via several
    /// groups).
    pub fn query_precursor(
        &self,
        prec: &ClusterResult1D,
        groups: Option<&[u32]>,
        opts: &FragmentQueryOpts,
    ) -> Vec<u64> {
        let mut out = Vec::new();

        // Precursor m/z: fitted value when usable, else the mz_window
        // midpoint, else no hits are possible.
        let prec_mz = if let Some(m) = cluster_mz_mu(prec) {
            if m.is_finite() && m > 0.0 {
                m
            } else {
                match prec.mz_window {
                    Some((lo, hi)) if lo.is_finite() && hi.is_finite() && hi > lo => {
                        0.5 * (lo + hi)
                    }
                    _ => return out,
                }
            }
        } else {
            match prec.mz_window {
                Some((lo, hi)) if lo.is_finite() && hi.is_finite() && hi > lo => {
                    0.5 * (lo + hi)
                }
                _ => return out,
            }
        };

        // RT apex is mandatory for the RT gate.
        let prec_rt = match self.precursor_rt_apex_sec(prec) {
            Some(t) => t,
            None => return out,
        };

        // IM apex: fitted value, else IM window midpoint, else bail out.
        let prec_im = if prec.im_fit.mu.is_finite() {
            prec.im_fit.mu
        } else {
            let (lo, hi) = prec.im_window;
            if hi > lo {
                ((lo + hi) as f32) * 0.5
            } else {
                return out;
            }
        };

        let prec_im_win = prec.im_window;
        let prec_scan_win = (prec_im_win.0 as u32, prec_im_win.1 as u32);

        // Caller-supplied groups win; otherwise derive them from the DIA
        // isolation scheme.
        let groups: Vec<u32> = match groups {
            Some(gs) if !gs.is_empty() => gs.to_vec(),
            _ => self.dia_index.groups_for_precursor(prec_mz, prec_im),
        };
        if groups.is_empty() {
            return Vec::new();
        }

        let require_tile = opts.require_tile_compat;
        let reject_inside = opts.reject_frag_inside_precursor_tile;

        for g in groups {
            let fg = match self.by_group.get(&g) {
                Some(fg) => fg,
                None => continue,
            };

            // Restrict to fragments whose RT apex lies within ±dt of the
            // precursor apex; rt_apex is sorted, so binary bounds apply.
            // A non-positive dt (or None) disables the RT gate.
            let (lo_idx, hi_idx) = if let Some(dt) = opts.max_rt_apex_delta_sec {
                if dt > 0.0 {
                    let lo_t = prec_rt - dt;
                    let hi_t = prec_rt + dt;
                    (
                        lower_bound(&fg.rt_apex, lo_t),
                        upper_bound(&fg.rt_apex, hi_t),
                    )
                } else {
                    (0, fg.ms2_indices.len())
                }
            } else {
                (0, fg.ms2_indices.len())
            };

            let slices = self.dia_index.program_slices_for_group(g);

            'ms2_loop: for k in lo_idx..hi_idx {
                let j = fg.ms2_indices[k];
                if !self.ms2_keep[j] {
                    continue;
                }

                let im2 = self.ms2_im_windows[j];
                let frag_scan_win = (im2.0 as u32, im2.1 as u32);

                // Inclusive scan-window overlap with the precursor.
                let im_overlap = {
                    let lo = prec_im_win.0.max(im2.0);
                    let hi = prec_im_win.1.min(im2.1);
                    hi.saturating_sub(lo).saturating_add(1)
                };
                if im_overlap < opts.min_im_overlap_scans {
                    continue;
                }

                // IM apex distance gate; fragments without a finite apex
                // are rejected outright while the gate is active.
                if let Some(max_d) = opts.max_scan_apex_delta {
                    let s2 = self.ms2_im_mu[j];
                    if s2.is_finite() {
                        let d = (prec_im - s2).abs() as f32;
                        if d > max_d as f32 {
                            continue 'ms2_loop;
                        }
                    } else {
                        continue 'ms2_loop;
                    }
                }

                if require_tile || reject_inside {
                    let mut tile_compatible = false;
                    let mut inside_prec_tile = false;
                    let frag_mz = self.ms2_mz_mu[j];

                    for s in &slices {
                        let tile_scans = (s.scan_lo, s.scan_hi);

                        let prec_hits_tile = ranges_overlap_u32(prec_scan_win, tile_scans);
                        let frag_hits_tile = ranges_overlap_u32(frag_scan_win, tile_scans);

                        // Only tiles hit by BOTH scan windows matter.
                        if !(prec_hits_tile && frag_hits_tile) {
                            continue;
                        }

                        tile_compatible = true;

                        // A fragment m/z inside the precursor's own
                        // isolation tile is likely unfragmented precursor
                        // signal; optionally mark it for rejection.
                        if reject_inside && frag_mz.is_finite() {
                            let prec_in_tile =
                                prec_mz >= s.mz_lo as f32 && prec_mz <= s.mz_hi as f32;
                            let frag_in_tile =
                                frag_mz >= s.mz_lo as f32 && frag_mz <= s.mz_hi as f32;

                            if prec_in_tile && frag_in_tile {
                                inside_prec_tile = true;
                                break;
                            }
                        }
                    }

                    if require_tile && !tile_compatible {
                        continue 'ms2_loop;
                    }

                    if reject_inside && inside_prec_tile {
                        continue 'ms2_loop;
                    }
                }

                out.push(self.ms2_cluster_ids[j]);
            }
        }

        // Dedupe ids reachable through multiple groups.
        out.sort_unstable();
        out.dedup();
        out
    }
1360
    /// Runs [`Self::query_precursor`] for each precursor in parallel on a
    /// dedicated rayon pool of `num_threads` threads; output order matches
    /// the input order.
    ///
    /// # Panics
    /// Panics if the rayon thread pool cannot be built.
    pub fn query_precursors_par(
        &self,
        precursors: &[ClusterResult1D],
        opts: &FragmentQueryOpts,
        num_threads: usize,
    ) -> Vec<Vec<u64>> {
        let pool = rayon::ThreadPoolBuilder::new()
            .num_threads(num_threads)
            .build()
            .unwrap();
        pool.install(|| {
            precursors
                .par_iter()
                .map(|prec| self.query_precursor(prec, None, opts))
                .collect()
        })
    }
1379
    /// Enumerates candidate MS2 *storage indices* for a pre-resolved
    /// (thin) precursor.
    ///
    /// Same gating pipeline as [`Self::query_precursor`] (group, RT apex,
    /// IM overlap, IM apex, tile compatibility) but skips the m/z / RT /
    /// IM fallback resolution — the `ThinPrecursor` already carries final
    /// values. Returns sorted, deduplicated indices.
    pub fn enumerate_candidates_for_precursor_thin(
        &self,
        prec: &ThinPrecursor,
        window_groups: Option<&[u32]>,
        opts: &FragmentQueryOpts,
    ) -> Vec<usize> {
        let mut out = Vec::<usize>::new();

        let prec_mz = prec.mz_mu;
        let prec_rt = prec.rt_mu;
        let prec_im = prec.im_mu;
        let prec_im_win = prec.im_window;
        let prec_scan_win = (prec_im_win.0 as u32, prec_im_win.1 as u32);

        // Caller-supplied groups win; otherwise derive them from the DIA
        // isolation scheme.
        let groups: Vec<u32> = match window_groups {
            Some(gs) if !gs.is_empty() => gs.to_vec(),
            _ => self.dia_index.groups_for_precursor(prec_mz, prec_im),
        };
        if groups.is_empty() {
            return out;
        }

        let require_tile = opts.require_tile_compat;
        let reject_inside = opts.reject_frag_inside_precursor_tile;

        for g in groups {
            let fg = match self.by_group.get(&g) {
                Some(fg) => fg,
                None => continue,
            };

            // RT gate via binary bounds over the group's sorted apexes;
            // a non-positive dt (or None) disables it.
            let (lo_idx, hi_idx) = if let Some(dt) = opts.max_rt_apex_delta_sec {
                if dt > 0.0 {
                    let lo_t = prec_rt - dt;
                    let hi_t = prec_rt + dt;
                    (
                        lower_bound(&fg.rt_apex, lo_t),
                        upper_bound(&fg.rt_apex, hi_t),
                    )
                } else {
                    (0, fg.ms2_indices.len())
                }
            } else {
                (0, fg.ms2_indices.len())
            };

            let slices = self.dia_index.program_slices_for_group(g);

            'ms2_loop: for k in lo_idx..hi_idx {
                let j = fg.ms2_indices[k];
                if !self.ms2_keep[j] {
                    continue;
                }

                let im2 = self.ms2_im_windows[j];
                let frag_scan_win = (im2.0 as u32, im2.1 as u32);

                // Inclusive scan-window overlap with the precursor.
                let im_overlap = {
                    let lo = prec_im_win.0.max(im2.0);
                    let hi = prec_im_win.1.min(im2.1);
                    hi.saturating_sub(lo).saturating_add(1)
                };
                if im_overlap < opts.min_im_overlap_scans {
                    continue;
                }

                // IM apex distance gate; fragments without a finite apex
                // are rejected outright while the gate is active.
                if let Some(max_d) = opts.max_scan_apex_delta {
                    let s2 = self.ms2_im_mu[j];
                    if s2.is_finite() {
                        let d = (prec_im - s2).abs() as f32;
                        if d > max_d as f32 {
                            continue 'ms2_loop;
                        }
                    } else {
                        continue 'ms2_loop;
                    }
                }

                if require_tile || reject_inside {
                    let mut tile_compatible = false;
                    let mut inside_prec_tile = false;
                    let frag_mz = self.ms2_mz_mu[j];

                    for s in &slices {
                        let tile_scans = (s.scan_lo, s.scan_hi);

                        let prec_hits_tile = ranges_overlap_u32(prec_scan_win, tile_scans);
                        let frag_hits_tile = ranges_overlap_u32(frag_scan_win, tile_scans);

                        // Only tiles hit by BOTH scan windows matter.
                        if !(prec_hits_tile && frag_hits_tile) {
                            continue;
                        }

                        tile_compatible = true;

                        // Optionally reject fragments whose m/z lies inside
                        // the precursor's own isolation tile.
                        if reject_inside && frag_mz.is_finite() {
                            let prec_in_tile =
                                prec_mz >= s.mz_lo as f32 && prec_mz <= s.mz_hi as f32;
                            let frag_in_tile =
                                frag_mz >= s.mz_lo as f32 && frag_mz <= s.mz_hi as f32;

                            if prec_in_tile && frag_in_tile {
                                inside_prec_tile = true;
                                break;
                            }
                        }
                    }

                    if require_tile && !tile_compatible {
                        continue 'ms2_loop;
                    }
                    if reject_inside && inside_prec_tile {
                        continue 'ms2_loop;
                    }
                }

                out.push(j);
            }
        }

        // Dedupe indices reachable through multiple groups.
        out.sort_unstable();
        out.dedup();
        out
    }
1516
1517 fn enumerate_candidates_for_precursors_thin(
1521 &self,
1522 precs: &[Option<ThinPrecursor>],
1523 opts: &FragmentQueryOpts,
1524 ) -> Vec<Vec<usize>> {
1525 precs
1526 .iter()
1527 .map(|maybe_prec| {
1528 if let Some(ref p) = maybe_prec {
1529 self.enumerate_candidates_for_precursor_thin(p, None, opts)
1530 } else {
1531 Vec::new()
1532 }
1533 })
1534 .collect()
1535 }
1536
1537 pub fn enumerate_candidates_for_feature(
1544 &self,
1545 feat: &SimpleFeature,
1546 opts: &FragmentQueryOpts,
1547 ) -> Vec<usize> {
1548 let prec_cluster = match feature_representative_cluster(feat) {
1549 Some(c) => c,
1550 None => return Vec::new(),
1551 };
1552 let thin = match self.make_thin_precursor(prec_cluster) {
1553 Some(t) => t,
1554 None => return Vec::new(),
1555 };
1556 self.enumerate_candidates_for_precursor_thin(&thin, None, opts)
1558 }
1559
1560 pub fn query_precursor_scored(
1561 &self,
1562 prec: &ClusterResult1D,
1563 window_groups: Option<&[u32]>,
1564 opts: &FragmentQueryOpts,
1565 mode: MatchScoreMode,
1566 geom_opts: &ScoreOpts,
1567 xic_opts: &XicScoreOpts,
1568 min_score: f32,
1569 ) -> Vec<ScoredHit> {
1570 let thin = match self.make_thin_precursor(prec) {
1572 Some(t) => t,
1573 None => return Vec::new(),
1574 };
1575
1576 let candidate_ids =
1577 self.enumerate_candidates_for_precursor_thin(&thin, window_groups, opts);
1578
1579 query_precursor_scored(
1581 PrecursorLike::Cluster(prec),
1582 self.ms2_slice(),
1583 &candidate_ids,
1584 mode,
1585 geom_opts,
1586 xic_opts,
1587 min_score,
1588 )
1589 }
1590
1591 pub fn query_precursors_scored_par(
1592 &self,
1593 precs: &[ClusterResult1D],
1594 opts: &FragmentQueryOpts,
1595 mode: MatchScoreMode,
1596 geom_opts: &ScoreOpts,
1597 xic_opts: &XicScoreOpts,
1598 min_score: f32,
1599 ) -> Vec<Vec<ScoredHit>> {
1600 let thin_precs: Vec<Option<ThinPrecursor>> = precs
1602 .iter()
1603 .map(|c| self.make_thin_precursor(c))
1604 .collect();
1605
1606 let all_candidates = self.enumerate_candidates_for_precursors_thin(&thin_precs, opts);
1608
1609 let prec_like: Vec<PrecursorLike<'_>> =
1611 precs.iter().map(|c| PrecursorLike::Cluster(c)).collect();
1612
1613 query_precursors_scored_par(
1615 &prec_like,
1616 self.ms2_slice(),
1617 &all_candidates,
1618 mode,
1619 geom_opts,
1620 xic_opts,
1621 min_score,
1622 )
1623 }
1624
1625 fn enumerate_candidates_for_features(
1626 &self,
1627 feats: &[SimpleFeature],
1628 opts: &FragmentQueryOpts,
1629 ) -> Vec<Vec<usize>> {
1630 feats
1631 .iter()
1632 .map(|feat| {
1633 let prec_cluster = match feature_representative_cluster(feat) {
1634 Some(c) => c,
1635 None => return Vec::new(),
1636 };
1637 let thin = match self.make_thin_precursor(prec_cluster) {
1638 Some(t) => t,
1639 None => return Vec::new(),
1640 };
1641 self.enumerate_candidates_for_precursor_thin(&thin, None, opts)
1643 })
1644 .collect()
1645 }
1646
1647 pub fn query_feature_scored(
1653 &self,
1654 feat: &SimpleFeature,
1655 opts: &FragmentQueryOpts,
1656 mode: MatchScoreMode,
1657 geom_opts: &ScoreOpts,
1658 xic_opts: &XicScoreOpts,
1659 min_score: f32,
1660 ) -> Vec<ScoredHit> {
1661 let candidate_ids = self.enumerate_candidates_for_feature(feat, opts);
1662
1663 query_precursor_scored(
1664 PrecursorLike::Feature(feat),
1665 self.ms2_slice(),
1666 &candidate_ids,
1667 mode,
1668 geom_opts,
1669 xic_opts,
1670 min_score,
1671 )
1672 }
1673
1674 pub fn query_features_scored_par(
1679 &self,
1680 feats: &[SimpleFeature],
1681 opts: &FragmentQueryOpts,
1682 mode: MatchScoreMode,
1683 geom_opts: &ScoreOpts,
1684 xic_opts: &XicScoreOpts,
1685 min_score: f32,
1686 ) -> Vec<Vec<ScoredHit>> {
1687 let all_candidates = self.enumerate_candidates_for_features(feats, opts);
1688
1689 let prec_like: Vec<PrecursorLike<'_>> =
1690 feats.iter().map(|f| PrecursorLike::Feature(f)).collect();
1691
1692 query_precursors_scored_par(
1693 &prec_like,
1694 self.ms2_slice(),
1695 &all_candidates,
1696 mode,
1697 geom_opts,
1698 xic_opts,
1699 min_score,
1700 )
1701 }
1702
1703 pub fn make_thin_precursor(&self, prec: &ClusterResult1D) -> Option<ThinPrecursor> {
1708 let prec_mz = if let Some(m) = cluster_mz_mu(prec) {
1710 if m.is_finite() && m > 0.0 {
1711 m
1712 } else {
1713 match prec.mz_window {
1714 Some((lo, hi)) if lo.is_finite() && hi.is_finite() && hi > lo => {
1715 0.5 * (lo + hi)
1716 }
1717 _ => return None, }
1719 }
1720 } else {
1721 match prec.mz_window {
1722 Some((lo, hi)) if lo.is_finite() && hi.is_finite() && hi > lo => {
1723 0.5 * (lo + hi)
1724 }
1725 _ => return None,
1726 }
1727 };
1728
1729 let rt_mu = match self.precursor_rt_apex_sec(prec) {
1731 Some(t) => t,
1732 None => return None,
1733 };
1734
1735 let im_window = prec.im_window;
1737 let im_mu = if prec.im_fit.mu.is_finite() {
1738 prec.im_fit.mu
1739 } else {
1740 let (lo, hi) = im_window;
1741 if hi > lo {
1742 ((lo + hi) as f32) * 0.5
1743 } else {
1744 return None;
1745 }
1746 };
1747
1748 Some(ThinPrecursor {
1749 mz_mu: prec_mz,
1750 rt_mu,
1751 im_mu,
1752 im_window,
1753 })
1754 }
1755}
1756
#[derive(Clone, Debug)]
/// Minimal precursor representation used for fast candidate enumeration,
/// produced by `make_thin_precursor`.
pub struct ThinPrecursor {
    /// Precursor m/z centroid (fitted mu, or m/z-window midpoint fallback).
    pub mz_mu: f32,

    /// Retention-time apex in seconds (from `precursor_rt_apex_sec`).
    pub rt_mu: f32,

    /// Ion-mobility apex in scan coordinates (fitted mu, or scan-window
    /// midpoint fallback).
    pub im_mu: f32,

    /// Ion-mobility scan window as (lo, hi) scan indices.
    pub im_window: (usize, usize),
}
1771
1772
1773
/// Index of the first element in `xs` that is `>= x` (C++ `lower_bound`).
///
/// `xs` must be sorted ascending. Uses `slice::partition_point`, the standard
/// library's binary search over a predicate, instead of a hand-rolled
/// `lo`/`hi` loop — same result, and no `(lo + hi)` midpoint arithmetic to
/// overflow on very large slices.
fn lower_bound(xs: &[f32], x: f32) -> usize {
    xs.partition_point(|&v| v < x)
}
1789
/// True when the closed (inclusive) ranges `a` and `b` share at least one
/// value: the larger of the two lower bounds must not exceed the smaller of
/// the two upper bounds.
#[inline]
fn ranges_overlap_u32(a: (u32, u32), b: (u32, u32)) -> bool {
    a.0.max(b.0) <= a.1.min(b.1)
}
1796
/// Index of the first element in `xs` that is `> x` (C++ `upper_bound`).
///
/// `xs` must be sorted ascending. Uses `slice::partition_point` instead of a
/// hand-rolled binary search — same result, no midpoint-overflow concern, and
/// consistent with `lower_bound` above.
fn upper_bound(xs: &[f32], x: f32) -> usize {
    xs.partition_point(|&v| v <= x)
}
1810
1811pub fn fragment_from_cluster(c: &ClusterResult1D) -> Option<PseudoFragment> {
1814 let mz = if let Some(fit) = &c.mz_fit {
1816 if fit.mu.is_finite() && fit.mu > 0.0 {
1817 fit.mu
1818 } else if let Some((lo, hi)) = c.mz_window {
1819 0.5 * (lo + hi)
1820 } else {
1821 return None;
1822 }
1823 } else if let Some((lo, hi)) = c.mz_window {
1824 0.5 * (lo + hi)
1825 } else {
1826 return None;
1827 };
1828
1829 if !mz.is_finite() {
1830 return None;
1831 }
1832
1833 let intensity = c.raw_sum;
1835
1836 Some(PseudoFragment {
1837 mz,
1838 intensity,
1839 ms2_cluster_index: 0,
1840 ms2_cluster_id: c.cluster_id,
1841 window_group: c.window_group.unwrap_or(0),
1842 })
1843}
1844
1845fn feature_representative_cluster<'a>(feat: &'a SimpleFeature) -> Option<&'a ClusterResult1D> {
1846 if feat.member_clusters.is_empty() {
1847 return None;
1848 }
1849
1850 feat.member_clusters
1851 .iter()
1852 .max_by(|a, b| a.raw_sum.partial_cmp(&b.raw_sum).unwrap_or(std::cmp::Ordering::Equal))
1853}