1use std::cmp::Ordering;
2use std::collections::HashMap;
3use std::sync::Arc;
4
5use rayon::prelude::*;
6use crate::cluster::candidates::{CandidateOpts, PairFeatures, ScoreOpts};
7use crate::cluster::cluster::ClusterResult1D;
8use crate::cluster::feature::SimpleFeature;
9use crate::cluster::pseudo::cluster_mz_mu;
10use crate::cluster::utility::robust_noise_mad;
11use crate::data::dia::{DiaIndex, TimsDatasetDIA};
12
13#[derive(Clone, Copy, Debug)]
14pub enum MatchScoreMode {
15 Geom,
17 Xic,
19}
20
21#[derive(Clone, Debug)]
22pub struct XicDetails {
23 pub s_rt: Option<f32>,
25 pub s_im: Option<f32>,
27 pub s_intensity: Option<f32>,
29
30 pub r_rt: Option<f32>,
32 pub r_im: Option<f32>,
34}
35
36#[derive(Clone, Debug)]
37pub struct ScoredHit {
38 pub frag_idx: usize,
40 pub score: f32,
42 pub geom: Option<PairFeatures>,
44 pub xic: Option<XicDetails>,
46}
47
48#[derive(Clone, Copy)]
49pub enum PrecursorLike<'a> {
50 Cluster(&'a ClusterResult1D),
51 Feature(&'a SimpleFeature),
52}
53
54#[derive(Clone, Debug)]
55pub struct XicScoreOpts {
56 pub w_rt: f32,
58 pub w_im: f32,
60 pub w_intensity: f32,
62
63 pub intensity_tau: f32,
66
67 pub min_total_score: f32,
69
70 pub use_rt: bool,
72 pub use_im: bool,
73 pub use_intensity: bool,
74}
75
76impl Default for XicScoreOpts {
77 fn default() -> Self {
78 Self {
79 w_rt: 0.45,
81 w_im: 0.45,
82 w_intensity: 0.10,
83 intensity_tau: 1.5,
84 min_total_score: 0.0, use_rt: true,
87 use_im: true,
88 use_intensity: true,
89 }
90 }
91}
92
93#[inline]
94fn zscore(v: &[f32]) -> Option<Vec<f32>> {
95 let n = v.len();
96 if n < 3 {
97 return None;
98 }
99 let mut sum = 0.0f64;
100 let mut sum2 = 0.0f64;
101 for &x in v {
102 let xf = x as f64;
103 sum += xf;
104 sum2 += xf * xf;
105 }
106 let n_f = n as f64;
107 let mean = sum / n_f;
108 let var = (sum2 / n_f) - mean * mean;
109 if !var.is_finite() || var <= 0.0 {
110 return None;
111 }
112 let std = var.sqrt();
113 let mut out = Vec::with_capacity(n);
114 for &x in v {
115 out.push(((x as f64 - mean) / std) as f32);
116 }
117 Some(out)
118}
119
120fn pearson_corr_z(a: &[f32], b: &[f32]) -> Option<f32> {
122 let n = a.len().min(b.len());
123 if n < 3 {
124 return None;
125 }
126 let az = zscore(&a[..n])?;
127 let bz = zscore(&b[..n])?;
128
129 let mut num = 0.0f64;
130 for i in 0..n {
131 num += az[i] as f64 * bz[i] as f64;
132 }
133 let den = (n as f64).max(1.0);
134 let r = (num / den)
135 .max(-1.0)
136 .min(1.0);
137 Some(r as f32)
138}
139
140pub fn xic_match_score(
141 ms1: &ClusterResult1D,
142 ms2: &ClusterResult1D,
143 opts: &XicScoreOpts,
144) -> Option<(XicDetails, f32)> {
145 let mut score = 0.0f32;
146 let mut w_sum = 0.0f32;
147
148 let mut s_rt: Option<f32> = None;
149 let mut s_im: Option<f32> = None;
150 let mut s_intensity: Option<f32> = None;
151 let mut r_rt: Option<f32> = None;
152 let mut r_im: Option<f32> = None;
153
154 if opts.use_rt {
156 if let (Some(ref rt1), Some(ref rt2)) = (&ms1.rt_trace, &ms2.rt_trace) {
157 if let Some(r) = pearson_corr_z(rt1, rt2) {
158 if r.is_finite() {
159 let s = 0.5 * (r + 1.0); r_rt = Some(r);
161 s_rt = Some(s);
162 score += opts.w_rt * s;
163 w_sum += opts.w_rt;
164 }
165 }
166 }
167 }
168
169 if opts.use_im {
171 if let (Some(ref im1), Some(ref im2)) = (&ms1.im_trace, &ms2.im_trace) {
172 if let Some(r) = pearson_corr_z(im1, im2) {
173 if r.is_finite() {
174 let s = 0.5 * (r + 1.0);
175 r_im = Some(r);
176 s_im = Some(s);
177 score += opts.w_im * s;
178 w_sum += opts.w_im;
179 }
180 }
181 }
182 }
183
184 if opts.use_intensity && opts.w_intensity > 0.0 && opts.intensity_tau > 0.0 {
186 let i1 = if let Some(ref rt1) = ms1.rt_trace {
188 rt1.iter().fold(0.0f32, |acc, x| acc + x.max(0.0))
189 } else {
190 ms1.raw_sum.max(0.0)
191 };
192
193 let i2 = if let Some(ref rt2) = ms2.rt_trace {
194 rt2.iter().fold(0.0f32, |acc, x| acc + x.max(0.0))
195 } else {
196 ms2.raw_sum.max(0.0)
197 };
198
199 if i1 > 0.0 && i2 > 0.0 {
200 let ratio = (i2 / i1).max(1e-6);
201 let d = ratio.ln().abs(); let s = (-d / opts.intensity_tau).exp(); if s.is_finite() {
204 s_intensity = Some(s);
205 score += opts.w_intensity * s;
206 w_sum += opts.w_intensity;
207 }
208 }
209 }
210
211 if w_sum <= 0.0 {
212 return None;
213 }
214
215 let final_score = score / w_sum;
216 if !final_score.is_finite() {
217 None
218 } else {
219 let details = XicDetails {
220 s_rt,
221 s_im,
222 s_intensity,
223 r_rt,
224 r_im,
225 };
226 Some((details, final_score.clamp(0.0, 1.0)))
227 }
228}
229
230pub fn assign_ms2_to_best_ms1_by_xic(
231 ms1: &[ClusterResult1D],
232 ms2: &[ClusterResult1D],
233 pairs: &[(usize, usize)],
234 opts: &XicScoreOpts,
235) -> Vec<(usize, usize, f32)> {
236 if ms1.is_empty() || ms2.is_empty() || pairs.is_empty() {
237 return Vec::new();
238 }
239
240 let ms1_int = precompute_intensities(ms1);
242 let ms2_int = precompute_intensities(ms2);
243
244 let mut best: Vec<Option<(usize, f32)>> = vec![None; ms2.len()];
246
247 for &(ms2_idx, ms1_idx) in pairs {
248 if ms1_idx >= ms1.len() || ms2_idx >= ms2.len() {
249 continue;
250 }
251
252 let s = match xic_match_score_precomputed(
254 ms1_idx,
255 ms2_idx,
256 ms1,
257 ms2,
258 &ms1_int,
259 &ms2_int,
260 opts,
261 ) {
262 Some(v) => v,
263 None => continue,
264 };
265
266 if s < opts.min_total_score {
268 continue;
269 }
270
271 match &mut best[ms2_idx] {
272 Some((_best_i, best_s)) if s <= *best_s => {
273 }
275 slot => {
276 *slot = Some((ms1_idx, s));
278 }
279 }
280 }
281
282 let mut out = Vec::new();
284 out.reserve(pairs.len().min(ms2.len()));
285 for (ms2_idx, maybe) in best.into_iter().enumerate() {
286 if let Some((ms1_idx, s)) = maybe {
287 out.push((ms2_idx, ms1_idx, s));
288 }
289 }
290 out
291}
292
293#[inline]
295pub fn jaccard_time(a_lo: f64, a_hi: f64, b_lo: f64, b_hi: f64) -> f32 {
296 if !a_lo.is_finite() || !a_hi.is_finite() || !b_lo.is_finite() || !b_hi.is_finite() {
297 return 0.0;
298 }
299 if a_hi < b_lo || b_hi < a_lo {
300 return 0.0;
301 }
302 let inter = (a_hi.min(b_hi) - a_lo.max(b_lo)).max(0.0);
303 let union = (a_hi.max(b_hi) - a_lo.min(b_lo)).max(0.0);
304 if union <= 0.0 {
305 0.0
306 } else {
307 (inter / union) as f32
308 }
309}
310
311#[inline]
312fn overlap_f32(a: (f32, f32), b: (f32, f32)) -> bool {
313 let (a0, a1) = a;
314 let (b0, b1) = b;
315 a0.is_finite()
316 && a1.is_finite()
317 && b0.is_finite()
318 && b1.is_finite()
319 && a1 >= b0
320 && b1 >= a0
321}
322
323#[inline]
324fn overlap_u32(a: (u32, u32), b: (u32, u32)) -> bool {
325 a.1 >= b.0 && b.1 >= a.0
326}
327
328#[derive(Clone, Debug)]
330pub struct RtBuckets {
331 lo: f64,
332 inv_bw: f64,
333 buckets: Vec<Vec<usize>>,
334}
335
336impl RtBuckets {
337 pub fn build(
338 global_lo: f64,
339 global_hi: f64,
340 bucket_width: f64,
341 ms1_time_bounds: &[(f64, f64)],
342 ms1_keep: Option<&[bool]>,
343 ) -> Self {
344 let bw = bucket_width.max(0.5);
345 let lo = global_lo.floor();
346 let hi = global_hi.ceil().max(lo + bw);
347 let n = (((hi - lo) / bw).ceil() as usize).max(1);
348 let inv_bw = 1.0 / bw;
349 let mut buckets = vec![Vec::<usize>::new(); n];
350
351 let clamp = |x: f64| -> usize {
352 if x <= lo {
353 0
354 } else {
355 (((x - lo) * inv_bw).floor() as isize).clamp(0, (n as isize) - 1) as usize
356 }
357 };
358
359 for (i, &(t0, t1)) in ms1_time_bounds.iter().enumerate() {
360 if let Some(keep) = ms1_keep {
361 if !keep[i] {
362 continue;
363 }
364 }
365 if !(t0.is_finite() && t1.is_finite()) || t1 <= t0 {
366 continue;
367 }
368 let b0 = clamp(t0);
369 let b1 = clamp(t1);
370 for b in b0..=b1 {
371 buckets[b].push(i);
372 }
373 }
374 Self { lo, inv_bw, buckets }
375 }
376
377 #[inline]
378 fn range(&self, t0: f64, t1: f64) -> (usize, usize) {
379 let n = self.buckets.len();
380 let clamp = |x: f64| -> usize {
381 if x <= self.lo {
382 0
383 } else {
384 (((x - self.lo) * self.inv_bw).floor() as isize).clamp(0, (n as isize) - 1) as usize
385 }
386 };
387 let a = clamp(t0.min(t1));
388 let b = clamp(t0.max(t1));
389 (a.min(b), a.max(b))
390 }
391
392 #[inline]
394 pub fn gather(&self, t0: f64, t1: f64, out: &mut Vec<usize>) {
395 let (b0, b1) = self.range(t0, t1);
396 for b in b0..=b1 {
397 out.extend_from_slice(&self.buckets[b]);
398 }
399 }
400}
401
402#[derive(Clone, Debug)]
404pub struct PrecursorSearchIndex {
405 ms1_time_bounds: Vec<(f64, f64)>,
406 ms1_keep: Vec<bool>,
407 rt_buckets: RtBuckets,
408 ms1_group_ok: HashMap<u32, Vec<bool>>,
410 frame_time: Arc<HashMap<u32, f64>>,
412 dia_index: Arc<DiaIndex>,
414}
415
416impl PrecursorSearchIndex {
417 pub fn build(ds: &TimsDatasetDIA, ms1: &[ClusterResult1D], opts: &CandidateOpts) -> Self {
419 let frame_time = Arc::new(ds.dia_index.frame_time.clone());
420 let dia_index = Arc::new(ds.dia_index.clone());
421
422 let ms1_time_bounds: Vec<(f64, f64)> = ms1
424 .par_iter()
425 .map(|c| {
426 let mut t_lo = f64::INFINITY;
427 let mut t_hi = f64::NEG_INFINITY;
428
429 if !c.frame_ids_used.is_empty() {
431 for &fid in &c.frame_ids_used {
432 if let Some(&t) = frame_time.get(&fid) {
433 if t < t_lo { t_lo = t; }
434 if t > t_hi { t_hi = t; }
435 }
436 }
437 }
438
439 if !t_lo.is_finite() || !t_hi.is_finite() {
441 let (rt_lo, rt_hi) = c.rt_window;
442 if rt_hi >= rt_lo {
443 for fid in rt_lo as u32..=rt_hi as u32 {
444 if let Some(&t) = frame_time.get(&fid) {
445 if t < t_lo { t_lo = t; }
446 if t > t_hi { t_hi = t; }
447 }
448 }
449 }
450 }
451
452 (t_lo, t_hi)
454 })
455 .collect();
456
457 let ms1_raw_sums: Vec<f32> = ms1
459 .iter()
460 .filter(|c| c.ms_level == 1)
461 .map(|c| c.raw_sum)
462 .collect();
463 let ms1_noise = robust_noise_mad(&ms1_raw_sums);
464 let effective_min_raw_sum = opts.min_raw_sum.effective(ms1_noise);
465
466 let ms1_keep: Vec<bool> = ms1
468 .par_iter()
469 .enumerate()
470 .map(|(i, c)| {
471 if c.ms_level != 1 {
472 return false;
473 }
474 if c.raw_sum < effective_min_raw_sum {
475 return false;
476 }
477 let (t0, t1) = ms1_time_bounds[i];
478 if !(t0.is_finite() && t1.is_finite()) || t1 <= t0 {
479 return false;
480 }
481 if let Some(max_rt) = opts.max_ms1_rt_span_sec {
482 if (t1 - t0) > max_rt {
483 return false;
484 }
485 }
486 true
487 })
488 .collect();
489
490 let (mut rt_min, mut rt_max) = (f64::INFINITY, f64::NEG_INFINITY);
492 for &(a, b) in &ms1_time_bounds {
493 if a.is_finite() {
494 rt_min = rt_min.min(a);
495 }
496 if b.is_finite() {
497 rt_max = rt_max.max(b);
498 }
499 }
500 if !rt_min.is_finite() || !rt_max.is_finite() || rt_max <= rt_min {
501 rt_min = 0.0;
502 rt_max = 1.0;
503 }
504 let rt_buckets = RtBuckets::build(
505 rt_min,
506 rt_max,
507 opts.rt_bucket_width,
508 &ms1_time_bounds,
509 Some(&ms1_keep),
510 );
511
512 let ms1_group_ok: HashMap<u32, Vec<bool>> = ds
514 .dia_index
515 .group_to_isolation
516 .par_iter()
517 .map(|(&g, mz_rows)| {
518 let scans = ds
519 .dia_index
520 .group_to_scan_ranges
521 .get(&g)
522 .cloned()
523 .unwrap_or_default();
524 let mz_rows_f32: Vec<(f32, f32)> =
525 mz_rows.iter().map(|&(a, b)| (a as f32, b as f32)).collect();
526 let scan_rows_u32: Vec<(u32, u32)> = scans.iter().copied().collect();
527
528 if mz_rows_f32.is_empty() || scan_rows_u32.is_empty() {
529 return (g, vec![false; ms1.len()]);
530 }
531
532 let mask: Vec<bool> = ms1
533 .par_iter()
534 .map(|c| {
535 if c.ms_level != 1 {
536 return false;
537 }
538 let mz_ok = mz_rows_f32
539 .iter()
540 .any(|&w| overlap_f32(c.mz_window.unwrap(), w));
541 if !mz_ok {
542 return false;
543 }
544 let im_u32 = (c.im_window.0 as u32, c.im_window.1 as u32);
545 scan_rows_u32.iter().any(|&s| overlap_u32(im_u32, s))
546 })
547 .collect();
548
549 (g, mask)
550 })
551 .collect();
552
553 Self {
554 ms1_time_bounds,
555 ms1_keep,
556 rt_buckets,
557 ms1_group_ok,
558 frame_time,
559 dia_index,
560 }
561 }
562
563 pub fn enumerate_pairs(
574 &self,
575 ms1: &[ClusterResult1D],
576 ms2: &[ClusterResult1D],
577 opts: &CandidateOpts,
578 ) -> Vec<(usize, usize)> {
579 let ms2_time_bounds: Vec<(f64, f64)> = ms2
581 .par_iter()
582 .map(|c| {
583 let mut t_lo = f64::INFINITY;
584 let mut t_hi = f64::NEG_INFINITY;
585
586 if !c.frame_ids_used.is_empty() {
587 for &fid in &c.frame_ids_used {
588 if let Some(&t) = self.frame_time.get(&fid) {
589 if t < t_lo { t_lo = t; }
590 if t > t_hi { t_hi = t; }
591 }
592 }
593 }
594
595 if !t_lo.is_finite() || !t_hi.is_finite() {
597 let (rt_lo, rt_hi) = c.rt_window;
598 if rt_hi >= rt_lo {
599 for fid in rt_lo as u32..=rt_hi as u32 {
600 if let Some(&t) = self.frame_time.get(&fid) {
601 if t < t_lo { t_lo = t; }
602 if t > t_hi { t_hi = t; }
603 }
604 }
605 }
606 }
607
608 (t_lo, t_hi)
609 })
610 .collect();
611
612 let ms2_time_bounds = Arc::new(ms2_time_bounds);
613
614 let ms2_raw_sums: Vec<f32> = ms2
616 .iter()
617 .filter(|c| c.ms_level == 2)
618 .map(|c| c.raw_sum)
619 .collect();
620 let ms2_noise = robust_noise_mad(&ms2_raw_sums);
621 let effective_min_raw_sum = opts.min_raw_sum.effective(ms2_noise);
622
623 let ms2_keep: Vec<bool> = ms2
625 .par_iter()
626 .enumerate()
627 .map(|(i, c)| {
628 if c.ms_level != 2 {
629 return false;
630 }
631 if c.window_group.is_none() {
632 return false;
633 }
634 if c.raw_sum < effective_min_raw_sum {
635 return false;
636 }
637 let (mut t0, mut t1) = ms2_time_bounds[i];
638 if t0.is_finite() {
639 t0 -= opts.ms2_rt_guard_sec;
640 }
641 if t1.is_finite() {
642 t1 += opts.ms2_rt_guard_sec;
643 }
644 if !(t0.is_finite() && t1.is_finite() && t1 > t0) {
645 return false;
646 }
647 if let Some(max_rt) = opts.max_ms2_rt_span_sec {
648 if (t1 - t0) > max_rt {
649 return false;
650 }
651 }
652 true
653 })
654 .collect();
655
656 let mut by_group: HashMap<u32, Vec<usize>> = HashMap::new();
658 for (j, c2) in ms2.iter().enumerate() {
659 if !ms2_keep[j] {
660 continue;
661 }
662 if let Some(g) = c2.window_group {
663 by_group.entry(g).or_default().push(j);
664 }
665 }
666
667 let idx_arc = Arc::new(self.clone());
668 let ms2_tb = ms2_time_bounds.clone();
669
670 let mut out = by_group
671 .into_par_iter()
672 .flat_map(|(g, js)| {
673 let mask_vec: Vec<bool> = idx_arc
675 .ms1_group_ok
676 .get(&g)
677 .cloned()
678 .unwrap_or_else(|| vec![false; ms1.len()]);
679 let mask_arc = Arc::new(mask_vec);
680
681 let idx = idx_arc.clone();
682 let tb = ms2_tb.clone();
683 let dia_index = idx.dia_index.clone();
684
685 let slices_vec = dia_index.program_slices_for_group(g);
687 let slices = Arc::new(slices_vec);
688
689 js.into_par_iter().flat_map(move |j| {
690 let (mut t2_lo, mut t2_hi) = tb[j];
691 if t2_lo.is_finite() {
692 t2_lo -= opts.ms2_rt_guard_sec;
693 }
694 if t2_hi.is_finite() {
695 t2_hi += opts.ms2_rt_guard_sec;
696 }
697
698 let mut hits = Vec::<usize>::new();
700 idx.rt_buckets.gather(t2_lo, t2_hi, &mut hits);
701 hits.sort_unstable();
702 hits.dedup();
703
704 let mask = mask_arc.clone();
705 let slices = slices.clone();
706
707 let mut local = Vec::<(usize, usize)>::with_capacity(16);
708 for i in hits {
709 if !idx.ms1_keep[i] {
710 continue;
711 }
712 if !mask[i] {
713 continue;
714 }
715
716 let (t1_lo, t1_hi) = idx.ms1_time_bounds[i];
717 if !(t1_lo.is_finite() && t1_hi.is_finite()) {
718 continue;
719 }
720
721 if t1_hi < t2_lo || t2_hi < t1_lo {
723 continue;
724 }
725 if opts.min_rt_jaccard > 0.0 {
726 let jacc = jaccard_time(t1_lo, t1_hi, t2_lo, t2_hi);
727 if jacc < opts.min_rt_jaccard {
728 continue;
729 }
730 }
731
732 let im1 = ms1[i].im_window;
733 let im2 = ms2[j].im_window;
734
735 let im_overlap = {
737 let lo = im1.0.max(im2.0);
738 let hi = im1.1.min(im2.1);
739 hi.saturating_sub(lo).saturating_add(1)
740 };
741 if im_overlap < opts.min_im_overlap_scans {
742 continue;
743 }
744
745 if let Some(max_d) = opts.max_scan_apex_delta {
747 let s1 = ms1[i].im_fit.mu;
748 let s2 = ms2[j].im_fit.mu;
749 if s1.is_finite() && s2.is_finite() {
750 let d = (s1 - s2).abs() as f32;
751 if d > max_d as f32 {
752 continue;
753 }
754 } else {
755 continue;
756 }
757 }
758
759 if let Some(max_dt) = opts.max_rt_apex_delta_sec {
761 let r1 = ms1[i].rt_fit.mu;
762 let r2 = ms2[j].rt_fit.mu;
763 if r1.is_finite() && r2.is_finite() {
764 if (r1 - r2).abs() > max_dt {
765 continue;
766 }
767 } else {
768 continue;
769 }
770 }
771
772 let prec_mz = match cluster_mz_mu(&ms1[i]) {
774 Some(m) if m.is_finite() && m > 0.0 => m,
775 _ => continue,
776 };
777
778 let prec_im_apex = ms1[i].im_fit.mu;
780 if !prec_im_apex.is_finite() {
781 continue;
782 }
783
784 let prec_tiles = dia_index.tiles_for_precursor_in_group(
786 g,
787 prec_mz,
788 prec_im_apex,
789 );
790 if prec_tiles.is_empty() {
791 continue;
792 }
793
794 let frag_tiles = dia_index.tiles_for_fragment_in_group(g, im2);
796 if frag_tiles.is_empty() {
797 continue;
798 }
799
800 let mut ok = false;
802 for t in &prec_tiles {
803 if frag_tiles.contains(t) {
804 ok = true;
805 break;
806 }
807 }
808 if !ok {
809 continue;
810 }
811
812 if opts.reject_frag_inside_precursor_tile {
815 if let Some(frag_mz) = cluster_mz_mu(&ms2[j]) {
816 if frag_mz.is_finite() && frag_mz > 0.0 {
817 let mut reject = false;
818
819 for &tile_idx in &prec_tiles {
821 if !frag_tiles.contains(&tile_idx) {
822 continue;
823 }
824 if tile_idx >= slices.len() {
825 continue;
826 }
827 let s = &slices[tile_idx]; if (frag_mz as f64) >= s.mz_lo
830 && (frag_mz as f64) <= s.mz_hi
831 {
832 reject = true;
835 break;
836 }
837 }
838
839 if reject {
840 continue;
841 }
842 }
843 }
844 }
845
846 local.push((j, i));
848 }
849
850 local.into_par_iter()
851 })
852 })
853 .collect::<Vec<(usize, usize)>>();
854
855 out.sort_unstable();
856 out.dedup();
857 out
858 }
859}
860
861pub fn enumerate_ms2_ms1_pairs_simple(
863 ds: &TimsDatasetDIA,
864 ms1: &[ClusterResult1D],
865 ms2: &[ClusterResult1D],
866 opts: &CandidateOpts,
867) -> Vec<(usize, usize)> {
868 let idx = PrecursorSearchIndex::build(ds, ms1, opts);
869 idx.enumerate_pairs(ms1, ms2, opts)
870}
871
872#[inline]
879fn score_from_features(f: &PairFeatures, opts: &ScoreOpts) -> f32 {
880 let shape_term = if f.shape_ok { f.s_shape } else { opts.shape_neutral };
881
882 let rt_close = crate::cluster::candidates::exp_decay(f.rt_apex_delta_s, opts.rt_apex_scale_s);
883 let im_close = crate::cluster::candidates::exp_decay(f.im_apex_delta_scans, opts.im_apex_scale_scans);
884
885 let im_ratio = (f.im_overlap_scans as f32) / (f.im_union_scans as f32);
886
887 let ms1_int = crate::cluster::candidates::safe_log1p(f.ms1_raw_sum);
888
889 opts.w_jacc_rt * f.jacc_rt
890 + opts.w_shape * shape_term
891 + opts.w_rt_apex * rt_close
892 + opts.w_im_apex * im_close
893 + opts.w_im_overlap * im_ratio
894 + opts.w_ms1_intensity * ms1_int
895}
896
897pub fn score_pairs(
899 ms1: &[ClusterResult1D],
900 ms2: &[ClusterResult1D],
901 pairs: &[(usize, usize)],
902 opts: &ScoreOpts,
903) -> Vec<(usize, usize, PairFeatures, f32)> {
904 pairs.par_iter().map(|&(j, i)| {
905 let f = crate::cluster::candidates::build_features(&ms1[i], &ms2[j], opts);
906 let s = score_from_features(&f, opts);
907 (j, i, f, s)
908 }).collect()
909}
910
911pub fn best_ms1_for_each_ms2(
914 ms1: &[ClusterResult1D],
915 ms2: &[ClusterResult1D],
916 pairs: &[(usize, usize)],
917 opts: &ScoreOpts,
918) -> Vec<Option<usize>> {
919 let scored = score_pairs(ms1, ms2, pairs, opts);
920
921 let mut by_ms2: Vec<Vec<(usize, PairFeatures, f32)>> = vec![Vec::new(); ms2.len()];
923 for (j, i, f, s) in scored {
924 by_ms2[j].push((i, f, s));
925 }
926
927 by_ms2
928 .into_par_iter()
929 .map(|mut vec_i| {
930 if vec_i.is_empty() { return None; }
931 vec_i.sort_unstable_by(|a, b| {
932 match b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal) {
934 Ordering::Equal => {
935 let ja = a.1.jacc_rt;
938 let jb = b.1.jacc_rt;
939 if (ja - jb).abs() > 1e-6 {
940 return jb.partial_cmp(&ja).unwrap_or(Ordering::Equal);
941 }
942 let sa = if a.1.shape_ok { a.1.s_shape } else { 0.0 };
944 let sb = if b.1.shape_ok { b.1.s_shape } else { 0.0 };
945 if (sa - sb).abs() > 1e-6 {
946 return sb.partial_cmp(&sa).unwrap_or(Ordering::Equal);
947 }
948 let dra = a.1.rt_apex_delta_s;
950 let drb = b.1.rt_apex_delta_s;
951 if (dra - drb).abs() > 1e-6 {
952 return dra.partial_cmp(&drb).unwrap_or(Ordering::Equal);
953 }
954 let dia = a.1.im_apex_delta_scans;
956 let dib = b.1.im_apex_delta_scans;
957 if (dia - dib).abs() > 1e-6 {
958 return dia.partial_cmp(&dib).unwrap_or(Ordering::Equal);
959 }
960 let oa = a.1.im_overlap_scans;
962 let ob = b.1.im_overlap_scans;
963 if oa != ob {
964 return ob.cmp(&oa);
965 }
966 let ia = a.1.ms1_raw_sum;
968 let ib = b.1.ms1_raw_sum;
969 ib.partial_cmp(&ia).unwrap_or(Ordering::Equal)
970 }
971 ord => ord,
972 }
973 });
974 Some(vec_i[0].0)
975 })
976 .collect()
977}
978
979pub fn ms1_to_ms2_map(
982 ms1_len: usize,
983 ms2_to_best_ms1: &[Option<usize>],
984) -> Vec<Vec<usize>> {
985 let mut out = vec![Vec::<usize>::new(); ms1_len];
986 for (ms2_idx, maybe_ms1) in ms2_to_best_ms1.iter().enumerate() {
987 if let Some(i) = maybe_ms1 {
988 if *i < ms1_len {
989 out[*i].push(ms2_idx);
990 }
991 }
992 }
993 out
994}
995
996fn precompute_intensities(clusters: &[ClusterResult1D]) -> Vec<f32> {
997 clusters
998 .par_iter()
999 .map(|c| {
1000 if let Some(ref rt) = c.rt_trace {
1001 rt.iter().fold(0.0f32, |acc, x| acc + x.max(0.0))
1002 } else {
1003 c.raw_sum.max(0.0)
1004 }
1005 })
1006 .collect()
1007}
1008
1009#[inline]
1010fn top_intensity_member(feat: &SimpleFeature) -> Option<&ClusterResult1D> {
1011 if feat.member_clusters.is_empty() {
1012 return None;
1013 }
1014 feat.member_clusters
1015 .iter()
1016 .max_by(|a, b| a.raw_sum.partial_cmp(&b.raw_sum).unwrap_or(Ordering::Equal))
1017}
1018
1019#[inline]
1023pub fn geom_match_score_single_cluster(
1024 ms1: &ClusterResult1D,
1025 ms2: &ClusterResult1D,
1026 opts: &ScoreOpts,
1027) -> (PairFeatures, f32) {
1028 let f = crate::cluster::candidates::build_features(ms1, ms2, opts);
1029 let s = score_from_features(&f, opts);
1030 (f, s)
1031}
1032
1033pub fn xic_match_score_precomputed(
1034 ms1_idx: usize,
1035 ms2_idx: usize,
1036 ms1: &[ClusterResult1D],
1037 ms2: &[ClusterResult1D],
1038 ms1_int: &[f32],
1039 ms2_int: &[f32],
1040 opts: &XicScoreOpts,
1041) -> Option<f32> {
1042 let mut score = 0.0f32;
1043 let mut w_sum = 0.0f32;
1044
1045 let c1 = &ms1[ms1_idx];
1046 let c2 = &ms2[ms2_idx];
1047
1048 if opts.use_rt {
1050 if let (Some(ref rt1), Some(ref rt2)) = (&c1.rt_trace, &c2.rt_trace) {
1051 if let Some(r) = pearson_corr_z(rt1, rt2) {
1052 if r.is_finite() {
1053 let s = 0.5 * (r + 1.0);
1054 score += opts.w_rt * s;
1055 w_sum += opts.w_rt;
1056 }
1057 }
1058 }
1059 }
1060
1061 if opts.use_im {
1063 if let (Some(ref im1), Some(ref im2)) = (&c1.im_trace, &c2.im_trace) {
1064 if let Some(r) = pearson_corr_z(im1, im2) {
1065 if r.is_finite() {
1066 let s = 0.5 * (r + 1.0);
1067 score += opts.w_im * s;
1068 w_sum += opts.w_im;
1069 }
1070 }
1071 }
1072 }
1073
1074 if opts.use_intensity && opts.w_intensity > 0.0 && opts.intensity_tau > 0.0 {
1076 let i1 = ms1_int[ms1_idx];
1077 let i2 = ms2_int[ms2_idx];
1078
1079 if i1 > 0.0 && i2 > 0.0 {
1080 let ratio = (i2 / i1).max(1e-6);
1081 let d = ratio.ln().abs();
1082 let s = (-d / opts.intensity_tau).exp();
1083 if s.is_finite() {
1084 score += opts.w_intensity * s;
1085 w_sum += opts.w_intensity;
1086 }
1087 }
1088 }
1089
1090 if w_sum <= 0.0 {
1091 return None;
1092 }
1093 let final_score = score / w_sum;
1094 if !final_score.is_finite() {
1095 None
1096 } else {
1097 Some(final_score.clamp(0.0, 1.0))
1098 }
1099}
1100
1101pub fn xic_match_score_precursor(
1102 prec: PrecursorLike<'_>,
1103 frag: &ClusterResult1D,
1104 opts: &XicScoreOpts,
1105) -> Option<(XicDetails, f32)> {
1106 match prec {
1107 PrecursorLike::Cluster(c) => xic_match_score(c, frag, opts),
1108
1109 PrecursorLike::Feature(f) => {
1110 let top = top_intensity_member(f)?;
1111 xic_match_score(top, frag, opts)
1112 }
1113 }
1114}
1115
1116
1117pub fn geom_match_score_precursor(
1118 prec: PrecursorLike<'_>,
1119 frag: &ClusterResult1D,
1120 opts: &ScoreOpts,
1121) -> Option<(PairFeatures, f32)> {
1122 match prec {
1123 PrecursorLike::Cluster(c) => {
1124 let (f, s) = geom_match_score_single_cluster(c, frag, opts);
1125 Some((f, s))
1126 }
1127
1128 PrecursorLike::Feature(feat) => {
1129 let top = top_intensity_member(feat)?;
1130 let (f, s) = geom_match_score_single_cluster(top, frag, opts);
1131 Some((f, s))
1132 }
1133 }
1134}
1135
1136pub fn query_precursor_scored(
1148 prec: PrecursorLike<'_>,
1149 ms2: &[ClusterResult1D],
1150 candidate_ids: &[usize],
1151 mode: MatchScoreMode,
1152 geom_opts: &ScoreOpts,
1153 xic_opts: &XicScoreOpts,
1154 min_score: f32,
1155) -> Vec<ScoredHit> {
1156 let mut out: Vec<ScoredHit> = Vec::with_capacity(candidate_ids.len());
1157
1158 for &j in candidate_ids {
1159 if j >= ms2.len() {
1160 continue;
1161 }
1162 let frag = &ms2[j];
1163
1164 match mode {
1165 MatchScoreMode::Geom => {
1166 if let Some((f_geom, s)) = geom_match_score_precursor(prec, frag, geom_opts) {
1167 if s.is_finite() && s >= min_score {
1168 out.push(ScoredHit {
1169 frag_idx: j,
1170 score: s,
1171 geom: Some(f_geom),
1172 xic: None,
1173 });
1174 }
1175 }
1176 }
1177 MatchScoreMode::Xic => {
1178 if let Some((xic_det, s)) = xic_match_score_precursor(prec, frag, xic_opts) {
1179 if s.is_finite() && s >= min_score {
1180 out.push(ScoredHit {
1181 frag_idx: j,
1182 score: s,
1183 geom: None,
1184 xic: Some(xic_det),
1185 });
1186 }
1187 }
1188 }
1189 }
1190 }
1191
1192 out.sort_unstable_by(|a, b| {
1193 b.score
1194 .partial_cmp(&a.score)
1195 .unwrap_or(Ordering::Equal)
1196 });
1197
1198 out
1199}
1200
1201pub fn query_precursors_scored_par(
1210 precs: &[PrecursorLike<'_>],
1211 ms2: &[ClusterResult1D],
1212 candidates_per_prec: &[Vec<usize>],
1213 mode: MatchScoreMode,
1214 geom_opts: &ScoreOpts,
1215 xic_opts: &XicScoreOpts,
1216 min_score: f32,
1217) -> Vec<Vec<ScoredHit>> {
1218 assert_eq!(
1219 precs.len(),
1220 candidates_per_prec.len(),
1221 "precs and candidates_per_prec must have same length"
1222 );
1223
1224 precs
1225 .par_iter()
1226 .zip(candidates_per_prec.par_iter())
1227 .map(|(&prec, cands)| {
1228 query_precursor_scored(
1229 prec,
1230 ms2,
1231 cands,
1232 mode,
1233 geom_opts,
1234 xic_opts,
1235 min_score,
1236 )
1237 })
1238 .collect()
1239}