1use std::cmp::Ordering;
4use std::collections::HashMap;
5use std::hash::Hash;
6
7use rayon::prelude::*;
8use serde::{Deserialize, Serialize};
9use crate::cluster::candidates::CandidateOpts;
10use crate::cluster::cluster::ClusterResult1D;
11use crate::cluster::feature::SimpleFeature;
12use crate::cluster::scoring::{assign_ms2_to_best_ms1_by_xic, enumerate_ms2_ms1_pairs_simple, XicScoreOpts};
13use crate::data::dia::TimsDatasetDIA;
14pub fn cluster_mz_mu(c: &ClusterResult1D) -> Option<f32> {
22 if let Some(ref f) = c.mz_fit {
23 if f.mu.is_finite() && f.mu > 0.0 {
24 return Some(f.mu);
25 }
26 }
27 if let Some((lo, hi)) = c.mz_window {
28 let mu = 0.5 * (lo + hi);
29 if mu.is_finite() && mu > 0.0 {
30 return Some(mu);
31 }
32 }
33 None
34}
35
36#[derive(Clone, Debug, Serialize, Deserialize)]
42pub struct PseudoFragment {
43 pub mz: f32,
45 pub intensity: f32,
47 pub ms2_cluster_index: usize,
49 pub ms2_cluster_id: u64,
51 pub window_group: u32,
52}
53
54#[derive(Clone, Debug, Serialize, Deserialize)]
56pub struct PseudoSpectrum {
57 pub precursor_mz: f32,
58 pub precursor_charge: u8,
59 pub rt_apex: f32,
60 pub im_apex: f32,
61
62 pub window_groups: Vec<u32>,
63
64 pub feature_id: Option<usize>,
65 pub precursor_cluster_indices: Vec<usize>,
66 pub precursor_cluster_ids: Vec<u64>,
67 pub fragments: Vec<PseudoFragment>,
68}
69
70#[derive(Clone, Debug)]
72pub struct PseudoSpecOpts {
73 pub top_n_fragments: usize,
75}
76
77impl Default for PseudoSpecOpts {
78 fn default() -> Self {
79 Self {
80 top_n_fragments: 500, }
82 }
83}
84
85#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
87pub enum PrecursorKey {
88 Feature {
89 feature_id: usize,
90 window_group: u32,
91 },
92 OrphanCluster {
93 cluster_idx: usize,
94 window_group: u32,
95 },
96}
97
98fn build_cluster_to_feature_map(n_ms1: usize, features: &[SimpleFeature]) -> Vec<Option<usize>> {
106 let mut map = vec![None; n_ms1];
107 for (f_idx, feat) in features.iter().enumerate() {
108 for &ci in &feat.member_cluster_indices {
109 if ci < n_ms1 {
110 map[ci] = Some(f_idx);
111 }
112 }
113 }
114 map
115}
116
117fn precursor_apex_from_feature_or_cluster(
126 key: PrecursorKey,
127 ms1: &[ClusterResult1D],
128 features: &[SimpleFeature],
129) -> (f32, f32) {
130 match key {
131 PrecursorKey::OrphanCluster { cluster_idx, .. } => {
132 let c = &ms1[cluster_idx];
133 (c.rt_fit.mu, c.im_fit.mu)
134 }
135 PrecursorKey::Feature { feature_id, .. } => {
136 let feat = &features[feature_id];
137 let mut rt_w = 0.0_f64;
138 let mut im_w = 0.0_f64;
139 let mut wsum = 0.0_f64;
140
141 for &ci in &feat.member_cluster_indices {
142 if ci >= ms1.len() {
143 continue;
144 }
145 let c = &ms1[ci];
146 let w = c.raw_sum.max(1.0) as f64;
147
148 if c.rt_fit.mu.is_finite() {
149 rt_w += (c.rt_fit.mu as f64) * w;
150 }
151 if c.im_fit.mu.is_finite() {
152 im_w += (c.im_fit.mu as f64) * w;
153 }
154 wsum += w;
155 }
156
157 if wsum > 0.0 {
158 ((rt_w / wsum) as f32, (im_w / wsum) as f32)
159 } else {
160 let (rt_lo, rt_hi) = feat.rt_bounds;
162 let (im_lo, im_hi) = feat.im_bounds;
163 (
164 0.5 * ((rt_lo + rt_hi) as f32),
165 0.5 * ((im_lo + im_hi) as f32),
166 )
167 }
168 }
169 }
170}
171
172pub fn build_pseudo_spectra_from_pairs(
190 ms1: &[ClusterResult1D],
191 ms2: &[ClusterResult1D],
192 features: &[SimpleFeature],
193 pairs: &[(usize, usize)],
194 opts: &PseudoSpecOpts,
195) -> Vec<PseudoSpectrum> {
196 if ms1.is_empty() || ms2.is_empty() || pairs.is_empty() {
197 return Vec::new();
198 }
199
200 let cluster_to_feature = build_cluster_to_feature_map(ms1.len(), features);
201
202 let mut grouped: HashMap<PrecursorKey, Vec<usize>> = HashMap::new();
203
204 for &(ms2_idx, ms1_idx) in pairs {
205 if ms1_idx >= ms1.len() || ms2_idx >= ms2.len() {
206 continue;
207 }
208
209 let g = ms2[ms2_idx]
211 .window_group
212 .unwrap_or(0); let key = match cluster_to_feature[ms1_idx] {
215 Some(fid) => PrecursorKey::Feature {
216 feature_id: fid,
217 window_group: g,
218 },
219 None => PrecursorKey::OrphanCluster {
220 cluster_idx: ms1_idx,
221 window_group: g,
222 },
223 };
224
225 grouped.entry(key).or_default().push(ms2_idx);
226 }
227
228 if grouped.is_empty() {
229 return Vec::new();
230 }
231
232 let top_n = opts.top_n_fragments;
233
234 let mut spectra: Vec<PseudoSpectrum> = grouped
235 .into_par_iter()
236 .map(|(key, mut frag_indices)| {
237 frag_indices.sort_unstable();
238 frag_indices.dedup();
239
240 let (prec_mz, charge, prec_cluster_indices, prec_cluster_ids, _wg) = match key {
241 PrecursorKey::OrphanCluster { cluster_idx, window_group } => {
242 let c = &ms1[cluster_idx];
243 let mz = cluster_mz_mu(c).unwrap_or(0.0);
244 let z = 0u8;
245 let idxs = vec![cluster_idx];
246 let ids = vec![c.cluster_id];
247 (mz, z, idxs, ids, window_group)
248 }
249 PrecursorKey::Feature { feature_id, window_group } => {
250 let feat = &features[feature_id];
251 let mz = feat.mz_mono;
252 let z = feat.charge;
253 let idxs = feat.member_cluster_indices.clone();
254 let ids = feat.member_cluster_ids.clone();
255 (mz, z, idxs, ids, window_group)
256 }
257 };
258
259 let (rt_apex, im_apex) =
260 precursor_apex_from_feature_or_cluster(key, ms1, features);
261
262 let mut frags: Vec<PseudoFragment> = frag_indices
263 .into_iter()
264 .filter_map(|j| {
265 let c2 = &ms2[j];
266 let mz = match cluster_mz_mu(c2) {
267 Some(m) if m.is_finite() && m > 0.0 => m,
268 _ => return None,
269 };
270 let intensity = c2.raw_sum.max(0.0);
271 Some(PseudoFragment {
272 mz,
273 intensity,
274 ms2_cluster_index: j,
275 ms2_cluster_id: c2.cluster_id,
276 window_group: c2.window_group.unwrap_or(0),
277 })
278 })
279 .collect();
280
281 frags.sort_unstable_by(|a, b| {
282 b.intensity
283 .partial_cmp(&a.intensity)
284 .unwrap_or(Ordering::Equal)
285 });
286 if top_n > 0 && frags.len() > top_n {
287 frags.truncate(top_n);
288 }
289
290 PseudoSpectrum {
291 precursor_mz: prec_mz,
292 precursor_charge: charge,
293 rt_apex,
294 im_apex,
295 window_groups: {
297 let mut wgs: Vec<u32> = frags.iter().map(|f| f.window_group).collect();
298 wgs.sort_unstable();
299 wgs.dedup();
300 wgs
301 },
302 feature_id: match key {
303 PrecursorKey::Feature { feature_id, .. } => Some(feature_id),
304 PrecursorKey::OrphanCluster { .. } => None,
305 },
306 precursor_cluster_indices: prec_cluster_indices,
307 precursor_cluster_ids: prec_cluster_ids,
308 fragments: frags,
309 }
310 })
311 .collect();
312
313 spectra.sort_unstable_by(|a, b| {
314 a.rt_apex
315 .partial_cmp(&b.rt_apex)
316 .unwrap_or(Ordering::Equal)
317 .then_with(|| {
318 a.precursor_mz
319 .partial_cmp(&b.precursor_mz)
320 .unwrap_or(Ordering::Equal)
321 })
322 });
323
324 spectra
325}
326
327pub fn build_pseudo_spectra_with_xic(
332 ds: &TimsDatasetDIA,
333 ms1: &[ClusterResult1D],
334 ms2: &[ClusterResult1D],
335 features: &[SimpleFeature],
336 cand_opts: &CandidateOpts,
337 xic_opts: &XicScoreOpts,
338 pseudo_opts: &PseudoSpecOpts,
339) -> (Vec<PseudoSpectrum>, Vec<(usize, usize, f32)>) {
340 let pairs_geom: Vec<(usize, usize)> =
342 enumerate_ms2_ms1_pairs_simple(ds, ms1, ms2, cand_opts);
343
344 if pairs_geom.is_empty() {
345 return (Vec::new(), Vec::new());
346 }
347
348 let pairs_scored: Vec<(usize, usize, f32)> =
350 assign_ms2_to_best_ms1_by_xic(ms1, ms2, &pairs_geom, xic_opts);
351
352 if pairs_scored.is_empty() {
353 return (Vec::new(), Vec::new());
354 }
355
356 let final_pairs: Vec<(usize, usize)> = pairs_scored
358 .iter()
359 .map(|(ms2_idx, ms1_idx, _s)| (*ms2_idx, *ms1_idx))
360 .collect();
361
362 let spectra = build_pseudo_spectra_from_pairs(
364 ms1,
365 ms2,
366 features,
367 &final_pairs,
368 pseudo_opts,
369 );
370
371 (spectra, pairs_scored)
372}