mscore/simulation/
annotation.rs

1use std::collections::{BTreeMap, HashMap};
2use std::fmt::Display;
3use itertools::{izip, multizip};
4use rand::distributions::{Uniform, Distribution};
5use rand::rngs::ThreadRng;
6use statrs::distribution::Normal;
7use crate::data::spectrum::{MsType, ToResolution, Vectorized};
8
9#[derive(Clone, Debug)]
10pub struct PeakAnnotation {
11    pub contributions: Vec<ContributionSource>,
12}
13
14impl PeakAnnotation {
15    pub fn new_random_noise(intensity: f64) -> Self {
16        let contribution_source = ContributionSource {
17            intensity_contribution: intensity,
18            source_type: SourceType::RandomNoise,
19            signal_attributes: None,
20        };
21
22        PeakAnnotation {
23            contributions: vec![contribution_source],
24        }
25    }
26}
27
28
29#[derive(Clone, Debug)]
30pub struct ContributionSource {
31    pub intensity_contribution: f64,
32    pub source_type: SourceType,
33    pub signal_attributes: Option<SignalAttributes>,
34}
35
36#[derive(Clone, Debug, PartialEq)]
37pub enum SourceType {
38    Signal,
39    ChemicalNoise,
40    RandomNoise,
41    Unknown,
42}
43
44impl SourceType {
45    pub fn new(source_type: i32) -> Self {
46        match source_type {
47            0 => SourceType::Signal,
48            1 => SourceType::ChemicalNoise,
49            2 => SourceType::RandomNoise,
50            3 => SourceType::Unknown,
51            _ => panic!("Invalid source type"),
52        }
53    }
54}
55
56impl Display for SourceType {
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        match self {
59            SourceType::Signal => write!(f, "Signal"),
60            SourceType::ChemicalNoise => write!(f, "ChemicalNoise"),
61            SourceType::RandomNoise => write!(f, "RandomNoise"),
62            SourceType::Unknown => write!(f, "Unknown"),
63        }
64    }
65}
66
67#[derive(Clone, Debug)]
68pub struct SignalAttributes {
69    pub charge_state: i32,
70    pub peptide_id: i32,
71    pub isotope_peak: i32,
72    pub description: Option<String>,
73}
74
75#[derive(Clone, Debug)]
76pub struct MzSpectrumAnnotated {
77    pub mz: Vec<f64>,
78    pub intensity: Vec<f64>,
79    pub annotations: Vec<PeakAnnotation>,
80}
81
82impl MzSpectrumAnnotated {
83    pub fn new(mz: Vec<f64>, intensity: Vec<f64>, annotations: Vec<PeakAnnotation>) -> Self {
84        assert!(mz.len() == intensity.len() && intensity.len() == annotations.len());
85        // zip and sort by mz
86        let mut mz_intensity_annotations: Vec<(f64, f64, PeakAnnotation)> = izip!(mz.iter(), intensity.iter(), annotations.iter()).map(|(mz, intensity, annotation)| (*mz, *intensity, annotation.clone())).collect();
87        mz_intensity_annotations.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
88
89        MzSpectrumAnnotated {
90            mz: mz_intensity_annotations.iter().map(|(mz, _, _)| *mz).collect(),
91            intensity: mz_intensity_annotations.iter().map(|(_, intensity, _)| *intensity).collect(),
92            annotations: mz_intensity_annotations.iter().map(|(_, _, annotation)| annotation.clone()).collect(),
93        }
94    }
95
96    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> Self {
97        let mut mz_filtered: Vec<f64> = Vec::new();
98        let mut intensity_filtered: Vec<f64> = Vec::new();
99        let mut annotations_filtered: Vec<PeakAnnotation> = Vec::new();
100
101        for (mz, intensity, annotation) in izip!(self.mz.iter(), self.intensity.iter(), self.annotations.iter()) {
102            if *mz >= mz_min && *mz <= mz_max && *intensity >= intensity_min && *intensity <= intensity_max {
103                mz_filtered.push(*mz);
104                intensity_filtered.push(*intensity);
105                annotations_filtered.push(annotation.clone());
106            }
107        }
108        // after filtering, the length of the mz, intensity and annotations vectors should be the same
109        assert!(mz_filtered.len() == intensity_filtered.len() && intensity_filtered.len() == annotations_filtered.len());
110
111        MzSpectrumAnnotated {
112            mz: mz_filtered,
113            intensity: intensity_filtered,
114            annotations: annotations_filtered,
115        }
116    }
117
118    pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
119        let mut rng = rand::thread_rng();
120        self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
121
122            let ppm_mz = match right_drag {
123                true => mz * ppm / 1e6 / 2.0,
124                false => mz * ppm / 1e6,
125            };
126
127            let dist = match right_drag {
128                true => Uniform::from(mz - (ppm_mz / 3.0)..=mz + ppm_mz),
129                false => Uniform::from(mz - ppm_mz..=mz + ppm_mz),
130            };
131
132            dist.sample(rng)
133        })
134    }
135
136    pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
137        let mut rng = rand::thread_rng();
138        self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
139            let ppm_mz = mz * ppm / 1e6;
140            let dist = Normal::new(mz, ppm_mz / 3.0).unwrap(); // 3 sigma ? good enough?
141            dist.sample(rng)
142        })
143    }
144
145    fn add_mz_noise<F>(&self, ppm: f64, rng: &mut ThreadRng, noise_fn: F) -> Self
146        where
147            F: Fn(&mut ThreadRng, f64, f64) -> f64,
148    {
149        let mz: Vec<f64> = self.mz.iter().map(|&mz_value| noise_fn(rng, mz_value, ppm)).collect();
150        let spectrum = MzSpectrumAnnotated { mz, intensity: self.intensity.clone(), annotations: self.annotations.clone()};
151
152        // Sort the spectrum by m/z values and potentially sum up intensities and extend annotations at the same m/z value
153        spectrum.to_resolution(6)
154    }
155
156    pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> BTreeMap<i32, MzSpectrumAnnotated> {
157        let mut splits = BTreeMap::new();
158
159        for (i, &mz) in self.mz.iter().enumerate() {
160            let intensity = self.intensity[i];
161            let annotation = self.annotations[i].clone();
162
163            let tmp_key = (mz / window_length).floor() as i32;
164
165            splits.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).mz.push(mz);
166            splits.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).intensity.push(intensity);
167            splits.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).annotations.push(annotation);
168        }
169
170        if overlapping {
171            let mut splits_offset = BTreeMap::new();
172
173            for (i, &mmz) in self.mz.iter().enumerate() {
174                let intensity = self.intensity[i];
175                let annotation = self.annotations[i].clone();
176
177                let tmp_key = -((mmz + window_length / 2.0) / window_length).floor() as i32;
178
179                splits_offset.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).mz.push(mmz);
180                splits_offset.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).intensity.push(intensity);
181                splits_offset.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).annotations.push(annotation);
182            }
183
184            for (key, val) in splits_offset {
185                splits.entry(key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).mz.extend(val.mz);
186                splits.entry(key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).intensity.extend(val.intensity);
187                splits.entry(key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).annotations.extend(val.annotations);
188            }
189        }
190
191        splits.retain(|_, spectrum| {
192            spectrum.mz.len() >= min_peaks && spectrum.intensity.iter().cloned().max_by(
193                |a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)).unwrap_or(0.0) >= min_intensity
194        });
195
196        splits
197    }
198}
199
200impl std::ops::Add for MzSpectrumAnnotated {
201    type Output = Self;
202    fn add(self, other: Self) -> Self {
203
204        let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
205        let mut spec_map: BTreeMap<i64, (f64, PeakAnnotation)> = BTreeMap::new();
206
207        for ((mz, intensity), annotation) in self.mz.iter().zip(self.intensity.iter()).zip(self.annotations.iter()) {
208            let key = quantize(*mz);
209            spec_map.insert(key, (*intensity, annotation.clone()));
210        }
211
212        for ((mz, intensity), annotation) in other.mz.iter().zip(other.intensity.iter()).zip(other.annotations.iter()) {
213            let key = quantize(*mz);
214            spec_map.entry(key).and_modify(|e| {
215                e.0 += *intensity;
216                e.1.contributions.extend(annotation.contributions.clone());
217            }).or_insert((*intensity, annotation.clone()));
218        }
219
220        let mz: Vec<f64> = spec_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
221        let intensity: Vec<f64> = spec_map.values().map(|(intensity, _)| *intensity).collect();
222        let annotations: Vec<PeakAnnotation> = spec_map.values().map(|(_, annotation)| annotation.clone()).collect();
223
224        assert!(mz.len() == intensity.len() && intensity.len() == annotations.len());
225
226        MzSpectrumAnnotated {
227            mz,
228            intensity,
229            annotations,
230        }
231    }
232}
233
234impl ToResolution for MzSpectrumAnnotated {
235    fn to_resolution(&self, resolution: i32) -> Self {
236        let mut spec_map: BTreeMap<i64, (f64, PeakAnnotation)> = BTreeMap::new();
237        let quantize = |mz: f64| -> i64 { (mz * 10.0_f64.powi(resolution)).round() as i64 };
238
239        for ((mz, intensity), annotation) in self.mz.iter().zip(self.intensity.iter()).zip(self.annotations.iter()) {
240            let key = quantize(*mz);
241            spec_map.entry(key).and_modify(|e| {
242                e.0 += *intensity;
243                e.1.contributions.extend(annotation.contributions.clone());
244            }).or_insert((*intensity, annotation.clone()));
245        }
246
247        let mz: Vec<f64> = spec_map.keys().map(|&key| key as f64 / 10.0_f64.powi(resolution)).collect();
248        let intensity: Vec<f64> = spec_map.values().map(|(intensity, _)| *intensity).collect();
249        let annotations: Vec<PeakAnnotation> = spec_map.values().map(|(_, annotation)| annotation.clone()).collect();
250
251        assert!(mz.len() == intensity.len() && intensity.len() == annotations.len());
252
253        MzSpectrumAnnotated {
254            mz,
255            intensity,
256            annotations,
257        }
258    }
259}
260
261impl std::ops::Mul<f64> for MzSpectrumAnnotated {
262    type Output = Self;
263    fn mul(self, scale: f64) -> Self::Output{
264
265        let mut scaled_intensities: Vec<f64> = vec![0.0; self.intensity.len()];
266
267        for (idx,intensity) in self.intensity.iter().enumerate(){
268            scaled_intensities[idx] = scale*intensity;
269        }
270
271        let mut scaled_annotations: Vec<PeakAnnotation> = Vec::new();
272
273        for annotation in self.annotations.iter(){
274            let mut scaled_contributions: Vec<ContributionSource> = Vec::new();
275            for contribution in annotation.contributions.iter(){
276                let scaled_intensity = (contribution.intensity_contribution*scale).round();
277                let scaled_contribution = ContributionSource{
278                    intensity_contribution: scaled_intensity,
279                    source_type: contribution.source_type.clone(),
280                    signal_attributes: contribution.signal_attributes.clone(),
281                };
282                scaled_contributions.push(scaled_contribution);
283            }
284            let scaled_annotation = PeakAnnotation{
285                contributions: scaled_contributions,
286            };
287            scaled_annotations.push(scaled_annotation);
288        }
289
290        MzSpectrumAnnotated { mz: self.mz.clone(), intensity: scaled_intensities, annotations: scaled_annotations }
291    }
292}
293
294impl Vectorized<MzSpectrumAnnotatedVectorized> for MzSpectrumAnnotated {
295    fn vectorized(&self, resolution: i32) -> MzSpectrumAnnotatedVectorized {
296
297        let quantize = |mz: f64| -> i64 { (mz * 10.0_f64.powi(resolution)).round() as i64 };
298
299        let binned_spec = self.to_resolution(resolution);
300        let mut indices: Vec<u32> = Vec::with_capacity(binned_spec.mz.len());
301        let mut values: Vec<f64> = Vec::with_capacity(binned_spec.mz.len());
302        let mut annotations: Vec<PeakAnnotation> = Vec::with_capacity(binned_spec.mz.len());
303
304        for (mz, intensity, annotation) in izip!(binned_spec.mz.iter(), binned_spec.intensity.iter(), binned_spec.annotations.iter()) {
305            indices.push(quantize(*mz) as u32);
306            values.push(*intensity);
307            annotations.push(annotation.clone());
308        }
309
310        MzSpectrumAnnotatedVectorized {
311            indices,
312            values,
313            annotations,
314        }
315    }
316}
317
318#[derive(Clone, Debug)]
319pub struct MzSpectrumAnnotatedVectorized {
320    pub indices: Vec<u32>,
321    pub values: Vec<f64>,
322    pub annotations: Vec<PeakAnnotation>,
323}
324
325#[derive(Clone, Debug)]
326pub struct TimsSpectrumAnnotated {
327    pub frame_id: i32,
328    pub scan: u32,
329    pub retention_time: f64,
330    pub mobility: f64,
331    pub ms_type: MsType,
332    pub tof: Vec<u32>,
333    pub spectrum: MzSpectrumAnnotated,
334}
335
336impl TimsSpectrumAnnotated {
337    pub fn new(frame_id: i32, scan: u32, retention_time: f64, mobility: f64, ms_type: MsType, tof: Vec<u32>, spectrum: MzSpectrumAnnotated) -> Self {
338        assert!(tof.len() == spectrum.mz.len() && spectrum.mz.len() == spectrum.intensity.len() && spectrum.intensity.len() == spectrum.annotations.len());
339        // zip and sort by mz
340        let mut mz_intensity_annotations: Vec<(u32, f64, f64, PeakAnnotation)> = izip!(tof.iter(), spectrum.mz.iter(), spectrum.intensity.iter(),
341            spectrum.annotations.iter()).map(|(tof, mz, intensity, annotation)| (*tof, *mz, *intensity, annotation.clone())).collect();
342        mz_intensity_annotations.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
343        TimsSpectrumAnnotated {
344            frame_id,
345            scan,
346            retention_time,
347            mobility,
348            ms_type,
349            tof: mz_intensity_annotations.iter().map(|(tof, _, _, _)| *tof).collect(),
350            spectrum: MzSpectrumAnnotated {
351                mz: mz_intensity_annotations.iter().map(|(_, mz, _, _)| *mz).collect(),
352                intensity: mz_intensity_annotations.iter().map(|(_, _, intensity, _)| *intensity).collect(),
353                annotations: mz_intensity_annotations.iter().map(|(_, _, _, annotation)| annotation.clone()).collect(),
354            },
355        }
356    }
357
358    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> Self {
359        let mut tof_filtered: Vec<u32> = Vec::new();
360        let mut mz_filtered: Vec<f64> = Vec::new();
361        let mut intensity_filtered: Vec<f64> = Vec::new();
362        let mut annotations_filtered: Vec<PeakAnnotation> = Vec::new();
363
364        for (tof, mz, intensity, annotation) in izip!(self.tof.iter(), self.spectrum.mz.iter(), self.spectrum.intensity.iter(), self.spectrum.annotations.iter()) {
365            if *mz >= mz_min && *mz <= mz_max && *intensity >= intensity_min && *intensity <= intensity_max {
366                tof_filtered.push(*tof);
367                mz_filtered.push(*mz);
368                intensity_filtered.push(*intensity);
369                annotations_filtered.push(annotation.clone());
370            }
371        }
372
373        assert!(tof_filtered.len() == mz_filtered.len() && mz_filtered.len() == intensity_filtered.len() && intensity_filtered.len() == annotations_filtered.len());
374
375        TimsSpectrumAnnotated {
376            frame_id: self.frame_id,
377            scan: self.scan,
378            retention_time: self.retention_time,
379            mobility: self.mobility,
380            ms_type: self.ms_type.clone(),
381            tof: tof_filtered,
382            spectrum: MzSpectrumAnnotated::new(mz_filtered, intensity_filtered, annotations_filtered),
383        }
384    }
385
386    pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
387        TimsSpectrumAnnotated {
388            frame_id: self.frame_id,
389            scan: self.scan,
390            retention_time: self.retention_time,
391            mobility: self.mobility,
392            ms_type: self.ms_type.clone(),
393            // TODO: adding noise to mz means that TOF values need to be re-calculated
394            tof: self.tof.clone(),
395            spectrum: self.spectrum.add_mz_noise_uniform(ppm, right_drag),
396        }
397    }
398
399    pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
400        TimsSpectrumAnnotated {
401            frame_id: self.frame_id,
402            scan: self.scan,
403            retention_time: self.retention_time,
404            mobility: self.mobility,
405            ms_type: self.ms_type.clone(),
406            // TODO: adding noise to mz means that TOF values need to be re-calculated
407            tof: self.tof.clone(),
408            spectrum: self.spectrum.add_mz_noise_normal(ppm),
409        }
410    }
411
412    pub fn to_windows(
413        &self,
414        window_length: f64,
415        overlapping: bool,
416        min_peaks: usize,
417        min_intensity: f64,
418    ) -> BTreeMap<i32, TimsSpectrumAnnotated> {
419        // 1) base‐window buckets
420        let mut buckets: BTreeMap<i32, Vec<(u32, f64, f64, PeakAnnotation)>> = BTreeMap::new();
421        for (&tof, &mz, &intensity, annotation) in
422            izip!(
423                &self.tof,
424                &self.spectrum.mz,
425                &self.spectrum.intensity,
426                &self.spectrum.annotations
427            )
428        {
429            let idx = (mz / window_length).floor() as i32;
430            buckets.entry(idx)
431                .or_default()
432                .push((tof, mz, intensity, annotation.clone()));
433        }
434
435        // 2) overlapping half‐shifted buckets
436        if overlapping {
437            let mut off: BTreeMap<i32, Vec<(u32, f64, f64, PeakAnnotation)>> = BTreeMap::new();
438            let half = window_length / 2.0;
439            for (&tof, &mz, &intensity, annotation) in
440                izip!(
441                    &self.tof,
442                    &self.spectrum.mz,
443                    &self.spectrum.intensity,
444                    &self.spectrum.annotations
445                )
446            {
447                let idx = -(((mz + half) / window_length).floor() as i32);
448                off.entry(idx)
449                    .or_default()
450                    .push((tof, mz, intensity, annotation.clone()));
451            }
452            for (k, v) in off {
453                buckets.entry(k).or_default().extend(v);
454            }
455        }
456
457        // 3) filter & rebuild
458        let mut out = BTreeMap::new();
459        for (idx, group) in buckets {
460            if group.len() < min_peaks {
461                continue;
462            }
463            let max_i = group.iter().map(|&(_, _, i, _)| i).fold(0.0, f64::max);
464            if max_i < min_intensity {
465                continue;
466            }
467
468            // manual “unzip4”
469            let mut tofs   = Vec::with_capacity(group.len());
470            let mut mzs    = Vec::with_capacity(group.len());
471            let mut ints   = Vec::with_capacity(group.len());
472            let mut annots = Vec::with_capacity(group.len());
473            for (tof, mz, intensity, annotation) in group {
474                tofs.push(tof);
475                mzs.push(mz);
476                ints.push(intensity);
477                annots.push(annotation);
478            }
479
480            // sort+rebuild the annotated spectrum
481            let window_spec = MzSpectrumAnnotated::new(mzs, ints, annots);
482
483            let sub = TimsSpectrumAnnotated {
484                frame_id:       self.frame_id,
485                scan:           self.scan,
486                retention_time: self.retention_time,
487                mobility:       self.mobility,
488                ms_type:        self.ms_type.clone(),
489                tof:            tofs,
490                spectrum:       window_spec,
491            };
492
493            out.insert(idx, sub);
494        }
495
496        out
497    }
498}
499
500impl std::ops::Add for TimsSpectrumAnnotated {
501    type Output = Self;
502    fn add(self, other: Self) -> Self {
503
504        let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
505        let mut spec_map: BTreeMap<i64, (u32, f64, PeakAnnotation, i64)> = BTreeMap::new();
506        let mean_scan_floor = ((self.scan as f64 + other.scan as f64) / 2.0) as u32;
507
508        for (tof, mz, intensity, annotation) in izip!(self.tof.iter(), self.spectrum.mz.iter(), self.spectrum.intensity.iter(), self.spectrum.annotations.iter()) {
509            let key = quantize(*mz);
510            spec_map.insert(key, (*tof, *intensity, annotation.clone(), 1));
511        }
512
513        for (tof, mz, intensity, annotation) in izip!(other.tof.iter(), other.spectrum.mz.iter(), other.spectrum.intensity.iter(), other.spectrum.annotations.iter()) {
514            let key = quantize(*mz);
515            spec_map.entry(key).and_modify(|e| {
516                e.0 += *tof;
517                e.1 += *intensity;
518                e.2.contributions.extend(annotation.contributions.clone());
519                e.3 += 1;
520            }).or_insert((*tof, *intensity, annotation.clone(), 1));
521        }
522
523        let mut tof_vec: Vec<u32> = Vec::with_capacity(spec_map.len());
524        let mut mz_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
525        let mut intensity_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
526        let mut annotations_vec: Vec<PeakAnnotation> = Vec::with_capacity(spec_map.len());
527
528        for (key, (tof, intensity, annotation, count)) in spec_map.iter() {
529            tof_vec.push((*tof as f64 / *count as f64) as u32);
530            mz_vec.push(*key as f64 / 1_000_000.0);
531            intensity_vec.push(*intensity / *count as f64);
532            annotations_vec.push(annotation.clone());
533        }
534
535        assert!(tof_vec.len() == mz_vec.len() && mz_vec.len() == intensity_vec.len() && intensity_vec.len() == annotations_vec.len());
536
537        TimsSpectrumAnnotated {
538            frame_id: self.frame_id,
539            scan: mean_scan_floor,
540            retention_time: self.retention_time,
541            mobility: self.mobility,
542            ms_type: if self.ms_type == other.ms_type { self.ms_type.clone() } else { MsType::Unknown },
543            tof: tof_vec,
544            spectrum: MzSpectrumAnnotated::new(mz_vec, intensity_vec, annotations_vec),
545        }
546    }
547}
548
549#[derive(Clone, Debug)]
550pub struct TimsFrameAnnotated {
551    pub frame_id: i32,
552    pub retention_time: f64,
553    pub ms_type: MsType,
554    pub tof: Vec<u32>,
555    pub mz: Vec<f64>,
556    pub scan: Vec<u32>,
557    pub inv_mobility: Vec<f64>,
558    pub intensity: Vec<f64>,
559    pub annotations: Vec<PeakAnnotation>,
560}
561
562impl TimsFrameAnnotated {
563    pub fn new(frame_id: i32, retention_time: f64, ms_type: MsType, tof: Vec<u32>, mz: Vec<f64>, scan: Vec<u32>, inv_mobility: Vec<f64>, intensity: Vec<f64>, annotations: Vec<PeakAnnotation>) -> Self {
564        assert!(tof.len() == mz.len() && mz.len() == scan.len() && scan.len() == inv_mobility.len() && inv_mobility.len() == intensity.len() && intensity.len() == annotations.len());
565        TimsFrameAnnotated {
566            frame_id,
567            retention_time,
568            ms_type,
569            tof,
570            mz,
571            scan,
572            inv_mobility,
573            intensity,
574            annotations,
575        }
576    }
577    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, inv_mobility_min: f64, inv_mobility_max: f64, scan_min: u32, scan_max: u32, intensity_min: f64, intensity_max: f64) -> Self {
578        let mut tof_filtered: Vec<u32> = Vec::new();
579        let mut mz_filtered: Vec<f64> = Vec::new();
580        let mut scan_filtered: Vec<u32> = Vec::new();
581        let mut inv_mobility_filtered: Vec<f64> = Vec::new();
582        let mut intensity_filtered: Vec<f64> = Vec::new();
583        let mut annotations_filtered: Vec<PeakAnnotation> = Vec::new();
584
585        for (tof, mz, scan, inv_mobility, intensity, annotation) in izip!(self.tof.iter(), self.mz.iter(), self.scan.iter(), self.inv_mobility.iter(), self.intensity.iter(), self.annotations.iter()) {
586            if *mz >= mz_min && *mz <= mz_max && *inv_mobility >= inv_mobility_min && *inv_mobility <= inv_mobility_max && *scan >= scan_min && *scan <= scan_max && *intensity >= intensity_min && *intensity <= intensity_max {
587                tof_filtered.push(*tof);
588                mz_filtered.push(*mz);
589                scan_filtered.push(*scan);
590                inv_mobility_filtered.push(*inv_mobility);
591                intensity_filtered.push(*intensity);
592                annotations_filtered.push(annotation.clone());
593            }
594        }
595
596        assert!(tof_filtered.len() == mz_filtered.len() && mz_filtered.len() == scan_filtered.len() && scan_filtered.len() == inv_mobility_filtered.len() && inv_mobility_filtered.len() == intensity_filtered.len() && intensity_filtered.len() == annotations_filtered.len());
597
598        TimsFrameAnnotated {
599            frame_id: self.frame_id,
600            retention_time: self.retention_time,
601            ms_type: self.ms_type.clone(),
602            tof: tof_filtered,
603            mz: mz_filtered,
604            scan: scan_filtered,
605            inv_mobility: inv_mobility_filtered,
606            intensity: intensity_filtered,
607            annotations: annotations_filtered,
608        }
609    }
610
611    pub fn to_tims_spectra_annotated(&self) -> Vec<TimsSpectrumAnnotated> {
612        // use a sorted map where scan is used as key
613        let mut spectra = BTreeMap::<i32, (f64, Vec<u32>, MzSpectrumAnnotated)>::new();
614
615        // all indices and the intensity values are sorted by scan and stored in the map as a tuple (mobility, tof, mz, intensity)
616        for (scan, mobility, tof, mz, intensity, annotations) in izip!(self.scan.iter(), self.inv_mobility.iter(), self.tof.iter(), self.mz.iter(), self.intensity.iter(), self.annotations.iter()) {
617            let entry = spectra.entry(*scan as i32).or_insert_with(|| (*mobility, Vec::new(), MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())));
618            entry.1.push(*tof);
619            entry.2.mz.push(*mz);
620            entry.2.intensity.push(*intensity);
621            entry.2.annotations.push(annotations.clone());
622        }
623
624        // convert the map to a vector of TimsSpectrumAnnotated
625        let mut tims_spectra: Vec<TimsSpectrumAnnotated> = Vec::new();
626
627        for (scan, (mobility, tof, spectrum)) in spectra {
628            tims_spectra.push(TimsSpectrumAnnotated::new(self.frame_id, scan as u32, self.retention_time, mobility, self.ms_type.clone(), tof, spectrum));
629        }
630
631        tims_spectra
632    }
633
634    pub fn from_tims_spectra_annotated(spectra: Vec<TimsSpectrumAnnotated>) -> TimsFrameAnnotated {
635        let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
636        let mut spec_map: BTreeMap<(u32, i64), (f64, u32, f64, PeakAnnotation, i64)> = BTreeMap::new();
637        let mut capacity_count = 0;
638
639        for spectrum in &spectra {
640            let inv_mobility = spectrum.mobility;
641            for (i, mz) in spectrum.spectrum.mz.iter().enumerate() {
642                let scan = spectrum.scan;
643                let tof = spectrum.tof[i];
644                let intensity = spectrum.spectrum.intensity[i];
645                let annotation = spectrum.spectrum.annotations[i].clone();
646                let key = (scan, quantize(*mz));
647                spec_map.entry(key).and_modify(|e| {
648                    e.0 += intensity;
649                    e.1 += tof;
650                    e.2 += inv_mobility;
651                    e.3.contributions.extend(annotation.contributions.clone());
652                    e.4 += 1;
653                }).or_insert((intensity, tof, inv_mobility, annotation, 1));
654                capacity_count += 1;
655            }
656        }
657
658        let mut scan_vec: Vec<u32> = Vec::with_capacity(capacity_count);
659        let mut inv_mobility_vec: Vec<f64> = Vec::with_capacity(capacity_count);
660        let mut tof_vec: Vec<u32> = Vec::with_capacity(capacity_count);
661        let mut mz_vec: Vec<f64> = Vec::with_capacity(capacity_count);
662        let mut intensity_vec: Vec<f64> = Vec::with_capacity(capacity_count);
663        let mut annotations_vec: Vec<PeakAnnotation> = Vec::with_capacity(capacity_count);
664
665        for ((scan, mz), (intensity, tof, inv_mobility, annotation, count)) in spec_map.iter() {
666            scan_vec.push(*scan);
667            inv_mobility_vec.push(*inv_mobility / *count as f64);
668            tof_vec.push((*tof as f64 / *count as f64) as u32);
669            mz_vec.push(*mz as f64 / 1_000_000.0);
670            intensity_vec.push(*intensity);
671            annotations_vec.push(annotation.clone());
672        }
673
674        assert!(tof_vec.len() == mz_vec.len() && mz_vec.len() == scan_vec.len() && scan_vec.len() == inv_mobility_vec.len() && inv_mobility_vec.len() == intensity_vec.len() && intensity_vec.len() == annotations_vec.len());
675
676        TimsFrameAnnotated {
677            frame_id: spectra.first().unwrap().frame_id,
678            retention_time: spectra.first().unwrap().retention_time,
679            ms_type: spectra.first().unwrap().ms_type.clone(),
680            tof: tof_vec,
681            mz: mz_vec,
682            scan: scan_vec,
683            inv_mobility: inv_mobility_vec,
684            intensity: intensity_vec,
685            annotations: annotations_vec,
686        }
687    }
688    pub fn to_windows_indexed(
689        &self,
690        window_length: f64,
691        overlapping: bool,
692        min_peaks: usize,
693        min_intensity: f64
694    ) -> (Vec<u32>, Vec<i32>, Vec<TimsSpectrumAnnotated>) {
695        // 1) explode into spectra by scan/mobility
696        let spectra = self.to_tims_spectra_annotated();
697
698        // 2) window each one
699        let windows_per_scan: Vec<_> = spectra
700            .iter()
701            .map(|s| s.to_windows(window_length, overlapping, min_peaks, min_intensity))
702            .collect();
703
704        // 3) flatten out into three parallel vectors
705        let mut scan_indices   = Vec::new();
706        let mut window_indices = Vec::new();
707        let mut out_spectra    = Vec::new();
708
709        for (spec, window_map) in spectra.iter().zip(windows_per_scan.iter()) {
710            for (&win_idx, win_spec) in window_map {
711                scan_indices.push(spec.scan);
712                window_indices.push(win_idx);
713                out_spectra.push(win_spec.clone());
714            }
715        }
716
717        (scan_indices, window_indices, out_spectra)
718    }
719
720    pub fn to_windows(
721        &self,
722        window_length: f64,
723        overlapping: bool,
724        min_peaks: usize,
725        min_intensity: f64
726    ) -> Vec<TimsSpectrumAnnotated> {
727        // 1) explode into spectra by scan/mobility
728        let spectra = self.to_tims_spectra_annotated();
729
730        // 2) window each one
731        let windows_per_scan: Vec<_> = spectra
732            .iter()
733            .map(|s| s.to_windows(window_length, overlapping, min_peaks, min_intensity))
734            .collect();
735
736        // 3) flatten out into a single vector of TimsSpectrumAnnotated
737        let mut out_spectra = Vec::new();
738        for (_, window_map) in spectra.iter().zip(windows_per_scan.iter()) {
739            for (_, win_spec) in window_map {
740                out_spectra.push(win_spec.clone());
741            }
742        }
743
744        out_spectra
745    }
746
747    pub fn to_dense_windows(
748        &self,
749        window_length: f64,
750        overlapping: bool,
751        min_peaks: usize,
752        min_intensity: f64,
753        resolution: i32
754    ) -> (Vec<f64>, Vec<i32>, Vec<i32>, usize, usize) {
755        let factor    = 10f64.powi(resolution);
756        let n_cols    = ((window_length * factor).round() + 1.0) as usize;
757
758        // 1) get indexed windows
759        let (scan_indices, window_indices, spectra) =
760            self.to_windows_indexed(window_length, overlapping, min_peaks, min_intensity);
761
762        // 2) vectorize each window’s MzSpectrumAnnotated
763        let vec_specs: Vec<_> = spectra
764            .iter()
765            .map(|ts| ts.spectrum.vectorized(resolution))
766            .collect();
767
768        // 3) prepare flat matrix
769        let n_rows      = spectra.len();
770        let mut matrix = vec![0.0; n_rows * n_cols];
771
772        // 4) fill in each row
773        for (row, ( &win_idx, vec_spec)) in
774            multizip((&window_indices, &vec_specs))
775                .enumerate()
776        {
777            // compute the "vectorized" start index of this window
778            let start_i = if win_idx >= 0 {
779                ((win_idx as f64 * window_length) * factor).round() as i32
780            } else {
781                // negative key → half‐shifted
782                ((((-win_idx) as f64 * window_length) - 0.5 * window_length) * factor)
783                    .round() as i32
784            };
785
786            // now place each nonzero bin
787            for (&idx, &val) in vec_spec.indices.iter().zip(&vec_spec.values) {
788                let col = (idx as i32 - start_i) as usize;
789                let flat_idx = row * n_cols + col;
790                matrix[flat_idx] = val;
791            }
792        }
793
794        // cast scan indices to i32 for consistency
795        let scan_indices: Vec<i32> = scan_indices.iter().map(|&scan| scan as i32).collect();
796
797        (matrix, scan_indices, window_indices, n_rows, n_cols)
798    }
799
800    /// Returns:
801    ///  - intensity_matrix,
802    ///  - scan_indices,
803    ///  - window_indices,
804    ///  - mz_start for each window,
805    ///  - ion_mobility_start for each window,
806    ///  - n_rows, n_cols,
807    ///  - isotope_peak labels,
808    ///  - charge_state labels,
809    ///  - peptide_id labels (0..5)
810    pub fn to_dense_windows_with_labels(
811        &self,
812        window_length: f64,
813        overlapping: bool,
814        min_peaks: usize,
815        min_intensity: f64,
816        resolution: i32,
817    ) -> (
818        Vec<f64>,    // intensities
819        Vec<u32>,    // scan index per row
820        Vec<i32>,    // window key per row
821        Vec<f64>,    // mz_start per row
822        Vec<f64>,    // ion_mobility_start per row
823        usize,       // n_rows
824        usize,       // n_cols
825        Vec<i32>,    // isotope_peak labels
826        Vec<i32>,    // charge_state labels
827        Vec<i32>,    // peptide_id labels
828    ) {
829        let factor = 10f64.powi(resolution);
830        let n_cols = ((window_length * factor).round() + 1.0) as usize;
831
832        // 1) explode into per-scan windows
833        let (scan_indices, window_indices, spectra) =
834            self.to_windows_indexed(window_length, overlapping, min_peaks, min_intensity);
835        let vec_specs: Vec<_> = spectra
836            .iter()
837            .map(|ts| ts.spectrum.vectorized(resolution))
838            .collect();
839
840        let n_rows     = vec_specs.len();
841        let matrix_sz  = n_rows * n_cols;
842
843        // 2) allocate output arrays
844        let mut intensities       = vec![0.0_f64; matrix_sz];
845        let mut iso_labels        = vec![-1_i32; matrix_sz];
846        let mut charge_labels     = vec![-1_i32; matrix_sz];
847        let mut peptide_labels    = vec![-1_i32; matrix_sz];
848        let mut mz_start          = Vec::with_capacity(n_rows);
849        let mut ion_mobility_start = Vec::with_capacity(n_rows);
850
851        // 3) fill each row
852        for (row, ((&win_idx, vec_spec), ts)) in
853            multizip((&window_indices, &vec_specs))
854                .zip(spectra.iter())
855                .enumerate()
856        {
857            // record the first‐peak m/z and mobility
858            let first_mz = ts.spectrum.mz.first().cloned().unwrap_or(0.0);
859            mz_start.push(first_mz);
860            ion_mobility_start.push(ts.mobility);
861
862            // per‐window map for peptide_id → 0..5
863            let mut feat_map  = HashMap::<i32,i32>::new();
864            let mut next_feat = 0;
865
866            // window start index
867            let start_i = if win_idx >= 0 {
868                ((win_idx as f64 * window_length) * factor).round() as i32
869            } else {
870                (((-win_idx) as f64 * window_length - 0.5 * window_length) * factor)
871                    .round() as i32
872            };
873
874            // fill columns
875            for (&bin_idx, &val, annotation) in
876                izip!(&vec_spec.indices, &vec_spec.values, &vec_spec.annotations)
877            {
878                let col  = (bin_idx as i32 - start_i) as usize;
879                let flat = row * n_cols + col;
880                intensities[flat] = val;
881
882                // choose best contributor
883                if let Some(best) = annotation
884                    .contributions
885                    .iter()
886                    .max_by(|a, b| {
887                        a.intensity_contribution
888                            .partial_cmp(&b.intensity_contribution)
889                            .unwrap()
890                    })
891                {
892                    match best.source_type {
893                        SourceType::Signal => {
894                            if let Some(sa) = &best.signal_attributes {
895                                iso_labels[flat]    = sa.isotope_peak;
896                                charge_labels[flat] = sa.charge_state;
897                                // re-index peptide_id
898                                let old = sa.peptide_id;
899                                let new = *feat_map.entry(old).or_insert_with(|| {
900                                    let i = next_feat; next_feat += 1; i.min(5)
901                                });
902                                peptide_labels[flat] = new;
903                            }
904                        }
905                        SourceType::RandomNoise => {
906                            iso_labels[flat]    = -2;
907                            charge_labels[flat] = -2;
908                            peptide_labels[flat] = -2;
909                        }
910                        _ => { /* leave as -1 */ }
911                    }
912                }
913            }
914        }
915
916        (
917            intensities,
918            scan_indices,
919            window_indices,
920            mz_start,
921            ion_mobility_start,
922            n_rows,
923            n_cols,
924            iso_labels,
925            charge_labels,
926            peptide_labels,
927        )
928    }
929
930    pub fn fold_along_scan_axis(self, fold_width: usize) -> TimsFrameAnnotated {
931        // extract tims spectra from frame
932        let spectra = self.to_tims_spectra_annotated();
933
934        // create a new collection of merged spectra,where spectra are first grouped by the key they create when divided by fold_width
935        // and then merge them by addition
936        let mut merged_spectra: BTreeMap<u32, TimsSpectrumAnnotated> = BTreeMap::new();
937        for spectrum in spectra {
938            let key = spectrum.scan / fold_width as u32;
939
940            // if the key already exists, merge the spectra
941            if let Some(existing_spectrum) = merged_spectra.get_mut(&key) {
942
943                let merged_spectrum = existing_spectrum.clone() + spectrum;
944                // update the existing spectrum with the merged one
945                *existing_spectrum = merged_spectrum;
946
947            } else {
948                // otherwise, insert the new spectrum
949                merged_spectra.insert(key, spectrum);
950            }
951        }
952        // convert the merged spectra back to a TimsFrame
953        TimsFrameAnnotated::from_tims_spectra_annotated(merged_spectra.into_values().collect())
954    }
955}
956
957impl std::ops::Add for TimsFrameAnnotated {
958    type Output = Self;
959    fn add(self, other: Self) -> Self {
960
961        let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
962        let mut spec_map: BTreeMap<(u32, i64), (f64, u32, f64, PeakAnnotation, i64)> = BTreeMap::new();
963
964        for (scan, mz, tof, inv_mobility, intensity, annotation) in
965        izip!(self.scan.iter(), self.mz.iter(), self.tof.iter(), self.inv_mobility.iter(), self.intensity.iter(), self.annotations.iter()) {
966            let key = (*scan, quantize(*mz));
967            spec_map.insert(key, (*intensity, *tof, *inv_mobility, annotation.clone(), 1));
968        }
969
970        for (scan, mz, tof, inv_mobility, intensity, annotation) in
971        izip!(other.scan.iter(), other.mz.iter(), other.tof.iter(), other.inv_mobility.iter(), other.intensity.iter(), other.annotations.iter()) {
972            let key = (*scan, quantize(*mz));
973            spec_map.entry(key).and_modify(|e| {
974                e.0 += *intensity;
975                e.1 += *tof;
976                e.2 += *inv_mobility;
977                e.3.contributions.extend(annotation.contributions.clone());
978                e.4 += 1;
979            }).or_insert((*intensity, *tof, *inv_mobility, annotation.clone(), 1));
980        }
981
982        let mut tof_vec: Vec<u32> = Vec::with_capacity(spec_map.len());
983        let mut mz_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
984        let mut scan_vec: Vec<u32> = Vec::with_capacity(spec_map.len());
985        let mut inv_mobility_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
986        let mut intensity_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
987        let mut annotations_vec: Vec<PeakAnnotation> = Vec::with_capacity(spec_map.len());
988
989        for ((scan, mz), (intensity, tof, inv_mobility, annotation, count)) in spec_map.iter() {
990            scan_vec.push(*scan);
991            mz_vec.push(*mz as f64 / 1_000_000.0);
992            intensity_vec.push(*intensity);
993            tof_vec.push((*tof as f64 / *count as f64) as u32);
994            inv_mobility_vec.push(*inv_mobility / *count as f64);
995            annotations_vec.push(annotation.clone());
996        }
997
998        assert!(tof_vec.len() == mz_vec.len() && mz_vec.len() == scan_vec.len() && scan_vec.len() == inv_mobility_vec.len() && inv_mobility_vec.len() == intensity_vec.len() && intensity_vec.len() == annotations_vec.len());
999
1000        TimsFrameAnnotated {
1001            frame_id: self.frame_id,
1002            retention_time: self.retention_time,
1003            ms_type: if self.ms_type == other.ms_type { self.ms_type.clone() } else { MsType::Unknown },
1004            tof: tof_vec,
1005            mz: mz_vec,
1006            scan: scan_vec,
1007            inv_mobility: inv_mobility_vec,
1008            intensity: intensity_vec,
1009            annotations: annotations_vec,
1010        }
1011    }
1012}