1use std::collections::{BTreeMap, HashMap};
2use std::fmt::Display;
3use itertools::{izip, multizip};
4use rand::distributions::{Uniform, Distribution};
5use rand::rngs::ThreadRng;
6use statrs::distribution::Normal;
7use crate::data::spectrum::{MsType, ToResolution, Vectorized};
8
9#[derive(Clone, Debug)]
10pub struct PeakAnnotation {
11 pub contributions: Vec<ContributionSource>,
12}
13
14impl PeakAnnotation {
15 pub fn new_random_noise(intensity: f64) -> Self {
16 let contribution_source = ContributionSource {
17 intensity_contribution: intensity,
18 source_type: SourceType::RandomNoise,
19 signal_attributes: None,
20 };
21
22 PeakAnnotation {
23 contributions: vec![contribution_source],
24 }
25 }
26}
27
28
29#[derive(Clone, Debug)]
30pub struct ContributionSource {
31 pub intensity_contribution: f64,
32 pub source_type: SourceType,
33 pub signal_attributes: Option<SignalAttributes>,
34}
35
36#[derive(Clone, Debug, PartialEq)]
37pub enum SourceType {
38 Signal,
39 ChemicalNoise,
40 RandomNoise,
41 Unknown,
42}
43
44impl SourceType {
45 pub fn new(source_type: i32) -> Self {
46 match source_type {
47 0 => SourceType::Signal,
48 1 => SourceType::ChemicalNoise,
49 2 => SourceType::RandomNoise,
50 3 => SourceType::Unknown,
51 _ => panic!("Invalid source type"),
52 }
53 }
54}
55
56impl Display for SourceType {
57 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58 match self {
59 SourceType::Signal => write!(f, "Signal"),
60 SourceType::ChemicalNoise => write!(f, "ChemicalNoise"),
61 SourceType::RandomNoise => write!(f, "RandomNoise"),
62 SourceType::Unknown => write!(f, "Unknown"),
63 }
64 }
65}
66
67#[derive(Clone, Debug)]
68pub struct SignalAttributes {
69 pub charge_state: i32,
70 pub peptide_id: i32,
71 pub isotope_peak: i32,
72 pub description: Option<String>,
73}
74
75#[derive(Clone, Debug)]
76pub struct MzSpectrumAnnotated {
77 pub mz: Vec<f64>,
78 pub intensity: Vec<f64>,
79 pub annotations: Vec<PeakAnnotation>,
80}
81
82impl MzSpectrumAnnotated {
83 pub fn new(mz: Vec<f64>, intensity: Vec<f64>, annotations: Vec<PeakAnnotation>) -> Self {
84 assert!(mz.len() == intensity.len() && intensity.len() == annotations.len());
85 let mut mz_intensity_annotations: Vec<(f64, f64, PeakAnnotation)> = izip!(mz.iter(), intensity.iter(), annotations.iter()).map(|(mz, intensity, annotation)| (*mz, *intensity, annotation.clone())).collect();
87 mz_intensity_annotations.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
88
89 MzSpectrumAnnotated {
90 mz: mz_intensity_annotations.iter().map(|(mz, _, _)| *mz).collect(),
91 intensity: mz_intensity_annotations.iter().map(|(_, intensity, _)| *intensity).collect(),
92 annotations: mz_intensity_annotations.iter().map(|(_, _, annotation)| annotation.clone()).collect(),
93 }
94 }
95
96 pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> Self {
97 let mut mz_filtered: Vec<f64> = Vec::new();
98 let mut intensity_filtered: Vec<f64> = Vec::new();
99 let mut annotations_filtered: Vec<PeakAnnotation> = Vec::new();
100
101 for (mz, intensity, annotation) in izip!(self.mz.iter(), self.intensity.iter(), self.annotations.iter()) {
102 if *mz >= mz_min && *mz <= mz_max && *intensity >= intensity_min && *intensity <= intensity_max {
103 mz_filtered.push(*mz);
104 intensity_filtered.push(*intensity);
105 annotations_filtered.push(annotation.clone());
106 }
107 }
108 assert!(mz_filtered.len() == intensity_filtered.len() && intensity_filtered.len() == annotations_filtered.len());
110
111 MzSpectrumAnnotated {
112 mz: mz_filtered,
113 intensity: intensity_filtered,
114 annotations: annotations_filtered,
115 }
116 }
117
118 pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
119 let mut rng = rand::thread_rng();
120 self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
121
122 let ppm_mz = match right_drag {
123 true => mz * ppm / 1e6 / 2.0,
124 false => mz * ppm / 1e6,
125 };
126
127 let dist = match right_drag {
128 true => Uniform::from(mz - (ppm_mz / 3.0)..=mz + ppm_mz),
129 false => Uniform::from(mz - ppm_mz..=mz + ppm_mz),
130 };
131
132 dist.sample(rng)
133 })
134 }
135
136 pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
137 let mut rng = rand::thread_rng();
138 self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
139 let ppm_mz = mz * ppm / 1e6;
140 let dist = Normal::new(mz, ppm_mz / 3.0).unwrap(); dist.sample(rng)
142 })
143 }
144
145 fn add_mz_noise<F>(&self, ppm: f64, rng: &mut ThreadRng, noise_fn: F) -> Self
146 where
147 F: Fn(&mut ThreadRng, f64, f64) -> f64,
148 {
149 let mz: Vec<f64> = self.mz.iter().map(|&mz_value| noise_fn(rng, mz_value, ppm)).collect();
150 let spectrum = MzSpectrumAnnotated { mz, intensity: self.intensity.clone(), annotations: self.annotations.clone()};
151
152 spectrum.to_resolution(6)
154 }
155
156 pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> BTreeMap<i32, MzSpectrumAnnotated> {
157 let mut splits = BTreeMap::new();
158
159 for (i, &mz) in self.mz.iter().enumerate() {
160 let intensity = self.intensity[i];
161 let annotation = self.annotations[i].clone();
162
163 let tmp_key = (mz / window_length).floor() as i32;
164
165 splits.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).mz.push(mz);
166 splits.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).intensity.push(intensity);
167 splits.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).annotations.push(annotation);
168 }
169
170 if overlapping {
171 let mut splits_offset = BTreeMap::new();
172
173 for (i, &mmz) in self.mz.iter().enumerate() {
174 let intensity = self.intensity[i];
175 let annotation = self.annotations[i].clone();
176
177 let tmp_key = -((mmz + window_length / 2.0) / window_length).floor() as i32;
178
179 splits_offset.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).mz.push(mmz);
180 splits_offset.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).intensity.push(intensity);
181 splits_offset.entry(tmp_key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).annotations.push(annotation);
182 }
183
184 for (key, val) in splits_offset {
185 splits.entry(key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).mz.extend(val.mz);
186 splits.entry(key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).intensity.extend(val.intensity);
187 splits.entry(key).or_insert_with(|| MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())).annotations.extend(val.annotations);
188 }
189 }
190
191 splits.retain(|_, spectrum| {
192 spectrum.mz.len() >= min_peaks && spectrum.intensity.iter().cloned().max_by(
193 |a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)).unwrap_or(0.0) >= min_intensity
194 });
195
196 splits
197 }
198}
199
200impl std::ops::Add for MzSpectrumAnnotated {
201 type Output = Self;
202 fn add(self, other: Self) -> Self {
203
204 let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
205 let mut spec_map: BTreeMap<i64, (f64, PeakAnnotation)> = BTreeMap::new();
206
207 for ((mz, intensity), annotation) in self.mz.iter().zip(self.intensity.iter()).zip(self.annotations.iter()) {
208 let key = quantize(*mz);
209 spec_map.insert(key, (*intensity, annotation.clone()));
210 }
211
212 for ((mz, intensity), annotation) in other.mz.iter().zip(other.intensity.iter()).zip(other.annotations.iter()) {
213 let key = quantize(*mz);
214 spec_map.entry(key).and_modify(|e| {
215 e.0 += *intensity;
216 e.1.contributions.extend(annotation.contributions.clone());
217 }).or_insert((*intensity, annotation.clone()));
218 }
219
220 let mz: Vec<f64> = spec_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
221 let intensity: Vec<f64> = spec_map.values().map(|(intensity, _)| *intensity).collect();
222 let annotations: Vec<PeakAnnotation> = spec_map.values().map(|(_, annotation)| annotation.clone()).collect();
223
224 assert!(mz.len() == intensity.len() && intensity.len() == annotations.len());
225
226 MzSpectrumAnnotated {
227 mz,
228 intensity,
229 annotations,
230 }
231 }
232}
233
234impl ToResolution for MzSpectrumAnnotated {
235 fn to_resolution(&self, resolution: i32) -> Self {
236 let mut spec_map: BTreeMap<i64, (f64, PeakAnnotation)> = BTreeMap::new();
237 let quantize = |mz: f64| -> i64 { (mz * 10.0_f64.powi(resolution)).round() as i64 };
238
239 for ((mz, intensity), annotation) in self.mz.iter().zip(self.intensity.iter()).zip(self.annotations.iter()) {
240 let key = quantize(*mz);
241 spec_map.entry(key).and_modify(|e| {
242 e.0 += *intensity;
243 e.1.contributions.extend(annotation.contributions.clone());
244 }).or_insert((*intensity, annotation.clone()));
245 }
246
247 let mz: Vec<f64> = spec_map.keys().map(|&key| key as f64 / 10.0_f64.powi(resolution)).collect();
248 let intensity: Vec<f64> = spec_map.values().map(|(intensity, _)| *intensity).collect();
249 let annotations: Vec<PeakAnnotation> = spec_map.values().map(|(_, annotation)| annotation.clone()).collect();
250
251 assert!(mz.len() == intensity.len() && intensity.len() == annotations.len());
252
253 MzSpectrumAnnotated {
254 mz,
255 intensity,
256 annotations,
257 }
258 }
259}
260
261impl std::ops::Mul<f64> for MzSpectrumAnnotated {
262 type Output = Self;
263 fn mul(self, scale: f64) -> Self::Output{
264
265 let mut scaled_intensities: Vec<f64> = vec![0.0; self.intensity.len()];
266
267 for (idx,intensity) in self.intensity.iter().enumerate(){
268 scaled_intensities[idx] = scale*intensity;
269 }
270
271 let mut scaled_annotations: Vec<PeakAnnotation> = Vec::new();
272
273 for annotation in self.annotations.iter(){
274 let mut scaled_contributions: Vec<ContributionSource> = Vec::new();
275 for contribution in annotation.contributions.iter(){
276 let scaled_intensity = (contribution.intensity_contribution*scale).round();
277 let scaled_contribution = ContributionSource{
278 intensity_contribution: scaled_intensity,
279 source_type: contribution.source_type.clone(),
280 signal_attributes: contribution.signal_attributes.clone(),
281 };
282 scaled_contributions.push(scaled_contribution);
283 }
284 let scaled_annotation = PeakAnnotation{
285 contributions: scaled_contributions,
286 };
287 scaled_annotations.push(scaled_annotation);
288 }
289
290 MzSpectrumAnnotated { mz: self.mz.clone(), intensity: scaled_intensities, annotations: scaled_annotations }
291 }
292}
293
294impl Vectorized<MzSpectrumAnnotatedVectorized> for MzSpectrumAnnotated {
295 fn vectorized(&self, resolution: i32) -> MzSpectrumAnnotatedVectorized {
296
297 let quantize = |mz: f64| -> i64 { (mz * 10.0_f64.powi(resolution)).round() as i64 };
298
299 let binned_spec = self.to_resolution(resolution);
300 let mut indices: Vec<u32> = Vec::with_capacity(binned_spec.mz.len());
301 let mut values: Vec<f64> = Vec::with_capacity(binned_spec.mz.len());
302 let mut annotations: Vec<PeakAnnotation> = Vec::with_capacity(binned_spec.mz.len());
303
304 for (mz, intensity, annotation) in izip!(binned_spec.mz.iter(), binned_spec.intensity.iter(), binned_spec.annotations.iter()) {
305 indices.push(quantize(*mz) as u32);
306 values.push(*intensity);
307 annotations.push(annotation.clone());
308 }
309
310 MzSpectrumAnnotatedVectorized {
311 indices,
312 values,
313 annotations,
314 }
315 }
316}
317
318#[derive(Clone, Debug)]
319pub struct MzSpectrumAnnotatedVectorized {
320 pub indices: Vec<u32>,
321 pub values: Vec<f64>,
322 pub annotations: Vec<PeakAnnotation>,
323}
324
325#[derive(Clone, Debug)]
326pub struct TimsSpectrumAnnotated {
327 pub frame_id: i32,
328 pub scan: u32,
329 pub retention_time: f64,
330 pub mobility: f64,
331 pub ms_type: MsType,
332 pub tof: Vec<u32>,
333 pub spectrum: MzSpectrumAnnotated,
334}
335
336impl TimsSpectrumAnnotated {
337 pub fn new(frame_id: i32, scan: u32, retention_time: f64, mobility: f64, ms_type: MsType, tof: Vec<u32>, spectrum: MzSpectrumAnnotated) -> Self {
338 assert!(tof.len() == spectrum.mz.len() && spectrum.mz.len() == spectrum.intensity.len() && spectrum.intensity.len() == spectrum.annotations.len());
339 let mut mz_intensity_annotations: Vec<(u32, f64, f64, PeakAnnotation)> = izip!(tof.iter(), spectrum.mz.iter(), spectrum.intensity.iter(),
341 spectrum.annotations.iter()).map(|(tof, mz, intensity, annotation)| (*tof, *mz, *intensity, annotation.clone())).collect();
342 mz_intensity_annotations.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
343 TimsSpectrumAnnotated {
344 frame_id,
345 scan,
346 retention_time,
347 mobility,
348 ms_type,
349 tof: mz_intensity_annotations.iter().map(|(tof, _, _, _)| *tof).collect(),
350 spectrum: MzSpectrumAnnotated {
351 mz: mz_intensity_annotations.iter().map(|(_, mz, _, _)| *mz).collect(),
352 intensity: mz_intensity_annotations.iter().map(|(_, _, intensity, _)| *intensity).collect(),
353 annotations: mz_intensity_annotations.iter().map(|(_, _, _, annotation)| annotation.clone()).collect(),
354 },
355 }
356 }
357
358 pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> Self {
359 let mut tof_filtered: Vec<u32> = Vec::new();
360 let mut mz_filtered: Vec<f64> = Vec::new();
361 let mut intensity_filtered: Vec<f64> = Vec::new();
362 let mut annotations_filtered: Vec<PeakAnnotation> = Vec::new();
363
364 for (tof, mz, intensity, annotation) in izip!(self.tof.iter(), self.spectrum.mz.iter(), self.spectrum.intensity.iter(), self.spectrum.annotations.iter()) {
365 if *mz >= mz_min && *mz <= mz_max && *intensity >= intensity_min && *intensity <= intensity_max {
366 tof_filtered.push(*tof);
367 mz_filtered.push(*mz);
368 intensity_filtered.push(*intensity);
369 annotations_filtered.push(annotation.clone());
370 }
371 }
372
373 assert!(tof_filtered.len() == mz_filtered.len() && mz_filtered.len() == intensity_filtered.len() && intensity_filtered.len() == annotations_filtered.len());
374
375 TimsSpectrumAnnotated {
376 frame_id: self.frame_id,
377 scan: self.scan,
378 retention_time: self.retention_time,
379 mobility: self.mobility,
380 ms_type: self.ms_type.clone(),
381 tof: tof_filtered,
382 spectrum: MzSpectrumAnnotated::new(mz_filtered, intensity_filtered, annotations_filtered),
383 }
384 }
385
386 pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
387 TimsSpectrumAnnotated {
388 frame_id: self.frame_id,
389 scan: self.scan,
390 retention_time: self.retention_time,
391 mobility: self.mobility,
392 ms_type: self.ms_type.clone(),
393 tof: self.tof.clone(),
395 spectrum: self.spectrum.add_mz_noise_uniform(ppm, right_drag),
396 }
397 }
398
399 pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
400 TimsSpectrumAnnotated {
401 frame_id: self.frame_id,
402 scan: self.scan,
403 retention_time: self.retention_time,
404 mobility: self.mobility,
405 ms_type: self.ms_type.clone(),
406 tof: self.tof.clone(),
408 spectrum: self.spectrum.add_mz_noise_normal(ppm),
409 }
410 }
411
412 pub fn to_windows(
413 &self,
414 window_length: f64,
415 overlapping: bool,
416 min_peaks: usize,
417 min_intensity: f64,
418 ) -> BTreeMap<i32, TimsSpectrumAnnotated> {
419 let mut buckets: BTreeMap<i32, Vec<(u32, f64, f64, PeakAnnotation)>> = BTreeMap::new();
421 for (&tof, &mz, &intensity, annotation) in
422 izip!(
423 &self.tof,
424 &self.spectrum.mz,
425 &self.spectrum.intensity,
426 &self.spectrum.annotations
427 )
428 {
429 let idx = (mz / window_length).floor() as i32;
430 buckets.entry(idx)
431 .or_default()
432 .push((tof, mz, intensity, annotation.clone()));
433 }
434
435 if overlapping {
437 let mut off: BTreeMap<i32, Vec<(u32, f64, f64, PeakAnnotation)>> = BTreeMap::new();
438 let half = window_length / 2.0;
439 for (&tof, &mz, &intensity, annotation) in
440 izip!(
441 &self.tof,
442 &self.spectrum.mz,
443 &self.spectrum.intensity,
444 &self.spectrum.annotations
445 )
446 {
447 let idx = -(((mz + half) / window_length).floor() as i32);
448 off.entry(idx)
449 .or_default()
450 .push((tof, mz, intensity, annotation.clone()));
451 }
452 for (k, v) in off {
453 buckets.entry(k).or_default().extend(v);
454 }
455 }
456
457 let mut out = BTreeMap::new();
459 for (idx, group) in buckets {
460 if group.len() < min_peaks {
461 continue;
462 }
463 let max_i = group.iter().map(|&(_, _, i, _)| i).fold(0.0, f64::max);
464 if max_i < min_intensity {
465 continue;
466 }
467
468 let mut tofs = Vec::with_capacity(group.len());
470 let mut mzs = Vec::with_capacity(group.len());
471 let mut ints = Vec::with_capacity(group.len());
472 let mut annots = Vec::with_capacity(group.len());
473 for (tof, mz, intensity, annotation) in group {
474 tofs.push(tof);
475 mzs.push(mz);
476 ints.push(intensity);
477 annots.push(annotation);
478 }
479
480 let window_spec = MzSpectrumAnnotated::new(mzs, ints, annots);
482
483 let sub = TimsSpectrumAnnotated {
484 frame_id: self.frame_id,
485 scan: self.scan,
486 retention_time: self.retention_time,
487 mobility: self.mobility,
488 ms_type: self.ms_type.clone(),
489 tof: tofs,
490 spectrum: window_spec,
491 };
492
493 out.insert(idx, sub);
494 }
495
496 out
497 }
498}
499
500impl std::ops::Add for TimsSpectrumAnnotated {
501 type Output = Self;
502 fn add(self, other: Self) -> Self {
503
504 let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
505 let mut spec_map: BTreeMap<i64, (u32, f64, PeakAnnotation, i64)> = BTreeMap::new();
506 let mean_scan_floor = ((self.scan as f64 + other.scan as f64) / 2.0) as u32;
507
508 for (tof, mz, intensity, annotation) in izip!(self.tof.iter(), self.spectrum.mz.iter(), self.spectrum.intensity.iter(), self.spectrum.annotations.iter()) {
509 let key = quantize(*mz);
510 spec_map.insert(key, (*tof, *intensity, annotation.clone(), 1));
511 }
512
513 for (tof, mz, intensity, annotation) in izip!(other.tof.iter(), other.spectrum.mz.iter(), other.spectrum.intensity.iter(), other.spectrum.annotations.iter()) {
514 let key = quantize(*mz);
515 spec_map.entry(key).and_modify(|e| {
516 e.0 += *tof;
517 e.1 += *intensity;
518 e.2.contributions.extend(annotation.contributions.clone());
519 e.3 += 1;
520 }).or_insert((*tof, *intensity, annotation.clone(), 1));
521 }
522
523 let mut tof_vec: Vec<u32> = Vec::with_capacity(spec_map.len());
524 let mut mz_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
525 let mut intensity_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
526 let mut annotations_vec: Vec<PeakAnnotation> = Vec::with_capacity(spec_map.len());
527
528 for (key, (tof, intensity, annotation, count)) in spec_map.iter() {
529 tof_vec.push((*tof as f64 / *count as f64) as u32);
530 mz_vec.push(*key as f64 / 1_000_000.0);
531 intensity_vec.push(*intensity / *count as f64);
532 annotations_vec.push(annotation.clone());
533 }
534
535 assert!(tof_vec.len() == mz_vec.len() && mz_vec.len() == intensity_vec.len() && intensity_vec.len() == annotations_vec.len());
536
537 TimsSpectrumAnnotated {
538 frame_id: self.frame_id,
539 scan: mean_scan_floor,
540 retention_time: self.retention_time,
541 mobility: self.mobility,
542 ms_type: if self.ms_type == other.ms_type { self.ms_type.clone() } else { MsType::Unknown },
543 tof: tof_vec,
544 spectrum: MzSpectrumAnnotated::new(mz_vec, intensity_vec, annotations_vec),
545 }
546 }
547}
548
549#[derive(Clone, Debug)]
550pub struct TimsFrameAnnotated {
551 pub frame_id: i32,
552 pub retention_time: f64,
553 pub ms_type: MsType,
554 pub tof: Vec<u32>,
555 pub mz: Vec<f64>,
556 pub scan: Vec<u32>,
557 pub inv_mobility: Vec<f64>,
558 pub intensity: Vec<f64>,
559 pub annotations: Vec<PeakAnnotation>,
560}
561
562impl TimsFrameAnnotated {
563 pub fn new(frame_id: i32, retention_time: f64, ms_type: MsType, tof: Vec<u32>, mz: Vec<f64>, scan: Vec<u32>, inv_mobility: Vec<f64>, intensity: Vec<f64>, annotations: Vec<PeakAnnotation>) -> Self {
564 assert!(tof.len() == mz.len() && mz.len() == scan.len() && scan.len() == inv_mobility.len() && inv_mobility.len() == intensity.len() && intensity.len() == annotations.len());
565 TimsFrameAnnotated {
566 frame_id,
567 retention_time,
568 ms_type,
569 tof,
570 mz,
571 scan,
572 inv_mobility,
573 intensity,
574 annotations,
575 }
576 }
577 pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, inv_mobility_min: f64, inv_mobility_max: f64, scan_min: u32, scan_max: u32, intensity_min: f64, intensity_max: f64) -> Self {
578 let mut tof_filtered: Vec<u32> = Vec::new();
579 let mut mz_filtered: Vec<f64> = Vec::new();
580 let mut scan_filtered: Vec<u32> = Vec::new();
581 let mut inv_mobility_filtered: Vec<f64> = Vec::new();
582 let mut intensity_filtered: Vec<f64> = Vec::new();
583 let mut annotations_filtered: Vec<PeakAnnotation> = Vec::new();
584
585 for (tof, mz, scan, inv_mobility, intensity, annotation) in izip!(self.tof.iter(), self.mz.iter(), self.scan.iter(), self.inv_mobility.iter(), self.intensity.iter(), self.annotations.iter()) {
586 if *mz >= mz_min && *mz <= mz_max && *inv_mobility >= inv_mobility_min && *inv_mobility <= inv_mobility_max && *scan >= scan_min && *scan <= scan_max && *intensity >= intensity_min && *intensity <= intensity_max {
587 tof_filtered.push(*tof);
588 mz_filtered.push(*mz);
589 scan_filtered.push(*scan);
590 inv_mobility_filtered.push(*inv_mobility);
591 intensity_filtered.push(*intensity);
592 annotations_filtered.push(annotation.clone());
593 }
594 }
595
596 assert!(tof_filtered.len() == mz_filtered.len() && mz_filtered.len() == scan_filtered.len() && scan_filtered.len() == inv_mobility_filtered.len() && inv_mobility_filtered.len() == intensity_filtered.len() && intensity_filtered.len() == annotations_filtered.len());
597
598 TimsFrameAnnotated {
599 frame_id: self.frame_id,
600 retention_time: self.retention_time,
601 ms_type: self.ms_type.clone(),
602 tof: tof_filtered,
603 mz: mz_filtered,
604 scan: scan_filtered,
605 inv_mobility: inv_mobility_filtered,
606 intensity: intensity_filtered,
607 annotations: annotations_filtered,
608 }
609 }
610
611 pub fn to_tims_spectra_annotated(&self) -> Vec<TimsSpectrumAnnotated> {
612 let mut spectra = BTreeMap::<i32, (f64, Vec<u32>, MzSpectrumAnnotated)>::new();
614
615 for (scan, mobility, tof, mz, intensity, annotations) in izip!(self.scan.iter(), self.inv_mobility.iter(), self.tof.iter(), self.mz.iter(), self.intensity.iter(), self.annotations.iter()) {
617 let entry = spectra.entry(*scan as i32).or_insert_with(|| (*mobility, Vec::new(), MzSpectrumAnnotated::new(Vec::new(), Vec::new(), Vec::new())));
618 entry.1.push(*tof);
619 entry.2.mz.push(*mz);
620 entry.2.intensity.push(*intensity);
621 entry.2.annotations.push(annotations.clone());
622 }
623
624 let mut tims_spectra: Vec<TimsSpectrumAnnotated> = Vec::new();
626
627 for (scan, (mobility, tof, spectrum)) in spectra {
628 tims_spectra.push(TimsSpectrumAnnotated::new(self.frame_id, scan as u32, self.retention_time, mobility, self.ms_type.clone(), tof, spectrum));
629 }
630
631 tims_spectra
632 }
633
634 pub fn from_tims_spectra_annotated(spectra: Vec<TimsSpectrumAnnotated>) -> TimsFrameAnnotated {
635 let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
636 let mut spec_map: BTreeMap<(u32, i64), (f64, u32, f64, PeakAnnotation, i64)> = BTreeMap::new();
637 let mut capacity_count = 0;
638
639 for spectrum in &spectra {
640 let inv_mobility = spectrum.mobility;
641 for (i, mz) in spectrum.spectrum.mz.iter().enumerate() {
642 let scan = spectrum.scan;
643 let tof = spectrum.tof[i];
644 let intensity = spectrum.spectrum.intensity[i];
645 let annotation = spectrum.spectrum.annotations[i].clone();
646 let key = (scan, quantize(*mz));
647 spec_map.entry(key).and_modify(|e| {
648 e.0 += intensity;
649 e.1 += tof;
650 e.2 += inv_mobility;
651 e.3.contributions.extend(annotation.contributions.clone());
652 e.4 += 1;
653 }).or_insert((intensity, tof, inv_mobility, annotation, 1));
654 capacity_count += 1;
655 }
656 }
657
658 let mut scan_vec: Vec<u32> = Vec::with_capacity(capacity_count);
659 let mut inv_mobility_vec: Vec<f64> = Vec::with_capacity(capacity_count);
660 let mut tof_vec: Vec<u32> = Vec::with_capacity(capacity_count);
661 let mut mz_vec: Vec<f64> = Vec::with_capacity(capacity_count);
662 let mut intensity_vec: Vec<f64> = Vec::with_capacity(capacity_count);
663 let mut annotations_vec: Vec<PeakAnnotation> = Vec::with_capacity(capacity_count);
664
665 for ((scan, mz), (intensity, tof, inv_mobility, annotation, count)) in spec_map.iter() {
666 scan_vec.push(*scan);
667 inv_mobility_vec.push(*inv_mobility / *count as f64);
668 tof_vec.push((*tof as f64 / *count as f64) as u32);
669 mz_vec.push(*mz as f64 / 1_000_000.0);
670 intensity_vec.push(*intensity);
671 annotations_vec.push(annotation.clone());
672 }
673
674 assert!(tof_vec.len() == mz_vec.len() && mz_vec.len() == scan_vec.len() && scan_vec.len() == inv_mobility_vec.len() && inv_mobility_vec.len() == intensity_vec.len() && intensity_vec.len() == annotations_vec.len());
675
676 TimsFrameAnnotated {
677 frame_id: spectra.first().unwrap().frame_id,
678 retention_time: spectra.first().unwrap().retention_time,
679 ms_type: spectra.first().unwrap().ms_type.clone(),
680 tof: tof_vec,
681 mz: mz_vec,
682 scan: scan_vec,
683 inv_mobility: inv_mobility_vec,
684 intensity: intensity_vec,
685 annotations: annotations_vec,
686 }
687 }
688 pub fn to_windows_indexed(
689 &self,
690 window_length: f64,
691 overlapping: bool,
692 min_peaks: usize,
693 min_intensity: f64
694 ) -> (Vec<u32>, Vec<i32>, Vec<TimsSpectrumAnnotated>) {
695 let spectra = self.to_tims_spectra_annotated();
697
698 let windows_per_scan: Vec<_> = spectra
700 .iter()
701 .map(|s| s.to_windows(window_length, overlapping, min_peaks, min_intensity))
702 .collect();
703
704 let mut scan_indices = Vec::new();
706 let mut window_indices = Vec::new();
707 let mut out_spectra = Vec::new();
708
709 for (spec, window_map) in spectra.iter().zip(windows_per_scan.iter()) {
710 for (&win_idx, win_spec) in window_map {
711 scan_indices.push(spec.scan);
712 window_indices.push(win_idx);
713 out_spectra.push(win_spec.clone());
714 }
715 }
716
717 (scan_indices, window_indices, out_spectra)
718 }
719
720 pub fn to_windows(
721 &self,
722 window_length: f64,
723 overlapping: bool,
724 min_peaks: usize,
725 min_intensity: f64
726 ) -> Vec<TimsSpectrumAnnotated> {
727 let spectra = self.to_tims_spectra_annotated();
729
730 let windows_per_scan: Vec<_> = spectra
732 .iter()
733 .map(|s| s.to_windows(window_length, overlapping, min_peaks, min_intensity))
734 .collect();
735
736 let mut out_spectra = Vec::new();
738 for (_, window_map) in spectra.iter().zip(windows_per_scan.iter()) {
739 for (_, win_spec) in window_map {
740 out_spectra.push(win_spec.clone());
741 }
742 }
743
744 out_spectra
745 }
746
747 pub fn to_dense_windows(
748 &self,
749 window_length: f64,
750 overlapping: bool,
751 min_peaks: usize,
752 min_intensity: f64,
753 resolution: i32
754 ) -> (Vec<f64>, Vec<i32>, Vec<i32>, usize, usize) {
755 let factor = 10f64.powi(resolution);
756 let n_cols = ((window_length * factor).round() + 1.0) as usize;
757
758 let (scan_indices, window_indices, spectra) =
760 self.to_windows_indexed(window_length, overlapping, min_peaks, min_intensity);
761
762 let vec_specs: Vec<_> = spectra
764 .iter()
765 .map(|ts| ts.spectrum.vectorized(resolution))
766 .collect();
767
768 let n_rows = spectra.len();
770 let mut matrix = vec![0.0; n_rows * n_cols];
771
772 for (row, ( &win_idx, vec_spec)) in
774 multizip((&window_indices, &vec_specs))
775 .enumerate()
776 {
777 let start_i = if win_idx >= 0 {
779 ((win_idx as f64 * window_length) * factor).round() as i32
780 } else {
781 ((((-win_idx) as f64 * window_length) - 0.5 * window_length) * factor)
783 .round() as i32
784 };
785
786 for (&idx, &val) in vec_spec.indices.iter().zip(&vec_spec.values) {
788 let col = (idx as i32 - start_i) as usize;
789 let flat_idx = row * n_cols + col;
790 matrix[flat_idx] = val;
791 }
792 }
793
794 let scan_indices: Vec<i32> = scan_indices.iter().map(|&scan| scan as i32).collect();
796
797 (matrix, scan_indices, window_indices, n_rows, n_cols)
798 }
799
800 pub fn to_dense_windows_with_labels(
811 &self,
812 window_length: f64,
813 overlapping: bool,
814 min_peaks: usize,
815 min_intensity: f64,
816 resolution: i32,
817 ) -> (
818 Vec<f64>, Vec<u32>, Vec<i32>, Vec<f64>, Vec<f64>, usize, usize, Vec<i32>, Vec<i32>, Vec<i32>, ) {
829 let factor = 10f64.powi(resolution);
830 let n_cols = ((window_length * factor).round() + 1.0) as usize;
831
832 let (scan_indices, window_indices, spectra) =
834 self.to_windows_indexed(window_length, overlapping, min_peaks, min_intensity);
835 let vec_specs: Vec<_> = spectra
836 .iter()
837 .map(|ts| ts.spectrum.vectorized(resolution))
838 .collect();
839
840 let n_rows = vec_specs.len();
841 let matrix_sz = n_rows * n_cols;
842
843 let mut intensities = vec![0.0_f64; matrix_sz];
845 let mut iso_labels = vec![-1_i32; matrix_sz];
846 let mut charge_labels = vec![-1_i32; matrix_sz];
847 let mut peptide_labels = vec![-1_i32; matrix_sz];
848 let mut mz_start = Vec::with_capacity(n_rows);
849 let mut ion_mobility_start = Vec::with_capacity(n_rows);
850
851 for (row, ((&win_idx, vec_spec), ts)) in
853 multizip((&window_indices, &vec_specs))
854 .zip(spectra.iter())
855 .enumerate()
856 {
857 let first_mz = ts.spectrum.mz.first().cloned().unwrap_or(0.0);
859 mz_start.push(first_mz);
860 ion_mobility_start.push(ts.mobility);
861
862 let mut feat_map = HashMap::<i32,i32>::new();
864 let mut next_feat = 0;
865
866 let start_i = if win_idx >= 0 {
868 ((win_idx as f64 * window_length) * factor).round() as i32
869 } else {
870 (((-win_idx) as f64 * window_length - 0.5 * window_length) * factor)
871 .round() as i32
872 };
873
874 for (&bin_idx, &val, annotation) in
876 izip!(&vec_spec.indices, &vec_spec.values, &vec_spec.annotations)
877 {
878 let col = (bin_idx as i32 - start_i) as usize;
879 let flat = row * n_cols + col;
880 intensities[flat] = val;
881
882 if let Some(best) = annotation
884 .contributions
885 .iter()
886 .max_by(|a, b| {
887 a.intensity_contribution
888 .partial_cmp(&b.intensity_contribution)
889 .unwrap()
890 })
891 {
892 match best.source_type {
893 SourceType::Signal => {
894 if let Some(sa) = &best.signal_attributes {
895 iso_labels[flat] = sa.isotope_peak;
896 charge_labels[flat] = sa.charge_state;
897 let old = sa.peptide_id;
899 let new = *feat_map.entry(old).or_insert_with(|| {
900 let i = next_feat; next_feat += 1; i.min(5)
901 });
902 peptide_labels[flat] = new;
903 }
904 }
905 SourceType::RandomNoise => {
906 iso_labels[flat] = -2;
907 charge_labels[flat] = -2;
908 peptide_labels[flat] = -2;
909 }
910 _ => { }
911 }
912 }
913 }
914 }
915
916 (
917 intensities,
918 scan_indices,
919 window_indices,
920 mz_start,
921 ion_mobility_start,
922 n_rows,
923 n_cols,
924 iso_labels,
925 charge_labels,
926 peptide_labels,
927 )
928 }
929
930 pub fn fold_along_scan_axis(self, fold_width: usize) -> TimsFrameAnnotated {
931 let spectra = self.to_tims_spectra_annotated();
933
934 let mut merged_spectra: BTreeMap<u32, TimsSpectrumAnnotated> = BTreeMap::new();
937 for spectrum in spectra {
938 let key = spectrum.scan / fold_width as u32;
939
940 if let Some(existing_spectrum) = merged_spectra.get_mut(&key) {
942
943 let merged_spectrum = existing_spectrum.clone() + spectrum;
944 *existing_spectrum = merged_spectrum;
946
947 } else {
948 merged_spectra.insert(key, spectrum);
950 }
951 }
952 TimsFrameAnnotated::from_tims_spectra_annotated(merged_spectra.into_values().collect())
954 }
955}
956
957impl std::ops::Add for TimsFrameAnnotated {
958 type Output = Self;
959 fn add(self, other: Self) -> Self {
960
961 let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
962 let mut spec_map: BTreeMap<(u32, i64), (f64, u32, f64, PeakAnnotation, i64)> = BTreeMap::new();
963
964 for (scan, mz, tof, inv_mobility, intensity, annotation) in
965 izip!(self.scan.iter(), self.mz.iter(), self.tof.iter(), self.inv_mobility.iter(), self.intensity.iter(), self.annotations.iter()) {
966 let key = (*scan, quantize(*mz));
967 spec_map.insert(key, (*intensity, *tof, *inv_mobility, annotation.clone(), 1));
968 }
969
970 for (scan, mz, tof, inv_mobility, intensity, annotation) in
971 izip!(other.scan.iter(), other.mz.iter(), other.tof.iter(), other.inv_mobility.iter(), other.intensity.iter(), other.annotations.iter()) {
972 let key = (*scan, quantize(*mz));
973 spec_map.entry(key).and_modify(|e| {
974 e.0 += *intensity;
975 e.1 += *tof;
976 e.2 += *inv_mobility;
977 e.3.contributions.extend(annotation.contributions.clone());
978 e.4 += 1;
979 }).or_insert((*intensity, *tof, *inv_mobility, annotation.clone(), 1));
980 }
981
982 let mut tof_vec: Vec<u32> = Vec::with_capacity(spec_map.len());
983 let mut mz_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
984 let mut scan_vec: Vec<u32> = Vec::with_capacity(spec_map.len());
985 let mut inv_mobility_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
986 let mut intensity_vec: Vec<f64> = Vec::with_capacity(spec_map.len());
987 let mut annotations_vec: Vec<PeakAnnotation> = Vec::with_capacity(spec_map.len());
988
989 for ((scan, mz), (intensity, tof, inv_mobility, annotation, count)) in spec_map.iter() {
990 scan_vec.push(*scan);
991 mz_vec.push(*mz as f64 / 1_000_000.0);
992 intensity_vec.push(*intensity);
993 tof_vec.push((*tof as f64 / *count as f64) as u32);
994 inv_mobility_vec.push(*inv_mobility / *count as f64);
995 annotations_vec.push(annotation.clone());
996 }
997
998 assert!(tof_vec.len() == mz_vec.len() && mz_vec.len() == scan_vec.len() && scan_vec.len() == inv_mobility_vec.len() && inv_mobility_vec.len() == intensity_vec.len() && intensity_vec.len() == annotations_vec.len());
999
1000 TimsFrameAnnotated {
1001 frame_id: self.frame_id,
1002 retention_time: self.retention_time,
1003 ms_type: if self.ms_type == other.ms_type { self.ms_type.clone() } else { MsType::Unknown },
1004 tof: tof_vec,
1005 mz: mz_vec,
1006 scan: scan_vec,
1007 inv_mobility: inv_mobility_vec,
1008 intensity: intensity_vec,
1009 annotations: annotations_vec,
1010 }
1011 }
1012}