1use std::fmt;
2use std::collections::BTreeMap;
3use std::sync::Arc;
4use nalgebra::DVector;
5use std::fmt::{Display, Formatter};
6use bincode::{Decode, Encode};
7use serde::{Serialize, Deserialize};
8
9extern crate rand;
10
11use rand::distributions::{Uniform, Distribution};
12use rand::rngs::ThreadRng;
13use statrs::distribution::Normal;
14
15pub trait ToResolution {
17 fn to_resolution(&self, resolution: i32) -> Self;
18}
19
20pub trait Vectorized<T> {
22 fn vectorized(&self, resolution: i32) -> T;
23}
24
25#[derive(Clone, PartialEq, Debug, Serialize, Deserialize, Encode, Decode)]
32pub enum MsType {
33 Precursor,
34 FragmentDda,
35 FragmentDia,
36 Unknown,
37}
38
39impl MsType {
40 pub fn new(ms_type: i32) -> MsType {
47 match ms_type {
48 0 => MsType::Precursor,
49 8 => MsType::FragmentDda,
50 9 => MsType::FragmentDia,
51 _ => MsType::Unknown,
52 }
53 }
54
55 pub fn ms_type_numeric(&self) -> i32 {
57 match self {
58 MsType::Precursor => 0,
59 MsType::FragmentDda => 8,
60 MsType::FragmentDia => 9,
61 MsType::Unknown => -1,
62 }
63 }
64}
65
66impl Default for MsType {
67 fn default() -> Self {
68 MsType::Unknown
69 }
70}
71
72impl Display for MsType {
73 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
74 match self {
75 MsType::Precursor => write!(f, "Precursor"),
76 MsType::FragmentDda => write!(f, "FragmentDda"),
77 MsType::FragmentDia => write!(f, "FragmentDia"),
78 MsType::Unknown => write!(f, "Unknown"),
79 }
80 }
81}
82
83#[derive(Clone, Debug, Serialize, Deserialize)]
87pub struct MzSpectrum {
88 pub mz: Arc<Vec<f64>>,
89 pub intensity: Arc<Vec<f64>>,
90}
91
92impl Encode for MzSpectrum {
94 fn encode<E: bincode::enc::Encoder>(&self, encoder: &mut E) -> Result<(), bincode::error::EncodeError> {
95 bincode::Encode::encode(&*self.mz, encoder)?;
97 bincode::Encode::encode(&*self.intensity, encoder)?;
98 Ok(())
99 }
100}
101
102impl<Context> Decode<Context> for MzSpectrum {
103 fn decode<D: bincode::de::Decoder<Context = Context>>(decoder: &mut D) -> Result<Self, bincode::error::DecodeError> {
104 let mz: Vec<f64> = bincode::Decode::decode(decoder)?;
105 let intensity: Vec<f64> = bincode::Decode::decode(decoder)?;
106 Ok(MzSpectrum::new(mz, intensity))
107 }
108}
109
110impl<'de, Context> bincode::BorrowDecode<'de, Context> for MzSpectrum {
111 fn borrow_decode<D: bincode::de::BorrowDecoder<'de, Context = Context>>(decoder: &mut D) -> Result<Self, bincode::error::DecodeError> {
112 let mz: Vec<f64> = bincode::BorrowDecode::borrow_decode(decoder)?;
113 let intensity: Vec<f64> = bincode::BorrowDecode::borrow_decode(decoder)?;
114 Ok(MzSpectrum::new(mz, intensity))
115 }
116}
117
118impl MzSpectrum {
119 pub fn new(mz: Vec<f64>, intensity: Vec<f64>) -> Self {
139 MzSpectrum {
140 mz: Arc::new(mz),
141 intensity: Arc::new(intensity),
142 }
143 }
144
145 pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self {
146 let mut mz_vec: Vec<f64> = Vec::new();
147 let mut intensity_vec: Vec<f64> = Vec::new();
148
149 for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
150 if mz_min <= *mz && *mz <= mz_max && *intensity >= intensity_min && *intensity <= intensity_max {
151 mz_vec.push(*mz);
152 intensity_vec.push(*intensity);
153 }
154 }
155 MzSpectrum::new(mz_vec, intensity_vec)
156 }
157
158 pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> BTreeMap<i32, MzSpectrum> {
190 let mut splits: BTreeMap<i32, (Vec<f64>, Vec<f64>)> = BTreeMap::new();
192
193 for (i, &mz) in self.mz.iter().enumerate() {
194 let intensity = self.intensity[i];
195 let tmp_key = (mz / window_length).floor() as i32;
196
197 let entry = splits.entry(tmp_key).or_insert_with(|| (Vec::new(), Vec::new()));
198 entry.0.push(mz);
199 entry.1.push(intensity);
200 }
201
202 if overlapping {
203 let mut splits_offset: BTreeMap<i32, (Vec<f64>, Vec<f64>)> = BTreeMap::new();
204
205 for (i, &mmz) in self.mz.iter().enumerate() {
206 let intensity = self.intensity[i];
207 let tmp_key = -((mmz + window_length / 2.0) / window_length).floor() as i32;
208
209 let entry = splits_offset.entry(tmp_key).or_insert_with(|| (Vec::new(), Vec::new()));
210 entry.0.push(mmz);
211 entry.1.push(intensity);
212 }
213
214 for (key, (mz_vec, int_vec)) in splits_offset {
215 let entry = splits.entry(key).or_insert_with(|| (Vec::new(), Vec::new()));
216 entry.0.extend(mz_vec);
217 entry.1.extend(int_vec);
218 }
219 }
220
221 splits.into_iter()
223 .filter(|(_, (mz_vec, int_vec))| {
224 mz_vec.len() >= min_peaks && int_vec.iter().cloned().max_by(
225 |a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)).unwrap_or(0.0) >= min_intensity
226 })
227 .map(|(key, (mz_vec, int_vec))| (key, MzSpectrum::new(mz_vec, int_vec)))
228 .collect()
229 }
230
231 pub fn to_centroid(&self, baseline_noise_level: i32, sigma: f64, normalize: bool) -> MzSpectrum {
232
233 let filtered = self.filter_ranged(0.0, 1e9, baseline_noise_level as f64, 1e9);
234
235 let mut cent_mz = Vec::new();
236 let mut cent_i: Vec<f64> = Vec::new();
237
238 let mut last_mz = 0.0;
239 let mut mean_mz = 0.0;
240 let mut sum_i = 0.0;
241
242 for (i, ¤t_mz) in filtered.mz.iter().enumerate() {
243 let current_intensity = filtered.intensity[i];
244
245 if current_mz - last_mz > sigma && mean_mz > 0.0 {
247 mean_mz /= sum_i;
248 cent_mz.push(mean_mz);
249 cent_i.push(sum_i);
250
251 sum_i = 0.0;
253 mean_mz = 0.0;
254 }
255
256 mean_mz += current_mz * current_intensity as f64;
257 sum_i += current_intensity;
258 last_mz = current_mz;
259 }
260
261 if mean_mz > 0.0 {
263 mean_mz /= sum_i;
264 cent_mz.push(mean_mz);
265 cent_i.push(sum_i);
266 }
267
268 if normalize {
269 let sum_i: f64 = cent_i.iter().sum();
270 cent_i = cent_i.iter().map(|&i| i / sum_i).collect();
271 }
272 MzSpectrum::new(cent_mz, cent_i)
273 }
274
275 pub fn from_collection(collection: Vec<MzSpectrum>) -> MzSpectrum {
276
277 let quantize = |mz: f64| -> i64 {
278 (mz * 1_000_000.0).round() as i64
279 };
280
281 let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
282
283 for spectrum in collection {
284 for (mz, intensity) in spectrum.mz.iter().zip(spectrum.intensity.iter()) {
285 let key = quantize(*mz);
286 let entry = combined_map.entry(key).or_insert(0.0);
287 *entry += *intensity;
288 }
289 }
290
291 let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
292 let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
293
294 MzSpectrum::new(mz_combined, intensity_combined)
295 }
296
297 pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
298 let mut rng = rand::thread_rng();
299 self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
300
301 let ppm_mz = match right_drag {
302 true => mz * ppm / 1e6 / 2.0,
303 false => mz * ppm / 1e6,
304 };
305
306 let dist = match right_drag {
307 true => Uniform::from(mz - (ppm_mz / 3.0)..=mz + ppm_mz),
308 false => Uniform::from(mz - ppm_mz..=mz + ppm_mz),
309 };
310
311 dist.sample(rng)
312 })
313 }
314
315 pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
316 let mut rng = rand::thread_rng();
317 self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
318 let ppm_mz = mz * ppm / 1e6;
319 let dist = Normal::new(mz, ppm_mz / 3.0).unwrap();
320 dist.sample(rng)
321 })
322 }
323
324 fn add_mz_noise<F>(&self, ppm: f64, rng: &mut ThreadRng, noise_fn: F) -> Self
325 where
326 F: Fn(&mut ThreadRng, f64, f64) -> f64,
327 {
328 let mz: Vec<f64> = self.mz.iter().map(|&mz_value| noise_fn(rng, mz_value, ppm)).collect();
329 let spectrum = MzSpectrum { mz: Arc::new(mz), intensity: self.intensity.clone() };
331 spectrum.to_resolution(6)
333 }
334}
335
336impl ToResolution for MzSpectrum {
337 fn to_resolution(&self, resolution: i32) -> Self {
362 let mut binned: BTreeMap<i64, f64> = BTreeMap::new();
363 let factor = 10f64.powi(resolution);
364
365 for (mz, inten) in self.mz.iter().zip(self.intensity.iter()) {
366
367 let key = (mz * factor).round() as i64;
368 let entry = binned.entry(key).or_insert(0.0);
369 *entry += *inten;
370 }
371
372 let mz: Vec<f64> = binned.keys().map(|&key| key as f64 / 10f64.powi(resolution)).collect();
373 let intensity: Vec<f64> = binned.values().cloned().collect();
374
375 MzSpectrum::new(mz, intensity)
376 }
377}
378
379impl Vectorized<MzSpectrumVectorized> for MzSpectrum {
380 fn vectorized(&self, resolution: i32) -> MzSpectrumVectorized {
384
385 let binned_spectrum = self.to_resolution(resolution);
386
387 let indices: Vec<i32> = binned_spectrum.mz.iter().map(|&mz| (mz * 10f64.powi(resolution)).round() as i32).collect();
389
390 MzSpectrumVectorized {
391 resolution,
392 indices,
393 values: (*binned_spectrum.intensity).clone(),
394 }
395 }
396}
397
398impl Display for MzSpectrum {
400 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
401
402 let (mz, i) = self.mz.iter()
403 .zip(self.intensity.iter())
404 .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
405 .unwrap();
406
407 write!(f, "MzSpectrum(data points: {}, max by intensity:({}, {}))", self.mz.len(), format!("{:.3}", mz), i)
408 }
409}
410
411impl std::ops::Add for MzSpectrum {
412 type Output = Self;
413 fn add(self, other: Self) -> MzSpectrum {
431 let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
432
433 let quantize = |mz: f64| -> i64 {
435 (mz * 1_000_000.0).round() as i64
436 };
437
438 for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
440 let key = quantize(*mz);
441 combined_map.insert(key, *intensity);
442 }
443
444 for (mz, intensity) in other.mz.iter().zip(other.intensity.iter()) {
446 let key = quantize(*mz);
447 let entry = combined_map.entry(key).or_insert(0.0);
448 *entry += *intensity;
449 }
450
451 let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
453 let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
454
455 MzSpectrum::new(mz_combined, intensity_combined)
456 }
457}
458
459impl std::ops::Mul<f64> for MzSpectrum {
460 type Output = Self;
461 fn mul(self, scale: f64) -> Self::Output {
462 let scaled_intensities: Vec<f64> = self.intensity.iter().map(|i| scale * i).collect();
463 Self { mz: self.mz.clone(), intensity: Arc::new(scaled_intensities) }
465 }
466}
467
468impl std::ops::Sub for MzSpectrum {
469 type Output = Self;
470 fn sub(self, other: Self) -> Self::Output {
471 let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
472
473 let quantize = |mz: f64| -> i64 {
475 (mz * 1_000_000.0).round() as i64
476 };
477
478 for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
480 let key = quantize(*mz);
481 combined_map.insert(key, *intensity);
482 }
483
484 for (mz, intensity) in other.mz.iter().zip(other.intensity.iter()) {
486 let key = quantize(*mz);
487 let entry = combined_map.entry(key).or_insert(0.0);
488 *entry -= *intensity;
489 }
490
491 let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
493 let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
494
495 MzSpectrum::new(mz_combined, intensity_combined)
496 }
497}
498
499#[derive(Clone, Debug)]
501pub struct IndexedMzSpectrum {
502 pub index: Vec<i32>,
503 pub mz_spectrum: MzSpectrum,
504}
505
506impl Default for IndexedMzSpectrum {
508 fn default() -> Self {
509 IndexedMzSpectrum { index: Vec::new(), mz_spectrum: MzSpectrum::new(Vec::new(), Vec::new()) }
510 }
511}
512
513impl IndexedMzSpectrum {
514 pub fn new(index: Vec<i32>, mz: Vec<f64>, intensity: Vec<f64>) -> Self {
531 IndexedMzSpectrum { index, mz_spectrum: MzSpectrum::new(mz, intensity) }
532 }
533
534 pub fn from_mz_spectrum(index: Vec<i32>, mz_spectrum: MzSpectrum) -> Self {
544 IndexedMzSpectrum { index, mz_spectrum }
545 }
546
547 pub fn to_resolution(&self, resolution: i32) -> IndexedMzSpectrum {
567
568 let mut mz_bins: BTreeMap<i64, (f64, Vec<i64>)> = BTreeMap::new();
569 let factor = 10f64.powi(resolution);
570
571 for ((mz, intensity), tof_val) in self.mz_spectrum.mz.iter().zip(self.mz_spectrum.intensity.iter()).zip(&self.index) {
572 let key = (mz * factor).round() as i64;
573 let entry = mz_bins.entry(key).or_insert((0.0, Vec::new()));
574 entry.0 += *intensity;
575 entry.1.push(*tof_val as i64);
576 }
577
578 let mz: Vec<f64> = mz_bins.keys().map(|&key| key as f64 / factor).collect();
579 let intensity: Vec<f64> = mz_bins.values().map(|(intensity, _)| *intensity).collect();
580 let tof: Vec<i32> = mz_bins.values().map(|(_, tof_vals)| {
581 let sum: i64 = tof_vals.iter().sum();
582 let count: i32 = tof_vals.len() as i32;
583 (sum as f64 / count as f64).round() as i32
584 }).collect();
585
586 IndexedMzSpectrum {index: tof, mz_spectrum: MzSpectrum::new(mz, intensity) }
587 }
588
589 pub fn vectorized(&self, resolution: i32) -> IndexedMzSpectrumVectorized {
610
611 let binned_spectrum = self.to_resolution(resolution);
612
613 let indices: Vec<i32> = binned_spectrum.mz_spectrum.mz.iter()
615 .map(|&mz| (mz * 10f64.powi(resolution)).round() as i32).collect();
616
617 IndexedMzSpectrumVectorized {
618 index: binned_spectrum.index,
619 mz_vector: MzSpectrumVectorized {
620 resolution,
621 indices,
622 values: (*binned_spectrum.mz_spectrum.intensity).clone(),
623 }
624 }
625 }
626
627 pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self {
628 let mut mz_vec: Vec<f64> = Vec::new();
629 let mut intensity_vec: Vec<f64> = Vec::new();
630 let mut index_vec: Vec<i32> = Vec::new();
631
632 for ((&mz, &intensity), &index) in self.mz_spectrum.mz.iter().zip(self.mz_spectrum.intensity.iter()).zip(self.index.iter()) {
633 if mz_min <= mz && mz <= mz_max && intensity >= intensity_min && intensity <= intensity_max {
634 mz_vec.push(mz);
635 intensity_vec.push(intensity);
636 index_vec.push(index);
637 }
638 }
639 IndexedMzSpectrum { index: index_vec, mz_spectrum: MzSpectrum::new(mz_vec, intensity_vec) }
640 }
641}
642
643impl Display for IndexedMzSpectrum {
644 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
645 let (mz, i) = self.mz_spectrum.mz.iter()
646 .zip(self.mz_spectrum.intensity.iter())
647 .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
648 .unwrap();
649
650 write!(f, "IndexedMzSpectrum(data points: {}, max by intensity:({}, {}))", self.mz_spectrum.mz.len(), format!("{:.3}", mz), i)
651 }
652}
653
654#[derive(Clone)]
655pub struct MzSpectrumVectorized {
656 pub resolution: i32,
657 pub indices: Vec<i32>,
658 pub values: Vec<f64>,
659}
660
661impl MzSpectrumVectorized {
662 fn get_max_index(&self) -> usize {
672 let base: i32 = 10;
673 let max_mz: i32 = 2000;
674 let max_index: usize = (max_mz*base.pow(self.resolution as u32)) as usize;
675 max_index
676 }
677
678 pub fn to_dense(&self, max_index: Option<usize>) -> DVector<f64> {
679 let max_index = match max_index {
680 Some(max_index) => max_index,
681 None => self.get_max_index(),
682 };
683 let mut dense_intensities: DVector<f64> = DVector::<f64>::zeros(max_index + 1);
684 for (&index, &intensity) in self.indices.iter().zip(self.values.iter()) {
685 if (index as usize) <= max_index {
686 dense_intensities[index as usize] = intensity;
687 }
688 }
689 dense_intensities
690 }
691 pub fn to_dense_spectrum(&self, max_index: Option<usize>) -> MzSpectrumVectorized{
692 let max_index = match max_index {
693 Some(max_index) => max_index,
694 None => self.get_max_index(),
695 };
696 let dense_intensities: Vec<f64> = self.to_dense(Some(max_index)).data.into();
697 let dense_indices: Vec<i32> = (0..=max_index).map(|i| i as i32).collect();
698 let dense_spectrum: MzSpectrumVectorized = MzSpectrumVectorized { resolution: (self.resolution), indices: (dense_indices), values: (dense_intensities) };
699 dense_spectrum
700 }
701}
702
703#[derive(Clone)]
704pub struct IndexedMzSpectrumVectorized {
705 pub index: Vec<i32>,
706 pub mz_vector: MzSpectrumVectorized,
707}