1use std::fmt;
2use std::collections::BTreeMap;
3use nalgebra::DVector;
4use std::fmt::{Display, Formatter};
5use bincode::{Decode, Encode};
6use serde::{Serialize, Deserialize};
7
8extern crate rand;
9
10use rand::distributions::{Uniform, Distribution};
11use rand::rngs::ThreadRng;
12use statrs::distribution::Normal;
13
14pub trait ToResolution {
16 fn to_resolution(&self, resolution: i32) -> Self;
17}
18
19pub trait Vectorized<T> {
21 fn vectorized(&self, resolution: i32) -> T;
22}
23
24#[derive(Clone, PartialEq, Debug, Serialize, Deserialize, Encode, Decode)]
31pub enum MsType {
32 Precursor,
33 FragmentDda,
34 FragmentDia,
35 Unknown,
36}
37
38impl MsType {
39 pub fn new(ms_type: i32) -> MsType {
46 match ms_type {
47 0 => MsType::Precursor,
48 8 => MsType::FragmentDda,
49 9 => MsType::FragmentDia,
50 _ => MsType::Unknown,
51 }
52 }
53
54 pub fn ms_type_numeric(&self) -> i32 {
56 match self {
57 MsType::Precursor => 0,
58 MsType::FragmentDda => 8,
59 MsType::FragmentDia => 9,
60 MsType::Unknown => -1,
61 }
62 }
63}
64
65impl Default for MsType {
66 fn default() -> Self {
67 MsType::Unknown
68 }
69}
70
71impl Display for MsType {
72 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
73 match self {
74 MsType::Precursor => write!(f, "Precursor"),
75 MsType::FragmentDda => write!(f, "FragmentDda"),
76 MsType::FragmentDia => write!(f, "FragmentDia"),
77 MsType::Unknown => write!(f, "Unknown"),
78 }
79 }
80}
81
82#[derive(Clone, Debug, Serialize, Deserialize, Encode, Decode)]
84pub struct MzSpectrum {
85 pub mz: Vec<f64>,
86 pub intensity: Vec<f64>,
87}
88
89impl MzSpectrum {
90 pub fn new(mz: Vec<f64>, intensity: Vec<f64>) -> Self {
110 MzSpectrum {mz, intensity}
111 }
112
113 pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self {
114 let mut mz_vec: Vec<f64> = Vec::new();
115 let mut intensity_vec: Vec<f64> = Vec::new();
116
117 for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
118 if mz_min <= *mz && *mz <= mz_max && *intensity >= intensity_min && *intensity <= intensity_max {
119 mz_vec.push(*mz);
120 intensity_vec.push(*intensity);
121 }
122 }
123 MzSpectrum { mz: mz_vec, intensity: intensity_vec }
124 }
125
126 pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> BTreeMap<i32, MzSpectrum> {
158 let mut splits = BTreeMap::new();
159
160 for (i, &mz) in self.mz.iter().enumerate() {
161 let intensity = self.intensity[i];
162
163 let tmp_key = (mz / window_length).floor() as i32;
164
165 splits.entry(tmp_key).or_insert_with(|| MzSpectrum::new(Vec::new(), Vec::new())).mz.push(mz);
166 splits.entry(tmp_key).or_insert_with(|| MzSpectrum::new(Vec::new(), Vec::new())).intensity.push(intensity);
167 }
168
169 if overlapping {
170 let mut splits_offset = BTreeMap::new();
171
172 for (i, &mmz) in self.mz.iter().enumerate() {
173 let intensity = self.intensity[i];
174
175 let tmp_key = -((mmz + window_length / 2.0) / window_length).floor() as i32;
176
177 splits_offset.entry(tmp_key).or_insert_with(|| MzSpectrum::new(Vec::new(), Vec::new())).mz.push(mmz);
178 splits_offset.entry(tmp_key).or_insert_with(|| MzSpectrum::new(Vec::new(), Vec::new())).intensity.push(intensity);
179 }
180
181 for (key, val) in splits_offset {
182 splits.entry(key).or_insert_with(|| MzSpectrum::new(Vec::new(), Vec::new())).mz.extend(val.mz);
183 splits.entry(key).or_insert_with(|| MzSpectrum::new(Vec::new(), Vec::new())).intensity.extend(val.intensity);
184 }
185 }
186
187 splits.retain(|_, spectrum| {
188 spectrum.mz.len() >= min_peaks && spectrum.intensity.iter().cloned().max_by(
189 |a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)).unwrap_or(0.0) >= min_intensity
190 });
191
192 splits
193 }
194
195 pub fn to_centroid(&self, baseline_noise_level: i32, sigma: f64, normalize: bool) -> MzSpectrum {
196
197 let filtered = self.filter_ranged(0.0, 1e9, baseline_noise_level as f64, 1e9);
198
199 let mut cent_mz = Vec::new();
200 let mut cent_i: Vec<f64> = Vec::new();
201
202 let mut last_mz = 0.0;
203 let mut mean_mz = 0.0;
204 let mut sum_i = 0.0;
205
206 for (i, ¤t_mz) in filtered.mz.iter().enumerate() {
207 let current_intensity = filtered.intensity[i];
208
209 if current_mz - last_mz > sigma && mean_mz > 0.0 {
211 mean_mz /= sum_i;
212 cent_mz.push(mean_mz);
213 cent_i.push(sum_i);
214
215 sum_i = 0.0;
217 mean_mz = 0.0;
218 }
219
220 mean_mz += current_mz * current_intensity as f64;
221 sum_i += current_intensity;
222 last_mz = current_mz;
223 }
224
225 if mean_mz > 0.0 {
227 mean_mz /= sum_i;
228 cent_mz.push(mean_mz);
229 cent_i.push(sum_i);
230 }
231
232 if normalize {
233 let sum_i: f64 = cent_i.iter().sum();
234 cent_i = cent_i.iter().map(|&i| i / sum_i).collect();
235 }
236 MzSpectrum::new(cent_mz, cent_i)
237 }
238
239 pub fn from_collection(collection: Vec<MzSpectrum>) -> MzSpectrum {
240
241 let quantize = |mz: f64| -> i64 {
242 (mz * 1_000_000.0).round() as i64
243 };
244
245 let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
246
247 for spectrum in collection {
248 for (mz, intensity) in spectrum.mz.iter().zip(spectrum.intensity.iter()) {
249 let key = quantize(*mz);
250 let entry = combined_map.entry(key).or_insert(0.0);
251 *entry += *intensity;
252 }
253 }
254
255 let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
256 let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
257
258 MzSpectrum { mz: mz_combined, intensity: intensity_combined }
259 }
260
261 pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
262 let mut rng = rand::thread_rng();
263 self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
264
265 let ppm_mz = match right_drag {
266 true => mz * ppm / 1e6 / 2.0,
267 false => mz * ppm / 1e6,
268 };
269
270 let dist = match right_drag {
271 true => Uniform::from(mz - (ppm_mz / 3.0)..=mz + ppm_mz),
272 false => Uniform::from(mz - ppm_mz..=mz + ppm_mz),
273 };
274
275 dist.sample(rng)
276 })
277 }
278
279 pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
280 let mut rng = rand::thread_rng();
281 self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
282 let ppm_mz = mz * ppm / 1e6;
283 let dist = Normal::new(mz, ppm_mz / 3.0).unwrap();
284 dist.sample(rng)
285 })
286 }
287
288 fn add_mz_noise<F>(&self, ppm: f64, rng: &mut ThreadRng, noise_fn: F) -> Self
289 where
290 F: Fn(&mut ThreadRng, f64, f64) -> f64,
291 {
292 let mz: Vec<f64> = self.mz.iter().map(|&mz_value| noise_fn(rng, mz_value, ppm)).collect();
293 let spectrum = MzSpectrum { mz, intensity: self.intensity.clone()};
294 spectrum.to_resolution(6)
296 }
297}
298
299impl ToResolution for MzSpectrum {
300 fn to_resolution(&self, resolution: i32) -> Self {
325 let mut binned: BTreeMap<i64, f64> = BTreeMap::new();
326 let factor = 10f64.powi(resolution);
327
328 for (mz, inten) in self.mz.iter().zip(self.intensity.iter()) {
329
330 let key = (mz * factor).round() as i64;
331 let entry = binned.entry(key).or_insert(0.0);
332 *entry += *inten;
333 }
334
335 let mz: Vec<f64> = binned.keys().map(|&key| key as f64 / 10f64.powi(resolution)).collect();
336 let intensity: Vec<f64> = binned.values().cloned().collect();
337
338 MzSpectrum { mz, intensity }
339 }
340}
341
342impl Vectorized<MzSpectrumVectorized> for MzSpectrum {
343 fn vectorized(&self, resolution: i32) -> MzSpectrumVectorized {
347
348 let binned_spectrum = self.to_resolution(resolution);
349
350 let indices: Vec<i32> = binned_spectrum.mz.iter().map(|&mz| (mz * 10f64.powi(resolution)).round() as i32).collect();
352
353 MzSpectrumVectorized {
354 resolution,
355 indices,
356 values: binned_spectrum.intensity,
357 }
358 }
359}
360
361impl Display for MzSpectrum {
363 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
364
365 let (mz, i) = self.mz.iter()
366 .zip(&self.intensity)
367 .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
368 .unwrap();
369
370 write!(f, "MzSpectrum(data points: {}, max by intensity:({}, {}))", self.mz.len(), format!("{:.3}", mz), i)
371 }
372}
373
374impl std::ops::Add for MzSpectrum {
375 type Output = Self;
376 fn add(self, other: Self) -> MzSpectrum {
394 let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
395
396 let quantize = |mz: f64| -> i64 {
398 (mz * 1_000_000.0).round() as i64
399 };
400
401 for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
403 let key = quantize(*mz);
404 combined_map.insert(key, *intensity);
405 }
406
407 for (mz, intensity) in other.mz.iter().zip(other.intensity.iter()) {
409 let key = quantize(*mz);
410 let entry = combined_map.entry(key).or_insert(0.0);
411 *entry += *intensity;
412 }
413
414 let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
416 let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
417
418 MzSpectrum { mz: mz_combined, intensity: intensity_combined }
419 }
420}
421
422impl std::ops::Mul<f64> for MzSpectrum {
423 type Output = Self;
424 fn mul(self, scale: f64) -> Self::Output{
425 let mut scaled_intensities: Vec<f64> = vec![0.0; self.intensity.len()];
426 for (idx,intensity) in self.intensity.iter().enumerate(){
427 scaled_intensities[idx] = scale*intensity;
428 }
429 Self{ mz: self.mz.clone(), intensity: scaled_intensities}
430
431 }
432}
433
434impl std::ops::Sub for MzSpectrum {
435 type Output = Self;
436 fn sub(self, other: Self) -> Self::Output {
437 let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
438
439 let quantize = |mz: f64| -> i64 {
441 (mz * 1_000_000.0).round() as i64
442 };
443
444 for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
446 let key = quantize(*mz);
447 combined_map.insert(key, *intensity);
448 }
449
450 for (mz, intensity) in other.mz.iter().zip(other.intensity.iter()) {
452 let key = quantize(*mz);
453 let entry = combined_map.entry(key).or_insert(0.0);
454 *entry -= *intensity;
455 }
456
457 let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
459 let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
460
461 MzSpectrum { mz: mz_combined, intensity: intensity_combined }
462 }
463}
464
465#[derive(Clone, Debug)]
467pub struct IndexedMzSpectrum {
468 pub index: Vec<i32>,
469 pub mz_spectrum: MzSpectrum,
470}
471
472impl Default for IndexedMzSpectrum {
474 fn default() -> Self {
475 IndexedMzSpectrum { index: Vec::new(), mz_spectrum: MzSpectrum::new(Vec::new(), Vec::new()) }
476 }
477}
478
479impl IndexedMzSpectrum {
480 pub fn new(index: Vec<i32>, mz: Vec<f64>, intensity: Vec<f64>) -> Self {
497 IndexedMzSpectrum { index, mz_spectrum: MzSpectrum { mz, intensity } }
498 }
499 pub fn to_resolution(&self, resolution: i32) -> IndexedMzSpectrum {
519
520 let mut mz_bins: BTreeMap<i64, (f64, Vec<i64>)> = BTreeMap::new();
521 let factor = 10f64.powi(resolution);
522
523 for ((mz, intensity), tof_val) in self.mz_spectrum.mz.iter().zip(self.mz_spectrum.intensity.iter()).zip(&self.index) {
524 let key = (mz * factor).round() as i64;
525 let entry = mz_bins.entry(key).or_insert((0.0, Vec::new()));
526 entry.0 += *intensity;
527 entry.1.push(*tof_val as i64);
528 }
529
530 let mz: Vec<f64> = mz_bins.keys().map(|&key| key as f64 / factor).collect();
531 let intensity: Vec<f64> = mz_bins.values().map(|(intensity, _)| *intensity).collect();
532 let tof: Vec<i32> = mz_bins.values().map(|(_, tof_vals)| {
533 let sum: i64 = tof_vals.iter().sum();
534 let count: i32 = tof_vals.len() as i32;
535 (sum as f64 / count as f64).round() as i32
536 }).collect();
537
538 IndexedMzSpectrum {index: tof, mz_spectrum: MzSpectrum {mz, intensity } }
539 }
540
541 pub fn vectorized(&self, resolution: i32) -> IndexedMzSpectrumVectorized {
562
563 let binned_spectrum = self.to_resolution(resolution);
564
565 let indices: Vec<i32> = binned_spectrum.mz_spectrum.mz.iter()
567 .map(|&mz| (mz * 10f64.powi(resolution)).round() as i32).collect();
568
569 IndexedMzSpectrumVectorized {
570 index: binned_spectrum.index,
571 mz_vector: MzSpectrumVectorized {
572 resolution,
573 indices,
574 values: binned_spectrum.mz_spectrum.intensity,
575 }
576 }
577 }
578
579 pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self {
580 let mut mz_vec: Vec<f64> = Vec::new();
581 let mut intensity_vec: Vec<f64> = Vec::new();
582 let mut index_vec: Vec<i32> = Vec::new();
583
584 for ((&mz, &intensity), &index) in self.mz_spectrum.mz.iter().zip(self.mz_spectrum.intensity.iter()).zip(self.index.iter()) {
585 if mz_min <= mz && mz <= mz_max && intensity >= intensity_min && intensity <= intensity_max {
586 mz_vec.push(mz);
587 intensity_vec.push(intensity);
588 index_vec.push(index);
589 }
590 }
591 IndexedMzSpectrum { index: index_vec, mz_spectrum: MzSpectrum { mz: mz_vec, intensity: intensity_vec } }
592 }
593}
594
595impl Display for IndexedMzSpectrum {
596 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
597 let (mz, i) = self.mz_spectrum.mz.iter()
598 .zip(&self.mz_spectrum.intensity)
599 .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
600 .unwrap();
601
602 write!(f, "IndexedMzSpectrum(data points: {}, max by intensity:({}, {}))", self.mz_spectrum.mz.len(), format!("{:.3}", mz), i)
603 }
604}
605
606#[derive(Clone)]
607pub struct MzSpectrumVectorized {
608 pub resolution: i32,
609 pub indices: Vec<i32>,
610 pub values: Vec<f64>,
611}
612
613impl MzSpectrumVectorized {
614 fn get_max_index(&self) -> usize {
624 let base: i32 = 10;
625 let max_mz: i32 = 2000;
626 let max_index: usize = (max_mz*base.pow(self.resolution as u32)) as usize;
627 max_index
628 }
629
630 pub fn to_dense(&self, max_index: Option<usize>) -> DVector<f64> {
631 let max_index = match max_index {
632 Some(max_index) => max_index,
633 None => self.get_max_index(),
634 };
635 let mut dense_intensities: DVector<f64> = DVector::<f64>::zeros(max_index + 1);
636 for (&index, &intensity) in self.indices.iter().zip(self.values.iter()) {
637 if (index as usize) <= max_index {
638 dense_intensities[index as usize] = intensity;
639 }
640 }
641 dense_intensities
642 }
643 pub fn to_dense_spectrum(&self, max_index: Option<usize>) -> MzSpectrumVectorized{
644 let max_index = match max_index {
645 Some(max_index) => max_index,
646 None => self.get_max_index(),
647 };
648 let dense_intensities: Vec<f64> = self.to_dense(Some(max_index)).data.into();
649 let dense_indices: Vec<i32> = (0..=max_index).map(|i| i as i32).collect();
650 let dense_spectrum: MzSpectrumVectorized = MzSpectrumVectorized { resolution: (self.resolution), indices: (dense_indices), values: (dense_intensities) };
651 dense_spectrum
652 }
653}
654
655#[derive(Clone)]
656pub struct IndexedMzSpectrumVectorized {
657 pub index: Vec<i32>,
658 pub mz_vector: MzSpectrumVectorized,
659}
660
661
662
663
664