mscore/data/
spectrum.rs

1use std::fmt;
2use std::collections::BTreeMap;
3use std::sync::Arc;
4use nalgebra::DVector;
5use std::fmt::{Display, Formatter};
6use bincode::{Decode, Encode};
7use serde::{Serialize, Deserialize};
8
9extern crate rand;
10
11use rand::distributions::{Uniform, Distribution};
12use rand::rngs::ThreadRng;
13use statrs::distribution::Normal;
14
15/// Represents a vectorized mass spectrum.
16pub trait ToResolution {
17    fn to_resolution(&self, resolution: i32) -> Self;
18}
19
20/// Vectorized representation for Structs holding m/z values and intensities.
21pub trait Vectorized<T> {
22    fn vectorized(&self, resolution: i32) -> T;
23}
24
25/// Represents the type of spectrum.
26///
27/// # Description
28///
29/// The `SpecType` enum is used to distinguish between precursor and fragment spectra.
30///
31#[derive(Clone, PartialEq, Debug, Serialize, Deserialize, Encode, Decode)]
32pub enum MsType {
33    Precursor,
34    FragmentDda,
35    FragmentDia,
36    Unknown,
37}
38
39impl MsType {
40    /// Returns the `MsType` enum corresponding to the given integer value.
41    ///
42    /// # Arguments
43    ///
44    /// * `ms_type` - An integer value corresponding to the `MsType` enum.
45    ///
46    pub fn new(ms_type: i32) -> MsType {
47        match ms_type {
48            0 => MsType::Precursor,
49            8 => MsType::FragmentDda,
50            9 => MsType::FragmentDia,
51            _ => MsType::Unknown,
52        }
53    }
54
55    /// Returns the integer value corresponding to the `MsType` enum.
56    pub fn ms_type_numeric(&self) -> i32 {
57        match self {
58            MsType::Precursor => 0,
59            MsType::FragmentDda => 8,
60            MsType::FragmentDia => 9,
61            MsType::Unknown => -1,
62        }
63    }
64}
65
66impl Default for MsType {
67    fn default() -> Self {
68        MsType::Unknown
69    }
70}
71
72impl Display for MsType {
73    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
74        match self {
75            MsType::Precursor => write!(f, "Precursor"),
76            MsType::FragmentDda => write!(f, "FragmentDda"),
77            MsType::FragmentDia => write!(f, "FragmentDia"),
78            MsType::Unknown => write!(f, "Unknown"),
79        }
80    }
81}
82
83/// Represents a mass spectrum with associated m/z values and intensities.
84///
85/// Uses Arc<Vec<T>> for efficient cloning - clone is O(1) instead of O(n).
86#[derive(Clone, Debug, Serialize, Deserialize)]
87pub struct MzSpectrum {
88    pub mz: Arc<Vec<f64>>,
89    pub intensity: Arc<Vec<f64>>,
90}
91
92// Manual bincode implementation for Arc compatibility
93impl Encode for MzSpectrum {
94    fn encode<E: bincode::enc::Encoder>(&self, encoder: &mut E) -> Result<(), bincode::error::EncodeError> {
95        // Encode the inner Vec directly
96        bincode::Encode::encode(&*self.mz, encoder)?;
97        bincode::Encode::encode(&*self.intensity, encoder)?;
98        Ok(())
99    }
100}
101
102impl<Context> Decode<Context> for MzSpectrum {
103    fn decode<D: bincode::de::Decoder<Context = Context>>(decoder: &mut D) -> Result<Self, bincode::error::DecodeError> {
104        let mz: Vec<f64> = bincode::Decode::decode(decoder)?;
105        let intensity: Vec<f64> = bincode::Decode::decode(decoder)?;
106        Ok(MzSpectrum::new(mz, intensity))
107    }
108}
109
110impl<'de, Context> bincode::BorrowDecode<'de, Context> for MzSpectrum {
111    fn borrow_decode<D: bincode::de::BorrowDecoder<'de, Context = Context>>(decoder: &mut D) -> Result<Self, bincode::error::DecodeError> {
112        let mz: Vec<f64> = bincode::BorrowDecode::borrow_decode(decoder)?;
113        let intensity: Vec<f64> = bincode::BorrowDecode::borrow_decode(decoder)?;
114        Ok(MzSpectrum::new(mz, intensity))
115    }
116}
117
118impl MzSpectrum {
119    /// Constructs a new `MzSpectrum`.
120    ///
121    /// # Arguments
122    ///
123    /// * `mz` - A vector of m/z values.
124    /// * `intensity` - A vector of intensity values corresponding to the m/z values.
125    ///
126    /// # Panics
127    ///
128    /// Panics if the lengths of `mz` and `intensity` are not the same. (actually, it doesn't at the moment, planning on adding this later)
129    ///
130    /// # Example
131    ///
132    /// ```rust
133    /// # use mscore::data::spectrum::MzSpectrum;
134    /// let spectrum = MzSpectrum::new(vec![100.0, 200.0], vec![10.0, 20.0]);
135    /// assert_eq!(*spectrum.mz, vec![100.0, 200.0]);
136    /// assert_eq!(*spectrum.intensity, vec![10.0, 20.0]);
137    /// ```
138    pub fn new(mz: Vec<f64>, intensity: Vec<f64>) -> Self {
139        MzSpectrum {
140            mz: Arc::new(mz),
141            intensity: Arc::new(intensity),
142        }
143    }
144
145    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self {
146        let mut mz_vec: Vec<f64> = Vec::new();
147        let mut intensity_vec: Vec<f64> = Vec::new();
148
149        for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
150            if mz_min <= *mz && *mz <= mz_max && *intensity >= intensity_min && *intensity <= intensity_max {
151                mz_vec.push(*mz);
152                intensity_vec.push(*intensity);
153            }
154        }
155        MzSpectrum::new(mz_vec, intensity_vec)
156    }
157
158    /// Splits the spectrum into a collection of windows based on m/z values.
159    ///
160    /// This function divides the spectrum into smaller spectra (windows) based on a specified window length.
161    /// Each window contains peaks from the original spectrum that fall within the m/z range of that window.
162    ///
163    /// # Arguments
164    ///
165    /// * `window_length`: The size (in terms of m/z values) of each window.
166    ///
167    /// * `overlapping`: If `true`, each window will overlap with its neighboring windows by half of the `window_length`.
168    ///   This means that a peak may belong to multiple windows. If `false`, windows do not overlap.
169    ///
170    /// * `min_peaks`: The minimum number of peaks a window must have to be retained in the result.
171    ///
172    /// * `min_intensity`: The minimum intensity value a window must have (in its highest intensity peak) to be retained in the result.
173    ///
174    /// # Returns
175    ///
176    /// A `BTreeMap` where the keys represent the window indices and the values are the spectra (`MzSpectrum`) within those windows.
177    /// Windows that do not meet the criteria of having at least `min_peaks` peaks or a highest intensity peak
178    /// greater than or equal to `min_intensity` are discarded.
179    ///
180    /// # Example
181    ///
182    /// ```rust
183    /// # use mscore::data::spectrum::MzSpectrum;
184    /// let spectrum = MzSpectrum::new(vec![100.0, 101.0, 102.5, 103.0], vec![10.0, 20.0, 30.0, 40.0]);
185    /// let windowed_spectrum = spectrum.to_windows(1.0, false, 1, 10.0);
186    /// assert!(windowed_spectrum.contains_key(&100));
187    /// assert!(windowed_spectrum.contains_key(&102));
188    /// ```
189    pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> BTreeMap<i32, MzSpectrum> {
190        // Build up vectors first, then wrap in MzSpectrum at the end
191        let mut splits: BTreeMap<i32, (Vec<f64>, Vec<f64>)> = BTreeMap::new();
192
193        for (i, &mz) in self.mz.iter().enumerate() {
194            let intensity = self.intensity[i];
195            let tmp_key = (mz / window_length).floor() as i32;
196
197            let entry = splits.entry(tmp_key).or_insert_with(|| (Vec::new(), Vec::new()));
198            entry.0.push(mz);
199            entry.1.push(intensity);
200        }
201
202        if overlapping {
203            let mut splits_offset: BTreeMap<i32, (Vec<f64>, Vec<f64>)> = BTreeMap::new();
204
205            for (i, &mmz) in self.mz.iter().enumerate() {
206                let intensity = self.intensity[i];
207                let tmp_key = -((mmz + window_length / 2.0) / window_length).floor() as i32;
208
209                let entry = splits_offset.entry(tmp_key).or_insert_with(|| (Vec::new(), Vec::new()));
210                entry.0.push(mmz);
211                entry.1.push(intensity);
212            }
213
214            for (key, (mz_vec, int_vec)) in splits_offset {
215                let entry = splits.entry(key).or_insert_with(|| (Vec::new(), Vec::new()));
216                entry.0.extend(mz_vec);
217                entry.1.extend(int_vec);
218            }
219        }
220
221        // Filter and convert to MzSpectrum
222        splits.into_iter()
223            .filter(|(_, (mz_vec, int_vec))| {
224                mz_vec.len() >= min_peaks && int_vec.iter().cloned().max_by(
225                    |a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)).unwrap_or(0.0) >= min_intensity
226            })
227            .map(|(key, (mz_vec, int_vec))| (key, MzSpectrum::new(mz_vec, int_vec)))
228            .collect()
229    }
230
231    pub fn to_centroid(&self, baseline_noise_level: i32, sigma: f64, normalize: bool) -> MzSpectrum {
232
233        let filtered = self.filter_ranged(0.0, 1e9, baseline_noise_level as f64, 1e9);
234
235        let mut cent_mz = Vec::new();
236        let mut cent_i: Vec<f64> = Vec::new();
237
238        let mut last_mz = 0.0;
239        let mut mean_mz = 0.0;
240        let mut sum_i = 0.0;
241
242        for (i, &current_mz) in filtered.mz.iter().enumerate() {
243            let current_intensity = filtered.intensity[i];
244
245            // If peak is too far away from last peak, push centroid
246            if current_mz - last_mz > sigma && mean_mz > 0.0 {
247                mean_mz /= sum_i;
248                cent_mz.push(mean_mz);
249                cent_i.push(sum_i);
250
251                // Start new centroid
252                sum_i = 0.0;
253                mean_mz = 0.0;
254            }
255
256            mean_mz += current_mz * current_intensity as f64;
257            sum_i += current_intensity;
258            last_mz = current_mz;
259        }
260
261        // Push back last remaining centroid
262        if mean_mz > 0.0 {
263            mean_mz /= sum_i;
264            cent_mz.push(mean_mz);
265            cent_i.push(sum_i);
266        }
267
268        if normalize {
269            let sum_i: f64 = cent_i.iter().sum();
270            cent_i = cent_i.iter().map(|&i| i / sum_i).collect();
271        }
272        MzSpectrum::new(cent_mz, cent_i)
273    }
274
275    pub fn from_collection(collection: Vec<MzSpectrum>) -> MzSpectrum {
276
277        let quantize = |mz: f64| -> i64 {
278            (mz * 1_000_000.0).round() as i64
279        };
280
281        let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
282
283        for spectrum in collection {
284            for (mz, intensity) in spectrum.mz.iter().zip(spectrum.intensity.iter()) {
285                let key = quantize(*mz);
286                let entry = combined_map.entry(key).or_insert(0.0);
287                *entry += *intensity;
288            }
289        }
290
291        let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
292        let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
293
294        MzSpectrum::new(mz_combined, intensity_combined)
295    }
296
297    pub fn add_mz_noise_uniform(&self, ppm: f64, right_drag: bool) -> Self {
298        let mut rng = rand::thread_rng();
299        self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
300
301            let ppm_mz = match right_drag {
302                true => mz * ppm / 1e6 / 2.0,
303                false => mz * ppm / 1e6,
304            };
305
306            let dist = match right_drag {
307                true => Uniform::from(mz - (ppm_mz / 3.0)..=mz + ppm_mz),
308                false => Uniform::from(mz - ppm_mz..=mz + ppm_mz),
309            };
310
311            dist.sample(rng)
312        })
313    }
314
315    pub fn add_mz_noise_normal(&self, ppm: f64) -> Self {
316        let mut rng = rand::thread_rng();
317        self.add_mz_noise(ppm, &mut rng, |rng, mz, ppm| {
318            let ppm_mz = mz * ppm / 1e6;
319            let dist = Normal::new(mz, ppm_mz / 3.0).unwrap();
320            dist.sample(rng)
321        })
322    }
323
324    fn add_mz_noise<F>(&self, ppm: f64, rng: &mut ThreadRng, noise_fn: F) -> Self
325        where
326            F: Fn(&mut ThreadRng, f64, f64) -> f64,
327    {
328        let mz: Vec<f64> = self.mz.iter().map(|&mz_value| noise_fn(rng, mz_value, ppm)).collect();
329        // Clone intensity Arc (O(1)) and wrap new mz in Arc
330        let spectrum = MzSpectrum { mz: Arc::new(mz), intensity: self.intensity.clone() };
331        // Sort the spectrum by m/z values and potentially sum up intensities at the same m/z value
332        spectrum.to_resolution(6)
333    }
334}
335
336impl ToResolution for MzSpectrum {
337    /// Bins the spectrum's m/z values to a given resolution and sums the intensities.
338    ///
339    /// # Arguments
340    ///
341    /// * `resolution` - The desired resolution in terms of decimal places. For instance, a resolution of 2
342    /// would bin m/z values to two decimal places.
343    ///
344    /// # Returns
345    ///
346    /// A new `MzSpectrum` where m/z values are binned according to the given resolution.
347    ///
348    /// # Example
349    ///
350    /// ```rust
351    /// # use mscore::data::spectrum::MzSpectrum;
352    /// # use mscore::data::spectrum::ToResolution;
353    /// let spectrum = MzSpectrum::new(vec![100.123, 100.121, 100.131], vec![10.0, 20.0, 30.0]);
354    /// let binned_spectrum_1 = spectrum.to_resolution(1);
355    /// let binned_spectrum_2 = spectrum.to_resolution(2);
356    /// // assert_eq!(*binned_spectrum_1.mz, vec![100.1]);
357    /// assert_eq!(*binned_spectrum_1.intensity, vec![60.0]);
358    /// assert_eq!(*binned_spectrum_2.mz, vec![100.12, 100.13]);
359    /// assert_eq!(*binned_spectrum_2.intensity, vec![30.0, 30.0]);
360    /// ```
361    fn to_resolution(&self, resolution: i32) -> Self {
362        let mut binned: BTreeMap<i64, f64> = BTreeMap::new();
363        let factor = 10f64.powi(resolution);
364
365        for (mz, inten) in self.mz.iter().zip(self.intensity.iter()) {
366
367            let key = (mz * factor).round() as i64;
368            let entry = binned.entry(key).or_insert(0.0);
369            *entry += *inten;
370        }
371
372        let mz: Vec<f64> = binned.keys().map(|&key| key as f64 / 10f64.powi(resolution)).collect();
373        let intensity: Vec<f64> = binned.values().cloned().collect();
374
375        MzSpectrum::new(mz, intensity)
376    }
377}
378
379impl Vectorized<MzSpectrumVectorized> for MzSpectrum {
380    /// Convert the `MzSpectrum` to a `MzSpectrumVectorized` using the given resolution for binning.
381    ///
382    /// After binning to the desired resolution, the binned m/z values are translated into integer indices.
383    fn vectorized(&self, resolution: i32) -> MzSpectrumVectorized {
384
385        let binned_spectrum = self.to_resolution(resolution);
386
387        // Translate the m/z values into integer indices
388        let indices: Vec<i32> = binned_spectrum.mz.iter().map(|&mz| (mz * 10f64.powi(resolution)).round() as i32).collect();
389
390        MzSpectrumVectorized {
391            resolution,
392            indices,
393            values: (*binned_spectrum.intensity).clone(),
394        }
395    }
396}
397
398/// Formats the `MzSpectrum` for display.
399impl Display for MzSpectrum {
400    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
401
402        let (mz, i) = self.mz.iter()
403            .zip(self.intensity.iter())
404            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
405            .unwrap();
406
407        write!(f, "MzSpectrum(data points: {}, max  by intensity:({}, {}))", self.mz.len(), format!("{:.3}", mz), i)
408    }
409}
410
411impl std::ops::Add for MzSpectrum {
412    type Output = Self;
413    /// Combines two `MzSpectrum` instances by summing up the intensities of matching m/z values.
414    ///
415    /// # Description
416    /// Each m/z value is quantized to retain at least 6 decimals. If two spectra have m/z values
417    /// that quantize to the same integer value, their intensities are summed.
418    ///
419    /// # Example
420    /// ```
421    /// # use mscore::data::spectrum::MzSpectrum;
422    /// let spectrum1 = MzSpectrum::new(vec![100.523, 101.923], vec![10.0, 20.0]);
423    /// let spectrum2 = MzSpectrum::new(vec![101.235, 105.112], vec![15.0, 30.0]);
424    ///
425    /// let combined = spectrum1 + spectrum2;
426    ///
427    /// assert_eq!(*combined.mz, vec![100.523, 101.235, 101.923, 105.112]);
428    /// assert_eq!(*combined.intensity, vec![10.0, 15.0, 20.0, 30.0]);
429    /// ```
430    fn add(self, other: Self) -> MzSpectrum {
431        let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
432
433        // Helper to quantize mz to an integer key
434        let quantize = |mz: f64| -> i64 {
435            (mz * 1_000_000.0).round() as i64
436        };
437
438        // Add the m/z and intensities from the first spectrum to the map
439        for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
440            let key = quantize(*mz);
441            combined_map.insert(key, *intensity);
442        }
443
444        // Combine the second spectrum into the map
445        for (mz, intensity) in other.mz.iter().zip(other.intensity.iter()) {
446            let key = quantize(*mz);
447            let entry = combined_map.entry(key).or_insert(0.0);
448            *entry += *intensity;
449        }
450
451        // Convert the combined map back into two Vec<f64>
452        let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
453        let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
454
455        MzSpectrum::new(mz_combined, intensity_combined)
456    }
457}
458
459impl std::ops::Mul<f64> for MzSpectrum {
460    type Output = Self;
461    fn mul(self, scale: f64) -> Self::Output {
462        let scaled_intensities: Vec<f64> = self.intensity.iter().map(|i| scale * i).collect();
463        // Clone mz Arc (O(1)), wrap new intensities in Arc
464        Self { mz: self.mz.clone(), intensity: Arc::new(scaled_intensities) }
465    }
466}
467
468impl std::ops::Sub for MzSpectrum {
469    type Output = Self;
470    fn sub(self, other: Self) -> Self::Output {
471        let mut combined_map: BTreeMap<i64, f64> = BTreeMap::new();
472
473        // Helper to quantize mz to an integer key
474        let quantize = |mz: f64| -> i64 {
475            (mz * 1_000_000.0).round() as i64
476        };
477
478        // Add the m/z and intensities from the first spectrum to the map
479        for (mz, intensity) in self.mz.iter().zip(self.intensity.iter()) {
480            let key = quantize(*mz);
481            combined_map.insert(key, *intensity);
482        }
483
484        // Combine the second spectrum into the map
485        for (mz, intensity) in other.mz.iter().zip(other.intensity.iter()) {
486            let key = quantize(*mz);
487            let entry = combined_map.entry(key).or_insert(0.0);
488            *entry -= *intensity;
489        }
490
491        // Convert the combined map back into two Vec<f64>
492        let mz_combined: Vec<f64> = combined_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
493        let intensity_combined: Vec<f64> = combined_map.values().cloned().collect();
494
495        MzSpectrum::new(mz_combined, intensity_combined)
496    }
497}
498
499/// Represents a mass spectrum with associated m/z indices, m/z values, and intensities
500#[derive(Clone, Debug)]
501pub struct IndexedMzSpectrum {
502    pub index: Vec<i32>,
503    pub mz_spectrum: MzSpectrum,
504}
505
506// implement default (empty IndexedMzSpectrum) constructor
507impl Default for IndexedMzSpectrum {
508    fn default() -> Self {
509        IndexedMzSpectrum { index: Vec::new(), mz_spectrum: MzSpectrum::new(Vec::new(), Vec::new()) }
510    }
511}
512
513impl IndexedMzSpectrum {
514    /// Creates a new `TOFMzSpectrum` instance.
515    ///
516    /// # Arguments
517    ///
518    /// * `index` - A vector containing the mz index, e.g., time-of-flight values.
519    /// * `mz` - A vector containing the m/z values.
520    /// * `intensity` - A vector containing the intensity values.
521    ///
522    /// # Examples
523    ///
524    /// ```
525    /// use mscore::data::spectrum::IndexedMzSpectrum;
526    /// use mscore::data::spectrum::MzSpectrum;
527    ///
528    /// let spectrum = IndexedMzSpectrum::new(vec![1000, 2000], vec![100.5, 200.5], vec![50.0, 60.0]);
529    /// ```
530    pub fn new(index: Vec<i32>, mz: Vec<f64>, intensity: Vec<f64>) -> Self {
531        IndexedMzSpectrum { index, mz_spectrum: MzSpectrum::new(mz, intensity) }
532    }
533
534    /// Create an IndexedMzSpectrum from an existing MzSpectrum, sharing the Arc-wrapped vectors.
535    ///
536    /// This is more efficient than using `new()` when you already have an MzSpectrum,
537    /// as it avoids copying the mz and intensity vectors.
538    ///
539    /// # Arguments
540    ///
541    /// * `index` - A vector containing the index values.
542    /// * `mz_spectrum` - An existing MzSpectrum to share.
543    pub fn from_mz_spectrum(index: Vec<i32>, mz_spectrum: MzSpectrum) -> Self {
544        IndexedMzSpectrum { index, mz_spectrum }
545    }
546
547    /// Bins the spectrum based on a given m/z resolution, summing intensities and averaging index values
548    /// for m/z values that fall into the same bin.
549    ///
550    /// # Arguments
551    ///
552    /// * `resolution` - The desired m/z resolution for binning.
553    ///
554    /// # Examples
555    ///
556    /// ```
557    /// use mscore::data::spectrum::IndexedMzSpectrum;
558    ///
559    /// let spectrum = IndexedMzSpectrum::new(vec![1000, 2000], vec![100.42, 100.43], vec![50.0, 60.0]);
560    /// let binned_spectrum = spectrum.to_resolution(1);
561    ///
562    /// assert_eq!(*binned_spectrum.mz_spectrum.mz, vec![100.4]);
563    /// assert_eq!(*binned_spectrum.mz_spectrum.intensity, vec![110.0]);
564    /// assert_eq!(binned_spectrum.index, vec![1500]);
565    /// ```
566    pub fn to_resolution(&self, resolution: i32) -> IndexedMzSpectrum {
567
568        let mut mz_bins: BTreeMap<i64, (f64, Vec<i64>)> = BTreeMap::new();
569        let factor = 10f64.powi(resolution);
570
571        for ((mz, intensity), tof_val) in self.mz_spectrum.mz.iter().zip(self.mz_spectrum.intensity.iter()).zip(&self.index) {
572            let key = (mz * factor).round() as i64;
573            let entry = mz_bins.entry(key).or_insert((0.0, Vec::new()));
574            entry.0 += *intensity;
575            entry.1.push(*tof_val as i64);
576        }
577
578        let mz: Vec<f64> = mz_bins.keys().map(|&key| key as f64 / factor).collect();
579        let intensity: Vec<f64> = mz_bins.values().map(|(intensity, _)| *intensity).collect();
580        let tof: Vec<i32> = mz_bins.values().map(|(_, tof_vals)| {
581            let sum: i64 = tof_vals.iter().sum();
582            let count: i32 = tof_vals.len() as i32;
583            (sum as f64 / count as f64).round() as i32
584        }).collect();
585
586        IndexedMzSpectrum {index: tof, mz_spectrum: MzSpectrum::new(mz, intensity) }
587    }
588
589    /// Convert the `IndexedMzSpectrum` to a `IndexedMzVector` using the given resolution for binning.
590    ///
591    /// After binning to the desired resolution, the binned m/z values are translated into integer indices.
592    ///
593    /// # Arguments
594    ///
595    /// * `resolution` - The desired m/z resolution for binning.
596    ///
597    /// # Examples
598    ///
599    /// ```
600    /// use mscore::data::spectrum::IndexedMzSpectrum;
601    ///
602    /// let spectrum = IndexedMzSpectrum::new(vec![1000, 2000], vec![100.42, 100.43], vec![50.0, 60.0]);
603    /// let binned_spectrum = spectrum.to_resolution(1);
604    ///
605    /// assert_eq!(*binned_spectrum.mz_spectrum.mz, vec![100.4]);
606    /// assert_eq!(*binned_spectrum.mz_spectrum.intensity, vec![110.0]);
607    /// assert_eq!(binned_spectrum.index, vec![1500]);
608    /// ```
609    pub fn vectorized(&self, resolution: i32) -> IndexedMzSpectrumVectorized {
610
611        let binned_spectrum = self.to_resolution(resolution);
612
613        // Translate the m/z values into integer indices
614        let indices: Vec<i32> = binned_spectrum.mz_spectrum.mz.iter()
615            .map(|&mz| (mz * 10f64.powi(resolution)).round() as i32).collect();
616
617        IndexedMzSpectrumVectorized {
618            index: binned_spectrum.index,
619            mz_vector: MzSpectrumVectorized {
620                resolution,
621                indices,
622                values: (*binned_spectrum.mz_spectrum.intensity).clone(),
623            }
624        }
625    }
626
627    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self {
628        let mut mz_vec: Vec<f64> = Vec::new();
629        let mut intensity_vec: Vec<f64> = Vec::new();
630        let mut index_vec: Vec<i32> = Vec::new();
631
632        for ((&mz, &intensity), &index) in self.mz_spectrum.mz.iter().zip(self.mz_spectrum.intensity.iter()).zip(self.index.iter()) {
633            if mz_min <= mz && mz <= mz_max && intensity >= intensity_min && intensity <= intensity_max {
634                mz_vec.push(mz);
635                intensity_vec.push(intensity);
636                index_vec.push(index);
637            }
638        }
639        IndexedMzSpectrum { index: index_vec, mz_spectrum: MzSpectrum::new(mz_vec, intensity_vec) }
640    }
641}
642
643impl Display for IndexedMzSpectrum {
644    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
645        let (mz, i) = self.mz_spectrum.mz.iter()
646            .zip(self.mz_spectrum.intensity.iter())
647            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
648            .unwrap();
649
650        write!(f, "IndexedMzSpectrum(data points: {}, max  by intensity:({}, {}))", self.mz_spectrum.mz.len(), format!("{:.3}", mz), i)
651    }
652}
653
654#[derive(Clone)]
655pub struct MzSpectrumVectorized {
656    pub resolution: i32,
657    pub indices: Vec<i32>,
658    pub values: Vec<f64>,
659}
660
661impl MzSpectrumVectorized {
662    /// Convert the `MzVector` to a dense vector with a specified maximum index.
663    ///
664    /// The resulting vector has length equal to `max_index + 1` and its values
665    /// are the intensities corresponding to each index. Indices with no associated intensity will have a value of 0.
666    ///
667    /// # Arguments
668    ///
669    /// * `max_index` - The maximum index for the dense vector.
670    
671    fn get_max_index(&self) -> usize {
672        let base: i32 = 10;
673        let max_mz: i32 = 2000;
674        let max_index: usize = (max_mz*base.pow(self.resolution as u32)) as usize;
675        max_index
676    }
677
678    pub fn to_dense(&self, max_index: Option<usize>) -> DVector<f64> {
679        let max_index = match max_index {
680            Some(max_index) => max_index,
681            None => self.get_max_index(),
682        };
683        let mut dense_intensities: DVector<f64> = DVector::<f64>::zeros(max_index + 1);
684        for (&index, &intensity) in self.indices.iter().zip(self.values.iter()) {
685            if (index as usize) <= max_index {
686                dense_intensities[index as usize] = intensity;
687            }
688        }
689        dense_intensities
690    }
691    pub fn to_dense_spectrum(&self, max_index: Option<usize>) -> MzSpectrumVectorized{
692        let max_index = match max_index {
693            Some(max_index) => max_index,
694            None => self.get_max_index(),
695        };
696        let dense_intensities: Vec<f64> = self.to_dense(Some(max_index)).data.into();
697        let dense_indices: Vec<i32> = (0..=max_index).map(|i| i as i32).collect();
698        let dense_spectrum: MzSpectrumVectorized = MzSpectrumVectorized { resolution: (self.resolution), indices: (dense_indices), values: (dense_intensities) };
699        dense_spectrum
700    }
701}
702
703#[derive(Clone)]
704pub struct IndexedMzSpectrumVectorized {
705    pub index: Vec<i32>,
706    pub mz_vector: MzSpectrumVectorized,
707}