mscore/algorithm/
isotope.rs

1extern crate statrs;
2
3use rayon::prelude::*;
4use rayon::ThreadPoolBuilder;
5use std::collections::{BTreeMap, HashMap, HashSet};
6
7use crate::chemistry::constants::{MASS_NEUTRON, MASS_PROTON};
8use crate::chemistry::elements::{atoms_isotopic_weights, isotopic_abundance};
9use crate::data::peptide::PeptideIon;
10use crate::data::spectrum::MzSpectrum;
11use crate::data::spectrum::ToResolution;
12use statrs::distribution::{Continuous, Normal};
13
14/// convolve two distributions of masses and abundances
15///
16/// Arguments:
17///
18/// * `dist_a` - first distribution of masses and abundances
19/// * `dist_b` - second distribution of masses and abundances
20/// * `mass_tolerance` - mass tolerance for combining peaks
21/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
22/// * `max_results` - maximum number of peaks to include in the result
23///
24/// Returns:
25///
26/// * `Vec<(f64, f64)>` - combined distribution of masses and abundances
27///
28/// # Examples
29///
30/// ```
31/// use mscore::algorithm::isotope::convolve;
32///
33/// let dist_a = vec![(100.0, 0.5), (101.0, 0.5)];
34/// let dist_b = vec![(100.0, 0.5), (101.0, 0.5)];
35/// let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200);
36/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
37/// ```
38pub fn convolve(
39    dist_a: &Vec<(f64, f64)>,
40    dist_b: &Vec<(f64, f64)>,
41    mass_tolerance: f64,
42    abundance_threshold: f64,
43    max_results: usize,
44) -> Vec<(f64, f64)> {
45    let mut result: Vec<(f64, f64)> = Vec::new();
46
47    for (mass_a, abundance_a) in dist_a {
48        for (mass_b, abundance_b) in dist_b {
49            let combined_mass = mass_a + mass_b;
50            let combined_abundance = abundance_a * abundance_b;
51
52            // Skip entries with combined abundance below the threshold
53            if combined_abundance < abundance_threshold {
54                continue;
55            }
56
57            // Insert or update the combined mass in the result distribution
58            if let Some(entry) = result
59                .iter_mut()
60                .find(|(m, _)| (*m - combined_mass).abs() < mass_tolerance)
61            {
62                entry.1 += combined_abundance;
63            } else {
64                result.push((combined_mass, combined_abundance));
65            }
66        }
67    }
68
69    // Sort by abundance (descending) to prepare for trimming
70    result.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
71
72    // Trim the vector if it exceeds max_results
73    if result.len() > max_results {
74        result.truncate(max_results);
75    }
76
77    // Optionally, sort by mass if needed for further processing
78    result.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
79
80    result
81}
82
83/// convolve a distribution with itself n times
84///
85/// Arguments:
86///
87/// * `dist` - distribution of masses and abundances
88/// * `n` - number of times to convolve the distribution with itself
89///
90/// Returns:
91///
92/// * `Vec<(f64, f64)>` - distribution of masses and abundances
93///
94/// # Examples
95///
96/// ```
97/// use mscore::algorithm::isotope::convolve_pow;
98///
99/// let dist = vec![(100.0, 0.5), (101.0, 0.5)];
100/// let result = convolve_pow(&dist, 2);
101/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
102/// ```
103pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)> {
104    if n == 0 {
105        return vec![(0.0, 1.0)]; // Return the delta distribution
106    }
107    if n == 1 {
108        return dist.clone();
109    }
110
111    let mut result = dist.clone();
112    let mut power = 2;
113
114    while power <= n {
115        result = convolve(&result, &result, 1e-6, 1e-12, 200); // Square the result to get the next power of 2
116        power *= 2;
117    }
118
119    // If n is not a power of 2, recursively fill in the remainder
120    if power / 2 < n {
121        result = convolve(
122            &result,
123            &convolve_pow(dist, n - power / 2),
124            1e-6,
125            1e-12,
126            200,
127        );
128    }
129
130    result
131}
132
133/// generate the isotope distribution for a given atomic composition
134///
135/// Arguments:
136///
137/// * `atomic_composition` - atomic composition of the peptide
138/// * `mass_tolerance` - mass tolerance for combining peaks
139/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
140/// * `max_result` - maximum number of peaks to include in the result
141///
142/// Returns:
143///
144/// * `Vec<(f64, f64)>` - distribution of masses and abundances
145///
146/// # Examples
147///
148/// ```
149/// use std::collections::HashMap;
150/// use mscore::algorithm::isotope::generate_isotope_distribution;
151///
152/// let mut atomic_composition = HashMap::new();
153/// atomic_composition.insert("C".to_string(), 5);
154/// atomic_composition.insert("H".to_string(), 9);
155/// atomic_composition.insert("N".to_string(), 1);
156/// atomic_composition.insert("O".to_string(), 1);
157/// let result = generate_isotope_distribution(&atomic_composition, 1e-6, 1e-12, 200);
158/// ```
159pub fn generate_isotope_distribution(
160    atomic_composition: &HashMap<String, i32>,
161    mass_tolerance: f64,
162    abundance_threshold: f64,
163    max_result: i32,
164) -> Vec<(f64, f64)> {
165    let mut cumulative_distribution: Option<Vec<(f64, f64)>> = None;
166    let atoms_isotopic_weights: HashMap<String, Vec<f64>> = atoms_isotopic_weights()
167        .iter()
168        .map(|(k, v)| (k.to_string(), v.clone()))
169        .collect();
170    let atomic_isotope_abundance: HashMap<String, Vec<f64>> = isotopic_abundance()
171        .iter()
172        .map(|(k, v)| (k.to_string(), v.clone()))
173        .collect();
174
175    for (element, &count) in atomic_composition.iter() {
176        let elemental_isotope_weights = atoms_isotopic_weights
177            .get(element)
178            .expect("Element not found in isotopic weights table")
179            .clone();
180        let elemental_isotope_abundance = atomic_isotope_abundance
181            .get(element)
182            .expect("Element not found in isotopic abundance table")
183            .clone();
184
185        let element_distribution: Vec<(f64, f64)> = elemental_isotope_weights
186            .iter()
187            .zip(elemental_isotope_abundance.iter())
188            .map(|(&mass, &abundance)| (mass, abundance))
189            .collect();
190
191        let element_power_distribution = if count > 1 {
192            convolve_pow(&element_distribution, count)
193        } else {
194            element_distribution
195        };
196
197        cumulative_distribution = match cumulative_distribution {
198            Some(cum_dist) => Some(convolve(
199                &cum_dist,
200                &element_power_distribution,
201                mass_tolerance,
202                abundance_threshold,
203                max_result as usize,
204            )),
205            None => Some(element_power_distribution),
206        };
207    }
208
209    let final_distribution = cumulative_distribution.expect("Peptide has no elements");
210    // Normalize the distribution
211    let total_abundance: f64 = final_distribution
212        .iter()
213        .map(|&(_, abundance)| abundance)
214        .sum();
215    let result: Vec<_> = final_distribution
216        .into_iter()
217        .map(|(mass, abundance)| (mass, abundance / total_abundance))
218        .collect();
219
220    let mut sort_map: BTreeMap<i64, f64> = BTreeMap::new();
221    let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
222
223    for (mz, intensity) in result {
224        let key = quantize(mz);
225        sort_map
226            .entry(key)
227            .and_modify(|e| *e += intensity)
228            .or_insert(intensity);
229    }
230
231    let mz: Vec<f64> = sort_map
232        .keys()
233        .map(|&key| key as f64 / 1_000_000.0)
234        .collect();
235    let intensity: Vec<f64> = sort_map.values().map(|&intensity| intensity).collect();
236    mz.iter()
237        .zip(intensity.iter())
238        .map(|(&mz, &intensity)| (mz, intensity))
239        .collect()
240}
241
242/// calculate the normal probability density function
243///
244/// Arguments:
245///
246/// * `x` - value to calculate the probability density function of
247/// * `mean` - mean of the normal distribution
248/// * `std_dev` - standard deviation of the normal distribution
249///
250/// Returns:
251///
252/// * `f64` - probability density function of `x`
253///
254/// # Examples
255///
256/// ```
257/// use mscore::algorithm::isotope::normal_pdf;
258///
259/// let pdf = normal_pdf(0.0, 0.0, 1.0);
260/// assert_eq!(pdf, 0.39894228040143265);
261/// ```
262pub fn normal_pdf(x: f64, mean: f64, std_dev: f64) -> f64 {
263    let normal = Normal::new(mean, std_dev).unwrap();
264    normal.pdf(x)
265}
266
267/// calculate the factorial of a number
268///
269/// Arguments:
270///
271/// * `n` - number to calculate factorial of
272///
273/// Returns:
274///
275/// * `f64` - factorial of `n`
276///
277/// # Examples
278///
279/// ```
280/// use mscore::algorithm::isotope::factorial;
281///
282/// let fact = factorial(5);
283/// assert_eq!(fact, 120.0);
284/// ```
285pub fn factorial(n: i32) -> f64 {
286    (1..=n).fold(1.0, |acc, x| acc * x as f64)
287}
288
289pub fn weight(mass: f64, peak_nums: Vec<i32>, normalize: bool) -> Vec<f64> {
290    let lam_val = lam(mass, 0.000594, -0.03091);
291    let factorials: Vec<f64> = peak_nums.iter().map(|&k| factorial(k)).collect();
292    let mut weights: Vec<f64> = peak_nums
293        .iter()
294        .map(|&k| {
295            let pow = lam_val.powi(k);
296            let exp = (-lam_val).exp();
297            exp * pow / factorials[k as usize]
298        })
299        .collect();
300
301    if normalize {
302        let sum: f64 = weights.iter().sum();
303        weights = weights.iter().map(|&w| w / sum).collect();
304    }
305
306    weights
307}
308
309/// calculate the lambda value for a given mass
310///
311/// Arguments:
312///
313/// * `mass` - mass of the peptide
314/// * `slope` - slope of the linear regression
315/// * `intercept` - intercept of the linear regression
316///
317/// Returns:
318///
319/// * `f64` - lambda value
320///
321/// # Examples
322///
323/// ```
324/// use mscore::algorithm::isotope::lam;
325///
326/// let lambda = lam(1000.0, 0.000594, -0.03091);
327/// assert_eq!(lambda, 0.56309);
328pub fn lam(mass: f64, slope: f64, intercept: f64) -> f64 {
329    slope * mass + intercept
330}
331
332/// calculate the isotope pattern for a given mass and charge based on the averagine model
333/// using the normal distribution for peak shapes
334///
335/// Arguments:
336///
337/// * `x` - list of m/z values to probe
338/// * `mass` - mass of the peptide
339/// * `charge` - charge of the peptide
340/// * `sigma` - standard deviation of the normal distribution
341/// * `amp` - amplitude of the isotope pattern
342/// * `k` - number of isotopes to consider
343/// * `step_size` - step size for the m/z values to probe
344///
345/// Returns:
346///
347/// * `Vec<f64>` - isotope pattern
348///
349pub fn iso(
350    x: &Vec<f64>,
351    mass: f64,
352    charge: f64,
353    sigma: f64,
354    amp: f64,
355    k: usize,
356    step_size: f64,
357) -> Vec<f64> {
358    let k_range: Vec<usize> = (0..k).collect();
359    let means: Vec<f64> = k_range
360        .iter()
361        .map(|&k_val| (mass + MASS_NEUTRON * k_val as f64) / charge)
362        .collect();
363    let weights = weight(
364        mass,
365        k_range
366            .iter()
367            .map(|&k_val| k_val as i32)
368            .collect::<Vec<i32>>(),
369        true,
370    );
371
372    let mut intensities = vec![0.0; x.len()];
373    for (i, x_val) in x.iter().enumerate() {
374        for (j, &mean) in means.iter().enumerate() {
375            intensities[i] += weights[j] * normal_pdf(*x_val, mean, sigma);
376        }
377        intensities[i] *= step_size;
378    }
379    intensities
380        .iter()
381        .map(|&intensity| intensity * amp)
382        .collect()
383}
384
385/// generate the isotope pattern for a given mass and charge
386///
387/// Arguments:
388///
389/// * `lower_bound` - lower bound of the isotope pattern
390/// * `upper_bound` - upper bound of the isotope pattern
391/// * `mass` - mass of the peptide
392/// * `charge` - charge of the peptide
393/// * `amp` - amplitude of the isotope pattern
394/// * `k` - number of isotopes to consider
395/// * `sigma` - standard deviation of the normal distribution
396/// * `resolution` - resolution of the isotope pattern
397///
398/// Returns:
399///
400/// * `(Vec<f64>, Vec<f64>)` - isotope pattern
401///
402/// # Examples
403///
404/// ```
405/// use mscore::algorithm::isotope::generate_isotope_pattern;
406///
407/// let (mzs, intensities) = generate_isotope_pattern(1500.0, 1510.0, 3000.0, 2.0, 1e4, 10, 1.0, 3);
408/// ```
409pub fn generate_isotope_pattern(
410    lower_bound: f64,
411    upper_bound: f64,
412    mass: f64,
413    charge: f64,
414    amp: f64,
415    k: usize,
416    sigma: f64,
417    resolution: i32,
418) -> (Vec<f64>, Vec<f64>) {
419    let step_size = f64::min(sigma / 10.0, 1.0 / 10f64.powi(resolution));
420    let size = ((upper_bound - lower_bound) / step_size).ceil() as usize;
421    let mzs: Vec<f64> = (0..size)
422        .map(|i| lower_bound + step_size * i as f64)
423        .collect();
424    let intensities = iso(&mzs, mass, charge, sigma, amp, k, step_size);
425
426    (
427        mzs.iter().map(|&mz| mz + MASS_PROTON).collect(),
428        intensities,
429    )
430}
431
432/// generate the averagine spectrum for a given mass and charge
433///
434/// Arguments:
435///
436/// * `mass` - mass of the peptide
437/// * `charge` - charge of the peptide
438/// * `min_intensity` - minimum intensity for a peak to be included in the result
439/// * `k` - number of isotopes to consider
440/// * `resolution` - resolution of the isotope pattern
441/// * `centroid` - whether to centroid the spectrum
442/// * `amp` - amplitude of the isotope pattern
443///
444/// Returns:
445///
446/// * `MzSpectrum` - averagine spectrum
447///
448/// # Examples
449///
450/// ```
451/// use mscore::algorithm::isotope::generate_averagine_spectrum;
452///
453/// let spectrum = generate_averagine_spectrum(3000.0, 2, 1, 10, 3, true, None);
454/// ```
455pub fn generate_averagine_spectrum(
456    mass: f64,
457    charge: i32,
458    min_intensity: i32,
459    k: i32,
460    resolution: i32,
461    centroid: bool,
462    amp: Option<f64>,
463) -> MzSpectrum {
464    let amp = amp.unwrap_or(1e4);
465    let lb = mass / charge as f64 - 0.2;
466    let ub = mass / charge as f64 + k as f64 + 0.2;
467
468    let (mz, intensities) = generate_isotope_pattern(
469        lb,
470        ub,
471        mass,
472        charge as f64,
473        amp,
474        k as usize,
475        0.008492569002123142,
476        resolution,
477    );
478
479    let spectrum = MzSpectrum::new(mz, intensities)
480        .to_resolution(resolution)
481        .filter_ranged(lb, ub, min_intensity as f64, 1e9);
482
483    if centroid {
484        spectrum.to_centroid(
485            std::cmp::max(min_intensity, 1),
486            1.0 / 10f64.powi(resolution - 1),
487            true,
488        )
489    } else {
490        spectrum
491    }
492}
493
494/// generate the averagine spectra for a given list of masses and charges
495/// using multiple threads
496///
497/// Arguments:
498///
499/// * `masses` - list of masses of the peptides
500/// * `charges` - list of charges of the peptides
501/// * `min_intensity` - minimum intensity for a peak to be included in the result
502/// * `k` - number of isotopes to consider
503/// * `resolution` - resolution of the isotope pattern
504/// * `centroid` - whether to centroid the spectrum
505/// * `num_threads` - number of threads to use
506/// * `amp` - amplitude of the isotope pattern
507///
508/// Returns:
509///
510/// * `Vec<MzSpectrum>` - list of averagine spectra
511///
512/// # Examples
513///
514/// ```
515/// use mscore::algorithm::isotope::generate_averagine_spectra;
516///
517/// let masses = vec![3000.0, 3000.0];
518/// let charges = vec![2, 3];
519/// let spectra = generate_averagine_spectra(masses, charges, 1, 10, 3, true, 4, None);
520/// ```
521pub fn generate_averagine_spectra(
522    masses: Vec<f64>,
523    charges: Vec<i32>,
524    min_intensity: i32,
525    k: i32,
526    resolution: i32,
527    centroid: bool,
528    num_threads: usize,
529    amp: Option<f64>,
530) -> Vec<MzSpectrum> {
531    let amp = amp.unwrap_or(1e5);
532    let mut spectra: Vec<MzSpectrum> = Vec::new();
533    let thread_pool = ThreadPoolBuilder::new()
534        .num_threads(num_threads)
535        .build()
536        .unwrap();
537
538    thread_pool.install(|| {
539        spectra = masses
540            .par_iter()
541            .zip(charges.par_iter())
542            .map(|(&mass, &charge)| {
543                generate_averagine_spectrum(
544                    mass,
545                    charge,
546                    min_intensity,
547                    k,
548                    resolution,
549                    centroid,
550                    Some(amp),
551                )
552            })
553            .collect();
554    });
555
556    spectra
557}
558
559/// generate the precursor spectrum for a given peptide sequence and charge
560/// using isotope convolutions
561///
562/// Arguments:
563///
564/// * `sequence` - peptide sequence
565/// * `charge` - charge of the peptide
566///
567/// Returns:
568///
569/// * `MzSpectrum` - precursor spectrum
570///
571pub fn generate_precursor_spectrum(
572    sequence: &str,
573    charge: i32,
574    peptide_id: Option<i32>,
575) -> MzSpectrum {
576    let peptide_ion = PeptideIon::new(sequence.to_string(), charge, 1.0, peptide_id);
577    peptide_ion.calculate_isotopic_spectrum(1e-3, 1e-9, 200, 1e-6)
578}
579
580/// parallel version of `generate_precursor_spectrum`
581///
582/// Arguments:
583///
584/// * `sequences` - list of peptide sequences
585/// * `charges` - list of charges of the peptides
586/// * `num_threads` - number of threads to use
587///
588/// Returns:
589///
590/// * `Vec<MzSpectrum>` - list of precursor spectra
591///
592pub fn generate_precursor_spectra(
593    sequences: &Vec<&str>,
594    charges: &Vec<i32>,
595    num_threads: usize,
596    peptide_ids: Vec<Option<i32>>,
597) -> Vec<MzSpectrum> {
598    let thread_pool = ThreadPoolBuilder::new()
599        .num_threads(num_threads)
600        .build()
601        .unwrap();
602    // need to zip sequences and charges and peptide_ids
603    let result = thread_pool.install(|| {
604        sequences
605            .par_iter()
606            .zip(charges.par_iter())
607            .zip(peptide_ids.par_iter())
608            .map(|((&sequence, &charge), &peptide_id)| {
609                generate_precursor_spectrum(sequence, charge, peptide_id)
610            })
611            .collect()
612    });
613    result
614}
615
616// Calculates the isotope distribution for a fragment given the isotope distribution of the fragment, the isotope distribution of the complementary fragment, and the transmitted precursor isotopes
617// implemented based on OpenMS: "https://github.com/OpenMS/OpenMS/blob/079143800f7ed036a7c68ea6e124fe4f5cfc9569/src/openms/source/CHEMISTRY/ISOTOPEDISTRIBUTION/CoarseIsotopePatternGenerator.cpp#L415"
618pub fn calculate_transmission_dependent_fragment_ion_isotope_distribution(
619    fragment_isotope_dist: &Vec<(f64, f64)>,
620    comp_fragment_isotope_dist: &Vec<(f64, f64)>,
621    precursor_isotopes: &HashSet<usize>,
622    max_isotope: usize,
623) -> Vec<(f64, f64)> {
624    if fragment_isotope_dist.is_empty() || comp_fragment_isotope_dist.is_empty() {
625        return Vec::new();
626    }
627
628    let mut r_max = fragment_isotope_dist.len();
629    if max_isotope != 0 && r_max > max_isotope {
630        r_max = max_isotope;
631    }
632
633    let mut result = (0..r_max)
634        .map(|i| (fragment_isotope_dist[0].0 + i as f64, 0.0))
635        .collect::<Vec<(f64, f64)>>();
636
637    // Calculation of dependent isotope distribution
638    for (i, &(_mz, intensity)) in fragment_isotope_dist.iter().enumerate().take(r_max) {
639        for &precursor in precursor_isotopes {
640            if precursor >= i && (precursor - i) < comp_fragment_isotope_dist.len() {
641                let comp_intensity = comp_fragment_isotope_dist[precursor - i].1;
642                result[i].1 += comp_intensity;
643            }
644        }
645        result[i].1 *= intensity;
646    }
647
648    result
649}