mscore/algorithm/isotope.rs
1extern crate statrs;
2
3use rayon::prelude::*;
4use rayon::ThreadPoolBuilder;
5use std::collections::{BTreeMap, HashMap, HashSet};
6
7use crate::chemistry::constants::{MASS_NEUTRON, MASS_PROTON};
8use crate::chemistry::elements::{atoms_isotopic_weights, isotopic_abundance};
9use crate::data::peptide::PeptideIon;
10use crate::data::spectrum::MzSpectrum;
11use crate::data::spectrum::ToResolution;
12use statrs::distribution::{Continuous, Normal};
13
14/// convolve two distributions of masses and abundances
15///
16/// Arguments:
17///
18/// * `dist_a` - first distribution of masses and abundances
19/// * `dist_b` - second distribution of masses and abundances
20/// * `mass_tolerance` - mass tolerance for combining peaks
21/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
22/// * `max_results` - maximum number of peaks to include in the result
23///
24/// Returns:
25///
26/// * `Vec<(f64, f64)>` - combined distribution of masses and abundances
27///
28/// # Examples
29///
30/// ```
31/// use mscore::algorithm::isotope::convolve;
32///
33/// let dist_a = vec![(100.0, 0.5), (101.0, 0.5)];
34/// let dist_b = vec![(100.0, 0.5), (101.0, 0.5)];
35/// let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200);
36/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
37/// ```
38pub fn convolve(
39 dist_a: &Vec<(f64, f64)>,
40 dist_b: &Vec<(f64, f64)>,
41 mass_tolerance: f64,
42 abundance_threshold: f64,
43 max_results: usize,
44) -> Vec<(f64, f64)> {
45 let mut result: Vec<(f64, f64)> = Vec::new();
46
47 for (mass_a, abundance_a) in dist_a {
48 for (mass_b, abundance_b) in dist_b {
49 let combined_mass = mass_a + mass_b;
50 let combined_abundance = abundance_a * abundance_b;
51
52 // Skip entries with combined abundance below the threshold
53 if combined_abundance < abundance_threshold {
54 continue;
55 }
56
57 // Insert or update the combined mass in the result distribution
58 if let Some(entry) = result
59 .iter_mut()
60 .find(|(m, _)| (*m - combined_mass).abs() < mass_tolerance)
61 {
62 entry.1 += combined_abundance;
63 } else {
64 result.push((combined_mass, combined_abundance));
65 }
66 }
67 }
68
69 // Sort by abundance (descending) to prepare for trimming
70 result.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
71
72 // Trim the vector if it exceeds max_results
73 if result.len() > max_results {
74 result.truncate(max_results);
75 }
76
77 // Optionally, sort by mass if needed for further processing
78 result.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
79
80 result
81}
82
83/// convolve a distribution with itself n times
84///
85/// Arguments:
86///
87/// * `dist` - distribution of masses and abundances
88/// * `n` - number of times to convolve the distribution with itself
89///
90/// Returns:
91///
92/// * `Vec<(f64, f64)>` - distribution of masses and abundances
93///
94/// # Examples
95///
96/// ```
97/// use mscore::algorithm::isotope::convolve_pow;
98///
99/// let dist = vec![(100.0, 0.5), (101.0, 0.5)];
100/// let result = convolve_pow(&dist, 2);
101/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
102/// ```
103pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)> {
104 if n == 0 {
105 return vec![(0.0, 1.0)]; // Return the delta distribution
106 }
107 if n == 1 {
108 return dist.clone();
109 }
110
111 let mut result = dist.clone();
112 let mut power = 2;
113
114 while power <= n {
115 result = convolve(&result, &result, 1e-6, 1e-12, 200); // Square the result to get the next power of 2
116 power *= 2;
117 }
118
119 // If n is not a power of 2, recursively fill in the remainder
120 if power / 2 < n {
121 result = convolve(
122 &result,
123 &convolve_pow(dist, n - power / 2),
124 1e-6,
125 1e-12,
126 200,
127 );
128 }
129
130 result
131}
132
133/// generate the isotope distribution for a given atomic composition
134///
135/// Arguments:
136///
137/// * `atomic_composition` - atomic composition of the peptide
138/// * `mass_tolerance` - mass tolerance for combining peaks
139/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
140/// * `max_result` - maximum number of peaks to include in the result
141///
142/// Returns:
143///
144/// * `Vec<(f64, f64)>` - distribution of masses and abundances
145///
146/// # Examples
147///
148/// ```
149/// use std::collections::HashMap;
150/// use mscore::algorithm::isotope::generate_isotope_distribution;
151///
152/// let mut atomic_composition = HashMap::new();
153/// atomic_composition.insert("C".to_string(), 5);
154/// atomic_composition.insert("H".to_string(), 9);
155/// atomic_composition.insert("N".to_string(), 1);
156/// atomic_composition.insert("O".to_string(), 1);
157/// let result = generate_isotope_distribution(&atomic_composition, 1e-6, 1e-12, 200);
158/// ```
159pub fn generate_isotope_distribution(
160 atomic_composition: &HashMap<String, i32>,
161 mass_tolerance: f64,
162 abundance_threshold: f64,
163 max_result: i32,
164) -> Vec<(f64, f64)> {
165 let mut cumulative_distribution: Option<Vec<(f64, f64)>> = None;
166 let atoms_isotopic_weights: HashMap<String, Vec<f64>> = atoms_isotopic_weights()
167 .iter()
168 .map(|(k, v)| (k.to_string(), v.clone()))
169 .collect();
170 let atomic_isotope_abundance: HashMap<String, Vec<f64>> = isotopic_abundance()
171 .iter()
172 .map(|(k, v)| (k.to_string(), v.clone()))
173 .collect();
174
175 for (element, &count) in atomic_composition.iter() {
176 let elemental_isotope_weights = atoms_isotopic_weights
177 .get(element)
178 .expect("Element not found in isotopic weights table")
179 .clone();
180 let elemental_isotope_abundance = atomic_isotope_abundance
181 .get(element)
182 .expect("Element not found in isotopic abundance table")
183 .clone();
184
185 let element_distribution: Vec<(f64, f64)> = elemental_isotope_weights
186 .iter()
187 .zip(elemental_isotope_abundance.iter())
188 .map(|(&mass, &abundance)| (mass, abundance))
189 .collect();
190
191 let element_power_distribution = if count > 1 {
192 convolve_pow(&element_distribution, count)
193 } else {
194 element_distribution
195 };
196
197 cumulative_distribution = match cumulative_distribution {
198 Some(cum_dist) => Some(convolve(
199 &cum_dist,
200 &element_power_distribution,
201 mass_tolerance,
202 abundance_threshold,
203 max_result as usize,
204 )),
205 None => Some(element_power_distribution),
206 };
207 }
208
209 let final_distribution = cumulative_distribution.expect("Peptide has no elements");
210 // Normalize the distribution
211 let total_abundance: f64 = final_distribution
212 .iter()
213 .map(|&(_, abundance)| abundance)
214 .sum();
215 let result: Vec<_> = final_distribution
216 .into_iter()
217 .map(|(mass, abundance)| (mass, abundance / total_abundance))
218 .collect();
219
220 let mut sort_map: BTreeMap<i64, f64> = BTreeMap::new();
221 let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
222
223 for (mz, intensity) in result {
224 let key = quantize(mz);
225 sort_map
226 .entry(key)
227 .and_modify(|e| *e += intensity)
228 .or_insert(intensity);
229 }
230
231 let mz: Vec<f64> = sort_map
232 .keys()
233 .map(|&key| key as f64 / 1_000_000.0)
234 .collect();
235 let intensity: Vec<f64> = sort_map.values().map(|&intensity| intensity).collect();
236 mz.iter()
237 .zip(intensity.iter())
238 .map(|(&mz, &intensity)| (mz, intensity))
239 .collect()
240}
241
242/// calculate the normal probability density function
243///
244/// Arguments:
245///
246/// * `x` - value to calculate the probability density function of
247/// * `mean` - mean of the normal distribution
248/// * `std_dev` - standard deviation of the normal distribution
249///
250/// Returns:
251///
252/// * `f64` - probability density function of `x`
253///
254/// # Examples
255///
256/// ```
257/// use mscore::algorithm::isotope::normal_pdf;
258///
259/// let pdf = normal_pdf(0.0, 0.0, 1.0);
260/// assert_eq!(pdf, 0.39894228040143265);
261/// ```
262pub fn normal_pdf(x: f64, mean: f64, std_dev: f64) -> f64 {
263 let normal = Normal::new(mean, std_dev).unwrap();
264 normal.pdf(x)
265}
266
267/// calculate the factorial of a number
268///
269/// Arguments:
270///
271/// * `n` - number to calculate factorial of
272///
273/// Returns:
274///
275/// * `f64` - factorial of `n`
276///
277/// # Examples
278///
279/// ```
280/// use mscore::algorithm::isotope::factorial;
281///
282/// let fact = factorial(5);
283/// assert_eq!(fact, 120.0);
284/// ```
285pub fn factorial(n: i32) -> f64 {
286 (1..=n).fold(1.0, |acc, x| acc * x as f64)
287}
288
289pub fn weight(mass: f64, peak_nums: Vec<i32>, normalize: bool) -> Vec<f64> {
290 let lam_val = lam(mass, 0.000594, -0.03091);
291 let factorials: Vec<f64> = peak_nums.iter().map(|&k| factorial(k)).collect();
292 let mut weights: Vec<f64> = peak_nums
293 .iter()
294 .map(|&k| {
295 let pow = lam_val.powi(k);
296 let exp = (-lam_val).exp();
297 exp * pow / factorials[k as usize]
298 })
299 .collect();
300
301 if normalize {
302 let sum: f64 = weights.iter().sum();
303 weights = weights.iter().map(|&w| w / sum).collect();
304 }
305
306 weights
307}
308
309/// calculate the lambda value for a given mass
310///
311/// Arguments:
312///
313/// * `mass` - mass of the peptide
314/// * `slope` - slope of the linear regression
315/// * `intercept` - intercept of the linear regression
316///
317/// Returns:
318///
319/// * `f64` - lambda value
320///
321/// # Examples
322///
323/// ```
324/// use mscore::algorithm::isotope::lam;
325///
326/// let lambda = lam(1000.0, 0.000594, -0.03091);
327/// assert_eq!(lambda, 0.56309);
328pub fn lam(mass: f64, slope: f64, intercept: f64) -> f64 {
329 slope * mass + intercept
330}
331
332/// calculate the isotope pattern for a given mass and charge based on the averagine model
333/// using the normal distribution for peak shapes
334///
335/// Arguments:
336///
337/// * `x` - list of m/z values to probe
338/// * `mass` - mass of the peptide
339/// * `charge` - charge of the peptide
340/// * `sigma` - standard deviation of the normal distribution
341/// * `amp` - amplitude of the isotope pattern
342/// * `k` - number of isotopes to consider
343/// * `step_size` - step size for the m/z values to probe
344///
345/// Returns:
346///
347/// * `Vec<f64>` - isotope pattern
348///
349pub fn iso(
350 x: &Vec<f64>,
351 mass: f64,
352 charge: f64,
353 sigma: f64,
354 amp: f64,
355 k: usize,
356 step_size: f64,
357) -> Vec<f64> {
358 let k_range: Vec<usize> = (0..k).collect();
359 let means: Vec<f64> = k_range
360 .iter()
361 .map(|&k_val| (mass + MASS_NEUTRON * k_val as f64) / charge)
362 .collect();
363 let weights = weight(
364 mass,
365 k_range
366 .iter()
367 .map(|&k_val| k_val as i32)
368 .collect::<Vec<i32>>(),
369 true,
370 );
371
372 let mut intensities = vec![0.0; x.len()];
373 for (i, x_val) in x.iter().enumerate() {
374 for (j, &mean) in means.iter().enumerate() {
375 intensities[i] += weights[j] * normal_pdf(*x_val, mean, sigma);
376 }
377 intensities[i] *= step_size;
378 }
379 intensities
380 .iter()
381 .map(|&intensity| intensity * amp)
382 .collect()
383}
384
385/// generate the isotope pattern for a given mass and charge
386///
387/// Arguments:
388///
389/// * `lower_bound` - lower bound of the isotope pattern
390/// * `upper_bound` - upper bound of the isotope pattern
391/// * `mass` - mass of the peptide
392/// * `charge` - charge of the peptide
393/// * `amp` - amplitude of the isotope pattern
394/// * `k` - number of isotopes to consider
395/// * `sigma` - standard deviation of the normal distribution
396/// * `resolution` - resolution of the isotope pattern
397///
398/// Returns:
399///
400/// * `(Vec<f64>, Vec<f64>)` - isotope pattern
401///
402/// # Examples
403///
404/// ```
405/// use mscore::algorithm::isotope::generate_isotope_pattern;
406///
407/// let (mzs, intensities) = generate_isotope_pattern(1500.0, 1510.0, 3000.0, 2.0, 1e4, 10, 1.0, 3);
408/// ```
409pub fn generate_isotope_pattern(
410 lower_bound: f64,
411 upper_bound: f64,
412 mass: f64,
413 charge: f64,
414 amp: f64,
415 k: usize,
416 sigma: f64,
417 resolution: i32,
418) -> (Vec<f64>, Vec<f64>) {
419 let step_size = f64::min(sigma / 10.0, 1.0 / 10f64.powi(resolution));
420 let size = ((upper_bound - lower_bound) / step_size).ceil() as usize;
421 let mzs: Vec<f64> = (0..size)
422 .map(|i| lower_bound + step_size * i as f64)
423 .collect();
424 let intensities = iso(&mzs, mass, charge, sigma, amp, k, step_size);
425
426 (
427 mzs.iter().map(|&mz| mz + MASS_PROTON).collect(),
428 intensities,
429 )
430}
431
432/// generate the averagine spectrum for a given mass and charge
433///
434/// Arguments:
435///
436/// * `mass` - mass of the peptide
437/// * `charge` - charge of the peptide
438/// * `min_intensity` - minimum intensity for a peak to be included in the result
439/// * `k` - number of isotopes to consider
440/// * `resolution` - resolution of the isotope pattern
441/// * `centroid` - whether to centroid the spectrum
442/// * `amp` - amplitude of the isotope pattern
443///
444/// Returns:
445///
446/// * `MzSpectrum` - averagine spectrum
447///
448/// # Examples
449///
450/// ```
451/// use mscore::algorithm::isotope::generate_averagine_spectrum;
452///
453/// let spectrum = generate_averagine_spectrum(3000.0, 2, 1, 10, 3, true, None);
454/// ```
455pub fn generate_averagine_spectrum(
456 mass: f64,
457 charge: i32,
458 min_intensity: i32,
459 k: i32,
460 resolution: i32,
461 centroid: bool,
462 amp: Option<f64>,
463) -> MzSpectrum {
464 let amp = amp.unwrap_or(1e4);
465 let lb = mass / charge as f64 - 0.2;
466 let ub = mass / charge as f64 + k as f64 + 0.2;
467
468 let (mz, intensities) = generate_isotope_pattern(
469 lb,
470 ub,
471 mass,
472 charge as f64,
473 amp,
474 k as usize,
475 0.008492569002123142,
476 resolution,
477 );
478
479 let spectrum = MzSpectrum::new(mz, intensities)
480 .to_resolution(resolution)
481 .filter_ranged(lb, ub, min_intensity as f64, 1e9);
482
483 if centroid {
484 spectrum.to_centroid(
485 std::cmp::max(min_intensity, 1),
486 1.0 / 10f64.powi(resolution - 1),
487 true,
488 )
489 } else {
490 spectrum
491 }
492}
493
494/// generate the averagine spectra for a given list of masses and charges
495/// using multiple threads
496///
497/// Arguments:
498///
499/// * `masses` - list of masses of the peptides
500/// * `charges` - list of charges of the peptides
501/// * `min_intensity` - minimum intensity for a peak to be included in the result
502/// * `k` - number of isotopes to consider
503/// * `resolution` - resolution of the isotope pattern
504/// * `centroid` - whether to centroid the spectrum
505/// * `num_threads` - number of threads to use
506/// * `amp` - amplitude of the isotope pattern
507///
508/// Returns:
509///
510/// * `Vec<MzSpectrum>` - list of averagine spectra
511///
512/// # Examples
513///
514/// ```
515/// use mscore::algorithm::isotope::generate_averagine_spectra;
516///
517/// let masses = vec![3000.0, 3000.0];
518/// let charges = vec![2, 3];
519/// let spectra = generate_averagine_spectra(masses, charges, 1, 10, 3, true, 4, None);
520/// ```
521pub fn generate_averagine_spectra(
522 masses: Vec<f64>,
523 charges: Vec<i32>,
524 min_intensity: i32,
525 k: i32,
526 resolution: i32,
527 centroid: bool,
528 num_threads: usize,
529 amp: Option<f64>,
530) -> Vec<MzSpectrum> {
531 let amp = amp.unwrap_or(1e5);
532 let mut spectra: Vec<MzSpectrum> = Vec::new();
533 let thread_pool = ThreadPoolBuilder::new()
534 .num_threads(num_threads)
535 .build()
536 .unwrap();
537
538 thread_pool.install(|| {
539 spectra = masses
540 .par_iter()
541 .zip(charges.par_iter())
542 .map(|(&mass, &charge)| {
543 generate_averagine_spectrum(
544 mass,
545 charge,
546 min_intensity,
547 k,
548 resolution,
549 centroid,
550 Some(amp),
551 )
552 })
553 .collect();
554 });
555
556 spectra
557}
558
559/// generate the precursor spectrum for a given peptide sequence and charge
560/// using isotope convolutions
561///
562/// Arguments:
563///
564/// * `sequence` - peptide sequence
565/// * `charge` - charge of the peptide
566///
567/// Returns:
568///
569/// * `MzSpectrum` - precursor spectrum
570///
571pub fn generate_precursor_spectrum(
572 sequence: &str,
573 charge: i32,
574 peptide_id: Option<i32>,
575) -> MzSpectrum {
576 let peptide_ion = PeptideIon::new(sequence.to_string(), charge, 1.0, peptide_id);
577 peptide_ion.calculate_isotopic_spectrum(1e-3, 1e-9, 200, 1e-6)
578}
579
580/// parallel version of `generate_precursor_spectrum`
581///
582/// Arguments:
583///
584/// * `sequences` - list of peptide sequences
585/// * `charges` - list of charges of the peptides
586/// * `num_threads` - number of threads to use
587///
588/// Returns:
589///
590/// * `Vec<MzSpectrum>` - list of precursor spectra
591///
592pub fn generate_precursor_spectra(
593 sequences: &Vec<&str>,
594 charges: &Vec<i32>,
595 num_threads: usize,
596 peptide_ids: Vec<Option<i32>>,
597) -> Vec<MzSpectrum> {
598 let thread_pool = ThreadPoolBuilder::new()
599 .num_threads(num_threads)
600 .build()
601 .unwrap();
602 // need to zip sequences and charges and peptide_ids
603 let result = thread_pool.install(|| {
604 sequences
605 .par_iter()
606 .zip(charges.par_iter())
607 .zip(peptide_ids.par_iter())
608 .map(|((&sequence, &charge), &peptide_id)| {
609 generate_precursor_spectrum(sequence, charge, peptide_id)
610 })
611 .collect()
612 });
613 result
614}
615
616// Calculates the isotope distribution for a fragment given the isotope distribution of the fragment, the isotope distribution of the complementary fragment, and the transmitted precursor isotopes
617// implemented based on OpenMS: "https://github.com/OpenMS/OpenMS/blob/079143800f7ed036a7c68ea6e124fe4f5cfc9569/src/openms/source/CHEMISTRY/ISOTOPEDISTRIBUTION/CoarseIsotopePatternGenerator.cpp#L415"
618pub fn calculate_transmission_dependent_fragment_ion_isotope_distribution(
619 fragment_isotope_dist: &Vec<(f64, f64)>,
620 comp_fragment_isotope_dist: &Vec<(f64, f64)>,
621 precursor_isotopes: &HashSet<usize>,
622 max_isotope: usize,
623) -> Vec<(f64, f64)> {
624 if fragment_isotope_dist.is_empty() || comp_fragment_isotope_dist.is_empty() {
625 return Vec::new();
626 }
627
628 let mut r_max = fragment_isotope_dist.len();
629 if max_isotope != 0 && r_max > max_isotope {
630 r_max = max_isotope;
631 }
632
633 let mut result = (0..r_max)
634 .map(|i| (fragment_isotope_dist[0].0 + i as f64, 0.0))
635 .collect::<Vec<(f64, f64)>>();
636
637 // Calculation of dependent isotope distribution
638 for (i, &(_mz, intensity)) in fragment_isotope_dist.iter().enumerate().take(r_max) {
639 for &precursor in precursor_isotopes {
640 if precursor >= i && (precursor - i) < comp_fragment_isotope_dist.len() {
641 let comp_intensity = comp_fragment_isotope_dist[precursor - i].1;
642 result[i].1 += comp_intensity;
643 }
644 }
645 result[i].1 *= intensity;
646 }
647
648 result
649}