rustms/algorithm/
isotope.rs

1use std::collections::{BTreeMap, HashMap};
2use crate::chemistry::element::{atoms_isotopic_weights, isotopic_abundance};
3
4/// convolve two distributions of masses and abundances
5///
6/// Arguments:
7///
8/// * `dist_a` - first distribution of masses and abundances
9/// * `dist_b` - second distribution of masses and abundances
10/// * `mass_tolerance` - mass tolerance for combining peaks
11/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
12/// * `max_results` - maximum number of peaks to include in the result
13///
14/// Returns:
15///
16/// * `Vec<(f64, f64)>` - combined distribution of masses and abundances
17///
18/// # Examples
19///
20/// ```
21/// use rustms::algorithm::isotope::convolve;
22///
23/// let dist_a = vec![(100.0, 0.5), (101.0, 0.5)];
24/// let dist_b = vec![(100.0, 0.5), (101.0, 0.5)];
25/// let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200);
26/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
27/// ```
28pub fn convolve(dist_a: &Vec<(f64, f64)>, dist_b: &Vec<(f64, f64)>, mass_tolerance: f64, abundance_threshold: f64, max_results: usize) -> Vec<(f64, f64)> {
29
30    let mut result: Vec<(f64, f64)> = Vec::new();
31
32    for (mass_a, abundance_a) in dist_a {
33        for (mass_b, abundance_b) in dist_b {
34            let combined_mass = mass_a + mass_b;
35            let combined_abundance = abundance_a * abundance_b;
36
37            // Skip entries with combined abundance below the threshold
38            if combined_abundance < abundance_threshold {
39                continue;
40            }
41
42            // Insert or update the combined mass in the result distribution
43            if let Some(entry) = result.iter_mut().find(|(m, _)| (*m - combined_mass).abs() < mass_tolerance) {
44                entry.1 += combined_abundance;
45            } else {
46                result.push((combined_mass, combined_abundance));
47            }
48        }
49    }
50
51    // Sort by abundance (descending) to prepare for trimming
52    result.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
53
54    // Trim the vector if it exceeds max_results
55    if result.len() > max_results {
56        result.truncate(max_results);
57    }
58
59    // Optionally, sort by mass if needed for further processing
60    result.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
61
62    result
63}
64
65/// convolve a distribution with itself n times
66///
67/// Arguments:
68///
69/// * `dist` - distribution of masses and abundances
70/// * `n` - number of times to convolve the distribution with itself
71///
72/// Returns:
73///
74/// * `Vec<(f64, f64)>` - distribution of masses and abundances
75///
76/// # Examples
77///
78/// ```
79/// use rustms::algorithm::isotope::convolve_pow;
80///
81/// let dist = vec![(100.0, 0.5), (101.0, 0.5)];
82/// let result = convolve_pow(&dist, 2);
83/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
84/// ```
85pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)> {
86    if n == 0 {
87        return vec![(0.0, 1.0)]; // Return the delta distribution
88    }
89    if n == 1 {
90        return dist.clone();
91    }
92
93    let mut result = dist.clone();
94    let mut power = 2;
95
96    while power <= n {
97        result = convolve(&result, &result, 1e-6, 1e-12, 200); // Square the result to get the next power of 2
98        power *= 2;
99    }
100
101    // If n is not a power of 2, recursively fill in the remainder
102    if power / 2 < n {
103        result = convolve(&result, &convolve_pow(dist, n - power / 2, ), 1e-6, 1e-12, 200);
104    }
105
106    result
107}
108
109/// generate the isotope distribution for a given atomic composition
110///
111/// Arguments:
112///
113/// * `atomic_composition` - atomic composition of the peptide
114/// * `mass_tolerance` - mass tolerance for combining peaks
115/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
116/// * `max_result` - maximum number of peaks to include in the result
117///
118/// Returns:
119///
120/// * `Vec<(f64, f64)>` - distribution of masses and abundances
121///
122/// # Examples
123///
124/// ```
125/// use std::collections::HashMap;
126/// use rustms::algorithm::isotope::generate_isotope_distribution;
127///
128/// let mut atomic_composition = HashMap::new();
129/// atomic_composition.insert("C".to_string(), 5);
130/// atomic_composition.insert("H".to_string(), 9);
131/// atomic_composition.insert("N".to_string(), 1);
132/// atomic_composition.insert("O".to_string(), 1);
133/// let result = generate_isotope_distribution(&atomic_composition, 1e-6, 1e-12, 200);
134/// ```
135pub fn generate_isotope_distribution(
136    atomic_composition: &HashMap<String, i32>,
137    mass_tolerance: f64,
138    abundance_threshold: f64,
139    max_result: i32
140) -> Vec<(f64, f64)> {
141
142    let mut cumulative_distribution: Option<Vec<(f64, f64)>> = None;
143    let atoms_isotopic_weights: HashMap<String, Vec<f64>> = atoms_isotopic_weights().iter().map(|(k, v)| (k.to_string(), v.clone())).collect();
144    let atomic_isotope_abundance: HashMap<String, Vec<f64>> = isotopic_abundance().iter().map(|(k, v)| (k.to_string(), v.clone())).collect();
145
146    for (element, &count) in atomic_composition.iter() {
147        let elemental_isotope_weights = atoms_isotopic_weights.get(element).expect("Element not found in isotopic weights table").clone();
148        let elemental_isotope_abundance = atomic_isotope_abundance.get(element).expect("Element not found in isotopic abundance table").clone();
149
150        let element_distribution: Vec<(f64, f64)> = elemental_isotope_weights.iter().zip(elemental_isotope_abundance.iter()).map(|(&mass, &abundance
151                                                                                                                                  )| (mass, abundance)).collect();
152
153        let element_power_distribution = if count > 1 {
154            convolve_pow(&element_distribution, count)
155        } else {
156            element_distribution
157        };
158
159        cumulative_distribution = match cumulative_distribution {
160            Some(cum_dist) => Some(convolve(&cum_dist, &element_power_distribution, mass_tolerance, abundance_threshold, max_result as usize)),
161            None => Some(element_power_distribution),
162        };
163    }
164
165    let final_distribution = cumulative_distribution.expect("Peptide has no elements");
166    // Normalize the distribution
167    let total_abundance: f64 = final_distribution.iter().map(|&(_, abundance)| abundance).sum();
168    let result: Vec<_> = final_distribution.into_iter().map(|(mass, abundance)| (mass, abundance / total_abundance)).collect();
169
170    let mut sort_map: BTreeMap<i64, f64> = BTreeMap::new();
171    let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
172
173    for (mz, intensity) in result {
174        let key = quantize(mz);
175        sort_map.entry(key).and_modify(|e| *e += intensity).or_insert(intensity);
176    }
177
178    let mz: Vec<f64> = sort_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
179    let intensity: Vec<f64> = sort_map.values().map(|&intensity| intensity).collect();
180    mz.iter().zip(intensity.iter()).map(|(&mz, &intensity)| (mz, intensity)).collect()
181}