rustms/algorithm/
isotope.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
use std::collections::{BTreeMap, HashMap};
use crate::chemistry::element::{atoms_isotopic_weights, isotopic_abundance};

/// convolve two distributions of masses and abundances
///
/// Arguments:
///
/// * `dist_a` - first distribution of masses and abundances
/// * `dist_b` - second distribution of masses and abundances
/// * `mass_tolerance` - mass tolerance for combining peaks
/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
/// * `max_results` - maximum number of peaks to include in the result
///
/// Returns:
///
/// * `Vec<(f64, f64)>` - combined distribution of masses and abundances
///
/// # Examples
///
/// ```
/// use rustms::algorithm::isotope::convolve;
///
/// let dist_a = vec![(100.0, 0.5), (101.0, 0.5)];
/// let dist_b = vec![(100.0, 0.5), (101.0, 0.5)];
/// let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200);
/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
/// ```
pub fn convolve(dist_a: &Vec<(f64, f64)>, dist_b: &Vec<(f64, f64)>, mass_tolerance: f64, abundance_threshold: f64, max_results: usize) -> Vec<(f64, f64)> {

    let mut result: Vec<(f64, f64)> = Vec::new();

    for (mass_a, abundance_a) in dist_a {
        for (mass_b, abundance_b) in dist_b {
            let combined_mass = mass_a + mass_b;
            let combined_abundance = abundance_a * abundance_b;

            // Skip entries with combined abundance below the threshold
            if combined_abundance < abundance_threshold {
                continue;
            }

            // Insert or update the combined mass in the result distribution
            if let Some(entry) = result.iter_mut().find(|(m, _)| (*m - combined_mass).abs() < mass_tolerance) {
                entry.1 += combined_abundance;
            } else {
                result.push((combined_mass, combined_abundance));
            }
        }
    }

    // Sort by abundance (descending) to prepare for trimming
    result.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());

    // Trim the vector if it exceeds max_results
    if result.len() > max_results {
        result.truncate(max_results);
    }

    // Optionally, sort by mass if needed for further processing
    result.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());

    result
}

/// convolve a distribution with itself n times
///
/// Arguments:
///
/// * `dist` - distribution of masses and abundances
/// * `n` - number of times to convolve the distribution with itself
///
/// Returns:
///
/// * `Vec<(f64, f64)>` - distribution of masses and abundances
///
/// # Examples
///
/// ```
/// use rustms::algorithm::isotope::convolve_pow;
///
/// let dist = vec![(100.0, 0.5), (101.0, 0.5)];
/// let result = convolve_pow(&dist, 2);
/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
/// ```
pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)> {
    if n == 0 {
        return vec![(0.0, 1.0)]; // Return the delta distribution
    }
    if n == 1 {
        return dist.clone();
    }

    let mut result = dist.clone();
    let mut power = 2;

    while power <= n {
        result = convolve(&result, &result, 1e-6, 1e-12, 200); // Square the result to get the next power of 2
        power *= 2;
    }

    // If n is not a power of 2, recursively fill in the remainder
    if power / 2 < n {
        result = convolve(&result, &convolve_pow(dist, n - power / 2, ), 1e-6, 1e-12, 200);
    }

    result
}

/// generate the isotope distribution for a given atomic composition
///
/// Arguments:
///
/// * `atomic_composition` - atomic composition of the peptide
/// * `mass_tolerance` - mass tolerance for combining peaks
/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
/// * `max_result` - maximum number of peaks to include in the result
///
/// Returns:
///
/// * `Vec<(f64, f64)>` - distribution of masses and abundances
///
/// # Examples
///
/// ```
/// use std::collections::HashMap;
/// use rustms::algorithm::isotope::generate_isotope_distribution;
///
/// let mut atomic_composition = HashMap::new();
/// atomic_composition.insert("C".to_string(), 5);
/// atomic_composition.insert("H".to_string(), 9);
/// atomic_composition.insert("N".to_string(), 1);
/// atomic_composition.insert("O".to_string(), 1);
/// let result = generate_isotope_distribution(&atomic_composition, 1e-6, 1e-12, 200);
/// ```
pub fn generate_isotope_distribution(
    atomic_composition: &HashMap<String, i32>,
    mass_tolerance: f64,
    abundance_threshold: f64,
    max_result: i32
) -> Vec<(f64, f64)> {

    let mut cumulative_distribution: Option<Vec<(f64, f64)>> = None;
    let atoms_isotopic_weights: HashMap<String, Vec<f64>> = atoms_isotopic_weights().iter().map(|(k, v)| (k.to_string(), v.clone())).collect();
    let atomic_isotope_abundance: HashMap<String, Vec<f64>> = isotopic_abundance().iter().map(|(k, v)| (k.to_string(), v.clone())).collect();

    for (element, &count) in atomic_composition.iter() {
        let elemental_isotope_weights = atoms_isotopic_weights.get(element).expect("Element not found in isotopic weights table").clone();
        let elemental_isotope_abundance = atomic_isotope_abundance.get(element).expect("Element not found in isotopic abundance table").clone();

        let element_distribution: Vec<(f64, f64)> = elemental_isotope_weights.iter().zip(elemental_isotope_abundance.iter()).map(|(&mass, &abundance
                                                                                                                                  )| (mass, abundance)).collect();

        let element_power_distribution = if count > 1 {
            convolve_pow(&element_distribution, count)
        } else {
            element_distribution
        };

        cumulative_distribution = match cumulative_distribution {
            Some(cum_dist) => Some(convolve(&cum_dist, &element_power_distribution, mass_tolerance, abundance_threshold, max_result as usize)),
            None => Some(element_power_distribution),
        };
    }

    let final_distribution = cumulative_distribution.expect("Peptide has no elements");
    // Normalize the distribution
    let total_abundance: f64 = final_distribution.iter().map(|&(_, abundance)| abundance).sum();
    let result: Vec<_> = final_distribution.into_iter().map(|(mass, abundance)| (mass, abundance / total_abundance)).collect();

    let mut sort_map: BTreeMap<i64, f64> = BTreeMap::new();
    let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };

    for (mz, intensity) in result {
        let key = quantize(mz);
        sort_map.entry(key).and_modify(|e| *e += intensity).or_insert(intensity);
    }

    let mz: Vec<f64> = sort_map.keys().map(|&key| key as f64 / 1_000_000.0).collect();
    let intensity: Vec<f64> = sort_map.values().map(|&intensity| intensity).collect();
    mz.iter().zip(intensity.iter()).map(|(&mz, &intensity)| (mz, intensity)).collect()
}