1use std::collections::HashMap;
2use regex::Regex;
3use itertools::Itertools;
4use serde::{Deserialize, Serialize};
5use crate::algorithm::peptide::{calculate_peptide_mono_isotopic_mass, calculate_peptide_product_ion_mono_isotopic_mass, peptide_sequence_to_atomic_composition};
6use crate::chemistry::formula::calculate_mz;
7use crate::chemistry::utility::{find_unimod_patterns, reshape_prosit_array, unimod_sequence_to_tokens};
8use crate::ms::spectrum::MzSpectrum;
9use crate::proteomics::amino_acid::amino_acid_masses;
10use bincode::{Encode, Decode};
11
12type Mass = f64;
14type Abundance = f64;
15type IsotopeDistribution = Vec<(Mass, Abundance)>;
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct PeptideIon {
19 pub sequence: PeptideSequence,
20 pub charge: i32,
21 pub intensity: f64,
22 pub ordinal: u32,
23}
24
25impl PeptideIon {
26 pub fn new(sequence: String, charge: i32, intensity: f64, ordinal: u32, peptide_id: Option<i32>) -> Self {
27 PeptideIon {
28 sequence: PeptideSequence::new(sequence, peptide_id),
29 charge,
30 intensity,
31 ordinal,
32 }
33 }
34 pub fn mz(&self) -> f64 {
35 calculate_mz(self.sequence.mono_isotopic_mass(), self.charge)
36 }
37
38 pub fn calculate_isotope_distribution(
39 &self,
40 mass_tolerance: f64,
41 abundance_threshold: f64,
42 max_result: i32,
43 intensity_min: f64,
44 ) -> IsotopeDistribution {
45
46 let atomic_composition: HashMap<String, i32> = self.sequence.atomic_composition().iter().map(|(k, v)| (k.to_string(), *v)).collect();
47
48 let distribution: IsotopeDistribution = crate::algorithm::isotope::generate_isotope_distribution(&atomic_composition, mass_tolerance, abundance_threshold, max_result)
49 .into_iter().filter(|&(_, abundance)| abundance > intensity_min).collect();
50
51 let mz_distribution = distribution.iter().map(|(mass, _)| calculate_mz(*mass, self.charge))
52 .zip(distribution.iter().map(|&(_, abundance)| abundance)).collect();
53
54 mz_distribution
55 }
56
57 pub fn calculate_isotopic_spectrum(
58 &self,
59 mass_tolerance: f64,
60 abundance_threshold: f64,
61 max_result: i32,
62 intensity_min: f64,
63 ) -> MzSpectrum {
64 let isotopic_distribution = self.calculate_isotope_distribution(mass_tolerance, abundance_threshold, max_result, intensity_min);
65 MzSpectrum::new(isotopic_distribution.iter().map(|(mz, _)| *mz).collect(), isotopic_distribution.iter().map(|(_, abundance)| *abundance).collect()) * self.intensity
66 }
67}
68
69#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
70pub enum FragmentType { A, B, C, X, Y, Z, }
71
72impl std::fmt::Display for FragmentType {
74 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
75 match self {
76 FragmentType::A => write!(f, "a"),
77 FragmentType::B => write!(f, "b"),
78 FragmentType::C => write!(f, "c"),
79 FragmentType::X => write!(f, "x"),
80 FragmentType::Y => write!(f, "y"),
81 FragmentType::Z => write!(f, "z"),
82 }
83 }
84}
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
87pub struct PeptideProductIon {
88 pub kind: FragmentType,
89 pub ion: PeptideIon,
90}
91
92impl PeptideProductIon {
93 pub fn new(kind: FragmentType, sequence: String, charge: i32, intensity: f64, ordinal: u32, peptide_id: Option<i32>) -> Self {
94 PeptideProductIon {
95 kind,
96 ion: PeptideIon {
97 sequence: PeptideSequence::new(sequence, peptide_id),
98 charge,
99 intensity,
100 ordinal,
101 },
102 }
103 }
104
105 pub fn mono_isotopic_mass(&self) -> f64 {
106 calculate_peptide_product_ion_mono_isotopic_mass(self.ion.sequence.sequence.as_str(), self.kind)
107 }
108
109 pub fn atomic_composition(&self) -> HashMap<&str, i32> {
110
111 let mut composition = peptide_sequence_to_atomic_composition(&self.ion.sequence);
112
113 match self.kind {
114 FragmentType::A => {
115 *composition.entry("H").or_insert(0) -= 2;
116 *composition.entry("O").or_insert(0) -= 2;
117 *composition.entry("C").or_insert(0) -= 1;
118 },
119
120 FragmentType::B => {
121 *composition.entry("H").or_insert(0) -= 2;
123 *composition.entry("O").or_insert(0) -= 1;
124 },
125
126 FragmentType::C => {
127 *composition.entry("H").or_insert(0) += 1;
129 *composition.entry("N").or_insert(0) += 1;
130 *composition.entry("O").or_insert(0) -= 1;
131 },
132
133 FragmentType::X => {
134 *composition.entry("C").or_insert(0) += 1;
136 *composition.entry("O").or_insert(0) += 1;
137 },
138
139 FragmentType::Y => {
140 ()
141 },
142
143 FragmentType::Z => {
144 *composition.entry("H").or_insert(0) -= 1;
145 *composition.entry("N").or_insert(0) -= 3;
146 },
147 }
148 composition
149 }
150
151 pub fn mz(&self) -> f64 {
152 calculate_mz(self.mono_isotopic_mass(), self.ion.charge)
153 }
154
155 pub fn isotope_distribution(
156 &self,
157 mass_tolerance: f64,
158 abundance_threshold: f64,
159 max_result: i32,
160 intensity_min: f64,
161 ) -> IsotopeDistribution {
162
163 let atomic_composition: HashMap<String, i32> = self.atomic_composition().iter().map(|(k, v)| (k.to_string(), *v)).collect();
164
165 let distribution: IsotopeDistribution = crate::algorithm::isotope::generate_isotope_distribution(&atomic_composition, mass_tolerance, abundance_threshold, max_result)
166 .into_iter().filter(|&(_, abundance)| abundance > intensity_min).collect();
167
168 let mz_distribution = distribution.iter().map(|(mass, _)| calculate_mz(*mass, self.ion.charge)).zip(distribution.iter().map(|&(_, abundance)| abundance)).collect();
169
170 mz_distribution
171 }
172}
173
174#[derive(Debug, Clone, Serialize, Deserialize, Encode, Decode)]
175pub struct PeptideSequence {
176 pub sequence: String,
177 pub peptide_id: Option<i32>,
178}
179
180impl PeptideSequence {
181 pub fn new(raw_sequence: String, peptide_id: Option<i32>) -> Self {
182
183 let pattern = Regex::new(r"\[UNIMOD:(\d+|\?)]").unwrap();
185
186 let sequence = pattern.replace_all(&raw_sequence, "").to_string();
188
189 let valid_amino_acids = sequence.chars().all(|c| amino_acid_masses().contains_key(&c.to_string()[..]));
191 if !valid_amino_acids {
192 panic!("Invalid amino acid sequence: {}, use only valid amino acids: ARNDCQEGHILKMFPSTWYVU, and modifications in the format [UNIMOD:ID]", raw_sequence);
193 }
194
195 PeptideSequence { sequence: raw_sequence, peptide_id }
196 }
197
198 pub fn mono_isotopic_mass(&self) -> f64 {
199 calculate_peptide_mono_isotopic_mass(self)
200 }
201
202 pub fn atomic_composition(&self) -> HashMap<&str, i32> {
203 peptide_sequence_to_atomic_composition(self)
204 }
205
206 pub fn to_tokens(&self, group_modifications: bool) -> Vec<String> {
207 unimod_sequence_to_tokens(&*self.sequence, group_modifications)
208 }
209
210 pub fn to_sage_representation(&self) -> (String, Vec<f64>) {
211 find_unimod_patterns(&*self.sequence)
212 }
213
214 pub fn amino_acid_count(&self) -> usize {
215 self.to_tokens(true).len()
216 }
217
218 pub fn calculate_mono_isotopic_product_ion_spectrum(&self, charge: i32, fragment_type: FragmentType) -> MzSpectrum {
219 let product_ions = self.calculate_product_ion_series(charge, fragment_type);
220 product_ions.generate_mono_isotopic_spectrum()
221 }
222
223 pub fn calculate_isotopic_product_ion_spectrum(&self, charge: i32, fragment_type: FragmentType, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrum {
224 let product_ions = self.calculate_product_ion_series(charge, fragment_type);
225 product_ions.generate_isotopic_spectrum(mass_tolerance, abundance_threshold, max_result, intensity_min)
226 }
227
228 pub fn calculate_product_ion_series(&self, target_charge: i32, fragment_type: FragmentType) -> PeptideProductIonSeries {
229 let tokens = unimod_sequence_to_tokens(self.sequence.as_str(), true);
231 let mut n_terminal_ions = Vec::new();
232 let mut c_terminal_ions = Vec::new();
233
234 for i in 1..tokens.len() {
236 let n_ion_seq = tokens[..i].join("");
237 n_terminal_ions.push(PeptideProductIon {
238 kind: match fragment_type {
239 FragmentType::A => FragmentType::A,
240 FragmentType::B => FragmentType::B,
241 FragmentType::C => FragmentType::C,
242 FragmentType::X => FragmentType::A,
243 FragmentType::Y => FragmentType::B,
244 FragmentType::Z => FragmentType::C,
245 },
246 ion: PeptideIon {
247 sequence: PeptideSequence {
248 sequence: n_ion_seq,
249 peptide_id: self.peptide_id,
250 },
251 charge: target_charge,
252 intensity: 1.0, ordinal: i as u32,
254 },
255 });
256 }
257
258 for i in 1..tokens.len() {
260 let c_ion_seq = tokens[tokens.len() - i..].join("");
261 c_terminal_ions.push(PeptideProductIon {
262 kind: match fragment_type {
263 FragmentType::A => FragmentType::X,
264 FragmentType::B => FragmentType::Y,
265 FragmentType::C => FragmentType::Z,
266 FragmentType::X => FragmentType::X,
267 FragmentType::Y => FragmentType::Y,
268 FragmentType::Z => FragmentType::Z,
269 },
270 ion: PeptideIon {
271 sequence: PeptideSequence {
272 sequence: c_ion_seq,
273 peptide_id: self.peptide_id,
274 },
275 charge: target_charge,
276 intensity: 1.0, ordinal: i as u32,
278 },
279 });
280 }
281
282 PeptideProductIonSeries::new(target_charge, n_terminal_ions, c_terminal_ions)
283 }
284
285 pub fn associate_with_predicted_intensities(
286 &self,
287 charge: i32,
289 fragment_type: FragmentType,
290 flat_intensities: Vec<f64>,
291 normalize: bool,
292 half_charge_one: bool,
293 ) -> PeptideProductIonSeriesCollection {
294
295 let reshaped_intensities = reshape_prosit_array(flat_intensities);
296 let max_charge = std::cmp::min(charge, 3).max(1); let mut sum_intensity = if normalize { 0.0 } else { 1.0 };
298 let num_tokens = self.amino_acid_count() - 1; let mut peptide_ion_collection = Vec::new();
301
302 if normalize {
303 for z in 1..=max_charge {
304
305 let intensity_c: Vec<f64> = reshaped_intensities[..num_tokens].iter().map(|x| x[0][z as usize - 1]).filter(|&x| x > 0.0).collect();
306 let intensity_n: Vec<f64> = reshaped_intensities[..num_tokens].iter().map(|x| x[1][z as usize - 1]).filter(|&x| x > 0.0).collect();
307
308 sum_intensity += intensity_n.iter().sum::<f64>() + intensity_c.iter().sum::<f64>();
309 }
310 }
311
312 for z in 1..=max_charge {
313
314 let mut product_ions = self.calculate_product_ion_series(z, fragment_type);
315 let intensity_n: Vec<f64> = reshaped_intensities[..num_tokens].iter().map(|x| x[1][z as usize - 1]).collect();
316 let intensity_c: Vec<f64> = reshaped_intensities[..num_tokens].iter().map(|x| x[0][z as usize - 1]).collect(); let adjusted_sum_intensity = if max_charge == 1 && half_charge_one { sum_intensity * 2.0 } else { sum_intensity };
319
320 for (i, ion) in product_ions.n_ions.iter_mut().enumerate() {
321 ion.ion.intensity = intensity_n[i] / adjusted_sum_intensity;
322 }
323 for (i, ion) in product_ions.c_ions.iter_mut().enumerate() {
324 ion.ion.intensity = intensity_c[i] / adjusted_sum_intensity;
325 }
326
327 peptide_ion_collection.push(PeptideProductIonSeries::new(z, product_ions.n_ions, product_ions.c_ions));
328 }
329
330 PeptideProductIonSeriesCollection::new(peptide_ion_collection)
331 }
332}
333
334#[derive(Debug, Clone, Serialize, Deserialize)]
335pub struct PeptideProductIonSeries {
336 pub charge: i32,
337 pub n_ions: Vec<PeptideProductIon>,
338 pub c_ions: Vec<PeptideProductIon>,
339}
340
341impl PeptideProductIonSeries {
342 pub fn new(charge: i32, n_ions: Vec<PeptideProductIon>, c_ions: Vec<PeptideProductIon>) -> Self {
343 PeptideProductIonSeries {
344 charge,
345 n_ions,
346 c_ions,
347 }
348 }
349 pub fn generate_mono_isotopic_spectrum(&self) -> MzSpectrum {
350 let mz_i_n = self.n_ions.iter().map(|ion| (ion.mz(), ion.ion.intensity)).collect_vec();
351 let mz_i_c = self.c_ions.iter().map(|ion| (ion.mz(), ion.ion.intensity)).collect_vec();
352 let n_spectrum = MzSpectrum::new(mz_i_n.iter().map(|(mz, _)| *mz).collect(), mz_i_n.iter().map(|(_, abundance)| *abundance).collect());
353 let c_spectrum = MzSpectrum::new(mz_i_c.iter().map(|(mz, _)| *mz).collect(), mz_i_c.iter().map(|(_, abundance)| *abundance).collect());
354 MzSpectrum::from_collection(vec![n_spectrum, c_spectrum]).filter_ranged(0.0, 5_000.0, 1e-6, 1e6)
355 }
356
357 pub fn generate_isotopic_spectrum(&self, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrum {
358 let mut spectra: Vec<MzSpectrum> = Vec::new();
359
360 for ion in &self.n_ions {
361 let n_isotopes = ion.isotope_distribution(mass_tolerance, abundance_threshold, max_result, intensity_min);
362 let spectrum = MzSpectrum::new(n_isotopes.iter().map(|(mz, _)| *mz).collect(), n_isotopes.iter().map(|(_, abundance)| *abundance * ion.ion.intensity).collect());
363 spectra.push(spectrum);
364 }
365
366 for ion in &self.c_ions {
367 let c_isotopes = ion.isotope_distribution(mass_tolerance, abundance_threshold, max_result, intensity_min);
368 let spectrum = MzSpectrum::new(c_isotopes.iter().map(|(mz, _)| *mz).collect(), c_isotopes.iter().map(|(_, abundance)| *abundance * ion.ion.intensity).collect());
369 spectra.push(spectrum);
370 }
371
372 MzSpectrum::from_collection(spectra).filter_ranged(0.0, 5_000.0, 1e-6, 1e6)
373 }
374}
375
376#[derive(Debug, Clone, Serialize, Deserialize)]
377pub struct PeptideProductIonSeriesCollection {
378 pub peptide_ions: Vec<PeptideProductIonSeries>,
379}
380impl PeptideProductIonSeriesCollection {
381 pub fn new(peptide_ions: Vec<PeptideProductIonSeries>) -> Self {
382 PeptideProductIonSeriesCollection {
383 peptide_ions,
384 }
385 }
386
387 pub fn find_ion_series(&self, charge: i32) -> Option<&PeptideProductIonSeries> {
388 self.peptide_ions.iter().find(|ion_series| ion_series.charge == charge)
389 }
390
391 pub fn generate_isotopic_spectrum(&self, mass_tolerance: f64, abundance_threshold: f64, max_result: i32, intensity_min: f64) -> MzSpectrum {
392 let mut spectra: Vec<MzSpectrum> = Vec::new();
393
394 for ion_series in &self.peptide_ions {
395 let isotopic_spectrum = ion_series.generate_isotopic_spectrum(mass_tolerance, abundance_threshold, max_result, intensity_min);
396 spectra.push(isotopic_spectrum);
397 }
398
399 MzSpectrum::from_collection(spectra).filter_ranged(0.0, 5_000.0, 1e-6, 1e6)
400 }
401}