rustdf/sim/
precursor.rs

1use mscore::data::peptide::PeptideIon;
2use mscore::data::spectrum::{IndexedMzSpectrum, MsType, MzSpectrum};
3use mscore::simulation::annotation::{
4    MzSpectrumAnnotated, PeakAnnotation, TimsFrameAnnotated, TimsSpectrumAnnotated,
5};
6use mscore::timstof::frame::TimsFrame;
7use mscore::timstof::spectrum::TimsSpectrum;
8use rusqlite::Result;
9use std::collections::{BTreeMap, HashSet};
10use std::path::Path;
11
12use crate::sim::containers::{FramesSim, IonSim, PeptidesSim, ScansSim};
13use crate::sim::handle::TimsTofSyntheticsDataHandle;
14use rayon::prelude::*;
15use rayon::ThreadPoolBuilder;
16
17pub struct TimsTofSyntheticsPrecursorFrameBuilder {
18    pub ions: BTreeMap<u32, Vec<IonSim>>,
19    pub peptides: BTreeMap<u32, PeptidesSim>,
20    pub scans: Vec<ScansSim>,
21    pub frames: Vec<FramesSim>,
22    pub precursor_frame_id_set: HashSet<u32>,
23    pub frame_to_abundances: BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
24    pub peptide_to_ions: BTreeMap<
25        u32,
26        (
27            Vec<f32>,
28            Vec<Vec<u32>>,
29            Vec<Vec<f32>>,
30            Vec<i8>,
31            Vec<MzSpectrum>,
32        ),
33    >,
34    pub frame_to_rt: BTreeMap<u32, f32>,
35    pub scan_to_mobility: BTreeMap<u32, f32>,
36    pub peptide_to_events: BTreeMap<u32, f32>,
37}
38
39impl TimsTofSyntheticsPrecursorFrameBuilder {
40    /// Create a new instance of TimsTofSynthetics
41    ///
42    /// # Arguments
43    ///
44    /// * `path` - A reference to a Path
45    ///
46    /// # Returns
47    ///
48    /// * A Result containing the TimsTofSynthetics instance
49    ///
50    pub fn new(path: &Path) -> Result<Self> {
51        let handle = TimsTofSyntheticsDataHandle::new(path)?;
52        let ions = handle.read_ions()?;
53        let peptides = handle.read_peptides()?;
54        let scans = handle.read_scans()?;
55        let frames = handle.read_frames()?;
56        Ok(Self {
57            ions: TimsTofSyntheticsDataHandle::build_peptide_to_ion_map(&ions),
58            peptides: TimsTofSyntheticsDataHandle::build_peptide_map(&peptides),
59            scans: scans.clone(),
60            frames: frames.clone(),
61            precursor_frame_id_set: TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(
62                &frames,
63            ),
64            frame_to_abundances: TimsTofSyntheticsDataHandle::build_frame_to_abundances(&peptides),
65            peptide_to_ions: TimsTofSyntheticsDataHandle::build_peptide_to_ions(&ions),
66            frame_to_rt: TimsTofSyntheticsDataHandle::build_frame_to_rt(&frames),
67            scan_to_mobility: TimsTofSyntheticsDataHandle::build_scan_to_mobility(&scans),
68            peptide_to_events: TimsTofSyntheticsDataHandle::build_peptide_to_events(&peptides),
69        })
70    }
71
72    /// Build a precursor frame
73    ///
74    /// # Arguments
75    ///
76    /// * `frame_id` - A u32 representing the frame id
77    ///
78    /// # Returns
79    ///
80    /// * A TimsFrame instance
81    pub fn build_precursor_frame(
82        &self,
83        frame_id: u32,
84        mz_noise_precursor: bool,
85        uniform: bool,
86        precursor_noise_ppm: f64,
87        right_drag: bool,
88    ) -> TimsFrame {
89        let ms_type = match self.precursor_frame_id_set.contains(&frame_id) {
90            true => MsType::Precursor,
91            false => MsType::Unknown,
92        };
93
94        let mut tims_spectra: Vec<TimsSpectrum> = Vec::new();
95
96        // Frame might not have any peptides
97        if !self.frame_to_abundances.contains_key(&frame_id) {
98            return TimsFrame::new(
99                frame_id as i32,
100                ms_type.clone(),
101                *self.frame_to_rt.get(&frame_id).unwrap() as f64,
102                vec![],
103                vec![],
104                vec![],
105                vec![],
106                vec![],
107            );
108        }
109        // Get the peptide ids and abundances for the frame, should now save to unwrap since we checked if the frame is in the map
110        let (peptide_ids, abundances) = self.frame_to_abundances.get(&frame_id).unwrap();
111
112        // go over all peptides and their abundances in the frame
113        for (peptide_id, abundance) in peptide_ids.iter().zip(abundances.iter()) {
114            // jump to next peptide if the peptide_id is not in the peptide_to_ions map
115            if !self.peptide_to_ions.contains_key(&peptide_id) {
116                continue;
117            }
118
119            // one peptide can have multiple ions, occurring in multiple scans
120            let (ion_abundances, scan_occurrences, scan_abundances, _, spectra) =
121                self.peptide_to_ions.get(&peptide_id).unwrap();
122
123            for (index, ion_abundance) in ion_abundances.iter().enumerate() {
124                let scan_occurrence = scan_occurrences.get(index).unwrap();
125                let scan_abundance = scan_abundances.get(index).unwrap();
126                let spectrum = spectra.get(index).unwrap();
127
128                for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) {
129                    let abundance_factor = abundance
130                        * ion_abundance
131                        * scan_abu
132                        * self.peptide_to_events.get(&peptide_id).unwrap();
133                    let scan_id = *scan;
134                    let scaled_spec: MzSpectrum = spectrum.clone() * abundance_factor as f64;
135
136                    let mz_spectrum = if mz_noise_precursor {
137                        match uniform {
138                            true => {
139                                scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag)
140                            }
141                            false => scaled_spec.add_mz_noise_normal(precursor_noise_ppm),
142                        }
143                    } else {
144                        scaled_spec
145                    };
146
147                    let tims_spec = TimsSpectrum::new(
148                        frame_id as i32,
149                        *scan as i32,
150                        *self.frame_to_rt.get(&frame_id).unwrap() as f64,
151                        *self.scan_to_mobility.get(&scan_id).unwrap() as f64,
152                        ms_type.clone(),
153                        IndexedMzSpectrum::new(
154                            vec![0; mz_spectrum.mz.len()],
155                            mz_spectrum.mz,
156                            mz_spectrum.intensity,
157                        ),
158                    );
159                    tims_spectra.push(tims_spec);
160                }
161            }
162        }
163
164        let tims_frame = TimsFrame::from_tims_spectra(tims_spectra);
165
166        tims_frame.filter_ranged(0.0, 10000.0, 0, 2000, 0.0, 10.0, 1.0, 1e9)
167    }
168
169    /// Build a collection of precursor frames in parallel
170    ///
171    /// # Arguments
172    ///
173    /// * `frame_ids` - A vector of u32 representing the frame ids
174    /// * `num_threads` - A usize representing the number of threads
175    ///
176    /// # Returns
177    ///
178    /// * A vector of TimsFrame instances
179    ///
180    pub fn build_precursor_frames(
181        &self,
182        frame_ids: Vec<u32>,
183        mz_noise_precursor: bool,
184        uniform: bool,
185        precursor_noise_ppm: f64,
186        right_drag: bool,
187        num_threads: usize,
188    ) -> Vec<TimsFrame> {
189        let thread_pool = ThreadPoolBuilder::new()
190            .num_threads(num_threads)
191            .build()
192            .unwrap();
193        let mut tims_frames: Vec<TimsFrame> = Vec::new();
194
195        thread_pool.install(|| {
196            tims_frames = frame_ids
197                .par_iter()
198                .map(|frame_id| {
199                    self.build_precursor_frame(
200                        *frame_id,
201                        mz_noise_precursor,
202                        uniform,
203                        precursor_noise_ppm,
204                        right_drag,
205                    )
206                })
207                .collect();
208        });
209
210        tims_frames.sort_by(|a, b| a.frame_id.cmp(&b.frame_id));
211
212        tims_frames
213    }
214
215    pub fn build_precursor_frame_annotated(
216        &self,
217        frame_id: u32,
218        mz_noise_precursor: bool,
219        uniform: bool,
220        precursor_noise_ppm: f64,
221        right_drag: bool,
222    ) -> TimsFrameAnnotated {
223        let ms_type = match self.precursor_frame_id_set.contains(&frame_id) {
224            true => MsType::Precursor,
225            false => MsType::Unknown,
226        };
227
228        // no peptides in the frame
229        if !self.frame_to_abundances.contains_key(&frame_id) {
230            return TimsFrameAnnotated::new(
231                frame_id as i32,
232                0.0,
233                ms_type.clone(),
234                vec![],
235                vec![],
236                vec![],
237                vec![],
238                vec![],
239                vec![],
240            );
241        }
242
243        let (peptide_ids, abundances) = self.frame_to_abundances.get(&frame_id).unwrap();
244        let mut tims_spectra: Vec<TimsSpectrumAnnotated> = Vec::new();
245
246        for (peptide_id, abundance) in peptide_ids.iter().zip(abundances.iter()) {
247            // jump to next peptide if the peptide_id is not in the peptide_to_ions map
248            if !self.peptide_to_ions.contains_key(&peptide_id) {
249                continue;
250            }
251
252            let (ion_abundances, scan_occurrences, scan_abundances, charges, _) =
253                self.peptide_to_ions.get(&peptide_id).unwrap();
254
255            for (index, ion_abundance) in ion_abundances.iter().enumerate() {
256                let scan_occurrence = scan_occurrences.get(index).unwrap();
257                let scan_abundance = scan_abundances.get(index).unwrap();
258                let charge = charges.get(index).unwrap();
259                let peptide = self.peptides.get(peptide_id).unwrap();
260                let ion = PeptideIon::new(
261                    peptide.sequence.sequence.clone(),
262                    *charge as i32,
263                    *ion_abundance as f64,
264                    Some(*peptide_id as i32),
265                );
266                // TODO: make this configurable
267                let spectrum = ion.calculate_isotopic_spectrum_annotated(1e-3, 1e-8, 200, 1e-4);
268
269                for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) {
270                    let abundance_factor = abundance
271                        * ion_abundance
272                        * scan_abu
273                        * self.peptide_to_events.get(&peptide_id).unwrap();
274                    let scan_id = *scan;
275                    let scaled_spec: MzSpectrumAnnotated =
276                        spectrum.clone() * abundance_factor as f64;
277
278                    let mz_spectrum = if mz_noise_precursor {
279                        match uniform {
280                            true => {
281                                scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag)
282                            }
283                            false => scaled_spec.add_mz_noise_normal(precursor_noise_ppm),
284                        }
285                    } else {
286                        scaled_spec
287                    };
288
289                    let tims_spec = TimsSpectrumAnnotated::new(
290                        frame_id as i32,
291                        *scan,
292                        *self.frame_to_rt.get(&frame_id).unwrap() as f64,
293                        *self.scan_to_mobility.get(&scan_id).unwrap() as f64,
294                        ms_type.clone(),
295                        vec![0; mz_spectrum.mz.len()],
296                        mz_spectrum,
297                    );
298                    tims_spectra.push(tims_spec);
299                }
300            }
301        }
302
303        let tims_frame = TimsFrameAnnotated::from_tims_spectra_annotated(tims_spectra);
304
305        let filtered_frame = tims_frame.filter_ranged(0.0, 2000.0, 0.0, 2.0, 0, 1000, 1.0, 1e9);
306
307        TimsFrameAnnotated {
308            frame_id: filtered_frame.frame_id,
309            retention_time: filtered_frame.retention_time,
310            ms_type: filtered_frame.ms_type,
311            tof: filtered_frame.tof,
312            mz: filtered_frame.mz,
313            scan: filtered_frame.scan,
314            inv_mobility: filtered_frame.inv_mobility,
315            intensity: filtered_frame.intensity,
316            annotations: filtered_frame
317                .annotations
318                .iter()
319                .map(|x| {
320                    let mut contributions = x.contributions.clone();
321                    contributions.sort_by(|a, b| {
322                        a.intensity_contribution
323                            .partial_cmp(&b.intensity_contribution)
324                            .unwrap()
325                    });
326                    PeakAnnotation {
327                        contributions,
328                        ..*x
329                    }
330                })
331                .collect::<Vec<PeakAnnotation>>(),
332        }
333    }
334
335    pub fn build_precursor_frames_annotated(
336        &self,
337        frame_ids: Vec<u32>,
338        mz_noise_precursor: bool,
339        uniform: bool,
340        precursor_noise_ppm: f64,
341        right_drag: bool,
342        num_threads: usize,
343    ) -> Vec<TimsFrameAnnotated> {
344        let thread_pool = ThreadPoolBuilder::new()
345            .num_threads(num_threads)
346            .build()
347            .unwrap();
348        let mut tims_frames: Vec<TimsFrameAnnotated> = Vec::new();
349
350        thread_pool.install(|| {
351            tims_frames = frame_ids
352                .par_iter()
353                .map(|frame_id| {
354                    self.build_precursor_frame_annotated(
355                        *frame_id,
356                        mz_noise_precursor,
357                        uniform,
358                        precursor_noise_ppm,
359                        right_drag,
360                    )
361                })
362                .collect();
363        });
364
365        tims_frames.sort_by(|a, b| a.frame_id.cmp(&b.frame_id));
366
367        tims_frames
368    }
369}