rustdf/sim/
precursor.rs

1use mscore::data::peptide::PeptideIon;
2use mscore::data::spectrum::{IndexedMzSpectrum, MsType, MzSpectrum};
3use mscore::simulation::annotation::{
4    MzSpectrumAnnotated, TimsFrameAnnotated, TimsSpectrumAnnotated,
5};
6use mscore::timstof::frame::TimsFrame;
7use mscore::timstof::spectrum::TimsSpectrum;
8use rusqlite::Result;
9use std::collections::{BTreeMap, HashSet};
10use std::path::Path;
11
12use crate::sim::containers::{FramesSim, IonSim, PeptidesSim, ScansSim};
13use crate::sim::handle::TimsTofSyntheticsDataHandle;
14use rayon::prelude::*;
15
16pub struct TimsTofSyntheticsPrecursorFrameBuilder {
17    pub ions: BTreeMap<u32, Vec<IonSim>>,
18    pub peptides: BTreeMap<u32, PeptidesSim>,
19    pub scans: Vec<ScansSim>,
20    pub frames: Vec<FramesSim>,
21    pub precursor_frame_id_set: HashSet<u32>,
22    pub frame_to_abundances: BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
23    pub peptide_to_ions: BTreeMap<
24        u32,
25        (
26            Vec<f32>,
27            Vec<Vec<u32>>,
28            Vec<Vec<f32>>,
29            Vec<i8>,
30            Vec<MzSpectrum>,
31        ),
32    >,
33    pub frame_to_rt: BTreeMap<u32, f32>,
34    pub scan_to_mobility: BTreeMap<u32, f32>,
35    pub peptide_to_events: BTreeMap<u32, f32>,
36    /// Mapping from ion_id to (peptide_id, charge) for DDA precursor lookup
37    pub ion_id_to_peptide_charge: BTreeMap<u32, (u32, i8)>,
38}
39
40impl TimsTofSyntheticsPrecursorFrameBuilder {
41    /// Create a new instance of TimsTofSynthetics
42    ///
43    /// # Arguments
44    ///
45    /// * `path` - A reference to a Path
46    ///
47    /// # Returns
48    ///
49    /// * A Result containing the TimsTofSynthetics instance
50    ///
51    pub fn new(path: &Path) -> Result<Self> {
52        let handle = TimsTofSyntheticsDataHandle::new(path)?;
53        let ions = handle.read_ions()?;
54        let peptides = handle.read_peptides()?;
55        let scans = handle.read_scans()?;
56        let frames = handle.read_frames()?;
57
58        // Build ion_id to (peptide_id, charge) mapping for DDA precursor lookup
59        let mut ion_id_to_peptide_charge: BTreeMap<u32, (u32, i8)> = BTreeMap::new();
60        for ion in &ions {
61            ion_id_to_peptide_charge.insert(ion.ion_id, (ion.peptide_id, ion.charge));
62        }
63
64        Ok(Self {
65            ions: TimsTofSyntheticsDataHandle::build_peptide_to_ion_map(&ions),
66            peptides: TimsTofSyntheticsDataHandle::build_peptide_map(&peptides),
67            scans: scans.clone(),
68            frames: frames.clone(),
69            precursor_frame_id_set: TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(
70                &frames,
71            ),
72            frame_to_abundances: TimsTofSyntheticsDataHandle::build_frame_to_abundances(&peptides),
73            peptide_to_ions: TimsTofSyntheticsDataHandle::build_peptide_to_ions(&ions),
74            frame_to_rt: TimsTofSyntheticsDataHandle::build_frame_to_rt(&frames),
75            scan_to_mobility: TimsTofSyntheticsDataHandle::build_scan_to_mobility(&scans),
76            peptide_to_events: TimsTofSyntheticsDataHandle::build_peptide_to_events(&peptides),
77            ion_id_to_peptide_charge,
78        })
79    }
80
81    /// Build a precursor frame
82    ///
83    /// # Arguments
84    ///
85    /// * `frame_id` - A u32 representing the frame id
86    ///
87    /// # Returns
88    ///
89    /// * A TimsFrame instance
90    pub fn build_precursor_frame(
91        &self,
92        frame_id: u32,
93        mz_noise_precursor: bool,
94        uniform: bool,
95        precursor_noise_ppm: f64,
96        right_drag: bool,
97    ) -> TimsFrame {
98        // Cache frame-level lookups
99        let ms_type = if self.precursor_frame_id_set.contains(&frame_id) {
100            MsType::Precursor
101        } else {
102            MsType::Unknown
103        };
104        let rt = *self.frame_to_rt.get(&frame_id).unwrap() as f64;
105
106        // Single lookup instead of contains_key + get
107        let Some((peptide_ids, abundances)) = self.frame_to_abundances.get(&frame_id) else {
108            return TimsFrame::new(frame_id as i32, ms_type, rt, vec![], vec![], vec![], vec![], vec![]);
109        };
110
111        // Preallocate with estimated capacity
112        let estimated_capacity = peptide_ids.len() * 4;
113        let mut tims_spectra: Vec<TimsSpectrum> = Vec::with_capacity(estimated_capacity);
114
115        // Go over all peptides and their abundances in the frame
116        for (peptide_id, abundance) in peptide_ids.iter().zip(abundances.iter()) {
117            // Single lookup instead of contains_key + get
118            let Some((ion_abundances, scan_occurrences, scan_abundances, _, spectra)) =
119                self.peptide_to_ions.get(peptide_id)
120            else {
121                continue;
122            };
123
124            // Cache peptide-level lookup
125            let total_events = *self.peptide_to_events.get(peptide_id).unwrap();
126
127            for (index, ion_abundance) in ion_abundances.iter().enumerate() {
128                let scan_occurrence = &scan_occurrences[index];
129                let scan_abundance = &scan_abundances[index];
130                let spectrum = &spectra[index];
131
132                for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) {
133                    let abundance_factor = abundance * ion_abundance * scan_abu * total_events;
134                    let scaled_spec: MzSpectrum = spectrum.clone() * abundance_factor as f64;
135
136                    let mz_spectrum = if mz_noise_precursor {
137                        if uniform {
138                            scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag)
139                        } else {
140                            scaled_spec.add_mz_noise_normal(precursor_noise_ppm)
141                        }
142                    } else {
143                        scaled_spec
144                    };
145
146                    // Cache scan mobility
147                    let scan_mobility = *self.scan_to_mobility.get(scan).unwrap() as f64;
148                    let spectrum_len = mz_spectrum.mz.len();
149
150                    tims_spectra.push(TimsSpectrum::new(
151                        frame_id as i32,
152                        *scan as i32,
153                        rt,
154                        scan_mobility,
155                        ms_type.clone(),
156                        IndexedMzSpectrum::from_mz_spectrum(
157                            vec![0; spectrum_len],
158                            mz_spectrum,
159                        ),
160                    ));
161                }
162            }
163        }
164
165        let tims_frame = TimsFrame::from_tims_spectra(tims_spectra);
166        tims_frame.filter_ranged(0.0, 10000.0, 0, 2000, 0.0, 10.0, 1.0, 1e9, 0, i32::MAX)
167    }
168
169    /// Build a collection of precursor frames in parallel
170    ///
171    /// # Arguments
172    ///
173    /// * `frame_ids` - A vector of u32 representing the frame ids
174    /// * `num_threads` - A usize representing the number of threads
175    ///
176    /// # Returns
177    ///
178    /// * A vector of TimsFrame instances
179    ///
180    pub fn build_precursor_frames(
181        &self,
182        frame_ids: Vec<u32>,
183        mz_noise_precursor: bool,
184        uniform: bool,
185        precursor_noise_ppm: f64,
186        right_drag: bool,
187        num_threads: usize,
188    ) -> Vec<TimsFrame> {
189        let pool = rayon::ThreadPoolBuilder::new()
190            .num_threads(num_threads)
191            .build()
192            .unwrap();
193
194        pool.install(|| {
195            // Use indexed parallel iteration to maintain order, avoiding post-sort
196            let mut tims_frames: Vec<TimsFrame> = Vec::with_capacity(frame_ids.len());
197            unsafe { tims_frames.set_len(frame_ids.len()); }
198
199            frame_ids.par_iter().enumerate().for_each(|(idx, frame_id)| {
200                let frame = self.build_precursor_frame(
201                    *frame_id,
202                    mz_noise_precursor,
203                    uniform,
204                    precursor_noise_ppm,
205                    right_drag,
206                );
207                unsafe {
208                    let ptr = tims_frames.as_ptr() as *mut TimsFrame;
209                    std::ptr::write(ptr.add(idx), frame);
210                }
211            });
212
213            tims_frames
214        })
215    }
216
217    pub fn build_precursor_frame_annotated(
218        &self,
219        frame_id: u32,
220        mz_noise_precursor: bool,
221        uniform: bool,
222        precursor_noise_ppm: f64,
223        right_drag: bool,
224    ) -> TimsFrameAnnotated {
225        // Cache frame-level lookups
226        let ms_type = if self.precursor_frame_id_set.contains(&frame_id) {
227            MsType::Precursor
228        } else {
229            MsType::Unknown
230        };
231        let rt = *self.frame_to_rt.get(&frame_id).unwrap_or(&0.0) as f64;
232
233        // Single lookup instead of contains_key + get
234        let Some((peptide_ids, abundances)) = self.frame_to_abundances.get(&frame_id) else {
235            return TimsFrameAnnotated::new(frame_id as i32, rt, ms_type, vec![], vec![], vec![], vec![], vec![], vec![]);
236        };
237
238        // Preallocate with estimated capacity
239        let estimated_capacity = peptide_ids.len() * 4;
240        let mut tims_spectra: Vec<TimsSpectrumAnnotated> = Vec::with_capacity(estimated_capacity);
241
242        for (peptide_id, abundance) in peptide_ids.iter().zip(abundances.iter()) {
243            // Single lookup
244            let Some((ion_abundances, scan_occurrences, scan_abundances, charges, _)) =
245                self.peptide_to_ions.get(peptide_id)
246            else {
247                continue;
248            };
249
250            // Cache peptide-level lookups
251            let total_events = *self.peptide_to_events.get(peptide_id).unwrap();
252            let peptide = self.peptides.get(peptide_id).unwrap();
253
254            for (index, ion_abundance) in ion_abundances.iter().enumerate() {
255                let scan_occurrence = &scan_occurrences[index];
256                let scan_abundance = &scan_abundances[index];
257                let charge = charges[index];
258
259                let ion = PeptideIon::new(
260                    peptide.sequence.sequence.clone(),
261                    charge as i32,
262                    *ion_abundance as f64,
263                    Some(*peptide_id as i32),
264                );
265                // TODO: make this configurable
266                let spectrum = ion.calculate_isotopic_spectrum_annotated(1e-3, 1e-8, 200, 1e-4);
267
268                for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) {
269                    let abundance_factor = abundance * ion_abundance * scan_abu * total_events;
270                    let scaled_spec: MzSpectrumAnnotated = spectrum.clone() * abundance_factor as f64;
271
272                    let mz_spectrum = if mz_noise_precursor {
273                        if uniform {
274                            scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag)
275                        } else {
276                            scaled_spec.add_mz_noise_normal(precursor_noise_ppm)
277                        }
278                    } else {
279                        scaled_spec
280                    };
281
282                    // Cache scan mobility
283                    let scan_mobility = *self.scan_to_mobility.get(scan).unwrap() as f64;
284                    let spectrum_len = mz_spectrum.mz.len();
285
286                    tims_spectra.push(TimsSpectrumAnnotated::new(
287                        frame_id as i32,
288                        *scan,
289                        rt,
290                        scan_mobility,
291                        ms_type.clone(),
292                        vec![0; spectrum_len],
293                        mz_spectrum,
294                    ));
295                }
296            }
297        }
298
299        let tims_frame = TimsFrameAnnotated::from_tims_spectra_annotated(tims_spectra);
300        let filtered_frame = tims_frame.filter_ranged(0.0, 2000.0, 0.0, 2.0, 0, 1000, 1.0, 1e9);
301
302        TimsFrameAnnotated {
303            frame_id: filtered_frame.frame_id,
304            retention_time: filtered_frame.retention_time,
305            ms_type: filtered_frame.ms_type,
306            tof: filtered_frame.tof,
307            mz: filtered_frame.mz,
308            scan: filtered_frame.scan,
309            inv_mobility: filtered_frame.inv_mobility,
310            intensity: filtered_frame.intensity,
311            annotations: filtered_frame
312                .annotations
313                .into_iter()
314                .map(|mut x| {
315                    x.contributions.sort_by(|a, b| {
316                        a.intensity_contribution
317                            .partial_cmp(&b.intensity_contribution)
318                            .unwrap()
319                    });
320                    x
321                })
322                .collect(),
323        }
324    }
325
326    pub fn build_precursor_frames_annotated(
327        &self,
328        frame_ids: Vec<u32>,
329        mz_noise_precursor: bool,
330        uniform: bool,
331        precursor_noise_ppm: f64,
332        right_drag: bool,
333        num_threads: usize,
334    ) -> Vec<TimsFrameAnnotated> {
335        let pool = rayon::ThreadPoolBuilder::new()
336            .num_threads(num_threads)
337            .build()
338            .unwrap();
339
340        pool.install(|| {
341            // Use indexed parallel iteration to maintain order, avoiding post-sort
342            let mut tims_frames: Vec<TimsFrameAnnotated> = Vec::with_capacity(frame_ids.len());
343            unsafe { tims_frames.set_len(frame_ids.len()); }
344
345            frame_ids.par_iter().enumerate().for_each(|(idx, frame_id)| {
346                let frame = self.build_precursor_frame_annotated(
347                    *frame_id,
348                    mz_noise_precursor,
349                    uniform,
350                    precursor_noise_ppm,
351                    right_drag,
352                );
353                unsafe {
354                    let ptr = tims_frames.as_ptr() as *mut TimsFrameAnnotated;
355                    std::ptr::write(ptr.add(idx), frame);
356                }
357            });
358
359            tims_frames
360        })
361    }
362}