rustdf/sim/
lazy_builder.rs

1//! Lazy frame builders for DIA and DDA synthetic experiments.
2//!
3//! This module provides `TimsTofLazyFrameBuilderDIA` and `TimsTofLazyFrameBuilderDDA`,
4//! memory-efficient alternatives to their non-lazy counterparts that only load
5//! peptide/ion data for the frames being built rather than loading everything upfront.
6
7use mscore::data::peptide::PeptideProductIonSeriesCollection;
8use mscore::data::spectrum::{IndexedMzSpectrum, MsType, MzSpectrum};
9use mscore::timstof::collision::{TimsTofCollisionEnergy, TimsTofCollisionEnergyDIA};
10use mscore::timstof::frame::TimsFrame;
11use mscore::timstof::quadrupole::{IonTransmission, TimsTransmissionDDA, TimsTransmissionDIA};
12use mscore::timstof::spectrum::TimsSpectrum;
13use std::collections::{BTreeMap, HashSet};
14use std::path::Path;
15use std::sync::Arc;
16
17use rayon::prelude::*;
18
19use crate::sim::containers::{FragmentIonSim, FramesSim, IonSim, PeptidesSim, ScansSim};
20use crate::sim::handle::TimsTofSyntheticsDataHandle;
21
/// A lazy frame builder for DIA experiments that only loads data as needed.
///
/// Unlike `TimsTofSyntheticsFrameBuilderDIA`, this struct does not load all peptides,
/// ions, and fragment ions into memory at construction time. Instead, it stores only
/// the static metadata (frame info, scan info, transmission and fragmentation
/// settings) and loads peptide/ion data on-demand for each batch of frames being built.
///
/// This can significantly reduce memory usage for large simulations.
pub struct TimsTofLazyFrameBuilderDIA {
    /// Path to the SQLite database; the database is re-opened from this path
    /// each time data for a batch of frames is loaded
    pub db_path: String,
    /// Frame metadata (id, time, ms_type)
    pub frames: Vec<FramesSim>,
    /// Scan metadata (scan_id, mobility)
    pub scans: Vec<ScansSim>,
    /// Set of precursor frame IDs for quick lookup; frames whose ID is not in
    /// this set are built as fragment frames
    pub precursor_frame_id_set: HashSet<u32>,
    /// Map from frame_id to retention_time (a missing entry is treated as 0.0
    /// when frames are built)
    pub frame_to_rt: BTreeMap<u32, f32>,
    /// Map from scan_id to mobility (a missing entry is treated as 0.0 when
    /// frames are built)
    pub scan_to_mobility: BTreeMap<u32, f32>,
    /// DIA transmission settings
    pub transmission_settings: TimsTransmissionDIA,
    /// DIA fragmentation/collision energy settings
    pub fragmentation_settings: TimsTofCollisionEnergyDIA,
    /// Number of threads for parallel processing
    pub num_threads: usize,
}
50
51impl TimsTofLazyFrameBuilderDIA {
52    /// Create a new lazy frame builder.
53    ///
54    /// Only loads static metadata (frames, scans, transmission settings).
55    /// Peptides, ions, and fragment ions are NOT loaded here.
56    ///
57    /// # Arguments
58    ///
59    /// * `path` - Path to the SQLite database
60    /// * `num_threads` - Number of threads for parallel operations
61    ///
62    /// # Returns
63    ///
64    /// Result containing the lazy frame builder
65    pub fn new(path: &Path, num_threads: usize) -> rusqlite::Result<Self> {
66        let handle = TimsTofSyntheticsDataHandle::new(path)?;
67
68        let frames = handle.read_frames()?;
69        let scans = handle.read_scans()?;
70
71        let precursor_frame_id_set = TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&frames);
72        let frame_to_rt = TimsTofSyntheticsDataHandle::build_frame_to_rt(&frames);
73        let scan_to_mobility = TimsTofSyntheticsDataHandle::build_scan_to_mobility(&scans);
74
75        let transmission_settings = handle.get_transmission_dia();
76        let fragmentation_settings = handle.get_collision_energy_dia();
77
78        Ok(Self {
79            db_path: path.to_str().unwrap().to_string(),
80            frames,
81            scans,
82            precursor_frame_id_set,
83            frame_to_rt,
84            scan_to_mobility,
85            transmission_settings,
86            fragmentation_settings,
87            num_threads,
88        })
89    }
90
91    /// Load data for a specific frame range from the database.
92    ///
93    /// Returns peptides, ions, and fragment ions that are relevant to the frame range.
94    fn load_data_for_frame_range(
95        &self,
96        frame_min: u32,
97        frame_max: u32,
98    ) -> rusqlite::Result<(Vec<PeptidesSim>, Vec<IonSim>, Vec<FragmentIonSim>)> {
99        let path = Path::new(&self.db_path);
100        let handle = TimsTofSyntheticsDataHandle::new(path)?;
101
102        // Load only peptides for this frame range
103        let peptides = handle.read_peptides_for_frame_range(frame_min, frame_max)?;
104
105        if peptides.is_empty() {
106            return Ok((Vec::new(), Vec::new(), Vec::new()));
107        }
108
109        // Get peptide IDs for querying related data
110        let peptide_ids: Vec<u32> = peptides.iter().map(|p| p.peptide_id).collect();
111
112        // Load ions and fragment ions for these peptides
113        let ions = handle.read_ions_for_peptides(&peptide_ids)?;
114        let fragment_ions = handle.read_fragment_ions_for_peptides(&peptide_ids)?;
115
116        Ok((peptides, ions, fragment_ions))
117    }
118
119    /// Build frames for a range of frame IDs.
120    ///
121    /// This method loads only the data needed for the specified frames,
122    /// builds the frames, and then releases the loaded data.
123    ///
124    /// # Arguments
125    ///
126    /// * `frame_ids` - Vector of frame IDs to build
127    /// * `fragmentation` - Whether to include fragmentation
128    /// * `mz_noise_precursor` - Whether to add m/z noise to precursor ions
129    /// * `uniform` - Whether to use uniform noise distribution
130    /// * `precursor_noise_ppm` - Precursor noise in ppm
131    /// * `mz_noise_fragment` - Whether to add m/z noise to fragment ions
132    /// * `fragment_noise_ppm` - Fragment noise in ppm
133    /// * `right_drag` - Whether to use right drag for noise
134    ///
135    /// # Returns
136    ///
137    /// Vector of built TimsFrame instances
138    pub fn build_frames_lazy(
139        &self,
140        frame_ids: Vec<u32>,
141        fragmentation: bool,
142        mz_noise_precursor: bool,
143        uniform: bool,
144        precursor_noise_ppm: f64,
145        mz_noise_fragment: bool,
146        fragment_noise_ppm: f64,
147        right_drag: bool,
148    ) -> Vec<TimsFrame> {
149        if frame_ids.is_empty() {
150            return Vec::new();
151        }
152
153        // Determine frame range
154        let frame_min = *frame_ids.iter().min().unwrap();
155        let frame_max = *frame_ids.iter().max().unwrap();
156
157        // Load data for this frame range
158        let (peptides, ions, fragment_ions) = match self.load_data_for_frame_range(frame_min, frame_max) {
159            Ok(data) => data,
160            Err(_) => return Vec::new(),
161        };
162
163        // Build lookup maps
164        let peptide_map = TimsTofSyntheticsDataHandle::build_peptide_map(&peptides);
165        let peptide_to_ions = TimsTofSyntheticsDataHandle::build_peptide_to_ions(&ions);
166        let frame_to_abundances = TimsTofSyntheticsDataHandle::build_frame_to_abundances(&peptides);
167        let peptide_to_events = TimsTofSyntheticsDataHandle::build_peptide_to_events(&peptides);
168
169        // Build fragment ions map if fragmentation is enabled
170        let fragment_ions_map = if fragmentation {
171            Some(TimsTofSyntheticsDataHandle::build_fragment_ions(
172                &peptide_map,
173                &fragment_ions,
174                self.num_threads,
175            ))
176        } else {
177            None
178        };
179
180        // Build frames in parallel using indexed iteration to maintain order
181        let pool = rayon::ThreadPoolBuilder::new()
182            .num_threads(self.num_threads)
183            .build()
184            .unwrap();
185
186        pool.install(|| {
187            let mut tims_frames: Vec<TimsFrame> = Vec::with_capacity(frame_ids.len());
188            unsafe { tims_frames.set_len(frame_ids.len()); }
189
190            frame_ids.par_iter().enumerate().for_each(|(idx, frame_id)| {
191                let frame = self.build_single_frame(
192                    *frame_id,
193                    fragmentation,
194                    mz_noise_precursor,
195                    uniform,
196                    precursor_noise_ppm,
197                    mz_noise_fragment,
198                    fragment_noise_ppm,
199                    right_drag,
200                    &peptide_map,
201                    &peptide_to_ions,
202                    &frame_to_abundances,
203                    &peptide_to_events,
204                    &fragment_ions_map,
205                );
206                unsafe {
207                    let ptr = tims_frames.as_ptr() as *mut TimsFrame;
208                    std::ptr::write(ptr.add(idx), frame);
209                }
210            });
211
212            tims_frames
213        })
214    }
215
216    /// Build a single frame with provided data maps.
217    #[allow(clippy::too_many_arguments)]
218    fn build_single_frame(
219        &self,
220        frame_id: u32,
221        fragmentation: bool,
222        mz_noise_precursor: bool,
223        uniform: bool,
224        precursor_noise_ppm: f64,
225        mz_noise_fragment: bool,
226        fragment_noise_ppm: f64,
227        right_drag: bool,
228        _peptide_map: &BTreeMap<u32, PeptidesSim>,
229        peptide_to_ions: &BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)>,
230        frame_to_abundances: &BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
231        peptide_to_events: &BTreeMap<u32, f32>,
232        fragment_ions_map: &Option<BTreeMap<(u32, i8, i32), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)>>,
233    ) -> TimsFrame {
234        // Determine if this is a precursor or fragment frame
235        let is_precursor = self.precursor_frame_id_set.contains(&frame_id);
236
237        if is_precursor {
238            self.build_precursor_frame(
239                frame_id,
240                mz_noise_precursor,
241                uniform,
242                precursor_noise_ppm,
243                right_drag,
244                peptide_to_ions,
245                frame_to_abundances,
246                peptide_to_events,
247            )
248        } else {
249            self.build_fragment_frame(
250                frame_id,
251                fragmentation,
252                mz_noise_fragment,
253                uniform,
254                fragment_noise_ppm,
255                right_drag,
256                peptide_to_ions,
257                frame_to_abundances,
258                peptide_to_events,
259                fragment_ions_map,
260            )
261        }
262    }
263
264    /// Build a precursor (MS1) frame.
265    #[allow(clippy::too_many_arguments)]
266    fn build_precursor_frame(
267        &self,
268        frame_id: u32,
269        mz_noise_precursor: bool,
270        uniform: bool,
271        precursor_noise_ppm: f64,
272        right_drag: bool,
273        peptide_to_ions: &BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)>,
274        frame_to_abundances: &BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
275        peptide_to_events: &BTreeMap<u32, f32>,
276    ) -> TimsFrame {
277        let ms_type = MsType::Precursor;
278        let rt = *self.frame_to_rt.get(&frame_id).unwrap_or(&0.0) as f64;
279
280        // Single lookup instead of contains_key + get
281        let Some((peptide_ids, abundances)) = frame_to_abundances.get(&frame_id) else {
282            return TimsFrame::new(frame_id as i32, ms_type, rt, vec![], vec![], vec![], vec![], vec![]);
283        };
284
285        // Preallocate with estimated capacity
286        let estimated_capacity = peptide_ids.len() * 4;
287        let mut tims_spectra: Vec<TimsSpectrum> = Vec::with_capacity(estimated_capacity);
288
289        for (peptide_id, abundance) in peptide_ids.iter().zip(abundances.iter()) {
290            let Some((ion_abundances, scan_occurrences, scan_abundances, _, spectra)) =
291                peptide_to_ions.get(peptide_id)
292            else {
293                continue;
294            };
295
296            // Cache peptide-level lookup
297            let total_events = *peptide_to_events.get(peptide_id).unwrap_or(&1.0);
298
299            for (index, ion_abundance) in ion_abundances.iter().enumerate() {
300                let scan_occurrence = &scan_occurrences[index];
301                let scan_abundance = &scan_abundances[index];
302                let spectrum = &spectra[index];
303
304                for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) {
305                    let abundance_factor = abundance * ion_abundance * scan_abu * total_events;
306                    let scaled_spec: MzSpectrum = spectrum.clone() * abundance_factor as f64;
307
308                    let mz_spectrum = if mz_noise_precursor {
309                        if uniform {
310                            scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag)
311                        } else {
312                            scaled_spec.add_mz_noise_normal(precursor_noise_ppm)
313                        }
314                    } else {
315                        scaled_spec
316                    };
317
318                    let scan_mobility = *self.scan_to_mobility.get(scan).unwrap_or(&0.0) as f64;
319                    let spectrum_len = mz_spectrum.mz.len();
320
321                    tims_spectra.push(TimsSpectrum::new(
322                        frame_id as i32,
323                        *scan as i32,
324                        rt,
325                        scan_mobility,
326                        ms_type.clone(),
327                        IndexedMzSpectrum::from_mz_spectrum(
328                            vec![0; spectrum_len],
329                            mz_spectrum,
330                        ),
331                    ));
332                }
333            }
334        }
335
336        let mut filtered = TimsFrame::from_tims_spectra_filtered(
337            tims_spectra, 0.0, 10000.0, 0, 2000, 0.0, 10.0, 1.0, 1e9,
338        );
339
340        // Round intensities
341        let intensities_rounded: Vec<f64> = filtered
342            .ims_frame
343            .intensity
344            .iter()
345            .map(|x| x.round())
346            .collect();
347        filtered.ims_frame.intensity = Arc::new(intensities_rounded);
348
349        filtered
350    }
351
352    /// Build a fragment (MS2) frame.
353    #[allow(clippy::too_many_arguments)]
354    fn build_fragment_frame(
355        &self,
356        frame_id: u32,
357        fragmentation: bool,
358        mz_noise_fragment: bool,
359        uniform: bool,
360        fragment_noise_ppm: f64,
361        right_drag: bool,
362        peptide_to_ions: &BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)>,
363        frame_to_abundances: &BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
364        peptide_to_events: &BTreeMap<u32, f32>,
365        fragment_ions_map: &Option<BTreeMap<(u32, i8, i32), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)>>,
366    ) -> TimsFrame {
367        let ms_type = MsType::FragmentDia;
368        let rt = *self.frame_to_rt.get(&frame_id).unwrap_or(&0.0) as f64;
369
370        if !fragmentation || fragment_ions_map.is_none() {
371            // If no fragmentation, build a quadrupole-filtered precursor frame
372            let precursor_frame = self.build_precursor_frame(
373                frame_id,
374                mz_noise_fragment,
375                uniform,
376                fragment_noise_ppm,
377                right_drag,
378                peptide_to_ions,
379                frame_to_abundances,
380                peptide_to_events,
381            );
382            let mut frame = self.transmission_settings.transmit_tims_frame(&precursor_frame, None);
383            frame.ms_type = MsType::FragmentDia;
384            return frame;
385        }
386
387        let fragment_ions = fragment_ions_map.as_ref().unwrap();
388
389        // Single lookup instead of contains_key + get
390        let Some((peptide_ids, frame_abundances)) = frame_to_abundances.get(&frame_id) else {
391            return TimsFrame::new(frame_id as i32, ms_type, rt, vec![], vec![], vec![], vec![], vec![]);
392        };
393
394        // Preallocate with estimated capacity
395        let estimated_capacity = peptide_ids.len() * 4;
396        let mut tims_spectra: Vec<TimsSpectrum> = Vec::with_capacity(estimated_capacity);
397
398        for (peptide_id, frame_abundance) in peptide_ids.iter().zip(frame_abundances.iter()) {
399            let Some((ion_abundances, scan_occurrences, scan_abundances, charges, spectra)) =
400                peptide_to_ions.get(peptide_id)
401            else {
402                continue;
403            };
404
405            // Cache peptide-level lookup
406            let total_events = *peptide_to_events.get(peptide_id).unwrap_or(&1.0);
407
408            for (index, ion_abundance) in ion_abundances.iter().enumerate() {
409                let all_scan_occurrence = &scan_occurrences[index];
410                let all_scan_abundance = &scan_abundances[index];
411                let spectrum = &spectra[index];
412                let charge_state = charges[index];
413
414                for (scan, scan_abundance) in all_scan_occurrence.iter().zip(all_scan_abundance.iter()) {
415                    // Check if precursor is transmitted
416                    if !self.transmission_settings.any_transmitted(
417                        frame_id as i32,
418                        *scan as i32,
419                        &spectrum.mz,
420                        None,
421                    ) {
422                        continue;
423                    }
424
425                    // Calculate abundance factor
426                    let fraction_events = frame_abundance * scan_abundance * ion_abundance * total_events;
427
428                    // Get collision energy
429                    let collision_energy = self.fragmentation_settings.get_collision_energy(
430                        frame_id as i32,
431                        *scan as i32,
432                    );
433                    let collision_energy_quantized = (collision_energy * 1e1).round() as i32;
434
435                    // Single lookup with let-else
436                    let Some((_, fragment_series_vec)) = fragment_ions.get(&(*peptide_id, charge_state, collision_energy_quantized)) else {
437                        continue;
438                    };
439
440                    // Cache scan mobility
441                    let scan_mobility = *self.scan_to_mobility.get(scan).unwrap_or(&0.0) as f64;
442
443                    for fragment_ion_series in fragment_series_vec.iter() {
444                        let scaled_spec = fragment_ion_series.clone() * fraction_events as f64;
445
446                        let mz_spectrum = if mz_noise_fragment {
447                            if uniform {
448                                scaled_spec.add_mz_noise_uniform(fragment_noise_ppm, right_drag)
449                            } else {
450                                scaled_spec.add_mz_noise_normal(fragment_noise_ppm)
451                            }
452                        } else {
453                            scaled_spec
454                        };
455
456                        let spectrum_len = mz_spectrum.mz.len();
457                        tims_spectra.push(TimsSpectrum::new(
458                            frame_id as i32,
459                            *scan as i32,
460                            rt,
461                            scan_mobility,
462                            ms_type.clone(),
463                            IndexedMzSpectrum::from_mz_spectrum(
464                                vec![0; spectrum_len],
465                                mz_spectrum,
466                            ).filter_ranged(100.0, 1700.0, 1.0, 1e9),
467                        ));
468                    }
469                }
470            }
471        }
472
473        if tims_spectra.is_empty() {
474            return TimsFrame::new(frame_id as i32, ms_type, rt, vec![], vec![], vec![], vec![], vec![]);
475        }
476
477        let mut filtered = TimsFrame::from_tims_spectra_filtered(
478            tims_spectra, 100.0, 1700.0, 0, 1000, 0.0, 10.0, 1.0, 1e9,
479        );
480
481        // Round intensities
482        let intensities_rounded: Vec<f64> = filtered
483            .ims_frame
484            .intensity
485            .iter()
486            .map(|x| x.round())
487            .collect();
488        filtered.ims_frame.intensity = Arc::new(intensities_rounded);
489
490        filtered
491    }
492
493    /// Get the total number of frames.
494    pub fn num_frames(&self) -> usize {
495        self.frames.len()
496    }
497
498    /// Get all frame IDs.
499    pub fn frame_ids(&self) -> Vec<u32> {
500        self.frames.iter().map(|f| f.frame_id).collect()
501    }
502
503    /// Get precursor frame IDs.
504    pub fn precursor_frame_ids(&self) -> Vec<u32> {
505        self.precursor_frame_id_set.iter().cloned().collect()
506    }
507
508    /// Get fragment frame IDs.
509    pub fn fragment_frame_ids(&self) -> Vec<u32> {
510        self.frames
511            .iter()
512            .filter(|f| !self.precursor_frame_id_set.contains(&f.frame_id))
513            .map(|f| f.frame_id)
514            .collect()
515    }
516}
517
518impl TimsTofCollisionEnergy for TimsTofLazyFrameBuilderDIA {
519    fn get_collision_energy(&self, frame_id: i32, scan_id: i32) -> f64 {
520        self.fragmentation_settings.get_collision_energy(frame_id, scan_id)
521    }
522}
523
/// A lazy frame builder for DDA experiments that only loads data as needed.
///
/// Unlike `TimsTofSyntheticsFrameBuilderDDA`, this struct does not load all peptides,
/// ions, and fragment ions into memory at construction time. Instead, it stores only
/// the static metadata (frame info, scan info, transmission settings) and loads
/// peptide/ion data on-demand for each batch of frames being built.
///
/// This can significantly reduce memory usage for large simulations.
pub struct TimsTofLazyFrameBuilderDDA {
    /// Path to the SQLite database; the database is re-opened from this path
    /// each time data for a batch of frames is loaded
    pub db_path: String,
    /// Frame metadata (id, time, ms_type)
    pub frames: Vec<FramesSim>,
    /// Scan metadata (scan_id, mobility)
    pub scans: Vec<ScansSim>,
    /// Set of precursor frame IDs for quick lookup; frames whose ID is not in
    /// this set are built as fragment frames
    pub precursor_frame_id_set: HashSet<u32>,
    /// Map from frame_id to retention_time
    pub frame_to_rt: BTreeMap<u32, f32>,
    /// Map from scan_id to mobility
    pub scan_to_mobility: BTreeMap<u32, f32>,
    /// DDA transmission settings (includes PASEF metadata with collision energies)
    pub transmission_settings: TimsTransmissionDDA,
    /// Number of threads for parallel processing
    pub num_threads: usize,
}
550
551impl TimsTofLazyFrameBuilderDDA {
552    /// Create a new lazy frame builder for DDA.
553    ///
554    /// Only loads static metadata (frames, scans, transmission settings).
555    /// Peptides, ions, and fragment ions are NOT loaded here.
556    ///
557    /// # Arguments
558    ///
559    /// * `path` - Path to the SQLite database
560    /// * `num_threads` - Number of threads for parallel operations
561    ///
562    /// # Returns
563    ///
564    /// Result containing the lazy frame builder
565    pub fn new(path: &Path, num_threads: usize) -> rusqlite::Result<Self> {
566        let handle = TimsTofSyntheticsDataHandle::new(path)?;
567
568        let frames = handle.read_frames()?;
569        let scans = handle.read_scans()?;
570
571        let precursor_frame_id_set = TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&frames);
572        let frame_to_rt = TimsTofSyntheticsDataHandle::build_frame_to_rt(&frames);
573        let scan_to_mobility = TimsTofSyntheticsDataHandle::build_scan_to_mobility(&scans);
574
575        let transmission_settings = handle.get_transmission_dda();
576
577        Ok(Self {
578            db_path: path.to_str().unwrap().to_string(),
579            frames,
580            scans,
581            precursor_frame_id_set,
582            frame_to_rt,
583            scan_to_mobility,
584            transmission_settings,
585            num_threads,
586        })
587    }
588
589    /// Load data for a specific frame range from the database.
590    ///
591    /// Returns peptides, ions, and fragment ions that are relevant to the frame range.
592    fn load_data_for_frame_range(
593        &self,
594        frame_min: u32,
595        frame_max: u32,
596    ) -> rusqlite::Result<(Vec<PeptidesSim>, Vec<IonSim>, Vec<FragmentIonSim>)> {
597        let path = Path::new(&self.db_path);
598        let handle = TimsTofSyntheticsDataHandle::new(path)?;
599
600        // Load only peptides for this frame range
601        let peptides = handle.read_peptides_for_frame_range(frame_min, frame_max)?;
602
603        if peptides.is_empty() {
604            return Ok((Vec::new(), Vec::new(), Vec::new()));
605        }
606
607        // Get peptide IDs for querying related data
608        let peptide_ids: Vec<u32> = peptides.iter().map(|p| p.peptide_id).collect();
609
610        // Load ions and fragment ions for these peptides
611        let ions = handle.read_ions_for_peptides(&peptide_ids)?;
612        let fragment_ions = handle.read_fragment_ions_for_peptides(&peptide_ids)?;
613
614        Ok((peptides, ions, fragment_ions))
615    }
616
617    /// Build frames for a range of frame IDs.
618    ///
619    /// This method loads only the data needed for the specified frames,
620    /// builds the frames, and then releases the loaded data.
621    ///
622    /// # Arguments
623    ///
624    /// * `frame_ids` - Vector of frame IDs to build
625    /// * `fragmentation` - Whether to include fragmentation
626    /// * `mz_noise_precursor` - Whether to add m/z noise to precursor ions
627    /// * `uniform` - Whether to use uniform noise distribution
628    /// * `precursor_noise_ppm` - Precursor noise in ppm
629    /// * `mz_noise_fragment` - Whether to add m/z noise to fragment ions
630    /// * `fragment_noise_ppm` - Fragment noise in ppm
631    /// * `right_drag` - Whether to use right drag for noise
632    ///
633    /// # Returns
634    ///
635    /// Vector of built TimsFrame instances
636    pub fn build_frames_lazy(
637        &self,
638        frame_ids: Vec<u32>,
639        fragmentation: bool,
640        mz_noise_precursor: bool,
641        uniform: bool,
642        precursor_noise_ppm: f64,
643        mz_noise_fragment: bool,
644        fragment_noise_ppm: f64,
645        right_drag: bool,
646    ) -> Vec<TimsFrame> {
647        if frame_ids.is_empty() {
648            return Vec::new();
649        }
650
651        // Determine frame range
652        let frame_min = *frame_ids.iter().min().unwrap();
653        let frame_max = *frame_ids.iter().max().unwrap();
654
655        // Load data for this frame range
656        let (peptides, ions, fragment_ions) = match self.load_data_for_frame_range(frame_min, frame_max) {
657            Ok(data) => data,
658            Err(_) => return Vec::new(),
659        };
660
661        // Build lookup maps
662        let peptide_map = TimsTofSyntheticsDataHandle::build_peptide_map(&peptides);
663        let peptide_to_ions = TimsTofSyntheticsDataHandle::build_peptide_to_ions(&ions);
664        let frame_to_abundances = TimsTofSyntheticsDataHandle::build_frame_to_abundances(&peptides);
665        let peptide_to_events = TimsTofSyntheticsDataHandle::build_peptide_to_events(&peptides);
666
667        // Build fragment ions map if fragmentation is enabled
668        let fragment_ions_map = if fragmentation {
669            Some(TimsTofSyntheticsDataHandle::build_fragment_ions(
670                &peptide_map,
671                &fragment_ions,
672                self.num_threads,
673            ))
674        } else {
675            None
676        };
677
678        // Build frames in parallel using indexed iteration to maintain order
679        let pool = rayon::ThreadPoolBuilder::new()
680            .num_threads(self.num_threads)
681            .build()
682            .unwrap();
683
684        pool.install(|| {
685            let mut tims_frames: Vec<TimsFrame> = Vec::with_capacity(frame_ids.len());
686            unsafe { tims_frames.set_len(frame_ids.len()); }
687
688            frame_ids.par_iter().enumerate().for_each(|(idx, frame_id)| {
689                let frame = self.build_single_frame(
690                    *frame_id,
691                    fragmentation,
692                    mz_noise_precursor,
693                    uniform,
694                    precursor_noise_ppm,
695                    mz_noise_fragment,
696                    fragment_noise_ppm,
697                    right_drag,
698                    &peptide_to_ions,
699                    &frame_to_abundances,
700                    &peptide_to_events,
701                    &fragment_ions_map,
702                );
703                unsafe {
704                    let ptr = tims_frames.as_ptr() as *mut TimsFrame;
705                    std::ptr::write(ptr.add(idx), frame);
706                }
707            });
708
709            tims_frames
710        })
711    }
712
713    /// Build a single frame with provided data maps.
714    #[allow(clippy::too_many_arguments)]
715    fn build_single_frame(
716        &self,
717        frame_id: u32,
718        fragmentation: bool,
719        mz_noise_precursor: bool,
720        uniform: bool,
721        precursor_noise_ppm: f64,
722        mz_noise_fragment: bool,
723        fragment_noise_ppm: f64,
724        right_drag: bool,
725        peptide_to_ions: &BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)>,
726        frame_to_abundances: &BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
727        peptide_to_events: &BTreeMap<u32, f32>,
728        fragment_ions_map: &Option<BTreeMap<(u32, i8, i32), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)>>,
729    ) -> TimsFrame {
730        // Determine if this is a precursor or fragment frame
731        let is_precursor = self.precursor_frame_id_set.contains(&frame_id);
732
733        if is_precursor {
734            self.build_precursor_frame(
735                frame_id,
736                mz_noise_precursor,
737                uniform,
738                precursor_noise_ppm,
739                right_drag,
740                peptide_to_ions,
741                frame_to_abundances,
742                peptide_to_events,
743            )
744        } else {
745            self.build_fragment_frame(
746                frame_id,
747                fragmentation,
748                mz_noise_fragment,
749                uniform,
750                fragment_noise_ppm,
751                right_drag,
752                peptide_to_ions,
753                frame_to_abundances,
754                peptide_to_events,
755                fragment_ions_map,
756            )
757        }
758    }
759
760    /// Build a precursor (MS1) frame.
761    #[allow(clippy::too_many_arguments)]
762    fn build_precursor_frame(
763        &self,
764        frame_id: u32,
765        mz_noise_precursor: bool,
766        uniform: bool,
767        precursor_noise_ppm: f64,
768        right_drag: bool,
769        peptide_to_ions: &BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)>,
770        frame_to_abundances: &BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
771        peptide_to_events: &BTreeMap<u32, f32>,
772    ) -> TimsFrame {
773        let ms_type = MsType::Precursor;
774        let rt = *self.frame_to_rt.get(&frame_id).unwrap_or(&0.0) as f64;
775
776        // Single lookup instead of contains_key + get
777        let Some((peptide_ids, abundances)) = frame_to_abundances.get(&frame_id) else {
778            return TimsFrame::new(frame_id as i32, ms_type, rt, vec![], vec![], vec![], vec![], vec![]);
779        };
780
781        // Preallocate with estimated capacity
782        let estimated_capacity = peptide_ids.len() * 4;
783        let mut tims_spectra: Vec<TimsSpectrum> = Vec::with_capacity(estimated_capacity);
784
785        for (peptide_id, abundance) in peptide_ids.iter().zip(abundances.iter()) {
786            let Some((ion_abundances, scan_occurrences, scan_abundances, _, spectra)) =
787                peptide_to_ions.get(peptide_id)
788            else {
789                continue;
790            };
791
792            // Cache peptide-level lookup
793            let total_events = *peptide_to_events.get(peptide_id).unwrap_or(&1.0);
794
795            for (index, ion_abundance) in ion_abundances.iter().enumerate() {
796                let scan_occurrence = &scan_occurrences[index];
797                let scan_abundance = &scan_abundances[index];
798                let spectrum = &spectra[index];
799
800                for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) {
801                    let abundance_factor = abundance * ion_abundance * scan_abu * total_events;
802                    let scaled_spec: MzSpectrum = spectrum.clone() * abundance_factor as f64;
803
804                    let mz_spectrum = if mz_noise_precursor {
805                        if uniform {
806                            scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag)
807                        } else {
808                            scaled_spec.add_mz_noise_normal(precursor_noise_ppm)
809                        }
810                    } else {
811                        scaled_spec
812                    };
813
814                    let scan_mobility = *self.scan_to_mobility.get(scan).unwrap_or(&0.0) as f64;
815                    let spectrum_len = mz_spectrum.mz.len();
816
817                    tims_spectra.push(TimsSpectrum::new(
818                        frame_id as i32,
819                        *scan as i32,
820                        rt,
821                        scan_mobility,
822                        ms_type.clone(),
823                        IndexedMzSpectrum::from_mz_spectrum(
824                            vec![0; spectrum_len],
825                            mz_spectrum,
826                        ),
827                    ));
828                }
829            }
830        }
831
832        let mut filtered = TimsFrame::from_tims_spectra_filtered(
833            tims_spectra, 0.0, 10000.0, 0, 2000, 0.0, 10.0, 1.0, 1e9,
834        );
835
836        // Round intensities
837        let intensities_rounded: Vec<f64> = filtered
838            .ims_frame
839            .intensity
840            .iter()
841            .map(|x| x.round())
842            .collect();
843        filtered.ims_frame.intensity = Arc::new(intensities_rounded);
844
845        filtered
846    }
847
848    /// Build a fragment (MS2) frame for DDA.
849    #[allow(clippy::too_many_arguments)]
850    fn build_fragment_frame(
851        &self,
852        frame_id: u32,
853        fragmentation: bool,
854        mz_noise_fragment: bool,
855        uniform: bool,
856        fragment_noise_ppm: f64,
857        right_drag: bool,
858        peptide_to_ions: &BTreeMap<u32, (Vec<f32>, Vec<Vec<u32>>, Vec<Vec<f32>>, Vec<i8>, Vec<MzSpectrum>)>,
859        frame_to_abundances: &BTreeMap<u32, (Vec<u32>, Vec<f32>)>,
860        peptide_to_events: &BTreeMap<u32, f32>,
861        fragment_ions_map: &Option<BTreeMap<(u32, i8, i32), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)>>,
862    ) -> TimsFrame {
863        let ms_type = MsType::FragmentDda;
864        let rt = *self.frame_to_rt.get(&frame_id).unwrap_or(&0.0) as f64;
865
866        // Cache PASEF meta lookup for this frame
867        let pasef_meta = self.transmission_settings.pasef_meta.get(&(frame_id as i32));
868
869        // If no PASEF meta for this frame, return empty or filtered precursor
870        if pasef_meta.is_none() {
871            if !fragmentation || fragment_ions_map.is_none() {
872                // Build quadrupole-filtered precursor frame
873                let precursor_frame = self.build_precursor_frame(
874                    frame_id,
875                    mz_noise_fragment,
876                    uniform,
877                    fragment_noise_ppm,
878                    right_drag,
879                    peptide_to_ions,
880                    frame_to_abundances,
881                    peptide_to_events,
882                );
883                let mut frame = self.transmission_settings.transmit_tims_frame(&precursor_frame, None);
884                frame.ms_type = MsType::FragmentDda;
885                return frame;
886            }
887            return TimsFrame::new(frame_id as i32, ms_type, rt, vec![], vec![], vec![], vec![], vec![]);
888        }
889        let pasef_meta = pasef_meta.unwrap();
890
891        if !fragmentation || fragment_ions_map.is_none() {
892            // Build quadrupole-filtered precursor frame
893            let precursor_frame = self.build_precursor_frame(
894                frame_id,
895                mz_noise_fragment,
896                uniform,
897                fragment_noise_ppm,
898                right_drag,
899                peptide_to_ions,
900                frame_to_abundances,
901                peptide_to_events,
902            );
903            let mut frame = self.transmission_settings.transmit_tims_frame(&precursor_frame, None);
904            frame.ms_type = MsType::FragmentDda;
905            return frame;
906        }
907
908        let fragment_ions = fragment_ions_map.as_ref().unwrap();
909
910        // Single lookup for frame abundances
911        let Some((peptide_ids, frame_abundances)) = frame_to_abundances.get(&frame_id) else {
912            return TimsFrame::new(frame_id as i32, ms_type, rt, vec![], vec![], vec![], vec![], vec![]);
913        };
914
915        // Preallocate with estimated capacity
916        let estimated_capacity = peptide_ids.len() * 4;
917        let mut tims_spectra: Vec<TimsSpectrum> = Vec::with_capacity(estimated_capacity);
918
919        for (peptide_id, frame_abundance) in peptide_ids.iter().zip(frame_abundances.iter()) {
920            let Some((ion_abundances, scan_occurrences, scan_abundances, charges, spectra)) =
921                peptide_to_ions.get(peptide_id)
922            else {
923                continue;
924            };
925
926            // Cache peptide-level lookup
927            let total_events = *peptide_to_events.get(peptide_id).unwrap_or(&1.0);
928
929            for (index, ion_abundance) in ion_abundances.iter().enumerate() {
930                let all_scan_occurrence = &scan_occurrences[index];
931                let all_scan_abundance = &scan_abundances[index];
932                let spectrum = &spectra[index];
933                let charge_state = charges[index];
934
935                for (scan, scan_abundance) in all_scan_occurrence.iter().zip(all_scan_abundance.iter()) {
936                    // Check if precursor is transmitted
937                    if !self.transmission_settings.any_transmitted(
938                        frame_id as i32,
939                        *scan as i32,
940                        &spectrum.mz,
941                        None,
942                    ) {
943                        continue;
944                    }
945
946                    // Calculate abundance factor
947                    let fraction_events = frame_abundance * scan_abundance * ion_abundance * total_events;
948
949                    // Get collision energy from PASEF meta
950                    let collision_energy: f64 = pasef_meta
951                        .iter()
952                        .find(|scan_meta| scan_meta.scan_start <= *scan as i32 && scan_meta.scan_end >= *scan as i32)
953                        .map(|s| s.collision_energy)
954                        .unwrap_or(0.0);
955                    let collision_energy_quantized = (collision_energy * 1e1).round() as i32;
956
957                    // Get fragment ions for this peptide/charge/energy combination
958                    let Some((_, fragment_series_vec)) = fragment_ions.get(&(*peptide_id, charge_state, collision_energy_quantized)) else {
959                        continue;
960                    };
961
962                    // Cache scan mobility
963                    let scan_mobility = *self.scan_to_mobility.get(scan).unwrap_or(&0.0) as f64;
964
965                    for fragment_ion_series in fragment_series_vec.iter() {
966                        let scaled_spec = fragment_ion_series.clone() * fraction_events as f64;
967
968                        let mz_spectrum = if mz_noise_fragment {
969                            if uniform {
970                                scaled_spec.add_mz_noise_uniform(fragment_noise_ppm, right_drag)
971                            } else {
972                                scaled_spec.add_mz_noise_normal(fragment_noise_ppm)
973                            }
974                        } else {
975                            scaled_spec
976                        };
977
978                        let spectrum_len = mz_spectrum.mz.len();
979                        tims_spectra.push(TimsSpectrum::new(
980                            frame_id as i32,
981                            *scan as i32,
982                            rt,
983                            scan_mobility,
984                            ms_type.clone(),
985                            IndexedMzSpectrum::from_mz_spectrum(
986                                vec![0; spectrum_len],
987                                mz_spectrum,
988                            ).filter_ranged(100.0, 1700.0, 1.0, 1e9),
989                        ));
990                    }
991                }
992            }
993        }
994
995        if tims_spectra.is_empty() {
996            return TimsFrame::new(frame_id as i32, ms_type, rt, vec![], vec![], vec![], vec![], vec![]);
997        }
998
999        let mut filtered = TimsFrame::from_tims_spectra_filtered(
1000            tims_spectra, 100.0, 1700.0, 0, 1000, 0.0, 10.0, 1.0, 1e9,
1001        );
1002
1003        // Round intensities
1004        let intensities_rounded: Vec<f64> = filtered
1005            .ims_frame
1006            .intensity
1007            .iter()
1008            .map(|x| x.round())
1009            .collect();
1010        filtered.ims_frame.intensity = Arc::new(intensities_rounded);
1011
1012        filtered
1013    }
1014
1015    /// Get collision energy for a frame/scan combination from PASEF metadata.
1016    pub fn get_collision_energy(&self, frame_id: i32, scan_id: i32) -> f64 {
1017        self.transmission_settings.get_collision_energy(frame_id, scan_id).unwrap_or(0.0)
1018    }
1019
1020    /// Get the total number of frames.
1021    pub fn num_frames(&self) -> usize {
1022        self.frames.len()
1023    }
1024
1025    /// Get all frame IDs.
1026    pub fn frame_ids(&self) -> Vec<u32> {
1027        self.frames.iter().map(|f| f.frame_id).collect()
1028    }
1029
1030    /// Get precursor frame IDs.
1031    pub fn precursor_frame_ids(&self) -> Vec<u32> {
1032        self.precursor_frame_id_set.iter().cloned().collect()
1033    }
1034
1035    /// Get fragment frame IDs.
1036    pub fn fragment_frame_ids(&self) -> Vec<u32> {
1037        self.frames
1038            .iter()
1039            .filter(|f| !self.precursor_frame_id_set.contains(&f.frame_id))
1040            .map(|f| f.frame_id)
1041            .collect()
1042    }
1043}