rustdf/sim/
handle.rs

1use crate::sim::containers::{
2    FragmentIonSim, FrameToWindowGroupSim, FramesSim, IonSim, PeptidesSim, ScansSim,
3    SignalDistribution, WindowGroupSettingsSim,
4};
5use mscore::data::peptide::{FragmentType, PeptideProductIonSeriesCollection, PeptideSequence};
6use mscore::data::spectrum::{MsType, MzSpectrum};
7use mscore::simulation::annotation::MzSpectrumAnnotated;
8use mscore::timstof::collision::{TimsTofCollisionEnergy, TimsTofCollisionEnergyDIA};
9use mscore::timstof::quadrupole::{IonTransmission, PASEFMeta, TimsTransmissionDDA, TimsTransmissionDIA};
10use rayon::prelude::*;
11use rayon::ThreadPoolBuilder;
12use rusqlite::Connection;
13use std::collections::{BTreeMap, BTreeSet, HashSet};
14use std::path::Path;
15
16#[derive(Debug)]
17pub struct TimsTofSyntheticsDataHandle {
18    pub connection: Connection,
19}
20
21impl TimsTofSyntheticsDataHandle {
22    pub fn new(path: &Path) -> rusqlite::Result<Self> {
23        let connection = Connection::open(path)?;
24        Ok(Self { connection })
25    }
26
27    pub fn read_frames(&self) -> rusqlite::Result<Vec<FramesSim>> {
28        let mut stmt = self.connection.prepare("SELECT * FROM frames")?;
29        let frames_iter = stmt.query_map([], |row| {
30            Ok(FramesSim::new(row.get(0)?, row.get(1)?, row.get(2)?))
31        })?;
32        let mut frames = Vec::new();
33        for frame in frames_iter {
34            frames.push(frame?);
35        }
36        Ok(frames)
37    }
38
39    pub fn read_scans(&self) -> rusqlite::Result<Vec<ScansSim>> {
40        let mut stmt = self.connection.prepare("SELECT * FROM scans")?;
41        let scans_iter = stmt.query_map([], |row| Ok(ScansSim::new(row.get(0)?, row.get(1)?)))?;
42        let mut scans = Vec::new();
43        for scan in scans_iter {
44            scans.push(scan?);
45        }
46        Ok(scans)
47    }
48    pub fn read_peptides(&self) -> rusqlite::Result<Vec<PeptidesSim>> {
49        let mut stmt = self.connection.prepare("SELECT * FROM peptides")?;
50        let peptides_iter = stmt.query_map([], |row| {
51            let frame_occurrence_str: String = row.get(15)?;
52            let frame_abundance_str: String = row.get(16)?;
53
54            let frame_occurrence: Vec<u32> = match serde_json::from_str(&frame_occurrence_str) {
55                Ok(value) => value,
56                Err(e) => {
57                    return Err(rusqlite::Error::FromSqlConversionFailure(
58                        15,
59                        rusqlite::types::Type::Text,
60                        Box::new(e),
61                    ))
62                }
63            };
64
65            // if the frame abundance is not available, set it to 0
66            let frame_abundance: Vec<f32> = match serde_json::from_str(&frame_abundance_str) {
67                Ok(value) => value,
68                Err(_e) => vec![0.0; frame_occurrence.len()],
69            };
70
71            let frame_distribution =
72                SignalDistribution::new(0.0, 0.0, 0.0, frame_occurrence, frame_abundance);
73
74            Ok(PeptidesSim {
75                protein_id: row.get(0)?,
76                peptide_id: row.get(1)?,
77                sequence: PeptideSequence::new(row.get(2)?, row.get(1)?),
78                proteins: row.get(3)?,
79                decoy: row.get(4)?,
80                missed_cleavages: row.get(5)?,
81                n_term: row.get(6)?,
82                c_term: row.get(7)?,
83                mono_isotopic_mass: row.get(8)?,
84                retention_time: row.get(9)?,
85                events: row.get(10)?,
86                frame_start: row.get(13)?,
87                frame_end: row.get(14)?,
88                frame_distribution,
89            })
90        })?;
91        let mut peptides = Vec::new();
92        for peptide in peptides_iter {
93            peptides.push(peptide?);
94        }
95        Ok(peptides)
96    }
97
98    pub fn read_ions(&self) -> rusqlite::Result<Vec<IonSim>> {
99        let mut stmt = self.connection.prepare("SELECT * FROM ions")?;
100        let ions_iter = stmt.query_map([], |row| {
101            let simulated_spectrum_str: String = row.get(8)?;
102            let scan_occurrence_str: String = row.get(9)?;
103            let scan_abundance_str: String = row.get(10)?;
104
105            let simulated_spectrum: MzSpectrum = match serde_json::from_str(&simulated_spectrum_str)
106            {
107                Ok(value) => value,
108                Err(e) => {
109                    return Err(rusqlite::Error::FromSqlConversionFailure(
110                        8,
111                        rusqlite::types::Type::Text,
112                        Box::new(e),
113                    ))
114                }
115            };
116
117            let scan_occurrence: Vec<u32> = match serde_json::from_str(&scan_occurrence_str) {
118                Ok(value) => value,
119                Err(e) => {
120                    return Err(rusqlite::Error::FromSqlConversionFailure(
121                        9,
122                        rusqlite::types::Type::Text,
123                        Box::new(e),
124                    ))
125                }
126            };
127
128            let scan_abundance: Vec<f32> = match serde_json::from_str(&scan_abundance_str) {
129                Ok(value) => value,
130                Err(e) => {
131                    return Err(rusqlite::Error::FromSqlConversionFailure(
132                        10,
133                        rusqlite::types::Type::Text,
134                        Box::new(e),
135                    ))
136                }
137            };
138
139            Ok(IonSim::new(
140                row.get(0)?,
141                row.get(1)?,
142                row.get(2)?,
143                row.get(3)?,
144                row.get(5)?,
145                row.get(6)?,
146                simulated_spectrum,
147                scan_occurrence,
148                scan_abundance,
149            ))
150        })?;
151        let mut ions = Vec::new();
152        for ion in ions_iter {
153            ions.push(ion?);
154        }
155        Ok(ions)
156    }
157
158    pub fn read_window_group_settings(&self) -> rusqlite::Result<Vec<WindowGroupSettingsSim>> {
159        let mut stmt = self.connection.prepare("SELECT * FROM dia_ms_ms_windows")?;
160        let window_group_settings_iter = stmt.query_map([], |row| {
161            Ok(WindowGroupSettingsSim::new(
162                row.get(0)?,
163                row.get(1)?,
164                row.get(2)?,
165                row.get(3)?,
166                row.get(4)?,
167                row.get(5)?,
168            ))
169        })?;
170        let mut window_group_settings = Vec::new();
171        for window_group_setting in window_group_settings_iter {
172            window_group_settings.push(window_group_setting?);
173        }
174        Ok(window_group_settings)
175    }
176
177    pub fn read_frame_to_window_group(&self) -> rusqlite::Result<Vec<FrameToWindowGroupSim>> {
178        let mut stmt = self.connection.prepare("SELECT * FROM dia_ms_ms_info")?;
179        let frame_to_window_group_iter = stmt.query_map([], |row| {
180            Ok(FrameToWindowGroupSim::new(row.get(0)?, row.get(1)?))
181        })?;
182
183        let mut frame_to_window_groups: Vec<FrameToWindowGroupSim> = Vec::new();
184        for frame_to_window_group in frame_to_window_group_iter {
185            frame_to_window_groups.push(frame_to_window_group?);
186        }
187
188        Ok(frame_to_window_groups)
189    }
190
191    pub fn read_pasef_meta(&self) -> rusqlite::Result<Vec<PASEFMeta>> {
192        let mut stmt = self.connection.prepare("SELECT * FROM pasef_meta")?;
193        let pasef_meta_iter = stmt.query_map([], |row| {
194            Ok(PASEFMeta::new(
195                row.get(0)?,
196                row.get(1)?,
197                row.get(2)?,
198                row.get(3)?,
199                row.get(4)?,
200                row.get(5)?,
201                row.get(6)?))
202        })?;
203
204        let mut pasef_meta: Vec<PASEFMeta> = Vec::new();
205
206        for pasef_meta_entry in pasef_meta_iter {
207            pasef_meta.push(pasef_meta_entry?);
208        }
209
210        Ok(pasef_meta)
211    }
212
213    /// Read peptides that are present in the given frame range.
214    /// A peptide is included if its frame range overlaps with [frame_min, frame_max].
215    pub fn read_peptides_for_frame_range(
216        &self,
217        frame_min: u32,
218        frame_max: u32,
219    ) -> rusqlite::Result<Vec<PeptidesSim>> {
220        let mut stmt = self.connection.prepare(
221            "SELECT * FROM peptides WHERE frame_occurrence_start <= ?1 AND frame_occurrence_end >= ?2"
222        )?;
223
224        let peptides_iter = stmt.query_map([frame_max, frame_min], |row| {
225            let frame_occurrence_str: String = row.get(15)?;
226            let frame_abundance_str: String = row.get(16)?;
227
228            let frame_occurrence: Vec<u32> = match serde_json::from_str(&frame_occurrence_str) {
229                Ok(value) => value,
230                Err(e) => {
231                    return Err(rusqlite::Error::FromSqlConversionFailure(
232                        15,
233                        rusqlite::types::Type::Text,
234                        Box::new(e),
235                    ))
236                }
237            };
238
239            let frame_abundance: Vec<f32> = match serde_json::from_str(&frame_abundance_str) {
240                Ok(value) => value,
241                Err(_e) => vec![0.0; frame_occurrence.len()],
242            };
243
244            let frame_distribution =
245                SignalDistribution::new(0.0, 0.0, 0.0, frame_occurrence, frame_abundance);
246
247            Ok(PeptidesSim {
248                protein_id: row.get(0)?,
249                peptide_id: row.get(1)?,
250                sequence: PeptideSequence::new(row.get(2)?, row.get(1)?),
251                proteins: row.get(3)?,
252                decoy: row.get(4)?,
253                missed_cleavages: row.get(5)?,
254                n_term: row.get(6)?,
255                c_term: row.get(7)?,
256                mono_isotopic_mass: row.get(8)?,
257                retention_time: row.get(9)?,
258                events: row.get(10)?,
259                frame_start: row.get(13)?,
260                frame_end: row.get(14)?,
261                frame_distribution,
262            })
263        })?;
264
265        let mut peptides = Vec::new();
266        for peptide in peptides_iter {
267            peptides.push(peptide?);
268        }
269        Ok(peptides)
270    }
271
272    /// Read ions for specific peptide IDs.
273    /// Uses batched queries for efficiency with large peptide ID lists.
274    pub fn read_ions_for_peptides(&self, peptide_ids: &[u32]) -> rusqlite::Result<Vec<IonSim>> {
275        if peptide_ids.is_empty() {
276            return Ok(Vec::new());
277        }
278
279        let mut all_ions = Vec::new();
280
281        // Process in chunks to avoid SQLite parameter limits
282        const CHUNK_SIZE: usize = 500;
283
284        for chunk in peptide_ids.chunks(CHUNK_SIZE) {
285            let placeholders: String = chunk.iter()
286                .map(|_| "?")
287                .collect::<Vec<_>>()
288                .join(",");
289
290            let sql = format!(
291                "SELECT * FROM ions WHERE peptide_id IN ({})",
292                placeholders
293            );
294
295            let mut stmt = self.connection.prepare(&sql)?;
296
297            let ions_iter = stmt.query_map(
298                rusqlite::params_from_iter(chunk.iter()),
299                |row| {
300                    let simulated_spectrum_str: String = row.get(8)?;
301                    let scan_occurrence_str: String = row.get(9)?;
302                    let scan_abundance_str: String = row.get(10)?;
303
304                    let simulated_spectrum: MzSpectrum = match serde_json::from_str(&simulated_spectrum_str) {
305                        Ok(value) => value,
306                        Err(e) => {
307                            return Err(rusqlite::Error::FromSqlConversionFailure(
308                                8,
309                                rusqlite::types::Type::Text,
310                                Box::new(e),
311                            ))
312                        }
313                    };
314
315                    let scan_occurrence: Vec<u32> = match serde_json::from_str(&scan_occurrence_str) {
316                        Ok(value) => value,
317                        Err(e) => {
318                            return Err(rusqlite::Error::FromSqlConversionFailure(
319                                9,
320                                rusqlite::types::Type::Text,
321                                Box::new(e),
322                            ))
323                        }
324                    };
325
326                    let scan_abundance: Vec<f32> = match serde_json::from_str(&scan_abundance_str) {
327                        Ok(value) => value,
328                        Err(e) => {
329                            return Err(rusqlite::Error::FromSqlConversionFailure(
330                                10,
331                                rusqlite::types::Type::Text,
332                                Box::new(e),
333                            ))
334                        }
335                    };
336
337                    Ok(IonSim::new(
338                        row.get(0)?,
339                        row.get(1)?,
340                        row.get(2)?,
341                        row.get(3)?,
342                        row.get(5)?,
343                        row.get(6)?,
344                        simulated_spectrum,
345                        scan_occurrence,
346                        scan_abundance,
347                    ))
348                },
349            )?;
350
351            for ion in ions_iter {
352                all_ions.push(ion?);
353            }
354        }
355
356        Ok(all_ions)
357    }
358
359    /// Read fragment ions for specific peptide IDs.
360    /// Uses batched queries for efficiency with large peptide ID lists.
361    pub fn read_fragment_ions_for_peptides(&self, peptide_ids: &[u32]) -> rusqlite::Result<Vec<FragmentIonSim>> {
362        if peptide_ids.is_empty() {
363            return Ok(Vec::new());
364        }
365
366        let mut all_fragment_ions = Vec::new();
367
368        // Process in chunks to avoid SQLite parameter limits
369        const CHUNK_SIZE: usize = 500;
370
371        for chunk in peptide_ids.chunks(CHUNK_SIZE) {
372            let placeholders: String = chunk.iter()
373                .map(|_| "?")
374                .collect::<Vec<_>>()
375                .join(",");
376
377            let sql = format!(
378                "SELECT * FROM fragment_ions WHERE peptide_id IN ({})",
379                placeholders
380            );
381
382            let mut stmt = self.connection.prepare(&sql)?;
383
384            let fragment_ion_iter = stmt.query_map(
385                rusqlite::params_from_iter(chunk.iter()),
386                |row| {
387                    let indices_string: String = row.get(4)?;
388                    let values_string: String = row.get(5)?;
389
390                    let indices: Vec<u32> = match serde_json::from_str(&indices_string) {
391                        Ok(value) => value,
392                        Err(e) => {
393                            return Err(rusqlite::Error::FromSqlConversionFailure(
394                                4,
395                                rusqlite::types::Type::Text,
396                                Box::new(e),
397                            ))
398                        }
399                    };
400
401                    let values: Vec<f64> = match serde_json::from_str(&values_string) {
402                        Ok(value) => value,
403                        Err(e) => {
404                            return Err(rusqlite::Error::FromSqlConversionFailure(
405                                5,
406                                rusqlite::types::Type::Text,
407                                Box::new(e),
408                            ))
409                        }
410                    };
411
412                    Ok(FragmentIonSim::new(
413                        row.get(0)?,
414                        row.get(1)?,
415                        row.get(2)?,
416                        row.get(3)?,
417                        indices,
418                        values,
419                    ))
420                },
421            )?;
422
423            for fragment_ion in fragment_ion_iter {
424                all_fragment_ions.push(fragment_ion?);
425            }
426        }
427
428        Ok(all_fragment_ions)
429    }
430
431    pub fn read_fragment_ions(&self) -> rusqlite::Result<Vec<FragmentIonSim>> {
432        let mut stmt = self.connection.prepare("SELECT * FROM fragment_ions")?;
433
434        let fragment_ion_sim_iter = stmt.query_map([], |row| {
435            let indices_string: String = row.get(4)?;
436            let values_string: String = row.get(5)?;
437
438            let indices: Vec<u32> = match serde_json::from_str(&indices_string) {
439                Ok(value) => value,
440                Err(e) => {
441                    return Err(rusqlite::Error::FromSqlConversionFailure(
442                        4,
443                        rusqlite::types::Type::Text,
444                        Box::new(e),
445                    ))
446                }
447            };
448
449            let values: Vec<f64> = match serde_json::from_str(&values_string) {
450                Ok(value) => value,
451                Err(e) => {
452                    return Err(rusqlite::Error::FromSqlConversionFailure(
453                        5,
454                        rusqlite::types::Type::Text,
455                        Box::new(e),
456                    ))
457                }
458            };
459
460            Ok(FragmentIonSim::new(
461                row.get(0)?,
462                row.get(1)?,
463                row.get(2)?,
464                row.get(3)?,
465                indices,
466                values,
467            ))
468        })?;
469
470        let mut fragment_ion_sim = Vec::new();
471        for fragment_ion in fragment_ion_sim_iter {
472            fragment_ion_sim.push(fragment_ion?);
473        }
474
475        Ok(fragment_ion_sim)
476    }
477
478    pub fn get_transmission_dia(&self) -> TimsTransmissionDIA {
479        let frame_to_window_group = self.read_frame_to_window_group().unwrap();
480        let window_group_settings = self.read_window_group_settings().unwrap();
481
482        TimsTransmissionDIA::new(
483            frame_to_window_group
484                .iter()
485                .map(|x| x.frame_id as i32)
486                .collect(),
487            frame_to_window_group
488                .iter()
489                .map(|x| x.window_group as i32)
490                .collect(),
491            window_group_settings
492                .iter()
493                .map(|x| x.window_group as i32)
494                .collect(),
495            window_group_settings
496                .iter()
497                .map(|x| x.scan_start as i32)
498                .collect(),
499            window_group_settings
500                .iter()
501                .map(|x| x.scan_end as i32)
502                .collect(),
503            window_group_settings
504                .iter()
505                .map(|x| x.isolation_mz as f64)
506                .collect(),
507            window_group_settings
508                .iter()
509                .map(|x| x.isolation_width as f64)
510                .collect(),
511            None,
512        )
513    }
514
515    pub fn get_transmission_dda(&self) -> TimsTransmissionDDA {
516        let pasef_meta = self.read_pasef_meta().unwrap();
517        TimsTransmissionDDA::new(
518            pasef_meta,
519            None,
520        )
521    }
522
523    pub fn get_collision_energy_dia(&self) -> TimsTofCollisionEnergyDIA {
524        let frame_to_window_group = self.read_frame_to_window_group().unwrap();
525        let window_group_settings = self.read_window_group_settings().unwrap();
526
527        TimsTofCollisionEnergyDIA::new(
528            frame_to_window_group
529                .iter()
530                .map(|x| x.frame_id as i32)
531                .collect(),
532            frame_to_window_group
533                .iter()
534                .map(|x| x.window_group as i32)
535                .collect(),
536            window_group_settings
537                .iter()
538                .map(|x| x.window_group as i32)
539                .collect(),
540            window_group_settings
541                .iter()
542                .map(|x| x.scan_start as i32)
543                .collect(),
544            window_group_settings
545                .iter()
546                .map(|x| x.scan_end as i32)
547                .collect(),
548            window_group_settings
549                .iter()
550                .map(|x| x.collision_energy as f64)
551                .collect(),
552        )
553    }
554
555    fn ion_map_fn_dda(
556        ion: IonSim,
557        peptide_map: &BTreeMap<u32, PeptidesSim>,
558        _precursor_frames: &HashSet<u32>,
559        transmission: &TimsTransmissionDDA,
560    ) -> BTreeSet<(u32, u32, String, i8, i32)> {
561        let peptide = peptide_map.get(&ion.peptide_id).unwrap();
562        let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new();
563
564        // Get all frames where this ion was explicitly selected for fragmentation
565        // using the precursor ID from pasef_meta (instead of re-computing m/z transmission)
566        let selections = transmission.get_selections_for_precursor(ion.ion_id as i32);
567
568        for (_, collision_energy) in selections {
569            // Quantize with same factor as DIA (line 614) so return conversion (line 702)
570            // divides by 100 to recover raw CE, matching the Python normalization flow
571            let quantized_energy = (collision_energy * 100.0).round() as i32;
572
573            ret_tree.insert((
574                ion.peptide_id,
575                ion.ion_id,
576                peptide.sequence.sequence.clone(),
577                ion.charge,
578                quantized_energy,
579            ));
580        }
581
582        ret_tree
583    }
584
585    fn ion_map_fn_dia(
586        ion: IonSim,
587        peptide_map: &BTreeMap<u32, PeptidesSim>,
588        precursor_frames: &HashSet<u32>,
589        transmission: &TimsTransmissionDIA,
590        collision_energy: &TimsTofCollisionEnergyDIA,
591    ) -> BTreeSet<(u32, u32, String, i8, i32)> {
592        let peptide = peptide_map.get(&ion.peptide_id).unwrap();
593        let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new();
594
595        // go over all frames the ion occurs in
596        for frame in peptide.frame_distribution.occurrence.iter() {
597            // only consider fragment frames
598            if !precursor_frames.contains(frame) {
599                // go over all scans the ion occurs in
600                for scan in &ion.scan_distribution.occurrence {
601                    // check transmission for all precursor ion peaks of the isotopic envelope
602
603                    let precursor_spec = &ion.simulated_spectrum;
604
605                    if transmission.any_transmitted(
606                        *frame as i32,
607                        *scan as i32,
608                        &precursor_spec.mz,
609                        Some(0.5),
610                    ) {
611                        let collision_energy =
612                            collision_energy.get_collision_energy(*frame as i32, *scan as i32);
613                        let quantized_energy = (collision_energy * 100.0).round() as i32;
614
615                        ret_tree.insert((
616                            ion.peptide_id,
617                            ion.ion_id,
618                            peptide.sequence.sequence.clone(),
619                            ion.charge,
620                            quantized_energy,
621                        ));
622                    }
623                }
624            }
625        }
626        ret_tree
627    }
628
629    // TODO: take isotopic envelope into account
630    pub fn get_transmitted_ions(
631        &self,
632        num_threads: usize,
633        dda_mode: bool,
634    ) -> (Vec<i32>, Vec<i32>, Vec<String>, Vec<i8>, Vec<f32>) {
635
636        let thread_pool = ThreadPoolBuilder::new()
637            .num_threads(num_threads)
638            .build()
639            .unwrap();
640
641        let peptides = self.read_peptides().unwrap();
642
643        let peptide_map = TimsTofSyntheticsDataHandle::build_peptide_map(&peptides);
644
645        let precursor_frames =
646            TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&self.read_frames().unwrap());
647
648        let ions = self.read_ions().unwrap();
649
650        let trees = match dda_mode {
651            true => {
652                let transmission = self.get_transmission_dda();
653                thread_pool.install(|| {
654                    ions.par_iter()
655                        .map(|ion| {
656                            TimsTofSyntheticsDataHandle::ion_map_fn_dda(
657                                ion.clone(),
658                                &peptide_map,
659                                &precursor_frames,
660                                &transmission,
661                            )
662                        })
663                        .collect::<Vec<_>>()
664            })
665        },
666            false => {
667                let transmission = self.get_transmission_dia();
668                let collision_energy = self.get_collision_energy_dia();
669                thread_pool.install(|| {
670                    ions.par_iter()
671                        .map(|ion| {
672                            TimsTofSyntheticsDataHandle::ion_map_fn_dia(
673                                ion.clone(),
674                                &peptide_map,
675                                &precursor_frames,
676                                &transmission,
677                                &collision_energy,
678                            )
679                        })
680                        .collect::<Vec<_>>()
681                })
682            },
683        };
684
685        let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new();
686        for tree in trees {
687            ret_tree.extend(tree);
688        }
689
690        let mut ret_peptide_id = Vec::new();
691        let mut ret_ion_id = Vec::new();
692        let mut ret_sequence = Vec::new();
693        let mut ret_charge = Vec::new();
694        let mut ret_energy = Vec::new();
695
696        for (peptide_id, ion_id, sequence, charge, energy) in ret_tree {
697            ret_peptide_id.push(peptide_id as i32);
698            ret_ion_id.push(ion_id as i32);
699            ret_sequence.push(sequence);
700            ret_charge.push(charge);
701            ret_energy.push(energy as f32 / 100.0);
702        }
703
704        (
705            ret_peptide_id,
706            ret_ion_id,
707            ret_sequence,
708            ret_charge,
709            ret_energy,
710        )
711    }
712
713    /// Lazy version of get_transmitted_ions that only loads data for a specific frame range.
714    /// This reduces memory usage by only loading peptides and ions that are relevant to the
715    /// specified frame range instead of all data from the database.
716    ///
717    /// # Arguments
718    ///
719    /// * `frame_min` - Minimum frame ID to include (inclusive)
720    /// * `frame_max` - Maximum frame ID to include (inclusive)
721    /// * `num_threads` - Number of threads to use for parallel processing
722    /// * `dda_mode` - If true, use DDA transmission; if false, use DIA transmission
723    ///
724    /// # Returns
725    ///
726    /// Tuple of (peptide_ids, ion_ids, sequences, charges, collision_energies) for transmitted ions
727    pub fn get_transmitted_ions_for_frame_range(
728        &self,
729        frame_min: u32,
730        frame_max: u32,
731        num_threads: usize,
732        dda_mode: bool,
733    ) -> (Vec<i32>, Vec<i32>, Vec<String>, Vec<i8>, Vec<f32>) {
734
735        let thread_pool = ThreadPoolBuilder::new()
736            .num_threads(num_threads)
737            .build()
738            .unwrap();
739
740        // Only load peptides for the specified frame range
741        let peptides = self.read_peptides_for_frame_range(frame_min, frame_max).unwrap();
742
743        if peptides.is_empty() {
744            return (Vec::new(), Vec::new(), Vec::new(), Vec::new(), Vec::new());
745        }
746
747        let peptide_ids: Vec<u32> = peptides.iter().map(|p| p.peptide_id).collect();
748        let peptide_map = TimsTofSyntheticsDataHandle::build_peptide_map(&peptides);
749
750        let precursor_frames =
751            TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&self.read_frames().unwrap());
752
753        // Only load ions for the peptides in our frame range
754        let ions = self.read_ions_for_peptides(&peptide_ids).unwrap();
755
756        if ions.is_empty() {
757            return (Vec::new(), Vec::new(), Vec::new(), Vec::new(), Vec::new());
758        }
759
760        let trees = match dda_mode {
761            true => {
762                let transmission = self.get_transmission_dda();
763                thread_pool.install(|| {
764                    ions.par_iter()
765                        .map(|ion| {
766                            TimsTofSyntheticsDataHandle::ion_map_fn_dda(
767                                ion.clone(),
768                                &peptide_map,
769                                &precursor_frames,
770                                &transmission,
771                            )
772                        })
773                        .collect::<Vec<_>>()
774                })
775            },
776            false => {
777                let transmission = self.get_transmission_dia();
778                let collision_energy = self.get_collision_energy_dia();
779                thread_pool.install(|| {
780                    ions.par_iter()
781                        .map(|ion| {
782                            TimsTofSyntheticsDataHandle::ion_map_fn_dia(
783                                ion.clone(),
784                                &peptide_map,
785                                &precursor_frames,
786                                &transmission,
787                                &collision_energy,
788                            )
789                        })
790                        .collect::<Vec<_>>()
791                })
792            },
793        };
794
795        let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new();
796        for tree in trees {
797            ret_tree.extend(tree);
798        }
799
800        let mut ret_peptide_id = Vec::new();
801        let mut ret_ion_id = Vec::new();
802        let mut ret_sequence = Vec::new();
803        let mut ret_charge = Vec::new();
804        let mut ret_energy = Vec::new();
805
806        for (peptide_id, ion_id, sequence, charge, energy) in ret_tree {
807            ret_peptide_id.push(peptide_id as i32);
808            ret_ion_id.push(ion_id as i32);
809            ret_sequence.push(sequence);
810            ret_charge.push(charge);
811            ret_energy.push(energy as f32 / 100.0);
812        }
813
814        (
815            ret_peptide_id,
816            ret_ion_id,
817            ret_sequence,
818            ret_charge,
819            ret_energy,
820        )
821    }
822
823    /// Method to build a map from peptide id to ions
824    pub fn build_peptide_to_ion_map(ions: &Vec<IonSim>) -> BTreeMap<u32, Vec<IonSim>> {
825        let mut ion_map = BTreeMap::new();
826        for ion in ions.iter() {
827            let ions = ion_map.entry(ion.peptide_id).or_insert_with(Vec::new);
828            ions.push(ion.clone());
829        }
830        ion_map
831    }
832
833    /// Method to build a map from peptide id to events (absolute number of events in the simulation)
834    pub fn build_peptide_map(peptides: &Vec<PeptidesSim>) -> BTreeMap<u32, PeptidesSim> {
835        let mut peptide_map = BTreeMap::new();
836        for peptide in peptides.iter() {
837            peptide_map.insert(peptide.peptide_id, peptide.clone());
838        }
839        peptide_map
840    }
841
842    /// Method to build a set of precursor frame ids, can be used to check if a frame is a precursor frame
843    pub fn build_precursor_frame_id_set(frames: &Vec<FramesSim>) -> HashSet<u32> {
844        frames
845            .iter()
846            .filter(|frame| frame.parse_ms_type() == MsType::Precursor)
847            .map(|frame| frame.frame_id)
848            .collect()
849    }
850
851    // Method to build a map from peptide id to events (absolute number of events in the simulation)
852    pub fn build_peptide_to_events(peptides: &Vec<PeptidesSim>) -> BTreeMap<u32, f32> {
853        let mut peptide_to_events = BTreeMap::new();
854        for peptide in peptides.iter() {
855            peptide_to_events.insert(peptide.peptide_id, peptide.events);
856        }
857        peptide_to_events
858    }
859
860    // Method to build a map from frame id to retention time
861    pub fn build_frame_to_rt(frames: &Vec<FramesSim>) -> BTreeMap<u32, f32> {
862        let mut frame_to_rt = BTreeMap::new();
863        for frame in frames.iter() {
864            frame_to_rt.insert(frame.frame_id, frame.time);
865        }
866        frame_to_rt
867    }
868
869    // Method to build a map from scan id to mobility
870    pub fn build_scan_to_mobility(scans: &Vec<ScansSim>) -> BTreeMap<u32, f32> {
871        let mut scan_to_mobility = BTreeMap::new();
872        for scan in scans.iter() {
873            scan_to_mobility.insert(scan.scan, scan.mobility);
874        }
875        scan_to_mobility
876    }
877    pub fn build_frame_to_abundances(
878        peptides: &Vec<PeptidesSim>,
879    ) -> BTreeMap<u32, (Vec<u32>, Vec<f32>)> {
880        let mut frame_to_abundances = BTreeMap::new();
881
882        for peptide in peptides.iter() {
883            let peptide_id = peptide.peptide_id;
884            let frame_occurrence = peptide.frame_distribution.occurrence.clone();
885            let frame_abundance = peptide.frame_distribution.abundance.clone();
886
887            for (frame_id, abundance) in frame_occurrence.iter().zip(frame_abundance.iter()) {
888                // only insert if the abundance is greater than 1e-6
889
890                if *abundance > 1e-6 {
891                    let (occurrences, abundances) = frame_to_abundances
892                        .entry(*frame_id)
893                        .or_insert((vec![], vec![]));
894                    occurrences.push(peptide_id);
895                    abundances.push(*abundance);
896                }
897            }
898        }
899
900        frame_to_abundances
901    }
902    pub fn build_peptide_to_ions(
903        ions: &Vec<IonSim>,
904    ) -> BTreeMap<
905        u32,
906        (
907            Vec<f32>,
908            Vec<Vec<u32>>,
909            Vec<Vec<f32>>,
910            Vec<i8>,
911            Vec<MzSpectrum>,
912        ),
913    > {
914        let mut peptide_to_ions = BTreeMap::new();
915
916        for ion in ions.iter() {
917            let peptide_id = ion.peptide_id;
918            let abundance = ion.relative_abundance;
919            let scan_occurrence = ion.scan_distribution.occurrence.clone();
920            let scan_abundance = ion.scan_distribution.abundance.clone();
921            let charge = ion.charge;
922            let spectrum = ion.simulated_spectrum.clone();
923
924            let (abundances, scan_occurrences, scan_abundances, charges, spectra) = peptide_to_ions
925                .entry(peptide_id)
926                .or_insert((vec![], vec![], vec![], vec![], vec![]));
927            abundances.push(abundance);
928            scan_occurrences.push(scan_occurrence);
929            scan_abundances.push(scan_abundance);
930            charges.push(charge);
931            spectra.push(spectrum);
932        }
933
934        peptide_to_ions
935    }
936    pub fn build_fragment_ions(
937        peptides_sim: &BTreeMap<u32, PeptidesSim>,
938        fragment_ions: &Vec<FragmentIonSim>,
939        num_threads: usize,
940    ) -> BTreeMap<(u32, i8, i32), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)> {
941        let thread_pool = ThreadPoolBuilder::new()
942            .num_threads(num_threads)
943            .build()
944            .unwrap();
945        let fragment_ion_map = thread_pool.install(|| {
946            fragment_ions
947                .par_iter()
948                .map(|fragment_ion| {
949                    let key = (
950                        fragment_ion.peptide_id,
951                        fragment_ion.charge,
952                        (fragment_ion.collision_energy * 1e3).round() as i32,
953                    );
954
955                    let value = peptides_sim
956                        .get(&fragment_ion.peptide_id)
957                        .unwrap()
958                        .sequence
959                        .associate_with_predicted_intensities(
960                            fragment_ion.charge as i32,
961                            FragmentType::B,
962                            fragment_ion.to_dense(174),
963                            true,
964                            true,
965                        );
966
967                    let fragment_ions: Vec<MzSpectrum> = value
968                        .peptide_ions
969                        .par_iter()
970                        .map(|ion_series| {
971                            ion_series.generate_isotopic_spectrum(1e-2, 1e-3, 100, 1e-5)
972                        })
973                        .collect();
974                    (key, (value, fragment_ions))
975                })
976                .collect::<BTreeMap<_, _>>() // Collect the results into a BTreeMap
977        });
978
979        fragment_ion_map
980    }
981
982    pub fn build_fragment_ions_annotated(
983        peptides_sim: &BTreeMap<u32, PeptidesSim>,
984        fragment_ions: &Vec<FragmentIonSim>,
985        num_threads: usize,
986    ) -> BTreeMap<(u32, i8, i32), (PeptideProductIonSeriesCollection, Vec<MzSpectrumAnnotated>)>
987    {
988        let thread_pool = ThreadPoolBuilder::new()
989            .num_threads(num_threads)
990            .build()
991            .unwrap();
992        let fragment_ion_map = thread_pool.install(|| {
993            fragment_ions
994                .par_iter()
995                .map(|fragment_ion| {
996                    let key = (
997                        fragment_ion.peptide_id,
998                        fragment_ion.charge,
999                        (fragment_ion.collision_energy * 1e3).round() as i32,
1000                    );
1001
1002                    let value = peptides_sim
1003                        .get(&fragment_ion.peptide_id)
1004                        .unwrap()
1005                        .sequence
1006                        .associate_with_predicted_intensities(
1007                            fragment_ion.charge as i32,
1008                            FragmentType::B,
1009                            fragment_ion.to_dense(174),
1010                            true,
1011                            true,
1012                        );
1013
1014                    let fragment_ions: Vec<MzSpectrumAnnotated> = value
1015                        .peptide_ions
1016                        .par_iter()
1017                        .map(|ion_series| {
1018                            ion_series.generate_isotopic_spectrum_annotated(1e-2, 1e-3, 100, 1e-5)
1019                        })
1020                        .collect();
1021                    (key, (value, fragment_ions))
1022                })
1023                .collect::<BTreeMap<_, _>>() // Collect the results into a BTreeMap
1024        });
1025
1026        fragment_ion_map
1027    }
1028
1029    /// Build fragment ions with complementary isotope distribution data.
1030    ///
1031    /// This variant calculates both the fragment isotope distribution and
1032    /// the complementary fragment isotope distribution, which are needed
1033    /// for quad-selection dependent isotope transmission calculations.
1034    ///
1035    /// # Arguments
1036    ///
1037    /// * `peptides_sim` - Map of peptide_id to PeptidesSim
1038    /// * `fragment_ions` - Vector of FragmentIonSim
1039    /// * `num_threads` - Number of threads for parallel processing
1040    ///
1041    /// # Returns
1042    ///
1043    /// * `BTreeMap` mapping (peptide_id, charge, collision_energy) to
1044    ///   (PeptideProductIonSeriesCollection, fragment spectra, fragment distributions, complementary distributions)
1045    /// Build fragment ions with transmission data for both precursor scaling and per-fragment modes.
1046    ///
1047    /// This function calculates:
1048    /// - Precursor isotope distribution (for PrecursorScaling mode)
1049    /// - Per-fragment isotope distributions with their complementary distributions (for PerFragment mode)
1050    pub fn build_fragment_ions_with_transmission_data(
1051        peptides_sim: &BTreeMap<u32, PeptidesSim>,
1052        fragment_ions: &Vec<FragmentIonSim>,
1053        num_threads: usize,
1054    ) -> BTreeMap<(u32, i8, i32), FragmentIonsWithComplementary> {
1055        let thread_pool = ThreadPoolBuilder::new()
1056            .num_threads(num_threads)
1057            .build()
1058            .unwrap();
1059
1060        let fragment_ion_map = thread_pool.install(|| {
1061            fragment_ions
1062                .par_iter()
1063                .map(|fragment_ion| {
1064                    let key = (
1065                        fragment_ion.peptide_id,
1066                        fragment_ion.charge,
1067                        (fragment_ion.collision_energy * 1e3).round() as i32,
1068                    );
1069
1070                    let peptide_sim = peptides_sim.get(&fragment_ion.peptide_id).unwrap();
1071
1072                    // Get precursor atomic composition for complementary calculations
1073                    let precursor_composition = peptide_sim.sequence.atomic_composition();
1074
1075                    // Calculate precursor isotope distribution for PrecursorScaling mode
1076                    let precursor_composition_owned: std::collections::HashMap<String, i32> =
1077                        precursor_composition.iter().map(|(k, v)| (k.to_string(), *v)).collect();
1078                    let precursor_isotope_distribution = mscore::algorithm::isotope::generate_isotope_distribution(
1079                        &precursor_composition_owned,
1080                        1e-3,
1081                        1e-8,
1082                        100,
1083                    ).into_iter().filter(|&(_, abundance)| abundance > 1e-10).collect();
1084
1085                    let ion_series_collection = peptide_sim
1086                        .sequence
1087                        .associate_with_predicted_intensities(
1088                            fragment_ion.charge as i32,
1089                            FragmentType::B,
1090                            fragment_ion.to_dense(174),
1091                            true,
1092                            true,
1093                        );
1094
1095                    // Calculate fragment spectra and per-fragment transmission data
1096                    let mut fragment_spectra: Vec<MzSpectrum> = Vec::new();
1097                    let mut per_fragment_data: Vec<Vec<FragmentIonTransmissionData>> = Vec::new();
1098
1099                    for ion_series in &ion_series_collection.peptide_ions {
1100                        // Generate the full isotopic spectrum for this ion series
1101                        let spectrum = ion_series.generate_isotopic_spectrum(1e-2, 1e-3, 100, 1e-5);
1102                        fragment_spectra.push(spectrum);
1103
1104                        // Build per-fragment data for this series
1105                        let mut series_fragment_data: Vec<FragmentIonTransmissionData> = Vec::new();
1106
1107                        // Process n-terminal ions (b-ions)
1108                        for n_ion in &ion_series.n_ions {
1109                            let frag_dist = n_ion.isotope_distribution(1e-3, 1e-8, 100, 1e-10);
1110                            let comp_dist = n_ion.complementary_isotope_distribution(
1111                                &precursor_composition,
1112                                1e-3,
1113                                1e-8,
1114                                100,
1115                            );
1116
1117                            series_fragment_data.push(FragmentIonTransmissionData {
1118                                fragment_distribution: frag_dist,
1119                                complementary_distribution: comp_dist,
1120                                predicted_intensity: n_ion.ion.intensity,
1121                            });
1122                        }
1123
1124                        // Process c-terminal ions (y-ions)
1125                        for c_ion in &ion_series.c_ions {
1126                            let frag_dist = c_ion.isotope_distribution(1e-3, 1e-8, 100, 1e-10);
1127                            let comp_dist = c_ion.complementary_isotope_distribution(
1128                                &precursor_composition,
1129                                1e-3,
1130                                1e-8,
1131                                100,
1132                            );
1133
1134                            series_fragment_data.push(FragmentIonTransmissionData {
1135                                fragment_distribution: frag_dist,
1136                                complementary_distribution: comp_dist,
1137                                predicted_intensity: c_ion.ion.intensity,
1138                            });
1139                        }
1140
1141                        per_fragment_data.push(series_fragment_data);
1142                    }
1143
1144                    let data = FragmentIonsWithComplementary {
1145                        ion_series_collection,
1146                        fragment_spectra,
1147                        precursor_isotope_distribution,
1148                        per_fragment_data,
1149                    };
1150
1151                    (key, data)
1152                })
1153                .collect::<BTreeMap<_, _>>()
1154        });
1155
1156        fragment_ion_map
1157    }
1158}
1159
1160/// Data for a single fragment ion with its complementary distribution.
1161#[derive(Debug, Clone)]
1162pub struct FragmentIonTransmissionData {
1163    /// Fragment isotope distribution as (m/z, abundance) pairs
1164    pub fragment_distribution: Vec<(f64, f64)>,
1165    /// Complementary fragment isotope distribution as (mass, abundance) pairs
1166    pub complementary_distribution: Vec<(f64, f64)>,
1167    /// Predicted intensity of this fragment ion
1168    pub predicted_intensity: f64,
1169}
1170
1171/// Struct holding fragment ion data along with transmission calculation data.
1172///
1173/// This is used for quad-selection dependent isotope transmission calculations,
1174/// where fragment isotope patterns are adjusted based on which precursor isotopes
1175/// were transmitted through the quadrupole.
1176#[derive(Debug, Clone)]
1177pub struct FragmentIonsWithComplementary {
1178    /// The original ion series collection with intensity predictions
1179    pub ion_series_collection: PeptideProductIonSeriesCollection,
1180    /// Pre-calculated fragment spectra (standard isotope patterns)
1181    pub fragment_spectra: Vec<MzSpectrum>,
1182    /// Precursor isotope distribution for scaling mode (m/z, abundance)
1183    pub precursor_isotope_distribution: Vec<(f64, f64)>,
1184    /// Per-fragment transmission data for per-fragment mode
1185    /// Outer Vec: one per ion_series, Inner Vec: one per fragment ion in that series
1186    pub per_fragment_data: Vec<Vec<FragmentIonTransmissionData>>,
1187}