1use crate::sim::containers::{
2 FragmentIonSim, FrameToWindowGroupSim, FramesSim, IonSim, PeptidesSim, ScansSim,
3 SignalDistribution, WindowGroupSettingsSim,
4};
5use mscore::data::peptide::{FragmentType, PeptideProductIonSeriesCollection, PeptideSequence};
6use mscore::data::spectrum::{MsType, MzSpectrum};
7use mscore::simulation::annotation::MzSpectrumAnnotated;
8use mscore::timstof::collision::{TimsTofCollisionEnergy, TimsTofCollisionEnergyDIA};
9use mscore::timstof::quadrupole::{IonTransmission, PASEFMeta, TimsTransmissionDDA, TimsTransmissionDIA};
10use rayon::prelude::*;
11use rayon::ThreadPoolBuilder;
12use rusqlite::Connection;
13use std::collections::{BTreeMap, BTreeSet, HashSet};
14use std::path::Path;
15
16#[derive(Debug)]
17pub struct TimsTofSyntheticsDataHandle {
18 pub connection: Connection,
19}
20
21impl TimsTofSyntheticsDataHandle {
22 pub fn new(path: &Path) -> rusqlite::Result<Self> {
23 let connection = Connection::open(path)?;
24 Ok(Self { connection })
25 }
26
27 pub fn read_frames(&self) -> rusqlite::Result<Vec<FramesSim>> {
28 let mut stmt = self.connection.prepare("SELECT * FROM frames")?;
29 let frames_iter = stmt.query_map([], |row| {
30 Ok(FramesSim::new(row.get(0)?, row.get(1)?, row.get(2)?))
31 })?;
32 let mut frames = Vec::new();
33 for frame in frames_iter {
34 frames.push(frame?);
35 }
36 Ok(frames)
37 }
38
39 pub fn read_scans(&self) -> rusqlite::Result<Vec<ScansSim>> {
40 let mut stmt = self.connection.prepare("SELECT * FROM scans")?;
41 let scans_iter = stmt.query_map([], |row| Ok(ScansSim::new(row.get(0)?, row.get(1)?)))?;
42 let mut scans = Vec::new();
43 for scan in scans_iter {
44 scans.push(scan?);
45 }
46 Ok(scans)
47 }
48 pub fn read_peptides(&self) -> rusqlite::Result<Vec<PeptidesSim>> {
49 let mut stmt = self.connection.prepare("SELECT * FROM peptides")?;
50 let peptides_iter = stmt.query_map([], |row| {
51 let frame_occurrence_str: String = row.get(15)?;
52 let frame_abundance_str: String = row.get(16)?;
53
54 let frame_occurrence: Vec<u32> = match serde_json::from_str(&frame_occurrence_str) {
55 Ok(value) => value,
56 Err(e) => {
57 return Err(rusqlite::Error::FromSqlConversionFailure(
58 15,
59 rusqlite::types::Type::Text,
60 Box::new(e),
61 ))
62 }
63 };
64
65 let frame_abundance: Vec<f32> = match serde_json::from_str(&frame_abundance_str) {
67 Ok(value) => value,
68 Err(_e) => vec![0.0; frame_occurrence.len()],
69 };
70
71 let frame_distribution =
72 SignalDistribution::new(0.0, 0.0, 0.0, frame_occurrence, frame_abundance);
73
74 Ok(PeptidesSim {
75 protein_id: row.get(0)?,
76 peptide_id: row.get(1)?,
77 sequence: PeptideSequence::new(row.get(2)?, row.get(1)?),
78 proteins: row.get(3)?,
79 decoy: row.get(4)?,
80 missed_cleavages: row.get(5)?,
81 n_term: row.get(6)?,
82 c_term: row.get(7)?,
83 mono_isotopic_mass: row.get(8)?,
84 retention_time: row.get(9)?,
85 events: row.get(10)?,
86 frame_start: row.get(13)?,
87 frame_end: row.get(14)?,
88 frame_distribution,
89 })
90 })?;
91 let mut peptides = Vec::new();
92 for peptide in peptides_iter {
93 peptides.push(peptide?);
94 }
95 Ok(peptides)
96 }
97
98 pub fn read_ions(&self) -> rusqlite::Result<Vec<IonSim>> {
99 let mut stmt = self.connection.prepare("SELECT * FROM ions")?;
100 let ions_iter = stmt.query_map([], |row| {
101 let simulated_spectrum_str: String = row.get(8)?;
102 let scan_occurrence_str: String = row.get(9)?;
103 let scan_abundance_str: String = row.get(10)?;
104
105 let simulated_spectrum: MzSpectrum = match serde_json::from_str(&simulated_spectrum_str)
106 {
107 Ok(value) => value,
108 Err(e) => {
109 return Err(rusqlite::Error::FromSqlConversionFailure(
110 8,
111 rusqlite::types::Type::Text,
112 Box::new(e),
113 ))
114 }
115 };
116
117 let scan_occurrence: Vec<u32> = match serde_json::from_str(&scan_occurrence_str) {
118 Ok(value) => value,
119 Err(e) => {
120 return Err(rusqlite::Error::FromSqlConversionFailure(
121 9,
122 rusqlite::types::Type::Text,
123 Box::new(e),
124 ))
125 }
126 };
127
128 let scan_abundance: Vec<f32> = match serde_json::from_str(&scan_abundance_str) {
129 Ok(value) => value,
130 Err(e) => {
131 return Err(rusqlite::Error::FromSqlConversionFailure(
132 10,
133 rusqlite::types::Type::Text,
134 Box::new(e),
135 ))
136 }
137 };
138
139 Ok(IonSim::new(
140 row.get(0)?,
141 row.get(1)?,
142 row.get(2)?,
143 row.get(3)?,
144 row.get(5)?,
145 row.get(6)?,
146 simulated_spectrum,
147 scan_occurrence,
148 scan_abundance,
149 ))
150 })?;
151 let mut ions = Vec::new();
152 for ion in ions_iter {
153 ions.push(ion?);
154 }
155 Ok(ions)
156 }
157
158 pub fn read_window_group_settings(&self) -> rusqlite::Result<Vec<WindowGroupSettingsSim>> {
159 let mut stmt = self.connection.prepare("SELECT * FROM dia_ms_ms_windows")?;
160 let window_group_settings_iter = stmt.query_map([], |row| {
161 Ok(WindowGroupSettingsSim::new(
162 row.get(0)?,
163 row.get(1)?,
164 row.get(2)?,
165 row.get(3)?,
166 row.get(4)?,
167 row.get(5)?,
168 ))
169 })?;
170 let mut window_group_settings = Vec::new();
171 for window_group_setting in window_group_settings_iter {
172 window_group_settings.push(window_group_setting?);
173 }
174 Ok(window_group_settings)
175 }
176
177 pub fn read_frame_to_window_group(&self) -> rusqlite::Result<Vec<FrameToWindowGroupSim>> {
178 let mut stmt = self.connection.prepare("SELECT * FROM dia_ms_ms_info")?;
179 let frame_to_window_group_iter = stmt.query_map([], |row| {
180 Ok(FrameToWindowGroupSim::new(row.get(0)?, row.get(1)?))
181 })?;
182
183 let mut frame_to_window_groups: Vec<FrameToWindowGroupSim> = Vec::new();
184 for frame_to_window_group in frame_to_window_group_iter {
185 frame_to_window_groups.push(frame_to_window_group?);
186 }
187
188 Ok(frame_to_window_groups)
189 }
190
191 pub fn read_pasef_meta(&self) -> rusqlite::Result<Vec<PASEFMeta>> {
192 let mut stmt = self.connection.prepare("SELECT * FROM pasef_meta")?;
193 let pasef_meta_iter = stmt.query_map([], |row| {
194 Ok(PASEFMeta::new(
195 row.get(0)?,
196 row.get(1)?,
197 row.get(2)?,
198 row.get(3)?,
199 row.get(4)?,
200 row.get(5)?,
201 row.get(6)?))
202 })?;
203
204 let mut pasef_meta: Vec<PASEFMeta> = Vec::new();
205
206 for pasef_meta_entry in pasef_meta_iter {
207 pasef_meta.push(pasef_meta_entry?);
208 }
209
210 Ok(pasef_meta)
211 }
212
213 pub fn read_fragment_ions(&self) -> rusqlite::Result<Vec<FragmentIonSim>> {
214 let mut stmt = self.connection.prepare("SELECT * FROM fragment_ions")?;
215
216 let fragment_ion_sim_iter = stmt.query_map([], |row| {
217 let indices_string: String = row.get(4)?;
218 let values_string: String = row.get(5)?;
219
220 let indices: Vec<u32> = match serde_json::from_str(&indices_string) {
221 Ok(value) => value,
222 Err(e) => {
223 return Err(rusqlite::Error::FromSqlConversionFailure(
224 4,
225 rusqlite::types::Type::Text,
226 Box::new(e),
227 ))
228 }
229 };
230
231 let values: Vec<f64> = match serde_json::from_str(&values_string) {
232 Ok(value) => value,
233 Err(e) => {
234 return Err(rusqlite::Error::FromSqlConversionFailure(
235 5,
236 rusqlite::types::Type::Text,
237 Box::new(e),
238 ))
239 }
240 };
241
242 Ok(FragmentIonSim::new(
243 row.get(0)?,
244 row.get(1)?,
245 row.get(2)?,
246 row.get(3)?,
247 indices,
248 values,
249 ))
250 })?;
251
252 let mut fragment_ion_sim = Vec::new();
253 for fragment_ion in fragment_ion_sim_iter {
254 fragment_ion_sim.push(fragment_ion?);
255 }
256
257 Ok(fragment_ion_sim)
258 }
259
260 pub fn get_transmission_dia(&self) -> TimsTransmissionDIA {
261 let frame_to_window_group = self.read_frame_to_window_group().unwrap();
262 let window_group_settings = self.read_window_group_settings().unwrap();
263
264 TimsTransmissionDIA::new(
265 frame_to_window_group
266 .iter()
267 .map(|x| x.frame_id as i32)
268 .collect(),
269 frame_to_window_group
270 .iter()
271 .map(|x| x.window_group as i32)
272 .collect(),
273 window_group_settings
274 .iter()
275 .map(|x| x.window_group as i32)
276 .collect(),
277 window_group_settings
278 .iter()
279 .map(|x| x.scan_start as i32)
280 .collect(),
281 window_group_settings
282 .iter()
283 .map(|x| x.scan_end as i32)
284 .collect(),
285 window_group_settings
286 .iter()
287 .map(|x| x.isolation_mz as f64)
288 .collect(),
289 window_group_settings
290 .iter()
291 .map(|x| x.isolation_width as f64)
292 .collect(),
293 None,
294 )
295 }
296
297 pub fn get_transmission_dda(&self) -> TimsTransmissionDDA {
298 let pasef_meta = self.read_pasef_meta().unwrap();
299 TimsTransmissionDDA::new(
300 pasef_meta,
301 None,
302 )
303 }
304
305 pub fn get_collision_energy_dia(&self) -> TimsTofCollisionEnergyDIA {
306 let frame_to_window_group = self.read_frame_to_window_group().unwrap();
307 let window_group_settings = self.read_window_group_settings().unwrap();
308
309 TimsTofCollisionEnergyDIA::new(
310 frame_to_window_group
311 .iter()
312 .map(|x| x.frame_id as i32)
313 .collect(),
314 frame_to_window_group
315 .iter()
316 .map(|x| x.window_group as i32)
317 .collect(),
318 window_group_settings
319 .iter()
320 .map(|x| x.window_group as i32)
321 .collect(),
322 window_group_settings
323 .iter()
324 .map(|x| x.scan_start as i32)
325 .collect(),
326 window_group_settings
327 .iter()
328 .map(|x| x.scan_end as i32)
329 .collect(),
330 window_group_settings
331 .iter()
332 .map(|x| x.collision_energy as f64)
333 .collect(),
334 )
335 }
336
337 fn ion_map_fn_dda(
338 ion: IonSim,
339 peptide_map: &BTreeMap<u32, PeptidesSim>,
340 precursor_frames: &HashSet<u32>,
341 transmission: &TimsTransmissionDDA,
342 ) -> BTreeSet<(u32, u32, String, i8, i32)> {
343 let peptide = peptide_map.get(&ion.peptide_id).unwrap();
344 let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new();
345
346 for frame in peptide.frame_distribution.occurrence.iter() {
348 if !precursor_frames.contains(frame) {
350 for scan in &ion.scan_distribution.occurrence {
352 let precursor_spec = &ion.simulated_spectrum;
355
356 if transmission.any_transmitted(
357 *frame as i32,
358 *scan as i32,
359 &precursor_spec.mz,
360 Some(0.5),
361 ) {
362 let collision_energy =
363 transmission.get_collision_energy(*frame as i32, *scan as i32).unwrap_or(0.0);
364
365 let quantized_energy = (collision_energy * 100.0).round() as i32;
366
367 ret_tree.insert((
368 ion.peptide_id,
369 ion.ion_id,
370 peptide.sequence.sequence.clone(),
371 ion.charge,
372 quantized_energy,
373 ));
374 }
375 }
376 }
377 }
378 ret_tree
379 }
380
381 fn ion_map_fn_dia(
382 ion: IonSim,
383 peptide_map: &BTreeMap<u32, PeptidesSim>,
384 precursor_frames: &HashSet<u32>,
385 transmission: &TimsTransmissionDIA,
386 collision_energy: &TimsTofCollisionEnergyDIA,
387 ) -> BTreeSet<(u32, u32, String, i8, i32)> {
388 let peptide = peptide_map.get(&ion.peptide_id).unwrap();
389 let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new();
390
391 for frame in peptide.frame_distribution.occurrence.iter() {
393 if !precursor_frames.contains(frame) {
395 for scan in &ion.scan_distribution.occurrence {
397 let precursor_spec = &ion.simulated_spectrum;
400
401 if transmission.any_transmitted(
402 *frame as i32,
403 *scan as i32,
404 &precursor_spec.mz,
405 Some(0.5),
406 ) {
407 let collision_energy =
408 collision_energy.get_collision_energy(*frame as i32, *scan as i32);
409 let quantized_energy = (collision_energy * 100.0).round() as i32;
410
411 ret_tree.insert((
412 ion.peptide_id,
413 ion.ion_id,
414 peptide.sequence.sequence.clone(),
415 ion.charge,
416 quantized_energy,
417 ));
418 }
419 }
420 }
421 }
422 ret_tree
423 }
424
425 pub fn get_transmitted_ions(
427 &self,
428 num_threads: usize,
429 dda_mode: bool,
430 ) -> (Vec<i32>, Vec<i32>, Vec<String>, Vec<i8>, Vec<f32>) {
431
432 let thread_pool = ThreadPoolBuilder::new()
433 .num_threads(num_threads)
434 .build()
435 .unwrap();
436
437 let peptides = self.read_peptides().unwrap();
438
439 let peptide_map = TimsTofSyntheticsDataHandle::build_peptide_map(&peptides);
440
441 let precursor_frames =
442 TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&self.read_frames().unwrap());
443
444 let ions = self.read_ions().unwrap();
445
446 let trees = match dda_mode {
447 true => {
448 let transmission = self.get_transmission_dda();
449 thread_pool.install(|| {
450 ions.par_iter()
451 .map(|ion| {
452 TimsTofSyntheticsDataHandle::ion_map_fn_dda(
453 ion.clone(),
454 &peptide_map,
455 &precursor_frames,
456 &transmission,
457 )
458 })
459 .collect::<Vec<_>>()
460 })
461 },
462 false => {
463 let transmission = self.get_transmission_dia();
464 let collision_energy = self.get_collision_energy_dia();
465 thread_pool.install(|| {
466 ions.par_iter()
467 .map(|ion| {
468 TimsTofSyntheticsDataHandle::ion_map_fn_dia(
469 ion.clone(),
470 &peptide_map,
471 &precursor_frames,
472 &transmission,
473 &collision_energy,
474 )
475 })
476 .collect::<Vec<_>>()
477 })
478 },
479 };
480
481 let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new();
482 for tree in trees {
483 ret_tree.extend(tree);
484 }
485
486 let mut ret_peptide_id = Vec::new();
487 let mut ret_ion_id = Vec::new();
488 let mut ret_sequence = Vec::new();
489 let mut ret_charge = Vec::new();
490 let mut ret_energy = Vec::new();
491
492 for (peptide_id, ion_id, sequence, charge, energy) in ret_tree {
493 ret_peptide_id.push(peptide_id as i32);
494 ret_ion_id.push(ion_id as i32);
495 ret_sequence.push(sequence);
496 ret_charge.push(charge);
497 ret_energy.push(energy as f32 / 100.0);
498 }
499
500 (
501 ret_peptide_id,
502 ret_ion_id,
503 ret_sequence,
504 ret_charge,
505 ret_energy,
506 )
507 }
508
509 pub fn build_peptide_to_ion_map(ions: &Vec<IonSim>) -> BTreeMap<u32, Vec<IonSim>> {
511 let mut ion_map = BTreeMap::new();
512 for ion in ions.iter() {
513 let ions = ion_map.entry(ion.peptide_id).or_insert_with(Vec::new);
514 ions.push(ion.clone());
515 }
516 ion_map
517 }
518
519 pub fn build_peptide_map(peptides: &Vec<PeptidesSim>) -> BTreeMap<u32, PeptidesSim> {
521 let mut peptide_map = BTreeMap::new();
522 for peptide in peptides.iter() {
523 peptide_map.insert(peptide.peptide_id, peptide.clone());
524 }
525 peptide_map
526 }
527
528 pub fn build_precursor_frame_id_set(frames: &Vec<FramesSim>) -> HashSet<u32> {
530 frames
531 .iter()
532 .filter(|frame| frame.parse_ms_type() == MsType::Precursor)
533 .map(|frame| frame.frame_id)
534 .collect()
535 }
536
537 pub fn build_peptide_to_events(peptides: &Vec<PeptidesSim>) -> BTreeMap<u32, f32> {
539 let mut peptide_to_events = BTreeMap::new();
540 for peptide in peptides.iter() {
541 peptide_to_events.insert(peptide.peptide_id, peptide.events);
542 }
543 peptide_to_events
544 }
545
546 pub fn build_frame_to_rt(frames: &Vec<FramesSim>) -> BTreeMap<u32, f32> {
548 let mut frame_to_rt = BTreeMap::new();
549 for frame in frames.iter() {
550 frame_to_rt.insert(frame.frame_id, frame.time);
551 }
552 frame_to_rt
553 }
554
555 pub fn build_scan_to_mobility(scans: &Vec<ScansSim>) -> BTreeMap<u32, f32> {
557 let mut scan_to_mobility = BTreeMap::new();
558 for scan in scans.iter() {
559 scan_to_mobility.insert(scan.scan, scan.mobility);
560 }
561 scan_to_mobility
562 }
563 pub fn build_frame_to_abundances(
564 peptides: &Vec<PeptidesSim>,
565 ) -> BTreeMap<u32, (Vec<u32>, Vec<f32>)> {
566 let mut frame_to_abundances = BTreeMap::new();
567
568 for peptide in peptides.iter() {
569 let peptide_id = peptide.peptide_id;
570 let frame_occurrence = peptide.frame_distribution.occurrence.clone();
571 let frame_abundance = peptide.frame_distribution.abundance.clone();
572
573 for (frame_id, abundance) in frame_occurrence.iter().zip(frame_abundance.iter()) {
574 if *abundance > 1e-6 {
577 let (occurrences, abundances) = frame_to_abundances
578 .entry(*frame_id)
579 .or_insert((vec![], vec![]));
580 occurrences.push(peptide_id);
581 abundances.push(*abundance);
582 }
583 }
584 }
585
586 frame_to_abundances
587 }
588 pub fn build_peptide_to_ions(
589 ions: &Vec<IonSim>,
590 ) -> BTreeMap<
591 u32,
592 (
593 Vec<f32>,
594 Vec<Vec<u32>>,
595 Vec<Vec<f32>>,
596 Vec<i8>,
597 Vec<MzSpectrum>,
598 ),
599 > {
600 let mut peptide_to_ions = BTreeMap::new();
601
602 for ion in ions.iter() {
603 let peptide_id = ion.peptide_id;
604 let abundance = ion.relative_abundance;
605 let scan_occurrence = ion.scan_distribution.occurrence.clone();
606 let scan_abundance = ion.scan_distribution.abundance.clone();
607 let charge = ion.charge;
608 let spectrum = ion.simulated_spectrum.clone();
609
610 let (abundances, scan_occurrences, scan_abundances, charges, spectra) = peptide_to_ions
611 .entry(peptide_id)
612 .or_insert((vec![], vec![], vec![], vec![], vec![]));
613 abundances.push(abundance);
614 scan_occurrences.push(scan_occurrence);
615 scan_abundances.push(scan_abundance);
616 charges.push(charge);
617 spectra.push(spectrum);
618 }
619
620 peptide_to_ions
621 }
622 pub fn build_fragment_ions(
623 peptides_sim: &BTreeMap<u32, PeptidesSim>,
624 fragment_ions: &Vec<FragmentIonSim>,
625 num_threads: usize,
626 ) -> BTreeMap<(u32, i8, i32), (PeptideProductIonSeriesCollection, Vec<MzSpectrum>)> {
627 let thread_pool = ThreadPoolBuilder::new()
628 .num_threads(num_threads)
629 .build()
630 .unwrap();
631 let fragment_ion_map = thread_pool.install(|| {
632 fragment_ions
633 .par_iter()
634 .map(|fragment_ion| {
635 let key = (
636 fragment_ion.peptide_id,
637 fragment_ion.charge,
638 (fragment_ion.collision_energy * 1e3).round() as i32,
639 );
640
641 let value = peptides_sim
642 .get(&fragment_ion.peptide_id)
643 .unwrap()
644 .sequence
645 .associate_with_predicted_intensities(
646 fragment_ion.charge as i32,
647 FragmentType::B,
648 fragment_ion.to_dense(174),
649 true,
650 true,
651 );
652
653 let fragment_ions: Vec<MzSpectrum> = value
654 .peptide_ions
655 .par_iter()
656 .map(|ion_series| {
657 ion_series.generate_isotopic_spectrum(1e-2, 1e-3, 100, 1e-5)
658 })
659 .collect();
660 (key, (value, fragment_ions))
661 })
662 .collect::<BTreeMap<_, _>>() });
664
665 fragment_ion_map
666 }
667
668 pub fn build_fragment_ions_annotated(
669 peptides_sim: &BTreeMap<u32, PeptidesSim>,
670 fragment_ions: &Vec<FragmentIonSim>,
671 num_threads: usize,
672 ) -> BTreeMap<(u32, i8, i32), (PeptideProductIonSeriesCollection, Vec<MzSpectrumAnnotated>)>
673 {
674 let thread_pool = ThreadPoolBuilder::new()
675 .num_threads(num_threads)
676 .build()
677 .unwrap();
678 let fragment_ion_map = thread_pool.install(|| {
679 fragment_ions
680 .par_iter()
681 .map(|fragment_ion| {
682 let key = (
683 fragment_ion.peptide_id,
684 fragment_ion.charge,
685 (fragment_ion.collision_energy * 1e3).round() as i32,
686 );
687
688 let value = peptides_sim
689 .get(&fragment_ion.peptide_id)
690 .unwrap()
691 .sequence
692 .associate_with_predicted_intensities(
693 fragment_ion.charge as i32,
694 FragmentType::B,
695 fragment_ion.to_dense(174),
696 true,
697 true,
698 );
699
700 let fragment_ions: Vec<MzSpectrumAnnotated> = value
701 .peptide_ions
702 .par_iter()
703 .map(|ion_series| {
704 ion_series.generate_isotopic_spectrum_annotated(1e-2, 1e-3, 100, 1e-5)
705 })
706 .collect();
707 (key, (value, fragment_ions))
708 })
709 .collect::<BTreeMap<_, _>>() });
711
712 fragment_ion_map
713 }
714}