1use crate::data::acquisition::AcquisitionMode;
2use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader};
3use crate::data::meta::{read_dda_precursor_meta, read_global_meta_sql, read_meta_data_sql, read_pasef_frame_ms_ms_info, DDAPrecursor, PasefMsMsMeta};
4use mscore::timstof::frame::{ImsFrame, RawTimsFrame, TimsFrame};
5use mscore::timstof::slice::TimsSlice;
6use rayon::prelude::*;
7use rayon::ThreadPoolBuilder;
8use std::collections::BTreeMap;
9use rand::prelude::IteratorRandom;
10use mscore::data::spectrum::MsType;
11
12#[derive(Clone)]
13pub struct PASEFDDAFragment {
14 pub frame_id: u32,
15 pub precursor_id: u32,
16 pub collision_energy: f64,
17 pub selected_fragment: TimsFrame,
18}
19
20pub struct TimsDatasetDDA {
21 pub loader: TimsDataLoader,
22 pub pasef_meta: Vec<PasefMsMsMeta>,
23}
24
25impl TimsDatasetDDA {
26 pub fn new(
27 bruker_lib_path: &str,
28 data_path: &str,
29 in_memory: bool,
30 use_bruker_sdk: bool,
31 ) -> Self {
32 let global_meta_data = read_global_meta_sql(data_path).unwrap();
34 let meta_data = read_meta_data_sql(data_path).unwrap();
35
36 let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
37 let im_lower = global_meta_data.one_over_k0_range_lower;
38 let im_upper = global_meta_data.one_over_k0_range_upper;
39
40 let tof_max_index = global_meta_data.tof_max_index;
41 let mz_lower = global_meta_data.mz_acquisition_range_lower;
42 let mz_upper = global_meta_data.mz_acquisition_range_upper;
43
44 let loader = match in_memory {
45 true => TimsDataLoader::new_in_memory(
46 bruker_lib_path,
47 data_path,
48 use_bruker_sdk,
49 scan_max_index,
50 im_lower,
51 im_upper,
52 tof_max_index,
53 mz_lower,
54 mz_upper,
55 ),
56 false => TimsDataLoader::new_lazy(
57 bruker_lib_path,
58 data_path,
59 use_bruker_sdk,
60 scan_max_index,
61 im_lower,
62 im_upper,
63 tof_max_index,
64 mz_lower,
65 mz_upper,
66 ),
67 };
68
69 let pasef_meta = read_pasef_frame_ms_ms_info(data_path).unwrap();
70
71 TimsDatasetDDA { loader, pasef_meta }
72 }
73
74 pub fn get_selected_precursors(&self) -> Vec<DDAPrecursor> {
75 let precursor_meta = read_dda_precursor_meta(&self.loader.get_data_path()).unwrap();
76 let pasef_meta = &self.pasef_meta;
77
78 let precursor_id_to_pasef_meta: BTreeMap<i64, &PasefMsMsMeta> = pasef_meta
79 .iter()
80 .map(|x| (x.precursor_id as i64, x))
81 .collect();
82
83 let result: Vec<_> = precursor_meta
85 .iter()
86 .map(|precursor| {
87 let pasef_meta = precursor_id_to_pasef_meta
88 .get(&precursor.precursor_id)
89 .unwrap();
90 DDAPrecursor {
91 frame_id: precursor.precursor_frame_id,
92 precursor_id: precursor.precursor_id,
93 mono_mz: precursor.precursor_mz_monoisotopic,
94 highest_intensity_mz: precursor.precursor_mz_highest_intensity,
95 average_mz: precursor.precursor_mz_average,
96 charge: precursor.precursor_charge,
97 inverse_ion_mobility: self.scan_to_inverse_mobility(
98 precursor.precursor_frame_id as u32,
99 &vec![precursor.precursor_average_scan_number as u32],
100 )[0],
101 collision_energy: pasef_meta.collision_energy,
102 precuror_total_intensity: precursor.precursor_total_intensity,
103 isolation_mz: pasef_meta.isolation_mz,
104 isolation_width: pasef_meta.isolation_width,
105 }
106 })
107 .collect();
108
109 result
110 }
111
112 pub fn get_precursor_frames(
113 &self,
114 min_intensity: f64,
115 max_num_peaks: usize,
116 num_threads: usize,
117 ) -> Vec<TimsFrame> {
118 let meta_data = read_meta_data_sql(&self.loader.get_data_path()).unwrap();
120
121 let precursor_frames = meta_data.iter().filter(|x| x.ms_ms_type == 0);
123
124 let tims_silce =
125 self.get_slice(precursor_frames.map(|x| x.id as u32).collect(), num_threads);
126
127 let result: Vec<_> = tims_silce
128 .frames
129 .par_iter()
130 .map(|frame| {
131 frame
132 .filter_ranged(0.0, 2000.0, 0, 2000, 0.0, 5.0, min_intensity, 1e9)
133 .top_n(max_num_peaks)
134 })
135 .collect();
136
137 result
138 }
139
140 pub fn get_pasef_frame_ms_ms_info(&self) -> Vec<PasefMsMsMeta> {
141 read_pasef_frame_ms_ms_info(&self.loader.get_data_path()).unwrap()
142 }
143
144 pub fn get_pasef_fragments(&self, num_threads: usize) -> Vec<PASEFDDAFragment> {
146 let pasef_info = self.get_pasef_frame_ms_ms_info();
148
149 let pool = ThreadPoolBuilder::new()
150 .num_threads(num_threads)
151 .build()
152 .unwrap();
153
154 let filtered_frames = pool.install(|| {
155 let result: Vec<_> = pasef_info
156 .par_iter()
157 .map(|pasef_info| {
158 let frame = self.loader.get_frame(pasef_info.frame_id as u32);
160
161 let scan_margin = (pasef_info.scan_num_end - pasef_info.scan_num_begin) / 20;
163
164 let filtered_frame = frame.filter_ranged(
166 0.0,
167 2000.0,
168 (pasef_info.scan_num_begin - scan_margin) as i32,
169 (pasef_info.scan_num_end + scan_margin) as i32,
170 0.0,
171 5.0,
172 0.0,
173 1e9,
174 );
175
176 PASEFDDAFragment {
177 frame_id: pasef_info.frame_id as u32,
178 precursor_id: pasef_info.precursor_id as u32,
179 collision_energy: pasef_info.collision_energy,
180 selected_fragment: filtered_frame,
182 }
183 })
184 .collect();
185
186 result
187 });
188
189 filtered_frames
190 }
191
192 pub fn sample_pasef_fragment_random(
193 &self,
194 target_scan_apex: i32,
195 experiment_max_scan: i32,
196 ) -> TimsFrame {
197 let pasef_meta = &self.pasef_meta;
198 let random_index = rand::random::<usize>() % pasef_meta.len();
199 let pasef_info = &pasef_meta[random_index];
200
201 let frame = self.loader.get_frame(pasef_info.frame_id as u32);
203
204 let scan_margin = (pasef_info.scan_num_end - pasef_info.scan_num_begin) / 20;
206
207 let mut filtered = frame.filter_ranged(
209 0.0,
210 2000.0,
211 (pasef_info.scan_num_begin - scan_margin) as i32,
212 (pasef_info.scan_num_end + scan_margin) as i32,
213 0.0,
214 5.0,
215 0.0,
216 1e9,
217 );
218
219 if filtered.scan.is_empty() {
221 return filtered;
222 }
223
224 let mut scan_copy = filtered.scan.clone();
226 scan_copy.sort_unstable();
227 let median_scan = scan_copy[scan_copy.len() / 2];
228
229 let scan_shift = target_scan_apex - median_scan;
231
232 for s in filtered.scan.iter_mut() {
234 *s += scan_shift;
235 }
236
237 let re_filtered = filtered.filter_ranged(
239 0.0,
240 2000.0,
241 0,
242 experiment_max_scan,
243 0.0,
244 5.0,
245 0.0,
246 1e9,
247 );
248
249 re_filtered
250 }
251
252 pub fn sample_pasef_fragments_random(
253 &self,
254 target_scan_apex_values: Vec<i32>,
255 experiment_max_scan: i32,
256 ) -> TimsFrame {
257
258 if target_scan_apex_values.is_empty() {
260 return TimsFrame {
261 frame_id: 0, ms_type: MsType::FragmentDda,
263 scan: Vec::new(),
264 tof: Vec::new(),
265 ims_frame: ImsFrame::default(), }
267 }
268
269 let mut pasef_frames = Vec::new();
270
271 for target_scan_apex in target_scan_apex_values {
272 let pasef_frame = self.sample_pasef_fragment_random(target_scan_apex, experiment_max_scan);
273 pasef_frames.push(pasef_frame);
274 }
275
276 let mut combined_frame = pasef_frames[0].clone();
278
279 for frame in pasef_frames.iter().skip(1) {
280 combined_frame = combined_frame + frame.clone();
281 }
282
283 let im_values = self.scan_to_inverse_mobility(
285 combined_frame.frame_id as u32,
286 &combined_frame.scan.iter().map(|x| *x as u32).collect(),
287 );
288
289 combined_frame.ims_frame.mobility = im_values;
291
292 combined_frame
293 }
294
295 pub fn sample_precursor_signal(
296 &self,
297 num_frames: usize,
298 max_intensity: f64,
299 take_probability: f64,
300 ) -> TimsFrame {
301 let meta_data = read_meta_data_sql(&self.loader.get_data_path()).unwrap();
303 let precursor_frames = meta_data.iter().filter(|x| x.ms_ms_type == 0);
304
305 let mut rng = rand::thread_rng();
307 let mut sampled_frames: Vec<TimsFrame> = Vec::new();
308
309 for frame in precursor_frames.choose_multiple(&mut rng, num_frames) {
311 let frame_id = frame.id;
312 let frame_data = self
313 .loader
314 .get_frame(frame_id as u32)
315 .filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity)
316 .generate_random_sample(take_probability);
317 sampled_frames.push(frame_data);
318 }
319
320 let mut sampled_frame = sampled_frames.remove(0);
322
323 for frame in sampled_frames {
325 sampled_frame = sampled_frame + frame;
326 }
327
328 sampled_frame
329 }
330}
331
332impl TimsData for TimsDatasetDDA {
333 fn get_frame(&self, frame_id: u32) -> TimsFrame {
334 self.loader.get_frame(frame_id)
335 }
336
337 fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
338 self.loader.get_raw_frame(frame_id)
339 }
340
341 fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
342 self.loader.get_slice(frame_ids, num_threads)
343 }
344
345 fn get_acquisition_mode(&self) -> AcquisitionMode {
346 self.loader.get_acquisition_mode().clone()
347 }
348
349 fn get_frame_count(&self) -> i32 {
350 self.loader.get_frame_count()
351 }
352
353 fn get_data_path(&self) -> &str {
354 &self.loader.get_data_path()
355 }
356}
357
358impl IndexConverter for TimsDatasetDDA {
359 fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
360 self.loader
361 .get_index_converter()
362 .tof_to_mz(frame_id, tof_values)
363 }
364
365 fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
366 self.loader
367 .get_index_converter()
368 .mz_to_tof(frame_id, mz_values)
369 }
370
371 fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
372 self.loader
373 .get_index_converter()
374 .scan_to_inverse_mobility(frame_id, scan_values)
375 }
376
377 fn inverse_mobility_to_scan(
378 &self,
379 frame_id: u32,
380 inverse_mobility_values: &Vec<f64>,
381 ) -> Vec<u32> {
382 self.loader
383 .get_index_converter()
384 .inverse_mobility_to_scan(frame_id, inverse_mobility_values)
385 }
386}