rustdf/data/
dda.rs

1use crate::data::acquisition::AcquisitionMode;
2use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader};
3use crate::data::meta::{read_dda_precursor_meta, read_global_meta_sql, read_meta_data_sql, read_pasef_frame_ms_ms_info, DDAPrecursor, PasefMsMsMeta};
4use mscore::timstof::frame::{ImsFrame, RawTimsFrame, TimsFrame};
5use mscore::timstof::slice::TimsSlice;
6use rayon::prelude::*;
7use rayon::ThreadPoolBuilder;
8use std::collections::BTreeMap;
9use rand::prelude::IteratorRandom;
10use mscore::data::spectrum::MsType;
11
12#[derive(Clone)]
13pub struct PASEFDDAFragment {
14    pub frame_id: u32,
15    pub precursor_id: u32,
16    pub collision_energy: f64,
17    pub selected_fragment: TimsFrame,
18}
19
20pub struct TimsDatasetDDA {
21    pub loader: TimsDataLoader,
22    pub pasef_meta: Vec<PasefMsMsMeta>,
23}
24
25impl TimsDatasetDDA {
26    pub fn new(
27        bruker_lib_path: &str,
28        data_path: &str,
29        in_memory: bool,
30        use_bruker_sdk: bool,
31    ) -> Self {
32        // TODO: error handling
33        let global_meta_data = read_global_meta_sql(data_path).unwrap();
34        let meta_data = read_meta_data_sql(data_path).unwrap();
35
36        let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32;
37        let im_lower = global_meta_data.one_over_k0_range_lower;
38        let im_upper = global_meta_data.one_over_k0_range_upper;
39
40        let tof_max_index = global_meta_data.tof_max_index;
41        let mz_lower = global_meta_data.mz_acquisition_range_lower;
42        let mz_upper = global_meta_data.mz_acquisition_range_upper;
43
44        let loader = match in_memory {
45            true => TimsDataLoader::new_in_memory(
46                bruker_lib_path,
47                data_path,
48                use_bruker_sdk,
49                scan_max_index,
50                im_lower,
51                im_upper,
52                tof_max_index,
53                mz_lower,
54                mz_upper,
55            ),
56            false => TimsDataLoader::new_lazy(
57                bruker_lib_path,
58                data_path,
59                use_bruker_sdk,
60                scan_max_index,
61                im_lower,
62                im_upper,
63                tof_max_index,
64                mz_lower,
65                mz_upper,
66            ),
67        };
68        
69        let pasef_meta = read_pasef_frame_ms_ms_info(data_path).unwrap();
70        
71        TimsDatasetDDA { loader, pasef_meta }
72    }
73
74    pub fn get_selected_precursors(&self) -> Vec<DDAPrecursor> {
75        let precursor_meta = read_dda_precursor_meta(&self.loader.get_data_path()).unwrap();
76        let pasef_meta = &self.pasef_meta;
77
78        let precursor_id_to_pasef_meta: BTreeMap<i64, &PasefMsMsMeta> = pasef_meta
79            .iter()
80            .map(|x| (x.precursor_id as i64, x))
81            .collect();
82
83        // go over all precursors and get the precursor meta data
84        let result: Vec<_> = precursor_meta
85            .iter()
86            .map(|precursor| {
87                let pasef_meta = precursor_id_to_pasef_meta
88                    .get(&precursor.precursor_id)
89                    .unwrap();
90                DDAPrecursor {
91                    frame_id: precursor.precursor_frame_id,
92                    precursor_id: precursor.precursor_id,
93                    mono_mz: precursor.precursor_mz_monoisotopic,
94                    highest_intensity_mz: precursor.precursor_mz_highest_intensity,
95                    average_mz: precursor.precursor_mz_average,
96                    charge: precursor.precursor_charge,
97                    inverse_ion_mobility: self.scan_to_inverse_mobility(
98                        precursor.precursor_frame_id as u32,
99                        &vec![precursor.precursor_average_scan_number as u32],
100                    )[0],
101                    collision_energy: pasef_meta.collision_energy,
102                    precuror_total_intensity: precursor.precursor_total_intensity,
103                    isolation_mz: pasef_meta.isolation_mz,
104                    isolation_width: pasef_meta.isolation_width,
105                }
106            })
107            .collect();
108
109        result
110    }
111
112    pub fn get_precursor_frames(
113        &self,
114        min_intensity: f64,
115        max_num_peaks: usize,
116        num_threads: usize,
117    ) -> Vec<TimsFrame> {
118        // get all precursor frames
119        let meta_data = read_meta_data_sql(&self.loader.get_data_path()).unwrap();
120
121        // get the precursor frames
122        let precursor_frames = meta_data.iter().filter(|x| x.ms_ms_type == 0);
123
124        let tims_silce =
125            self.get_slice(precursor_frames.map(|x| x.id as u32).collect(), num_threads);
126
127        let result: Vec<_> = tims_silce
128            .frames
129            .par_iter()
130            .map(|frame| {
131                frame
132                    .filter_ranged(0.0, 2000.0, 0, 2000, 0.0, 5.0, min_intensity, 1e9)
133                    .top_n(max_num_peaks)
134            })
135            .collect();
136
137        result
138    }
139
140    pub fn get_pasef_frame_ms_ms_info(&self) -> Vec<PasefMsMsMeta> {
141        read_pasef_frame_ms_ms_info(&self.loader.get_data_path()).unwrap()
142    }
143
144    /// Get the fragment spectra for all PASEF selected precursors
145    pub fn get_pasef_fragments(&self, num_threads: usize) -> Vec<PASEFDDAFragment> {
146        // extract fragment spectra information
147        let pasef_info = self.get_pasef_frame_ms_ms_info();
148
149        let pool = ThreadPoolBuilder::new()
150            .num_threads(num_threads)
151            .build()
152            .unwrap();
153
154        let filtered_frames = pool.install(|| {
155            let result: Vec<_> = pasef_info
156                .par_iter()
157                .map(|pasef_info| {
158                    // get the frame
159                    let frame = self.loader.get_frame(pasef_info.frame_id as u32);
160
161                    // get five percent of the scan range
162                    let scan_margin = (pasef_info.scan_num_end - pasef_info.scan_num_begin) / 20;
163
164                    // get the fragment spectrum by scan range
165                    let filtered_frame = frame.filter_ranged(
166                        0.0,
167                        2000.0,
168                        (pasef_info.scan_num_begin - scan_margin) as i32,
169                        (pasef_info.scan_num_end + scan_margin) as i32,
170                        0.0,
171                        5.0,
172                        0.0,
173                        1e9,
174                    );
175
176                    PASEFDDAFragment {
177                        frame_id: pasef_info.frame_id as u32,
178                        precursor_id: pasef_info.precursor_id as u32,
179                        collision_energy: pasef_info.collision_energy,
180                        // flatten the spectrum
181                        selected_fragment: filtered_frame,
182                    }
183                })
184                .collect();
185
186            result
187        });
188
189        filtered_frames
190    }
191    
192    pub fn sample_pasef_fragment_random(
193        &self,
194        target_scan_apex: i32,
195        experiment_max_scan: i32,
196    ) -> TimsFrame {
197        let pasef_meta = &self.pasef_meta;
198        let random_index = rand::random::<usize>() % pasef_meta.len();
199        let pasef_info = &pasef_meta[random_index];
200        
201        // get the frame
202        let frame = self.loader.get_frame(pasef_info.frame_id as u32);
203        
204        // get five percent of the scan range
205        let scan_margin = (pasef_info.scan_num_end - pasef_info.scan_num_begin) / 20;
206        
207        // get the fragment spectrum by scan range
208        let mut filtered = frame.filter_ranged(
209            0.0,
210            2000.0,
211            (pasef_info.scan_num_begin - scan_margin) as i32,
212            (pasef_info.scan_num_end + scan_margin) as i32,
213            0.0,
214            5.0,
215            0.0,
216            1e9,
217        );
218
219        // Safety check
220        if filtered.scan.is_empty() {
221            return filtered;
222        }
223
224        // Compute median scan
225        let mut scan_copy = filtered.scan.clone();
226        scan_copy.sort_unstable();
227        let median_scan = scan_copy[scan_copy.len() / 2];
228
229        // Compute shift
230        let scan_shift = target_scan_apex - median_scan;
231
232        // Apply shift to scan values
233        for s in filtered.scan.iter_mut() {
234            *s += scan_shift;
235        }
236
237        // Refilter to clip shifted scans that fall outside valid bounds
238        let re_filtered = filtered.filter_ranged(
239            0.0,
240            2000.0,
241            0,
242            experiment_max_scan,
243            0.0,
244            5.0,
245            0.0,
246            1e9,
247        );
248
249        re_filtered
250    }
251    
252    pub fn sample_pasef_fragments_random(
253        &self,
254        target_scan_apex_values: Vec<i32>,
255        experiment_max_scan: i32,
256    ) -> TimsFrame {
257
258        // return empty frame is target_scan_apex_values is empty
259        if target_scan_apex_values.is_empty() {
260            return TimsFrame {
261                frame_id: 0, // Replace with a suitable default value
262                ms_type: MsType::FragmentDda,
263                scan: Vec::new(),
264                tof: Vec::new(),
265                ims_frame: ImsFrame::default(), // Uses the default implementation for `ImsFrame`
266            }
267        }
268        
269        let mut pasef_frames = Vec::new();
270        
271        for target_scan_apex in target_scan_apex_values {
272            let pasef_frame = self.sample_pasef_fragment_random(target_scan_apex, experiment_max_scan);
273            pasef_frames.push(pasef_frame);
274        }
275        
276        // create combined frame by summing the frame structures, they override add
277        let mut combined_frame = pasef_frames[0].clone();
278        
279        for frame in pasef_frames.iter().skip(1) {
280            combined_frame = combined_frame + frame.clone();
281        }
282
283        // re-calculate ion mobility
284        let im_values = self.scan_to_inverse_mobility(
285            combined_frame.frame_id as u32,
286            &combined_frame.scan.iter().map(|x| *x as u32).collect(),
287        );
288
289        // Update the inverse mobility values
290        combined_frame.ims_frame.mobility = im_values;
291        
292        combined_frame
293    }
294
295    pub fn sample_precursor_signal(
296        &self,
297        num_frames: usize,
298        max_intensity: f64,
299        take_probability: f64,
300    ) -> TimsFrame {
301        // get all precursor frames
302        let meta_data = read_meta_data_sql(&self.loader.get_data_path()).unwrap();
303        let precursor_frames = meta_data.iter().filter(|x| x.ms_ms_type == 0);
304
305        // randomly sample num_frames
306        let mut rng = rand::thread_rng();
307        let mut sampled_frames: Vec<TimsFrame> = Vec::new();
308
309        // go through each frame and sample the data
310        for frame in precursor_frames.choose_multiple(&mut rng, num_frames) {
311            let frame_id = frame.id;
312            let frame_data = self
313                .loader
314                .get_frame(frame_id as u32)
315                .filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity)
316                .generate_random_sample(take_probability);
317            sampled_frames.push(frame_data);
318        }
319
320        // get the first frame
321        let mut sampled_frame = sampled_frames.remove(0);
322
323        // sum all the other frames to the first frame
324        for frame in sampled_frames {
325            sampled_frame = sampled_frame + frame;
326        }
327
328        sampled_frame
329    }
330}
331
332impl TimsData for TimsDatasetDDA {
333    fn get_frame(&self, frame_id: u32) -> TimsFrame {
334        self.loader.get_frame(frame_id)
335    }
336
337    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
338        self.loader.get_raw_frame(frame_id)
339    }
340
341    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
342        self.loader.get_slice(frame_ids, num_threads)
343    }
344
345    fn get_acquisition_mode(&self) -> AcquisitionMode {
346        self.loader.get_acquisition_mode().clone()
347    }
348
349    fn get_frame_count(&self) -> i32 {
350        self.loader.get_frame_count()
351    }
352
353    fn get_data_path(&self) -> &str {
354        &self.loader.get_data_path()
355    }
356}
357
358impl IndexConverter for TimsDatasetDDA {
359    fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
360        self.loader
361            .get_index_converter()
362            .tof_to_mz(frame_id, tof_values)
363    }
364
365    fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
366        self.loader
367            .get_index_converter()
368            .mz_to_tof(frame_id, mz_values)
369    }
370
371    fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
372        self.loader
373            .get_index_converter()
374            .scan_to_inverse_mobility(frame_id, scan_values)
375    }
376
377    fn inverse_mobility_to_scan(
378        &self,
379        frame_id: u32,
380        inverse_mobility_values: &Vec<f64>,
381    ) -> Vec<u32> {
382        self.loader
383            .get_index_converter()
384            .inverse_mobility_to_scan(frame_id, inverse_mobility_values)
385    }
386}