mscore/timstof/
slice.rs

1use rayon::prelude::*;
2use rayon::ThreadPoolBuilder;
3
4use std::collections::BTreeMap;
5use std::collections::BTreeSet;
6use itertools::multizip;
7
8use crate::data::spectrum::{MsType, Vectorized, ToResolution};
9use crate::timstof::spectrum::{TimsSpectrum};
10use crate::timstof::frame::{ImsFrame, TimsFrame, TimsFrameVectorized};
11
12#[derive(Clone)]
13pub struct TimsSlice {
14    pub frames: Vec<TimsFrame>,
15}
16
17impl TimsSlice {
18
19    /// Create a new TimsSlice from a vector of TimsFrames
20    ///
21    /// # Arguments
22    ///
23    /// * `frames` - A vector of TimsFrames
24    ///
25    /// # Returns
26    ///
27    /// * `TimsSlice` - A TimsSlice containing the TimsFrames
28    ///
29    /// # Example
30    ///
31    /// ```
32    /// use mscore::timstof::slice::TimsSlice;
33    ///
34    /// let slice = TimsSlice::new(vec![]);
35    /// ```
36    pub fn new(frames: Vec<TimsFrame>) -> Self {
37        TimsSlice { frames }
38    }
39
40    /// Filter the TimsSlice by m/z, scan, and intensity
41    ///
42    /// # Arguments
43    ///
44    /// * `mz_min` - The minimum m/z value
45    /// * `mz_max` - The maximum m/z value
46    /// * `scan_min` - The minimum scan value
47    /// * `scan_max` - The maximum scan value
48    /// * `intensity_min` - The minimum intensity value
49    /// * `intensity_max` - The maximum intensity value
50    /// * `num_threads` - The number of threads to use
51    ///
52    /// # Returns
53    ///
54    /// * `TimsSlice` - A TimsSlice containing only the TimsFrames that pass the filter
55    ///
56    /// # Example
57    ///
58    /// ```
59    /// use mscore::timstof::slice::TimsSlice;
60    ///
61    /// let slice = TimsSlice::new(vec![]);
62    /// let filtered_slice = slice.filter_ranged(100.0, 1000.0, 1, 100, 0.0, 1.0, 100.0, 10000.0, 0, 10000, 1);
63    /// ```
64    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, scan_min: i32, scan_max: i32, inv_mob_min: f64, inv_mob_max: f64, intensity_min: f64, intensity_max: f64, tof_min: i32, tof_max: i32, num_threads: usize) -> TimsSlice {
65
66        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
67
68        // Use the thread pool
69        let filtered_frames = pool.install(|| {
70            let result: Vec<_> =  self.frames.par_iter()
71                .map(|f| f.filter_ranged(mz_min, mz_max, scan_min, scan_max, inv_mob_min, inv_mob_max, intensity_min, intensity_max, tof_min, tof_max))
72                .collect();
73            result
74        });
75
76        TimsSlice { frames: filtered_frames }
77    }
78
79    pub fn filter_ranged_ms_type_specific(&self,
80                                          mz_min_ms1: f64,
81                                          mz_max_ms1: f64,
82                                          scan_min_ms1: i32,
83                                          scan_max_ms1: i32,
84                                          inv_mob_min_ms1: f64,
85                                          inv_mob_max_ms1: f64,
86                                          intensity_min_ms1: f64,
87                                          intensity_max_ms1: f64,
88                                          tof_min_ms1: i32,
89                                          tof_max_ms1: i32,
90                                          mz_min_ms2: f64,
91                                          mz_max_ms2: f64,
92                                          scan_min_ms2: i32,
93                                          scan_max_ms2: i32,
94                                          inv_mob_min_ms2: f64,
95                                          inv_mob_max_ms2: f64,
96                                          intensity_min_ms2: f64,
97                                          intensity_max_ms2: f64,
98                                          tof_min_ms2: i32,
99                                          tof_max_ms2: i32,
100                                          num_threads: usize) -> TimsSlice {
101
102        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
103
104        // Use the thread pool
105        let filtered_frames = pool.install(|| {
106            let result: Vec<_> =  self.frames.par_iter()
107                .map(|f| match f.ms_type {
108                    MsType::Precursor => f.filter_ranged(mz_min_ms1, mz_max_ms1, scan_min_ms1, scan_max_ms1, inv_mob_min_ms1, inv_mob_max_ms1, intensity_min_ms1, intensity_max_ms1, tof_min_ms1, tof_max_ms1),
109                    _ => f.filter_ranged(mz_min_ms2, mz_max_ms2, scan_min_ms2, scan_max_ms2, inv_mob_min_ms2, inv_mob_max_ms2, intensity_min_ms2, intensity_max_ms2, tof_min_ms2, tof_max_ms2),
110                })
111                .collect();
112            result
113        });
114
115        TimsSlice { frames: filtered_frames }
116    }
117
118    /// Get a vector of TimsFrames by MsType
119    ///
120    /// # Arguments
121    ///
122    /// * `t` - The MsType to filter by
123    ///
124    /// # Returns
125    ///
126    /// * `TimsSlice` - A TimsSlice containing only the TimsFrames of the specified MsType
127    pub fn get_slice_by_type(&self, t: MsType) -> TimsSlice {
128        let filtered_frames = self.frames.iter()
129            .filter(|f| f.ms_type == t)
130            .map(|f| f.clone())
131            .collect();
132        TimsSlice { frames: filtered_frames }
133    }
134
135    pub fn to_resolution(&self, resolution: i32, num_threads: usize) -> TimsSlice {
136
137        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
138
139        // Use the thread pool
140        let result_frames = pool.install(|| {
141            let result: Vec<_> =  self.frames.par_iter()
142                .map(|f| f.to_resolution(resolution))
143                .collect();
144            result
145        });
146
147        TimsSlice { frames: result_frames }
148    }
149
150    pub fn vectorized(&self, resolution: i32, num_threads: usize) -> TimsSliceVectorized {
151
152        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
153
154        // Use the thread pool
155        let result_frames = pool.install(|| {
156            let result: Vec<_> =  self.frames.par_iter()
157                .map(|f| f.vectorized(resolution))
158                .collect();
159            result
160        });
161
162        let frame_map = get_index_map(&result_frames);
163
164        TimsSliceVectorized { frames: result_frames, frame_map }
165    }
166
167    pub fn from_flat_slice(frame_ids: Vec<i32>,
168                           scans: Vec<i32>,
169                           tofs: Vec<i32>,
170                           retention_times: Vec<f64>,
171                           mobilities: Vec<f64>,
172                           mzs: Vec<f64>,
173                           intensities: Vec<f64>) -> Self {
174
175        let mut frames = Vec::new();
176        let unique_frame_ids: BTreeSet<_> = frame_ids.iter().cloned().collect();
177
178        for frame_id in unique_frame_ids {
179            let indices: Vec<usize> = frame_ids.iter().enumerate().filter(|(_, &x)| x == frame_id).map(|(i, _)| i).collect();
180            let mut scan = Vec::new();
181            let mut tof = Vec::new();
182            let mut retention_time = Vec::new();
183            let mut mobility = Vec::new();
184            let mut mz = Vec::new();
185            let mut intensity = Vec::new();
186
187            for index in indices {
188                scan.push(scans[index]);
189                tof.push(tofs[index]);
190                retention_time.push(retention_times[index]);
191                mobility.push(mobilities[index]);
192                mz.push(mzs[index]);
193                intensity.push(intensities[index]);
194            }
195
196            let ims_frame = ImsFrame::new(
197                retention_time[0],
198                mobility,
199                mz,
200                intensity,
201            );
202
203            let tims_frame = TimsFrame {
204                frame_id,
205                ms_type: MsType::Unknown,
206                scan,
207                tof,
208                ims_frame,
209            };
210
211            frames.push(tims_frame);
212        }
213
214        TimsSlice { frames }
215    }
216
217    pub fn flatten(&self) -> TimsSliceFlat {
218        let mut frame_ids = Vec::new();
219        let mut scans = Vec::new();
220        let mut tofs = Vec::new();
221        let mut retention_times = Vec::new();
222        let mut mobilities = Vec::new();
223        let mut mzs = Vec::new();
224        let mut intensities = Vec::new();
225
226        for frame in &self.frames {
227            let length = frame.scan.len();
228            frame_ids.extend(vec![frame.frame_id; length].into_iter());
229            scans.extend(frame.scan.clone());
230            tofs.extend(frame.tof.clone());
231            retention_times.extend(vec![frame.ims_frame.retention_time; length].into_iter());
232            mobilities.extend(frame.ims_frame.mobility.iter());
233            mzs.extend(frame.ims_frame.mz.iter());
234            intensities.extend(frame.ims_frame.intensity.iter());
235        }
236
237        TimsSliceFlat {
238            frame_ids,
239            scans,
240            tofs,
241            retention_times,
242            mobilities,
243            mzs,
244            intensities,
245        }
246    }
247
248    pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, num_threads: usize) -> Vec<TimsSpectrum> {
249        // Create a thread pool
250        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
251
252        // Use the thread pool
253        let windows = pool.install(|| {
254            let windows: Vec<_> = self.frames.par_iter()
255                .flat_map( | frame | frame.to_windows(window_length, overlapping, min_peaks, min_intensity))
256                .collect();
257            windows
258        });
259
260        windows
261    }
262
263    pub fn to_dense_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, resolution: i32, num_threads: usize) -> Vec<(Vec<f64>, Vec<f64>, Vec<f64>, Vec<i32>, Vec<i32>, usize, usize)> {
264        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
265
266        let result = pool.install(|| {
267            let t = self.frames.par_iter().map(|f| f.to_dense_windows(window_length, overlapping, min_peaks, min_intensity, resolution)).collect::<Vec<_>>();
268            t
269        });
270
271        result
272    }
273
274    pub fn to_tims_planes(&self, tof_max_value: i32, num_chunks: i32, num_threads: usize) -> Vec<TimsPlane> {
275
276        let flat_slice = self.flatten();
277
278        let chunk_size = (tof_max_value as f64 / num_chunks as f64) as i32;
279
280        // Calculate range_and_width based on num_chunks and chunk_size
281        let range_and_width: Vec<(i32, i32)> = (1..=num_chunks)
282            .map(|i| (chunk_size * i, i + 2))
283            .collect();
284
285        let mut tof_map: BTreeMap<(i32, i32), (Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<f64>)> = BTreeMap::new();
286
287        // Iterate over the data points using multizip
288        for (id, rt, scan, mobility, tof, mz, intensity)
289
290        in multizip((flat_slice.frame_ids, flat_slice.retention_times, flat_slice.scans, flat_slice.mobilities, flat_slice.tofs, flat_slice.mzs, flat_slice.intensities)) {
291
292            for &(switch_point, width) in &range_and_width {
293                if tof < switch_point {
294
295                    let key = (width, (tof as f64 / width as f64).floor() as i32);
296
297                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).0.push(id);
298                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).1.push(rt);
299                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).2.push(scan);
300                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).3.push(mobility);
301                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).4.push(tof);
302                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).5.push(mz);
303                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).6.push(intensity);
304
305                    break
306                }
307            }
308        }
309
310        // Create a thread pool with the desired number of threads
311        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
312
313        let tims_planes: Vec<TimsPlane> = pool.install(|| {
314            tof_map.par_iter()
315                .map(|(key, values)| collapse_entry(key, values))
316                .collect()
317        });
318
319        tims_planes
320    }
321}
322
323#[derive(Clone)]
324pub struct TimsSliceVectorized {
325    pub frames: Vec<TimsFrameVectorized>,
326    pub frame_map: BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)>
327}
328
329impl TimsSliceVectorized {
330
331    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, scan_min: i32, scan_max: i32, inv_mob_min: f64, inv_mob_max: f64, intensity_min: f64, intensity_max: f64, num_threads: usize) -> TimsSliceVectorized {
332
333        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
334
335        // Use the thread pool
336        let filtered_frames = pool.install(|| {
337            let result: Vec<_> =  self.frames.par_iter()
338                .map(|f| f.filter_ranged(mz_min, mz_max, scan_min, scan_max, inv_mob_min, inv_mob_max, intensity_min, intensity_max))
339                .collect();
340            result
341        });
342
343        let frame_map = get_index_map(&filtered_frames);
344
345        TimsSliceVectorized { frames: filtered_frames, frame_map }
346    }
347
348    pub fn get_vectors_at_index(&self, index: u32) -> Option<(Vec<u32>, Vec<u32>, Vec<f32>)> {
349        self.frame_map.get(&index).cloned()
350    }
351
352    pub fn flatten(&self) -> TimsSliceVectorizedFlat {
353        let mut frame_ids = Vec::new();
354        let mut scans = Vec::new();
355        let mut tofs = Vec::new();
356        let mut retention_times = Vec::new();
357        let mut mobilities = Vec::new();
358        let mut indices = Vec::new();
359        let mut intensities = Vec::new();
360
361        for frame in &self.frames {
362            let length = frame.ims_frame.indices.len();
363            frame_ids.extend(vec![frame.frame_id; length].into_iter());
364            scans.extend(frame.scan.clone());
365            tofs.extend(frame.tof.clone());
366            retention_times.extend(vec![frame.ims_frame.retention_time; length].into_iter());
367            mobilities.extend(&frame.ims_frame.mobility);
368            indices.extend(&frame.ims_frame.indices);
369            intensities.extend(&frame.ims_frame.values);
370        }
371
372        TimsSliceVectorizedFlat {
373            frame_ids,
374            scans,
375            tofs,
376            retention_times,
377            mobilities,
378            indices,
379            intensities,
380        }
381    }
382}
383
384fn get_index_map(frames: &Vec<TimsFrameVectorized>) -> BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)> {
385    let mut index_map: BTreeMap<u32, Vec<(u32, u32, f32)>> = BTreeMap::new();
386
387    for frame in frames {
388        for (i, index) in frame.ims_frame.indices.iter().enumerate() {
389            let entry = index_map.entry(*index as u32).or_insert_with(|| vec![]);
390            entry.push((frame.frame_id as u32, frame.scan[i] as u32, frame.ims_frame.values[i] as f32));
391        }
392    }
393
394    let mut result_map: BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)> = BTreeMap::new();
395
396    for (index, values) in index_map {
397        for (frame_id, scan, intensity) in values {
398            let entry = result_map.entry(index).or_insert_with(|| (vec![], vec![], vec![]));
399            entry.0.push(frame_id);
400            entry.1.push(scan);
401            entry.2.push(intensity);
402        }
403    }
404
405    result_map
406}
407
408#[derive(Clone)]
409pub struct TimsPlane {
410    pub tof_mean: f64,
411    pub tof_std: f64,
412    pub mz_mean: f64,
413    pub mz_std: f64,
414
415    pub frame_id: Vec<i32>,
416    pub retention_time: Vec<f64>,
417    pub scan: Vec<i32>,
418    pub mobility: Vec<f64>,
419    pub intensity: Vec<f64>,
420}
421
422fn collapse_entry(_key: &(i32, i32), values: &(Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<f64>)) -> TimsPlane {
423
424    let (frame_ids, retention_times, scans, mobilities, tofs, mzs, intensities) = values;
425
426    // 1. Calculate mean and std for tof and mz
427    let tof_mean: f64 = tofs.iter().map(|&x| x as f64).sum::<f64>() / tofs.len() as f64;
428    let tof_std: f64 = (tofs.iter().map(|&x| (x as f64 - tof_mean).powi(2)).sum::<f64>() / tofs.len() as f64).sqrt();
429    let mz_mean: f64 = mzs.iter().map(|&x| x as f64).sum::<f64>() / mzs.len() as f64;
430    let mz_std: f64 = (mzs.iter().map(|&x| (x as f64 - mz_mean).powi(2)).sum::<f64>() / mzs.len() as f64).sqrt();
431
432    // 2. Aggregate data by frame_id and scan using a BTreeMap for sorted order
433    let mut grouped_data: BTreeMap<(i32, i32), (f64, f64, f64)> = BTreeMap::new();
434
435    for (f, r, s, m, i) in multizip((frame_ids, retention_times, scans, mobilities, intensities)) {
436        let key = (*f, *s);
437        let entry = grouped_data.entry(key).or_insert((0.0, 0.0, 0.0));  // (intensity_sum, mobility, retention_time)
438        entry.0 += *i;
439        entry.1 = *m;
440        entry.2 = *r;
441    }
442
443    // Extract data from the grouped_data
444    let mut frame_id = vec![];
445    let mut retention_time = vec![];
446    let mut scan = vec![];
447    let mut mobility = vec![];
448    let mut intensity = vec![];
449
450    for ((f, s), (i, m, r)) in grouped_data {
451        frame_id.push(f);
452        retention_time.push(r);
453        scan.push(s);
454        mobility.push(m);
455        intensity.push(i);
456    }
457
458    TimsPlane {
459        tof_mean,
460        tof_std,
461        mz_mean,
462        mz_std,
463        frame_id,
464        retention_time,
465        scan,
466        mobility,
467        intensity,
468    }
469}
470
471#[derive(Clone, Debug)]
472pub struct TimsSliceFlat {
473    pub frame_ids: Vec<i32>,
474    pub scans: Vec<i32>,
475    pub tofs: Vec<i32>,
476    pub retention_times: Vec<f64>,
477    pub mobilities: Vec<f64>,
478    pub mzs: Vec<f64>,
479    pub intensities: Vec<f64>,
480}
481
482#[derive(Clone, Debug)]
483pub struct TimsSliceVectorizedFlat {
484    pub frame_ids: Vec<i32>,
485    pub scans: Vec<i32>,
486    pub tofs: Vec<i32>,
487    pub retention_times: Vec<f64>,
488    pub mobilities: Vec<f64>,
489    pub indices: Vec<i32>,
490    pub intensities: Vec<f64>,
491}