mscore/timstof/
slice.rs

1use rayon::prelude::*;
2use rayon::ThreadPoolBuilder;
3
4use std::collections::BTreeMap;
5use std::collections::BTreeSet;
6use itertools::multizip;
7
8use crate::data::spectrum::{MsType, Vectorized, ToResolution};
9use crate::timstof::spectrum::{TimsSpectrum};
10use crate::timstof::frame::{ImsFrame, TimsFrame, TimsFrameVectorized};
11
12#[derive(Clone)]
13pub struct TimsSlice {
14    pub frames: Vec<TimsFrame>,
15}
16
17impl TimsSlice {
18
19    /// Create a new TimsSlice from a vector of TimsFrames
20    ///
21    /// # Arguments
22    ///
23    /// * `frames` - A vector of TimsFrames
24    ///
25    /// # Returns
26    ///
27    /// * `TimsSlice` - A TimsSlice containing the TimsFrames
28    ///
29    /// # Example
30    ///
31    /// ```
32    /// use mscore::timstof::slice::TimsSlice;
33    ///
34    /// let slice = TimsSlice::new(vec![]);
35    /// ```
36    pub fn new(frames: Vec<TimsFrame>) -> Self {
37        TimsSlice { frames }
38    }
39
40    /// Filter the TimsSlice by m/z, scan, and intensity
41    ///
42    /// # Arguments
43    ///
44    /// * `mz_min` - The minimum m/z value
45    /// * `mz_max` - The maximum m/z value
46    /// * `scan_min` - The minimum scan value
47    /// * `scan_max` - The maximum scan value
48    /// * `intensity_min` - The minimum intensity value
49    /// * `intensity_max` - The maximum intensity value
50    /// * `num_threads` - The number of threads to use
51    ///
52    /// # Returns
53    ///
54    /// * `TimsSlice` - A TimsSlice containing only the TimsFrames that pass the filter
55    ///
56    /// # Example
57    ///
58    /// ```
59    /// use mscore::timstof::slice::TimsSlice;
60    ///
61    /// let slice = TimsSlice::new(vec![]);
62    /// let filtered_slice = slice.filter_ranged(400.0, 2000.0, 0, 1000, 0.0, 100000.0, 0.0, 1.6, 4);
63    /// ```
64    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, scan_min: i32, scan_max: i32, inv_mob_min: f64, inv_mob_max: f64, intensity_min: f64, intensity_max: f64, num_threads: usize) -> TimsSlice {
65
66        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
67
68        // Use the thread pool
69        let filtered_frames = pool.install(|| {
70            let result: Vec<_> =  self.frames.par_iter()
71                .map(|f| f.filter_ranged(mz_min, mz_max, scan_min, scan_max, inv_mob_min, inv_mob_max, intensity_min, intensity_max))
72                .collect();
73            result
74        });
75
76        TimsSlice { frames: filtered_frames }
77    }
78
79    pub fn filter_ranged_ms_type_specific(&self,
80                                          mz_min_ms1: f64,
81                                          mz_max_ms1: f64,
82                                          scan_min_ms1: i32,
83                                          scan_max_ms1: i32,
84                                          inv_mob_min_ms1: f64,
85                                          inv_mob_max_ms1: f64,
86                                          intensity_min_ms1: f64,
87                                          intensity_max_ms1: f64,
88                                          mz_min_ms2: f64,
89                                          mz_max_ms2: f64,
90                                          scan_min_ms2: i32,
91                                          scan_max_ms2: i32,
92                                          inv_mob_min_ms2: f64,
93                                          inv_mob_max_ms2: f64,
94                                          intensity_min_ms2: f64,
95                                          intensity_max_ms2: f64,
96                                          num_threads: usize) -> TimsSlice {
97
98        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
99
100        // Use the thread pool
101        let filtered_frames = pool.install(|| {
102            let result: Vec<_> =  self.frames.par_iter()
103                .map(|f| match f.ms_type {
104                    MsType::Precursor => f.filter_ranged(mz_min_ms1, mz_max_ms1, scan_min_ms1, scan_max_ms1, inv_mob_min_ms1, inv_mob_max_ms1, intensity_min_ms1, intensity_max_ms1),
105                    _ => f.filter_ranged(mz_min_ms2, mz_max_ms2, scan_min_ms2, scan_max_ms2, inv_mob_min_ms2, inv_mob_max_ms2, intensity_min_ms2, intensity_max_ms2),
106                })
107                .collect();
108            result
109        });
110
111        TimsSlice { frames: filtered_frames }
112    }
113
114    /// Get a vector of TimsFrames by MsType
115    ///
116    /// # Arguments
117    ///
118    /// * `t` - The MsType to filter by
119    ///
120    /// # Returns
121    ///
122    /// * `TimsSlice` - A TimsSlice containing only the TimsFrames of the specified MsType
123    pub fn get_slice_by_type(&self, t: MsType) -> TimsSlice {
124        let filtered_frames = self.frames.iter()
125            .filter(|f| f.ms_type == t)
126            .map(|f| f.clone())
127            .collect();
128        TimsSlice { frames: filtered_frames }
129    }
130
131    pub fn to_resolution(&self, resolution: i32, num_threads: usize) -> TimsSlice {
132
133        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
134
135        // Use the thread pool
136        let result_frames = pool.install(|| {
137            let result: Vec<_> =  self.frames.par_iter()
138                .map(|f| f.to_resolution(resolution))
139                .collect();
140            result
141        });
142
143        TimsSlice { frames: result_frames }
144    }
145
146    pub fn vectorized(&self, resolution: i32, num_threads: usize) -> TimsSliceVectorized {
147
148        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
149
150        // Use the thread pool
151        let result_frames = pool.install(|| {
152            let result: Vec<_> =  self.frames.par_iter()
153                .map(|f| f.vectorized(resolution))
154                .collect();
155            result
156        });
157
158        let frame_map = get_index_map(&result_frames);
159
160        TimsSliceVectorized { frames: result_frames, frame_map }
161    }
162
163    pub fn from_flat_slice(frame_ids: Vec<i32>,
164                           scans: Vec<i32>,
165                           tofs: Vec<i32>,
166                           retention_times: Vec<f64>,
167                           mobilities: Vec<f64>,
168                           mzs: Vec<f64>,
169                           intensities: Vec<f64>) -> Self {
170
171        let mut frames = Vec::new();
172        let unique_frame_ids: BTreeSet<_> = frame_ids.iter().cloned().collect();
173
174        for frame_id in unique_frame_ids {
175            let indices: Vec<usize> = frame_ids.iter().enumerate().filter(|(_, &x)| x == frame_id).map(|(i, _)| i).collect();
176            let mut scan = Vec::new();
177            let mut tof = Vec::new();
178            let mut retention_time = Vec::new();
179            let mut mobility = Vec::new();
180            let mut mz = Vec::new();
181            let mut intensity = Vec::new();
182
183            for index in indices {
184                scan.push(scans[index]);
185                tof.push(tofs[index]);
186                retention_time.push(retention_times[index]);
187                mobility.push(mobilities[index]);
188                mz.push(mzs[index]);
189                intensity.push(intensities[index]);
190            }
191
192            let ims_frame = ImsFrame {
193                retention_time: retention_time[0],
194                mobility,
195                mz,
196                intensity,
197            };
198
199            let tims_frame = TimsFrame {
200                frame_id,
201                ms_type: MsType::Unknown,
202                scan,
203                tof,
204                ims_frame,
205            };
206
207            frames.push(tims_frame);
208        }
209
210        TimsSlice { frames }
211    }
212
213    pub fn flatten(&self) -> TimsSliceFlat {
214        let mut frame_ids = Vec::new();
215        let mut scans = Vec::new();
216        let mut tofs = Vec::new();
217        let mut retention_times = Vec::new();
218        let mut mobilities = Vec::new();
219        let mut mzs = Vec::new();
220        let mut intensities = Vec::new();
221
222        for frame in &self.frames {
223            let length = frame.scan.len();
224            frame_ids.extend(vec![frame.frame_id; length].into_iter());
225            scans.extend(frame.scan.clone());
226            tofs.extend(frame.tof.clone());
227            retention_times.extend(vec![frame.ims_frame.retention_time; length].into_iter());
228            mobilities.extend(&frame.ims_frame.mobility);
229            mzs.extend(&frame.ims_frame.mz);
230            intensities.extend(&frame.ims_frame.intensity);
231        }
232
233        TimsSliceFlat {
234            frame_ids,
235            scans,
236            tofs,
237            retention_times,
238            mobilities,
239            mzs,
240            intensities,
241        }
242    }
243
244    pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, num_threads: usize) -> Vec<TimsSpectrum> {
245        // Create a thread pool
246        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
247
248        // Use the thread pool
249        let windows = pool.install(|| {
250            let windows: Vec<_> = self.frames.par_iter()
251                .flat_map( | frame | frame.to_windows(window_length, overlapping, min_peaks, min_intensity))
252                .collect();
253            windows
254        });
255
256        windows
257    }
258
259    pub fn to_dense_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, resolution: i32, num_threads: usize) -> Vec<(Vec<f64>, Vec<i32>, Vec<i32>, usize, usize)> {
260        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
261
262        let result = pool.install(|| {
263            let t = self.frames.par_iter().map(|f| f.to_dense_windows(window_length, overlapping, min_peaks, min_intensity, resolution)).collect::<Vec<_>>();
264            t
265        });
266
267        result
268    }
269
270    pub fn to_tims_planes(&self, tof_max_value: i32, num_chunks: i32, num_threads: usize) -> Vec<TimsPlane> {
271
272        let flat_slice = self.flatten();
273
274        let chunk_size = (tof_max_value as f64 / num_chunks as f64) as i32;
275
276        // Calculate range_and_width based on num_chunks and chunk_size
277        let range_and_width: Vec<(i32, i32)> = (1..=num_chunks)
278            .map(|i| (chunk_size * i, i + 2))
279            .collect();
280
281        let mut tof_map: BTreeMap<(i32, i32), (Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<f64>)> = BTreeMap::new();
282
283        // Iterate over the data points using multizip
284        for (id, rt, scan, mobility, tof, mz, intensity)
285
286        in multizip((flat_slice.frame_ids, flat_slice.retention_times, flat_slice.scans, flat_slice.mobilities, flat_slice.tofs, flat_slice.mzs, flat_slice.intensities)) {
287
288            for &(switch_point, width) in &range_and_width {
289                if tof < switch_point {
290
291                    let key = (width, (tof as f64 / width as f64).floor() as i32);
292
293                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).0.push(id);
294                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).1.push(rt);
295                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).2.push(scan);
296                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).3.push(mobility);
297                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).4.push(tof);
298                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).5.push(mz);
299                    tof_map.entry(key).or_insert_with(|| (vec![], vec![], vec![], vec![], vec![], vec![], vec![])).6.push(intensity);
300
301                    break
302                }
303            }
304        }
305
306        // Create a thread pool with the desired number of threads
307        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap();
308
309        let tims_planes: Vec<TimsPlane> = pool.install(|| {
310            tof_map.par_iter()
311                .map(|(key, values)| collapse_entry(key, values))
312                .collect()
313        });
314
315        tims_planes
316    }
317}
318
319#[derive(Clone)]
320pub struct TimsSliceVectorized {
321    pub frames: Vec<TimsFrameVectorized>,
322    pub frame_map: BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)>
323}
324
325impl TimsSliceVectorized {
326
327    pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, scan_min: i32, scan_max: i32, inv_mob_min: f64, inv_mob_max: f64, intensity_min: f64, intensity_max: f64, num_threads: usize) -> TimsSliceVectorized {
328
329        let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads
330
331        // Use the thread pool
332        let filtered_frames = pool.install(|| {
333            let result: Vec<_> =  self.frames.par_iter()
334                .map(|f| f.filter_ranged(mz_min, mz_max, scan_min, scan_max, inv_mob_min, inv_mob_max, intensity_min, intensity_max))
335                .collect();
336            result
337        });
338
339        let frame_map = get_index_map(&filtered_frames);
340
341        TimsSliceVectorized { frames: filtered_frames, frame_map }
342    }
343
344    pub fn get_vectors_at_index(&self, index: u32) -> Option<(Vec<u32>, Vec<u32>, Vec<f32>)> {
345        self.frame_map.get(&index).cloned()
346    }
347
348    pub fn flatten(&self) -> TimsSliceVectorizedFlat {
349        let mut frame_ids = Vec::new();
350        let mut scans = Vec::new();
351        let mut tofs = Vec::new();
352        let mut retention_times = Vec::new();
353        let mut mobilities = Vec::new();
354        let mut indices = Vec::new();
355        let mut intensities = Vec::new();
356
357        for frame in &self.frames {
358            let length = frame.ims_frame.indices.len();
359            frame_ids.extend(vec![frame.frame_id; length].into_iter());
360            scans.extend(frame.scan.clone());
361            tofs.extend(frame.tof.clone());
362            retention_times.extend(vec![frame.ims_frame.retention_time; length].into_iter());
363            mobilities.extend(&frame.ims_frame.mobility);
364            indices.extend(&frame.ims_frame.indices);
365            intensities.extend(&frame.ims_frame.values);
366        }
367
368        TimsSliceVectorizedFlat {
369            frame_ids,
370            scans,
371            tofs,
372            retention_times,
373            mobilities,
374            indices,
375            intensities,
376        }
377    }
378}
379
380fn get_index_map(frames: &Vec<TimsFrameVectorized>) -> BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)> {
381    let mut index_map: BTreeMap<u32, Vec<(u32, u32, f32)>> = BTreeMap::new();
382
383    for frame in frames {
384        for (i, index) in frame.ims_frame.indices.iter().enumerate() {
385            let entry = index_map.entry(*index as u32).or_insert_with(|| vec![]);
386            entry.push((frame.frame_id as u32, frame.scan[i] as u32, frame.ims_frame.values[i] as f32));
387        }
388    }
389
390    let mut result_map: BTreeMap<u32, (Vec<u32>, Vec<u32>, Vec<f32>)> = BTreeMap::new();
391
392    for (index, values) in index_map {
393        for (frame_id, scan, intensity) in values {
394            let entry = result_map.entry(index).or_insert_with(|| (vec![], vec![], vec![]));
395            entry.0.push(frame_id);
396            entry.1.push(scan);
397            entry.2.push(intensity);
398        }
399    }
400
401    result_map
402}
403
404#[derive(Clone)]
405pub struct TimsPlane {
406    pub tof_mean: f64,
407    pub tof_std: f64,
408    pub mz_mean: f64,
409    pub mz_std: f64,
410
411    pub frame_id: Vec<i32>,
412    pub retention_time: Vec<f64>,
413    pub scan: Vec<i32>,
414    pub mobility: Vec<f64>,
415    pub intensity: Vec<f64>,
416}
417
418fn collapse_entry(_key: &(i32, i32), values: &(Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<i32>, Vec<f64>, Vec<f64>)) -> TimsPlane {
419
420    let (frame_ids, retention_times, scans, mobilities, tofs, mzs, intensities) = values;
421
422    // 1. Calculate mean and std for tof and mz
423    let tof_mean: f64 = tofs.iter().map(|&x| x as f64).sum::<f64>() / tofs.len() as f64;
424    let tof_std: f64 = (tofs.iter().map(|&x| (x as f64 - tof_mean).powi(2)).sum::<f64>() / tofs.len() as f64).sqrt();
425    let mz_mean: f64 = mzs.iter().map(|&x| x as f64).sum::<f64>() / mzs.len() as f64;
426    let mz_std: f64 = (mzs.iter().map(|&x| (x as f64 - mz_mean).powi(2)).sum::<f64>() / mzs.len() as f64).sqrt();
427
428    // 2. Aggregate data by frame_id and scan using a BTreeMap for sorted order
429    let mut grouped_data: BTreeMap<(i32, i32), (f64, f64, f64)> = BTreeMap::new();
430
431    for (f, r, s, m, i) in multizip((frame_ids, retention_times, scans, mobilities, intensities)) {
432        let key = (*f, *s);
433        let entry = grouped_data.entry(key).or_insert((0.0, 0.0, 0.0));  // (intensity_sum, mobility, retention_time)
434        entry.0 += *i;
435        entry.1 = *m;
436        entry.2 = *r;
437    }
438
439    // Extract data from the grouped_data
440    let mut frame_id = vec![];
441    let mut retention_time = vec![];
442    let mut scan = vec![];
443    let mut mobility = vec![];
444    let mut intensity = vec![];
445
446    for ((f, s), (i, m, r)) in grouped_data {
447        frame_id.push(f);
448        retention_time.push(r);
449        scan.push(s);
450        mobility.push(m);
451        intensity.push(i);
452    }
453
454    TimsPlane {
455        tof_mean,
456        tof_std,
457        mz_mean,
458        mz_std,
459        frame_id,
460        retention_time,
461        scan,
462        mobility,
463        intensity,
464    }
465}
466
467#[derive(Clone, Debug)]
468pub struct TimsSliceFlat {
469    pub frame_ids: Vec<i32>,
470    pub scans: Vec<i32>,
471    pub tofs: Vec<i32>,
472    pub retention_times: Vec<f64>,
473    pub mobilities: Vec<f64>,
474    pub mzs: Vec<f64>,
475    pub intensities: Vec<f64>,
476}
477
478#[derive(Clone, Debug)]
479pub struct TimsSliceVectorizedFlat {
480    pub frame_ids: Vec<i32>,
481    pub scans: Vec<i32>,
482    pub tofs: Vec<i32>,
483    pub retention_times: Vec<f64>,
484    pub mobilities: Vec<f64>,
485    pub indices: Vec<i32>,
486    pub intensities: Vec<f64>,
487}