1use std::collections::BTreeMap;
2use rayon::prelude::*;
3use rayon::ThreadPoolBuilder;
4use serde::{Deserialize, Serialize};
5
6#[derive(Clone, Debug, Serialize, Deserialize)]
8pub struct SpectrumProcessingConfig {
9 pub take_top_n: usize,
11 pub deisotope: bool,
13 pub deisotope_tolerance_ppm: f64,
15 pub deisotope_min_mz_diff: f64,
17}
18
19impl Default for SpectrumProcessingConfig {
20 fn default() -> Self {
21 SpectrumProcessingConfig {
22 take_top_n: 150,
23 deisotope: true,
24 deisotope_tolerance_ppm: 10.0,
25 deisotope_min_mz_diff: 0.0005,
26 }
27 }
28}
29
30#[derive(Clone, Debug, Serialize, Deserialize)]
32pub struct PreprocessedSpectrum {
33 pub spec_id: String,
35 pub precursor_mz: f64,
37 pub precursor_charge: Option<i32>,
39 pub precursor_intensity: f64,
41 pub inverse_ion_mobility: f64,
43 pub collision_energy: f64,
45 pub scan_start_time: f64,
47 pub total_ion_current: f64,
49 pub mz: Vec<f32>,
52 pub intensity: Vec<f32>,
54 pub isolation_mz: f64,
56 pub isolation_width: f64,
58}
59
60impl PreprocessedSpectrum {
61 pub fn new(
62 spec_id: String,
63 precursor_mz: f64,
64 precursor_charge: Option<i32>,
65 precursor_intensity: f64,
66 inverse_ion_mobility: f64,
67 collision_energy: f64,
68 scan_start_time: f64,
69 total_ion_current: f64,
70 mz: Vec<f32>,
71 intensity: Vec<f32>,
72 isolation_mz: f64,
73 isolation_width: f64,
74 ) -> Self {
75 PreprocessedSpectrum {
76 spec_id,
77 precursor_mz,
78 precursor_charge,
79 precursor_intensity,
80 inverse_ion_mobility,
81 collision_energy,
82 scan_start_time,
83 total_ion_current,
84 mz,
85 intensity,
86 isolation_mz,
87 isolation_width,
88 }
89 }
90}
91
92const ISOTOPE_MASS_DIFF: f64 = 1.00335;
94
95const PROTON_MASS: f64 = 1.007276466812;
98
99pub fn deisotope_spectrum(
109 mz: &[f64],
110 intensity: &[f64],
111 tolerance_ppm: f64,
112) -> (Vec<f64>, Vec<f64>) {
113 if mz.is_empty() {
114 return (Vec::new(), Vec::new());
115 }
116
117 let n = mz.len();
118 let mut is_isotope = vec![false; n];
119
120 for i in 0..n {
122 if is_isotope[i] {
123 continue;
124 }
125
126 let current_mz = mz[i];
127 let current_intensity = intensity[i];
128
129 for charge in 1..=4 {
131 let isotope_spacing = ISOTOPE_MASS_DIFF / charge as f64;
132
133 for isotope_num in 1..=3 {
135 let expected_mz = current_mz + isotope_spacing * isotope_num as f64;
136 let tolerance = expected_mz * tolerance_ppm / 1_000_000.0;
137
138 for j in (i + 1)..n {
140 if mz[j] < expected_mz - tolerance {
141 continue;
142 }
143 if mz[j] > expected_mz + tolerance {
144 break;
145 }
146
147 if intensity[j] < current_intensity {
150 is_isotope[j] = true;
151 }
152 }
153 }
154 }
155 }
156
157 let mut filtered_mz = Vec::with_capacity(n);
159 let mut filtered_intensity = Vec::with_capacity(n);
160
161 for i in 0..n {
162 if !is_isotope[i] {
163 filtered_mz.push(mz[i]);
164 filtered_intensity.push(intensity[i]);
165 }
166 }
167
168 (filtered_mz, filtered_intensity)
169}
170
171pub fn filter_top_n(
181 mz: &[f64],
182 intensity: &[f64],
183 top_n: usize,
184) -> (Vec<f64>, Vec<f64>) {
185 if mz.len() <= top_n {
186 return (mz.to_vec(), intensity.to_vec());
187 }
188
189 let mut indices: Vec<usize> = (0..mz.len()).collect();
191 indices.sort_by(|&a, &b| {
192 intensity[b].partial_cmp(&intensity[a]).unwrap_or(std::cmp::Ordering::Equal)
193 });
194
195 indices.truncate(top_n);
197
198 indices.sort_by(|&a, &b| {
200 mz[a].partial_cmp(&mz[b]).unwrap_or(std::cmp::Ordering::Equal)
201 });
202
203 let filtered_mz: Vec<f64> = indices.iter().map(|&i| mz[i]).collect();
204 let filtered_intensity: Vec<f64> = indices.iter().map(|&i| intensity[i]).collect();
205
206 (filtered_mz, filtered_intensity)
207}
208
209pub fn normalize_intensity(intensity: &[f64], target_sum: f64) -> Vec<f64> {
211 let sum: f64 = intensity.iter().sum();
212 if sum == 0.0 {
213 return intensity.to_vec();
214 }
215 intensity.iter().map(|&i| i * target_sum / sum).collect()
216}
217
218pub fn flatten_frame_to_spectrum(
229 tof: &[i32],
230 mz: &[f64],
231 intensity: &[f64],
232) -> (Vec<f64>, Vec<f64>) {
233 let mut grouped_data: BTreeMap<i32, Vec<(f64, f64)>> = BTreeMap::new();
235
236 for i in 0..tof.len() {
237 grouped_data
238 .entry(tof[i])
239 .or_insert_with(Vec::new)
240 .push((mz[i], intensity[i]));
241 }
242
243 let mut result_mz = Vec::with_capacity(grouped_data.len());
244 let mut result_intensity = Vec::with_capacity(grouped_data.len());
245
246 for (_, values) in grouped_data {
247 let sum_intensity: f64 = values.iter().map(|&(_, i)| i).sum();
248 let avg_mz: f64 = values.iter().map(|&(m, _)| m).sum::<f64>() / values.len() as f64;
249
250 result_mz.push(avg_mz);
251 result_intensity.push(sum_intensity);
252 }
253
254 (result_mz, result_intensity)
255}
256
257pub fn get_inverse_mobility_along_scan_marginal(
268 scan: &[i32],
269 mobility: &[f64],
270 intensity: &[f64],
271) -> f64 {
272 let mut marginal_map: BTreeMap<i32, (f64, f64)> = BTreeMap::new();
273
274 for i in 0..scan.len() {
275 let entry = marginal_map.entry(scan[i]).or_insert((0.0, 0.0));
276 entry.0 += intensity[i]; entry.1 = mobility[i]; }
279
280 marginal_map
281 .iter()
282 .max_by(|a, b| a.1.0.partial_cmp(&b.1.0).unwrap_or(std::cmp::Ordering::Equal))
283 .map(|(_, (_, mob))| *mob)
284 .unwrap_or(0.0)
285}
286
287pub fn process_spectrum(
300 tof: &[i32],
301 mz: &[f64],
302 intensity: &[f64],
303 config: &SpectrumProcessingConfig,
304) -> (Vec<f32>, Vec<f32>) {
305 let (flat_mz, flat_intensity) = flatten_frame_to_spectrum(tof, mz, intensity);
307
308 let (deiso_mz, deiso_intensity) = if config.deisotope {
310 deisotope_spectrum(&flat_mz, &flat_intensity, config.deisotope_tolerance_ppm)
311 } else {
312 (flat_mz, flat_intensity)
313 };
314
315 let (final_mz, final_intensity) = filter_top_n(&deiso_mz, &deiso_intensity, config.take_top_n);
317
318 let mass_f32: Vec<f32> = final_mz.iter().map(|&x| (x - PROTON_MASS) as f32).collect();
321 let intensity_f32: Vec<f32> = final_intensity.iter().map(|&x| x as f32).collect();
322
323 (mass_f32, intensity_f32)
324}
325
326#[derive(Clone, Debug)]
328pub struct PASEFFragmentData {
329 pub frame_id: u32,
330 pub precursor_id: u32,
331 pub collision_energy: f64,
332 pub scan_start_time: f64, pub scan: Vec<i32>,
334 pub mobility: Vec<f64>,
335 pub tof: Vec<i32>,
336 pub mz: Vec<f64>,
337 pub intensity: Vec<f64>,
338 pub precursor_mz: f64,
340 pub precursor_charge: Option<i32>,
341 pub precursor_intensity: f64,
342 pub isolation_mz: f64,
343 pub isolation_width: f64,
344}
345
346pub fn process_pasef_fragments_batch(
357 fragments: Vec<PASEFFragmentData>,
358 dataset_name: &str,
359 config: &SpectrumProcessingConfig,
360 num_threads: usize,
361) -> Vec<PreprocessedSpectrum> {
362 let pool = ThreadPoolBuilder::new()
363 .num_threads(num_threads)
364 .build()
365 .unwrap();
366
367 let ds_name = dataset_name.to_string();
368
369 pool.install(|| {
370 fragments
371 .par_iter()
372 .map(|frag| {
373 let inverse_mobility = get_inverse_mobility_along_scan_marginal(
375 &frag.scan,
376 &frag.mobility,
377 &frag.intensity,
378 );
379
380 let (processed_mz, processed_intensity) = process_spectrum(
382 &frag.tof,
383 &frag.mz,
384 &frag.intensity,
385 config,
386 );
387
388 let total_ion_current: f64 = frag.intensity.iter().sum();
390
391 let spec_id = format!("{}-{}-{}", frag.frame_id, frag.precursor_id, ds_name);
393
394 PreprocessedSpectrum::new(
395 spec_id,
396 frag.precursor_mz,
397 frag.precursor_charge,
398 frag.precursor_intensity,
399 inverse_mobility,
400 frag.collision_energy,
401 frag.scan_start_time,
402 total_ion_current,
403 processed_mz,
404 processed_intensity,
405 frag.isolation_mz,
406 frag.isolation_width,
407 )
408 })
409 .collect()
410 })
411}
412
413#[cfg(test)]
414mod tests {
415 use super::*;
416
417 #[test]
418 fn test_deisotope_spectrum() {
419 let mz = vec![500.0, 501.003, 502.006, 600.0];
421 let intensity = vec![1000.0, 500.0, 250.0, 800.0];
422
423 let (filtered_mz, _filtered_intensity) = deisotope_spectrum(&mz, &intensity, 10.0);
424
425 assert_eq!(filtered_mz.len(), 2);
427 assert!((filtered_mz[0] - 500.0).abs() < 0.001);
428 assert!((filtered_mz[1] - 600.0).abs() < 0.001);
429 }
430
431 #[test]
432 fn test_filter_top_n() {
433 let mz = vec![100.0, 200.0, 300.0, 400.0, 500.0];
434 let intensity = vec![10.0, 50.0, 30.0, 100.0, 20.0];
435
436 let (filtered_mz, _filtered_intensity) = filter_top_n(&mz, &intensity, 3);
437
438 assert_eq!(filtered_mz.len(), 3);
440 assert!((filtered_mz[0] - 200.0).abs() < 0.001);
442 assert!((filtered_mz[1] - 300.0).abs() < 0.001);
443 assert!((filtered_mz[2] - 400.0).abs() < 0.001);
444 }
445
446 #[test]
447 fn test_flatten_frame_to_spectrum() {
448 let tof = vec![1000, 1000, 2000];
450 let mz = vec![500.0, 500.1, 600.0];
451 let intensity = vec![100.0, 200.0, 300.0];
452
453 let (flat_mz, flat_intensity) = flatten_frame_to_spectrum(&tof, &mz, &intensity);
454
455 assert_eq!(flat_mz.len(), 2);
456 assert!((flat_mz[0] - 500.05).abs() < 0.001);
458 assert!((flat_intensity[0] - 300.0).abs() < 0.001);
459 assert!((flat_mz[1] - 600.0).abs() < 0.001);
461 assert!((flat_intensity[1] - 300.0).abs() < 0.001);
462 }
463
464 #[test]
465 fn test_get_inverse_mobility_along_scan_marginal() {
466 let scan = vec![100, 100, 101, 101];
467 let mobility = vec![1.0, 1.0, 1.1, 1.1];
468 let intensity = vec![50.0, 100.0, 200.0, 100.0];
469
470 let result = get_inverse_mobility_along_scan_marginal(&scan, &mobility, &intensity);
471
472 assert!((result - 1.1).abs() < 0.001);
475 }
476
477 #[test]
478 fn test_process_spectrum() {
479 let tof = vec![1000, 1001, 1002, 2000, 2001];
480 let mz = vec![500.0, 501.003, 502.006, 600.0, 601.003];
481 let intensity = vec![1000.0, 500.0, 250.0, 800.0, 400.0];
482
483 let config = SpectrumProcessingConfig {
484 take_top_n: 10,
485 deisotope: true,
486 deisotope_tolerance_ppm: 10.0,
487 deisotope_min_mz_diff: 0.0005,
488 };
489
490 let (processed_mz, processed_intensity) = process_spectrum(&tof, &mz, &intensity, &config);
491
492 assert!(processed_mz.len() <= mz.len());
494 let processed_sum: f32 = processed_intensity.iter().sum();
496 let original_sum: f64 = intensity.iter().sum();
497 assert!(processed_sum < original_sum as f32);
498 }
499}