1use crate::data::meta::{read_global_meta_sql, read_meta_data_sql, FrameMeta, GlobalMetaData};
2use crate::data::raw::BrukerTimsDataLibrary;
3use crate::data::utility::{
4 flatten_scan_values, parse_decompressed_bruker_binary_data, zstd_decompress,
5};
6use byteorder::{LittleEndian, ReadBytesExt};
7use mscore::data::spectrum::MsType;
8use mscore::timstof::frame::{ImsFrame, RawTimsFrame, TimsFrame};
9use mscore::timstof::slice::TimsSlice;
10use std::fs::File;
11use std::io::{Cursor, Read, Seek, SeekFrom};
12use std::path::PathBuf;
13
14use crate::data::acquisition::AcquisitionMode;
15use rayon::prelude::*;
16use rayon::ThreadPoolBuilder;
17
18use std::error::Error;
19
20fn derive_mz_calibration(
28 bruker_lib_path: &str,
29 data_path: &str,
30 tof_max_index: u32,
31) -> Option<(f64, f64)> {
32 if bruker_lib_path.is_empty()
35 || bruker_lib_path == "NO_SDK"
36 || bruker_lib_path == "CALIBRATED"
37 || !std::path::Path::new(bruker_lib_path).exists()
38 {
39 return None;
40 }
41
42 let sdk_converter = match std::panic::catch_unwind(|| {
44 BrukerLibTimsDataConverter::new(bruker_lib_path, data_path)
45 }) {
46 Ok(converter) => converter,
47 Err(_) => return None,
48 };
49
50 let n_points = 1000;
52 let step = tof_max_index / n_points;
53 let tof_indices: Vec<u32> = (0..n_points).map(|i| i * step + step / 2).collect();
54
55 let mz_values = sdk_converter.tof_to_mz(1, &tof_indices);
57
58 let valid_pairs: Vec<(f64, f64)> = tof_indices
60 .iter()
61 .zip(mz_values.iter())
62 .filter(|(_, mz)| **mz > 0.0)
63 .map(|(tof, mz)| (*tof as f64, mz.sqrt()))
64 .collect();
65
66 if valid_pairs.len() < 10 {
67 return None;
68 }
69
70 let n = valid_pairs.len() as f64;
73 let sum_x: f64 = valid_pairs.iter().map(|(x, _)| x).sum();
74 let sum_y: f64 = valid_pairs.iter().map(|(_, y)| y).sum();
75 let sum_xy: f64 = valid_pairs.iter().map(|(x, y)| x * y).sum();
76 let sum_xx: f64 = valid_pairs.iter().map(|(x, _)| x * x).sum();
77
78 let mean_x = sum_x / n;
79 let mean_y = sum_y / n;
80
81 let slope = (sum_xy - n * mean_x * mean_y) / (sum_xx - n * mean_x * mean_x);
82 let intercept = mean_y - slope * mean_x;
83
84 Some((intercept, slope))
85}
86
87fn build_lookup_converter(
97 bruker_lib_path: &str,
98 data_path: &str,
99 tof_max_index: u32,
100 mz_lower: f64,
101 mz_upper: f64,
102 im_lookup: Vec<f64>,
103) -> LookupIndexConverter {
104 match derive_mz_calibration(bruker_lib_path, data_path, tof_max_index) {
105 Some((intercept, slope)) => {
106 LookupIndexConverter::with_mz_fit(intercept, slope, im_lookup)
107 }
108 None => {
109 if !bruker_lib_path.is_empty() && bruker_lib_path != "NO_SDK" {
110 eprintln!(
111 "Warning: Could not derive m/z calibration from SDK at '{}'. \
112 Falling back to the 2-point boundary model, which may have a \
113 large m/z error on some datasets.",
114 bruker_lib_path
115 );
116 }
117 LookupIndexConverter::new(mz_lower, mz_upper, tof_max_index, im_lookup)
118 }
119 }
120}
121
122fn lzf_decompress(data: &[u8], max_output_size: usize) -> Result<Vec<u8>, Box<dyn Error>> {
123 let decompressed_data = lzf::decompress(data, max_output_size)
124 .map_err(|e| format!("LZF decompression failed: {}", e))?;
125 Ok(decompressed_data)
126}
127
128fn parse_decompressed_bruker_binary_type1(
129 decompressed_bytes: &[u8],
130 scan_indices: &mut [i64],
131 tof_indices: &mut [u32],
132 intensities: &mut [u16],
133 scan_start: usize,
134 scan_index: usize,
135) -> usize {
136 let int_count = decompressed_bytes.len() / 4;
138 let buffer =
139 unsafe { std::slice::from_raw_parts(decompressed_bytes.as_ptr() as *const i32, int_count) };
140
141 let mut tof_index = 0i32;
142 let mut previous_was_intensity = true;
143 let mut current_index = scan_start;
144
145 for &value in buffer {
146 if value >= 0 {
147 if previous_was_intensity {
149 tof_index += 1;
150 }
151 tof_indices[current_index] = tof_index as u32;
152 intensities[current_index] = value as u16;
153 previous_was_intensity = true;
154 current_index += 1;
155 } else {
156 tof_index -= value; previous_was_intensity = false;
159 }
160 }
161
162 let scan_size = current_index - scan_start;
163 scan_indices[scan_index] = scan_size as i64;
164 scan_size
165}
166
167pub struct TimsRawDataLayout {
168 pub raw_data_path: String,
169 pub global_meta_data: GlobalMetaData,
170 pub frame_meta_data: Vec<FrameMeta>,
171 pub max_scan_count: i64,
172 pub frame_id_ptr: Vec<i64>,
173 pub tims_offset_values: Vec<i64>,
174 pub acquisition_mode: AcquisitionMode,
175}
176
177impl TimsRawDataLayout {
178 pub fn new(data_path: &str) -> Self {
179 let global_meta_data = read_global_meta_sql(data_path).unwrap();
181 let frame_meta_data = read_meta_data_sql(data_path).unwrap();
182
183 let max_scan_count = frame_meta_data.iter().map(|x| x.num_scans).max().unwrap();
185
186 let mut frame_id_ptr: Vec<i64> = Vec::new();
187 frame_id_ptr.resize(frame_meta_data.len() + 1, 0);
188
189 for (i, row) in frame_meta_data.iter().enumerate() {
191 frame_id_ptr[i + 1] = row.num_peaks + frame_id_ptr[i];
192 }
193
194 let tims_offset_values = frame_meta_data
196 .iter()
197 .map(|x| x.tims_id)
198 .collect::<Vec<i64>>();
199
200 let acquisition_mode = match frame_meta_data[0].scan_mode {
202 8 => AcquisitionMode::DDA,
203 9 => AcquisitionMode::DIA,
204 _ => AcquisitionMode::Unknown,
205 };
206
207 TimsRawDataLayout {
208 raw_data_path: data_path.to_string(),
209 global_meta_data,
210 frame_meta_data,
211 max_scan_count,
212 frame_id_ptr,
213 tims_offset_values,
214 acquisition_mode,
215 }
216 }
217}
218
219pub trait TimsData {
220 fn get_frame(&self, frame_id: u32) -> TimsFrame;
221 fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame;
222 fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice;
223 fn get_acquisition_mode(&self) -> AcquisitionMode;
224 fn get_frame_count(&self) -> i32;
225 fn get_data_path(&self) -> &str;
226}
227
228pub trait IndexConverter {
229 fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64>;
230 fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32>;
231 fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64>;
232 fn inverse_mobility_to_scan(
233 &self,
234 frame_id: u32,
235 inverse_mobility_values: &Vec<f64>,
236 ) -> Vec<u32>;
237}
238
239pub struct BrukerLibTimsDataConverter {
240 pub bruker_lib: BrukerTimsDataLibrary,
241}
242
243impl BrukerLibTimsDataConverter {
244 pub fn new(bruker_lib_path: &str, data_path: &str) -> Self {
245 let bruker_lib = BrukerTimsDataLibrary::new(bruker_lib_path, data_path).unwrap();
246 BrukerLibTimsDataConverter { bruker_lib }
247 }
248}
249impl IndexConverter for BrukerLibTimsDataConverter {
250 fn tof_to_mz(&self, frame_id: u32, tof: &Vec<u32>) -> Vec<f64> {
262 let mut dbl_tofs: Vec<f64> = Vec::new();
263 dbl_tofs.resize(tof.len(), 0.0);
264
265 for (i, &val) in tof.iter().enumerate() {
266 dbl_tofs[i] = val as f64;
267 }
268
269 let mut mz_values: Vec<f64> = Vec::new();
270 mz_values.resize(tof.len(), 0.0);
271
272 self.bruker_lib
273 .tims_index_to_mz(frame_id, &dbl_tofs, &mut mz_values)
274 .expect("Bruker binary call failed at: tims_index_to_mz;");
275
276 mz_values
277 }
278
279 fn mz_to_tof(&self, frame_id: u32, mz: &Vec<f64>) -> Vec<u32> {
280 let mut dbl_mz: Vec<f64> = Vec::new();
281 dbl_mz.resize(mz.len(), 0.0);
282
283 for (i, &val) in mz.iter().enumerate() {
284 dbl_mz[i] = val;
285 }
286
287 let mut tof_values: Vec<f64> = Vec::new();
288 tof_values.resize(mz.len(), 0.0);
289
290 self.bruker_lib
291 .tims_mz_to_index(frame_id, &dbl_mz, &mut tof_values)
292 .expect("Bruker binary call failed at: tims_mz_to_index;");
293
294 tof_values.iter().map(|&x| x.round() as u32).collect()
295 }
296
297 fn scan_to_inverse_mobility(&self, frame_id: u32, scan: &Vec<u32>) -> Vec<f64> {
309 let mut dbl_scans: Vec<f64> = Vec::new();
310 dbl_scans.resize(scan.len(), 0.0);
311
312 for (i, &val) in scan.iter().enumerate() {
313 dbl_scans[i] = val as f64;
314 }
315
316 let mut inv_mob: Vec<f64> = Vec::new();
317 inv_mob.resize(scan.len(), 0.0);
318
319 self.bruker_lib
320 .tims_scan_to_inv_mob(frame_id, &dbl_scans, &mut inv_mob)
321 .expect("Bruker binary call failed at: tims_scannum_to_oneoverk0;");
322
323 inv_mob
324 }
325
326 fn inverse_mobility_to_scan(&self, frame_id: u32, inv_mob: &Vec<f64>) -> Vec<u32> {
338 let mut dbl_inv_mob: Vec<f64> = Vec::new();
339 dbl_inv_mob.resize(inv_mob.len(), 0.0);
340
341 for (i, &val) in inv_mob.iter().enumerate() {
342 dbl_inv_mob[i] = val;
343 }
344
345 let mut scan_values: Vec<f64> = Vec::new();
346 scan_values.resize(inv_mob.len(), 0.0);
347
348 self.bruker_lib
349 .inv_mob_to_tims_scan(frame_id, &dbl_inv_mob, &mut scan_values)
350 .expect("Bruker binary call failed at: tims_oneoverk0_to_scannum;");
351
352 scan_values.iter().map(|&x| x.round() as u32).collect()
353 }
354}
355
356pub enum TimsIndexConverter {
357 Simple(SimpleIndexConverter),
358 Calibrated(CalibratedIndexConverter),
359 BrukerLib(BrukerLibTimsDataConverter),
360 Lookup(LookupIndexConverter),
361}
362
363impl IndexConverter for TimsIndexConverter {
364 fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
365 match self {
366 TimsIndexConverter::Simple(converter) => converter.tof_to_mz(frame_id, tof_values),
367 TimsIndexConverter::Calibrated(converter) => converter.tof_to_mz(frame_id, tof_values),
368 TimsIndexConverter::BrukerLib(converter) => converter.tof_to_mz(frame_id, tof_values),
369 TimsIndexConverter::Lookup(converter) => converter.tof_to_mz(frame_id, tof_values),
370 }
371 }
372
373 fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
374 match self {
375 TimsIndexConverter::Simple(converter) => converter.mz_to_tof(frame_id, mz_values),
376 TimsIndexConverter::Calibrated(converter) => converter.mz_to_tof(frame_id, mz_values),
377 TimsIndexConverter::BrukerLib(converter) => converter.mz_to_tof(frame_id, mz_values),
378 TimsIndexConverter::Lookup(converter) => converter.mz_to_tof(frame_id, mz_values),
379 }
380 }
381
382 fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
383 match self {
384 TimsIndexConverter::Simple(converter) => {
385 converter.scan_to_inverse_mobility(frame_id, scan_values)
386 }
387 TimsIndexConverter::Calibrated(converter) => {
388 converter.scan_to_inverse_mobility(frame_id, scan_values)
389 }
390 TimsIndexConverter::BrukerLib(converter) => {
391 converter.scan_to_inverse_mobility(frame_id, scan_values)
392 }
393 TimsIndexConverter::Lookup(converter) => {
394 converter.scan_to_inverse_mobility(frame_id, scan_values)
395 }
396 }
397 }
398
399 fn inverse_mobility_to_scan(
400 &self,
401 frame_id: u32,
402 inverse_mobility_values: &Vec<f64>,
403 ) -> Vec<u32> {
404 match self {
405 TimsIndexConverter::Simple(converter) => {
406 converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
407 }
408 TimsIndexConverter::Calibrated(converter) => {
409 converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
410 }
411 TimsIndexConverter::BrukerLib(converter) => {
412 converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
413 }
414 TimsIndexConverter::Lookup(converter) => {
415 converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
416 }
417 }
418 }
419}
420
421pub struct TimsLazyLoder {
422 pub raw_data_layout: TimsRawDataLayout,
423 pub index_converter: TimsIndexConverter,
424}
425
426impl TimsData for TimsLazyLoder {
427 fn get_frame(&self, frame_id: u32) -> TimsFrame {
428 let frame_index = (frame_id - 1) as usize;
429
430 let num_peaks = self.raw_data_layout.frame_meta_data[frame_index].num_peaks;
432
433 if num_peaks == 0 {
434
435 let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
436
437 let ms_type = match ms_type_raw {
438 0 => MsType::Precursor,
439 8 => MsType::FragmentDda,
440 9 => MsType::FragmentDia,
441 _ => MsType::Unknown,
442 };
443
444 return TimsFrame {
445 frame_id: frame_id as i32,
446 ms_type,
447 scan: Vec::new(),
448 tof: Vec::new(),
449 ims_frame: ImsFrame::new(
450 self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time,
451 Vec::new(),
452 Vec::new(),
453 Vec::new(),
454 ),
455 };
456 }
457
458 let offset = self.raw_data_layout.tims_offset_values[frame_index] as u64;
459
460 let mut file_path = PathBuf::from(&self.raw_data_layout.raw_data_path);
461 file_path.push("analysis.tdf_bin");
462 let mut infile = File::open(&file_path).unwrap();
463
464 infile.seek(SeekFrom::Start(offset)).unwrap();
465
466 let mut bin_buffer = [0u8; 4];
467 infile.read_exact(&mut bin_buffer).unwrap();
468 let bin_size = Cursor::new(bin_buffer).read_i32::<LittleEndian>().unwrap();
469
470 infile.read_exact(&mut bin_buffer).unwrap();
471
472 match self.raw_data_layout.global_meta_data.tims_compression_type {
473 1 => {
474 let scan_count =
475 self.raw_data_layout.frame_meta_data[frame_index].num_scans as usize;
476 let num_peaks = num_peaks as usize;
477 let compression_offset = 8 + (scan_count + 1) * 4;
478
479 let mut scan_offsets_buffer = vec![0u8; (scan_count + 1) * 4];
480 infile.read_exact(&mut scan_offsets_buffer).unwrap();
481
482 let mut scan_offsets = Vec::with_capacity(scan_count + 1);
483 {
484 let mut rdr = Cursor::new(&scan_offsets_buffer);
485 for _ in 0..(scan_count + 1) {
486 scan_offsets.push(rdr.read_i32::<LittleEndian>().unwrap());
487 }
488 }
489
490 for offs in &mut scan_offsets {
491 *offs -= compression_offset as i32;
492 }
493
494 let remaining_size = (bin_size as usize - compression_offset) as usize;
495 let mut compressed_data = vec![0u8; remaining_size];
496 infile.read_exact(&mut compressed_data).unwrap();
497
498 let mut scan_indices_ = vec![0i64; scan_count];
499 let mut tof_indices_ = vec![0u32; num_peaks];
500 let mut intensities_ = vec![0u16; num_peaks];
501
502 let mut scan_start = 0usize;
503
504 for scan_index in 0..scan_count {
505 let start = scan_offsets[scan_index] as usize;
506 let end = scan_offsets[scan_index + 1] as usize;
507
508 if start == end {
509 continue;
510 }
511
512 let max_output_size = num_peaks * 8;
513 let decompressed_bytes =
514 lzf_decompress(&compressed_data[start..end], max_output_size)
515 .expect("LZF decompression failed.");
516
517 scan_start += parse_decompressed_bruker_binary_type1(
518 &decompressed_bytes,
519 &mut scan_indices_,
520 &mut tof_indices_,
521 &mut intensities_,
522 scan_start,
523 scan_index,
524 );
525 }
526
527 let mut scan = Vec::with_capacity(num_peaks);
529 {
530 let mut current_scan_index = 0u32;
531 for &size in &scan_indices_ {
532 let sz = size as usize;
533 for _ in 0..sz {
534 scan.push(current_scan_index);
535 }
536 current_scan_index += 1;
537 }
538 }
539
540 let intensity_dbl = intensities_.iter().map(|&x| x as f64).collect::<Vec<f64>>();
541 let tof_i32 = tof_indices_.iter().map(|&x| x as i32).collect::<Vec<i32>>();
542
543 let mz = self.index_converter.tof_to_mz(frame_id, &tof_indices_);
544 let inv_mobility = self
545 .index_converter
546 .scan_to_inverse_mobility(frame_id, &scan);
547
548 let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
549 let ms_type = match ms_type_raw {
550 0 => MsType::Precursor,
551 8 => MsType::FragmentDda,
552 9 => MsType::FragmentDia,
553 _ => MsType::Unknown,
554 };
555
556 TimsFrame {
557 frame_id: frame_id as i32,
558 ms_type,
559 scan: scan.iter().map(|&x| x as i32).collect(),
560 tof: tof_i32,
561 ims_frame: ImsFrame::new(
562 self.raw_data_layout.frame_meta_data[frame_index].time,
563 inv_mobility,
564 mz,
565 intensity_dbl,
566 ),
567 }
568 }
569
570 2 => {
572 let mut compressed_data = vec![0u8; bin_size as usize - 8];
573 infile.read_exact(&mut compressed_data).unwrap();
574
575 let decompressed_bytes = zstd_decompress(&compressed_data).unwrap();
576
577 let (scan, tof, intensity) =
578 parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
579 let intensity_dbl = intensity.iter().map(|&x| x as f64).collect();
580 let tof_i32 = tof.iter().map(|&x| x as i32).collect();
581 let scan = flatten_scan_values(&scan, true);
582
583 let mz = self.index_converter.tof_to_mz(frame_id, &tof);
584 let inv_mobility = self
585 .index_converter
586 .scan_to_inverse_mobility(frame_id, &scan);
587
588 let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
589
590 let ms_type = match ms_type_raw {
591 0 => MsType::Precursor,
592 8 => MsType::FragmentDda,
593 9 => MsType::FragmentDia,
594 _ => MsType::Unknown,
595 };
596
597 TimsFrame {
598 frame_id: frame_id as i32,
599 ms_type,
600 scan: scan.iter().map(|&x| x as i32).collect(),
601 tof: tof_i32,
602 ims_frame: ImsFrame::new(
603 self.raw_data_layout.frame_meta_data[frame_index].time,
604 inv_mobility,
605 mz,
606 intensity_dbl,
607 ),
608 }
609 }
610
611 _ => {
612 panic!("TimsCompressionType is not 1 or 2.")
613 }
614 }
615 }
616
617 fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
618 let frame_index = (frame_id - 1) as usize;
619 let offset = self.raw_data_layout.tims_offset_values[frame_index] as u64;
620
621 let num_peaks = self.raw_data_layout.frame_meta_data[frame_index].num_peaks;
623
624 if num_peaks == 0 {
625 return RawTimsFrame {
626 frame_id: frame_id as i32,
627 retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time,
628 ms_type: MsType::Unknown,
629 scan: Vec::new(),
630 tof: Vec::new(),
631 intensity: Vec::new(),
632 };
633 }
634
635 let mut file_path = PathBuf::from(&self.raw_data_layout.raw_data_path);
636 file_path.push("analysis.tdf_bin");
637 let mut infile = File::open(&file_path).unwrap();
638
639 infile.seek(SeekFrom::Start(offset)).unwrap();
640
641 let mut bin_buffer = [0u8; 4];
642 infile.read_exact(&mut bin_buffer).unwrap();
643 let bin_size = Cursor::new(bin_buffer).read_i32::<LittleEndian>().unwrap();
644
645 infile.read_exact(&mut bin_buffer).unwrap();
646
647 match self.raw_data_layout.global_meta_data.tims_compression_type {
648 _ if self.raw_data_layout.global_meta_data.tims_compression_type == 1 => {
649 panic!("Decompression Type1 not implemented.");
650 }
651
652 _ if self.raw_data_layout.global_meta_data.tims_compression_type == 2 => {
654 let mut compressed_data = vec![0u8; bin_size as usize - 8];
655 infile.read_exact(&mut compressed_data).unwrap();
656
657 let decompressed_bytes = zstd_decompress(&compressed_data).unwrap();
658
659 let (scan, tof, intensity) =
660 parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
661
662 let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
663
664 let ms_type = match ms_type_raw {
665 0 => MsType::Precursor,
666 8 => MsType::FragmentDda,
667 9 => MsType::FragmentDia,
668 _ => MsType::Unknown,
669 };
670
671 let frame = RawTimsFrame {
672 frame_id: frame_id as i32,
673 retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize]
674 .time,
675 ms_type,
676 scan,
677 tof,
678 intensity: intensity.iter().map(|&x| x as f64).collect(),
679 };
680
681 return frame;
682 }
683
684 _ => {
686 panic!("TimsCompressionType is not 1 or 2.")
687 }
688 }
689 }
690
691 fn get_slice(&self, frame_ids: Vec<u32>, _num_threads: usize) -> TimsSlice {
692 let result: Vec<TimsFrame> = frame_ids.into_iter().map(|f| self.get_frame(f)).collect();
693
694 TimsSlice { frames: result }
695 }
696
697 fn get_acquisition_mode(&self) -> AcquisitionMode {
698 self.raw_data_layout.acquisition_mode.clone()
699 }
700
701 fn get_frame_count(&self) -> i32 {
702 self.raw_data_layout.frame_meta_data.len() as i32
703 }
704
705 fn get_data_path(&self) -> &str {
706 &self.raw_data_layout.raw_data_path
707 }
708}
709
710pub struct TimsInMemoryLoader {
711 pub raw_data_layout: TimsRawDataLayout,
712 pub index_converter: TimsIndexConverter,
713 compressed_data: Vec<u8>,
714}
715
716impl TimsData for TimsInMemoryLoader {
717 fn get_frame(&self, frame_id: u32) -> TimsFrame {
718 let raw_frame = self.get_raw_frame(frame_id);
719
720 let raw_frame = match raw_frame.ms_type {
721 MsType::FragmentDda => raw_frame.smooth(1).centroid(1),
722 _ => raw_frame,
723 };
724
725 if raw_frame.scan.is_empty() {
727 return TimsFrame::default();
728 }
729
730 let tof_i32 = raw_frame.tof.iter().map(|&x| x as i32).collect();
731 let scan = flatten_scan_values(&raw_frame.scan, true);
732
733 let mz = self.index_converter.tof_to_mz(frame_id, &raw_frame.tof);
734 let inverse_mobility = self
735 .index_converter
736 .scan_to_inverse_mobility(frame_id, &scan);
737
738 let ims_frame = ImsFrame::new(
739 raw_frame.retention_time,
740 inverse_mobility,
741 mz,
742 raw_frame.intensity,
743 );
744
745 TimsFrame {
746 frame_id: frame_id as i32,
747 ms_type: raw_frame.ms_type,
748 scan: scan.iter().map(|&x| x as i32).collect(),
749 tof: tof_i32,
750 ims_frame,
751 }
752 }
753
754 fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
755 let frame_index = (frame_id - 1) as usize;
756 let offset = self.raw_data_layout.tims_offset_values[frame_index] as usize;
757
758 let bin_size_offset = offset + 4; let bin_size = Cursor::new(&self.compressed_data[offset..bin_size_offset])
760 .read_i32::<LittleEndian>()
761 .unwrap();
762
763 let data_offset = bin_size_offset + 4; let frame_data = &self.compressed_data[data_offset..data_offset + bin_size as usize - 8];
765
766 let decompressed_bytes = zstd_decompress(&frame_data).unwrap();
767
768 let (scan, tof, intensity) =
769 parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
770
771 let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
772
773 let ms_type = match ms_type_raw {
774 0 => MsType::Precursor,
775 8 => MsType::FragmentDda,
776 9 => MsType::FragmentDia,
777 _ => MsType::Unknown,
778 };
779
780 let raw_frame = RawTimsFrame {
781 frame_id: frame_id as i32,
782 retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time,
783 ms_type,
784 scan,
785 tof,
786 intensity: intensity.iter().map(|&x| x as f64).collect(),
787 };
788
789 raw_frame
790 }
791
792 fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
793 let pool = ThreadPoolBuilder::new()
794 .num_threads(num_threads)
795 .build()
796 .unwrap();
797 let frames = pool.install(|| {
798 frame_ids
799 .par_iter()
800 .map(|&frame_id| self.get_frame(frame_id))
801 .collect()
802 });
803
804 TimsSlice { frames }
805 }
806
807 fn get_acquisition_mode(&self) -> AcquisitionMode {
808 self.raw_data_layout.acquisition_mode.clone()
809 }
810
811 fn get_frame_count(&self) -> i32 {
812 self.raw_data_layout.frame_meta_data.len() as i32
813 }
814
815 fn get_data_path(&self) -> &str {
816 &self.raw_data_layout.raw_data_path
817 }
818}
819
820pub enum TimsDataLoader {
821 InMemory(TimsInMemoryLoader),
822 Lazy(TimsLazyLoder),
823}
824
825impl TimsDataLoader {
826 pub fn new_lazy(
827 bruker_lib_path: &str,
828 data_path: &str,
829 use_bruker_sdk: bool,
830 scan_max_index: u32,
831 im_lower: f64,
832 im_upper: f64,
833 tof_max_index: u32,
834 mz_lower: f64,
835 mz_upper: f64,
836 ) -> Self {
837 let raw_data_layout = TimsRawDataLayout::new(data_path);
838
839 let index_converter = match use_bruker_sdk {
840 true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new(
841 bruker_lib_path,
842 data_path,
843 )),
844 false => {
845 match derive_mz_calibration(bruker_lib_path, data_path, tof_max_index) {
847 Some((intercept, slope)) => {
848 TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
849 intercept,
850 slope,
851 im_lower,
852 im_upper,
853 scan_max_index,
854 ))
855 }
856 None => {
857 eprintln!(
858 "Warning: Could not derive m/z calibration from SDK. \
859 Using simple boundary model which may have ~5 Da error on some datasets. \
860 This typically happens on macOS where Bruker SDK is not available."
861 );
862 TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries(
863 mz_lower,
864 mz_upper,
865 tof_max_index,
866 im_lower,
867 im_upper,
868 scan_max_index,
869 ))
870 }
871 }
872 }
873 };
874
875 TimsDataLoader::Lazy(TimsLazyLoder {
876 raw_data_layout,
877 index_converter,
878 })
879 }
880
881 pub fn new_in_memory(
882 bruker_lib_path: &str,
883 data_path: &str,
884 use_bruker_sdk: bool,
885 scan_max_index: u32,
886 im_lower: f64,
887 im_upper: f64,
888 tof_max_index: u32,
889 mz_lower: f64,
890 mz_upper: f64,
891 ) -> Self {
892 let raw_data_layout = TimsRawDataLayout::new(data_path);
893
894 let index_converter = match use_bruker_sdk {
895 true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new(
896 bruker_lib_path,
897 data_path,
898 )),
899 false => {
900 match derive_mz_calibration(bruker_lib_path, data_path, tof_max_index) {
902 Some((intercept, slope)) => {
903 TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
904 intercept,
905 slope,
906 im_lower,
907 im_upper,
908 scan_max_index,
909 ))
910 }
911 None => {
912 eprintln!(
913 "Warning: Could not derive m/z calibration from SDK. \
914 Using simple boundary model which may have ~5 Da error on some datasets. \
915 This typically happens on macOS where Bruker SDK is not available."
916 );
917 TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries(
918 mz_lower,
919 mz_upper,
920 tof_max_index,
921 im_lower,
922 im_upper,
923 scan_max_index,
924 ))
925 }
926 }
927 }
928 };
929
930 let mut file_path = PathBuf::from(data_path);
931 file_path.push("analysis.tdf_bin");
932 let mut infile = File::open(file_path).unwrap();
933 let mut data = Vec::new();
934 infile.read_to_end(&mut data).unwrap();
935
936 TimsDataLoader::InMemory(TimsInMemoryLoader {
937 raw_data_layout,
938 index_converter,
939 compressed_data: data,
940 })
941 }
942
943 pub fn new_lazy_with_calibration(
961 data_path: &str,
962 bruker_lib_path: &str,
963 tof_max_index: u32,
964 mz_lower: f64,
965 mz_upper: f64,
966 im_lookup: Vec<f64>,
967 ) -> Self {
968 let raw_data_layout = TimsRawDataLayout::new(data_path);
969
970 let index_converter = TimsIndexConverter::Lookup(build_lookup_converter(
971 bruker_lib_path,
972 data_path,
973 tof_max_index,
974 mz_lower,
975 mz_upper,
976 im_lookup,
977 ));
978
979 TimsDataLoader::Lazy(TimsLazyLoder {
980 raw_data_layout,
981 index_converter,
982 })
983 }
984
985 pub fn new_in_memory_with_calibration(
1003 data_path: &str,
1004 bruker_lib_path: &str,
1005 tof_max_index: u32,
1006 mz_lower: f64,
1007 mz_upper: f64,
1008 im_lookup: Vec<f64>,
1009 ) -> Self {
1010 let raw_data_layout = TimsRawDataLayout::new(data_path);
1011
1012 let index_converter = TimsIndexConverter::Lookup(build_lookup_converter(
1013 bruker_lib_path,
1014 data_path,
1015 tof_max_index,
1016 mz_lower,
1017 mz_upper,
1018 im_lookup,
1019 ));
1020
1021 let mut file_path = PathBuf::from(data_path);
1022 file_path.push("analysis.tdf_bin");
1023 let mut infile = File::open(file_path).unwrap();
1024 let mut data = Vec::new();
1025 infile.read_to_end(&mut data).unwrap();
1026
1027 TimsDataLoader::InMemory(TimsInMemoryLoader {
1028 raw_data_layout,
1029 index_converter,
1030 compressed_data: data,
1031 })
1032 }
1033
1034 pub fn new_lazy_with_mz_calibration(
1047 data_path: &str,
1048 tof_intercept: f64,
1049 tof_slope: f64,
1050 im_min: f64,
1051 im_max: f64,
1052 scan_max_index: u32,
1053 ) -> Self {
1054 let raw_data_layout = TimsRawDataLayout::new(data_path);
1055
1056 let index_converter = TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
1057 tof_intercept,
1058 tof_slope,
1059 im_min,
1060 im_max,
1061 scan_max_index,
1062 ));
1063
1064 TimsDataLoader::Lazy(TimsLazyLoder {
1065 raw_data_layout,
1066 index_converter,
1067 })
1068 }
1069
1070 pub fn new_in_memory_with_mz_calibration(
1075 data_path: &str,
1076 tof_intercept: f64,
1077 tof_slope: f64,
1078 im_min: f64,
1079 im_max: f64,
1080 scan_max_index: u32,
1081 ) -> Self {
1082 let raw_data_layout = TimsRawDataLayout::new(data_path);
1083
1084 let index_converter = TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
1085 tof_intercept,
1086 tof_slope,
1087 im_min,
1088 im_max,
1089 scan_max_index,
1090 ));
1091
1092 let mut file_path = PathBuf::from(data_path);
1093 file_path.push("analysis.tdf_bin");
1094 let mut infile = File::open(file_path).unwrap();
1095 let mut data = Vec::new();
1096 infile.read_to_end(&mut data).unwrap();
1097
1098 TimsDataLoader::InMemory(TimsInMemoryLoader {
1099 raw_data_layout,
1100 index_converter,
1101 compressed_data: data,
1102 })
1103 }
1104
1105 pub fn get_index_converter(&self) -> &dyn IndexConverter {
1106 match self {
1107 TimsDataLoader::InMemory(loader) => &loader.index_converter,
1108 TimsDataLoader::Lazy(loader) => &loader.index_converter,
1109 }
1110 }
1111
1112 pub fn uses_bruker_sdk(&self) -> bool {
1116 match self {
1117 TimsDataLoader::InMemory(loader) => matches!(&loader.index_converter, TimsIndexConverter::BrukerLib(_)),
1118 TimsDataLoader::Lazy(loader) => matches!(&loader.index_converter, TimsIndexConverter::BrukerLib(_)),
1119 }
1120 }
1121}
1122
1123impl TimsData for TimsDataLoader {
1124 fn get_frame(&self, frame_id: u32) -> TimsFrame {
1125 match self {
1126 TimsDataLoader::InMemory(loader) => loader.get_frame(frame_id),
1127 TimsDataLoader::Lazy(loader) => loader.get_frame(frame_id),
1128 }
1129 }
1130 fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
1131 match self {
1132 TimsDataLoader::InMemory(loader) => loader.get_raw_frame(frame_id),
1133 TimsDataLoader::Lazy(loader) => loader.get_raw_frame(frame_id),
1134 }
1135 }
1136
1137 fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
1138 match self {
1139 TimsDataLoader::InMemory(loader) => loader.get_slice(frame_ids, num_threads),
1140 TimsDataLoader::Lazy(loader) => loader.get_slice(frame_ids, num_threads),
1141 }
1142 }
1143
1144 fn get_acquisition_mode(&self) -> AcquisitionMode {
1145 match self {
1146 TimsDataLoader::InMemory(loader) => loader.get_acquisition_mode(),
1147 TimsDataLoader::Lazy(loader) => loader.get_acquisition_mode(),
1148 }
1149 }
1150
1151 fn get_frame_count(&self) -> i32 {
1152 match self {
1153 TimsDataLoader::InMemory(loader) => loader.get_frame_count(),
1154 TimsDataLoader::Lazy(loader) => loader.get_frame_count(),
1155 }
1156 }
1157
1158 fn get_data_path(&self) -> &str {
1159 match self {
1160 TimsDataLoader::InMemory(loader) => loader.get_data_path(),
1161 TimsDataLoader::Lazy(loader) => loader.get_data_path(),
1162 }
1163 }
1164}
1165
1166pub struct SimpleIndexConverter {
1167 pub tof_intercept: f64,
1168 pub tof_slope: f64,
1169 pub scan_intercept: f64,
1170 pub scan_slope: f64,
1171}
1172
1173impl SimpleIndexConverter {
1174 pub fn from_boundaries(
1175 mz_min: f64,
1176 mz_max: f64,
1177 tof_max_index: u32,
1178 im_min: f64,
1179 im_max: f64,
1180 scan_max_index: u32,
1181 ) -> Self {
1182 let tof_intercept: f64 = mz_min.sqrt();
1183 let tof_slope: f64 = (mz_max.sqrt() - tof_intercept) / tof_max_index as f64;
1184
1185 let scan_intercept: f64 = im_max;
1186 let scan_slope: f64 = (im_min - scan_intercept) / scan_max_index as f64;
1187 Self {
1188 tof_intercept,
1189 tof_slope,
1190 scan_intercept,
1191 scan_slope,
1192 }
1193 }
1194}
1195
1196impl IndexConverter for SimpleIndexConverter {
1197 fn tof_to_mz(&self, _frame_id: u32, _tof_values: &Vec<u32>) -> Vec<f64> {
1198 let mut mz_values: Vec<f64> = Vec::new();
1199 mz_values.resize(_tof_values.len(), 0.0);
1200
1201 for (i, &val) in _tof_values.iter().enumerate() {
1202 mz_values[i] = (self.tof_intercept + self.tof_slope * val as f64).powi(2);
1203 }
1204
1205 mz_values
1206 }
1207
1208 fn mz_to_tof(&self, _frame_id: u32, _mz_values: &Vec<f64>) -> Vec<u32> {
1209 let mut tof_values: Vec<u32> = Vec::new();
1210 tof_values.resize(_mz_values.len(), 0);
1211
1212 for (i, &val) in _mz_values.iter().enumerate() {
1213 tof_values[i] = ((val.sqrt() - self.tof_intercept) / self.tof_slope) as u32;
1214 }
1215
1216 tof_values
1217 }
1218
1219 fn scan_to_inverse_mobility(&self, _frame_id: u32, _scan_values: &Vec<u32>) -> Vec<f64> {
1220 let mut inv_mobility_values: Vec<f64> = Vec::new();
1221 inv_mobility_values.resize(_scan_values.len(), 0.0);
1222
1223 for (i, &val) in _scan_values.iter().enumerate() {
1224 inv_mobility_values[i] = self.scan_intercept + self.scan_slope * val as f64;
1225 }
1226
1227 inv_mobility_values
1228 }
1229
1230 fn inverse_mobility_to_scan(
1231 &self,
1232 _frame_id: u32,
1233 _inverse_mobility_values: &Vec<f64>,
1234 ) -> Vec<u32> {
1235 let mut scan_values: Vec<u32> = Vec::new();
1236 scan_values.resize(_inverse_mobility_values.len(), 0);
1237
1238 for (i, &val) in _inverse_mobility_values.iter().enumerate() {
1239 scan_values[i] = ((val - self.scan_intercept) / self.scan_slope) as u32;
1240 }
1241
1242 scan_values
1243 }
1244}
1245
1246pub struct CalibratedIndexConverter {
1257 pub tof_intercept: f64,
1258 pub tof_slope: f64,
1259 pub scan_intercept: f64,
1260 pub scan_slope: f64,
1261}
1262
1263impl CalibratedIndexConverter {
1264 pub fn new(
1273 tof_intercept: f64,
1274 tof_slope: f64,
1275 im_min: f64,
1276 im_max: f64,
1277 scan_max_index: u32,
1278 ) -> Self {
1279 let scan_intercept = im_max;
1280 let scan_slope = (im_min - scan_intercept) / scan_max_index as f64;
1281 Self {
1282 tof_intercept,
1283 tof_slope,
1284 scan_intercept,
1285 scan_slope,
1286 }
1287 }
1288}
1289
1290impl IndexConverter for CalibratedIndexConverter {
1291 fn tof_to_mz(&self, _frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
1292 let mut mz_values: Vec<f64> = Vec::with_capacity(tof_values.len());
1293
1294 for &tof_index in tof_values.iter() {
1295 let sqrt_mz = self.tof_intercept + self.tof_slope * tof_index as f64;
1297 mz_values.push(sqrt_mz * sqrt_mz);
1298 }
1299
1300 mz_values
1301 }
1302
1303 fn mz_to_tof(&self, _frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
1304 let mut tof_values: Vec<u32> = Vec::with_capacity(mz_values.len());
1305
1306 for &mz in mz_values.iter() {
1307 let sqrt_mz = mz.sqrt();
1308 tof_values.push(((sqrt_mz - self.tof_intercept) / self.tof_slope) as u32);
1310 }
1311
1312 tof_values
1313 }
1314
1315 fn scan_to_inverse_mobility(&self, _frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
1316 let mut inv_mobility_values: Vec<f64> = Vec::with_capacity(scan_values.len());
1317
1318 for &val in scan_values.iter() {
1319 inv_mobility_values.push(self.scan_intercept + self.scan_slope * val as f64);
1320 }
1321
1322 inv_mobility_values
1323 }
1324
1325 fn inverse_mobility_to_scan(
1326 &self,
1327 _frame_id: u32,
1328 inverse_mobility_values: &Vec<f64>,
1329 ) -> Vec<u32> {
1330 let mut scan_values: Vec<u32> = Vec::with_capacity(inverse_mobility_values.len());
1331
1332 for &val in inverse_mobility_values.iter() {
1333 scan_values.push(((val - self.scan_intercept) / self.scan_slope) as u32);
1334 }
1335
1336 scan_values
1337 }
1338}
1339
1340pub struct LookupIndexConverter {
1353 pub tof_intercept: f64,
1355 pub tof_slope: f64,
1356
1357 pub im_lookup: Vec<f64>,
1360
1361 pub im_min: f64,
1364 pub im_max: f64,
1365}
1366
1367impl LookupIndexConverter {
1368 pub fn new(
1379 mz_min: f64,
1380 mz_max: f64,
1381 tof_max_index: u32,
1382 im_lookup: Vec<f64>,
1383 ) -> Self {
1384 let tof_intercept: f64 = mz_min.sqrt();
1385 let tof_slope: f64 = (mz_max.sqrt() - tof_intercept) / tof_max_index as f64;
1386
1387 let im_min = im_lookup.iter().cloned().fold(f64::INFINITY, f64::min);
1389 let im_max = im_lookup.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1390
1391 Self {
1392 tof_intercept,
1393 tof_slope,
1394 im_lookup,
1395 im_min,
1396 im_max,
1397 }
1398 }
1399
1400 pub fn with_mz_fit(tof_intercept: f64, tof_slope: f64, im_lookup: Vec<f64>) -> Self {
1411 let im_min = im_lookup.iter().cloned().fold(f64::INFINITY, f64::min);
1413 let im_max = im_lookup.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1414
1415 Self {
1416 tof_intercept,
1417 tof_slope,
1418 im_lookup,
1419 im_min,
1420 im_max,
1421 }
1422 }
1423}
1424
1425impl IndexConverter for LookupIndexConverter {
1426 fn tof_to_mz(&self, _frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
1427 let mut mz_values: Vec<f64> = Vec::new();
1428 mz_values.resize(tof_values.len(), 0.0);
1429
1430 for (i, &val) in tof_values.iter().enumerate() {
1431 mz_values[i] = (self.tof_intercept + self.tof_slope * val as f64).powi(2);
1432 }
1433
1434 mz_values
1435 }
1436
1437 fn mz_to_tof(&self, _frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
1438 let mut tof_values: Vec<u32> = Vec::new();
1439 tof_values.resize(mz_values.len(), 0);
1440
1441 for (i, &val) in mz_values.iter().enumerate() {
1442 tof_values[i] = ((val.sqrt() - self.tof_intercept) / self.tof_slope) as u32;
1443 }
1444
1445 tof_values
1446 }
1447
1448 fn scan_to_inverse_mobility(&self, _frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
1449 scan_values
1451 .iter()
1452 .map(|&s| {
1453 self.im_lookup
1454 .get(s as usize)
1455 .copied()
1456 .unwrap_or(f64::NAN)
1457 })
1458 .collect()
1459 }
1460
1461 fn inverse_mobility_to_scan(
1462 &self,
1463 _frame_id: u32,
1464 inverse_mobility_values: &Vec<f64>,
1465 ) -> Vec<u32> {
1466 inverse_mobility_values
1469 .iter()
1470 .map(|&im| {
1471 if im.is_nan() || self.im_lookup.is_empty() {
1472 return 0;
1473 }
1474
1475 let mut best_scan = 0usize;
1478 let mut best_diff = f64::INFINITY;
1479
1480 for (scan, &lookup_im) in self.im_lookup.iter().enumerate() {
1481 let diff = (lookup_im - im).abs();
1482 if diff < best_diff {
1483 best_diff = diff;
1484 best_scan = scan;
1485 }
1486 }
1487
1488 best_scan as u32
1489 })
1490 .collect()
1491 }
1492}