rustdf/data/
handle.rs

1use crate::data::meta::{read_global_meta_sql, read_meta_data_sql, FrameMeta, GlobalMetaData};
2use crate::data::raw::BrukerTimsDataLibrary;
3use crate::data::utility::{
4    flatten_scan_values, parse_decompressed_bruker_binary_data, zstd_decompress,
5};
6use byteorder::{LittleEndian, ReadBytesExt};
7use mscore::data::spectrum::MsType;
8use mscore::timstof::frame::{ImsFrame, RawTimsFrame, TimsFrame};
9use mscore::timstof::slice::TimsSlice;
10use std::fs::File;
11use std::io::{Cursor, Read, Seek, SeekFrom};
12use std::path::PathBuf;
13
14use crate::data::acquisition::AcquisitionMode;
15use rayon::prelude::*;
16use rayon::ThreadPoolBuilder;
17
18use std::error::Error;
19
20/// Derive m/z calibration coefficients by using the Bruker SDK.
21///
22/// This function temporarily loads the SDK to convert a range of TOF indices to m/z,
23/// then fits a linear regression to derive accurate calibration coefficients.
24///
25/// Returns `Some((intercept, slope))` for the formula: `sqrt(mz) = intercept + slope * tof`
26/// Returns `None` if SDK is not available (e.g., on macOS).
27fn derive_mz_calibration(
28    bruker_lib_path: &str,
29    data_path: &str,
30    tof_max_index: u32,
31) -> Option<(f64, f64)> {
32    // Check if SDK path is valid before trying to load
33    // Common invalid values: "NO_SDK", empty string, or non-existent paths
34    if bruker_lib_path.is_empty()
35        || bruker_lib_path == "NO_SDK"
36        || bruker_lib_path == "CALIBRATED"
37        || !std::path::Path::new(bruker_lib_path).exists()
38    {
39        return None;
40    }
41
42    // Try to create a BrukerLib converter
43    let sdk_converter = match std::panic::catch_unwind(|| {
44        BrukerLibTimsDataConverter::new(bruker_lib_path, data_path)
45    }) {
46        Ok(converter) => converter,
47        Err(_) => return None,
48    };
49
50    // Generate a range of TOF indices across the spectrum
51    let n_points = 1000;
52    let step = tof_max_index / n_points;
53    let tof_indices: Vec<u32> = (0..n_points).map(|i| i * step + step / 2).collect();
54
55    // Convert to m/z using SDK
56    let mz_values = sdk_converter.tof_to_mz(1, &tof_indices);
57
58    // Filter out any invalid values (zero or negative)
59    let valid_pairs: Vec<(f64, f64)> = tof_indices
60        .iter()
61        .zip(mz_values.iter())
62        .filter(|(_, mz)| **mz > 0.0)
63        .map(|(tof, mz)| (*tof as f64, mz.sqrt()))
64        .collect();
65
66    if valid_pairs.len() < 10 {
67        return None;
68    }
69
70    // Linear regression: sqrt(mz) = intercept + slope * tof
71    // Using simple least squares: slope = Cov(x,y) / Var(x), intercept = mean_y - slope * mean_x
72    let n = valid_pairs.len() as f64;
73    let sum_x: f64 = valid_pairs.iter().map(|(x, _)| x).sum();
74    let sum_y: f64 = valid_pairs.iter().map(|(_, y)| y).sum();
75    let sum_xy: f64 = valid_pairs.iter().map(|(x, y)| x * y).sum();
76    let sum_xx: f64 = valid_pairs.iter().map(|(x, _)| x * x).sum();
77
78    let mean_x = sum_x / n;
79    let mean_y = sum_y / n;
80
81    let slope = (sum_xy - n * mean_x * mean_y) / (sum_xx - n * mean_x * mean_x);
82    let intercept = mean_y - slope * mean_x;
83
84    Some((intercept, slope))
85}
86
87fn lzf_decompress(data: &[u8], max_output_size: usize) -> Result<Vec<u8>, Box<dyn Error>> {
88    let decompressed_data = lzf::decompress(data, max_output_size)
89        .map_err(|e| format!("LZF decompression failed: {}", e))?;
90    Ok(decompressed_data)
91}
92
93fn parse_decompressed_bruker_binary_type1(
94    decompressed_bytes: &[u8],
95    scan_indices: &mut [i64],
96    tof_indices: &mut [u32],
97    intensities: &mut [u16],
98    scan_start: usize,
99    scan_index: usize,
100) -> usize {
101    // Interpret decompressed_bytes as a slice of i32
102    let int_count = decompressed_bytes.len() / 4;
103    let buffer =
104        unsafe { std::slice::from_raw_parts(decompressed_bytes.as_ptr() as *const i32, int_count) };
105
106    let mut tof_index = 0i32;
107    let mut previous_was_intensity = true;
108    let mut current_index = scan_start;
109
110    for &value in buffer {
111        if value >= 0 {
112            // positive value => intensity
113            if previous_was_intensity {
114                tof_index += 1;
115            }
116            tof_indices[current_index] = tof_index as u32;
117            intensities[current_index] = value as u16;
118            previous_was_intensity = true;
119            current_index += 1;
120        } else {
121            // negative value => indicates a jump in tof_index
122            tof_index -= value; // value is negative, so this adds |value| to tof_index
123            previous_was_intensity = false;
124        }
125    }
126
127    let scan_size = current_index - scan_start;
128    scan_indices[scan_index] = scan_size as i64;
129    scan_size
130}
131
132pub struct TimsRawDataLayout {
133    pub raw_data_path: String,
134    pub global_meta_data: GlobalMetaData,
135    pub frame_meta_data: Vec<FrameMeta>,
136    pub max_scan_count: i64,
137    pub frame_id_ptr: Vec<i64>,
138    pub tims_offset_values: Vec<i64>,
139    pub acquisition_mode: AcquisitionMode,
140}
141
142impl TimsRawDataLayout {
143    pub fn new(data_path: &str) -> Self {
144        // get the global and frame meta data
145        let global_meta_data = read_global_meta_sql(data_path).unwrap();
146        let frame_meta_data = read_meta_data_sql(data_path).unwrap();
147
148        // get the max scan count
149        let max_scan_count = frame_meta_data.iter().map(|x| x.num_scans).max().unwrap();
150
151        let mut frame_id_ptr: Vec<i64> = Vec::new();
152        frame_id_ptr.resize(frame_meta_data.len() + 1, 0);
153
154        // get the frame id_ptr values
155        for (i, row) in frame_meta_data.iter().enumerate() {
156            frame_id_ptr[i + 1] = row.num_peaks + frame_id_ptr[i];
157        }
158
159        // get the tims offset values
160        let tims_offset_values = frame_meta_data
161            .iter()
162            .map(|x| x.tims_id)
163            .collect::<Vec<i64>>();
164
165        // get the acquisition mode
166        let acquisition_mode = match frame_meta_data[0].scan_mode {
167            8 => AcquisitionMode::DDA,
168            9 => AcquisitionMode::DIA,
169            _ => AcquisitionMode::Unknown,
170        };
171
172        TimsRawDataLayout {
173            raw_data_path: data_path.to_string(),
174            global_meta_data,
175            frame_meta_data,
176            max_scan_count,
177            frame_id_ptr,
178            tims_offset_values,
179            acquisition_mode,
180        }
181    }
182}
183
184pub trait TimsData {
185    fn get_frame(&self, frame_id: u32) -> TimsFrame;
186    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame;
187    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice;
188    fn get_acquisition_mode(&self) -> AcquisitionMode;
189    fn get_frame_count(&self) -> i32;
190    fn get_data_path(&self) -> &str;
191}
192
193pub trait IndexConverter {
194    fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64>;
195    fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32>;
196    fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64>;
197    fn inverse_mobility_to_scan(
198        &self,
199        frame_id: u32,
200        inverse_mobility_values: &Vec<f64>,
201    ) -> Vec<u32>;
202}
203
204pub struct BrukerLibTimsDataConverter {
205    pub bruker_lib: BrukerTimsDataLibrary,
206}
207
208impl BrukerLibTimsDataConverter {
209    pub fn new(bruker_lib_path: &str, data_path: &str) -> Self {
210        let bruker_lib = BrukerTimsDataLibrary::new(bruker_lib_path, data_path).unwrap();
211        BrukerLibTimsDataConverter { bruker_lib }
212    }
213}
214impl IndexConverter for BrukerLibTimsDataConverter {
215    /// translate tof to mz values calling the bruker library
216    ///
217    /// # Arguments
218    ///
219    /// * `frame_id` - A u32 that holds the frame id
220    /// * `tof` - A vector of u32 that holds the tof values
221    ///
222    /// # Returns
223    ///
224    /// * `mz_values` - A vector of f64 that holds the mz values
225    ///
226    fn tof_to_mz(&self, frame_id: u32, tof: &Vec<u32>) -> Vec<f64> {
227        let mut dbl_tofs: Vec<f64> = Vec::new();
228        dbl_tofs.resize(tof.len(), 0.0);
229
230        for (i, &val) in tof.iter().enumerate() {
231            dbl_tofs[i] = val as f64;
232        }
233
234        let mut mz_values: Vec<f64> = Vec::new();
235        mz_values.resize(tof.len(), 0.0);
236
237        self.bruker_lib
238            .tims_index_to_mz(frame_id, &dbl_tofs, &mut mz_values)
239            .expect("Bruker binary call failed at: tims_index_to_mz;");
240
241        mz_values
242    }
243
244    fn mz_to_tof(&self, frame_id: u32, mz: &Vec<f64>) -> Vec<u32> {
245        let mut dbl_mz: Vec<f64> = Vec::new();
246        dbl_mz.resize(mz.len(), 0.0);
247
248        for (i, &val) in mz.iter().enumerate() {
249            dbl_mz[i] = val;
250        }
251
252        let mut tof_values: Vec<f64> = Vec::new();
253        tof_values.resize(mz.len(), 0.0);
254
255        self.bruker_lib
256            .tims_mz_to_index(frame_id, &dbl_mz, &mut tof_values)
257            .expect("Bruker binary call failed at: tims_mz_to_index;");
258
259        tof_values.iter().map(|&x| x.round() as u32).collect()
260    }
261
262    /// translate scan to inverse mobility values calling the bruker library
263    ///
264    /// # Arguments
265    ///
266    /// * `frame_id` - A u32 that holds the frame id
267    /// * `scan` - A vector of i32 that holds the scan values
268    ///
269    /// # Returns
270    ///
271    /// * `inv_mob` - A vector of f64 that holds the inverse mobility values
272    ///
273    fn scan_to_inverse_mobility(&self, frame_id: u32, scan: &Vec<u32>) -> Vec<f64> {
274        let mut dbl_scans: Vec<f64> = Vec::new();
275        dbl_scans.resize(scan.len(), 0.0);
276
277        for (i, &val) in scan.iter().enumerate() {
278            dbl_scans[i] = val as f64;
279        }
280
281        let mut inv_mob: Vec<f64> = Vec::new();
282        inv_mob.resize(scan.len(), 0.0);
283
284        self.bruker_lib
285            .tims_scan_to_inv_mob(frame_id, &dbl_scans, &mut inv_mob)
286            .expect("Bruker binary call failed at: tims_scannum_to_oneoverk0;");
287
288        inv_mob
289    }
290
291    /// translate inverse mobility to scan values calling the bruker library
292    ///
293    /// # Arguments
294    ///
295    /// * `frame_id` - A u32 that holds the frame id
296    /// * `inv_mob` - A vector of f64 that holds the inverse mobility values
297    ///
298    /// # Returns
299    ///
300    /// * `scan_values` - A vector of i32 that holds the scan values
301    ///
302    fn inverse_mobility_to_scan(&self, frame_id: u32, inv_mob: &Vec<f64>) -> Vec<u32> {
303        let mut dbl_inv_mob: Vec<f64> = Vec::new();
304        dbl_inv_mob.resize(inv_mob.len(), 0.0);
305
306        for (i, &val) in inv_mob.iter().enumerate() {
307            dbl_inv_mob[i] = val;
308        }
309
310        let mut scan_values: Vec<f64> = Vec::new();
311        scan_values.resize(inv_mob.len(), 0.0);
312
313        self.bruker_lib
314            .inv_mob_to_tims_scan(frame_id, &dbl_inv_mob, &mut scan_values)
315            .expect("Bruker binary call failed at: tims_oneoverk0_to_scannum;");
316
317        scan_values.iter().map(|&x| x.round() as u32).collect()
318    }
319}
320
321pub enum TimsIndexConverter {
322    Simple(SimpleIndexConverter),
323    Calibrated(CalibratedIndexConverter),
324    BrukerLib(BrukerLibTimsDataConverter),
325    Lookup(LookupIndexConverter),
326}
327
328impl IndexConverter for TimsIndexConverter {
329    fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
330        match self {
331            TimsIndexConverter::Simple(converter) => converter.tof_to_mz(frame_id, tof_values),
332            TimsIndexConverter::Calibrated(converter) => converter.tof_to_mz(frame_id, tof_values),
333            TimsIndexConverter::BrukerLib(converter) => converter.tof_to_mz(frame_id, tof_values),
334            TimsIndexConverter::Lookup(converter) => converter.tof_to_mz(frame_id, tof_values),
335        }
336    }
337
338    fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
339        match self {
340            TimsIndexConverter::Simple(converter) => converter.mz_to_tof(frame_id, mz_values),
341            TimsIndexConverter::Calibrated(converter) => converter.mz_to_tof(frame_id, mz_values),
342            TimsIndexConverter::BrukerLib(converter) => converter.mz_to_tof(frame_id, mz_values),
343            TimsIndexConverter::Lookup(converter) => converter.mz_to_tof(frame_id, mz_values),
344        }
345    }
346
347    fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
348        match self {
349            TimsIndexConverter::Simple(converter) => {
350                converter.scan_to_inverse_mobility(frame_id, scan_values)
351            }
352            TimsIndexConverter::Calibrated(converter) => {
353                converter.scan_to_inverse_mobility(frame_id, scan_values)
354            }
355            TimsIndexConverter::BrukerLib(converter) => {
356                converter.scan_to_inverse_mobility(frame_id, scan_values)
357            }
358            TimsIndexConverter::Lookup(converter) => {
359                converter.scan_to_inverse_mobility(frame_id, scan_values)
360            }
361        }
362    }
363
364    fn inverse_mobility_to_scan(
365        &self,
366        frame_id: u32,
367        inverse_mobility_values: &Vec<f64>,
368    ) -> Vec<u32> {
369        match self {
370            TimsIndexConverter::Simple(converter) => {
371                converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
372            }
373            TimsIndexConverter::Calibrated(converter) => {
374                converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
375            }
376            TimsIndexConverter::BrukerLib(converter) => {
377                converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
378            }
379            TimsIndexConverter::Lookup(converter) => {
380                converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
381            }
382        }
383    }
384}
385
386pub struct TimsLazyLoder {
387    pub raw_data_layout: TimsRawDataLayout,
388    pub index_converter: TimsIndexConverter,
389}
390
391impl TimsData for TimsLazyLoder {
392    fn get_frame(&self, frame_id: u32) -> TimsFrame {
393        let frame_index = (frame_id - 1) as usize;
394
395        // turns out, there can be empty frames in the data, check for that, if so, return an empty frame
396        let num_peaks = self.raw_data_layout.frame_meta_data[frame_index].num_peaks;
397
398        if num_peaks == 0 {
399
400            let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
401
402            let ms_type = match ms_type_raw {
403                0 => MsType::Precursor,
404                8 => MsType::FragmentDda,
405                9 => MsType::FragmentDia,
406                _ => MsType::Unknown,
407            };
408
409            return TimsFrame {
410                frame_id: frame_id as i32,
411                ms_type,
412                scan: Vec::new(),
413                tof: Vec::new(),
414                ims_frame: ImsFrame::new(
415                    self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time,
416                    Vec::new(),
417                    Vec::new(),
418                    Vec::new(),
419                ),
420            };
421        }
422
423        let offset = self.raw_data_layout.tims_offset_values[frame_index] as u64;
424
425        let mut file_path = PathBuf::from(&self.raw_data_layout.raw_data_path);
426        file_path.push("analysis.tdf_bin");
427        let mut infile = File::open(&file_path).unwrap();
428
429        infile.seek(SeekFrom::Start(offset)).unwrap();
430
431        let mut bin_buffer = [0u8; 4];
432        infile.read_exact(&mut bin_buffer).unwrap();
433        let bin_size = Cursor::new(bin_buffer).read_i32::<LittleEndian>().unwrap();
434
435        infile.read_exact(&mut bin_buffer).unwrap();
436
437        match self.raw_data_layout.global_meta_data.tims_compression_type {
438            1 => {
439                let scan_count =
440                    self.raw_data_layout.frame_meta_data[frame_index].num_scans as usize;
441                let num_peaks = num_peaks as usize;
442                let compression_offset = 8 + (scan_count + 1) * 4;
443
444                let mut scan_offsets_buffer = vec![0u8; (scan_count + 1) * 4];
445                infile.read_exact(&mut scan_offsets_buffer).unwrap();
446
447                let mut scan_offsets = Vec::with_capacity(scan_count + 1);
448                {
449                    let mut rdr = Cursor::new(&scan_offsets_buffer);
450                    for _ in 0..(scan_count + 1) {
451                        scan_offsets.push(rdr.read_i32::<LittleEndian>().unwrap());
452                    }
453                }
454
455                for offs in &mut scan_offsets {
456                    *offs -= compression_offset as i32;
457                }
458
459                let remaining_size = (bin_size as usize - compression_offset) as usize;
460                let mut compressed_data = vec![0u8; remaining_size];
461                infile.read_exact(&mut compressed_data).unwrap();
462
463                let mut scan_indices_ = vec![0i64; scan_count];
464                let mut tof_indices_ = vec![0u32; num_peaks];
465                let mut intensities_ = vec![0u16; num_peaks];
466
467                let mut scan_start = 0usize;
468
469                for scan_index in 0..scan_count {
470                    let start = scan_offsets[scan_index] as usize;
471                    let end = scan_offsets[scan_index + 1] as usize;
472
473                    if start == end {
474                        continue;
475                    }
476
477                    let max_output_size = num_peaks * 8;
478                    let decompressed_bytes =
479                        lzf_decompress(&compressed_data[start..end], max_output_size)
480                            .expect("LZF decompression failed.");
481
482                    scan_start += parse_decompressed_bruker_binary_type1(
483                        &decompressed_bytes,
484                        &mut scan_indices_,
485                        &mut tof_indices_,
486                        &mut intensities_,
487                        scan_start,
488                        scan_index,
489                    );
490                }
491
492                // Create a flat scan vector to match what flatten_scan_values expects
493                let mut scan = Vec::with_capacity(num_peaks);
494                {
495                    let mut current_scan_index = 0u32;
496                    for &size in &scan_indices_ {
497                        let sz = size as usize;
498                        for _ in 0..sz {
499                            scan.push(current_scan_index);
500                        }
501                        current_scan_index += 1;
502                    }
503                }
504
505                let intensity_dbl = intensities_.iter().map(|&x| x as f64).collect::<Vec<f64>>();
506                let tof_i32 = tof_indices_.iter().map(|&x| x as i32).collect::<Vec<i32>>();
507
508                let mz = self.index_converter.tof_to_mz(frame_id, &tof_indices_);
509                let inv_mobility = self
510                    .index_converter
511                    .scan_to_inverse_mobility(frame_id, &scan);
512
513                let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
514                let ms_type = match ms_type_raw {
515                    0 => MsType::Precursor,
516                    8 => MsType::FragmentDda,
517                    9 => MsType::FragmentDia,
518                    _ => MsType::Unknown,
519                };
520
521                TimsFrame {
522                    frame_id: frame_id as i32,
523                    ms_type,
524                    scan: scan.iter().map(|&x| x as i32).collect(),
525                    tof: tof_i32,
526                    ims_frame: ImsFrame::new(
527                        self.raw_data_layout.frame_meta_data[frame_index].time,
528                        inv_mobility,
529                        mz,
530                        intensity_dbl,
531                    ),
532                }
533            }
534
535            // Existing handling of Type 2
536            2 => {
537                let mut compressed_data = vec![0u8; bin_size as usize - 8];
538                infile.read_exact(&mut compressed_data).unwrap();
539
540                let decompressed_bytes = zstd_decompress(&compressed_data).unwrap();
541
542                let (scan, tof, intensity) =
543                    parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
544                let intensity_dbl = intensity.iter().map(|&x| x as f64).collect();
545                let tof_i32 = tof.iter().map(|&x| x as i32).collect();
546                let scan = flatten_scan_values(&scan, true);
547
548                let mz = self.index_converter.tof_to_mz(frame_id, &tof);
549                let inv_mobility = self
550                    .index_converter
551                    .scan_to_inverse_mobility(frame_id, &scan);
552
553                let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
554
555                let ms_type = match ms_type_raw {
556                    0 => MsType::Precursor,
557                    8 => MsType::FragmentDda,
558                    9 => MsType::FragmentDia,
559                    _ => MsType::Unknown,
560                };
561
562                TimsFrame {
563                    frame_id: frame_id as i32,
564                    ms_type,
565                    scan: scan.iter().map(|&x| x as i32).collect(),
566                    tof: tof_i32,
567                    ims_frame: ImsFrame::new(
568                        self.raw_data_layout.frame_meta_data[frame_index].time,
569                        inv_mobility,
570                        mz,
571                        intensity_dbl,
572                    ),
573                }
574            }
575
576            _ => {
577                panic!("TimsCompressionType is not 1 or 2.")
578            }
579        }
580    }
581
582    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
583        let frame_index = (frame_id - 1) as usize;
584        let offset = self.raw_data_layout.tims_offset_values[frame_index] as u64;
585
586        // turns out, there can be empty frames in the data, check for that, if so, return an empty frame
587        let num_peaks = self.raw_data_layout.frame_meta_data[frame_index].num_peaks;
588
589        if num_peaks == 0 {
590            return RawTimsFrame {
591                frame_id: frame_id as i32,
592                retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time,
593                ms_type: MsType::Unknown,
594                scan: Vec::new(),
595                tof: Vec::new(),
596                intensity: Vec::new(),
597            };
598        }
599
600        let mut file_path = PathBuf::from(&self.raw_data_layout.raw_data_path);
601        file_path.push("analysis.tdf_bin");
602        let mut infile = File::open(&file_path).unwrap();
603
604        infile.seek(SeekFrom::Start(offset)).unwrap();
605
606        let mut bin_buffer = [0u8; 4];
607        infile.read_exact(&mut bin_buffer).unwrap();
608        let bin_size = Cursor::new(bin_buffer).read_i32::<LittleEndian>().unwrap();
609
610        infile.read_exact(&mut bin_buffer).unwrap();
611
612        match self.raw_data_layout.global_meta_data.tims_compression_type {
613            _ if self.raw_data_layout.global_meta_data.tims_compression_type == 1 => {
614                panic!("Decompression Type1 not implemented.");
615            }
616
617            // Extract from ZSTD compressed binary
618            _ if self.raw_data_layout.global_meta_data.tims_compression_type == 2 => {
619                let mut compressed_data = vec![0u8; bin_size as usize - 8];
620                infile.read_exact(&mut compressed_data).unwrap();
621
622                let decompressed_bytes = zstd_decompress(&compressed_data).unwrap();
623
624                let (scan, tof, intensity) =
625                    parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
626
627                let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
628
629                let ms_type = match ms_type_raw {
630                    0 => MsType::Precursor,
631                    8 => MsType::FragmentDda,
632                    9 => MsType::FragmentDia,
633                    _ => MsType::Unknown,
634                };
635
636                let frame = RawTimsFrame {
637                    frame_id: frame_id as i32,
638                    retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize]
639                        .time,
640                    ms_type,
641                    scan,
642                    tof,
643                    intensity: intensity.iter().map(|&x| x as f64).collect(),
644                };
645
646                return frame;
647            }
648
649            // Error on unknown compression algorithm
650            _ => {
651                panic!("TimsCompressionType is not 1 or 2.")
652            }
653        }
654    }
655
656    fn get_slice(&self, frame_ids: Vec<u32>, _num_threads: usize) -> TimsSlice {
657        let result: Vec<TimsFrame> = frame_ids.into_iter().map(|f| self.get_frame(f)).collect();
658
659        TimsSlice { frames: result }
660    }
661
662    fn get_acquisition_mode(&self) -> AcquisitionMode {
663        self.raw_data_layout.acquisition_mode.clone()
664    }
665
666    fn get_frame_count(&self) -> i32 {
667        self.raw_data_layout.frame_meta_data.len() as i32
668    }
669
670    fn get_data_path(&self) -> &str {
671        &self.raw_data_layout.raw_data_path
672    }
673}
674
675pub struct TimsInMemoryLoader {
676    pub raw_data_layout: TimsRawDataLayout,
677    pub index_converter: TimsIndexConverter,
678    compressed_data: Vec<u8>,
679}
680
681impl TimsData for TimsInMemoryLoader {
682    fn get_frame(&self, frame_id: u32) -> TimsFrame {
683        let raw_frame = self.get_raw_frame(frame_id);
684
685        let raw_frame = match raw_frame.ms_type {
686            MsType::FragmentDda => raw_frame.smooth(1).centroid(1),
687            _ => raw_frame,
688        };
689
690        // if raw frame is empty, return an empty frame
691        if raw_frame.scan.is_empty() {
692            return TimsFrame::default();
693        }
694
695        let tof_i32 = raw_frame.tof.iter().map(|&x| x as i32).collect();
696        let scan = flatten_scan_values(&raw_frame.scan, true);
697
698        let mz = self.index_converter.tof_to_mz(frame_id, &raw_frame.tof);
699        let inverse_mobility = self
700            .index_converter
701            .scan_to_inverse_mobility(frame_id, &scan);
702
703        let ims_frame = ImsFrame::new(
704            raw_frame.retention_time,
705            inverse_mobility,
706            mz,
707            raw_frame.intensity,
708        );
709
710        TimsFrame {
711            frame_id: frame_id as i32,
712            ms_type: raw_frame.ms_type,
713            scan: scan.iter().map(|&x| x as i32).collect(),
714            tof: tof_i32,
715            ims_frame,
716        }
717    }
718
719    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
720        let frame_index = (frame_id - 1) as usize;
721        let offset = self.raw_data_layout.tims_offset_values[frame_index] as usize;
722
723        let bin_size_offset = offset + 4; // Assuming the size is stored immediately before the frame data
724        let bin_size = Cursor::new(&self.compressed_data[offset..bin_size_offset])
725            .read_i32::<LittleEndian>()
726            .unwrap();
727
728        let data_offset = bin_size_offset + 4; // Adjust based on actual structure
729        let frame_data = &self.compressed_data[data_offset..data_offset + bin_size as usize - 8];
730
731        let decompressed_bytes = zstd_decompress(&frame_data).unwrap();
732
733        let (scan, tof, intensity) =
734            parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
735
736        let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
737
738        let ms_type = match ms_type_raw {
739            0 => MsType::Precursor,
740            8 => MsType::FragmentDda,
741            9 => MsType::FragmentDia,
742            _ => MsType::Unknown,
743        };
744
745        let raw_frame = RawTimsFrame {
746            frame_id: frame_id as i32,
747            retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time,
748            ms_type,
749            scan,
750            tof,
751            intensity: intensity.iter().map(|&x| x as f64).collect(),
752        };
753
754        raw_frame
755    }
756
757    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
758        let pool = ThreadPoolBuilder::new()
759            .num_threads(num_threads)
760            .build()
761            .unwrap();
762        let frames = pool.install(|| {
763            frame_ids
764                .par_iter()
765                .map(|&frame_id| self.get_frame(frame_id))
766                .collect()
767        });
768
769        TimsSlice { frames }
770    }
771
772    fn get_acquisition_mode(&self) -> AcquisitionMode {
773        self.raw_data_layout.acquisition_mode.clone()
774    }
775
776    fn get_frame_count(&self) -> i32 {
777        self.raw_data_layout.frame_meta_data.len() as i32
778    }
779
780    fn get_data_path(&self) -> &str {
781        &self.raw_data_layout.raw_data_path
782    }
783}
784
785pub enum TimsDataLoader {
786    InMemory(TimsInMemoryLoader),
787    Lazy(TimsLazyLoder),
788}
789
790impl TimsDataLoader {
791    pub fn new_lazy(
792        bruker_lib_path: &str,
793        data_path: &str,
794        use_bruker_sdk: bool,
795        scan_max_index: u32,
796        im_lower: f64,
797        im_upper: f64,
798        tof_max_index: u32,
799        mz_lower: f64,
800        mz_upper: f64,
801    ) -> Self {
802        let raw_data_layout = TimsRawDataLayout::new(data_path);
803
804        let index_converter = match use_bruker_sdk {
805            true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new(
806                bruker_lib_path,
807                data_path,
808            )),
809            false => {
810                // Try to derive accurate calibration using SDK, fall back to simple model
811                match derive_mz_calibration(bruker_lib_path, data_path, tof_max_index) {
812                    Some((intercept, slope)) => {
813                        TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
814                            intercept,
815                            slope,
816                            im_lower,
817                            im_upper,
818                            scan_max_index,
819                        ))
820                    }
821                    None => {
822                        eprintln!(
823                            "Warning: Could not derive m/z calibration from SDK. \
824                            Using simple boundary model which may have ~5 Da error on some datasets. \
825                            This typically happens on macOS where Bruker SDK is not available."
826                        );
827                        TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries(
828                            mz_lower,
829                            mz_upper,
830                            tof_max_index,
831                            im_lower,
832                            im_upper,
833                            scan_max_index,
834                        ))
835                    }
836                }
837            }
838        };
839
840        TimsDataLoader::Lazy(TimsLazyLoder {
841            raw_data_layout,
842            index_converter,
843        })
844    }
845
846    pub fn new_in_memory(
847        bruker_lib_path: &str,
848        data_path: &str,
849        use_bruker_sdk: bool,
850        scan_max_index: u32,
851        im_lower: f64,
852        im_upper: f64,
853        tof_max_index: u32,
854        mz_lower: f64,
855        mz_upper: f64,
856    ) -> Self {
857        let raw_data_layout = TimsRawDataLayout::new(data_path);
858
859        let index_converter = match use_bruker_sdk {
860            true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new(
861                bruker_lib_path,
862                data_path,
863            )),
864            false => {
865                // Try to derive accurate calibration using SDK, fall back to simple model
866                match derive_mz_calibration(bruker_lib_path, data_path, tof_max_index) {
867                    Some((intercept, slope)) => {
868                        TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
869                            intercept,
870                            slope,
871                            im_lower,
872                            im_upper,
873                            scan_max_index,
874                        ))
875                    }
876                    None => {
877                        eprintln!(
878                            "Warning: Could not derive m/z calibration from SDK. \
879                            Using simple boundary model which may have ~5 Da error on some datasets. \
880                            This typically happens on macOS where Bruker SDK is not available."
881                        );
882                        TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries(
883                            mz_lower,
884                            mz_upper,
885                            tof_max_index,
886                            im_lower,
887                            im_upper,
888                            scan_max_index,
889                        ))
890                    }
891                }
892            }
893        };
894
895        let mut file_path = PathBuf::from(data_path);
896        file_path.push("analysis.tdf_bin");
897        let mut infile = File::open(file_path).unwrap();
898        let mut data = Vec::new();
899        infile.read_to_end(&mut data).unwrap();
900
901        TimsDataLoader::InMemory(TimsInMemoryLoader {
902            raw_data_layout,
903            index_converter,
904            compressed_data: data,
905        })
906    }
907
908    /// Create a lazy loader with pre-computed ion mobility calibration lookup table.
909    ///
910    /// This method enables accurate ion mobility calibration with fast parallel extraction.
911    /// The im_lookup table should be pre-computed using the Bruker SDK.
912    ///
913    /// # Arguments
914    /// * `data_path` - Path to the .d folder
915    /// * `tof_max_index` - Maximum TOF index (from GlobalMetaData)
916    /// * `mz_lower` - Minimum m/z value (from GlobalMetaData)
917    /// * `mz_upper` - Maximum m/z value (from GlobalMetaData)
918    /// * `im_lookup` - Pre-computed scan→1/K0 lookup table
919    ///
920    /// # Returns
921    /// A new TimsDataLoader with LookupIndexConverter
922    pub fn new_lazy_with_calibration(
923        data_path: &str,
924        tof_max_index: u32,
925        mz_lower: f64,
926        mz_upper: f64,
927        im_lookup: Vec<f64>,
928    ) -> Self {
929        let raw_data_layout = TimsRawDataLayout::new(data_path);
930
931        let index_converter = TimsIndexConverter::Lookup(LookupIndexConverter::new(
932            mz_lower,
933            mz_upper,
934            tof_max_index,
935            im_lookup,
936        ));
937
938        TimsDataLoader::Lazy(TimsLazyLoder {
939            raw_data_layout,
940            index_converter,
941        })
942    }
943
944    /// Create an in-memory loader with pre-computed ion mobility calibration lookup table.
945    ///
946    /// This method enables accurate ion mobility calibration with fast parallel extraction.
947    /// The im_lookup table should be pre-computed using the Bruker SDK.
948    ///
949    /// # Arguments
950    /// * `data_path` - Path to the .d folder
951    /// * `tof_max_index` - Maximum TOF index (from GlobalMetaData)
952    /// * `mz_lower` - Minimum m/z value (from GlobalMetaData)
953    /// * `mz_upper` - Maximum m/z value (from GlobalMetaData)
954    /// * `im_lookup` - Pre-computed scan→1/K0 lookup table
955    ///
956    /// # Returns
957    /// A new TimsDataLoader with LookupIndexConverter
958    pub fn new_in_memory_with_calibration(
959        data_path: &str,
960        tof_max_index: u32,
961        mz_lower: f64,
962        mz_upper: f64,
963        im_lookup: Vec<f64>,
964    ) -> Self {
965        let raw_data_layout = TimsRawDataLayout::new(data_path);
966
967        let index_converter = TimsIndexConverter::Lookup(LookupIndexConverter::new(
968            mz_lower,
969            mz_upper,
970            tof_max_index,
971            im_lookup,
972        ));
973
974        let mut file_path = PathBuf::from(data_path);
975        file_path.push("analysis.tdf_bin");
976        let mut infile = File::open(file_path).unwrap();
977        let mut data = Vec::new();
978        infile.read_to_end(&mut data).unwrap();
979
980        TimsDataLoader::InMemory(TimsInMemoryLoader {
981            raw_data_layout,
982            index_converter,
983            compressed_data: data,
984        })
985    }
986
987    /// Create a lazy loader with full calibration (both m/z and IM).
988    ///
989    /// This method uses regression-derived m/z calibration coefficients instead of
990    /// the simple boundary model, providing more accurate m/z conversion.
991    ///
992    /// # Arguments
993    /// * `data_path` - Path to the .d folder
994    /// * `tof_intercept` - Intercept for sqrt(mz) = intercept + slope * tof
995    /// * `tof_slope` - Slope for sqrt(mz) = intercept + slope * tof
996    /// * `im_min` - Minimum 1/K0 value
997    /// * `im_max` - Maximum 1/K0 value
998    /// * `scan_max_index` - Maximum scan index
999    pub fn new_lazy_with_mz_calibration(
1000        data_path: &str,
1001        tof_intercept: f64,
1002        tof_slope: f64,
1003        im_min: f64,
1004        im_max: f64,
1005        scan_max_index: u32,
1006    ) -> Self {
1007        let raw_data_layout = TimsRawDataLayout::new(data_path);
1008
1009        let index_converter = TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
1010            tof_intercept,
1011            tof_slope,
1012            im_min,
1013            im_max,
1014            scan_max_index,
1015        ));
1016
1017        TimsDataLoader::Lazy(TimsLazyLoder {
1018            raw_data_layout,
1019            index_converter,
1020        })
1021    }
1022
1023    /// Create an in-memory loader with full calibration (both m/z and IM).
1024    ///
1025    /// This method uses regression-derived m/z calibration coefficients instead of
1026    /// the simple boundary model, providing more accurate m/z conversion.
1027    pub fn new_in_memory_with_mz_calibration(
1028        data_path: &str,
1029        tof_intercept: f64,
1030        tof_slope: f64,
1031        im_min: f64,
1032        im_max: f64,
1033        scan_max_index: u32,
1034    ) -> Self {
1035        let raw_data_layout = TimsRawDataLayout::new(data_path);
1036
1037        let index_converter = TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
1038            tof_intercept,
1039            tof_slope,
1040            im_min,
1041            im_max,
1042            scan_max_index,
1043        ));
1044
1045        let mut file_path = PathBuf::from(data_path);
1046        file_path.push("analysis.tdf_bin");
1047        let mut infile = File::open(file_path).unwrap();
1048        let mut data = Vec::new();
1049        infile.read_to_end(&mut data).unwrap();
1050
1051        TimsDataLoader::InMemory(TimsInMemoryLoader {
1052            raw_data_layout,
1053            index_converter,
1054            compressed_data: data,
1055        })
1056    }
1057
1058    pub fn get_index_converter(&self) -> &dyn IndexConverter {
1059        match self {
1060            TimsDataLoader::InMemory(loader) => &loader.index_converter,
1061            TimsDataLoader::Lazy(loader) => &loader.index_converter,
1062        }
1063    }
1064
1065    /// Check if the Bruker SDK is being used for index conversion.
1066    /// The Bruker SDK is NOT thread-safe, so parallel operations that call
1067    /// the index converter must be disabled when using the SDK.
1068    pub fn uses_bruker_sdk(&self) -> bool {
1069        match self {
1070            TimsDataLoader::InMemory(loader) => matches!(&loader.index_converter, TimsIndexConverter::BrukerLib(_)),
1071            TimsDataLoader::Lazy(loader) => matches!(&loader.index_converter, TimsIndexConverter::BrukerLib(_)),
1072        }
1073    }
1074}
1075
1076impl TimsData for TimsDataLoader {
1077    fn get_frame(&self, frame_id: u32) -> TimsFrame {
1078        match self {
1079            TimsDataLoader::InMemory(loader) => loader.get_frame(frame_id),
1080            TimsDataLoader::Lazy(loader) => loader.get_frame(frame_id),
1081        }
1082    }
1083    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
1084        match self {
1085            TimsDataLoader::InMemory(loader) => loader.get_raw_frame(frame_id),
1086            TimsDataLoader::Lazy(loader) => loader.get_raw_frame(frame_id),
1087        }
1088    }
1089
1090    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
1091        match self {
1092            TimsDataLoader::InMemory(loader) => loader.get_slice(frame_ids, num_threads),
1093            TimsDataLoader::Lazy(loader) => loader.get_slice(frame_ids, num_threads),
1094        }
1095    }
1096
1097    fn get_acquisition_mode(&self) -> AcquisitionMode {
1098        match self {
1099            TimsDataLoader::InMemory(loader) => loader.get_acquisition_mode(),
1100            TimsDataLoader::Lazy(loader) => loader.get_acquisition_mode(),
1101        }
1102    }
1103
1104    fn get_frame_count(&self) -> i32 {
1105        match self {
1106            TimsDataLoader::InMemory(loader) => loader.get_frame_count(),
1107            TimsDataLoader::Lazy(loader) => loader.get_frame_count(),
1108        }
1109    }
1110
1111    fn get_data_path(&self) -> &str {
1112        match self {
1113            TimsDataLoader::InMemory(loader) => loader.get_data_path(),
1114            TimsDataLoader::Lazy(loader) => loader.get_data_path(),
1115        }
1116    }
1117}
1118
1119pub struct SimpleIndexConverter {
1120    pub tof_intercept: f64,
1121    pub tof_slope: f64,
1122    pub scan_intercept: f64,
1123    pub scan_slope: f64,
1124}
1125
1126impl SimpleIndexConverter {
1127    pub fn from_boundaries(
1128        mz_min: f64,
1129        mz_max: f64,
1130        tof_max_index: u32,
1131        im_min: f64,
1132        im_max: f64,
1133        scan_max_index: u32,
1134    ) -> Self {
1135        let tof_intercept: f64 = mz_min.sqrt();
1136        let tof_slope: f64 = (mz_max.sqrt() - tof_intercept) / tof_max_index as f64;
1137
1138        let scan_intercept: f64 = im_max;
1139        let scan_slope: f64 = (im_min - scan_intercept) / scan_max_index as f64;
1140        Self {
1141            tof_intercept,
1142            tof_slope,
1143            scan_intercept,
1144            scan_slope,
1145        }
1146    }
1147}
1148
1149impl IndexConverter for SimpleIndexConverter {
1150    fn tof_to_mz(&self, _frame_id: u32, _tof_values: &Vec<u32>) -> Vec<f64> {
1151        let mut mz_values: Vec<f64> = Vec::new();
1152        mz_values.resize(_tof_values.len(), 0.0);
1153
1154        for (i, &val) in _tof_values.iter().enumerate() {
1155            mz_values[i] = (self.tof_intercept + self.tof_slope * val as f64).powi(2);
1156        }
1157
1158        mz_values
1159    }
1160
1161    fn mz_to_tof(&self, _frame_id: u32, _mz_values: &Vec<f64>) -> Vec<u32> {
1162        let mut tof_values: Vec<u32> = Vec::new();
1163        tof_values.resize(_mz_values.len(), 0);
1164
1165        for (i, &val) in _mz_values.iter().enumerate() {
1166            tof_values[i] = ((val.sqrt() - self.tof_intercept) / self.tof_slope) as u32;
1167        }
1168
1169        tof_values
1170    }
1171
1172    fn scan_to_inverse_mobility(&self, _frame_id: u32, _scan_values: &Vec<u32>) -> Vec<f64> {
1173        let mut inv_mobility_values: Vec<f64> = Vec::new();
1174        inv_mobility_values.resize(_scan_values.len(), 0.0);
1175
1176        for (i, &val) in _scan_values.iter().enumerate() {
1177            inv_mobility_values[i] = self.scan_intercept + self.scan_slope * val as f64;
1178        }
1179
1180        inv_mobility_values
1181    }
1182
1183    fn inverse_mobility_to_scan(
1184        &self,
1185        _frame_id: u32,
1186        _inverse_mobility_values: &Vec<f64>,
1187    ) -> Vec<u32> {
1188        let mut scan_values: Vec<u32> = Vec::new();
1189        scan_values.resize(_inverse_mobility_values.len(), 0);
1190
1191        for (i, &val) in _inverse_mobility_values.iter().enumerate() {
1192            scan_values[i] = ((val - self.scan_intercept) / self.scan_slope) as u32;
1193        }
1194
1195        scan_values
1196    }
1197}
1198
1199/// M/z calibrated index converter using regression-derived coefficients.
1200///
1201/// This provides accurate TOF to m/z conversion without requiring the Bruker SDK
1202/// by using linear regression coefficients derived from known precursor m/z values.
1203///
1204/// The calibration formula is:
1205///   sqrt(mz) = tof_intercept + tof_slope * tof_index
1206///
1207/// This is similar to SimpleIndexConverter but uses externally-provided coefficients
1208/// (e.g., from regression on precursor data) rather than boundary-derived values.
1209pub struct CalibratedIndexConverter {
1210    pub tof_intercept: f64,
1211    pub tof_slope: f64,
1212    pub scan_intercept: f64,
1213    pub scan_slope: f64,
1214}
1215
1216impl CalibratedIndexConverter {
1217    /// Create a new calibrated converter with regression-derived coefficients.
1218    ///
1219    /// # Arguments
1220    /// * `tof_intercept` - Intercept for sqrt(mz) = intercept + slope * tof
1221    /// * `tof_slope` - Slope for sqrt(mz) = intercept + slope * tof
1222    /// * `im_min` - Minimum 1/K0 value
1223    /// * `im_max` - Maximum 1/K0 value
1224    /// * `scan_max_index` - Maximum scan index
1225    pub fn new(
1226        tof_intercept: f64,
1227        tof_slope: f64,
1228        im_min: f64,
1229        im_max: f64,
1230        scan_max_index: u32,
1231    ) -> Self {
1232        let scan_intercept = im_max;
1233        let scan_slope = (im_min - scan_intercept) / scan_max_index as f64;
1234        Self {
1235            tof_intercept,
1236            tof_slope,
1237            scan_intercept,
1238            scan_slope,
1239        }
1240    }
1241}
1242
1243impl IndexConverter for CalibratedIndexConverter {
1244    fn tof_to_mz(&self, _frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
1245        let mut mz_values: Vec<f64> = Vec::with_capacity(tof_values.len());
1246
1247        for &tof_index in tof_values.iter() {
1248            // sqrt(mz) = tof_intercept + tof_slope * tof_index
1249            let sqrt_mz = self.tof_intercept + self.tof_slope * tof_index as f64;
1250            mz_values.push(sqrt_mz * sqrt_mz);
1251        }
1252
1253        mz_values
1254    }
1255
1256    fn mz_to_tof(&self, _frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
1257        let mut tof_values: Vec<u32> = Vec::with_capacity(mz_values.len());
1258
1259        for &mz in mz_values.iter() {
1260            let sqrt_mz = mz.sqrt();
1261            // tof_index = (sqrt(mz) - tof_intercept) / tof_slope
1262            tof_values.push(((sqrt_mz - self.tof_intercept) / self.tof_slope) as u32);
1263        }
1264
1265        tof_values
1266    }
1267
1268    fn scan_to_inverse_mobility(&self, _frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
1269        let mut inv_mobility_values: Vec<f64> = Vec::with_capacity(scan_values.len());
1270
1271        for &val in scan_values.iter() {
1272            inv_mobility_values.push(self.scan_intercept + self.scan_slope * val as f64);
1273        }
1274
1275        inv_mobility_values
1276    }
1277
1278    fn inverse_mobility_to_scan(
1279        &self,
1280        _frame_id: u32,
1281        inverse_mobility_values: &Vec<f64>,
1282    ) -> Vec<u32> {
1283        let mut scan_values: Vec<u32> = Vec::with_capacity(inverse_mobility_values.len());
1284
1285        for &val in inverse_mobility_values.iter() {
1286            scan_values.push(((val - self.scan_intercept) / self.scan_slope) as u32);
1287        }
1288
1289        scan_values
1290    }
1291}
1292
1293/// Ion mobility index converter using pre-computed lookup table.
1294///
1295/// This converter uses a pre-computed scan→1/K0 lookup table extracted from the Bruker SDK.
1296/// It enables accurate ion mobility calibration with fast parallel extraction.
1297///
1298/// Background:
1299/// - The Bruker calibration formula is patented and proprietary
1300/// - Using the Bruker SDK gives accurate values but is slow (not thread-safe)
1301/// - Linear interpolation is fast but inaccurate
1302/// - This converter uses SDK-probed lookup for accuracy with O(1) thread-safe lookups
1303///
1304/// The lookup table is typically small (~8KB for 1000 scans) and constant across all frames.
1305pub struct LookupIndexConverter {
1306    // m/z conversion uses simple linear model (accurate enough for most purposes)
1307    pub tof_intercept: f64,
1308    pub tof_slope: f64,
1309
1310    // Ion mobility: pre-computed lookup table from Bruker SDK
1311    // scan_index → 1/K0 value
1312    pub im_lookup: Vec<f64>,
1313
1314    // Fallback for inverse conversion (1/K0 → scan)
1315    // We store the min/max for binary search bounds
1316    pub im_min: f64,
1317    pub im_max: f64,
1318}
1319
1320impl LookupIndexConverter {
1321    /// Create a new LookupIndexConverter with pre-computed ion mobility lookup.
1322    ///
1323    /// # Arguments
1324    /// * `mz_min` - Minimum m/z value for TOF conversion
1325    /// * `mz_max` - Maximum m/z value for TOF conversion
1326    /// * `tof_max_index` - Maximum TOF index
1327    /// * `im_lookup` - Pre-computed scan→1/K0 lookup table from Bruker SDK
1328    ///
1329    /// # Returns
1330    /// A new LookupIndexConverter instance
1331    pub fn new(
1332        mz_min: f64,
1333        mz_max: f64,
1334        tof_max_index: u32,
1335        im_lookup: Vec<f64>,
1336    ) -> Self {
1337        let tof_intercept: f64 = mz_min.sqrt();
1338        let tof_slope: f64 = (mz_max.sqrt() - tof_intercept) / tof_max_index as f64;
1339
1340        // Get IM bounds for inverse conversion
1341        let im_min = im_lookup.iter().cloned().fold(f64::INFINITY, f64::min);
1342        let im_max = im_lookup.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1343
1344        Self {
1345            tof_intercept,
1346            tof_slope,
1347            im_lookup,
1348            im_min,
1349            im_max,
1350        }
1351    }
1352}
1353
1354impl IndexConverter for LookupIndexConverter {
1355    fn tof_to_mz(&self, _frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
1356        let mut mz_values: Vec<f64> = Vec::new();
1357        mz_values.resize(tof_values.len(), 0.0);
1358
1359        for (i, &val) in tof_values.iter().enumerate() {
1360            mz_values[i] = (self.tof_intercept + self.tof_slope * val as f64).powi(2);
1361        }
1362
1363        mz_values
1364    }
1365
1366    fn mz_to_tof(&self, _frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
1367        let mut tof_values: Vec<u32> = Vec::new();
1368        tof_values.resize(mz_values.len(), 0);
1369
1370        for (i, &val) in mz_values.iter().enumerate() {
1371            tof_values[i] = ((val.sqrt() - self.tof_intercept) / self.tof_slope) as u32;
1372        }
1373
1374        tof_values
1375    }
1376
1377    fn scan_to_inverse_mobility(&self, _frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
1378        // Use the pre-computed lookup table for O(1) conversion
1379        scan_values
1380            .iter()
1381            .map(|&s| {
1382                self.im_lookup
1383                    .get(s as usize)
1384                    .copied()
1385                    .unwrap_or(f64::NAN)
1386            })
1387            .collect()
1388    }
1389
1390    fn inverse_mobility_to_scan(
1391        &self,
1392        _frame_id: u32,
1393        inverse_mobility_values: &Vec<f64>,
1394    ) -> Vec<u32> {
1395        // Use binary search to find the closest scan index for each 1/K0 value
1396        // The lookup table is monotonically decreasing (higher scan = lower 1/K0)
1397        inverse_mobility_values
1398            .iter()
1399            .map(|&im| {
1400                if im.is_nan() || self.im_lookup.is_empty() {
1401                    return 0;
1402                }
1403
1404                // Binary search for the closest value
1405                // Note: im_lookup is typically monotonically decreasing
1406                let mut best_scan = 0usize;
1407                let mut best_diff = f64::INFINITY;
1408
1409                for (scan, &lookup_im) in self.im_lookup.iter().enumerate() {
1410                    let diff = (lookup_im - im).abs();
1411                    if diff < best_diff {
1412                        best_diff = diff;
1413                        best_scan = scan;
1414                    }
1415                }
1416
1417                best_scan as u32
1418            })
1419            .collect()
1420    }
1421}