Skip to main content

rustdf/data/
handle.rs

1use crate::data::meta::{read_global_meta_sql, read_meta_data_sql, FrameMeta, GlobalMetaData};
2use crate::data::raw::BrukerTimsDataLibrary;
3use crate::data::utility::{
4    flatten_scan_values, parse_decompressed_bruker_binary_data, zstd_decompress,
5};
6use byteorder::{LittleEndian, ReadBytesExt};
7use mscore::data::spectrum::MsType;
8use mscore::timstof::frame::{ImsFrame, RawTimsFrame, TimsFrame};
9use mscore::timstof::slice::TimsSlice;
10use std::fs::File;
11use std::io::{Cursor, Read, Seek, SeekFrom};
12use std::path::PathBuf;
13
14use crate::data::acquisition::AcquisitionMode;
15use rayon::prelude::*;
16use rayon::ThreadPoolBuilder;
17
18use std::error::Error;
19
20/// Derive m/z calibration coefficients by using the Bruker SDK.
21///
22/// This function temporarily loads the SDK to convert a range of TOF indices to m/z,
23/// then fits a linear regression to derive accurate calibration coefficients.
24///
25/// Returns `Some((intercept, slope))` for the formula: `sqrt(mz) = intercept + slope * tof`
26/// Returns `None` if SDK is not available (e.g., on macOS).
27fn derive_mz_calibration(
28    bruker_lib_path: &str,
29    data_path: &str,
30    tof_max_index: u32,
31) -> Option<(f64, f64)> {
32    // Check if SDK path is valid before trying to load
33    // Common invalid values: "NO_SDK", empty string, or non-existent paths
34    if bruker_lib_path.is_empty()
35        || bruker_lib_path == "NO_SDK"
36        || bruker_lib_path == "CALIBRATED"
37        || !std::path::Path::new(bruker_lib_path).exists()
38    {
39        return None;
40    }
41
42    // Try to create a BrukerLib converter
43    let sdk_converter = match std::panic::catch_unwind(|| {
44        BrukerLibTimsDataConverter::new(bruker_lib_path, data_path)
45    }) {
46        Ok(converter) => converter,
47        Err(_) => return None,
48    };
49
50    // Generate a range of TOF indices across the spectrum
51    let n_points = 1000;
52    let step = tof_max_index / n_points;
53    let tof_indices: Vec<u32> = (0..n_points).map(|i| i * step + step / 2).collect();
54
55    // Convert to m/z using SDK
56    let mz_values = sdk_converter.tof_to_mz(1, &tof_indices);
57
58    // Filter out any invalid values (zero or negative)
59    let valid_pairs: Vec<(f64, f64)> = tof_indices
60        .iter()
61        .zip(mz_values.iter())
62        .filter(|(_, mz)| **mz > 0.0)
63        .map(|(tof, mz)| (*tof as f64, mz.sqrt()))
64        .collect();
65
66    if valid_pairs.len() < 10 {
67        return None;
68    }
69
70    // Linear regression: sqrt(mz) = intercept + slope * tof
71    // Using simple least squares: slope = Cov(x,y) / Var(x), intercept = mean_y - slope * mean_x
72    let n = valid_pairs.len() as f64;
73    let sum_x: f64 = valid_pairs.iter().map(|(x, _)| x).sum();
74    let sum_y: f64 = valid_pairs.iter().map(|(_, y)| y).sum();
75    let sum_xy: f64 = valid_pairs.iter().map(|(x, y)| x * y).sum();
76    let sum_xx: f64 = valid_pairs.iter().map(|(x, _)| x * x).sum();
77
78    let mean_x = sum_x / n;
79    let mean_y = sum_y / n;
80
81    let slope = (sum_xy - n * mean_x * mean_y) / (sum_xx - n * mean_x * mean_x);
82    let intercept = mean_y - slope * mean_x;
83
84    Some((intercept, slope))
85}
86
87/// Build a `LookupIndexConverter`, preferring an SDK-derived m/z calibration.
88///
89/// The `LookupIndexConverter` always carries the pre-computed scan→1/K0 lookup
90/// for ion mobility. For m/z it normally uses a 2-point boundary model
91/// (`LookupIndexConverter::new`), which can have a large m/z error on some
92/// datasets. When a valid `bruker_lib_path` is supplied, m/z is instead
93/// calibrated with the same regression fit used by the `Calibrated` converter
94/// (`derive_mz_calibration`). Falls back to the boundary model when the SDK is
95/// unavailable (e.g. macOS, or `bruker_lib_path` is empty / "NO_SDK").
96fn build_lookup_converter(
97    bruker_lib_path: &str,
98    data_path: &str,
99    tof_max_index: u32,
100    mz_lower: f64,
101    mz_upper: f64,
102    im_lookup: Vec<f64>,
103) -> LookupIndexConverter {
104    match derive_mz_calibration(bruker_lib_path, data_path, tof_max_index) {
105        Some((intercept, slope)) => {
106            LookupIndexConverter::with_mz_fit(intercept, slope, im_lookup)
107        }
108        None => {
109            if !bruker_lib_path.is_empty() && bruker_lib_path != "NO_SDK" {
110                eprintln!(
111                    "Warning: Could not derive m/z calibration from SDK at '{}'. \
112                    Falling back to the 2-point boundary model, which may have a \
113                    large m/z error on some datasets.",
114                    bruker_lib_path
115                );
116            }
117            LookupIndexConverter::new(mz_lower, mz_upper, tof_max_index, im_lookup)
118        }
119    }
120}
121
122fn lzf_decompress(data: &[u8], max_output_size: usize) -> Result<Vec<u8>, Box<dyn Error>> {
123    let decompressed_data = lzf::decompress(data, max_output_size)
124        .map_err(|e| format!("LZF decompression failed: {}", e))?;
125    Ok(decompressed_data)
126}
127
128fn parse_decompressed_bruker_binary_type1(
129    decompressed_bytes: &[u8],
130    scan_indices: &mut [i64],
131    tof_indices: &mut [u32],
132    intensities: &mut [u16],
133    scan_start: usize,
134    scan_index: usize,
135) -> usize {
136    // Interpret decompressed_bytes as a slice of i32
137    let int_count = decompressed_bytes.len() / 4;
138    let buffer =
139        unsafe { std::slice::from_raw_parts(decompressed_bytes.as_ptr() as *const i32, int_count) };
140
141    let mut tof_index = 0i32;
142    let mut previous_was_intensity = true;
143    let mut current_index = scan_start;
144
145    for &value in buffer {
146        if value >= 0 {
147            // positive value => intensity
148            if previous_was_intensity {
149                tof_index += 1;
150            }
151            tof_indices[current_index] = tof_index as u32;
152            intensities[current_index] = value as u16;
153            previous_was_intensity = true;
154            current_index += 1;
155        } else {
156            // negative value => indicates a jump in tof_index
157            tof_index -= value; // value is negative, so this adds |value| to tof_index
158            previous_was_intensity = false;
159        }
160    }
161
162    let scan_size = current_index - scan_start;
163    scan_indices[scan_index] = scan_size as i64;
164    scan_size
165}
166
167pub struct TimsRawDataLayout {
168    pub raw_data_path: String,
169    pub global_meta_data: GlobalMetaData,
170    pub frame_meta_data: Vec<FrameMeta>,
171    pub max_scan_count: i64,
172    pub frame_id_ptr: Vec<i64>,
173    pub tims_offset_values: Vec<i64>,
174    pub acquisition_mode: AcquisitionMode,
175}
176
177impl TimsRawDataLayout {
178    pub fn new(data_path: &str) -> Self {
179        // get the global and frame meta data
180        let global_meta_data = read_global_meta_sql(data_path).unwrap();
181        let frame_meta_data = read_meta_data_sql(data_path).unwrap();
182
183        // get the max scan count
184        let max_scan_count = frame_meta_data.iter().map(|x| x.num_scans).max().unwrap();
185
186        let mut frame_id_ptr: Vec<i64> = Vec::new();
187        frame_id_ptr.resize(frame_meta_data.len() + 1, 0);
188
189        // get the frame id_ptr values
190        for (i, row) in frame_meta_data.iter().enumerate() {
191            frame_id_ptr[i + 1] = row.num_peaks + frame_id_ptr[i];
192        }
193
194        // get the tims offset values
195        let tims_offset_values = frame_meta_data
196            .iter()
197            .map(|x| x.tims_id)
198            .collect::<Vec<i64>>();
199
200        // get the acquisition mode
201        let acquisition_mode = match frame_meta_data[0].scan_mode {
202            8 => AcquisitionMode::DDA,
203            9 => AcquisitionMode::DIA,
204            _ => AcquisitionMode::Unknown,
205        };
206
207        TimsRawDataLayout {
208            raw_data_path: data_path.to_string(),
209            global_meta_data,
210            frame_meta_data,
211            max_scan_count,
212            frame_id_ptr,
213            tims_offset_values,
214            acquisition_mode,
215        }
216    }
217}
218
219pub trait TimsData {
220    fn get_frame(&self, frame_id: u32) -> TimsFrame;
221    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame;
222    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice;
223    fn get_acquisition_mode(&self) -> AcquisitionMode;
224    fn get_frame_count(&self) -> i32;
225    fn get_data_path(&self) -> &str;
226}
227
228pub trait IndexConverter {
229    fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64>;
230    fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32>;
231    fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64>;
232    fn inverse_mobility_to_scan(
233        &self,
234        frame_id: u32,
235        inverse_mobility_values: &Vec<f64>,
236    ) -> Vec<u32>;
237}
238
239pub struct BrukerLibTimsDataConverter {
240    pub bruker_lib: BrukerTimsDataLibrary,
241}
242
243impl BrukerLibTimsDataConverter {
244    pub fn new(bruker_lib_path: &str, data_path: &str) -> Self {
245        let bruker_lib = BrukerTimsDataLibrary::new(bruker_lib_path, data_path).unwrap();
246        BrukerLibTimsDataConverter { bruker_lib }
247    }
248}
249impl IndexConverter for BrukerLibTimsDataConverter {
250    /// translate tof to mz values calling the bruker library
251    ///
252    /// # Arguments
253    ///
254    /// * `frame_id` - A u32 that holds the frame id
255    /// * `tof` - A vector of u32 that holds the tof values
256    ///
257    /// # Returns
258    ///
259    /// * `mz_values` - A vector of f64 that holds the mz values
260    ///
261    fn tof_to_mz(&self, frame_id: u32, tof: &Vec<u32>) -> Vec<f64> {
262        let mut dbl_tofs: Vec<f64> = Vec::new();
263        dbl_tofs.resize(tof.len(), 0.0);
264
265        for (i, &val) in tof.iter().enumerate() {
266            dbl_tofs[i] = val as f64;
267        }
268
269        let mut mz_values: Vec<f64> = Vec::new();
270        mz_values.resize(tof.len(), 0.0);
271
272        self.bruker_lib
273            .tims_index_to_mz(frame_id, &dbl_tofs, &mut mz_values)
274            .expect("Bruker binary call failed at: tims_index_to_mz;");
275
276        mz_values
277    }
278
279    fn mz_to_tof(&self, frame_id: u32, mz: &Vec<f64>) -> Vec<u32> {
280        let mut dbl_mz: Vec<f64> = Vec::new();
281        dbl_mz.resize(mz.len(), 0.0);
282
283        for (i, &val) in mz.iter().enumerate() {
284            dbl_mz[i] = val;
285        }
286
287        let mut tof_values: Vec<f64> = Vec::new();
288        tof_values.resize(mz.len(), 0.0);
289
290        self.bruker_lib
291            .tims_mz_to_index(frame_id, &dbl_mz, &mut tof_values)
292            .expect("Bruker binary call failed at: tims_mz_to_index;");
293
294        tof_values.iter().map(|&x| x.round() as u32).collect()
295    }
296
297    /// translate scan to inverse mobility values calling the bruker library
298    ///
299    /// # Arguments
300    ///
301    /// * `frame_id` - A u32 that holds the frame id
302    /// * `scan` - A vector of i32 that holds the scan values
303    ///
304    /// # Returns
305    ///
306    /// * `inv_mob` - A vector of f64 that holds the inverse mobility values
307    ///
308    fn scan_to_inverse_mobility(&self, frame_id: u32, scan: &Vec<u32>) -> Vec<f64> {
309        let mut dbl_scans: Vec<f64> = Vec::new();
310        dbl_scans.resize(scan.len(), 0.0);
311
312        for (i, &val) in scan.iter().enumerate() {
313            dbl_scans[i] = val as f64;
314        }
315
316        let mut inv_mob: Vec<f64> = Vec::new();
317        inv_mob.resize(scan.len(), 0.0);
318
319        self.bruker_lib
320            .tims_scan_to_inv_mob(frame_id, &dbl_scans, &mut inv_mob)
321            .expect("Bruker binary call failed at: tims_scannum_to_oneoverk0;");
322
323        inv_mob
324    }
325
326    /// translate inverse mobility to scan values calling the bruker library
327    ///
328    /// # Arguments
329    ///
330    /// * `frame_id` - A u32 that holds the frame id
331    /// * `inv_mob` - A vector of f64 that holds the inverse mobility values
332    ///
333    /// # Returns
334    ///
335    /// * `scan_values` - A vector of i32 that holds the scan values
336    ///
337    fn inverse_mobility_to_scan(&self, frame_id: u32, inv_mob: &Vec<f64>) -> Vec<u32> {
338        let mut dbl_inv_mob: Vec<f64> = Vec::new();
339        dbl_inv_mob.resize(inv_mob.len(), 0.0);
340
341        for (i, &val) in inv_mob.iter().enumerate() {
342            dbl_inv_mob[i] = val;
343        }
344
345        let mut scan_values: Vec<f64> = Vec::new();
346        scan_values.resize(inv_mob.len(), 0.0);
347
348        self.bruker_lib
349            .inv_mob_to_tims_scan(frame_id, &dbl_inv_mob, &mut scan_values)
350            .expect("Bruker binary call failed at: tims_oneoverk0_to_scannum;");
351
352        scan_values.iter().map(|&x| x.round() as u32).collect()
353    }
354}
355
356pub enum TimsIndexConverter {
357    Simple(SimpleIndexConverter),
358    Calibrated(CalibratedIndexConverter),
359    BrukerLib(BrukerLibTimsDataConverter),
360    Lookup(LookupIndexConverter),
361}
362
363impl IndexConverter for TimsIndexConverter {
364    fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
365        match self {
366            TimsIndexConverter::Simple(converter) => converter.tof_to_mz(frame_id, tof_values),
367            TimsIndexConverter::Calibrated(converter) => converter.tof_to_mz(frame_id, tof_values),
368            TimsIndexConverter::BrukerLib(converter) => converter.tof_to_mz(frame_id, tof_values),
369            TimsIndexConverter::Lookup(converter) => converter.tof_to_mz(frame_id, tof_values),
370        }
371    }
372
373    fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
374        match self {
375            TimsIndexConverter::Simple(converter) => converter.mz_to_tof(frame_id, mz_values),
376            TimsIndexConverter::Calibrated(converter) => converter.mz_to_tof(frame_id, mz_values),
377            TimsIndexConverter::BrukerLib(converter) => converter.mz_to_tof(frame_id, mz_values),
378            TimsIndexConverter::Lookup(converter) => converter.mz_to_tof(frame_id, mz_values),
379        }
380    }
381
382    fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
383        match self {
384            TimsIndexConverter::Simple(converter) => {
385                converter.scan_to_inverse_mobility(frame_id, scan_values)
386            }
387            TimsIndexConverter::Calibrated(converter) => {
388                converter.scan_to_inverse_mobility(frame_id, scan_values)
389            }
390            TimsIndexConverter::BrukerLib(converter) => {
391                converter.scan_to_inverse_mobility(frame_id, scan_values)
392            }
393            TimsIndexConverter::Lookup(converter) => {
394                converter.scan_to_inverse_mobility(frame_id, scan_values)
395            }
396        }
397    }
398
399    fn inverse_mobility_to_scan(
400        &self,
401        frame_id: u32,
402        inverse_mobility_values: &Vec<f64>,
403    ) -> Vec<u32> {
404        match self {
405            TimsIndexConverter::Simple(converter) => {
406                converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
407            }
408            TimsIndexConverter::Calibrated(converter) => {
409                converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
410            }
411            TimsIndexConverter::BrukerLib(converter) => {
412                converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
413            }
414            TimsIndexConverter::Lookup(converter) => {
415                converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values)
416            }
417        }
418    }
419}
420
421pub struct TimsLazyLoder {
422    pub raw_data_layout: TimsRawDataLayout,
423    pub index_converter: TimsIndexConverter,
424}
425
426impl TimsData for TimsLazyLoder {
427    fn get_frame(&self, frame_id: u32) -> TimsFrame {
428        let frame_index = (frame_id - 1) as usize;
429
430        // turns out, there can be empty frames in the data, check for that, if so, return an empty frame
431        let num_peaks = self.raw_data_layout.frame_meta_data[frame_index].num_peaks;
432
433        if num_peaks == 0 {
434
435            let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
436
437            let ms_type = match ms_type_raw {
438                0 => MsType::Precursor,
439                8 => MsType::FragmentDda,
440                9 => MsType::FragmentDia,
441                _ => MsType::Unknown,
442            };
443
444            return TimsFrame {
445                frame_id: frame_id as i32,
446                ms_type,
447                scan: Vec::new(),
448                tof: Vec::new(),
449                ims_frame: ImsFrame::new(
450                    self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time,
451                    Vec::new(),
452                    Vec::new(),
453                    Vec::new(),
454                ),
455            };
456        }
457
458        let offset = self.raw_data_layout.tims_offset_values[frame_index] as u64;
459
460        let mut file_path = PathBuf::from(&self.raw_data_layout.raw_data_path);
461        file_path.push("analysis.tdf_bin");
462        let mut infile = File::open(&file_path).unwrap();
463
464        infile.seek(SeekFrom::Start(offset)).unwrap();
465
466        let mut bin_buffer = [0u8; 4];
467        infile.read_exact(&mut bin_buffer).unwrap();
468        let bin_size = Cursor::new(bin_buffer).read_i32::<LittleEndian>().unwrap();
469
470        infile.read_exact(&mut bin_buffer).unwrap();
471
472        match self.raw_data_layout.global_meta_data.tims_compression_type {
473            1 => {
474                let scan_count =
475                    self.raw_data_layout.frame_meta_data[frame_index].num_scans as usize;
476                let num_peaks = num_peaks as usize;
477                let compression_offset = 8 + (scan_count + 1) * 4;
478
479                let mut scan_offsets_buffer = vec![0u8; (scan_count + 1) * 4];
480                infile.read_exact(&mut scan_offsets_buffer).unwrap();
481
482                let mut scan_offsets = Vec::with_capacity(scan_count + 1);
483                {
484                    let mut rdr = Cursor::new(&scan_offsets_buffer);
485                    for _ in 0..(scan_count + 1) {
486                        scan_offsets.push(rdr.read_i32::<LittleEndian>().unwrap());
487                    }
488                }
489
490                for offs in &mut scan_offsets {
491                    *offs -= compression_offset as i32;
492                }
493
494                let remaining_size = (bin_size as usize - compression_offset) as usize;
495                let mut compressed_data = vec![0u8; remaining_size];
496                infile.read_exact(&mut compressed_data).unwrap();
497
498                let mut scan_indices_ = vec![0i64; scan_count];
499                let mut tof_indices_ = vec![0u32; num_peaks];
500                let mut intensities_ = vec![0u16; num_peaks];
501
502                let mut scan_start = 0usize;
503
504                for scan_index in 0..scan_count {
505                    let start = scan_offsets[scan_index] as usize;
506                    let end = scan_offsets[scan_index + 1] as usize;
507
508                    if start == end {
509                        continue;
510                    }
511
512                    let max_output_size = num_peaks * 8;
513                    let decompressed_bytes =
514                        lzf_decompress(&compressed_data[start..end], max_output_size)
515                            .expect("LZF decompression failed.");
516
517                    scan_start += parse_decompressed_bruker_binary_type1(
518                        &decompressed_bytes,
519                        &mut scan_indices_,
520                        &mut tof_indices_,
521                        &mut intensities_,
522                        scan_start,
523                        scan_index,
524                    );
525                }
526
527                // Create a flat scan vector to match what flatten_scan_values expects
528                let mut scan = Vec::with_capacity(num_peaks);
529                {
530                    let mut current_scan_index = 0u32;
531                    for &size in &scan_indices_ {
532                        let sz = size as usize;
533                        for _ in 0..sz {
534                            scan.push(current_scan_index);
535                        }
536                        current_scan_index += 1;
537                    }
538                }
539
540                let intensity_dbl = intensities_.iter().map(|&x| x as f64).collect::<Vec<f64>>();
541                let tof_i32 = tof_indices_.iter().map(|&x| x as i32).collect::<Vec<i32>>();
542
543                let mz = self.index_converter.tof_to_mz(frame_id, &tof_indices_);
544                let inv_mobility = self
545                    .index_converter
546                    .scan_to_inverse_mobility(frame_id, &scan);
547
548                let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
549                let ms_type = match ms_type_raw {
550                    0 => MsType::Precursor,
551                    8 => MsType::FragmentDda,
552                    9 => MsType::FragmentDia,
553                    _ => MsType::Unknown,
554                };
555
556                TimsFrame {
557                    frame_id: frame_id as i32,
558                    ms_type,
559                    scan: scan.iter().map(|&x| x as i32).collect(),
560                    tof: tof_i32,
561                    ims_frame: ImsFrame::new(
562                        self.raw_data_layout.frame_meta_data[frame_index].time,
563                        inv_mobility,
564                        mz,
565                        intensity_dbl,
566                    ),
567                }
568            }
569
570            // Existing handling of Type 2
571            2 => {
572                let mut compressed_data = vec![0u8; bin_size as usize - 8];
573                infile.read_exact(&mut compressed_data).unwrap();
574
575                let decompressed_bytes = zstd_decompress(&compressed_data).unwrap();
576
577                let (scan, tof, intensity) =
578                    parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
579                let intensity_dbl = intensity.iter().map(|&x| x as f64).collect();
580                let tof_i32 = tof.iter().map(|&x| x as i32).collect();
581                let scan = flatten_scan_values(&scan, true);
582
583                let mz = self.index_converter.tof_to_mz(frame_id, &tof);
584                let inv_mobility = self
585                    .index_converter
586                    .scan_to_inverse_mobility(frame_id, &scan);
587
588                let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
589
590                let ms_type = match ms_type_raw {
591                    0 => MsType::Precursor,
592                    8 => MsType::FragmentDda,
593                    9 => MsType::FragmentDia,
594                    _ => MsType::Unknown,
595                };
596
597                TimsFrame {
598                    frame_id: frame_id as i32,
599                    ms_type,
600                    scan: scan.iter().map(|&x| x as i32).collect(),
601                    tof: tof_i32,
602                    ims_frame: ImsFrame::new(
603                        self.raw_data_layout.frame_meta_data[frame_index].time,
604                        inv_mobility,
605                        mz,
606                        intensity_dbl,
607                    ),
608                }
609            }
610
611            _ => {
612                panic!("TimsCompressionType is not 1 or 2.")
613            }
614        }
615    }
616
617    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
618        let frame_index = (frame_id - 1) as usize;
619        let offset = self.raw_data_layout.tims_offset_values[frame_index] as u64;
620
621        // turns out, there can be empty frames in the data, check for that, if so, return an empty frame
622        let num_peaks = self.raw_data_layout.frame_meta_data[frame_index].num_peaks;
623
624        if num_peaks == 0 {
625            return RawTimsFrame {
626                frame_id: frame_id as i32,
627                retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time,
628                ms_type: MsType::Unknown,
629                scan: Vec::new(),
630                tof: Vec::new(),
631                intensity: Vec::new(),
632            };
633        }
634
635        let mut file_path = PathBuf::from(&self.raw_data_layout.raw_data_path);
636        file_path.push("analysis.tdf_bin");
637        let mut infile = File::open(&file_path).unwrap();
638
639        infile.seek(SeekFrom::Start(offset)).unwrap();
640
641        let mut bin_buffer = [0u8; 4];
642        infile.read_exact(&mut bin_buffer).unwrap();
643        let bin_size = Cursor::new(bin_buffer).read_i32::<LittleEndian>().unwrap();
644
645        infile.read_exact(&mut bin_buffer).unwrap();
646
647        match self.raw_data_layout.global_meta_data.tims_compression_type {
648            _ if self.raw_data_layout.global_meta_data.tims_compression_type == 1 => {
649                panic!("Decompression Type1 not implemented.");
650            }
651
652            // Extract from ZSTD compressed binary
653            _ if self.raw_data_layout.global_meta_data.tims_compression_type == 2 => {
654                let mut compressed_data = vec![0u8; bin_size as usize - 8];
655                infile.read_exact(&mut compressed_data).unwrap();
656
657                let decompressed_bytes = zstd_decompress(&compressed_data).unwrap();
658
659                let (scan, tof, intensity) =
660                    parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
661
662                let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
663
664                let ms_type = match ms_type_raw {
665                    0 => MsType::Precursor,
666                    8 => MsType::FragmentDda,
667                    9 => MsType::FragmentDia,
668                    _ => MsType::Unknown,
669                };
670
671                let frame = RawTimsFrame {
672                    frame_id: frame_id as i32,
673                    retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize]
674                        .time,
675                    ms_type,
676                    scan,
677                    tof,
678                    intensity: intensity.iter().map(|&x| x as f64).collect(),
679                };
680
681                return frame;
682            }
683
684            // Error on unknown compression algorithm
685            _ => {
686                panic!("TimsCompressionType is not 1 or 2.")
687            }
688        }
689    }
690
691    fn get_slice(&self, frame_ids: Vec<u32>, _num_threads: usize) -> TimsSlice {
692        let result: Vec<TimsFrame> = frame_ids.into_iter().map(|f| self.get_frame(f)).collect();
693
694        TimsSlice { frames: result }
695    }
696
697    fn get_acquisition_mode(&self) -> AcquisitionMode {
698        self.raw_data_layout.acquisition_mode.clone()
699    }
700
701    fn get_frame_count(&self) -> i32 {
702        self.raw_data_layout.frame_meta_data.len() as i32
703    }
704
705    fn get_data_path(&self) -> &str {
706        &self.raw_data_layout.raw_data_path
707    }
708}
709
710pub struct TimsInMemoryLoader {
711    pub raw_data_layout: TimsRawDataLayout,
712    pub index_converter: TimsIndexConverter,
713    compressed_data: Vec<u8>,
714}
715
716impl TimsData for TimsInMemoryLoader {
717    fn get_frame(&self, frame_id: u32) -> TimsFrame {
718        let raw_frame = self.get_raw_frame(frame_id);
719
720        let raw_frame = match raw_frame.ms_type {
721            MsType::FragmentDda => raw_frame.smooth(1).centroid(1),
722            _ => raw_frame,
723        };
724
725        // if raw frame is empty, return an empty frame
726        if raw_frame.scan.is_empty() {
727            return TimsFrame::default();
728        }
729
730        let tof_i32 = raw_frame.tof.iter().map(|&x| x as i32).collect();
731        let scan = flatten_scan_values(&raw_frame.scan, true);
732
733        let mz = self.index_converter.tof_to_mz(frame_id, &raw_frame.tof);
734        let inverse_mobility = self
735            .index_converter
736            .scan_to_inverse_mobility(frame_id, &scan);
737
738        let ims_frame = ImsFrame::new(
739            raw_frame.retention_time,
740            inverse_mobility,
741            mz,
742            raw_frame.intensity,
743        );
744
745        TimsFrame {
746            frame_id: frame_id as i32,
747            ms_type: raw_frame.ms_type,
748            scan: scan.iter().map(|&x| x as i32).collect(),
749            tof: tof_i32,
750            ims_frame,
751        }
752    }
753
754    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
755        let frame_index = (frame_id - 1) as usize;
756        let offset = self.raw_data_layout.tims_offset_values[frame_index] as usize;
757
758        let bin_size_offset = offset + 4; // Assuming the size is stored immediately before the frame data
759        let bin_size = Cursor::new(&self.compressed_data[offset..bin_size_offset])
760            .read_i32::<LittleEndian>()
761            .unwrap();
762
763        let data_offset = bin_size_offset + 4; // Adjust based on actual structure
764        let frame_data = &self.compressed_data[data_offset..data_offset + bin_size as usize - 8];
765
766        let decompressed_bytes = zstd_decompress(&frame_data).unwrap();
767
768        let (scan, tof, intensity) =
769            parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap();
770
771        let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type;
772
773        let ms_type = match ms_type_raw {
774            0 => MsType::Precursor,
775            8 => MsType::FragmentDda,
776            9 => MsType::FragmentDia,
777            _ => MsType::Unknown,
778        };
779
780        let raw_frame = RawTimsFrame {
781            frame_id: frame_id as i32,
782            retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time,
783            ms_type,
784            scan,
785            tof,
786            intensity: intensity.iter().map(|&x| x as f64).collect(),
787        };
788
789        raw_frame
790    }
791
792    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
793        let pool = ThreadPoolBuilder::new()
794            .num_threads(num_threads)
795            .build()
796            .unwrap();
797        let frames = pool.install(|| {
798            frame_ids
799                .par_iter()
800                .map(|&frame_id| self.get_frame(frame_id))
801                .collect()
802        });
803
804        TimsSlice { frames }
805    }
806
807    fn get_acquisition_mode(&self) -> AcquisitionMode {
808        self.raw_data_layout.acquisition_mode.clone()
809    }
810
811    fn get_frame_count(&self) -> i32 {
812        self.raw_data_layout.frame_meta_data.len() as i32
813    }
814
815    fn get_data_path(&self) -> &str {
816        &self.raw_data_layout.raw_data_path
817    }
818}
819
820pub enum TimsDataLoader {
821    InMemory(TimsInMemoryLoader),
822    Lazy(TimsLazyLoder),
823}
824
825impl TimsDataLoader {
826    pub fn new_lazy(
827        bruker_lib_path: &str,
828        data_path: &str,
829        use_bruker_sdk: bool,
830        scan_max_index: u32,
831        im_lower: f64,
832        im_upper: f64,
833        tof_max_index: u32,
834        mz_lower: f64,
835        mz_upper: f64,
836    ) -> Self {
837        let raw_data_layout = TimsRawDataLayout::new(data_path);
838
839        let index_converter = match use_bruker_sdk {
840            true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new(
841                bruker_lib_path,
842                data_path,
843            )),
844            false => {
845                // Try to derive accurate calibration using SDK, fall back to simple model
846                match derive_mz_calibration(bruker_lib_path, data_path, tof_max_index) {
847                    Some((intercept, slope)) => {
848                        TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
849                            intercept,
850                            slope,
851                            im_lower,
852                            im_upper,
853                            scan_max_index,
854                        ))
855                    }
856                    None => {
857                        eprintln!(
858                            "Warning: Could not derive m/z calibration from SDK. \
859                            Using simple boundary model which may have ~5 Da error on some datasets. \
860                            This typically happens on macOS where Bruker SDK is not available."
861                        );
862                        TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries(
863                            mz_lower,
864                            mz_upper,
865                            tof_max_index,
866                            im_lower,
867                            im_upper,
868                            scan_max_index,
869                        ))
870                    }
871                }
872            }
873        };
874
875        TimsDataLoader::Lazy(TimsLazyLoder {
876            raw_data_layout,
877            index_converter,
878        })
879    }
880
881    pub fn new_in_memory(
882        bruker_lib_path: &str,
883        data_path: &str,
884        use_bruker_sdk: bool,
885        scan_max_index: u32,
886        im_lower: f64,
887        im_upper: f64,
888        tof_max_index: u32,
889        mz_lower: f64,
890        mz_upper: f64,
891    ) -> Self {
892        let raw_data_layout = TimsRawDataLayout::new(data_path);
893
894        let index_converter = match use_bruker_sdk {
895            true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new(
896                bruker_lib_path,
897                data_path,
898            )),
899            false => {
900                // Try to derive accurate calibration using SDK, fall back to simple model
901                match derive_mz_calibration(bruker_lib_path, data_path, tof_max_index) {
902                    Some((intercept, slope)) => {
903                        TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
904                            intercept,
905                            slope,
906                            im_lower,
907                            im_upper,
908                            scan_max_index,
909                        ))
910                    }
911                    None => {
912                        eprintln!(
913                            "Warning: Could not derive m/z calibration from SDK. \
914                            Using simple boundary model which may have ~5 Da error on some datasets. \
915                            This typically happens on macOS where Bruker SDK is not available."
916                        );
917                        TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries(
918                            mz_lower,
919                            mz_upper,
920                            tof_max_index,
921                            im_lower,
922                            im_upper,
923                            scan_max_index,
924                        ))
925                    }
926                }
927            }
928        };
929
930        let mut file_path = PathBuf::from(data_path);
931        file_path.push("analysis.tdf_bin");
932        let mut infile = File::open(file_path).unwrap();
933        let mut data = Vec::new();
934        infile.read_to_end(&mut data).unwrap();
935
936        TimsDataLoader::InMemory(TimsInMemoryLoader {
937            raw_data_layout,
938            index_converter,
939            compressed_data: data,
940        })
941    }
942
943    /// Create a lazy loader with pre-computed ion mobility calibration lookup table.
944    ///
945    /// This method enables accurate ion mobility calibration with fast parallel extraction.
946    /// The im_lookup table should be pre-computed using the Bruker SDK.
947    ///
948    /// # Arguments
949    /// * `data_path` - Path to the .d folder
950    /// * `bruker_lib_path` - Path to the Bruker SDK shared library; used to
951    ///   derive an accurate m/z calibration. Pass "NO_SDK" (or an empty
952    ///   string) to skip and use the 2-point boundary m/z model.
953    /// * `tof_max_index` - Maximum TOF index (from GlobalMetaData)
954    /// * `mz_lower` - Minimum m/z value (from GlobalMetaData)
955    /// * `mz_upper` - Maximum m/z value (from GlobalMetaData)
956    /// * `im_lookup` - Pre-computed scan→1/K0 lookup table
957    ///
958    /// # Returns
959    /// A new TimsDataLoader with LookupIndexConverter
960    pub fn new_lazy_with_calibration(
961        data_path: &str,
962        bruker_lib_path: &str,
963        tof_max_index: u32,
964        mz_lower: f64,
965        mz_upper: f64,
966        im_lookup: Vec<f64>,
967    ) -> Self {
968        let raw_data_layout = TimsRawDataLayout::new(data_path);
969
970        let index_converter = TimsIndexConverter::Lookup(build_lookup_converter(
971            bruker_lib_path,
972            data_path,
973            tof_max_index,
974            mz_lower,
975            mz_upper,
976            im_lookup,
977        ));
978
979        TimsDataLoader::Lazy(TimsLazyLoder {
980            raw_data_layout,
981            index_converter,
982        })
983    }
984
985    /// Create an in-memory loader with pre-computed ion mobility calibration lookup table.
986    ///
987    /// This method enables accurate ion mobility calibration with fast parallel extraction.
988    /// The im_lookup table should be pre-computed using the Bruker SDK.
989    ///
990    /// # Arguments
991    /// * `data_path` - Path to the .d folder
992    /// * `bruker_lib_path` - Path to the Bruker SDK shared library; used to
993    ///   derive an accurate m/z calibration. Pass "NO_SDK" (or an empty
994    ///   string) to skip and use the 2-point boundary m/z model.
995    /// * `tof_max_index` - Maximum TOF index (from GlobalMetaData)
996    /// * `mz_lower` - Minimum m/z value (from GlobalMetaData)
997    /// * `mz_upper` - Maximum m/z value (from GlobalMetaData)
998    /// * `im_lookup` - Pre-computed scan→1/K0 lookup table
999    ///
1000    /// # Returns
1001    /// A new TimsDataLoader with LookupIndexConverter
1002    pub fn new_in_memory_with_calibration(
1003        data_path: &str,
1004        bruker_lib_path: &str,
1005        tof_max_index: u32,
1006        mz_lower: f64,
1007        mz_upper: f64,
1008        im_lookup: Vec<f64>,
1009    ) -> Self {
1010        let raw_data_layout = TimsRawDataLayout::new(data_path);
1011
1012        let index_converter = TimsIndexConverter::Lookup(build_lookup_converter(
1013            bruker_lib_path,
1014            data_path,
1015            tof_max_index,
1016            mz_lower,
1017            mz_upper,
1018            im_lookup,
1019        ));
1020
1021        let mut file_path = PathBuf::from(data_path);
1022        file_path.push("analysis.tdf_bin");
1023        let mut infile = File::open(file_path).unwrap();
1024        let mut data = Vec::new();
1025        infile.read_to_end(&mut data).unwrap();
1026
1027        TimsDataLoader::InMemory(TimsInMemoryLoader {
1028            raw_data_layout,
1029            index_converter,
1030            compressed_data: data,
1031        })
1032    }
1033
1034    /// Create a lazy loader with full calibration (both m/z and IM).
1035    ///
1036    /// This method uses regression-derived m/z calibration coefficients instead of
1037    /// the simple boundary model, providing more accurate m/z conversion.
1038    ///
1039    /// # Arguments
1040    /// * `data_path` - Path to the .d folder
1041    /// * `tof_intercept` - Intercept for sqrt(mz) = intercept + slope * tof
1042    /// * `tof_slope` - Slope for sqrt(mz) = intercept + slope * tof
1043    /// * `im_min` - Minimum 1/K0 value
1044    /// * `im_max` - Maximum 1/K0 value
1045    /// * `scan_max_index` - Maximum scan index
1046    pub fn new_lazy_with_mz_calibration(
1047        data_path: &str,
1048        tof_intercept: f64,
1049        tof_slope: f64,
1050        im_min: f64,
1051        im_max: f64,
1052        scan_max_index: u32,
1053    ) -> Self {
1054        let raw_data_layout = TimsRawDataLayout::new(data_path);
1055
1056        let index_converter = TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
1057            tof_intercept,
1058            tof_slope,
1059            im_min,
1060            im_max,
1061            scan_max_index,
1062        ));
1063
1064        TimsDataLoader::Lazy(TimsLazyLoder {
1065            raw_data_layout,
1066            index_converter,
1067        })
1068    }
1069
1070    /// Create an in-memory loader with full calibration (both m/z and IM).
1071    ///
1072    /// This method uses regression-derived m/z calibration coefficients instead of
1073    /// the simple boundary model, providing more accurate m/z conversion.
1074    pub fn new_in_memory_with_mz_calibration(
1075        data_path: &str,
1076        tof_intercept: f64,
1077        tof_slope: f64,
1078        im_min: f64,
1079        im_max: f64,
1080        scan_max_index: u32,
1081    ) -> Self {
1082        let raw_data_layout = TimsRawDataLayout::new(data_path);
1083
1084        let index_converter = TimsIndexConverter::Calibrated(CalibratedIndexConverter::new(
1085            tof_intercept,
1086            tof_slope,
1087            im_min,
1088            im_max,
1089            scan_max_index,
1090        ));
1091
1092        let mut file_path = PathBuf::from(data_path);
1093        file_path.push("analysis.tdf_bin");
1094        let mut infile = File::open(file_path).unwrap();
1095        let mut data = Vec::new();
1096        infile.read_to_end(&mut data).unwrap();
1097
1098        TimsDataLoader::InMemory(TimsInMemoryLoader {
1099            raw_data_layout,
1100            index_converter,
1101            compressed_data: data,
1102        })
1103    }
1104
1105    pub fn get_index_converter(&self) -> &dyn IndexConverter {
1106        match self {
1107            TimsDataLoader::InMemory(loader) => &loader.index_converter,
1108            TimsDataLoader::Lazy(loader) => &loader.index_converter,
1109        }
1110    }
1111
1112    /// Check if the Bruker SDK is being used for index conversion.
1113    /// The Bruker SDK is NOT thread-safe, so parallel operations that call
1114    /// the index converter must be disabled when using the SDK.
1115    pub fn uses_bruker_sdk(&self) -> bool {
1116        match self {
1117            TimsDataLoader::InMemory(loader) => matches!(&loader.index_converter, TimsIndexConverter::BrukerLib(_)),
1118            TimsDataLoader::Lazy(loader) => matches!(&loader.index_converter, TimsIndexConverter::BrukerLib(_)),
1119        }
1120    }
1121}
1122
1123impl TimsData for TimsDataLoader {
1124    fn get_frame(&self, frame_id: u32) -> TimsFrame {
1125        match self {
1126            TimsDataLoader::InMemory(loader) => loader.get_frame(frame_id),
1127            TimsDataLoader::Lazy(loader) => loader.get_frame(frame_id),
1128        }
1129    }
1130    fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame {
1131        match self {
1132            TimsDataLoader::InMemory(loader) => loader.get_raw_frame(frame_id),
1133            TimsDataLoader::Lazy(loader) => loader.get_raw_frame(frame_id),
1134        }
1135    }
1136
1137    fn get_slice(&self, frame_ids: Vec<u32>, num_threads: usize) -> TimsSlice {
1138        match self {
1139            TimsDataLoader::InMemory(loader) => loader.get_slice(frame_ids, num_threads),
1140            TimsDataLoader::Lazy(loader) => loader.get_slice(frame_ids, num_threads),
1141        }
1142    }
1143
1144    fn get_acquisition_mode(&self) -> AcquisitionMode {
1145        match self {
1146            TimsDataLoader::InMemory(loader) => loader.get_acquisition_mode(),
1147            TimsDataLoader::Lazy(loader) => loader.get_acquisition_mode(),
1148        }
1149    }
1150
1151    fn get_frame_count(&self) -> i32 {
1152        match self {
1153            TimsDataLoader::InMemory(loader) => loader.get_frame_count(),
1154            TimsDataLoader::Lazy(loader) => loader.get_frame_count(),
1155        }
1156    }
1157
1158    fn get_data_path(&self) -> &str {
1159        match self {
1160            TimsDataLoader::InMemory(loader) => loader.get_data_path(),
1161            TimsDataLoader::Lazy(loader) => loader.get_data_path(),
1162        }
1163    }
1164}
1165
1166pub struct SimpleIndexConverter {
1167    pub tof_intercept: f64,
1168    pub tof_slope: f64,
1169    pub scan_intercept: f64,
1170    pub scan_slope: f64,
1171}
1172
1173impl SimpleIndexConverter {
1174    pub fn from_boundaries(
1175        mz_min: f64,
1176        mz_max: f64,
1177        tof_max_index: u32,
1178        im_min: f64,
1179        im_max: f64,
1180        scan_max_index: u32,
1181    ) -> Self {
1182        let tof_intercept: f64 = mz_min.sqrt();
1183        let tof_slope: f64 = (mz_max.sqrt() - tof_intercept) / tof_max_index as f64;
1184
1185        let scan_intercept: f64 = im_max;
1186        let scan_slope: f64 = (im_min - scan_intercept) / scan_max_index as f64;
1187        Self {
1188            tof_intercept,
1189            tof_slope,
1190            scan_intercept,
1191            scan_slope,
1192        }
1193    }
1194}
1195
1196impl IndexConverter for SimpleIndexConverter {
1197    fn tof_to_mz(&self, _frame_id: u32, _tof_values: &Vec<u32>) -> Vec<f64> {
1198        let mut mz_values: Vec<f64> = Vec::new();
1199        mz_values.resize(_tof_values.len(), 0.0);
1200
1201        for (i, &val) in _tof_values.iter().enumerate() {
1202            mz_values[i] = (self.tof_intercept + self.tof_slope * val as f64).powi(2);
1203        }
1204
1205        mz_values
1206    }
1207
1208    fn mz_to_tof(&self, _frame_id: u32, _mz_values: &Vec<f64>) -> Vec<u32> {
1209        let mut tof_values: Vec<u32> = Vec::new();
1210        tof_values.resize(_mz_values.len(), 0);
1211
1212        for (i, &val) in _mz_values.iter().enumerate() {
1213            tof_values[i] = ((val.sqrt() - self.tof_intercept) / self.tof_slope) as u32;
1214        }
1215
1216        tof_values
1217    }
1218
1219    fn scan_to_inverse_mobility(&self, _frame_id: u32, _scan_values: &Vec<u32>) -> Vec<f64> {
1220        let mut inv_mobility_values: Vec<f64> = Vec::new();
1221        inv_mobility_values.resize(_scan_values.len(), 0.0);
1222
1223        for (i, &val) in _scan_values.iter().enumerate() {
1224            inv_mobility_values[i] = self.scan_intercept + self.scan_slope * val as f64;
1225        }
1226
1227        inv_mobility_values
1228    }
1229
1230    fn inverse_mobility_to_scan(
1231        &self,
1232        _frame_id: u32,
1233        _inverse_mobility_values: &Vec<f64>,
1234    ) -> Vec<u32> {
1235        let mut scan_values: Vec<u32> = Vec::new();
1236        scan_values.resize(_inverse_mobility_values.len(), 0);
1237
1238        for (i, &val) in _inverse_mobility_values.iter().enumerate() {
1239            scan_values[i] = ((val - self.scan_intercept) / self.scan_slope) as u32;
1240        }
1241
1242        scan_values
1243    }
1244}
1245
1246/// M/z calibrated index converter using regression-derived coefficients.
1247///
1248/// This provides accurate TOF to m/z conversion without requiring the Bruker SDK
1249/// by using linear regression coefficients derived from known precursor m/z values.
1250///
1251/// The calibration formula is:
1252///   sqrt(mz) = tof_intercept + tof_slope * tof_index
1253///
1254/// This is similar to SimpleIndexConverter but uses externally-provided coefficients
1255/// (e.g., from regression on precursor data) rather than boundary-derived values.
1256pub struct CalibratedIndexConverter {
1257    pub tof_intercept: f64,
1258    pub tof_slope: f64,
1259    pub scan_intercept: f64,
1260    pub scan_slope: f64,
1261}
1262
1263impl CalibratedIndexConverter {
1264    /// Create a new calibrated converter with regression-derived coefficients.
1265    ///
1266    /// # Arguments
1267    /// * `tof_intercept` - Intercept for sqrt(mz) = intercept + slope * tof
1268    /// * `tof_slope` - Slope for sqrt(mz) = intercept + slope * tof
1269    /// * `im_min` - Minimum 1/K0 value
1270    /// * `im_max` - Maximum 1/K0 value
1271    /// * `scan_max_index` - Maximum scan index
1272    pub fn new(
1273        tof_intercept: f64,
1274        tof_slope: f64,
1275        im_min: f64,
1276        im_max: f64,
1277        scan_max_index: u32,
1278    ) -> Self {
1279        let scan_intercept = im_max;
1280        let scan_slope = (im_min - scan_intercept) / scan_max_index as f64;
1281        Self {
1282            tof_intercept,
1283            tof_slope,
1284            scan_intercept,
1285            scan_slope,
1286        }
1287    }
1288}
1289
1290impl IndexConverter for CalibratedIndexConverter {
1291    fn tof_to_mz(&self, _frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
1292        let mut mz_values: Vec<f64> = Vec::with_capacity(tof_values.len());
1293
1294        for &tof_index in tof_values.iter() {
1295            // sqrt(mz) = tof_intercept + tof_slope * tof_index
1296            let sqrt_mz = self.tof_intercept + self.tof_slope * tof_index as f64;
1297            mz_values.push(sqrt_mz * sqrt_mz);
1298        }
1299
1300        mz_values
1301    }
1302
1303    fn mz_to_tof(&self, _frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
1304        let mut tof_values: Vec<u32> = Vec::with_capacity(mz_values.len());
1305
1306        for &mz in mz_values.iter() {
1307            let sqrt_mz = mz.sqrt();
1308            // tof_index = (sqrt(mz) - tof_intercept) / tof_slope
1309            tof_values.push(((sqrt_mz - self.tof_intercept) / self.tof_slope) as u32);
1310        }
1311
1312        tof_values
1313    }
1314
1315    fn scan_to_inverse_mobility(&self, _frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
1316        let mut inv_mobility_values: Vec<f64> = Vec::with_capacity(scan_values.len());
1317
1318        for &val in scan_values.iter() {
1319            inv_mobility_values.push(self.scan_intercept + self.scan_slope * val as f64);
1320        }
1321
1322        inv_mobility_values
1323    }
1324
1325    fn inverse_mobility_to_scan(
1326        &self,
1327        _frame_id: u32,
1328        inverse_mobility_values: &Vec<f64>,
1329    ) -> Vec<u32> {
1330        let mut scan_values: Vec<u32> = Vec::with_capacity(inverse_mobility_values.len());
1331
1332        for &val in inverse_mobility_values.iter() {
1333            scan_values.push(((val - self.scan_intercept) / self.scan_slope) as u32);
1334        }
1335
1336        scan_values
1337    }
1338}
1339
1340/// Ion mobility index converter using pre-computed lookup table.
1341///
1342/// This converter uses a pre-computed scan→1/K0 lookup table extracted from the Bruker SDK.
1343/// It enables accurate ion mobility calibration with fast parallel extraction.
1344///
1345/// Background:
1346/// - The Bruker calibration formula is patented and proprietary
1347/// - Using the Bruker SDK gives accurate values but is slow (not thread-safe)
1348/// - Linear interpolation is fast but inaccurate
1349/// - This converter uses SDK-probed lookup for accuracy with O(1) thread-safe lookups
1350///
1351/// The lookup table is typically small (~8KB for 1000 scans) and constant across all frames.
1352pub struct LookupIndexConverter {
1353    // m/z conversion uses simple linear model (accurate enough for most purposes)
1354    pub tof_intercept: f64,
1355    pub tof_slope: f64,
1356
1357    // Ion mobility: pre-computed lookup table from Bruker SDK
1358    // scan_index → 1/K0 value
1359    pub im_lookup: Vec<f64>,
1360
1361    // Fallback for inverse conversion (1/K0 → scan)
1362    // We store the min/max for binary search bounds
1363    pub im_min: f64,
1364    pub im_max: f64,
1365}
1366
1367impl LookupIndexConverter {
1368    /// Create a new LookupIndexConverter with pre-computed ion mobility lookup.
1369    ///
1370    /// # Arguments
1371    /// * `mz_min` - Minimum m/z value for TOF conversion
1372    /// * `mz_max` - Maximum m/z value for TOF conversion
1373    /// * `tof_max_index` - Maximum TOF index
1374    /// * `im_lookup` - Pre-computed scan→1/K0 lookup table from Bruker SDK
1375    ///
1376    /// # Returns
1377    /// A new LookupIndexConverter instance
1378    pub fn new(
1379        mz_min: f64,
1380        mz_max: f64,
1381        tof_max_index: u32,
1382        im_lookup: Vec<f64>,
1383    ) -> Self {
1384        let tof_intercept: f64 = mz_min.sqrt();
1385        let tof_slope: f64 = (mz_max.sqrt() - tof_intercept) / tof_max_index as f64;
1386
1387        // Get IM bounds for inverse conversion
1388        let im_min = im_lookup.iter().cloned().fold(f64::INFINITY, f64::min);
1389        let im_max = im_lookup.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1390
1391        Self {
1392            tof_intercept,
1393            tof_slope,
1394            im_lookup,
1395            im_min,
1396            im_max,
1397        }
1398    }
1399
1400    /// Create a `LookupIndexConverter` with regression-derived m/z coefficients.
1401    ///
1402    /// Uses `sqrt(mz) = tof_intercept + tof_slope * tof` with coefficients fitted
1403    /// from the Bruker SDK (see `derive_mz_calibration`), instead of the 2-point
1404    /// boundary model in `new`. Preferred whenever the SDK is available.
1405    ///
1406    /// # Arguments
1407    /// * `tof_intercept` - Intercept of the sqrt(mz)-vs-tof regression
1408    /// * `tof_slope` - Slope of the sqrt(mz)-vs-tof regression
1409    /// * `im_lookup` - Pre-computed scan→1/K0 lookup table from Bruker SDK
1410    pub fn with_mz_fit(tof_intercept: f64, tof_slope: f64, im_lookup: Vec<f64>) -> Self {
1411        // Get IM bounds for inverse conversion
1412        let im_min = im_lookup.iter().cloned().fold(f64::INFINITY, f64::min);
1413        let im_max = im_lookup.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
1414
1415        Self {
1416            tof_intercept,
1417            tof_slope,
1418            im_lookup,
1419            im_min,
1420            im_max,
1421        }
1422    }
1423}
1424
1425impl IndexConverter for LookupIndexConverter {
1426    fn tof_to_mz(&self, _frame_id: u32, tof_values: &Vec<u32>) -> Vec<f64> {
1427        let mut mz_values: Vec<f64> = Vec::new();
1428        mz_values.resize(tof_values.len(), 0.0);
1429
1430        for (i, &val) in tof_values.iter().enumerate() {
1431            mz_values[i] = (self.tof_intercept + self.tof_slope * val as f64).powi(2);
1432        }
1433
1434        mz_values
1435    }
1436
1437    fn mz_to_tof(&self, _frame_id: u32, mz_values: &Vec<f64>) -> Vec<u32> {
1438        let mut tof_values: Vec<u32> = Vec::new();
1439        tof_values.resize(mz_values.len(), 0);
1440
1441        for (i, &val) in mz_values.iter().enumerate() {
1442            tof_values[i] = ((val.sqrt() - self.tof_intercept) / self.tof_slope) as u32;
1443        }
1444
1445        tof_values
1446    }
1447
1448    fn scan_to_inverse_mobility(&self, _frame_id: u32, scan_values: &Vec<u32>) -> Vec<f64> {
1449        // Use the pre-computed lookup table for O(1) conversion
1450        scan_values
1451            .iter()
1452            .map(|&s| {
1453                self.im_lookup
1454                    .get(s as usize)
1455                    .copied()
1456                    .unwrap_or(f64::NAN)
1457            })
1458            .collect()
1459    }
1460
1461    fn inverse_mobility_to_scan(
1462        &self,
1463        _frame_id: u32,
1464        inverse_mobility_values: &Vec<f64>,
1465    ) -> Vec<u32> {
1466        // Use binary search to find the closest scan index for each 1/K0 value
1467        // The lookup table is monotonically decreasing (higher scan = lower 1/K0)
1468        inverse_mobility_values
1469            .iter()
1470            .map(|&im| {
1471                if im.is_nan() || self.im_lookup.is_empty() {
1472                    return 0;
1473                }
1474
1475                // Binary search for the closest value
1476                // Note: im_lookup is typically monotonically decreasing
1477                let mut best_scan = 0usize;
1478                let mut best_diff = f64::INFINITY;
1479
1480                for (scan, &lookup_im) in self.im_lookup.iter().enumerate() {
1481                    let diff = (lookup_im - im).abs();
1482                    if diff < best_diff {
1483                        best_diff = diff;
1484                        best_scan = scan;
1485                    }
1486                }
1487
1488                best_scan as u32
1489            })
1490            .collect()
1491    }
1492}