// rustdf/data/meta.rs

1extern crate rusqlite;
2
3use rusqlite::{Connection, Result};
4use std::path::Path;
5
/// One row of the `DiaFrameMsMsInfo` table, as loaded by [`read_dia_ms_ms_info`].
///
/// The type name keeps its historical spelling (`MsMis`) so existing callers
/// continue to compile.
#[derive(Debug, Clone, PartialEq)]
pub struct DiaMsMisInfo {
    /// Value of the `Frame` column.
    pub frame_id: u32,
    /// Value of the `WindowGroup` column.
    pub window_group: u32,
}
11
/// One row of the `DiaFrameMsMsWindows` table, as loaded by [`read_dia_ms_ms_windows`].
#[derive(Debug, Clone, PartialEq)]
pub struct DiaMsMsWindow {
    /// Value of the `WindowGroup` column.
    pub window_group: u32,
    /// Value of the `ScanNumBegin` column.
    pub scan_num_begin: u32,
    /// Value of the `ScanNumEnd` column.
    pub scan_num_end: u32,
    /// Value of the `IsolationMz` column.
    pub isolation_mz: f64,
    /// Value of the `IsolationWidth` column.
    pub isolation_width: f64,
    /// Value of the `CollisionEnergy` column.
    pub collision_energy: f64,
}
21
/// One row of the `PasefFrameMsMsInfo` table, as loaded by
/// [`read_pasef_frame_ms_ms_info`].
#[derive(Debug, Clone, PartialEq)]
pub struct PasefMsMsMeta {
    /// Value of the `Frame` column.
    pub frame_id: i64,
    /// Value of the `ScanNumBegin` column.
    pub scan_num_begin: i64,
    /// Value of the `ScanNumEnd` column.
    pub scan_num_end: i64,
    /// Value of the `IsolationMz` column.
    pub isolation_mz: f64,
    /// Value of the `IsolationWidth` column.
    pub isolation_width: f64,
    /// Value of the `CollisionEnergy` column.
    pub collision_energy: f64,
    /// Value of the `Precursor` column.
    pub precursor_id: i64,
}
32
/// One row of the `Precursors` table, as loaded by [`read_dda_precursor_meta`].
#[derive(Debug, Clone, PartialEq)]
pub struct DDAPrecursorMeta {
    /// Value of the `Id` column.
    pub precursor_id: i64,
    /// Value of the `LargestPeakMz` column.
    pub precursor_mz_highest_intensity: f64,
    /// Value of the `AverageMz` column.
    pub precursor_mz_average: f64,
    /// Value of the `MonoisotopicMz` column; `None` when the column is `NULL`.
    pub precursor_mz_monoisotopic: Option<f64>,
    /// Value of the `Charge` column; `None` when the column is `NULL`.
    pub precursor_charge: Option<i64>,
    /// Value of the `ScanNumber` column.
    pub precursor_average_scan_number: f64,
    /// Value of the `Intensity` column.
    pub precursor_total_intensity: f64,
    /// Value of the `Parent` column.
    pub precursor_frame_id: i64,
}
44
/// Precursor record for DDA data.
///
/// Nothing in this file constructs this type. The field descriptions below are
/// inferred from the field names only and should be confirmed against the code
/// that builds it.
#[derive(Debug, Clone, PartialEq)]
pub struct DDAPrecursor {
    /// Presumably the id of the frame the precursor belongs to.
    pub frame_id: i64,
    /// Presumably the precursor id.
    pub precursor_id: i64,
    /// Presumably the monoisotopic m/z, when known.
    pub mono_mz: Option<f64>,
    /// Presumably the m/z of the most intense peak.
    pub highest_intensity_mz: f64,
    /// Presumably the average m/z.
    pub average_mz: f64,
    /// Presumably the charge state, when known.
    pub charge: Option<i64>,
    /// Presumably the inverse ion mobility.
    pub inverse_ion_mobility: f64,
    /// Presumably the collision energy.
    pub collision_energy: f64,
    /// Presumably the precursor's total intensity. The misspelling (`precuror`)
    /// is part of the public API and is kept for compatibility.
    pub precuror_total_intensity: f64,
    /// Presumably the isolation window centre m/z.
    pub isolation_mz: f64,
    /// Presumably the isolation window width.
    pub isolation_width: f64,
}
59
/// Fragment-frame record for DDA data.
///
/// Nothing in this file constructs this type. Its fields parallel
/// [`PasefMsMsMeta`] (with `scan_begin`/`scan_end` in place of
/// `scan_num_begin`/`scan_num_end`); confirm the exact meaning against the code
/// that builds it.
#[derive(Debug, Clone, PartialEq)]
pub struct DDAFragmentInfo {
    /// Presumably the frame id.
    pub frame_id: i64,
    /// Presumably the first scan of the isolation window.
    pub scan_begin: i64,
    /// Presumably the last scan of the isolation window.
    pub scan_end: i64,
    /// Presumably the isolation window centre m/z.
    pub isolation_mz: f64,
    /// Presumably the isolation window width.
    pub isolation_width: f64,
    /// Presumably the collision energy.
    pub collision_energy: f64,
    /// Presumably the id of the fragmented precursor.
    pub precursor_id: i64,
}
70
/// Placeholder type with no fields; nothing in this file uses it yet.
#[derive(Debug, Clone, PartialEq)]
pub struct DIAFragmentFrameInfo {}
72
/// Placeholder type with no fields; nothing in this file uses it yet.
#[derive(Debug, Clone, PartialEq)]
pub struct DIAWindowGroupInfo {}
74
/// M/z calibration data from the `MzCalibration` table, as loaded by
/// [`read_mz_calibration`].
///
/// Used for accurate TOF to m/z conversion without the Bruker SDK.
#[derive(Debug, Clone, PartialEq)]
pub struct MzCalibration {
    /// Value of the `Id` column.
    pub id: i64,
    /// Value of the `ModelType` column.
    pub model_type: i64,
    /// Value of the `DigitizerTimebase` column.
    pub digitizer_timebase: f64,
    /// Value of the `DigitizerDelay` column.
    pub digitizer_delay: f64,
    /// Value of the `T1` column.
    pub t1: f64,
    /// Value of the `T2` column.
    pub t2: f64,
    /// Value of the `C0` column.
    pub c0: f64,
    /// Value of the `C1` column.
    pub c1: f64,
    /// Value of the `C2` column.
    pub c2: f64,
    /// Value of the `C3` column.
    pub c3: f64,
    /// Value of the `C4` column.
    pub c4: f64,
}
91
/// Typed view of selected entries of the `GlobalMetadata` key/value table, as
/// assembled by [`read_global_meta_sql`].
///
/// A key that is missing from the table leaves its field at the reader's
/// placeholder value: an empty string, `-1`, `-1.0`, or `0` for `tof_max_index`.
#[derive(Debug, Clone, PartialEq)]
pub struct GlobalMetaData {
    /// Value stored under the `SchemaType` key.
    pub schema_type: String,
    /// Value stored under the `SchemaVersionMajor` key.
    pub schema_version_major: i64,
    /// Value stored under the `SchemaVersionMinor` key.
    pub schema_version_minor: i64,
    /// Value stored under the `AcquisitionSoftwareVendor` key.
    pub acquisition_software_vendor: String,
    /// Value stored under the `InstrumentVendor` key.
    pub instrument_vendor: String,
    /// Value stored under the `ClosedProperly` key. The field name differs from
    /// the key; it is public API and is kept as is.
    pub closed_property: i64,
    /// Value stored under the `TimsCompressionType` key.
    pub tims_compression_type: i64,
    /// Value stored under the `MaxNumPeaksPerScan` key.
    pub max_num_peaks_per_scan: i64,
    /// Value stored under the `MzAcqRangeLower` key.
    pub mz_acquisition_range_lower: f64,
    /// Value stored under the `MzAcqRangeUpper` key.
    pub mz_acquisition_range_upper: f64,
    /// Value stored under the `OneOverK0AcqRangeLower` key.
    pub one_over_k0_range_lower: f64,
    /// Value stored under the `OneOverK0AcqRangeUpper` key.
    pub one_over_k0_range_upper: f64,
    /// Value stored under the `DigitizerNumSamples` key, plus one.
    pub tof_max_index: u32,
}
108
/// One row of the `Frames` table, as loaded by [`read_meta_data_sql`].
#[derive(Debug, Clone, PartialEq)]
pub struct FrameMeta {
    /// Value of the `Id` column.
    pub id: i64,
    /// Value of the `Time` column.
    pub time: f64,
    /// Value of the `Polarity` column.
    pub polarity: String,
    /// Value of the `ScanMode` column.
    pub scan_mode: i64,
    /// Value of the `MsMsType` column.
    pub ms_ms_type: i64,
    /// Value of the `TimsId` column.
    pub tims_id: i64,
    /// Value of the `MaxIntensity` column.
    pub max_intensity: f64,
    /// Value of the `SummedIntensities` column.
    pub sum_intensity: f64,
    /// Value of the `NumScans` column.
    pub num_scans: i64,
    /// Value of the `NumPeaks` column.
    pub num_peaks: i64,
    /// Value of the `MzCalibration` column.
    pub mz_calibration: i64,
    /// Value of the `T1` column.
    pub t_1: f64,
    /// Value of the `T2` column.
    pub t_2: f64,
    /// Value of the `TimsCalibration` column.
    pub tims_calibration: i64,
    /// Value of the `PropertyGroup` column.
    pub property_group: i64,
    /// Value of the `AccumulationTime` column.
    pub accumulation_time: f64,
    /// Value of the `RampTime` column.
    pub ramp_time: f64,
}
129
/// Raw key/value pair read from the `GlobalMetadata` table before the value is
/// parsed into a typed [`GlobalMetaData`] field.
#[derive(Debug, Clone)]
struct GlobalMetaInternal {
    /// First selected column of the row (the metadata key).
    key: String,
    /// Second selected column of the row (the unparsed value).
    value: String,
}
134
135pub fn read_dda_precursor_meta(
136    bruker_d_folder_name: &str,
137) -> Result<Vec<DDAPrecursorMeta>, Box<dyn std::error::Error>> {
138    // Connect to the database
139    let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
140    let conn = Connection::open(db_path)?;
141
142    // prepare the query
143    let rows: Vec<&str> = vec![
144        "Id",
145        "LargestPeakMz",
146        "AverageMz",
147        "MonoisotopicMz",
148        "Charge",
149        "ScanNumber",
150        "Intensity",
151        "Parent",
152    ];
153    let query = format!("SELECT {} FROM Precursors", rows.join(", "));
154
155    // execute the query
156    let frames_rows: Result<Vec<DDAPrecursorMeta>, _> = conn
157        .prepare(&query)?
158        .query_map([], |row| {
159            Ok(DDAPrecursorMeta {
160                precursor_id: row.get(0)?,
161                precursor_mz_highest_intensity: row.get(1)?,
162                precursor_mz_average: row.get(2)?,
163                precursor_mz_monoisotopic: row.get(3)?, // Now using Option<f64>
164                precursor_charge: row.get(4)?,          // Now using Option<i64>
165                precursor_average_scan_number: row.get(5)?,
166                precursor_total_intensity: row.get(6)?,
167                precursor_frame_id: row.get(7)?,
168            })
169        })?
170        .collect();
171
172    // return the frames
173    Ok(frames_rows?)
174}
175
176pub fn read_pasef_frame_ms_ms_info(
177    bruker_d_folder_name: &str,
178) -> Result<Vec<PasefMsMsMeta>, Box<dyn std::error::Error>> {
179    // Connect to the database
180    let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
181    let conn = Connection::open(db_path)?;
182
183    // prepare the query
184    let rows: Vec<&str> = vec![
185        "Frame",
186        "ScanNumBegin",
187        "ScanNumEnd",
188        "IsolationMz",
189        "IsolationWidth",
190        "CollisionEnergy",
191        "Precursor",
192    ];
193    let query = format!("SELECT {} FROM PasefFrameMsMsInfo", rows.join(", "));
194
195    // execute the query
196    let frames_rows: Result<Vec<PasefMsMsMeta>, _> = conn
197        .prepare(&query)?
198        .query_map([], |row| {
199            Ok(PasefMsMsMeta {
200                frame_id: row.get(0)?,
201                scan_num_begin: row.get(1)?,
202                scan_num_end: row.get(2)?,
203                isolation_mz: row.get(3)?,
204                isolation_width: row.get(4)?,
205                collision_energy: row.get(5)?,
206                precursor_id: row.get(6)?,
207            })
208        })?
209        .collect();
210
211    // return the frames
212    Ok(frames_rows?)
213}
214
215// Read the global meta data from the analysis.tdf file
216pub fn read_global_meta_sql(
217    bruker_d_folder_name: &str,
218) -> Result<GlobalMetaData, Box<dyn std::error::Error>> {
219    // Connect to the database
220    let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
221    let conn = Connection::open(db_path)?;
222
223    // execute the query
224    let frames_rows: Result<Vec<GlobalMetaInternal>, _> = conn
225        .prepare("SELECT * FROM GlobalMetadata")?
226        .query_map([], |row| {
227            Ok(GlobalMetaInternal {
228                key: row.get(0)?,
229                value: row.get(1)?,
230            })
231        })?
232        .collect();
233
234    let mut global_meta = GlobalMetaData {
235        schema_type: String::new(),
236        schema_version_major: -1,
237        schema_version_minor: -1,
238        acquisition_software_vendor: String::new(),
239        instrument_vendor: String::new(),
240        closed_property: -1,
241        tims_compression_type: -1,
242        max_num_peaks_per_scan: -1,
243        mz_acquisition_range_lower: -1.0,
244        mz_acquisition_range_upper: -1.0,
245        one_over_k0_range_lower: -1.0,
246        one_over_k0_range_upper: -1.0,
247        tof_max_index: 0,
248    };
249
250    // go over the keys and parse values for the global meta data
251    for row in frames_rows? {
252        match row.key.as_str() {
253            "SchemaType" => global_meta.schema_type = row.value,
254            "SchemaVersionMajor" => {
255                global_meta.schema_version_major = row.value.parse::<i64>().unwrap()
256            }
257            "SchemaVersionMinor" => {
258                global_meta.schema_version_minor = row.value.parse::<i64>().unwrap()
259            }
260            "AcquisitionSoftwareVendor" => global_meta.acquisition_software_vendor = row.value,
261            "InstrumentVendor" => global_meta.instrument_vendor = row.value,
262            "ClosedProperly" => global_meta.closed_property = row.value.parse::<i64>().unwrap(),
263            "TimsCompressionType" => {
264                global_meta.tims_compression_type = row.value.parse::<i64>().unwrap()
265            }
266            "MaxNumPeaksPerScan" => {
267                global_meta.max_num_peaks_per_scan = row.value.parse::<i64>().unwrap()
268            }
269            "MzAcqRangeLower" => {
270                global_meta.mz_acquisition_range_lower = row.value.parse::<f64>().unwrap()
271            }
272            "MzAcqRangeUpper" => {
273                global_meta.mz_acquisition_range_upper = row.value.parse::<f64>().unwrap()
274            }
275            "OneOverK0AcqRangeLower" => {
276                global_meta.one_over_k0_range_lower = row.value.parse::<f64>().unwrap()
277            }
278            "OneOverK0AcqRangeUpper" => {
279                global_meta.one_over_k0_range_upper = row.value.parse::<f64>().unwrap()
280            }
281            "DigitizerNumSamples" => {
282                global_meta.tof_max_index = (row.value.parse::<i64>().unwrap() + 1) as u32
283            }
284            _ => (),
285        }
286    }
287    // return global_meta
288    Ok(global_meta)
289}
290
291// Read the frame meta data from the analysis.tdf file
292pub fn read_meta_data_sql(
293    bruker_d_folder_name: &str,
294) -> Result<Vec<FrameMeta>, Box<dyn std::error::Error>> {
295    // Connect to the database
296    let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
297    let conn = Connection::open(db_path)?;
298
299    // prepare the query
300    let rows: Vec<&str> = vec![
301        "Id",
302        "Time",
303        "ScanMode",
304        "Polarity",
305        "MsMsType",
306        "TimsId",
307        "MaxIntensity",
308        "SummedIntensities",
309        "NumScans",
310        "NumPeaks",
311        "MzCalibration",
312        "T1",
313        "T2",
314        "TimsCalibration",
315        "PropertyGroup",
316        "AccumulationTime",
317        "RampTime",
318    ];
319    let query = format!("SELECT {} FROM Frames", rows.join(", "));
320
321    // execute the query
322    let frames_rows: Result<Vec<FrameMeta>, _> = conn
323        .prepare(&query)?
324        .query_map([], |row| {
325            Ok(FrameMeta {
326                id: row.get(0)?,
327                time: row.get(1)?,
328                scan_mode: row.get(2)?,
329                polarity: row.get(3)?,
330                ms_ms_type: row.get(4)?,
331                tims_id: row.get(5)?,
332                max_intensity: row.get(6)?,
333                sum_intensity: row.get(7)?,
334                num_scans: row.get(8)?,
335                num_peaks: row.get(9)?,
336                mz_calibration: row.get(10)?,
337                t_1: row.get(11)?,
338                t_2: row.get(12)?,
339                tims_calibration: row.get(13)?,
340                property_group: row.get(14)?,
341                accumulation_time: row.get(15)?,
342                ramp_time: row.get(16)?,
343            })
344        })?
345        .collect();
346
347    // return the frames
348    Ok(frames_rows?)
349}
350
351pub fn read_dia_ms_ms_info(
352    bruker_d_folder_name: &str,
353) -> Result<Vec<DiaMsMisInfo>, Box<dyn std::error::Error>> {
354    // Connect to the database
355    let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
356    let conn = Connection::open(db_path)?;
357
358    // prepare the query
359    let rows: Vec<&str> = vec!["Frame", "WindowGroup"];
360    let query = format!("SELECT {} FROM DiaFrameMsMsInfo", rows.join(", "));
361
362    // execute the query
363    let frames_rows: Result<Vec<DiaMsMisInfo>, _> = conn
364        .prepare(&query)?
365        .query_map([], |row| {
366            Ok(DiaMsMisInfo {
367                frame_id: row.get(0)?,
368                window_group: row.get(1)?,
369            })
370        })?
371        .collect();
372
373    // return the frames
374    Ok(frames_rows?)
375}
376
377pub fn read_dia_ms_ms_windows(
378    bruker_d_folder_name: &str,
379) -> Result<Vec<DiaMsMsWindow>, Box<dyn std::error::Error>> {
380    // Connect to the database
381    let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
382    let conn = Connection::open(db_path)?;
383
384    // prepare the query
385    let rows: Vec<&str> = vec![
386        "WindowGroup",
387        "ScanNumBegin",
388        "ScanNumEnd",
389        "IsolationMz",
390        "IsolationWidth",
391        "CollisionEnergy",
392    ];
393    let query = format!("SELECT {} FROM DiaFrameMsMsWindows", rows.join(", "));
394
395    // execute the query
396    let frames_rows: Result<Vec<DiaMsMsWindow>, _> = conn
397        .prepare(&query)?
398        .query_map([], |row| {
399            Ok(DiaMsMsWindow {
400                window_group: row.get(0)?,
401                scan_num_begin: row.get(1)?,
402                scan_num_end: row.get(2)?,
403                isolation_mz: row.get(3)?,
404                isolation_width: row.get(4)?,
405                collision_energy: row.get(5)?,
406            })
407        })?
408        .collect();
409
410    // return the frames
411    Ok(frames_rows?)
412}
413
414/// Read m/z calibration data from the MzCalibration table.
415/// This provides the coefficients needed for accurate TOF to m/z conversion
416/// without requiring the Bruker SDK.
417///
418/// The calibration formula is:
419///   tof_time = (tof_index + 0.5) * digitizer_timebase + digitizer_delay
420///   sqrt(mz) = c0 + c1*tof_time + c2*tof_time^2 + ...
421///
422/// For model_type 2 (most common), the formula simplifies to:
423///   sqrt(mz) = (tof_time - c1) / c0
424pub fn read_mz_calibration(
425    bruker_d_folder_name: &str,
426) -> Result<Vec<MzCalibration>, Box<dyn std::error::Error>> {
427    let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf");
428    let conn = Connection::open(db_path)?;
429
430    // Query MzCalibration table
431    let query = "SELECT Id, ModelType, DigitizerTimebase, DigitizerDelay, T1, T2, C0, C1, C2, C3, C4 FROM MzCalibration";
432
433    let calibrations: Result<Vec<MzCalibration>, _> = conn
434        .prepare(query)?
435        .query_map([], |row| {
436            Ok(MzCalibration {
437                id: row.get(0)?,
438                model_type: row.get(1)?,
439                digitizer_timebase: row.get(2)?,
440                digitizer_delay: row.get(3)?,
441                t1: row.get(4)?,
442                t2: row.get(5)?,
443                c0: row.get(6)?,
444                c1: row.get(7)?,
445                c2: row.get(8)?,
446                c3: row.get(9)?,
447                c4: row.get(10)?,
448            })
449        })?
450        .collect();
451
452    Ok(calibrations?)
453}