rustdf/cluster/
io.rs

1use std::fs::File;
2use std::io::{BufReader, BufWriter};
3use serde::{Deserialize, Serialize};
4use crate::cluster::utility::Fit1D;
5use super::cluster::ClusterResult1D;
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct ClusterRow {
9    pub cluster_id: u64,
10    pub ms_level: u8,
11    pub window_group: Option<u32>,
12    pub parent_im_id: Option<i64>,
13    pub parent_rt_id: Option<i64>,
14
15    pub rt_lo: usize,
16    pub rt_hi: usize,
17    pub im_lo: usize,
18    pub im_hi: usize,
19    pub tof_lo: usize,
20    pub tof_hi: usize,
21    pub tof_index_lo: i32,
22    pub tof_index_hi: i32,
23    pub mz_lo: Option<f32>,
24    pub mz_hi: Option<f32>,
25
26    pub rt_mu: f32,
27    pub rt_sigma: f32,
28    pub rt_height: f32,
29    pub rt_area: f32,
30
31    pub im_mu: f32,
32    pub im_sigma: f32,
33    pub im_height: f32,
34    pub im_area: f32,
35
36    pub tof_mu: f32,
37    pub tof_sigma: f32,
38    pub tof_height: f32,
39    pub tof_area: f32,
40
41    pub mz_mu: Option<f32>,
42    pub mz_sigma: Option<f32>,
43    pub mz_height: Option<f32>,
44    pub mz_area: Option<f32>,
45
46    pub raw_sum: f32,
47    pub volume_proxy: f32,
48}
49
50impl From<&ClusterResult1D> for ClusterRow {
51    fn from(c: &ClusterResult1D) -> Self {
52        let (rt_lo, rt_hi) = c.rt_window;
53        let (im_lo, im_hi) = c.im_window;
54        let (tof_lo, tof_hi) = c.tof_window;
55        let (tof_index_lo, tof_index_hi) = c.tof_index_window;
56
57        let fit = |f: &Fit1D| (f.mu, f.sigma, f.height, f.area);
58
59        let (rt_mu, rt_sigma, rt_height, rt_area) = fit(&c.rt_fit);
60        let (im_mu, im_sigma, im_height, im_area) = fit(&c.im_fit);
61        let (tof_mu, tof_sigma, tof_height, tof_area) = fit(&c.tof_fit);
62
63        // First get mz window
64        let (mz_lo, mz_hi) = match c.mz_window {
65            Some((lo, hi)) => (Some(lo), Some(hi)),
66            None => (None, None),
67        };
68
69        // Then get any fitted mz parameters
70        let (mz_mu_raw, mz_sigma, mz_height, mz_area) = match &c.mz_fit {
71            Some(f) => {
72                let (mu, sigma, h, a) = fit(f);
73                (Some(mu), Some(sigma), Some(h), Some(a))
74            }
75            None => (None, None, None, None),
76        };
77
78        // Fallback: if no fitted mz_mu but we *do* have mz_lo/mz_hi, use midpoint
79        let mz_mu = match (mz_mu_raw, mz_lo, mz_hi) {
80            (Some(mu), _, _) => Some(mu),
81            (None, Some(lo), Some(hi)) => Some(0.5 * (lo + hi)),
82            _ => None,
83        };
84
85        ClusterRow {
86            cluster_id: c.cluster_id,
87            ms_level: c.ms_level,
88            window_group: c.window_group,
89            parent_im_id: c.parent_im_id,
90            parent_rt_id: c.parent_rt_id,
91
92            rt_lo,
93            rt_hi,
94            im_lo,
95            im_hi,
96            tof_lo,
97            tof_hi,
98            tof_index_lo,
99            tof_index_hi,
100            mz_lo,
101            mz_hi,
102
103            rt_mu,
104            rt_sigma,
105            rt_height,
106            rt_area,
107
108            im_mu,
109            im_sigma,
110            im_height,
111            im_area,
112
113            tof_mu,
114            tof_sigma,
115            tof_height,
116            tof_area,
117
118            mz_mu,
119            mz_sigma,
120            mz_height,
121            mz_area,
122
123            raw_sum: c.raw_sum,
124            volume_proxy: c.volume_proxy,
125        }
126    }
127}
128
129impl ClusterRow {
130    /// Convert a flattened row back into a lightweight ClusterResult1D.
131    /// Heavy fields (raw_points, axes, traces) are left empty / None.
132    pub fn into_cluster_result(self) -> ClusterResult1D {
133        let ClusterRow {
134            cluster_id,
135            ms_level,
136            window_group,
137            parent_im_id,
138            parent_rt_id,
139
140            rt_lo,
141            rt_hi,
142            im_lo,
143            im_hi,
144            tof_lo,
145            tof_hi,
146            tof_index_lo,
147            tof_index_hi,
148            mz_lo,
149            mz_hi,
150
151            rt_mu,
152            rt_sigma,
153            rt_height,
154            rt_area,
155
156            im_mu,
157            im_sigma,
158            im_height,
159            im_area,
160
161            tof_mu,
162            tof_sigma,
163            tof_height,
164            tof_area,
165
166            mz_mu,
167            mz_sigma,
168            mz_height,
169            mz_area,
170
171            raw_sum,
172            volume_proxy,
173        } = self;
174
175        let rt_fit = Fit1D {
176            mu: rt_mu,
177            sigma: rt_sigma,
178            height: rt_height,
179            baseline: 0.0,
180            area: rt_area,
181            r2: 0.0,
182            n: 0,
183        };
184        let im_fit = Fit1D {
185            mu: im_mu,
186            sigma: im_sigma,
187            height: im_height,
188            baseline: 0.0,
189            area: im_area,
190            r2: 0.0,
191            n: 0,
192        };
193        let tof_fit = Fit1D {
194            mu: tof_mu,
195            sigma: tof_sigma,
196            height: tof_height,
197            baseline: 0.0,
198            area: tof_area,
199            r2: 0.0,
200            n: 0,
201        };
202        let mz_fit = match (mz_mu, mz_sigma, mz_height, mz_area) {
203            (Some(mu), Some(sigma), Some(h), Some(a)) => Some(Fit1D {
204                mu,
205                sigma,
206                height: h,
207                baseline: 0.0,
208                area: a,
209                r2: 0.0,
210                n: 0,
211            }),
212            _ => None,
213        };
214
215        ClusterResult1D {
216            cluster_id,
217            rt_window: (rt_lo, rt_hi),
218            im_window: (im_lo, im_hi),
219            tof_window: (tof_lo, tof_hi),
220            tof_index_window: (tof_index_lo, tof_index_hi),
221            mz_window: match (mz_lo, mz_hi) {
222                (Some(lo), Some(hi)) => Some((lo, hi)),
223                _ => None,
224            },
225            rt_fit,
226            im_fit,
227            tof_fit,
228            mz_fit,
229            raw_sum,
230            volume_proxy,
231            frame_ids_used: Vec::new(), // not stored in parquet
232            window_group,
233            parent_im_id,
234            parent_rt_id,
235            ms_level,
236
237            // heavy / optional stuff omitted in parquet
238            rt_axis_sec: None,
239            im_axis_scans: None,
240            mz_axis_da: None,
241            raw_points: None,
242            rt_trace: None,
243            im_trace: None,
244        }
245    }
246}
247
248use std::io;
249use polars::prelude::*;
250use rayon::prelude::*;
251use crate::cluster::pseudo::PseudoSpectrum;
252
253pub fn save_parquet(path: &str, clusters: &[ClusterResult1D]) -> io::Result<()> {
254    let rows: Vec<ClusterRow> = clusters.iter().map(ClusterRow::from).collect();
255    let n = rows.len();
256
257    // Pre-allocate vectors for each column
258    let mut cluster_id = Vec::with_capacity(n);
259    let mut ms_level = Vec::with_capacity(n);
260    let mut window_group = Vec::with_capacity(n);
261    let mut parent_im_id = Vec::with_capacity(n);
262    let mut parent_rt_id = Vec::with_capacity(n);
263
264    let mut rt_lo = Vec::with_capacity(n);
265    let mut rt_hi = Vec::with_capacity(n);
266    let mut im_lo = Vec::with_capacity(n);
267    let mut im_hi = Vec::with_capacity(n);
268    let mut tof_lo = Vec::with_capacity(n);
269    let mut tof_hi = Vec::with_capacity(n);
270    let mut tof_index_lo = Vec::with_capacity(n);
271    let mut tof_index_hi = Vec::with_capacity(n);
272    let mut mz_lo = Vec::with_capacity(n);
273    let mut mz_hi = Vec::with_capacity(n);
274
275    let mut rt_mu = Vec::with_capacity(n);
276    let mut rt_sigma = Vec::with_capacity(n);
277    let mut rt_height = Vec::with_capacity(n);
278    let mut rt_area = Vec::with_capacity(n);
279
280    let mut im_mu = Vec::with_capacity(n);
281    let mut im_sigma = Vec::with_capacity(n);
282    let mut im_height = Vec::with_capacity(n);
283    let mut im_area = Vec::with_capacity(n);
284
285    let mut tof_mu = Vec::with_capacity(n);
286    let mut tof_sigma = Vec::with_capacity(n);
287    let mut tof_height = Vec::with_capacity(n);
288    let mut tof_area = Vec::with_capacity(n);
289
290    let mut mz_mu = Vec::with_capacity(n);
291    let mut mz_sigma = Vec::with_capacity(n);
292    let mut mz_height = Vec::with_capacity(n);
293    let mut mz_area = Vec::with_capacity(n);
294
295    let mut raw_sum = Vec::with_capacity(n);
296    let mut volume_proxy = Vec::with_capacity(n);
297
298    for r in rows {
299        cluster_id.push(r.cluster_id);
300        ms_level.push(r.ms_level);
301        window_group.push(r.window_group);
302        parent_im_id.push(r.parent_im_id);
303        parent_rt_id.push(r.parent_rt_id);
304
305        rt_lo.push(r.rt_lo as u32);
306        rt_hi.push(r.rt_hi as u32);
307        im_lo.push(r.im_lo as u32);
308        im_hi.push(r.im_hi as u32);
309        tof_lo.push(r.tof_lo as u32);
310        tof_hi.push(r.tof_hi as u32);
311        tof_index_lo.push(r.tof_index_lo);
312        tof_index_hi.push(r.tof_index_hi);
313        mz_lo.push(r.mz_lo);
314        mz_hi.push(r.mz_hi);
315
316        rt_mu.push(r.rt_mu);
317        rt_sigma.push(r.rt_sigma);
318        rt_height.push(r.rt_height);
319        rt_area.push(r.rt_area);
320
321        im_mu.push(r.im_mu);
322        im_sigma.push(r.im_sigma);
323        im_height.push(r.im_height);
324        im_area.push(r.im_area);
325
326        tof_mu.push(r.tof_mu);
327        tof_sigma.push(r.tof_sigma);
328        tof_height.push(r.tof_height);
329        tof_area.push(r.tof_area);
330
331        mz_mu.push(r.mz_mu);
332        mz_sigma.push(r.mz_sigma);
333        mz_height.push(r.mz_height);
334        mz_area.push(r.mz_area);
335
336        raw_sum.push(r.raw_sum);
337        volume_proxy.push(r.volume_proxy);
338    }
339
340    let mut df = DataFrame::new(vec![
341        Series::new(PlSmallStr::from("cluster_id"), cluster_id),
342        Series::new(PlSmallStr::from("ms_level"), ms_level),
343        Series::new(PlSmallStr::from("window_group"), window_group),
344        Series::new(PlSmallStr::from("parent_im_id"), parent_im_id),
345        Series::new(PlSmallStr::from("parent_rt_id"), parent_rt_id),
346
347        Series::new(PlSmallStr::from("rt_lo"), rt_lo),
348        Series::new(PlSmallStr::from("rt_hi"), rt_hi),
349        Series::new(PlSmallStr::from("im_lo"), im_lo),
350        Series::new(PlSmallStr::from("im_hi"), im_hi),
351        Series::new(PlSmallStr::from("tof_lo"), tof_lo),
352        Series::new(PlSmallStr::from("tof_hi"), tof_hi),
353        Series::new(PlSmallStr::from("tof_index_lo"), tof_index_lo),
354        Series::new(PlSmallStr::from("tof_index_hi"), tof_index_hi),
355        Series::new(PlSmallStr::from("mz_lo"), mz_lo),
356        Series::new(PlSmallStr::from("mz_hi"), mz_hi),
357
358        Series::new(PlSmallStr::from("rt_mu"), rt_mu),
359        Series::new(PlSmallStr::from("rt_sigma"), rt_sigma),
360        Series::new(PlSmallStr::from("rt_height"), rt_height),
361        Series::new(PlSmallStr::from("rt_area"), rt_area),
362
363        Series::new(PlSmallStr::from("im_mu"), im_mu),
364        Series::new(PlSmallStr::from("im_sigma"), im_sigma),
365        Series::new(PlSmallStr::from("im_height"), im_height),
366        Series::new(PlSmallStr::from("im_area"), im_area),
367
368        Series::new(PlSmallStr::from("tof_mu"), tof_mu),
369        Series::new(PlSmallStr::from("tof_sigma"), tof_sigma),
370        Series::new(PlSmallStr::from("tof_height"), tof_height),
371        Series::new(PlSmallStr::from("tof_area"), tof_area),
372
373        Series::new(PlSmallStr::from("mz_mu"), mz_mu),
374        Series::new(PlSmallStr::from("mz_sigma"), mz_sigma),
375        Series::new(PlSmallStr::from("mz_height"), mz_height),
376        Series::new(PlSmallStr::from("mz_area"), mz_area),
377
378        Series::new(PlSmallStr::from("raw_sum"), raw_sum),
379        Series::new(PlSmallStr::from("volume_proxy"), volume_proxy),
380    ]).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
381
382    let f = File::create(path)?;
383    let mut writer = ParquetWriter::new(f);
384    // You can tune compression here; Zstd is a good default.
385    writer = writer.with_compression(ParquetCompression::Zstd(None));
386
387    writer
388        .finish(&mut df)
389        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
390    Ok(())
391}
392
393
394#[derive(Serialize, Deserialize)]
395pub struct ClusterFile {
396    pub version: u32,
397    pub clusters: Vec<ClusterResult1D>,
398}
399
400impl ClusterFile {
401    pub fn new(clusters: Vec<ClusterResult1D>) -> Self {
402        Self { version: 1, clusters }
403    }
404}
405
406// --- JSON (human-readable) ---
407pub fn save_json(path: &str, clusters: &[ClusterResult1D]) -> std::io::Result<()> {
408    let f = BufWriter::new(File::create(path)?);
409    let cf = ClusterFile::new(clusters.to_vec());
410    serde_json::to_writer_pretty(f, &cf).map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
411}
412
413pub fn load_json(path: &str) -> std::io::Result<Vec<ClusterResult1D>> {
414    let f = BufReader::new(File::open(path)?);
415    let cf: ClusterFile = serde_json::from_reader(f)
416        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
417    Ok(cf.clusters)
418}
419
420// --- Bincode + optional zstd compression ---
421pub fn save_bincode(path: &str, clusters: &[ClusterResult1D], compress: bool) -> std::io::Result<()> {
422    let f = File::create(path)?;
423    if compress {
424        let mut zw = zstd::Encoder::new(f, 3)?; // level 3 is a good default
425        bincode::serialize_into(&mut zw, &ClusterFile::new(clusters.to_vec()))
426            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
427        zw.finish()?;
428        Ok(())
429    } else {
430        let mut bw = BufWriter::new(f);
431        bincode::serialize_into(&mut bw, &ClusterFile::new(clusters.to_vec()))
432            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
433    }
434}
435
436pub fn load_bincode(path: &str) -> std::io::Result<Vec<ClusterResult1D>> {
437    let f = File::open(path)?;
438    // Try zstd first, then plain bincode
439    let try_zstd = zstd::Decoder::new(&f);
440    if let Ok(mut zr) = try_zstd {
441        let cf: ClusterFile = bincode::deserialize_from(&mut zr)
442            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
443        return Ok(cf.clusters);
444    }
445    let f = BufReader::new(File::open(path)?);
446    let cf: ClusterFile = bincode::deserialize_from(f)
447        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
448    Ok(cf.clusters)
449}
450
451pub fn strip_heavy(mut clusters: Vec<ClusterResult1D>, keep_points: bool, keep_axes: bool) -> Vec<ClusterResult1D> {
452    for c in &mut clusters {
453        if !keep_points { c.raw_points = None; }
454        if !keep_axes { c.rt_axis_sec = None; c.im_axis_scans = None; c.mz_axis_da = None; }
455    }
456    clusters
457}
458
459pub fn load_parquet(path: &str) -> io::Result<Vec<ClusterResult1D>> {
460    let f = File::open(path)?;
461    let df = ParquetReader::new(f)
462        // .with_parallel(true)  // usually default, but you can force it
463        .finish()
464        .map_err(to_io)?;
465
466    let n = df.height();
467
468    // --- Helper macros to keep things readable ---
469
470    macro_rules! col_u64 {
471        ($name:literal) => {{
472            df.column($name)
473                .map_err(to_io)?
474                .u64()
475                .map_err(to_io)?
476                .into_iter()
477                .map(|v| v.unwrap_or(0))
478                .collect::<Vec<u64>>()
479        }};
480    }
481
482    macro_rules! col_u8 {
483        ($name:literal) => {{
484            df.column($name)
485                .map_err(to_io)?
486                .u8()
487                .map_err(to_io)?
488                .into_iter()
489                .map(|v| v.unwrap_or(0))
490                .collect::<Vec<u8>>()
491        }};
492    }
493
494    macro_rules! col_u32 {
495        ($name:literal) => {{
496            df.column($name)
497                .map_err(to_io)?
498                .u32()
499                .map_err(to_io)?
500                .into_iter()
501                .map(|v| v.unwrap_or(0))
502                .collect::<Vec<u32>>()
503        }};
504    }
505
506    macro_rules! col_i32 {
507        ($name:literal) => {{
508            df.column($name)
509                .map_err(to_io)?
510                .i32()
511                .map_err(to_io)?
512                .into_iter()
513                .map(|v| v.unwrap_or(0))
514                .collect::<Vec<i32>>()
515        }};
516    }
517
518    macro_rules! col_i64_opt {
519        ($name:literal) => {{
520            df.column($name)
521                .map_err(to_io)?
522                .i64()
523                .map_err(to_io)?
524                .into_iter()
525                .collect::<Vec<Option<i64>>>()
526        }};
527    }
528
529    macro_rules! col_u32_opt {
530        ($name:literal) => {{
531            df.column($name)
532                .map_err(to_io)?
533                .u32()
534                .map_err(to_io)?
535                .into_iter()
536                .collect::<Vec<Option<u32>>>()
537        }};
538    }
539
540    macro_rules! col_f32 {
541        ($name:literal) => {{
542            df.column($name)
543                .map_err(to_io)?
544                .f32()
545                .map_err(to_io)?
546                .into_iter()
547                .map(|v| v.unwrap_or(0.0))
548                .collect::<Vec<f32>>()
549        }};
550    }
551
552    macro_rules! col_f32_opt {
553        ($name:literal) => {{
554            df.column($name)
555                .map_err(to_io)?
556                .f32()
557                .map_err(to_io)?
558                .into_iter()
559                .collect::<Vec<Option<f32>>>()
560        }};
561    }
562
563    // --- Materialize all columns into plain Vecs once ---
564
565    let cluster_id      = col_u64!("cluster_id");
566    let ms_level        = col_u8!("ms_level");
567    let window_group    = col_u32_opt!("window_group");
568    let parent_im_id    = col_i64_opt!("parent_im_id");
569    let parent_rt_id    = col_i64_opt!("parent_rt_id");
570
571    let rt_lo           = col_u32!("rt_lo");
572    let rt_hi           = col_u32!("rt_hi");
573    let im_lo           = col_u32!("im_lo");
574    let im_hi           = col_u32!("im_hi");
575    let tof_lo          = col_u32!("tof_lo");
576    let tof_hi          = col_u32!("tof_hi");
577    let tof_index_lo    = col_i32!("tof_index_lo");
578    let tof_index_hi    = col_i32!("tof_index_hi");
579    let mz_lo           = col_f32_opt!("mz_lo");
580    let mz_hi           = col_f32_opt!("mz_hi");
581
582    let rt_mu           = col_f32!("rt_mu");
583    let rt_sigma        = col_f32!("rt_sigma");
584    let rt_height       = col_f32!("rt_height");
585    let rt_area         = col_f32!("rt_area");
586
587    let im_mu           = col_f32!("im_mu");
588    let im_sigma        = col_f32!("im_sigma");
589    let im_height       = col_f32!("im_height");
590    let im_area         = col_f32!("im_area");
591
592    let tof_mu          = col_f32!("tof_mu");
593    let tof_sigma       = col_f32!("tof_sigma");
594    let tof_height      = col_f32!("tof_height");
595    let tof_area        = col_f32!("tof_area");
596
597    let mz_mu           = col_f32_opt!("mz_mu");
598    let mz_sigma        = col_f32_opt!("mz_sigma");
599    let mz_height       = col_f32_opt!("mz_height");
600    let mz_area         = col_f32_opt!("mz_area");
601
602    let raw_sum         = col_f32!("raw_sum");
603    let volume_proxy    = col_f32!("volume_proxy");
604
605    // Sanity check (paranoid, but catches schema drift)
606    debug_assert_eq!(cluster_id.len(), n);
607
608    // --- Reconstruct ClusterResult1D rows ---
609    //
610    // This is now just cheap slice indexing. You can keep it serial,
611    // or parallelize with rayon if `ClusterResult1D: Send + Sync`.
612
613    let out: Vec<ClusterResult1D> = (0..n)
614        .into_par_iter() // ← use into_iter() if you don't want rayon
615        .map(|i| {
616            let row = ClusterRow {
617                cluster_id: cluster_id[i],
618                ms_level:   ms_level[i],
619
620                window_group: window_group[i],
621                parent_im_id: parent_im_id[i],
622                parent_rt_id: parent_rt_id[i],
623
624                rt_lo: rt_lo[i] as usize,
625                rt_hi: rt_hi[i] as usize,
626                im_lo: im_lo[i] as usize,
627                im_hi: im_hi[i] as usize,
628                tof_lo: tof_lo[i] as usize,
629                tof_hi: tof_hi[i] as usize,
630                tof_index_lo: tof_index_lo[i],
631                tof_index_hi: tof_index_hi[i],
632                mz_lo: mz_lo[i],
633                mz_hi: mz_hi[i],
634
635                rt_mu: rt_mu[i],
636                rt_sigma: rt_sigma[i],
637                rt_height: rt_height[i],
638                rt_area: rt_area[i],
639
640                im_mu: im_mu[i],
641                im_sigma: im_sigma[i],
642                im_height: im_height[i],
643                im_area: im_area[i],
644
645                tof_mu: tof_mu[i],
646                tof_sigma: tof_sigma[i],
647                tof_height: tof_height[i],
648                tof_area: tof_area[i],
649
650                mz_mu: mz_mu[i],
651                mz_sigma: mz_sigma[i],
652                mz_height: mz_height[i],
653                mz_area: mz_area[i],
654
655                raw_sum: raw_sum[i],
656                volume_proxy: volume_proxy[i],
657            };
658
659            row.into_cluster_result()
660        })
661        .collect();
662
663    Ok(out)
664}
665
666fn to_io(e: PolarsError) -> io::Error {
667    io::Error::new(io::ErrorKind::Other, e)
668}
669
670#[derive(Serialize, Deserialize)]
671pub struct PseudoSpectraFile {
672    pub version: u32,
673    pub spectra: Vec<PseudoSpectrum>,
674}
675
676impl PseudoSpectraFile {
677    pub fn new(spectra: Vec<PseudoSpectrum>) -> Self {
678        Self {
679            version: 1,
680            spectra,
681        }
682    }
683}
684
685pub fn save_pseudo_bincode(
686    path: &str,
687    spectra: &[PseudoSpectrum],
688    compress: bool,
689) -> io::Result<()> {
690    let f = File::create(path)?;
691    if compress {
692        // zstd compression, level 3 is a good default
693        let mut zw = zstd::Encoder::new(f, 3)?;
694        bincode::serialize_into(&mut zw, &PseudoSpectraFile::new(spectra.to_vec()))
695            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
696        zw.finish()?;
697        Ok(())
698    } else {
699        let mut bw = BufWriter::new(f);
700        bincode::serialize_into(&mut bw, &PseudoSpectraFile::new(spectra.to_vec()))
701            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))
702    }
703}
704
705pub fn load_pseudo_bincode(path: &str) -> io::Result<Vec<PseudoSpectrum>> {
706    let f = File::open(path)?;
707
708    // Try zstd first
709    if let Ok(mut zr) = zstd::Decoder::new(&f) {
710        let pf: PseudoSpectraFile = bincode::deserialize_from(&mut zr)
711            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
712        return Ok(pf.spectra);
713    }
714
715    // Fallback: plain bincode
716    let f = BufReader::new(File::open(path)?);
717    let pf: PseudoSpectraFile = bincode::deserialize_from(f)
718        .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
719    Ok(pf.spectra)
720}