1use std::fs::File;
2use std::io::{BufReader, BufWriter};
3use serde::{Deserialize, Serialize};
4use crate::cluster::utility::Fit1D;
5use super::cluster::ClusterResult1D;
6
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct ClusterRow {
9 pub cluster_id: u64,
10 pub ms_level: u8,
11 pub window_group: Option<u32>,
12 pub parent_im_id: Option<i64>,
13 pub parent_rt_id: Option<i64>,
14
15 pub rt_lo: usize,
16 pub rt_hi: usize,
17 pub im_lo: usize,
18 pub im_hi: usize,
19 pub tof_lo: usize,
20 pub tof_hi: usize,
21 pub tof_index_lo: i32,
22 pub tof_index_hi: i32,
23 pub mz_lo: Option<f32>,
24 pub mz_hi: Option<f32>,
25
26 pub rt_mu: f32,
27 pub rt_sigma: f32,
28 pub rt_height: f32,
29 pub rt_area: f32,
30
31 pub im_mu: f32,
32 pub im_sigma: f32,
33 pub im_height: f32,
34 pub im_area: f32,
35
36 pub tof_mu: f32,
37 pub tof_sigma: f32,
38 pub tof_height: f32,
39 pub tof_area: f32,
40
41 pub mz_mu: Option<f32>,
42 pub mz_sigma: Option<f32>,
43 pub mz_height: Option<f32>,
44 pub mz_area: Option<f32>,
45
46 pub raw_sum: f32,
47 pub volume_proxy: f32,
48}
49
50impl From<&ClusterResult1D> for ClusterRow {
51 fn from(c: &ClusterResult1D) -> Self {
52 let (rt_lo, rt_hi) = c.rt_window;
53 let (im_lo, im_hi) = c.im_window;
54 let (tof_lo, tof_hi) = c.tof_window;
55 let (tof_index_lo, tof_index_hi) = c.tof_index_window;
56
57 let fit = |f: &Fit1D| (f.mu, f.sigma, f.height, f.area);
58
59 let (rt_mu, rt_sigma, rt_height, rt_area) = fit(&c.rt_fit);
60 let (im_mu, im_sigma, im_height, im_area) = fit(&c.im_fit);
61 let (tof_mu, tof_sigma, tof_height, tof_area) = fit(&c.tof_fit);
62
63 let (mz_lo, mz_hi) = match c.mz_window {
65 Some((lo, hi)) => (Some(lo), Some(hi)),
66 None => (None, None),
67 };
68
69 let (mz_mu_raw, mz_sigma, mz_height, mz_area) = match &c.mz_fit {
71 Some(f) => {
72 let (mu, sigma, h, a) = fit(f);
73 (Some(mu), Some(sigma), Some(h), Some(a))
74 }
75 None => (None, None, None, None),
76 };
77
78 let mz_mu = match (mz_mu_raw, mz_lo, mz_hi) {
80 (Some(mu), _, _) => Some(mu),
81 (None, Some(lo), Some(hi)) => Some(0.5 * (lo + hi)),
82 _ => None,
83 };
84
85 ClusterRow {
86 cluster_id: c.cluster_id,
87 ms_level: c.ms_level,
88 window_group: c.window_group,
89 parent_im_id: c.parent_im_id,
90 parent_rt_id: c.parent_rt_id,
91
92 rt_lo,
93 rt_hi,
94 im_lo,
95 im_hi,
96 tof_lo,
97 tof_hi,
98 tof_index_lo,
99 tof_index_hi,
100 mz_lo,
101 mz_hi,
102
103 rt_mu,
104 rt_sigma,
105 rt_height,
106 rt_area,
107
108 im_mu,
109 im_sigma,
110 im_height,
111 im_area,
112
113 tof_mu,
114 tof_sigma,
115 tof_height,
116 tof_area,
117
118 mz_mu,
119 mz_sigma,
120 mz_height,
121 mz_area,
122
123 raw_sum: c.raw_sum,
124 volume_proxy: c.volume_proxy,
125 }
126 }
127}
128
129impl ClusterRow {
130 pub fn into_cluster_result(self) -> ClusterResult1D {
133 let ClusterRow {
134 cluster_id,
135 ms_level,
136 window_group,
137 parent_im_id,
138 parent_rt_id,
139
140 rt_lo,
141 rt_hi,
142 im_lo,
143 im_hi,
144 tof_lo,
145 tof_hi,
146 tof_index_lo,
147 tof_index_hi,
148 mz_lo,
149 mz_hi,
150
151 rt_mu,
152 rt_sigma,
153 rt_height,
154 rt_area,
155
156 im_mu,
157 im_sigma,
158 im_height,
159 im_area,
160
161 tof_mu,
162 tof_sigma,
163 tof_height,
164 tof_area,
165
166 mz_mu,
167 mz_sigma,
168 mz_height,
169 mz_area,
170
171 raw_sum,
172 volume_proxy,
173 } = self;
174
175 let rt_fit = Fit1D {
176 mu: rt_mu,
177 sigma: rt_sigma,
178 height: rt_height,
179 baseline: 0.0,
180 area: rt_area,
181 r2: 0.0,
182 n: 0,
183 };
184 let im_fit = Fit1D {
185 mu: im_mu,
186 sigma: im_sigma,
187 height: im_height,
188 baseline: 0.0,
189 area: im_area,
190 r2: 0.0,
191 n: 0,
192 };
193 let tof_fit = Fit1D {
194 mu: tof_mu,
195 sigma: tof_sigma,
196 height: tof_height,
197 baseline: 0.0,
198 area: tof_area,
199 r2: 0.0,
200 n: 0,
201 };
202 let mz_fit = match (mz_mu, mz_sigma, mz_height, mz_area) {
203 (Some(mu), Some(sigma), Some(h), Some(a)) => Some(Fit1D {
204 mu,
205 sigma,
206 height: h,
207 baseline: 0.0,
208 area: a,
209 r2: 0.0,
210 n: 0,
211 }),
212 _ => None,
213 };
214
215 ClusterResult1D {
216 cluster_id,
217 rt_window: (rt_lo, rt_hi),
218 im_window: (im_lo, im_hi),
219 tof_window: (tof_lo, tof_hi),
220 tof_index_window: (tof_index_lo, tof_index_hi),
221 mz_window: match (mz_lo, mz_hi) {
222 (Some(lo), Some(hi)) => Some((lo, hi)),
223 _ => None,
224 },
225 rt_fit,
226 im_fit,
227 tof_fit,
228 mz_fit,
229 raw_sum,
230 volume_proxy,
231 frame_ids_used: Vec::new(), window_group,
233 parent_im_id,
234 parent_rt_id,
235 ms_level,
236
237 rt_axis_sec: None,
239 im_axis_scans: None,
240 mz_axis_da: None,
241 raw_points: None,
242 rt_trace: None,
243 im_trace: None,
244 }
245 }
246}
247
248use std::io;
249use polars::prelude::*;
250use rayon::prelude::*;
251use crate::cluster::pseudo::PseudoSpectrum;
252
253pub fn save_parquet(path: &str, clusters: &[ClusterResult1D]) -> io::Result<()> {
254 let rows: Vec<ClusterRow> = clusters.iter().map(ClusterRow::from).collect();
255 let n = rows.len();
256
257 let mut cluster_id = Vec::with_capacity(n);
259 let mut ms_level = Vec::with_capacity(n);
260 let mut window_group = Vec::with_capacity(n);
261 let mut parent_im_id = Vec::with_capacity(n);
262 let mut parent_rt_id = Vec::with_capacity(n);
263
264 let mut rt_lo = Vec::with_capacity(n);
265 let mut rt_hi = Vec::with_capacity(n);
266 let mut im_lo = Vec::with_capacity(n);
267 let mut im_hi = Vec::with_capacity(n);
268 let mut tof_lo = Vec::with_capacity(n);
269 let mut tof_hi = Vec::with_capacity(n);
270 let mut tof_index_lo = Vec::with_capacity(n);
271 let mut tof_index_hi = Vec::with_capacity(n);
272 let mut mz_lo = Vec::with_capacity(n);
273 let mut mz_hi = Vec::with_capacity(n);
274
275 let mut rt_mu = Vec::with_capacity(n);
276 let mut rt_sigma = Vec::with_capacity(n);
277 let mut rt_height = Vec::with_capacity(n);
278 let mut rt_area = Vec::with_capacity(n);
279
280 let mut im_mu = Vec::with_capacity(n);
281 let mut im_sigma = Vec::with_capacity(n);
282 let mut im_height = Vec::with_capacity(n);
283 let mut im_area = Vec::with_capacity(n);
284
285 let mut tof_mu = Vec::with_capacity(n);
286 let mut tof_sigma = Vec::with_capacity(n);
287 let mut tof_height = Vec::with_capacity(n);
288 let mut tof_area = Vec::with_capacity(n);
289
290 let mut mz_mu = Vec::with_capacity(n);
291 let mut mz_sigma = Vec::with_capacity(n);
292 let mut mz_height = Vec::with_capacity(n);
293 let mut mz_area = Vec::with_capacity(n);
294
295 let mut raw_sum = Vec::with_capacity(n);
296 let mut volume_proxy = Vec::with_capacity(n);
297
298 for r in rows {
299 cluster_id.push(r.cluster_id);
300 ms_level.push(r.ms_level);
301 window_group.push(r.window_group);
302 parent_im_id.push(r.parent_im_id);
303 parent_rt_id.push(r.parent_rt_id);
304
305 rt_lo.push(r.rt_lo as u32);
306 rt_hi.push(r.rt_hi as u32);
307 im_lo.push(r.im_lo as u32);
308 im_hi.push(r.im_hi as u32);
309 tof_lo.push(r.tof_lo as u32);
310 tof_hi.push(r.tof_hi as u32);
311 tof_index_lo.push(r.tof_index_lo);
312 tof_index_hi.push(r.tof_index_hi);
313 mz_lo.push(r.mz_lo);
314 mz_hi.push(r.mz_hi);
315
316 rt_mu.push(r.rt_mu);
317 rt_sigma.push(r.rt_sigma);
318 rt_height.push(r.rt_height);
319 rt_area.push(r.rt_area);
320
321 im_mu.push(r.im_mu);
322 im_sigma.push(r.im_sigma);
323 im_height.push(r.im_height);
324 im_area.push(r.im_area);
325
326 tof_mu.push(r.tof_mu);
327 tof_sigma.push(r.tof_sigma);
328 tof_height.push(r.tof_height);
329 tof_area.push(r.tof_area);
330
331 mz_mu.push(r.mz_mu);
332 mz_sigma.push(r.mz_sigma);
333 mz_height.push(r.mz_height);
334 mz_area.push(r.mz_area);
335
336 raw_sum.push(r.raw_sum);
337 volume_proxy.push(r.volume_proxy);
338 }
339
340 let mut df = DataFrame::new(vec![
341 Series::new(PlSmallStr::from("cluster_id"), cluster_id),
342 Series::new(PlSmallStr::from("ms_level"), ms_level),
343 Series::new(PlSmallStr::from("window_group"), window_group),
344 Series::new(PlSmallStr::from("parent_im_id"), parent_im_id),
345 Series::new(PlSmallStr::from("parent_rt_id"), parent_rt_id),
346
347 Series::new(PlSmallStr::from("rt_lo"), rt_lo),
348 Series::new(PlSmallStr::from("rt_hi"), rt_hi),
349 Series::new(PlSmallStr::from("im_lo"), im_lo),
350 Series::new(PlSmallStr::from("im_hi"), im_hi),
351 Series::new(PlSmallStr::from("tof_lo"), tof_lo),
352 Series::new(PlSmallStr::from("tof_hi"), tof_hi),
353 Series::new(PlSmallStr::from("tof_index_lo"), tof_index_lo),
354 Series::new(PlSmallStr::from("tof_index_hi"), tof_index_hi),
355 Series::new(PlSmallStr::from("mz_lo"), mz_lo),
356 Series::new(PlSmallStr::from("mz_hi"), mz_hi),
357
358 Series::new(PlSmallStr::from("rt_mu"), rt_mu),
359 Series::new(PlSmallStr::from("rt_sigma"), rt_sigma),
360 Series::new(PlSmallStr::from("rt_height"), rt_height),
361 Series::new(PlSmallStr::from("rt_area"), rt_area),
362
363 Series::new(PlSmallStr::from("im_mu"), im_mu),
364 Series::new(PlSmallStr::from("im_sigma"), im_sigma),
365 Series::new(PlSmallStr::from("im_height"), im_height),
366 Series::new(PlSmallStr::from("im_area"), im_area),
367
368 Series::new(PlSmallStr::from("tof_mu"), tof_mu),
369 Series::new(PlSmallStr::from("tof_sigma"), tof_sigma),
370 Series::new(PlSmallStr::from("tof_height"), tof_height),
371 Series::new(PlSmallStr::from("tof_area"), tof_area),
372
373 Series::new(PlSmallStr::from("mz_mu"), mz_mu),
374 Series::new(PlSmallStr::from("mz_sigma"), mz_sigma),
375 Series::new(PlSmallStr::from("mz_height"), mz_height),
376 Series::new(PlSmallStr::from("mz_area"), mz_area),
377
378 Series::new(PlSmallStr::from("raw_sum"), raw_sum),
379 Series::new(PlSmallStr::from("volume_proxy"), volume_proxy),
380 ]).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
381
382 let f = File::create(path)?;
383 let mut writer = ParquetWriter::new(f);
384 writer = writer.with_compression(ParquetCompression::Zstd(None));
386
387 writer
388 .finish(&mut df)
389 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
390 Ok(())
391}
392
393
394#[derive(Serialize, Deserialize)]
395pub struct ClusterFile {
396 pub version: u32,
397 pub clusters: Vec<ClusterResult1D>,
398}
399
400impl ClusterFile {
401 pub fn new(clusters: Vec<ClusterResult1D>) -> Self {
402 Self { version: 1, clusters }
403 }
404}
405
406pub fn save_json(path: &str, clusters: &[ClusterResult1D]) -> std::io::Result<()> {
408 let f = BufWriter::new(File::create(path)?);
409 let cf = ClusterFile::new(clusters.to_vec());
410 serde_json::to_writer_pretty(f, &cf).map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
411}
412
413pub fn load_json(path: &str) -> std::io::Result<Vec<ClusterResult1D>> {
414 let f = BufReader::new(File::open(path)?);
415 let cf: ClusterFile = serde_json::from_reader(f)
416 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
417 Ok(cf.clusters)
418}
419
420pub fn save_bincode(path: &str, clusters: &[ClusterResult1D], compress: bool) -> std::io::Result<()> {
422 let f = File::create(path)?;
423 if compress {
424 let mut zw = zstd::Encoder::new(f, 3)?; bincode::serialize_into(&mut zw, &ClusterFile::new(clusters.to_vec()))
426 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
427 zw.finish()?;
428 Ok(())
429 } else {
430 let mut bw = BufWriter::new(f);
431 bincode::serialize_into(&mut bw, &ClusterFile::new(clusters.to_vec()))
432 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
433 }
434}
435
436pub fn load_bincode(path: &str) -> std::io::Result<Vec<ClusterResult1D>> {
437 let f = File::open(path)?;
438 let try_zstd = zstd::Decoder::new(&f);
440 if let Ok(mut zr) = try_zstd {
441 let cf: ClusterFile = bincode::deserialize_from(&mut zr)
442 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
443 return Ok(cf.clusters);
444 }
445 let f = BufReader::new(File::open(path)?);
446 let cf: ClusterFile = bincode::deserialize_from(f)
447 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
448 Ok(cf.clusters)
449}
450
451pub fn strip_heavy(mut clusters: Vec<ClusterResult1D>, keep_points: bool, keep_axes: bool) -> Vec<ClusterResult1D> {
452 for c in &mut clusters {
453 if !keep_points { c.raw_points = None; }
454 if !keep_axes { c.rt_axis_sec = None; c.im_axis_scans = None; c.mz_axis_da = None; }
455 }
456 clusters
457}
458
459pub fn load_parquet(path: &str) -> io::Result<Vec<ClusterResult1D>> {
460 let f = File::open(path)?;
461 let df = ParquetReader::new(f)
462 .finish()
464 .map_err(to_io)?;
465
466 let n = df.height();
467
468 macro_rules! col_u64 {
471 ($name:literal) => {{
472 df.column($name)
473 .map_err(to_io)?
474 .u64()
475 .map_err(to_io)?
476 .into_iter()
477 .map(|v| v.unwrap_or(0))
478 .collect::<Vec<u64>>()
479 }};
480 }
481
482 macro_rules! col_u8 {
483 ($name:literal) => {{
484 df.column($name)
485 .map_err(to_io)?
486 .u8()
487 .map_err(to_io)?
488 .into_iter()
489 .map(|v| v.unwrap_or(0))
490 .collect::<Vec<u8>>()
491 }};
492 }
493
494 macro_rules! col_u32 {
495 ($name:literal) => {{
496 df.column($name)
497 .map_err(to_io)?
498 .u32()
499 .map_err(to_io)?
500 .into_iter()
501 .map(|v| v.unwrap_or(0))
502 .collect::<Vec<u32>>()
503 }};
504 }
505
506 macro_rules! col_i32 {
507 ($name:literal) => {{
508 df.column($name)
509 .map_err(to_io)?
510 .i32()
511 .map_err(to_io)?
512 .into_iter()
513 .map(|v| v.unwrap_or(0))
514 .collect::<Vec<i32>>()
515 }};
516 }
517
518 macro_rules! col_i64_opt {
519 ($name:literal) => {{
520 df.column($name)
521 .map_err(to_io)?
522 .i64()
523 .map_err(to_io)?
524 .into_iter()
525 .collect::<Vec<Option<i64>>>()
526 }};
527 }
528
529 macro_rules! col_u32_opt {
530 ($name:literal) => {{
531 df.column($name)
532 .map_err(to_io)?
533 .u32()
534 .map_err(to_io)?
535 .into_iter()
536 .collect::<Vec<Option<u32>>>()
537 }};
538 }
539
540 macro_rules! col_f32 {
541 ($name:literal) => {{
542 df.column($name)
543 .map_err(to_io)?
544 .f32()
545 .map_err(to_io)?
546 .into_iter()
547 .map(|v| v.unwrap_or(0.0))
548 .collect::<Vec<f32>>()
549 }};
550 }
551
552 macro_rules! col_f32_opt {
553 ($name:literal) => {{
554 df.column($name)
555 .map_err(to_io)?
556 .f32()
557 .map_err(to_io)?
558 .into_iter()
559 .collect::<Vec<Option<f32>>>()
560 }};
561 }
562
563 let cluster_id = col_u64!("cluster_id");
566 let ms_level = col_u8!("ms_level");
567 let window_group = col_u32_opt!("window_group");
568 let parent_im_id = col_i64_opt!("parent_im_id");
569 let parent_rt_id = col_i64_opt!("parent_rt_id");
570
571 let rt_lo = col_u32!("rt_lo");
572 let rt_hi = col_u32!("rt_hi");
573 let im_lo = col_u32!("im_lo");
574 let im_hi = col_u32!("im_hi");
575 let tof_lo = col_u32!("tof_lo");
576 let tof_hi = col_u32!("tof_hi");
577 let tof_index_lo = col_i32!("tof_index_lo");
578 let tof_index_hi = col_i32!("tof_index_hi");
579 let mz_lo = col_f32_opt!("mz_lo");
580 let mz_hi = col_f32_opt!("mz_hi");
581
582 let rt_mu = col_f32!("rt_mu");
583 let rt_sigma = col_f32!("rt_sigma");
584 let rt_height = col_f32!("rt_height");
585 let rt_area = col_f32!("rt_area");
586
587 let im_mu = col_f32!("im_mu");
588 let im_sigma = col_f32!("im_sigma");
589 let im_height = col_f32!("im_height");
590 let im_area = col_f32!("im_area");
591
592 let tof_mu = col_f32!("tof_mu");
593 let tof_sigma = col_f32!("tof_sigma");
594 let tof_height = col_f32!("tof_height");
595 let tof_area = col_f32!("tof_area");
596
597 let mz_mu = col_f32_opt!("mz_mu");
598 let mz_sigma = col_f32_opt!("mz_sigma");
599 let mz_height = col_f32_opt!("mz_height");
600 let mz_area = col_f32_opt!("mz_area");
601
602 let raw_sum = col_f32!("raw_sum");
603 let volume_proxy = col_f32!("volume_proxy");
604
605 debug_assert_eq!(cluster_id.len(), n);
607
608 let out: Vec<ClusterResult1D> = (0..n)
614 .into_par_iter() .map(|i| {
616 let row = ClusterRow {
617 cluster_id: cluster_id[i],
618 ms_level: ms_level[i],
619
620 window_group: window_group[i],
621 parent_im_id: parent_im_id[i],
622 parent_rt_id: parent_rt_id[i],
623
624 rt_lo: rt_lo[i] as usize,
625 rt_hi: rt_hi[i] as usize,
626 im_lo: im_lo[i] as usize,
627 im_hi: im_hi[i] as usize,
628 tof_lo: tof_lo[i] as usize,
629 tof_hi: tof_hi[i] as usize,
630 tof_index_lo: tof_index_lo[i],
631 tof_index_hi: tof_index_hi[i],
632 mz_lo: mz_lo[i],
633 mz_hi: mz_hi[i],
634
635 rt_mu: rt_mu[i],
636 rt_sigma: rt_sigma[i],
637 rt_height: rt_height[i],
638 rt_area: rt_area[i],
639
640 im_mu: im_mu[i],
641 im_sigma: im_sigma[i],
642 im_height: im_height[i],
643 im_area: im_area[i],
644
645 tof_mu: tof_mu[i],
646 tof_sigma: tof_sigma[i],
647 tof_height: tof_height[i],
648 tof_area: tof_area[i],
649
650 mz_mu: mz_mu[i],
651 mz_sigma: mz_sigma[i],
652 mz_height: mz_height[i],
653 mz_area: mz_area[i],
654
655 raw_sum: raw_sum[i],
656 volume_proxy: volume_proxy[i],
657 };
658
659 row.into_cluster_result()
660 })
661 .collect();
662
663 Ok(out)
664}
665
666fn to_io(e: PolarsError) -> io::Error {
667 io::Error::new(io::ErrorKind::Other, e)
668}
669
670#[derive(Serialize, Deserialize)]
671pub struct PseudoSpectraFile {
672 pub version: u32,
673 pub spectra: Vec<PseudoSpectrum>,
674}
675
676impl PseudoSpectraFile {
677 pub fn new(spectra: Vec<PseudoSpectrum>) -> Self {
678 Self {
679 version: 1,
680 spectra,
681 }
682 }
683}
684
685pub fn save_pseudo_bincode(
686 path: &str,
687 spectra: &[PseudoSpectrum],
688 compress: bool,
689) -> io::Result<()> {
690 let f = File::create(path)?;
691 if compress {
692 let mut zw = zstd::Encoder::new(f, 3)?;
694 bincode::serialize_into(&mut zw, &PseudoSpectraFile::new(spectra.to_vec()))
695 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
696 zw.finish()?;
697 Ok(())
698 } else {
699 let mut bw = BufWriter::new(f);
700 bincode::serialize_into(&mut bw, &PseudoSpectraFile::new(spectra.to_vec()))
701 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))
702 }
703}
704
705pub fn load_pseudo_bincode(path: &str) -> io::Result<Vec<PseudoSpectrum>> {
706 let f = File::open(path)?;
707
708 if let Ok(mut zr) = zstd::Decoder::new(&f) {
710 let pf: PseudoSpectraFile = bincode::deserialize_from(&mut zr)
711 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
712 return Ok(pf.spectra);
713 }
714
715 let f = BufReader::new(File::open(path)?);
717 let pf: PseudoSpectraFile = bincode::deserialize_from(f)
718 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
719 Ok(pf.spectra)
720}