rustdf/cluster/
feature.rs

1use std::cmp::Ordering;
2use mscore::algorithm::isotope::generate_averagine_spectra;
3use crate::cluster::cluster::ClusterResult1D;
4
5#[derive(Clone, Debug)]
6pub struct AveragineLut {
7    pub masses: Vec<f32>,      // neutral-mass grid (Da)
8    pub z_min: u8,
9    pub z_max: u8,
10    pub k: usize,              // kept peaks (<=8), zero-padded to 8
11    pub envs: Vec<[f32; 8]>,   // flattened by (z, mass_index)
12}
13
14impl AveragineLut {
15    #[inline]
16    fn clamp_resolution_decimals(resolution: i32) -> i32 {
17        // Preserve “decimals” meaning from the old impl; 0..=6 is already plenty.
18        resolution.clamp(0, 6)
19    }
20
21    #[inline]
22    fn clamp_threads(n: usize) -> usize {
23        n.clamp(1, 32)
24    }
25
26    pub fn build(
27        mass_min: f32,
28        mass_max: f32,
29        step: f32,        // e.g. 25–50 Da
30        z_min: u8,
31        z_max: u8,
32        k: usize,         // keep first k peaks (<=8)
33        resolution: i32,  // interpreted as *decimals* like the old code
34        num_threads: usize,
35    ) -> Self {
36        // ---- grid & parameter guards -------------------------------------------------
37        let mass_min = mass_min.max(50.0);
38        let mass_max = mass_max.max(mass_min + 1.0);
39        let step     = step.max(1.0);
40        let z_min    = z_min.max(1);
41        let z_max    = z_max.max(z_min);
42        let k        = k.clamp(1, 8);
43
44        let mut masses: Vec<f32> = Vec::new();
45        let mut m = mass_min;
46        while m <= mass_max + 1e-6 {
47            masses.push(m);
48            m += step;
49        }
50
51        // If someone asks for a pathological grid, refuse early.
52        const MAX_GRID_POINTS: usize = 200_000; // generous hard-stop
53        if masses.len() > MAX_GRID_POINTS {
54            panic!("AveragineLut grid too large: {} points (> {})", masses.len(), MAX_GRID_POINTS);
55        }
56
57        // ---- prepare storage ---------------------------------------------------------
58        let per_z = masses.len();
59        let n_env = per_z * (z_max - z_min + 1) as usize;
60        let mut envs: Vec<[f32; 8]> = Vec::with_capacity(n_env);
61
62        // ---- clamp heavy knobs -------------------------------------------------------
63        let res_dec = Self::clamp_resolution_decimals(resolution);
64        let threads = Self::clamp_threads(num_threads);
65
66        // ---- CHUNKED generation to bound memory -------------------------------------
67        // We never build all spectra at once; do it in slices per charge.
68        const CHUNK: usize = 512;
69
70        // Reusable scratch buffers to avoid re-allocs in the loop.
71        let mut masses_f64: Vec<f64> = Vec::with_capacity(CHUNK);
72        let mut charges:     Vec<i32> = Vec::with_capacity(CHUNK);
73
74        for z in z_min..=z_max {
75            let zi = z as i32;
76
77            let mut start = 0;
78            while start < masses.len() {
79                let end = (start + CHUNK).min(masses.len());
80
81                // fill scratch
82                masses_f64.clear();
83                masses_f64.extend(masses[start..end].iter().map(|&x| x as f64));
84
85                charges.clear();
86                charges.resize(end - start, zi);
87
88                // generate only for this chunk
89                let specs = generate_averagine_spectra(
90                    masses_f64.clone(), // (the API takes owned Vecs)
91                    charges.clone(),
92                    /*min_intensity*/ 1,
93                    /*k*/ k as i32,
94                    /*resolution(decimals)*/ res_dec,
95                    /*centroid*/ true,
96                    /*threads*/  threads,
97                    /*amp*/ None,
98                );
99
100                // compact & normalize to unit-length k vector, zero-padded to 8
101                for sp in specs {
102                    let mut v = [0f32; 8];
103                    let keep = k.min(sp.intensity.len());
104                    for i in 0..keep {
105                        v[i] = sp.intensity[i] as f32;
106                    }
107                    // L2 normalize
108                    let norm = v.iter().map(|x| (*x as f64) * (*x as f64)).sum::<f64>().sqrt() as f32;
109                    if norm > 0.0 {
110                        for x in &mut v { *x /= norm; }
111                    }
112                    envs.push(v);
113                }
114
115                start = end;
116            }
117        }
118
119        Self { masses, z_min, z_max, k, envs }
120    }
121
122    #[inline]
123    pub fn lookup(&self, neutral_mass: f32, z: u8) -> [f32; 8] {
124        if z < self.z_min || z > self.z_max || self.masses.is_empty() {
125            return [0.0; 8];
126        }
127        let zi = (z - self.z_min) as usize;
128        let per_z = self.masses.len();
129        // nearest-neighbor on mass grid
130        let i = match self.masses.binary_search_by(|m| m.partial_cmp(&neutral_mass).unwrap_or(Ordering::Equal)) {
131            Ok(i) => i,
132            Err(i) => i.saturating_sub(1).min(per_z.saturating_sub(1)),
133        };
134        self.envs[zi * per_z + i]
135    }
136}
137
138#[derive(Clone, Debug)]
139pub struct GroupingParams {
140    pub rt_pad_overlap: usize,   // pad windows for edge gating
141    pub im_pad_overlap: usize,
142    pub mz_ppm_tol: f32,         // tight (~3–6 ppm) for near-dup merge
143    pub iso_ppm_tol: f32,        // 8–12 ppm for isotopic spacing
144    pub iso_abs_da: f32,         // 0.002–0.005 Da safety floor
145    pub z_min: u8,               // 1
146    pub z_max: u8,               // 6
147}
148
149#[derive(Clone, Debug)]
150pub struct FeatureBuildParams {
151    pub k_max: usize,            // 3–6 typical
152    pub min_members: usize,      // ≥2 isotopes required
153    pub min_cosine: f32,         // 0.85–0.92 if LUT provided
154    // DP/edge scoring weights
155    pub w_spacing: f32,          // α
156    pub w_coelute: f32,          // β
157    pub w_monotonic: f32,        // γ (soft)
158    pub penalty_skip_one: f32,   // λ_gap
159    // auction
160    pub steal_delta: f32,        // improvement needed to steal a node (not used in simple version)
161    // seed hygiene
162    pub require_lowest_is_mono: bool, // guard: seed must be local mono
163}
164
165#[derive(Clone, Debug)]
166pub struct SimpleFeature {
167    pub feature_id: usize,
168    pub charge: u8,
169    pub mz_mono: f32,
170    pub neutral_mass: f32,
171    pub rt_bounds: (usize, usize),
172    pub im_bounds: (usize, usize),
173    pub mz_center: f32,
174    pub n_members: usize,
175
176    /// Indices into the *input slice* of clusters (for local lookups).
177    pub member_cluster_indices: Vec<usize>,
178
179    /// Stable cluster IDs copied from `ClusterResult1D::cluster_id`.
180    pub member_cluster_ids: Vec<u64>,
181
182    pub raw_sum: f32,
183
184    /// Cosine similarity vs. Averagine envelope (if LUT was provided; 0 otherwise).
185    pub cos_averagine: f32,
186
187    /// Optional: the actual MS1 member clusters that make up this feature.
188    /// This lets us score a feature directly against an MS2 cluster without
189    /// needing the global MS1 slice.
190    pub member_clusters: Vec<ClusterResult1D>,
191}
192
193#[derive(Clone, Debug)]
194pub struct SimpleFeatureParams {
195    pub z_min: u8,
196    pub z_max: u8,
197    pub iso_ppm_tol: f32,        // e.g. 10.0
198    pub iso_abs_da: f32,         // e.g. 0.003
199    pub min_members: usize,      // at least this many isotopes
200    pub max_members: usize,      // cap chain length
201    pub min_raw_sum: f32,        // minimal cluster raw_sum
202    pub min_mz: f32,             // minimal m/z to consider
203    pub min_rt_overlap_frac: f32, // e.g. 0.3
204    pub min_im_overlap_frac: f32, // e.g. 0.3
205    /// If > 0 and LUT is provided, chains with cosine < min_cosine are dropped.
206    pub min_cosine: f32,
207
208    /// Weight for spacing penalty (0.0 = disabled, higher = penalize bad spacing more).
209    ///
210    /// Penalty is computed as average |Δmz_observed - Δmz_theory(z)| in *units of tolerance*
211    /// (i.e. divided by `iso_tolerance_da(mid_mz)`), then multiplied by this weight and
212    /// subtracted from the raw_sum-based chain score.
213    pub w_spacing_penalty: f32,
214}
215
216impl Default for SimpleFeatureParams {
217    fn default() -> Self {
218        Self {
219            z_min: 1,
220            z_max: 5,
221            iso_ppm_tol: 10.0,
222            iso_abs_da: 0.003,
223            min_members: 2,
224            max_members: 5,
225            min_raw_sum: 0.0,
226            min_mz: 100.0,
227            min_rt_overlap_frac: 0.3,
228            min_im_overlap_frac: 0.3,
229            min_cosine: 0.0,
230            w_spacing_penalty: 0.0,
231        }
232    }
233}
234
235fn cluster_mz_mu(c: &ClusterResult1D) -> Option<f32> {
236    if let Some(ref f) = c.mz_fit {
237        if f.mu.is_finite() && f.mu > 0.0 {
238            return Some(f.mu);
239        }
240    }
241    if let Some((lo, hi)) = c.mz_window {
242        let mu = 0.5 * (lo + hi);
243        if mu.is_finite() && mu > 0.0 {
244            return Some(mu);
245        }
246    }
247    None
248}
249
250fn _sigma_from_fwhm(fwhm: f32) -> Option<f32> {
251    if !fwhm.is_finite() || fwhm <= 0.0 {
252        return None;
253    }
254    let two_sqrt_two_ln2 = 2.0 * 2.0_f32.ln().sqrt();
255    let sigma = fwhm / two_sqrt_two_ln2;
256    if sigma.is_finite() && sigma > 0.0 {
257        Some(sigma)
258    } else {
259        None
260    }
261}
262
263fn _cluster_mz_sigma(c: &ClusterResult1D) -> Option<f32> {
264    if let Some(ref f) = c.mz_fit {
265        if f.sigma.is_finite() && f.sigma > 0.0 {
266            return Some(f.sigma);
267        }
268    }
269    if let Some((lo, hi)) = c.mz_window {
270        let fwhm = (hi - lo).abs();
271        if let Some(s) = _sigma_from_fwhm(fwhm) {
272            return Some(s);
273        }
274    }
275    None
276}
277
278#[inline]
279fn frac_overlap(a: (usize, usize), b: (usize, usize)) -> f32 {
280    let l = a.0.max(b.0);
281    let r = a.1.min(b.1);
282    if r < l {
283        0.0
284    } else {
285        let len = (r - l + 1) as f32;
286        let len_a = (a.1 - a.0 + 1) as f32;
287        let len_b = (b.1 - b.0 + 1) as f32;
288        len / len_a.max(len_b)
289    }
290}
291
292#[inline]
293fn iso_tolerance_da(mz: f32, p: &SimpleFeatureParams) -> f32 {
294    let ppm_term = mz.abs().max(1e-6) * (p.iso_ppm_tol * 1e-6);
295    ppm_term.max(p.iso_abs_da)
296}
297
298/// Cosine between raw isotope vector (from chain) and LUT envelope.
299fn cosine_iso_lut(
300    neutral_mass: f32,
301    z: u8,
302    iso_raw: &[f32],
303    lut: &AveragineLut,
304) -> f32 {
305    if iso_raw.is_empty() {
306        return 0.0;
307    }
308
309    let env = lut.lookup(neutral_mass, z); // already L2-normalized, zero-padded to 8
310    let mut v = [0.0f32; 8];
311
312    let keep = iso_raw.len().min(8);
313    let mut norm2 = 0.0f32;
314    for i in 0..keep {
315        let x = iso_raw[i].max(0.0); // just in case
316        v[i] = x;
317        norm2 += x * x;
318    }
319
320    if norm2 <= 0.0 {
321        return 0.0;
322    }
323    let inv_norm = norm2.sqrt().recip();
324
325    let mut dot = 0.0f32;
326    for i in 0..8 {
327        let x = v[i] * inv_norm;
328        dot += x * env[i];
329    }
330    dot
331}
332
333#[derive(Clone, Debug)]
334struct GoodCluster {
335    /// index in the original `clusters` slice
336    orig_idx: usize,
337
338    /// stable ID from `ClusterResult1D`
339    pub cluster_id: u64,
340
341    mz_mu: f32,
342    _rt_mu: f32,
343    rt_win: (usize, usize),
344    _im_mu: f32,
345    im_win: (usize, usize),
346    raw_sum: f32,
347}
348
349// ---------------------------- Core algorithm ------------------------------
350// ---------------------------- Core algorithm ------------------------------
351
352pub fn build_simple_features_from_clusters(
353    clusters: &[ClusterResult1D],
354    params: &SimpleFeatureParams,
355    lut: Option<&AveragineLut>,
356) -> Vec<SimpleFeature> {
357    if clusters.is_empty() {
358        return Vec::new();
359    }
360
361    // 1) Filter to usable clusters and precompute a light-weight view
362    let mut good: Vec<GoodCluster> = clusters
363        .iter()
364        .enumerate()
365        .filter_map(|(i, c)| {
366            if c.raw_sum < params.min_raw_sum {
367                return None;
368            }
369            let mz_mu = cluster_mz_mu(c)?;
370            if mz_mu <= params.min_mz {
371                return None;
372            }
373            if !c.rt_fit.mu.is_finite() || !c.im_fit.mu.is_finite() {
374                return None;
375            }
376            Some(GoodCluster {
377                orig_idx: i,
378                cluster_id: c.cluster_id,
379                mz_mu,
380                _rt_mu: c.rt_fit.mu,
381                rt_win: c.rt_window,
382                _im_mu: c.im_fit.mu,
383                im_win: c.im_window,
384                raw_sum: c.raw_sum,
385            })
386        })
387        .collect();
388
389    if good.is_empty() {
390        return Vec::new();
391    }
392
393    // 2) Sort by m/z for fast neighborhood search
394    good.sort_by(|a, b| a.mz_mu.partial_cmp(&b.mz_mu).unwrap_or(Ordering::Equal));
395
396    // Precompute mz array for binary search
397    let mzs: Vec<f32> = good.iter().map(|g| g.mz_mu).collect();
398
399    // Also build a "seed order" by descending raw_sum
400    let mut seed_indices: Vec<usize> = (0..good.len()).collect();
401    seed_indices.sort_unstable_by(|&a, &b| {
402        good[b]
403            .raw_sum
404            .partial_cmp(&good[a].raw_sum)
405            .unwrap_or(Ordering::Equal)
406    });
407
408    // track assignment
409    let mut assigned = vec![false; good.len()];
410    let mut features: Vec<SimpleFeature> = Vec::new();
411
412    // 3) Main loop over seeds
413    for seed_idx in seed_indices {
414        if assigned[seed_idx] {
415            continue;
416        }
417
418        let mut best_chain: Vec<usize> = Vec::new();
419        let mut best_z: u8 = 0;
420        let mut best_score: f32 = f32::NEG_INFINITY;
421
422        for z in params.z_min..=params.z_max {
423            let delta = 1.003_355_f32 / (z as f32);
424
425            // ------------------------------------------------------------
426            // Precompute reference envelope for this (seed, z) if LUT present
427            // ------------------------------------------------------------
428            let env_opt: Option<[f32; 8]> = if let Some(lut_ref) = lut {
429                let seed_mz = good[seed_idx].mz_mu;
430                if seed_mz.is_finite() && seed_mz > params.min_mz {
431                    let neutral_seed =
432                        (seed_mz - 1.007_276_5_f32).max(0.0) * (z as f32);
433                    Some(lut_ref.lookup(neutral_seed, z))
434                } else {
435                    None
436                }
437            } else {
438                None
439            };
440
441            // Small slack so we don't overreact to noise
442            const DIR_EPS_ENV: f32 = 1e-3; // tiny threshold for env direction
443            const SLACK_UP: f32 = 1.05;    // require ≥ +5% when env rises
444            const SLACK_DOWN: f32 = 0.95;  // require ≤ -5% when env falls
445
446            // ---- UPWARD extension -------------------------------------------------
447            let mut chain_up: Vec<usize> = Vec::new();
448            chain_up.push(seed_idx);
449
450            // keep observed raw sums for shape check
451            let mut iso_raw_up: Vec<f32> = Vec::new();
452            iso_raw_up.push(good[seed_idx].raw_sum.max(0.0));
453
454            let mut cur_up = seed_idx;
455            'upward: while chain_up.len() < params.max_members {
456                let cur = &good[cur_up];
457                let target_m = cur.mz_mu + delta;
458                let tol = iso_tolerance_da(target_m, params);
459
460                let lo = target_m - tol;
461                let hi = target_m + tol;
462
463                // binary search window [lo, hi] in sorted mzs
464                let left = mzs
465                    .binary_search_by(|x| x.partial_cmp(&lo).unwrap_or(Ordering::Equal))
466                    .unwrap_or_else(|i| i);
467                let mut right = match mzs.binary_search_by(|x| x.partial_cmp(&hi).unwrap_or(Ordering::Equal)) {
468                    Ok(i) => i + 1,
469                    Err(i) => i,
470                };
471                if right > mzs.len() {
472                    right = mzs.len();
473                }
474
475                let mut best_candidate: Option<(usize, f32)> = None; // (idx, score)
476
477                for k in left..right {
478                    if assigned[k] || chain_up.contains(&k) {
479                        continue;
480                    }
481                    let cand = &good[k];
482
483                    // co-elution constraints (simple v1: overlap fractions)
484                    let rt_ov = frac_overlap(cur.rt_win, cand.rt_win);
485                    if rt_ov < params.min_rt_overlap_frac {
486                        continue;
487                    }
488                    let im_ov = frac_overlap(cur.im_win, cand.im_win);
489                    if im_ov < params.min_im_overlap_frac {
490                        continue;
491                    }
492
493                    // simple scoring: weighted by raw_sum and overlap
494                    let score = cand.raw_sum * (0.5 * (rt_ov + im_ov));
495
496                    match best_candidate {
497                        None => best_candidate = Some((k, score)),
498                        Some((_, best_s)) => {
499                            if score > best_s {
500                                best_candidate = Some((k, score));
501                            }
502                        }
503                    }
504                }
505
506                if let Some((best_k, _)) = best_candidate {
507                    // ----------------- SHAPE CHECK vs. ENVELOPE (UPWARD) -----------------
508                    if let Some(env) = env_opt {
509                        let obs_prev = *iso_raw_up.last().unwrap_or(&0.0);
510                        let obs_new = good[best_k].raw_sum.max(0.0);
511
512                        // position of the *new* isotope in this direction
513                        let env_idx = iso_raw_up.len().min(env.len().saturating_sub(1));
514
515                        if env_idx > 0 && env_idx < env.len() {
516                            let env_prev = env[env_idx - 1];
517                            let env_cur = env[env_idx];
518
519                            // determine expected direction from envelope
520                            if env_cur > env_prev * (1.0 + DIR_EPS_ENV) {
521                                // Envelope says: this isotope should go UP
522                                if obs_new < obs_prev * SLACK_UP {
523                                    // observed does NOT go up enough -> stop extending
524                                    break 'upward;
525                                }
526                            } else if env_cur < env_prev * (1.0 - DIR_EPS_ENV) {
527                                // Envelope says: this isotope should go DOWN
528                                if obs_new > obs_prev * SLACK_DOWN {
529                                    // observed does NOT go down enough -> stop extending
530                                    break 'upward;
531                                }
532                            }
533                            // If envelope is flat-ish, we don't enforce direction.
534                        }
535
536                        iso_raw_up.push(obs_new);
537                    }
538
539                    chain_up.push(best_k);
540                    cur_up = best_k;
541                } else {
542                    break;
543                }
544            }
545
546            // ---- DOWNWARD extension -----------------------------------------------
547            let mut chain_down: Vec<usize> = Vec::new();
548            let mut iso_raw_down: Vec<f32> = Vec::new();
549            iso_raw_down.push(good[seed_idx].raw_sum.max(0.0));
550
551            let mut cur_down = seed_idx;
552
553            'downward: while (chain_down.len() + chain_up.len()) < params.max_members {
554                let cur = &good[cur_down];
555                let target_m = cur.mz_mu - delta;
556                if target_m <= 0.0 {
557                    break;
558                }
559                let tol = iso_tolerance_da(target_m, params);
560                let lo = target_m - tol;
561                let hi = target_m + tol;
562
563                let left = mzs
564                    .binary_search_by(|x| x.partial_cmp(&lo).unwrap_or(Ordering::Equal))
565                    .unwrap_or_else(|i| i);
566                let mut right = match mzs.binary_search_by(|x| x.partial_cmp(&hi).unwrap_or(Ordering::Equal)) {
567                    Ok(i) => i + 1,
568                    Err(i) => i,
569                };
570                if right > mzs.len() {
571                    right = mzs.len();
572                }
573
574                let mut best_candidate: Option<(usize, f32)> = None; // (idx, score)
575
576                for k in left..right {
577                    if assigned[k] || chain_up.contains(&k) || chain_down.contains(&k) {
578                        continue;
579                    }
580                    let cand = &good[k];
581
582                    let rt_ov = frac_overlap(cur.rt_win, cand.rt_win);
583                    if rt_ov < params.min_rt_overlap_frac {
584                        continue;
585                    }
586                    let im_ov = frac_overlap(cur.im_win, cand.im_win);
587                    if im_ov < params.min_im_overlap_frac {
588                        continue;
589                    }
590
591                    let score = cand.raw_sum * (0.5 * (rt_ov + im_ov));
592
593                    match best_candidate {
594                        None => best_candidate = Some((k, score)),
595                        Some((_, best_s)) => {
596                            if score > best_s {
597                                best_candidate = Some((k, score));
598                            }
599                        }
600                    }
601                }
602
603                if let Some((best_k, _)) = best_candidate {
604                    // ----------------- SHAPE CHECK vs. ENVELOPE (DOWNWARD) ---------------
605                    if let Some(env) = env_opt {
606                        let obs_prev = *iso_raw_down.last().unwrap_or(&0.0);
607                        let obs_new = good[best_k].raw_sum.max(0.0);
608
609                        // treat downward chain similarly: index along envelope
610                        let env_idx = iso_raw_down.len().min(env.len().saturating_sub(1));
611
612                        if env_idx > 0 && env_idx < env.len() {
613                            let env_prev = env[env_idx - 1];
614                            let env_cur = env[env_idx];
615
616                            if env_cur > env_prev * (1.0 + DIR_EPS_ENV) {
617                                // Envelope says: go UP here
618                                if obs_new < obs_prev * SLACK_UP {
619                                    break 'downward;
620                                }
621                            } else if env_cur < env_prev * (1.0 - DIR_EPS_ENV) {
622                                // Envelope says: go DOWN here
623                                if obs_new > obs_prev * SLACK_DOWN {
624                                    break 'downward;
625                                }
626                            }
627                        }
628
629                        iso_raw_down.push(obs_new);
630                    }
631
632                    chain_down.push(best_k);
633                    cur_down = best_k;
634                } else {
635                    break;
636                }
637            }
638
639            chain_down.reverse(); // so that m/z increases
640            let mut chain: Vec<usize> = chain_down;
641            chain.extend(chain_up.into_iter());
642
643            if chain.len() < params.min_members {
644                continue;
645            }
646
647            // --- spacing penalty (optional) ---------------------------------------
648            let mut spacing_penalty = 0.0f32;
649            if params.w_spacing_penalty > 0.0 && chain.len() >= 2 {
650                let mut spacing_err_sum = 0.0f32;
651                let mut spacing_links = 0usize;
652
653                for win in chain.windows(2) {
654                    let i = win[0];
655                    let j = win[1];
656                    let gi = &good[i];
657                    let gj = &good[j];
658
659                    let observed = gj.mz_mu - gi.mz_mu;
660                    let ideal = delta;
661                    let mid_mz = 0.5 * (gi.mz_mu + gj.mz_mu);
662                    let tol_da = iso_tolerance_da(mid_mz, params);
663
664                    if tol_da > 0.0 {
665                        let err_units = (observed - ideal).abs() / tol_da; // in “tolerances”
666                        spacing_err_sum += err_units;
667                        spacing_links += 1;
668                    }
669                }
670
671                if spacing_links > 0 {
672                    spacing_penalty = spacing_err_sum / (spacing_links as f32);
673                }
674            }
675
676            // chain score: sum of raw_sum minus spacing penalty
677            let score_raw: f32 = chain.iter().map(|&i| good[i].raw_sum).sum();
678            let score = if params.w_spacing_penalty > 0.0 {
679                score_raw - params.w_spacing_penalty * spacing_penalty
680            } else {
681                score_raw
682            };
683
684            if score > best_score {
685                best_score = score;
686                best_chain = chain;
687                best_z = z;
688            }
689        }
690
691        if best_chain.len() < params.min_members || best_z == 0 {
692            continue;
693        }
694
695        // --- Averagine cosine gating (before assignment) -----------------
696        let mut cos_averagine = 0.0f32;
697        if let Some(lut_ref) = lut {
698            // Build iso_raw = [raw_sum_0, raw_sum_1, ...] in m/z order (chain is monotonic)
699            let mut mz_mono_tmp = f32::INFINITY;
700            let mut iso_raw: Vec<f32> = Vec::with_capacity(best_chain.len());
701
702            for &idx in &best_chain {
703                let g = &good[idx];
704                mz_mono_tmp = mz_mono_tmp.min(g.mz_mu);
705                iso_raw.push(g.raw_sum.max(0.0));
706            }
707
708            if !mz_mono_tmp.is_finite() || mz_mono_tmp <= params.min_mz {
709                continue;
710            }
711
712            let neutral_tmp = (mz_mono_tmp - 1.007_276_5_f32).max(0.0) * (best_z as f32);
713            cos_averagine = cosine_iso_lut(neutral_tmp, best_z, &iso_raw, lut_ref);
714
715            if params.min_cosine > 0.0 && cos_averagine < params.min_cosine {
716                // Reject this chain; do NOT mark anything as assigned.
717                continue;
718            }
719        }
720
721        // 4) create feature and mark members as assigned
722        for &idx in &best_chain {
723            assigned[idx] = true;
724        }
725
726        // derive mz_mono as the smallest m/z in the chain (now can be < seed for high z)
727        let mut mz_mono = f32::INFINITY;
728        let mut mz_weighted = 0.0_f64;
729        let mut wsum = 0.0_f64;
730        let mut rt_min = usize::MAX;
731        let mut rt_max = 0usize;
732        let mut im_min = usize::MAX;
733        let mut im_max = 0usize;
734        let mut raw_sum_total = 0.0_f32;
735
736        let mut member_cluster_indices: Vec<usize> =
737            Vec::with_capacity(best_chain.len());
738        let mut member_cluster_ids: Vec<u64> =
739            Vec::with_capacity(best_chain.len());
740
741        for &idx in &best_chain {
742            let g = &good[idx];
743            mz_mono = mz_mono.min(g.mz_mu);
744            let w = g.raw_sum.max(1.0) as f64;
745            mz_weighted += (g.mz_mu as f64) * w;
746            wsum += w;
747            rt_min = rt_min.min(g.rt_win.0);
748            rt_max = rt_max.max(g.rt_win.1);
749            im_min = im_min.min(g.im_win.0);
750            im_max = im_max.max(g.im_win.1);
751            raw_sum_total += g.raw_sum;
752
753            member_cluster_indices.push(g.orig_idx);
754            member_cluster_ids.push(g.cluster_id);
755        }
756
757        if !mz_mono.is_finite() || mz_mono <= params.min_mz {
758            continue;
759        }
760        let mz_center = if wsum > 0.0 {
761            (mz_weighted / wsum) as f32
762        } else {
763            mz_mono
764        };
765
766        let neutral_mass = (mz_mono - 1.007_276_5_f32).max(0.0) * (best_z as f32);
767
768        // If we had no LUT, cos_averagine is 0.0; otherwise it’s already set.
769        let feat_id = features.len();
770        features.push(SimpleFeature {
771            feature_id: feat_id,
772            charge: best_z,
773            mz_mono,
774            neutral_mass,
775            rt_bounds: (rt_min, rt_max),
776            im_bounds: (im_min, im_max),
777            mz_center,
778            n_members: best_chain.len(),
779            member_cluster_indices,
780            member_cluster_ids,
781            raw_sum: raw_sum_total,
782            cos_averagine,
783            member_clusters: best_chain
784                .iter()
785                .map(|&i| clusters[good[i].orig_idx].clone())
786                .collect(),
787        });
788    }
789
790    features
791}