mscore/algorithm/
isotope.rs

1extern crate statrs;
2
3use rayon::prelude::*;
4use rayon::ThreadPoolBuilder;
5use std::collections::{BTreeMap, HashMap, HashSet};
6
7use crate::chemistry::constants::{MASS_NEUTRON, MASS_PROTON};
8use crate::chemistry::elements::{atoms_isotopic_weights, isotopic_abundance};
9use crate::data::peptide::PeptideIon;
10use crate::data::spectrum::MzSpectrum;
11use crate::data::spectrum::ToResolution;
12use statrs::distribution::{Continuous, Normal};
13
14/// convolve two distributions of masses and abundances
15///
16/// Arguments:
17///
18/// * `dist_a` - first distribution of masses and abundances
19/// * `dist_b` - second distribution of masses and abundances
20/// * `mass_tolerance` - mass tolerance for combining peaks
21/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
22/// * `max_results` - maximum number of peaks to include in the result
23///
24/// Returns:
25///
26/// * `Vec<(f64, f64)>` - combined distribution of masses and abundances
27///
28/// # Examples
29///
30/// ```
31/// use mscore::algorithm::isotope::convolve;
32///
33/// let dist_a = vec![(100.0, 0.5), (101.0, 0.5)];
34/// let dist_b = vec![(100.0, 0.5), (101.0, 0.5)];
35/// let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200);
36/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
37/// ```
38pub fn convolve(
39    dist_a: &Vec<(f64, f64)>,
40    dist_b: &Vec<(f64, f64)>,
41    mass_tolerance: f64,
42    abundance_threshold: f64,
43    max_results: usize,
44) -> Vec<(f64, f64)> {
45    let mut result: Vec<(f64, f64)> = Vec::new();
46
47    for (mass_a, abundance_a) in dist_a {
48        for (mass_b, abundance_b) in dist_b {
49            let combined_mass = mass_a + mass_b;
50            let combined_abundance = abundance_a * abundance_b;
51
52            // Skip entries with combined abundance below the threshold
53            if combined_abundance < abundance_threshold {
54                continue;
55            }
56
57            // Insert or update the combined mass in the result distribution
58            if let Some(entry) = result
59                .iter_mut()
60                .find(|(m, _)| (*m - combined_mass).abs() < mass_tolerance)
61            {
62                entry.1 += combined_abundance;
63            } else {
64                result.push((combined_mass, combined_abundance));
65            }
66        }
67    }
68
69    // Sort by abundance (descending) to prepare for trimming
70    result.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
71
72    // Trim the vector if it exceeds max_results
73    if result.len() > max_results {
74        result.truncate(max_results);
75    }
76
77    // Optionally, sort by mass if needed for further processing
78    result.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
79
80    result
81}
82
83/// convolve a distribution with itself n times
84///
85/// Arguments:
86///
87/// * `dist` - distribution of masses and abundances
88/// * `n` - number of times to convolve the distribution with itself
89///
90/// Returns:
91///
92/// * `Vec<(f64, f64)>` - distribution of masses and abundances
93///
94/// # Examples
95///
96/// ```
97/// use mscore::algorithm::isotope::convolve_pow;
98///
99/// let dist = vec![(100.0, 0.5), (101.0, 0.5)];
100/// let result = convolve_pow(&dist, 2);
101/// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]);
102/// ```
103pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)> {
104    if n == 0 {
105        return vec![(0.0, 1.0)]; // Return the delta distribution
106    }
107    if n == 1 {
108        return dist.clone();
109    }
110
111    let mut result = dist.clone();
112    let mut power = 2;
113
114    while power <= n {
115        result = convolve(&result, &result, 1e-6, 1e-12, 200); // Square the result to get the next power of 2
116        power *= 2;
117    }
118
119    // If n is not a power of 2, recursively fill in the remainder
120    if power / 2 < n {
121        result = convolve(
122            &result,
123            &convolve_pow(dist, n - power / 2),
124            1e-6,
125            1e-12,
126            200,
127        );
128    }
129
130    result
131}
132
133/// generate the isotope distribution for a given atomic composition
134///
135/// Arguments:
136///
137/// * `atomic_composition` - atomic composition of the peptide
138/// * `mass_tolerance` - mass tolerance for combining peaks
139/// * `abundance_threshold` - minimum abundance for a peak to be included in the result
140/// * `max_result` - maximum number of peaks to include in the result
141///
142/// Returns:
143///
144/// * `Vec<(f64, f64)>` - distribution of masses and abundances
145///
146/// # Examples
147///
148/// ```
149/// use std::collections::HashMap;
150/// use mscore::algorithm::isotope::generate_isotope_distribution;
151///
152/// let mut atomic_composition = HashMap::new();
153/// atomic_composition.insert("C".to_string(), 5);
154/// atomic_composition.insert("H".to_string(), 9);
155/// atomic_composition.insert("N".to_string(), 1);
156/// atomic_composition.insert("O".to_string(), 1);
157/// let result = generate_isotope_distribution(&atomic_composition, 1e-6, 1e-12, 200);
158/// ```
159pub fn generate_isotope_distribution(
160    atomic_composition: &HashMap<String, i32>,
161    mass_tolerance: f64,
162    abundance_threshold: f64,
163    max_result: i32,
164) -> Vec<(f64, f64)> {
165    let mut cumulative_distribution: Option<Vec<(f64, f64)>> = None;
166    let atoms_isotopic_weights: HashMap<String, Vec<f64>> = atoms_isotopic_weights()
167        .iter()
168        .map(|(k, v)| (k.to_string(), v.clone()))
169        .collect();
170    let atomic_isotope_abundance: HashMap<String, Vec<f64>> = isotopic_abundance()
171        .iter()
172        .map(|(k, v)| (k.to_string(), v.clone()))
173        .collect();
174
175    for (element, &count) in atomic_composition.iter() {
176        let elemental_isotope_weights = atoms_isotopic_weights
177            .get(element)
178            .expect("Element not found in isotopic weights table")
179            .clone();
180        let elemental_isotope_abundance = atomic_isotope_abundance
181            .get(element)
182            .expect("Element not found in isotopic abundance table")
183            .clone();
184
185        let element_distribution: Vec<(f64, f64)> = elemental_isotope_weights
186            .iter()
187            .zip(elemental_isotope_abundance.iter())
188            .map(|(&mass, &abundance)| (mass, abundance))
189            .collect();
190
191        let element_power_distribution = if count > 1 {
192            convolve_pow(&element_distribution, count)
193        } else {
194            element_distribution
195        };
196
197        cumulative_distribution = match cumulative_distribution {
198            Some(cum_dist) => Some(convolve(
199                &cum_dist,
200                &element_power_distribution,
201                mass_tolerance,
202                abundance_threshold,
203                max_result as usize,
204            )),
205            None => Some(element_power_distribution),
206        };
207    }
208
209    let final_distribution = cumulative_distribution.expect("Peptide has no elements");
210    // Normalize the distribution
211    let total_abundance: f64 = final_distribution
212        .iter()
213        .map(|&(_, abundance)| abundance)
214        .sum();
215    let result: Vec<_> = final_distribution
216        .into_iter()
217        .map(|(mass, abundance)| (mass, abundance / total_abundance))
218        .collect();
219
220    let mut sort_map: BTreeMap<i64, f64> = BTreeMap::new();
221    let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 };
222
223    for (mz, intensity) in result {
224        let key = quantize(mz);
225        sort_map
226            .entry(key)
227            .and_modify(|e| *e += intensity)
228            .or_insert(intensity);
229    }
230
231    let mz: Vec<f64> = sort_map
232        .keys()
233        .map(|&key| key as f64 / 1_000_000.0)
234        .collect();
235    let intensity: Vec<f64> = sort_map.values().map(|&intensity| intensity).collect();
236    mz.iter()
237        .zip(intensity.iter())
238        .map(|(&mz, &intensity)| (mz, intensity))
239        .collect()
240}
241
242/// calculate the normal probability density function
243///
244/// Arguments:
245///
246/// * `x` - value to calculate the probability density function of
247/// * `mean` - mean of the normal distribution
248/// * `std_dev` - standard deviation of the normal distribution
249///
250/// Returns:
251///
252/// * `f64` - probability density function of `x`
253///
254/// # Examples
255///
256/// ```
257/// use mscore::algorithm::isotope::normal_pdf;
258///
259/// let pdf = normal_pdf(0.0, 0.0, 1.0);
260/// assert_eq!(pdf, 0.39894228040143265);
261/// ```
262pub fn normal_pdf(x: f64, mean: f64, std_dev: f64) -> f64 {
263    let normal = Normal::new(mean, std_dev).unwrap();
264    normal.pdf(x)
265}
266
267/// calculate the factorial of a number
268///
269/// Arguments:
270///
271/// * `n` - number to calculate factorial of
272///
273/// Returns:
274///
275/// * `f64` - factorial of `n`
276///
277/// # Examples
278///
279/// ```
280/// use mscore::algorithm::isotope::factorial;
281///
282/// let fact = factorial(5);
283/// assert_eq!(fact, 120.0);
284/// ```
285pub fn factorial(n: i32) -> f64 {
286    (1..=n).fold(1.0, |acc, x| acc * x as f64)
287}
288
289pub fn weight(mass: f64, peak_nums: Vec<i32>, normalize: bool) -> Vec<f64> {
290    let lam_val = lam(mass, 0.000594, -0.03091);
291    let factorials: Vec<f64> = peak_nums.iter().map(|&k| factorial(k)).collect();
292    let mut weights: Vec<f64> = peak_nums
293        .iter()
294        .map(|&k| {
295            let pow = lam_val.powi(k);
296            let exp = (-lam_val).exp();
297            exp * pow / factorials[k as usize]
298        })
299        .collect();
300
301    if normalize {
302        let sum: f64 = weights.iter().sum();
303        weights = weights.iter().map(|&w| w / sum).collect();
304    }
305
306    weights
307}
308
309/// calculate the lambda value for a given mass
310///
311/// Arguments:
312///
313/// * `mass` - mass of the peptide
314/// * `slope` - slope of the linear regression
315/// * `intercept` - intercept of the linear regression
316///
317/// Returns:
318///
319/// * `f64` - lambda value
320///
321/// # Examples
322///
323/// ```
324/// use mscore::algorithm::isotope::lam;
325///
326/// let lambda = lam(1000.0, 0.000594, -0.03091);
327/// assert_eq!(lambda, 0.56309);
328pub fn lam(mass: f64, slope: f64, intercept: f64) -> f64 {
329    slope * mass + intercept
330}
331
332/// calculate the isotope pattern for a given mass and charge based on the averagine model
333/// using the normal distribution for peak shapes
334///
335/// Arguments:
336///
337/// * `x` - list of m/z values to probe
338/// * `mass` - mass of the peptide
339/// * `charge` - charge of the peptide
340/// * `sigma` - standard deviation of the normal distribution
341/// * `amp` - amplitude of the isotope pattern
342/// * `k` - number of isotopes to consider
343/// * `step_size` - step size for the m/z values to probe
344///
345/// Returns:
346///
347/// * `Vec<f64>` - isotope pattern
348///
349pub fn iso(
350    x: &Vec<f64>,
351    mass: f64,
352    charge: f64,
353    sigma: f64,
354    amp: f64,
355    k: usize,
356    step_size: f64,
357) -> Vec<f64> {
358    let k_range: Vec<usize> = (0..k).collect();
359    let means: Vec<f64> = k_range
360        .iter()
361        .map(|&k_val| (mass + MASS_NEUTRON * k_val as f64) / charge)
362        .collect();
363    let weights = weight(
364        mass,
365        k_range
366            .iter()
367            .map(|&k_val| k_val as i32)
368            .collect::<Vec<i32>>(),
369        true,
370    );
371
372    let mut intensities = vec![0.0; x.len()];
373    for (i, x_val) in x.iter().enumerate() {
374        for (j, &mean) in means.iter().enumerate() {
375            intensities[i] += weights[j] * normal_pdf(*x_val, mean, sigma);
376        }
377        intensities[i] *= step_size;
378    }
379    intensities
380        .iter()
381        .map(|&intensity| intensity * amp)
382        .collect()
383}
384
385/// generate the isotope pattern for a given mass and charge
386///
387/// Arguments:
388///
389/// * `lower_bound` - lower bound of the isotope pattern
390/// * `upper_bound` - upper bound of the isotope pattern
391/// * `mass` - mass of the peptide
392/// * `charge` - charge of the peptide
393/// * `amp` - amplitude of the isotope pattern
394/// * `k` - number of isotopes to consider
395/// * `sigma` - standard deviation of the normal distribution
396/// * `resolution` - resolution of the isotope pattern
397///
398/// Returns:
399///
400/// * `(Vec<f64>, Vec<f64>)` - isotope pattern
401///
402/// # Examples
403///
404/// ```
405/// use mscore::algorithm::isotope::generate_isotope_pattern;
406///
407/// let (mzs, intensities) = generate_isotope_pattern(1500.0, 1510.0, 3000.0, 2.0, 1e4, 10, 1.0, 3);
408/// ```
409pub fn generate_isotope_pattern(
410    lower_bound: f64,
411    upper_bound: f64,
412    mass: f64,
413    charge: f64,
414    amp: f64,
415    k: usize,
416    sigma: f64,
417    resolution: i32,
418) -> (Vec<f64>, Vec<f64>) {
419    let step_size = f64::min(sigma / 10.0, 1.0 / 10f64.powi(resolution));
420    let size = ((upper_bound - lower_bound) / step_size).ceil() as usize;
421    let mzs: Vec<f64> = (0..size)
422        .map(|i| lower_bound + step_size * i as f64)
423        .collect();
424    let intensities = iso(&mzs, mass, charge, sigma, amp, k, step_size);
425
426    (
427        mzs.iter().map(|&mz| mz + MASS_PROTON).collect(),
428        intensities,
429    )
430}
431
432/// generate the averagine spectrum for a given mass and charge
433///
434/// Arguments:
435///
436/// * `mass` - mass of the peptide
437/// * `charge` - charge of the peptide
438/// * `min_intensity` - minimum intensity for a peak to be included in the result
439/// * `k` - number of isotopes to consider
440/// * `resolution` - resolution of the isotope pattern
441/// * `centroid` - whether to centroid the spectrum
442/// * `amp` - amplitude of the isotope pattern
443///
444/// Returns:
445///
446/// * `MzSpectrum` - averagine spectrum
447///
448/// # Examples
449///
450/// ```
451/// use mscore::algorithm::isotope::generate_averagine_spectrum;
452///
453/// let spectrum = generate_averagine_spectrum(3000.0, 2, 1, 10, 3, true, None);
454/// ```
455pub fn generate_averagine_spectrum(
456    mass: f64,
457    charge: i32,
458    min_intensity: i32,
459    k: i32,
460    resolution: i32,
461    centroid: bool,
462    amp: Option<f64>,
463) -> MzSpectrum {
464    let amp = amp.unwrap_or(1e4);
465    let lb = mass / charge as f64 - 0.2;
466    let ub = mass / charge as f64 + k as f64 + 0.2;
467
468    let (mz, intensities) = generate_isotope_pattern(
469        lb,
470        ub,
471        mass,
472        charge as f64,
473        amp,
474        k as usize,
475        0.008492569002123142,
476        resolution,
477    );
478
479    let spectrum = MzSpectrum::new(mz, intensities)
480        .to_resolution(resolution)
481        .filter_ranged(lb, ub, min_intensity as f64, 1e9);
482
483    if centroid {
484        spectrum.to_centroid(
485            std::cmp::max(min_intensity, 1),
486            1.0 / 10f64.powi(resolution - 1),
487            true,
488        )
489    } else {
490        spectrum
491    }
492}
493
494/// generate the averagine spectra for a given list of masses and charges
495/// using multiple threads
496///
497/// Arguments:
498///
499/// * `masses` - list of masses of the peptides
500/// * `charges` - list of charges of the peptides
501/// * `min_intensity` - minimum intensity for a peak to be included in the result
502/// * `k` - number of isotopes to consider
503/// * `resolution` - resolution of the isotope pattern
504/// * `centroid` - whether to centroid the spectrum
505/// * `num_threads` - number of threads to use
506/// * `amp` - amplitude of the isotope pattern
507///
508/// Returns:
509///
510/// * `Vec<MzSpectrum>` - list of averagine spectra
511///
512/// # Examples
513///
514/// ```
515/// use mscore::algorithm::isotope::generate_averagine_spectra;
516///
517/// let masses = vec![3000.0, 3000.0];
518/// let charges = vec![2, 3];
519/// let spectra = generate_averagine_spectra(masses, charges, 1, 10, 3, true, 4, None);
520/// ```
521pub fn generate_averagine_spectra(
522    masses: Vec<f64>,
523    charges: Vec<i32>,
524    min_intensity: i32,
525    k: i32,
526    resolution: i32,
527    centroid: bool,
528    num_threads: usize,
529    amp: Option<f64>,
530) -> Vec<MzSpectrum> {
531    let amp = amp.unwrap_or(1e5);
532    let mut spectra: Vec<MzSpectrum> = Vec::new();
533    let thread_pool = ThreadPoolBuilder::new()
534        .num_threads(num_threads)
535        .build()
536        .unwrap();
537
538    thread_pool.install(|| {
539        spectra = masses
540            .par_iter()
541            .zip(charges.par_iter())
542            .map(|(&mass, &charge)| {
543                generate_averagine_spectrum(
544                    mass,
545                    charge,
546                    min_intensity,
547                    k,
548                    resolution,
549                    centroid,
550                    Some(amp),
551                )
552            })
553            .collect();
554    });
555
556    spectra
557}
558
559/// generate the precursor spectrum for a given peptide sequence and charge
560/// using isotope convolutions
561///
562/// Arguments:
563///
564/// * `sequence` - peptide sequence
565/// * `charge` - charge of the peptide
566///
567/// Returns:
568///
569/// * `MzSpectrum` - precursor spectrum
570///
571pub fn generate_precursor_spectrum(
572    sequence: &str,
573    charge: i32,
574    peptide_id: Option<i32>,
575) -> MzSpectrum {
576    let peptide_ion = PeptideIon::new(sequence.to_string(), charge, 1.0, peptide_id);
577    peptide_ion.calculate_isotopic_spectrum(1e-3, 1e-9, 200, 1e-6)
578}
579
580/// parallel version of `generate_precursor_spectrum`
581///
582/// Arguments:
583///
584/// * `sequences` - list of peptide sequences
585/// * `charges` - list of charges of the peptides
586/// * `num_threads` - number of threads to use
587///
588/// Returns:
589///
590/// * `Vec<MzSpectrum>` - list of precursor spectra
591///
592pub fn generate_precursor_spectra(
593    sequences: &Vec<&str>,
594    charges: &Vec<i32>,
595    num_threads: usize,
596    peptide_ids: Vec<Option<i32>>,
597) -> Vec<MzSpectrum> {
598    let thread_pool = ThreadPoolBuilder::new()
599        .num_threads(num_threads)
600        .build()
601        .unwrap();
602    // need to zip sequences and charges and peptide_ids
603    let result = thread_pool.install(|| {
604        sequences
605            .par_iter()
606            .zip(charges.par_iter())
607            .zip(peptide_ids.par_iter())
608            .map(|((&sequence, &charge), &peptide_id)| {
609                generate_precursor_spectrum(sequence, charge, peptide_id)
610            })
611            .collect()
612    });
613    result
614}
615
616/// Result of transmission-dependent isotope distribution calculation.
617/// Contains the adjusted distribution and the transmission factor for intensity scaling.
618#[derive(Debug, Clone)]
619pub struct TransmissionDependentIsotopeDistribution {
620    /// The adjusted isotope distribution (m/z, relative_intensity)
621    pub distribution: Vec<(f64, f64)>,
622    /// Fraction of signal transmitted (0.0 to 1.0).
623    /// This is the ratio of the sum of transmitted distribution intensities
624    /// to the sum of full (all isotopes transmitted) distribution intensities.
625    pub transmission_factor: f64,
626}
627
628// Calculates the isotope distribution for a fragment given the isotope distribution of the fragment, the isotope distribution of the complementary fragment, and the transmitted precursor isotopes
629// implemented based on OpenMS: "https://github.com/OpenMS/OpenMS/blob/079143800f7ed036a7c68ea6e124fe4f5cfc9569/src/openms/source/CHEMISTRY/ISOTOPEDISTRIBUTION/CoarseIsotopePatternGenerator.cpp#L415"
630pub fn calculate_transmission_dependent_fragment_ion_isotope_distribution(
631    fragment_isotope_dist: &Vec<(f64, f64)>,
632    comp_fragment_isotope_dist: &Vec<(f64, f64)>,
633    precursor_isotopes: &HashSet<usize>,
634    max_isotope: usize,
635) -> Vec<(f64, f64)> {
636    if fragment_isotope_dist.is_empty() || comp_fragment_isotope_dist.is_empty() {
637        return Vec::new();
638    }
639
640    let mut r_max = fragment_isotope_dist.len();
641    if max_isotope != 0 && r_max > max_isotope {
642        r_max = max_isotope;
643    }
644
645    let mut result = (0..r_max)
646        .map(|i| (fragment_isotope_dist[0].0 + i as f64, 0.0))
647        .collect::<Vec<(f64, f64)>>();
648
649    // Calculation of dependent isotope distribution
650    for (i, &(_mz, intensity)) in fragment_isotope_dist.iter().enumerate().take(r_max) {
651        for &precursor in precursor_isotopes {
652            if precursor >= i && (precursor - i) < comp_fragment_isotope_dist.len() {
653                let comp_intensity = comp_fragment_isotope_dist[precursor - i].1;
654                result[i].1 += comp_intensity;
655            }
656        }
657        result[i].1 *= intensity;
658    }
659
660    result
661}
662
663/// Calculates the transmission-dependent fragment isotope distribution with explicit
664/// transmission factor tracking.
665///
666/// This function computes how the fragment ion isotope pattern changes when only
667/// certain precursor isotopes are transmitted through the quadrupole isolation window.
668/// It also calculates the transmission factor, which represents the fraction of
669/// total signal that is transmitted.
670///
671/// # Arguments
672///
673/// * `fragment_isotope_dist` - Isotope distribution of the fragment ion (m/z, intensity)
674/// * `comp_fragment_isotope_dist` - Isotope distribution of the complementary fragment
675/// * `precursor_isotopes` - Set of precursor isotope indices that were transmitted
676/// * `max_isotope` - Maximum number of isotope peaks to consider (0 for unlimited)
677///
678/// # Returns
679///
680/// A `TransmissionDependentIsotopeDistribution` containing:
681/// - The adjusted isotope distribution
682/// - The transmission factor (ratio of transmitted to full signal)
683///
684/// # Algorithm
685///
686/// Based on OpenMS CoarseIsotopePatternGenerator. For each fragment isotope index i:
687/// P(fragment=i | transmitted precursors) = frag[i] * Σ comp[p-i] for all transmitted p >= i
688///
689/// The transmission factor is calculated as:
690/// transmission_factor = sum(transmitted_distribution) / sum(full_distribution)
691pub fn calculate_transmission_dependent_distribution_with_factor(
692    fragment_isotope_dist: &Vec<(f64, f64)>,
693    comp_fragment_isotope_dist: &Vec<(f64, f64)>,
694    precursor_isotopes: &HashSet<usize>,
695    max_isotope: usize,
696) -> TransmissionDependentIsotopeDistribution {
697    if fragment_isotope_dist.is_empty() || comp_fragment_isotope_dist.is_empty() {
698        return TransmissionDependentIsotopeDistribution {
699            distribution: Vec::new(),
700            transmission_factor: 0.0,
701        };
702    }
703
704    // Calculate full distribution (all isotopes transmitted) for reference
705    let all_isotopes: HashSet<usize> = (0..fragment_isotope_dist.len().max(comp_fragment_isotope_dist.len())).collect();
706    let full_distribution = calculate_transmission_dependent_fragment_ion_isotope_distribution(
707        fragment_isotope_dist,
708        comp_fragment_isotope_dist,
709        &all_isotopes,
710        max_isotope,
711    );
712    let full_sum: f64 = full_distribution.iter().map(|(_, i)| i).sum();
713
714    // Calculate transmitted distribution
715    let transmitted_distribution = calculate_transmission_dependent_fragment_ion_isotope_distribution(
716        fragment_isotope_dist,
717        comp_fragment_isotope_dist,
718        precursor_isotopes,
719        max_isotope,
720    );
721    let transmitted_sum: f64 = transmitted_distribution.iter().map(|(_, i)| i).sum();
722
723    // Calculate transmission factor
724    let transmission_factor = if full_sum > 0.0 {
725        transmitted_sum / full_sum
726    } else {
727        0.0
728    };
729
730    TransmissionDependentIsotopeDistribution {
731        distribution: transmitted_distribution,
732        transmission_factor,
733    }
734}
735
736/// Calculate the transmission factor for a precursor based on which isotopes are transmitted.
737///
738/// This provides a simple way to scale fragment intensities based on precursor transmission
739/// without the computational cost of per-fragment isotope recalculation.
740///
741/// # Arguments
742///
743/// * `precursor_isotope_dist` - Isotope distribution of the precursor (m/z, intensity)
744/// * `transmitted_indices` - Set of precursor isotope indices that were transmitted
745///
746/// # Returns
747///
748/// Transmission factor (0.0 to 1.0) representing the fraction of precursor signal transmitted.
749pub fn calculate_precursor_transmission_factor(
750    precursor_isotope_dist: &[(f64, f64)],
751    transmitted_indices: &HashSet<usize>,
752) -> f64 {
753    if precursor_isotope_dist.is_empty() || transmitted_indices.is_empty() {
754        return 0.0;
755    }
756
757    let total_intensity: f64 = precursor_isotope_dist.iter().map(|(_, i)| i).sum();
758    if total_intensity <= 0.0 {
759        return 0.0;
760    }
761
762    let transmitted_intensity: f64 = precursor_isotope_dist
763        .iter()
764        .enumerate()
765        .filter(|(idx, _)| transmitted_indices.contains(idx))
766        .map(|(_, (_, i))| i)
767        .sum();
768
769    transmitted_intensity / total_intensity
770}
771
772#[cfg(test)]
773mod tests {
774    use super::*;
775
776    fn approx_eq(a: f64, b: f64, epsilon: f64) -> bool {
777        (a - b).abs() < epsilon
778    }
779
780    /// Test that transmission-dependent distribution produces correct results
781    /// when the same isotope set is used as the internal reference
782    #[test]
783    fn test_transmission_all_isotopes() {
784        // Simple fragment distribution: M0=0.6, M1=0.3, M2=0.1
785        let fragment_dist = vec![
786            (500.0, 0.6),
787            (501.0, 0.3),
788            (502.0, 0.1),
789        ];
790
791        // Complementary distribution: M0=0.7, M1=0.2, M2=0.1
792        let comp_dist = vec![
793            (800.0, 0.7),
794            (801.0, 0.2),
795            (802.0, 0.1),
796        ];
797
798        // Use the same isotope set that the function uses internally for "full" calculation
799        // This is 0..max(fragment_dist.len(), comp_dist.len()) = 0..3
800        let all_isotopes: HashSet<usize> = (0..3).collect();
801
802        let result = calculate_transmission_dependent_distribution_with_factor(
803            &fragment_dist,
804            &comp_dist,
805            &all_isotopes,
806            0,
807        );
808
809        // When same isotopes as internal reference are transmitted, transmission factor = 1.0
810        assert!(
811            approx_eq(result.transmission_factor, 1.0, 0.001),
812            "Transmission factor should be 1.0 when same isotopes as reference transmitted, got {}",
813            result.transmission_factor
814        );
815
816        // Distribution should not be empty
817        assert!(!result.distribution.is_empty());
818    }
819
820    /// Test that passing more isotopes than exist can increase transmission factor > 1.0
821    /// This is expected behavior: higher precursor isotopes can contribute to lower fragment isotopes
822    #[test]
823    fn test_transmission_extra_isotopes() {
824        let fragment_dist = vec![
825            (500.0, 0.6),
826            (501.0, 0.3),
827            (502.0, 0.1),
828        ];
829
830        let comp_dist = vec![
831            (800.0, 0.7),
832            (801.0, 0.2),
833            (802.0, 0.1),
834        ];
835
836        // Pass more isotopes than exist in distributions
837        let extra_isotopes: HashSet<usize> = [0, 1, 2, 3, 4, 5].iter().cloned().collect();
838
839        let result = calculate_transmission_dependent_distribution_with_factor(
840            &fragment_dist,
841            &comp_dist,
842            &extra_isotopes,
843            0,
844        );
845
846        // With extra isotopes, transmission factor can be > 1.0
847        // because higher precursor isotopes contribute to lower fragment isotopes
848        assert!(
849            result.transmission_factor >= 1.0,
850            "Extra isotopes should give transmission factor >= 1.0, got {}",
851            result.transmission_factor
852        );
853    }
854
855    /// Test that partial transmission reduces the transmission factor
856    #[test]
857    fn test_transmission_partial_isotopes() {
858        // Fragment distribution
859        let fragment_dist = vec![
860            (500.0, 0.5),
861            (501.0, 0.3),
862            (502.0, 0.15),
863            (503.0, 0.05),
864        ];
865
866        // Complementary distribution
867        let comp_dist = vec![
868            (800.0, 0.6),
869            (801.0, 0.25),
870            (802.0, 0.1),
871            (803.0, 0.05),
872        ];
873
874        // All isotopes
875        let all_isotopes: HashSet<usize> = [0, 1, 2, 3].iter().cloned().collect();
876        let full_result = calculate_transmission_dependent_distribution_with_factor(
877            &fragment_dist,
878            &comp_dist,
879            &all_isotopes,
880            0,
881        );
882
883        // Only M0 and M1 transmitted
884        let partial_isotopes: HashSet<usize> = [0, 1].iter().cloned().collect();
885        let partial_result = calculate_transmission_dependent_distribution_with_factor(
886            &fragment_dist,
887            &comp_dist,
888            &partial_isotopes,
889            0,
890        );
891
892        // Partial transmission should have lower transmission factor
893        assert!(
894            partial_result.transmission_factor < full_result.transmission_factor,
895            "Partial transmission ({}) should be less than full transmission ({})",
896            partial_result.transmission_factor,
897            full_result.transmission_factor
898        );
899
900        // Transmission factor should be between 0 and 1
901        assert!(
902            partial_result.transmission_factor > 0.0 && partial_result.transmission_factor < 1.0,
903            "Partial transmission factor should be between 0 and 1, got {}",
904            partial_result.transmission_factor
905        );
906    }
907
908    /// Test that only M0 transmitted gives lowest transmission factor
909    #[test]
910    fn test_transmission_m0_only() {
911        let fragment_dist = vec![
912            (500.0, 0.5),
913            (501.0, 0.3),
914            (502.0, 0.15),
915            (503.0, 0.05),
916        ];
917
918        let comp_dist = vec![
919            (800.0, 0.6),
920            (801.0, 0.25),
921            (802.0, 0.1),
922            (803.0, 0.05),
923        ];
924
925        // Only M0 transmitted
926        let m0_only: HashSet<usize> = [0].iter().cloned().collect();
927        let m0_result = calculate_transmission_dependent_distribution_with_factor(
928            &fragment_dist,
929            &comp_dist,
930            &m0_only,
931            0,
932        );
933
934        // M0 and M1 transmitted
935        let m0_m1: HashSet<usize> = [0, 1].iter().cloned().collect();
936        let m0_m1_result = calculate_transmission_dependent_distribution_with_factor(
937            &fragment_dist,
938            &comp_dist,
939            &m0_m1,
940            0,
941        );
942
943        // M0 only should have lower transmission than M0+M1
944        assert!(
945            m0_result.transmission_factor < m0_m1_result.transmission_factor,
946            "M0 only ({}) should transmit less than M0+M1 ({})",
947            m0_result.transmission_factor,
948            m0_m1_result.transmission_factor
949        );
950
951        // When only M0 transmitted, only M0 of fragment should have signal
952        // (higher isotopes need complementary isotopes that require higher precursor isotopes)
953        let m0_intensity = m0_result.distribution.get(0).map(|(_, i)| *i).unwrap_or(0.0);
954        let m1_intensity = m0_result.distribution.get(1).map(|(_, i)| *i).unwrap_or(0.0);
955
956        assert!(
957            m0_intensity > 0.0,
958            "M0 fragment should have intensity when M0 precursor transmitted"
959        );
960        assert!(
961            approx_eq(m1_intensity, 0.0, 1e-10),
962            "M1 fragment should have zero intensity when only M0 precursor transmitted, got {}",
963            m1_intensity
964        );
965    }
966
967    /// Test that empty inputs are handled gracefully
968    #[test]
969    fn test_transmission_empty_inputs() {
970        let empty: Vec<(f64, f64)> = vec![];
971        let non_empty = vec![(500.0, 0.5)];
972        let isotopes: HashSet<usize> = [0].iter().cloned().collect();
973
974        // Empty fragment distribution
975        let result1 = calculate_transmission_dependent_distribution_with_factor(
976            &empty,
977            &non_empty,
978            &isotopes,
979            0,
980        );
981        assert!(result1.distribution.is_empty());
982        assert!(approx_eq(result1.transmission_factor, 0.0, 1e-10));
983
984        // Empty complementary distribution
985        let result2 = calculate_transmission_dependent_distribution_with_factor(
986            &non_empty,
987            &empty,
988            &isotopes,
989            0,
990        );
991        assert!(result2.distribution.is_empty());
992        assert!(approx_eq(result2.transmission_factor, 0.0, 1e-10));
993    }
994
995    /// Test that the relative isotope pattern changes with partial transmission
996    #[test]
997    fn test_isotope_pattern_shift() {
998        // Use a distribution where we can verify the pattern shift
999        let fragment_dist = vec![
1000            (500.0, 0.6),
1001            (501.0, 0.3),
1002            (502.0, 0.1),
1003        ];
1004
1005        let comp_dist = vec![
1006            (800.0, 0.7),
1007            (801.0, 0.2),
1008            (802.0, 0.1),
1009        ];
1010
1011        // All isotopes - get reference pattern
1012        let all_isotopes: HashSet<usize> = [0, 1, 2].iter().cloned().collect();
1013        let full_result = calculate_transmission_dependent_distribution_with_factor(
1014            &fragment_dist,
1015            &comp_dist,
1016            &all_isotopes,
1017            0,
1018        );
1019
1020        // Only M0 transmitted
1021        let m0_only: HashSet<usize> = [0].iter().cloned().collect();
1022        let m0_result = calculate_transmission_dependent_distribution_with_factor(
1023            &fragment_dist,
1024            &comp_dist,
1025            &m0_only,
1026            0,
1027        );
1028
1029        // Calculate relative M0 contribution for both
1030        let full_sum: f64 = full_result.distribution.iter().map(|(_, i)| i).sum();
1031        let m0_sum: f64 = m0_result.distribution.iter().map(|(_, i)| i).sum();
1032
1033        let full_m0_fraction = if full_sum > 0.0 {
1034            full_result.distribution[0].1 / full_sum
1035        } else {
1036            0.0
1037        };
1038
1039        let m0_m0_fraction = if m0_sum > 0.0 {
1040            m0_result.distribution[0].1 / m0_sum
1041        } else {
1042            0.0
1043        };
1044
1045        // When only M0 precursor transmitted, M0 fragment should be relatively more dominant
1046        // (because higher fragment isotopes can't form without higher precursor isotopes)
1047        assert!(
1048            m0_m0_fraction >= full_m0_fraction,
1049            "M0 fraction with M0-only transmission ({}) should be >= full transmission ({})",
1050            m0_m0_fraction,
1051            full_m0_fraction
1052        );
1053    }
1054
1055    /// Test max_isotope parameter limits output
1056    #[test]
1057    fn test_max_isotope_limit() {
1058        let fragment_dist = vec![
1059            (500.0, 0.5),
1060            (501.0, 0.3),
1061            (502.0, 0.15),
1062            (503.0, 0.05),
1063        ];
1064
1065        let comp_dist = vec![
1066            (800.0, 0.6),
1067            (801.0, 0.25),
1068            (802.0, 0.1),
1069            (803.0, 0.05),
1070        ];
1071
1072        let all_isotopes: HashSet<usize> = [0, 1, 2, 3].iter().cloned().collect();
1073
1074        // With max_isotope = 2
1075        let result = calculate_transmission_dependent_distribution_with_factor(
1076            &fragment_dist,
1077            &comp_dist,
1078            &all_isotopes,
1079            2,
1080        );
1081
1082        assert_eq!(
1083            result.distribution.len(),
1084            2,
1085            "Distribution should be limited to 2 isotopes, got {}",
1086            result.distribution.len()
1087        );
1088    }
1089
1090    /// Integration test: Verify that simulated frame building produces correct intensity scaling
1091    /// This test mimics the behavior in rustdf's build_fragment_frame function
1092    #[test]
1093    fn test_frame_building_intensity_scaling() {
1094        // Simulate realistic isotope distributions for a small peptide fragment
1095        // Fragment: ~500 Da
1096        let fragment_dist = vec![
1097            (500.25, 0.65),
1098            (501.25, 0.25),
1099            (502.25, 0.08),
1100            (503.25, 0.02),
1101        ];
1102
1103        // Complementary fragment: ~800 Da
1104        let comp_dist = vec![
1105            (800.40, 0.55),
1106            (801.40, 0.28),
1107            (802.40, 0.12),
1108            (803.40, 0.05),
1109        ];
1110
1111        // Simulate frame building with different transmission scenarios
1112        let fraction_events: f64 = 1000.0; // Arbitrary intensity scaling factor
1113
1114        // Scenario 1: Full transmission (reference)
1115        let all_isotopes: HashSet<usize> = (0..4).collect();
1116        let full_dist = calculate_transmission_dependent_fragment_ion_isotope_distribution(
1117            &fragment_dist,
1118            &comp_dist,
1119            &all_isotopes,
1120            0,
1121        );
1122        let full_intensity_sum: f64 = full_dist.iter().map(|(_, i)| i * fraction_events).sum();
1123
1124        // Scenario 2: Only M0 transmitted (narrow quad window)
1125        let m0_only: HashSet<usize> = [0].iter().cloned().collect();
1126        let m0_dist = calculate_transmission_dependent_fragment_ion_isotope_distribution(
1127            &fragment_dist,
1128            &comp_dist,
1129            &m0_only,
1130            0,
1131        );
1132        let m0_intensity_sum: f64 = m0_dist.iter().map(|(_, i)| i * fraction_events).sum();
1133
1134        // Scenario 3: M0+M1 transmitted (typical quad window)
1135        let m0_m1: HashSet<usize> = [0, 1].iter().cloned().collect();
1136        let m0_m1_dist = calculate_transmission_dependent_fragment_ion_isotope_distribution(
1137            &fragment_dist,
1138            &comp_dist,
1139            &m0_m1,
1140            0,
1141        );
1142        let m0_m1_intensity_sum: f64 = m0_m1_dist.iter().map(|(_, i)| i * fraction_events).sum();
1143
1144        // Verify intensity ordering: full > M0+M1 > M0 only
1145        assert!(
1146            full_intensity_sum > m0_m1_intensity_sum,
1147            "Full transmission intensity ({}) should be > M0+M1 ({})",
1148            full_intensity_sum, m0_m1_intensity_sum
1149        );
1150        assert!(
1151            m0_m1_intensity_sum > m0_intensity_sum,
1152            "M0+M1 transmission intensity ({}) should be > M0 only ({})",
1153            m0_m1_intensity_sum, m0_intensity_sum
1154        );
1155
1156        // Verify intensity ratios make physical sense
1157        // M0+M1 should give roughly 60-90% of full intensity (depends on distributions)
1158        let m0_m1_ratio = m0_m1_intensity_sum / full_intensity_sum;
1159        assert!(
1160            m0_m1_ratio > 0.5 && m0_m1_ratio < 1.0,
1161            "M0+M1 ratio ({}) should be between 0.5 and 1.0",
1162            m0_m1_ratio
1163        );
1164
1165        // M0 only should give roughly 30-70% of full intensity
1166        let m0_ratio = m0_intensity_sum / full_intensity_sum;
1167        assert!(
1168            m0_ratio > 0.2 && m0_ratio < 0.8,
1169            "M0 ratio ({}) should be between 0.2 and 0.8",
1170            m0_ratio
1171        );
1172
1173        // Use the factor tracking function to verify explicit transmission factor
1174        let factor_result = calculate_transmission_dependent_distribution_with_factor(
1175            &fragment_dist,
1176            &comp_dist,
1177            &m0_m1,
1178            0,
1179        );
1180
1181        // The transmission factor should match our manual calculation
1182        let manual_factor = m0_m1_intensity_sum / full_intensity_sum;
1183        assert!(
1184            approx_eq(factor_result.transmission_factor, manual_factor, 0.001),
1185            "Transmission factor ({}) should match manual calculation ({})",
1186            factor_result.transmission_factor, manual_factor
1187        );
1188
1189        println!("Frame building intensity test results:");
1190        println!("  Full transmission intensity: {:.2}", full_intensity_sum);
1191        println!("  M0+M1 transmission intensity: {:.2} (ratio: {:.3})", m0_m1_intensity_sum, m0_m1_ratio);
1192        println!("  M0 only transmission intensity: {:.2} (ratio: {:.3})", m0_intensity_sum, m0_ratio);
1193        println!("  Transmission factor (M0+M1): {:.4}", factor_result.transmission_factor);
1194    }
1195
1196    /// Test behavior with realistic peptide masses using the factor tracking function
1197    #[test]
1198    fn test_realistic_peptide_transmission() {
1199        // Simulate a typical tryptic peptide (~1500 Da precursor)
1200        // B-ion fragment ~600 Da, Y-ion (complementary) ~900 Da
1201
1202        // B-ion isotope pattern (normalized)
1203        let b_ion_dist = vec![
1204            (600.30, 0.58),
1205            (601.30, 0.28),
1206            (602.30, 0.10),
1207            (603.30, 0.03),
1208            (604.30, 0.01),
1209        ];
1210
1211        // Complementary (Y-ion like) isotope pattern
1212        let comp_dist = vec![
1213            (900.45, 0.48),
1214            (901.45, 0.30),
1215            (902.45, 0.14),
1216            (903.45, 0.06),
1217            (904.45, 0.02),
1218        ];
1219
1220        // Test various quad isolation scenarios
1221
1222        // Wide window: transmits M0, M+1, M+2
1223        let wide_window: HashSet<usize> = [0, 1, 2].iter().cloned().collect();
1224        let wide_result = calculate_transmission_dependent_distribution_with_factor(
1225            &b_ion_dist,
1226            &comp_dist,
1227            &wide_window,
1228            0,
1229        );
1230
1231        // Narrow window: transmits only M0, M+1
1232        let narrow_window: HashSet<usize> = [0, 1].iter().cloned().collect();
1233        let narrow_result = calculate_transmission_dependent_distribution_with_factor(
1234            &b_ion_dist,
1235            &comp_dist,
1236            &narrow_window,
1237            0,
1238        );
1239
1240        // Very narrow: only M0
1241        let very_narrow: HashSet<usize> = [0].iter().cloned().collect();
1242        let very_narrow_result = calculate_transmission_dependent_distribution_with_factor(
1243            &b_ion_dist,
1244            &comp_dist,
1245            &very_narrow,
1246            0,
1247        );
1248
1249        // Verify decreasing transmission with narrower windows
1250        assert!(
1251            wide_result.transmission_factor > narrow_result.transmission_factor,
1252            "Wide window should have higher transmission"
1253        );
1254        assert!(
1255            narrow_result.transmission_factor > very_narrow_result.transmission_factor,
1256            "Narrow window should have higher transmission than very narrow"
1257        );
1258
1259        // Verify all factors are in valid range (0, 1]
1260        assert!(wide_result.transmission_factor > 0.0 && wide_result.transmission_factor <= 1.0);
1261        assert!(narrow_result.transmission_factor > 0.0 && narrow_result.transmission_factor <= 1.0);
1262        assert!(very_narrow_result.transmission_factor > 0.0 && very_narrow_result.transmission_factor <= 1.0);
1263
1264        println!("Realistic peptide transmission test results:");
1265        println!("  Wide window (M0-M2) transmission factor: {:.4}", wide_result.transmission_factor);
1266        println!("  Narrow window (M0-M1) transmission factor: {:.4}", narrow_result.transmission_factor);
1267        println!("  Very narrow (M0 only) transmission factor: {:.4}", very_narrow_result.transmission_factor);
1268    }
1269}