// mscore/data/smiles.rs

/// A chemical modification identified by its UNIMOD accession number.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Modification {
    /// Numeric part of the accession, e.g. `21` for `UNIMOD:21`.
    pub unimod_id: usize,
}

/// One amino acid of a peptide together with its optional modification.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct AminoAcidResidue {
    /// Single-letter amino-acid code (`'A'`, `'C'`, `'D'`, ...).
    pub aa: char,
    /// Modification attached to this residue, or `None` when unmodified.
    pub modification: Option<Modification>,
}

/// A peptide split into its first residue, its last residue and everything in
/// between, plus optional modifications on either terminus.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PeptideRepresentation {
    /// Optional N-terminal modification.
    pub n_term_mod: Option<Modification>,
    /// First amino acid of the sequence.
    pub n_term_residue: AminoAcidResidue,
    /// Residues between the first and the last one, in sequence order.
    pub core_residues: Vec<AminoAcidResidue>,
    /// Last amino acid of the sequence.
    pub c_term_residue: AminoAcidResidue,
    /// Optional C-terminal modification.
    pub c_term_mod: Option<Modification>,
}
20
21impl PeptideRepresentation {
22    pub fn new(
23        n_term_mod: Option<Modification>,
24        n_term_residue: AminoAcidResidue,
25        core_residues: Vec<AminoAcidResidue>,
26        c_term_residue: AminoAcidResidue,
27        c_term_mod: Option<Modification>,
28    ) -> Self {
29        Self {
30            n_term_mod,
31            n_term_residue,
32            core_residues,
33            c_term_residue,
34            c_term_mod,
35        }
36    }
37    
38    pub fn to_string(&self) -> String {
39        let mut result = String::new();
40
41        if let Some(modification) = &self.n_term_mod {
42            result.push_str(&format!("[UNIMOD:{}]-", modification.unimod_id));
43        }
44
45        result.push(self.n_term_residue.aa);
46
47        for residue in &self.core_residues {
48            if let Some(modification) = &residue.modification {
49                result.push_str(&format!("[UNIMOD:{}]", modification.unimod_id));
50            }
51            result.push(residue.aa);
52        }
53
54        result.push(self.c_term_residue.aa);
55
56        if let Some(modification) = &self.c_term_mod {
57            result.push_str(&format!("-[UNIMOD:{}]", modification.unimod_id));
58        }
59
60        result
61    }
62    
63    pub fn from_string(input: &str) -> Result<Self, String> {
64        parse_peptide(input)
65    }
66}
67
68
69pub fn parse_peptide(input: &str) -> Result<PeptideRepresentation, String> {
70    let mut remaining = input.trim();
71
72    let mut n_term_mod = None;
73    let mut c_term_mod = None;
74
75    // Handle N-terminal modification
76    if remaining.starts_with('[') {
77        if let Some(end) = remaining.find("]-") {
78            let mod_str = &remaining[1..end]; // inside [ ]
79            if !mod_str.is_empty() {
80                n_term_mod = Some(parse_modification(mod_str)?);
81            }
82            remaining = &remaining[end+2..]; // Skip "]‑"
83        } else {
84            return Err("Invalid N-terminal modification syntax".to_string());
85        }
86    }
87
88    // Handle C-terminal modification
89    if let Some(pos) = remaining.rfind("-[") {
90        if remaining.ends_with(']') {
91            let mod_str = &remaining[pos+2..remaining.len()-1]; // inside [ ]
92            if !mod_str.is_empty() {
93                c_term_mod = Some(parse_modification(mod_str)?);
94            }
95            remaining = &remaining[..pos]; // cut away "-[mod]"
96        } else {
97            return Err("Invalid C-terminal modification syntax".to_string());
98        }
99    }
100
101    // Now parse sequence and per-residue mods
102    let mut chars = remaining.chars().peekable();
103    let mut residues = Vec::new();
104
105    while let Some(c) = chars.next() {
106        if c == '[' {
107            return Err("Unexpected '[' before amino acid".to_string());
108        }
109
110        if !c.is_ascii_alphabetic() {
111            return Err(format!("Unexpected character '{}'", c));
112        }
113
114        let mut modification = None;
115
116        if let Some('[') = chars.peek() {
117            chars.next(); // consume '['
118
119            let mut mod_buf = String::new();
120            while let Some(&ch) = chars.peek() {
121                if ch == ']' {
122                    chars.next(); // consume ']'
123                    break;
124                }
125                mod_buf.push(ch);
126                chars.next();
127            }
128
129            if !mod_buf.is_empty() {
130                modification = Some(parse_modification(&mod_buf)?);
131            }
132        }
133
134        residues.push(AminoAcidResidue { aa: c, modification });
135    }
136
137    if residues.len() < 2 {
138        return Err("Peptide must have at least two residues.".to_string());
139    }
140
141    let n_term_residue = residues.remove(0);
142    let c_term_residue = residues.pop().unwrap();
143    let core_residues = residues;
144
145    Ok(PeptideRepresentation {
146        n_term_mod,
147        n_term_residue,
148        core_residues,
149        c_term_residue,
150        c_term_mod,
151    })
152}
153
154fn parse_modification(mod_str: &str) -> Result<Modification, String> {
155    if let Some(rest) = mod_str.strip_prefix("UNIMOD:") {
156        rest.parse::<usize>()
157            .map(|id| Modification { unimod_id: id })
158            .map_err(|_| format!("Invalid UNIMOD ID: {}", mod_str))
159    } else if let Some(rest) = mod_str.strip_prefix("U:") {
160        rest.parse::<usize>()
161            .map(|id| Modification { unimod_id: id })
162            .map_err(|_| format!("Invalid U: ID: {}", mod_str))
163    } else {
164        Err(format!("Unsupported modification format: {}", mod_str))
165    }
166}