/// A modification identified by its numeric UNIMOD ID.
///
/// The ID is the integer that follows the `UNIMOD:` (or `U:`) prefix in the
/// bracketed text notation handled by this file.
///
/// The type is a plain ID wrapper, so it is cheap to copy and can be compared
/// and hashed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Modification {
    /// Numeric UNIMOD identifier.
    pub unimod_id: usize,
}
5
6#[derive(Debug)]
7pub struct AminoAcidResidue {
8 pub aa: char, pub modification: Option<Modification>, }
11
12#[derive(Debug)]
13pub struct PeptideRepresentation {
14 pub n_term_mod: Option<Modification>, pub n_term_residue: AminoAcidResidue, pub core_residues: Vec<AminoAcidResidue>, pub c_term_residue: AminoAcidResidue, pub c_term_mod: Option<Modification>, }
20
21impl PeptideRepresentation {
22 pub fn new(
23 n_term_mod: Option<Modification>,
24 n_term_residue: AminoAcidResidue,
25 core_residues: Vec<AminoAcidResidue>,
26 c_term_residue: AminoAcidResidue,
27 c_term_mod: Option<Modification>,
28 ) -> Self {
29 Self {
30 n_term_mod,
31 n_term_residue,
32 core_residues,
33 c_term_residue,
34 c_term_mod,
35 }
36 }
37
38 pub fn to_string(&self) -> String {
39 let mut result = String::new();
40
41 if let Some(modification) = &self.n_term_mod {
42 result.push_str(&format!("[UNIMOD:{}]-", modification.unimod_id));
43 }
44
45 result.push(self.n_term_residue.aa);
46
47 for residue in &self.core_residues {
48 if let Some(modification) = &residue.modification {
49 result.push_str(&format!("[UNIMOD:{}]", modification.unimod_id));
50 }
51 result.push(residue.aa);
52 }
53
54 result.push(self.c_term_residue.aa);
55
56 if let Some(modification) = &self.c_term_mod {
57 result.push_str(&format!("-[UNIMOD:{}]", modification.unimod_id));
58 }
59
60 result
61 }
62
63 pub fn from_string(input: &str) -> Result<Self, String> {
64 parse_peptide(input)
65 }
66}
67
68
69pub fn parse_peptide(input: &str) -> Result<PeptideRepresentation, String> {
70 let mut remaining = input.trim();
71
72 let mut n_term_mod = None;
73 let mut c_term_mod = None;
74
75 if remaining.starts_with('[') {
77 if let Some(end) = remaining.find("]-") {
78 let mod_str = &remaining[1..end]; if !mod_str.is_empty() {
80 n_term_mod = Some(parse_modification(mod_str)?);
81 }
82 remaining = &remaining[end+2..]; } else {
84 return Err("Invalid N-terminal modification syntax".to_string());
85 }
86 }
87
88 if let Some(pos) = remaining.rfind("-[") {
90 if remaining.ends_with(']') {
91 let mod_str = &remaining[pos+2..remaining.len()-1]; if !mod_str.is_empty() {
93 c_term_mod = Some(parse_modification(mod_str)?);
94 }
95 remaining = &remaining[..pos]; } else {
97 return Err("Invalid C-terminal modification syntax".to_string());
98 }
99 }
100
101 let mut chars = remaining.chars().peekable();
103 let mut residues = Vec::new();
104
105 while let Some(c) = chars.next() {
106 if c == '[' {
107 return Err("Unexpected '[' before amino acid".to_string());
108 }
109
110 if !c.is_ascii_alphabetic() {
111 return Err(format!("Unexpected character '{}'", c));
112 }
113
114 let mut modification = None;
115
116 if let Some('[') = chars.peek() {
117 chars.next(); let mut mod_buf = String::new();
120 while let Some(&ch) = chars.peek() {
121 if ch == ']' {
122 chars.next(); break;
124 }
125 mod_buf.push(ch);
126 chars.next();
127 }
128
129 if !mod_buf.is_empty() {
130 modification = Some(parse_modification(&mod_buf)?);
131 }
132 }
133
134 residues.push(AminoAcidResidue { aa: c, modification });
135 }
136
137 if residues.len() < 2 {
138 return Err("Peptide must have at least two residues.".to_string());
139 }
140
141 let n_term_residue = residues.remove(0);
142 let c_term_residue = residues.pop().unwrap();
143 let core_residues = residues;
144
145 Ok(PeptideRepresentation {
146 n_term_mod,
147 n_term_residue,
148 core_residues,
149 c_term_residue,
150 c_term_mod,
151 })
152}
153
154fn parse_modification(mod_str: &str) -> Result<Modification, String> {
155 if let Some(rest) = mod_str.strip_prefix("UNIMOD:") {
156 rest.parse::<usize>()
157 .map(|id| Modification { unimod_id: id })
158 .map_err(|_| format!("Invalid UNIMOD ID: {}", mod_str))
159 } else if let Some(rest) = mod_str.strip_prefix("U:") {
160 rest.parse::<usize>()
161 .map(|id| Modification { unimod_id: id })
162 .map_err(|_| format!("Invalid U: ID: {}", mod_str))
163 } else {
164 Err(format!("Unsupported modification format: {}", mod_str))
165 }
166}