glsl_lang_pp/util/
unescaped.rs

1use std::{
2    borrow::Cow,
3    fmt::Write,
4    iter::Peekable,
5    str::{CharIndices, MatchIndices},
6};
7
8use arrayvec::ArrayVec;
9
10use lang_util::SmolStr;
11
12#[derive(Debug, Clone, Copy)]
13pub struct Unescaped<'s> {
14    src: &'s str,
15}
16
17impl<'s> Unescaped<'s> {
18    pub fn new(src: &'s str) -> Self {
19        Self { src }
20    }
21
22    fn backslashes(&self) -> MatchIndices<'s, char> {
23        self.src.match_indices('\\')
24    }
25
26    pub fn chars(&self) -> UnescapeIter<'s> {
27        UnescapeIter {
28            chars: self.src.char_indices(),
29            backslashes: self.backslashes().peekable(),
30        }
31    }
32
33    pub fn to_string(self) -> Cow<'s, str> {
34        if self.backslashes().next().is_none() {
35            Cow::Borrowed(self.src)
36        } else {
37            Cow::Owned(self.chars().collect::<String>())
38        }
39    }
40}
41
42impl<'s> From<&'s str> for Unescaped<'s> {
43    fn from(value: &'s str) -> Self {
44        Self::new(value)
45    }
46}
47
48impl<'s> From<Unescaped<'s>> for SmolStr {
49    fn from(src: Unescaped<'s>) -> Self {
50        src.chars().collect()
51    }
52}
53
54impl<'s> PartialEq<&str> for Unescaped<'s> {
55    fn eq(&self, other: &&str) -> bool {
56        self.chars().eq(other.chars())
57    }
58}
59
60impl<'s> std::fmt::Display for Unescaped<'s> {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        for ch in self.chars() {
63            f.write_char(ch)?;
64        }
65
66        Ok(())
67    }
68}
69
70pub struct UnescapeIter<'s> {
71    chars: CharIndices<'s>,
72    backslashes: Peekable<MatchIndices<'s, char>>,
73}
74
75impl<'s> Iterator for UnescapeIter<'s> {
76    type Item = char;
77
78    fn next(&mut self) -> Option<Self::Item> {
79        loop {
80            if let Some((cont, _)) = self.backslashes.peek() {
81                // There is a continuation coming
82
83                // Create a peekable chars iterator
84                let mut chars_copy = self.chars.clone().peekable();
85
86                if let Some((i, _)) = chars_copy.peek() {
87                    if *i == *cont {
88                        // Consume this backslash match
89                        self.backslashes.next();
90
91                        // We are at the start of a potential continuation
92                        // Skip 1 char (the backslash character)
93                        // Collect 2 chars (worst case for a Windows CRLF)
94                        let chars: ArrayVec<_, 2> =
95                            chars_copy.map(|(_, ch)| ch).skip(1).take(2).collect();
96
97                        // Consume the backslash char
98                        self.chars.next();
99
100                        if chars.starts_with(&['\r', '\n']) || chars.starts_with(&['\n', '\r']) {
101                            // CRLF, advance thrice, loop again
102                            self.chars.next(); // \r
103                            self.chars.next(); // \n
104                        } else if chars.starts_with(&['\n']) || chars.starts_with(&['\r']) {
105                            // LF, advance twice, loop again
106                            self.chars.next(); // \n
107                        } else {
108                            // Stray backslash, just return as-is
109                            return Some('\\');
110                        }
111                    } else {
112                        // We haven't reached the continuation yet
113                        return self.chars.next().map(|(_, ch)| ch);
114                    }
115                } else {
116                    // Nothing left
117                    return None;
118                }
119            } else {
120                // No continuation, i.e. happy path
121                return self.chars.next().map(|(_, ch)| ch);
122            }
123        }
124    }
125}
126
127#[derive(Debug, Clone, PartialEq, Eq)]
128pub struct TokenText<'s>(TokenTextRepr<'s>);
129
130#[derive(Debug, Clone, PartialEq, Eq)]
131enum TokenTextRepr<'s> {
132    Raw(&'s str),
133    Unescaped(&'s str),
134    JustUnescaped(Cow<'s, str>),
135}
136
137impl<'s> TokenText<'s> {
138    pub fn push_str(&mut self, rest: TokenText<'_>) {
139        let mut self_text = self.to_owned_string();
140        self_text.push_str(rest.to_string().as_ref());
141        self.0 = TokenTextRepr::JustUnescaped(self_text.into());
142    }
143
144    pub fn raw(s: &'s str) -> Self {
145        Self(TokenTextRepr::Raw(s))
146    }
147
148    pub fn to_owned(&self) -> TokenText<'static> {
149        TokenText(TokenTextRepr::JustUnescaped(Cow::Owned(
150            self.to_owned_string(),
151        )))
152    }
153
154    fn to_owned_string(&self) -> String {
155        match &self.0 {
156            TokenTextRepr::Raw(s) => Unescaped::from(*s).chars().collect(),
157            TokenTextRepr::Unescaped(s) => s.to_owned().into(),
158            TokenTextRepr::JustUnescaped(s) => (**s).to_owned(),
159        }
160    }
161
162    pub fn to_string(&self) -> Cow<'s, str> {
163        match &self.0 {
164            TokenTextRepr::Raw(s) => Cow::Owned(Unescaped::from(*s).chars().collect()),
165            TokenTextRepr::Unescaped(s) => (*s).into(),
166            TokenTextRepr::JustUnescaped(s) => s.clone(),
167        }
168    }
169
170    pub fn into_unescaped(self) -> Self {
171        Self(match self.0 {
172            TokenTextRepr::Raw(s) => {
173                TokenTextRepr::JustUnescaped(Cow::Owned(Unescaped::from(s).chars().collect()))
174            }
175            TokenTextRepr::Unescaped(s) => TokenTextRepr::JustUnescaped(s.into()),
176            TokenTextRepr::JustUnescaped(s) => TokenTextRepr::JustUnescaped(s),
177        })
178    }
179
180    pub fn try_as_str(&'s self) -> Option<&'s str> {
181        match &self.0 {
182            TokenTextRepr::Raw(_) => None,
183            TokenTextRepr::Unescaped(s) => Some(*s),
184            TokenTextRepr::JustUnescaped(s) => Some((*s).as_ref()),
185        }
186    }
187
188    pub unsafe fn unescaped(s: &'s str) -> Self {
189        Self(TokenTextRepr::Unescaped(s))
190    }
191}
192
193impl<'s> std::fmt::Display for TokenText<'s> {
194    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195        match &self.0 {
196            TokenTextRepr::Raw(s) => write!(f, "{}", Unescaped::from(*s)),
197            TokenTextRepr::Unescaped(s) => write!(f, "{}", s),
198            TokenTextRepr::JustUnescaped(s) => write!(f, "{}", s),
199        }
200    }
201}
202
203impl<'s> From<TokenText<'s>> for SmolStr {
204    fn from(value: TokenText<'s>) -> Self {
205        match value.0 {
206            TokenTextRepr::Raw(raw) => Unescaped::from(raw).into(),
207            TokenTextRepr::Unescaped(unescaped) => unescaped.into(),
208            TokenTextRepr::JustUnescaped(unescaped) => unescaped.into(),
209        }
210    }
211}