lang_util/
error.rs

//! Error type definitions

use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fmt;

use text_size::{TextRange, TextSize};

use crate::{located::Located, position::LexerPosition, token::Token, FileId};

/// Information about a lexed token
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(rserde::Serialize))]
#[cfg_attr(feature = "serde", serde(crate = "rserde"))]
pub struct TokenDescription {
    /// String representation of the lexed token
    pub formatted: String,

    /// Variant name
    pub variant_name: &'static str,

    /// Parser token name
    pub parser_token: &'static str,

    /// List of kinds this token belongs to
    pub kinds: &'static [&'static str],
}

impl<'t, T: Token> From<&'t T> for TokenDescription {
    fn from(token: &'t T) -> Self {
        Self {
            formatted: token.to_string(),
            variant_name: token.variant_name(),
            parser_token: token.parser_token(),
            kinds: token.kinds(),
        }
    }
}

impl fmt::Display for TokenDescription {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.formatted)
    }
}

/// Methods provided for all [Token] implementations
pub trait TokenExt {
    /// Return a descriptor for the current token
    fn description(&self) -> TokenDescription;
}

impl<T: Token> TokenExt for T {
    fn description(&self) -> TokenDescription {
        TokenDescription::from(self)
    }
}
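
// Usage sketch (illustrative): any type implementing [Token] gets `description()`
// through the blanket impl above. Assuming a hypothetical lexer token type
// `MyToken` that implements [Token]:
//
//     let token: MyToken = lex_next_token();
//     let desc = token.description();
//     assert_eq!(desc.formatted, token.to_string());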

/// An error produced by lexical analysis
pub trait LexicalError: Error {
    /// Return the location at which this error occurred
    ///
    /// # Returns
    ///
    /// A [LexerPosition] indicating at which offset in the input the error occurred, along with
    /// the length of the erroneous range.
    fn location(&self) -> (LexerPosition, TextSize);
}
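
// Sketch of a LexicalError implementor (hypothetical type; `LexerPosition`
// construction details omitted):
//
//     #[derive(Debug)]
//     struct MyLexError {
//         pos: LexerPosition,
//         len: TextSize,
//     }
//
//     // ... Display and Error impls for MyLexError ...
//
//     impl LexicalError for MyLexError {
//         fn location(&self) -> (LexerPosition, TextSize) {
//             (self.pos, self.len)
//         }
//     }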

/// A parsing error wrapped from lalrpop_util's error type
pub type ParseError<E> = Located<ParseErrorKind<E>>;

/// Return the source file and text range of a lalrpop_util::ParseError
pub fn error_location<T, E: LexicalError>(
    error: &lalrpop_util::ParseError<LexerPosition, T, E>,
) -> (FileId, TextRange) {
    let (location, len) = match error {
        // TODO: Find out invalid token length
        lalrpop_util::ParseError::InvalidToken { location } => (*location, TextSize::default()),
        lalrpop_util::ParseError::UnrecognizedEof { location, .. } => {
            (*location, TextSize::default())
        }
        lalrpop_util::ParseError::UnrecognizedToken { token, .. } => {
            (token.0, token.2.offset - token.0.offset)
        }
        lalrpop_util::ParseError::ExtraToken { token } => {
            (token.0, token.2.offset - token.0.offset)
        }
        lalrpop_util::ParseError::User { error } => error.location(),
    };

    (
        location.source_id,
        TextRange::new(location.offset, location.offset + len),
    )
}
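
// Illustrative use (sketch): mapping a parse failure to a diagnostic span. The
// `parse_result` value and `report_diagnostic` helper are assumed here, not
// provided by this crate:
//
//     if let Err(err) = parse_result {
//         let (file_id, range) = error_location(&err);
//         report_diagnostic(file_id, range, err.to_string());
//     }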

// We represent tokens as formatted strings since we only want to display them
/// Parsing error kind
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind<E: LexicalError> {
    /// An invalid token was encountered during lexical analysis
    InvalidToken,
    /// Unexpected end of file
    UnrecognizedEof {
        /// List of expected token names
        expected: Vec<String>,
    },
    /// Unexpected token
    UnrecognizedToken {
        /// The unexpected token
        token: TokenDescription,
        /// List of expected token names
        expected: Vec<String>,
    },
    /// Extra token after input
    ExtraToken {
        /// The extra token
        token: TokenDescription,
    },
    /// Lexical analysis error
    LexicalError {
        /// Lexical error
        error: E,
    },
}

impl<E: std::error::Error + LexicalError + 'static> std::error::Error for ParseErrorKind<E> {}

impl<T: Token, E: LexicalError> From<lalrpop_util::ParseError<LexerPosition, T, E>>
    for ParseErrorKind<E>
{
    fn from(error: lalrpop_util::ParseError<LexerPosition, T, E>) -> Self {
        // Build the closure that simplifies the list of expected tokens
        let simplify = || {
            // Lookup structure for token names and kinds
            let mut token_descriptors = HashMap::new();
            let mut token_kinds: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
            for descriptor in T::all_tokens() {
                // Add the token descriptor
                token_descriptors.insert(descriptor.parser_token, descriptor);

                // Add the token descriptor to the various kinds
                for kind in descriptor.kinds {
                    if let Some(existing) = token_kinds.get_mut(kind) {
                        existing.insert(descriptor.parser_token);
                    } else {
                        token_kinds
                            .insert(kind, std::iter::once(descriptor.parser_token).collect());
                    }
                }
            }

            move |expected: Vec<String>| -> Vec<String> {
                let expected: HashSet<_> = expected.iter().map(String::as_str).collect();
                let mut seen_tokens = HashSet::new();
                let mut result = Vec::new();

                for (kind, members) in &token_kinds {
                    if members.is_subset(&expected) {
                        // Add all the tokens of this kind as seen
                        seen_tokens.extend(members);
                        // Add the kind of token to the expected list
                        result.push(*kind);
                    }
                }

                // Some expected groups might be subsets of others, try to reduce this
                let mut delete = HashSet::new();
                for expected_set_name in &result {
                    for other_set_name in &result {
                        if expected_set_name != other_set_name
                            && token_kinds
                                .get(*expected_set_name)
                                .unwrap()
                                .is_subset(token_kinds.get(*other_set_name).unwrap())
                        {
                            delete.insert(expected_set_name);
                        }
                    }
                }

                // Remove extra subsets
                let mut result: Vec<_> = result
                    .iter()
                    .filter(|item| !delete.contains(item))
                    .collect();

                // Leftover tokens should still be expected
                for leftover in expected.difference(&seen_tokens) {
                    result.push(leftover);
                }

                // Sort the result for deterministic output
                result.sort_unstable_by(|a, b| {
                    // TODO: Standalone token kinds should be last
                    let a_spaces = a.contains(' ');
                    let b_spaces = b.contains(' ');
                    if a_spaces && b_spaces {
                        a.cmp(b)
                    } else if a_spaces {
                        Ordering::Less
                    } else if b_spaces {
                        Ordering::Greater
                    } else {
                        a.len().cmp(&b.len()).reverse().then_with(|| a.cmp(b))
                    }
                });

                result.into_iter().map(|it| (**it).to_string()).collect()
            }
        };

        // Map the error kind
        match error {
            lalrpop_util::ParseError::InvalidToken { .. } => ParseErrorKind::InvalidToken,
            lalrpop_util::ParseError::UnrecognizedEof { expected, .. } => {
                ParseErrorKind::UnrecognizedEof {
                    expected: simplify()(expected),
                }
            }
            lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
                ParseErrorKind::UnrecognizedToken {
                    token: token.1.description(),
                    expected: simplify()(expected),
                }
            }
            lalrpop_util::ParseError::ExtraToken { token } => ParseErrorKind::ExtraToken {
                token: token.1.description(),
            },
            lalrpop_util::ParseError::User { error } => ParseErrorKind::LexicalError { error },
        }
    }
}
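
// Illustrative effect of the simplification above, with hypothetical token kinds:
// if the grammar defines the kind `binary operator` as { "+", "-", "*", "/" } and
// lalrpop reports the expected tokens ["+", "-", "*", "/", "identifier"], the
// simplified list becomes ["binary operator", "identifier"]; kinds whose members
// are subsets of another expected kind are dropped as redundant.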

/// Helper to display a list of expected token names/kinds in error messages
struct ListDisplay<'s>(&'s [String]);
/// Helper to display a single token name or kind, quoting non-alphabetic tokens
struct KindDisplay<'s>(&'s str);

impl<'s> fmt::Display for KindDisplay<'s> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self
            .0
            .chars()
            .next()
            .map(char::is_alphabetic)
            .unwrap_or(false)
        {
            write!(f, "{}", self.0)
        } else {
            write!(f, "`{}`", self.0)
        }
    }
}

impl<'s> fmt::Display for ListDisplay<'s> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.0.is_empty() {
            write!(f, "nothing")
        } else {
            let first = self.0.first().unwrap();
            match first.chars().next() {
                Some('a') | Some('e') | Some('i') | Some('u') | Some('o') | Some('y') => {
                    write!(f, "an ")?
                }
                _ => write!(f, "a ")?,
            }

            write!(f, "{}", KindDisplay(first))?;

            let len = self.0.len();
            if len >= 2 {
                for rest in self.0.iter().skip(1).take(len - 2) {
                    write!(f, ", {}", KindDisplay(rest))?;
                }
            }

            if len > 1 {
                write!(f, " or {}", KindDisplay(self.0.last().unwrap()))?;
            }

            Ok(())
        }
    }
}

impl<E: LexicalError> fmt::Display for ParseErrorKind<E> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ParseErrorKind::InvalidToken => write!(f, "invalid token"),
            ParseErrorKind::UnrecognizedEof { expected } => {
                write!(
                    f,
                    "unexpected end of input, expected {}",
                    ListDisplay(expected)
                )
            }
            ParseErrorKind::UnrecognizedToken { token, expected } => {
                write!(
                    f,
                    "unexpected {}, expected {}",
                    token,
                    ListDisplay(expected)
                )
            }
            ParseErrorKind::ExtraToken { token } => {
                write!(f, "extra {} at end of input", token)
            }
            ParseErrorKind::LexicalError { error } => write!(f, "{}", error),
        }
    }
}
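
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal sanity-check sketch for the display helpers above; the expected
    // strings simply follow the formatting rules implemented in this module.
    #[test]
    fn list_display_formats_expected_tokens() {
        // An empty expected list reads as "nothing"
        assert_eq!(ListDisplay(&[]).to_string(), "nothing");

        // A single alphabetic kind gets an article and no backticks
        let single = vec!["identifier".to_owned()];
        assert_eq!(ListDisplay(&single).to_string(), "an identifier");

        // Non-alphabetic entries are backtick-quoted; the last item is joined with "or"
        let multiple = vec![
            "identifier".to_owned(),
            "integer".to_owned(),
            ";".to_owned(),
        ];
        assert_eq!(
            ListDisplay(&multiple).to_string(),
            "an identifier, integer or `;`"
        );
    }
}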