1use std::cmp::Ordering;
4use std::collections::{HashMap, HashSet};
5use std::error::Error;
6use std::fmt;
7
8use text_size::{TextRange, TextSize};
9
10use crate::{located::Located, position::LexerPosition, token::Token, FileId};
11
/// An owned summary of a token, detached from the concrete token type.
///
/// Built from any [`Token`] through the `From<&T>` impl, which copies the
/// results of the token's own accessor methods into the fields below.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(rserde::Serialize))]
#[cfg_attr(feature = "serde", serde(crate = "rserde"))]
pub struct TokenDescription {
    /// The token rendered through `to_string()`; this is what `Display` prints.
    pub formatted: String,

    /// The value returned by the token's `variant_name()`.
    pub variant_name: &'static str,

    /// The value returned by the token's `parser_token()`.
    // NOTE(review): presumably the name the generated parser uses for this
    // token in its `expected` lists — confirm against the `Token` trait.
    pub parser_token: &'static str,

    /// The value returned by the token's `kinds()`: the kind names this token
    /// is a member of.
    pub kinds: &'static [&'static str],
}
29
30impl<'t, T: Token> From<&'t T> for TokenDescription {
31 fn from(token: &'t T) -> Self {
32 Self {
33 formatted: token.to_string(),
34 variant_name: token.variant_name(),
35 parser_token: token.parser_token(),
36 kinds: token.kinds(),
37 }
38 }
39}
40
41impl fmt::Display for TokenDescription {
42 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
43 write!(f, "{}", self.formatted)
44 }
45}
46
/// Extension trait offering a method-call shortcut for building a
/// [`TokenDescription`].
pub trait TokenExt {
    /// Returns an owned description of `self`.
    fn description(&self) -> TokenDescription;
}
52
53impl<T: Token> TokenExt for T {
54 fn description(&self) -> TokenDescription {
55 TokenDescription::from(self)
56 }
57}
58
/// A user-defined error (the payload of `lalrpop_util::ParseError::User`)
/// that can report where in the source it applies.
pub trait LexicalError: Error {
    /// Returns the position at which the error starts together with the
    /// length of the affected span; `error_location` combines the two into a
    /// `TextRange`.
    fn location(&self) -> (LexerPosition, TextSize);
}
69
/// A [`ParseErrorKind`] wrapped in [`Located`].
// NOTE(review): `Located` presumably attaches the source location to the
// error kind — confirm against `crate::located`.
pub type ParseError<E> = Located<ParseErrorKind<E>>;
72
73pub fn error_location<T, E: LexicalError>(
75 error: &lalrpop_util::ParseError<LexerPosition, T, E>,
76) -> (FileId, TextRange) {
77 let (location, len) = match error {
78 lalrpop_util::ParseError::InvalidToken { location } => (*location, TextSize::default()),
80 lalrpop_util::ParseError::UnrecognizedEof { location, .. } => {
81 (*location, TextSize::default())
82 }
83 lalrpop_util::ParseError::UnrecognizedToken { token, .. } => {
84 (token.0, token.2.offset - token.0.offset)
85 }
86 lalrpop_util::ParseError::ExtraToken { token } => {
87 (token.0, token.2.offset - token.0.offset)
88 }
89 lalrpop_util::ParseError::User { error } => error.location(),
90 };
91
92 (
93 location.source_id,
94 TextRange::new(location.offset, location.offset + len),
95 )
96}
97
/// The kinds of error a parse can end with, mirroring the variants of
/// `lalrpop_util::ParseError` but without location information and with
/// tokens replaced by owned [`TokenDescription`]s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind<E: LexicalError> {
    /// Converted from `lalrpop_util::ParseError::InvalidToken`; displayed as
    /// "invalid token".
    InvalidToken,
    /// The input ended while more tokens were still expected.
    UnrecognizedEof {
        /// What would have been accepted instead, after the `From` conversion
        /// has simplified the parser's raw list.
        expected: Vec<String>,
    },
    /// A token was found that is not allowed at this point.
    UnrecognizedToken {
        /// The token that was found.
        token: TokenDescription,
        /// What would have been accepted instead, after the `From` conversion
        /// has simplified the parser's raw list.
        expected: Vec<String>,
    },
    /// Converted from `lalrpop_util::ParseError::ExtraToken`; displayed as
    /// "extra … at end of input".
    ExtraToken {
        /// The surplus token.
        token: TokenDescription,
    },
    /// A user-defined error, converted from `lalrpop_util::ParseError::User`.
    LexicalError {
        /// The underlying error; its `Display` output is used as the message.
        error: E,
    },
}
127
128impl<E: std::error::Error + LexicalError + 'static> std::error::Error for ParseErrorKind<E> {}
129
130impl<T: Token, E: LexicalError> From<lalrpop_util::ParseError<LexerPosition, T, E>>
131 for ParseErrorKind<E>
132{
133 fn from(error: lalrpop_util::ParseError<LexerPosition, T, E>) -> Self {
134 let simplify = || {
136 let mut token_descriptors = HashMap::new();
138 let mut token_kinds: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
139 for descriptor in T::all_tokens() {
140 token_descriptors.insert(descriptor.parser_token, descriptor);
142
143 for kind in descriptor.kinds {
145 if let Some(existing) = token_kinds.get_mut(kind) {
146 existing.insert(descriptor.parser_token);
147 } else {
148 token_kinds
149 .insert(kind, std::iter::once(descriptor.parser_token).collect());
150 }
151 }
152 }
153
154 move |expected: Vec<String>| -> Vec<String> {
155 let expected: HashSet<_> = expected.iter().map(String::as_str).collect();
156 let mut seen_tokens = HashSet::new();
157 let mut result = Vec::new();
158
159 for (kind, members) in &token_kinds {
160 if members.is_subset(&expected) {
161 seen_tokens.extend(members);
163 result.push(*kind);
165 }
166 }
167
168 let mut delete = HashSet::new();
170 for expected_set_name in &result {
171 for other_set_name in &result {
172 if expected_set_name != other_set_name
173 && token_kinds
174 .get(*expected_set_name)
175 .unwrap()
176 .is_subset(token_kinds.get(*other_set_name).unwrap())
177 {
178 delete.insert(expected_set_name);
179 }
180 }
181 }
182
183 let mut result: Vec<_> = result
185 .iter()
186 .filter(|item| !delete.contains(item))
187 .collect();
188
189 for leftover in expected.difference(&seen_tokens) {
191 result.push(leftover);
192 }
193
194 result.sort_unstable_by(|a, b| {
196 let a_spaces = a.contains(' ');
198 let b_spaces = b.contains(' ');
199 if a_spaces && b_spaces {
200 a.cmp(b)
201 } else if a_spaces {
202 Ordering::Less
203 } else if b_spaces {
204 Ordering::Greater
205 } else {
206 a.len().cmp(&b.len()).reverse().then_with(|| a.cmp(b))
207 }
208 });
209
210 result.into_iter().map(|it| (**it).to_string()).collect()
211 }
212 };
213
214 match error {
216 lalrpop_util::ParseError::InvalidToken { .. } => ParseErrorKind::InvalidToken,
217 lalrpop_util::ParseError::UnrecognizedEof { expected, .. } => {
218 ParseErrorKind::UnrecognizedEof {
219 expected: simplify()(expected),
220 }
221 }
222 lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
223 ParseErrorKind::UnrecognizedToken {
224 token: token.1.description(),
225 expected: simplify()(expected),
226 }
227 }
228 lalrpop_util::ParseError::ExtraToken { token } => ParseErrorKind::ExtraToken {
229 token: token.1.description(),
230 },
231 lalrpop_util::ParseError::User { error } => ParseErrorKind::LexicalError { error },
232 }
233 }
234}
235
/// Displays a list of expected items as an English enumeration such as
/// ``an identifier, `(` or string``.
struct ListDisplay<'s>(&'s [String]);

/// Displays a single item: as-is when it starts with an alphabetic character,
/// otherwise wrapped in backticks.
struct KindDisplay<'s>(&'s str);

impl<'s> fmt::Display for KindDisplay<'s> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let starts_with_letter = matches!(self.0.chars().next(), Some(c) if c.is_alphabetic());
        if starts_with_letter {
            write!(f, "{}", self.0)
        } else {
            // Symbols (and the empty string) are quoted so they stand out in prose.
            write!(f, "`{}`", self.0)
        }
    }
}

impl<'s> fmt::Display for ListDisplay<'s> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (first, rest) = match self.0.split_first() {
            Some(parts) => parts,
            None => return f.write_str("nothing"),
        };

        // The indefinite article is chosen from the first item's first character.
        let article = match first.chars().next() {
            Some('a' | 'e' | 'i' | 'u' | 'o' | 'y') => "an",
            _ => "a",
        };
        write!(f, "{} {}", article, KindDisplay(first))?;

        // Everything between the first and the last item is comma-separated;
        // the last item is joined with "or".
        if let Some((last, middle)) = rest.split_last() {
            for item in middle {
                write!(f, ", {}", KindDisplay(item))?;
            }
            write!(f, " or {}", KindDisplay(last))?;
        }

        Ok(())
    }
}
285
286impl<E: LexicalError> fmt::Display for ParseErrorKind<E> {
287 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288 match self {
289 ParseErrorKind::InvalidToken => write!(f, "invalid token"),
290 ParseErrorKind::UnrecognizedEof { expected } => {
291 write!(
292 f,
293 "unexpected end of input, expected {}",
294 ListDisplay(expected)
295 )
296 }
297 ParseErrorKind::UnrecognizedToken { token, expected } => {
298 write!(
299 f,
300 "unexpected {}, expected {}",
301 token,
302 ListDisplay(expected)
303 )
304 }
305 ParseErrorKind::ExtraToken { token } => {
306 write!(f, "extra {} at end of input", token)
307 }
308 ParseErrorKind::LexicalError { error } => write!(f, "{}", error),
309 }
310 }
311}