use std::cmp::Ordering;
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fmt;
use text_size::{TextRange, TextSize};
use crate::{located::Located, position::LexerPosition, token::Token, FileId};
/// Owned snapshot of a token's descriptive data, independent of the concrete
/// token type it was taken from.
///
/// Built from any `Token` via `From<&T>`; its `Display` impl prints
/// [`formatted`](Self::formatted).
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(rserde::Serialize))]
#[cfg_attr(feature = "serde", serde(crate = "rserde"))]
pub struct TokenDescription {
/// The token rendered through its `to_string()`.
pub formatted: String,
/// Value returned by the token's `variant_name()`.
pub variant_name: &'static str,
/// Value returned by the token's `parser_token()`.
pub parser_token: &'static str,
/// Value returned by the token's `kinds()`.
pub kinds: &'static [&'static str],
}
impl<'t, T: Token> From<&'t T> for TokenDescription {
    /// Captures the display text and the static metadata of `token`.
    fn from(token: &'t T) -> Self {
        let formatted = token.to_string();
        let variant_name = token.variant_name();
        let parser_token = token.parser_token();
        let kinds = token.kinds();
        Self {
            formatted,
            variant_name,
            parser_token,
            kinds,
        }
    }
}
impl fmt::Display for TokenDescription {
    /// Writes the pre-rendered token text verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `write_str` (not `pad`) so that width/precision flags on the outer
        // formatter are ignored, exactly as with `write!(f, "{}", ..)`.
        f.write_str(&self.formatted)
    }
}
/// Extension trait that produces a [`TokenDescription`] for a value.
pub trait TokenExt {
/// Returns an owned description of `self`.
fn description(&self) -> TokenDescription;
}
impl<T: Token> TokenExt for T {
    /// Describes the token through the `From<&T>` conversion.
    fn description(&self) -> TokenDescription {
        self.into()
    }
}
/// An error that knows where in the input it occurred.
///
/// This is the error type carried by the `User` variant of the lalrpop parse
/// errors handled in this file.
pub trait LexicalError: Error {
/// Returns the position the error starts at, together with a length.
///
/// `error_location` turns the pair into a `TextRange` that begins at the
/// position's offset and spans the returned length.
fn location(&self) -> (LexerPosition, TextSize);
}
/// A [`ParseErrorKind`] wrapped in [`Located`].
pub type ParseError<E> = Located<ParseErrorKind<E>>;
/// Computes the file and text range a lalrpop parse error refers to.
///
/// * `InvalidToken` / `UnrecognizedEof`: an empty range at the reported location.
/// * `UnrecognizedToken` / `ExtraToken`: the range from the token's start
///   position to its end position.
/// * `User`: whatever the wrapped error's [`LexicalError::location`] reports.
///
/// The returned `FileId` is the `source_id` of the start position.
pub fn error_location<T, E: LexicalError>(
    error: &lalrpop_util::ParseError<LexerPosition, T, E>,
) -> (FileId, TextRange) {
    let (start, len) = match error {
        lalrpop_util::ParseError::InvalidToken { location }
        | lalrpop_util::ParseError::UnrecognizedEof { location, .. } => {
            (*location, TextSize::default())
        }
        lalrpop_util::ParseError::UnrecognizedToken {
            token: (begin, _, end),
            ..
        }
        | lalrpop_util::ParseError::ExtraToken {
            token: (begin, _, end),
        } => (*begin, end.offset - begin.offset),
        lalrpop_util::ParseError::User { error } => error.location(),
    };
    let range = TextRange::new(start.offset, start.offset + len);
    (start.source_id, range)
}
/// The kinds of parse error, mirroring the variants of
/// `lalrpop_util::ParseError` but with owned, token-type-independent payloads.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind<E: LexicalError> {
/// Converted from lalrpop's `InvalidToken`; displayed as "invalid token".
InvalidToken,
/// The input ended while the parser still expected something.
UnrecognizedEof {
/// The expected items, after the simplification done in the `From` conversion.
expected: Vec<String>,
},
/// A token appeared that the parser could not accept at that point.
UnrecognizedToken {
/// Description of the offending token.
token: TokenDescription,
/// The expected items, after the simplification done in the `From` conversion.
expected: Vec<String>,
},
/// Converted from lalrpop's `ExtraToken`; displayed as "extra … at end of input".
ExtraToken {
/// Description of the surplus token.
token: TokenDescription,
},
/// Converted from lalrpop's `User` variant, carrying the custom error unchanged.
LexicalError {
/// The wrapped error; `Display` prints it as-is.
error: E,
},
}
impl<E: std::error::Error + LexicalError + 'static> std::error::Error for ParseErrorKind<E> {}
impl<T: Token, E: LexicalError> From<lalrpop_util::ParseError<LexerPosition, T, E>>
    for ParseErrorKind<E>
{
    /// Converts a raw lalrpop error into a [`ParseErrorKind`].
    ///
    /// Tokens are replaced by their [`TokenDescription`], and the list of
    /// expected parser tokens is simplified:
    ///
    /// 1. Every token kind (as listed by `T::all_tokens()`) whose members are
    ///    *all* expected replaces those members by the kind's name.
    /// 2. A kind whose member set is strictly contained in another reported
    ///    kind is dropped. If two kinds have exactly the same members, only
    ///    the name that sorts first is kept.
    /// 3. Expected tokens not covered by any reported kind are kept verbatim.
    /// 4. The result is ordered: entries containing a space first
    ///    (alphabetically), then the rest longest-first with alphabetical
    ///    tie-breaking.
    fn from(error: lalrpop_util::ParseError<LexerPosition, T, E>) -> Self {
        // Wrapped in a closure so the kind table is only built for the
        // variants that actually carry an `expected` list.
        let simplify = || {
            // kind name -> parser tokens belonging to that kind
            let mut token_kinds: HashMap<&'static str, HashSet<&'static str>> = HashMap::new();
            for descriptor in T::all_tokens() {
                for kind in descriptor.kinds {
                    token_kinds
                        .entry(kind)
                        .or_default()
                        .insert(descriptor.parser_token);
                }
            }
            move |expected: Vec<String>| -> Vec<String> {
                let expected: HashSet<_> = expected.iter().map(String::as_str).collect();
                let mut seen_tokens = HashSet::new();
                // Kinds whose members are all expected, with their member sets.
                let mut matched = Vec::new();
                for (kind, members) in &token_kinds {
                    if members.is_subset(&expected) {
                        seen_tokens.extend(members);
                        matched.push((*kind, members));
                    }
                }
                let mut result: Vec<&str> = matched
                    .iter()
                    .filter(|&&(name, members)| {
                        let subsumed = matched.iter().any(|&(other_name, other)| {
                            if members.len() == other.len() {
                                // Same size: redundant only if the sets are
                                // identical, and then exactly one of the two
                                // names must survive. Dropping both would
                                // silently lose the expectation, because the
                                // members are already marked as seen.
                                other_name < name && members == other
                            } else {
                                // Different size: `is_subset` can only hold
                                // when `members` is strictly smaller.
                                members.is_subset(other)
                            }
                        });
                        !subsumed
                    })
                    .map(|&(name, _)| name)
                    .collect();
                // Tokens that no reported kind accounts for are listed as-is.
                result.extend(expected.difference(&seen_tokens).copied());
                result.sort_unstable_by(|a, b| {
                    let a_spaces = a.contains(' ');
                    let b_spaces = b.contains(' ');
                    if a_spaces && b_spaces {
                        a.cmp(b)
                    } else if a_spaces {
                        Ordering::Less
                    } else if b_spaces {
                        Ordering::Greater
                    } else {
                        a.len().cmp(&b.len()).reverse().then_with(|| a.cmp(b))
                    }
                });
                result.into_iter().map(str::to_owned).collect()
            }
        };
        match error {
            lalrpop_util::ParseError::InvalidToken { .. } => ParseErrorKind::InvalidToken,
            lalrpop_util::ParseError::UnrecognizedEof { expected, .. } => {
                ParseErrorKind::UnrecognizedEof {
                    expected: simplify()(expected),
                }
            }
            lalrpop_util::ParseError::UnrecognizedToken { token, expected } => {
                ParseErrorKind::UnrecognizedToken {
                    token: token.1.description(),
                    expected: simplify()(expected),
                }
            }
            lalrpop_util::ParseError::ExtraToken { token } => ParseErrorKind::ExtraToken {
                token: token.1.description(),
            },
            lalrpop_util::ParseError::User { error } => ParseErrorKind::LexicalError { error },
        }
    }
}
/// Displays a list of expected items as an English alternative, e.g.
/// "an identifier, `(` or string". An empty list is displayed as "nothing".
struct ListDisplay<'s>(&'s [String]);

/// Displays a single expected item: text starting with an alphabetic
/// character is printed bare, anything else is wrapped in backticks.
struct KindDisplay<'s>(&'s str);

impl<'s> fmt::Display for KindDisplay<'s> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // An empty string has no first character and is therefore quoted.
        if self.0.starts_with(char::is_alphabetic) {
            write!(f, "{}", self.0)
        } else {
            write!(f, "`{}`", self.0)
        }
    }
}

impl<'s> fmt::Display for ListDisplay<'s> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (first, rest) = match self.0.split_first() {
            Some(parts) => parts,
            None => return write!(f, "nothing"),
        };
        // The indefinite article is chosen from the first item only. Vowels
        // are matched case-insensitively; `y` takes "a" ("a yield").
        let initial = first.chars().next().map(|c| c.to_ascii_lowercase());
        let article = match initial {
            Some('a') | Some('e') | Some('i') | Some('o') | Some('u') => "an",
            _ => "a",
        };
        write!(f, "{} {}", article, KindDisplay(first))?;
        // Middle items are comma-separated; the last one is joined with "or".
        if let Some((last, middle)) = rest.split_last() {
            for item in middle {
                write!(f, ", {}", KindDisplay(item))?;
            }
            write!(f, " or {}", KindDisplay(last))?;
        }
        Ok(())
    }
}
impl<E: LexicalError> fmt::Display for ParseErrorKind<E> {
    /// Renders the error as a single-line, human-readable message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidToken => f.write_str("invalid token"),
            Self::UnrecognizedEof { expected } => {
                let wanted = ListDisplay(expected);
                write!(f, "unexpected end of input, expected {}", wanted)
            }
            Self::UnrecognizedToken { token, expected } => {
                let wanted = ListDisplay(expected);
                write!(f, "unexpected {}, expected {}", token, wanted)
            }
            Self::ExtraToken { token } => write!(f, "extra {} at end of input", token),
            // A wrapped error speaks for itself.
            Self::LexicalError { error } => write!(f, "{}", error),
        }
    }
}