use std::{iter::Peekable, str::CharIndices};
use lang_util::{TextRange, TextSize};
use crate::util::LineMap;
/// Character classes produced by [`NewlineSplitter`].
///
/// Every token covers a single character, except [`NEWLINE`](Self::NEWLINE)
/// which may cover a two-character line terminator.
// The all-caps variant names trip this lint; it is silenced rather than renaming.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u16)]
pub enum NewlineTokenKind {
    /// An ASCII alphabetic character.
    LETTER = 0,
    /// An ASCII decimal digit.
    DIGIT = 1,
    /// Any character that fits none of the other classes (non-ASCII included).
    PUNCT = 2,
    /// A `\r` or `\n`, or one of them immediately followed by the other.
    NEWLINE = 3,
    /// An ASCII whitespace character other than `\r` and `\n`.
    WS = 4,
}
/// Token type yielded by [`NewlineSplitter`]: a [`NewlineTokenKind`] wrapped
/// in the crate's `TextToken`, built from the kind and the text range it covers.
pub type NewlineToken = crate::util::TextToken<NewlineTokenKind>;
/// Iterator that walks an input string character by character, yielding one
/// [`NewlineToken`] per character (or per line terminator) and recording each
/// line start in a [`LineMap`].
#[derive(Debug, Clone)]
pub struct NewlineSplitter<'i> {
/// Size of the whole input; used as the end offset once no character remains.
end: TextSize,
/// Remaining characters paired with their byte offsets, with one-item lookahead.
chars: Peekable<CharIndices<'i>>,
/// Receives one `add_line` call per newline token, with the offset just past it.
line_map: LineMap,
}
impl<'i> NewlineSplitter<'i> {
    /// Creates a splitter positioned at the start of `input`, with a fresh
    /// line map.
    pub fn new(input: &'i str) -> Self {
        let end = TextSize::of(input);
        let chars = input.char_indices().peekable();
        let line_map = LineMap::new();

        Self {
            end,
            chars,
            line_map,
        }
    }

    /// Borrows the line map built from the newlines consumed so far.
    pub fn line_map(&self) -> &LineMap {
        &self.line_map
    }

    /// Consumes the splitter and hands back its line map.
    pub fn into_line_map(self) -> LineMap {
        let Self { line_map, .. } = self;
        line_map
    }

    /// Returns the range from `start_pos` up to the offset of the next
    /// character that has not been consumed yet, or up to the end of the
    /// input when nothing is left.
    ///
    /// Takes `&mut self` only because peeking requires it; no character is
    /// consumed.
    fn current_pos(&mut self, start_pos: usize) -> TextRange {
        // NOTE(review): offsets are narrowed with `as u32`; presumably
        // `TextSize::of` already rejects inputs that do not fit — confirm.
        let stop = match self.chars.peek() {
            Some(&(offset, _)) => TextSize::from(offset as u32),
            None => self.end,
        };

        TextRange::new(TextSize::from(start_pos as u32), stop)
    }
}
impl<'i> Iterator for NewlineSplitter<'i> {
    type Item = NewlineToken;

    /// Yields the token for the next character, or `None` at end of input.
    ///
    /// A `\r` or `\n` starts a `NEWLINE` token. If the character right after
    /// it is the *other* line-terminator character, that one is folded into
    /// the same token, so `\r\n` and `\n\r` each count as one newline while
    /// `\n\n` and `\r\r` count as two. The end offset of every newline token
    /// is added to the line map.
    ///
    /// Every other character becomes a one-character token classified as
    /// `LETTER`, `DIGIT`, `WS` or `PUNCT`, checked in that order.
    fn next(&mut self) -> Option<Self::Item> {
        use NewlineTokenKind::*;

        let (pos, ch) = self.chars.next()?;

        if ch == '\r' || ch == '\n' {
            // Only the complementary terminator can extend this newline.
            let partner = if ch == '\r' { '\n' } else { '\r' };
            if let Some(&(_, following)) = self.chars.peek() {
                if following == partner {
                    self.chars.next();
                }
            }

            // After the optional second character is consumed, the token ends
            // where the next unconsumed character starts (or at end of input).
            let range = self.current_pos(pos);
            self.line_map.add_line(range.end().into());
            return Some(NewlineToken::new(NEWLINE, range));
        }

        // `\r` and `\n` were handled above, so the whitespace test only sees
        // the remaining ASCII whitespace characters.
        let kind = if ch.is_ascii_alphabetic() {
            LETTER
        } else if ch.is_ascii_digit() {
            DIGIT
        } else if ch.is_ascii_whitespace() {
            WS
        } else {
            PUNCT
        };

        Some(NewlineToken::new(kind, self.current_pos(pos)))
    }
}