glsl_lang_pp/lexer/
newline.rs1use std::{iter::Peekable, str::CharIndices};
4
5use lang_util::{TextRange, TextSize};
6
7use crate::util::LineMap;
8
/// Coarse character class attached to every token produced by
/// [`NewlineSplitter`].
///
/// The discriminants are laid out as `u16` in declaration order, so the order
/// of the variants below must not be changed.
// The variants are deliberately spelled in all caps, which clippy would
// otherwise report as `upper_case_acronyms`.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u16)]
pub enum NewlineTokenKind {
    /// A single ASCII alphabetic character.
    LETTER,
    /// A single ASCII decimal digit.
    DIGIT,
    /// Any character that fits none of the other classes (this includes every
    /// non-ASCII character).
    PUNCT,
    /// A line terminator: a lone `\r`, a lone `\n`, or a `\r\n` / `\n\r` pair.
    NEWLINE,
    /// A single ASCII whitespace character other than `\r` and `\n`.
    WS,
}
20
21pub type NewlineToken = crate::util::TextToken<NewlineTokenKind>;
23
24#[derive(Debug, Clone)]
29pub struct NewlineSplitter<'i> {
30 end: TextSize,
31 chars: Peekable<CharIndices<'i>>,
32 line_map: LineMap,
33}
34
35impl<'i> NewlineSplitter<'i> {
36 pub fn new(input: &'i str) -> Self {
37 Self {
38 end: TextSize::of(input),
39 chars: input.char_indices().peekable(),
40 line_map: LineMap::new(),
41 }
42 }
43
44 pub fn line_map(&self) -> &LineMap {
45 &self.line_map
46 }
47
48 pub fn into_line_map(self) -> LineMap {
49 self.line_map
50 }
51
52 fn current_pos(&mut self, start_pos: usize) -> TextRange {
53 TextRange::new(
54 TextSize::from(start_pos as u32),
55 self.chars
56 .peek()
57 .map(|(pos, _)| TextSize::from(*pos as u32))
58 .unwrap_or(self.end),
59 )
60 }
61}
62
63impl<'i> Iterator for NewlineSplitter<'i> {
64 type Item = NewlineToken;
65
66 fn next(&mut self) -> Option<Self::Item> {
67 use NewlineTokenKind::*;
68
69 let c = self.chars.next();
70
71 match c {
76 Some((pos, ch)) if ch == '\r' || ch == '\n' => {
77 let range = if let Some((next_pos, next_ch)) = self.chars.peek() {
79 let end = if (*next_ch == '\r' || *next_ch == '\n') && *next_ch != ch {
81 self.chars.next();
82
83 self.chars
85 .peek()
86 .map(|(pos, _)| TextSize::from(*pos as u32))
87 .unwrap_or(self.end)
88 } else {
89 TextSize::from(*next_pos as u32)
90 };
91
92 TextRange::new(TextSize::from(pos as u32), end)
93 } else {
94 TextRange::new(TextSize::from(pos as u32), self.end)
96 };
97
98 self.line_map.add_line(range.end().into());
99 Some(NewlineToken::new(NEWLINE, range))
100 }
101 Some((pos, ch)) if ch.is_ascii_alphabetic() => {
102 Some(NewlineToken::new(LETTER, self.current_pos(pos)))
103 }
104 Some((pos, ch)) if ch.is_ascii_digit() => {
105 Some(NewlineToken::new(DIGIT, self.current_pos(pos)))
106 }
107 Some((pos, ch)) if ch.is_ascii_whitespace() => {
108 Some(NewlineToken::new(WS, self.current_pos(pos)))
110 }
111 Some((pos, _)) => Some(NewlineToken::new(PUNCT, self.current_pos(pos))),
112 None => None,
113 }
114 }
115}