glsl_lang_pp/lexer/
newline.rs

1//! First stage lexer declaration
2
3use std::{iter::Peekable, str::CharIndices};
4
5use lang_util::{TextRange, TextSize};
6
7use crate::util::LineMap;
8
/// Type of token for line splitting
///
/// Each variant is a character class assigned by the first stage lexer. The discriminants are
/// stored as `u16` and follow the declaration order.
// Variant names are written in all caps, which clippy would otherwise flag.
#[repr(u16)]
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NewlineTokenKind {
    /// ASCII letter (`a`-`z`, `A`-`Z`)
    LETTER = 0,
    /// ASCII decimal digit (`0`-`9`)
    DIGIT = 1,
    /// Any character that is not a letter, digit, line terminator or ASCII whitespace
    /// (this includes `_` and all non-ASCII characters)
    PUNCT = 2,
    /// Line terminator: a lone `\r` or `\n`, or one of each back to back
    NEWLINE = 3,
    /// ASCII whitespace other than `\r` and `\n`
    WS = 4,
}
20
/// First stage token with location
///
/// Built from a [`NewlineTokenKind`] and the text range the token covers in the input. This is
/// the item type yielded by [`NewlineSplitter`].
pub type NewlineToken = crate::util::TextToken<NewlineTokenKind>;
23
/// Basic lexer to split input lines according to the GLSL spec
///
/// A line terminator is a single `\r` or `\n`, or one of each back to back (`\r\n` or `\n\r`),
/// and is reported as one `NEWLINE` token. Every other character becomes its own token,
/// classified following the kinds declared in [`NewlineTokenKind`].
#[derive(Debug, Clone)]
pub struct NewlineSplitter<'i> {
    /// `TextSize::of` the whole input; used as the end offset when no character follows
    end: TextSize,
    /// Remaining characters of the input, paired with their byte offsets
    chars: Peekable<CharIndices<'i>>,
    /// Line map that is given the end offset of every newline token produced so far
    line_map: LineMap,
}
34
35impl<'i> NewlineSplitter<'i> {
36    pub fn new(input: &'i str) -> Self {
37        Self {
38            end: TextSize::of(input),
39            chars: input.char_indices().peekable(),
40            line_map: LineMap::new(),
41        }
42    }
43
44    pub fn line_map(&self) -> &LineMap {
45        &self.line_map
46    }
47
48    pub fn into_line_map(self) -> LineMap {
49        self.line_map
50    }
51
52    fn current_pos(&mut self, start_pos: usize) -> TextRange {
53        TextRange::new(
54            TextSize::from(start_pos as u32),
55            self.chars
56                .peek()
57                .map(|(pos, _)| TextSize::from(*pos as u32))
58                .unwrap_or(self.end),
59        )
60    }
61}
62
63impl<'i> Iterator for NewlineSplitter<'i> {
64    type Item = NewlineToken;
65
66    fn next(&mut self) -> Option<Self::Item> {
67        use NewlineTokenKind::*;
68
69        let c = self.chars.next();
70
71        // GLSL spec: Lines are relevant for compiler diagnostic messages and the
72        // preprocessor.  They are terminated by carriage-return or line-feed. If both
73        // are used together, it will count as only a single line termination.
74
75        match c {
76            Some((pos, ch)) if ch == '\r' || ch == '\n' => {
77                // Advance to next char if it's also part of the newline
78                let range = if let Some((next_pos, next_ch)) = self.chars.peek() {
79                    // End boundary of the newline token
80                    let end = if (*next_ch == '\r' || *next_ch == '\n') && *next_ch != ch {
81                        self.chars.next();
82
83                        // Peek to get the next char boundary
84                        self.chars
85                            .peek()
86                            .map(|(pos, _)| TextSize::from(*pos as u32))
87                            .unwrap_or(self.end)
88                    } else {
89                        TextSize::from(*next_pos as u32)
90                    };
91
92                    TextRange::new(TextSize::from(pos as u32), end)
93                } else {
94                    // No more characters
95                    TextRange::new(TextSize::from(pos as u32), self.end)
96                };
97
98                self.line_map.add_line(range.end().into());
99                Some(NewlineToken::new(NEWLINE, range))
100            }
101            Some((pos, ch)) if ch.is_ascii_alphabetic() => {
102                Some(NewlineToken::new(LETTER, self.current_pos(pos)))
103            }
104            Some((pos, ch)) if ch.is_ascii_digit() => {
105                Some(NewlineToken::new(DIGIT, self.current_pos(pos)))
106            }
107            Some((pos, ch)) if ch.is_ascii_whitespace() => {
108                // \n and \r have been already matched
109                Some(NewlineToken::new(WS, self.current_pos(pos)))
110            }
111            Some((pos, _)) => Some(NewlineToken::new(PUNCT, self.current_pos(pos))),
112            None => None,
113        }
114    }
115}