// glsl_lang_lexer/min/str.rs

1//! Memory based glsl-lang-pp preprocessing lexer
2
3use std::collections::VecDeque;
4
5use lang_util::{
6    position::{LexerPosition, NodeSpan},
7    NodeContent, TextRange,
8};
9
10use glsl_lang_pp::{lexer::TextToken, types::type_names::TypeNameState};
11use glsl_lang_types::ast;
12
13use crate::{HasLexerError, LangLexer, LangLexerIterator, ParseContext, ParseOptions, Token};
14
15use super::LexicalError;
16
/// glsl-lang-pp memory lexer
///
/// Wraps the raw `glsl-lang-pp` string lexer together with the parse options
/// needed to produce language-level tokens.
pub struct Lexer<'i> {
    // Underlying preprocessor lexer over the borrowed source string
    inner: glsl_lang_pp::lexer::Lexer<'i>,
    // Parse options (source id, default version, Vulkan targeting, ...)
    opts: ParseOptions,
}
22
23impl<'i> Lexer<'i> {
24    pub(crate) fn new(source: &'i str, opts: &ParseOptions) -> Self {
25        Self {
26            inner: glsl_lang_pp::lexer::Lexer::new(source),
27            opts: *opts,
28        }
29    }
30
31    fn with_context(self, ctx: ParseContext) -> LexerIterator<'i> {
32        LexerIterator {
33            inner: self.inner,
34            ctx,
35            opts: self.opts,
36            pending_tokens: Default::default(),
37            flags: PpFlags::None,
38        }
39    }
40}
41
// State machine tracking where the iterator is inside a preprocessor
// directive, so that directive payloads are tokenized specially.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PpFlags {
    // Not inside a preprocessor directive: normal tokenization
    None,
    // Collect the rest of the line into a single PpRest token
    Rest,
    // Expecting an include path ("..." or <...>)
    Include,
    // Inside a #version directive (profile keywords recognized)
    Version,
    // Inside an #extension directive (behavior keywords recognized)
    Extension,
    // After #define, expecting the macro name
    DefineName,
    // After the macro name; LPAREN starts args, anything else is the body
    DefineStart,
    // Inside the parenthesized argument list of a function-like #define
    DefineArgs,
}
53
/// glsl-lang-pp memory lexer iterator
///
/// Iterator state for turning raw preprocessor tokens into language tokens,
/// including preprocessor-directive detection and lookahead buffering.
pub struct LexerIterator<'i> {
    // Underlying preprocessor lexer
    inner: glsl_lang_pp::lexer::Lexer<'i>,
    // Parse context (type names, scopes, comment collection)
    ctx: ParseContext,
    // Parse options (source id, default version, Vulkan targeting, ...)
    opts: ParseOptions,
    // Tokens read ahead during directive detection, replayed before
    // pulling new tokens from `inner`
    pending_tokens: VecDeque<TextToken>,
    // Current preprocessor-directive parsing state
    flags: PpFlags,
}
62
impl<'i> LexerIterator<'i> {
    /// Collect the text of all remaining tokens on the current line into a
    /// single string, for directives whose payload is free-form (`#error`,
    /// `#pragma`, `#if`/`#elif` expressions, define bodies, include paths).
    ///
    /// * `source_token`: first token of the payload; if it is trivia it is
    ///   skipped along with any further leading trivia
    /// * `pos`: span of `source_token`
    /// * `text`: source text of `source_token`
    ///
    /// Returns the span from `pos` to the last consumed token along with the
    /// accumulated text. The terminating newline token is pushed onto
    /// `pending_tokens` so the main loop still sees it; trailing trivia
    /// before the newline is discarded. Resets `self.flags` to
    /// `PpFlags::None` as a side effect.
    fn consume_pp_rest(
        &mut self,
        source_token: TextToken,
        pos: NodeSpan,
        text: impl AsRef<str>,
    ) -> (NodeSpan, String) {
        // Collect all tokens until the end of the string
        // start_trivia stays true while we are still skipping leading trivia
        let (mut rest, mut start_trivia) = if source_token.is_trivia() {
            (String::new(), true)
        } else {
            (text.as_ref().to_string(), false)
        };

        let mut last = pos.range();
        // Trivia is held back here and only flushed when a non-trivia token
        // follows, so trailing trivia never ends up in `rest`
        let mut trivia_buffer = Vec::new();

        // allow: we need to release the borrow on self.inner to call input
        #[allow(clippy::while_let_on_iterator)]
        while let Some(token) = self.inner.next() {
            if token.token == glsl_lang_pp::lexer::Token::NEWLINE {
                // Re-buffer the newline so the caller processes it next
                self.pending_tokens.push_back(token);
                break;
            } else {
                last = token.range;

                if !(start_trivia && token.is_trivia()) {
                    start_trivia = false;

                    if token.is_trivia() {
                        trivia_buffer.push(token);
                    } else {
                        // Flush held-back trivia, then the current token
                        for token in trivia_buffer.drain(..).chain(std::iter::once(token)) {
                            rest.push_str(token.text(self.inner.input()).to_string().as_ref());
                        }
                    }
                }
            }
        }

        // Reset flags
        self.flags = PpFlags::None;

        // Emit token
        let range = NodeSpan::new(
            self.opts.source_id,
            TextRange::new(pos.start().offset, last.end()),
        );

        (range, rest)
    }
}
115
116impl<'i> Iterator for LexerIterator<'i> {
117    type Item = Result<(LexerPosition, Token, LexerPosition), LexicalError>;
118
119    fn next(&mut self) -> Option<Self::Item> {
120        'outer: loop {
121            let mut buffered = false;
122            let source_token = self
123                .pending_tokens
124                .pop_front()
125                .inspect(|_| {
126                    buffered = true;
127                })
128                .or_else(|| self.inner.next())?;
129
130            let pos = NodeSpan::new(self.opts.source_id, source_token.range);
131            let text = source_token.text(self.inner.input());
132
133            match self.flags {
134                PpFlags::None => {
135                    // No flags, parse as usual
136                }
137
138                PpFlags::Rest => {
139                    // Merge all tokens into a single PpRest token
140                    // This can only happen after a preprocessor directive, so the tokens will not
141                    // be buffered.
142                    debug_assert!(self.pending_tokens.is_empty());
143
144                    let (range, rest) = self.consume_pp_rest(source_token, pos, text.to_string());
145                    return Some(Ok((range.start(), Token::PpRest(rest), range.end())));
146                }
147
148                PpFlags::Include => {
149                    // Expect an include path
150                    debug_assert!(self.pending_tokens.is_empty());
151
152                    let (range, rest) = self.consume_pp_rest(source_token, pos, text.to_string());
153                    return Some(Ok((
154                        range.start(),
155                        if rest.starts_with('"') {
156                            Token::PpPathRelative(
157                                rest.strip_prefix('"')
158                                    .and_then(|rest| rest.strip_suffix('"'))
159                                    .map(str::to_owned)
160                                    .unwrap_or(rest),
161                            )
162                        } else if rest.starts_with('<') {
163                            Token::PpPathAbsolute(
164                                rest.strip_prefix('<')
165                                    .and_then(|rest| rest.strip_suffix('>'))
166                                    .map(str::to_owned)
167                                    .unwrap_or(rest),
168                            )
169                        } else {
170                            Token::Error
171                        },
172                        range.end(),
173                    )));
174                }
175
176                PpFlags::Version | PpFlags::Extension => {
177                    // Handled later
178
179                    // On newline, return to normal parsing
180                    if source_token.token == glsl_lang_pp::lexer::Token::NEWLINE {
181                        self.flags = PpFlags::None;
182                    }
183                }
184
185                PpFlags::DefineName => {
186                    // Start of a preprocessor define
187                    match source_token.token {
188                        glsl_lang_pp::lexer::Token::IDENT_KW => {
189                            // We've seen the name, advance
190                            self.flags = PpFlags::DefineStart;
191                        }
192                        glsl_lang_pp::lexer::Token::NEWLINE => {
193                            // Done with preprocessing directive
194                            self.flags = PpFlags::None;
195                        }
196                        _ => {
197                            // Other tokens, just advance
198                        }
199                    }
200                }
201
202                PpFlags::DefineStart => {
203                    match source_token.token {
204                        glsl_lang_pp::lexer::Token::LPAREN => {
205                            // Arguments
206                            self.flags = PpFlags::DefineArgs;
207                        }
208                        glsl_lang_pp::lexer::Token::NEWLINE => {
209                            // Done with preprocessing directive
210                            self.flags = PpFlags::None;
211                        }
212                        _ => {
213                            // Anything else, this is the body of an object define
214                            self.pending_tokens.push_back(source_token);
215                            self.flags = PpFlags::Rest;
216                            continue 'outer;
217                        }
218                    }
219                }
220
221                PpFlags::DefineArgs => {
222                    // Inside preprocessor arguments
223                    match source_token.token {
224                        glsl_lang_pp::lexer::Token::RPAREN => {
225                            // Done with arguments
226                            self.flags = PpFlags::Rest;
227                        }
228                        glsl_lang_pp::lexer::Token::NEWLINE => {
229                            // Done with preprocessing directive
230                            self.flags = PpFlags::None;
231                        }
232                        _ => {
233                            // Other token, just advance
234                        }
235                    }
236                }
237            }
238
239            let (token, _type_name_state) = glsl_lang_pp::types::Token::from_token(
240                source_token,
241                self.inner.input(),
242                self.opts.default_version,
243                self.opts.target_vulkan,
244                |name| {
245                    if self.ctx.is_type_name(name.as_ref()) {
246                        TypeNameState::Type
247                    } else {
248                        TypeNameState::Ident
249                    }
250                },
251            );
252
253            let text = text.into_unescaped();
254            let text = text.try_as_str().unwrap();
255
256            match crate::lang_token::lang_token(&self.ctx, text, pos, token) {
257                Ok(token) => {
258                    // Try to get the next token when we encounter trivia
259                    match token.1 {
260                        Token::Whitespace => {}
261                        Token::SingleLineComment | Token::MultiLineComment => {
262                            if self.ctx.has_comments() {
263                                let mut text = text.split_at(2).1.to_string();
264
265                                let comment = match token.1 {
266                                    Token::SingleLineComment => ast::CommentData::Single(text),
267                                    Token::MultiLineComment => {
268                                        text.pop();
269                                        text.pop();
270                                        ast::CommentData::Multi(text)
271                                    }
272                                    _ => unreachable!(),
273                                }
274                                .spanned(token.0, token.2);
275
276                                self.ctx.add_comment(comment);
277                            }
278                        }
279                        Token::Identifier(ref ident) | Token::TypeName(ref ident)
280                            if self.flags == PpFlags::Version =>
281                        {
282                            return Some(Ok(match ident.as_str() {
283                                "core" => (token.0, Token::PpCore, token.2),
284                                "compatibility" => (token.0, Token::PpCompatibility, token.2),
285                                "es" => (token.0, Token::PpEs, token.2),
286                                _ => token,
287                            }))
288                        }
289                        Token::Identifier(ref ident) | Token::TypeName(ref ident)
290                            if self.flags == PpFlags::Extension =>
291                        {
292                            return Some(Ok(match ident.as_str() {
293                                "require" => (token.0, Token::PpExtRequire, token.2),
294                                "enable" => (token.0, Token::PpExtEnable, token.2),
295                                "warn" => (token.0, Token::PpExtWarn, token.2),
296                                "disable" => (token.0, Token::PpExtDisable, token.2),
297                                _ => token,
298                            }))
299                        }
300                        _ => {
301                            if token.1 == Token::LeftBrace {
302                                self.ctx.push_scope();
303                            } else if token.1 == Token::RightBrace {
304                                self.ctx.pop_scope();
305                            }
306
307                            return Some(Ok(token));
308                        }
309                    }
310                }
311
312                Err((token, kind)) => match kind {
313                    glsl_lang_pp::types::token::ErrorKind::InvalidToken
314                        if !buffered && token == glsl_lang_pp::types::Token::HASH =>
315                    {
316                        self.pending_tokens.push_back(source_token);
317
318                        for token in &mut self.inner {
319                            self.pending_tokens.push_back(token);
320
321                            match token.token {
322                                glsl_lang_pp::lexer::Token::IDENT_KW => {
323                                    // IDENT_KW, this is the name of the preprocessing directive
324                                    break;
325                                }
326                                glsl_lang_pp::lexer::Token::WS
327                                | glsl_lang_pp::lexer::Token::COMMENT
328                                | glsl_lang_pp::lexer::Token::LINECONT => {
329                                    // Whitespace, continue
330                                }
331                                _ => {
332                                    // Unexpected token, stop scanning and return this sequence as
333                                    // an error followed by the raw tokens
334                                    continue 'outer;
335                                }
336                            }
337
338                            if token.token == glsl_lang_pp::lexer::Token::IDENT_KW {
339                                break;
340                            }
341                        }
342
343                        // unwrap: there is at least one token, the #
344                        let first_token = self.pending_tokens.front().unwrap();
345                        let last_token = self.pending_tokens.back().unwrap();
346
347                        let text = last_token.text(self.inner.input()).to_string();
348                        let result = match text.as_ref() {
349                            "define" => {
350                                self.flags = PpFlags::DefineName;
351                                Token::PpDefine
352                            }
353                            "elif" => {
354                                self.flags = PpFlags::Rest;
355                                Token::PpElif
356                            }
357                            "else" => Token::PpElse,
358                            "endif" => Token::PpEndIf,
359                            "error" => {
360                                self.flags = PpFlags::Rest;
361                                Token::PpError
362                            }
363                            "extension" => {
364                                self.flags = PpFlags::Extension;
365                                Token::PpExtension
366                            }
367                            "if" => {
368                                self.flags = PpFlags::Rest;
369                                Token::PpIf
370                            }
371                            "ifdef" => Token::PpIfDef,
372                            "ifndef" => Token::PpIfNDef,
373                            "include" => {
374                                self.flags = PpFlags::Include;
375                                Token::PpInclude
376                            }
377                            "line" => Token::PpLine,
378                            "pragma" => {
379                                self.flags = PpFlags::Rest;
380                                Token::PpPragma
381                            }
382                            "undef" => Token::PpUndef,
383                            "version" => {
384                                self.flags = PpFlags::Version;
385                                Token::PpVersion
386                            }
387                            _ => {
388                                // Invalid preprocessing directive, just process tokens normally
389                                continue 'outer;
390                            }
391                        };
392
393                        let pos = NodeSpan::new(
394                            self.opts.source_id,
395                            TextRange::new(first_token.range.start(), last_token.range.end()),
396                        );
397
398                        // Drop the buffered tokens
399                        self.pending_tokens.clear();
400
401                        return Some(Ok((pos.start(), result, pos.end())));
402                    }
403                    _ => {
404                        return Some(Err(LexicalError::Token { kind, pos }));
405                    }
406                },
407            }
408        }
409    }
410}
411
// The lexer reports failures as lexical errors from this module
impl HasLexerError for Lexer<'_> {
    type Error = LexicalError;
}
415
416impl<'i> LangLexer<'i> for Lexer<'i> {
417    type Input = &'i str;
418    type Iter = LexerIterator<'i>;
419
420    fn new(source: Self::Input, opts: &ParseOptions) -> Self {
421        Self::new(source, opts)
422    }
423
424    fn run(self, ctx: ParseContext) -> Self::Iter {
425        self.with_context(ctx)
426    }
427}
428
// The iterator surfaces the same lexical error type as the lexer itself
impl HasLexerError for LexerIterator<'_> {
    type Error = LexicalError;
}
432
433impl<'i> LangLexerIterator for LexerIterator<'i> {
434    fn resolve_err(
435        &self,
436        err: lalrpop_util::ParseError<LexerPosition, Token, Self::Error>,
437    ) -> lang_util::error::ParseError<Self::Error> {
438        let location = self.inner.line_map();
439        let (_file_id, lexer) = lang_util::error::error_location(&err);
440
441        lang_util::error::ParseError::<Self::Error>::builder()
442            .pos(lexer)
443            .current_file(self.opts.source_id)
444            .resolve(location)
445            .finish(err.into())
446    }
447}