glsl_lang_pp/processor/
definition.rs

1use std::{
2    collections::{HashMap, HashSet, VecDeque},
3    rc::Rc,
4};
5
6use itertools::Itertools;
7use rowan::NodeOrToken;
8
9use lang_util::{position::NodeSpan, FileId, SmolStr, TextRange};
10
11use crate::{
12    parser::{
13        SyntaxKind::{self, *},
14        SyntaxNode, SyntaxToken,
15    },
16    util::TokenText,
17};
18
19use super::{
20    event::{Event, OutputToken, ProcessingError, ProcessingErrorKind, TokenLike},
21    expand::ExpandLocation,
22    nodes::{Define, DefineFunction, DefineKind, DefineObject},
23    ProcessorState,
24};
25
/// A macro definition the processor can expand: either a definition backed by a [`Define`]
/// node, or one of the built-in `__LINE__`, `__FILE__` and `__VERSION__` macros.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Definition {
    /// A definition backed by a [`Define`] node, paired with the file id that
    /// [`Definition::file_id`] reports for it.
    Regular(Rc<Define>, FileId),
    /// The built-in `__LINE__` macro.
    Line,
    /// The built-in `__FILE__` macro.
    File,
    /// The built-in `__VERSION__` macro.
    Version,
}
33
34impl Definition {
35    pub fn file_id(&self) -> FileId {
36        match self {
37            Definition::Regular(_, file_id) => *file_id,
38            _ => FileId::builtin(0),
39        }
40    }
41
42    pub fn name(&self) -> &str {
43        match self {
44            Definition::Regular(d, _) => d.name(),
45            Definition::Line => "__LINE__",
46            Definition::File => "__FILE__",
47            Definition::Version => "__VERSION__",
48        }
49    }
50
51    pub fn protected(&self) -> bool {
52        match self {
53            Definition::Regular(d, _) => d.protected(),
54            Definition::Line => true,
55            Definition::File => true,
56            Definition::Version => true,
57        }
58    }
59
60    pub fn object_like(&self) -> bool {
61        match self {
62            Definition::Regular(d, _) => matches!(d.kind(), DefineKind::Object(_)),
63            Definition::Line => true,
64            Definition::File => true,
65            Definition::Version => true,
66        }
67    }
68
69    pub fn arg_count(&self) -> usize {
70        match self {
71            Definition::Regular(d, _) => match d.kind() {
72                DefineKind::Object(_) => 0,
73                DefineKind::Function(f) => f.arg_names().len(),
74            },
75            _ => 0,
76        }
77    }
78
79    fn substitute_string(
80        src: &str,
81        kind: SyntaxKind,
82        range: NodeSpan,
83    ) -> impl IntoIterator<Item = OutputToken> {
84        // SAFETY: Assume the passed string is already newline-escaped, since it's generated by the
85        // preprocessor
86        Some(OutputToken::new(
87            kind,
88            unsafe { TokenText::unescaped(src) },
89            range,
90        ))
91    }
92
93    fn concat_node_to_tokens<T: TokenLike>(
94        definition_file_id: FileId,
95        node: SyntaxNode,
96        args: Option<&HashMap<&str, &[T]>>,
97        entire_range: NodeSpan,
98    ) -> impl IntoIterator<Item = OutputToken> {
99        // Find the first non-trivial token
100        #[derive(Debug, Default)]
101        enum State {
102            #[default]
103            Init,
104            Lhs {
105                kind: SyntaxKind,
106                text: TokenText<'static>,
107                pos: NodeSpan,
108                trivia_buffer: VecDeque<(SyntaxKind, TokenText<'static>, NodeSpan)>,
109            },
110            ExpectRhs {
111                kind: SyntaxKind,
112                text: TokenText<'static>,
113                pos: NodeSpan,
114                trivia_buffer: VecDeque<(SyntaxKind, TokenText<'static>, NodeSpan)>,
115            },
116        }
117
118        let mut state = State::default();
119
120        let mut input_tokens = node
121            .descendants_with_tokens()
122            .filter_map(NodeOrToken::into_token);
123
124        let mut output_tokens = Vec::new();
125
126        let mut current_arg: Option<std::slice::Iter<'_, T>> = None;
127
128        loop {
129            let input_token;
130            let input_text;
131            let (current_kind, current_text, current_span) = if let Some(iter) = &mut current_arg {
132                if let Some(token) = iter.next() {
133                    (
134                        token.kind(),
135                        token.text().into_unescaped(),
136                        token.text_range(),
137                    )
138                } else {
139                    current_arg.take();
140                    continue;
141                }
142            } else if let Some(token) = input_tokens.next() {
143                input_token = token;
144                input_text = TokenText::raw(input_token.text()).into_unescaped();
145
146                if input_token.kind() == IDENT_KW {
147                    if let Some(value) = args.and_then(|args| {
148                        // unwrap: try_as_str after into_unescaped
149                        args.get(input_text.try_as_str().unwrap())
150                    }) {
151                        current_arg = Some(value.iter());
152                        continue;
153                    }
154                }
155
156                (
157                    input_token.kind(),
158                    input_text,
159                    NodeSpan::new(definition_file_id, input_token.text_range()),
160                )
161            } else {
162                // No more input tokens
163                break;
164            };
165
166            match std::mem::take(&mut state) {
167                State::Init => {
168                    // Initial state, we should start with a token that can be pasted
169                    if current_kind == PP_CONCAT_OP {
170                        output_tokens.push(OutputToken::new_error(entire_range));
171                        return output_tokens;
172                    }
173
174                    if current_kind.is_trivia() {
175                        // Forward leading trivia
176                        output_tokens.push(OutputToken::new(
177                            current_kind,
178                            current_text,
179                            current_span,
180                        ));
181                    } else {
182                        // Non-trivia is the LHS
183                        state = State::Lhs {
184                            kind: current_kind,
185                            text: current_text.to_owned(),
186                            pos: current_span,
187                            trivia_buffer: VecDeque::with_capacity(1),
188                        };
189                    }
190                }
191                State::Lhs {
192                    kind,
193                    text,
194                    pos,
195                    mut trivia_buffer,
196                } => {
197                    // We saw the LHS of a concat
198                    if current_kind == PP_CONCAT_OP {
199                        // And then we saw the ##, so we're expecting the RHS
200                        state = State::ExpectRhs {
201                            kind,
202                            text,
203                            pos,
204                            trivia_buffer: {
205                                trivia_buffer.clear();
206                                trivia_buffer
207                            },
208                        };
209                    } else if current_kind.is_trivia() {
210                        // Just buffer trivia between LHS and ##
211                        state = State::Lhs {
212                            kind,
213                            text,
214                            pos,
215                            trivia_buffer: {
216                                trivia_buffer.push_back((
217                                    current_kind,
218                                    current_text.to_owned(),
219                                    current_span,
220                                ));
221                                trivia_buffer
222                            },
223                        };
224                    } else {
225                        // Non-trivia instead of ##, so just bump the LHS and restart
226
227                        output_tokens.push(OutputToken::new(kind, text, pos));
228
229                        // Bump trivia
230                        while let Some((kind, text, pos)) = trivia_buffer.pop_front() {
231                            output_tokens.push(OutputToken::new(kind, text, pos));
232                        }
233
234                        // Restart with the new token as LHS
235                        state = State::Lhs {
236                            kind: current_kind,
237                            text: current_text.to_owned(),
238                            pos: current_span,
239                            trivia_buffer,
240                        };
241                    }
242                }
243
244                State::ExpectRhs {
245                    kind,
246                    mut text,
247                    pos,
248                    mut trivia_buffer,
249                } => {
250                    // We are expecting the RHS
251                    if current_kind == PP_CONCAT_OP {
252                        // Can't concat with a ##
253                        output_tokens.push(OutputToken::new_error(pos));
254                        return output_tokens;
255                    } else if current_kind.is_trivia() {
256                        // Just buffer trivia between ## and RHS
257                        state = State::ExpectRhs {
258                            kind,
259                            text,
260                            pos,
261                            trivia_buffer: {
262                                trivia_buffer.push_back((
263                                    current_kind,
264                                    current_text.to_owned(),
265                                    current_span,
266                                ));
267                                trivia_buffer
268                            },
269                        };
270                    } else {
271                        // Non-trivia, build resulting token
272                        state = State::Lhs {
273                            kind: SyntaxKind::paste(kind, current_kind),
274                            text: {
275                                text.push_str(current_text);
276                                text
277                            },
278                            pos,
279                            trivia_buffer: {
280                                // Discard trivia
281                                trivia_buffer.clear();
282                                trivia_buffer
283                            },
284                        };
285                    }
286                }
287            }
288        }
289
290        match state {
291            State::Init => {}
292            State::Lhs {
293                kind,
294                text,
295                pos,
296                trivia_buffer: _,
297            } => {
298                output_tokens.push(OutputToken::new(kind, text, pos));
299            }
300            State::ExpectRhs {
301                kind: _,
302                mut text,
303                pos,
304                trivia_buffer: _,
305            } => {
306                // We were expecting a RHS
307                text.push_str(TokenText::raw(" ##"));
308                output_tokens.push(OutputToken::new(ERROR, text, pos));
309            }
310        }
311
312        output_tokens
313    }
314
315    fn substitute_define_object(
316        definition_file_id: FileId,
317        object: &DefineObject,
318        entire_range: NodeSpan,
319        location: &ExpandLocation,
320    ) -> Vec<Event> {
321        let mut tokens = Vec::new();
322        for node_or_token in object.body().children_with_tokens() {
323            match node_or_token {
324                NodeOrToken::Node(node) => {
325                    debug_assert!(node.kind() == PP_CONCAT);
326                    tokens.extend(Self::concat_node_to_tokens::<OutputToken>(
327                        definition_file_id,
328                        node,
329                        None,
330                        entire_range,
331                    ));
332                }
333                NodeOrToken::Token(token) => {
334                    tokens.push(OutputToken::new(
335                        token.kind(),
336                        TokenText::raw(token.text()),
337                        NodeSpan::new(definition_file_id, token.text_range()),
338                    ));
339                }
340            }
341        }
342        Self::subs_tokens(tokens, entire_range, location)
343    }
344
345    fn substitute_define_function(
346        definition_file_id: FileId,
347        function: &DefineFunction,
348        args: &[Vec<impl TokenLike>],
349        entire_range: NodeSpan,
350        location: &ExpandLocation,
351    ) -> Vec<Event> {
352        // Put the arguments into a hashmap
353        let args: HashMap<_, _> = args
354            .iter()
355            .zip(function.arg_names())
356            .map(|(tokens, arg_name)| {
357                (
358                    arg_name.as_str(),
359                    // TODO: Should we trim whitespace out of macro arguments?
360                    trim_ws(tokens),
361                )
362            })
363            .collect();
364
365        let mut tokens = Vec::new();
366        for node_or_token in function.body().children_with_tokens() {
367            match node_or_token {
368                NodeOrToken::Node(node) => {
369                    debug_assert!(node.kind() == PP_CONCAT);
370                    tokens.extend(Self::concat_node_to_tokens(
371                        definition_file_id,
372                        node,
373                        Some(&args),
374                        entire_range,
375                    ));
376                }
377                NodeOrToken::Token(token) => {
378                    let kind = token.kind();
379                    let text = TokenText::raw(token.text()).into_unescaped();
380
381                    if kind == IDENT_KW {
382                        // unwrap: try_as_str after into_unescaped
383                        if let Some(value) = args.get(text.try_as_str().unwrap()) {
384                            // There is an argument with those tokens
385                            for subs_token in value.iter() {
386                                tokens.push(OutputToken::from_token(subs_token));
387                            }
388
389                            continue;
390                        }
391                    }
392
393                    tokens.push(OutputToken::new(
394                        kind,
395                        text,
396                        NodeSpan::new(definition_file_id, token.text_range()),
397                    ));
398                }
399            }
400        }
401
402        Self::subs_tokens(tokens, entire_range, location)
403    }
404
405    fn subs_tokens(
406        tokens: impl IntoIterator<Item = OutputToken>,
407        entire_range: NodeSpan,
408        location: &ExpandLocation,
409    ) -> Vec<Event> {
410        tokens
411            .into_iter()
412            .map(|token| {
413                if token.kind() == ERROR {
414                    Event::error(
415                        ProcessingErrorKind::InvalidTokenPaste {
416                            token: {
417                                let text = token.text();
418                                if text.is_empty() {
419                                    None
420                                } else {
421                                    Some(text.into())
422                                }
423                            },
424                        },
425                        entire_range,
426                        location,
427                        false,
428                    )
429                } else {
430                    // Replace the range of substitued tokens with the macro call range
431                    OutputToken::new(
432                        token.kind(),
433                        <OutputToken as TokenLike>::text(&token),
434                        entire_range,
435                    )
436                    .into()
437                }
438            })
439            .collect()
440    }
441
442    fn substitute_object(
443        &self,
444        entire_range: NodeSpan,
445        current_state: &ProcessorState,
446        location: &ExpandLocation,
447    ) -> Vec<Event> {
448        match self {
449            Definition::Line => Self::substitute_string(
450                &location
451                    .offset_to_line_and_col(entire_range.start().offset)
452                    .0
453                    .to_string(),
454                DIGITS,
455                entire_range,
456            )
457            .into_iter()
458            .map(Into::into)
459            .collect(),
460
461            Definition::File => {
462                let string = location.string();
463                let (string, kind) = if string.is_number() {
464                    (string.to_string(), DIGITS)
465                } else {
466                    (format!("\"{}\"", string), QUOTE_STRING)
467                };
468
469                Self::substitute_string(&string, kind, entire_range)
470                    .into_iter()
471                    .map(Into::into)
472                    .collect()
473            }
474            Definition::Version => Self::substitute_string(
475                &format!("{}", current_state.version.number),
476                DIGITS,
477                entire_range,
478            )
479            .into_iter()
480            .map(Into::into)
481            .collect(),
482
483            Definition::Regular(define, _) => {
484                if let DefineKind::Object(object) = define.kind() {
485                    Self::substitute_define_object(self.file_id(), object, entire_range, location)
486                } else {
487                    panic!("expected object define")
488                }
489            }
490        }
491    }
492
493    fn substitute_function(
494        &self,
495        args: &[Vec<impl TokenLike>],
496        entire_range: NodeSpan,
497        location: &ExpandLocation,
498    ) -> Vec<Event> {
499        match self {
500            Definition::Regular(define, _) => {
501                if let DefineKind::Function(function) = define.kind() {
502                    Self::substitute_define_function(
503                        self.file_id(),
504                        function,
505                        args,
506                        entire_range,
507                        location,
508                    )
509                } else {
510                    panic!("expected function define");
511                }
512            }
513            _ => {
514                panic!("expected function define");
515            }
516        }
517    }
518}
519
520pub(crate) fn trim_ws<T: TokenLike>(tokens: &[T]) -> &[T] {
521    let leading_ws = tokens
522        .iter()
523        .take_while(|token| token.kind().is_whitespace())
524        .count();
525    let trailing_ws = tokens
526        .iter()
527        .rev()
528        .take_while(|token| token.kind().is_whitespace())
529        .count();
530
531    let end = tokens.len() - trailing_ws;
532    if end < leading_ws {
533        // The slice is only whitespace
534        &[]
535    } else {
536        &tokens[leading_ws..(tokens.len() - trailing_ws)]
537    }
538}
539
/// A parsed use of a macro: the definition being invoked, the arguments of the call (if any)
/// and the range that the substituted tokens are reported at.
pub struct MacroInvocation<'d> {
    /// The definition being invoked
    definition: &'d Definition,
    /// Whether this is an object-like use or a function-like call with its arguments
    tokens: MacroCall,
    /// Range passed to the definition when substituting
    range: NodeSpan,
}
545
/// The shape of a macro invocation.
enum MacroCall {
    /// Invocation of an object-like definition, which takes no argument list
    Object,
    /// Invocation of a function-like definition, with one token list per argument
    Function(Vec<Vec<OutputToken>>),
}
550
551impl<'d> MacroInvocation<'d> {
552    pub fn parse_raw<I>(
553        definition: &'d Definition,
554        first_token: SyntaxToken,
555        iterator: I,
556        location: &ExpandLocation,
557    ) -> Result<Option<(MacroInvocation<'d>, I)>, ProcessingError>
558    where
559        I: Iterator<Item = NodeOrToken<SyntaxNode, SyntaxToken>>,
560    {
561        Self::parse_nested(definition, first_token, iterator, location, None, |token| {
562            (token, location.current_file())
563        })
564    }
565
566    fn parse_nested<I, Q, P>(
567        definition: &'d Definition,
568        first_token: Q,
569        mut iterator: I,
570        location: &ExpandLocation,
571        text_range: Option<NodeSpan>,
572        token_fn: impl Fn(Q) -> P,
573    ) -> Result<Option<(Self, I)>, ProcessingError>
574    where
575        I: Iterator<Item = NodeOrToken<SyntaxNode, Q>>,
576        P: TokenLike,
577    {
578        let first_token = token_fn(first_token);
579
580        let (tokens, computed_range) = if definition.object_like() {
581            (MacroCall::Object, first_token.text_range())
582        } else {
583            // A function-like, we need to parse arguments
584            let mut args = Vec::new();
585            let mut seen_comma = false;
586            let mut nesting_level = 0;
587
588            let token_start = first_token.text_range().start();
589            let mut last_end = token_start;
590            let token_end = loop {
591                match iterator.next() {
592                    Some(node_or_token) => match node_or_token {
593                        NodeOrToken::Node(node) => {
594                            // Node, i.e. a processing directive. unexpected here
595                            return Err(ProcessingError::builder()
596                                .pos(first_token.text_range())
597                                .resolve_file(location)
598                                .finish(ProcessingErrorKind::UnexpectedDirective {
599                                    ident: definition.name().into(),
600                                    node: (&node).into(),
601                                }));
602                        }
603                        NodeOrToken::Token(inner_token) => {
604                            let inner_token = token_fn(inner_token);
605
606                            // A token
607                            let kind = inner_token.kind();
608                            let end = inner_token.text_range().end();
609                            last_end = end;
610
611                            if nesting_level == 0 {
612                                if kind.is_whitespace() {
613                                    // Just ignore it, it's whitespace before the
614                                    // first lparen
615                                } else if kind == LPAREN {
616                                    nesting_level += 1;
617
618                                    // Create space for first argument
619                                    args.push(Vec::new());
620                                } else {
621                                    // Unexpected garbage. Note that this is not fatal to the
622                                    // compiler, the identifier will just get ignored
623                                    return Ok(None);
624                                }
625                            } else if kind == COMMA && nesting_level == 1 {
626                                // Create space for next argument
627                                args.push(Vec::new());
628                                seen_comma = true;
629                            } else {
630                                if kind == LPAREN {
631                                    nesting_level += 1;
632                                } else if kind == RPAREN {
633                                    nesting_level -= 1;
634                                }
635
636                                if nesting_level > 0 {
637                                    args.last_mut()
638                                        .unwrap()
639                                        .push(OutputToken::from_token(&inner_token));
640                                }
641                            }
642
643                            if kind == RPAREN && nesting_level == 0 {
644                                break end;
645                            }
646                        }
647                    },
648                    None => {
649                        // End-of-file. Not that we haven't consumed any nodes yet
650                        // so we just need to return the events via the state
651                        return Err(ProcessingError::builder()
652                            .pos(text_range.unwrap_or_else(|| {
653                                NodeSpan::new(
654                                    token_start.source_id,
655                                    TextRange::new(token_start.offset, last_end.offset),
656                                )
657                            }))
658                            .resolve_file(location)
659                            .finish(ProcessingErrorKind::UnterminatedMacroInvocation {
660                                ident: definition.name().into(),
661                            }));
662                    }
663                }
664            };
665
666            // If we haven't seen a comma, this ambiguous: it could either be a 0-arguments
667            // macro or one argument which happens to be empty. To fix this, we pop the empty
668            // argument we may have added, but only if it's trivia
669            if !seen_comma
670                && definition.arg_count() == 0
671                && args.len() == 1
672                && args
673                    .first()
674                    .unwrap()
675                    .iter()
676                    .all(|token| token.kind().is_whitespace())
677            {
678                args.pop();
679            }
680
681            if args.len() != definition.arg_count() {
682                return Err(ProcessingError::builder()
683                    .pos(first_token.text_range())
684                    .resolve_file(location)
685                    .finish(ProcessingErrorKind::MismatchedArguments {
686                        ident: definition.name().into(),
687                        expected: definition.arg_count(),
688                        actual: args.len(),
689                    }));
690            }
691
692            (
693                MacroCall::Function(args),
694                // Do not build the NodeSpan if we have an existing range, since the tokens have
695                // been generated and may not be in order, thus TextRange::new will panic because
696                // start > end
697                text_range.unwrap_or_else(|| {
698                    NodeSpan::new(
699                        token_start.source_id,
700                        TextRange::new(token_start.offset, token_end.offset),
701                    )
702                }),
703            )
704        };
705
706        Ok(Some((
707            Self {
708                definition,
709                tokens,
710                range: text_range.unwrap_or(computed_range),
711            },
712            iterator,
713        )))
714    }
715
716    pub fn substitute_vec(
717        current_state: &ProcessorState,
718        tokens: Vec<impl TokenLike>,
719        location: &ExpandLocation,
720    ) -> Vec<Event> {
721        let mut subs_stack = HashSet::new();
722        Self::substitute_vec_inner(current_state, tokens, location, &mut subs_stack, None)
723    }
724
725    fn substitute_vec_inner(
726        current_state: &ProcessorState,
727        tokens: Vec<impl TokenLike>,
728        location: &ExpandLocation,
729        subs_stack: &mut HashSet<SmolStr>,
730        range: Option<NodeSpan>,
731    ) -> Vec<Event> {
732        // Macros are recursive, so we need to scan again for further substitutions
733        let mut result = Vec::with_capacity(tokens.len());
734        let mut iterator = tokens.into_iter().map(NodeOrToken::Token);
735        let mut seen_defined_recently = false;
736
737        while let Some(node_or_token) = iterator.next() {
738            // Just a regular token
739            match node_or_token {
740                NodeOrToken::Node(_) => unreachable!(),
741                NodeOrToken::Token(token) => {
742                    let kind = token.kind();
743
744                    if let Some(definition) = (if kind == IDENT_KW && !seen_defined_recently {
745                        Some(token.text().to_string())
746                    } else {
747                        None
748                    })
749                    .and_then(|ident| {
750                        if subs_stack.contains(ident.as_ref()) {
751                            None
752                        } else {
753                            Some(ident)
754                        }
755                    })
756                    .and_then(|ident| current_state.definitions.get(ident.as_ref()))
757                    {
758                        match MacroInvocation::parse_nested(
759                            definition,
760                            token.clone(),
761                            iterator.clone(),
762                            location,
763                            range,
764                            |token| token,
765                        ) {
766                            Ok(Some((invocation, new_iterator))) => {
767                                result.extend(invocation.substitute_inner(
768                                    current_state,
769                                    location,
770                                    subs_stack,
771                                ));
772
773                                iterator = new_iterator;
774                            }
775                            Ok(None) => {
776                                result.push(Event::token(token, false));
777                            }
778                            Err(err) => {
779                                result.push(Event::map_error(err, false));
780                            }
781                        }
782                    } else {
783                        result.push(Event::token(token, false));
784                    }
785
786                    if seen_defined_recently {
787                        if !kind.is_trivia() {
788                            if kind == LPAREN {
789                                // Wait for (maybe) an IDENT_KW
790                            } else {
791                                // IDENT_KW, RPAREN, anything else: done
792                                seen_defined_recently = false;
793                            }
794                        }
795                    } else if kind == DEFINED {
796                        seen_defined_recently = true;
797                    }
798                }
799            }
800        }
801
802        result
803    }
804
805    pub fn substitute(
806        self,
807        current_state: &ProcessorState,
808        location: &ExpandLocation,
809    ) -> Vec<Event> {
810        let mut subs_stack = HashSet::new();
811        self.substitute_inner(current_state, location, &mut subs_stack)
812    }
813
814    fn substitute_inner(
815        self,
816        current_state: &ProcessorState,
817        location: &ExpandLocation,
818        subs_stack: &mut HashSet<SmolStr>,
819    ) -> Vec<Event> {
820        let events = match self.tokens {
821            MacroCall::Object => {
822                self.definition
823                    .substitute_object(self.range, current_state, location)
824            }
825            MacroCall::Function(function) => self
826                .definition
827                .substitute_function(&function, self.range, location),
828        };
829
830        // Disable recursion for the current name
831        subs_stack.insert(self.definition.name().into());
832
833        let range = Some(self.range);
834
835        // We use itertools group_by to insert the error events in the right locations in the
836        // output sequence. This means we split the token sequence at errors and this wouldn't
837        // return the "right" result accross errors, but since there's an error, there is no
838        // spec-defined expected result.
839        let result: Vec<_> = events
840            .into_iter()
841            .chunk_by(Event::is_token)
842            .into_iter()
843            .flat_map(|(is_token, events)| {
844                if is_token {
845                    // A token sequence
846                    // TODO: Prevent re-allocation
847                    Self::substitute_vec_inner(
848                        current_state,
849                        events
850                            .into_iter()
851                            .filter_map(Event::into_token)
852                            .collect::<Vec<_>>(),
853                        location,
854                        subs_stack,
855                        range,
856                    )
857                } else {
858                    events.collect()
859                }
860            })
861            .collect();
862
863        subs_stack.remove(self.definition.name());
864
865        result
866    }
867}