1use std::collections::VecDeque;
3
4use rowan::{TextRange, TextSize};
5
6use crate::{
7 lexer::{self, Lexer, TextToken},
8 util::Unescaped,
9};
10
11mod ast;
12pub use ast::*;
13
14mod error;
15pub use error::*;
16
17mod lang;
18pub use lang::*;
19
20mod syntax_kind;
21pub use syntax_kind::*;
22
23mod syntax;
24
/// Fixed-size bitset used by `ParserRun::expect_any`, which fills it with
/// token kinds cast to `u16` and then tests membership of the upcoming token.
type SyntaxBitset = cbitset::BitSet256;
// Compile-time guard: `SyntaxKind::_LAST` must stay below 256 so that kind
// values can index into `SyntaxBitset`.
// NOTE(review): presumably `_LAST` is the highest-valued kind — confirm in `syntax_kind`.
static_assertions::const_assert!((SyntaxKind::_LAST as usize) < 256);

/// rowan syntax node specialised to `PreprocessorLang`.
pub(crate) type SyntaxNode = rowan::SyntaxNode<PreprocessorLang>;
/// rowan syntax token specialised to `PreprocessorLang`.
pub(crate) type SyntaxToken = rowan::SyntaxToken<PreprocessorLang>;
31
/// Public entry point of the parser: holds the text to parse and hands it to
/// a fresh `ParserRun` when one of the `parse*` methods is called.
///
/// The type only wraps a borrowed string, so it derives `Debug` (for
/// diagnostics) and `Clone` (cheap; copies the reference, not the text).
#[derive(Debug, Clone)]
pub struct Parser<'i> {
    /// The complete input text.
    source: &'i str,
}
35
/// State of a single parse: the lexer, the green-tree builder that receives
/// nodes and tokens, and the errors recorded along the way.
pub struct ParserRun<'i, 'cache> {
    /// Tree builder that every started node and bumped token is written to.
    builder: rowan::GreenNodeBuilder<'cache>,
    /// Errors recorded through `push_error`.
    errors: Vec<Error>,
    /// One-token lookahead cache filled by `peek`.
    ///
    /// `None` means nothing is cached; `Some(inner)` stores exactly what the
    /// lexer returned, so `Some(None)` is a cached "lexer had no more tokens".
    peeked: Option<Option<lexer::TextToken>>,
    /// Trivia tokens set aside by `buffer_trivia` until `eat_trivia` writes
    /// them to the builder.
    trivia_buffer: VecDeque<lexer::TextToken>,
    /// The complete input text; tokens are resolved to their text against it.
    source: &'i str,
    /// Lexer producing the token stream for `source`.
    input: Lexer<'i>,
}
44
45impl<'i> Parser<'i> {
47 pub fn new(input: &'i str) -> Self {
48 Self { source: input }
49 }
50
51 pub fn parse(self) -> Ast {
52 ParserRun::new(self.source).parse_ast()
53 }
54
55 pub fn parse_define_body(self) -> Option<SyntaxNode> {
56 ParserRun::new(self.source).parse_one(syntax::define_body)
57 }
58}
59
60impl<'i, 'cache> ParserRun<'i, 'cache> {
62 fn new(source: &'i str) -> Self {
63 Self {
64 builder: rowan::GreenNodeBuilder::new(),
65 errors: Vec::new(),
66 peeked: None,
67 trivia_buffer: VecDeque::with_capacity(4),
68 source,
69 input: Lexer::new(source),
70 }
71 }
72
73 fn parse_ast(mut self) -> Ast {
74 self.start_node(SyntaxKind::ROOT);
75 syntax::file(&mut self);
76 self.finish_node();
77
78 Ast::new(
79 self.builder.finish(),
80 self.errors,
81 self.input.into_line_map(),
82 )
83 }
84
85 fn parse_one(mut self, f: impl FnOnce(&mut Self)) -> Option<SyntaxNode> {
86 self.start_node(SyntaxKind::ROOT);
87 f(&mut self);
88 self.finish_node();
89
90 if self.errors.is_empty() {
91 SyntaxNode::new_root(self.builder.finish()).first_child()
92 } else {
93 None
94 }
95 }
96
97 fn checkpoint(&mut self) -> rowan::Checkpoint {
98 self.builder.checkpoint()
99 }
100
101 fn start_node(&mut self, kind: SyntaxKind) {
102 self.builder.start_node(kind.into());
103 }
104
105 fn start_node_at(&mut self, checkpoint: rowan::Checkpoint, kind: SyntaxKind) {
106 self.builder.start_node_at(checkpoint, kind.into());
107 }
108
109 fn finish_node(&mut self) {
110 self.builder.finish_node();
111 }
112}
113
114impl<'i, 'cache> ParserRun<'i, 'cache> {
116 fn skip(&mut self, what: impl Fn(&lexer::Token) -> bool) {
117 while self.peek().map(|tk| what(&tk.token)).unwrap_or(false) {
118 self.bump();
119 }
120 }
121
122 fn skip_trivia(&mut self) {
123 self.skip(lexer::Token::is_trivia)
124 }
125
126 fn peek(&mut self) -> Option<lexer::TextToken> {
127 if self.peeked.is_none() {
128 self.peeked = Some(self.input.next());
130 }
131
132 self.peeked.unwrap()
135 }
136
137 fn raw(&self, token: lexer::TextToken) -> &'i str {
138 token.raw(self.source)
139 }
140
141 fn text(&self, token: lexer::TextToken) -> Unescaped {
142 Unescaped::new(self.raw(token))
143 }
144
145 fn eat_trivia(&mut self) {
146 while let Some(token) = self.trivia_buffer.pop_front() {
147 self.builder
148 .token(SyntaxKind::from(token.token).into(), token.raw(self.source));
149 }
150 }
151
152 fn buffer_trivia(&mut self) {
153 while let Some(current) = self.peek() {
154 if current.token.is_trivia() {
155 self.trivia_buffer.push_back(current);
156 } else {
157 break;
158 }
159
160 self.peeked.take();
162 }
163 }
164
165 fn push_error(&mut self, error_kind: ErrorKind, range: TextRange) {
166 self.errors.push(
167 Error::builder()
168 .pos(range)
169 .resolve(self.input.line_map())
170 .finish(error_kind),
171 );
172 }
173
174 fn expect_one(&mut self, expected: lexer::Token) -> ExpectAny {
175 if let Some(token) = self.peek() {
176 self.bump();
177
178 if expected == *token {
179 return ExpectAny::Found(token);
180 } else {
181 self.push_error(
182 ErrorKind::Unexpected {
183 actual: *token,
184 expected: Box::new([expected]),
185 },
186 token.range,
187 );
188
189 return ExpectAny::Unexpected(token);
190 }
191 }
192
193 self.push_error(
194 ErrorKind::EndOfInput {
195 expected: Box::new([expected]),
196 },
197 TextRange::new(TextSize::of(self.source), TextSize::of(self.source)),
198 );
199
200 ExpectAny::EndOfInput
201 }
202
203 #[must_use = "None is returned if the expected token was not found"]
204 fn expect_any(&mut self, expected: &[lexer::Token], dont_bump: &[lexer::Token]) -> ExpectAny {
205 let bitset: SyntaxBitset = expected.iter().map(|&k| k as u16).collect();
206 let dont_bump_bitset: SyntaxBitset = dont_bump.iter().map(|&k| k as u16).collect();
207
208 if let Some(token) = self.peek() {
209 if !dont_bump_bitset.contains(*token as _) {
211 self.bump();
212 }
213
214 if bitset.contains(*token as _) {
215 return ExpectAny::Found(token);
216 } else {
217 self.push_error(
218 ErrorKind::Unexpected {
219 actual: *token,
220 expected: expected.into(),
221 },
222 token.range,
223 );
224
225 return ExpectAny::Unexpected(token);
226 }
227 }
228
229 self.push_error(
230 ErrorKind::EndOfInput {
231 expected: expected.into(),
232 },
233 TextRange::new(TextSize::of(self.source), TextSize::of(self.source)),
234 );
235
236 ExpectAny::EndOfInput
237 }
238
239 fn bump(&mut self) {
240 let token = if let Some(token) = self.peeked.take() {
242 token
243 } else {
244 self.input.next()
245 };
246
247 if let Some(token) = token {
248 self.builder
249 .token(SyntaxKind::from(token.token).into(), token.raw(self.source));
250 } else {
251 panic!("tried to bump at end of input");
252 }
253 }
254}
255
/// Outcome of `ParserRun::expect_one` and `ParserRun::expect_any`.
#[derive(Debug, Clone, Copy)]
enum ExpectAny {
    /// The upcoming token was (one of) the expected kind(s).
    Found(TextToken),
    /// A token was present but not expected; an `Unexpected` error has been
    /// recorded for it.
    Unexpected(TextToken),
    /// No token was left; an `EndOfInput` error has been recorded.
    EndOfInput,
}
262
263#[cfg(test)]
264mod tests;