1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
#[repr(u16)]
pub enum Token {
    /// Identifier or keyword
    IDENT_KW = 1,
    /// defined preprocessor keyword
    DEFINED = 2,
    /// Digit sequence
    DIGITS = 3,
    /// .
    PERIOD = 4,
    /// +
    PLUS = 5,
    /// -
    DASH = 6,
    /// /
    SLASH = 7,
    /// *
    ASTERISK = 8,
    /// %
    PERCENT = 9,
    /// <
    LANGLE = 10,
    /// >
    RANGLE = 11,
    /// [
    LBRACKET = 12,
    /// ]
    RBRACKET = 13,
    /// (
    LPAREN = 14,
    /// )
    RPAREN = 15,
    /// {
    LBRACE = 16,
    /// }
    RBRACE = 17,
    /// ^
    CARET = 18,
    /// |
    BAR = 19,
    /// &
    AMPERSAND = 20,
    /// ~
    TILDE = 21,
    /// =
    EQUAL = 22,
    /// !
    BANG = 23,
    /// :
    COLON = 24,
    /// ;
    SEMICOLON = 25,
    /// ,
    COMMA = 26,
    /// ?
    QUESTION = 27,
    /// #
    HASH = 28,
    /// "string"
    QUOTE_STRING = 29,
    /// <string>
    ANGLE_STRING = 30,
    /// \
    BACKSLASH = 31,
    /// Whitespace
    WS = 32,
    /// Newline
    NEWLINE = 33,
    /// Comment (single-line or multi-line)
    COMMENT = 34,
    /// Line continuation (required for tracking exact offsets)
    LINECONT = 35,
    /// Invalid token
    ERROR = 36,
}

impl Token {
    pub fn is_trivia(&self) -> bool {
        matches!(self, Self::LINECONT | Self::WS | Self::COMMENT)
    }

    pub fn from_punct(raw: &str) -> Self {
        use Token::*;

        match raw {
            "." => PERIOD,
            "+" => PLUS,
            "-" => DASH,
            "/" => SLASH,
            "*" => ASTERISK,
            "%" => PERCENT,
            "<" => LANGLE,
            ">" => RANGLE,
            "[" => LBRACKET,
            "]" => RBRACKET,
            "(" => LPAREN,
            ")" => RPAREN,
            "{" => LBRACE,
            "}" => RBRACE,
            "^" => CARET,
            "|" => BAR,
            "&" => AMPERSAND,
            "~" => TILDE,
            "=" => EQUAL,
            "!" => BANG,
            ":" => COLON,
            ";" => SEMICOLON,
            "," => COMMA,
            "?" => QUESTION,
            "#" => HASH,
            _ => ERROR,
        }
    }
}