acir/parser/
lexer.rs

1use std::str::{CharIndices, FromStr};
2
3use acir_field::{AcirField, FieldElement};
4
5use noirc_span::{Position, Span};
6use num_bigint::BigInt;
7use num_traits::One;
8use thiserror::Error;
9
10use crate::parser::token::Keyword;
11
12use super::token::{SpannedToken, Token};
13
/// Turns source text into tokens, one [`Lexer::next_token`] call at a time.
pub(super) struct Lexer<'a> {
    /// Remaining characters of the source, each paired with its offset in the source.
    chars: CharIndices<'a>,
    /// Offset of the most recently consumed character (`0` before anything is consumed).
    position: Position,
    /// Set to `true` once `next_token` has run out of input.
    done: bool,
    /// Largest integer literal the lexer accepts: the field modulus minus one.
    max_integer: BigInt,
}
20
21impl<'a> Lexer<'a> {
22    pub(super) fn new(src: &'a str) -> Self {
23        Lexer {
24            chars: src.char_indices(),
25            position: 0,
26            done: false,
27            max_integer: BigInt::from_biguint(num_bigint::Sign::Plus, FieldElement::modulus()) // cSpell:disable-line
28                - BigInt::one(),
29        }
30    }
31
32    pub(super) fn next_token(&mut self) -> SpannedTokenResult {
33        let ch = match self.next_char() {
34            Some(ch) => ch,
35            None => {
36                self.done = true;
37                return Ok(Token::Eof.into_single_span(self.position));
38            }
39        };
40
41        match ch {
42            ch if ch.is_ascii_whitespace() => {
43                while let Some(char) = self.peek_char() {
44                    if char.is_ascii_whitespace() {
45                        self.next_char();
46                    } else {
47                        break;
48                    }
49                }
50                self.next_token()
51            }
52            '/' if self.peek_char() == Some('/') => {
53                while let Some(char) = self.next_char() {
54                    if char == '\n' {
55                        break;
56                    }
57                }
58                self.next_token()
59            }
60            '(' => self.single_char_token(Token::LeftParen),
61            ')' => self.single_char_token(Token::RightParen),
62            '[' => self.single_char_token(Token::LeftBracket),
63            ']' => self.single_char_token(Token::RightBracket),
64            ',' => self.single_char_token(Token::Comma),
65            ':' => self.single_char_token(Token::Colon),
66            ';' => self.single_char_token(Token::Semicolon),
67            '+' => self.single_char_token(Token::Plus),
68            '-' if self.peek_char().is_none_or(|char| !char.is_ascii_digit()) => {
69                self.single_char_token(Token::Minus)
70            }
71            '*' => self.single_char_token(Token::Star),
72            '=' => self.single_char_token(Token::Equal),
73            'b' | 'w' if self.peek_char().is_some_and(|char| char.is_ascii_digit()) => {
74                let start = self.position;
75
76                // Witness token format is 'w' followed by digits.
77                // Block token format is 'b' followed by digits.
78                let digits = self.eat_while(None, |ch| ch.is_ascii_digit());
79                let end = self.position;
80
81                // Parse digits into u32
82                match digits.parse::<u32>() {
83                    Ok(value) => {
84                        let token =
85                            if ch == 'w' { Token::Witness(value) } else { Token::Block(value) };
86                        Ok(token.into_span(start, end))
87                    }
88                    Err(_) => Err(LexerError::InvalidIntegerLiteral {
89                        span: Span::inclusive(start, end),
90                        found: digits,
91                    }),
92                }
93            }
94            '-' | '0'..='9' => self.eat_integer(ch),
95            ch if ch.is_ascii_alphabetic() => self.eat_word(ch),
96            ch => Err(LexerError::UnexpectedCharacter {
97                char: ch,
98                span: Span::single_char(self.position),
99            }),
100        }
101    }
102
103    fn eat_word(&mut self, initial_char: char) -> SpannedTokenResult {
104        let (start, word, end) = self.lex_word(initial_char);
105        self.lookup_word_token(word, start, end)
106    }
107
108    fn lex_word(&mut self, initial_char: char) -> (Position, String, Position) {
109        let start = self.position;
110        let word = self.eat_while(Some(initial_char), |ch| {
111            ch.is_ascii_alphabetic() || ch.is_numeric() || ch == '_'
112        });
113        (start, word, self.position)
114    }
115
116    fn lookup_word_token(
117        &self,
118        word: String,
119        start: Position,
120        end: Position,
121    ) -> SpannedTokenResult {
122        // Check if word either an identifier or a keyword
123        if let Some(keyword_token) = Keyword::lookup_keyword(&word) {
124            return Ok(keyword_token.into_span(start, end));
125        }
126
127        // Else it is just an identifier
128        let ident_token = Token::Ident(word);
129        Ok(ident_token.into_span(start, end))
130    }
131
132    fn eat_integer(&mut self, first_char: char) -> SpannedTokenResult {
133        let start = self.position;
134        let mut number_str = String::new();
135
136        let is_negative = if first_char == '-' {
137            // Peek ahead that '-' must be followed by a digit
138            match self.peek_char() {
139                Some(ch) if ch.is_ascii_digit() => {
140                    // Consume the digit we just peeked
141                    self.next_char();
142                    number_str.push('-');
143                    number_str.push(ch);
144                }
145                _ => {
146                    return Err(LexerError::UnexpectedCharacter {
147                        char: '-',
148                        span: Span::single_char(start),
149                    });
150                }
151            }
152            true
153        } else {
154            number_str.push(first_char);
155            false
156        };
157
158        number_str += &self.eat_while(None, |ch| ch.is_ascii_digit());
159
160        let end = self.position;
161
162        let bigint_result = BigInt::from_str(&number_str);
163        let integer = match bigint_result {
164            Ok(bigint) => {
165                if bigint > self.max_integer {
166                    return Err(LexerError::IntegerLiteralTooLarge {
167                        span: Span::inclusive(start, end),
168                        limit: self.max_integer.to_string(),
169                    });
170                }
171                let big_uint = bigint.magnitude();
172                let field = FieldElement::from_be_bytes_reduce(&big_uint.to_bytes_be());
173                if is_negative { -field } else { field }
174            }
175            Err(_) => {
176                return Err(LexerError::InvalidIntegerLiteral {
177                    span: Span::inclusive(start, end),
178                    found: number_str,
179                });
180            }
181        };
182
183        Ok(Token::Int(integer).into_span(start, end))
184    }
185
186    fn eat_while<F: Fn(char) -> bool>(
187        &mut self,
188        initial_char: Option<char>,
189        predicate: F,
190    ) -> String {
191        // This function is only called when we want to continue consuming a character of the same type.
192        // For example, we see a digit and we want to consume the whole integer
193        // Therefore, the current character which triggered this function will need to be appended
194        let mut word = String::new();
195        if let Some(init_char) = initial_char {
196            word.push(init_char);
197        }
198
199        // Keep checking that we are not at the EOF
200        while let Some(peek_char) = self.peek_char() {
201            // Then check for the predicate, if predicate matches append char and increment the cursor
202            // If not, return word. The next character will be analyzed on the next iteration of next_token,
203            // Which will increment the cursor
204            if !predicate(peek_char) {
205                return word;
206            }
207            word.push(peek_char);
208
209            // If we arrive at this point, then the char has been added to the word and we should increment the cursor
210            self.next_char();
211        }
212
213        word
214    }
215
216    fn single_char_token(&self, token: Token) -> SpannedTokenResult {
217        Ok(token.into_single_span(self.position))
218    }
219
220    fn next_char(&mut self) -> Option<char> {
221        let (position, ch) = self.chars.next()?;
222        self.position = position as u32;
223        Some(ch)
224    }
225
226    fn peek_char(&self) -> Option<char> {
227        self.chars.clone().next().map(|(_, ch)| ch)
228    }
229}
230
/// Result of lexing a single token: the token with its span, or the error that stopped lexing.
type SpannedTokenResult = Result<SpannedToken, LexerError>;
232
/// Errors the lexer can report; every variant carries the span of the offending text.
#[derive(Debug, Error)]
pub(crate) enum LexerError {
    /// A character that cannot start any token, or a `-` that is not followed by a digit
    /// where an integer literal was expected.
    #[error("Unexpected character: {char:?}")]
    UnexpectedCharacter { char: char, span: Span },
    /// Text that could not be parsed as a number; `found` holds the text that was collected.
    #[error("Invalid integer literal")]
    InvalidIntegerLiteral { span: Span, found: String },
    /// An integer literal beyond the lexer's maximum; `limit` is that maximum in decimal.
    #[error("Integer literal too large")]
    IntegerLiteralTooLarge { span: Span, limit: String },
}
242
243impl LexerError {
244    pub(super) fn span(&self) -> Span {
245        use LexerError::*;
246        match self {
247            UnexpectedCharacter { span, .. } => *span,
248            InvalidIntegerLiteral { span, .. } => *span,
249            IntegerLiteralTooLarge { span, .. } => *span,
250        }
251    }
252}