acir/parser/
lexer.rs

1use std::str::{CharIndices, FromStr};
2
3use acir_field::{AcirField, FieldElement};
4
5use noirc_span::{Position, Span};
6use num_bigint::BigInt;
7use num_traits::One;
8use thiserror::Error;
9
10use crate::parser::token::Keyword;
11
12use super::token::{SpannedToken, Token};
13
14pub(super) struct Lexer<'a> {
15    chars: CharIndices<'a>,
16    position: Position,
17    done: bool,
18    max_integer: BigInt,
19}
20
21impl<'a> Lexer<'a> {
22    pub(super) fn new(src: &'a str) -> Self {
23        Lexer {
24            chars: src.char_indices(),
25            position: 0,
26            done: false,
27            max_integer: BigInt::from_biguint(num_bigint::Sign::Plus, FieldElement::modulus()) // cSpell:disable-line
28                - BigInt::one(),
29        }
30    }
31
32    pub(super) fn next_token(&mut self) -> SpannedTokenResult {
33        let Some(ch) = self.next_char() else {
34            self.done = true;
35            return Ok(Token::Eof.into_single_span(self.position));
36        };
37
38        match ch {
39            ch if ch.is_ascii_whitespace() => {
40                while let Some(char) = self.peek_char() {
41                    if char.is_ascii_whitespace() {
42                        self.next_char();
43                    } else {
44                        break;
45                    }
46                }
47                self.next_token()
48            }
49            '/' if self.peek_char() == Some('/') => {
50                while let Some(char) = self.next_char() {
51                    if char == '\n' {
52                        break;
53                    }
54                }
55                self.next_token()
56            }
57            '(' => self.single_char_token(Token::LeftParen),
58            ')' => self.single_char_token(Token::RightParen),
59            '[' => self.single_char_token(Token::LeftBracket),
60            ']' => self.single_char_token(Token::RightBracket),
61            ',' => self.single_char_token(Token::Comma),
62            ':' => self.single_char_token(Token::Colon),
63            ';' => self.single_char_token(Token::Semicolon),
64            '+' => self.single_char_token(Token::Plus),
65            '-' if self.peek_char().is_none_or(|char| !char.is_ascii_digit()) => {
66                self.single_char_token(Token::Minus)
67            }
68            '*' => self.single_char_token(Token::Star),
69            '=' => self.single_char_token(Token::Equal),
70            'b' | 'w' if self.peek_char().is_some_and(|char| char.is_ascii_digit()) => {
71                let start = self.position;
72
73                // Witness token format is 'w' followed by digits.
74                // Block token format is 'b' followed by digits.
75                let digits = self.eat_while(None, |ch| ch.is_ascii_digit());
76                let end = self.position;
77
78                // Parse digits into u32
79                match digits.parse::<u32>() {
80                    Ok(value) => {
81                        let token =
82                            if ch == 'w' { Token::Witness(value) } else { Token::Block(value) };
83                        Ok(token.into_span(start, end))
84                    }
85                    Err(_) => Err(LexerError::InvalidIntegerLiteral {
86                        span: Span::inclusive(start, end),
87                        found: digits,
88                    }),
89                }
90            }
91            '-' | '0'..='9' => self.eat_integer(ch),
92            ch if ch.is_ascii_alphabetic() => self.eat_word(ch),
93            ch => Err(LexerError::UnexpectedCharacter {
94                char: ch,
95                span: Span::single_char(self.position),
96            }),
97        }
98    }
99
100    fn eat_word(&mut self, initial_char: char) -> SpannedTokenResult {
101        let (start, word, end) = self.lex_word(initial_char);
102        self.lookup_word_token(word, start, end)
103    }
104
105    fn lex_word(&mut self, initial_char: char) -> (Position, String, Position) {
106        let start = self.position;
107        let word = self.eat_while(Some(initial_char), |ch| {
108            ch.is_ascii_alphabetic() || ch.is_numeric() || ch == '_'
109        });
110        (start, word, self.position)
111    }
112
113    fn lookup_word_token(
114        &self,
115        word: String,
116        start: Position,
117        end: Position,
118    ) -> SpannedTokenResult {
119        // Check if word either an identifier or a keyword
120        if let Some(keyword_token) = Keyword::lookup_keyword(&word) {
121            return Ok(keyword_token.into_span(start, end));
122        }
123
124        // Else it is just an identifier
125        let ident_token = Token::Ident(word);
126        Ok(ident_token.into_span(start, end))
127    }
128
129    fn eat_integer(&mut self, first_char: char) -> SpannedTokenResult {
130        let start = self.position;
131        let mut number_str = String::new();
132
133        let is_negative = if first_char == '-' {
134            // Peek ahead that '-' must be followed by a digit
135            match self.peek_char() {
136                Some(ch) if ch.is_ascii_digit() => {
137                    // Consume the digit we just peeked
138                    self.next_char();
139                    number_str.push('-');
140                    number_str.push(ch);
141                }
142                _ => {
143                    return Err(LexerError::UnexpectedCharacter {
144                        char: '-',
145                        span: Span::single_char(start),
146                    });
147                }
148            }
149            true
150        } else {
151            number_str.push(first_char);
152            false
153        };
154
155        number_str += &self.eat_while(None, |ch| ch.is_ascii_digit());
156
157        let end = self.position;
158
159        let bigint_result = BigInt::from_str(&number_str);
160        let integer = match bigint_result {
161            Ok(bigint) => {
162                if bigint > self.max_integer {
163                    return Err(LexerError::IntegerLiteralTooLarge {
164                        span: Span::inclusive(start, end),
165                        limit: self.max_integer.to_string(),
166                    });
167                }
168                let big_uint = bigint.magnitude();
169                let field = FieldElement::from_be_bytes_reduce(&big_uint.to_bytes_be());
170                if is_negative { -field } else { field }
171            }
172            Err(_) => {
173                return Err(LexerError::InvalidIntegerLiteral {
174                    span: Span::inclusive(start, end),
175                    found: number_str,
176                });
177            }
178        };
179
180        Ok(Token::Int(integer).into_span(start, end))
181    }
182
183    fn eat_while<F: Fn(char) -> bool>(
184        &mut self,
185        initial_char: Option<char>,
186        predicate: F,
187    ) -> String {
188        // This function is only called when we want to continue consuming a character of the same type.
189        // For example, we see a digit and we want to consume the whole integer
190        // Therefore, the current character which triggered this function will need to be appended
191        let mut word = String::new();
192        if let Some(init_char) = initial_char {
193            word.push(init_char);
194        }
195
196        // Keep checking that we are not at the EOF
197        while let Some(peek_char) = self.peek_char() {
198            // Then check for the predicate, if predicate matches append char and increment the cursor
199            // If not, return word. The next character will be analyzed on the next iteration of next_token,
200            // Which will increment the cursor
201            if !predicate(peek_char) {
202                return word;
203            }
204            word.push(peek_char);
205
206            // If we arrive at this point, then the char has been added to the word and we should increment the cursor
207            self.next_char();
208        }
209
210        word
211    }
212
213    fn single_char_token(&self, token: Token) -> SpannedTokenResult {
214        Ok(token.into_single_span(self.position))
215    }
216
217    fn next_char(&mut self) -> Option<char> {
218        let (position, ch) = self.chars.next()?;
219        self.position = position as u32;
220        Some(ch)
221    }
222
223    fn peek_char(&self) -> Option<char> {
224        self.chars.clone().next().map(|(_, ch)| ch)
225    }
226}
227
228type SpannedTokenResult = Result<SpannedToken, LexerError>;
229
230#[derive(Debug, Error)]
231pub(crate) enum LexerError {
232    #[error("Unexpected character: {char:?}")]
233    UnexpectedCharacter { char: char, span: Span },
234    #[error("Invalid integer literal")]
235    InvalidIntegerLiteral { span: Span, found: String },
236    #[error("Integer literal too large")]
237    IntegerLiteralTooLarge { span: Span, limit: String },
238}
239
240impl LexerError {
241    pub(super) fn span(&self) -> Span {
242        use LexerError::*;
243        match self {
244            UnexpectedCharacter { span, .. } => *span,
245            InvalidIntegerLiteral { span, .. } => *span,
246            IntegerLiteralTooLarge { span, .. } => *span,
247        }
248    }
249}