1use std::str::{CharIndices, FromStr};
2
3use acir_field::{AcirField, FieldElement};
4
5use noirc_span::{Position, Span};
6use num_bigint::BigInt;
7use num_traits::One;
8use thiserror::Error;
9
10use crate::parser::token::Keyword;
11
12use super::token::{SpannedToken, Token};
13
/// Character-level lexer that turns a source string into spanned tokens.
pub(super) struct Lexer<'a> {
    /// Iterator over the remaining `(byte offset, char)` pairs of the source.
    chars: CharIndices<'a>,
    /// Byte offset of the most recently consumed character (0 before any is consumed).
    position: Position,
    /// Set to `true` once `next_token` has run out of input.
    /// NOTE(review): written but never read within this file — confirm it is still needed.
    done: bool,
    /// Largest integer literal accepted: the field modulus minus one.
    max_integer: BigInt,
}
20
21impl<'a> Lexer<'a> {
22 pub(super) fn new(src: &'a str) -> Self {
23 Lexer {
24 chars: src.char_indices(),
25 position: 0,
26 done: false,
27 max_integer: BigInt::from_biguint(num_bigint::Sign::Plus, FieldElement::modulus()) - BigInt::one(),
29 }
30 }
31
32 pub(super) fn next_token(&mut self) -> SpannedTokenResult {
33 let Some(ch) = self.next_char() else {
34 self.done = true;
35 return Ok(Token::Eof.into_single_span(self.position));
36 };
37
38 match ch {
39 ch if ch.is_ascii_whitespace() => {
40 while let Some(char) = self.peek_char() {
41 if char.is_ascii_whitespace() {
42 self.next_char();
43 } else {
44 break;
45 }
46 }
47 self.next_token()
48 }
49 '/' if self.peek_char() == Some('/') => {
50 while let Some(char) = self.next_char() {
51 if char == '\n' {
52 break;
53 }
54 }
55 self.next_token()
56 }
57 '(' => self.single_char_token(Token::LeftParen),
58 ')' => self.single_char_token(Token::RightParen),
59 '[' => self.single_char_token(Token::LeftBracket),
60 ']' => self.single_char_token(Token::RightBracket),
61 ',' => self.single_char_token(Token::Comma),
62 ':' => self.single_char_token(Token::Colon),
63 ';' => self.single_char_token(Token::Semicolon),
64 '+' => self.single_char_token(Token::Plus),
65 '-' if self.peek_char().is_none_or(|char| !char.is_ascii_digit()) => {
66 self.single_char_token(Token::Minus)
67 }
68 '*' => self.single_char_token(Token::Star),
69 '=' => self.single_char_token(Token::Equal),
70 'b' | 'w' if self.peek_char().is_some_and(|char| char.is_ascii_digit()) => {
71 let start = self.position;
72
73 let digits = self.eat_while(None, |ch| ch.is_ascii_digit());
76 let end = self.position;
77
78 match digits.parse::<u32>() {
80 Ok(value) => {
81 let token =
82 if ch == 'w' { Token::Witness(value) } else { Token::Block(value) };
83 Ok(token.into_span(start, end))
84 }
85 Err(_) => Err(LexerError::InvalidIntegerLiteral {
86 span: Span::inclusive(start, end),
87 found: digits,
88 }),
89 }
90 }
91 '-' | '0'..='9' => self.eat_integer(ch),
92 ch if ch.is_ascii_alphabetic() => self.eat_word(ch),
93 ch => Err(LexerError::UnexpectedCharacter {
94 char: ch,
95 span: Span::single_char(self.position),
96 }),
97 }
98 }
99
100 fn eat_word(&mut self, initial_char: char) -> SpannedTokenResult {
101 let (start, word, end) = self.lex_word(initial_char);
102 self.lookup_word_token(word, start, end)
103 }
104
105 fn lex_word(&mut self, initial_char: char) -> (Position, String, Position) {
106 let start = self.position;
107 let word = self.eat_while(Some(initial_char), |ch| {
108 ch.is_ascii_alphabetic() || ch.is_numeric() || ch == '_'
109 });
110 (start, word, self.position)
111 }
112
113 fn lookup_word_token(
114 &self,
115 word: String,
116 start: Position,
117 end: Position,
118 ) -> SpannedTokenResult {
119 if let Some(keyword_token) = Keyword::lookup_keyword(&word) {
121 return Ok(keyword_token.into_span(start, end));
122 }
123
124 let ident_token = Token::Ident(word);
126 Ok(ident_token.into_span(start, end))
127 }
128
129 fn eat_integer(&mut self, first_char: char) -> SpannedTokenResult {
130 let start = self.position;
131 let mut number_str = String::new();
132
133 let is_negative = if first_char == '-' {
134 match self.peek_char() {
136 Some(ch) if ch.is_ascii_digit() => {
137 self.next_char();
139 number_str.push('-');
140 number_str.push(ch);
141 }
142 _ => {
143 return Err(LexerError::UnexpectedCharacter {
144 char: '-',
145 span: Span::single_char(start),
146 });
147 }
148 }
149 true
150 } else {
151 number_str.push(first_char);
152 false
153 };
154
155 number_str += &self.eat_while(None, |ch| ch.is_ascii_digit());
156
157 let end = self.position;
158
159 let bigint_result = BigInt::from_str(&number_str);
160 let integer = match bigint_result {
161 Ok(bigint) => {
162 if bigint > self.max_integer {
163 return Err(LexerError::IntegerLiteralTooLarge {
164 span: Span::inclusive(start, end),
165 limit: self.max_integer.to_string(),
166 });
167 }
168 let big_uint = bigint.magnitude();
169 let field = FieldElement::from_be_bytes_reduce(&big_uint.to_bytes_be());
170 if is_negative { -field } else { field }
171 }
172 Err(_) => {
173 return Err(LexerError::InvalidIntegerLiteral {
174 span: Span::inclusive(start, end),
175 found: number_str,
176 });
177 }
178 };
179
180 Ok(Token::Int(integer).into_span(start, end))
181 }
182
183 fn eat_while<F: Fn(char) -> bool>(
184 &mut self,
185 initial_char: Option<char>,
186 predicate: F,
187 ) -> String {
188 let mut word = String::new();
192 if let Some(init_char) = initial_char {
193 word.push(init_char);
194 }
195
196 while let Some(peek_char) = self.peek_char() {
198 if !predicate(peek_char) {
202 return word;
203 }
204 word.push(peek_char);
205
206 self.next_char();
208 }
209
210 word
211 }
212
213 fn single_char_token(&self, token: Token) -> SpannedTokenResult {
214 Ok(token.into_single_span(self.position))
215 }
216
217 fn next_char(&mut self) -> Option<char> {
218 let (position, ch) = self.chars.next()?;
219 self.position = position as u32;
220 Some(ch)
221 }
222
223 fn peek_char(&self) -> Option<char> {
224 self.chars.clone().next().map(|(_, ch)| ch)
225 }
226}
227
/// Result of lexing one token: a spanned token on success, a [`LexerError`] on failure.
type SpannedTokenResult = Result<SpannedToken, LexerError>;
229
/// Errors the lexer can report; every variant carries the span of the offending input.
#[derive(Debug, Error)]
pub(crate) enum LexerError {
    /// A character that cannot start any token was encountered.
    #[error("Unexpected character: {char:?}")]
    UnexpectedCharacter { char: char, span: Span },
    /// A run of digits could not be parsed; `found` holds the text that failed to parse.
    #[error("Invalid integer literal")]
    InvalidIntegerLiteral { span: Span, found: String },
    /// An integer literal exceeded the lexer's limit; `limit` is that limit as a decimal string.
    #[error("Integer literal too large")]
    IntegerLiteralTooLarge { span: Span, limit: String },
}
239
240impl LexerError {
241 pub(super) fn span(&self) -> Span {
242 use LexerError::*;
243 match self {
244 UnexpectedCharacter { span, .. } => *span,
245 InvalidIntegerLiteral { span, .. } => *span,
246 IntegerLiteralTooLarge { span, .. } => *span,
247 }
248 }
249}