1use std::str::{CharIndices, FromStr};
2
3use acir_field::{AcirField, FieldElement};
4
5use noirc_span::{Position, Span};
6use num_bigint::BigInt;
7use num_traits::One;
8use thiserror::Error;
9
10use crate::parser::token::Keyword;
11
12use super::token::{SpannedToken, Token};
13
14pub(super) struct Lexer<'a> {
15 chars: CharIndices<'a>,
16 position: Position,
17 done: bool,
18 max_integer: BigInt,
19}
20
21impl<'a> Lexer<'a> {
22 pub(super) fn new(src: &'a str) -> Self {
23 Lexer {
24 chars: src.char_indices(),
25 position: 0,
26 done: false,
27 max_integer: BigInt::from_biguint(num_bigint::Sign::Plus, FieldElement::modulus()) - BigInt::one(),
29 }
30 }
31
32 pub(super) fn next_token(&mut self) -> SpannedTokenResult {
33 let ch = match self.next_char() {
34 Some(ch) => ch,
35 None => {
36 self.done = true;
37 return Ok(Token::Eof.into_single_span(self.position));
38 }
39 };
40
41 match ch {
42 ch if ch.is_ascii_whitespace() => {
43 while let Some(char) = self.peek_char() {
44 if char.is_ascii_whitespace() {
45 self.next_char();
46 } else {
47 break;
48 }
49 }
50 self.next_token()
51 }
52 '/' if self.peek_char() == Some('/') => {
53 while let Some(char) = self.next_char() {
54 if char == '\n' {
55 break;
56 }
57 }
58 self.next_token()
59 }
60 '(' => self.single_char_token(Token::LeftParen),
61 ')' => self.single_char_token(Token::RightParen),
62 '[' => self.single_char_token(Token::LeftBracket),
63 ']' => self.single_char_token(Token::RightBracket),
64 ',' => self.single_char_token(Token::Comma),
65 ':' => self.single_char_token(Token::Colon),
66 ';' => self.single_char_token(Token::Semicolon),
67 '+' => self.single_char_token(Token::Plus),
68 '-' if self.peek_char().is_none_or(|char| !char.is_ascii_digit()) => {
69 self.single_char_token(Token::Minus)
70 }
71 '*' => self.single_char_token(Token::Star),
72 '=' => self.single_char_token(Token::Equal),
73 'b' | 'w' if self.peek_char().is_some_and(|char| char.is_ascii_digit()) => {
74 let start = self.position;
75
76 let digits = self.eat_while(None, |ch| ch.is_ascii_digit());
79 let end = self.position;
80
81 match digits.parse::<u32>() {
83 Ok(value) => {
84 let token =
85 if ch == 'w' { Token::Witness(value) } else { Token::Block(value) };
86 Ok(token.into_span(start, end))
87 }
88 Err(_) => Err(LexerError::InvalidIntegerLiteral {
89 span: Span::inclusive(start, end),
90 found: digits,
91 }),
92 }
93 }
94 '-' | '0'..='9' => self.eat_integer(ch),
95 ch if ch.is_ascii_alphabetic() => self.eat_word(ch),
96 ch => Err(LexerError::UnexpectedCharacter {
97 char: ch,
98 span: Span::single_char(self.position),
99 }),
100 }
101 }
102
103 fn eat_word(&mut self, initial_char: char) -> SpannedTokenResult {
104 let (start, word, end) = self.lex_word(initial_char);
105 self.lookup_word_token(word, start, end)
106 }
107
108 fn lex_word(&mut self, initial_char: char) -> (Position, String, Position) {
109 let start = self.position;
110 let word = self.eat_while(Some(initial_char), |ch| {
111 ch.is_ascii_alphabetic() || ch.is_numeric() || ch == '_'
112 });
113 (start, word, self.position)
114 }
115
116 fn lookup_word_token(
117 &self,
118 word: String,
119 start: Position,
120 end: Position,
121 ) -> SpannedTokenResult {
122 if let Some(keyword_token) = Keyword::lookup_keyword(&word) {
124 return Ok(keyword_token.into_span(start, end));
125 }
126
127 let ident_token = Token::Ident(word);
129 Ok(ident_token.into_span(start, end))
130 }
131
132 fn eat_integer(&mut self, first_char: char) -> SpannedTokenResult {
133 let start = self.position;
134 let mut number_str = String::new();
135
136 let is_negative = if first_char == '-' {
137 match self.peek_char() {
139 Some(ch) if ch.is_ascii_digit() => {
140 self.next_char();
142 number_str.push('-');
143 number_str.push(ch);
144 }
145 _ => {
146 return Err(LexerError::UnexpectedCharacter {
147 char: '-',
148 span: Span::single_char(start),
149 });
150 }
151 }
152 true
153 } else {
154 number_str.push(first_char);
155 false
156 };
157
158 number_str += &self.eat_while(None, |ch| ch.is_ascii_digit());
159
160 let end = self.position;
161
162 let bigint_result = BigInt::from_str(&number_str);
163 let integer = match bigint_result {
164 Ok(bigint) => {
165 if bigint > self.max_integer {
166 return Err(LexerError::IntegerLiteralTooLarge {
167 span: Span::inclusive(start, end),
168 limit: self.max_integer.to_string(),
169 });
170 }
171 let big_uint = bigint.magnitude();
172 let field = FieldElement::from_be_bytes_reduce(&big_uint.to_bytes_be());
173 if is_negative { -field } else { field }
174 }
175 Err(_) => {
176 return Err(LexerError::InvalidIntegerLiteral {
177 span: Span::inclusive(start, end),
178 found: number_str,
179 });
180 }
181 };
182
183 Ok(Token::Int(integer).into_span(start, end))
184 }
185
186 fn eat_while<F: Fn(char) -> bool>(
187 &mut self,
188 initial_char: Option<char>,
189 predicate: F,
190 ) -> String {
191 let mut word = String::new();
195 if let Some(init_char) = initial_char {
196 word.push(init_char);
197 }
198
199 while let Some(peek_char) = self.peek_char() {
201 if !predicate(peek_char) {
205 return word;
206 }
207 word.push(peek_char);
208
209 self.next_char();
211 }
212
213 word
214 }
215
216 fn single_char_token(&self, token: Token) -> SpannedTokenResult {
217 Ok(token.into_single_span(self.position))
218 }
219
220 fn next_char(&mut self) -> Option<char> {
221 let (position, ch) = self.chars.next()?;
222 self.position = position as u32;
223 Some(ch)
224 }
225
226 fn peek_char(&self) -> Option<char> {
227 self.chars.clone().next().map(|(_, ch)| ch)
228 }
229}
230
231type SpannedTokenResult = Result<SpannedToken, LexerError>;
232
233#[derive(Debug, Error)]
234pub(crate) enum LexerError {
235 #[error("Unexpected character: {char:?}")]
236 UnexpectedCharacter { char: char, span: Span },
237 #[error("Invalid integer literal")]
238 InvalidIntegerLiteral { span: Span, found: String },
239 #[error("Integer literal too large")]
240 IntegerLiteralTooLarge { span: Span, limit: String },
241}
242
243impl LexerError {
244 pub(super) fn span(&self) -> Span {
245 use LexerError::*;
246 match self {
247 UnexpectedCharacter { span, .. } => *span,
248 InvalidIntegerLiteral { span, .. } => *span,
249 IntegerLiteralTooLarge { span, .. } => *span,
250 }
251 }
252}