Skip to main content

proc_macro2/
parse.rs

1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3use crate::fallback::{
4    self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
5    TokenStreamBuilder,
6};
7use crate::{Delimiter, Punct, Spacing, TokenTree};
8use core::char;
9use core::str::{Bytes, CharIndices, Chars};
10
// A cheap, copyable view of the input that has not been lexed yet. Parsers
// take a `Cursor` by value and hand back a new one positioned after whatever
// they consumed.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    // The remaining, not yet consumed, source text.
    pub(crate) rest: &'a str,
    // Position counter that `advance` increases by the number of `char`s
    // (not bytes) it steps over. Only present when span locations are
    // enabled.
    #[cfg(span_locations)]
    pub(crate) off: u32,
}
17
impl<'a> Cursor<'a> {
    // Returns a cursor positioned `bytes` bytes further into the input.
    // `bytes` must fall on a char boundary inside `rest`; `str::split_at`
    // panics otherwise.
    pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
        let (_front, rest) = self.rest.split_at(bytes);
        Cursor {
            rest,
            // The offset is tracked in chars, so count the chars skipped
            // rather than adding `bytes`.
            #[cfg(span_locations)]
            off: self.off + _front.chars().count() as u32,
        }
    }

    // Whether the remaining input begins with the string `s`.
    pub(crate) fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    // Whether the remaining input begins with the character `ch`.
    pub(crate) fn starts_with_char(&self, ch: char) -> bool {
        self.rest.starts_with(ch)
    }

    // Whether the first remaining character satisfies the predicate `f`.
    pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
    where
        Pattern: FnMut(char) -> bool,
    {
        self.rest.starts_with(f)
    }

    // Whether all input has been consumed.
    pub(crate) fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    // Number of bytes remaining.
    fn len(&self) -> usize {
        self.rest.len()
    }

    // The remaining input as raw bytes.
    fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    // Iterator over the remaining bytes.
    fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    // Iterator over the remaining chars.
    fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    // Iterator over the remaining chars paired with their byte offsets
    // relative to this cursor.
    fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }

    // Consumes the literal text `tag` if the input starts with it, returning
    // the cursor just past it; otherwise rejects without consuming anything.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if self.starts_with(tag) {
            Ok(self.advance(tag.len()))
        } else {
            Err(Reject)
        }
    }
}
75
// Marker error returned by a parser when the input at the cursor does not
// match what that parser recognizes. It carries no further information.
pub(crate) struct Reject;
// Result of a parser: on success, the cursor positioned after the parsed item
// together with the parsed output `O`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
78
79fn skip_whitespace(input: Cursor) -> Cursor {
80    let mut s = input;
81
82    while !s.is_empty() {
83        let byte = s.as_bytes()[0];
84        if byte == b'/' {
85            if s.starts_with("//")
86                && (!s.starts_with("///") || s.starts_with("////"))
87                && !s.starts_with("//!")
88            {
89                let (cursor, _) = take_until_newline_or_eof(s);
90                s = cursor;
91                continue;
92            } else if s.starts_with("/**/") {
93                s = s.advance(4);
94                continue;
95            } else if s.starts_with("/*")
96                && (!s.starts_with("/**") || s.starts_with("/***"))
97                && !s.starts_with("/*!")
98            {
99                match block_comment(s) {
100                    Ok((rest, _)) => {
101                        s = rest;
102                        continue;
103                    }
104                    Err(Reject) => return s,
105                }
106            }
107        }
108        match byte {
109            b' ' | 0x09..=0x0d => {
110                s = s.advance(1);
111                continue;
112            }
113            b if b.is_ascii() => {}
114            _ => {
115                let ch = s.chars().next().unwrap();
116                if is_whitespace(ch) {
117                    s = s.advance(ch.len_utf8());
118                    continue;
119                }
120            }
121        }
122        return s;
123    }
124    s
125}
126
127fn block_comment(input: Cursor) -> PResult<&str> {
128    if !input.starts_with("/*") {
129        return Err(Reject);
130    }
131
132    let mut depth = 0usize;
133    let bytes = input.as_bytes();
134    let mut i = 0usize;
135    let upper = bytes.len() - 1;
136
137    while i < upper {
138        if bytes[i] == b'/' && bytes[i + 1] == b'*' {
139            depth += 1;
140            i += 1; // eat '*'
141        } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
142            depth -= 1;
143            if depth == 0 {
144                return Ok((input.advance(i + 2), &input.rest[..i + 2]));
145            }
146            i += 1; // eat '/'
147        }
148        i += 1;
149    }
150
151    Err(Reject)
152}
153
// Whether `ch` counts as whitespace for the lexer: anything
// `char::is_whitespace` accepts, plus the two directional marks.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
158
159fn word_break(input: Cursor) -> Result<Cursor, Reject> {
160    match input.chars().next() {
161        Some(ch) if is_ident_continue(ch) => Err(Reject),
162        Some(_) | None => Ok(input),
163    }
164}
165
// Rustc's representation of a macro expansion error in expression position or
// type position. The lexer special-cases this exact text: `token_stream` does
// not treat its leading `(` as an opening delimiter, and `leaf_token` emits
// the whole marker as a single literal token.
const ERROR: &str = "(/*ERROR*/)";
169
// Lexes the whole input into a `TokenStream`.
//
// Nesting is handled without recursion: on an opening delimiter the tokens
// collected so far are pushed onto `stack` and a fresh builder is started; on
// the matching closing delimiter the builder is turned into a `Group` and
// appended to the popped outer builder.
//
// Returns a `LexError` when a token cannot be lexed, when a closing delimiter
// has no matching opener or the wrong kind, or when the input ends while a
// group is still open.
pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
    // Tokens of the group currently being filled (or of the top level).
    let mut trees = TokenStreamBuilder::new();
    // One frame per open delimiter: the delimiter kind and the enclosing
    // builder, plus the opening offset when span locations are enabled.
    let mut stack = Vec::new();

    loop {
        input = skip_whitespace(input);

        // Doc comments are expanded into `#[doc = "..."]` style tokens and
        // pushed directly onto `trees`.
        if let Ok((rest, ())) = doc_comment(input, &mut trees) {
            input = rest;
            continue;
        }

        #[cfg(span_locations)]
        let lo = input.off;

        let first = match input.bytes().next() {
            Some(first) => first,
            // End of input: fine at the top level, an error inside a group.
            None => match stack.last() {
                None => return Ok(trees.build()),
                #[cfg(span_locations)]
                Some((lo, _frame)) => {
                    // Report the error at the unclosed opening delimiter.
                    return Err(LexError {
                        span: Span { lo: *lo, hi: *lo },
                    })
                }
                #[cfg(not(span_locations))]
                Some(_frame) => return Err(LexError { span: Span {} }),
            },
        };

        if let Some(open_delimiter) = match first {
            // The `(` that begins the ERROR marker is not a delimiter; the
            // marker is lexed as a leaf token below.
            b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
            b'[' => Some(Delimiter::Bracket),
            b'{' => Some(Delimiter::Brace),
            _ => None,
        } {
            input = input.advance(1);
            // Save the outer builder and start collecting the group's
            // contents.
            let frame = (open_delimiter, trees);
            #[cfg(span_locations)]
            let frame = (lo, frame);
            stack.push(frame);
            trees = TokenStreamBuilder::new();
        } else if let Some(close_delimiter) = match first {
            b')' => Some(Delimiter::Parenthesis),
            b']' => Some(Delimiter::Bracket),
            b'}' => Some(Delimiter::Brace),
            _ => None,
        } {
            // A closing delimiter with nothing open is an error.
            let frame = match stack.pop() {
                Some(frame) => frame,
                None => return Err(lex_error(input)),
            };
            // From here on `lo` is the offset of the opening delimiter.
            #[cfg(span_locations)]
            let (lo, frame) = frame;
            let (open_delimiter, outer) = frame;
            if open_delimiter != close_delimiter {
                return Err(lex_error(input));
            }
            input = input.advance(1);
            // The group's span runs from its opening delimiter to just past
            // the closing one.
            let mut g = Group::new(open_delimiter, trees.build());
            g.set_span(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: input.off,
            });
            // Resume filling the enclosing builder.
            trees = outer;
            trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
        } else {
            // Anything else must be a single leaf token.
            let (rest, mut tt) = match leaf_token(input) {
                Ok((rest, tt)) => (rest, tt),
                Err(Reject) => return Err(lex_error(input)),
            };
            tt.set_span(crate::Span::_new_fallback(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: rest.off,
            }));
            trees.push_token_from_parser(tt);
            input = rest;
        }
    }
}
254
255fn lex_error(cursor: Cursor) -> LexError {
256    #[cfg(not(span_locations))]
257    let _ = cursor;
258    LexError {
259        span: Span {
260            #[cfg(span_locations)]
261            lo: cursor.off,
262            #[cfg(span_locations)]
263            hi: cursor.off,
264        },
265    }
266}
267
268fn leaf_token(input: Cursor) -> PResult<TokenTree> {
269    if let Ok((input, l)) = literal(input) {
270        // must be parsed before ident
271        Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
272    } else if let Ok((input, p)) = punct(input) {
273        Ok((input, TokenTree::Punct(p)))
274    } else if let Ok((input, i)) = ident(input) {
275        Ok((input, TokenTree::Ident(i)))
276    } else if input.starts_with(ERROR) {
277        let rest = input.advance(ERROR.len());
278        let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
279        Ok((rest, TokenTree::Literal(repr)))
280    } else {
281        Err(Reject)
282    }
283}
284
285fn ident(input: Cursor) -> PResult<crate::Ident> {
286    if [
287        "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
288    ]
289    .iter()
290    .any(|prefix| input.starts_with(prefix))
291    {
292        Err(Reject)
293    } else {
294        ident_any(input)
295    }
296}
297
298fn ident_any(input: Cursor) -> PResult<crate::Ident> {
299    let raw = input.starts_with("r#");
300    let rest = input.advance((raw as usize) << 1);
301
302    let (rest, sym) = ident_not_raw(rest)?;
303
304    if !raw {
305        let ident =
306            crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
307        return Ok((rest, ident));
308    }
309
310    match sym {
311        "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
312        _ => {}
313    }
314
315    let ident =
316        crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
317    Ok((rest, ident))
318}
319
320fn ident_not_raw(input: Cursor) -> PResult<&str> {
321    let mut chars = input.char_indices();
322
323    match chars.next() {
324        Some((_, ch)) if is_ident_start(ch) => {}
325        _ => return Err(Reject),
326    }
327
328    let mut end = input.len();
329    for (i, ch) in chars {
330        if !is_ident_continue(ch) {
331            end = i;
332            break;
333        }
334    }
335
336    Ok((input.advance(end), &input.rest[..end]))
337}
338
339pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
340    let rest = literal_nocapture(input)?;
341    let end = input.len() - rest.len();
342    Ok((rest, Literal::_new(input.rest[..end].to_string())))
343}
344
345fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
346    if let Ok(ok) = string(input) {
347        Ok(ok)
348    } else if let Ok(ok) = byte_string(input) {
349        Ok(ok)
350    } else if let Ok(ok) = c_string(input) {
351        Ok(ok)
352    } else if let Ok(ok) = byte(input) {
353        Ok(ok)
354    } else if let Ok(ok) = character(input) {
355        Ok(ok)
356    } else if let Ok(ok) = float(input) {
357        Ok(ok)
358    } else if let Ok(ok) = int(input) {
359        Ok(ok)
360    } else {
361        Err(Reject)
362    }
363}
364
365fn literal_suffix(input: Cursor) -> Cursor {
366    match ident_not_raw(input) {
367        Ok((input, _)) => input,
368        Err(Reject) => input,
369    }
370}
371
372fn string(input: Cursor) -> Result<Cursor, Reject> {
373    if let Ok(input) = input.parse("\"") {
374        cooked_string(input)
375    } else if let Ok(input) = input.parse("r") {
376        raw_string(input)
377    } else {
378        Err(Reject)
379    }
380}
381
// Scans the body of a cooked string literal; `input` is positioned just after
// the opening `"`. Returns the cursor after the closing `"` and any literal
// suffix. Rejects on an unterminated string, a carriage return that is not
// followed by `\n`, or an invalid escape sequence.
fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                // `i` is relative to the current `input`, which may have been
                // moved forward by a line continuation below.
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            // Only `\r\n` is allowed; a bare `\r` is rejected.
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    backslash_x_char(&mut chars)?;
                }
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
                Some((_, 'u')) => {
                    backslash_u(&mut chars)?;
                }
                // Backslash at end of line: skip the newline and the
                // whitespace that follows, then restart the iterator so that
                // indices are relative to the advanced `input`.
                Some((newline, ch @ ('\n' | '\r'))) => {
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, ch as u8)?;
                    chars = input.char_indices();
                }
                _ => break,
            },
            _ch => {}
        }
    }
    Err(Reject)
}
415
416fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
417    let (input, delimiter) = delimiter_of_raw_string(input)?;
418    let mut bytes = input.bytes().enumerate();
419    while let Some((i, byte)) = bytes.next() {
420        match byte {
421            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
422                let rest = input.advance(i + 1 + delimiter.len());
423                return Ok(literal_suffix(rest));
424            }
425            b'\r' => match bytes.next() {
426                Some((_, b'\n')) => {}
427                _ => break,
428            },
429            _ => {}
430        }
431    }
432    Err(Reject)
433}
434
435fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
436    if let Ok(input) = input.parse("b\"") {
437        cooked_byte_string(input)
438    } else if let Ok(input) = input.parse("br") {
439        raw_byte_string(input)
440    } else {
441        Err(Reject)
442    }
443}
444
// Scans the body of a cooked byte string literal; `input` is positioned just
// after the opening `b"`. Returns the cursor after the closing `"` and any
// literal suffix. Rejects on an unterminated string, a carriage return that
// is not followed by `\n`, an invalid escape, or any non-ASCII byte.
fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut bytes = input.bytes().enumerate();
    while let Some((offset, b)) = bytes.next() {
        match b {
            b'"' => {
                // `offset` is relative to the current `input`, which may have
                // been moved forward by a line continuation below.
                let input = input.advance(offset + 1);
                return Ok(literal_suffix(input));
            }
            // Only `\r\n` is allowed; a bare `\r` is rejected.
            b'\r' => match bytes.next() {
                Some((_, b'\n')) => {}
                _ => break,
            },
            b'\\' => match bytes.next() {
                Some((_, b'x')) => {
                    backslash_x_byte(&mut bytes)?;
                }
                Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
                // Backslash at end of line: skip the newline and the
                // whitespace that follows, then restart the iterator so that
                // offsets are relative to the advanced `input`.
                Some((newline, b @ (b'\n' | b'\r'))) => {
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, b)?;
                    bytes = input.bytes().enumerate();
                }
                _ => break,
            },
            b if b.is_ascii() => {}
            // Byte strings may only contain ASCII.
            _ => break,
        }
    }
    Err(Reject)
}
475
476fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
477    for (i, byte) in input.bytes().enumerate() {
478        match byte {
479            b'"' => {
480                if i > 255 {
481                    // https://github.com/rust-lang/rust/pull/95251
482                    return Err(Reject);
483                }
484                return Ok((input.advance(i + 1), &input.rest[..i]));
485            }
486            b'#' => {}
487            _ => break,
488        }
489    }
490    Err(Reject)
491}
492
493fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
494    let (input, delimiter) = delimiter_of_raw_string(input)?;
495    let mut bytes = input.bytes().enumerate();
496    while let Some((i, byte)) = bytes.next() {
497        match byte {
498            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
499                let rest = input.advance(i + 1 + delimiter.len());
500                return Ok(literal_suffix(rest));
501            }
502            b'\r' => match bytes.next() {
503                Some((_, b'\n')) => {}
504                _ => break,
505            },
506            other => {
507                if !other.is_ascii() {
508                    break;
509                }
510            }
511        }
512    }
513    Err(Reject)
514}
515
516fn c_string(input: Cursor) -> Result<Cursor, Reject> {
517    if let Ok(input) = input.parse("c\"") {
518        cooked_c_string(input)
519    } else if let Ok(input) = input.parse("cr") {
520        raw_c_string(input)
521    } else {
522        Err(Reject)
523    }
524}
525
526fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
527    let (input, delimiter) = delimiter_of_raw_string(input)?;
528    let mut bytes = input.bytes().enumerate();
529    while let Some((i, byte)) = bytes.next() {
530        match byte {
531            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
532                let rest = input.advance(i + 1 + delimiter.len());
533                return Ok(literal_suffix(rest));
534            }
535            b'\r' => match bytes.next() {
536                Some((_, b'\n')) => {}
537                _ => break,
538            },
539            b'\0' => break,
540            _ => {}
541        }
542    }
543    Err(Reject)
544}
545
// Scans the body of a cooked C string literal; `input` is positioned just
// after the opening `c"`. Returns the cursor after the closing `"` and any
// literal suffix. Compared to an ordinary cooked string, anything that would
// produce a NUL is rejected: a literal NUL character, `\x00`, `\u{0}`, and
// the `\0` escape (which is not in the accepted escape list).
fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                // `i` is relative to the current `input`, which may have been
                // moved forward by a line continuation below.
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            // Only `\r\n` is allowed; a bare `\r` is rejected.
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    backslash_x_nonzero(&mut chars)?;
                }
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
                Some((_, 'u')) => {
                    if backslash_u(&mut chars)? == '\0' {
                        break;
                    }
                }
                // Backslash at end of line: skip the newline and the
                // whitespace that follows, then restart the iterator so that
                // indices are relative to the advanced `input`.
                Some((newline, ch @ ('\n' | '\r'))) => {
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, ch as u8)?;
                    chars = input.char_indices();
                }
                _ => break,
            },
            '\0' => break,
            _ch => {}
        }
    }
    Err(Reject)
}
582
// Recognizes a byte literal such as `b'a'`, `b'\n'` or `b'\x7f'`, returning
// the cursor after the closing quote and any literal suffix.
fn byte(input: Cursor) -> Result<Cursor, Reject> {
    let input = input.parse("b'")?;
    let mut bytes = input.bytes().enumerate();
    // Consume the content: either one escape sequence or one single byte.
    let ok = match bytes.next().map(|(_, b)| b) {
        Some(b'\\') => match bytes.next().map(|(_, b)| b) {
            Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
            Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
            _ => false,
        },
        b => b.is_some(),
    };
    if !ok {
        return Err(Reject);
    }
    // The next byte's offset is where the closing quote has to be.
    let (offset, _) = bytes.next().ok_or(Reject)?;
    // If the content was the first byte of a multi-byte char, `offset` falls
    // inside that char; reject instead of slicing at a non-boundary.
    if !input.chars().as_str().is_char_boundary(offset) {
        return Err(Reject);
    }
    let input = input.advance(offset).parse("'")?;
    Ok(literal_suffix(input))
}
604
// Recognizes a character literal such as `'a'`, `'\n'`, `'\x7f'` or
// `'\u{1F600}'`, returning the cursor after the closing quote and any literal
// suffix.
fn character(input: Cursor) -> Result<Cursor, Reject> {
    let input = input.parse("'")?;
    let mut chars = input.char_indices();
    // Consume the content: either one escape sequence or one single char.
    let ok = match chars.next().map(|(_, ch)| ch) {
        Some('\\') => match chars.next().map(|(_, ch)| ch) {
            Some('x') => backslash_x_char(&mut chars).is_ok(),
            Some('u') => backslash_u(&mut chars).is_ok(),
            Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
            _ => false,
        },
        ch => ch.is_some(),
    };
    if !ok {
        return Err(Reject);
    }
    // The next char's byte index is where the closing quote has to be.
    let (idx, _) = chars.next().ok_or(Reject)?;
    let input = input.advance(idx).parse("'")?;
    Ok(literal_suffix(input))
}
624
// Pulls the next `(index, item)` pair from the iterator `$chars` and
// evaluates to the item if it matches `$pat`. If the item does not match, or
// the iterator is exhausted, returns `Err(Reject)` from the enclosing
// function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat => ch,
                _ => return Err(Reject),
            },
            None => return Err(Reject),
        }
    };
}
636
637fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
638where
639    I: Iterator<Item = (usize, char)>,
640{
641    next_ch!(chars @ '0'..='7');
642    next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
643    Ok(())
644}
645
646fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
647where
648    I: Iterator<Item = (usize, u8)>,
649{
650    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
651    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
652    Ok(())
653}
654
655fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
656where
657    I: Iterator<Item = (usize, char)>,
658{
659    let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
660    let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
661    if first == '0' && second == '0' {
662        Err(Reject)
663    } else {
664        Ok(())
665    }
666}
667
668fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
669where
670    I: Iterator<Item = (usize, char)>,
671{
672    next_ch!(chars @ '{');
673    let mut value = 0;
674    let mut len = 0;
675    for (_, ch) in chars {
676        let digit = match ch {
677            '0'..='9' => ch as u8 - b'0',
678            'a'..='f' => 10 + ch as u8 - b'a',
679            'A'..='F' => 10 + ch as u8 - b'A',
680            '_' if len > 0 => continue,
681            '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
682            _ => break,
683        };
684        if len == 6 {
685            break;
686        }
687        value *= 0x10;
688        value += u32::from(digit);
689        len += 1;
690    }
691    Err(Reject)
692}
693
// Handles a line continuation (backslash followed by a newline) inside a
// cooked string-like literal. `input` must be positioned just after the
// newline byte that followed the backslash, and `last` is that byte (`\n` or
// `\r`). Skips all following spaces, tabs and newlines and leaves `input` at
// the first other byte.
//
// Rejects if any `\r` is not immediately followed by `\n`, or if the input
// ends before a non-whitespace byte is found.
fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
    let mut whitespace = input.bytes().enumerate();
    loop {
        // A carriage return is only valid as part of `\r\n`; this consumes
        // the `\n`.
        if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
            return Err(Reject);
        }
        match whitespace.next() {
            Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
                last = b;
            }
            // First non-whitespace byte: the literal continues here.
            Some((offset, _)) => {
                *input = input.advance(offset);
                return Ok(());
            }
            // The literal was never terminated.
            None => return Err(Reject),
        }
    }
}
712
713fn float(input: Cursor) -> Result<Cursor, Reject> {
714    let mut rest = float_digits(input)?;
715    if let Some(ch) = rest.chars().next() {
716        if is_ident_start(ch) {
717            rest = ident_not_raw(rest)?.0;
718        }
719    }
720    word_break(rest)
721}
722
// Recognizes the numeric part of a floating point literal (without suffix)
// and returns the cursor after it. The input must start with a decimal digit
// and contain a `.` and/or an exponent; plain integers are rejected so that
// `int` can handle them.
//
// If an `e`/`E` turns out not to begin a valid exponent, the number before it
// is returned instead (when it had a `.`), leaving the `e...` to be picked up
// as a suffix by the caller.
fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.chars().peekable();
    match chars.next() {
        Some(ch) if '0' <= ch && ch <= '9' => {}
        _ => return Err(Reject),
    }

    // Number of bytes accepted so far; every accepted char is ASCII.
    let mut len = 1;
    let mut has_dot = false;
    let mut has_exp = false;
    while let Some(&ch) = chars.peek() {
        match ch {
            '0'..='9' | '_' => {
                chars.next();
                len += 1;
            }
            '.' => {
                // A second `.` is not part of this literal.
                if has_dot {
                    break;
                }
                chars.next();
                // `1..2` is a range and `1.foo` a field or method access, not
                // a float.
                if chars
                    .peek()
                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
                {
                    return Err(Reject);
                }
                len += 1;
                has_dot = true;
            }
            'e' | 'E' => {
                chars.next();
                len += 1;
                has_exp = true;
                break;
            }
            _ => break,
        }
    }

    if !(has_dot || has_exp) {
        return Err(Reject);
    }

    if has_exp {
        // Fallback result in case the exponent is malformed: the literal
        // ending just before the `e`/`E` (`len` currently includes it). Only
        // available when that prefix is itself a float, i.e. has a `.`.
        let token_before_exp = if has_dot {
            Ok(input.advance(len - 1))
        } else {
            Err(Reject)
        };
        let mut has_sign = false;
        let mut has_exp_value = false;
        while let Some(&ch) = chars.peek() {
            match ch {
                '+' | '-' => {
                    // A sign after exponent digits belongs to what follows.
                    if has_exp_value {
                        break;
                    }
                    // Two signs do not form a valid exponent.
                    if has_sign {
                        return token_before_exp;
                    }
                    chars.next();
                    len += 1;
                    has_sign = true;
                }
                '0'..='9' => {
                    chars.next();
                    len += 1;
                    has_exp_value = true;
                }
                '_' => {
                    chars.next();
                    len += 1;
                }
                _ => break,
            }
        }
        // An exponent needs at least one digit.
        if !has_exp_value {
            return token_before_exp;
        }
    }

    Ok(input.advance(len))
}
807
808fn int(input: Cursor) -> Result<Cursor, Reject> {
809    let mut rest = digits(input)?;
810    if let Some(ch) = rest.chars().next() {
811        if is_ident_start(ch) {
812            rest = ident_not_raw(rest)?.0;
813        }
814    }
815    word_break(rest)
816}
817
818fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
819    let base = if input.starts_with("0x") {
820        input = input.advance(2);
821        16
822    } else if input.starts_with("0o") {
823        input = input.advance(2);
824        8
825    } else if input.starts_with("0b") {
826        input = input.advance(2);
827        2
828    } else {
829        10
830    };
831
832    let mut len = 0;
833    let mut empty = true;
834    for b in input.bytes() {
835        match b {
836            b'0'..=b'9' => {
837                let digit = (b - b'0') as u64;
838                if digit >= base {
839                    return Err(Reject);
840                }
841            }
842            b'a'..=b'f' => {
843                let digit = 10 + (b - b'a') as u64;
844                if digit >= base {
845                    break;
846                }
847            }
848            b'A'..=b'F' => {
849                let digit = 10 + (b - b'A') as u64;
850                if digit >= base {
851                    break;
852                }
853            }
854            b'_' => {
855                if empty && base == 10 {
856                    return Err(Reject);
857                }
858                len += 1;
859                continue;
860            }
861            _ => break,
862        }
863        len += 1;
864        empty = false;
865    }
866    if empty {
867        Err(Reject)
868    } else {
869        Ok(input.advance(len))
870    }
871}
872
873fn punct(input: Cursor) -> PResult<Punct> {
874    let (rest, ch) = punct_char(input)?;
875    if ch == '\'' {
876        let (after_lifetime, _ident) = ident_any(rest)?;
877        if after_lifetime.starts_with_char('\'')
878            || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#"))
879        {
880            Err(Reject)
881        } else {
882            Ok((rest, Punct::new('\'', Spacing::Joint)))
883        }
884    } else {
885        let kind = match punct_char(rest) {
886            Ok(_) => Spacing::Joint,
887            Err(Reject) => Spacing::Alone,
888        };
889        Ok((rest, Punct::new(ch, kind)))
890    }
891}
892
893fn punct_char(input: Cursor) -> PResult<char> {
894    if input.starts_with("//") || input.starts_with("/*") {
895        // Do not accept `/` of a comment as a punct.
896        return Err(Reject);
897    }
898
899    let mut chars = input.chars();
900    let first = match chars.next() {
901        Some(ch) => ch,
902        None => {
903            return Err(Reject);
904        }
905    };
906    let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
907    if recognized.contains(first) {
908        Ok((input.advance(first.len_utf8()), first))
909    } else {
910        Err(Reject)
911    }
912}
913
914fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
915    #[cfg(span_locations)]
916    let lo = input.off;
917    let (rest, (comment, inner)) = doc_comment_contents(input)?;
918    let fallback_span = Span {
919        #[cfg(span_locations)]
920        lo,
921        #[cfg(span_locations)]
922        hi: rest.off,
923    };
924    let span = crate::Span::_new_fallback(fallback_span);
925
926    let mut scan_for_bare_cr = comment;
927    while let Some(cr) = scan_for_bare_cr.find('\r') {
928        let rest = &scan_for_bare_cr[cr + 1..];
929        if !rest.starts_with('\n') {
930            return Err(Reject);
931        }
932        scan_for_bare_cr = rest;
933    }
934
935    let mut pound = Punct::new('#', Spacing::Alone);
936    pound.set_span(span);
937    trees.push_token_from_parser(TokenTree::Punct(pound));
938
939    if inner {
940        let mut bang = Punct::new('!', Spacing::Alone);
941        bang.set_span(span);
942        trees.push_token_from_parser(TokenTree::Punct(bang));
943    }
944
945    let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
946    let mut equal = Punct::new('=', Spacing::Alone);
947    equal.set_span(span);
948    let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
949    literal.set_span(span);
950    let mut bracketed = TokenStreamBuilder::with_capacity(3);
951    bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
952    bracketed.push_token_from_parser(TokenTree::Punct(equal));
953    bracketed.push_token_from_parser(TokenTree::Literal(literal));
954    let group = Group::new(Delimiter::Bracket, bracketed.build());
955    let mut group = crate::Group::_new_fallback(group);
956    group.set_span(span);
957    trees.push_token_from_parser(TokenTree::Group(group));
958
959    Ok((rest, ()))
960}
961
962fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
963    if input.starts_with("//!") {
964        let input = input.advance(3);
965        let (input, s) = take_until_newline_or_eof(input);
966        Ok((input, (s, true)))
967    } else if input.starts_with("/*!") {
968        let (input, s) = block_comment(input)?;
969        Ok((input, (&s[3..s.len() - 2], true)))
970    } else if input.starts_with("///") {
971        let input = input.advance(3);
972        if input.starts_with_char('/') {
973            return Err(Reject);
974        }
975        let (input, s) = take_until_newline_or_eof(input);
976        Ok((input, (s, false)))
977    } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
978        let (input, s) = block_comment(input)?;
979        Ok((input, (&s[3..s.len() - 2], false)))
980    } else {
981        Err(Reject)
982    }
983}
984
985fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
986    let chars = input.char_indices();
987
988    for (i, ch) in chars {
989        if ch == '\n' {
990            return (input.advance(i), &input.rest[..i]);
991        } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
992            return (input.advance(i + 1), &input.rest[..i]);
993        }
994    }
995
996    (input.advance(input.len()), input.rest)
997}