Rewrite macro_rules! parser to not use the MBE engine itself #143070

Merged
merged 2 commits into from Jul 3, 2025
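
For context (illustration only, not part of the diff): `compile_declarative_macro` parses the body of a `macro_rules!` definition, which is a sequence of `lhs => rhs` rules, each side a single token tree, separated by `;` (or `,` for the unstable `macro` form). The old implementation matched that body against a hand-built MBE matcher equivalent to `$( $lhs:tt => $rhs:tt );+`, so the macro engine was used to parse its own input; the rewrite below walks the rules with an ordinary `Parser` instead. A minimal example of the kind of definition being parsed (the macro itself is made up):

```rust
// A declarative macro whose definition body is what this code parses:
// a `;`-separated sequence of `$lhs => $rhs` rules, each side one token tree.
macro_rules! min {
    ($x:expr) => { $x };
    ($x:expr, $($rest:expr),+) => {
        std::cmp::min($x, min!($($rest),+))
    };
}

fn main() {
    assert_eq!(min!(3, 1, 2), 1);
}
```
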
36 changes: 2 additions & 34 deletions compiler/rustc_expand/src/mbe/diagnostics.rs
@@ -195,38 +195,6 @@ impl<'dcx> CollectTrackerAndEmitter<'dcx, '_> {
}
}

/// Currently used by macro_rules! compilation to extract a little information from the `Failure`
/// case.
pub(crate) struct FailureForwarder<'matcher> {
expected_token: Option<&'matcher Token>,
}

impl<'matcher> FailureForwarder<'matcher> {
pub(crate) fn new() -> Self {
Self { expected_token: None }
}
}

impl<'matcher> Tracker<'matcher> for FailureForwarder<'matcher> {
type Failure = (Token, u32, &'static str);

fn build_failure(tok: Token, position: u32, msg: &'static str) -> Self::Failure {
(tok, position, msg)
}

fn description() -> &'static str {
"failure-forwarder"
}

fn set_expected_token(&mut self, tok: &'matcher Token) {
self.expected_token = Some(tok);
}

fn get_expected_token(&self) -> Option<&'matcher Token> {
self.expected_token
}
}

pub(super) fn emit_frag_parse_err(
mut e: Diag<'_>,
parser: &Parser<'_>,
@@ -321,7 +289,7 @@ enum ExplainDocComment {
},
}

pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Span) {
if let Ok(src) = sm.span_to_snippet(span) {
if src.starts_with("///") || src.starts_with("/**") {
err.subdiagnostic(ExplainDocComment::Outer { span });
@@ -333,7 +301,7 @@ pub(super) fn annotate_doc_comment(err: &mut Diag<'_>, sm: &SourceMap, span: Spa

/// Generates an appropriate parsing failure message. For EOF, this is "unexpected end...". For
/// other tokens, this is "unexpected token...".
pub(super) fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
fn parse_failure_msg(tok: &Token, expected_token: Option<&Token>) -> Cow<'static, str> {
if let Some(expected_token) = expected_token {
Cow::from(format!("expected {}, found {}", token_descr(expected_token), token_descr(tok)))
} else {
23 changes: 7 additions & 16 deletions compiler/rustc_expand/src/mbe/macro_check.rs
@@ -105,8 +105,6 @@
//! stored when entering a macro definition starting from the state in which the meta-variable is
//! bound.
use std::iter;

use rustc_ast::token::{Delimiter, IdentIsRaw, Token, TokenKind};
use rustc_ast::{DUMMY_NODE_ID, NodeId};
use rustc_data_structures::fx::FxHashMap;
@@ -190,29 +188,22 @@ struct MacroState<'a> {
ops: SmallVec<[KleeneToken; 1]>,
}

/// Checks that meta-variables are used correctly in a macro definition.
/// Checks that meta-variables are used correctly in one rule of a macro definition.
///
/// Arguments:
/// - `psess` is used to emit diagnostics and lints
/// - `node_id` is used to emit lints
/// - `span` is used when no spans are available
/// - `lhses` and `rhses` should have the same length and represent the macro definition
/// - `lhs` and `rhs` represent the rule
pub(super) fn check_meta_variables(
psess: &ParseSess,
node_id: NodeId,
span: Span,
lhses: &[TokenTree],
rhses: &[TokenTree],
lhs: &TokenTree,
rhs: &TokenTree,
) -> Result<(), ErrorGuaranteed> {
if lhses.len() != rhses.len() {
psess.dcx().span_bug(span, "length mismatch between LHSes and RHSes")
}
let mut guar = None;
for (lhs, rhs) in iter::zip(lhses, rhses) {
let mut binders = Binders::default();
check_binders(psess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut guar);
check_occurrences(psess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut guar);
}
let mut binders = Binders::default();
check_binders(psess, node_id, lhs, &Stack::Empty, &mut binders, &Stack::Empty, &mut guar);
check_occurrences(psess, node_id, rhs, &Stack::Empty, &binders, &Stack::Empty, &mut guar);
guar.map_or(Ok(()), Err)
}

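For illustration (not from the diff): `check_meta_variables` verifies, now one `lhs => rhs` rule at a time instead of across the whole definition, that every metavariable used on the right-hand side is bound on the left-hand side and appears under the same nesting of repetitions. A well-formed definition of the kind it accepts (the macro is made up):

```rust
// `$a` and `$b` are bound under one `$(...),*` repetition in the LHS and
// used under one repetition in the RHS, so the per-rule check passes.
macro_rules! pairs {
    ($(($a:expr, $b:expr)),*) => {
        [ $( $a + $b ),* ]
    };
}

fn main() {
    let sums = pairs!((1, 2), (3, 4));
    assert_eq!(sums, [3, 7]);
}
```
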
2 changes: 0 additions & 2 deletions compiler/rustc_expand/src/mbe/macro_parser.rs
@@ -536,8 +536,6 @@ impl TtParser {
// The separator matches the current token. Advance past it.
mp.idx += 1;
self.next_mps.push(mp);
} else {
track.set_expected_token(separator);
}
}
&MatcherLoc::SequenceKleeneOpAfterSep { idx_first } => {
206 changes: 52 additions & 154 deletions compiler/rustc_expand/src/mbe/macro_rules.rs
@@ -19,12 +19,13 @@ use rustc_lint_defs::BuiltinLintDiag;
use rustc_lint_defs::builtin::{
RUST_2021_INCOMPATIBLE_OR_PATTERNS, SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
};
use rustc_parse::parser::{ParseNtResult, Parser, Recovery};
use rustc_parse::exp;
use rustc_parse::parser::{Parser, Recovery};
use rustc_session::Session;
use rustc_session::parse::ParseSess;
use rustc_span::edition::Edition;
use rustc_span::hygiene::Transparency;
use rustc_span::{Ident, MacroRulesNormalizedIdent, Span, kw, sym};
use rustc_span::{Ident, Span, kw, sym};
use tracing::{debug, instrument, trace, trace_span};

use super::macro_parser::{NamedMatches, NamedParseResult};
@@ -34,8 +35,6 @@ use crate::base::{
SyntaxExtensionKind, TTMacroExpander,
};
use crate::expand::{AstFragment, AstFragmentKind, ensure_complete_parse, parse_ast_fragment};
use crate::mbe::diagnostics::{annotate_doc_comment, parse_failure_msg};
use crate::mbe::macro_parser::NamedMatch::*;
use crate::mbe::macro_parser::{Error, ErrorReported, Failure, MatcherLoc, Success, TtParser};
use crate::mbe::transcribe::transcribe;
use crate::mbe::{self, KleeneOp, macro_check};
@@ -168,11 +167,6 @@ pub(super) trait Tracker<'matcher> {
fn recovery() -> Recovery {
Recovery::Forbidden
}

fn set_expected_token(&mut self, _tok: &'matcher Token) {}
fn get_expected_token(&self) -> Option<&'matcher Token> {
None
}
}

/// A noop tracker that is used in the hot path of the expansion, has zero overhead thanks to
Expand Down Expand Up @@ -360,11 +354,6 @@ pub(super) fn try_match_macro<'matcher, T: Tracker<'matcher>>(
Err(CanRetry::Yes)
}

// Note that macro-by-example's input is also matched against a token tree:
// $( $lhs:tt => $rhs:tt );+
//
// Holy self-referential!

/// Converts a macro item into a syntax extension.
pub fn compile_declarative_macro(
sess: &Session,
@@ -390,157 +379,66 @@ pub fn compile_declarative_macro(
};
let dummy_syn_ext = |guar| (mk_syn_ext(Arc::new(DummyExpander(guar))), Vec::new());

let lhs_nm = Ident::new(sym::lhs, span);
let rhs_nm = Ident::new(sym::rhs, span);
let tt_spec = NonterminalKind::TT;
let macro_rules = macro_def.macro_rules;
let exp_sep = if macro_rules { exp!(Semi) } else { exp!(Comma) };

// Parse the macro_rules! invocation

// The pattern that macro_rules matches.
// The grammar for macro_rules! is:
// $( $lhs:tt => $rhs:tt );+
// ...quasiquoting this would be nice.
// These spans won't matter, anyways
let argument_gram = vec![
mbe::TokenTree::Sequence(
DelimSpan::dummy(),
mbe::SequenceRepetition {
tts: vec![
mbe::TokenTree::MetaVarDecl { span, name: lhs_nm, kind: tt_spec },
mbe::TokenTree::token(token::FatArrow, span),
mbe::TokenTree::MetaVarDecl { span, name: rhs_nm, kind: tt_spec },
],
separator: Some(Token::new(
if macro_rules { token::Semi } else { token::Comma },
span,
)),
kleene: mbe::KleeneToken::new(mbe::KleeneOp::OneOrMore, span),
num_captures: 2,
},
),
// to phase into semicolon-termination instead of semicolon-separation
mbe::TokenTree::Sequence(
DelimSpan::dummy(),
mbe::SequenceRepetition {
tts: vec![mbe::TokenTree::token(
if macro_rules { token::Semi } else { token::Comma },
span,
)],
separator: None,
kleene: mbe::KleeneToken::new(mbe::KleeneOp::ZeroOrMore, span),
num_captures: 0,
},
),
];
// Convert it into `MatcherLoc` form.
let argument_gram = mbe::macro_parser::compute_locs(&argument_gram);

let create_parser = || {
let body = macro_def.body.tokens.clone();
Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS)
};

let parser = create_parser();
let mut tt_parser =
TtParser::new(Ident::with_dummy_span(if macro_rules { kw::MacroRules } else { kw::Macro }));
let argument_map =
match tt_parser.parse_tt(&mut Cow::Owned(parser), &argument_gram, &mut NoopTracker) {
Success(m) => m,
Failure(()) => {
debug!("failed to parse macro tt");
// The fast `NoopTracker` doesn't have any info on failure, so we need to retry it
// with another one that gives us the information we need.
// For this we need to reclone the macro body as the previous parser consumed it.
let retry_parser = create_parser();

let mut track = diagnostics::FailureForwarder::new();
let parse_result =
tt_parser.parse_tt(&mut Cow::Owned(retry_parser), &argument_gram, &mut track);
let Failure((token, _, msg)) = parse_result else {
unreachable!("matcher returned something other than Failure after retry");
};

let s = parse_failure_msg(&token, track.get_expected_token());
let sp = token.span.substitute_dummy(span);
let mut err = sess.dcx().struct_span_err(sp, s);
err.span_label(sp, msg);
annotate_doc_comment(&mut err, sess.source_map(), sp);
let guar = err.emit();
return dummy_syn_ext(guar);
}
Error(sp, msg) => {
let guar = sess.dcx().span_err(sp.substitute_dummy(span), msg);
return dummy_syn_ext(guar);
}
ErrorReported(guar) => {
return dummy_syn_ext(guar);
}
};
let body = macro_def.body.tokens.clone();
let mut p = Parser::new(&sess.psess, body, rustc_parse::MACRO_ARGUMENTS);

// Don't abort iteration early, so that multiple errors can be reported.
let mut guar = None;
let mut check_emission = |ret: Result<(), ErrorGuaranteed>| guar = guar.or(ret.err());

// Extract the arguments:
let lhses = match &argument_map[&MacroRulesNormalizedIdent::new(lhs_nm)] {
MatchedSeq(s) => s
.iter()
.map(|m| {
if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
let tt = mbe::quoted::parse(
&TokenStream::new(vec![tt.clone()]),
true,
sess,
node_id,
features,
edition,
)
.pop()
.unwrap();
// We don't handle errors here, the driver will abort
// after parsing/expansion. We can report every error in every macro this way.
check_emission(check_lhs_nt_follows(sess, node_id, &tt));
return tt;
}
sess.dcx().span_bug(span, "wrong-structured lhs")
})
.collect::<Vec<mbe::TokenTree>>(),
_ => sess.dcx().span_bug(span, "wrong-structured lhs"),
};
let mut lhses = Vec::new();
let mut rhses = Vec::new();

let rhses = match &argument_map[&MacroRulesNormalizedIdent::new(rhs_nm)] {
MatchedSeq(s) => s
.iter()
.map(|m| {
if let MatchedSingle(ParseNtResult::Tt(tt)) = m {
return mbe::quoted::parse(
&TokenStream::new(vec![tt.clone()]),
false,
sess,
node_id,
features,
edition,
)
.pop()
.unwrap();
}
sess.dcx().span_bug(span, "wrong-structured rhs")
})
.collect::<Vec<mbe::TokenTree>>(),
_ => sess.dcx().span_bug(span, "wrong-structured rhs"),
};

for rhs in &rhses {
check_emission(check_rhs(sess, rhs));
while p.token != token::Eof {
let lhs_tt = p.parse_token_tree();
let lhs_tt = mbe::quoted::parse(
&TokenStream::new(vec![lhs_tt]),
true, // LHS
sess,
node_id,
features,
edition,
)
.pop()
.unwrap();
// We don't handle errors here, the driver will abort after parsing/expansion. We can
// report every error in every macro this way.
check_emission(check_lhs_nt_follows(sess, node_id, &lhs_tt));
check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(&lhs_tt)));
if let Err(e) = p.expect(exp!(FatArrow)) {
return dummy_syn_ext(e.emit());
}
let rhs_tt = p.parse_token_tree();
let rhs_tt = mbe::quoted::parse(
&TokenStream::new(vec![rhs_tt]),
false, // RHS
sess,
node_id,
features,
edition,
)
.pop()
.unwrap();
check_emission(check_rhs(sess, &rhs_tt));
check_emission(macro_check::check_meta_variables(&sess.psess, node_id, &lhs_tt, &rhs_tt));
lhses.push(lhs_tt);
rhses.push(rhs_tt);
if p.token == token::Eof {
break;
}
if let Err(e) = p.expect(exp_sep) {
return dummy_syn_ext(e.emit());
}
}

// Don't abort iteration early, so that errors for multiple lhses can be reported.
for lhs in &lhses {
check_emission(check_lhs_no_empty_seq(sess, slice::from_ref(lhs)));
if lhses.is_empty() {
let guar = sess.dcx().span_err(span, "macros must contain at least one rule");
return dummy_syn_ext(guar);
}

check_emission(macro_check::check_meta_variables(&sess.psess, node_id, span, &lhses, &rhses));

let transparency = find_attr!(attrs, AttributeKind::MacroTransparency(x) => *x)
.unwrap_or(Transparency::fallback(macro_rules));

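A side note on the `exp_sep` line in the rewritten `compile_declarative_macro` above: rules of a `macro_rules!` item are separated by `;`, while rules of the unstable `macro` item are separated by `,` (hence `exp!(Semi)` vs `exp!(Comma)`). A sketch of both forms; the `macro` half reflects my understanding of the unstable macros-2.0 syntax and needs a nightly compiler with `#![feature(decl_macro)]`:

```rust
#![feature(decl_macro)]

// `macro_rules!` rules are separated by `;` (the `exp!(Semi)` case above).
macro_rules! twice_rules {
    ($e:expr) => { $e * 2 };
    () => { 0 };
}

// Rules of the unstable `macro` item are separated by `,`
// (the `exp!(Comma)` case above); exact syntax is an assumption here.
macro twice_decl {
    ($e:expr) => { $e * 2 },
    () => { 0 }
}

fn main() {
    assert_eq!(twice_rules!(21), 42);
    assert_eq!(twice_decl!(21), 42);
}
```
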
1 change: 0 additions & 1 deletion compiler/rustc_span/src/symbol.rs
@@ -1777,7 +1777,6 @@ symbols! {
resume,
return_position_impl_trait_in_trait,
return_type_notation,
rhs,
riscv_target_feature,
rlib,
ropi,
2 changes: 1 addition & 1 deletion tests/ui/attributes/crate-type-macro-empty.rs
@@ -2,6 +2,6 @@
#[crate_type = foo!()]
//~^ ERROR cannot find macro `foo` in this scope

macro_rules! foo {} //~ ERROR unexpected end of macro invocation
macro_rules! foo {} //~ ERROR macros must contain at least one rule

fn main() {}
4 changes: 2 additions & 2 deletions tests/ui/attributes/crate-type-macro-empty.stderr
@@ -1,8 +1,8 @@
error: unexpected end of macro invocation
error: macros must contain at least one rule
--> $DIR/crate-type-macro-empty.rs:5:1
|
LL | macro_rules! foo {}
| ^^^^^^^^^^^^^^^^^^^ missing tokens in macro arguments
| ^^^^^^^^^^^^^^^^^^^

error: cannot find macro `foo` in this scope
--> $DIR/crate-type-macro-empty.rs:2:16