
Add OperandValue::Uninit to improve lowering of MaybeUninit::uninit #142837

Status: Open. saethlin wants to merge 2 commits into master.
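Summary: this PR changes MaybeUninit::uninit() to evaluate to a const block, so that MIR const-propagation sees a constant whose bytes are all uninit, and adds a new OperandValue::Uninit variant so codegen can represent such constants directly. Loads of an Uninit operand lower to undef and stores of one are skipped, so fully-uninitialized values no longer round-trip through memsets or memcpys from private undef globals (see the codegen test changes at the end of the diff).

For orientation, a condensed sketch of the changed enum. This is an editor's sketch, not code from the PR: the real definition in compiler/rustc_codegen_ssa/src/mir/operand.rs carries fuller doc comments, and PlaceValue is stubbed here only so the sketch stands alone.

pub struct PlaceValue<V> {
    /// The backend pointer to the value (the real type also tracks alignment
    /// and unsized metadata).
    pub llval: V,
}

pub enum OperandValue<V> {
    /// A single backend immediate value.
    Immediate(V),
    /// A pair of backend immediates (a scalar-pair type).
    Pair(V, V),
    /// The operand lives in memory, described by a place.
    Ref(PlaceValue<V>),
    /// A zero-sized value, which carries no data. Note that such values can
    /// still require alignment.
    ZeroSized,
    /// Added by this PR: every byte of the operand is uninitialized. Loads
    /// of it produce undef, and stores of it can be elided entirely.
    Uninit,
}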
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/base.rs
@@ -298,7 +298,7 @@ pub(crate) fn coerce_unsized_into<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
let (base, info) = match bx.load_operand(src).val {
OperandValue::Pair(base, info) => unsize_ptr(bx, base, src_ty, dst_ty, Some(info)),
OperandValue::Immediate(base) => unsize_ptr(bx, base, src_ty, dst_ty, None),
- OperandValue::Ref(..) | OperandValue::ZeroSized => bug!(),
+ OperandValue::Ref(..) | OperandValue::ZeroSized | OperandValue::Uninit => bug!(),
};
OperandValue::Pair(base, info).store(bx, dst);
}
18 changes: 13 additions & 5 deletions compiler/rustc_codegen_ssa/src/mir/block.rs
@@ -17,7 +17,7 @@ use rustc_target::callconv::{ArgAbi, CastTarget, FnAbi, PassMode};
use tracing::{debug, info};

use super::operand::OperandRef;
- use super::operand::OperandValue::{Immediate, Pair, Ref, ZeroSized};
+ use super::operand::OperandValue::{Immediate, Pair, Ref, Uninit, ZeroSized};
use super::place::{PlaceRef, PlaceValue};
use super::{CachedLlbb, FunctionCx, LocalRef};
use crate::base::{self, is_call_from_compiler_builtins_to_upstream_monomorphization};
@@ -527,10 +527,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {

PassMode::Direct(_) | PassMode::Pair(..) => {
let op = self.codegen_consume(bx, mir::Place::return_place().as_ref());
- if let Ref(place_val) = op.val {
-     bx.load_from_place(bx.backend_type(op.layout), place_val)
- } else {
-     op.immediate_or_packed_pair(bx)
+ match op.val {
+     Uninit => bx.const_undef(bx.immediate_backend_type(op.layout)),
+     Ref(place_val) => bx.load_from_place(bx.backend_type(op.layout), place_val),
+     _ => op.immediate_or_packed_pair(bx),
}
}

@@ -557,6 +557,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
place_val.llval
}
ZeroSized => bug!("ZST return value shouldn't be in PassMode::Cast"),
+ Uninit => {
+     bx.ret_void();
+     return;
+ }
};
load_cast(bx, cast_ty, llslot, self.fn_abi.ret.layout.align.abi)
}
@@ -1587,6 +1591,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
}
_ => bug!("ZST {op:?} wasn't ignored, but was passed with abi {arg:?}"),
},
+ Uninit => {
+     let scratch = PlaceRef::alloca(bx, arg.layout);
+     (scratch.val.llval, scratch.val.align, true)
+ }
};

if by_ref && !arg.is_indirect() {
3 changes: 3 additions & 0 deletions compiler/rustc_codegen_ssa/src/mir/debuginfo.rs
@@ -341,6 +341,9 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
OperandValue::ZeroSized => {
// These never have a value to talk about
}
+ OperandValue::Uninit => {
+     // Better not have a useful name
+ }
},
LocalRef::PendingOperand => {}
}
78 changes: 65 additions & 13 deletions compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -67,9 +67,14 @@ pub enum OperandValue<V> {
/// `is_zst` on its `Layout` returns `true`. Note however that
/// these values can still require alignment.
ZeroSized,
+ Uninit,

Member: Could you add some docs here?

scottmcm (Member, Jun 26, 2025): Agreed -- this is changing the "well, if it's is_backend_immediate then it's OperandValue::Immediate" rule, for example, so it needs to be thought about carefully, since it potentially affects all the consumers of OperandValue. (And thus the comments on the other variants probably need updating too.)

For example, it's not clear to me why Immediate(cx.const_undef(…)) wouldn't be fine as representing undef for things with is_backend_immediate.

saethlin (Member, Author):

> For example, it's not clear to me why Immediate(cx.const_undef(…)) wouldn't be fine as representing undef for things with is_backend_immediate.

I'm not sure what you mean by "fine". It's valid to do so even with this PR; it just doesn't fix the missed optimization. It does improve the IR that we emit if it is also combined with the change to MaybeUninit::uninit, but all that accomplishes is tripping over a different problem in LLVM. It also breaks the maybeuninit-nrvo codegen tests.

This is the diff I applied to try out your idea:

--- a/compiler/rustc_codegen_ssa/src/mir/operand.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -170,6 +170,13 @@ pub(crate) fn from_const<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
                 OperandValue::Pair(a_llval, b_llval)
             }
             ConstValue::Indirect { alloc_id, offset } => {
+                if val.all_bytes_uninit(bx.tcx()) {
+                    if let BackendRepr::Scalar(_) = layout.backend_repr {
+                        let llval = bx.const_undef(bx.immediate_backend_type(layout));
+                        let val = OperandValue::Immediate(llval);
+                        return OperandRef { val, layout };
+                    }
+                };
                 let alloc = bx.tcx().global_alloc(alloc_id).unwrap_memory();
                 return Self::from_const_alloc(bx, layout, alloc, offset);
             }
}

impl<V: CodegenObject> OperandValue<V> {
+ pub(crate) fn is_uninit(&self) -> bool {
+     matches!(self, OperandValue::Uninit)
+ }

/// Treat this value as a pointer and return the data pointer and
/// optional metadata as backend values.
///
@@ -100,6 +105,7 @@ impl<V: CodegenObject> OperandValue<V> {
ty: TyAndLayout<'tcx>,
) -> bool {
match self {
+ OperandValue::Uninit => true,
OperandValue::ZeroSized => ty.is_zst(),
OperandValue::Immediate(_) => cx.is_backend_immediate(ty),
OperandValue::Pair(_, _) => cx.is_backend_scalar_pair(ty),
@@ -144,6 +150,10 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
) -> Self {
let layout = bx.layout_of(ty);

+ if val.all_bytes_uninit(bx.tcx()) {
+     return OperandRef { val: OperandValue::Uninit, layout };
+ }

let val = match val {
ConstValue::Scalar(x) => {
let BackendRepr::Scalar(scalar) = layout.backend_repr else {
@@ -442,6 +452,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {

// Read the tag/niche-encoded discriminant from memory.
let tag_op = match self.val {
+ OperandValue::Uninit => bug!("shouldn't load from uninit"),
OperandValue::ZeroSized => bug!(),
OperandValue::Immediate(_) | OperandValue::Pair(_, _) => {
self.extract_field(fx, bx, tag_field.as_usize())
@@ -591,6 +602,28 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
}

impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, Result<V, abi::Scalar>> {
Member: What do these 2 methods do? Could you maybe write a brief doc comment for each?

+ fn update_uninit<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
+     bx: &mut Bx,
+     tgt: &mut Result<V, abi::Scalar>,
+ ) {
+     let to_scalar = tgt.unwrap_err();
+     let bty = bx.cx().type_from_scalar(to_scalar);
+     *tgt = Ok(bx.const_undef(bty));
+ }
+
+ fn update<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
+     bx: &mut Bx,
+     tgt: &mut Result<V, abi::Scalar>,
+     src: V,
+     from_scalar: rustc_abi::Scalar,
+ ) {
+     let from_bty = bx.cx().type_from_scalar(from_scalar);
+     let to_scalar = tgt.unwrap_err();
+     let to_bty = bx.cx().type_from_scalar(to_scalar);
+     let imm = transmute_immediate(bx, src, from_scalar, from_bty, to_scalar, to_bty);
+     *tgt = Ok(imm);
+ }

pub(crate) fn insert_field<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
&mut self,
bx: &mut Bx,
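A possible answer to the review question above, with doc comments inferred from the method bodies rather than supplied by the PR (signatures copied from the diff, bodies elided):

/// Resolves a still-pending field slot (the Err(to_scalar) state) to an
/// undef immediate of the matching backend type; used when the operand
/// being inserted is OperandValue::Uninit.
fn update_uninit<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
    bx: &mut Bx,
    tgt: &mut Result<V, abi::Scalar>,
) { /* as above */ }

/// Resolves a still-pending field slot by transmuting the immediate src
/// from from_scalar to the slot's expected scalar type and storing the
/// result as Ok(imm).
fn update<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
    bx: &mut Bx,
    tgt: &mut Result<V, abi::Scalar>,
    src: V,
    from_scalar: rustc_abi::Scalar,
) { /* as above */ }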
@@ -614,37 +647,48 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, Result<V, abi::Scalar>> {
(field_layout.is_zst(), field_offset == Size::ZERO)
};

- let mut update = |tgt: &mut Result<V, abi::Scalar>, src, from_scalar| {
-     let from_bty = bx.cx().type_from_scalar(from_scalar);
-     let to_scalar = tgt.unwrap_err();
-     let to_bty = bx.cx().type_from_scalar(to_scalar);
-     let imm = transmute_immediate(bx, src, from_scalar, from_bty, to_scalar, to_bty);
-     *tgt = Ok(imm);
- };

match (operand.val, operand.layout.backend_repr) {
(OperandValue::ZeroSized, _) if expect_zst => {}
(OperandValue::Immediate(v), BackendRepr::Scalar(from_scalar)) => match &mut self.val {
OperandValue::Immediate(val @ Err(_)) if is_zero_offset => {
-     update(val, v, from_scalar);
+     Self::update(bx, val, v, from_scalar);
}
OperandValue::Pair(fst @ Err(_), _) if is_zero_offset => {
-     update(fst, v, from_scalar);
+     Self::update(bx, fst, v, from_scalar);
}
OperandValue::Pair(_, snd @ Err(_)) if !is_zero_offset => {
-     update(snd, v, from_scalar);
+     Self::update(bx, snd, v, from_scalar);
}
_ => bug!("Tried to insert {operand:?} into {v:?}.{f:?} of {self:?}"),
},
+ (OperandValue::Uninit, BackendRepr::Scalar(_)) => match &mut self.val {
+     OperandValue::Immediate(val @ Err(_)) if is_zero_offset => {
+         Self::update_uninit(bx, val);
+     }
+     OperandValue::Pair(fst @ Err(_), _) if is_zero_offset => {
+         Self::update_uninit(bx, fst);
+     }
+     OperandValue::Pair(_, snd @ Err(_)) if !is_zero_offset => {
+         Self::update_uninit(bx, snd);
+     }
+     _ => bug!("Tried to insert {operand:?} into {v:?}.{f:?} of {self:?}"),
+ },
(OperandValue::Pair(a, b), BackendRepr::ScalarPair(from_sa, from_sb)) => {
match &mut self.val {
OperandValue::Pair(fst @ Err(_), snd @ Err(_)) => {
-         update(fst, a, from_sa);
-         update(snd, b, from_sb);
+         Self::update(bx, fst, a, from_sa);
+         Self::update(bx, snd, b, from_sb);
}
_ => bug!("Tried to insert {operand:?} into {v:?}.{f:?} of {self:?}"),
}
}
+ (OperandValue::Uninit, BackendRepr::ScalarPair(..)) => match &mut self.val {
+     OperandValue::Pair(fst @ Err(_), snd @ Err(_)) => {
+         Self::update_uninit(bx, fst);
+         Self::update_uninit(bx, snd);
+     }
+     _ => bug!("Tried to insert {operand:?} into {v:?}.{f:?} of {self:?}"),
+ },
_ => bug!("Unsupported operand {operand:?} inserting into {v:?}.{f:?} of {self:?}"),
}
}
@@ -663,6 +707,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, Result<V, abi::Scalar>> {
};

let val = match val {
+ OperandValue::Uninit => OperandValue::Uninit,
OperandValue::ZeroSized => OperandValue::ZeroSized,
OperandValue::Immediate(v) => OperandValue::Immediate(unwrap(v)),
OperandValue::Pair(a, b) => OperandValue::Pair(unwrap(a), unwrap(b)),
@@ -739,6 +784,13 @@ impl<'a, 'tcx, V: CodegenObject> OperandValue<V> {
) {
debug!("OperandRef::store: operand={:?}, dest={:?}", self, dest);
match self {
+ OperandValue::Uninit => {
+     // Ideally we'd hint to the backend that the destination is deinitialized by the
+     // store. But in practice the destination is almost always uninit already because
+     // OperandValue::Uninit is pretty much only produced by MaybeUninit::uninit.
+     // Attempting to generate a hint by calling memset with undef mostly seems to
+     // confuse LLVM.
+ }
OperandValue::ZeroSized => {
// Avoid generating stores of zero-sized values, because the only way to have a
// zero-sized value is through `undef`/`poison`, and the store itself is useless.
27 changes: 9 additions & 18 deletions compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -67,6 +67,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
base::coerce_unsized_into(bx, scratch, dest);
scratch.storage_dead(bx);
}
+ OperandValue::Uninit => {}
OperandValue::Ref(val) => {
if val.llextra.is_some() {
bug!("unsized coercion on an unsized rvalue");
@@ -90,24 +91,13 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
return;
}

- // When the element is a const with all bytes uninit, emit a single memset that
- // writes undef to the entire destination.
- if let mir::Operand::Constant(const_op) = elem {
-     let val = self.eval_mir_constant(const_op);
-     if val.all_bytes_uninit(self.cx.tcx()) {
-         let size = bx.const_usize(dest.layout.size.bytes());
-         bx.memset(
-             dest.val.llval,
-             bx.const_undef(bx.type_i8()),
-             size,
-             dest.val.align,
-             MemFlags::empty(),
-         );
-         return;
-     }
- }

let cg_elem = self.codegen_operand(bx, elem);
+ // Normally the check for uninit is handled inside the operand helpers, but in this
+ // one case we want to bail early so that we don't generate the loop form with an
+ // empty body.
+ if cg_elem.val.is_uninit() {
+     return;
+ }

let try_init_all_same = |bx: &mut Bx, v| {
let start = dest.val.llval;
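For context, an editor's illustration (not code from the PR) of the kind of source this early return affects, repeating an all-uninit element as in the nested-array codegen test added later in this diff:

use std::mem::MaybeUninit;

// With this PR, the repeat expression below lowers to no stores at all: the
// element operand is OperandValue::Uninit, so codegen bails out before
// emitting the fill loop (and before the removed memset special case would
// have fired).
pub fn uninit_buffer() -> [MaybeUninit<u8>; 4096] {
    [MaybeUninit::uninit(); 4096]
}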
@@ -206,7 +196,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
}

match src.val {
- OperandValue::Ref(..) | OperandValue::ZeroSized => {
+ OperandValue::Ref(..) | OperandValue::ZeroSized | OperandValue::Uninit => {
span_bug!(
self.mir.span,
"Operand path should have handled transmute \
@@ -251,6 +241,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
let cast_kind = self.value_kind(cast);

match operand.val {
+ OperandValue::Uninit => Some(OperandValue::Uninit),
OperandValue::Ref(source_place_val) => {
assert_eq!(source_place_val.llextra, None);
assert_matches!(operand_kind, OperandValueKind::Ref);
5 changes: 4 additions & 1 deletion library/core/src/mem/maybe_uninit.rs
@@ -328,7 +328,10 @@ impl<T> MaybeUninit<T> {
#[inline(always)]
#[rustc_diagnostic_item = "maybe_uninit_uninit"]
pub const fn uninit() -> MaybeUninit<T> {
-     MaybeUninit { uninit: () }
+     // It is very helpful for codegen to know when are writing uninit bytes. MIR optimizations
+     // currently do not const-propagate unions, but if we create the const manually that can be
+     // trivially propagated. See #142837.
+     const { MaybeUninit { uninit: () } }

Member, suggested change on lines +331 to +333: fix the typo so the comment reads "It is very helpful for codegen to know when we are writing uninit bytes".
}

/// Creates a new `MaybeUninit<T>` in an uninitialized state, with the memory being
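To connect the pieces (editor's note): with the const block, a call like the one below becomes a MIR constant whose bytes are all uninit; from_const in operand.rs above maps that to OperandValue::Uninit, and the function compiles to a bare ret rather than a memcpy from a private undef global. Compare the old and new CHECK lines in tests/codegen/uninit-consts.rs at the end of this diff.

use std::mem::MaybeUninit;

// Mirrors the fully_uninit codegen test below; after this PR the body emits
// no memory traffic at all.
pub const fn stack_buffer() -> MaybeUninit<[u8; 10]> {
    MaybeUninit::uninit()
}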
32 changes: 32 additions & 0 deletions tests/codegen/maybeuninit.rs
@@ -0,0 +1,32 @@
//@ compile-flags: -Copt-level=3 -Cdebuginfo=0

// This is a regression test for https://github.com/rust-lang/rust/issues/139355 as well as
// regressions I introduced while implementing a solution.

#![crate_type = "lib"]

use std::mem::MaybeUninit;

// CHECK-LABEL: @create_small_uninit_array
#[no_mangle]
fn create_small_uninit_array() -> [MaybeUninit<u8>; 4] {
// CHECK-NEXT: start:
// CHECK-NEXT: ret i32 undef
[MaybeUninit::<u8>::uninit(); 4]
}

// CHECK-LABEL: @create_nested_uninit_array
#[no_mangle]
fn create_nested_uninit_array() -> [[MaybeUninit<u8>; 4]; 100] {
// CHECK-NEXT: start:
// CHECK-NEXT: ret void
[[MaybeUninit::<u8>::uninit(); 4]; 100]
}

// CHECK-LABEL: @create_ptr
#[no_mangle]
fn create_ptr() -> MaybeUninit<&'static str> {
// CHECK-NEXT: start:
// CHECK-NEXT: ret { ptr, i64 } undef
MaybeUninit::uninit()
}
scottmcm (Member, Jun 26, 2025), on lines +26 to +32: Note that for scalar pair cases like this I'm already fixing it in https://github.com/rust-lang/rust/pull/138759/files#diff-68480918205d32f0b23d06ba76c5fbd702b7dc842f0f5cf262db6e2e6ae3c630R51-R59 (that also handles partially-uninit cases like None::<u32>). I wonder if it's only OperandValue::Ref that needs to be handled specially as a result, rather than a different top-level variant.

saethlin (Member, Author): This already compiles to the ret undef; I'm adding a codegen test for it because if you pick the wrong backend type for the ret (which I did, repeatedly), everything works fine except that you hit an LLVM assertion when compiling more complicated code.

10 changes: 4 additions & 6 deletions tests/codegen/uninit-consts.rs
@@ -11,21 +11,18 @@ pub struct PartiallyUninit {
y: MaybeUninit<[u8; 10]>,
}

- // CHECK: [[FULLY_UNINIT:@.*]] = private unnamed_addr constant [10 x i8] undef

// CHECK: [[PARTIALLY_UNINIT:@.*]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4

// This shouldn't contain undef, since it contains more chunks
// than the default value of uninit_const_chunk_threshold.
// CHECK: [[UNINIT_PADDING_HUGE:@.*]] = private unnamed_addr constant [32768 x i8] c"{{.+}}", align 4

- // CHECK: [[FULLY_UNINIT_HUGE:@.*]] = private unnamed_addr constant [16384 x i8] undef

// CHECK-LABEL: @fully_uninit
#[no_mangle]
pub const fn fully_uninit() -> MaybeUninit<[u8; 10]> {
const M: MaybeUninit<[u8; 10]> = MaybeUninit::uninit();
- // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 1 %_0, ptr align 1 {{.*}}[[FULLY_UNINIT]]{{.*}}, i{{(32|64)}} 10, i1 false)
+ // CHECK-NEXT: start:
+ // CHECK-NEXT: ret void
M
}

@@ -49,6 +46,7 @@ pub const fn uninit_padding_huge() -> [(u32, u8); 4096] {
#[no_mangle]
pub const fn fully_uninit_huge() -> MaybeUninit<[u32; 4096]> {
const F: MaybeUninit<[u32; 4096]> = MaybeUninit::uninit();
- // CHECK: call void @llvm.memcpy.{{.+}}(ptr align 4 %_0, ptr align 4 {{.*}}[[FULLY_UNINIT_HUGE]]{{.*}}, i{{(32|64)}} 16384, i1 false)
+ // CHECK-NEXT: start:
+ // CHECK-NEXT: ret void
F
}
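These FileCheck-based codegen tests can be run locally through the standard rustc dev workflow, e.g. ./x test tests/codegen/maybeuninit.rs and ./x test tests/codegen/uninit-consts.rs (assuming a configured rust-lang/rust checkout).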