diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index c775a0aed743..43d0af63845d 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -6,6 +6,7 @@ use crate::ir::builder::ReplaceBuilder; use crate::ir::dynamic_type::{DynamicTypeData, DynamicTypes}; use crate::ir::instructions::{CallInfo, InstructionData}; use crate::ir::pcc::Fact; +use crate::ir::user_stack_maps::{UserStackMapEntry, UserStackMapEntryVec}; use crate::ir::{ types, Block, BlockCall, ConstantData, ConstantPool, DynamicType, ExtFuncData, FuncRef, Immediate, Inst, JumpTables, RelSourceLoc, SigRef, Signature, Type, Value, @@ -105,6 +106,16 @@ pub struct DataFlowGraph { /// primary `insts` map. results: SecondaryMap<Inst, ValueList>, + /// User-defined stack maps. + /// + /// Not to be confused with the stack maps that `regalloc2` produces. These + /// are defined by the user in `cranelift-frontend`. These will eventually + /// replace the stack maps support in `regalloc2`, but in the name of + /// incrementalism and avoiding gigantic PRs that completely overhaul + /// Cranelift and Wasmtime at the same time, we are allowing them to live in + /// parallel for the time being. + user_stack_maps: alloc::collections::BTreeMap<Inst, UserStackMapEntryVec>, + /// basic blocks in the function and their parameters. /// /// This map is not in program order. That is handled by `Layout`, and so is the sequence of @@ -155,6 +166,7 @@ impl DataFlowGraph { Self { insts: Insts(PrimaryMap::new()), results: SecondaryMap::new(), + user_stack_maps: alloc::collections::BTreeMap::new(), blocks: Blocks(PrimaryMap::new()), dynamic_types: DynamicTypes::new(), value_lists: ValueListPool::new(), @@ -173,6 +185,7 @@ impl DataFlowGraph { pub fn clear(&mut self) { self.insts.0.clear(); self.results.clear(); + self.user_stack_maps.clear(); self.blocks.0.clear(); self.dynamic_types.clear(); self.value_lists.clear(); @@ -561,6 +574,22 @@ impl DataFlowGraph { self.clear_results(dest_inst); } + + /// Get the stack map entries associated with the given instruction. + pub fn user_stack_map_entries(&self, inst: Inst) -> Option<&[UserStackMapEntry]> { + self.user_stack_maps.get(&inst).map(|es| &**es) + } + + /// Append a new stack map entry for the given call instruction. + /// + /// # Panics + /// + /// Panics if the given instruction is not a (non-tail) call instruction. + pub fn append_user_stack_map_entry(&mut self, inst: Inst, entry: UserStackMapEntry) { + let opcode = self.insts[inst].opcode(); + assert!(opcode.is_call() && !opcode.is_return()); + self.user_stack_maps.entry(inst).or_default().push(entry); + } } /// Where did a value come from?
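Aside, not part of the patch: a minimal sketch of how a client might drive the two new `DataFlowGraph` methods above. The helper name and setup are hypothetical; `call_inst` is assumed to already be a non-tail call in `func` and `slot` an explicit stack slot, since `append_user_stack_map_entry` asserts the opcode is a non-tail call.

```rust
use cranelift_codegen::ir::{self, Function, Inst, StackSlot, UserStackMapEntry};

/// Hypothetical helper: record that two i32-sized GC refs were spilled into
/// `slot` at offsets 0 and 4 across the safepoint `call_inst`.
fn record_two_refs(func: &mut Function, call_inst: Inst, slot: StackSlot) {
    for offset in [0u32, 4] {
        // Panics if `call_inst` is not a (non-tail) call instruction.
        func.dfg.append_user_stack_map_entry(
            call_inst,
            UserStackMapEntry { ty: ir::types::I32, slot, offset },
        );
    }

    // The entries can be read back later; this accessor is what the printer
    // changes in `write.rs` below use to emit the `stack_map=[...]` text.
    let entries = func.dfg.user_stack_map_entries(call_inst).expect("entries were appended");
    assert_eq!(entries.len(), 2);
}
```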
diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 445496e5e649..b17e2b111b65 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -25,6 +25,7 @@ mod sourceloc; pub mod stackslot; mod trapcode; pub mod types; +mod user_stack_maps; #[cfg(feature = "enable-serde")] use serde_derive::{Deserialize, Serialize}; @@ -64,6 +65,7 @@ pub use crate::ir::stackslot::{ }; pub use crate::ir::trapcode::TrapCode; pub use crate::ir::types::Type; +pub use crate::ir::user_stack_maps::UserStackMapEntry; use crate::entity::{entity_impl, PrimaryMap, SecondaryMap}; diff --git a/cranelift/codegen/src/ir/user_stack_maps.rs b/cranelift/codegen/src/ir/user_stack_maps.rs new file mode 100644 index 000000000000..8996404a4dd0 --- /dev/null +++ b/cranelift/codegen/src/ir/user_stack_maps.rs @@ -0,0 +1,19 @@ +use crate::ir; +use smallvec::SmallVec; + +pub(crate) type UserStackMapEntryVec = SmallVec<[UserStackMapEntry; 4]>; + +/// A stack map entry describes a GC-managed value and its location at a +/// particular instruction. +#[derive(Clone, PartialEq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct UserStackMapEntry { + /// The type of the value stored in this stack map entry. + pub ty: ir::Type, + + /// The stack slot that this stack map entry is within. + pub slot: ir::StackSlot, + + /// The offset within the stack slot where this entry's value can be found. + pub offset: u32, +} diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index 68864a9d405e..7ca06628eedd 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -441,7 +441,10 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt } Call { func_ref, ref args, .. - } => write!(w, " {}({})", func_ref, DisplayValues(args.as_slice(pool))), + } => { + write!(w, " {}({})", func_ref, DisplayValues(args.as_slice(pool)))?; + write_user_stack_map_entries(w, dfg, inst) + } CallIndirect { sig_ref, ref args, .. } => { @@ -452,7 +455,8 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt sig_ref, args[0], DisplayValues(&args[1..]) - ) + )?; + write_user_stack_map_entries(w, dfg, inst) } FuncAddr { func_ref, .. } => write!(w, " {}", func_ref), StackLoad { @@ -504,6 +508,24 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt Ok(()) } +fn write_user_stack_map_entries(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt::Result { + let entries = match dfg.user_stack_map_entries(inst) { + None => return Ok(()), + Some(es) => es, + }; + write!(w, ", stack_map=[")?; + let mut need_comma = false; + for entry in entries { + if need_comma { + write!(w, ", ")?; + } + write!(w, "{} @ {}+{}", entry.ty, entry.slot, entry.offset)?; + need_comma = true; + } + write!(w, "]")?; + Ok(()) +} + /// Displayable slice of values. struct DisplayValues<'a>(&'a [Value]); diff --git a/cranelift/filetests/filetests/parser/user_stack_maps.clif b/cranelift/filetests/filetests/parser/user_stack_maps.clif new file mode 100644 index 000000000000..8596849dbe93 --- /dev/null +++ b/cranelift/filetests/filetests/parser/user_stack_maps.clif @@ -0,0 +1,35 @@ +; Parser tests for user stack maps on call instructions. 
+ +test cat + +function %foo() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + + stack_store v0, ss0 + stack_store v1, ss0+4 + stack_store v2, ss0+8 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] +; check: call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] + + stack_store v1, ss0 + stack_store v2, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] +; check: call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + + stack_store v2, ss0 + call fn0(v1), stack_map=[i32 @ ss0+0] +; check: call fn0(v1), stack_map=[i32 @ ss0+0] + + call fn0(v2) +; check: call fn0(v2) + + return +} diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index 5abaf3e9c8de..650e65ef458c 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -1,6 +1,7 @@ //! A frontend for building Cranelift IR from other languages. use crate::ssa::{SSABuilder, SideEffects}; use crate::variable::Variable; +use alloc::collections::BTreeSet; use core::fmt::{self, Debug}; use cranelift_codegen::cursor::{Cursor, FuncCursor}; use cranelift_codegen::entity::{EntityRef, EntitySet, SecondaryMap}; @@ -15,6 +16,8 @@ use cranelift_codegen::ir::{ }; use cranelift_codegen::isa::TargetFrontendConfig; use cranelift_codegen::packed_option::PackedOption; +use cranelift_codegen::traversals::Dfs; +use smallvec::SmallVec; /// Structure used for translating a series of functions into Cranelift IR. /// @@ -26,6 +29,8 @@ pub struct FunctionBuilderContext { ssa: SSABuilder, status: SecondaryMap<Block, BlockStatus>, types: SecondaryMap<Variable, Type>, + needs_stack_map: EntitySet<Value>, + dfs: Dfs, } /// Temporary object used to build a single Cranelift IR [`Function`]. @@ -497,6 +502,21 @@ impl<'a> FunctionBuilder<'a> { } } + /// Declare that the given value is a GC reference that requires inclusion + /// in a stack map when it is live across GC safepoints. + /// + /// # Panics + /// + /// Panics if `val` is larger than 16 bytes or its type's size is not a power of two. + pub fn declare_needs_stack_map(&mut self, val: Value) { + // We rely on these properties in `insert_safepoint_spills`. + let size = self.func.dfg.value_type(val).bytes(); + assert!(size <= 16); + assert!(size.is_power_of_two()); + + self.func_ctx.needs_stack_map.insert(val); + } + /// Creates a jump table in the function, to be used by [`br_table`](InstBuilder::br_table) instructions. pub fn create_jump_table(&mut self, data: JumpTableData) -> JumpTable { self.func.create_jump_table(data) @@ -610,11 +630,186 @@ impl<'a> FunctionBuilder<'a> { } } + + /// Insert spills for every value that needs to be in a stack map at every + /// safepoint. + /// + /// First, we do a very simple, imprecise, and overapproximating liveness + /// analysis. This considers any use (regardless of whether that use produces side + /// effects or flows into another instruction that produces side effects!) + /// of a needs-stack-map value to keep the value live. This allows us to do + /// this liveness analysis in a single post-order traversal of the IR, + /// without any fixed-point loop. The result of this analysis is the set of + /// live needs-stack-map values at each instruction that must be a safepoint + /// (currently this is just non-tail calls). + /// + /// Second, take those results, add stack slots so we have a place to spill + /// to, and then finally spill the live needs-stack-map values at each + /// safepoint.
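Aside, not from the patch: the second phase described above buckets spilled values by the log2 of their byte size, and the hunk that follows implements it. As a quick illustration of that bucketing, under the 16-byte, power-of-two limits enforced by `declare_needs_stack_map`:

```rust
/// Illustration only: map a (power-of-two, <= 16) byte size to the index used
/// for `max_vals_in_stack_map_by_size`, `stack_slots`, and
/// `stack_slot_offsets` in the implementation that follows.
fn size_class(byte_size: u32) -> usize {
    debug_assert!(byte_size.is_power_of_two() && byte_size <= 16);
    usize::try_from(byte_size.ilog2()).unwrap()
}

fn main() {
    // i8 -> 0, i16 -> 1, i32/f32 -> 2, i64/f64 -> 3, i128/i8x16 -> 4, so an
    // `i32` and an `f32` that are live across the same safepoint share a stack
    // slot at different offsets.
    assert_eq!(size_class(4), 2);
    assert_eq!(size_class(16), 4);
}
```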
+ fn insert_safepoint_spills(&mut self) { + // A map from each safepoint to the set of GC references that are live + // across it. + let mut safepoints: crate::HashMap<ir::Inst, SmallVec<[ir::Value; 4]>> = + crate::HashMap::new(); + + // The maximum number of values we need to store in a stack map at the + // same time, bucketed by their type's size. This array is indexed by + // the log2 of the type's size. We do not support recording values whose + // size is greater than 16 in stack maps. + let mut max_vals_in_stack_map_by_size = [0, 0, 0, 0, 0]; + + // The set of needs-stack-map values that are currently live in our + // traversal. + // + // NB: use a `BTreeSet` so that iteration is deterministic, as we will + // insert spills in an order derived from this collection's iteration + // order. + let mut live = BTreeSet::new(); + + // Do our single-pass liveness analysis. + // + // Use a post-order traversal, traversing the IR backwards from uses to + // defs, because liveness is a backwards analysis. + // + // 1. The definition of a value removes it from our `live` set. Values + // are not live before they are defined. + // + // 2. When we see any instruction that requires a safepoint (aka + // non-tail calls) we record the current live set of needs-stack-map + // values. + // + // We ignore tail calls because this caller and its frame won't exist + // by the time the callee is executing and potentially triggers a GC; + // nothing is live in the function after it exits! + // + // Note that this step should actually happen *before* adding uses to + // the `live` set below, in order to avoid holding GC objects alive + // longer than necessary, because arguments to the call that are not + // live afterwards need not be prevented from reclamation by + // the GC for us, and therefore need not appear in this stack map. It + // is the callee's responsibility to record such arguments in its + // stack maps if it keeps them alive across some call that might + // trigger GC. + // + // 3. Any use of a needs-stack-map value adds it to our `live` set. + // + // Note: we do not flow liveness from block parameters back to branch + // arguments, and instead always consider branch arguments live. That + // additional precision would require a fixed-point loop. + // + // Furthermore, we do not differentiate between uses of a + // needs-stack-map value that ultimately flow into a side-effecting + // operation versus uses that are themselves dead code. This could be + // tightened up in the future, but we're starting with the easiest, + // simplest thing. Besides, none of our mid-end optimization passes + // have run at this point in time yet, so there probably isn't much, + // if any, dead code. + for block in self.func_ctx.dfs.post_order_iter(&self.func) { + let mut option_inst = self.func.layout.last_inst(block); + while let Some(inst) = option_inst { + // (1) Remove values defined by this instruction from the `live` + // set. + for val in self.func.dfg.inst_results(inst) { + live.remove(val); + } + + // (2) If this instruction is a call, then we need to add a + // safepoint to record any values in `live`.
+ let opcode = self.func.dfg.insts[inst].opcode(); + if opcode.is_call() && !opcode.is_return() { + let mut live: SmallVec<[_; 4]> = live.iter().copied().collect(); + live.sort_by_key(|val| self.func.dfg.value_type(*val).bytes()); + for chunk in live.chunk_by(|a, b| { + self.func.dfg.value_type(*a).bytes() == self.func.dfg.value_type(*b).bytes() + }) { + let byte_size = self.func.dfg.value_type(chunk[0]).bytes(); + debug_assert!(byte_size.is_power_of_two()); + let index = usize::try_from(byte_size.ilog2()).unwrap(); + max_vals_in_stack_map_by_size[index] = + core::cmp::max(max_vals_in_stack_map_by_size[index], chunk.len()); + } + + let old_val = safepoints.insert(inst, live); + debug_assert!(old_val.is_none()); + } + + // (3) Add all needs-stack-map values that are operands to this + // instruction to the live set. + for val in self.func.dfg.inst_values(inst) { + if self.func_ctx.needs_stack_map.contains(val) { + live.insert(val); + } + } + + option_inst = self.func.layout.prev_inst(inst); + } + + // After we've processed this block's instructions, remove its + // parameters from the live set. This is part of step (1). + for val in self.func.dfg.block_params(block) { + live.remove(val); + } + } + + // Create a stack slot for each size of needs-stack-map value. These + // slots are arrays capable of holding the maximum number of same-sized + // values that must appear in the same stack map at the same time. + // + // This is indexed by the log2 of the type size. + let mut stack_slots = [PackedOption::<ir::StackSlot>::default(); 5]; + for (log2_size, capacity) in max_vals_in_stack_map_by_size.into_iter().enumerate() { + if capacity == 0 { + continue; + } + + let size = 1usize << log2_size; + let slot = self.func.create_sized_stack_slot(ir::StackSlotData::new( + ir::StackSlotKind::ExplicitSlot, + u32::try_from(size * capacity).unwrap(), + u8::try_from(log2_size).unwrap(), + )); + stack_slots[log2_size] = Some(slot).into(); + } + + // Insert spills to our new stack slots before each safepoint + // instruction. + let mut cursor = FuncCursor::new(self.func); + for (inst, live_vals) in safepoints { + cursor = cursor.at_inst(inst); + + // The offset within each stack slot at which the next spill to that + // stack slot will be placed. + let mut stack_slot_offsets = [0; 5]; + + for val in live_vals { + let ty = cursor.func.dfg.value_type(val); + let size_of_val = ty.bytes(); + + debug_assert!(size_of_val.is_power_of_two()); + let index = size_of_val.ilog2(); + let index = usize::try_from(index).unwrap(); + + let slot = stack_slots[index].unwrap(); + + let offset = stack_slot_offsets[index]; + stack_slot_offsets[index] += size_of_val; + + cursor + .ins() + .stack_store(val, slot, i32::try_from(offset).unwrap()); + + cursor + .func + .dfg + .append_user_stack_map_entry(inst, ir::UserStackMapEntry { ty, slot, offset }); + } + } + } + /// Declare that translation of the current function is complete. /// /// This resets the state of the [`FunctionBuilderContext`] in preparation to /// be used for another function. - pub fn finalize(self) { + pub fn finalize(mut self) { // Check that all the `Block`s are filled and sealed. #[cfg(debug_assertions)] { @@ -649,6 +844,10 @@ impl<'a> FunctionBuilder<'a> { } } + + if !self.func_ctx.needs_stack_map.is_empty() { + self.insert_safepoint_spills(); + } + // Clear the state (but preserve the allocated buffers) in preparation // for translating another function.
self.func_ctx.clear(); @@ -1120,7 +1319,7 @@ mod tests { use alloc::string::ToString; use cranelift_codegen::entity::EntityRef; use cranelift_codegen::ir::condcodes::IntCC; - use cranelift_codegen::ir::{types::*, UserFuncName}; + use cranelift_codegen::ir::{self, types::*, UserFuncName}; use cranelift_codegen::ir::{AbiParam, Function, InstBuilder, MemFlags, Signature, Value}; use cranelift_codegen::isa::{CallConv, TargetFrontendConfig, TargetIsa}; use cranelift_codegen::settings; @@ -1854,4 +2053,795 @@ block0: ); } } + + #[test] + fn needs_stack_map_and_loop() { + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(ir::types::I32)); + sig.params.push(AbiParam::new(ir::types::I32)); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(UserFuncName::testcase("sample"), sig); + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let name = builder + .func + .declare_imported_user_function(ir::UserExternalName { + namespace: 0, + index: 0, + }); + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(ir::types::I32)); + let signature = builder.func.import_signature(sig); + let func_ref = builder.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(name), + signature, + colocated: true, + }); + + // Here the value `v1` is technically not live but our single-pass liveness + // analysis treats every branch argument to a block as live to avoid + // needing to do a fixed-point loop. + // + // block0(v0, v1): + // call $foo(v0) + // jump block0(v0, v1) + let block0 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + let a = builder.func.dfg.block_params(block0)[0]; + let b = builder.func.dfg.block_params(block0)[1]; + builder.declare_needs_stack_map(a); + builder.declare_needs_stack_map(b); + builder.switch_to_block(block0); + builder.ins().call(func_ref, &[a]); + builder.ins().jump(block0, &[a, b]); + builder.seal_all_blocks(); + builder.finalize(); + + eprintln!("Actual = {}", func.display()); + assert_eq!( + func.display().to_string().trim(), + r#" +function %sample(i32, i32) system_v { + ss0 = explicit_slot 8, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i32, v1: i32): + stack_store v0, ss0 + stack_store v1, ss0+4 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] + jump block0(v0, v1) +} + "# + .trim() + ); + } + + #[test] + fn needs_stack_map_simple() { + let sig = Signature::new(CallConv::SystemV); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(UserFuncName::testcase("sample"), sig); + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let name = builder + .func + .declare_imported_user_function(ir::UserExternalName { + namespace: 0, + index: 0, + }); + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(ir::types::I32)); + let signature = builder.func.import_signature(sig); + let func_ref = builder.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(name), + signature, + colocated: true, + }); + + // At each `call` we are losing one more value as no longer live, so + // each stack map should be one smaller than the last. `v3` is never + // live, so should never appear in a stack map. Note that a value that + // is an argument to the call, but is not live after the call, should + // not appear in the stack map. 
This is why `v0` appears in the first + // call's stack map, but not the second call's stack map. + // + // block0: + // v0 = needs stack map + // v1 = needs stack map + // v2 = needs stack map + // v3 = needs stack map + // call $foo(v0) + // call $foo(v0) + // call $foo(v1) + // call $foo(v2) + // return + let block0 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + builder.switch_to_block(block0); + let v0 = builder.ins().iconst(ir::types::I32, 0); + builder.declare_needs_stack_map(v0); + let v1 = builder.ins().iconst(ir::types::I32, 1); + builder.declare_needs_stack_map(v1); + let v2 = builder.ins().iconst(ir::types::I32, 2); + builder.declare_needs_stack_map(v2); + let v3 = builder.ins().iconst(ir::types::I32, 3); + builder.declare_needs_stack_map(v3); + builder.ins().call(func_ref, &[v0]); + builder.ins().call(func_ref, &[v0]); + builder.ins().call(func_ref, &[v1]); + builder.ins().call(func_ref, &[v2]); + builder.ins().return_(&[]); + builder.seal_all_blocks(); + builder.finalize(); + + eprintln!("Actual = {}", func.display()); + assert_eq!( + func.display().to_string().trim(), + r#" +function %sample() system_v { + ss0 = explicit_slot 12, align = 4 + sig0 = (i32) system_v + fn0 = colocated u0:0 sig0 + +block0: + v0 = iconst.i32 0 + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iconst.i32 3 + stack_store v0, ss0 ; v0 = 0 + stack_store v1, ss0+4 ; v1 = 1 + stack_store v2, ss0+8 ; v2 = 2 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4, i32 @ ss0+8] ; v0 = 0 + stack_store v1, ss0 ; v1 = 1 + stack_store v2, ss0+4 ; v2 = 2 + call fn0(v0), stack_map=[i32 @ ss0+0, i32 @ ss0+4] ; v0 = 0 + stack_store v2, ss0 ; v2 = 2 + call fn0(v1), stack_map=[i32 @ ss0+0] ; v1 = 1 + call fn0(v2) ; v2 = 2 + return +} + "# + .trim() + ); + } + + #[test] + fn needs_stack_map_and_post_order_early_return() { + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(ir::types::I32)); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(UserFuncName::testcase("sample"), sig); + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let name = builder + .func + .declare_imported_user_function(ir::UserExternalName { + namespace: 0, + index: 0, + }); + let signature = builder + .func + .import_signature(Signature::new(CallConv::SystemV)); + let func_ref = builder.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(name), + signature, + colocated: true, + }); + + // Here we rely on the post-order to make sure that we never visit block + // 3 and add `v1` to our live set, then visit block 2 and add `v1` to + // its stack map even though `v1` is not in scope. Thankfully, that + // sequence is impossible because it would be an invalid post-order + // traversal. The only valid post-order traversals are [3, 1, 2, 0] and + // [2, 3, 1, 0].
+ // + // block0(v0): + // brif v0, block1, block2 + // + // block1: + // + // v1 = get some gc ref + // jump block3 + // + // block2: + // call $needs_safepoint_accidentally + // return + // + // block3: + // stuff keeping v1 live + // return + let block0 = builder.create_block(); + let block1 = builder.create_block(); + let block2 = builder.create_block(); + let block3 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + + builder.switch_to_block(block0); + let v0 = builder.func.dfg.block_params(block0)[0]; + builder.ins().brif(v0, block1, &[], block2, &[]); + + builder.switch_to_block(block1); + let v1 = builder.ins().iconst(ir::types::I64, 0x12345678); + builder.declare_needs_stack_map(v1); + builder.ins().jump(block3, &[]); + + builder.switch_to_block(block2); + builder.ins().call(func_ref, &[]); + builder.ins().return_(&[]); + + builder.switch_to_block(block3); + // NB: Our simplistic liveness analysis conservatively treats any use of + // a value as keeping it live, regardless if the use has side effects or + // is otherwise itself live, so an `iadd_imm` suffices to keep `v1` live + // here. + builder.ins().iadd_imm(v1, 0); + builder.ins().return_(&[]); + + builder.seal_all_blocks(); + builder.finalize(); + + eprintln!("Actual = {}", func.display()); + assert_eq!( + func.display().to_string().trim(), + r#" +function %sample(i32) system_v { + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i32): + brif v0, block1, block2 + +block1: + v1 = iconst.i64 0x1234_5678 + jump block3 + +block2: + call fn0() + return + +block3: + v2 = iadd_imm.i64 v1, 0 ; v1 = 0x1234_5678 + return +} + "# + .trim() + ); + } + + #[test] + fn needs_stack_map_conditional_branches_and_liveness() { + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(ir::types::I32)); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(UserFuncName::testcase("sample"), sig); + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let name = builder + .func + .declare_imported_user_function(ir::UserExternalName { + namespace: 0, + index: 0, + }); + let signature = builder + .func + .import_signature(Signature::new(CallConv::SystemV)); + let func_ref = builder.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(name), + signature, + colocated: true, + }); + + // Depending on which post-order traversal we take, we might consider + // `v1` live inside `block1` and emit unnecessary safepoint + // spills. That's not great, but ultimately fine, we are trading away + // precision for a single-pass analysis. 
+ // + // block0(v0): + // v1 = needs stack map + // brif v0, block1, block2 + // + // block1: + // call $foo() + // return + // + // block2: + // keep v1 alive + // return + let block0 = builder.create_block(); + let block1 = builder.create_block(); + let block2 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + + builder.switch_to_block(block0); + let v0 = builder.func.dfg.block_params(block0)[0]; + let v1 = builder.ins().iconst(ir::types::I64, 0x12345678); + builder.declare_needs_stack_map(v1); + builder.ins().brif(v0, block1, &[], block2, &[]); + + builder.switch_to_block(block1); + builder.ins().call(func_ref, &[]); + builder.ins().return_(&[]); + + builder.switch_to_block(block2); + // NB: Our simplistic liveness analysis conservatively treats any use of + // a value as keeping it live, regardless if the use has side effects or + // is otherwise itself live, so an `iadd_imm` suffices to keep `v1` live + // here. + builder.ins().iadd_imm(v1, 0); + builder.ins().return_(&[]); + + builder.seal_all_blocks(); + builder.finalize(); + + eprintln!("Actual = {}", func.display()); + assert_eq!( + func.display().to_string().trim(), + r#" +function %sample(i32) system_v { + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i32): + v1 = iconst.i64 0x1234_5678 + brif v0, block1, block2 + +block1: + call fn0() + return + +block2: + v2 = iadd_imm.i64 v1, 0 ; v1 = 0x1234_5678 + return +} + "# + .trim() + ); + + // Now Do the same test but with block 1 and 2 swapped so that we + // exercise what we are trying to exercise, regardless of which + // post-order traversal we happen to take. + func.clear(); + fn_ctx.clear(); + + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(ir::types::I32)); + + func.signature = sig; + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let name = builder + .func + .declare_imported_user_function(ir::UserExternalName { + namespace: 0, + index: 0, + }); + let signature = builder + .func + .import_signature(Signature::new(CallConv::SystemV)); + let func_ref = builder.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(name), + signature, + colocated: true, + }); + + let block0 = builder.create_block(); + let block1 = builder.create_block(); + let block2 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + + builder.switch_to_block(block0); + let v0 = builder.func.dfg.block_params(block0)[0]; + let v1 = builder.ins().iconst(ir::types::I64, 0x12345678); + builder.declare_needs_stack_map(v1); + builder.ins().brif(v0, block1, &[], block2, &[]); + + builder.switch_to_block(block1); + builder.ins().iadd_imm(v1, 0); + builder.ins().return_(&[]); + + builder.switch_to_block(block2); + builder.ins().call(func_ref, &[]); + builder.ins().return_(&[]); + + builder.seal_all_blocks(); + builder.finalize(); + + eprintln!("Actual = {}", func.display()); + assert_eq!( + func.display().to_string().trim(), + r#" +function u0:0(i32) system_v { + ss0 = explicit_slot 8, align = 8 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i32): + v1 = iconst.i64 0x1234_5678 + brif v0, block1, block2 + +block1: + v2 = iadd_imm.i64 v1, 0 ; v1 = 0x1234_5678 + return + +block2: + stack_store.i64 v1, ss0 ; v1 = 0x1234_5678 + call fn0(), stack_map=[i64 @ ss0+0] + return +} + "# + .trim() + ); + } + + #[test] + fn needs_stack_map_and_tail_calls() { + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(ir::types::I32)); + + let mut 
fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(UserFuncName::testcase("sample"), sig); + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let name = builder + .func + .declare_imported_user_function(ir::UserExternalName { + namespace: 0, + index: 0, + }); + let signature = builder + .func + .import_signature(Signature::new(CallConv::SystemV)); + let func_ref = builder.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(name), + signature, + colocated: true, + }); + + // Depending on which post-order traversal we take, we might consider + // `v1` live inside `block1`. But nothing is live after a tail call so + // we shouldn't spill `v1` either way here. + // + // block0(v0): + // v1 = needs stack map + // brif v0, block1, block2 + // + // block1: + // return_call $foo() + // + // block2: + // keep v1 alive + // return + let block0 = builder.create_block(); + let block1 = builder.create_block(); + let block2 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + + builder.switch_to_block(block0); + let v0 = builder.func.dfg.block_params(block0)[0]; + let v1 = builder.ins().iconst(ir::types::I64, 0x12345678); + builder.declare_needs_stack_map(v1); + builder.ins().brif(v0, block1, &[], block2, &[]); + + builder.switch_to_block(block1); + builder.ins().return_call(func_ref, &[]); + + builder.switch_to_block(block2); + // NB: Our simplistic liveness analysis conservatively treats any use of + // a value as keeping it live, regardless if the use has side effects or + // is otherwise itself live, so an `iadd_imm` suffices to keep `v1` live + // here. + builder.ins().iadd_imm(v1, 0); + builder.ins().return_(&[]); + + builder.seal_all_blocks(); + builder.finalize(); + + eprintln!("Actual = {}", func.display()); + assert_eq!( + func.display().to_string().trim(), + r#" +function %sample(i32) system_v { + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i32): + v1 = iconst.i64 0x1234_5678 + brif v0, block1, block2 + +block1: + return_call fn0() + +block2: + v2 = iadd_imm.i64 v1, 0 ; v1 = 0x1234_5678 + return +} + "# + .trim() + ); + + // Do the same test but with block 1 and 2 swapped so that we exercise + // what we are trying to exercise, regardless of which post-order + // traversal we happen to take. 
+ func.clear(); + fn_ctx.clear(); + + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(ir::types::I32)); + func.signature = sig; + + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let name = builder + .func + .declare_imported_user_function(ir::UserExternalName { + namespace: 0, + index: 0, + }); + let signature = builder + .func + .import_signature(Signature::new(CallConv::SystemV)); + let func_ref = builder.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(name), + signature, + colocated: true, + }); + + let block0 = builder.create_block(); + let block1 = builder.create_block(); + let block2 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + + builder.switch_to_block(block0); + let v0 = builder.func.dfg.block_params(block0)[0]; + let v1 = builder.ins().iconst(ir::types::I64, 0x12345678); + builder.declare_needs_stack_map(v1); + builder.ins().brif(v0, block1, &[], block2, &[]); + + builder.switch_to_block(block1); + builder.ins().iadd_imm(v1, 0); + builder.ins().return_(&[]); + + builder.switch_to_block(block2); + builder.ins().return_call(func_ref, &[]); + + builder.seal_all_blocks(); + builder.finalize(); + + eprintln!("Actual = {}", func.display()); + assert_eq!( + func.display().to_string().trim(), + r#" +function u0:0(i32) system_v { + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i32): + v1 = iconst.i64 0x1234_5678 + brif v0, block1, block2 + +block1: + v2 = iadd_imm.i64 v1, 0 ; v1 = 0x1234_5678 + return + +block2: + return_call fn0() +} + "# + .trim() + ); + } + + #[test] + fn needs_stack_map_and_cfg_diamond() { + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(ir::types::I32)); + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(UserFuncName::testcase("sample"), sig); + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let name = builder + .func + .declare_imported_user_function(ir::UserExternalName { + namespace: 0, + index: 0, + }); + let signature = builder + .func + .import_signature(Signature::new(CallConv::SystemV)); + let func_ref = builder.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(name), + signature, + colocated: true, + }); + + // Create an if/else CFG diamond that and check that various things get + // spilled as needed. 
+ // + // block0(v0): + // brif v0, block1, block2 + // + // block1: + // v1 = needs stack map + // v2 = needs stack map + // call $foo() + // jump block3(v1, v2) + // + // block2: + // v3 = needs stack map + // v4 = needs stack map + // call $foo() + // jump block3(v3, v3) ;; Note: v4 is not live + // + // block3(v5, v6): + // call $foo() + // keep v5 alive, but not v6 + let block0 = builder.create_block(); + let block1 = builder.create_block(); + let block2 = builder.create_block(); + let block3 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + + builder.switch_to_block(block0); + let v0 = builder.func.dfg.block_params(block0)[0]; + builder.ins().brif(v0, block1, &[], block2, &[]); + + builder.switch_to_block(block1); + let v1 = builder.ins().iconst(ir::types::I64, 1); + builder.declare_needs_stack_map(v1); + let v2 = builder.ins().iconst(ir::types::I64, 2); + builder.declare_needs_stack_map(v2); + builder.ins().call(func_ref, &[]); + builder.ins().jump(block3, &[v1, v2]); + + builder.switch_to_block(block2); + let v3 = builder.ins().iconst(ir::types::I64, 3); + builder.declare_needs_stack_map(v3); + let v4 = builder.ins().iconst(ir::types::I64, 4); + builder.declare_needs_stack_map(v4); + builder.ins().call(func_ref, &[]); + builder.ins().jump(block3, &[v3, v3]); + + builder.switch_to_block(block3); + builder.ins().call(func_ref, &[]); + // NB: Our simplistic liveness analysis conservatively treats any use of + // a value as keeping it live, regardless if the use has side effects or + // is otherwise itself live, so an `iadd_imm` suffices to keep `v1` live + // here. + builder.ins().iadd_imm(v1, 0); + builder.ins().return_(&[]); + + builder.seal_all_blocks(); + builder.finalize(); + + eprintln!("Actual = {}", func.display()); + assert_eq!( + func.display().to_string().trim(), + r#" +function %sample(i32) system_v { + ss0 = explicit_slot 16, align = 8 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i32): + brif v0, block1, block2 + +block1: + v1 = iconst.i64 1 + v2 = iconst.i64 2 + stack_store v1, ss0 ; v1 = 1 + stack_store v2, ss0+8 ; v2 = 2 + call fn0(), stack_map=[i64 @ ss0+0, i64 @ ss0+8] + jump block3(v1, v2) ; v1 = 1, v2 = 2 + +block2: + v3 = iconst.i64 3 + v4 = iconst.i64 4 + stack_store v3, ss0 ; v3 = 3 + call fn0(), stack_map=[i64 @ ss0+0] + jump block3(v3, v3) ; v3 = 3, v3 = 3 + +block3: + stack_store.i64 v1, ss0 ; v1 = 1 + call fn0(), stack_map=[i64 @ ss0+0] + v5 = iadd_imm.i64 v1, 0 ; v1 = 1 + return +} + "# + .trim() + ); + } + + #[test] + fn needs_stack_map_and_heterogeneous_types() { + let mut sig = Signature::new(CallConv::SystemV); + for ty in [ + ir::types::I8, + ir::types::I16, + ir::types::I32, + ir::types::I64, + ir::types::I128, + ir::types::F32, + ir::types::F64, + ir::types::I8X16, + ir::types::I16X8, + ] { + sig.params.push(AbiParam::new(ty)); + sig.returns.push(AbiParam::new(ty)); + } + + let mut fn_ctx = FunctionBuilderContext::new(); + let mut func = Function::with_name_signature(UserFuncName::testcase("sample"), sig); + let mut builder = FunctionBuilder::new(&mut func, &mut fn_ctx); + + let name = builder + .func + .declare_imported_user_function(ir::UserExternalName { + namespace: 0, + index: 0, + }); + let signature = builder + .func + .import_signature(Signature::new(CallConv::SystemV)); + let func_ref = builder.import_function(ir::ExtFuncData { + name: ir::ExternalName::user(name), + signature, + colocated: true, + }); + + // Test that we support stack maps of heterogeneous types and properly + // 
coalesce types into stack slots based on their size. + // + // block0(v0, v1, v2, v3, v4, v5, v6, v7, v8): + // call $foo() + // return v0, v1, v2, v3, v4, v5, v6, v7, v8 + let block0 = builder.create_block(); + builder.append_block_params_for_function_params(block0); + + builder.switch_to_block(block0); + let params = builder.func.dfg.block_params(block0).to_vec(); + for val in ¶ms { + builder.declare_needs_stack_map(*val); + } + builder.ins().call(func_ref, &[]); + builder.ins().return_(¶ms); + + builder.seal_all_blocks(); + builder.finalize(); + + eprintln!("Actual = {}", func.display()); + assert_eq!( + func.display().to_string().trim(), + r#" +function %sample(i8, i16, i32, i64, i128, f32, f64, i8x16, i16x8) -> i8, i16, i32, i64, i128, f32, f64, i8x16, i16x8 system_v { + ss0 = explicit_slot 1 + ss1 = explicit_slot 2, align = 2 + ss2 = explicit_slot 8, align = 4 + ss3 = explicit_slot 16, align = 8 + ss4 = explicit_slot 48, align = 16 + sig0 = () system_v + fn0 = colocated u0:0 sig0 + +block0(v0: i8, v1: i16, v2: i32, v3: i64, v4: i128, v5: f32, v6: f64, v7: i8x16, v8: i16x8): + stack_store v0, ss0 + stack_store v1, ss1 + stack_store v2, ss2 + stack_store v5, ss2+4 + stack_store v3, ss3 + stack_store v6, ss3+8 + stack_store v4, ss4 + stack_store v7, ss4+16 + stack_store v8, ss4+32 + call fn0(), stack_map=[i8 @ ss0+0, i16 @ ss1+0, i32 @ ss2+0, f32 @ ss2+4, i64 @ ss3+0, f64 @ ss3+8, i128 @ ss4+0, i8x16 @ ss4+16, i16x8 @ ss4+32] + return v0, v1, v2, v3, v4, v5, v6, v7, v8 +} + "# + .trim() + ); + } } diff --git a/cranelift/reader/src/lexer.rs b/cranelift/reader/src/lexer.rs index cd6d2ed23f66..8eee0d7fc9ad 100644 --- a/cranelift/reader/src/lexer.rs +++ b/cranelift/reader/src/lexer.rs @@ -29,6 +29,7 @@ pub enum Token<'a> { Colon, // ':' Equal, // '=' Bang, // '!' + At, // '@' Arrow, // '->' Float(&'a str), // Floating point immediate Integer(&'a str), // Integer immediate @@ -495,7 +496,13 @@ impl<'a> Lexer<'a> { Some('%') => Some(self.scan_name()), Some('"') => Some(self.scan_string()), Some('#') => Some(self.scan_hex_sequence()), - Some('@') => Some(self.scan_srcloc()), + Some('@') => { + if self.looking_at_numeric() { + Some(self.scan_srcloc()) + } else { + Some(self.scan_char(Token::At)) + } + } // all ascii whitespace Some(' ') | Some('\x09'..='\x0d') => { self.next_ch(); diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 23ce76526383..c03f8be5596a 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -2466,13 +2466,44 @@ impl<'a> Parser<'a> { // instruction ::= [inst-results "="] Opcode(opc) ["." Type] * ... let inst_data = self.parse_inst_operands(ctx, opcode, explicit_ctrl_type)?; - // We're done parsing the instruction now. + // We're done parsing the instruction data itself. // - // We still need to check that the number of result values in the source matches the opcode - // or function call signature. We also need to create values with the right type for all - // the instruction results. + // We still need to check that the number of result values in the source + // matches the opcode or function call signature. We also need to create + // values with the right type for all the instruction results and parse + // and attach stack map entries, if present. 
let ctrl_typevar = self.infer_typevar(ctx, opcode, explicit_ctrl_type, &inst_data)?; let inst = ctx.function.dfg.make_inst(inst_data); + if opcode.is_call() && !opcode.is_return() && self.optional(Token::Comma) { + self.match_identifier("stack_map", "expected `stack_map = [...]`")?; + self.match_token(Token::Equal, "expected `= [...]`")?; + self.match_token(Token::LBracket, "expected `[...]`")?; + while !self.optional(Token::RBracket) { + let ty = self.match_type("expected `<ty> @ <slot> + <offset>`")?; + self.match_token(Token::At, "expected `@ <slot> + <offset>`")?; + let slot = self.match_ss("expected `<slot> + <offset>`")?; + let offset: u32 = match self.token() { + Some(Token::Integer(s)) if s.starts_with('+') => { + self.match_uimm32("expected a u32 offset")?.into() + } + _ => { + self.match_token(Token::Plus, "expected `+ <offset>`") + .map_err(|e| { + dbg!(self.lookahead); + e + })?; + self.match_uimm32("expected a u32 offset")?.into() + } + }; + ctx.function + .dfg + .append_user_stack_map_entry(inst, ir::UserStackMapEntry { ty, slot, offset }); + if !self.optional(Token::Comma) { + self.match_token(Token::RBracket, "expected `,` or `]`")?; + break; + } + } + } let num_results = ctx.function .dfg
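Aside, not part of the patch: the parser hunk above accepts the entry offset whether the lexer produced it as a single signed integer token (e.g. `+8`, glued to the slot) or as a separate `+` followed by a `u32`, and the lexer change makes `@` lex as `Token::At` when it is not followed by a number. A sketch of exercising the new syntax end to end through `cranelift-reader`; the exact round-trip below is illustrative rather than taken from the PR's tests.

```rust
// Parse a function that uses the new `stack_map=[...]` annotation and print it
// back. `parse_functions` is cranelift-reader's existing entry point; the CLIF
// text mirrors the filetest above.
fn main() {
    let clif = "
        function %f() system_v {
            ss0 = explicit_slot 8, align = 4
            sig0 = (i32) system_v
            fn0 = colocated u0:0 sig0

        block0:
            v0 = iconst.i32 0
            stack_store v0, ss0
            call fn0(v0), stack_map=[i32 @ ss0+0]
            return
        }";

    let funcs = cranelift_reader::parse_functions(clif).expect("should parse");
    // The `write.rs` changes print the entries back out after the call's
    // arguments, so the displayed text contains the same `stack_map=[...]`.
    println!("{}", funcs[0].display());
}
```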