From d926482f7a724770d8f6818f312e8dda7b17738b Mon Sep 17 00:00:00 2001 From: Artem Agvanian Date: Tue, 2 Jul 2024 12:07:47 -0700 Subject: [PATCH] Towards Proving Memory Initialization (#3264) This PR enables automatic memory initialization proofs for raw pointers in Kani. This is done without any extra instrumentation from the user. Currently, due to high memory consumption and only partial support of pointee types for which memory initialization proofs work, this feature is gated behind `-Z uninit-checks` flag. Note that because it uses shadow memory under the hood, programs using this feature need to pass `-Z ghost-state` flag as well. This PR is a part of working towards #3300. By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 and MIT licenses. --------- Co-authored-by: Celina G. Val --- kani-compiler/src/args.rs | 2 + .../codegen_cprover_gotoc/codegen/contract.rs | 22 + .../src/kani_middle/transform/body.rs | 178 ++++- .../kani_middle/transform/check_uninit/mod.rs | 428 +++++++++++ .../transform/check_uninit/ty_layout.rs | 334 ++++++++ .../transform/check_uninit/uninit_visitor.rs | 713 ++++++++++++++++++ .../src/kani_middle/transform/check_values.rs | 80 +- .../src/kani_middle/transform/contracts.rs | 2 +- .../kani_middle/transform/kani_intrinsics.rs | 188 ++++- .../src/kani_middle/transform/mod.rs | 24 +- .../src/kani_middle/transform/stubs.rs | 4 +- kani-driver/src/call_single_file.rs | 4 + kani_metadata/src/unstable.rs | 2 + library/kani/src/lib.rs | 1 + library/kani/src/mem.rs | 10 +- library/kani/src/mem_init.rs | 122 +++ library/kani_core/src/mem.rs | 12 +- .../access-padding-uninit.rs | 16 + .../uninit/access-padding-uninit/expected | 5 + .../access-padding-via-cast.rs | 20 + .../uninit/access-padding-via-cast/expected | 5 + .../uninit/alloc-to-slice/alloc-to-slice.rs | 20 + tests/expected/uninit/alloc-to-slice/expected | 5 + .../expected/uninit/vec-read-bad-len/expected | 5 + 
.../vec-read-bad-len/vec-read-bad-len.rs | 15 + .../uninit/vec-read-semi-init/expected | 5 + .../vec-read-semi-init/vec-read-semi-init.rs | 11 + .../expected/uninit/vec-read-uninit/expected | 5 + .../uninit/vec-read-uninit/vec-read-uninit.rs | 10 + .../access-padding-enum-diverging-variants.rs | 34 + .../access-padding-enum-multiple-variants.rs | 49 ++ .../access-padding-enum-single-field.rs | 34 + .../access-padding-enum-single-variant.rs | 32 + tests/perf/uninit/Cargo.toml | 14 + tests/perf/uninit/expected | 1 + tests/perf/uninit/src/lib.rs | 68 ++ tests/std-checks/core/mem.expected | 6 +- tests/std-checks/core/slice.expected | 1 + tests/std-checks/core/src/lib.rs | 1 + tests/std-checks/core/src/mem.rs | 35 + tests/std-checks/core/src/slice.rs | 30 + 41 files changed, 2500 insertions(+), 53 deletions(-) create mode 100644 kani-compiler/src/kani_middle/transform/check_uninit/mod.rs create mode 100644 kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs create mode 100644 kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs create mode 100644 library/kani/src/mem_init.rs create mode 100644 tests/expected/uninit/access-padding-uninit/access-padding-uninit.rs create mode 100644 tests/expected/uninit/access-padding-uninit/expected create mode 100644 tests/expected/uninit/access-padding-via-cast/access-padding-via-cast.rs create mode 100644 tests/expected/uninit/access-padding-via-cast/expected create mode 100644 tests/expected/uninit/alloc-to-slice/alloc-to-slice.rs create mode 100644 tests/expected/uninit/alloc-to-slice/expected create mode 100644 tests/expected/uninit/vec-read-bad-len/expected create mode 100644 tests/expected/uninit/vec-read-bad-len/vec-read-bad-len.rs create mode 100644 tests/expected/uninit/vec-read-semi-init/expected create mode 100644 tests/expected/uninit/vec-read-semi-init/vec-read-semi-init.rs create mode 100644 tests/expected/uninit/vec-read-uninit/expected create mode 100644 
tests/expected/uninit/vec-read-uninit/vec-read-uninit.rs create mode 100644 tests/kani/Uninit/access-padding-enum-diverging-variants.rs create mode 100644 tests/kani/Uninit/access-padding-enum-multiple-variants.rs create mode 100644 tests/kani/Uninit/access-padding-enum-single-field.rs create mode 100644 tests/kani/Uninit/access-padding-enum-single-variant.rs create mode 100644 tests/perf/uninit/Cargo.toml create mode 100644 tests/perf/uninit/expected create mode 100644 tests/perf/uninit/src/lib.rs create mode 100644 tests/std-checks/core/slice.expected create mode 100644 tests/std-checks/core/src/slice.rs diff --git a/kani-compiler/src/args.rs b/kani-compiler/src/args.rs index b4d4eb3718d8..e4b7a4435b0f 100644 --- a/kani-compiler/src/args.rs +++ b/kani-compiler/src/args.rs @@ -88,4 +88,6 @@ pub enum ExtraChecks { /// Check pointer validity when casting pointers to references. /// See https://github.com/model-checking/kani/issues/2975. PtrToRefCast, + /// Check for using uninitialized memory. 
+ Uninit, } diff --git a/kani-compiler/src/codegen_cprover_gotoc/codegen/contract.rs b/kani-compiler/src/codegen_cprover_gotoc/codegen/contract.rs index 0f27d3f119f7..d35015aa040d 100644 --- a/kani-compiler/src/codegen_cprover_gotoc/codegen/contract.rs +++ b/kani-compiler/src/codegen_cprover_gotoc/codegen/contract.rs @@ -118,6 +118,27 @@ impl<'tcx> GotocCtx<'tcx> { .typ .clone(); + let shadow_memory_assign = self + .tcx + .all_diagnostic_items(()) + .name_to_id + .get(&rustc_span::symbol::Symbol::intern("KaniMemInitShadowMem")) + .map(|attr_id| { + self.tcx + .symbol_name(rustc_middle::ty::Instance::mono(self.tcx, *attr_id)) + .name + .to_string() + }) + .and_then(|shadow_memory_table| self.symbol_table.lookup(&shadow_memory_table).cloned()) + .map(|shadow_memory_symbol| { + vec![Lambda::as_contract_for( + &goto_annotated_fn_typ, + None, + shadow_memory_symbol.to_expr(), + )] + }) + .unwrap_or_default(); + let assigns = modified_places .into_iter() .map(|local| { @@ -127,6 +148,7 @@ impl<'tcx> GotocCtx<'tcx> { self.codegen_place_stable(&local.into(), loc).unwrap().goto_expr.dereference(), ) }) + .chain(shadow_memory_assign) .collect(); FunctionContract::new(assigns) diff --git a/kani-compiler/src/kani_middle/transform/body.rs b/kani-compiler/src/kani_middle/transform/body.rs index 2bf424a1332d..22895bd8d20d 100644 --- a/kani-compiler/src/kani_middle/transform/body.rs +++ b/kani-compiler/src/kani_middle/transform/body.rs @@ -37,6 +37,13 @@ pub struct MutableBody { span: Span, } +/// Denotes whether instrumentation should be inserted before or after the statement. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum InsertPosition { + Before, + After, +} + impl MutableBody { /// Get the basic blocks of this builder. 
pub fn blocks(&self) -> &[BasicBlock] { @@ -95,12 +102,13 @@ impl MutableBody { from: Operand, pointee_ty: Ty, mutability: Mutability, - before: &mut SourceInstruction, + source: &mut SourceInstruction, + position: InsertPosition, ) -> Local { assert!(from.ty(self.locals()).unwrap().kind().is_raw_ptr()); let target_ty = Ty::new_ptr(pointee_ty, mutability); let rvalue = Rvalue::Cast(CastKind::PtrToPtr, from, target_ty); - self.new_assignment(rvalue, before) + self.new_assignment(rvalue, source, position) } /// Add a new assignment for the given binary operation. @@ -111,21 +119,27 @@ impl MutableBody { bin_op: BinOp, lhs: Operand, rhs: Operand, - before: &mut SourceInstruction, + source: &mut SourceInstruction, + position: InsertPosition, ) -> Local { let rvalue = Rvalue::BinaryOp(bin_op, lhs, rhs); - self.new_assignment(rvalue, before) + self.new_assignment(rvalue, source, position) } /// Add a new assignment. /// /// Return local where the result is saved. - pub fn new_assignment(&mut self, rvalue: Rvalue, before: &mut SourceInstruction) -> Local { - let span = before.span(&self.blocks); + pub fn new_assignment( + &mut self, + rvalue: Rvalue, + source: &mut SourceInstruction, + position: InsertPosition, + ) -> Local { + let span = source.span(&self.blocks); let ret_ty = rvalue.ty(&self.locals).unwrap(); let result = self.new_local(ret_ty, span, Mutability::Not); let stmt = Statement { kind: StatementKind::Assign(Place::from(result), rvalue), span }; - self.insert_stmt(stmt, before); + self.insert_stmt(stmt, source, position); result } @@ -139,6 +153,7 @@ impl MutableBody { tcx: TyCtxt, check_type: &CheckType, source: &mut SourceInstruction, + position: InsertPosition, value: Local, msg: &str, ) { @@ -168,7 +183,7 @@ impl MutableBody { unwind: UnwindAction::Terminate, }; let terminator = Terminator { kind, span }; - self.split_bb(source, terminator); + self.split_bb(source, position, terminator); } CheckType::Panic | CheckType::NoCore => { tcx.sess @@ -182,11 
+197,55 @@ impl MutableBody { } } - /// Split a basic block right before the source location and use the new terminator - /// in the basic block that was split. + /// Add a new call to the basic block indicated by the given index. + /// + /// The new call will have the same span as the source instruction, and the basic block + /// will be split. The source instruction will be adjusted to point to the first instruction in + /// the new basic block. + pub fn add_call( + &mut self, + callee: &Instance, + source: &mut SourceInstruction, + position: InsertPosition, + args: Vec, + destination: Place, + ) { + let new_bb = self.blocks.len(); + let span = source.span(&self.blocks); + let callee_op = + Operand::Copy(Place::from(self.new_local(callee.ty(), span, Mutability::Not))); + let kind = TerminatorKind::Call { + func: callee_op, + args, + destination, + target: Some(new_bb), + unwind: UnwindAction::Terminate, + }; + let terminator = Terminator { kind, span }; + self.split_bb(source, position, terminator); + } + + /// Split a basic block and use the new terminator in the basic block that was split. /// /// The source is updated to point to the same instruction which is now in the new basic block. - pub fn split_bb(&mut self, source: &mut SourceInstruction, new_term: Terminator) { + pub fn split_bb( + &mut self, + source: &mut SourceInstruction, + position: InsertPosition, + new_term: Terminator, + ) { + match position { + InsertPosition::Before => { + self.split_bb_before(source, new_term); + } + InsertPosition::After => { + self.split_bb_after(source, new_term); + } + } + } + + /// Split a basic block right before the source location. 
+ fn split_bb_before(&mut self, source: &mut SourceInstruction, new_term: Terminator) { let new_bb_idx = self.blocks.len(); let (idx, bb) = match source { SourceInstruction::Statement { idx, bb } => { @@ -196,9 +255,9 @@ impl MutableBody { (orig_idx, orig_bb) } SourceInstruction::Terminator { bb } => { - let orig_bb = *bb; + let (orig_idx, orig_bb) = (self.blocks[*bb].statements.len(), *bb); *bb = new_bb_idx; - (self.blocks[orig_bb].statements.len(), orig_bb) + (orig_idx, orig_bb) } }; let old_term = mem::replace(&mut self.blocks[bb].terminator, new_term); @@ -208,16 +267,95 @@ impl MutableBody { self.blocks.push(new_bb); } - /// Insert statement before the source instruction and update the source as needed. - pub fn insert_stmt(&mut self, new_stmt: Statement, before: &mut SourceInstruction) { - match before { + /// Split a basic block right after the source location. + fn split_bb_after(&mut self, source: &mut SourceInstruction, mut new_term: Terminator) { + let new_bb_idx = self.blocks.len(); + match source { + // Split the current block after the statement located at `source` + // and move the remaining statements into the new one. SourceInstruction::Statement { idx, bb } => { - self.blocks[*bb].statements.insert(*idx, new_stmt); - *idx += 1; + let (orig_idx, orig_bb) = (*idx, *bb); + *idx = 0; + *bb = new_bb_idx; + let old_term = mem::replace(&mut self.blocks[orig_bb].terminator, new_term); + let bb_stmts = &mut self.blocks[orig_bb].statements; + let remaining = bb_stmts.split_off(orig_idx + 1); + let new_bb = BasicBlock { statements: remaining, terminator: old_term }; + self.blocks.push(new_bb); } + // Make the terminator at `source` point at the new block, + // the terminator of which is a simple Goto instruction. SourceInstruction::Terminator { bb } => { - // Append statements at the end of the basic block. 
- self.blocks[*bb].statements.push(new_stmt); + let current_terminator = &mut self.blocks.get_mut(*bb).unwrap().terminator; + // Kani can only instrument function calls like this. + match (&mut current_terminator.kind, &mut new_term.kind) { + ( + TerminatorKind::Call { target: Some(target_bb), .. }, + TerminatorKind::Call { target: Some(new_target_bb), .. }, + ) => { + // Set the new terminator to point where previous terminator pointed. + *new_target_bb = *target_bb; + // Point the current terminator to the new terminator's basic block. + *target_bb = new_bb_idx; + // Update the current poisition. + *bb = new_bb_idx; + self.blocks.push(BasicBlock { statements: vec![], terminator: new_term }); + } + _ => unimplemented!("Kani can only split blocks after calls."), + }; + } + }; + } + + /// Insert statement before or after the source instruction and update the source as needed. + pub fn insert_stmt( + &mut self, + new_stmt: Statement, + source: &mut SourceInstruction, + position: InsertPosition, + ) { + match position { + InsertPosition::Before => { + match source { + SourceInstruction::Statement { idx, bb } => { + self.blocks[*bb].statements.insert(*idx, new_stmt); + *idx += 1; + } + SourceInstruction::Terminator { bb } => { + // Append statements at the end of the basic block. + self.blocks[*bb].statements.push(new_stmt); + } + } + } + InsertPosition::After => { + let new_bb_idx = self.blocks.len(); + let span = source.span(&self.blocks); + match source { + SourceInstruction::Statement { idx, bb } => { + self.blocks[*bb].statements.insert(*idx + 1, new_stmt); + *idx += 1; + } + SourceInstruction::Terminator { bb } => { + // Create a new basic block, as we need to append a statement after the terminator. + let current_terminator = &mut self.blocks.get_mut(*bb).unwrap().terminator; + // Kani can only instrument function calls in this way. + match &mut current_terminator.kind { + TerminatorKind::Call { target: Some(target_bb), .. 
} => { + *source = SourceInstruction::Statement { idx: 0, bb: new_bb_idx }; + let new_bb = BasicBlock { + statements: vec![new_stmt], + terminator: Terminator { + kind: TerminatorKind::Goto { target: *target_bb }, + span, + }, + }; + *target_bb = new_bb_idx; + self.blocks.push(new_bb); + } + _ => unimplemented!("Kani can only insert statements after calls."), + }; + } + } } } } diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs new file mode 100644 index 000000000000..6665ab697287 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/mod.rs @@ -0,0 +1,428 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! Implement a transformation pass that instruments the code to detect possible UB due to +//! the accesses to uninitialized memory. + +use crate::args::ExtraChecks; +use crate::kani_middle::find_fn_def; +use crate::kani_middle::transform::body::{ + CheckType, InsertPosition, MutableBody, SourceInstruction, +}; +use crate::kani_middle::transform::{TransformPass, TransformationType}; +use crate::kani_queries::QueryDb; +use rustc_middle::ty::TyCtxt; +use rustc_smir::rustc_internal; +use stable_mir::mir::mono::Instance; +use stable_mir::mir::{AggregateKind, Body, ConstOperand, Mutability, Operand, Place, Rvalue}; +use stable_mir::ty::{ + FnDef, GenericArgKind, GenericArgs, MirConst, RigidTy, Ty, TyConst, TyKind, UintTy, +}; +use stable_mir::CrateDef; +use std::collections::{HashMap, HashSet}; +use std::fmt::Debug; +use tracing::{debug, trace}; + +mod ty_layout; +mod uninit_visitor; + +pub use ty_layout::{PointeeInfo, PointeeLayout}; +use uninit_visitor::{CheckUninitVisitor, InitRelevantInstruction, MemoryInitOp}; + +const SKIPPED_DIAGNOSTIC_ITEMS: &[&str] = + &["KaniIsUnitPtrInitialized", "KaniSetUnitPtrInitialized"]; + +/// Instrument the code with checks for uninitialized memory. 
+#[derive(Debug)] +pub struct UninitPass { + pub check_type: CheckType, + /// Used to cache FnDef lookups of injected memory initialization functions. + pub mem_init_fn_cache: HashMap<&'static str, FnDef>, +} + +impl TransformPass for UninitPass { + fn transformation_type() -> TransformationType + where + Self: Sized, + { + TransformationType::Instrumentation + } + + fn is_enabled(&self, query_db: &QueryDb) -> bool + where + Self: Sized, + { + let args = query_db.args(); + args.ub_check.contains(&ExtraChecks::Uninit) + } + + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + trace!(function=?instance.name(), "transform"); + + // Need to break infinite recursion when shadow memory checks are inserted, + // so the internal function responsible for shadow memory checks are skipped. + if tcx + .get_diagnostic_name(rustc_internal::internal(tcx, instance.def.def_id())) + .map(|diagnostic_name| { + SKIPPED_DIAGNOSTIC_ITEMS.contains(&diagnostic_name.to_ident_string().as_str()) + }) + .unwrap_or(false) + { + return (false, body); + } + + let mut new_body = MutableBody::from(body); + let orig_len = new_body.blocks().len(); + + // Set of basic block indices for which analyzing first statement should be skipped. + // + // This is necessary because some checks are inserted before the source instruction, which, in + // turn, gets moved to the next basic block. Hence, we would not need to look at the + // instruction again as a part of new basic block. However, if the check is inserted after the + // source instruction, we still need to look at the first statement of the new basic block, so + // we need to keep track of which basic blocks were created as a part of injecting checks after + // the source instruction. + let mut skip_first = HashSet::new(); + + // Do not cache body.blocks().len() since it will change as we add new checks. 
+ let mut bb_idx = 0; + while bb_idx < new_body.blocks().len() { + if let Some(candidate) = + CheckUninitVisitor::find_next(&new_body, bb_idx, skip_first.contains(&bb_idx)) + { + self.build_check_for_instruction(tcx, &mut new_body, candidate, &mut skip_first); + bb_idx += 1 + } else { + bb_idx += 1; + }; + } + (orig_len != new_body.blocks().len(), new_body.into()) + } +} + +impl UninitPass { + /// Inject memory initialization checks for each operation in an instruction. + fn build_check_for_instruction( + &mut self, + tcx: TyCtxt, + body: &mut MutableBody, + instruction: InitRelevantInstruction, + skip_first: &mut HashSet, + ) { + debug!(?instruction, "build_check"); + let mut source = instruction.source; + for operation in instruction.before_instruction { + self.build_check_for_operation(tcx, body, &mut source, operation, skip_first); + } + for operation in instruction.after_instruction { + self.build_check_for_operation(tcx, body, &mut source, operation, skip_first); + } + } + + /// Inject memory initialization check for an operation. + fn build_check_for_operation( + &mut self, + tcx: TyCtxt, + body: &mut MutableBody, + source: &mut SourceInstruction, + operation: MemoryInitOp, + skip_first: &mut HashSet, + ) { + if let MemoryInitOp::Unsupported { reason } = &operation { + collect_skipped(&operation, body, skip_first); + self.unsupported_check(tcx, body, source, operation.position(), reason); + return; + }; + + let pointee_ty_info = { + let ptr_operand = operation.mk_operand(body, source); + let ptr_operand_ty = ptr_operand.ty(body.locals()).unwrap(); + let pointee_ty = match ptr_operand_ty.kind() { + TyKind::RigidTy(RigidTy::RawPtr(pointee_ty, _)) => pointee_ty, + _ => { + unreachable!( + "Should only build checks for raw pointers, `{ptr_operand_ty}` encountered." 
+ ) + } + }; + match PointeeInfo::from_ty(pointee_ty) { + Ok(type_info) => type_info, + Err(_) => { + let reason = format!( + "Kani currently doesn't support checking memory initialization for pointers to `{pointee_ty}.", + ); + collect_skipped(&operation, body, skip_first); + self.unsupported_check(tcx, body, source, operation.position(), &reason); + return; + } + } + }; + + match operation { + MemoryInitOp::Check { .. } => { + self.build_get_and_check(tcx, body, source, operation, pointee_ty_info, skip_first) + } + MemoryInitOp::Set { .. } | MemoryInitOp::SetRef { .. } => { + self.build_set(tcx, body, source, operation, pointee_ty_info, skip_first) + } + MemoryInitOp::Unsupported { .. } => { + unreachable!() + } + } + } + + /// Inject a load from shadow memory tracking memory initialization and an assertion that all + /// non-padding bytes are initialized. + fn build_get_and_check( + &mut self, + tcx: TyCtxt, + body: &mut MutableBody, + source: &mut SourceInstruction, + operation: MemoryInitOp, + pointee_info: PointeeInfo, + skip_first: &mut HashSet, + ) { + let ret_place = Place { + local: body.new_local(Ty::bool_ty(), source.span(body.blocks()), Mutability::Not), + projection: vec![], + }; + let ptr_operand = operation.mk_operand(body, source); + match pointee_info.layout() { + PointeeLayout::Sized { layout } => { + let is_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, "KaniIsPtrInitialized", &mut self.mem_init_fn_cache), + layout.len(), + *pointee_info.ty(), + ); + let layout_operand = mk_layout_operand(body, source, operation.position(), &layout); + collect_skipped(&operation, body, skip_first); + body.add_call( + &is_ptr_initialized_instance, + source, + operation.position(), + vec![ptr_operand.clone(), layout_operand, operation.expect_count()], + ret_place.clone(), + ); + } + PointeeLayout::Slice { element_layout } => { + // Since `str`` is a separate type, need to differentiate between [T] and str. 
+ let (slicee_ty, diagnostic) = match pointee_info.ty().kind() { + TyKind::RigidTy(RigidTy::Slice(slicee_ty)) => { + (slicee_ty, "KaniIsSlicePtrInitialized") + } + TyKind::RigidTy(RigidTy::Str) => { + (Ty::unsigned_ty(UintTy::U8), "KaniIsStrPtrInitialized") + } + _ => unreachable!(), + }; + let is_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, diagnostic, &mut self.mem_init_fn_cache), + element_layout.len(), + slicee_ty, + ); + let layout_operand = + mk_layout_operand(body, source, operation.position(), &element_layout); + collect_skipped(&operation, body, skip_first); + body.add_call( + &is_ptr_initialized_instance, + source, + operation.position(), + vec![ptr_operand.clone(), layout_operand], + ret_place.clone(), + ); + } + PointeeLayout::TraitObject => { + collect_skipped(&operation, body, skip_first); + let reason = "Kani does not support reasoning about memory initialization of pointers to trait objects."; + self.unsupported_check(tcx, body, source, operation.position(), reason); + return; + } + }; + + // Make sure all non-padding bytes are initialized. + collect_skipped(&operation, body, skip_first); + let ptr_operand_ty = ptr_operand.ty(body.locals()).unwrap(); + body.add_check( + tcx, + &self.check_type, + source, + operation.position(), + ret_place.local, + &format!("Undefined Behavior: Reading from an uninitialized pointer of type `{ptr_operand_ty}`"), + ) + } + + /// Inject a store into shadow memory tracking memory initialization to initialize or + /// deinitialize all non-padding bytes. 
+ fn build_set( + &mut self, + tcx: TyCtxt, + body: &mut MutableBody, + source: &mut SourceInstruction, + operation: MemoryInitOp, + pointee_info: PointeeInfo, + skip_first: &mut HashSet, + ) { + let ret_place = Place { + local: body.new_local(Ty::new_tuple(&[]), source.span(body.blocks()), Mutability::Not), + projection: vec![], + }; + let ptr_operand = operation.mk_operand(body, source); + let value = operation.expect_value(); + + match pointee_info.layout() { + PointeeLayout::Sized { layout } => { + let set_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, "KaniSetPtrInitialized", &mut self.mem_init_fn_cache), + layout.len(), + *pointee_info.ty(), + ); + let layout_operand = mk_layout_operand(body, source, operation.position(), &layout); + collect_skipped(&operation, body, skip_first); + body.add_call( + &set_ptr_initialized_instance, + source, + operation.position(), + vec![ + ptr_operand, + layout_operand, + operation.expect_count(), + Operand::Constant(ConstOperand { + span: source.span(body.blocks()), + user_ty: None, + const_: MirConst::from_bool(value), + }), + ], + ret_place, + ); + } + PointeeLayout::Slice { element_layout } => { + // Since `str`` is a separate type, need to differentiate between [T] and str. 
+ let (slicee_ty, diagnostic) = match pointee_info.ty().kind() { + TyKind::RigidTy(RigidTy::Slice(slicee_ty)) => { + (slicee_ty, "KaniSetSlicePtrInitialized") + } + TyKind::RigidTy(RigidTy::Str) => { + (Ty::unsigned_ty(UintTy::U8), "KaniSetStrPtrInitialized") + } + _ => unreachable!(), + }; + let set_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, diagnostic, &mut self.mem_init_fn_cache), + element_layout.len(), + slicee_ty, + ); + let layout_operand = + mk_layout_operand(body, source, operation.position(), &element_layout); + collect_skipped(&operation, body, skip_first); + body.add_call( + &set_ptr_initialized_instance, + source, + operation.position(), + vec![ + ptr_operand, + layout_operand, + Operand::Constant(ConstOperand { + span: source.span(body.blocks()), + user_ty: None, + const_: MirConst::from_bool(value), + }), + ], + ret_place, + ); + } + PointeeLayout::TraitObject => { + unreachable!("Cannot change the initialization state of a trait object directly."); + } + }; + } + + fn unsupported_check( + &self, + tcx: TyCtxt, + body: &mut MutableBody, + source: &mut SourceInstruction, + position: InsertPosition, + reason: &str, + ) { + let span = source.span(body.blocks()); + let rvalue = Rvalue::Use(Operand::Constant(ConstOperand { + const_: MirConst::from_bool(false), + span, + user_ty: None, + })); + let result = body.new_assignment(rvalue, source, position); + body.add_check(tcx, &self.check_type, source, position, result, reason); + } +} + +/// Create an operand from a bit array that represents a byte mask for a type layout where padding +/// bytes are marked as `false` and data bytes are marked as `true`. +/// +/// For example, the layout for: +/// ``` +/// [repr(C)] +/// struct { +/// a: u16, +/// b: u8 +/// } +/// ``` +/// will have the following byte mask `[true, true, true, false]`. 
+pub fn mk_layout_operand( + body: &mut MutableBody, + source: &mut SourceInstruction, + position: InsertPosition, + layout_byte_mask: &[bool], +) -> Operand { + Operand::Move(Place { + local: body.new_assignment( + Rvalue::Aggregate( + AggregateKind::Array(Ty::bool_ty()), + layout_byte_mask + .iter() + .map(|byte| { + Operand::Constant(ConstOperand { + span: source.span(body.blocks()), + user_ty: None, + const_: MirConst::from_bool(*byte), + }) + }) + .collect(), + ), + source, + position, + ), + projection: vec![], + }) +} + +/// If injecting a new call to the function before the current statement, need to skip the original +/// statement when analyzing it as a part of the new basic block. +fn collect_skipped(operation: &MemoryInitOp, body: &MutableBody, skip_first: &mut HashSet) { + if operation.position() == InsertPosition::Before { + let new_bb_idx = body.blocks().len(); + skip_first.insert(new_bb_idx); + } +} + +/// Retrieve a function definition by diagnostic string, caching the result. +pub fn get_mem_init_fn_def( + tcx: TyCtxt, + diagnostic: &'static str, + cache: &mut HashMap<&'static str, FnDef>, +) -> FnDef { + let entry = cache.entry(diagnostic).or_insert_with(|| find_fn_def(tcx, diagnostic).unwrap()); + *entry +} + +/// Resolves a given memory initialization function with passed type parameters. 
+pub fn resolve_mem_init_fn(fn_def: FnDef, layout_size: usize, associated_type: Ty) -> Instance { + Instance::resolve( + fn_def, + &GenericArgs(vec![ + GenericArgKind::Const(TyConst::try_from_target_usize(layout_size as u64).unwrap()), + GenericArgKind::Type(associated_type), + ]), + ) + .unwrap() +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs new file mode 100644 index 000000000000..09116230af80 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/ty_layout.rs @@ -0,0 +1,334 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! Utility functions that help calculate type layout. + +use stable_mir::abi::{FieldsShape, Scalar, TagEncoding, ValueAbi, VariantsShape}; +use stable_mir::target::{MachineInfo, MachineSize}; +use stable_mir::ty::{AdtKind, IndexedVal, RigidTy, Ty, TyKind, UintTy}; +use stable_mir::CrateDef; + +/// Represents a chunk of data bytes in a data structure. +#[derive(Clone, Debug, Eq, PartialEq, Hash)] +struct DataBytes { + /// Offset in bytes. + offset: usize, + /// Size of this data chunk. + size: MachineSize, +} + +/// Bytewise mask, representing which bytes of a type are data and which are padding. Here, `false` +/// represents padding bytes and `true` represents data bytes. +type Layout = Vec; + +/// Create a byte-wise mask from known chunks of data bytes. +fn generate_byte_mask(size_in_bytes: usize, data_chunks: Vec) -> Vec { + let mut layout_mask = vec![false; size_in_bytes]; + for data_bytes in data_chunks.iter() { + for layout_item in + layout_mask.iter_mut().skip(data_bytes.offset).take(data_bytes.size.bytes()) + { + *layout_item = true; + } + } + layout_mask +} + +// Depending on whether the type is statically or dynamically sized, +// the layout of the element or the layout of the actual type is returned. +pub enum PointeeLayout { + /// Layout of sized objects. 
+ Sized { layout: Layout }, + /// Layout of slices, *const/mut str is included in this case and treated as *const/mut [u8]. + Slice { element_layout: Layout }, + /// Trait objects have an arbitrary layout. + TraitObject, +} + +pub struct PointeeInfo { + pointee_ty: Ty, + layout: PointeeLayout, +} + +impl PointeeInfo { + pub fn from_ty(ty: Ty) -> Result { + match ty.kind() { + TyKind::RigidTy(rigid_ty) => match rigid_ty { + RigidTy::Str => { + let slicee_ty = Ty::unsigned_ty(UintTy::U8); + let size_in_bytes = slicee_ty.layout().unwrap().shape().size.bytes(); + let data_chunks = data_bytes_for_ty(&MachineInfo::target(), slicee_ty, 0)?; + let layout = PointeeLayout::Slice { + element_layout: generate_byte_mask(size_in_bytes, data_chunks), + }; + Ok(PointeeInfo { pointee_ty: ty, layout }) + } + RigidTy::Slice(slicee_ty) => { + let size_in_bytes = slicee_ty.layout().unwrap().shape().size.bytes(); + let data_chunks = data_bytes_for_ty(&MachineInfo::target(), slicee_ty, 0)?; + let layout = PointeeLayout::Slice { + element_layout: generate_byte_mask(size_in_bytes, data_chunks), + }; + Ok(PointeeInfo { pointee_ty: ty, layout }) + } + RigidTy::Dynamic(..) => { + Ok(PointeeInfo { pointee_ty: ty, layout: PointeeLayout::TraitObject }) + } + _ => { + if ty.layout().unwrap().shape().is_sized() { + let size_in_bytes = ty.layout().unwrap().shape().size.bytes(); + let data_chunks = data_bytes_for_ty(&MachineInfo::target(), ty, 0)?; + let layout = PointeeLayout::Sized { + layout: generate_byte_mask(size_in_bytes, data_chunks), + }; + Ok(PointeeInfo { pointee_ty: ty, layout }) + } else { + Err(format!("Cannot determine type layout for type `{ty}`")) + } + } + }, + TyKind::Alias(..) | TyKind::Param(..) | TyKind::Bound(..) 
=> { + unreachable!("Should only encounter monomorphized types at this point.") + } + } + } + + pub fn ty(&self) -> &Ty { + &self.pointee_ty + } + + pub fn layout(&self) -> &PointeeLayout { + &self.layout + } +} + +/// Retrieve a set of data bytes with offsets for a type. +fn data_bytes_for_ty( + machine_info: &MachineInfo, + ty: Ty, + current_offset: usize, +) -> Result, String> { + let layout = ty.layout().unwrap().shape(); + + match layout.fields { + FieldsShape::Primitive => Ok(vec![match layout.abi { + ValueAbi::Scalar(Scalar::Initialized { value, .. }) => { + DataBytes { offset: current_offset, size: value.size(machine_info) } + } + _ => unreachable!("FieldsShape::Primitive with a different ABI than ValueAbi::Scalar"), + }]), + FieldsShape::Array { stride, count } if count > 0 => { + let TyKind::RigidTy(RigidTy::Array(elem_ty, _)) = ty.kind() else { unreachable!() }; + let elem_data_bytes = data_bytes_for_ty(machine_info, elem_ty, current_offset)?; + let mut result = vec![]; + if !elem_data_bytes.is_empty() { + for idx in 0..count { + let idx: usize = idx.try_into().unwrap(); + let elem_offset = idx * stride.bytes(); + let mut next_data_bytes = elem_data_bytes + .iter() + .cloned() + .map(|mut req| { + req.offset += elem_offset; + req + }) + .collect::>(); + result.append(&mut next_data_bytes) + } + } + Ok(result) + } + FieldsShape::Arbitrary { ref offsets } => { + match ty.kind().rigid().expect(&format!("unexpected type: {ty:?}")) { + RigidTy::Adt(def, args) => { + match def.kind() { + AdtKind::Enum => { + // Support basic enumeration forms + let ty_variants = def.variants(); + match layout.variants { + VariantsShape::Single { index } => { + // Only one variant is reachable. This behaves like a struct. 
+ let fields = ty_variants[index.to_index()].fields(); + let mut fields_data_bytes = vec![]; + for idx in layout.fields.fields_by_offset_order() { + let field_offset = offsets[idx].bytes(); + let field_ty = fields[idx].ty_with_args(&args); + fields_data_bytes.append(&mut data_bytes_for_ty( + machine_info, + field_ty, + field_offset + current_offset, + )?); + } + Ok(fields_data_bytes) + } + VariantsShape::Multiple { + tag_encoding: TagEncoding::Niche { .. }, + .. + } => { + Err(format!("Unsupported Enum `{}` check", def.trimmed_name()))? + } + VariantsShape::Multiple { variants, tag, .. } => { + // Retrieve data bytes for the tag. + let tag_size = match tag { + Scalar::Initialized { value, .. } => { + value.size(&machine_info) + } + Scalar::Union { .. } => { + unreachable!("Enum tag should not be a union.") + } + }; + // For enums, tag is the only field and should have offset of 0. + assert!(offsets.len() == 1 && offsets[0].bytes() == 0); + let tag_data_bytes = + vec![DataBytes { offset: current_offset, size: tag_size }]; + + // Retrieve data bytes for the fields. + let mut fields_data_bytes = vec![]; + // Iterate over all variants for the enum. + for (index, variant) in variants.iter().enumerate() { + let mut field_data_bytes_for_variant = vec![]; + let fields = ty_variants[index].fields(); + // Get offsets of all fields in a variant. + let FieldsShape::Arbitrary { offsets: field_offsets } = + variant.fields.clone() + else { + unreachable!() + }; + for field_idx in variant.fields.fields_by_offset_order() { + let field_offset = field_offsets[field_idx].bytes(); + let field_ty = fields[field_idx].ty_with_args(&args); + field_data_bytes_for_variant.append( + &mut data_bytes_for_ty( + machine_info, + field_ty, + field_offset + current_offset, + )?, + ); + } + fields_data_bytes.push(field_data_bytes_for_variant); + } + + if fields_data_bytes.is_empty() { + // If there are no fields, return the tag data bytes. 
+ Ok(tag_data_bytes) + } else if fields_data_bytes.iter().all( + |data_bytes_for_variant| { + // Byte layout for variant N. + let byte_mask_for_variant = generate_byte_mask( + layout.size.bytes(), + data_bytes_for_variant.clone(), + ); + // Byte layout for variant 0. + let byte_mask_for_first = generate_byte_mask( + layout.size.bytes(), + fields_data_bytes.first().unwrap().clone(), + ); + byte_mask_for_variant == byte_mask_for_first + }, + ) { + // If all fields have the same layout, return fields data + // bytes. + let mut total_data_bytes = tag_data_bytes; + let mut field_data_bytes = + fields_data_bytes.first().unwrap().clone(); + total_data_bytes.append(&mut field_data_bytes); + Ok(total_data_bytes) + } else { + // Struct has multiple padding variants, Kani cannot + // differentiate between them. + Err(format!( + "Unsupported Enum `{}` check", + def.trimmed_name() + )) + } + } + } + } + AdtKind::Union => unreachable!(), + AdtKind::Struct => { + let mut struct_data_bytes = vec![]; + let fields = def.variants_iter().next().unwrap().fields(); + for idx in layout.fields.fields_by_offset_order() { + let field_offset = offsets[idx].bytes(); + let field_ty = fields[idx].ty_with_args(&args); + struct_data_bytes.append(&mut data_bytes_for_ty( + machine_info, + field_ty, + field_offset + current_offset, + )?); + } + Ok(struct_data_bytes) + } + } + } + RigidTy::Pat(base_ty, ..) => { + // This is similar to a structure with one field and with niche defined. 
+ let mut pat_data_bytes = vec![]; + pat_data_bytes.append(&mut data_bytes_for_ty(machine_info, *base_ty, 0)?); + Ok(pat_data_bytes) + } + RigidTy::Tuple(tys) => { + let mut tuple_data_bytes = vec![]; + for idx in layout.fields.fields_by_offset_order() { + let field_offset = offsets[idx].bytes(); + let field_ty = tys[idx]; + tuple_data_bytes.append(&mut data_bytes_for_ty( + machine_info, + field_ty, + field_offset + current_offset, + )?); + } + Ok(tuple_data_bytes) + } + RigidTy::Bool + | RigidTy::Char + | RigidTy::Int(_) + | RigidTy::Uint(_) + | RigidTy::Float(_) + | RigidTy::Never => { + unreachable!("Expected primitive layout for {ty:?}") + } + RigidTy::Str | RigidTy::Slice(_) | RigidTy::Array(_, _) => { + unreachable!("Expected array layout for {ty:?}") + } + RigidTy::RawPtr(_, _) | RigidTy::Ref(_, _, _) => Ok(match layout.abi { + ValueAbi::Scalar(Scalar::Initialized { value, .. }) => { + // Thin pointer, ABI is a single scalar. + vec![DataBytes { offset: current_offset, size: value.size(machine_info) }] + } + ValueAbi::ScalarPair( + Scalar::Initialized { value: value_first, .. }, + Scalar::Initialized { value: value_second, .. }, + ) => { + // Fat pointer, ABI is a scalar pair. + let FieldsShape::Arbitrary { offsets } = layout.fields else { + unreachable!() + }; + // Since this is a scalar pair, only 2 elements are in the offsets vec. 
+ assert!(offsets.len() == 2); + vec![ + DataBytes { + offset: current_offset + offsets[0].bytes(), + size: value_first.size(machine_info), + }, + DataBytes { + offset: current_offset + offsets[1].bytes(), + size: value_second.size(machine_info), + }, + ] + } + _ => unreachable!("RigidTy::RawPtr | RigidTy::Ref with a non-scalar ABI."), + }), + RigidTy::FnDef(_, _) + | RigidTy::FnPtr(_) + | RigidTy::Closure(_, _) + | RigidTy::Coroutine(_, _, _) + | RigidTy::CoroutineWitness(_, _) + | RigidTy::Foreign(_) + | RigidTy::Dynamic(_, _, _) => Err(format!("Unsupported {ty:?}")), + } + } + FieldsShape::Union(_) => Err(format!("Unsupported {ty:?}")), + FieldsShape::Array { .. } => Ok(vec![]), + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs b/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs new file mode 100644 index 000000000000..19b13c6ab8b6 --- /dev/null +++ b/kani-compiler/src/kani_middle/transform/check_uninit/uninit_visitor.rs @@ -0,0 +1,713 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// +//! Visitor that collects all instructions relevant to uninitialized memory access. + +use crate::kani_middle::transform::body::{InsertPosition, MutableBody, SourceInstruction}; +use stable_mir::mir::alloc::GlobalAlloc; +use stable_mir::mir::mono::{Instance, InstanceKind}; +use stable_mir::mir::visit::{Location, PlaceContext}; +use stable_mir::mir::{ + BasicBlockIdx, CastKind, ConstOperand, LocalDecl, MirVisitor, Mutability, + NonDivergingIntrinsic, Operand, Place, PointerCoercion, ProjectionElem, Rvalue, Statement, + StatementKind, Terminator, TerminatorKind, +}; +use stable_mir::ty::{ConstantKind, MirConst, RigidTy, Span, TyKind, UintTy}; +use strum_macros::AsRefStr; + +/// Memory initialization operations: set or get memory initialization state for a given pointer. 
+#[derive(AsRefStr, Clone, Debug)] +pub enum MemoryInitOp { + /// Check memory initialization of data bytes in a memory region starting from the pointer + /// `operand` and of length `count * sizeof(operand)` bytes. + Check { operand: Operand, count: Operand }, + /// Set memory initialization state of data bytes in a memory region starting from the pointer + /// `operand` and of length `count * sizeof(operand)` bytes. + Set { operand: Operand, count: Operand, value: bool, position: InsertPosition }, + /// Set memory initialization of data bytes in a memory region starting from the reference to + /// `operand` and of length `count * sizeof(operand)` bytes. + SetRef { operand: Operand, count: Operand, value: bool, position: InsertPosition }, + /// Unsupported memory initialization operation. + Unsupported { reason: String }, +} + +impl MemoryInitOp { + /// Produce an operand for the relevant memory initialization related operation. This is mostly + /// required so that the analysis can create a new local to take a reference in + /// `MemoryInitOp::SetRef`. + pub fn mk_operand(&self, body: &mut MutableBody, source: &mut SourceInstruction) -> Operand { + match self { + MemoryInitOp::Check { operand, .. } | MemoryInitOp::Set { operand, .. } => { + operand.clone() + } + MemoryInitOp::SetRef { operand, .. } => Operand::Copy(Place { + local: { + let place = match operand { + Operand::Copy(place) | Operand::Move(place) => place, + Operand::Constant(_) => unreachable!(), + }; + body.new_assignment( + Rvalue::AddressOf(Mutability::Not, place.clone()), + source, + self.position(), + ) + }, + projection: vec![], + }), + MemoryInitOp::Unsupported { .. } => unreachable!(), + } + } + + pub fn expect_count(&self) -> Operand { + match self { + MemoryInitOp::Check { count, .. } + | MemoryInitOp::Set { count, .. } + | MemoryInitOp::SetRef { count, .. } => count.clone(), + MemoryInitOp::Unsupported { .. 
} => unreachable!(),
+        }
+    }
+
+    pub fn expect_value(&self) -> bool {
+        match self {
+            MemoryInitOp::Set { value, .. } | MemoryInitOp::SetRef { value, .. } => *value,
+            MemoryInitOp::Check { .. } | MemoryInitOp::Unsupported { .. } => unreachable!(),
+        }
+    }
+
+    pub fn position(&self) -> InsertPosition {
+        match self {
+            MemoryInitOp::Set { position, .. } | MemoryInitOp::SetRef { position, .. } => *position,
+            MemoryInitOp::Check { .. } | MemoryInitOp::Unsupported { .. } => InsertPosition::Before,
+        }
+    }
+}
+
+/// Represents an instruction in the source code together with all memory initialization checks/sets
+/// that are connected to the memory used in this instruction and whether they should be inserted
+/// before or after the instruction.
+#[derive(Clone, Debug)]
+pub struct InitRelevantInstruction {
+    /// The instruction that affects the state of the memory.
+    pub source: SourceInstruction,
+    /// All memory-related operations that should happen before the instruction.
+    pub before_instruction: Vec,
+    /// All memory-related operations that should happen after the instruction.
+    pub after_instruction: Vec,
+}
+
+impl InitRelevantInstruction {
+    pub fn push_operation(&mut self, source_op: MemoryInitOp) {
+        match source_op.position() {
+            InsertPosition::Before => self.before_instruction.push(source_op),
+            InsertPosition::After => self.after_instruction.push(source_op),
+        }
+    }
+}
+
+pub struct CheckUninitVisitor<'a> {
+    locals: &'a [LocalDecl],
+    /// Whether we should skip the next instruction, since it might've been instrumented already.
+    /// When we instrument an instruction, we partition the basic block, and the instruction that
+    /// may trigger UB becomes the first instruction of the basic block, which we need to skip
+    /// later.
+    skip_next: bool,
+    /// The instruction being visited at a given point.
+    current: SourceInstruction,
+    /// The target instruction that should be verified.
+    pub target: Option,
+    /// The basic block being visited.
+    bb: BasicBlockIdx,
+}
+
+impl<'a> CheckUninitVisitor<'a> {
+    pub fn find_next(
+        body: &'a MutableBody,
+        bb: BasicBlockIdx,
+        skip_first: bool,
+    ) -> Option {
+        let mut visitor = CheckUninitVisitor {
+            locals: body.locals(),
+            skip_next: skip_first,
+            current: SourceInstruction::Statement { idx: 0, bb },
+            target: None,
+            bb,
+        };
+        visitor.visit_basic_block(&body.blocks()[bb]);
+        visitor.target
+    }
+
+    fn push_target(&mut self, source_op: MemoryInitOp) {
+        let target = self.target.get_or_insert_with(|| InitRelevantInstruction {
+            source: self.current,
+            after_instruction: vec![],
+            before_instruction: vec![],
+        });
+        target.push_operation(source_op);
+    }
+}
+
+impl<'a> MirVisitor for CheckUninitVisitor<'a> {
+    fn visit_statement(&mut self, stmt: &Statement, location: Location) {
+        if self.skip_next {
+            self.skip_next = false;
+        } else if self.target.is_none() {
+            // Leave it as an exhaustive match to be notified when a new kind is added.
+            match &stmt.kind {
+                StatementKind::Intrinsic(NonDivergingIntrinsic::CopyNonOverlapping(copy)) => {
+                    self.super_statement(stmt, location);
+                    // Source is a *const T and it must be initialized.
+                    self.push_target(MemoryInitOp::Check {
+                        operand: copy.src.clone(),
+                        count: copy.count.clone(),
+                    });
+                    // Destination is a *mut T so it gets initialized.
+                    self.push_target(MemoryInitOp::Set {
+                        operand: copy.dst.clone(),
+                        count: copy.count.clone(),
+                        value: true,
+                        position: InsertPosition::After,
+                    });
+                }
+                StatementKind::Assign(place, rvalue) => {
+                    // First check rvalue.
+                    self.visit_rvalue(rvalue, location);
+                    // Check whether we are assigning into a dereference (*ptr = _).
+                    if let Some(place_without_deref) = try_remove_topmost_deref(place) {
+                        // First, check that we are not dereferencing extra pointers along the way
+                        // (e.g., **ptr = _). If yes, check whether these pointers are initialized.
+ let mut place_to_add_projections = + Place { local: place_without_deref.local, projection: vec![] }; + for projection_elem in place_without_deref.projection.iter() { + // If the projection is Deref and the current type is raw pointer, check + // if it points to initialized memory. + if *projection_elem == ProjectionElem::Deref { + if let TyKind::RigidTy(RigidTy::RawPtr(..)) = + place_to_add_projections.ty(&self.locals).unwrap().kind() + { + self.push_target(MemoryInitOp::Check { + operand: Operand::Copy(place_to_add_projections.clone()), + count: mk_const_operand(1, location.span()), + }); + }; + } + place_to_add_projections.projection.push(projection_elem.clone()); + } + if place_without_deref.ty(&self.locals).unwrap().kind().is_raw_ptr() { + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(place_without_deref), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::After, + }); + } + } + // Check whether Rvalue creates a new initialized pointer previously not captured inside shadow memory. + if place.ty(&self.locals).unwrap().kind().is_raw_ptr() { + if let Rvalue::AddressOf(..) = rvalue { + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(place.clone()), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::After, + }); + } + } + } + StatementKind::Deinit(place) => { + self.super_statement(stmt, location); + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(place.clone()), + count: mk_const_operand(1, location.span()), + value: false, + position: InsertPosition::After, + }); + } + StatementKind::FakeRead(_, _) + | StatementKind::SetDiscriminant { .. } + | StatementKind::StorageLive(_) + | StatementKind::StorageDead(_) + | StatementKind::Retag(_, _) + | StatementKind::PlaceMention(_) + | StatementKind::AscribeUserType { .. 
} + | StatementKind::Coverage(_) + | StatementKind::ConstEvalCounter + | StatementKind::Intrinsic(NonDivergingIntrinsic::Assume(_)) + | StatementKind::Nop => self.super_statement(stmt, location), + } + } + let SourceInstruction::Statement { idx, bb } = self.current else { unreachable!() }; + self.current = SourceInstruction::Statement { idx: idx + 1, bb }; + } + + fn visit_terminator(&mut self, term: &Terminator, location: Location) { + if !(self.skip_next || self.target.is_some()) { + self.current = SourceInstruction::Terminator { bb: self.bb }; + // Leave it as an exhaustive match to be notified when a new kind is added. + match &term.kind { + TerminatorKind::Call { func, args, destination, .. } => { + self.super_terminator(term, location); + let instance = match try_resolve_instance(self.locals, func) { + Ok(instance) => instance, + Err(reason) => { + self.super_terminator(term, location); + self.push_target(MemoryInitOp::Unsupported { reason }); + return; + } + }; + match instance.kind { + InstanceKind::Intrinsic => { + match instance.intrinsic_name().unwrap().as_str() { + intrinsic_name if can_skip_intrinsic(intrinsic_name) => { + /* Intrinsics that can be safely skipped */ + } + name if name.starts_with("atomic") => { + let num_args = + if name.starts_with("atomic_cxchg") { 3 } else { 2 }; + assert_eq!( + args.len(), + num_args, + "Unexpected number of arguments for `{name}`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(..)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + }); + } + "compare_bytes" => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `compare_bytes`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + assert!(matches!( + args[1].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, 
Mutability::Not)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: args[2].clone(), + }); + self.push_target(MemoryInitOp::Check { + operand: args[1].clone(), + count: args[2].clone(), + }); + } + "copy" + | "volatile_copy_memory" + | "volatile_copy_nonoverlapping_memory" => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `copy`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + assert!(matches!( + args[1].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: args[2].clone(), + }); + self.push_target(MemoryInitOp::Set { + operand: args[1].clone(), + count: args[2].clone(), + value: true, + position: InsertPosition::After, + }); + } + "typed_swap" => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `typed_swap`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + assert!(matches!( + args[1].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + }); + self.push_target(MemoryInitOp::Check { + operand: args[1].clone(), + count: mk_const_operand(1, location.span()), + }); + } + "unaligned_volatile_load" => { + assert_eq!( + args.len(), + 1, + "Unexpected number of arguments for `unaligned_volatile_load`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + }); + } + "volatile_load" => { + assert_eq!( + args.len(), + 1, + "Unexpected number of arguments for 
`volatile_load`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Not)) + )); + self.push_target(MemoryInitOp::Check { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + }); + } + "volatile_store" => { + assert_eq!( + args.len(), + 2, + "Unexpected number of arguments for `volatile_store`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + self.push_target(MemoryInitOp::Set { + operand: args[0].clone(), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::After, + }); + } + "write_bytes" => { + assert_eq!( + args.len(), + 3, + "Unexpected number of arguments for `write_bytes`" + ); + assert!(matches!( + args[0].ty(self.locals).unwrap().kind(), + TyKind::RigidTy(RigidTy::RawPtr(_, Mutability::Mut)) + )); + self.push_target(MemoryInitOp::Set { + operand: args[0].clone(), + count: args[2].clone(), + value: true, + position: InsertPosition::After, + }) + } + intrinsic => { + self.push_target(MemoryInitOp::Unsupported { + reason: format!("Kani does not support reasoning about memory initialization of intrinsic `{intrinsic}`."), + }); + } + } + } + InstanceKind::Item => { + if instance.is_foreign_item() { + match instance.name().as_str() { + "alloc::alloc::__rust_alloc" + | "alloc::alloc::__rust_realloc" => { + /* Memory is uninitialized, nothing to do here. */ + } + "alloc::alloc::__rust_alloc_zeroed" => { + /* Memory is initialized here, need to update shadow memory. */ + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(destination.clone()), + count: args[0].clone(), + value: true, + position: InsertPosition::After, + }); + } + "alloc::alloc::__rust_dealloc" => { + /* Memory is uninitialized here, need to update shadow memory. 
*/ + self.push_target(MemoryInitOp::Set { + operand: args[0].clone(), + count: args[1].clone(), + value: false, + position: InsertPosition::After, + }); + } + _ => {} + } + } + } + _ => {} + } + } + TerminatorKind::Drop { place, .. } => { + self.super_terminator(term, location); + let place_ty = place.ty(&self.locals).unwrap(); + // When drop is codegen'ed, a reference is taken to the place which is later implicitly coerced to a pointer. + // Hence, we need to bless this pointer as initialized. + self.push_target(MemoryInitOp::SetRef { + operand: Operand::Copy(place.clone()), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::Before, + }); + if place_ty.kind().is_raw_ptr() { + self.push_target(MemoryInitOp::Set { + operand: Operand::Copy(place.clone()), + count: mk_const_operand(1, location.span()), + value: false, + position: InsertPosition::After, + }); + } + } + TerminatorKind::Goto { .. } + | TerminatorKind::SwitchInt { .. } + | TerminatorKind::Resume + | TerminatorKind::Abort + | TerminatorKind::Return + | TerminatorKind::Unreachable + | TerminatorKind::Assert { .. } + | TerminatorKind::InlineAsm { .. 
} => self.super_terminator(term, location), + } + } + } + + fn visit_place(&mut self, place: &Place, ptx: PlaceContext, location: Location) { + for (idx, elem) in place.projection.iter().enumerate() { + let intermediate_place = + Place { local: place.local, projection: place.projection[..idx].to_vec() }; + match elem { + ProjectionElem::Deref => { + let ptr_ty = intermediate_place.ty(self.locals).unwrap(); + if ptr_ty.kind().is_raw_ptr() { + self.push_target(MemoryInitOp::Check { + operand: Operand::Copy(intermediate_place.clone()), + count: mk_const_operand(1, location.span()), + }); + } + } + ProjectionElem::Field(idx, target_ty) => { + if target_ty.kind().is_union() + && (!ptx.is_mutating() || place.projection.len() > idx + 1) + { + self.push_target(MemoryInitOp::Unsupported { + reason: "Kani does not support reasoning about memory initialization of unions.".to_string(), + }); + } + } + ProjectionElem::Index(_) + | ProjectionElem::ConstantIndex { .. } + | ProjectionElem::Subslice { .. } => { + /* For a slice to be indexed, it should be valid first. 
*/ + } + ProjectionElem::Downcast(_) => {} + ProjectionElem::OpaqueCast(_) => {} + ProjectionElem::Subtype(_) => {} + } + } + self.super_place(place, ptx, location) + } + + fn visit_operand(&mut self, operand: &Operand, location: Location) { + if let Operand::Constant(constant) = operand { + if let ConstantKind::Allocated(allocation) = constant.const_.kind() { + for (_, prov) in &allocation.provenance.ptrs { + if let GlobalAlloc::Static(_) = GlobalAlloc::from(prov.0) { + self.push_target(MemoryInitOp::Set { + operand: Operand::Constant(constant.clone()), + count: mk_const_operand(1, location.span()), + value: true, + position: InsertPosition::Before, + }); + }; + } + } + } + self.super_operand(operand, location); + } + + fn visit_rvalue(&mut self, rvalue: &Rvalue, location: Location) { + if let Rvalue::Cast(CastKind::PointerCoercion(PointerCoercion::Unsize), _, ty) = rvalue { + if let TyKind::RigidTy(RigidTy::RawPtr(pointee_ty, _)) = ty.kind() { + if pointee_ty.kind().is_trait() { + self.push_target(MemoryInitOp::Unsupported { + reason: "Kani does not support reasoning about memory initialization of unsized pointers.".to_string(), + }); + } + } + }; + self.super_rvalue(rvalue, location); + } +} + +/// Determines if the intrinsic has no memory initialization related function and hence can be +/// safely skipped. 
+fn can_skip_intrinsic(intrinsic_name: &str) -> bool { + match intrinsic_name { + "add_with_overflow" + | "arith_offset" + | "assert_inhabited" + | "assert_mem_uninitialized_valid" + | "assert_zero_valid" + | "assume" + | "bitreverse" + | "black_box" + | "breakpoint" + | "bswap" + | "caller_location" + | "ceilf32" + | "ceilf64" + | "copysignf32" + | "copysignf64" + | "cosf32" + | "cosf64" + | "ctlz" + | "ctlz_nonzero" + | "ctpop" + | "cttz" + | "cttz_nonzero" + | "discriminant_value" + | "exact_div" + | "exp2f32" + | "exp2f64" + | "expf32" + | "expf64" + | "fabsf32" + | "fabsf64" + | "fadd_fast" + | "fdiv_fast" + | "floorf32" + | "floorf64" + | "fmaf32" + | "fmaf64" + | "fmul_fast" + | "forget" + | "fsub_fast" + | "is_val_statically_known" + | "likely" + | "log10f32" + | "log10f64" + | "log2f32" + | "log2f64" + | "logf32" + | "logf64" + | "maxnumf32" + | "maxnumf64" + | "min_align_of" + | "min_align_of_val" + | "minnumf32" + | "minnumf64" + | "mul_with_overflow" + | "nearbyintf32" + | "nearbyintf64" + | "needs_drop" + | "powf32" + | "powf64" + | "powif32" + | "powif64" + | "pref_align_of" + | "raw_eq" + | "rintf32" + | "rintf64" + | "rotate_left" + | "rotate_right" + | "roundf32" + | "roundf64" + | "saturating_add" + | "saturating_sub" + | "sinf32" + | "sinf64" + | "sqrtf32" + | "sqrtf64" + | "sub_with_overflow" + | "truncf32" + | "truncf64" + | "type_id" + | "type_name" + | "unchecked_div" + | "unchecked_rem" + | "unlikely" + | "vtable_size" + | "vtable_align" + | "wrapping_add" + | "wrapping_mul" + | "wrapping_sub" => { + /* Intrinsics that do not interact with memory initialization. */ + true + } + "ptr_guaranteed_cmp" | "ptr_offset_from" | "ptr_offset_from_unsigned" | "size_of_val" => { + /* AFAICS from the documentation, none of those require the pointer arguments to be actually initialized. 
*/
+            true
+        }
+        name if name.starts_with("simd") => {
+            /* SIMD operations */
+            true
+        }
+        "copy_nonoverlapping" => unreachable!(
+            "Expected `core::intrinsics::copy_nonoverlapping` to be handled by `StatementKind::CopyNonOverlapping`"
+        ),
+        "offset" => unreachable!(
+            "Expected `core::intrinsics::offset` to be handled by `BinOp::Offset`"
+        ),
+        "unreachable" => unreachable!(
+            "Expected `std::intrinsics::unreachable` to be handled by `TerminatorKind::Unreachable`"
+        ),
+        "transmute" | "transmute_copy" | "unchecked_add" | "unchecked_mul" | "unchecked_shl"
+        | "size_of" | "unchecked_shr" | "unchecked_sub" => {
+            unreachable!("Expected intrinsic to be lowered before codegen")
+        }
+        "catch_unwind" => {
+            unimplemented!("")
+        }
+        "retag_box_to_raw" => {
+            unreachable!("This was removed in the latest Rust version.")
+        }
+        _ => {
+            /* Everything else */
+            false
+        }
+    }
+}
+
+/// Create a constant operand with a given value and span.
+fn mk_const_operand(value: usize, span: Span) -> Operand {
+    Operand::Constant(ConstOperand {
+        span,
+        user_ty: None,
+        const_: MirConst::try_from_uint(value as u128, UintTy::Usize).unwrap(),
+    })
+}
+
+/// Try removing a topmost deref projection from a place if it exists, returning a place without it.
+fn try_remove_topmost_deref(place: &Place) -> Option {
+    let mut new_place = place.clone();
+    if let Some(ProjectionElem::Deref) = new_place.projection.pop() {
+        Some(new_place)
+    } else {
+        None
+    }
+}
+
+/// Try retrieving instance for the given function operand.
+fn try_resolve_instance(locals: &[LocalDecl], func: &Operand) -> Result {
+    let ty = func.ty(locals).unwrap();
+    match ty.kind() {
+        TyKind::RigidTy(RigidTy::FnDef(def, args)) => Ok(Instance::resolve(def, &args).unwrap()),
+        _ => Err(format!(
+            "Kani does not support reasoning about memory initialization of arguments to `{ty:?}`."
+ )), + } +} diff --git a/kani-compiler/src/kani_middle/transform/check_values.rs b/kani-compiler/src/kani_middle/transform/check_values.rs index 4ecb16fab023..a7d0f14d270f 100644 --- a/kani-compiler/src/kani_middle/transform/check_values.rs +++ b/kani-compiler/src/kani_middle/transform/check_values.rs @@ -14,7 +14,9 @@ //! 1. We could merge the invalid values by the offset. //! 2. We could avoid checking places that have been checked before. use crate::args::ExtraChecks; -use crate::kani_middle::transform::body::{CheckType, MutableBody, SourceInstruction}; +use crate::kani_middle::transform::body::{ + CheckType, InsertPosition, MutableBody, SourceInstruction, +}; use crate::kani_middle::transform::check_values::SourceOp::UnsupportedCheck; use crate::kani_middle::transform::{TransformPass, TransformationType}; use crate::kani_queries::QueryDb; @@ -59,7 +61,7 @@ impl TransformPass for ValidValuePass { /// Transform the function body by inserting checks one-by-one. /// For every unsafe dereference or a transmute operation, we check all values are valid. 
- fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "transform"); let mut new_body = MutableBody::from(body); let orig_len = new_body.blocks().len(); @@ -83,13 +85,20 @@ impl ValidValuePass { for operation in instruction.operations { match operation { SourceOp::BytesValidity { ranges, target_ty, rvalue } => { - let value = body.new_assignment(rvalue, &mut source); + let value = body.new_assignment(rvalue, &mut source, InsertPosition::Before); let rvalue_ptr = Rvalue::AddressOf(Mutability::Not, Place::from(value)); for range in ranges { let result = build_limits(body, &range, rvalue_ptr.clone(), &mut source); let msg = format!("Undefined Behavior: Invalid value of type `{target_ty}`",); - body.add_check(tcx, &self.check_type, &mut source, result, &msg); + body.add_check( + tcx, + &self.check_type, + &mut source, + InsertPosition::Before, + result, + &msg, + ); } } SourceOp::DerefValidity { pointee_ty, rvalue, ranges } => { @@ -97,7 +106,14 @@ impl ValidValuePass { let result = build_limits(body, &range, rvalue.clone(), &mut source); let msg = format!("Undefined Behavior: Invalid value of type `{pointee_ty}`",); - body.add_check(tcx, &self.check_type, &mut source, result, &msg); + body.add_check( + tcx, + &self.check_type, + &mut source, + InsertPosition::Before, + result, + &msg, + ); } } SourceOp::UnsupportedCheck { check, ty } => { @@ -123,8 +139,8 @@ impl ValidValuePass { span, user_ty: None, })); - let result = body.new_assignment(rvalue, source); - body.add_check(tcx, &self.check_type, source, result, reason); + let result = body.new_assignment(rvalue, source, InsertPosition::Before); + body.add_check(tcx, &self.check_type, source, InsertPosition::Before, result, reason); } } @@ -758,30 +774,60 @@ pub fn build_limits( let start_const = body.new_const_operand(req.valid_range.start, primitive_ty, span); let 
end_const = body.new_const_operand(req.valid_range.end, primitive_ty, span); let orig_ptr = if req.offset != 0 { - let start_ptr = move_local(body.new_assignment(rvalue_ptr, source)); + let start_ptr = move_local(body.new_assignment(rvalue_ptr, source, InsertPosition::Before)); let byte_ptr = move_local(body.new_cast_ptr( start_ptr, Ty::unsigned_ty(UintTy::U8), Mutability::Not, source, + InsertPosition::Before, )); let offset_const = body.new_const_operand(req.offset as _, UintTy::Usize, span); - let offset = move_local(body.new_assignment(Rvalue::Use(offset_const), source)); - move_local(body.new_binary_op(BinOp::Offset, byte_ptr, offset, source)) + let offset = move_local(body.new_assignment( + Rvalue::Use(offset_const), + source, + InsertPosition::Before, + )); + move_local(body.new_binary_op( + BinOp::Offset, + byte_ptr, + offset, + source, + InsertPosition::Before, + )) } else { - move_local(body.new_assignment(rvalue_ptr, source)) + move_local(body.new_assignment(rvalue_ptr, source, InsertPosition::Before)) }; - let value_ptr = - body.new_cast_ptr(orig_ptr, Ty::unsigned_ty(primitive_ty), Mutability::Not, source); + let value_ptr = body.new_cast_ptr( + orig_ptr, + Ty::unsigned_ty(primitive_ty), + Mutability::Not, + source, + InsertPosition::Before, + ); let value = Operand::Copy(Place { local: value_ptr, projection: vec![ProjectionElem::Deref] }); - let start_result = body.new_binary_op(BinOp::Ge, value.clone(), start_const, source); - let end_result = body.new_binary_op(BinOp::Le, value, end_const, source); + let start_result = + body.new_binary_op(BinOp::Ge, value.clone(), start_const, source, InsertPosition::Before); + let end_result = + body.new_binary_op(BinOp::Le, value, end_const, source, InsertPosition::Before); if req.valid_range.wraps_around() { // valid >= start || valid <= end - body.new_binary_op(BinOp::BitOr, move_local(start_result), move_local(end_result), source) + body.new_binary_op( + BinOp::BitOr, + move_local(start_result), + 
move_local(end_result), + source, + InsertPosition::Before, + ) } else { // valid >= start && valid <= end - body.new_binary_op(BinOp::BitAnd, move_local(start_result), move_local(end_result), source) + body.new_binary_op( + BinOp::BitAnd, + move_local(start_result), + move_local(end_result), + source, + InsertPosition::Before, + ) } } diff --git a/kani-compiler/src/kani_middle/transform/contracts.rs b/kani-compiler/src/kani_middle/transform/contracts.rs index f5760bd4d829..eb5266e0a0eb 100644 --- a/kani-compiler/src/kani_middle/transform/contracts.rs +++ b/kani-compiler/src/kani_middle/transform/contracts.rs @@ -47,7 +47,7 @@ impl TransformPass for AnyModifiesPass { } /// Transform the function body by replacing it with the stub body. - fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "AnyModifiesPass::transform"); if instance.def.def_id() == self.kani_any.unwrap().def_id() { diff --git a/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs b/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs index 53bbd52086ac..ea7bf8625228 100644 --- a/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs +++ b/kani-compiler/src/kani_middle/transform/kani_intrinsics.rs @@ -7,8 +7,12 @@ //! information; thus, they are implemented as a transformation pass where their body get generated //! by the transformation. 
+use crate::args::{Arguments, ExtraChecks}; use crate::kani_middle::attributes::matches_diagnostic; -use crate::kani_middle::transform::body::{CheckType, MutableBody, SourceInstruction}; +use crate::kani_middle::transform::body::{ + CheckType, InsertPosition, MutableBody, SourceInstruction, +}; +use crate::kani_middle::transform::check_uninit::PointeeInfo; use crate::kani_middle::transform::check_values::{build_limits, ty_validity_per_offset}; use crate::kani_middle::transform::{TransformPass, TransformationType}; use crate::kani_queries::QueryDb; @@ -18,15 +22,24 @@ use stable_mir::mir::{ BinOp, Body, ConstOperand, Operand, Place, Rvalue, Statement, StatementKind, RETURN_LOCAL, }; use stable_mir::target::MachineInfo; -use stable_mir::ty::{MirConst, RigidTy, TyKind}; +use stable_mir::ty::{FnDef, MirConst, RigidTy, Ty, TyKind, UintTy}; +use std::collections::HashMap; use std::fmt::Debug; use strum_macros::AsRefStr; use tracing::trace; +use super::check_uninit::{ + get_mem_init_fn_def, mk_layout_operand, resolve_mem_init_fn, PointeeLayout, +}; + /// Generate the body for a few Kani intrinsics. #[derive(Debug)] pub struct IntrinsicGeneratorPass { pub check_type: CheckType, + /// Used to cache FnDef lookups of injected memory initialization functions. + pub mem_init_fn_cache: HashMap<&'static str, FnDef>, + /// Used to enable intrinsics depending on the flags passed. + pub arguments: Arguments, } impl TransformPass for IntrinsicGeneratorPass { @@ -46,10 +59,12 @@ impl TransformPass for IntrinsicGeneratorPass { /// Transform the function body by inserting checks one-by-one. /// For every unsafe dereference or a transmute operation, we check all values are valid. 
- fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "transform"); if matches_diagnostic(tcx, instance.def, Intrinsics::KaniValidValue.as_ref()) { (true, self.valid_value_body(tcx, body)) + } else if matches_diagnostic(tcx, instance.def, Intrinsics::KaniIsInitialized.as_ref()) { + (true, self.is_initialized_body(tcx, body)) } else { (false, body) } @@ -86,7 +101,7 @@ impl IntrinsicGeneratorPass { })), ); let stmt = Statement { kind: assign, span }; - new_body.insert_stmt(stmt, &mut terminator); + new_body.insert_stmt(stmt, &mut terminator, InsertPosition::Before); let machine_info = MachineInfo::target(); // The first and only argument type. @@ -110,7 +125,7 @@ impl IntrinsicGeneratorPass { ); let assign = StatementKind::Assign(Place::from(ret_var), rvalue); let stmt = Statement { kind: assign, span }; - new_body.insert_stmt(stmt, &mut terminator); + new_body.insert_stmt(stmt, &mut terminator, InsertPosition::Before); } } Err(msg) => { @@ -120,11 +135,169 @@ impl IntrinsicGeneratorPass { span, user_ty: None, })); - let result = new_body.new_assignment(rvalue, &mut terminator); + let result = + new_body.new_assignment(rvalue, &mut terminator, InsertPosition::Before); let reason = format!( "Kani currently doesn't support checking validity of `{target_ty}`. {msg}" ); - new_body.add_check(tcx, &self.check_type, &mut terminator, result, &reason); + new_body.add_check( + tcx, + &self.check_type, + &mut terminator, + InsertPosition::Before, + result, + &reason, + ); + } + } + new_body.into() + } + + /// Generate the body for `is_initialized`, which looks like the following + /// + /// ``` + /// pub fn is_initialized(ptr: *const T, len: usize) -> bool { + /// let layout = ... // Byte mask representing the layout of T. 
+ /// __kani_mem_init_sm_get(ptr, layout, len) + /// } + /// ``` + fn is_initialized_body(&mut self, tcx: TyCtxt, body: Body) -> Body { + let mut new_body = MutableBody::from(body); + new_body.clear_body(); + + // Initialize return variable with True. + let ret_var = RETURN_LOCAL; + let mut terminator = SourceInstruction::Terminator { bb: 0 }; + let span = new_body.locals()[ret_var].span; + let assign = StatementKind::Assign( + Place::from(ret_var), + Rvalue::Use(Operand::Constant(ConstOperand { + span, + user_ty: None, + const_: MirConst::from_bool(true), + })), + ); + let stmt = Statement { kind: assign, span }; + new_body.insert_stmt(stmt, &mut terminator, InsertPosition::Before); + + if !self.arguments.ub_check.contains(&ExtraChecks::Uninit) { + // Short-circut if uninitialized memory checks are not enabled. + return new_body.into(); + } + + // The first argument type. + let arg_ty = new_body.locals()[1].ty; + let TyKind::RigidTy(RigidTy::RawPtr(target_ty, _)) = arg_ty.kind() else { unreachable!() }; + let pointee_info = PointeeInfo::from_ty(target_ty); + match pointee_info { + Ok(pointee_info) => { + match pointee_info.layout() { + PointeeLayout::Sized { layout } => { + if layout.is_empty() { + // Encountered a ZST, so we can short-circut here. + return new_body.into(); + } + let is_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def( + tcx, + "KaniIsPtrInitialized", + &mut self.mem_init_fn_cache, + ), + layout.len(), + *pointee_info.ty(), + ); + let layout_operand = mk_layout_operand( + &mut new_body, + &mut terminator, + InsertPosition::Before, + &layout, + ); + new_body.add_call( + &is_ptr_initialized_instance, + &mut terminator, + InsertPosition::Before, + vec![ + Operand::Copy(Place::from(1)), + layout_operand, + Operand::Copy(Place::from(2)), + ], + Place::from(ret_var), + ); + } + PointeeLayout::Slice { element_layout } => { + // Since `str`` is a separate type, need to differentiate between [T] and str. 
+ let (slicee_ty, diagnostic) = match pointee_info.ty().kind() { + TyKind::RigidTy(RigidTy::Slice(slicee_ty)) => { + (slicee_ty, "KaniIsSlicePtrInitialized") + } + TyKind::RigidTy(RigidTy::Str) => { + (Ty::unsigned_ty(UintTy::U8), "KaniIsStrPtrInitialized") + } + _ => unreachable!(), + }; + let is_ptr_initialized_instance = resolve_mem_init_fn( + get_mem_init_fn_def(tcx, diagnostic, &mut self.mem_init_fn_cache), + element_layout.len(), + slicee_ty, + ); + let layout_operand = mk_layout_operand( + &mut new_body, + &mut terminator, + InsertPosition::Before, + &element_layout, + ); + new_body.add_call( + &is_ptr_initialized_instance, + &mut terminator, + InsertPosition::Before, + vec![Operand::Copy(Place::from(1)), layout_operand], + Place::from(ret_var), + ); + } + PointeeLayout::TraitObject => { + let rvalue = Rvalue::Use(Operand::Constant(ConstOperand { + const_: MirConst::from_bool(false), + span, + user_ty: None, + })); + let result = new_body.new_assignment( + rvalue, + &mut terminator, + InsertPosition::Before, + ); + let reason: &str = "Kani does not support reasoning about memory initialization of pointers to trait objects."; + + new_body.add_check( + tcx, + &self.check_type, + &mut terminator, + InsertPosition::Before, + result, + &reason, + ); + } + }; + } + Err(msg) => { + // We failed to retrieve the type layout. + let rvalue = Rvalue::Use(Operand::Constant(ConstOperand { + const_: MirConst::from_bool(false), + span, + user_ty: None, + })); + let result = + new_body.new_assignment(rvalue, &mut terminator, InsertPosition::Before); + let reason = format!( + "Kani currently doesn't support checking memory initialization of `{target_ty}`. 
{msg}" + ); + new_body.add_check( + tcx, + &self.check_type, + &mut terminator, + InsertPosition::Before, + result, + &reason, + ); } } new_body.into() @@ -135,4 +308,5 @@ impl IntrinsicGeneratorPass { #[strum(serialize_all = "PascalCase")] enum Intrinsics { KaniValidValue, + KaniIsInitialized, } diff --git a/kani-compiler/src/kani_middle/transform/mod.rs b/kani-compiler/src/kani_middle/transform/mod.rs index 8d5c61f55c92..56ab0be493c4 100644 --- a/kani-compiler/src/kani_middle/transform/mod.rs +++ b/kani-compiler/src/kani_middle/transform/mod.rs @@ -18,6 +18,7 @@ //! case is added. use crate::kani_middle::codegen_units::CodegenUnit; use crate::kani_middle::transform::body::CheckType; +use crate::kani_middle::transform::check_uninit::UninitPass; use crate::kani_middle::transform::check_values::ValidValuePass; use crate::kani_middle::transform::contracts::AnyModifiesPass; use crate::kani_middle::transform::kani_intrinsics::IntrinsicGeneratorPass; @@ -30,6 +31,7 @@ use std::collections::HashMap; use std::fmt::Debug; pub(crate) mod body; +mod check_uninit; mod check_values; mod contracts; mod kani_intrinsics; @@ -64,7 +66,23 @@ impl BodyTransformation { // This has to come after stubs since we want this to replace the stubbed body. transformer.add_pass(queries, AnyModifiesPass::new(tcx, &unit)); transformer.add_pass(queries, ValidValuePass { check_type: check_type.clone() }); - transformer.add_pass(queries, IntrinsicGeneratorPass { check_type }); + // Putting `UninitPass` after `ValidValuePass` makes sure that the code generated by + // `UninitPass` does not get unnecessarily instrumented by valid value checks. However, it + // would also make sense to check that the values are initialized before checking their + // validity. In the future, it would be nice to have a mechanism to skip automatically + // generated code for future instrumentation passes. 
+ transformer.add_pass( + queries, + UninitPass { check_type: check_type.clone(), mem_init_fn_cache: HashMap::new() }, + ); + transformer.add_pass( + queries, + IntrinsicGeneratorPass { + check_type, + mem_init_fn_cache: HashMap::new(), + arguments: queries.args().clone(), + }, + ); transformer } @@ -79,7 +97,7 @@ impl BodyTransformation { None => { let mut body = instance.body().unwrap(); let mut modified = false; - for pass in self.stub_passes.iter().chain(self.inst_passes.iter()) { + for pass in self.stub_passes.iter_mut().chain(self.inst_passes.iter_mut()) { let result = pass.transform(tcx, body, instance); modified |= result.0; body = result.1; @@ -127,7 +145,7 @@ pub(crate) trait TransformPass: Debug { Self: Sized; /// Run a transformation pass in the function body. - fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body); + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body); } /// The transformation result. diff --git a/kani-compiler/src/kani_middle/transform/stubs.rs b/kani-compiler/src/kani_middle/transform/stubs.rs index 4f249afdd45a..3dbd667c3943 100644 --- a/kani-compiler/src/kani_middle/transform/stubs.rs +++ b/kani-compiler/src/kani_middle/transform/stubs.rs @@ -43,7 +43,7 @@ impl TransformPass for FnStubPass { } /// Transform the function body by replacing it with the stub body. - fn transform(&self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "transform"); let ty = instance.ty(); if let TyKind::RigidTy(RigidTy::FnDef(fn_def, mut args)) = ty.kind() { @@ -103,7 +103,7 @@ impl TransformPass for ExternFnStubPass { /// /// We need to find function calls and function pointers. /// We should replace this with a visitor once StableMIR includes a mutable one. 
- fn transform(&self, _tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { + fn transform(&mut self, _tcx: TyCtxt, body: Body, instance: Instance) -> (bool, Body) { trace!(function=?instance.name(), "transform"); let mut new_body = MutableBody::from(body); let changed = false; diff --git a/kani-driver/src/call_single_file.rs b/kani-driver/src/call_single_file.rs index 18ca11b0a30f..7cbe150427e9 100644 --- a/kani-driver/src/call_single_file.rs +++ b/kani-driver/src/call_single_file.rs @@ -139,6 +139,10 @@ impl KaniSession { flags.push("--ub-check=ptr_to_ref_cast".into()) } + if self.args.common_args.unstable_features.contains(UnstableFeature::UninitChecks) { + flags.push("--ub-check=uninit".into()) + } + if self.args.ignore_locals_lifetime { flags.push("--ignore-storage-markers".into()) } diff --git a/kani_metadata/src/unstable.rs b/kani_metadata/src/unstable.rs index 9f82e4b02a64..68e4fba28819 100644 --- a/kani_metadata/src/unstable.rs +++ b/kani_metadata/src/unstable.rs @@ -91,6 +91,8 @@ pub enum UnstableFeature { GhostState, /// Automatically check that pointers are valid when casting them to references. PtrToRefCastChecks, + /// Automatically check that uninitialized memory is not used. + UninitChecks, /// Enable an unstable option or subcommand. 
UnstableOptions, } diff --git a/library/kani/src/lib.rs b/library/kani/src/lib.rs index 0a52a9516398..6eab2a331811 100644 --- a/library/kani/src/lib.rs +++ b/library/kani/src/lib.rs @@ -35,6 +35,7 @@ pub mod vec; #[doc(hidden)] pub mod internal; +mod mem_init; mod models; pub use arbitrary::Arbitrary; diff --git a/library/kani/src/mem.rs b/library/kani/src/mem.rs index c40a1aa696e3..0b390e74288d 100644 --- a/library/kani/src/mem.rs +++ b/library/kani/src/mem.rs @@ -120,6 +120,7 @@ where let (thin_ptr, metadata) = ptr.to_raw_parts(); metadata.is_ptr_aligned(thin_ptr, Internal) && is_inbounds(&metadata, thin_ptr) + && is_initialized(ptr, 1) && unsafe { has_valid_value(ptr) } } @@ -147,7 +148,7 @@ where ::Metadata: PtrProperties, { let (thin_ptr, metadata) = ptr.to_raw_parts(); - is_inbounds(&metadata, thin_ptr) && unsafe { has_valid_value(ptr) } + is_inbounds(&metadata, thin_ptr) && is_initialized(ptr, 1) && unsafe { has_valid_value(ptr) } } /// Checks that `data_ptr` points to an allocation that can hold data of size calculated from `T`. @@ -290,6 +291,13 @@ unsafe fn has_valid_value(_ptr: *const T) -> bool { kani_intrinsic() } +/// Check whether `len * size_of::()` bytes are initialized starting from `ptr`. +#[rustc_diagnostic_item = "KaniIsInitialized"] +#[inline(never)] +pub fn is_initialized(_ptr: *const T, _len: usize) -> bool { + kani_intrinsic() +} + /// Get the object ID of the given pointer. #[rustc_diagnostic_item = "KaniPointerObject"] #[inline(never)] diff --git a/library/kani/src/mem_init.rs b/library/kani/src/mem_init.rs new file mode 100644 index 000000000000..37832ea32604 --- /dev/null +++ b/library/kani/src/mem_init.rs @@ -0,0 +1,122 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! This module uses shadow memory API to track memory initialization of raw pointers. +//! +//! Currently, memory initialization is tracked on per-byte basis, so each byte of memory pointed to +//! 
by raw pointers could be either initialized or uninitialized. Compiler automatically inserts +//! calls to `is_xxx_initialized` and `set_xxx_initialized` at appropriate locations to get or set +//! the initialization status of the memory pointed to. Padding bytes are always considered +//! uninitialized: type layout is determined at compile time and statically injected into the +//! program (see `Layout`). + +// Definitions in this module are not meant to be visible to the end user, only the compiler. +#![allow(dead_code)] + +use crate::shadow::ShadowMem; + +/// Bytewise mask, representing which bytes of a type are data and which are padding. +/// For example, for a type like this: +/// ``` +/// #[repr(C)] +/// struct Foo { +/// a: u16, +/// b: u8, +/// } +/// ``` +/// the layout would be [true, true, true, false]; +type Layout = [bool; N]; + +/// Global shadow memory object for tracking memory initialization. +#[rustc_diagnostic_item = "KaniMemInitShadowMem"] +static mut MEM_INIT_SHADOW_MEM: ShadowMem = ShadowMem::new(false); + +/// Get initialization state of `len` items laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniIsUnitPtrInitialized"] +fn is_unit_ptr_initialized(ptr: *const (), layout: Layout, len: usize) -> bool { + let mut count: usize = 0; + while count < len { + let mut offset: usize = 0; + while offset < N { + unsafe { + if layout[offset] + && !MEM_INIT_SHADOW_MEM.get((ptr as *const u8).add(count * N + offset)) + { + return false; + } + offset += 1; + } + } + count += 1; + } + true +} + +/// Set initialization state to `value` for `len` items laid out according to the `layout` starting at address `ptr`. 
+#[rustc_diagnostic_item = "KaniSetUnitPtrInitialized"] +fn set_unit_ptr_initialized( + ptr: *const (), + layout: Layout, + len: usize, + value: bool, +) { + let mut count: usize = 0; + while count < len { + let mut offset: usize = 0; + while offset < N { + unsafe { + MEM_INIT_SHADOW_MEM + .set((ptr as *const u8).add(count * N + offset), value && layout[offset]); + } + offset += 1; + } + count += 1; + } +} + +/// Get initialization state of `len` items laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniIsPtrInitialized"] +fn is_ptr_initialized(ptr: *const T, layout: Layout, len: usize) -> bool { + let (ptr, _) = ptr.to_raw_parts(); + is_unit_ptr_initialized(ptr, layout, len) +} + +/// Set initialization state to `value` for `len` items laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniSetPtrInitialized"] +fn set_ptr_initialized( + ptr: *const T, + layout: Layout, + len: usize, + value: bool, +) { + let (ptr, _) = ptr.to_raw_parts(); + set_unit_ptr_initialized(ptr, layout, len, value); +} + +/// Get initialization state of the slice, items of which are laid out according to the `layout` starting at address `ptr`. +#[rustc_diagnostic_item = "KaniIsSlicePtrInitialized"] +fn is_slice_ptr_initialized(ptr: *const [T], layout: Layout) -> bool { + let (ptr, len) = ptr.to_raw_parts(); + is_unit_ptr_initialized(ptr, layout, len) +} + +/// Set initialization state of the slice, items of which are laid out according to the `layout` starting at address `ptr` to `value`. +#[rustc_diagnostic_item = "KaniSetSlicePtrInitialized"] +fn set_slice_ptr_initialized(ptr: *const [T], layout: Layout, value: bool) { + let (ptr, len) = ptr.to_raw_parts(); + set_unit_ptr_initialized(ptr, layout, len, value); +} + +/// Get initialization state of the string slice, items of which are laid out according to the `layout` starting at address `ptr`. 
+#[rustc_diagnostic_item = "KaniIsStrPtrInitialized"] +fn is_str_ptr_initialized(ptr: *const str, layout: Layout) -> bool { + let (ptr, len) = ptr.to_raw_parts(); + is_unit_ptr_initialized(ptr, layout, len) +} + +/// Set initialization state of the string slice, items of which are laid out according to the `layout` starting at address `ptr` to `value`. +#[rustc_diagnostic_item = "KaniSetStrPtrInitialized"] +fn set_str_ptr_initialized(ptr: *const str, layout: Layout, value: bool) { + let (ptr, len) = ptr.to_raw_parts(); + set_unit_ptr_initialized(ptr, layout, len, value); +} diff --git a/library/kani_core/src/mem.rs b/library/kani_core/src/mem.rs index 8dd4a27cb03a..3b10856765a5 100644 --- a/library/kani_core/src/mem.rs +++ b/library/kani_core/src/mem.rs @@ -125,6 +125,7 @@ macro_rules! kani_mem { let (thin_ptr, metadata) = ptr.to_raw_parts(); metadata.is_ptr_aligned(thin_ptr, Internal) && is_inbounds(&metadata, thin_ptr) + && is_initialized(ptr, 1) && unsafe { has_valid_value(ptr) } } @@ -153,7 +154,9 @@ macro_rules! kani_mem { ::Metadata: PtrProperties, { let (thin_ptr, metadata) = ptr.to_raw_parts(); - is_inbounds(&metadata, thin_ptr) && unsafe { has_valid_value(ptr) } + is_inbounds(&metadata, thin_ptr) + && is_initialized(ptr, 1) + && unsafe { has_valid_value(ptr) } } /// Checks that `data_ptr` points to an allocation that can hold data of size calculated from `T`. @@ -296,6 +299,13 @@ macro_rules! kani_mem { kani_intrinsic() } + /// Check whether `len * size_of::()` bytes are initialized starting from `ptr`. + #[rustc_diagnostic_item = "KaniIsInitialized"] + #[inline(never)] + pub fn is_initialized(_ptr: *const T, _len: usize) -> bool { + kani_intrinsic() + } + /// Get the object ID of the given pointer. 
#[rustc_diagnostic_item = "KaniPointerObject"] #[inline(never)] diff --git a/tests/expected/uninit/access-padding-uninit/access-padding-uninit.rs b/tests/expected/uninit/access-padding-uninit/access-padding-uninit.rs new file mode 100644 index 000000000000..d6e735e219ad --- /dev/null +++ b/tests/expected/uninit/access-padding-uninit/access-padding-uninit.rs @@ -0,0 +1,16 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr::addr_of; + +#[repr(C)] +struct S(u32, u8); + +/// Checks that Kani catches an attempt to access padding of a struct using raw pointers. +#[kani::proof] +fn check_uninit_padding() { + let s = S(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let padding = unsafe { *(ptr.add(5)) }; // ~ERROR: padding bytes are uninitialized, so reading them is UB. +} diff --git a/tests/expected/uninit/access-padding-uninit/expected b/tests/expected/uninit/access-padding-uninit/expected new file mode 100644 index 000000000000..da8d15b2dbb9 --- /dev/null +++ b/tests/expected/uninit/access-padding-uninit/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/access-padding-via-cast/access-padding-via-cast.rs b/tests/expected/uninit/access-padding-via-cast/access-padding-via-cast.rs new file mode 100644 index 000000000000..b73bebc827bc --- /dev/null +++ b/tests/expected/uninit/access-padding-via-cast/access-padding-via-cast.rs @@ -0,0 +1,20 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +#[repr(C)] +struct S(u8, u16); + +/// Checks that Kani catches an attempt to access padding of a struct using casting to different types. 
+#[kani::proof] +fn check_uninit_padding_after_cast() { + unsafe { + let mut s = S(0, 0); + let sptr = ptr::addr_of_mut!(s); + let sptr2 = sptr as *mut [u8; 4]; + *sptr2 = [0; 4]; + *sptr = S(0, 0); // should reset the padding + let val = *sptr2; // ~ERROR: encountered uninitialized memory + } +} diff --git a/tests/expected/uninit/access-padding-via-cast/expected b/tests/expected/uninit/access-padding-via-cast/expected new file mode 100644 index 000000000000..12c5c0a4a439 --- /dev/null +++ b/tests/expected/uninit/access-padding-via-cast/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*mut [u8; 4]` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/alloc-to-slice/alloc-to-slice.rs b/tests/expected/uninit/alloc-to-slice/alloc-to-slice.rs new file mode 100644 index 000000000000..3c4420f5791f --- /dev/null +++ b/tests/expected/uninit/alloc-to-slice/alloc-to-slice.rs @@ -0,0 +1,20 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::alloc::{alloc, dealloc, Layout}; +use std::slice::from_raw_parts; + +/// Checks that Kani catches an attempt to form a slice from uninitialized memory. +#[kani::proof] +fn check_uninit_slice() { + let layout = Layout::from_size_align(16, 8).unwrap(); + unsafe { + let ptr = alloc(layout); + *ptr = 0x41; + *ptr.add(1) = 0x42; + *ptr.add(2) = 0x43; + *ptr.add(3) = 0x44; + let uninit_slice = from_raw_parts(ptr, 16); // ~ERROR: forming a slice from unitialized memory is UB. 
+ } +} diff --git a/tests/expected/uninit/alloc-to-slice/expected b/tests/expected/uninit/alloc-to-slice/expected new file mode 100644 index 000000000000..f8347a591edf --- /dev/null +++ b/tests/expected/uninit/alloc-to-slice/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const [u8]` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/vec-read-bad-len/expected b/tests/expected/uninit/vec-read-bad-len/expected new file mode 100644 index 000000000000..f8347a591edf --- /dev/null +++ b/tests/expected/uninit/vec-read-bad-len/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const [u8]` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/vec-read-bad-len/vec-read-bad-len.rs b/tests/expected/uninit/vec-read-bad-len/vec-read-bad-len.rs new file mode 100644 index 000000000000..9778bb11a277 --- /dev/null +++ b/tests/expected/uninit/vec-read-bad-len/vec-read-bad-len.rs @@ -0,0 +1,15 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ops::Index; + +/// Checks that Kani catches an attempt to read uninitialized memory from a vector with bad length. +#[kani::proof] +fn check_vec_read_bad_len() { + let mut v: Vec = Vec::with_capacity(10); + unsafe { + v.set_len(5); // even though length is now 5, vector is still uninitialized + } + let uninit = v.index(0); // ~ERROR: reading from unitialized memory is UB. 
+} diff --git a/tests/expected/uninit/vec-read-semi-init/expected b/tests/expected/uninit/vec-read-semi-init/expected new file mode 100644 index 000000000000..da8d15b2dbb9 --- /dev/null +++ b/tests/expected/uninit/vec-read-semi-init/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. \ No newline at end of file diff --git a/tests/expected/uninit/vec-read-semi-init/vec-read-semi-init.rs b/tests/expected/uninit/vec-read-semi-init/vec-read-semi-init.rs new file mode 100644 index 000000000000..4330f5f53023 --- /dev/null +++ b/tests/expected/uninit/vec-read-semi-init/vec-read-semi-init.rs @@ -0,0 +1,11 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +/// Checks that Kani catches an attempt to read uninitialized memory from a semi-initialized vector. +#[kani::proof] +fn check_vec_read_semi_init() { + let mut v: Vec = Vec::with_capacity(10); + unsafe { *v.as_mut_ptr().add(4) = 0x42 }; + let uninit = unsafe { *v.as_ptr().add(5) }; // ~ERROR: reading from unitialized memory is UB. +} diff --git a/tests/expected/uninit/vec-read-uninit/expected b/tests/expected/uninit/vec-read-uninit/expected new file mode 100644 index 000000000000..da8d15b2dbb9 --- /dev/null +++ b/tests/expected/uninit/vec-read-uninit/expected @@ -0,0 +1,5 @@ +Failed Checks: Undefined Behavior: Reading from an uninitialized pointer of type `*const u8` + +VERIFICATION:- FAILED + +Complete - 0 successfully verified harnesses, 1 failures, 1 total. 
\ No newline at end of file diff --git a/tests/expected/uninit/vec-read-uninit/vec-read-uninit.rs b/tests/expected/uninit/vec-read-uninit/vec-read-uninit.rs new file mode 100644 index 000000000000..c322b4d33bb2 --- /dev/null +++ b/tests/expected/uninit/vec-read-uninit/vec-read-uninit.rs @@ -0,0 +1,10 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +/// Checks that Kani catches an attempt to read uninitialized memory from an uninitialized vector. +#[kani::proof] +fn check_vec_read_uninit() { + let v: Vec = Vec::with_capacity(10); + let uninit = unsafe { *v.as_ptr().add(5) }; // ~ERROR: reading from unitialized memory is UB. +} diff --git a/tests/kani/Uninit/access-padding-enum-diverging-variants.rs b/tests/kani/Uninit/access-padding-enum-diverging-variants.rs new file mode 100644 index 000000000000..7feb493a5b3f --- /dev/null +++ b/tests/kani/Uninit/access-padding-enum-diverging-variants.rs @@ -0,0 +1,34 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +use std::ptr::addr_of; + +/// The layout of this enum is variable, so Kani cannot check memory initialization statically. +#[repr(C)] +enum E1 { + A(u16, u8), + B(u16), +} + +/// The layout of this enum is variable, but both of the arms have the same padding, so Kani should +/// support that. 
+#[repr(C)] +enum E2 { + A(u16), + B(u8, u8), +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_unsupported() { + let s = E1::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; +} + +#[kani::proof] +fn access_padding_supported() { + let s = E2::A(0); + let ptr: *const u8 = addr_of!(s) as *const u8; +} diff --git a/tests/kani/Uninit/access-padding-enum-multiple-variants.rs b/tests/kani/Uninit/access-padding-enum-multiple-variants.rs new file mode 100644 index 000000000000..fb8fae06ca59 --- /dev/null +++ b/tests/kani/Uninit/access-padding-enum-multiple-variants.rs @@ -0,0 +1,49 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +use std::ptr::addr_of; + +/// The layout of this enum is the following (D = data, P = padding): +/// 0 1 2 3 4 5 6 7 +/// [D, D, D, D, D, D, D, P] +/// ---------- ------- +/// \_ tag (i32) \_ A|B(u16, u8) +#[repr(C)] +enum E { + A(u16, u8), + B(u16, u8), +} + +#[kani::proof] +fn access_padding_init_a() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_0 = unsafe { *(ptr.add(0)) }; + let at_4 = unsafe { *(ptr.add(4)) }; +} + +#[kani::proof] +fn access_padding_init_b() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_0 = unsafe { *(ptr.add(0)) }; + let at_4 = unsafe { *(ptr.add(4)) }; +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_uninit_a() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_7 = unsafe { *(ptr.add(7)) }; +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_uninit_b() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_7 = unsafe { *(ptr.add(7)) }; +} diff --git a/tests/kani/Uninit/access-padding-enum-single-field.rs b/tests/kani/Uninit/access-padding-enum-single-field.rs new file mode 100644 index 000000000000..c283b603f705 --- /dev/null +++ 
b/tests/kani/Uninit/access-padding-enum-single-field.rs @@ -0,0 +1,34 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +use std::ptr::addr_of; + +/// The layout of this enum is the following (D = data, P = padding): +/// 0 1 2 3 4 5 6 7 8 9 A B C D E F +/// [D, D, D, D, P, P, P, P, D, D, D, D, D, D, D, D] +/// ---------- ---------------------- +/// \_ tag (i32) \_ A(u64) +#[repr(C)] +enum E { + A(u64), +} + +#[kani::proof] +fn access_padding_init() { + let s = E::A(0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_0 = unsafe { *(ptr.add(0)) }; + let at_3 = unsafe { *(ptr.add(3)) }; + let at_9 = unsafe { *(ptr.add(9)) }; +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_uninit() { + let s = E::A(0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_4 = unsafe { *(ptr.add(4)) }; + let at_7 = unsafe { *(ptr.add(7)) }; +} diff --git a/tests/kani/Uninit/access-padding-enum-single-variant.rs b/tests/kani/Uninit/access-padding-enum-single-variant.rs new file mode 100644 index 000000000000..f33cfe7ce6fb --- /dev/null +++ b/tests/kani/Uninit/access-padding-enum-single-variant.rs @@ -0,0 +1,32 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT +// kani-flags: -Z ghost-state -Z uninit-checks + +use std::ptr; +use std::ptr::addr_of; + +/// The layout of this enum is the following (D = data, P = padding): +/// 0 1 2 3 4 5 6 7 +/// [D, D, D, D, D, D, D, P] +/// ---------- ------- +/// \_ tag (i32) \_ A(u16, u8) +#[repr(C)] +enum E { + A(u16, u8), +} + +#[kani::proof] +fn access_padding_init() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_0 = unsafe { *(ptr.add(0)) }; + let at_4 = unsafe { *(ptr.add(4)) }; +} + +#[kani::proof] +#[kani::should_panic] +fn access_padding_uninit() { + let s = E::A(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let at_7 = unsafe { *(ptr.add(7)) 
}; +} diff --git a/tests/perf/uninit/Cargo.toml b/tests/perf/uninit/Cargo.toml new file mode 100644 index 000000000000..9f44cb3fe103 --- /dev/null +++ b/tests/perf/uninit/Cargo.toml @@ -0,0 +1,14 @@ +# Copyright Kani Contributors +# SPDX-License-Identifier: Apache-2.0 OR MIT + +[package] +name = "uninit" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[package.metadata.kani] +unstable = { ghost-state = true, uninit-checks = true } diff --git a/tests/perf/uninit/expected b/tests/perf/uninit/expected new file mode 100644 index 000000000000..f7b4fd303a77 --- /dev/null +++ b/tests/perf/uninit/expected @@ -0,0 +1 @@ +Complete - 5 successfully verified harnesses, 0 failures, 5 total. \ No newline at end of file diff --git a/tests/perf/uninit/src/lib.rs b/tests/perf/uninit/src/lib.rs new file mode 100644 index 000000000000..86b101c0e5d8 --- /dev/null +++ b/tests/perf/uninit/src/lib.rs @@ -0,0 +1,68 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::alloc::{alloc, alloc_zeroed, Layout}; +use std::ptr; +use std::ptr::addr_of; +use std::slice::from_raw_parts; + +#[repr(C)] +struct S(u32, u8); + +#[kani::proof] +fn access_padding_init() { + let s = S(0, 0); + let ptr: *const u8 = addr_of!(s) as *const u8; + let data = unsafe { *(ptr.add(3)) }; // Accessing data bytes is valid. +} + +#[kani::proof] +fn alloc_to_slice() { + let layout = Layout::from_size_align(32, 8).unwrap(); + unsafe { + let ptr = alloc(layout); + *ptr = 0x41; + *ptr.add(1) = 0x42; + *ptr.add(2) = 0x43; + *ptr.add(3) = 0x44; + *ptr.add(16) = 0x00; + let val = *(ptr.add(2)); // Accessing previously initialized byte is valid. + } +} + +#[kani::proof] +fn alloc_zeroed_to_slice() { + let layout = Layout::from_size_align(32, 8).unwrap(); + unsafe { + // This returns initialized memory, so any further accesses are valid. 
+ let ptr = alloc_zeroed(layout); + *ptr = 0x41; + *ptr.add(1) = 0x42; + *ptr.add(2) = 0x43; + *ptr.add(3) = 0x44; + *ptr.add(16) = 0x00; + let slice1 = from_raw_parts(ptr, 16); + let slice2 = from_raw_parts(ptr.add(16), 16); + } +} + +#[kani::proof] +fn struct_padding_and_arr_init() { + unsafe { + let mut s = S(0, 0); + let sptr = ptr::addr_of_mut!(s); + let sptr2 = sptr as *mut [u8; 4]; + *sptr2 = [0; 4]; + *sptr = S(0, 0); + // Both S(u32, u8) and [u8; 4] have the same layout, so the memory is initialized. + let val = *sptr2; + } +} + +#[kani::proof] +fn vec_read_init() { + let mut v: Vec<u8> = Vec::with_capacity(10); + unsafe { *v.as_mut_ptr().add(5) = 0x42 }; + let def = unsafe { *v.as_ptr().add(5) }; // Accessing previously initialized byte is valid. + let x = def + 1; +} diff --git a/tests/std-checks/core/mem.expected b/tests/std-checks/core/mem.expected index 8a3b89a8f66a..1484c83901fc 100644 --- a/tests/std-checks/core/mem.expected +++ b/tests/std-checks/core/mem.expected @@ -1,3 +1,7 @@ +Checking harness mem::verify::check_swap_unit... + +Failed Checks: ptr NULL or writable up to size + Summary: Verification failed for - mem::verify::check_swap_unit -Complete - 3 successfully verified harnesses, 1 failures, 4 total. +Complete - 6 successfully verified harnesses, 1 failures, 7 total. diff --git a/tests/std-checks/core/slice.expected b/tests/std-checks/core/slice.expected new file mode 100644 index 000000000000..01a90d50b557 --- /dev/null +++ b/tests/std-checks/core/slice.expected @@ -0,0 +1 @@ +Complete - 1 successfully verified harnesses, 0 failures, 1 total. 
diff --git a/tests/std-checks/core/src/lib.rs b/tests/std-checks/core/src/lib.rs index f0e5b480a2d2..b0a4fbc6154f 100644 --- a/tests/std-checks/core/src/lib.rs +++ b/tests/std-checks/core/src/lib.rs @@ -7,3 +7,4 @@ extern crate kani; pub mod mem; pub mod ptr; +pub mod slice; diff --git a/tests/std-checks/core/src/mem.rs b/tests/std-checks/core/src/mem.rs index 4f41d176a73a..b0400d0a75f5 100644 --- a/tests/std-checks/core/src/mem.rs +++ b/tests/std-checks/core/src/mem.rs @@ -18,6 +18,11 @@ pub mod contracts { pub fn swap(x: &mut T, y: &mut T) { std::mem::swap(x, y) } + + #[kani::modifies(dest)] + pub fn replace(dest: &mut T, src: T) -> T { + std::mem::replace(dest, src) + } } #[cfg(kani)] @@ -70,4 +75,34 @@ mod verify { std::mem::forget(x); std::mem::forget(y); } + + #[kani::proof_for_contract(contracts::replace)] + pub fn check_replace_primitive() { + let mut x: u8 = kani::any(); + let x_before = x; + + let y: u8 = kani::any(); + let x_returned = contracts::replace(&mut x, y); + + kani::assert(x_before == x_returned, "x_before == x_returned"); + } + + #[kani::proof_for_contract(contracts::replace)] + pub fn check_replace_adt_no_drop() { + let mut x: CannotDrop = kani::any(); + let y: CannotDrop = kani::any(); + let new_x = contracts::replace(&mut x, y); + std::mem::forget(x); + std::mem::forget(new_x); + } + + /// Memory replace logic is optimized according to the size and alignment of a type. 
+ #[kani::proof_for_contract(contracts::replace)] + pub fn check_replace_large_adt_no_drop() { + let mut x: CannotDrop<[u128; 4]> = kani::any(); + let y: CannotDrop<[u128; 4]> = kani::any(); + let new_x = contracts::replace(&mut x, y); + std::mem::forget(x); + std::mem::forget(new_x); + } } diff --git a/tests/std-checks/core/src/slice.rs b/tests/std-checks/core/src/slice.rs new file mode 100644 index 000000000000..044f4bd38586 --- /dev/null +++ b/tests/std-checks/core/src/slice.rs @@ -0,0 +1,30 @@ +// Copyright Kani Contributors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +/// Create wrapper functions to standard library functions that contains their contract. +pub mod contracts { + use kani::{mem::*, requires}; + + #[requires(can_dereference(std::ptr::slice_from_raw_parts(data, len)))] + #[requires(is_initialized(data, len))] + pub unsafe fn from_raw_parts<'a, T>(data: *const T, len: usize) -> &'a [T] { + std::slice::from_raw_parts(data, len) + } +} + +#[cfg(kani)] +mod verify { + use super::*; + + const MAX_LEN: usize = 2; + + #[kani::proof_for_contract(contracts::from_raw_parts)] + #[kani::unwind(25)] + pub fn check_from_raw_parts_primitive() { + let len: usize = kani::any(); + kani::assume(len < MAX_LEN); + + let arr = vec![0u8; len]; + let _slice = unsafe { contracts::from_raw_parts(arr.as_ptr(), len) }; + } +}