Skip to content

Commit

Permalink
Merge pull request #65 from lifting-bits/docs_and_small_fixes
Browse files Browse the repository at this point in the history
Add comments and one small bugfix
  • Loading branch information
artemdinaburg authored Nov 5, 2020
2 parents 3dbb4c9 + 9ac52c3 commit 0b2004e
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 5 deletions.
11 changes: 11 additions & 0 deletions bin/JSON.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,9 @@ static bool ParseSpec(const remill::Arch *arch, llvm::LLVMContext &context,
} // namespace

int main(int argc, char *argv[]) {

// Get the version string from git, and use it as the output of the
// `--version` flag provided by gflags.
SetVersion();
google::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
Expand Down Expand Up @@ -592,6 +595,10 @@ int main(int argc, char *argv[]) {
}

llvm::LLVMContext context;

// Get a unique pointer to a remill architecture object. The architecture
// object knows how to deal with everything for this specific architecture,
// such as semantics, register, etc.
auto arch = remill::Arch::Build(&context, remill::GetOSName(os_str),
remill::GetArchName(arch_str));
if (!arch) {
Expand All @@ -601,6 +608,10 @@ int main(int argc, char *argv[]) {
auto semantics = remill::LoadArchSemantics(arch);

anvill::Program program;

// Parse the spec, which contains as much or as little detail about what is
// being lifted as the spec generator desired, and put it into an
// anvill::Program object, which is effectively a representation of the spec.
if (!ParseSpec(arch.get(), context, program, spec)) {
return EXIT_FAILURE;
}
Expand Down
5 changes: 4 additions & 1 deletion lib/Lift.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,10 @@ bool LiftCodeIntoModule(const remill::Arch *arch, const Program &program,
llvm::Module &module) {
DLOG(INFO) << "LiftCodeIntoModule";

// Create our lifter
// Create our lifter.
// At this point, `module` is just the loaded semantics for
// the architecture. The module will be filled in with lifted program code
// and data as the lifting process progresses.
MCToIRLifter lifter(arch, program, module);

// Declare global variables.
Expand Down
33 changes: 29 additions & 4 deletions lib/MCToIRLifter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,16 @@ MCToIRLifter::LoadFunctionReturnAddress(const remill::Instruction &inst,

static const bool is_sparc = arch->IsSPARC32() || arch->IsSPARC64();
const auto pc = inst.branch_not_taken_pc;

// The semantics for handling a call save the expected return program counter
// into a local variable.
auto ret_pc =
inst_lifter.LoadRegValue(block, state_ptr, remill::kReturnPCVariableName);
if (!is_sparc) {
return {pc, ret_pc};
}

// Only bad SPARC ABI choices below this point.
auto byte = program.FindByte(pc);

uint8_t bytes[4] = {};
Expand Down Expand Up @@ -204,6 +208,8 @@ void MCToIRLifter::VisitDirectFunctionCall(const remill::Instruction &inst,
} else {
LOG(ERROR) << "Missing declaration for function at " << std::hex
<< inst.branch_taken_pc << " called at " << inst.pc << std::dec;

// If we do not have a function declaration, treat this as a call to an unknown address.
remill::AddCall(block, intrinsics.function_call);
}
VisitAfterFunctionCall(inst, block);
Expand Down Expand Up @@ -283,30 +289,40 @@ void MCToIRLifter::VisitInstruction(remill::Instruction &inst,
llvm::BasicBlock *block) {
curr_inst = &inst;

// Reserve space for an instruction that will go into a delay slot, in case it
// is needed. This is an uncommon case, so avoid instantiating a new
// Instruction unless it is actually needed. The instruction instantiation into
// this buffer happens via a placement new call later on.
std::aligned_storage<sizeof(remill::Instruction),
alignof(remill::Instruction)>
delayed_inst_storage;
alignof(remill::Instruction)>::type delayed_inst_storage;

remill::Instruction *delayed_inst = nullptr;

// Even when something isn't supported or is invalid, we still lift
// a call to a semantic, e.g.`INVALID_INSTRUCTION`, so we really want
// to treat instruction lifting as an operation that can't fail.
(void) inst_lifter.LiftIntoBlock(inst, block, state_ptr, false);
(void) inst_lifter.LiftIntoBlock(inst, block, state_ptr,
false /* is_delayed */);

if (arch->MayHaveDelaySlot(inst)) {
delayed_inst = new (&delayed_inst_storage) remill::Instruction;
if (!DecodeInstructionInto(inst.delayed_pc, true, delayed_inst)) {
if (!DecodeInstructionInto(inst.delayed_pc, true /* is_delayed */,
delayed_inst)) {
LOG(ERROR) << "Unable to decode or use delayed instruction at "
<< std::hex << inst.delayed_pc << std::dec << " of "
<< inst.Serialize();
}
}

switch (inst.category) {

// Invalid means failed to decode.
case remill::Instruction::kCategoryInvalid:
VisitInvalid(inst, block);
break;

// Error is a valid instruction, but specifies error semantics for the
// processor. The canonical example is x86's `UD2` instruction.
case remill::Instruction::kCategoryError:
VisitError(inst, delayed_inst, block);
break;
Expand Down Expand Up @@ -350,6 +366,9 @@ FunctionEntry MCToIRLifter::GetOrDeclareFunction(const FunctionDecl &decl) {
return entry;
}

// By default we do not want to deal with function names until the very end of
// lifting. Instead, let's assign a temporary name based on the function's
// starting address.
const auto base_name = CreateFunctionName(decl.address);

entry.lifted_to_native =
Expand All @@ -368,6 +387,8 @@ FunctionEntry MCToIRLifter::GetOrDeclareFunction(const FunctionDecl &decl) {

FunctionEntry MCToIRLifter::LiftFunction(const FunctionDecl &decl) {
const auto entry = GetOrDeclareFunction(decl);

// Check if we already lifted this function. If so, do not re-lift it.
if (!entry.native_to_lifted->isDeclaration()) {
return entry;
}
Expand All @@ -376,6 +397,10 @@ FunctionEntry MCToIRLifter::LiftFunction(const FunctionDecl &decl) {
addr_to_block.clear();

lifted_func = entry.lifted;

// Every lifted function starts as a clone of __remill_basic_block. That
// prototype has multiple arguments (memory pointer, state pointer, program
// counter). This extracts the state pointer.
state_ptr = remill::NthArgument(lifted_func, remill::kStatePointerArgNum);
CHECK(lifted_func->isDeclaration());

Expand Down

0 comments on commit 0b2004e

Please sign in to comment.