Skip to content

Commit

Permalink
Full SME(1) instruction support and STREAMING Groups (#415)
Browse files Browse the repository at this point in the history
  • Loading branch information
FinnWilkinson authored Feb 6, 2025
1 parent cccbbc9 commit 576d4d4
Show file tree
Hide file tree
Showing 16 changed files with 5,586 additions and 563 deletions.
5 changes: 3 additions & 2 deletions configs/a64fx_SME.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@ Ports:
- INT_DIV_OR_SQRT
5:
Portname: EAGA
Instruction-Support:
Instruction-Group-Support:
- LOAD
- STORE_ADDRESS
- INT_SIMPLE_ARTH_NOSHIFT
- INT_SIMPLE_LOGICAL_NOSHIFT
- INT_SIMPLE_CMP
6:
Portname: EAGB
Instruction-Support:
Instruction-Group-Support:
- LOAD
- STORE_ADDRESS
- INT_SIMPLE_ARTH_NOSHIFT
Expand All @@ -98,6 +98,7 @@ Ports:
Portname: BR
Instruction-Group-Support:
- BRANCH
# Define example SME unit
8:
Portname: SME
Instruction-Group-Support:
Expand Down
Binary file removed docs/sphinx/assets/instruction_groups.png
Binary file not shown.
1 change: 0 additions & 1 deletion src/include/simeng/Register.hh
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once
#include <cstdint>
#include <iostream>

namespace simeng {

Expand Down
6 changes: 6 additions & 0 deletions src/include/simeng/arch/aarch64/Architecture.hh
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ class Architecture : public arch::Architecture {
/** Returns the current value of SVCRval_. */
uint64_t getSVCRval() const;

/** Returns true if SVE Streaming Mode is enabled, false otherwise. */
bool isStreamingModeEnabled() const;

/** Returns true if the SME ZA Register is enabled, false otherwise. */
bool isZARegisterEnabled() const;

/** Update the value of SVCRval_. */
void setSVCRval(const uint64_t newVal) const;

Expand Down
30 changes: 28 additions & 2 deletions src/include/simeng/arch/aarch64/InstructionGroups.hh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,33 @@ namespace simeng {
namespace arch {
namespace aarch64 {

/** The IDs of the instruction groups for AArch64 instructions. */
/** The IDs of the instruction groups for AArch64 instructions.
* Each new group must contain 14 entries to ensure correct group assignment and
* general functionality.
* Their order must be as follows:
* - BASE
* - BASE_SIMPLE
* - BASE_SIMPLE_ARTH
* - BASE_SIMPLE_ARTH_NOSHIFT
* - BASE_SIMPLE_LOGICAL
* - BASE_SIMPLE_LOGICAL_NOSHIFT
* - BASE_SIMPLE_CMP
* - BASE_SIMPLE_CVT
* - BASE_MUL
* - BASE_DIV_OR_SQRT
* - LOAD_BASE
* - STORE_ADDRESS_BASE
* - STORE_DATA_BASE
* - STORE_BASE
*
* An exception to the above is "Parent" groups which do not require the LOAD_*
* or STORE_* groups.
* "Parent" groups allow for easier grouping of similar groups that may have
* identical execution latencies, ports, etc. For example, FP is the parent
* group of SCALAR and VECTOR.
* In simulation, an instruction's allocated group will never be a "Parent"
* group; they are only used to simplify config file creation and management.
*/
namespace InstructionGroups {
const uint16_t INT = 0;
const uint16_t INT_SIMPLE = 1;
Expand Down Expand Up @@ -102,7 +128,7 @@ static constexpr uint8_t NUM_GROUPS = 88;
const std::unordered_map<uint16_t, std::vector<uint16_t>> groupInheritance_ = {
{InstructionGroups::ALL,
{InstructionGroups::INT, InstructionGroups::FP, InstructionGroups::SVE,
InstructionGroups::PREDICATE, InstructionGroups::SME,
InstructionGroups::SME, InstructionGroups::PREDICATE,
InstructionGroups::LOAD, InstructionGroups::STORE,
InstructionGroups::BRANCH}},
{InstructionGroups::INT,
Expand Down
14 changes: 12 additions & 2 deletions src/include/simeng/arch/aarch64/helpers/neon.hh
Original file line number Diff line number Diff line change
Expand Up @@ -568,9 +568,14 @@ RegisterValue vecUMaxP(srcValContainer& sourceValues) {
const T* n = sourceValues[0].getAsVector<T>();
const T* m = sourceValues[1].getAsVector<T>();

// Concatenate the vectors into one buffer laid out as [n | m].
// `temp` is a T*, so pointer arithmetic already advances in units of T: the
// second half begins at element offset I. Scaling the offset by sizeof(T)
// again would write past the end of `temp` whenever sizeof(T) > 1.
T temp[2 * I];
memcpy(temp, n, sizeof(T) * I);
memcpy(temp + I, m, sizeof(T) * I);
// Compare each adjacent pair of elements
T out[I];
for (int i = 0; i < I; i++) {
out[i] = std::max(n[i], m[i]);
out[i] = std::max(temp[2 * i], temp[2 * i + 1]);
}
return {out, 256};
}
Expand All @@ -585,9 +590,14 @@ RegisterValue vecUMinP(srcValContainer& sourceValues) {
const T* n = sourceValues[0].getAsVector<T>();
const T* m = sourceValues[1].getAsVector<T>();

// Concatenate the vectors into one buffer laid out as [n | m].
// `temp` is a T*, so pointer arithmetic already advances in units of T: the
// second half begins at element offset I. Scaling the offset by sizeof(T)
// again would write past the end of `temp` whenever sizeof(T) > 1.
T temp[2 * I];
memcpy(temp, n, sizeof(T) * I);
memcpy(temp + I, m, sizeof(T) * I);

T out[I];
for (int i = 0; i < I; i++) {
out[i] = std::min(n[i], m[i]);
out[i] = std::min(temp[2 * i], temp[2 * i + 1]);
}
return {out, 256};
}
Expand Down
6 changes: 6 additions & 0 deletions src/lib/arch/aarch64/Architecture.cc
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,12 @@ void Architecture::setSVCRval(const uint64_t newVal) const {
SVCRval_ = newVal;
}

// Streaming-mode is enabled when bit 0 of the SVCR register value is set.
bool Architecture::isStreamingModeEnabled() const {
  constexpr uint64_t streamingModeMask = uint64_t{1} << 0;
  return (SVCRval_ & streamingModeMask) != 0;
}

// The ZA register is enabled when bit 1 of the SVCR register value is set.
bool Architecture::isZARegisterEnabled() const {
  constexpr uint64_t zaEnabledMask = uint64_t{1} << 1;
  return (SVCRval_ & zaEnabledMask) != 0;
}

} // namespace aarch64
} // namespace arch
} // namespace simeng
35 changes: 35 additions & 0 deletions src/lib/arch/aarch64/InstructionMetadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,41 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn)
operands[2].access = CS_AC_READ;
operands[3].access = CS_AC_READ;
break;

case Opcode::AArch64_INSERT_MXIPZ_H_B:
[[fallthrough]];
case Opcode::AArch64_INSERT_MXIPZ_H_D:
[[fallthrough]];
case Opcode::AArch64_INSERT_MXIPZ_H_H:
[[fallthrough]];
case Opcode::AArch64_INSERT_MXIPZ_H_Q:
[[fallthrough]];
case Opcode::AArch64_INSERT_MXIPZ_H_S:
[[fallthrough]];
case Opcode::AArch64_INSERT_MXIPZ_V_B:
[[fallthrough]];
case Opcode::AArch64_INSERT_MXIPZ_V_D:
[[fallthrough]];
case Opcode::AArch64_INSERT_MXIPZ_V_H:
[[fallthrough]];
case Opcode::AArch64_INSERT_MXIPZ_V_Q:
[[fallthrough]];
case Opcode::AArch64_INSERT_MXIPZ_V_S:
// Access specifiers need to be added for all three operands.
// Although operands[0] should be READ | WRITE, the decode logic implemented
// for SME tile destinations adds the register as both a source and a
// destination when it has just WRITE access, so only WRITE is set here.
operands[0].access = CS_AC_WRITE;
operands[1].access = CS_AC_READ;
operands[2].access = CS_AC_READ;
break;
case Opcode::AArch64_LDR_ZA:
// An access specifier needs to be added for the destination operand.
// Although operands[0] should be READ | WRITE, the decode logic implemented
// for SME tile destinations adds the register as both a source and a
// destination when it has just WRITE access, so only WRITE is set here.
operands[0].access = CS_AC_WRITE;
break;
case Opcode::AArch64_ZERO_M: {
// Incorrect access type: All are READ but should all be WRITE
for (int i = 0; i < operandCount; i++) {
Expand Down
Loading

0 comments on commit 576d4d4

Please sign in to comment.