Skip to content

Latest commit

 

History

History
78 lines (63 loc) · 2.13 KB

vector-examples.adoc

File metadata and controls

78 lines (63 loc) · 2.13 KB

Appendix A: Vector Assembly Code Examples

The following examples are provided as non-normative text to help explain the vector ISA.

Vector-vector add example

link:example/vvaddint32.s[role=include]

Example with mixed-width mask and compute.

# Code using one element width for the predicate and a different width for
# the masked compute.
#   int8_t a[]; int32_t b[], c[];
#   for (i=0;  i<n; i++) { b[i] =  (a[i] < 5) ? c[i] : 1; }
#
# Register use:
#   a0 = number of elements still to process (n on entry)
#   a1 = pointer into a[]    a2 = pointer into b[]    a3 = pointer into c[]
#   a4 = vl of the current strip (elements)
#   t1 = size of the current strip in bytes at 32 bits per element
#
# The loop is tested at the bottom, so it expects n > 0 on entry.
#
# Mixed-width code that keeps SEW/LMUL=8 (e8/m1, then e32/m4), so both
# vsetvli instructions see the same VLMAX and the same AVL in a0.
  loop:
    vsetvli a4, a0, e8,m1,ta,ma   # Byte vector for predicate calc
    vle8.v v1, (a1)               # Load a[i]
      add a1, a1, a4              # Bump pointer (1 byte per element).
    vmslt.vi v0, v1, 5            # a[i] < 5?  Result is the mask in v0.

    vsetvli x0, a0, e32,m4,ta,mu  # Vector of 32-bit values; mask-undisturbed.
      sub a0, a0, a4              # Decrement count (after a0 was used as AVL)
    vmv.v.i v4, 1                 # Splat immediate to destination
    vle32.v v4, (a3), v0.t        # Load requested elements of C, others undisturbed
      slli t1, a4, 2              # Strip size in bytes: vl * 4
      add a3, a3, t1              # Bump pointer.
    vse32.v v4, (a2)              # Store b[i].
      add a2, a2, t1              # Bump pointer.
      bnez a0, loop               # Any more?

Memcpy example

link:example/memcpy.s[role=include]

Conditional example

# (int16) z[i] = ((int8) x[i] < 5) ? (int16) a[i] : (int16) b[i];
#
# Register use:
#   a0 = number of elements still to process
#   a1 = pointer into x[]    a2 = pointer into a[]
#   a3 = pointer into b[]    a4 = pointer into z[]
#   t0 = vl of the current strip, later doubled to the strip size in bytes
#        for the 16-bit arrays
#
# The loop is tested at the bottom, so it expects a0 > 0 on entry.

loop:
    vsetvli t0, a0, e8,m1,ta,ma # Use 8b elements.
    vle8.v v0, (a1)         # Get x[i]
      sub a0, a0, t0        # Decrement element count
      add a1, a1, t0        # x[i] Bump pointer
    vmslt.vi v0, v0, 5      # Set mask in v0
      slli t0, t0, 1        # Multiply by 2 bytes
    # Change only vtype and keep the current vl: a0 has already been
    # decremented and t0 now holds the byte stride, so neither may be used
    # or overwritten here.  SEW/LMUL stays 8 (e8/m1 -> e16/m2).
    vsetvli x0, x0, e16,m2,ta,mu  # Use 16b elements.
    # With LMUL=2 the data lives in an even-numbered register group; v2 also
    # keeps it clear of the mask in v0.
    vle16.v v2, (a2), v0.t  # z[i] = a[i] case
    vmnot.m v0, v0          # Invert v0
      add a2, a2, t0        # a[i] bump pointer
    vle16.v v2, (a3), v0.t  # z[i] = b[i] case
      add a3, a3, t0        # b[i] bump pointer
    vse16.v v2, (a4)        # Store z
      add a4, a4, t0        # z[i] bump pointer
      bnez a0, loop

SAXPY example

link:example/saxpy.s[role=include]

SGEMM example

link:example/sgemm.S[role=include]