panfrost/midgard: Add fround(_even), ftrunc, ffma
authorAlyssa Rosenzweig <alyssa@rosenzweig.io>
Sat, 23 Feb 2019 01:12:10 +0000 (01:12 +0000)
committerAlyssa Rosenzweig <alyssa@rosenzweig.io>
Mon, 25 Feb 2019 02:36:26 +0000 (02:36 +0000)
These ops were discovered by invoking the correspondingly named GLSL
functions. The rounding ops here behave exactly as expected and are mapped
to their corresponding NIR ops where applicable. The ffma behaves as a
LUT instruction and requires some special argument packing (since
Midgard normally only allows for 2 arguments); this quirk will be
addressed in the future, but for now FMA is still lowered.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
src/gallium/drivers/panfrost/midgard/helpers.h
src/gallium/drivers/panfrost/midgard/midgard.h
src/gallium/drivers/panfrost/midgard/midgard_compile.c

index 6940f27b4aba38f2ceb9bdab3693334357eb35d5..12235f8f9ff6973111df872f80d8109e6d2926e6 100644 (file)
@@ -195,8 +195,12 @@ static unsigned alu_opcode_props[256] = {
         [midgard_alu_op_imin]           = UNITS_MOST,
         [midgard_alu_op_imax]           = UNITS_MOST,
         [midgard_alu_op_fmov]           = UNITS_ALL | QUIRK_FLIPPED_R24,
+        [midgard_alu_op_fround]          = UNITS_ADD,
+        [midgard_alu_op_froundeven]      = UNITS_ADD,
+        [midgard_alu_op_ftrunc]          = UNITS_ADD,
         [midgard_alu_op_ffloor]                 = UNITS_ADD,
         [midgard_alu_op_fceil]          = UNITS_ADD,
+        [midgard_alu_op_ffma]           = UNIT_VLUT,
 
         /* Though they output a scalar, they need to run on a vector unit
          * since they process vectors */
index 04e195a635dc88ca9097df41632ef3fa05a841f1..03ac2f5597e3575b88417074e1977acb8801ab45 100644 (file)
@@ -55,8 +55,11 @@ typedef enum {
         midgard_alu_op_fmin       = 0x28,
         midgard_alu_op_fmax       = 0x2C,
         midgard_alu_op_fmov       = 0x30,
+        midgard_alu_op_froundeven = 0x34,
+        midgard_alu_op_ftrunc     = 0x35,
         midgard_alu_op_ffloor     = 0x36,
         midgard_alu_op_fceil      = 0x37,
+        midgard_alu_op_ffma       = 0x38,
         midgard_alu_op_fdot3      = 0x3C,
         midgard_alu_op_fdot3r     = 0x3D,
         midgard_alu_op_fdot4      = 0x3E,
@@ -98,6 +101,7 @@ typedef enum {
         midgard_alu_op_u2f        = 0xBC,
         midgard_alu_op_icsel      = 0xC1,
         midgard_alu_op_fcsel      = 0xC5,
+        midgard_alu_op_fround     = 0xC6,
         midgard_alu_op_fatan_pt2  = 0xE8,
         midgard_alu_op_frcp       = 0xF0,
         midgard_alu_op_frsqrt     = 0xF2,
@@ -402,8 +406,11 @@ static char *alu_opcode_names[256] = {
         [midgard_alu_op_fmin]       = "fmin",
         [midgard_alu_op_fmax]       = "fmax",
         [midgard_alu_op_fmov]       = "fmov",
+        [midgard_alu_op_froundeven] = "froundeven",
+        [midgard_alu_op_ftrunc]     = "ftrunc",
         [midgard_alu_op_ffloor]     = "ffloor",
         [midgard_alu_op_fceil]      = "fceil",
+        [midgard_alu_op_ffma]       = "ffma",
         [midgard_alu_op_fdot3]      = "fdot3",
         [midgard_alu_op_fdot3r]     = "fdot3r",
         [midgard_alu_op_fdot4]      = "fdot4",
@@ -445,6 +452,7 @@ static char *alu_opcode_names[256] = {
         [midgard_alu_op_u2f]        = "u2f",
         [midgard_alu_op_icsel]      = "icsel",
         [midgard_alu_op_fcsel]      = "fcsel",
+        [midgard_alu_op_fround]     = "fround",
         [midgard_alu_op_fatan_pt2]  = "fatan_pt2",
         [midgard_alu_op_frcp]       = "frcp",
         [midgard_alu_op_frsqrt]     = "frsqrt",
index a7da54c00f9b4c0fa5906ea89dc9c113589fda0e..65822d3b28f188ab7561d67e783c3c053f3ef60a 100644 (file)
@@ -950,6 +950,8 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
                 ALU_CASE(imax, imax);
                 ALU_CASE(fmov, fmov);
                 ALU_CASE(ffloor, ffloor);
+                ALU_CASE(fround_even, froundeven);
+                ALU_CASE(ftrunc, ftrunc);
                 ALU_CASE(fceil, fceil);
                 ALU_CASE(fdot3, fdot3);
                 ALU_CASE(fdot4, fdot4);