i965/vec4: add VEC4_OPCODE_SET_{LOW,HIGH}_32BIT opcodes
author: Iago Toral Quiroga <itoral@igalia.com>
Fri, 17 Jun 2016 10:19:35 +0000 (12:19 +0200)
committer: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Tue, 3 Jan 2017 10:26:50 +0000 (11:26 +0100)
These opcodes will set the low/high 32 bits of each 64-bit data element
using Align1 mode. We will use this to implement packDouble2x32.

We use Align1 mode because in order to implement this in Align16 mode
we would need to use 32-bit logical swizzles (XZ for low, YW for high),
but the IR works in terms of 64-bit logical swizzles for DF operands
all the way up to codegen.

v2:
 - use suboffset() instead of get_element_ud()
 - no need to set the width on the dst

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_shader.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp

index 6c981cc5518c2cdc0b1178ce3959d403fa9f1c1a..4314c4c03e0c9e2aa6a4648c1e4962c0b1c03609 100644 (file)
@@ -1102,6 +1102,8 @@ enum opcode {
    VEC4_OPCODE_FLOAT_TO_DOUBLE,
    VEC4_OPCODE_PICK_LOW_32BIT,
    VEC4_OPCODE_PICK_HIGH_32BIT,
+   VEC4_OPCODE_SET_LOW_32BIT,
+   VEC4_OPCODE_SET_HIGH_32BIT,
 
    FS_OPCODE_DDX_COARSE,
    FS_OPCODE_DDX_FINE,
index bea65050211495c76011bfba6d8d8aefc9263e03..a919ae4e0344fbb892477652a19b6f9f11635ceb 100644 (file)
@@ -330,6 +330,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
       return "pick_low_32bit";
    case VEC4_OPCODE_PICK_HIGH_32BIT:
       return "pick_high_32bit";
+   case VEC4_OPCODE_SET_LOW_32BIT:
+      return "set_low_32bit";
+   case VEC4_OPCODE_SET_HIGH_32BIT:
+      return "set_high_32bit";
 
    case FS_OPCODE_DDX_COARSE:
       return "ddx_coarse";
index d70b8db244ec6ff3409dfabee5ab2adc03881f96..a013d38f1bab3da1de4ed7d96f5d774f0cc78ce9 100644 (file)
@@ -257,6 +257,8 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
    case VEC4_OPCODE_FLOAT_TO_DOUBLE:
    case VEC4_OPCODE_PICK_LOW_32BIT:
    case VEC4_OPCODE_PICK_HIGH_32BIT:
+   case VEC4_OPCODE_SET_LOW_32BIT:
+   case VEC4_OPCODE_SET_HIGH_32BIT:
    case VS_OPCODE_PULL_CONSTANT_LOAD:
    case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
    case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
@@ -514,6 +516,8 @@ vec4_visitor::opt_reduce_swizzle()
       case VEC4_OPCODE_DOUBLE_TO_FLOAT:
       case VEC4_OPCODE_PICK_LOW_32BIT:
       case VEC4_OPCODE_PICK_HIGH_32BIT:
+      case VEC4_OPCODE_SET_LOW_32BIT:
+      case VEC4_OPCODE_SET_HIGH_32BIT:
          swizzle = brw_swizzle_for_size(4);
          break;
 
index 0a962e05d1e2b41bc6a1cdc814ca382a43357c2d..3039398a64d051e0401a7928357abed72c3947b3 100644 (file)
@@ -1978,6 +1978,31 @@ generate_code(struct brw_codegen *p,
          break;
       }
 
+      case VEC4_OPCODE_SET_LOW_32BIT:
+      case VEC4_OPCODE_SET_HIGH_32BIT: {
+         /* Reads consecutive 32-bit elements from src[0] and writes
+          * them to the low/high 32-bit of each 64-bit element in dst.
+          */
+         assert(type_sz(src[0].type) == 4);
+         assert(type_sz(dst.type) == 8);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+         dst = retype(dst, BRW_REGISTER_TYPE_UD);
+         if (inst->opcode == VEC4_OPCODE_SET_HIGH_32BIT)
+            dst = suboffset(dst, 1);
+         dst.hstride = BRW_HORIZONTAL_STRIDE_2;
+
+         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
+         src[0].vstride = BRW_VERTICAL_STRIDE_4;
+         src[0].width = BRW_WIDTH_4;
+         src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
+         brw_MOV(p, dst, src[0]);
+
+         brw_set_default_access_mode(p, BRW_ALIGN_16);
+         break;
+      }
+
       case VEC4_OPCODE_PACK_BYTES: {
          /* Is effectively:
           *