i965/vec4: split VEC4_OPCODE_FROM_DOUBLE into one opcode per destination's type

author Samuel Iglesias Gonsálvez <siglesias@igalia.com>

Fri, 24 Mar 2017 07:46:13 +0000 (08:46 +0100)

committer Francisco Jerez <currojerez@riseup.net>

Fri, 14 Apr 2017 21:56:08 +0000 (14:56 -0700)
author Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Fri, 24 Mar 2017 07:46:13 +0000 (08:46 +0100)
committer Francisco Jerez <currojerez@riseup.net>
Fri, 14 Apr 2017 21:56:08 +0000 (14:56 -0700)
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h

index f0b0d5c2a06f9fe80aa192d1f3240d31f9c0a741..13a70f6f6a1e347159fcd507e75503ce4517d393 100644 (file)
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -447,7 +447,9 @@ enum opcode {
     VEC4_OPCODE_MOV_BYTES,
     VEC4_OPCODE_PACK_BYTES,
     VEC4_OPCODE_UNPACK_UNIFORM,
-   VEC4_OPCODE_FROM_DOUBLE,
+   VEC4_OPCODE_DOUBLE_TO_F32,
+   VEC4_OPCODE_DOUBLE_TO_D32,
+   VEC4_OPCODE_DOUBLE_TO_U32,
     VEC4_OPCODE_TO_DOUBLE,
     VEC4_OPCODE_PICK_LOW_32BIT,
     VEC4_OPCODE_PICK_HIGH_32BIT,
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp

index 73bbc931352fb75e52483963187498be305a5ca2..304b4ecf4fadb1cd024c337fe50a202080787c98 100644 (file)
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -326,8 +326,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
        return "pack_bytes";
     case VEC4_OPCODE_UNPACK_UNIFORM:
        return "unpack_uniform";
-   case VEC4_OPCODE_FROM_DOUBLE:
-      return "double_to_single";
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+      return "double_to_f32";
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+      return "double_to_d32";
+   case VEC4_OPCODE_DOUBLE_TO_U32:
+      return "double_to_u32";
     case VEC4_OPCODE_TO_DOUBLE:
        return "single_to_double";
     case VEC4_OPCODE_PICK_LOW_32BIT:
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp

index 386057e3e3c6dda0ee8878fda5153c75144dd519..0b92ba704e5e791b18d3b8ecc109280c91fac96f 100644 (file)
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -260,7 +260,9 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo)
  {
     switch (opcode) {
     case SHADER_OPCODE_GEN4_SCRATCH_READ:
-   case VEC4_OPCODE_FROM_DOUBLE:
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
     case VEC4_OPCODE_TO_DOUBLE:
     case VEC4_OPCODE_PICK_LOW_32BIT:
     case VEC4_OPCODE_PICK_HIGH_32BIT:
@@ -521,7 +523,9 @@ vec4_visitor::opt_reduce_swizzle()
           break;
  
        case VEC4_OPCODE_TO_DOUBLE:
-      case VEC4_OPCODE_FROM_DOUBLE:
+      case VEC4_OPCODE_DOUBLE_TO_F32:
+      case VEC4_OPCODE_DOUBLE_TO_D32:
+      case VEC4_OPCODE_DOUBLE_TO_U32:
        case VEC4_OPCODE_PICK_LOW_32BIT:
        case VEC4_OPCODE_PICK_HIGH_32BIT:
        case VEC4_OPCODE_SET_LOW_32BIT:
@@ -2255,7 +2259,9 @@ static bool
  is_align1_df(vec4_instruction *inst)
  {
     switch (inst->opcode) {
-   case VEC4_OPCODE_FROM_DOUBLE:
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
     case VEC4_OPCODE_TO_DOUBLE:
     case VEC4_OPCODE_PICK_LOW_32BIT:
     case VEC4_OPCODE_PICK_HIGH_32BIT:
diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp

index e7f6f93f8bdb0fec7456fc828a72b06c02a5c7bd..c1ae32a2936c3d07c132ce95da8db0a5cf1f7f0f 100644 (file)
--- a/src/intel/compiler/brw_vec4_copy_propagation.cpp
+++ b/src/intel/compiler/brw_vec4_copy_propagation.cpp
@@ -293,7 +293,9 @@ static bool
  is_align1_opcode(unsigned opcode)
  {
     switch (opcode) {
-   case VEC4_OPCODE_FROM_DOUBLE:
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
     case VEC4_OPCODE_TO_DOUBLE:
     case VEC4_OPCODE_PICK_LOW_32BIT:
     case VEC4_OPCODE_PICK_HIGH_32BIT:
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp

index 65f3a9a9f00bd068391fbb8c37c24d7c5b317cf3..5be4ef7fd4b341d3a6afb17f5a58b50ebd26db9d 100644 (file)
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1940,9 +1940,28 @@ generate_code(struct brw_codegen *p,
           break;
        }
  
-      case VEC4_OPCODE_FROM_DOUBLE: {
+      case VEC4_OPCODE_DOUBLE_TO_F32:
+      case VEC4_OPCODE_DOUBLE_TO_D32:
+      case VEC4_OPCODE_DOUBLE_TO_U32: {
           assert(type_sz(src[0].type) == 8);
-         assert(type_sz(dst.type) == 4);
+         assert(type_sz(dst.type) == 8);
+
+         brw_reg_type dst_type;
+
+         switch (inst->opcode) {
+         case VEC4_OPCODE_DOUBLE_TO_F32:
+            dst_type = BRW_REGISTER_TYPE_F;
+            break;
+         case VEC4_OPCODE_DOUBLE_TO_D32:
+            dst_type = BRW_REGISTER_TYPE_D;
+            break;
+         case VEC4_OPCODE_DOUBLE_TO_U32:
+            dst_type = BRW_REGISTER_TYPE_UD;
+            break;
+         default:
+            unreachable("Not supported conversion");
+         }
+         dst = retype(dst, dst_type);
  
           brw_set_default_access_mode(p, BRW_ALIGN_1);
  
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp

index 64371a16de57c3b7b59669e56ff61a8df735c092..9d9ded2b9657c29b8d76fe841f1fc6f1a37b0b1f 100644 (file)
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -1183,16 +1183,28 @@ vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src,
        return;
     }
  
+   enum opcode op;
+   switch (dst.type) {
+   case BRW_REGISTER_TYPE_D:
+      op = VEC4_OPCODE_DOUBLE_TO_D32;
+      break;
+   case BRW_REGISTER_TYPE_UD:
+      op = VEC4_OPCODE_DOUBLE_TO_U32;
+      break;
+   case BRW_REGISTER_TYPE_F:
+      op = VEC4_OPCODE_DOUBLE_TO_F32;
+      break;
+   default:
+      unreachable("Unknown conversion");
+   }
+
     dst_reg temp = dst_reg(this, glsl_type::dvec4_type);
     emit(MOV(temp, src));
-
     dst_reg temp2 = dst_reg(this, glsl_type::dvec4_type);
-   temp2 = retype(temp2, dst.type);
-   emit(VEC4_OPCODE_FROM_DOUBLE, temp2, src_reg(temp))
-      ->size_written = 2 * REG_SIZE;
+   emit(op, temp2, src_reg(temp));
  
-   emit(VEC4_OPCODE_PICK_LOW_32BIT, temp2, src_reg(retype(temp2, BRW_REGISTER_TYPE_DF)));
-   vec4_instruction *inst = emit(MOV(dst, src_reg(temp2)));
+   emit(VEC4_OPCODE_PICK_LOW_32BIT, retype(temp2, dst.type), src_reg(temp2));
+   vec4_instruction *inst = emit(MOV(dst, src_reg(retype(temp2, dst.type))));
     inst->saturate = saturate;
  }
  
diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp

index e3b46cc2f7f1241793b9e485361d949d07a8961c..a0ba77b867cd75904fb69dfad04751d4ff4d0ecc 100644 (file)
--- a/src/intel/compiler/brw_vec4_reg_allocate.cpp
+++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp
@@ -447,18 +447,6 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
           if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8)
              no_spill[inst->dst.nr] = true;
  
-         /* FROM_DOUBLE opcodes are setup so that they use a dst register
-          * with a size of 2 even if they only produce a single-precison
-          * result (this is so that the opcode can use the larger register to
-          * produce a 64-bit aligned intermediary result as required by the
-          * hardware during the conversion process). This creates a problem for
-          * spilling though, because when we attempt to emit a spill for the
-          * dst we see a 32-bit destination and emit a scratch write that
-          * allocates a single spill register.
-          */
-         if (inst->opcode == VEC4_OPCODE_FROM_DOUBLE)
-            no_spill[inst->dst.nr] = true;
-
           /* We can't spill registers that mix 32-bit and 64-bit access (that
            * contain 64-bit data that is operated on via 32-bit instructions)
            */
author	Samuel Iglesias Gonsálvez <siglesias@igalia.com>
	Fri, 24 Mar 2017 07:46:13 +0000 (08:46 +0100)
committer	Francisco Jerez <currojerez@riseup.net>
	Fri, 14 Apr 2017 21:56:08 +0000 (14:56 -0700)
src/intel/compiler/brw_eu_defines.h		patch | blob | history
src/intel/compiler/brw_shader.cpp		patch | blob | history
src/intel/compiler/brw_vec4.cpp		patch | blob | history
src/intel/compiler/brw_vec4_copy_propagation.cpp		patch | blob | history
src/intel/compiler/brw_vec4_generator.cpp		patch | blob | history
src/intel/compiler/brw_vec4_nir.cpp		patch | blob | history
src/intel/compiler/brw_vec4_reg_allocate.cpp		patch | blob | history