intel/fs,vec4: Stuff the constant data from NIR in the end of the program
[mesa.git] / src / intel / compiler / brw_vec4_generator.cpp
index 7c038c97196b744156c0a31ded44a811a79e0e4e..e9142c2c65c0270ae46dbbec9bd794fe8dbfd6bb 100644 (file)
@@ -270,6 +270,17 @@ generate_tex(struct brw_codegen *p,
       break;
    }
 
+   /* Stomp the resinfo output type to UINT32.  On gens 4-5, the output type
+    * is set as part of the message descriptor.  On gen4, the PRM seems to
+    * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on
+    * later gens UINT32 is required.  Once you hit Sandy Bridge, the bit is
+    * gone from the message descriptor entirely and you just get UINT32 all
+    * the time regasrdless.  Since we can really only do non-UINT32 on gen4,
+    * just stomp it to UINT32 all the time.
+    */
+   if (inst->opcode == SHADER_OPCODE_TXS)
+      return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
+
    uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
          inst->opcode == SHADER_OPCODE_TG4_OFFSET)
          ? prog_data->base.binding_table.gather_texture_start
@@ -1911,13 +1922,13 @@ generate_code(struct brw_codegen *p,
          send_count++;
          break;
 
-      case SHADER_OPCODE_MEMORY_FENCE: {
-         const unsigned sends =
-            brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false,
-                             /* bti */ 0);
-         send_count += sends;
+      case SHADER_OPCODE_MEMORY_FENCE:
+         brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND,
+                          brw_message_target(inst->sfid),
+                          /* commit_enable */ false,
+                          /* bti */ 0);
+         send_count++;
          break;
-      }
 
       case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
          const struct brw_reg mask =
@@ -2227,7 +2238,8 @@ generate_code(struct brw_codegen *p,
 
       /* overriding the shader makes disasm_info invalid */
       if (!brw_try_override_assembly(p, 0, sha1buf)) {
-         dump_assembly(p->store, disasm_info, perf.block_latency);
+         dump_assembly(p->store, 0, p->next_insn_offset,
+                       disasm_info, perf.block_latency);
       } else {
          fprintf(stderr, "Successfully overrode shader with sha1 %s\n\n", sha1buf);
       }
@@ -2269,5 +2281,12 @@ brw_vec4_generate_assembly(const struct brw_compiler *compiler,
 
    generate_code(p, compiler, log_data, nir, prog_data, cfg, perf, stats);
 
+   assert(prog_data->base.const_data_size == 0);
+   if (nir->constant_data_size > 0) {
+      prog_data->base.const_data_size = nir->constant_data_size;
+      prog_data->base.const_data_offset =
+         brw_append_data(p, nir->constant_data, nir->constant_data_size, 32);
+   }
+
    return brw_get_program(p, &prog_data->base.program_size);
 }