nir, glsl: move pixel_center_integer/origin_upper_left to shader_info.fs
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
index 55984ff0c78f81b4ef5bcb6eab8df4375127691a..484a5329455c8d5182661105e3486530bd7849de 100644 (file)
 
 #define MAX_GLSL_TEXTURE_OFFSET 4
 
+#ifndef NDEBUG
+#include "util/u_atomic.h"
+#include "util/simple_mtx.h"
+#include <fstream>
+#include <ios>
+
+/* Prepare to make it possible to specify log file */
+static std::ofstream stats_log;
+
+/* Helper function to check whether we want to write some statistics
+ * of the shader conversion.
+ */
+
+static simple_mtx_t print_stats_mutex = _SIMPLE_MTX_INITIALIZER_NP;
+
+static inline bool print_stats_enabled ()
+{
+   static int stats_enabled = 0;
+
+   if (!stats_enabled) {
+      simple_mtx_lock(&print_stats_mutex);
+      if (!stats_enabled) {
+        const char *stats_filename = getenv("GLSL_TO_TGSI_PRINT_STATS");
+        if (stats_filename) {
+           bool write_header = std::ifstream(stats_filename).fail();
+           stats_log.open(stats_filename, std::ios_base::out | std::ios_base::app);
+           stats_enabled = stats_log.good() ? 1 : -1;
+           if (write_header)
+              stats_log << "arrays,temps,temps in arrays,total,instructions\n";
+        } else {
+           stats_enabled = -1;
+        }
+      }
+      simple_mtx_unlock(&print_stats_mutex);
+   }
+   return stats_enabled > 0;
+}
+#define PRINT_STATS(X) if (print_stats_enabled()) do { X; } while (false);
+#else
+#define PRINT_STATS(X)
+#endif
+
+
 static unsigned is_precise(const ir_variable *ir)
 {
    if (!ir)
@@ -340,6 +383,7 @@ public:
    void copy_propagate(void);
    int eliminate_dead_code(void);
 
+   void split_arrays(void);
    void merge_two_dsts(void);
    void merge_registers(void);
    void renumber_registers(void);
@@ -348,6 +392,8 @@ public:
                        st_dst_reg *l, st_src_reg *r,
                        st_src_reg *cond, bool cond_swap);
 
+   void print_stats();
+
    void *mem_ctx;
 };
 
@@ -735,6 +781,7 @@ glsl_to_tgsi_visitor::get_opcode(enum tgsi_opcode op,
       case7(ISHR,    LAST, ISHR,    USHR,    LAST,    I64SHR,  U64SHR);
       case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST,    LAST,    LAST);
       case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST,    LAST,    LAST);
+      case7(ATOMUADD,ATOMFADD,ATOMUADD,ATOMUADD,LAST, LAST,    LAST);
 
       casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
       casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
@@ -1089,11 +1136,6 @@ glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
 void
 glsl_to_tgsi_visitor::visit(ir_variable *ir)
 {
-   if (strcmp(ir->name, "gl_FragCoord") == 0) {
-      this->prog->OriginUpperLeft = ir->data.origin_upper_left;
-      this->prog->PixelCenterInteger = ir->data.pixel_center_integer;
-   }
-
    if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
       unsigned int i;
       const ir_state_slot *const slots = ir->get_state_slots();
@@ -1228,6 +1270,10 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
    st_src_reg a, b, c;
    st_dst_reg result_dst;
 
+   // there is no TGSI opcode for this
+   if (ir->type->is_integer_64())
+      return false;
+
    ir_expression *expr = ir->operands[mul_operand]->as_expression();
    if (!expr || expr->operation != ir_binop_mul)
       return false;
@@ -2038,7 +2084,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
             emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset,
                  st_src_reg_for_int(4));
             cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
-            memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
+            *cbuf.reladdr = index_reg;
          }
 
          if (const_uniform_block) {
@@ -2047,7 +2093,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
          } else {
             /* Relative/variable constant buffer */
             cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
-            memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
+            *cbuf.reladdr2 = op[0];
          }
          cbuf.has_index2 = true;
 
@@ -2758,12 +2804,12 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
 
       if (is_2D) {
          src.reladdr2 = ralloc(mem_ctx, st_src_reg);
-         memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
+         *src.reladdr2 = index_reg;
          src.index2D = 0;
          src.has_index2 = true;
       } else {
          src.reladdr = ralloc(mem_ctx, st_src_reg);
-         memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+         *src.reladdr = index_reg;
       }
    }
 
@@ -4099,8 +4145,7 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
    unsigned location = 0;
    ir_variable *var = ir->variable_referenced();
 
-   memset(reladdr, 0, sizeof(*reladdr));
-   reladdr->file = PROGRAM_UNDEFINED;
+   reladdr->reset();
 
    *base = 0;
    *array_size = 1;
@@ -4609,6 +4654,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
    ctx = NULL;
    prog = NULL;
    precise = 0;
+   need_uarl = false;
    shader_program = NULL;
    shader = NULL;
    options = NULL;
@@ -5438,6 +5484,107 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
    }
 }
 
+template <typename st_reg>
+void test_indirect_access(const st_reg& reg, bool *has_indirect_access)
+{
+   if (reg.file == PROGRAM_ARRAY) {
+      if (reg.reladdr || reg.reladdr2 || reg.has_index2) {
+        has_indirect_access[reg.array_id] = true;
+        if (reg.reladdr)
+           test_indirect_access(*reg.reladdr, has_indirect_access);
+        if (reg.reladdr2)
+           test_indirect_access(*reg.reladdr2, has_indirect_access);
+      }
+   }
+}
+
+template <typename st_reg>
+void remap_array(st_reg& reg, const int *array_remap_info,
+                const bool *has_indirect_access)
+{
+   if (reg.file == PROGRAM_ARRAY) {
+      if (!has_indirect_access[reg.array_id]) {
+        reg.file = PROGRAM_TEMPORARY;
+        reg.index = reg.index + array_remap_info[reg.array_id];
+        reg.array_id = 0;
+      } else {
+        reg.array_id = array_remap_info[reg.array_id];
+      }
+
+      if (reg.reladdr)
+        remap_array(*reg.reladdr, array_remap_info, has_indirect_access);
+
+      if (reg.reladdr2)
+        remap_array(*reg.reladdr2, array_remap_info, has_indirect_access);
+   }
+}
+
+/* One-dimensional arrays whose elements are only accessed directly are
+ * replaced by an according set of temporary registers that then can become
+ * subject to further optimization steps like copy propagation and
+ * register merging.
+ */
+void
+glsl_to_tgsi_visitor::split_arrays(void)
+{
+   if (!next_array)
+      return;
+
+   bool *has_indirect_access = rzalloc_array(mem_ctx, bool, next_array + 1);
+
+   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
+      for (unsigned j = 0; j < num_inst_src_regs(inst); j++)
+        test_indirect_access(inst->src[j], has_indirect_access);
+
+      for (unsigned j = 0; j < inst->tex_offset_num_offset; j++)
+        test_indirect_access(inst->tex_offsets[j], has_indirect_access);
+
+      for (unsigned j = 0; j < num_inst_dst_regs(inst); j++)
+        test_indirect_access(inst->dst[j], has_indirect_access);
+
+      test_indirect_access(inst->resource, has_indirect_access);
+   }
+
+   unsigned array_offset = 0;
+   unsigned n_remaining_arrays = 0;
+
+   /* Double use: For arrays that get split this value will contain
+    * the base index of the temporary registers this array is replaced
+    * with. For arrays that remain it contains the new array ID.
+    */
+   int *array_remap_info = rzalloc_array(has_indirect_access, int,
+                                        next_array + 1);
+
+   for (unsigned i = 1; i <= next_array; ++i) {
+      if (!has_indirect_access[i]) {
+        array_remap_info[i] = this->next_temp + array_offset;
+        array_offset += array_sizes[i - 1];
+      } else {
+        array_sizes[n_remaining_arrays] = array_sizes[i-1];
+        array_remap_info[i] = ++n_remaining_arrays;
+      }
+   }
+
+   if (next_array !=  n_remaining_arrays) {
+      foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
+        for (unsigned j = 0; j < num_inst_src_regs(inst); j++)
+           remap_array(inst->src[j], array_remap_info, has_indirect_access);
+
+        for (unsigned j = 0; j < inst->tex_offset_num_offset; j++)
+           remap_array(inst->tex_offsets[j], array_remap_info, has_indirect_access);
+
+        for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
+           remap_array(inst->dst[j], array_remap_info, has_indirect_access);
+        }
+        remap_array(inst->resource, array_remap_info, has_indirect_access);
+      }
+   }
+
+   ralloc_free(has_indirect_access);
+   this->next_temp += array_offset;
+   next_array = n_remaining_arrays;
+}
+
 /* Merges temporary registers together where possible to reduce the number of
  * registers needed to run a program.
  *
@@ -5446,19 +5593,35 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
 void
 glsl_to_tgsi_visitor::merge_registers(void)
 {
-   struct lifetime *lifetimes =
-         rzalloc_array(mem_ctx, struct lifetime, this->next_temp);
+   class array_live_range *arr_live_ranges = NULL;
+
+   struct register_live_range *reg_live_ranges =
+        rzalloc_array(mem_ctx, struct register_live_range, this->next_temp);
 
-   if (get_temp_registers_required_lifetimes(mem_ctx, &this->instructions,
-                                             this->next_temp, lifetimes)) {
+   if (this->next_array > 0) {
+      arr_live_ranges = new array_live_range[this->next_array];
+      for (unsigned i = 0; i < this->next_array; ++i)
+         arr_live_ranges[i] = array_live_range(i+1, this->array_sizes[i]);
+   }
+
+
+   if (get_temp_registers_required_live_ranges(reg_live_ranges, &this->instructions,
+                                              this->next_temp, reg_live_ranges,
+                                              this->next_array, arr_live_ranges)) {
       struct rename_reg_pair *renames =
-            rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
-      get_temp_registers_remapping(mem_ctx, this->next_temp, lifetimes, renames);
+           rzalloc_array(reg_live_ranges, struct rename_reg_pair, this->next_temp);
+      get_temp_registers_remapping(reg_live_ranges, this->next_temp,
+                                  reg_live_ranges, renames);
       rename_temp_registers(renames);
-      ralloc_free(renames);
+
+      this->next_array =  merge_arrays(this->next_array, this->array_sizes,
+                                      &this->instructions, arr_live_ranges);
    }
 
-   ralloc_free(lifetimes);
+   if (arr_live_ranges)
+      delete[] arr_live_ranges;
+
+   ralloc_free(reg_live_ranges);
 }
 
 /* Reassign indices to temporary registers by reusing unused indices created
@@ -5491,6 +5654,27 @@ glsl_to_tgsi_visitor::renumber_registers(void)
    ralloc_free(first_writes);
 }
 
+#ifndef NDEBUG
+void glsl_to_tgsi_visitor::print_stats()
+{
+   int narray_registers = 0;
+   for (unsigned i = 0; i < this->next_array; ++i)
+      narray_registers += this->array_sizes[i];
+
+   int ninstructions = 0;
+   foreach_in_list(glsl_to_tgsi_instruction, inst, &instructions) {
+      ++ninstructions;
+   }
+
+   simple_mtx_lock(&print_stats_mutex);
+   stats_log << next_array << ", "
+            << next_temp << ", "
+            << narray_registers << ", "
+            << next_temp + narray_registers << ", "
+            << ninstructions << "\n";
+   simple_mtx_unlock(&print_stats_mutex);
+}
+#endif
 /* ------------------------- TGSI conversion stuff -------------------------- */
 
 /**
@@ -5609,6 +5793,7 @@ _mesa_sysval_to_semantic(unsigned sysval)
    case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
    case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
    case SYSTEM_VALUE_VERTEX_CNT:
+   case SYSTEM_VALUE_VARYING_COORD:
    default:
       assert(!"Unexpected SYSTEM_VALUE_ enum");
       return TGSI_SEMANTIC_COUNT;
@@ -6015,6 +6200,7 @@ compile_tgsi_instruction(struct st_translate *t,
    case TGSI_OPCODE_ATOMUMAX:
    case TGSI_OPCODE_ATOMIMIN:
    case TGSI_OPCODE_ATOMIMAX:
+   case TGSI_OPCODE_ATOMFADD:
    case TGSI_OPCODE_IMG2HND:
       for (i = num_src - 1; i >= 0; i--)
          src[i + 1] = src[i];
@@ -6080,6 +6266,34 @@ compile_tgsi_instruction(struct st_translate *t,
    }
 }
 
+/* Invert SamplePos.y when rendering to the default framebuffer. */
+static void
+emit_samplepos_adjustment(struct st_translate *t, int wpos_y_transform)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   assert(wpos_y_transform >= 0);
+   struct ureg_src trans_const = ureg_DECL_constant(ureg, wpos_y_transform);
+   struct ureg_src samplepos_sysval = t->systemValues[SYSTEM_VALUE_SAMPLE_POS];
+   struct ureg_dst samplepos_flipped = ureg_DECL_temporary(ureg);
+   struct ureg_dst is_fbo = ureg_DECL_temporary(ureg);
+
+   ureg_ADD(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y),
+            ureg_imm1f(ureg, 1), ureg_negate(samplepos_sysval));
+
+   /* If trans.x == 1, use samplepos.y, else use 1 - samplepos.y. */
+   ureg_FSEQ(ureg, ureg_writemask(is_fbo, TGSI_WRITEMASK_Y),
+             ureg_scalar(trans_const, TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1));
+   ureg_UCMP(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y),
+             ureg_src(is_fbo), samplepos_sysval, ureg_src(samplepos_flipped));
+   ureg_MOV(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_X),
+            samplepos_sysval);
+
+   /* Use the result in place of the system value. */
+   t->systemValues[SYSTEM_VALUE_SAMPLE_POS] = ureg_src(samplepos_flipped);
+}
+
+
 /**
  * Emit the TGSI instructions for inverting and adjusting WPOS.
  * This code is unavoidable because it also depends on whether
@@ -6203,7 +6417,7 @@ emit_wpos(struct st_context *st,
     * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
     * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
     */
-   if (program->OriginUpperLeft) {
+   if (program->info.fs.origin_upper_left) {
       /* Fragment shader wants origin in upper-left */
       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
          /* the driver supports upper-left origin */
@@ -6230,7 +6444,7 @@ emit_wpos(struct st_context *st,
          assert(0);
    }
 
-   if (program->PixelCenterInteger) {
+   if (program->info.fs.pixel_center_integer) {
       /* Fragment shader wants pixel center integer */
       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
          /* the driver supports pixel center integer */
@@ -6647,6 +6861,10 @@ st_translate_program(
                emit_wpos(st_context(ctx), t, proginfo, ureg,
                          program->wpos_transform_const);
 
+            if (procType == PIPE_SHADER_FRAGMENT &&
+                semName == TGSI_SEMANTIC_SAMPLEPOS)
+               emit_samplepos_adjustment(t, program->wpos_transform_const);
+
             sysInputs &= ~(1ull << i);
          }
       }
@@ -6915,8 +7133,17 @@ get_mesa_program_tgsi(struct gl_context *ctx,
    while (v->eliminate_dead_code());
 
    v->merge_two_dsts();
-   if (!skip_merge_registers)
+
+   if (!skip_merge_registers) {
+      v->split_arrays();
+      v->copy_propagate();
+      while (v->eliminate_dead_code());
+
       v->merge_registers();
+      v->copy_propagate();
+      while (v->eliminate_dead_code());
+   }
+
    v->renumber_registers();
 
    /* Write the END instruction. */
@@ -6935,7 +7162,7 @@ get_mesa_program_tgsi(struct gl_context *ctx,
    _mesa_copy_linked_program_data(shader_program, shader);
    shrink_array_declarations(v->inputs, v->num_inputs,
                              &prog->info.inputs_read,
-                             prog->info.vs.double_inputs_read,
+                             prog->DualSlotInputs,
                              &prog->info.patch_inputs_read);
    shrink_array_declarations(v->outputs, v->num_outputs,
                              &prog->info.outputs_written, 0ULL,
@@ -6949,7 +7176,8 @@ get_mesa_program_tgsi(struct gl_context *ctx,
    /* This must be done before the uniform storage is associated. */
    if (shader->Stage == MESA_SHADER_FRAGMENT &&
        (prog->info.inputs_read & VARYING_BIT_POS ||
-        prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD))) {
+        prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD) ||
+        prog->info.system_values_read & (1ull << SYSTEM_VALUE_SAMPLE_POS))) {
       static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
          STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
       };
@@ -7004,6 +7232,8 @@ get_mesa_program_tgsi(struct gl_context *ctx,
       return NULL;
    }
 
+   PRINT_STATS(v->print_stats());
+
    return prog;
 }
 
@@ -7236,25 +7466,17 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
 }
 
 void
-st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
-                                const ubyte outputMapping[],
-                                struct pipe_stream_output_info *so)
-{
-   if (!glsl_to_tgsi->shader_program->last_vert_prog)
-      return;
-
-   struct gl_transform_feedback_info *info =
-      glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback;
-   st_translate_stream_output_info2(info, outputMapping, so);
-}
-
-void
-st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
+st_translate_stream_output_info(struct gl_transform_feedback_info *info,
                                 const ubyte outputMapping[],
                                 struct pipe_stream_output_info *so)
 {
    unsigned i;
 
+   if (!info) {
+      so->num_outputs = 0;
+      return;
+   }
+
    for (i = 0; i < info->NumOutputs; i++) {
       so->output[i].register_index =
          outputMapping[info->Outputs[i].OutputRegister];