r600: add support for ARB_shader_clock.

[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c

index f71bca75a9343585a7610246c24235db5bca2387..623e6f7f70473a6ce0e83926b55ea8159a6e07c1 100644 (file)
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -346,8 +346,10 @@ struct r600_shader_ctx {
         boolean                 clip_vertex_write;
         unsigned                cv_output;
         unsigned                edgeflag_output;
+       int                                     cs_block_size_reg;
+       int                                     cs_grid_size_reg;
+       bool cs_block_size_loaded, cs_grid_size_loaded;
         int                                     fragcoord_input;
-       int                                     native_integers;
         int                                     next_ring_offset;
         int                                     gs_out_ring_offset;
         int                                     gs_next_vertex;
@@ -995,22 +997,6 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                         d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
                         break; /* Already handled from allocate_system_value_inputs */
                 } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
-                       if (!ctx->native_integers) {
-                               struct r600_bytecode_alu alu;
-                               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-
-                               alu.op = ALU_OP1_INT_TO_FLT;
-                               alu.src[0].sel = 0;
-                               alu.src[0].chan = 3;
-
-                               alu.dst.sel = 0;
-                               alu.dst.chan = 3;
-                               alu.dst.write = 1;
-                               alu.last = 1;
-
-                               if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
-                                       return r;
-                       }
                         break;
                 } else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
                         break;
@@ -1298,7 +1284,66 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
         vtx.num_format_all = 2;
         vtx.format_comp_all = 1;
         vtx.use_const_fields = 0;
-       vtx.offset = 1; // first element is size of buffer
+       vtx.offset = 0;
+       vtx.endian = r600_endian_swap(32);
+       vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
+
+       r = r600_bytecode_add_vtx(ctx->bc, &vtx);
+       if (r)
+               return r;
+
+       return t1;
+}
+
+/*
+ * Fetch the compute block size (load_block == true) or grid size
+ * (load_block == false) from R600_BUFFER_INFO_CONST_BUFFER into the GPR
+ * reserved for it and return that GPR.  Each value is fetched only once;
+ * the two values are tracked separately, so asking for one never hands
+ * back the register of the other.  A non-zero r600_bytecode_add_*()
+ * result is returned unchanged on failure.
+ */
+static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
+{
+       struct r600_bytecode_vtx vtx;
+       int r, t1;
+
+       if (load_block && ctx->cs_block_size_loaded)
+               return ctx->cs_block_size_reg;
+       if (!load_block && ctx->cs_grid_size_loaded)
+               return ctx->cs_grid_size_reg;
+
+       t1 = load_block ? ctx->cs_block_size_reg : ctx->cs_grid_size_reg;
+       struct r600_bytecode_alu alu;
+       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+       /* clear t1.x, which the fetch below reads as its source (src_gpr/src_sel_x) */
+       alu.op = ALU_OP1_MOV;
+       alu.src[0].sel = V_SQ_ALU_SRC_0;
+       alu.dst.sel = t1;
+       alu.dst.write = 1;
+       alu.last = 1;
+       r = r600_bytecode_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+
+       memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
+       vtx.op = FETCH_OP_VFETCH;
+       vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
+       vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+       vtx.src_gpr = t1;
+       vtx.src_sel_x = 0;
+
+       vtx.mega_fetch_count = 16;
+       vtx.dst_gpr = t1;
+       vtx.dst_sel_x = 0;
+       vtx.dst_sel_y = 1;
+       vtx.dst_sel_z = 2;
+       vtx.dst_sel_w = 7;
+       vtx.data_format = FMT_32_32_32_32;
+       vtx.num_format_all = 1;
+       vtx.format_comp_all = 0;
+       vtx.use_const_fields = 0;
+       vtx.offset = load_block ? 0 : 16; /* block size at offset 0, grid size at offset 16 */
         vtx.endian = r600_endian_swap(32);
         vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */
  
@@ -1306,6 +1351,10 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
         if (r)
                 return r;
  
+       if (load_block)
+               ctx->cs_block_size_loaded = true;
+       else
+               ctx->cs_grid_size_loaded = true;
         return t1;
  }
  
@@ -1414,6 +1454,10 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
                         r600_src->swizzle[1] = 3;
                         r600_src->swizzle[2] = 3;
                         r600_src->swizzle[3] = 3;
+               } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_GRID_SIZE) {
+                       r600_src->sel = load_block_grid_size(ctx, false);
+               } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) {
+                       r600_src->sel = load_block_grid_size(ctx, true);
                 }
         } else {
                 if (tgsi_src->Register.Indirect)
@@ -2807,7 +2851,7 @@ static int r600_store_tcs_output(struct r600_shader_ctx *ctx)
  }
  
  static int r600_tess_factor_read(struct r600_shader_ctx *ctx,
-                                int output_idx)
+                                int output_idx, int nc)
  {
         int param;
         unsigned temp_reg = r600_get_temp(ctx);
@@ -2827,7 +2871,7 @@ static int r600_tess_factor_read(struct r600_shader_ctx *ctx,
         if (r)
                 return r;
  
-       do_lds_fetch_values(ctx, temp_reg, dreg, 0xf);
+       do_lds_fetch_values(ctx, temp_reg, dreg, ((1u << nc) - 1));
         return 0;
  }
  
@@ -2897,13 +2941,13 @@ static int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
                 return -1;
  
         if (tessouter_idx != -1) {
-               r = r600_tess_factor_read(ctx, tessouter_idx);
+               r = r600_tess_factor_read(ctx, tessouter_idx, outer_comps);
                 if (r)
                         return r;
         }
  
         if (tessinner_idx != -1) {
-               r = r600_tess_factor_read(ctx, tessinner_idx);
+               r = r600_tess_factor_read(ctx, tessinner_idx, inner_comps);
                 if (r)
                         return r;
         }
@@ -3067,7 +3111,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
  
         ctx.bc = &shader->bc;
         ctx.shader = shader;
-       ctx.native_integers = true;
  
         r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
                            rscreen->has_compressed_msaa_texturing);
@@ -3148,6 +3191,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
         ctx.clip_vertex_write = 0;
         ctx.thread_id_gpr_loaded = false;
  
+       ctx.cs_block_size_reg = -1;
+       ctx.cs_grid_size_reg = -1;
+       ctx.cs_block_size_loaded = false;
+       ctx.cs_grid_size_loaded = false;
+
         shader->nr_ps_color_exports = 0;
         shader->nr_ps_max_color_exports = 0;
  
@@ -3211,8 +3259,15 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
                 if (add_tess_inout)
                         ctx.file_offset[TGSI_FILE_INPUT]+=2;
         }
-       if (ctx.type == PIPE_SHADER_COMPUTE)
+       if (ctx.type == PIPE_SHADER_COMPUTE) {
                 ctx.file_offset[TGSI_FILE_INPUT] = 2;
+               for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
+                       if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_GRID_SIZE)
+                               ctx.cs_grid_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
+                       if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_BLOCK_SIZE)
+                               ctx.cs_block_size_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
+               }
+       }
  
         ctx.file_offset[TGSI_FILE_OUTPUT] =
                         ctx.file_offset[TGSI_FILE_INPUT] +
@@ -3281,6 +3336,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
  
         ctx.nliterals = 0;
         ctx.literals = NULL;
+       ctx.max_driver_temp_used = 0;
  
         shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
                                ctx.info.colors_written == 1;
@@ -6801,6 +6857,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
         struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
         int src_gpr, r, i;
         int id = tgsi_tex_get_src_gpr(ctx, 1);
+       int sampler_index_mode = inst->Src[1].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
  
         src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
         if (src_requires_loading) {
@@ -6832,6 +6889,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
         vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7;          /* SEL_Z */
         vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7;          /* SEL_W */
         vtx.use_const_fields = 1;
+       vtx.buffer_index_mode = sampler_index_mode;
  
         if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
                 return r;
@@ -6892,31 +6950,48 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
  static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offset)
  {
         struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-       struct r600_bytecode_alu alu;
         int r;
         int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset;
+       int sampler_index_mode = inst->Src[reg_idx].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
  
-       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-       alu.op = ALU_OP1_MOV;
-       alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
-       if (ctx->bc->chip_class >= EVERGREEN) {
-               /* channel 0 or 2 of each word */
-               alu.src[0].sel += (id / 2);
-               alu.src[0].chan = (id % 2) * 2;
-       } else {
+       if (ctx->bc->chip_class < EVERGREEN) {
+               struct r600_bytecode_alu alu;
+               memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+               alu.op = ALU_OP1_MOV;
+               alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
                 /* r600 we have them at channel 2 of the second dword */
                 alu.src[0].sel += (id * 2) + 1;
                 alu.src[0].chan = 1;
+               alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
+               tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+               alu.last = 1;
+               r = r600_bytecode_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+               return 0;
+       } else {
+               struct r600_bytecode_vtx vtx;
+               memset(&vtx, 0, sizeof(vtx));
+               vtx.op = FETCH_OP_GDS_MIN_UINT; /* aka GET_BUFFER_RESINFO */
+               vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
+               vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+               vtx.src_gpr = 0;
+               vtx.mega_fetch_count = 16; /* no idea here really... */
+               vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
+               vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7;          /* SEL_X */
+               vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 4 : 7;          /* SEL_Y */
+               vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 4 : 7;          /* SEL_Z */
+               vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 4 : 7;          /* SEL_W */
+               vtx.data_format = FMT_32_32_32_32;
+               vtx.buffer_index_mode = sampler_index_mode;
+
+               if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
+                       return r;
+               return 0;
         }
-       alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
-       tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
-       alu.last = 1;
-       r = r600_bytecode_add_alu(ctx->bc, &alu);
-       if (r)
-               return r;
-       return 0;
  }
  
+
  static int tgsi_tex(struct r600_shader_ctx *ctx)
  {
         struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -6970,7 +7045,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
  
         if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
                 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
-                       ctx->shader->uses_tex_buffers = true;
+                       if (ctx->bc->chip_class < EVERGREEN)
+                               ctx->shader->uses_tex_buffers = true;
                         return r600_do_buffer_txq(ctx, 1, 0);
                 }
                 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
@@ -7560,9 +7636,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
  
                 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
                 if (ctx->bc->chip_class >= EVERGREEN) {
-                       /* channel 1 or 3 of each word */
-                       alu.src[0].sel += (id / 2);
-                       alu.src[0].chan = ((id % 2) * 2) + 1;
+                       /* with eg each dword is number of cubes */
+                       alu.src[0].sel += id / 4;
+                       alu.src[0].chan = id % 4;
                 } else {
                         /* r600 we have them at channel 2 of the second dword */
                         alu.src[0].sel += (id * 2) + 1;
@@ -7829,7 +7905,7 @@ static int tgsi_set_gds_temp(struct r600_shader_ctx *ctx,
                              int *uav_id_p, int *uav_index_mode_p)
  {
         struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-       int uav_id, uav_index_mode;
+       int uav_id, uav_index_mode = 0;
         int r;
         bool is_cm = (ctx->bc->chip_class == CAYMAN);
  
@@ -8703,7 +8779,8 @@ static int tgsi_resq(struct r600_shader_ctx *ctx)
  
         if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
             (inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
-               ctx->shader->uses_tex_buffers = true;
+               if (ctx->bc->chip_class < EVERGREEN)
+                       ctx->shader->uses_tex_buffers = true;
                 return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset);
         }
  
@@ -8727,9 +8804,9 @@ static int tgsi_resq(struct r600_shader_ctx *ctx)
                 alu.op = ALU_OP1_MOV;
  
                 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
-               /* channel 1 or 3 of each word */
-               alu.src[0].sel += (id / 2);
-               alu.src[0].chan = ((id % 2) * 2) + 1;
+               /* with eg each dword is either number of cubes */
+               alu.src[0].sel += id / 4;
+               alu.src[0].chan = id % 4;
                 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
                 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
                 alu.last = 1;
@@ -10113,6 +10190,39 @@ static int tgsi_bfe(struct r600_shader_ctx *ctx)
         return 0;
  }
  
+/*
+ * TGSI_OPCODE_CLOCK handler: emits two MOVs, one reading
+ * EG_V_SQ_ALU_SRC_TIME_LO into the destination set up by
+ * tgsi_dst(..., 0, ...) and one reading EG_V_SQ_ALU_SRC_TIME_HI into the
+ * destination set up by tgsi_dst(..., 1, ...).
+ *
+ * Returns 0 on success, otherwise the non-zero r600_bytecode_add_alu()
+ * result.
+ */
+static int tgsi_clock(struct r600_shader_ctx *ctx)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bytecode_alu alu;
+       int r;
+
+       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+       alu.op = ALU_OP1_MOV;
+       tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
+       alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_LO;
+       r = r600_bytecode_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+       alu.op = ALU_OP1_MOV;
+       tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
+       alu.src[0].sel = EG_V_SQ_ALU_SRC_TIME_HI;
+       alu.last = 1; /* final MOV of the pair closes the ALU group */
+       r = r600_bytecode_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+       return 0;
+}
+
  static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
         [TGSI_OPCODE_ARL]       = { ALU_OP0_NOP, tgsi_r600_arl},
         [TGSI_OPCODE_MOV]       = { ALU_OP1_MOV, tgsi_op2},
@@ -10149,7 +10249,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
         [TGSI_OPCODE_POW]       = { ALU_OP0_NOP, tgsi_pow},
         [31]    = { ALU_OP0_NOP, tgsi_unsupported},
         [32]                    = { ALU_OP0_NOP, tgsi_unsupported},
-       [33]                    = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_CLOCK]     = { ALU_OP0_NOP, tgsi_unsupported},
         [34]                    = { ALU_OP0_NOP, tgsi_unsupported},
         [35]                    = { ALU_OP0_NOP, tgsi_unsupported},
         [TGSI_OPCODE_COS]       = { ALU_OP1_COS, tgsi_trig},
@@ -10347,7 +10447,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
         [TGSI_OPCODE_POW]       = { ALU_OP0_NOP, tgsi_pow},
         [31]    = { ALU_OP0_NOP, tgsi_unsupported},
         [32]                    = { ALU_OP0_NOP, tgsi_unsupported},
-       [33]                    = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_CLOCK]     = { ALU_OP0_NOP, tgsi_clock},
         [34]                    = { ALU_OP0_NOP, tgsi_unsupported},
         [35]                    = { ALU_OP0_NOP, tgsi_unsupported},
         [TGSI_OPCODE_COS]       = { ALU_OP1_COS, tgsi_trig},
@@ -10569,7 +10669,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
         [TGSI_OPCODE_POW]       = { ALU_OP0_NOP, cayman_pow},
         [31]    = { ALU_OP0_NOP, tgsi_unsupported},
         [32]                    = { ALU_OP0_NOP, tgsi_unsupported},
-       [33]                    = { ALU_OP0_NOP, tgsi_unsupported},
+       [TGSI_OPCODE_CLOCK]     = { ALU_OP0_NOP, tgsi_clock},
         [34]                    = { ALU_OP0_NOP, tgsi_unsupported},
         [35]                    = { ALU_OP0_NOP, tgsi_unsupported},
         [TGSI_OPCODE_COS]       = { ALU_OP1_COS, cayman_trig},