#include "brw_vec4.h"
#include "brw_cfg.h"
+#include "brw_eu.h"
#include "brw_program.h"
-#include "glsl/ir_uniform.h"
-#include "program/sampler.h"
namespace brw {
case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_FUNCTION:
unreachable("not reached");
}
vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
src_reg src0, src_reg src1)
{
- vec4_instruction *inst;
-
- if (devinfo->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->conditional_mod = conditionalmod;
- } else {
- emit(CMP(dst, src0, src1, conditionalmod));
-
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-
+ vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+ inst->conditional_mod = conditionalmod;
return inst;
}
src_reg
vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
- src_reg coordinate, src_reg sampler)
+ src_reg coordinate, src_reg surface)
{
vec4_instruction *inst =
new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
dst_reg(this, glsl_type::uvec4_type));
inst->base_mrf = 2;
- inst->src[1] = sampler;
+ inst->src[1] = surface;
+ inst->src[2] = surface;
int param_base;
src_reg offset_value,
src_reg mcs,
bool is_cube_array,
+ uint32_t surface,
+ src_reg surface_reg,
uint32_t sampler,
src_reg sampler_reg)
{
inst->dst.writemask = WRITEMASK_XYZW;
inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
- inst->src[1] = sampler_reg;
+ inst->src[1] = surface_reg;
+ inst->src[2] = sampler_reg;
/* MRF for the first parameter */
int param_base = inst->base_mrf + inst->header_size;
/* fixup num layers (z) for cube arrays: hardware returns faces * layers;
* spec requires layers.
*/
- if (op == ir_txs && is_cube_array) {
- emit_math(SHADER_OPCODE_INT_QUOTIENT,
- writemask(inst->dst, WRITEMASK_Z),
- src_reg(inst->dst), brw_imm_d(6));
+ if (op == ir_txs) {
+ if (is_cube_array) {
+ emit_math(SHADER_OPCODE_INT_QUOTIENT,
+ writemask(inst->dst, WRITEMASK_Z),
+ src_reg(inst->dst), brw_imm_d(6));
+ } else if (devinfo->gen < 7) {
+ /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
+ emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
+ src_reg(inst->dst), brw_imm_d(1));
+ }
}
if (devinfo->gen == 6 && op == ir_tg4) {
- emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst);
+ emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst);
}
if (op == ir_query_levels) {
unreachable("not reached");
}
-void
-vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- dst_reg dst, src_reg surf_offset,
- src_reg src0, src_reg src1)
-{
- unsigned mlen = 1 + (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
- src_reg src_payload(this, glsl_type::uint_type, mlen);
- dst_reg payload(src_payload);
- payload.writemask = WRITEMASK_X;
-
- /* Set the atomic operation offset. */
- emit(MOV(offset(payload, 0), surf_offset));
- unsigned i = 1;
-
- /* Set the atomic operation arguments. */
- if (src0.file != BAD_FILE) {
- emit(MOV(offset(payload, i), src0));
- i++;
- }
-
- if (src1.file != BAD_FILE) {
- emit(MOV(offset(payload, i), src1));
- i++;
- }
-
- /* Emit the instruction. Note that this maps to the normal SIMD8
- * untyped atomic message on Ivy Bridge, but that's OK because
- * unused channels will be masked out.
- */
- vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
- src_payload,
- brw_imm_ud(surf_index), brw_imm_ud(atomic_op));
- inst->mlen = mlen;
-}
-
-void
-vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
- src_reg surf_offset)
-{
- dst_reg offset(this, glsl_type::uint_type);
- offset.writemask = WRITEMASK_X;
-
- /* Set the surface read offset. */
- emit(MOV(offset, surf_offset));
-
- /* Emit the instruction. Note that this maps to the normal SIMD8
- * untyped surface read message, but that's OK because unused
- * channels will be masked out.
- */
- vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
- src_reg(offset),
- brw_imm_ud(surf_index), brw_imm_d(1));
- inst->mlen = 1;
-}
-
void
vec4_visitor::emit_ndc_computation()
{
}
}
-src_reg
-vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
- src_reg *reladdr, int reg_offset)
-{
- if (reladdr) {
- src_reg index = src_reg(this, glsl_type::int_type);
-
- emit_before(block, inst, ADD(dst_reg(index), *reladdr,
- brw_imm_d(reg_offset)));
-
- /* Pre-gen6, the message header uses byte offsets instead of vec4
- * (16-byte) offset units.
- */
- if (devinfo->gen < 6) {
- emit_before(block, inst, MUL(dst_reg(index), index, brw_imm_d(16)));
- }
-
- return index;
- } else if (devinfo->gen >= 8) {
- /* Store the offset in a GRF so we can send-from-GRF. */
- src_reg offset = src_reg(this, glsl_type::int_type);
- emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset)));
- return offset;
- } else {
- int message_header_scale = devinfo->gen < 6 ? 16 : 1;
- return brw_imm_d(reg_offset * message_header_scale);
- }
-}
-
/**
* Emits an instruction before @inst to load the value named by @orig_src
* from scratch space at @base_offset to @temp.
void
vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
dst_reg temp, src_reg orig_src,
- int base_offset)
+ int base_offset, src_reg indirect)
{
int reg_offset = base_offset + orig_src.reg_offset;
const unsigned index = prog_data->base.binding_table.pull_constants_start;
- src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
- reg_offset);
+
+ src_reg offset;
+ if (indirect.file != BAD_FILE) {
+ offset = src_reg(this, glsl_type::int_type);
+
+ emit_before(block, inst, ADD(dst_reg(offset), indirect,
+ brw_imm_d(reg_offset * 16)));
+ } else if (devinfo->gen >= 8) {
+ /* Store the offset in a GRF so we can send-from-GRF. */
+ offset = src_reg(this, glsl_type::int_type);
+ emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
+ } else {
+ offset = brw_imm_d(reg_offset * 16);
+ }
emit_pull_constant_load_reg(temp,
brw_imm_ud(index),
{
int pull_constant_loc[this->uniforms];
memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
- bool nested_reladdr;
- /* Walk through and find array access of uniforms. Put a copy of that
- * uniform in the pull constant buffer.
- *
- * Note that we don't move constant-indexed accesses to arrays. No
- * testing has been done of the performance impact of this choice.
+ /* First, walk through the instructions and determine which things need to
+ * be pulled. We mark something as needing to be pulled by setting
+ * pull_constant_loc to 0.
*/
- do {
- nested_reladdr = false;
-
- foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
- for (int i = 0 ; i < 3; i++) {
- if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
- continue;
+ foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ /* We only care about MOV_INDIRECT of a uniform */
+ if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+ inst->src[0].file != UNIFORM)
+ continue;
- int uniform = inst->src[i].nr;
+ int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
- if (inst->src[i].reladdr->reladdr)
- nested_reladdr = true; /* will need another pass */
+ for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
+ pull_constant_loc[uniform_nr + j] = 0;
+ }
- /* If this array isn't already present in the pull constant buffer,
- * add it.
- */
- if (pull_constant_loc[uniform] == -1) {
- const gl_constant_value **values =
- &stage_prog_data->param[uniform * 4];
+ /* Next, we walk the list of uniforms and assign real pull constant
+ * locations and set their corresponding entries in pull_param.
+ */
+ for (int j = 0; j < this->uniforms; j++) {
+ if (pull_constant_loc[j] < 0)
+ continue;
- pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
+ pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
- assert(uniform < uniform_array_size);
- for (int j = 0; j < uniform_size[uniform] * 4; j++) {
- stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
- = values[j];
- }
- }
+ for (int i = 0; i < 4; i++) {
+ stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
+ = stage_prog_data->param[j * 4 + i];
+ }
+ }
- /* Set up the annotation tracking for new generated instructions. */
- base_ir = inst->ir;
- current_annotation = inst->annotation;
+ /* Finally, we can walk through the instructions and lower MOV_INDIRECT
+ * instructions to actual uniform pulls.
+ */
+ foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+ /* We only care about MOV_INDIRECT of a uniform */
+ if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+ inst->src[0].file != UNIFORM)
+ continue;
- dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+ int uniform_nr = inst->src[0].nr + inst->src[0].reg_offset;
- emit_pull_constant_load(block, inst, temp, inst->src[i],
- pull_constant_loc[uniform]);
+ assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
- inst->src[i].file = temp.file;
- inst->src[i].nr = temp.nr;
- inst->src[i].reg_offset = temp.reg_offset;
- inst->src[i].reladdr = NULL;
- }
- }
- } while (nested_reladdr);
+ emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
+ pull_constant_loc[uniform_nr], inst->src[1]);
+ inst->remove(block);
+ }
/* Now there are no accesses of the UNIFORM file with a reladdr, so
* no need to track them as larger-than-vec4 objects. This will be
this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
this->uniforms = 0;
-
- /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
- * at least one. See setup_uniforms() in brw_vec4.cpp.
- */
- this->uniform_array_size = 1;
- if (prog_data) {
- this->uniform_array_size =
- MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
- }
-
- this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
}
vec4_visitor::~vec4_visitor()