this->src[0] = src0;
this->src[1] = src1;
this->src[2] = src2;
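+ /* Initialize the optional per-instruction fields to known defaults. */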
+ this->saturate = false;
+ this->force_writemask_all = false;
+ this->no_dd_clear = false;
+ this->no_dd_check = false;
+ this->conditional_mod = BRW_CONDITIONAL_NONE;
+ this->sampler = 0;
+ this->texture_offset = 0;
+ this->target = 0;
+ this->shadow_compare = false;
this->ir = v->base_ir;
+ this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
+ this->header_present = false;
+ this->mlen = 0;
+ this->base_mrf = 0;
+ this->offset = 0;
this->annotation = v->current_annotation;
}
return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
}
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst)
+{
+ return emit(new(mem_ctx) vec4_instruction(this, opcode, dst));
+}
+
vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
ALU1(FBL)
ALU1(CBIT)
ALU3(MAD)
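+/* ADDC/SUBB (Gen7+) perform add-with-carry / subtract-with-borrow, leaving
+ * the carry/borrow bits in the accumulator; used by ir_binop_carry and
+ * ir_binop_borrow below.
+ */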
+ALU2(ADDC)
+ALU2(SUBB)
/** Gen4 predicated IF. */
vec4_instruction *
return inst;
}
-/** Gen6+ IF with embedded comparison. */
+/** Gen6 IF with embedded comparison. */
vec4_instruction *
vec4_visitor::IF(src_reg src0, src_reg src1, uint32_t condition)
{
- assert(brw->gen >= 6);
+ assert(brw->gen == 6);
vec4_instruction *inst;
{
vec4_instruction *inst;
- inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_READ,
+ inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_GEN4_SCRATCH_READ,
dst, index);
inst->base_mrf = 14;
inst->mlen = 2;
{
vec4_instruction *inst;
- inst = new(mem_ctx) vec4_instruction(this, VS_OPCODE_SCRATCH_WRITE,
+ inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_GEN4_SCRATCH_WRITE,
dst, src, index);
inst->base_mrf = 13;
inst->mlen = 3;
* at link time.
*/
return 1;
+ case GLSL_TYPE_ATOMIC_UINT:
+ return 0;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
if (variable_storage(ir))
return;
- switch (ir->mode) {
+ switch (ir->data.mode) {
case ir_var_shader_in:
- reg = new(mem_ctx) dst_reg(ATTR, ir->location);
+ reg = new(mem_ctx) dst_reg(ATTR, ir->data.location);
break;
case ir_var_shader_out:
reg = new(mem_ctx) dst_reg(this, ir->type);
for (int i = 0; i < type_size(ir->type); i++) {
- output_reg[ir->location + i] = *reg;
- output_reg[ir->location + i].reg_offset = i;
- output_reg[ir->location + i].type =
+ output_reg[ir->data.location + i] = *reg;
+ output_reg[ir->data.location + i].reg_offset = i;
+ output_reg[ir->data.location + i].type =
brw_type_for_base_type(ir->type->get_scalar_type());
- output_reg_annotation[ir->location + i] = ir->name;
+ output_reg_annotation[ir->data.location + i] = ir->name;
}
break;
/* Thanks to the lower_ubo_reference pass, we will see only
* ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
* variables, so no need for them to be in variable_ht.
+ *
+ * Atomic counters take no uniform storage, so there's no need to do
+ * anything here.
*/
- if (ir->is_in_uniform_block())
+ if (ir->is_in_uniform_block() || ir->type->contains_atomic())
return;
/* Track how big the whole uniform variable is, in case we need to put a
void
vec4_visitor::visit(ir_loop *ir)
{
- dst_reg counter;
-
/* We don't want debugging output to print the whole body of the
* loop as the annotation.
*/
this->base_ir = NULL;
- if (ir->counter != NULL) {
- this->base_ir = ir->counter;
- ir->counter->accept(this);
- counter = *(variable_storage(ir->counter));
-
- if (ir->from != NULL) {
- this->base_ir = ir->from;
- ir->from->accept(this);
-
- emit(MOV(counter, this->result));
- }
- }
-
emit(BRW_OPCODE_DO);
- if (ir->to) {
- this->base_ir = ir->to;
- ir->to->accept(this);
-
- emit(CMP(dst_null_d(), src_reg(counter), this->result,
- brw_conditional_for_comparison(ir->cmp)));
-
- vec4_instruction *inst = emit(BRW_OPCODE_BREAK);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-
visit_instructions(&ir->body_instructions);
-
- if (ir->increment) {
- this->base_ir = ir->increment;
- ir->increment->accept(this);
- emit(ADD(counter, src_reg(counter), this->result));
- }
-
emit(BRW_OPCODE_WHILE);
}
const ir_function_signature *sig;
exec_list empty;
- sig = ir->matching_signature(&empty);
+ sig = ir->matching_signature(NULL, &empty);
assert(sig);
break;
case ir_unop_sign:
- emit(MOV(result_dst, src_reg(0.0f)));
-
- emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
- inst = emit(MOV(result_dst, src_reg(1.0f)));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
- inst = emit(MOV(result_dst, src_reg(-1.0f)));
- inst->predicate = BRW_PREDICATE_NORMAL;
+ if (ir->type->is_float()) {
+ /* AND(val, 0x80000000) gives the sign bit.
+ *
+ * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
+ * zero.
+ */
+ emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
+
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ result_dst.type = BRW_REGISTER_TYPE_UD;
+ emit(AND(result_dst, op[0], src_reg(0x80000000u)));
+
+ inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u)));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ this->result.type = BRW_REGISTER_TYPE_F;
+ } else {
+ /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
+ * -> non-negative val generates 0x00000000.
+ * Predicated OR sets 1 if val is positive.
+ */
+ emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
+
+ emit(ASR(result_dst, op[0], src_reg(31)));
+ inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ }
break;
case ir_unop_rcp:
break;
case ir_binop_mul:
- if (ir->type->is_integer()) {
+ if (brw->gen < 8 && ir->type->is_integer()) {
/* For integer multiplication, the MUL uses the low 16 bits of one of
* the operands (src0 through SNB, src1 on IVB and later). The MACH
* accumulates in the contribution of the upper 16 bits of that
else
emit(MUL(result_dst, op[0], op[1]));
} else {
- struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
+ struct brw_reg acc = retype(brw_acc_reg(), result_dst.type);
emit(MUL(acc, op[0], op[1]));
emit(MACH(dst_null_d(), op[0], op[1]));
emit(MOV(result_dst, src_reg(acc)));
}
break;
+ case ir_binop_imul_high: {
+ struct brw_reg acc = retype(brw_acc_reg(), result_dst.type);
+
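+ /* MUL writes the full product into the accumulator; MACH then writes
+ * the high 32 bits of the result into the destination.
+ */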
+ emit(MUL(acc, op[0], op[1]));
+ emit(MACH(result_dst, op[0], op[1]));
+ break;
+ }
case ir_binop_div:
/* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
assert(ir->type->is_integer());
emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
break;
+ case ir_binop_carry: {
+ struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
+
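+ /* ADDC writes the per-channel carry bits of op0 + op1 into the
+ * accumulator; copy them out as the result.
+ */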
+ emit(ADDC(dst_null_ud(), op[0], op[1]));
+ emit(MOV(result_dst, src_reg(acc)));
+ break;
+ }
+ case ir_binop_borrow: {
+ struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
+
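+ /* SUBB writes the per-channel borrow bits of op0 - op1 into the
+ * accumulator; copy them out as the result.
+ */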
+ emit(SUBB(dst_null_ud(), op[0], op[1]));
+ emit(MOV(result_dst, src_reg(acc)));
+ break;
+ }
case ir_binop_mod:
/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
assert(ir->type->is_integer());
ir_constant *uniform_block = ir->operands[0]->as_constant();
ir_constant *const_offset_ir = ir->operands[1]->as_constant();
unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
- src_reg offset = op[1];
+ src_reg offset;
/* Now, load the vector from that offset. */
assert(ir->type->is_vector() || ir->type->is_scalar());
src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
packed_consts.type = result.type;
src_reg surf_index =
- src_reg(SURF_INDEX_VEC4_UBO(uniform_block->value.u[0]));
+ src_reg(prog_data->base.binding_table.ubo_start + uniform_block->value.u[0]);
if (const_offset_ir) {
- offset = src_reg(const_offset / 16);
+ if (brw->gen >= 8) {
+ /* Store the offset in a GRF so we can send-from-GRF. */
+ offset = src_reg(this, glsl_type::int_type);
+ emit(MOV(dst_reg(offset), src_reg(const_offset / 16)));
+ } else {
+ /* Immediates are fine on older generations since they'll be moved
+ * to a (potentially fake) MRF at the generator level.
+ */
+ offset = src_reg(const_offset / 16);
+ }
} else {
- emit(SHR(dst_reg(offset), offset, src_reg(4)));
+ offset = src_reg(this, glsl_type::uint_type);
+ emit(SHR(dst_reg(offset), op[1], src_reg(4)));
}
vec4_instruction *pull =
emit(LRP(result_dst, op[2], op[1], op[0]));
break;
+ case ir_triop_csel:
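+ /* Set the flag register on op[0] != 0, then use a predicated SEL to
+ * pick op[1] where the condition held and op[2] elsewhere.
+ */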
+ emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
+ inst = emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+
case ir_triop_bfi:
op[0] = fix_3src_operand(op[0]);
op[1] = fix_3src_operand(op[1]);
case ir_binop_pack_half_2x16_split:
assert(!"not reached: should not occur in vertex shader");
break;
+ case ir_binop_ldexp:
+ assert(!"not reached: should be handled by ldexp_to_arith()");
+ break;
}
}
this->result = src_reg(*reg);
/* System values get their swizzle from the dst_reg writemask */
- if (ir->var->mode == ir_var_system_value)
+ if (ir->var->data.mode == ir_var_system_value)
return;
if (type->is_scalar() || type->is_vector() || type->is_matrix())
emit_constant_values(&dst, ir);
}
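+
+/**
+ * Translate the atomic counter intrinsics (read, increment, predecrement)
+ * into untyped surface messages.
+ */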
+void
+vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
+{
+ ir_dereference *deref = static_cast<ir_dereference *>(
+ ir->actual_parameters.get_head());
+ ir_variable *location = deref->variable_referenced();
+ unsigned surf_index = (prog_data->base.binding_table.abo_start +
+ location->data.atomic.buffer_index);
+
+ /* Calculate the surface offset */
+ src_reg offset(this, glsl_type::uint_type);
+ ir_dereference_array *deref_array = deref->as_dereference_array();
+ if (deref_array) {
+ deref_array->array_index->accept(this);
+
+ src_reg tmp(this, glsl_type::uint_type);
+ emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
+ emit(ADD(dst_reg(offset), tmp, location->data.atomic.offset));
+ } else {
+ offset = location->data.atomic.offset;
+ }
+
+ /* Emit the appropriate machine instruction */
+ const char *callee = ir->callee->function_name();
+ dst_reg dst = get_assignment_lhs(ir->return_deref, this);
+
+ if (!strcmp("__intrinsic_atomic_read", callee)) {
+ emit_untyped_surface_read(surf_index, dst, offset);
+
+ } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
+ emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
+ src_reg(), src_reg());
+
+ } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
+ emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
+ src_reg(), src_reg());
+ }
+}
+
void
vec4_visitor::visit(ir_call *ir)
{
- assert(!"not reached");
+ const char *callee = ir->callee->function_name();
+
+ if (!strcmp("__intrinsic_atomic_read", callee) ||
+ !strcmp("__intrinsic_atomic_increment", callee) ||
+ !strcmp("__intrinsic_atomic_predecrement", callee)) {
+ visit_atomic_counter_intrinsic(ir);
+ } else {
+ assert(!"Unsupported intrinsic.");
+ }
+}
+
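+/**
+ * Fetch the MCS word for a texel, for use by the ld2dms message when
+ * sampling from a compressed multisample surface on Gen7+.
+ */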
+src_reg
+vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, int sampler)
+{
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MCS);
+ inst->base_mrf = 2;
+ inst->mlen = 1;
+ inst->sampler = sampler;
+ inst->dst = dst_reg(this, glsl_type::uvec4_type);
+ inst->dst.writemask = WRITEMASK_XYZW;
+
+ /* parameters are: u, v, r, lod; lod will always be zero due to API restrictions */
+ int param_base = inst->base_mrf;
+ int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
+ int zero_mask = 0xf & ~coord_mask;
+
+ emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
+ coordinate));
+
+ emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
+ src_reg(0)));
+
+ emit(inst);
+ return src_reg(inst->dst);
}
void
int sampler =
_mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
+ /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
+ * emitting anything other than setting up the constant result.
+ */
+ if (ir->op == ir_tg4) {
+ ir_constant *chan = ir->lod_info.component->as_constant();
+ int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+ if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
+ dst_reg result(this, ir->type);
+ this->result = src_reg(result);
+ emit(MOV(result, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
+ return;
+ }
+ }
+
/* Should be lowered by do_lower_texture_projection */
assert(!ir->projector);
+ /* Should be lowered */
+ assert(!ir->offset || !ir->offset->type->is_array());
+
/* Generate code to compute all the subexpression trees. This has to be
* done before loading any values into MRFs for the sampler message since
* generating these values may involve SEND messages that need the MRFs.
shadow_comparitor = this->result;
}
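+ /* A non-constant offset (textureGatherOffset) can't go in the message
+ * header, so evaluate it here and pass it in the message payload.
+ */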
+ bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
+ src_reg offset_value;
+ if (has_nonconstant_offset) {
+ ir->offset->accept(this);
+ offset_value = src_reg(this->result);
+ }
+
const glsl_type *lod_type = NULL, *sample_index_type = NULL;
- src_reg lod, dPdx, dPdy, sample_index;
+ src_reg lod, dPdx, dPdy, sample_index, mcs;
switch (ir->op) {
case ir_tex:
lod = src_reg(0.0f);
lod = this->result;
lod_type = ir->lod_info.lod->type;
break;
+ case ir_query_levels:
+ lod = src_reg(0);
+ lod_type = glsl_type::int_type;
+ break;
case ir_txf_ms:
ir->lod_info.sample_index->accept(this);
sample_index = this->result;
sample_index_type = ir->lod_info.sample_index->type;
+
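+ /* On Gen7+, sampling a compressed multisample surface also needs the
+ * MCS word for the texel; otherwise just pass zero.
+ */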
+ if (brw->gen >= 7 && key->tex.compressed_multisample_layout_mask & (1<<sampler))
+ mcs = emit_mcs_fetch(ir, coordinate, sampler);
+ else
+ mcs = src_reg(0u);
break;
case ir_txd:
ir->lod_info.grad.dPdx->accept(this);
break;
case ir_txb:
case ir_lod:
+ case ir_tg4:
break;
}
case ir_txs:
inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
break;
+ case ir_tg4:
+ if (has_nonconstant_offset)
+ inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TG4_OFFSET);
+ else
+ inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TG4);
+ break;
+ case ir_query_levels:
+ inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXS);
+ break;
case ir_txb:
assert(!"TXB is not valid for vertex shaders.");
break;
case ir_lod:
assert(!"LOD is not valid for vertex shaders.");
break;
+ default:
+ assert(!"Unrecognized tex op");
}
bool use_texture_offset = ir->offset != NULL && ir->op != ir_txf;
/* Texel offsets go in the message header; Gen4 also requires headers. */
- inst->header_present = use_texture_offset || brw->gen < 5;
+ inst->header_present = use_texture_offset || brw->gen < 5 || ir->op == ir_tg4;
inst->base_mrf = 2;
inst->mlen = inst->header_present + 1; /* always at least one */
inst->sampler = sampler;
inst->shadow_compare = ir->shadow_comparitor != NULL;
if (use_texture_offset)
- inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
+ inst->texture_offset = brw_texture_offset(ctx, ir->offset->as_constant());
+
+ /* Stuff the channel select bits in the top of the texture offset */
+ if (ir->op == ir_tg4)
+ inst->texture_offset |= gather_channel(ir, sampler) << 16;
/* MRF for the first parameter */
int param_base = inst->base_mrf + inst->header_present;
- if (ir->op == ir_txs) {
+ if (ir->op == ir_txs || ir->op == ir_query_levels) {
int writemask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
} else {
- int i, coord_mask = 0, zero_mask = 0;
/* Load the coordinate */
/* FINISHME: gl_clamp_mask and saturate */
- for (i = 0; i < ir->coordinate->type->vector_elements; i++)
- coord_mask |= (1 << i);
- for (; i < 4; i++)
- zero_mask |= (1 << i);
+ int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
+ int zero_mask = 0xf & ~coord_mask;
- if (ir->offset && ir->op == ir_txf) {
- /* It appears that the ld instruction used for txf does its
- * address bounds check before adding in the offset. To work
- * around this, just add the integer offset to the integer
- * texel coordinate, and don't put the offset in the header.
- */
- ir_constant *offset = ir->offset->as_constant();
- assert(offset);
-
- for (int j = 0; j < ir->coordinate->type->vector_elements; j++) {
- src_reg src = coordinate;
- src.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(src.swizzle, j),
- BRW_GET_SWZ(src.swizzle, j),
- BRW_GET_SWZ(src.swizzle, j),
- BRW_GET_SWZ(src.swizzle, j));
- emit(ADD(dst_reg(MRF, param_base, ir->coordinate->type, 1 << j),
- src, offset->value.i[j]));
- }
- } else {
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
- coordinate));
- }
- emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
- src_reg(0)));
+ emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
+ coordinate));
+
+ if (zero_mask != 0) {
+ emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
+ src_reg(0)));
+ }
/* Load the shadow comparitor */
- if (ir->shadow_comparitor && ir->op != ir_txd) {
+ if (ir->shadow_comparitor && ir->op != ir_txd && (ir->op != ir_tg4 || !has_nonconstant_offset)) {
emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
WRITEMASK_X),
shadow_comparitor));
} else if (ir->op == ir_txf_ms) {
emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
sample_index));
+ if (brw->gen >= 7) {
+ /* MCS data is in the first channel of `mcs`, but we need to get it into
+ * the .y channel of the second vec4 of params, so replicate .x across
+ * the whole vec4 and then mask off everything except .y
+ */
+ mcs.swizzle = BRW_SWIZZLE_XXXX;
+ emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
+ mcs));
+ }
inst->mlen++;
-
- /* on Gen7, there is an additional MCS parameter here after SI,
- * but we don't bother to emit it since it's always zero. If
- * we start supporting texturing from CMS surfaces, this will have
- * to change
- */
} else if (ir->op == ir_txd) {
const glsl_type *type = lod_type;
emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_XYZ), dPdy));
inst->mlen += 2;
}
+ } else if (ir->op == ir_tg4 && has_nonconstant_offset) {
+ if (ir->shadow_comparitor) {
+ emit(MOV(dst_reg(MRF, param_base, ir->shadow_comparitor->type, WRITEMASK_W),
+ shadow_comparitor));
+ }
+
+ emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
+ offset_value));
+ inst->mlen++;
}
}
swizzle_result(ir, src_reg(inst->dst), sampler);
}
+/**
+ * Set up the gather channel based on the swizzle, for gather4.
+ */
+uint32_t
+vec4_visitor::gather_channel(ir_texture *ir, int sampler)
+{
+ ir_constant *chan = ir->lod_info.component->as_constant();
+ int swiz = GET_SWZ(key->tex.swizzles[sampler], chan->value.i[0]);
+ switch (swiz) {
+ case SWIZZLE_X: return 0;
+ case SWIZZLE_Y:
+ /* gather4 sampler is broken for green channel on RG32F --
+ * we must ask for blue instead.
+ */
+ if (key->tex.gather_channel_quirk_mask & (1<<sampler))
+ return 2;
+ return 1;
+ case SWIZZLE_Z: return 2;
+ case SWIZZLE_W: return 3;
+ default:
+ assert(!"Not reached"); /* zero, one swizzles handled already */
+ return 0;
+ }
+}
+
void
vec4_visitor::swizzle_result(ir_texture *ir, src_reg orig_val, int sampler)
{
this->result = src_reg(this, ir->type);
dst_reg swizzled_result(this->result);
+ if (ir->op == ir_query_levels) {
+ /* # levels is in .w */
+ orig_val.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
+ emit(MOV(swizzled_result, orig_val));
+ return;
+ }
+
if (ir->op == ir_txs || ir->type == glsl_type::float_type
- || s == SWIZZLE_NOOP) {
+ || s == SWIZZLE_NOOP || ir->op == ir_tg4) {
emit(MOV(swizzled_result, orig_val));
return;
}
+
int zero_mask = 0, one_mask = 0, copy_mask = 0;
int swizzle[4] = {0};
assert(!"not reached");
}
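+
+/**
+ * Emit an untyped atomic message, writing the offset and up to two data
+ * arguments into the .x channel of successive MRFs.
+ */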
+void
+vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
+ dst_reg dst, src_reg offset,
+ src_reg src0, src_reg src1)
+{
+ unsigned mlen = 0;
+
+ /* Set the atomic operation offset. */
+ emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
+ mlen++;
+
+ /* Set the atomic operation arguments. */
+ if (src0.file != BAD_FILE) {
+ emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src0));
+ mlen++;
+ }
+
+ if (src1.file != BAD_FILE) {
+ emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), src1));
+ mlen++;
+ }
+
+ /* Emit the instruction. Note that this maps to the normal SIMD8
+ * untyped atomic message on Ivy Bridge, but that's OK because
+ * unused channels will be masked out.
+ */
+ vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
+ src_reg(atomic_op), src_reg(surf_index));
+ inst->base_mrf = 0;
+ inst->mlen = mlen;
+}
+
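+/**
+ * Emit an untyped surface read of the dword at `offset`.
+ */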
+void
+vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
+ src_reg offset)
+{
+ /* Set the surface read offset. */
+ emit(MOV(brw_writemask(brw_uvec_mrf(8, 0, 0), WRITEMASK_X), offset));
+
+ /* Emit the instruction. Note that this maps to the normal SIMD8
+ * untyped surface read message, but that's OK because unused
+ * channels will be masked out.
+ */
+ vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ,
+ dst, src_reg(surf_index));
+ inst->base_mrf = 0;
+ inst->mlen = 1;
+}
+
void
vec4_visitor::emit_ndc_computation()
{
}
/* Lower legacy ff and ClipVertex clipping to clip distances */
- if (key->userclip_active && !key->uses_clip_distance) {
+ if (key->userclip_active && !prog->UsesClipDistanceOut) {
current_annotation = "user clip distances";
output_reg[VARYING_SLOT_CLIP_DIST0] = dst_reg(this, glsl_type::vec4_type);
emit_clip_distances(output_reg[VARYING_SLOT_CLIP_DIST1], 4);
}
- /* Set up the VUE data for the first URB write */
- int slot;
- for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
- emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
-
- /* If this was max_usable_mrf, we can't fit anything more into this URB
- * WRITE.
+ /* We may need to split this up into several URB writes, so do them in a
+ * loop.
+ */
+ int slot = 0;
+ bool complete = false;
+ do {
+ /* URB offset is in URB row increments, and each of our MRFs is half of
+ * one of those, since we're doing interleaved writes.
*/
- if (mrf > max_usable_mrf) {
- slot++;
- break;
- }
- }
-
- bool complete = slot >= prog_data->vue_map.num_slots;
- current_annotation = "URB write";
- vec4_instruction *inst = emit_urb_write_opcode(complete);
- inst->base_mrf = base_mrf;
- inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+ int offset = slot / 2;
- /* Optional second URB write */
- if (!complete) {
mrf = base_mrf + 1;
-
for (; slot < prog_data->vue_map.num_slots; ++slot) {
- assert(mrf < max_usable_mrf);
-
emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
+
+ /* If this was max_usable_mrf, we can't fit anything more into this
+ * URB WRITE.
+ */
+ if (mrf > max_usable_mrf) {
+ slot++;
+ break;
+ }
}
+ complete = slot >= prog_data->vue_map.num_slots;
current_annotation = "URB write";
- inst = emit_urb_write_opcode(true /* complete */);
+ vec4_instruction *inst = emit_urb_write_opcode(complete);
inst->base_mrf = base_mrf;
inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
- /* URB destination offset. In the previous write, we got MRFs
- * 2-13 minus the one header MRF, so 12 regs. URB offset is in
- * URB row increments, and each of our MRFs is half of one of
- * those, since we're doing interleaved writes.
- */
- inst->offset = (max_usable_mrf - base_mrf) / 2;
- }
+ inst->offset += offset;
+ } while (!complete);
}
}
return index;
+ } else if (brw->gen >= 8) {
+ /* Store the offset in a GRF so we can send-from-GRF. */
+ src_reg offset = src_reg(this, glsl_type::int_type);
+ emit_before(inst, MOV(dst_reg(offset), src_reg(reg_offset)));
+ return offset;
} else {
int message_header_scale = brw->gen < 6 ? 16 : 1;
return src_reg(reg_offset * message_header_scale);
int base_offset)
{
int reg_offset = base_offset + orig_src.reg_offset;
- src_reg index = src_reg((unsigned)SURF_INDEX_VEC4_CONST_BUFFER);
+ src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
src_reg offset = get_pull_constant_offset(inst, orig_src.reladdr, reg_offset);
vec4_instruction *load;
struct gl_shader_program *shader_prog,
struct brw_shader *shader,
void *mem_ctx,
- bool debug_flag)
- : debug_flag(debug_flag)
+ bool debug_flag,
+ bool no_spills)
+ : sanity_param_count(0),
+ fail_msg(NULL),
+ first_non_payload_grf(0),
+ need_all_constants_in_pull_buffer(false),
+ debug_flag(debug_flag),
+ no_spills(no_spills)
{
this->brw = brw;
this->ctx = &brw->ctx;
this->prog = prog;
this->key = key;
this->prog_data = prog_data;
+ this->stage_prog_data = &prog_data->base;
this->variable_ht = hash_table_ctor(0,
hash_table_pointer_hash,