/* emit the arrays used for inputs and outputs - load/store intrinsics will
* be converted to reads/writes of these arrays
*/
- nir_setup_inputs();
nir_setup_outputs();
nir_setup_uniforms();
nir_emit_system_values();
}
}
-void
-fs_visitor::nir_setup_inputs()
-{
- if (stage != MESA_SHADER_FRAGMENT)
- return;
-
- nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
-
- nir_foreach_variable(var, &nir->inputs) {
- fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
-
- fs_reg reg;
- if (var->data.location == VARYING_SLOT_POS) {
- emit_fragcoord_interpolation(input);
- } else if (var->data.location == VARYING_SLOT_LAYER) {
- struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_LAYER, 1), 3);
- reg.type = BRW_REGISTER_TYPE_D;
- bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
- } else if (var->data.location == VARYING_SLOT_VIEWPORT) {
- struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_VIEWPORT, 2), 3);
- reg.type = BRW_REGISTER_TYPE_D;
- bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg);
- } else {
- int location = var->data.location;
- emit_general_interpolation(&input, var->name, var->type,
- (glsl_interp_mode) var->data.interpolation,
- &location, var->data.centroid,
- var->data.sample);
- }
- }
-}
-
-void
-fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
- const glsl_type *type,
- unsigned *location)
-{
- if (type->is_array() || type->is_matrix()) {
- const struct glsl_type *elem_type = glsl_get_array_element(type);
- const unsigned length = glsl_get_length(type);
-
- for (unsigned i = 0; i < length; i++) {
- nir_setup_single_output_varying(reg, elem_type, location);
- }
- } else if (type->is_record()) {
- for (unsigned i = 0; i < type->length; i++) {
- const struct glsl_type *field_type = type->fields.structure[i].type;
- nir_setup_single_output_varying(reg, field_type, location);
- }
- } else {
- assert(type->is_scalar() || type->is_vector());
- unsigned num_elements = type->vector_elements;
- if (type->is_double())
- num_elements *= 2;
- for (unsigned count = 0; count < num_elements; count += 4) {
- this->outputs[*location] = *reg;
- this->output_components[*location] = MIN2(4, num_elements - count);
- *reg = offset(*reg, bld, this->output_components[*location]);
- (*location)++;
- }
- }
-}
-
void
fs_visitor::nir_setup_outputs()
{
- if (stage == MESA_SHADER_TESS_CTRL)
+ if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT)
return;
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-
- nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
-
nir_foreach_variable(var, &nir->outputs) {
- fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
-
- switch (stage) {
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_TESS_EVAL:
- case MESA_SHADER_GEOMETRY: {
- unsigned location = var->data.location;
- nir_setup_single_output_varying(®, var->type, &location);
- break;
- }
- case MESA_SHADER_FRAGMENT:
- if (key->force_dual_color_blend &&
- var->data.location == FRAG_RESULT_DATA1) {
- this->dual_src_output = reg;
- this->do_dual_src = true;
- } else if (var->data.index > 0) {
- assert(var->data.location == FRAG_RESULT_DATA0);
- assert(var->data.index == 1);
- this->dual_src_output = reg;
- this->do_dual_src = true;
- } else if (var->data.location == FRAG_RESULT_COLOR) {
- /* Writing gl_FragColor outputs to all color regions. */
- for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
- this->outputs[i] = reg;
- this->output_components[i] = 4;
- }
- } else if (var->data.location == FRAG_RESULT_DEPTH) {
- this->frag_depth = reg;
- } else if (var->data.location == FRAG_RESULT_STENCIL) {
- this->frag_stencil = reg;
- } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
- this->sample_mask = reg;
- } else {
- int vector_elements = var->type->without_array()->vector_elements;
-
- /* gl_FragData or a user-defined FS output */
- assert(var->data.location >= FRAG_RESULT_DATA0 &&
- var->data.location < FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
-
- /* General color output. */
- for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
- int output = var->data.location - FRAG_RESULT_DATA0 + i;
- this->outputs[output] = offset(reg, bld, vector_elements * i);
- this->output_components[output] = vector_elements;
- }
- }
- break;
- default:
- unreachable("unhandled shader stage");
+ const unsigned vec4s =
+ var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
+ : type_size_vec4(var->type);
+ fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * vec4s);
+ for (unsigned i = 0; i < vec4s; i++) {
+ if (outputs[var->data.driver_location + i].file == BAD_FILE)
+ outputs[var->data.driver_location + i] = offset(reg, bld, 4 * i);
}
}
}
break;
case nir_instr_type_ssa_undef:
- nir_emit_undef(abld, nir_instr_as_ssa_undef(instr));
+ /* We create a new VGRF for undefs on every use (by handling
+ * them in get_nir_src()), rather than for each definition.
+ * This helps register coalescing eliminate MOVs from undef.
+ */
break;
case nir_instr_type_jump:
g0.negate = true;
}
- tmp.type = BRW_REGISTER_TYPE_W;
- tmp.subreg_offset = 2;
- tmp.stride = 2;
-
- bld.OR(tmp, g0, brw_imm_uw(0x3f80));
-
- tmp.type = BRW_REGISTER_TYPE_D;
- tmp.subreg_offset = 0;
- tmp.stride = 1;
+ bld.OR(subscript(tmp, BRW_REGISTER_TYPE_W, 1),
+ g0, brw_imm_uw(0x3f80));
} else {
/* Bit 31 of g1.6 is 0 if the polygon is front facing. */
fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
case nir_op_f2d:
case nir_op_i2d:
case nir_op_u2d:
+ case nir_op_f2i64:
+ case nir_op_f2u64:
+ case nir_op_i2i64:
+ case nir_op_i2u64:
+ case nir_op_u2i64:
+ case nir_op_u2u64:
+ case nir_op_b2i64:
/* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions:
*
* "When source or destination is 64b (...), regioning in Align1
* data elements aligned to 64-bit. This restriction does not apply to
* BDW and later.
*/
- if (devinfo->is_cherryview || devinfo->is_broxton) {
+ if (nir_dest_bit_size(instr->dest.dest) == 64 &&
+ nir_src_bit_size(instr->src[0].src) == 32 &&
+ (devinfo->is_cherryview || devinfo->is_broxton)) {
fs_reg tmp = bld.vgrf(result.type, 1);
tmp = subscript(tmp, op[0].type, 0);
inst = bld.MOV(tmp, op[0]);
case nir_op_d2f:
case nir_op_d2i:
case nir_op_d2u:
- inst = bld.MOV(result, op[0]);
- inst->saturate = instr->dest.saturate;
+ if (instr->op == nir_op_b2i64) {
+ bld.MOV(result, negate(op[0]));
+ } else {
+ inst = bld.MOV(result, op[0]);
+ inst->saturate = instr->dest.saturate;
+ }
break;
case nir_op_f2i:
break;
}
+ case nir_op_pack_int_2x32_split:
+ bld.emit(FS_OPCODE_PACK, result, op[0], op[1]);
+ break;
+
+ case nir_op_unpack_int_2x32_split_x:
+ case nir_op_unpack_int_2x32_split_y: {
+ if (instr->op == nir_op_unpack_int_2x32_split_x)
+ bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 0));
+ else
+ bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 1));
+ break;
+ }
+
case nir_op_fpow:
inst = bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]);
inst->saturate = instr->dest.saturate;
case nir_op_find_lsb:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
- bld.FBL(result, op[0]);
+
+ if (devinfo->gen < 7) {
+ fs_reg temp = vgrf(glsl_type::int_type);
+
+ /* (x & -x) generates a value that consists of only the LSB of x.
+ * For all powers of 2, findMSB(y) == findLSB(y).
+ */
+ fs_reg src = retype(op[0], BRW_REGISTER_TYPE_D);
+ fs_reg negated_src = src;
+
+ /* One must be negated, and the other must be non-negated. It
+ * doesn't matter which is which.
+ */
+ negated_src.negate = true;
+ src.negate = false;
+
+ bld.AND(temp, src, negated_src);
+ emit_find_msb_using_lzd(bld, result, temp, false);
+ } else {
+ bld.FBL(result, op[0]);
+ }
break;
case nir_op_ubitfield_extract:
nir_ssa_values[instr->def.index] = reg;
}
-void
-fs_visitor::nir_emit_undef(const fs_builder &bld, nir_ssa_undef_instr *instr)
-{
- const brw_reg_type reg_type =
- instr->def.bit_size == 32 ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF;
- nir_ssa_values[instr->def.index] =
- bld.vgrf(reg_type, instr->def.num_components);
-}
-
fs_reg
fs_visitor::get_nir_src(const nir_src &src)
{
fs_reg reg;
if (src.is_ssa) {
- reg = nir_ssa_values[src.ssa->index];
+ if (src.ssa->parent_instr->type == nir_instr_type_ssa_undef) {
+ const brw_reg_type reg_type = src.ssa->bit_size == 32 ?
+ BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_DF;
+ reg = bld.vgrf(reg_type, src.ssa->num_components);
+ } else {
+ reg = nir_ssa_values[src.ssa->index];
+ }
} else {
/* We don't handle indirects on locals */
assert(src.reg.indirect == NULL);
glsl_interp_mode interpolation)
{
struct brw_wm_prog_data *wm_prog_data =
- (struct brw_wm_prog_data *) bld.shader->stage_prog_data;
+ brw_wm_prog_data(bld.shader->stage_prog_data);
fs_inst *inst;
fs_reg payload;
int mlen;
inst = bld.emit(opcode, dst, payload, desc);
inst->mlen = mlen;
/* 2 floats per slot returned */
- inst->regs_written = 2 * bld.dispatch_width() / 8;
+ inst->size_written = 2 * dst.component_size(inst->exec_size);
inst->pi_noperspective = interpolation == INTERP_MODE_NOPERSPECTIVE;
wm_prog_data->pulls_bary = true;
{
assert(stage == MESA_SHADER_GEOMETRY);
- struct brw_gs_prog_data *gs_prog_data =
- (struct brw_gs_prog_data *) prog_data;
+ struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
if (gs_compile->control_data_header_size_bits == 0)
return;
assert(stage == MESA_SHADER_GEOMETRY);
assert(gs_compile->control_data_bits_per_vertex != 0);
- struct brw_gs_prog_data *gs_prog_data =
- (struct brw_gs_prog_data *) prog_data;
+ struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
const fs_builder abld = bld.annotate("emit control data bits");
const fs_builder fwa_bld = bld.exec_all();
fs_reg prev_count = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
abld.ADD(prev_count, vertex_count, brw_imm_ud(0xffffffffu));
unsigned log2_bits_per_vertex =
- _mesa_fls(gs_compile->control_data_bits_per_vertex);
+ util_last_bit(gs_compile->control_data_bits_per_vertex);
abld.SHR(dword_index, prev_count, brw_imm_ud(6u - log2_bits_per_vertex));
if (per_slot_offset.file != BAD_FILE) {
{
assert(stage == MESA_SHADER_GEOMETRY);
- struct brw_gs_prog_data *gs_prog_data =
- (struct brw_gs_prog_data *) prog_data;
+ struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
fs_reg vertex_count = get_nir_src(vertex_count_nir_src);
vertex_count.type = BRW_REGISTER_TYPE_UD;
* be recorded by transform feedback, we can simply discard all geometry
* bound to these streams when transform feedback is disabled.
*/
- if (stream_id > 0 && !nir->info.has_transform_feedback_varyings)
+ if (stream_id > 0 && !nir->info->has_transform_feedback_varyings)
return;
/* If we're outputting 32 control data bits or less, then we can wait
unsigned num_components,
unsigned first_component)
{
- struct brw_gs_prog_data *gs_prog_data = (struct brw_gs_prog_data *) prog_data;
+ struct brw_gs_prog_data *gs_prog_data = brw_gs_prog_data(prog_data);
nir_const_value *vertex_const = nir_src_as_const_value(vertex_src);
nir_const_value *offset_const = nir_src_as_const_value(offset_src);
} else {
for (unsigned i = 0; i < num_components; i++) {
bld.MOV(offset(dst, bld, i),
- fs_reg(ATTR, imm_offset + i, dst.type));
+ fs_reg(ATTR, imm_offset + i + first_component, dst.type));
}
}
return;
/* Use first_icp_handle as the base offset. There is one register
* of URB handles per vertex, so inform the register allocator that
- * we might read up to nir->info.gs.vertices_in registers.
+ * we might read up to nir->info->gs.vertices_in registers.
*/
bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
fs_reg(brw_vec8_grf(first_icp_handle, 0)),
fs_reg(icp_offset_bytes),
- brw_imm_ud(nir->info.gs.vertices_in * REG_SIZE));
+ brw_imm_ud(nir->info->gs.vertices_in * REG_SIZE));
}
} else {
assert(gs_prog_data->invocations > 1);
/* Use first_icp_handle as the base offset. There is one DWord
* of URB handles per vertex, so inform the register allocator that
- * we might read up to ceil(nir->info.gs.vertices_in / 8) registers.
+ * we might read up to ceil(nir->info->gs.vertices_in / 8) registers.
*/
bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
fs_reg(brw_vec8_grf(first_icp_handle, 0)),
fs_reg(icp_offset_bytes),
- brw_imm_ud(DIV_ROUND_UP(nir->info.gs.vertices_in, 8) *
+ brw_imm_ud(DIV_ROUND_UP(nir->info->gs.vertices_in, 8) *
REG_SIZE));
}
}
unsigned read_components = num_components + first_component;
fs_reg tmp = bld.vgrf(dst.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, icp_handle);
- inst->regs_written = read_components * type_sz(tmp_dst.type) / 4;
+ inst->size_written = read_components *
+ tmp.component_size(inst->exec_size);
for (unsigned i = 0; i < num_components; i++) {
bld.MOV(offset(tmp_dst, bld, i),
offset(tmp, bld, i + first_component));
} else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp_dst,
icp_handle);
- inst->regs_written = num_components * type_sz(tmp_dst.type) / 4;
+ inst->size_written = num_components *
+ tmp_dst.component_size(inst->exec_size);
}
inst->offset = base_offset + offset_const->u32[0];
inst->mlen = 1;
if (first_component != 0) {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
payload);
- inst->regs_written = read_components * type_sz(tmp_dst.type) / 4;
+ inst->size_written = read_components *
+ tmp.component_size(inst->exec_size);
for (unsigned i = 0; i < num_components; i++) {
bld.MOV(offset(tmp_dst, bld, i),
offset(tmp, bld, i + first_component));
} else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp_dst,
payload);
- inst->regs_written = num_components * type_sz(tmp_dst.type) / 4;
+ inst->size_written = num_components *
+ tmp_dst.component_size(inst->exec_size);
}
inst->offset = base_offset;
inst->mlen = 2;
/* Read the whole VUE header (because of alignment) and read .w. */
fs_reg tmp = bld.vgrf(dst.type, 4);
inst->dst = tmp;
- inst->regs_written = 4;
+ inst->size_written = 4 * REG_SIZE;
bld.MOV(dst, offset(tmp, bld, 3));
}
}
break;
}
+ case nir_intrinsic_load_input: {
+ fs_reg src = fs_reg(ATTR, instr->const_index[0], dest.type);
+ unsigned first_component = nir_intrinsic_component(instr);
+ unsigned num_components = instr->num_components;
+ enum brw_reg_type type = dest.type;
+
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ assert(const_offset && "Indirect input loads not allowed");
+ src = offset(src, bld, const_offset->u32[0]);
+
+ for (unsigned j = 0; j < num_components; j++) {
+ bld.MOV(offset(dest, bld, j), offset(src, bld, j + first_component));
+ }
+
+ if (type == BRW_REGISTER_TYPE_DF) {
+ /* Once the double vector is read, set again its original register
+ * type to continue with normal execution.
+ */
+ src = retype(src, type);
+ dest = retype(dest, type);
+ }
+
+ if (type_sz(src.type) == 8) {
+ shuffle_32bit_load_result_to_64bit_data(bld,
+ dest,
+ retype(dest, BRW_REGISTER_TYPE_F),
+ instr->num_components);
+ }
+ break;
+ }
+
default:
nir_emit_intrinsic(bld, instr);
break;
{
assert(stage == MESA_SHADER_TESS_CTRL);
struct brw_tcs_prog_key *tcs_key = (struct brw_tcs_prog_key *) key;
- struct brw_tcs_prog_data *tcs_prog_data =
- (struct brw_tcs_prog_data *) prog_data;
+ struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
fs_reg dst;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
break;
fs_reg m0 = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- fs_reg m0_2 = byte_offset(m0, 2 * sizeof(uint32_t));
+ fs_reg m0_2 = component(m0, 2);
- const fs_builder fwa_bld = bld.exec_all();
+ const fs_builder chanbld = bld.exec_all().group(1, 0);
/* Zero the message header */
- fwa_bld.MOV(m0, brw_imm_ud(0u));
+ bld.exec_all().MOV(m0, brw_imm_ud(0u));
/* Copy "Barrier ID" from r0.2, bits 16:13 */
- fwa_bld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
+ chanbld.AND(m0_2, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(INTEL_MASK(16, 13)));
/* Shift it up to bits 27:24. */
- fwa_bld.SHL(m0_2, m0_2, brw_imm_ud(11));
+ chanbld.SHL(m0_2, m0_2, brw_imm_ud(11));
/* Set the Barrier Count and the enable bit */
- fwa_bld.OR(m0_2, m0_2,
- brw_imm_ud(tcs_prog_data->instances << 8 | (1 << 15)));
+ chanbld.OR(m0_2, m0_2,
+ brw_imm_ud(tcs_prog_data->instances << 9 | (1 << 15)));
bld.emit(SHADER_OPCODE_BARRIER, bld.null_reg_ud(), m0);
break;
inst->offset = imm_offset;
inst->mlen = 2;
}
- inst->regs_written =
- ((num_components + first_component) * type_sz(dst.type) / 4);
+ inst->size_written = (num_components + first_component) *
+ inst->dst.component_size(inst->exec_size);
/* If we are reading 64-bit data using 32-bit read messages we need
* build proper 64-bit data elements by shuffling the low and high
if (inst->offset == 0 && indirect_offset.file == BAD_FILE) {
assert(type_sz(dst.type) < 8);
inst->dst = bld.vgrf(dst.type, 4);
- inst->regs_written = 4;
+ inst->size_written = 4 * REG_SIZE;
bld.MOV(dst, offset(inst->dst, bld, 3));
}
case nir_intrinsic_load_per_vertex_output: {
fs_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = instr->const_index[0];
+ unsigned first_component = nir_intrinsic_component(instr);
fs_inst *inst;
if (indirect_offset.file == BAD_FILE) {
bld.MOV(patch_handle,
retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
- if (imm_offset == 0) {
- /* This is a read of gl_TessLevelInner[], which lives in the
- * Patch URB header. The layout depends on the domain.
- */
- dst.type = BRW_REGISTER_TYPE_F;
- switch (tcs_key->tes_primitive_mode) {
- case GL_QUADS: {
- /* DWords 3-2 (reversed) */
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
-
- inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle);
- inst->offset = 0;
- inst->mlen = 1;
- inst->regs_written = 4;
-
- /* dst.xy = tmp.wz */
- bld.MOV(dst, offset(tmp, bld, 3));
- bld.MOV(offset(dst, bld, 1), offset(tmp, bld, 2));
- break;
- }
- case GL_TRIANGLES:
- /* DWord 4; hardcode offset = 1 and regs_written = 1 */
- inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle);
- inst->offset = 1;
- inst->mlen = 1;
- inst->regs_written = 1;
- break;
- case GL_ISOLINES:
- /* All channels are undefined. */
- break;
- default:
- unreachable("Bogus tessellation domain");
- }
- } else if (imm_offset == 1) {
- /* This is a read of gl_TessLevelOuter[], which lives in the
- * Patch URB header. The layout depends on the domain.
- */
- dst.type = BRW_REGISTER_TYPE_F;
-
- fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
- inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp, patch_handle);
- inst->offset = 1;
- inst->mlen = 1;
- inst->regs_written = 4;
-
- /* Reswizzle: WZYX */
- fs_reg srcs[4] = {
- offset(tmp, bld, 3),
- offset(tmp, bld, 2),
- offset(tmp, bld, 1),
- offset(tmp, bld, 0),
- };
-
- unsigned num_components;
- switch (tcs_key->tes_primitive_mode) {
- case GL_QUADS:
- num_components = 4;
- break;
- case GL_TRIANGLES:
- num_components = 3;
- break;
- case GL_ISOLINES:
- /* Isolines are not reversed; swizzle .zw -> .xy */
- srcs[0] = offset(tmp, bld, 2);
- srcs[1] = offset(tmp, bld, 3);
- num_components = 2;
- break;
- default:
- unreachable("Bogus tessellation domain");
+ {
+ if (first_component != 0) {
+ unsigned read_components =
+ instr->num_components + first_component;
+ fs_reg tmp = bld.vgrf(dst.type, read_components);
+ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
+ patch_handle);
+ inst->size_written = read_components * REG_SIZE;
+ for (unsigned i = 0; i < instr->num_components; i++) {
+ bld.MOV(offset(dst, bld, i),
+ offset(tmp, bld, i + first_component));
+ }
+ } else {
+ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
+ patch_handle);
+ inst->size_written = instr->num_components * REG_SIZE;
}
- bld.LOAD_PAYLOAD(dst, srcs, num_components, 0);
- } else {
- inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, patch_handle);
inst->offset = imm_offset;
inst->mlen = 1;
- inst->regs_written = instr->num_components;
}
} else {
/* Indirect indexing - use per-slot offsets as well. */
};
fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
-
- inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst, payload);
+ if (first_component != 0) {
+ unsigned read_components =
+ instr->num_components + first_component;
+ fs_reg tmp = bld.vgrf(dst.type, read_components);
+ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, tmp,
+ payload);
+ inst->size_written = read_components * REG_SIZE;
+ for (unsigned i = 0; i < instr->num_components; i++) {
+ bld.MOV(offset(dst, bld, i),
+ offset(tmp, bld, i + first_component));
+ }
+ } else {
+ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dst,
+ payload);
+ inst->size_written = instr->num_components * REG_SIZE;
+ }
inst->offset = imm_offset;
inst->mlen = 2;
- inst->regs_written = instr->num_components;
}
break;
}
if (indirect_offset.file != BAD_FILE) {
srcs[header_regs++] = indirect_offset;
- } else if (!is_passthrough_shader) {
- if (imm_offset == 0) {
- value.type = BRW_REGISTER_TYPE_F;
-
- mask &= (1 << tesslevel_inner_components(tcs_key->tes_primitive_mode)) - 1;
-
- /* This is a write to gl_TessLevelInner[], which lives in the
- * Patch URB header. The layout depends on the domain.
- */
- switch (tcs_key->tes_primitive_mode) {
- case GL_QUADS:
- /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
- * We use an XXYX swizzle to reverse put .xy in the .wz
- * channels, and use a .zw writemask.
- */
- mask = writemask_for_backwards_vector(mask);
- swiz = BRW_SWIZZLE4(0, 0, 1, 0);
- break;
- case GL_TRIANGLES:
- /* gl_TessLevelInner[].x lives at DWord 4, so we set the
- * writemask to X and bump the URB offset by 1.
- */
- imm_offset = 1;
- break;
- case GL_ISOLINES:
- /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
- return;
- default:
- unreachable("Bogus tessellation domain");
- }
- } else if (imm_offset == 1) {
- /* This is a write to gl_TessLevelOuter[] which lives in the
- * Patch URB Header at DWords 4-7. However, it's reversed, so
- * instead of .xyzw we have .wzyx.
- */
- value.type = BRW_REGISTER_TYPE_F;
-
- mask &= (1 << tesslevel_outer_components(tcs_key->tes_primitive_mode)) - 1;
-
- if (tcs_key->tes_primitive_mode == GL_ISOLINES) {
- /* Isolines .xy should be stored in .zw, in order. */
- swiz = BRW_SWIZZLE4(0, 0, 0, 1);
- mask <<= 2;
- } else {
- /* Other domains are reversed; store .wzyx instead of .xyzw */
- swiz = BRW_SWIZZLE_WZYX;
- mask = writemask_for_backwards_vector(mask);
- }
- }
}
if (mask == 0)
break;
- unsigned num_components = _mesa_fls(mask);
+ unsigned num_components = util_last_bit(mask);
enum opcode opcode;
/* We can only pack two 64-bit components in a single message, so send
nir_intrinsic_instr *instr)
{
assert(stage == MESA_SHADER_TESS_EVAL);
- struct brw_tes_prog_data *tes_prog_data = (struct brw_tes_prog_data *) prog_data;
+ struct brw_tes_prog_data *tes_prog_data = brw_tes_prog_data(prog_data);
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
}
break;
- case nir_intrinsic_load_tess_level_outer:
- /* When the TES reads gl_TessLevelOuter, we ensure that the patch header
- * appears as a push-model input. So, we can simply use the ATTR file
- * rather than issuing URB read messages. The data is stored in the
- * high DWords in reverse order - DWord 7 contains .x, DWord 6 contains
- * .y, and so on.
- */
- switch (tes_prog_data->domain) {
- case BRW_TESS_DOMAIN_QUAD:
- for (unsigned i = 0; i < 4; i++)
- bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
- break;
- case BRW_TESS_DOMAIN_TRI:
- for (unsigned i = 0; i < 3; i++)
- bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
- break;
- case BRW_TESS_DOMAIN_ISOLINE:
- for (unsigned i = 0; i < 2; i++)
- bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 6 + i));
- break;
- }
- break;
-
- case nir_intrinsic_load_tess_level_inner:
- /* When the TES reads gl_TessLevelInner, we ensure that the patch header
- * appears as a push-model input. So, we can simply use the ATTR file
- * rather than issuing URB read messages.
- */
- switch (tes_prog_data->domain) {
- case BRW_TESS_DOMAIN_QUAD:
- bld.MOV(dest, component(fs_reg(ATTR, 0), 3));
- bld.MOV(offset(dest, bld, 1), component(fs_reg(ATTR, 0), 2));
- break;
- case BRW_TESS_DOMAIN_TRI:
- bld.MOV(dest, component(fs_reg(ATTR, 0), 4));
- break;
- case BRW_TESS_DOMAIN_ISOLINE:
- /* ignore - value is undefined */
- break;
- }
- break;
-
case nir_intrinsic_load_input:
case nir_intrinsic_load_per_vertex_input: {
fs_reg indirect_offset = get_indirect_offset(instr);
fs_reg tmp = bld.vgrf(dest.type, read_components);
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
patch_handle);
- inst->regs_written = read_components;
+ inst->size_written = read_components * REG_SIZE;
for (unsigned i = 0; i < instr->num_components; i++) {
bld.MOV(offset(dest, bld, i),
offset(tmp, bld, i + first_component));
} else {
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest,
patch_handle);
- inst->regs_written = instr->num_components;
+ inst->size_written = instr->num_components * REG_SIZE;
}
inst->mlen = 1;
inst->offset = imm_offset;
}
inst->mlen = 2;
inst->offset = imm_offset;
- inst->regs_written =
- ((num_components + first_component) * type_sz(dest.type) / 4);
+ inst->size_written = (num_components + first_component) *
+ inst->dst.component_size(inst->exec_size);
/* If we are reading 64-bit data using 32-bit read messages we need
* build proper 64-bit data elements by shuffling the low and high
switch (instr->intrinsic) {
case nir_intrinsic_load_primitive_id:
assert(stage == MESA_SHADER_GEOMETRY);
- assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
+ assert(brw_gs_prog_data(prog_data)->include_primitive_id);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
break;
}
}
+/**
+ * Fetch the current render target layer index.
+ */
+static fs_reg
+fetch_render_target_array_index(const fs_builder &bld)
+{
+ if (bld.shader->devinfo->gen >= 6) {
+ /* The render target array index is provided in the thread payload as
+ * bits 26:16 of r0.0.
+ */
+ const fs_reg idx = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.AND(idx, brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 0, 1),
+ brw_imm_uw(0x7ff));
+ return idx;
+ } else {
+ /* Pre-SNB we only ever render into the first layer of the framebuffer
+ * since layered rendering is not implemented.
+ */
+ return brw_imm_ud(0);
+ }
+}
+
+/**
+ * Fake non-coherent framebuffer read implemented using TXF to fetch from the
+ * framebuffer at the current fragment coordinates and sample index.
+ */
+fs_inst *
+fs_visitor::emit_non_coherent_fb_read(const fs_builder &bld, const fs_reg &dst,
+ unsigned target)
+{
+ const struct gen_device_info *devinfo = bld.shader->devinfo;
+
+ assert(bld.shader->stage == MESA_SHADER_FRAGMENT);
+ const brw_wm_prog_key *wm_key =
+ reinterpret_cast<const brw_wm_prog_key *>(key);
+ assert(!wm_key->coherent_fb_fetch);
+ const struct brw_wm_prog_data *wm_prog_data =
+ brw_wm_prog_data(stage_prog_data);
+
+ /* Calculate the surface index relative to the start of the texture binding
+ * table block, since that's what the texturing messages expect.
+ */
+ const unsigned surface = target +
+ wm_prog_data->binding_table.render_target_read_start -
+ wm_prog_data->base.binding_table.texture_start;
+
+ brw_mark_surface_used(
+ bld.shader->stage_prog_data,
+ wm_prog_data->binding_table.render_target_read_start + target);
+
+ /* Calculate the fragment coordinates. */
+ const fs_reg coords = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
+ bld.MOV(offset(coords, bld, 0), pixel_x);
+ bld.MOV(offset(coords, bld, 1), pixel_y);
+ bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld));
+
+ /* Calculate the sample index and MCS payload when multisampling. Luckily
+ * the MCS fetch message behaves deterministically for UMS surfaces, so it
+ * shouldn't be necessary to recompile based on whether the framebuffer is
+ * CMS or UMS.
+ */
+ if (wm_key->multisample_fbo &&
+ nir_system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
+ nir_system_values[SYSTEM_VALUE_SAMPLE_ID] = *emit_sampleid_setup();
+
+ const fs_reg sample = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
+ const fs_reg mcs = wm_key->multisample_fbo ?
+ emit_mcs_fetch(coords, 3, brw_imm_ud(surface)) : fs_reg();
+
+ /* Use either a normal or a CMS texel fetch message depending on whether
+ * the framebuffer is single or multisample. On SKL+ use the wide CMS
+ * message just in case the framebuffer uses 16x multisampling, it should
+ * be equivalent to the normal CMS fetch for lower multisampling modes.
+ */
+ const opcode op = !wm_key->multisample_fbo ? SHADER_OPCODE_TXF_LOGICAL :
+ devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W_LOGICAL :
+ SHADER_OPCODE_TXF_CMS_LOGICAL;
+
+ /* Emit the instruction. */
+ const fs_reg srcs[] = { coords, fs_reg(), brw_imm_ud(0), fs_reg(),
+ sample, mcs,
+ brw_imm_ud(surface), brw_imm_ud(0),
+ fs_reg(), brw_imm_ud(3), brw_imm_ud(0) };
+ STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS);
+
+ fs_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs));
+ inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
+
+ return inst;
+}
+
+/**
+ * Actual coherent framebuffer read implemented using the native render target
+ * read message. Requires SKL+.
+ */
+static fs_inst *
+emit_coherent_fb_read(const fs_builder &bld, const fs_reg &dst, unsigned target)
+{
+ assert(bld.shader->devinfo->gen >= 9);
+ fs_inst *inst = bld.emit(FS_OPCODE_FB_READ_LOGICAL, dst);
+ inst->target = target;
+ inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
+
+ return inst;
+}
+
+static fs_reg
+alloc_temporary(const fs_builder &bld, unsigned size, fs_reg *regs, unsigned n)
+{
+ if (n && regs[0].file != BAD_FILE) {
+ return regs[0];
+
+ } else {
+ const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, size);
+
+ for (unsigned i = 0; i < n; i++)
+ regs[i] = tmp;
+
+ return tmp;
+ }
+}
+
+static fs_reg
+alloc_frag_output(fs_visitor *v, unsigned location)
+{
+ assert(v->stage == MESA_SHADER_FRAGMENT);
+ const brw_wm_prog_key *const key =
+ reinterpret_cast<const brw_wm_prog_key *>(v->key);
+ const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION);
+ const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
+
+ if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1))
+ return alloc_temporary(v->bld, 4, &v->dual_src_output, 1);
+
+ else if (l == FRAG_RESULT_COLOR)
+ return alloc_temporary(v->bld, 4, v->outputs,
+ MAX2(key->nr_color_regions, 1));
+
+ else if (l == FRAG_RESULT_DEPTH)
+ return alloc_temporary(v->bld, 1, &v->frag_depth, 1);
+
+ else if (l == FRAG_RESULT_STENCIL)
+ return alloc_temporary(v->bld, 1, &v->frag_stencil, 1);
+
+ else if (l == FRAG_RESULT_SAMPLE_MASK)
+ return alloc_temporary(v->bld, 1, &v->sample_mask, 1);
+
+ else if (l >= FRAG_RESULT_DATA0 &&
+ l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS)
+ return alloc_temporary(v->bld, 4,
+ &v->outputs[l - FRAG_RESULT_DATA0], 1);
+
+ else
+ unreachable("Invalid location");
+}
+
void
fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
assert(stage == MESA_SHADER_FRAGMENT);
- const struct brw_wm_prog_key *wm_key = (const struct brw_wm_prog_key *) key;
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
break;
}
+ case nir_intrinsic_load_layer_id:
+ dest.type = BRW_REGISTER_TYPE_UD;
+ bld.MOV(dest, fetch_render_target_array_index(bld));
+ break;
+
case nir_intrinsic_load_helper_invocation:
case nir_intrinsic_load_sample_mask_in:
case nir_intrinsic_load_sample_id: {
break;
}
+ case nir_intrinsic_store_output: {
+ const fs_reg src = get_nir_src(instr->src[0]);
+ const nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ assert(const_offset && "Indirect output stores not allowed");
+ const unsigned location = nir_intrinsic_base(instr) +
+ SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION);
+ const fs_reg new_dest = retype(alloc_frag_output(this, location),
+ src.type);
+
+ for (unsigned j = 0; j < instr->num_components; j++)
+ bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j),
+ offset(src, bld, j));
+
+ break;
+ }
+
+ case nir_intrinsic_load_output: {
+ const unsigned l = GET_FIELD(nir_intrinsic_base(instr),
+ BRW_NIR_FRAG_OUTPUT_LOCATION);
+ assert(l >= FRAG_RESULT_DATA0);
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ assert(const_offset && "Indirect output loads not allowed");
+ const unsigned target = l - FRAG_RESULT_DATA0 + const_offset->u32[0];
+ const fs_reg tmp = bld.vgrf(dest.type, 4);
+
+ if (reinterpret_cast<const brw_wm_prog_key *>(key)->coherent_fb_fetch)
+ emit_coherent_fb_read(bld, tmp, target);
+ else
+ emit_non_coherent_fb_read(bld, tmp, target);
+
+ for (unsigned j = 0; j < instr->num_components; j++) {
+ bld.MOV(offset(dest, bld, j),
+ offset(tmp, bld, nir_intrinsic_component(instr) + j));
+ }
+
+ break;
+ }
+
case nir_intrinsic_discard:
case nir_intrinsic_discard_if: {
/* We track our discarded pixels in f0.1. By predicating on it, we can
break;
}
- case nir_intrinsic_interp_var_at_centroid:
- case nir_intrinsic_interp_var_at_sample:
- case nir_intrinsic_interp_var_at_offset: {
- /* Handle ARB_gpu_shader5 interpolation intrinsics
- *
- * It's worth a quick word of explanation as to why we handle the full
- * variable-based interpolation intrinsic rather than a lowered version
- * with like we do for other inputs. We have to do that because the way
- * we set up inputs doesn't allow us to use the already setup inputs for
- * interpolation. At the beginning of the shader, we go through all of
- * the input variables and do the initial interpolation and put it in
- * the nir_inputs array based on its location as determined in
- * nir_lower_io. If the input isn't used, dead code cleans up and
- * everything works fine. However, when we get to the ARB_gpu_shader5
- * interpolation intrinsics, we need to reinterpolate the input
- * differently. If we used an intrinsic that just had an index it would
- * only give us the offset into the nir_inputs array. However, this is
- * useless because that value is post-interpolation and we need
- * pre-interpolation. In order to get the actual location of the bits
- * we get from the vertex fetching hardware, we need the variable.
- */
- fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
- const glsl_interp_mode interpolation =
- (glsl_interp_mode) instr->variables[0]->var->data.interpolation;
+ case nir_intrinsic_load_input: {
+ /* load_input is only used for flat inputs */
+ unsigned base = nir_intrinsic_base(instr);
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned num_components = instr->num_components;
+ enum brw_reg_type type = dest.type;
- switch (instr->intrinsic) {
- case nir_intrinsic_interp_var_at_centroid:
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_CENTROID,
- dst_xy,
- fs_reg(), /* src */
- brw_imm_ud(0u),
- interpolation);
- break;
+ /* Special case fields in the VUE header */
+ if (base == VARYING_SLOT_LAYER)
+ component = 1;
+ else if (base == VARYING_SLOT_VIEWPORT)
+ component = 2;
- case nir_intrinsic_interp_var_at_sample: {
- if (!wm_key->multisample_fbo) {
- /* From the ARB_gpu_shader5 specification:
- * "If multisample buffers are not available, the input varying
- * will be evaluated at the center of the pixel."
- */
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_CENTROID,
- dst_xy,
- fs_reg(), /* src */
- brw_imm_ud(0u),
- interpolation);
- break;
- }
+ if (nir_dest_bit_size(instr->dest) == 64) {
+ /* const_index is in 32-bit type size units that could not be aligned
+ * with DF. We need to read the double vector as if it was a float
+ * vector of twice the number of components to fetch the right data.
+ */
+ type = BRW_REGISTER_TYPE_F;
+ num_components *= 2;
+ }
- nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
+ for (unsigned int i = 0; i < num_components; i++) {
+ struct brw_reg interp = interp_reg(base, component + i);
+ interp = suboffset(interp, 3);
+ bld.emit(FS_OPCODE_CINTERP, offset(retype(dest, type), bld, i),
+ retype(fs_reg(interp), type));
+ }
- if (const_sample) {
- unsigned msg_data = const_sample->i32[0] << 4;
+ if (nir_dest_bit_size(instr->dest) == 64) {
+ shuffle_32bit_load_result_to_64bit_data(bld,
+ dest,
+ retype(dest, type),
+ instr->num_components);
+ }
+ break;
+ }
+
+ case nir_intrinsic_load_barycentric_pixel:
+ case nir_intrinsic_load_barycentric_centroid:
+ case nir_intrinsic_load_barycentric_sample:
+ /* Do nothing - load_interpolated_input handling will handle it later. */
+ break;
+ case nir_intrinsic_load_barycentric_at_sample: {
+ const glsl_interp_mode interpolation =
+ (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
+
+ nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
+
+ if (const_sample) {
+ unsigned msg_data = const_sample->i32[0] << 4;
+
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SAMPLE,
+ dest,
+ fs_reg(), /* src */
+ brw_imm_ud(msg_data),
+ interpolation);
+ } else {
+ const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_UD);
+
+ if (nir_src_is_dynamically_uniform(instr->src[0])) {
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0)
+ .SHL(msg_data, sample_id, brw_imm_ud(4u));
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dst_xy,
+ dest,
fs_reg(), /* src */
- brw_imm_ud(msg_data),
+ msg_data,
interpolation);
} else {
- const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
- BRW_REGISTER_TYPE_UD);
-
- if (nir_src_is_dynamically_uniform(instr->src[0])) {
- const fs_reg sample_id = bld.emit_uniformize(sample_src);
- const fs_reg msg_data = vgrf(glsl_type::uint_type);
- bld.exec_all().group(1, 0)
- .SHL(msg_data, sample_id, brw_imm_ud(4u));
+ /* Make a loop that sends a message to the pixel interpolater
+ * for the sample number in each live channel. If there are
+ * multiple channels with the same sample number then these
+ * will be handled simultaneously with a single interation of
+ * the loop.
+ */
+ bld.emit(BRW_OPCODE_DO);
+
+ /* Get the next live sample number into sample_id_reg */
+ const fs_reg sample_id = bld.emit_uniformize(sample_src);
+
+ /* Set the flag register so that we can perform the send
+ * message on all channels that have the same sample number
+ */
+ bld.CMP(bld.null_reg_ud(),
+ sample_src, sample_id,
+ BRW_CONDITIONAL_EQ);
+ const fs_reg msg_data = vgrf(glsl_type::uint_type);
+ bld.exec_all().group(1, 0)
+ .SHL(msg_data, sample_id, brw_imm_ud(4u));
+ fs_inst *inst =
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dst_xy,
+ dest,
fs_reg(), /* src */
msg_data,
interpolation);
- } else {
- /* Make a loop that sends a message to the pixel interpolater
- * for the sample number in each live channel. If there are
- * multiple channels with the same sample number then these
- * will be handled simultaneously with a single interation of
- * the loop.
- */
- bld.emit(BRW_OPCODE_DO);
-
- /* Get the next live sample number into sample_id_reg */
- const fs_reg sample_id = bld.emit_uniformize(sample_src);
+ set_predicate(BRW_PREDICATE_NORMAL, inst);
- /* Set the flag register so that we can perform the send
- * message on all channels that have the same sample number
- */
- bld.CMP(bld.null_reg_ud(),
- sample_src, sample_id,
- BRW_CONDITIONAL_EQ);
- const fs_reg msg_data = vgrf(glsl_type::uint_type);
- bld.exec_all().group(1, 0)
- .SHL(msg_data, sample_id, brw_imm_ud(4u));
- fs_inst *inst =
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_SAMPLE,
- dst_xy,
- fs_reg(), /* src */
- msg_data,
- interpolation);
- set_predicate(BRW_PREDICATE_NORMAL, inst);
-
- /* Continue the loop if there are any live channels left */
- set_predicate_inv(BRW_PREDICATE_NORMAL,
- true, /* inverse */
- bld.emit(BRW_OPCODE_WHILE));
- }
+ /* Continue the loop if there are any live channels left */
+ set_predicate_inv(BRW_PREDICATE_NORMAL,
+ true, /* inverse */
+ bld.emit(BRW_OPCODE_WHILE));
}
-
- break;
}
+ break;
+ }
- case nir_intrinsic_interp_var_at_offset: {
- nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ case nir_intrinsic_load_barycentric_at_offset: {
+ const glsl_interp_mode interpolation =
+ (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr);
- if (const_offset) {
- unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
- unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
- emit_pixel_interpolater_send(bld,
- FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
- dst_xy,
- fs_reg(), /* src */
- brw_imm_ud(off_x | (off_y << 4)),
- interpolation);
- } else {
- fs_reg src = vgrf(glsl_type::ivec2_type);
- fs_reg offset_src = retype(get_nir_src(instr->src[0]),
- BRW_REGISTER_TYPE_F);
- for (int i = 0; i < 2; i++) {
- fs_reg temp = vgrf(glsl_type::float_type);
- bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f));
- fs_reg itemp = vgrf(glsl_type::int_type);
- /* float to int */
- bld.MOV(itemp, temp);
-
- /* Clamp the upper end of the range to +7/16.
- * ARB_gpu_shader5 requires that we support a maximum offset
- * of +0.5, which isn't representable in a S0.4 value -- if
- * we didn't clamp it, we'd end up with -8/16, which is the
- * opposite of what the shader author wanted.
- *
- * This is legal due to ARB_gpu_shader5's quantization
- * rules:
- *
- * "Not all values of <offset> may be supported; x and y
- * offsets may be rounded to fixed-point values with the
- * number of fraction bits given by the
- * implementation-dependent constant
- * FRAGMENT_INTERPOLATION_OFFSET_BITS"
- */
- set_condmod(BRW_CONDITIONAL_L,
- bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7)));
- }
+ if (const_offset) {
+ unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
+ unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
- const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
- emit_pixel_interpolater_send(bld,
- opcode,
- dst_xy,
- src,
- brw_imm_ud(0u),
- interpolation);
+ emit_pixel_interpolater_send(bld,
+ FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
+ dest,
+ fs_reg(), /* src */
+ brw_imm_ud(off_x | (off_y << 4)),
+ interpolation);
+ } else {
+ fs_reg src = vgrf(glsl_type::ivec2_type);
+ fs_reg offset_src = retype(get_nir_src(instr->src[0]),
+ BRW_REGISTER_TYPE_F);
+ for (int i = 0; i < 2; i++) {
+ fs_reg temp = vgrf(glsl_type::float_type);
+ bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f));
+ fs_reg itemp = vgrf(glsl_type::int_type);
+ /* float to int */
+ bld.MOV(itemp, temp);
+
+ /* Clamp the upper end of the range to +7/16.
+ * ARB_gpu_shader5 requires that we support a maximum offset
+ * of +0.5, which isn't representable in a S0.4 value -- if
+ * we didn't clamp it, we'd end up with -8/16, which is the
+ * opposite of what the shader author wanted.
+ *
+ * This is legal due to ARB_gpu_shader5's quantization
+ * rules:
+ *
+ * "Not all values of <offset> may be supported; x and y
+ * offsets may be rounded to fixed-point values with the
+ * number of fraction bits given by the
+ * implementation-dependent constant
+ * FRAGMENT_INTERPOLATION_OFFSET_BITS"
+ */
+ set_condmod(BRW_CONDITIONAL_L,
+ bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7)));
}
- break;
+
+ const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
+ emit_pixel_interpolater_send(bld,
+ opcode,
+ dest,
+ src,
+ brw_imm_ud(0u),
+ interpolation);
}
+ break;
+ }
- default:
- unreachable("Invalid intrinsic");
+ case nir_intrinsic_load_interpolated_input: {
+ if (nir_intrinsic_base(instr) == VARYING_SLOT_POS) {
+ emit_fragcoord_interpolation(dest);
+ break;
}
- for (unsigned j = 0; j < instr->num_components; j++) {
- fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
- src.type = dest.type;
+ assert(instr->src[0].ssa &&
+ instr->src[0].ssa->parent_instr->type == nir_instr_type_intrinsic);
+ nir_intrinsic_instr *bary_intrinsic =
+ nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
+ nir_intrinsic_op bary_intrin = bary_intrinsic->intrinsic;
+ enum glsl_interp_mode interp_mode =
+ (enum glsl_interp_mode) nir_intrinsic_interp_mode(bary_intrinsic);
+ fs_reg dst_xy;
+
+ if (bary_intrin == nir_intrinsic_load_barycentric_at_offset ||
+ bary_intrin == nir_intrinsic_load_barycentric_at_sample) {
+ /* Use the result of the PI message */
+ dst_xy = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_F);
+ } else {
+ /* Use the delta_xy values computed from the payload */
+ enum brw_barycentric_mode bary =
+ brw_barycentric_mode(interp_mode, bary_intrin);
- bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
- dest = offset(dest, bld, 1);
+ dst_xy = this->delta_xy[bary];
+ }
+
+ for (unsigned int i = 0; i < instr->num_components; i++) {
+ fs_reg interp =
+ fs_reg(interp_reg(nir_intrinsic_base(instr),
+ nir_intrinsic_component(instr) + i));
+ interp.type = BRW_REGISTER_TYPE_F;
+ dest.type = BRW_REGISTER_TYPE_F;
+
+ if (devinfo->gen < 6 && interp_mode == INTERP_MODE_SMOOTH) {
+ fs_reg tmp = vgrf(glsl_type::float_type);
+ bld.emit(FS_OPCODE_LINTERP, tmp, dst_xy, interp);
+ bld.MUL(offset(dest, bld, i), tmp, this->pixel_w);
+ } else {
+ bld.emit(FS_OPCODE_LINTERP, offset(dest, bld, i), dst_xy, interp);
+ }
}
break;
}
+
default:
nir_emit_intrinsic(bld, instr);
break;
nir_intrinsic_instr *instr)
{
assert(stage == MESA_SHADER_COMPUTE);
- struct brw_cs_prog_data *cs_prog_data =
- (struct brw_cs_prog_data *) prog_data;
+ struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
switch (instr->intrinsic) {
case nir_intrinsic_atomic_counter_inc:
case nir_intrinsic_atomic_counter_dec:
- case nir_intrinsic_atomic_counter_read: {
+ case nir_intrinsic_atomic_counter_read:
+ case nir_intrinsic_atomic_counter_add:
+ case nir_intrinsic_atomic_counter_min:
+ case nir_intrinsic_atomic_counter_max:
+ case nir_intrinsic_atomic_counter_and:
+ case nir_intrinsic_atomic_counter_or:
+ case nir_intrinsic_atomic_counter_xor:
+ case nir_intrinsic_atomic_counter_exchange:
+ case nir_intrinsic_atomic_counter_comp_swap: {
if (stage == MESA_SHADER_FRAGMENT &&
instr->intrinsic != nir_intrinsic_atomic_counter_read)
- ((struct brw_wm_prog_data *)prog_data)->has_side_effects = true;
+ brw_wm_prog_data(prog_data)->has_side_effects = true;
+
+ /* Get some metadata from the image intrinsic. */
+ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
/* Get the arguments of the atomic intrinsic. */
const fs_reg offset = get_nir_src(instr->src[0]);
const unsigned surface = (stage_prog_data->binding_table.abo_start +
instr->const_index[0]);
+ const fs_reg src0 = (info->num_srcs >= 2
+ ? get_nir_src(instr->src[1]) : fs_reg());
+ const fs_reg src1 = (info->num_srcs >= 3
+ ? get_nir_src(instr->src[2]) : fs_reg());
fs_reg tmp;
+ assert(info->num_srcs <= 3);
+
/* Emit a surface read or atomic op. */
- switch (instr->intrinsic) {
- case nir_intrinsic_atomic_counter_read:
+ if (instr->intrinsic == nir_intrinsic_atomic_counter_read) {
tmp = emit_untyped_read(bld, brw_imm_ud(surface), offset, 1, 1);
- break;
-
- case nir_intrinsic_atomic_counter_inc:
- tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(),
- fs_reg(), 1, 1, BRW_AOP_INC);
- break;
-
- case nir_intrinsic_atomic_counter_dec:
- tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, fs_reg(),
- fs_reg(), 1, 1, BRW_AOP_PREDEC);
- break;
-
- default:
- unreachable("Unreachable");
+ } else {
+ tmp = emit_untyped_atomic(bld, brw_imm_ud(surface), offset, src0,
+ src1, 1, 1,
+ get_atomic_counter_op(instr->intrinsic));
}
/* Assign the result. */
if (stage == MESA_SHADER_FRAGMENT &&
instr->intrinsic != nir_intrinsic_image_load)
- ((struct brw_wm_prog_data *)prog_data)->has_side_effects = true;
+ brw_wm_prog_data(prog_data)->has_side_effects = true;
/* Get the referenced image variable and type. */
const nir_variable *var = instr->variables[0]->var;
const fs_builder ubld = bld.group(8, 0);
const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
- ->regs_written = 2;
+ ->size_written = 2 * REG_SIZE;
break;
}
case nir_intrinsic_shader_clock: {
/* We cannot do anything if there is an event, so ignore it for now */
- fs_reg shader_clock = get_timestamp(bld);
- const fs_reg srcs[] = { shader_clock.set_smear(0), shader_clock.set_smear(1) };
-
+ const fs_reg shader_clock = get_timestamp(bld);
+ const fs_reg srcs[] = { component(shader_clock, 0),
+ component(shader_clock, 1) };
bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0);
break;
}
if (const_offset) {
/* Offsets are in bytes but they should always be multiples of 4 */
assert(const_offset->u32[0] % 4 == 0);
- src.reg_offset = const_offset->u32[0] / 4;
+ src.offset = const_offset->u32[0];
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(dest, bld, j), offset(src, bld, j));
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- nir->info.num_ubos - 1);
+ nir->info->num_ubos - 1);
}
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
* and we have to split it if necessary.
*/
const unsigned type_size = type_sz(dest.type);
- const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F);
+ const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
+ const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0);
+ const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+
for (unsigned c = 0; c < instr->num_components;) {
const unsigned base = const_offset->u32[0] + c * type_size;
-
- /* Number of usable components in the next 16B-aligned load */
+ /* Number of usable components in the next block-aligned load. */
const unsigned count = MIN2(instr->num_components - c,
- (16 - base % 16) / type_size);
+ (block_sz - base % block_sz) / type_size);
- bld.exec_all()
- .emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- packed_consts, surf_index, brw_imm_ud(base & ~15));
+ ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ packed_consts, surf_index,
+ brw_imm_ud(base & ~(block_sz - 1)));
const fs_reg consts =
- retype(byte_offset(packed_consts, base & 15), dest.type);
+ retype(byte_offset(packed_consts, base & (block_sz - 1)),
+ dest.type);
for (unsigned d = 0; d < count; d++)
bld.MOV(offset(dest, bld, c + d), component(consts, d));
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
- nir->info.num_ssbos - 1);
+ nir->info->num_ssbos - 1);
}
fs_reg offset_reg;
break;
}
- case nir_intrinsic_load_input: {
- fs_reg src;
- unsigned num_components = instr->num_components;
- enum brw_reg_type type = dest.type;
-
- if (stage == MESA_SHADER_VERTEX) {
- src = fs_reg(ATTR, instr->const_index[0], dest.type);
- } else {
- assert(type_sz(type) >= 4);
- if (type == BRW_REGISTER_TYPE_DF) {
- /* const_index is in 32-bit type size units that could not be aligned
- * with DF. We need to read the double vector as if it was a float
- * vector of twice the number of components to fetch the right data.
- */
- dest = retype(dest, BRW_REGISTER_TYPE_F);
- num_components *= 2;
- }
- src = offset(retype(nir_inputs, dest.type), bld,
- instr->const_index[0]);
- }
-
- nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
- assert(const_offset && "Indirect input loads not allowed");
- src = offset(src, bld, const_offset->u32[0]);
-
- for (unsigned j = 0; j < num_components; j++) {
- bld.MOV(offset(dest, bld, j), offset(src, bld, j));
- }
-
- if (type == BRW_REGISTER_TYPE_DF) {
- /* Once the double vector is read, set again its original register
- * type to continue with normal execution.
- */
- src = retype(src, type);
- dest = retype(dest, type);
- }
-
- if (type_sz(src.type) == 8) {
- shuffle_32bit_load_result_to_64bit_data(bld,
- dest,
- retype(dest, BRW_REGISTER_TYPE_F),
- instr->num_components);
- }
-
- break;
- }
-
case nir_intrinsic_store_ssbo: {
assert(devinfo->gen >= 7);
if (stage == MESA_SHADER_FRAGMENT)
- ((struct brw_wm_prog_data *)prog_data)->has_side_effects = true;
+ brw_wm_prog_data(prog_data)->has_side_effects = true;
/* Block index */
fs_reg surf_index;
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
- nir->info.num_ssbos - 1);
+ nir->info->num_ssbos - 1);
}
/* Value */
case nir_intrinsic_store_output: {
fs_reg src = get_nir_src(instr->src[0]);
- fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
- instr->const_index[0]);
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
assert(const_offset && "Indirect output stores not allowed");
- new_dest = offset(new_dest, bld, const_offset->u32[0]);
+ fs_reg new_dest = retype(offset(outputs[instr->const_index[0]], bld,
+ 4 * const_offset->u32[0]), src.type);
unsigned num_components = instr->num_components;
+ unsigned first_component = nir_intrinsic_component(instr);
unsigned bit_size = instr->src[0].is_ssa ?
instr->src[0].ssa->bit_size : instr->src[0].reg.reg->bit_size;
if (bit_size == 64) {
}
for (unsigned j = 0; j < num_components; j++) {
- bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
+ bld.MOV(offset(new_dest, bld, j + first_component),
+ offset(src, bld, j));
}
break;
}
src_payload, brw_imm_ud(index));
inst->header_size = 0;
inst->mlen = 1;
- inst->regs_written = 4;
+ inst->size_written = 4 * REG_SIZE;
bld.MOV(retype(dest, ret_payload.type), component(ret_payload, 0));
brw_mark_surface_used(prog_data, index);
int op, nir_intrinsic_instr *instr)
{
if (stage == MESA_SHADER_FRAGMENT)
- ((struct brw_wm_prog_data *)prog_data)->has_side_effects = true;
+ brw_wm_prog_data(prog_data)->has_side_effects = true;
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ssbo_start +
- nir->info.num_ssbos - 1);
+ nir->info->num_ssbos - 1);
}
fs_reg offset = get_nir_src(instr->src[1]);
if (op == BRW_AOP_CMPWR)
data2 = get_nir_src(instr->src[3]);
- /* Emit the actual atomic operation operation */
+ /* Emit the actual atomic operation */
fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
data1, data2,
dest = get_nir_dest(instr->dest);
fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
- fs_reg offset = get_nir_src(instr->src[0]);
+ fs_reg offset;
fs_reg data1 = get_nir_src(instr->src[1]);
fs_reg data2;
if (op == BRW_AOP_CMPWR)
data2 = get_nir_src(instr->src[2]);
+ /* Get the offset */
+ nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+ if (const_offset) {
+ offset = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
+ } else {
+ offset = vgrf(glsl_type::uint_type);
+ bld.ADD(offset,
+ retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(instr->const_index[0]));
+ }
+
/* Emit the actual atomic operation operation */
fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_d(0);
+ uint32_t header_bits = 0;
for (unsigned i = 0; i < instr->num_srcs; i++) {
fs_reg src = get_nir_src(instr->src[i].src);
switch (instr->src[i].src_type) {
srcs[TEX_LOGICAL_SRC_LOD] =
retype(get_nir_src_imm(instr->src[i].src), BRW_REGISTER_TYPE_F);
break;
- case nir_tex_src_comparitor:
+ case nir_tex_src_comparator:
srcs[TEX_LOGICAL_SRC_SHADOW_C] = retype(src, BRW_REGISTER_TYPE_F);
break;
case nir_tex_src_coord:
case nir_tex_src_offset: {
nir_const_value *const_offset =
nir_src_as_const_value(instr->src[i].src);
- if (const_offset) {
- unsigned header_bits = brw_texture_offset(const_offset->i32, 3);
- if (header_bits != 0)
- srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] = brw_imm_ud(header_bits);
+ unsigned offset_bits = 0;
+ if (const_offset &&
+ brw_texture_offset(const_offset->i32,
+ nir_tex_instr_src_size(instr, i),
+ &offset_bits)) {
+ header_bits |= offset_bits;
} else {
- srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] =
+ srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
retype(src, BRW_REGISTER_TYPE_D);
}
break;
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
- if (instr->op == nir_texop_query_levels) {
- /* textureQueryLevels() is implemented in terms of TXS so we need to
- * pass a valid LOD argument.
+ if (instr->op == nir_texop_query_levels ||
+ (instr->op == nir_texop_tex && stage != MESA_SHADER_FRAGMENT)) {
+ /* textureQueryLevels() and texture() are implemented in terms of TXS
+ * and TXL respectively, so we need to pass a valid LOD argument.
*/
assert(srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE);
srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u);
enum opcode opcode;
switch (instr->op) {
case nir_texop_tex:
- opcode = SHADER_OPCODE_TEX_LOGICAL;
+ opcode = (stage == MESA_SHADER_FRAGMENT ? SHADER_OPCODE_TEX_LOGICAL :
+ SHADER_OPCODE_TXL_LOGICAL);
break;
case nir_texop_txb:
opcode = FS_OPCODE_TXB_LOGICAL;
opcode = SHADER_OPCODE_LOD_LOGICAL;
break;
case nir_texop_tg4:
- if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != BAD_FILE &&
- srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != IMM)
+ if (srcs[TEX_LOGICAL_SRC_TG4_OFFSET].file != BAD_FILE)
opcode = SHADER_OPCODE_TG4_OFFSET_LOGICAL;
else
opcode = SHADER_OPCODE_TG4_LOGICAL;
unreachable("unknown texture opcode");
}
+ if (instr->op == nir_texop_tg4) {
+ if (instr->component == 1 &&
+ key_tex->gather_channel_quirk_mask & (1 << texture)) {
+ /* gather4 sampler is broken for green channel on RG32F --
+ * we must ask for blue instead.
+ */
+ header_bits |= 2 << 16;
+ } else {
+ header_bits |= instr->component << 16;
+ }
+ }
+
fs_reg dst = bld.vgrf(brw_type_for_nir_type(instr->dest_type), 4);
fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
+ inst->offset = header_bits;
const unsigned dest_size = nir_tex_instr_dest_size(instr);
if (devinfo->gen >= 9 &&
nir_ssa_def_components_read(&instr->dest.ssa):
(1 << dest_size) - 1;
assert(write_mask != 0); /* dead code should have been eliminated */
- inst->regs_written = _mesa_fls(write_mask) * dispatch_width / 8;
+ inst->size_written = util_last_bit(write_mask) *
+ inst->dst.component_size(inst->exec_size);
} else {
- inst->regs_written = 4 * dispatch_width / 8;
+ inst->size_written = 4 * inst->dst.component_size(inst->exec_size);
}
if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE)
inst->shadow_compare = true;
- if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file == IMM)
- inst->offset = srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].ud;
-
- if (instr->op == nir_texop_tg4) {
- if (instr->component == 1 &&
- key_tex->gather_channel_quirk_mask & (1 << texture)) {
- /* gather4 sampler is broken for green channel on RG32F --
- * we must ask for blue instead.
- */
- inst->offset |= 2 << 16;
- } else {
- inst->offset |= instr->component << 16;
- }
-
- if (devinfo->gen == 6)
- emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst);
- }
+ if (instr->op == nir_texop_tg4 && devinfo->gen == 6)
+ emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst);
fs_reg nir_dest[4];
for (unsigned i = 0; i < dest_size; i++)
nir_dest[i] = offset(dst, bld, i);
- bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
- instr->is_array;
-
if (instr->op == nir_texop_query_levels) {
/* # levels is in .w */
nir_dest[0] = offset(dst, bld, 3);
- } else if (instr->op == nir_texop_txs && dest_size >= 3 &&
- (devinfo->gen < 7 || is_cube_array)) {
+ } else if (instr->op == nir_texop_txs &&
+ dest_size >= 3 && devinfo->gen < 7) {
+ /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
fs_reg depth = offset(dst, bld, 2);
- fs_reg fixed_depth = vgrf(glsl_type::int_type);
-
- if (is_cube_array) {
- /* fixup #layers for cube map arrays */
- bld.emit(SHADER_OPCODE_INT_QUOTIENT, fixed_depth, depth, brw_imm_d(6));
- } else if (devinfo->gen < 7) {
- /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
- bld.emit_minmax(fixed_depth, depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
- }
-
- nir_dest[2] = fixed_depth;
+ nir_dest[2] = vgrf(glsl_type::int_type);
+ bld.emit_minmax(nir_dest[2], depth, brw_imm_d(1), BRW_CONDITIONAL_GE);
}
bld.LOAD_PAYLOAD(get_nir_dest(instr->dest), nir_dest, dest_size, 0);
assert(type_sz(src.type) == 8);
assert(type_sz(dst.type) == 4);
- assert(!src.in_range(dst, 2 * components * bld.dispatch_width() / 8));
+ assert(!regions_overlap(
+ dst, 2 * components * dst.component_size(bld.dispatch_width()),
+ src, components * src.component_size(bld.dispatch_width())));
for (unsigned i = 0; i < components; i++) {
const fs_reg component_i = offset(src, bld, i);
fs_reg
setup_imm_df(const fs_builder &bld, double v)
{
- const struct brw_device_info *devinfo = bld.shader->devinfo;
+ const struct gen_device_info *devinfo = bld.shader->devinfo;
assert(devinfo->gen >= 7);
if (devinfo->gen >= 8)
* instruction allows to set the 64-bit immediate value.
*/
if (devinfo->is_haswell) {
- const fs_builder ubld = bld.exec_all();
+ const fs_builder ubld = bld.exec_all().group(1, 0);
fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_DF, 1);
ubld.DIM(dst, brw_imm_df(v));
return component(dst, 0);