nir_emit_cf_list(&if_stmt->then_list);
- /* note: if the else is empty, dead CF elimination will remove it */
- bld.emit(BRW_OPCODE_ELSE);
-
- nir_emit_cf_list(&if_stmt->else_list);
+ if (!nir_cf_list_is_empty_block(&if_stmt->else_list)) {
+ bld.emit(BRW_OPCODE_ELSE);
+ nir_emit_cf_list(&if_stmt->else_list);
+ }
bld.emit(BRW_OPCODE_ENDIF);
break;
case nir_instr_type_deref:
- /* Derefs can exist for images but they do nothing */
+ unreachable("All derefs should've been lowered");
break;
case nir_instr_type_intrinsic:
}
set_predicate(BRW_PREDICATE_NORMAL, inst);
- } else if (type_sz(op[0].type) < 8) {
+ } else if (type_sz(op[0].type) == 2) {
+ /* AND(val, 0x8000) gives the sign bit.
+ *
+ * Predicated OR ORs 1.0 (0x3c00) with the sign bit if val is not zero.
+ */
+ fs_reg zero = retype(brw_imm_uw(0), BRW_REGISTER_TYPE_HF);
+ bld.CMP(bld.null_reg_f(), op[0], zero, BRW_CONDITIONAL_NZ);
+
+ op[0].type = BRW_REGISTER_TYPE_UW;
+ result.type = BRW_REGISTER_TYPE_UW;
+ bld.AND(result, op[0], brw_imm_uw(0x8000u));
+
+ if (instr->op == nir_op_fsign)
+ inst = bld.OR(result, result, brw_imm_uw(0x3c00u));
+ else {
+ /* Use XOR here to get the result sign correct. */
+ inst = bld.XOR(result, result, retype(op[1], BRW_REGISTER_TYPE_UW));
+ }
+
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ } else if (type_sz(op[0].type) == 4) {
/* AND(val, 0x80000000) gives the sign bit.
*
* Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
* zero.
*/
bld.CMP(bld.null_reg_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
- fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
op[0].type = BRW_REGISTER_TYPE_UD;
result.type = BRW_REGISTER_TYPE_UD;
- bld.AND(result_int, op[0], brw_imm_ud(0x80000000u));
+ bld.AND(result, op[0], brw_imm_ud(0x80000000u));
if (instr->op == nir_op_fsign)
- inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
+ inst = bld.OR(result, result, brw_imm_ud(0x3f800000u));
else {
/* Use XOR here to get the result sign correct. */
- inst = bld.XOR(result_int, result_int,
- retype(op[1], BRW_REGISTER_TYPE_UD));
+ inst = bld.XOR(result, result, retype(op[1], BRW_REGISTER_TYPE_UD));
}
inst->predicate = BRW_PREDICATE_NORMAL;
}
}
+fs_reg
+fs_visitor::get_tcs_single_patch_icp_handle(const fs_builder &bld,
+ nir_intrinsic_instr *instr)
+{
+ struct brw_tcs_prog_data *tcs_prog_data = brw_tcs_prog_data(prog_data);
+ const nir_src &vertex_src = instr->src[0];
+ nir_intrinsic_instr *vertex_intrin = nir_src_as_intrinsic(vertex_src);
+ fs_reg icp_handle;
+
+ if (nir_src_is_const(vertex_src)) {
+ /* Case 1 of 3 for picking the ICP handle returned by this function:
+ * the vertex index (instr->src[0]) is a compile-time constant, so the
+ * handle is the single DWord at register g(1 + vertex / 8),
+ * subregister (vertex % 8).
+ *
+ * Emit a MOV to resolve <0,1,0> regioning.
+ */
+ icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ unsigned vertex = nir_src_as_uint(vertex_src);
+ bld.MOV(icp_handle,
+ retype(brw_vec1_grf(1 + (vertex >> 3), vertex & 7),
+ BRW_REGISTER_TYPE_UD));
+ } else if (tcs_prog_data->instances == 1 && vertex_intrin &&
+ vertex_intrin->intrinsic == nir_intrinsic_load_invocation_id) {
+ /* Case 2 of 3: the vertex index is produced by load_invocation_id.
+ *
+ * For the common case of only 1 instance, an array index of
+ * gl_InvocationID means reading g1. Skip all the indirect work.
+ *
+ * g1 is handed back directly as a vec8 UD region; no temporary is
+ * allocated and no instruction is emitted on this path.
+ */
+ icp_handle = retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD);
+ } else {
+ /* Case 3 of 3: the vertex index is non-constant. We need to use
+ * indirect addressing to fetch the proper URB handle.
+ */
+ icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+
+ /* Each ICP handle is a single DWord (4 bytes), so the byte offset
+ * into the handle array is the vertex index shifted left by 2.
+ */
+ fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ bld.SHL(vertex_offset_bytes,
+ retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(2u));
+
+ /* Start at g1. We might read up to 4 registers, which is what the
+ * 4 * REG_SIZE immediate passed as the last source reflects.
+ */
+ bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
+ retype(brw_vec8_grf(1, 0), icp_handle.type), vertex_offset_bytes,
+ brw_imm_ud(4 * REG_SIZE));
+ }
+
+ return icp_handle;
+}
+
void
fs_visitor::nir_emit_tcs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
case nir_intrinsic_load_per_vertex_input: {
fs_reg indirect_offset = get_indirect_offset(instr);
unsigned imm_offset = instr->const_index[0];
-
- const nir_src &vertex_src = instr->src[0];
-
fs_inst *inst;
- fs_reg icp_handle;
-
- if (nir_src_is_const(vertex_src)) {
- /* Emit a MOV to resolve <0,1,0> regioning. */
- icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- unsigned vertex = nir_src_as_uint(vertex_src);
- bld.MOV(icp_handle,
- retype(brw_vec1_grf(1 + (vertex >> 3), vertex & 7),
- BRW_REGISTER_TYPE_UD));
- } else if (tcs_prog_data->instances == 1 &&
- nir_src_as_intrinsic(vertex_src) != NULL &&
- nir_src_as_intrinsic(vertex_src)->intrinsic == nir_intrinsic_load_invocation_id) {
- /* For the common case of only 1 instance, an array index of
- * gl_InvocationID means reading g1. Skip all the indirect work.
- */
- icp_handle = retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD);
- } else {
- /* The vertex index is non-constant. We need to use indirect
- * addressing to fetch the proper URB handle.
- */
- icp_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
-
- /* Each ICP handle is a single DWord (4 bytes) */
- fs_reg vertex_offset_bytes = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
- bld.SHL(vertex_offset_bytes,
- retype(get_nir_src(vertex_src), BRW_REGISTER_TYPE_UD),
- brw_imm_ud(2u));
-
- /* Start at g1. We might read up to 4 registers. */
- bld.emit(SHADER_OPCODE_MOV_INDIRECT, icp_handle,
- retype(brw_vec8_grf(1, 0), icp_handle.type), vertex_offset_bytes,
- brw_imm_ud(4 * REG_SIZE));
- }
+ fs_reg icp_handle = get_tcs_single_patch_icp_handle(bld, instr);
/* We can only read two double components with each URB read, so
* we send two read messages in that case, each one loading up to
for (unsigned int i = 0; i < instr->num_components; i++) {
fs_reg interp =
- component(interp_reg(nir_intrinsic_base(instr),
- nir_intrinsic_component(instr) + i), 0);
+ interp_reg(nir_intrinsic_base(instr),
+ nir_intrinsic_component(instr) + i);
interp.type = BRW_REGISTER_TYPE_F;
dest.type = BRW_REGISTER_TYPE_F;
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_atomic_exchange:
- case nir_intrinsic_image_atomic_comp_swap: {
+ case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_bindless_image_load:
+ case nir_intrinsic_bindless_image_store:
+ case nir_intrinsic_bindless_image_atomic_add:
+ case nir_intrinsic_bindless_image_atomic_min:
+ case nir_intrinsic_bindless_image_atomic_max:
+ case nir_intrinsic_bindless_image_atomic_and:
+ case nir_intrinsic_bindless_image_atomic_or:
+ case nir_intrinsic_bindless_image_atomic_xor:
+ case nir_intrinsic_bindless_image_atomic_exchange:
+ case nir_intrinsic_bindless_image_atomic_comp_swap: {
if (stage == MESA_SHADER_FRAGMENT &&
instr->intrinsic != nir_intrinsic_image_load)
brw_wm_prog_data(prog_data)->has_side_effects = true;
const GLenum format = nir_intrinsic_format(instr);
fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS];
- srcs[SURFACE_LOGICAL_SRC_SURFACE] =
- get_nir_image_intrinsic_image(bld, instr);
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_image_atomic_comp_swap:
+ srcs[SURFACE_LOGICAL_SRC_SURFACE] =
+ get_nir_image_intrinsic_image(bld, instr);
+ break;
+
+ default:
+ /* Bindless */
+ srcs[SURFACE_LOGICAL_SRC_SURFACE_HANDLE] =
+ bld.emit_uniformize(get_nir_src(instr->src[0]));
+ break;
+ }
+
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] =
brw_imm_ud(image_intrinsic_coord_components(instr));
/* Emit an image load, store or atomic op. */
- if (instr->intrinsic == nir_intrinsic_image_load) {
+ if (instr->intrinsic == nir_intrinsic_image_load ||
+ instr->intrinsic == nir_intrinsic_bindless_image_load) {
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
fs_inst *inst =
bld.emit(SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
inst->size_written = instr->num_components * dispatch_width * 4;
- } else if (instr->intrinsic == nir_intrinsic_image_store) {
+ } else if (instr->intrinsic == nir_intrinsic_image_store ||
+ instr->intrinsic == nir_intrinsic_bindless_image_store) {
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[3]);
bld.emit(SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
switch (instr->intrinsic) {
case nir_intrinsic_image_atomic_add:
+ case nir_intrinsic_bindless_image_atomic_add:
assert(num_srcs == 4);
op = get_op_for_atomic_add(instr, 3);
num_srcs = 3;
break;
case nir_intrinsic_image_atomic_min:
+ case nir_intrinsic_bindless_image_atomic_min:
assert(format == GL_R32UI || format == GL_R32I);
op = (format == GL_R32I) ? BRW_AOP_IMIN : BRW_AOP_UMIN;
break;
case nir_intrinsic_image_atomic_max:
+ case nir_intrinsic_bindless_image_atomic_max:
assert(format == GL_R32UI || format == GL_R32I);
op = (format == GL_R32I) ? BRW_AOP_IMAX : BRW_AOP_UMAX;
break;
case nir_intrinsic_image_atomic_and:
+ case nir_intrinsic_bindless_image_atomic_and:
op = BRW_AOP_AND;
break;
case nir_intrinsic_image_atomic_or:
+ case nir_intrinsic_bindless_image_atomic_or:
op = BRW_AOP_OR;
break;
case nir_intrinsic_image_atomic_xor:
+ case nir_intrinsic_bindless_image_atomic_xor:
op = BRW_AOP_XOR;
break;
case nir_intrinsic_image_atomic_exchange:
+ case nir_intrinsic_bindless_image_atomic_exchange:
op = BRW_AOP_MOV;
break;
case nir_intrinsic_image_atomic_comp_swap:
+ case nir_intrinsic_bindless_image_atomic_comp_swap:
op = BRW_AOP_CMPWR;
break;
default:
break;
}
- case nir_intrinsic_image_size: {
+ case nir_intrinsic_image_size:
+ case nir_intrinsic_bindless_image_size: {
/* Unlike the [un]typed load and store opcodes, the TXS that this turns
* into will handle the binding table index for us in the generator.
+ * Incidentally, this means that we can handle bindless with exactly the
+ * same code.
*/
fs_reg image = retype(get_nir_src_imm(instr->src[0]),
BRW_REGISTER_TYPE_UD);
image = bld.emit_uniformize(image);
fs_reg srcs[TEX_LOGICAL_NUM_SRCS];
- srcs[TEX_LOGICAL_SRC_SURFACE] = image;
+ if (instr->intrinsic == nir_intrinsic_image_size)
+ srcs[TEX_LOGICAL_SRC_SURFACE] = image;
+ else
+ srcs[TEX_LOGICAL_SRC_SURFACE_HANDLE] = image;
srcs[TEX_LOGICAL_SRC_SAMPLER] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(0);
srcs[TEX_LOGICAL_SRC_COORD_COMPONENTS] = brw_imm_d(instr->coord_components);
srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
- bool shader_supports_implicit_lod = stage == MESA_SHADER_FRAGMENT ||
- (stage == MESA_SHADER_COMPUTE &&
- nir->info.cs.derivative_group != DERIVATIVE_GROUP_NONE);
-
enum opcode opcode;
switch (instr->op) {
case nir_texop_tex:
- opcode = shader_supports_implicit_lod ?
- SHADER_OPCODE_TEX_LOGICAL : SHADER_OPCODE_TXL_LOGICAL;
+ opcode = SHADER_OPCODE_TEX_LOGICAL;
break;
case nir_texop_txb:
opcode = FS_OPCODE_TXB_LOGICAL;