case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
- case SHADER_OPCODE_UNTYPED_ATOMIC:
- case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
- case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
- case SHADER_OPCODE_BYTE_SCATTERED_READ:
- case SHADER_OPCODE_TYPED_ATOMIC:
- case SHADER_OPCODE_TYPED_SURFACE_READ:
- case SHADER_OPCODE_TYPED_SURFACE_WRITE:
- case SHADER_OPCODE_IMAGE_SIZE:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
}
}
+bool
+fs_inst::is_control_source(unsigned arg) const
+{ /* Per-opcode predicate on a source index: returns true when 'arg' is
+   * one of the source positions listed below for this instruction's
+   * opcode, and false for any other opcode/index combination.
+   *
+   * NOTE(review): going by the name, the listed positions are presumably
+   * the operands that steer the instruction rather than carry
+   * per-channel data -- confirm against the callers.
+   */
+ switch (opcode) {
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
+ return arg == 0; /* source 0 only */
+
+ case SHADER_OPCODE_BROADCAST:
+ case SHADER_OPCODE_SHUFFLE:
+ case SHADER_OPCODE_QUAD_SWIZZLE:
+ case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
+ case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
+ case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+ case SHADER_OPCODE_GET_BUFFER_SIZE:
+ return arg == 1; /* source 1 only */
+
+ case SHADER_OPCODE_MOV_INDIRECT:
+ case SHADER_OPCODE_CLUSTER_BROADCAST:
+ case SHADER_OPCODE_TEX:
+ case FS_OPCODE_TXB:
+ case SHADER_OPCODE_TXD:
+ case SHADER_OPCODE_TXF:
+ case SHADER_OPCODE_TXF_LZ:
+ case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_CMS_W:
+ case SHADER_OPCODE_TXF_UMS:
+ case SHADER_OPCODE_TXF_MCS:
+ case SHADER_OPCODE_TXL:
+ case SHADER_OPCODE_TXL_LZ:
+ case SHADER_OPCODE_TXS:
+ case SHADER_OPCODE_LOD:
+ case SHADER_OPCODE_TG4:
+ case SHADER_OPCODE_TG4_OFFSET:
+ case SHADER_OPCODE_SAMPLEINFO:
+ return arg == 1 || arg == 2; /* sources 1 and 2 */
+
+ case SHADER_OPCODE_SEND:
+ return arg == 0 || arg == 1; /* sources 0 and 1 */
+
+ default:
+ return false; /* opcodes not listed above never report true */
+ }
+}
+
/**
* Returns true if this instruction's sources and destinations cannot
* safely be the same register.
case GLSL_TYPE_ARRAY:
return type_size_scalar(type->fields.array) * type->length;
case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE:
size = 0;
for (i = 0; i < type->length; i++) {
size += type_size_scalar(type->fields.structure[i].type);
return 1;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
- case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_FUNCTION:
unreachable("not reached");
}
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
- assert(src[3].file == IMM);
+ assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM);
/* Surface coordinates. */
- if (i == 0)
- return src[3].ud;
+ if (i == SURFACE_LOGICAL_SRC_ADDRESS)
+ return src[SURFACE_LOGICAL_SRC_IMM_DIMS].ud;
/* Surface operation source (ignored for reads). */
- else if (i == 1)
+ else if (i == SURFACE_LOGICAL_SRC_DATA)
return 0;
else
return 1;
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
- assert(src[3].file == IMM &&
- src[4].file == IMM);
+ assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
+ src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
/* Surface coordinates. */
- if (i == 0)
- return src[3].ud;
+ if (i == SURFACE_LOGICAL_SRC_ADDRESS)
+ return src[SURFACE_LOGICAL_SRC_IMM_DIMS].ud;
/* Surface operation source. */
- else if (i == 1)
- return src[4].ud;
+ else if (i == SURFACE_LOGICAL_SRC_DATA)
+ return src[SURFACE_LOGICAL_SRC_IMM_ARG].ud;
else
return 1;
* src[3] IMM with always 1 dimension.
* src[4] IMM with arg bitsize for scattered read/write 8, 16, 32
*/
- assert(src[3].file == IMM &&
- src[4].file == IMM);
- return i == 1 ? 0 : 1;
+ assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
+ src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
+ return i == SURFACE_LOGICAL_SRC_DATA ? 0 : 1;
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
- assert(src[3].file == IMM &&
- src[4].file == IMM);
+ assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
+ src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
return 1;
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: {
- assert(src[3].file == IMM &&
- src[4].file == IMM);
- const unsigned op = src[4].ud;
+ assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
+ src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
+ const unsigned op = src[SURFACE_LOGICAL_SRC_IMM_ARG].ud;
/* Surface coordinates. */
- if (i == 0)
- return src[3].ud;
+ if (i == SURFACE_LOGICAL_SRC_ADDRESS)
+ return src[SURFACE_LOGICAL_SRC_IMM_DIMS].ud;
/* Surface operation source. */
- else if (i == 1 && op == BRW_AOP_CMPWR)
+ else if (i == SURFACE_LOGICAL_SRC_DATA && op == BRW_AOP_CMPWR)
return 2;
- else if (i == 1 && (op == BRW_AOP_INC || op == BRW_AOP_DEC ||
- op == BRW_AOP_PREDEC))
+ else if (i == SURFACE_LOGICAL_SRC_DATA &&
+ (op == BRW_AOP_INC || op == BRW_AOP_DEC || op == BRW_AOP_PREDEC))
return 0;
else
return 1;
return (i == 0 ? 2 : 1);
case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: {
- assert(src[3].file == IMM &&
- src[4].file == IMM);
- const unsigned op = src[4].ud;
+ assert(src[SURFACE_LOGICAL_SRC_IMM_DIMS].file == IMM &&
+ src[SURFACE_LOGICAL_SRC_IMM_ARG].file == IMM);
+ const unsigned op = src[SURFACE_LOGICAL_SRC_IMM_ARG].ud;
/* Surface coordinates. */
- if (i == 0)
- return src[3].ud;
+ if (i == SURFACE_LOGICAL_SRC_ADDRESS)
+ return src[SURFACE_LOGICAL_SRC_IMM_DIMS].ud;
/* Surface operation source. */
- else if (i == 1 && op == BRW_AOP_FCMPWR)
+ else if (i == SURFACE_LOGICAL_SRC_DATA && op == BRW_AOP_FCMPWR)
return 2;
else
return 1;
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
- case SHADER_OPCODE_UNTYPED_ATOMIC:
- case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
- case SHADER_OPCODE_TYPED_ATOMIC:
- case SHADER_OPCODE_TYPED_SURFACE_READ:
- case SHADER_OPCODE_TYPED_SURFACE_WRITE:
- case SHADER_OPCODE_IMAGE_SIZE:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
- case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
- case SHADER_OPCODE_BYTE_SCATTERED_READ:
if (arg == 0)
return mlen * REG_SIZE;
break;
case BRW_OPCODE_OR:
if (inst->src[0].equals(inst->src[1]) ||
inst->src[1].is_zero()) {
- inst->opcode = BRW_OPCODE_MOV;
+ /* On Gen8+, the OR instruction can have a source modifier that
+ * performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
+ * or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
+ */
+ if (inst->src[0].negate) {
+ inst->opcode = BRW_OPCODE_NOT;
+ inst->src[0].negate = false;
+ } else {
+ inst->opcode = BRW_OPCODE_MOV;
+ }
inst->src[1] = reg_undef;
progress = true;
break;
if (csel_inst != NULL) {
progress = true;
+ csel_inst->saturate = inst->saturate;
inst->remove(block);
}
bool needs_mov = false;
fs_reg orig_dst = inst->dst;
+
+ /* Get a new VGRF for the "low" 32x16-bit multiplication result if
+ * reusing the original destination is impossible due to hardware
+ * restrictions, source/destination overlap, or it being the null
+ * register.
+ */
fs_reg low = inst->dst;
if (orig_dst.is_null() || orig_dst.file == MRF ||
regions_overlap(inst->dst, inst->size_written,
inst->src[0], inst->size_read(0)) ||
regions_overlap(inst->dst, inst->size_written,
- inst->src[1], inst->size_read(1))) {
+ inst->src[1], inst->size_read(1)) ||
+ inst->dst.stride >= 4) {
needs_mov = true;
- /* Get a new VGRF but keep the same stride as inst->dst */
low = fs_reg(VGRF, alloc.allocate(regs_written(inst)),
inst->dst.type);
- low.stride = inst->dst.stride;
- low.offset = inst->dst.offset % REG_SIZE;
}
/* Get a new VGRF but keep the same stride as inst->dst */
/* Set "Source0 Alpha Present to RenderTarget" bit in message
* header.
*/
- if (inst->target > 0 && key->replicate_alpha)
+ if (inst->target > 0 && prog_data->replicate_alpha)
g00_bits |= 1 << 11;
/* Set computes stencil to render target */
length++;
}
+ if (src0_alpha.file != BAD_FILE) {
+ for (unsigned i = 0; i < bld.dispatch_width() / 8; i++) {
+ const fs_builder &ubld = bld.exec_all().group(8, i)
+ .annotate("FB write src0 alpha");
+ const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_F);
+ ubld.MOV(tmp, horiz_offset(src0_alpha, i * 8));
+ setup_color_payload(ubld, key, &sources[length], tmp, 1);
+ length++;
+ }
+ } else if (prog_data->replicate_alpha && inst->target != 0) {
+ /* Handle the case when the fragment shader doesn't write to draw
+ * buffer zero. There is no need to call setup_color_payload() for
+ * src0_alpha because the alpha value will be undefined.
+ */
+ length += bld.dispatch_width() / 8;
+ }
+
if (sample_mask.file != BAD_FILE) {
sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1),
BRW_REGISTER_TYPE_UD);
payload_header_size = length;
- if (src0_alpha.file != BAD_FILE) {
- /* FIXME: This is being passed at the wrong location in the payload and
- * doesn't work when gl_SampleMask and MRTs are used simultaneously.
- * It's supposed to be immediately before oMask but there seems to be no
- * reasonable way to pass them in the correct order because LOAD_PAYLOAD
- * requires header sources to form a contiguous segment at the beginning
- * of the message and src0_alpha has per-channel semantics.
- */
- setup_color_payload(bld, key, &sources[length], src0_alpha, 1);
- length++;
- } else if (key->replicate_alpha && inst->target != 0) {
- /* Handle the case when fragment shader doesn't write to draw buffer
- * zero. No need to call setup_color_payload() for src0_alpha because
- * alpha value will be undefined.
- */
- length++;
- }
-
setup_color_payload(bld, key, &sources[length], color0, components);
length += 4;
return shadow_compare ? GEN9_SAMPLER_MESSAGE_SAMPLE_C_LZ :
GEN9_SAMPLER_MESSAGE_SAMPLE_LZ;
case SHADER_OPCODE_TXS:
- case SHADER_OPCODE_IMAGE_SIZE:
+ case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
return GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
case SHADER_OPCODE_TXD:
assert(!shadow_compare || devinfo->gen >= 8 || devinfo->is_haswell);
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod);
length++;
break;
- case SHADER_OPCODE_IMAGE_SIZE:
+ case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
/* We need an LOD; just use 0 */
bld.MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
length++;
case SHADER_OPCODE_TG4_OFFSET:
base_binding_table_index = prog_data->binding_table.gather_texture_start;
break;
- case SHADER_OPCODE_IMAGE_SIZE:
+ case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
base_binding_table_index = prog_data->binding_table.image_start;
break;
default:
const gen_device_info *devinfo = bld.shader->devinfo;
/* Get the logical send arguments. */
- const fs_reg &addr = inst->src[0];
- const fs_reg &src = inst->src[1];
- const fs_reg &surface = inst->src[2];
- const UNUSED fs_reg &dims = inst->src[3];
- const fs_reg &arg = inst->src[4];
+ const fs_reg &addr = inst->src[SURFACE_LOGICAL_SRC_ADDRESS];
+ const fs_reg &src = inst->src[SURFACE_LOGICAL_SRC_DATA];
+ const fs_reg &surface = inst->src[SURFACE_LOGICAL_SRC_SURFACE];
+ const UNUSED fs_reg &dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
+ const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
assert(arg.file == IMM);
/* Calculate the total number of components of the payload. */
- const unsigned addr_sz = inst->components_read(0);
- const unsigned src_sz = inst->components_read(1);
+ const unsigned addr_sz = inst->components_read(SURFACE_LOGICAL_SRC_ADDRESS);
+ const unsigned src_sz = inst->components_read(SURFACE_LOGICAL_SRC_DATA);
const bool is_typed_access =
inst->opcode == SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL ||
break;
case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
- lower_sampler_logical_send(ibld, inst, SHADER_OPCODE_IMAGE_SIZE);
+ lower_sampler_logical_send(ibld, inst,
+ SHADER_OPCODE_IMAGE_SIZE_LOGICAL);
break;
case FS_OPCODE_TXB_LOGICAL:
brw_nir_lower_fs_inputs(shader, devinfo, key);
brw_nir_lower_fs_outputs(shader);
- if (devinfo->gen < 6) {
- brw_setup_vue_interpolation(vue_map, shader, prog_data, devinfo);
- }
+ if (devinfo->gen < 6)
+ brw_setup_vue_interpolation(vue_map, shader, prog_data);
if (!key->multisample_fbo)
NIR_PASS_V(shader, demote_sample_qualifiers);
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
brw_nir_lower_cs_intrinsics(shader, dispatch_width);
+
+ /* Clean up after the local index and ID calculations. */
+ nir_opt_constant_folding(shader);
+ nir_opt_dce(shader);
+
return brw_postprocess_nir(shader, compiler, true);
}