* from the LIR.
*/
-#include <sys/types.h>
-
-#include "util/hash_table.h"
#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/fbobject.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "util/register_allocate.h"
-#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
-#include "brw_wm.h"
#include "brw_fs.h"
#include "brw_cs.h"
+#include "brw_nir.h"
#include "brw_vec4_gs_visitor.h"
#include "brw_cfg.h"
+#include "brw_program.h"
#include "brw_dead_control_flow.h"
-#include "main/uniforms.h"
-#include "brw_fs_live_variables.h"
-#include "glsl/nir/glsl_types.h"
-#include "program/sampler.h"
+#include "compiler/glsl_types.h"
using namespace brw;
* the redundant ones.
*/
fs_reg vec4_offset = vgrf(glsl_type::int_type);
- bld.ADD(vec4_offset, varying_offset, fs_reg(const_offset & ~3));
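+ /* const_offset is in bytes; round down to the start of the containing
+ * 16-byte vec4.
+ */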
+ bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
int scale = 1;
if (devinfo->gen == 4 && bld.dispatch_width() == 8) {
inst->mlen = 1 + bld.dispatch_width() / 8;
}
- bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale));
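+ /* Select the dword within the fetched vec4 that holds the value. */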
+ bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale));
}
/**
}
}
+/**
+ * Returns true if this instruction's sources and destinations cannot
+ * safely be the same register.
+ *
+ * In most cases, a register can be written over safely by the same
+ * instruction that is its last use. For a single instruction, the
+ * sources are dereferenced before writing of the destination starts
+ * (naturally).
+ *
+ * However, there are a few cases where this can be problematic:
+ *
+ * - Virtual opcodes that translate to multiple instructions in the
+ * code generator: if src == dst and one instruction writes the
+ * destination before a later instruction reads the source, then
+ * src will have been clobbered.
+ *
+ * - SIMD16 compressed instructions with certain regioning (see below).
+ *
+ * The register allocator uses this information to set up conflicts between
+ * GRF sources and the destination.
+ */
+bool
+fs_inst::has_source_and_destination_hazard() const
+{
+ switch (opcode) {
+ case FS_OPCODE_PACK_HALF_2x16_SPLIT:
+ /* Multiple partial writes to the destination */
+ return true;
+ default:
+ /* The SIMD16 compressed instruction
+ *
+ * add(16) g4<1>F g4<8,8,1>F g6<8,8,1>F
+ *
+ * is actually decoded in hardware as:
+ *
+ * add(8) g4<1>F g4<8,8,1>F g6<8,8,1>F
+ * add(8) g5<1>F g5<8,8,1>F g7<8,8,1>F
+ *
+ * Which is safe. However, if we have uniform (stride 0) accesses
+ * happening, we get into trouble:
+ *
+ * add(8) g4<1>F g4<0,1,0>F g6<8,8,1>F
+ * add(8) g5<1>F g4<0,1,0>F g7<8,8,1>F
+ *
+ * Now our destination for the first instruction overwrote the
+ * second instruction's src0, and we get garbage for those 8
+ * pixels. There's a similar issue for the pre-gen6
+ * pixel_x/pixel_y, which are registers of 16-bit values and thus
+ * would get stomped by the first decode as well.
+ */
+ if (exec_size == 16) {
+ for (int i = 0; i < sources; i++) {
+ if (src[i].file == VGRF && (src[i].stride == 0 ||
+ src[i].type == BRW_REGISTER_TYPE_UW ||
+ src[i].type == BRW_REGISTER_TYPE_W ||
+ src[i].type == BRW_REGISTER_TYPE_UB ||
+ src[i].type == BRW_REGISTER_TYPE_B)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+}
+
bool
fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const
{
this->file = BAD_FILE;
}
-/** Immediate value constructor. */
-fs_reg::fs_reg(float f)
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_F;
- this->stride = 0;
- this->f = f;
-}
-
-/** Immediate value constructor. */
-fs_reg::fs_reg(int32_t i)
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_D;
- this->stride = 0;
- this->d = i;
-}
-
-/** Immediate value constructor. */
-fs_reg::fs_reg(uint32_t u)
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_UD;
- this->stride = 0;
- this->ud = u;
-}
-
-/** Vector float immediate value constructor. */
-fs_reg::fs_reg(uint8_t vf[4])
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_VF;
- memcpy(&this->ud, vf, sizeof(unsigned));
-}
-
-/** Vector float immediate value constructor. */
-fs_reg::fs_reg(uint8_t vf0, uint8_t vf1, uint8_t vf2, uint8_t vf3)
-{
- init();
- this->file = IMM;
- this->type = BRW_REGISTER_TYPE_VF;
- this->ud = (vf0 << 0) | (vf1 << 8) | (vf2 << 16) | (vf3 << 24);
-}
-
-fs_reg::fs_reg(struct brw_reg reg) :
+fs_reg::fs_reg(struct ::brw_reg reg) :
backend_reg(reg)
{
this->reg_offset = 0;
bool
fs_reg::equals(const fs_reg &r) const
{
- return (memcmp((brw_reg *)this, (brw_reg *)&r, sizeof(brw_reg)) == 0 &&
- reg_offset == r.reg_offset &&
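+ /* backend_reg::equals() covers the underlying brw_reg fields (and
+ * reg_offset); only the fs_reg-specific fields are compared here.
+ */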
+ return (this->backend_reg::equals(r) &&
subreg_offset == r.subreg_offset &&
!reladdr && !r.reladdr &&
stride == r.stride);
fs_reg reset = shader_end_time;
reset.set_smear(2);
set_condmod(BRW_CONDITIONAL_Z,
- ibld.AND(ibld.null_reg_ud(), reset, fs_reg(1u)));
+ ibld.AND(ibld.null_reg_ud(), reset, brw_imm_ud(1u)));
ibld.IF(BRW_PREDICATE_NORMAL);
fs_reg start = shader_start_time;
* is 2 cycles. Remove that overhead, so I can forget about that when
* trying to determine the time taken for single instructions.
*/
- cbld.ADD(diff, diff, fs_reg(-2u));
+ cbld.ADD(diff, diff, brw_imm_ud(-2u));
SHADER_TIME_ADD(cbld, 0, diff);
- SHADER_TIME_ADD(cbld, 1, fs_reg(1u));
+ SHADER_TIME_ADD(cbld, 1, brw_imm_ud(1u));
ibld.emit(BRW_OPCODE_ELSE);
- SHADER_TIME_ADD(cbld, 2, fs_reg(1u));
+ SHADER_TIME_ADD(cbld, 2, brw_imm_ud(1u));
ibld.emit(BRW_OPCODE_ENDIF);
}
fs_reg value)
{
int index = shader_time_index * 3 + shader_time_subindex;
- fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE);
+ struct brw_reg offset = brw_imm_d(index * SHADER_TIME_STRIDE);
fs_reg payload;
if (dispatch_width == 8)
if (pixel_center_integer) {
bld.MOV(wpos, this->pixel_x);
} else {
- bld.ADD(wpos, this->pixel_x, fs_reg(0.5f));
+ bld.ADD(wpos, this->pixel_x, brw_imm_f(0.5f));
}
wpos = offset(wpos, bld, 1);
offset += key->drawable_height - 1.0f;
}
- bld.ADD(wpos, pixel_y, fs_reg(offset));
+ bld.ADD(wpos, pixel_y, brw_imm_f(offset));
}
wpos = offset(wpos, bld, 1);
}
void
-fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
+fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
const glsl_type *type,
glsl_interp_qualifier interpolation_mode,
- int location, bool mod_centroid,
+ int *location, bool mod_centroid,
bool mod_sample)
{
- attr.type = brw_type_for_base_type(type->get_scalar_type());
-
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- unsigned int array_elements;
-
- if (type->is_array()) {
- array_elements = type->arrays_of_arrays_size();
- if (array_elements == 0) {
- fail("dereferenced array '%s' has length 0\n", name);
- }
- type = type->without_array();
- } else {
- array_elements = 1;
- }
-
if (interpolation_mode == INTERP_QUALIFIER_NONE) {
bool is_gl_Color =
- location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1;
+ *location == VARYING_SLOT_COL0 || *location == VARYING_SLOT_COL1;
if (key->flat_shade && is_gl_Color) {
interpolation_mode = INTERP_QUALIFIER_FLAT;
} else {
}
}
- for (unsigned int i = 0; i < array_elements; i++) {
- for (unsigned int j = 0; j < type->matrix_columns; j++) {
- if (prog_data->urb_setup[location] == -1) {
- /* If there's no incoming setup data for this slot, don't
- * emit interpolation for it.
- */
- attr = offset(attr, bld, type->vector_elements);
- location++;
- continue;
- }
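+ /* Arrays and matrices recurse per element and structs per field;
+ * the scalar/vector leaf case below emits the actual interpolation.
+ */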
+ if (type->is_array() || type->is_matrix()) {
+ const glsl_type *elem_type = glsl_get_array_element(type);
+ const unsigned length = glsl_get_length(type);
- if (interpolation_mode == INTERP_QUALIFIER_FLAT) {
- /* Constant interpolation (flat shading) case. The SF has
- * handed us defined values in only the constant offset
- * field of the setup reg.
- */
- for (unsigned int k = 0; k < type->vector_elements; k++) {
- struct brw_reg interp = interp_reg(location, k);
- interp = suboffset(interp, 3);
- interp.type = attr.type;
- bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
- attr = offset(attr, bld, 1);
- }
- } else {
- /* Smooth/noperspective interpolation case. */
- for (unsigned int k = 0; k < type->vector_elements; k++) {
- struct brw_reg interp = interp_reg(location, k);
- if (devinfo->needs_unlit_centroid_workaround && mod_centroid) {
- /* Get the pixel/sample mask into f0 so that we know
- * which pixels are lit. Then, for each channel that is
- * unlit, replace the centroid data with non-centroid
- * data.
- */
- bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
-
- fs_inst *inst;
- inst = emit_linterp(attr, fs_reg(interp), interpolation_mode,
- false, false);
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->predicate_inverse = true;
- if (devinfo->has_pln)
- inst->no_dd_clear = true;
-
- inst = emit_linterp(attr, fs_reg(interp), interpolation_mode,
- mod_centroid && !key->persample_shading,
- mod_sample || key->persample_shading);
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->predicate_inverse = false;
- if (devinfo->has_pln)
- inst->no_dd_check = true;
+ for (unsigned i = 0; i < length; i++) {
+ emit_general_interpolation(attr, name, elem_type, interpolation_mode,
+ location, mod_centroid, mod_sample);
+ }
+ } else if (type->is_record()) {
+ for (unsigned i = 0; i < type->length; i++) {
+ const glsl_type *field_type = type->fields.structure[i].type;
+ emit_general_interpolation(attr, name, field_type, interpolation_mode,
+ location, mod_centroid, mod_sample);
+ }
+ } else {
+ assert(type->is_scalar() || type->is_vector());
- } else {
- emit_linterp(attr, fs_reg(interp), interpolation_mode,
- mod_centroid && !key->persample_shading,
- mod_sample || key->persample_shading);
- }
- if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
- bld.MUL(attr, attr, this->pixel_w);
- }
- attr = offset(attr, bld, 1);
- }
+ if (prog_data->urb_setup[*location] == -1) {
+ /* If there's no incoming setup data for this slot, don't
+ * emit interpolation for it.
+ */
+ *attr = offset(*attr, bld, type->vector_elements);
+ (*location)++;
+ return;
+ }
- }
- location++;
+ attr->type = brw_type_for_base_type(type->get_scalar_type());
+
+ if (interpolation_mode == INTERP_QUALIFIER_FLAT) {
+ /* Constant interpolation (flat shading) case. The SF has
+ * handed us defined values in only the constant offset
+ * field of the setup reg.
+ */
+ for (unsigned int i = 0; i < type->vector_elements; i++) {
+ struct brw_reg interp = interp_reg(*location, i);
+ interp = suboffset(interp, 3);
+ interp.type = attr->type;
+ bld.emit(FS_OPCODE_CINTERP, *attr, fs_reg(interp));
+ *attr = offset(*attr, bld, 1);
+ }
+ } else {
+ /* Smooth/noperspective interpolation case. */
+ for (unsigned int i = 0; i < type->vector_elements; i++) {
+ struct brw_reg interp = interp_reg(*location, i);
+ if (devinfo->needs_unlit_centroid_workaround && mod_centroid) {
+ /* Get the pixel/sample mask into f0 so that we know
+ * which pixels are lit. Then, for each channel that is
+ * unlit, replace the centroid data with non-centroid
+ * data.
+ */
+ bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+
+ fs_inst *inst;
+ inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode,
+ false, false);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->predicate_inverse = true;
+ if (devinfo->has_pln)
+ inst->no_dd_clear = true;
+
+ inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode,
+ mod_centroid && !key->persample_shading,
+ mod_sample || key->persample_shading);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->predicate_inverse = false;
+ if (devinfo->has_pln)
+ inst->no_dd_check = true;
+
+ } else {
+ emit_linterp(*attr, fs_reg(interp), interpolation_mode,
+ mod_centroid && !key->persample_shading,
+ mod_sample || key->persample_shading);
+ }
+ if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
+ bld.MUL(*attr, *attr, this->pixel_w);
+ }
+ *attr = offset(*attr, bld, 1);
+ }
}
+ (*location)++;
}
}
fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));
g0.negate = true;
- bld.ASR(*reg, g0, fs_reg(15));
+ bld.ASR(*reg, g0, brw_imm_d(15));
} else {
/* Bit 31 of g1.6 is 0 if the polygon is front facing. We want to create
* a boolean result from this (1/true or 0/false).
fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));
g1_6.negate = true;
- bld.ASR(*reg, g1_6, fs_reg(31));
+ bld.ASR(*reg, g1_6, brw_imm_d(31));
}
return reg;
/* Convert int_sample_pos to floating point */
bld.MOV(dst, int_sample_pos);
/* Scale to the range [0, 1] */
- bld.MUL(dst, dst, fs_reg(1 / 16.0f));
+ bld.MUL(dst, dst, brw_imm_f(1 / 16.0f));
}
else {
/* From ARB_sample_shading specification:
* rasterization is disabled, gl_SamplePosition will always be
* (0.5, 0.5).
*/
- bld.MOV(dst, fs_reg(0.5f));
+ bld.MOV(dst, brw_imm_f(0.5f));
}
}
abld.exec_all().group(1, 0)
.AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
- fs_reg(sspi_mask));
- abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5));
+ brw_imm_ud(sspi_mask));
+ abld.exec_all().group(1, 0).SHR(t1, t1, brw_imm_d(5));
/* This works for both SIMD8 and SIMD16 */
abld.exec_all().group(4, 0)
* "When rendering to a non-multisample buffer, or if multisample
* rasterization is disabled, gl_SampleID will always be zero."
*/
- abld.MOV(*reg, fs_reg(0));
+ abld.MOV(*reg, brw_imm_d(0));
}
return reg;
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
assert(stage == MESA_SHADER_VERTEX);
- int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
- if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
- count++;
/* Each attribute is 4 regs. */
this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
}
}
+void
+fs_visitor::assign_tes_urb_setup()
+{
+ assert(stage == MESA_SHADER_TESS_EVAL);
+
+ brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data;
+
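+ /* URB read lengths are in units of vec4 pairs; in SIMD8 each vec4
+ * spans 4 GRFs, so each unit occupies 8 GRFs.
+ */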
+ first_non_payload_grf += 8 * vue_prog_data->urb_read_length;
+
+ /* Rewrite all ATTR file references to HW_REGs. */
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ convert_attr_sources_to_hw_regs(inst);
+ }
+}
+
void
fs_visitor::assign_gs_urb_setup()
{
first_non_payload_grf +=
8 * vue_prog_data->urb_read_length * nir->info.gs.vertices_in;
- const unsigned first_icp_handle = payload.num_regs -
- (vue_prog_data->include_vue_handles ? nir->info.gs.vertices_in : 0);
-
foreach_block_and_inst(block, fs_inst, inst, cfg) {
- /* Lower URB_READ_SIMD8 opcodes into real messages. */
- if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8) {
- assert(inst->src[0].file == IMM);
- inst->src[0] = retype(brw_vec8_grf(first_icp_handle +
- inst->src[0].ud,
- 0), BRW_REGISTER_TYPE_UD);
- /* for now, assume constant - we can do per-slot offsets later */
- assert(inst->src[1].file == IMM);
- inst->offset = inst->src[1].ud;
- inst->src[1] = fs_reg();
- inst->mlen = 1;
- inst->base_mrf = -1;
- }
-
/* Rewrite all ATTR file references to GRFs. */
convert_attr_sources_to_hw_regs(inst);
}
/* Generate a pull load into dst. */
if (inst->src[i].reladdr) {
VARYING_PULL_CONSTANT_LOAD(ibld, dst,
- fs_reg(index),
+ brw_imm_ud(index),
*inst->src[i].reladdr,
- pull_index);
+ pull_index * 4);
inst->src[i].reladdr = NULL;
inst->src[i].stride = 1;
} else {
const fs_builder ubld = ibld.exec_all().group(8, 0);
- fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
+ struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- dst, fs_reg(index), offset);
+ dst, brw_imm_ud(index), offset);
inst->src[i].set_smear(pull_index & 3);
}
brw_mark_surface_used(prog_data, index);
if (inst->dst.type != inst->src[0].type)
assert(!"unimplemented: saturate mixed types");
- if (brw_saturate_immediate(inst->dst.type, &inst->src[0])) {
+ if (brw_saturate_immediate(inst->dst.type,
+ &inst->src[0].as_brw_reg())) {
inst->saturate = false;
progress = true;
}
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
if (depth == 0) {
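+ /* Outside of control flow the first channel is always live, so this
+ * reduces to a constant 0.
+ */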
inst->opcode = BRW_OPCODE_MOV;
- inst->src[0] = fs_reg(0u);
+ inst->src[0] = brw_imm_ud(0u);
inst->sources = 1;
inst->force_writemask_all = true;
progress = true;
continue;
if (devinfo->gen >= 7) {
- /* The offset arg before was a vec4-aligned byte offset. We need to
- * turn it into a dword offset.
- */
+ /* The offset arg is a vec4-aligned immediate byte offset. */
fs_reg const_offset_reg = inst->src[1];
assert(const_offset_reg.file == IMM &&
const_offset_reg.type == BRW_REGISTER_TYPE_UD);
- const_offset_reg.ud /= 4;
+ assert(const_offset_reg.ud % 16 == 0);
fs_reg payload, offset;
if (devinfo->gen >= 9) {
*/
assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
mul->src[1].type == BRW_REGISTER_TYPE_UD);
- mul->src[1].type = (type_is_signed(mul->src[1].type) ?
- BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW);
+ mul->src[1].type = BRW_REGISTER_TYPE_UW;
mul->src[1].stride *= 2;
} else if (devinfo->gen == 7 && !devinfo->is_haswell &&
assert(devinfo->gen >= 9);
assert(bld.dispatch_width() != 16);
+ /* XXX: src_stencil is only available on gen9+. dst_depth is never
+ * available on gen9+. As such it's impossible to have both enabled at the
+ * same time and therefore length cannot overrun the array.
+ */
+ assert(length < 15);
+
sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD);
bld.exec_all().annotate("FB write OS")
.emit(FS_OPCODE_PACK_STENCIL_REF, sources[length],
(has_lod || shadow_c.file != BAD_FILE ||
(op == SHADER_OPCODE_TEX && bld.dispatch_width() == 8))) {
for (unsigned i = coord_components; i < 3; i++)
- bld.MOV(offset(msg_end, bld, i), fs_reg(0.0f));
+ bld.MOV(offset(msg_end, bld, i), brw_imm_f(0.0f));
msg_end = offset(msg_end, bld, 3 - coord_components);
}
/* There's no plain shadow compare message, so we use shadow
* compare with a bias of 0.0.
*/
- bld.MOV(msg_end, fs_reg(0.0f));
+ bld.MOV(msg_end, brw_imm_f(0.0f));
msg_end = offset(msg_end, bld, 1);
}
case SHADER_OPCODE_TXF_CMS:
msg_lod = offset(msg_coords, bld, 3);
/* lod */
- bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), fs_reg(0u));
+ bld.MOV(retype(msg_lod, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
/* sample index */
bld.MOV(retype(offset(msg_lod, bld, 1), BRW_REGISTER_TYPE_UD), sample_index);
msg_end = offset(msg_lod, bld, 2);
if (bld.shader->stage != MESA_SHADER_FRAGMENT &&
op == SHADER_OPCODE_TEX) {
op = SHADER_OPCODE_TXL;
- lod = fs_reg(0.0f);
+ lod = brw_imm_f(0.0f);
}
/* Set up the LOD info */
{
fs_builder ubld = bld.exec_all().group(8, 0);
const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
- ubld.MOV(dst, fs_reg(0));
+ ubld.MOV(dst, brw_imm_d(0));
ubld.MOV(component(dst, 7), sample_mask);
return dst;
}
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
lower_surface_logical_send(ibld, inst,
SHADER_OPCODE_TYPED_SURFACE_READ,
- fs_reg(0xffff));
+ brw_imm_d(0xffff));
break;
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
case IMM:
unreachable("not reached");
}
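+ /* Print the destination stride when it isn't the implicit <1>. */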
+ if (inst->dst.stride != 1)
+ fprintf(file, "<%u>", inst->dst.stride);
fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type));
for (int i = 0; i < inst->sources; i++) {
fprintf(file, "|");
if (inst->src[i].file != IMM) {
+ unsigned stride;
+ if (inst->src[i].file == ARF || inst->src[i].file == FIXED_GRF) {
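+ /* ARF/FIXED_GRF horizontal strides are log2-encoded: 0 means
+ * stride 0, otherwise the stride is 1 << (hstride - 1).
+ */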
+ unsigned hstride = inst->src[i].hstride;
+ stride = (hstride == 0 ? 0 : (1 << (hstride - 1)));
+ } else {
+ stride = inst->src[i].stride;
+ }
+ if (stride != 1)
+ fprintf(file, "<%u>", stride);
+
fprintf(file, ":%s", brw_reg_type_letters(inst->src[i].type));
}
return !failed;
}
+bool
+fs_visitor::run_tes()
+{
+ assert(stage == MESA_SHADER_TESS_EVAL);
+
+ /* R0: thread header, R1-3: gl_TessCoord.xyz, R4: URB handles */
+ payload.num_regs = 5;
+
+ if (shader_time_index >= 0)
+ emit_shader_time_begin();
+
+ emit_nir_code();
+
+ if (failed)
+ return false;
+
+ emit_urb_writes();
+
+ if (shader_time_index >= 0)
+ emit_shader_time_end();
+
+ calculate_cfg();
+
+ optimize();
+
+ assign_curb_setup();
+ assign_tes_urb_setup();
+
+ fixup_3src_null_dest();
+ allocate_registers();
+
+ return !failed;
+}
+
bool
fs_visitor::run_gs()
{
*/
if (gs_compile->control_data_header_size_bits <= 32) {
const fs_builder abld = bld.annotate("initialize control data bits");
- abld.MOV(this->control_data_bits, fs_reg(0u));
+ abld.MOV(this->control_data_bits, brw_imm_ud(0u));
}
}
void *mem_ctx,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data,
- const nir_shader *shader,
+ const nir_shader *src_shader,
struct gl_program *prog,
int shader_time_index8, int shader_time_index16,
bool use_rep_send,
unsigned *final_assembly_size,
char **error_str)
{
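+ /* Clone the shader so the key-specific sampler lowering and
+ * postprocessing below can mutate it without touching the caller's copy.
+ */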
+ nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+ shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+ true);
+ shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+
/* key->alpha_test_func means simulating alpha testing via discards,
* so the shader definitely kills pixels.
*/
}
fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base,
- v.promoted_constants, v.runtime_check_aads_emit, "FS");
+ v.promoted_constants, v.runtime_check_aads_emit,
+ MESA_SHADER_FRAGMENT);
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s",
return g.get_assembly(final_assembly_size);
}
-void
-brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
- void *buffer, uint32_t threads, uint32_t stride)
-{
- if (prog_data->local_invocation_id_regs == 0)
- return;
-
- /* 'stride' should be an integer number of registers, that is, a multiple
- * of 32 bytes.
- */
- assert(stride % 32 == 0);
-
- unsigned x = 0, y = 0, z = 0;
- for (unsigned t = 0; t < threads; t++) {
- uint32_t *param = (uint32_t *) buffer + stride * t / 4;
-
- for (unsigned i = 0; i < prog_data->simd_size; i++) {
- param[0 * prog_data->simd_size + i] = x;
- param[1 * prog_data->simd_size + i] = y;
- param[2 * prog_data->simd_size + i] = z;
-
- x++;
- if (x == prog_data->local_size[0]) {
- x = 0;
- y++;
- if (y == prog_data->local_size[1]) {
- y = 0;
- z++;
- if (z == prog_data->local_size[2])
- z = 0;
- }
- }
- }
- }
-}
-
fs_reg *
fs_visitor::emit_cs_local_invocation_id_setup()
{
void *mem_ctx,
const struct brw_cs_prog_key *key,
struct brw_cs_prog_data *prog_data,
- const nir_shader *shader,
+ const nir_shader *src_shader,
int shader_time_index,
unsigned *final_assembly_size,
char **error_str)
{
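+ /* As in the FS path, lower and postprocess a clone rather than the
+ * caller's shader.
+ */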
+ nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
+ shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
+ true);
+ shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+
prog_data->local_size[0] = shader->info.cs.local_size[0];
prog_data->local_size[1] = shader->info.cs.local_size[1];
prog_data->local_size[2] = shader->info.cs.local_size[2];
}
fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base,
- v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
+ v8.promoted_constants, v8.runtime_check_aads_emit,
+ MESA_SHADER_COMPUTE);
if (INTEL_DEBUG & DEBUG_CS) {
char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s",
shader->info.label ? shader->info.label :
return g.get_assembly(final_assembly_size);
}
+
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
+ void *buffer, uint32_t threads, uint32_t stride)
+{
+ if (prog_data->local_invocation_id_regs == 0)
+ return;
+
+ /* 'stride' should be an integer number of registers, that is, a multiple
+ * of 32 bytes.
+ */
+ assert(stride % 32 == 0);
+
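+ /* Emit one (x, y, z) triple per SIMD channel, advancing in x-major
+ * order and carrying into y and z.
+ */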
+ unsigned x = 0, y = 0, z = 0;
+ for (unsigned t = 0; t < threads; t++) {
+ uint32_t *param = (uint32_t *) buffer + stride * t / 4;
+
+ for (unsigned i = 0; i < prog_data->simd_size; i++) {
+ param[0 * prog_data->simd_size + i] = x;
+ param[1 * prog_data->simd_size + i] = y;
+ param[2 * prog_data->simd_size + i] = z;
+
+ x++;
+ if (x == prog_data->local_size[0]) {
+ x = 0;
+ y++;
+ if (y == prog_data->local_size[1]) {
+ y = 0;
+ z++;
+ if (z == prog_data->local_size[2])
+ z = 0;
+ }
+ }
+ }
+ }
+}