emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
} else {
emit(FS_OPCODE_LINTERP, wpos,
- this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
- this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
+ this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
interp_reg(VARYING_SLOT_POS, 2));
}
wpos = offset(wpos, 1);
barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
}
return emit(FS_OPCODE_LINTERP, attr,
- this->delta_x[barycoord_mode],
- this->delta_y[barycoord_mode], interp);
+ this->delta_xy[barycoord_mode], interp);
}
void
*/
foreach_block_and_inst(block, fs_inst, inst, cfg) {
if (inst->opcode == FS_OPCODE_LINTERP) {
- assert(inst->src[2].file == HW_REG);
- inst->src[2].fixed_hw_reg.nr += urb_start;
+ assert(inst->src[1].file == HW_REG);
+ inst->src[1].fixed_hw_reg.nr += urb_start;
}
if (inst->opcode == FS_OPCODE_CINTERP) {
}
}
- /* Patch all the references to delta_x/delta_y, since they're used in
- * register allocation. If they're unused, switch them to BAD_FILE so
- * we don't think some random VGRF is delta_x/delta_y.
+ /* Patch all the references to delta_xy, since they're used in register
+ * allocation. If they're unused, switch them to BAD_FILE so we don't
+ * think some random VGRF is delta_xy.
*/
- for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
- if (delta_x[i].file == GRF) {
- if (remap_table[delta_x[i].reg] != -1) {
- delta_x[i].reg = remap_table[delta_x[i].reg];
+ for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
+ if (delta_xy[i].file == GRF) {
+ if (remap_table[delta_xy[i].reg] != -1) {
+ delta_xy[i].reg = remap_table[delta_xy[i].reg];
} else {
- delta_x[i].file = BAD_FILE;
- }
- }
- }
- for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
- if (delta_y[i].file == GRF) {
- if (remap_table[delta_y[i].reg] != -1) {
- delta_y[i].reg = remap_table[delta_y[i].reg];
- } else {
- delta_y[i].file = BAD_FILE;
+ delta_xy[i].file = BAD_FILE;
}
}
}
if (progress) {
invalidate_live_intervals();
- for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
- if (delta_x[i].file == GRF && remap[delta_x[i].reg] != -1) {
- delta_x[i].reg = remap[delta_x[i].reg];
- }
- }
- for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
- if (delta_y[i].file == GRF && remap[delta_y[i].reg] != -1) {
- delta_y[i].reg = remap[delta_y[i].reg];
+ for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
+ if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
+ delta_xy[i].reg = remap[delta_xy[i].reg];
}
}
}
fs_reg pixel_y;
fs_reg wpos_w;
fs_reg pixel_w;
- fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
- fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
+ fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
fs_reg shader_start_time;
fs_reg userplane[MAX_CLIP_PLANES];
fs_generator::generate_linterp(fs_inst *inst,
struct brw_reg dst, struct brw_reg *src)
{
+ /* PLN reads:
+  *                      /   in SIMD16   \
+  *    -----------------------------------
+  *   | src1+0 | src1+1 | src1+2 | src1+3 |
+  *   |-----------------------------------|
+  *   |(x0, x1)|(y0, y1)|(x2, x3)|(y2, y3)|
+  *    -----------------------------------
+  *
+  * but for the LINE/MAC pair, the LINE reads Xs and the MAC reads Ys:
+  *
+  *    -----------------------------------
+  *   | src1+0 | src1+1 | src1+2 | src1+3 |
+  *   |-----------------------------------|
+  *   |(x0, x1)|(y0, y1)|        |        | in SIMD8
+  *   |-----------------------------------|
+  *   |(x0, x1)|(x2, x3)|(y0, y1)|(y2, y3)| in SIMD16
+  *    -----------------------------------
+  *
+  * See also: emit_interpolation_setup_gen4().
+  */
struct brw_reg delta_x = src[0];
- struct brw_reg delta_y = src[1];
- struct brw_reg interp = src[2];
+ struct brw_reg delta_y = offset(src[0], dispatch_width / 8);
+ struct brw_reg interp = src[1];
if (brw->has_pln &&
- delta_y.nr == delta_x.nr + 1 &&
(brw->gen >= 7 || (delta_x.nr & 1) == 0)) {
brw_PLN(p, dst, interp, delta_x);
} else {
*/
no16("interpolate_at_* not yet supported in SIMD16 mode.");
- fs_reg dst_x = vgrf(2);
- fs_reg dst_y = offset(dst_x, 1);
+ fs_reg dst_xy = vgrf(2);
/* For most messages, we need one reg of ignored data; the hardware
* requires mlen==1 even when there is no payload. in the per-slot
switch (instr->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
- inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u));
break;
case nir_intrinsic_interp_var_at_sample: {
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
assert(const_sample);
unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src,
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
fs_reg(msg_data));
break;
}
unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
fs_reg(off_x | (off_y << 4)));
} else {
src = vgrf(glsl_type::ivec2_type);
}
mlen = 2;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
fs_reg(0u));
}
break;
fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
src.type = dest.type;
- emit(FS_OPCODE_LINTERP, dest, dst_x, dst_y, src);
+ emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
dest = offset(dest, 1);
}
break;
}
assert(reg == ra_reg_count);
- /* Add a special class for aligned pairs, which we'll put delta_x/y
+ /* Add a special class for aligned pairs, which we'll put delta_xy
* in on Gen <= 6 so that we can do PLN.
*/
if (devinfo->has_pln && reg_width == 1 && devinfo->gen <= 6) {
* second operand of a PLN instruction needs to be an
* even-numbered register, so we have a special register class
* wm_aligned_pairs_class to handle this case. pre-GEN6 always
- * uses this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the
+ * uses this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] as the
* second operand of a PLN instruction (since it doesn't support
* any other interpolation modes). So all we need to do is find
* that register and set it to the appropriate class.
*/
if (screen->wm_reg_sets[rsi].aligned_pairs_class >= 0 &&
- this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF &&
- this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
+ this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF &&
+ this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
c = screen->wm_reg_sets[rsi].aligned_pairs_class;
}
/* 1. collect interpolation factors */
- fs_reg dst_x = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1));
- fs_reg dst_y = offset(dst_x, 1);
+ fs_reg dst_xy = vgrf(glsl_type::get_instance(ir->type->base_type, 2, 1));
/* for most messages, we need one reg of ignored data; the hardware requires mlen==1
* even when there is no payload. in the per-slot offset case, we'll replace this with
switch (ir->operation) {
case ir_unop_interpolate_at_centroid:
- inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_x, src, fs_reg(0u));
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u));
break;
case ir_binop_interpolate_at_sample: {
assert(sample_num || !"nonconstant sample number should have been lowered.");
unsigned msg_data = sample_num->value.i[0] << 4;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_x, src, fs_reg(msg_data));
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src, fs_reg(msg_data));
break;
}
if (const_offset) {
unsigned msg_data = pack_pixel_offset(const_offset->value.f[0]) |
(pack_pixel_offset(const_offset->value.f[1]) << 4);
- inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_x, src,
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
fs_reg(msg_data));
} else {
/* pack the operands: hw wants offsets as 4 bit signed ints */
}
mlen = 2 * reg_width;
- inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_x, src,
+ inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
fs_reg(0u));
}
break;
for (int i = 0; i < ir->type->vector_elements; i++) {
int ch = swiz ? ((*(int *)&swiz->mask) >> 2*i) & 3 : i;
- emit(FS_OPCODE_LINTERP, res,
- dst_x, dst_y,
+ emit(FS_OPCODE_LINTERP, res, dst_xy,
fs_reg(interp_reg(var->data.location, ch)));
res = offset(res, 1);
}
fs_reg(brw_imm_v(0x11001100))));
this->current_annotation = "compute pixel deltas from v0";
- if (brw->has_pln) {
- this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
- vgrf(glsl_type::vec2_type);
- this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
- offset(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], 1);
+
+ this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
+ vgrf(glsl_type::vec2_type);
+ const fs_reg &delta_xy = this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
+ const fs_reg xstart(negate(brw_vec1_grf(1, 0)));
+ const fs_reg ystart(negate(brw_vec1_grf(1, 1)));
+
+ if (brw->has_pln && dispatch_width == 16) {
+ emit(ADD(half(offset(delta_xy, 0), 0), half(this->pixel_x, 0), xstart));
+ emit(ADD(half(offset(delta_xy, 0), 1), half(this->pixel_y, 0), ystart));
+ emit(ADD(half(offset(delta_xy, 1), 0), half(this->pixel_x, 1), xstart))
+ ->force_sechalf = true;
+ emit(ADD(half(offset(delta_xy, 1), 1), half(this->pixel_y, 1), ystart))
+ ->force_sechalf = true;
} else {
- this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
- vgrf(glsl_type::float_type);
- this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
- vgrf(glsl_type::float_type);
+ emit(ADD(offset(delta_xy, 0), this->pixel_x, xstart));
+ emit(ADD(offset(delta_xy, 1), this->pixel_y, ystart));
}
- emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
- this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))));
- emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
- this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))));
this->current_annotation = "compute pos.w and 1/pos.w";
/* Compute wpos.w. It's always in our setup, since it's needed to
* interpolate the other attributes.
*/
this->wpos_w = vgrf(glsl_type::float_type);
- emit(FS_OPCODE_LINTERP, wpos_w,
- this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
- this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
- interp_reg(VARYING_SLOT_POS, 3));
+ emit(FS_OPCODE_LINTERP, wpos_w, delta_xy, interp_reg(VARYING_SLOT_POS, 3));
/* Compute the pixel 1/W value from wpos.w. */
this->pixel_w = vgrf(glsl_type::float_type);
emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
uint8_t reg = payload.barycentric_coord_reg[i];
- this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
- this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
+ this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0));
}
this->current_annotation = NULL;
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
+/**
+ * Construct float[16] general-purpose register.
+ *
+ * Thin wrapper: forwards \p nr and \p subnr unchanged to brw_vec16_reg()
+ * with the register file fixed to BRW_GENERAL_REGISTER_FILE.
+ *
+ * \param nr     register number, passed through as-is
+ * \param subnr  sub-register selector, passed through as-is
+ *               (NOTE(review): units are defined by brw_vec16_reg() -- confirm)
+ * \return the struct brw_reg produced by brw_vec16_reg()
+ */
+static inline struct brw_reg
+brw_vec16_grf(unsigned nr, unsigned subnr)
+{
+ return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
static inline struct brw_reg
brw_uw8_grf(unsigned nr, unsigned subnr)