}
#include "brw_shader.h"
#include "brw_fs.h"
-#include "../glsl/glsl_types.h"
-#include "../glsl/ir_print_visitor.h"
+#include "glsl/glsl_types.h"
+#include "glsl/ir_print_visitor.h"
#define MAX_INSTRUCTION (1 << 30)
return 0;
switch (inst->opcode) {
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
return 1 * c->dispatch_width / 8;
- case FS_OPCODE_POW:
+ case SHADER_OPCODE_POW:
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
return 2 * c->dispatch_width / 8;
- case FS_OPCODE_TEX:
+ case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
- case FS_OPCODE_TXD:
- case FS_OPCODE_TXL:
+ case SHADER_OPCODE_TXD:
+ case SHADER_OPCODE_TXF:
+ case SHADER_OPCODE_TXL:
+ case SHADER_OPCODE_TXS:
return 1;
case FS_OPCODE_FB_WRITE:
return 2;
}
/**
 * Fixed HW reg constructor (untyped overload).
 *
 * Runs init(), then records the given register file and register number.
 * This overload always sets the register type to BRW_REGISTER_TYPE_F; use
 * the three-argument overload to choose a different type.
 *
 * The member that stores the register number is renamed from hw_reg to reg
 * by this change, and the parameter name follows it.
 */
-fs_reg::fs_reg(enum register_file file, int hw_reg)
+fs_reg::fs_reg(enum register_file file, int reg)
{
init();
this->file = file;
- this->hw_reg = hw_reg;
+ this->reg = reg;
this->type = BRW_REGISTER_TYPE_F;
}
/**
 * Fixed HW reg constructor (typed overload).
 *
 * Runs init(), then records the given register file, register number and
 * register type exactly as passed in. Unlike the two-argument overload, no
 * default type is applied: the caller's type value is stored unchanged.
 *
 * The member that stores the register number is renamed from hw_reg to reg
 * by this change, and the parameter name follows it.
 */
-fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type)
+fs_reg::fs_reg(enum register_file file, int reg, uint32_t type)
{
init();
this->file = file;
- this->hw_reg = hw_reg;
+ this->reg = reg;
this->type = type;
}
assert(param < ARRAY_SIZE(c->prog_data.param));
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
+ if (ctx->Const.NativeIntegers) {
c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
- break;
- case GLSL_TYPE_UINT:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
- break;
- case GLSL_TYPE_INT:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
- break;
- case GLSL_TYPE_BOOL:
- c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
- break;
- default:
- assert(!"not reached");
- c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
- break;
+ } else {
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
+ case GLSL_TYPE_UINT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2U;
+ break;
+ case GLSL_TYPE_INT:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2I;
+ break;
+ case GLSL_TYPE_BOOL:
+ c->prog_data.param_convert[param] = PARAM_CONVERT_F2B;
+ break;
+ default:
+ assert(!"not reached");
+ c->prog_data.param_convert[param] = PARAM_NO_CONVERT;
+ break;
+ }
}
this->param_index[param] = loc;
this->param_offset[param] = i;
emit(BRW_OPCODE_MOV, wpos,
fs_reg(brw_vec8_grf(c->source_depth_reg, 0)));
} else {
- emit(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
- interp_reg(FRAG_ATTRIB_WPOS, 2));
+ emit(FS_OPCODE_LINTERP, wpos,
+ this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
+ this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
+ interp_reg(FRAG_ATTRIB_WPOS, 2));
}
wpos.reg_offset++;
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
- /* Interpolation is always in floating point regs. */
- reg->type = BRW_REGISTER_TYPE_F;
+ reg->type = brw_type_for_base_type(ir->type->get_scalar_type());
fs_reg attr = *reg;
unsigned int array_elements;
type = ir->type;
}
+ glsl_interp_qualifier interpolation_mode =
+ ir->determine_interpolation_mode(c->key.flat_shade);
+
int location = ir->location;
for (unsigned int i = 0; i < array_elements; i++) {
for (unsigned int j = 0; j < type->matrix_columns; j++) {
continue;
}
- bool is_gl_Color =
- location == FRAG_ATTRIB_COL0 || location == FRAG_ATTRIB_COL1;
-
- if (c->key.flat_shade && is_gl_Color) {
+ if (interpolation_mode == INTERP_QUALIFIER_FLAT) {
/* Constant interpolation (flat shading) case. The SF has
* handed us defined values in only the constant offset
* field of the setup reg.
for (unsigned int k = 0; k < type->vector_elements; k++) {
struct brw_reg interp = interp_reg(location, k);
interp = suboffset(interp, 3);
+ interp.type = reg->type;
emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
attr.reg_offset++;
}
} else {
- /* Perspective interpolation case. */
+ /* Smooth/noperspective interpolation case. */
for (unsigned int k = 0; k < type->vector_elements; k++) {
/* FINISHME: At some point we probably want to push
* this farther by giving similar treatment to the
emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
} else {
struct brw_reg interp = interp_reg(location, k);
- emit(FS_OPCODE_LINTERP, attr,
- this->delta_x, this->delta_y, fs_reg(interp));
+ brw_wm_barycentric_interp_mode barycoord_mode;
+ if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
+ barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
+ else
+ barycoord_mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+ emit(FS_OPCODE_LINTERP, attr,
+ this->delta_x[barycoord_mode],
+ this->delta_y[barycoord_mode], fs_reg(interp));
}
attr.reg_offset++;
}
}
fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
{
switch (opcode) {
- case FS_OPCODE_RCP:
- case FS_OPCODE_RSQ:
- case FS_OPCODE_SQRT:
- case FS_OPCODE_EXP2:
- case FS_OPCODE_LOG2:
- case FS_OPCODE_SIN:
- case FS_OPCODE_COS:
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
break;
default:
assert(!"not reached: bad math opcode");
* expanding that result out, but we would need to be careful with
* masking.
*
- * The hardware ignores source modifiers (negate and abs) on math
+ * Gen 6 hardware ignores source modifiers (negate and abs) on math
* instructions, so we also move to a temp to set those up.
*/
- if (intel->gen >= 6 && (src.file == UNIFORM ||
+ if (intel->gen == 6 && (src.file == UNIFORM ||
src.abs ||
src.negate)) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
}
fs_inst *
-fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
{
int base_mrf = 2;
fs_inst *inst;
- assert(opcode == FS_OPCODE_POW);
+ switch (opcode) {
+ case SHADER_OPCODE_POW:
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
+ break;
+ default:
+ assert(!"not reached: unsupported binary math opcode.");
+ return NULL;
+ }
- if (intel->gen >= 6) {
+ if (intel->gen >= 7) {
+ inst = emit(opcode, dst, src0, src1);
+ } else if (intel->gen == 6) {
/* Can't do hstride == 0 args to gen6 math, so expand it out.
*
* The hardware ignores source modifiers (negate and abs) on math
*/
if (src0.file == UNIFORM || src0.abs || src0.negate) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
+ expanded.type = src0.type;
emit(BRW_OPCODE_MOV, expanded, src0);
src0 = expanded;
}
if (src1.file == UNIFORM || src1.abs || src1.negate) {
fs_reg expanded = fs_reg(this, glsl_type::float_type);
+ expanded.type = src1.type;
emit(BRW_OPCODE_MOV, expanded, src1);
src1 = expanded;
}
inst = emit(opcode, dst, src0, src1);
} else {
- emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1);
- inst = emit(opcode, dst, src0, reg_null_f);
+ /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
+ * "Message Payload":
+ *
+ * "Operand0[7]. For the INT DIV functions, this operand is the
+ * denominator."
+ * ...
+ * "Operand1[7]. For the INT DIV functions, this operand is the
+ * numerator."
+ */
+ bool is_int_div = opcode != SHADER_OPCODE_POW;
+ fs_reg &op0 = is_int_div ? src1 : src0;
+ fs_reg &op1 = is_int_div ? src0 : src1;
+
+ emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1, op1.type), op1);
+ inst = emit(opcode, dst, op0, reg_null_f);
inst->base_mrf = base_mrf;
inst->mlen = 2 * c->dispatch_width / 8;
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == UNIFORM) {
- int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs +
constant_nr / 8,
constant_nr % 8);
/* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
- int fp_index;
-
- if (i >= VERT_RESULT_VAR0)
- fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
- else if (i <= VERT_RESULT_TEX7)
- fp_index = i;
- else
- fp_index = -1;
+ int fp_index = _mesa_vert_result_to_frag_attrib((gl_vert_result) i);
if (fp_index >= 0)
urb_setup[fp_index] = urb_next++;
split_grf[i] = false;
}
- if (brw->has_pln) {
- /* PLN opcodes rely on the delta_xy being contiguous. */
- split_grf[this->delta_x.reg] = false;
+ if (brw->has_pln &&
+ this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF) {
+ /* PLN opcodes rely on the delta_xy being contiguous. We only have to
+ * check this for BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC, because prior to
+ * Gen6, that was the only supported interpolation mode, and since Gen6,
+ * delta_x and delta_y are in fixed hardware registers.
+ */
+ split_grf[this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg] =
+ false;
}
foreach_list(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
for (int i = 0; i < 3; i++) {
- int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
if (inst->src[i].file != UNIFORM)
continue;
fs_inst *inst = (fs_inst *)node;
for (int i = 0; i < 3; i++) {
- int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int constant_nr = inst->src[i].reg + inst->src[i].reg_offset;
if (inst->src[i].file != UNIFORM)
continue;
assert(this->params_remap[constant_nr] != -1);
- inst->src[i].hw_reg = this->params_remap[constant_nr];
+ inst->src[i].reg = this->params_remap[constant_nr];
inst->src[i].reg_offset = 0;
}
}
if (inst->src[i].file != UNIFORM)
continue;
- int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset;
if (uniform_nr < pull_uniform_base)
continue;
scan_inst->src[i] = inst->src[0];
progress = true;
} else if (i == 0 && scan_inst->src[1].file != IMM) {
- /* Fit this constant in by commuting the operands */
+ /* Fit this constant in by commuting the operands.
+ * Exception: we can't do this for 32-bit integer MUL
+ * because it's asymmetric.
+ */
+ if (scan_inst->opcode == BRW_OPCODE_MUL &&
+ (scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
+ scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
+ break;
scan_inst->src[0] = scan_inst->src[1];
scan_inst->src[1] = inst->src[0];
progress = true;
break;
case BRW_OPCODE_CMP:
+ case BRW_OPCODE_IF:
if (i == 1) {
scan_inst->src[i] = inst->src[0];
progress = true;
}
break;
- case FS_OPCODE_RCP:
+ case SHADER_OPCODE_RCP:
/* The hardware doesn't do math on immediate values
* (because why are you doing that, seriously?), but
* the correct answer is to just constant fold it
progress = true;
}
break;
+
+ default:
+ break;
}
}
break;
}
+ break;
+ default:
break;
}
}
case BRW_OPCODE_ENDIF:
if_depth--;
break;
+ default:
+ break;
}
if (loop_depth || if_depth)
continue;
interfered = true;
break;
}
+
+ /* The accumulator result appears to get used for the
+ * conditional modifier generation. When negating a UD
+ * value, there is a 33rd bit generated for the sign in the
+ * accumulator value, so now you can't check, for example,
+ * equality with a 32-bit value. See piglit fs-op-neg-uint.
+ */
+ if (scan_inst->conditional_mod &&
+ inst->src[0].negate &&
+ inst->src[0].type == BRW_REGISTER_TYPE_UD) {
+ interfered = true;
+ break;
+ }
}
if (interfered) {
continue;
scan_inst->src[i].reg == inst->dst.reg &&
scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
fs_reg new_src = inst->src[0];
+ if (scan_inst->src[i].abs) {
+ new_src.negate = 0;
+ new_src.abs = 1;
+ }
new_src.negate ^= scan_inst->src[i].negate;
- new_src.abs |= scan_inst->src[i].abs;
scan_inst->src[i] = new_src;
}
}
/* Work out which hardware MRF registers are written by this
* instruction.
*/
- int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4;
int mrf_high;
- if (inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ if (inst->dst.reg & BRW_MRF_COMPR4) {
mrf_high = mrf_low + 4;
} else if (c->dispatch_width == 16 &&
(!inst->force_uncompressed && !inst->force_sechalf)) {
if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
/* Found the creator of our MRF's source value. */
scan_inst->dst.file = MRF;
- scan_inst->dst.hw_reg = inst->dst.hw_reg;
+ scan_inst->dst.reg = inst->dst.reg;
scan_inst->saturate |= inst->saturate;
inst->remove();
progress = true;
/* If somebody else writes our MRF here, we can't
* compute-to-MRF before that.
*/
- int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4;
+ int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4;
int scan_mrf_high;
- if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) {
+ if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
scan_mrf_high = scan_mrf_low + 4;
} else if (c->dispatch_width == 16 &&
(!scan_inst->force_uncompressed &&
if (inst->opcode == BRW_OPCODE_MOV &&
inst->dst.file == MRF) {
- fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg];
+ fs_inst *prev_inst = last_mrf_move[inst->dst.reg];
if (prev_inst && inst->equals(prev_inst)) {
inst->remove();
progress = true;
/* Clear out the last-write records for MRFs that were overwritten. */
if (inst->dst.file == MRF) {
- last_mrf_move[inst->dst.hw_reg] = NULL;
+ last_mrf_move[inst->dst.reg] = NULL;
}
if (inst->mlen > 0) {
inst->dst.file == MRF &&
inst->src[0].file == GRF &&
!inst->predicated) {
- last_mrf_move[inst->dst.hw_reg] = inst;
+ last_mrf_move[inst->dst.reg] = inst;
}
}
/* Make sure we didn't try to sneak in an extra uniform */
assert(orig_nr_params == c->prog_data.nr_params);
+ (void) orig_nr_params;
}
return !failed;
fs_visitor v(c, prog, shader);
if (!v.run()) {
- prog->LinkStatus = GL_FALSE;
- prog->InfoLog = ralloc_strdup(prog, v.fail_msg);
+ prog->LinkStatus = false;
+ ralloc_strcat(&prog->InfoLog, v.fail_msg);
return false;
}
{
struct brw_context *brw = brw_context(ctx);
struct brw_wm_prog_key key;
- struct gl_fragment_program *fp = prog->FragmentProgram;
- struct brw_fragment_program *bfp = brw_fragment_program(fp);
- if (!fp)
+ if (!prog->_LinkedShaders[MESA_SHADER_FRAGMENT])
return true;
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)
+ prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
+ struct brw_fragment_program *bfp = brw_fragment_program(fp);
+
memset(&key, 0, sizeof(key));
if (fp->UsesKill)
key.vp_outputs_written |= BITFIELD64_BIT(FRAG_ATTRIB_WPOS);
for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
- int vp_index = -1;
-
if (!(fp->Base.InputsRead & BITFIELD64_BIT(i)))
continue;
key.proj_attrib_mask |= 1 << i;
- if (i <= FRAG_ATTRIB_TEX7)
- vp_index = i;
- else if (i >= FRAG_ATTRIB_VAR0)
- vp_index = i - FRAG_ATTRIB_VAR0 + VERT_RESULT_VAR0;
+ int vp_index = _mesa_vert_result_to_frag_attrib((gl_vert_result) i);
if (vp_index >= 0)
key.vp_outputs_written |= BITFIELD64_BIT(vp_index);
for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
if (fp->Base.ShadowSamplers & (1 << i))
- key.compare_funcs[i] = GL_LESS;
+ key.tex.compare_funcs[i] = GL_LESS;
/* FINISHME: depth compares might use (0,0,0,W) for example */
- key.tex_swizzles[i] = SWIZZLE_XYZW;
+ key.tex.swizzles[i] = SWIZZLE_XYZW;
}
if (fp->Base.InputsRead & FRAG_BIT_WPOS) {