} /* extern "C" */
#include "brw_fs.h"
-#include "../glsl/ir_print_visitor.h"
+#include "glsl/ir_print_visitor.h"
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
- GLboolean eot = inst->eot;
+ bool eot = inst->eot;
struct brw_reg implied_header;
/* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
}
void
-fs_visitor::generate_math(fs_inst *inst,
- struct brw_reg dst, struct brw_reg *src)
+fs_visitor::generate_math1_gen7(fs_inst *inst, /* One-operand math emit; generate_code picks this when intel->gen >= 7. */
+ struct brw_reg dst,
+ struct brw_reg src0)
{
- int op = brw_math_function(inst->opcode);
+ assert(inst->mlen == 0); /* this path requires inst->mlen to be zero, unlike generate_math_gen4 */
+ brw_math(p, dst, /* one brw_math call only: no dispatch_width check or compression-control change as in generate_math1_gen6 */
+ brw_math_function(inst->opcode),
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE
+ : BRW_MATH_SATURATE_NONE,
+ 0, src0, /* 0 sits in the argument slot where generate_math_gen4 passes inst->base_mrf */
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
- if (intel->gen >= 6) {
- assert(inst->mlen == 0);
+void
+fs_visitor::generate_math2_gen7(fs_inst *inst, /* Two-operand math emit (INT_QUOTIENT, INT_REMAINDER, POW); used when intel->gen >= 7. */
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ assert(inst->mlen == 0); /* this path requires inst->mlen to be zero */
+ brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1); /* single emit; no sechalf() split as in generate_math2_gen6 */
+}
- if (inst->opcode == SHADER_OPCODE_POW) {
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math2(p, dst, op, src[0], src[1]);
+void
+fs_visitor::generate_math1_gen6(fs_inst *inst, /* One-operand math emit; generate_code picks this when intel->gen == 6. */
+ struct brw_reg dst,
+ struct brw_reg src0)
+{
+ int op = brw_math_function(inst->opcode); /* computed once, reused by both brw_math calls below */
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1]));
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
- } else {
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p, dst,
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- 0, src[0],
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
-
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p, sechalf(dst),
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- 0, sechalf(src[0]),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
- }
- } else /* gen <= 5 */{
- assert(inst->mlen >= 1);
+ assert(inst->mlen == 0); /* this path requires inst->mlen to be zero */
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_math(p, dst,
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* first brw_math is emitted under BRW_COMPRESSION_NONE */
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ 0, src0,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) { /* 16-wide dispatch: emit a second brw_math on sechalf(dst) / sechalf(src0) */
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
op,
inst->saturate ? BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE,
- inst->base_mrf, src[0],
+ 0, sechalf(src0),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); /* leaves COMPRESSED set; presumably the state 16-wide code expects - TODO confirm */
+ }
+}
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_math(p, sechalf(dst),
- op,
- inst->saturate ? BRW_MATH_SATURATE_SATURATE :
- BRW_MATH_SATURATE_NONE,
- inst->base_mrf + 1, sechalf(src[0]),
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+void
+fs_visitor::generate_math2_gen6(fs_inst *inst, /* Two-operand math emit (INT_QUOTIENT, INT_REMAINDER, POW); used when intel->gen == 6. */
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
+{
+ int op = brw_math_function(inst->opcode); /* computed once, reused by both brw_math2 calls below */
- brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
- }
+ assert(inst->mlen == 0); /* this path requires inst->mlen to be zero */
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* first brw_math2 is emitted under BRW_COMPRESSION_NONE */
+ brw_math2(p, dst, op, src0, src1);
+
+ if (c->dispatch_width == 16) { /* 16-wide dispatch: repeat on the sechalf() of dst and both sources */
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); /* leaves COMPRESSED set; presumably the state 16-wide code expects - TODO confirm */
+ }
+}
+
+void
+fs_visitor::generate_math_gen4(fs_inst *inst, /* Math emit for the remaining case (intel->gen below 6), for one- and two-operand opcodes alike. */
+ struct brw_reg dst,
+ struct brw_reg src) /* NOTE(review): callers pass only src[0], even for POW / INT_QUOTIENT / INT_REMAINDER; the second operand is presumably staged elsewhere - confirm */
+{
+ int op = brw_math_function(inst->opcode); /* computed once, reused by both brw_math calls below */
+
+ assert(inst->mlen >= 1); /* unlike the gen6/gen7 helpers, a non-zero inst->mlen is required here */
+
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* first brw_math is emitted under BRW_COMPRESSION_NONE */
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf, src, /* first half is issued with inst->base_mrf */
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ if (c->dispatch_width == 16) { /* 16-wide dispatch: emit a second brw_math on sechalf(dst) / sechalf(src) */
+ brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+ brw_math(p, sechalf(dst),
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf + 1, sechalf(src), /* second half is issued with the next value, base_mrf + 1 */
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+
+ brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); /* leaves COMPRESSED set; presumably the state 16-wide code expects - TODO confirm */
}
}
int msg_type = -1;
int rlen = 4;
uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
+ uint32_t return_format;
+
+ switch (dst.type) {
+ case BRW_REGISTER_TYPE_D:
+ return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
+ break;
+ case BRW_REGISTER_TYPE_UD:
+ return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
+ break;
+ default:
+ return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+ break;
+ }
if (c->dispatch_width == 16)
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
if (intel->gen >= 5) {
switch (inst->opcode) {
- case FS_OPCODE_TEX:
+ case SHADER_OPCODE_TEX:
if (inst->shadow_compare) {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
}
break;
- case FS_OPCODE_TXL:
+ case SHADER_OPCODE_TXL:
if (inst->shadow_compare) {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
}
break;
- case FS_OPCODE_TXD:
+ case SHADER_OPCODE_TXS:
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
+ break;
+ case SHADER_OPCODE_TXD:
/* There is no sample_d_c message; comparisons are done manually */
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
break;
+ case SHADER_OPCODE_TXF:
+ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
+ break;
default:
assert(!"not reached");
break;
}
} else {
switch (inst->opcode) {
- case FS_OPCODE_TEX:
+ case SHADER_OPCODE_TEX:
/* Note that G45 and older determines shadow compare and dispatch width
* from message length for most messages.
*/
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
break;
- case FS_OPCODE_TXL:
+ case SHADER_OPCODE_TXL:
if (inst->shadow_compare) {
assert(inst->mlen == 6);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
break;
- case FS_OPCODE_TXD:
+ case SHADER_OPCODE_TXD:
/* There is no sample_d_c message; comparisons are done manually */
assert(inst->mlen == 7 || inst->mlen == 10);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
break;
+ case SHADER_OPCODE_TXF:
+ assert(inst->mlen == 9);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ break;
+ case SHADER_OPCODE_TXS:
+ assert(inst->mlen == 3);
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO;
+ simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
+ break;
default:
assert(!"not reached");
break;
msg_type,
rlen,
inst->mlen,
- 0,
inst->header_present,
- simd_mode);
+ simd_mode,
+ return_format);
}
}
}
+static uint32_t brw_file_from_reg(fs_reg *reg) /* Map reg->file (ARF, GRF, MRF, IMM) to the corresponding BRW_* register-file constant. */
+{
+ switch (reg->file) {
+ case ARF:
+ return BRW_ARCHITECTURE_REGISTER_FILE;
+ case GRF:
+ return BRW_GENERAL_REGISTER_FILE;
+ case MRF:
+ return BRW_MESSAGE_REGISTER_FILE;
+ case IMM:
+ return BRW_IMMEDIATE_VALUE;
+ default:
+ assert(!"not reached"); /* any other reg->file value always trips this assert when asserts are enabled */
+ return BRW_GENERAL_REGISTER_FILE; /* fallback value returned when asserts are compiled out */
+ }
+}
+
static struct brw_reg
brw_reg_from_fs_reg(fs_reg *reg)
{
case ARF:
case MRF:
if (reg->smear == -1) {
- brw_reg = brw_vec8_reg(reg->file, reg->reg, 0);
+ brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0);
} else {
- brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear);
+ brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear);
}
brw_reg = retype(brw_reg, reg->type);
if (reg->sechalf)
const char *last_annotation_string = NULL;
ir_instruction *last_annotation_ir = NULL;
- int loop_stack_array_size = 16;
- int loop_stack_depth = 0;
- brw_instruction **loop_stack =
- rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
- int *if_depth_in_loop =
- rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
-
-
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
printf("Native code for fragment shader %d (%d-wide dispatch):\n",
prog->Name, c->dispatch_width);
for (unsigned int i = 0; i < 3; i++) {
src[i] = brw_reg_from_fs_reg(&inst->src[i]);
+
+ /* The accumulator result appears to get used for the
+ * conditional modifier generation. When negating a UD
+ * value, there is a 33rd bit generated for the sign in the
+ * accumulator value, so now you can't check, for example,
+ * equality with a 32-bit value. See piglit fs-op-neg-uvec4.
+ */
+ assert(!inst->conditional_mod ||
+ inst->src[i].type != BRW_REGISTER_TYPE_UD ||
+ !inst->src[i].negate);
}
dst = brw_reg_from_fs_reg(&inst->dst);
} else {
brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
}
- if_depth_in_loop[loop_stack_depth]++;
break;
case BRW_OPCODE_ELSE:
break;
case BRW_OPCODE_ENDIF:
brw_ENDIF(p);
- if_depth_in_loop[loop_stack_depth]--;
break;
case BRW_OPCODE_DO:
- loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
- if (loop_stack_array_size <= loop_stack_depth) {
- loop_stack_array_size *= 2;
- loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
- loop_stack_array_size);
- if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
- loop_stack_array_size);
- }
- if_depth_in_loop[loop_stack_depth] = 0;
+ brw_DO(p, BRW_EXECUTE_8);
break;
case BRW_OPCODE_BREAK:
- brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+ brw_BREAK(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
/* FINISHME: We need to write the loop instruction support still. */
if (intel->gen >= 6)
- gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+ gen6_CONT(p);
else
- brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ brw_CONT(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
- case BRW_OPCODE_WHILE: {
- struct brw_instruction *inst0, *inst1;
- GLuint br = 1;
-
- if (intel->gen >= 5)
- br = 2;
-
- assert(loop_stack_depth > 0);
- loop_stack_depth--;
- inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
- if (intel->gen < 6) {
- /* patch all the BREAK/CONT instructions from last BGNLOOP */
- while (inst0 > loop_stack[loop_stack_depth]) {
- inst0--;
- if (inst0->header.opcode == BRW_OPCODE_BREAK &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- }
- else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
- inst0->bits3.if_else.jump_count == 0) {
- inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- }
- }
- }
- }
+ case BRW_OPCODE_WHILE:
+ brw_WHILE(p);
break;
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_SQRT:
case SHADER_OPCODE_EXP2:
case SHADER_OPCODE_LOG2:
- case SHADER_OPCODE_POW:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
- generate_math(inst, dst, src);
+ if (intel->gen >= 7) {
+ generate_math1_gen7(inst, dst, src[0]);
+ } else if (intel->gen == 6) {
+ generate_math1_gen6(inst, dst, src[0]);
+ } else {
+ generate_math_gen4(inst, dst, src[0]);
+ }
+ break;
+ case SHADER_OPCODE_INT_QUOTIENT:
+ case SHADER_OPCODE_INT_REMAINDER:
+ case SHADER_OPCODE_POW:
+ if (intel->gen >= 7) {
+ generate_math2_gen7(inst, dst, src[0], src[1]);
+ } else if (intel->gen == 6) {
+ generate_math2_gen6(inst, dst, src[0], src[1]);
+ } else {
+ generate_math_gen4(inst, dst, src[0]);
+ }
break;
case FS_OPCODE_PIXEL_X:
generate_pixel_xy(dst, true);
case FS_OPCODE_LINTERP:
generate_linterp(inst, dst, src);
break;
- case FS_OPCODE_TEX:
+ case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
- case FS_OPCODE_TXD:
- case FS_OPCODE_TXL:
+ case SHADER_OPCODE_TXD:
+ case SHADER_OPCODE_TXF:
+ case SHADER_OPCODE_TXL:
+ case SHADER_OPCODE_TXS:
generate_tex(inst, dst, src[0]);
break;
case FS_OPCODE_DISCARD:
printf("\n");
}
- ralloc_free(loop_stack);
- ralloc_free(if_depth_in_loop);
-
brw_set_uip_jip(p);
/* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS