case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
- return 1 * c->dispatch_width / 8;
+ return 1 * dispatch_width / 8;
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
- return 2 * c->dispatch_width / 8;
+ return 2 * dispatch_width / 8;
case SHADER_OPCODE_TEX:
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
if (intel->gen < 6) {
inst->base_mrf = 2;
- inst->mlen = c->dispatch_width / 8;
+ inst->mlen = dispatch_width / 8;
}
return inst;
inst = emit(opcode, dst, op0, reg_null_f);
inst->base_mrf = base_mrf;
- inst->mlen = 2 * c->dispatch_width / 8;
+ inst->mlen = 2 * dispatch_width / 8;
}
return inst;
}
void
fs_visitor::setup_paramvalues_refs()
{
- if (c->dispatch_width != 8)
+ if (dispatch_width != 8)
return;
/* Set up the pointers to ParamValues now that that array is finalized. */
fs_visitor::assign_curb_setup()
{
c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
- if (c->dispatch_width == 8) {
+ if (dispatch_width == 8) {
c->prog_data.first_curbe_grf = c->nr_payload_regs;
} else {
c->prog_data.first_curbe_grf_16 = c->nr_payload_regs;
bool
fs_visitor::remove_dead_constants()
{
- if (c->dispatch_width == 8) {
+ if (dispatch_width == 8) {
this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
if (c->prog_data.nr_params <= max_uniform_components)
return;
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
fail("Pull constants not supported in 16-wide\n");
return;
}
int mrf_high;
if (inst->dst.reg & BRW_MRF_COMPR4) {
mrf_high = mrf_low + 4;
- } else if (c->dispatch_width == 16 &&
+ } else if (dispatch_width == 16 &&
(!inst->force_uncompressed && !inst->force_sechalf)) {
mrf_high = mrf_low + 1;
} else {
if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
scan_mrf_high = scan_mrf_low + 4;
- } else if (c->dispatch_width == 16 &&
+ } else if (dispatch_width == 16 &&
(!scan_inst->force_uncompressed &&
!scan_inst->force_sechalf)) {
scan_mrf_high = scan_mrf_low + 1;
bool progress = false;
/* Need to update the MRF tracking for compressed instructions. */
- if (c->dispatch_width == 16)
+ if (dispatch_width == 16)
return false;
memset(last_mrf_move, 0, sizeof(last_mrf_move));
if (barycentric_interp_modes & (1 << i)) {
c->barycentric_coord_reg[i] = c->nr_payload_regs;
c->nr_payload_regs += 2;
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
c->nr_payload_regs += 2;
}
}
if (uses_depth) {
c->source_depth_reg = c->nr_payload_regs;
c->nr_payload_regs++;
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
/* R28: interpolated depth if not 8-wide. */
c->nr_payload_regs++;
}
if (uses_depth) {
c->source_w_reg = c->nr_payload_regs;
c->nr_payload_regs++;
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
/* R30: interpolated W if not 8-wide. */
c->nr_payload_regs++;
}
else
setup_payload_gen4();
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
/* We have to do a compaction pass now, or the one at the end of
* execution will squash down where our prog_offset start needs
* to be.
generate_code();
- if (c->dispatch_width == 8) {
+ if (dispatch_width == 8) {
c->prog_data.reg_blocks = brw_register_blocks(grf_used);
} else {
c->prog_data.reg_blocks_16 = brw_register_blocks(grf_used);
/* Now the main event: Visit the shader IR and generate our FS IR for it.
*/
- c->dispatch_width = 8;
-
- fs_visitor v(c, prog, shader);
+ fs_visitor v(c, prog, shader, 8);
if (!v.run()) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
}
if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) {
- c->dispatch_width = 16;
- fs_visitor v2(c, prog, shader);
+ fs_visitor v2(c, prog, shader, 16);
v2.import_uniforms(&v);
if (!v2.run()) {
perf_debug("16-wide shader failed to compile, falling back to "
public:
fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
- struct brw_shader *shader);
+ struct brw_shader *shader, unsigned dispatch_width);
~fs_visitor();
fs_reg *variable_storage(ir_variable *var);
int grf_used;
+ const unsigned dispatch_width; /**< 8 or 16 */
+
int force_uncompressed_stack;
int force_sechalf_stack;
};
if (this->dual_src_output.file != BAD_FILE)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
- else if (c->dispatch_width == 16)
+ else if (dispatch_width == 16)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
brw_pop_insn_state(p);
brw_fb_WRITE(p,
- c->dispatch_width,
+ dispatch_width,
inst->base_mrf,
implied_header,
msg_control,
deltas = brw_imm_v(0x11001100);
}
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
dst = vec16(dst);
}
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p, sechalf(dst),
op,
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math2(p, dst, op, src0, src1);
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_math(p, sechalf(dst),
op,
break;
}
- if (c->dispatch_width == 16)
+ if (dispatch_width == 16)
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
if (intel->gen >= 5) {
/* Note that G45 and older determines shadow compare and dispatch width
* from message length for most messages.
*/
- assert(c->dispatch_width == 8);
+ assert(dispatch_width == 8);
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
if (inst->shadow_compare) {
assert(inst->mlen == 6);
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
if (shader) {
printf("Native code for fragment shader %d (%d-wide dispatch):\n",
- prog->Name, c->dispatch_width);
+ prog->Name, dispatch_width);
} else {
printf("Native code for fragment program %d (%d-wide dispatch):\n",
- c->fp->program.Base.Id, c->dispatch_width);
+ c->fp->program.Base.Id, dispatch_width);
}
}
brw_set_predicate_inverse(p, inst->predicate_inverse);
brw_set_saturate(p, inst->saturate);
- if (inst->force_uncompressed || c->dispatch_width == 8) {
+ if (inst->force_uncompressed || dispatch_width == 8) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
} else if (inst->force_sechalf) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
case BRW_OPCODE_MAD:
brw_set_access_mode(p, BRW_ALIGN_16);
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MAD(p, dst, src[0], src[1], src[2]);
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
assert(intel->gen == 6);
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
} else {
- brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
+ brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);
}
break;
emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
BRW_CONDITIONAL_L));
- if (intel->gen < 6 && c->dispatch_width == 16)
+ if (intel->gen < 6 && dispatch_width == 16)
fail("Can't support (non-uniform) control flow on 16-wide");
emit(IF(BRW_PREDICATE_NORMAL));
emit(FS_OPCODE_DISCARD);
fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
/* PROGRAM_STATE_VAR etc. */
- if (c->dispatch_width == 8) {
+ if (dispatch_width == 8) {
for (unsigned p = 0;
p < c->fp->program.Base.Parameters->NumParameters; p++) {
for (unsigned int i = 0; i < 4; i++) {
* so our second half values in g6 got overwritten in the first
* half.
*/
- if (c->dispatch_width == 16 && (this->pixel_x.reg == a ||
- this->pixel_x.reg == b ||
- this->pixel_y.reg == a ||
- this->pixel_y.reg == b)) {
+ if (dispatch_width == 16 && (this->pixel_x.reg == a ||
+ this->pixel_x.reg == b ||
+ this->pixel_y.reg == a ||
+ this->pixel_y.reg == b)) {
return start <= end;
}
{
int hw_reg_mapping[this->virtual_grf_count + 1];
int i;
- int reg_width = c->dispatch_width / 8;
+ int reg_width = dispatch_width / 8;
/* Note that compressed instructions require alignment to 2 registers. */
hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
int payload_node_count,
int first_payload_node)
{
- int reg_width = c->dispatch_width / 8;
+ int reg_width = dispatch_width / 8;
int loop_depth = 0;
int loop_end_ip = 0;
fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)
{
int mrf_count = BRW_MAX_GRF - GEN7_MRF_HACK_START;
- int reg_width = c->dispatch_width / 8;
+ int reg_width = dispatch_width / 8;
/* Identify all the MRFs used in the program. */
bool mrf_used[mrf_count];
* registers it's allocating be contiguous physical pairs of regs
* for reg_width == 2.
*/
- int reg_width = c->dispatch_width / 8;
+ int reg_width = dispatch_width / 8;
int hw_reg_mapping[this->virtual_grf_count];
int payload_node_count = (ALIGN(this->first_non_payload_grf, reg_width) /
reg_width);
if (reg == -1) {
fail("no register to spill\n");
- } else if (c->dispatch_width == 16) {
+ } else if (dispatch_width == 16) {
fail("Failure to register allocate. Reduce number of live scalar "
"values to avoid this.");
} else {
/**
 * Returns true only when all three conditions hold: the dispatch width read
 * through v is 16, inst->force_uncompressed is not set, and
 * inst->force_sechalf is not set.  Returns false otherwise.
 */
bool
instruction_scheduler::is_compressed(fs_inst *inst)
{
- return (v->c->dispatch_width == 16 &&
+ return (v->dispatch_width == 16 &&
!inst->force_uncompressed &&
!inst->force_sechalf);
}
if (ir->uniform_block != -1)
return;
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
if (!variable_storage(ir)) {
fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
}
* FINISHME: Emit just the MUL if we know an operand is small
* enough.
*/
- if (intel->gen >= 7 && c->dispatch_width == 16)
+ if (intel->gen >= 7 && dispatch_width == 16)
fail("16-wide explicit accumulator operands unsupported\n");
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
}
break;
case ir_binop_div:
- if (intel->gen >= 7 && c->dispatch_width == 16)
+ if (intel->gen >= 7 && dispatch_width == 16)
fail("16-wide INTDIV unsupported\n");
/* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
break;
case ir_binop_mod:
- if (intel->gen >= 7 && c->dispatch_width == 16)
+ if (intel->gen >= 7 && dispatch_width == 16)
fail("16-wide INTDIV unsupported\n");
/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
{
int mlen = 0;
int base_mrf = 2;
- int reg_width = c->dispatch_width / 8;
+ int reg_width = dispatch_width / 8;
bool header_present = false;
const int vector_elements =
ir->coordinate ? ir->coordinate->type->vector_elements : 0;
{
int mlen = 0;
int base_mrf = 2;
- int reg_width = c->dispatch_width / 8;
+ int reg_width = dispatch_width / 8;
bool header_present = false;
int offsets[3];
mlen += reg_width;
break;
case ir_txd: {
- if (c->dispatch_width == 16)
+ if (dispatch_width == 16)
fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
/* Load dPdx and the coordinate together:
0
};
- if (c->dispatch_width == 16) {
+ if (dispatch_width == 16) {
fail("rectangle scale uniform setup not supported on 16-wide\n");
return coordinate;
}
void
fs_visitor::visit(ir_if *ir)
{
- if (intel->gen < 6 && c->dispatch_width == 16) {
+ if (intel->gen < 6 && dispatch_width == 16) {
fail("Can't support (non-uniform) control flow on 16-wide\n");
}
{
fs_reg counter = reg_undef;
- if (intel->gen < 6 && c->dispatch_width == 16) {
+ if (intel->gen < 6 && dispatch_width == 16) {
fail("Can't support (non-uniform) control flow on 16-wide\n");
}
void
fs_visitor::emit_dummy_fs()
{
- int reg_width = c->dispatch_width / 8;
+ int reg_width = dispatch_width / 8;
/* Everyone's favorite color. */
emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)));
void
fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
{
- int reg_width = c->dispatch_width / 8;
+ int reg_width = dispatch_width / 8;
fs_inst *inst;
fs_reg color = outputs[target];
fs_reg mrf;
color.reg_offset += index;
- if (c->dispatch_width == 8 || intel->gen >= 6) {
+ if (dispatch_width == 8 || intel->gen >= 6) {
/* SIMD8 write looks like:
* m + 0: r0
* m + 1: r1
*/
int base_mrf = 1;
int nr = base_mrf;
- int reg_width = c->dispatch_width / 8;
+ int reg_width = dispatch_width / 8;
bool do_dual_src = this->dual_src_output.file != BAD_FILE;
bool src0_alpha_to_render_target = false;
- if (c->dispatch_width == 16 && do_dual_src) {
+ if (dispatch_width == 16 && do_dual_src) {
fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
do_dual_src = false;
}
nr += reg_width;
if (c->source_depth_to_render_target) {
- if (intel->gen == 6 && c->dispatch_width == 16) {
+ if (intel->gen == 6 && dispatch_width == 16) {
/* For outputting oDepth on gen6, SIMD8 writes have to be
* used. This would require 8-wide moves of each half to
* message regs, kind of like pre-gen5 SIMD16 FB writes.
}
fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
- struct brw_shader *shader)
+ struct brw_shader *shader, unsigned dispatch_width)
+ : dispatch_width(dispatch_width)
{
this->c = c;
this->p = &c->func;
GLuint runtime_check_aads_emit:1;
GLuint last_scratch;
-
- GLuint dispatch_width;
};
bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,