p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
if (p->brw->gen >= 6) {
- /* Since we don't use the 32-wide support in gen6, we translate
+ /* Since we don't use the SIMD32 support in gen6, we translate
* the pre-gen6 compression control here.
*/
switch (compression_control) {
case BRW_COMPRESSION_NONE:
p->current->header.compression_control = GEN6_COMPRESSION_1Q;
break;
case BRW_COMPRESSION_2NDHALF:
- /* For 8-wide, this is "use the second set of 8 bits." */
+ /* For SIMD8, this is "use the second set of 8 bits." */
p->current->header.compression_control = GEN6_COMPRESSION_2Q;
break;
case BRW_COMPRESSION_COMPRESSED:
- /* For 16-wide instruction compression, use the first set of 16 bits
- * since we don't do 32-wide dispatch.
+ /* For SIMD16 instruction compression, use the first set of 16 bits
+ * since we don't do SIMD32 dispatch.
*/
p->current->header.compression_control = GEN6_COMPRESSION_1H;
break;
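/* Roughly, gen6's compression_control field is a quarter/half select over a
 * notional SIMD32 channel mask, so the translation above is: NONE -> 1Q
 * (channels 0-7), 2NDHALF -> 2Q (channels 8-15), COMPRESSED -> 1H
 * (channels 0-15).
 */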
hash_table_insert(dst_ht, data, key);
}
-/* For 16-wide, we need to follow from the uniform setup of 8-wide dispatch.
+/* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
* This brings in those uniform definitions.
*/
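/* Sketch of the idea (a reading of the v2.import_uniforms(&v) call further
 * down): the SIMD8 and SIMD16 programs share a single push-constant layout,
 * so the SIMD16 pass reuses the SIMD8 visitor's uniform slots instead of
 * rebuilding them.
 */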
void
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
if (brw->gen >= 7 && dispatch_width == 16)
- fail("16-wide INTDIV unsupported\n");
+ fail("SIMD16 INTDIV unsupported\n");
break;
case SHADER_OPCODE_POW:
break;
c->prog_data.nr_params = new_nr_params;
} else {
- /* This should have been generated in the 8-wide pass already. */
+ /* This should have been generated in the SIMD8 pass already. */
assert(this->params_remap);
}
return;
if (dispatch_width == 16) {
- fail("Pull constants not supported in 16-wide\n");
+ fail("Pull constants not supported in SIMD16\n");
return;
}
clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
int first_grf, int grf_len)
{
- bool inst_16wide = (dispatch_width > 8 &&
+ bool inst_simd16 = (dispatch_width > 8 &&
!inst->force_uncompressed &&
!inst->force_sechalf);
if (grf >= first_grf &&
grf < first_grf + grf_len) {
deps[grf - first_grf] = false;
- if (inst_16wide)
+ if (inst_simd16)
deps[grf - first_grf + 1] = false;
}
}
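/* Why the +1: a compressed SIMD16 instruction (neither force_uncompressed
 * nor force_sechalf) works on 16 channels of 32-bit data per operand, i.e.
 * two adjacent GRFs, so its register regions cover both grf and grf + 1.
 */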
return;
}
- bool scan_inst_16wide = (dispatch_width > 8 &&
+ bool scan_inst_simd16 = (dispatch_width > 8 &&
!scan_inst->force_uncompressed &&
!scan_inst->force_sechalf);
needs_dep[reg - first_write_grf]) {
inst->insert_before(DEP_RESOLVE_MOV(reg));
needs_dep[reg - first_write_grf] = false;
- if (scan_inst_16wide)
+ if (scan_inst_simd16)
needs_dep[reg - first_write_grf + 1] = false;
}
}
c->source_depth_reg = c->nr_payload_regs;
c->nr_payload_regs++;
if (dispatch_width == 16) {
- /* R28: interpolated depth if not 8-wide. */
+ /* R28: interpolated depth if not SIMD8. */
c->nr_payload_regs++;
}
}
c->source_w_reg = c->nr_payload_regs;
c->nr_payload_regs++;
if (dispatch_width == 16) {
- /* R30: interpolated W if not 8-wide. */
+ /* R30: interpolated W if not SIMD8. */
c->nr_payload_regs++;
}
}
c->sample_mask_reg = c->nr_payload_regs;
c->nr_payload_regs++;
if (dispatch_width == 16) {
- /* R33: input coverage mask if not 8-wide. */
+ /* R33: input coverage mask if not SIMD8. */
c->nr_payload_regs++;
}
}
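/* Common pattern in the payload setup above: each per-channel value (source
 * depth, source W, input coverage mask) fits in one GRF of 8 dwords for
 * SIMD8 but needs two GRFs for SIMD16, hence the extra nr_payload_regs++
 * when dispatch_width == 16.
 */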
fs_visitor v2(brw, c, prog, fp, 16);
if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) {
if (c->prog_data.nr_pull_params == 0) {
- /* Try a 16-wide compile */
+ /* Try a SIMD16 compile */
v2.import_uniforms(&v);
if (!v2.run()) {
- perf_debug("16-wide shader failed to compile, falling back to "
- "8-wide at a 10-20%% performance cost: %s", v2.fail_msg);
+ perf_debug("SIMD16 shader failed to compile, falling back to "
+ "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
} else {
simd16_instructions = &v2.instructions;
}
} else {
- perf_debug("Skipping 16-wide due to pull parameters.\n");
+ perf_debug("Skipping SIMD16 due to pull parameters.\n");
}
}
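/* Shape of the two-pass compile as visible here: the SIMD8 program is
 * required, while the SIMD16 compile is best-effort and is skipped entirely
 * when pull constants are in use (matching the "Pull constants not supported
 * in SIMD16" bail-out earlier). If it fails, the driver simply keeps the
 * SIMD8-only program.
 */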
dst = vec16(dst);
}
- /* We do this 8 or 16-wide, but since the destination is UW we
- * don't do compression in the 16-wide case.
+ /* We do this SIMD8 or SIMD16, but since the destination is UW we
+ * don't do compression in the SIMD16 case.
*/
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
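/* The arithmetic behind skipping compression: with a UW (16-bit) destination,
 * even 16 channels only cover 16 * 2 = 32 bytes, i.e. a single GRF, so the
 * SIMD16 case still fits in an uncompressed instruction.
 */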
}
if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) {
- /* The send-from-GRF for 16-wide texturing with a header has an extra
+ /* The send-from-GRF for SIMD16 texturing with a header has an extra
* hardware register allocated to it, which we need to skip over (since
* our coordinates in the payload are in the even-numbered registers,
* and the header comes right before the first one).
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
if (shader) {
- printf("Native code for fragment shader %d (%d-wide dispatch):\n",
+ printf("Native code for fragment shader %d (SIMD%d dispatch):\n",
prog->Name, dispatch_width);
} else if (fp) {
- printf("Native code for fragment program %d (%d-wide dispatch):\n",
+ printf("Native code for fragment program %d (SIMD%d dispatch):\n",
fp->Base.Id, dispatch_width);
} else {
- printf("Native code for blorp program (%d-wide dispatch):\n",
+ printf("Native code for blorp program (SIMD%d dispatch):\n",
dispatch_width);
}
}
brw_NOP(p);
}
- /* Save off the start of this 16-wide program */
+ /* Save off the start of this SIMD16 program */
c->prog_data.prog_offset_16 = p->nr_insn * sizeof(struct brw_instruction);
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
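/* Layout note: the SIMD8 and SIMD16 programs are emitted back to back into
 * one kernel, and each native instruction is 16 bytes
 * (sizeof(struct brw_instruction)), so p->nr_insn * 16 above is the byte
 * offset where the SIMD16 code begins; state setup can later point the
 * hardware at either entry point.
 */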
/* Almost all of the values handled in the compiler are scalars, each
* occupying a single register (or 2 registers in the
- * case of 16-wide, which is handled by dividing base_reg_count by 2 and
+ * case of SIMD16, which is handled by dividing base_reg_count by 2 and
* multiplying allocated register numbers by 2). Things that were
* aggregates of scalar values at the GLSL level were split to scalar
* values by split_virtual_grfs().
* nr_payload_regs+curb_read_length..first_non_payload_grf-1: setup coefficients.
*
* And we have payload_node_count nodes covering these registers in order
- * (note that in 16-wide, a node is two registers).
+ * (note that in SIMD16, a node is two registers).
*/
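/* A small worked example of the node/register mapping (illustrative numbers
 * only): in SIMD16, a payload node N covers hardware registers 2N and 2N+1,
 * so a payload of 4 registers (g0..g3) is described by just payload nodes 0
 * and 1.
 */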
void
fs_visitor::setup_payload_interference(struct ra_graph *g,
break;
case FS_OPCODE_LINTERP:
- /* On gen6+ in 16-wide, there are 4 adjacent registers (so 2 nodes)
+ /* On gen6+ in SIMD16, there are 4 adjacent registers (so 2 nodes)
* used by PLN's sourcing of the deltas, while we list only the first
* two in the arguments (1 node). Pre-gen6, the deltas are computed
* in normal VGRFs.
fs_visitor::assign_regs(bool allow_spilling)
{
/* Most of this allocation was written for a reg_width of 1
- * (dispatch_width == 8). In extending to 16-wide, the code was
+ * (dispatch_width == 8). In extending to SIMD16, the code was
* left in place, converted so that the hardware registers it
* allocates come in contiguous physical pairs of regs for
* reg_width == 2.
if (dispatch_width == 16) {
if (!variable_storage(ir)) {
- fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
+ fail("Failed to find uniform '%s' in SIMD16\n", ir->name);
}
return;
}
* enough.
*/
if (brw->gen >= 7 && dispatch_width == 16)
- fail("16-wide explicit accumulator operands unsupported\n");
+ fail("SIMD16 explicit accumulator operands unsupported\n");
struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
break;
case ir_binop_imul_high: {
if (brw->gen >= 7 && dispatch_width == 16)
- fail("16-wide explicit accumulator operands unsupported\n");
+ fail("SIMD16 explicit accumulator operands unsupported\n");
struct brw_reg acc = retype(brw_acc_reg(), this->result.type);
break;
case ir_binop_carry: {
if (brw->gen >= 7 && dispatch_width == 16)
- fail("16-wide explicit accumulator operands unsupported\n");
+ fail("SIMD16 explicit accumulator operands unsupported\n");
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
}
case ir_binop_borrow: {
if (brw->gen >= 7 && dispatch_width == 16)
- fail("16-wide explicit accumulator operands unsupported\n");
+ fail("SIMD16 explicit accumulator operands unsupported\n");
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
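/* These SIMD16 bail-outs share a cause: 32x32 high multiply is done with
 * MUL + MACH, and carry/borrow with ADDC/SUBB, all of which use the
 * accumulator as an explicit operand; that path is only handled for SIMD8
 * here on gen7+, so such shaders fall back to the SIMD8 program.
 */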
if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) {
/* For general texture offsets (no txf workaround), we need a header to
- * put them in. Note that for 16-wide we're making space for two actual
+ * put them in. Note that for SIMD16 we're making space for two actual
* hardware registers here, so the emit will have to fix up for this.
*
* * ir_tg4 needs to place its channel select in the header,
};
if (dispatch_width == 16) {
- fail("rectangle scale uniform setup not supported on 16-wide\n");
+ fail("rectangle scale uniform setup not supported on SIMD16\n");
return coordinate;
}
fs_visitor::visit(ir_if *ir)
{
if (brw->gen < 6 && dispatch_width == 16) {
- fail("Can't support (non-uniform) control flow on 16-wide\n");
+ fail("Can't support (non-uniform) control flow on SIMD16\n");
}
/* Don't point the annotation at the if statement, because then it plus
fs_visitor::visit(ir_loop *ir)
{
if (brw->gen < 6 && dispatch_width == 16) {
- fail("Can't support (non-uniform) control flow on 16-wide\n");
+ fail("Can't support (non-uniform) control flow on SIMD16\n");
}
this->base_ir = NULL;
bool src0_alpha_to_render_target = false;
if (dispatch_width == 16 && do_dual_src) {
- fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
+ fail("GL_ARB_blend_func_extended not yet supported in SIMD16.");
do_dual_src = false;
}
if (c->source_depth_to_render_target) {
if (brw->gen == 6 && dispatch_width == 16) {
/* For outputting oDepth on gen6, SIMD8 writes have to be
- * used. This would require 8-wide moves of each half to
+ * used. This would require SIMD8 moves of each half to
* message regs, kind of like pre-gen5 SIMD16 FB writes.
* Just bail on doing so for now.
*/
* Returns how many cycles it takes the instruction to issue.
*
* Instructions in gen hardware are handled one simd4 vector at a time,
- * with 1 cycle per vector dispatched. Thus 8-wide pixel shaders take 2
- * cycles to dispatch and 16-wide (compressed) instructions take 4.
+ * with 1 cycle per vector dispatched. Thus SIMD8 pixel shader instructions
+ * take 2 cycles to dispatch and SIMD16 (compressed) instructions take 4.
*/
virtual int issue_time(backend_instruction *inst) = 0;
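/* Worked numbers for the comment above: a SIMD8 instruction is two simd4
 * vectors, so it takes 8 / 4 = 2 cycles to dispatch; a compressed SIMD16
 * instruction is four simd4 vectors, so 16 / 4 = 4 cycles.
 */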
} else {
/* Before register allocation, we don't care about the latencies of
* instructions. All we care about is reducing live intervals of
- * variables so that we can avoid register spilling, or get 16-wide
+ * variables so that we can avoid register spilling, or get SIMD16
* shaders which naturally do a better job of hiding instruction
* latency.
*/