}
}
-#define ALU1(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0) \
- { \
- return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0); \
- }
-
-#define ALU2(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \
- const fs_reg &src1) \
- { \
- return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \
- }
-
-#define ALU2_ACC(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \
- const fs_reg &src1) \
- { \
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
- inst->writes_accumulator = true; \
- return inst; \
- }
-
-#define ALU3(op) \
- fs_inst * \
- fs_visitor::op(const fs_reg &dst, const fs_reg &src0, \
- const fs_reg &src1, const fs_reg &src2) \
- { \
- return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
- }
-
-ALU1(NOT)
-ALU1(MOV)
-ALU1(FRC)
-ALU1(RNDD)
-ALU1(RNDE)
-ALU1(RNDZ)
-ALU2(ADD)
-ALU2(MUL)
-ALU2_ACC(MACH)
-ALU2(AND)
-ALU2(OR)
-ALU2(XOR)
-ALU2(SHL)
-ALU2(SHR)
-ALU2(ASR)
-ALU3(LRP)
-ALU1(BFREV)
-ALU3(BFE)
-ALU2(BFI1)
-ALU3(BFI2)
-ALU1(FBH)
-ALU1(FBL)
-ALU1(CBIT)
-ALU3(MAD)
-ALU2_ACC(ADDC)
-ALU2_ACC(SUBB)
-ALU2(SEL)
-ALU2(MAC)
-
-/** Gen4 predicated IF. */
-fs_inst *
-fs_visitor::IF(enum brw_predicate predicate)
-{
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width);
- inst->predicate = predicate;
- return inst;
-}
-
-/** Gen6 IF with embedded comparison. */
-fs_inst *
-fs_visitor::IF(const fs_reg &src0, const fs_reg &src1,
- enum brw_conditional_mod condition)
-{
- assert(devinfo->gen == 6);
- fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width,
- reg_null_d, src0, src1);
- inst->conditional_mod = condition;
- return inst;
-}
-
-/**
- * CMP: Sets the low bit of the destination channels with the result
- * of the comparison, while the upper bits are undefined, and updates
- * the flag register with the packed 16 bits of the result.
- */
-fs_inst *
-fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1,
- enum brw_conditional_mod condition)
-{
- fs_inst *inst;
-
- /* Take the instruction:
- *
- * CMP null<d> src0<f> src1<f>
- *
- * Original gen4 does type conversion to the destination type before
- * comparison, producing garbage results for floating point comparisons.
- *
- * The destination type doesn't matter on newer generations, so we set the
- * type to match src0 so we can compact the instruction.
- */
- dst.type = src0.type;
- if (dst.file == HW_REG)
- dst.fixed_hw_reg.type = dst.type;
-
- resolve_ud_negate(&src0);
- resolve_ud_negate(&src1);
-
- inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1);
- inst->conditional_mod = condition;
-
- return inst;
-}
-
-fs_inst *
-fs_visitor::LOAD_PAYLOAD(const fs_reg &dst, fs_reg *src, int sources,
- int header_size)
-{
- assert(dst.width % 8 == 0);
- fs_inst *inst = new(mem_ctx) fs_inst(SHADER_OPCODE_LOAD_PAYLOAD, dst.width,
- dst, src, sources);
- inst->header_size = header_size;
-
- for (int i = 0; i < header_size; i++)
- assert(src[i].file != GRF || src[i].width * type_sz(src[i].type) == 32);
- inst->regs_written = header_size;
-
- for (int i = header_size; i < sources; ++i)
- assert(src[i].file != GRF || src[i].width == dst.width);
- inst->regs_written += (sources - header_size) * (dst.width / 8);
-
- return inst;
-}
-
void
fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
const fs_reg &dst,
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_FUNCTION:
unreachable("not reached");
}
void
fs_visitor::emit_shader_time_end()
{
- enum shader_time_shader_type type, written_type, reset_type;
- switch (stage) {
- case MESA_SHADER_VERTEX:
- type = ST_VS;
- written_type = ST_VS_WRITTEN;
- reset_type = ST_VS_RESET;
- break;
- case MESA_SHADER_GEOMETRY:
- type = ST_GS;
- written_type = ST_GS_WRITTEN;
- reset_type = ST_GS_RESET;
- break;
- case MESA_SHADER_FRAGMENT:
- if (dispatch_width == 8) {
- type = ST_FS8;
- written_type = ST_FS8_WRITTEN;
- reset_type = ST_FS8_RESET;
- } else {
- assert(dispatch_width == 16);
- type = ST_FS16;
- written_type = ST_FS16_WRITTEN;
- reset_type = ST_FS16_RESET;
- }
- break;
- case MESA_SHADER_COMPUTE:
- type = ST_CS;
- written_type = ST_CS_WRITTEN;
- reset_type = ST_CS_RESET;
- break;
- default:
- unreachable("fs_visitor::emit_shader_time_end missing code");
- }
-
/* Insert our code just before the final SEND with EOT. */
exec_node *end = this->instructions.get_tail();
assert(end && ((fs_inst *) end)->eot);
* trying to determine the time taken for single instructions.
*/
ibld.ADD(diff, diff, fs_reg(-2u));
- SHADER_TIME_ADD(ibld, type, diff);
- SHADER_TIME_ADD(ibld, written_type, fs_reg(1u));
+ SHADER_TIME_ADD(ibld, 0, diff);
+ SHADER_TIME_ADD(ibld, 1, fs_reg(1u));
ibld.emit(BRW_OPCODE_ELSE);
- SHADER_TIME_ADD(ibld, reset_type, fs_reg(1u));
+ SHADER_TIME_ADD(ibld, 2, fs_reg(1u));
ibld.emit(BRW_OPCODE_ENDIF);
}
void
fs_visitor::SHADER_TIME_ADD(const fs_builder &bld,
- enum shader_time_shader_type type, fs_reg value)
+ int shader_time_subindex,
+ fs_reg value)
{
- int shader_time_index =
- brw_get_shader_time_index(brw, shader_prog, prog, type);
- fs_reg offset = fs_reg(shader_time_index * SHADER_TIME_STRIDE);
+ int index = shader_time_index * 3 + shader_time_subindex;
+ fs_reg offset = fs_reg(index * SHADER_TIME_STRIDE);
fs_reg payload;
if (dispatch_width == 8)
* During a SIMD16 compile (if one happens anyway), this just calls fail().
*/
void
-fs_visitor::no16(const char *format, ...)
+fs_visitor::no16(const char *msg)
{
- va_list va;
-
- va_start(va, format);
-
if (dispatch_width == 16) {
- vfail(format, va);
+ fail("%s", msg);
} else {
simd16_unsupported = true;
- if (brw->perf_debug) {
- if (no16_msg)
- ralloc_vasprintf_append(&no16_msg, format, va);
- else
- no16_msg = ralloc_vasprintf(mem_ctx, format, va);
- }
+ compiler->shader_perf_log(log_data,
+ "SIMD16 shader failed to compile: %s", msg);
}
-
- va_end(va);
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dispatch_width));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src0));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst, const fs_reg &src0,
- const fs_reg &src1, const fs_reg &src2)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src0, src1, src2));
-}
-
-fs_inst *
-fs_visitor::emit(enum opcode opcode, const fs_reg &dst,
- fs_reg src[], int sources)
-{
- return emit(new(mem_ctx) fs_inst(opcode, dst, src, sources));
}
/**
brw_type_for_base_type(type), dispatch_width);
}
-fs_reg
-fs_visitor::vgrf(int num_components)
-{
- int reg_width = dispatch_width / 8;
- return fs_reg(GRF, alloc.allocate(num_components * reg_width),
- BRW_REGISTER_TYPE_F, dispatch_width);
-}
-
/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int reg)
{
*src = temp;
}
-fs_reg
-fs_visitor::fix_math_operand(fs_reg src)
-{
- /* Can't do hstride == 0 args on gen6 math, so expand it out. We
- * might be able to do better by doing execsize = 1 math and then
- * expanding that result out, but we would need to be careful with
- * masking.
- *
- * The hardware ignores source modifiers (negate and abs) on math
- * instructions, so we also move to a temp to set those up.
- */
- if (devinfo->gen == 6 && src.file != UNIFORM && src.file != IMM &&
- !src.abs && !src.negate)
- return src;
-
- /* Gen7 relaxes most of the above restrictions, but still can't use IMM
- * operands to math
- */
- if (devinfo->gen >= 7 && src.file != IMM)
- return src;
-
- fs_reg expanded = vgrf(glsl_type::float_type);
- expanded.type = src.type;
- emit(BRW_OPCODE_MOV, expanded, src);
- return expanded;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src)
-{
- switch (opcode) {
- case SHADER_OPCODE_RCP:
- case SHADER_OPCODE_RSQ:
- case SHADER_OPCODE_SQRT:
- case SHADER_OPCODE_EXP2:
- case SHADER_OPCODE_LOG2:
- case SHADER_OPCODE_SIN:
- case SHADER_OPCODE_COS:
- break;
- default:
- unreachable("not reached: bad math opcode");
- }
-
- /* Can't do hstride == 0 args to gen6 math, so expand it out. We
- * might be able to do better by doing execsize = 1 math and then
- * expanding that result out, but we would need to be careful with
- * masking.
- *
- * Gen 6 hardware ignores source modifiers (negate and abs) on math
- * instructions, so we also move to a temp to set those up.
- */
- if (devinfo->gen == 6 || devinfo->gen == 7)
- src = fix_math_operand(src);
-
- fs_inst *inst = emit(opcode, dst, src);
-
- if (devinfo->gen < 6) {
- inst->base_mrf = 2;
- inst->mlen = dispatch_width / 8;
- }
-
- return inst;
-}
-
-fs_inst *
-fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
-{
- int base_mrf = 2;
- fs_inst *inst;
-
- if (devinfo->gen >= 8) {
- inst = emit(opcode, dst, src0, src1);
- } else if (devinfo->gen >= 6) {
- src0 = fix_math_operand(src0);
- src1 = fix_math_operand(src1);
-
- inst = emit(opcode, dst, src0, src1);
- } else {
- /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
- * "Message Payload":
- *
- * "Operand0[7]. For the INT DIV functions, this operand is the
- * denominator."
- * ...
- * "Operand1[7]. For the INT DIV functions, this operand is the
- * numerator."
- */
- bool is_int_div = opcode != SHADER_OPCODE_POW;
- fs_reg &op0 = is_int_div ? src1 : src0;
- fs_reg &op1 = is_int_div ? src0 : src1;
-
- emit(MOV(fs_reg(MRF, base_mrf + 1, op1.type, dispatch_width), op1));
- inst = emit(opcode, dst, op0, reg_null_f);
-
- inst->base_mrf = base_mrf;
- inst->mlen = 2 * dispatch_width / 8;
- }
- return inst;
-}
-
void
fs_visitor::emit_discard_jump()
{
unsigned vue_entries =
MAX2(count, vs_prog_data->base.vue_map.num_slots);
+ /* URB entry size is counted in units of 64 bytes (for the 3DSTATE_URB_VS
+ * command). Each attribute is 16 bytes (4 floats/dwords), so each unit
+ * fits four attributes.
+ */
vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
vs_prog_data->base.urb_read_length = (count + 1) / 2;
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
int base_mrf = 1;
int color_mrf = base_mrf + 2;
+ fs_inst *mov;
- fs_inst *mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
- fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+ if (uniforms == 1) {
+ mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
+ fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
+ } else {
+ struct brw_reg reg =
+ brw_reg(BRW_GENERAL_REGISTER_FILE,
+ 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+
+ mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)),
+ fs_reg(reg));
+ }
fs_inst *write;
if (key->nr_color_regions == 1) {
assign_curb_setup();
/* Now that we have the uniform assigned, go ahead and force it to a vec4. */
- assert(mov->src[0].file == HW_REG);
- mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+ if (uniforms == 1) {
+ assert(mov->src[0].file == HW_REG);
+ mov->src[0] = brw_vec4_grf(mov->src[0].fixed_hw_reg.nr, 0);
+ }
}
/**
fs_reg high(GRF, alloc.allocate(dispatch_width / 8),
inst->dst.type, dispatch_width);
- if (brw->gen >= 7) {
+ if (devinfo->gen >= 7) {
fs_reg src1_0_w = inst->src[1];
fs_reg src1_1_w = inst->src[1];
src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
} else {
src1_0_w.type = BRW_REGISTER_TYPE_UW;
- src1_0_w.stride = 2;
+ if (src1_0_w.stride != 0) {
+ assert(src1_0_w.stride == 1);
+ src1_0_w.stride = 2;
+ }
src1_1_w.type = BRW_REGISTER_TYPE_UW;
- src1_1_w.stride = 2;
+ if (src1_1_w.stride != 0) {
+ assert(src1_1_w.stride == 1);
+ src1_1_w.stride = 2;
+ }
src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
}
ibld.MUL(low, inst->src[0], src1_0_w);
fs_reg src0_1_w = inst->src[0];
src0_0_w.type = BRW_REGISTER_TYPE_UW;
- src0_0_w.stride = 2;
+ if (src0_0_w.stride != 0) {
+ assert(src0_0_w.stride == 1);
+ src0_0_w.stride = 2;
+ }
src0_1_w.type = BRW_REGISTER_TYPE_UW;
- src0_1_w.stride = 2;
+ if (src0_1_w.stride != 0) {
+ assert(src0_1_w.stride == 1);
+ src0_1_w.stride = 2;
+ }
src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
ibld.MUL(low, src0_0_w, inst->src[1]);
ibld.ADD(dst, low, high);
if (inst->conditional_mod) {
- fs_reg null(retype(brw_null_reg(), inst->dst.type));
+ fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
set_condmod(inst->conditional_mod,
ibld.MOV(null, inst->dst));
}
void
fs_visitor::setup_cs_payload()
{
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
payload.num_regs = 1;
}
fail("Failure to register allocate. Reduce number of "
"live scalar values to avoid this.");
} else {
- perf_debug("%s shader triggered register spilling. "
- "Try reducing the number of live scalar values to "
- "improve performance.\n", stage_name);
+ compiler->shader_perf_log(log_data,
+ "%s shader triggered register spilling. "
+ "Try reducing the number of live scalar "
+ "values to improve performance.\n",
+ stage_name);
}
/* Since we're out of heuristics, just go spill registers until we
}
bool
-fs_visitor::run_vs()
+fs_visitor::run_vs(gl_clip_plane *clip_planes)
{
assert(stage == MESA_SHADER_VERTEX);
- assign_common_binding_table_offsets(0);
+ if (prog_data->map_entries == NULL)
+ assign_common_binding_table_offsets(0);
setup_vs_payload();
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_begin();
emit_nir_code();
if (failed)
return false;
- emit_urb_writes();
+ emit_urb_writes(clip_planes);
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
}
bool
-fs_visitor::run_fs()
+fs_visitor::run_fs(bool do_rep_send)
{
brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) this->prog_data;
brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
sanity_param_count = prog->Parameters->NumParameters;
- assign_binding_table_offsets();
+ if (prog_data->map_entries == NULL)
+ assign_binding_table_offsets();
if (devinfo->gen >= 6)
setup_payload_gen6();
if (0) {
emit_dummy_fs();
- } else if (brw->use_rep_send && dispatch_width == 16) {
+ } else if (do_rep_send) {
+ assert(dispatch_width == 16);
emit_repclear_shader();
} else {
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_begin();
calculate_urb_setup();
emit_fb_writes();
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
setup_cs_payload();
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_begin();
emit_nir_code();
emit_cs_terminate();
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_end();
calculate_cfg();
if (unlikely(INTEL_DEBUG & DEBUG_WM))
brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
+ int st_index8 = -1, st_index16 = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+ st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8);
+ st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16);
+ }
+
/* Now the main event: Visit the shader IR and generate our FS IR for it.
*/
- fs_visitor v(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
- prog, &fp->Base, 8);
- if (!v.run_fs()) {
+ fs_visitor v(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
+ prog, &fp->Base, 8, st_index8);
+ if (!v.run_fs(false /* do_rep_send */)) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
}
cfg_t *simd16_cfg = NULL;
- fs_visitor v2(brw, mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
- prog, &fp->Base, 16);
+ fs_visitor v2(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
+ prog, &fp->Base, 16, st_index16);
if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
if (!v.simd16_unsupported) {
/* Try a SIMD16 compile */
v2.import_uniforms(&v);
- if (!v2.run_fs()) {
- perf_debug("SIMD16 shader failed to compile, falling back to "
- "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
+ if (!v2.run_fs(brw->use_rep_send)) {
+ perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg);
} else {
simd16_cfg = v2.cfg;
}
- } else {
- perf_debug("SIMD16 shader unsupported, falling back to "
- "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg);
}
}
prog_data->no_8 = false;
}
- fs_generator g(brw, mem_ctx, (void *) key, &prog_data->base,
+ fs_generator g(brw->intelScreen->compiler, brw,
+ mem_ctx, (void *) key, &prog_data->base,
&fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS");
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {