struct brw_stage_prog_data *prog_data,
unsigned promoted_constants,
bool runtime_check_aads_emit,
- const char *stage_abbrev)
+ gl_shader_stage stage)
: compiler(compiler), log_data(log_data),
devinfo(compiler->devinfo), key(key),
prog_data(prog_data),
promoted_constants(promoted_constants),
runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false),
- stage_abbrev(stage_abbrev), mem_ctx(mem_ctx)
+ stage(stage), mem_ctx(mem_ctx)
{
p = rzalloc(mem_ctx, struct brw_codegen);
brw_init_codegen(devinfo, p, mem_ctx);
unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
- /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
- struct brw_reg addr = vec8(brw_address_reg(0));
+ if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) {
+ imm_byte_offset += indirect_byte_offset.ud;
- /* The destination stride of an instruction (in bytes) must be greater
- * than or equal to the size of the rest of the instruction. Since the
- * address register is of type UW, we can't use a D-type instruction.
- * In order to get around this, re re-type to UW and use a stride.
- */
- indirect_byte_offset =
- retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+ reg.nr = imm_byte_offset / REG_SIZE;
+ reg.subnr = imm_byte_offset % REG_SIZE;
+ brw_MOV(p, dst, reg);
+ } else {
+ /* Prior to Broadwell, there are only 8 address registers. */
+ assert(inst->exec_size == 8 || devinfo->gen >= 8);
- /* Prior to Broadwell, there are only 8 address registers. */
- assert(inst->exec_size == 8 || devinfo->gen >= 8);
+ /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+ struct brw_reg addr = vec8(brw_address_reg(0));
- brw_MOV(p, addr, indirect_byte_offset);
- brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
- brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+ /* The destination stride of an instruction (in bytes) must be greater
+ * than or equal to the size of the rest of the instruction. Since the
+ * address register is of type UW, we can't use a D-type instruction.
+ * In order to get around this, re re-type to UW and use a stride.
+ */
+ indirect_byte_offset =
+ retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+ if (devinfo->gen < 8) {
+ /* Prior to broadwell, we have a restriction that the bottom 5 bits
+ * of the base offset and the bottom 5 bits of the indirect must add
+ * to less than 32. In other words, the hardware needs to be able to
+ * add the bottom five bits of the two to get the subnumber and add
+ * the next 7 bits of each to get the actual register number. Since
+ * the indirect may cause us to cross a register boundary, this makes
+ * it almost useless. We could try and do something clever where we
+ * use a actual base offset if base_offset % 32 == 0 but that would
+ * mean we were generating different code depending on the base
+ * offset. Instead, for the sake of consistency, we'll just do the
+ * add ourselves.
+ */
+ brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
+ brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), dst.type));
+ } else {
+ brw_MOV(p, addr, indirect_byte_offset);
+ brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+ }
+ }
}
void
void
fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+ struct brw_reg surface_index,
struct brw_reg sampler_index)
{
int msg_type = -1;
/* Set the offset bits in DWord 2. */
brw_MOV(p, get_element_ud(header_reg, 2),
brw_imm_ud(inst->offset));
+ } else if (stage != MESA_SHADER_VERTEX &&
+ stage != MESA_SHADER_FRAGMENT) {
+ /* The vertex and fragment stages have g0.2 set to 0, so
+ * header0.2 is 0 when g0 is copied. Other stages may not, so we
+ * must set it to 0 to avoid setting undesirable bits in the
+ * message.
+ */
+ brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0));
}
brw_adjust_sampler_state_pointer(p, header_reg, sampler_index);
? prog_data->binding_table.gather_texture_start
: prog_data->binding_table.texture_start;
- if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
+ if (surface_index.file == BRW_IMMEDIATE_VALUE &&
+ sampler_index.file == BRW_IMMEDIATE_VALUE) {
+ uint32_t surface = surface_index.ud;
uint32_t sampler = sampler_index.ud;
brw_SAMPLE(p,
retype(dst, BRW_REGISTER_TYPE_UW),
inst->base_mrf,
src,
- sampler + base_binding_table_index,
+ surface + base_binding_table_index,
sampler % 16,
msg_type,
rlen,
simd_mode,
return_format);
- brw_mark_surface_used(prog_data, sampler + base_binding_table_index);
+ brw_mark_surface_used(prog_data, surface + base_binding_table_index);
} else {
/* Non-const sampler index */
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
+ struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD));
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */
- brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
+ if (memcmp(&surface_reg, &sampler_reg, sizeof(surface_reg)) == 0) {
+ brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
+ } else {
+ brw_SHL(p, addr, sampler_reg, brw_imm_ud(8));
+ brw_OR(p, addr, addr, surface_reg);
+ }
if (base_binding_table_index)
brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
brw_AND(p, addr, addr, brw_imm_ud(0xfff));
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
case SHADER_OPCODE_SAMPLEINFO:
- generate_tex(inst, dst, src[0], src[1]);
+ generate_tex(inst, dst, src[0], src[1], src[2]);
break;
case FS_OPCODE_DDX_COARSE:
case FS_OPCODE_DDX_FINE:
"%s SIMD%d shader: %d inst, %d loops, %u cycles, "
"%d:%d spills:fills, Promoted %u constants, "
"compacted %d to %d bytes.",
- stage_abbrev, dispatch_width, before_size / 16,
+ _mesa_shader_stage_to_abbrev(stage),
+ dispatch_width, before_size / 16,
loop_count, cfg->cycle_count, spill_count,
fill_count, promoted_constants, before_size,
after_size);