X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_generator.cpp;h=fb9f65c6a3749f2baecdb7903b60b055765c13bc;hb=26fdb7e51e9f6b407ad8c635850ccffbd01876bc;hp=e207a77fdc160cb54b5fc9fc40e896fd3b7929da;hpb=7c81a6a647257c309cb1ca36c60aa4bfa8e2e022;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index e207a77fdc1..fb9f65c6a37 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -27,28 +27,30 @@ * native instructions. */ -#include "main/macros.h" -#include "brw_context.h" #include "brw_eu.h" #include "brw_fs.h" #include "brw_cfg.h" +#include "brw_program.h" -static uint32_t brw_file_from_reg(fs_reg *reg) +static enum brw_reg_file +brw_file_from_reg(fs_reg *reg) { switch (reg->file) { - case GRF: + case ARF: + return BRW_ARCHITECTURE_REGISTER_FILE; + case FIXED_GRF: + case VGRF: return BRW_GENERAL_REGISTER_FILE; case MRF: return BRW_MESSAGE_REGISTER_FILE; case IMM: return BRW_IMMEDIATE_VALUE; case BAD_FILE: - case HW_REG: case ATTR: case UNIFORM: unreachable("not reached"); } - return 0; + return BRW_ARCHITECTURE_REGISTER_FILE; } static struct brw_reg @@ -58,13 +60,13 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen) switch (reg->file) { case MRF: - assert((reg->reg & ~(1 << 7)) < BRW_MAX_MRF(gen)); + assert((reg->nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(gen)); /* Fallthrough */ - case GRF: + case VGRF: if (reg->stride == 0) { - brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, 0); + brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0); } else if (inst->exec_size < 8) { - brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0); + brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->nr, 0); brw_reg = stride(brw_reg, inst->exec_size * reg->stride, inst->exec_size, reg->stride); } else { @@ -77,44 +79,19 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen) * 
So, for registers with width > 8, we have to use a width of 8 * and trust the compression state to sort out the exec size. */ - brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0); + brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->nr, 0); brw_reg = stride(brw_reg, 8 * reg->stride, 8, reg->stride); } brw_reg = retype(brw_reg, reg->type); brw_reg = byte_offset(brw_reg, reg->subreg_offset); + brw_reg.abs = reg->abs; + brw_reg.negate = reg->negate; break; + case ARF: + case FIXED_GRF: case IMM: - assert(reg->stride == ((reg->type == BRW_REGISTER_TYPE_V || - reg->type == BRW_REGISTER_TYPE_UV || - reg->type == BRW_REGISTER_TYPE_VF) ? 1 : 0)); - - switch (reg->type) { - case BRW_REGISTER_TYPE_F: - brw_reg = brw_imm_f(reg->fixed_hw_reg.dw1.f); - break; - case BRW_REGISTER_TYPE_D: - brw_reg = brw_imm_d(reg->fixed_hw_reg.dw1.d); - break; - case BRW_REGISTER_TYPE_UD: - brw_reg = brw_imm_ud(reg->fixed_hw_reg.dw1.ud); - break; - case BRW_REGISTER_TYPE_W: - brw_reg = brw_imm_w(reg->fixed_hw_reg.dw1.d); - break; - case BRW_REGISTER_TYPE_UW: - brw_reg = brw_imm_uw(reg->fixed_hw_reg.dw1.ud); - break; - case BRW_REGISTER_TYPE_VF: - brw_reg = brw_imm_vf(reg->fixed_hw_reg.dw1.ud); - break; - default: - unreachable("not reached"); - } - break; - case HW_REG: - assert(reg->type == reg->fixed_hw_reg.type); - brw_reg = reg->fixed_hw_reg; + brw_reg = reg->as_brw_reg(); break; case BAD_FILE: /* Probably unused. 
*/ @@ -124,10 +101,6 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen) case UNIFORM: unreachable("not reached"); } - if (reg->abs) - brw_reg = brw_abs(brw_reg); - if (reg->negate) - brw_reg = negate(brw_reg); return brw_reg; } @@ -138,14 +111,14 @@ fs_generator::fs_generator(const struct brw_compiler *compiler, void *log_data, struct brw_stage_prog_data *prog_data, unsigned promoted_constants, bool runtime_check_aads_emit, - const char *stage_abbrev) + gl_shader_stage stage) : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), key(key), prog_data(prog_data), promoted_constants(promoted_constants), runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(false), - stage_abbrev(stage_abbrev), mem_ctx(mem_ctx) + stage(stage), mem_ctx(mem_ctx) { p = rzalloc(mem_ctx, struct brw_codegen); brw_init_codegen(devinfo, p, mem_ctx); @@ -367,6 +340,84 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) } } +void +fs_generator::generate_mov_indirect(fs_inst *inst, + struct brw_reg dst, + struct brw_reg reg, + struct brw_reg indirect_byte_offset) +{ + assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD); + assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE); + + unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; + + if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) { + imm_byte_offset += indirect_byte_offset.ud; + + reg.nr = imm_byte_offset / REG_SIZE; + reg.subnr = imm_byte_offset % REG_SIZE; + brw_MOV(p, dst, reg); + } else { + /* Prior to Broadwell, there are only 8 address registers. */ + assert(inst->exec_size == 8 || devinfo->gen >= 8); + + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ + struct brw_reg addr = vec8(brw_address_reg(0)); + + /* The destination stride of an instruction (in bytes) must be greater + * than or equal to the size of the rest of the instruction. Since the + * address register is of type UW, we can't use a D-type instruction. 
+ * In order to get around this, we retype to UW and use a stride. + */ + indirect_byte_offset = + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); + + struct brw_reg ind_src; + if (devinfo->gen < 8) { + /* From the Haswell PRM section "Register Region Restrictions": + * + * "The lower bits of the AddressImmediate must not overflow to + * change the register address. The lower 5 bits of Address + * Immediate when added to lower 5 bits of address register gives + * the sub-register offset. The upper bits of Address Immediate + * when added to upper bits of address register gives the register + * address. Any overflow from sub-register offset is dropped." + * + * This restriction is only listed in the Haswell PRM but empirical + * testing indicates that it applies on all older generations and is + * lifted on Broadwell. + * + * Since the indirect may cause us to cross a register boundary, this + * makes the base offset almost useless. We could try and do + * something clever where we use an actual base offset if + * base_offset % 32 == 0 but that would mean we were generating + * different code depending on the base offset. Instead, for the + * sake of consistency, we'll just do the add ourselves. + */ + brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset)); + ind_src = brw_VxH_indirect(0, 0); + } else { + brw_MOV(p, addr, indirect_byte_offset); + ind_src = brw_VxH_indirect(0, imm_byte_offset); + } + + brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type)); + + if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE && + !inst->get_next()->is_tail_sentinel() && + ((fs_inst *)inst->get_next())->mlen > 0) { + /* From the Sandybridge PRM: + * + * "[Errata: DevSNB(SNB)] If MRF register is updated by any + * instruction that “indexed/indirect” source AND is followed by a + * send, the instruction requires a “Switch”. This is to avoid + * race condition where send may dispatch before MRF is updated." 
+ */ + brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH); + } + } +} + void fs_generator::generate_urb_read(fs_inst *inst, struct brw_reg dst, @@ -383,6 +434,9 @@ fs_generator::generate_urb_read(fs_inst *inst, brw_inst_set_sfid(p->devinfo, send, BRW_SFID_URB); brw_inst_set_urb_opcode(p->devinfo, send, GEN8_URB_OPCODE_SIMD8_READ); + if (inst->opcode == SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT) + brw_inst_set_urb_per_slot_offset(p->devinfo, send, true); + brw_inst_set_mlen(p->devinfo, send, inst->mlen); brw_inst_set_rlen(p->devinfo, send, inst->regs_written); brw_inst_set_header_present(p->devinfo, send, true); @@ -425,7 +479,7 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload) insn = brw_next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, insn, brw_null_reg()); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW)); brw_set_src0(p, insn, payload); brw_set_src1(p, insn, brw_imm_d(0)); @@ -498,12 +552,13 @@ fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src) } void -fs_generator::generate_blorp_fb_write(fs_inst *inst) +fs_generator::generate_blorp_fb_write(fs_inst *inst, struct brw_reg payload) { brw_fb_WRITE(p, 16 /* dispatch_width */, - brw_message_reg(inst->base_mrf), - brw_reg_from_fs_reg(inst, &inst->src[0], devinfo->gen), + inst->base_mrf >= 0 ? 
+ brw_message_reg(inst->base_mrf) : payload, + brw_null_reg(), BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, inst->target, inst->mlen, @@ -658,7 +713,7 @@ fs_generator::generate_get_buffer_size(fs_inst *inst, retype(dst, BRW_REGISTER_TYPE_UW), inst->base_mrf, src, - surf_index.dw1.ud, + surf_index.ud, 0, GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO, rlen, /* response length */ @@ -667,11 +722,12 @@ fs_generator::generate_get_buffer_size(fs_inst *inst, simd_mode, BRW_SAMPLER_RETURN_FORMAT_SINT32); - brw_mark_surface_used(prog_data, surf_index.dw1.ud); + brw_mark_surface_used(prog_data, surf_index.ud); } void fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src, + struct brw_reg surface_index, struct brw_reg sampler_index) { int msg_type = -1; @@ -692,6 +748,17 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src break; } + /* Stomp the resinfo output type to UINT32. On gens 4-5, the output type + * is set as part of the message descriptor. On gen4, the PRM seems to + * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on + * later gens UINT32 is required. Once you hit Sandy Bridge, the bit is + * gone from the message descriptor entirely and you just get UINT32 all + * the time regardless. Since we can really only do non-UINT32 on gen4, + * just stomp it to UINT32 all the time. 
+ */ + if (inst->opcode == SHADER_OPCODE_TXS) + return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; + switch (inst->exec_size) { case 8: simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; @@ -741,6 +808,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src case SHADER_OPCODE_TXF: msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; break; + case SHADER_OPCODE_TXF_CMS_W: + assert(devinfo->gen >= 9); + msg_type = GEN9_SAMPLER_MESSAGE_SAMPLE_LD2DMS_W; + break; case SHADER_OPCODE_TXF_CMS: if (devinfo->gen >= 7) msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; @@ -892,6 +963,14 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src /* Set the offset bits in DWord 2. */ brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(inst->offset)); + } else if (stage != MESA_SHADER_VERTEX && + stage != MESA_SHADER_FRAGMENT) { + /* The vertex and fragment stages have g0.2 set to 0, so + * header0.2 is 0 when g0 is copied. Other stages may not, so we + * must set it to 0 to avoid setting undesirable bits in the + * message. + */ + brw_MOV(p, get_element_ud(header_reg, 2), brw_imm_ud(0)); } brw_adjust_sampler_state_pointer(p, header_reg, sampler_index); @@ -904,14 +983,16 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src ? 
prog_data->binding_table.gather_texture_start : prog_data->binding_table.texture_start; - if (sampler_index.file == BRW_IMMEDIATE_VALUE) { - uint32_t sampler = sampler_index.dw1.ud; + if (surface_index.file == BRW_IMMEDIATE_VALUE && + sampler_index.file == BRW_IMMEDIATE_VALUE) { + uint32_t surface = surface_index.ud; + uint32_t sampler = sampler_index.ud; brw_SAMPLE(p, retype(dst, BRW_REGISTER_TYPE_UW), inst->base_mrf, src, - sampler + base_binding_table_index, + surface + base_binding_table_index, sampler % 16, msg_type, rlen, @@ -920,19 +1001,24 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src simd_mode, return_format); - brw_mark_surface_used(prog_data, sampler + base_binding_table_index); + brw_mark_surface_used(prog_data, surface + base_binding_table_index); } else { /* Non-const sampler index */ struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg surface_reg = vec1(retype(surface_index, BRW_REGISTER_TYPE_UD)); struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD)); brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */ - brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + if (memcmp(&surface_reg, &sampler_reg, sizeof(surface_reg)) == 0) { + brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101)); + } else { + brw_SHL(p, addr, sampler_reg, brw_imm_ud(8)); + brw_OR(p, addr, addr, surface_reg); + } if (base_binding_table_index) brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index)); brw_AND(p, addr, addr, brw_imm_ud(0xfff)); @@ -1172,16 +1258,14 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); - uint32_t surf_index = index.dw1.ud; + uint32_t surf_index = index.ud; assert(offset.file == BRW_IMMEDIATE_VALUE && offset.type == 
BRW_REGISTER_TYPE_UD); - uint32_t read_offset = offset.dw1.ud; + uint32_t read_offset = offset.ud; brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), read_offset, surf_index); - - brw_mark_surface_used(prog_data, surf_index); } void @@ -1223,12 +1307,13 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, if (index.file == BRW_IMMEDIATE_VALUE) { - uint32_t surf_index = index.dw1.ud; + uint32_t surf_index = index.ud; brw_push_insn_state(p); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4); brw_pop_insn_state(p); brw_set_dest(p, send, dst); @@ -1242,9 +1327,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, header_present, BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0); - - brw_mark_surface_used(prog_data, surf_index); - } else { struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); @@ -1274,11 +1356,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, 0); brw_pop_insn_state(p); - - /* visitor knows more than we do about the surface limit required, - * so has already done marking. 
- */ - } } @@ -1294,7 +1371,7 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst, assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); - uint32_t surf_index = index.dw1.ud; + uint32_t surf_index = index.ud; uint32_t simd_mode, rlen, msg_type; if (dispatch_width == 16) { @@ -1345,8 +1422,6 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst, inst->header_size != 0, simd_mode, return_format); - - brw_mark_surface_used(prog_data, surf_index); } void @@ -1376,7 +1451,7 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, if (index.file == BRW_IMMEDIATE_VALUE) { - uint32_t surf_index = index.dw1.ud; + uint32_t surf_index = index.ud; brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW)); @@ -1391,8 +1466,6 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, simd_mode, 0); - brw_mark_surface_used(prog_data, surf_index); - } else { struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); @@ -1423,10 +1496,6 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, false /* header */, simd_mode, 0); - - /* visitor knows more than we do about the surface limit required, - * so has already done marking. 
- */ } } @@ -2050,6 +2119,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: + case SHADER_OPCODE_TXF_CMS_W: case SHADER_OPCODE_TXF_UMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: @@ -2058,7 +2128,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1]); + generate_tex(inst, dst, src[0], src[1], src[2]); break; case FS_OPCODE_DDX_COARSE: case FS_OPCODE_DDX_FINE: @@ -2067,7 +2137,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case FS_OPCODE_DDY_COARSE: case FS_OPCODE_DDY_FINE: assert(src[1].file == BRW_IMMEDIATE_VALUE); - generate_ddy(inst->opcode, dst, src[0], src[1].dw1.ud); + generate_ddy(inst->opcode, dst, src[0], src[1].ud); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: @@ -2085,7 +2155,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) fill_count++; break; + case SHADER_OPCODE_MOV_INDIRECT: + generate_mov_indirect(inst, dst, src[0], src[1]); + break; + case SHADER_OPCODE_URB_READ_SIMD8: + case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: generate_urb_read(inst, dst, src[0]); break; @@ -2118,7 +2193,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) break; case FS_OPCODE_BLORP_FB_WRITE: - generate_blorp_fb_write(inst); + generate_blorp_fb_write(inst, src[0]); break; case FS_OPCODE_MOV_DISPATCH_TO_FLAGS: @@ -2135,37 +2210,37 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_UNTYPED_ATOMIC: assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud, + brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen, !inst->dst.is_null()); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_surface_read(p, dst, src[0], src[1], - 
inst->mlen, src[2].dw1.ud); + inst->mlen, src[2].ud); break; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_untyped_surface_write(p, src[0], src[1], - inst->mlen, src[2].dw1.ud); + inst->mlen, src[2].ud); break; case SHADER_OPCODE_TYPED_ATOMIC: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_typed_atomic(p, dst, src[0], src[1], - src[2].dw1.ud, inst->mlen, !inst->dst.is_null()); + src[2].ud, inst->mlen, !inst->dst.is_null()); break; case SHADER_OPCODE_TYPED_SURFACE_READ: assert(src[2].file == BRW_IMMEDIATE_VALUE); brw_typed_surface_read(p, dst, src[0], src[1], - inst->mlen, src[2].dw1.ud); + inst->mlen, src[2].ud); break; case SHADER_OPCODE_TYPED_SURFACE_WRITE: assert(src[2].file == BRW_IMMEDIATE_VALUE); - brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].dw1.ud); + brw_typed_surface_write(p, src[0], src[1], inst->mlen, src[2].ud); break; case SHADER_OPCODE_MEMORY_FENCE: @@ -2184,6 +2259,28 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_broadcast(p, dst, src[0], src[1]); break; + case SHADER_OPCODE_EXTRACT_BYTE: { + assert(src[0].type == BRW_REGISTER_TYPE_D || + src[0].type == BRW_REGISTER_TYPE_UD); + + enum brw_reg_type type = + src[0].type == BRW_REGISTER_TYPE_D ? BRW_REGISTER_TYPE_B + : BRW_REGISTER_TYPE_UB; + brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 4)); + break; + } + + case SHADER_OPCODE_EXTRACT_WORD: { + assert(src[0].type == BRW_REGISTER_TYPE_D || + src[0].type == BRW_REGISTER_TYPE_UD); + + enum brw_reg_type type = + src[0].type == BRW_REGISTER_TYPE_D ? 
BRW_REGISTER_TYPE_W + : BRW_REGISTER_TYPE_UW; + brw_MOV(p, dst, spread(suboffset(retype(src[0], type), src[1].ud), 2)); + break; + } + case FS_OPCODE_SET_SAMPLE_ID: generate_set_sample_id(inst, dst, src[0], src[1]); break; @@ -2267,6 +2364,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_set_uip_jip(p); annotation_finalize(&annotation, p->next_insn_offset); +#ifndef NDEBUG + bool validated = brw_validate_instructions(p, start_offset, &annotation); +#else + if (unlikely(debug_flag)) + brw_validate_instructions(p, start_offset, &annotation); +#endif + int before_size = p->next_insn_offset - start_offset; brw_compact_instructions(p, start_offset, annotation.ann_count, annotation.ann); @@ -2282,14 +2386,16 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) dump_assembly(p->store, annotation.ann_count, annotation.ann, p->devinfo); - ralloc_free(annotation.ann); + ralloc_free(annotation.mem_ctx); } + assert(validated); compiler->shader_debug_log(log_data, "%s SIMD%d shader: %d inst, %d loops, %u cycles, " "%d:%d spills:fills, Promoted %u constants, " - "compacted %d to %d bytes.\n", - stage_abbrev, dispatch_width, before_size / 16, + "compacted %d to %d bytes.", + _mesa_shader_stage_to_abbrev(stage), + dispatch_width, before_size / 16, loop_count, cfg->cycle_count, spill_count, fill_count, promoted_constants, before_size, after_size);