X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_fs_generator.cpp;h=74c6cd3047411a7f9a113622e1acb0063e16f954;hp=5825e0770d41b68b53a56df8acffa6eebdb310aa;hb=90b6745bc80cf6dabb8f736dbf12d47c2a6602f5;hpb=f858fa26b4cca8834c8687f01d2ba431fcc8e006 diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 5825e0770d4..74c6cd30474 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -224,25 +224,27 @@ public: bool fs_generator::patch_discard_jumps_to_fb_writes() { - if (devinfo->gen < 6 || this->discard_halt_patches.is_empty()) + if (this->discard_halt_patches.is_empty()) return false; int scale = brw_jump_scale(p->devinfo); - /* There is a somewhat strange undocumented requirement of using - * HALT, according to the simulator. If some channel has HALTed to - * a particular UIP, then by the end of the program, every channel - * must have HALTed to that UIP. Furthermore, the tracking is a - * stack, so you can't do the final halt of a UIP after starting - * halting to a new UIP. - * - * Symptoms of not emitting this instruction on actual hardware - * included GPU hangs and sparkly rendering on the piglit discard - * tests. - */ - brw_inst *last_halt = gen6_HALT(p); - brw_inst_set_uip(p->devinfo, last_halt, 1 * scale); - brw_inst_set_jip(p->devinfo, last_halt, 1 * scale); + if (devinfo->gen >= 6) { + /* There is a somewhat strange undocumented requirement of using + * HALT, according to the simulator. If some channel has HALTed to + * a particular UIP, then by the end of the program, every channel + * must have HALTed to that UIP. Furthermore, the tracking is a + * stack, so you can't do the final halt of a UIP after starting + * halting to a new UIP. + * + * Symptoms of not emitting this instruction on actual hardware + * included GPU hangs and sparkly rendering on the piglit discard + * tests. + */ + brw_inst *last_halt = brw_HALT(p); + brw_inst_set_uip(p->devinfo, last_halt, 1 * scale); + brw_inst_set_jip(p->devinfo, last_halt, 1 * scale); + } int ip = p->nr_insn; @@ -250,11 +252,67 @@ fs_generator::patch_discard_jumps_to_fb_writes() brw_inst *patch = &p->store[patch_ip->ip]; assert(brw_inst_opcode(p->devinfo, patch) == BRW_OPCODE_HALT); - /* HALT takes a half-instruction distance from the pre-incremented IP. */ - brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale); + if (devinfo->gen >= 6) { + /* HALT takes a half-instruction distance from the pre-incremented IP. */ + brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale); + } else { + brw_set_src1(p, patch, brw_imm_d((ip - patch_ip->ip) * scale)); + } } this->discard_halt_patches.make_empty(); + + if (devinfo->gen < 6) { + /* From the g965 PRM: + * + * "As DMask is not automatically reloaded into AMask upon completion + * of this instruction, software has to manually restore AMask upon + * completion." + * + * DMask lives in the bottom 16 bits of sr0.1. + */ + brw_inst *reset = brw_MOV(p, brw_mask_reg(BRW_AMASK), + retype(brw_sr0_reg(1), BRW_REGISTER_TYPE_UW)); + brw_inst_set_exec_size(devinfo, reset, BRW_EXECUTE_1); + brw_inst_set_mask_control(devinfo, reset, BRW_MASK_DISABLE); + brw_inst_set_qtr_control(devinfo, reset, BRW_COMPRESSION_NONE); + brw_inst_set_thread_control(devinfo, reset, BRW_THREAD_SWITCH); + } + + if (devinfo->gen == 4 && !devinfo->is_g4x) { + /* From the g965 PRM: + * + * "[DevBW, DevCL] Erratum: The subfields in mask stack register are + * reset to zero during graphics reset, however, they are not + * initialized at thread dispatch. These subfields will retain the + * values from the previous thread. Software should make sure the + * mask stack is empty (reset to zero) before terminating the thread. + * In case that this is not practical, software may have to reset the + * mask stack at the beginning of each kernel, which will impact the + * performance." + * + * Luckily we can rely on: + * + * "[DevBW, DevCL] This register access restriction is not + * applicable, hardware does ensure execution pipeline coherency, + * when a mask stack register is used as an explicit source and/or + * destination." + */ + brw_push_insn_state(p); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); + + brw_set_default_exec_size(p, BRW_EXECUTE_2); + brw_MOV(p, vec2(brw_mask_stack_depth_reg(0)), brw_imm_uw(0)); + + brw_set_default_exec_size(p, BRW_EXECUTE_16); + /* Reset the if stack. */ + brw_MOV(p, retype(brw_mask_stack_reg(0), BRW_REGISTER_TYPE_UW), + brw_imm_uw(0)); + + brw_pop_insn_state(p); + } + return true; } @@ -410,7 +468,15 @@ fs_generator::generate_mov_indirect(fs_inst *inst, reg.nr = imm_byte_offset / REG_SIZE; reg.subnr = imm_byte_offset % REG_SIZE; - brw_MOV(p, dst, reg); + if (type_sz(reg.type) > 4 && !devinfo->has_64bit_float) { + brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0), + subscript(reg, BRW_REGISTER_TYPE_D, 0)); + brw_set_default_swsb(p, tgl_swsb_null()); + brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1), + subscript(reg, BRW_REGISTER_TYPE_D, 1)); + } else { + brw_MOV(p, dst, reg); + } } else { /* Prior to Broadwell, there are only 8 address registers. */ assert(inst->exec_size <= 8 || devinfo->gen >= 8); @@ -1362,14 +1428,12 @@ fs_generator::generate_ddy(const fs_inst *inst, void fs_generator::generate_discard_jump(fs_inst *) { - assert(devinfo->gen >= 6); - /* This HALT will be patched up at FB write time to point UIP at the end of * the program, and at brw_uip_jip() JIP will be set to the end of the * current block (or the program). */ this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn)); - gen6_HALT(p); + brw_HALT(p); } void @@ -1719,8 +1783,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, struct brw_compile_stats *stats) { /* align to 64 byte boundary. */ - while (p->next_insn_offset % 64) - brw_NOP(p); + brw_realign(p, 64); this->dispatch_width = dispatch_width; @@ -2497,7 +2560,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, /* overriding the shader makes disasm_info invalid */ if (!brw_try_override_assembly(p, start_offset, sha1buf)) { - dump_assembly(p->store, disasm_info, perf.block_latency); + dump_assembly(p->store, start_offset, p->next_insn_offset, + disasm_info, perf.block_latency); } else { fprintf(stderr, "Successfully overrode shader with sha1 %s\n\n", sha1buf); } @@ -2531,6 +2595,16 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, return start_offset; } +void +fs_generator::add_const_data(void *data, unsigned size) +{ + assert(prog_data->const_data_size == 0); + if (size > 0) { + prog_data->const_data_size = size; + prog_data->const_data_offset = brw_append_data(p, data, size, 32); + } +} + const unsigned * fs_generator::get_assembly() {