X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fintel%2Fcompiler%2Fbrw_vec4_gs_visitor.cpp;h=7848832e4128c59e692c0e2bfaade31b2b032c4e;hb=16ada419d7c13bc96e299d3b17d756ec1af6f38a;hp=9793ef5012586aa54341c2f0d998062bea42e939;hpb=b86dba8a0eee6be283a96481c0c2b1fb1e882824;p=mesa.git diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index 9793ef50125..7848832e412 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -29,6 +29,7 @@ #include "brw_vec4_gs_visitor.h" #include "gen6_gs_visitor.h" +#include "brw_cfg.h" #include "brw_fs.h" #include "brw_nir.h" #include "common/gen_debug.h" @@ -52,29 +53,36 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, } -dst_reg * -vec4_gs_visitor::make_reg_for_system_value(int location) +static inline struct brw_reg +attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved) { - dst_reg *reg = new(mem_ctx) dst_reg(this, glsl_type::int_type); - - switch (location) { - case SYSTEM_VALUE_INVOCATION_ID: - this->current_annotation = "initialize gl_InvocationID"; - if (gs_prog_data->invocations > 1) - emit(GS_OPCODE_GET_INSTANCE_ID, *reg); - else - emit(MOV(*reg, brw_imm_ud(0))); - break; - default: - unreachable("not reached"); + struct brw_reg reg; + + unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type)); + if (interleaved) { + reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0, width, 1); + } else { + reg = brw_vecn_grf(width, attr, 0); } + reg.type = type; return reg; } - +/** + * Replace each register of type ATTR in this->instructions with a reference + * to a fixed HW register. 
+ * + * If interleaved is true, then each attribute takes up half a register, with + * register N containing attribute 2*N in its first half and attribute 2*N+1 + * in its second half (this corresponds to the payload setup used by geometry + * shaders in "single" or "dual instanced" dispatch mode). If interleaved is + * false, then each attribute takes up a whole register, with register N + * containing attribute N (this corresponds to the payload setup used by + * vertex shaders, and by geometry shaders in "dual object" dispatch mode). + */ int -vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map, +vec4_gs_visitor::setup_varying_inputs(int payload_reg, int attributes_per_reg) { /* For geometry shaders there are N copies of the input attributes, where N @@ -89,12 +97,24 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map, assert(num_input_vertices <= MAX_GS_INPUT_VERTICES); unsigned input_array_stride = prog_data->urb_read_length * 2; - for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) { - int varying = c->input_vue_map.slot_to_varying[slot]; - for (unsigned vertex = 0; vertex < num_input_vertices; vertex++) { - attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] = - attributes_per_reg * payload_reg + input_array_stride * vertex + - slot; + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + for (int i = 0; i < 3; i++) { + if (inst->src[i].file != ATTR) + continue; + + assert(inst->src[i].offset % REG_SIZE == 0); + int grf = payload_reg * attributes_per_reg + + inst->src[i].nr + inst->src[i].offset / REG_SIZE; + + struct brw_reg reg = + attribute_to_hw_reg(grf, inst->src[i].type, attributes_per_reg > 1); + reg.swizzle = inst->src[i].swizzle; + if (inst->src[i].abs) + reg = brw_abs(reg); + if (inst->src[i].negate) + reg = negate(reg); + + inst->src[i] = reg; } } @@ -103,25 +123,15 @@ vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map, return payload_reg + regs_used; } - 
void vec4_gs_visitor::setup_payload() { - int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES]; - /* If we are in dual instanced or single mode, then attributes are going * to be interleaved, so one register contains two attribute slots. */ int attributes_per_reg = prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2; - /* If a geometry shader tries to read from an input that wasn't written by - * the vertex shader, that produces undefined results, but it shouldn't - * crash anything. So initialize attribute_map to zeros--that ensures that - * these undefined results are read from r0. - */ - memset(attribute_map, 0, sizeof(attribute_map)); - int reg = 0; /* The payload always contains important data in r0, which contains @@ -132,13 +142,11 @@ vec4_gs_visitor::setup_payload() /* If the shader uses gl_PrimitiveIDIn, that goes in r1. */ if (gs_prog_data->include_primitive_id) - attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++; + reg++; reg = setup_uniforms(reg); - reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg); - - lower_attributes_to_hw_regs(attribute_map, attributes_per_reg > 1); + reg = setup_varying_inputs(reg, attributes_per_reg); this->first_non_payload_grf = reg; } @@ -414,7 +422,7 @@ vec4_gs_visitor::set_stream_control_data_bits(unsigned stream_id) assert(c->control_data_bits_per_vertex == 2); /* Must be a valid stream */ - assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS); + assert(stream_id < MAX_VERTEX_STREAMS); /* Control data bits are initialized to 0 so we don't have to set any * bits when sending vertices to stream 0. 
@@ -610,7 +618,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, const nir_shader *src_shader, struct gl_program *prog, int shader_time_index, - unsigned *final_assembly_size, char **error_str) { struct brw_gs_compile c; @@ -634,7 +641,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, shader->info.separate_shader); shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar); - brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map); + brw_nir_lower_vue_inputs(shader, &c.input_vue_map); brw_nir_lower_vue_outputs(shader, is_scalar); shader = brw_postprocess_nir(shader, compiler, is_scalar); @@ -809,10 +816,17 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and * a multiple of 128 bytes in gen6. */ - if (compiler->devinfo->gen >= 7) + if (compiler->devinfo->gen >= 7) { prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - else + /* On Cannonlake software shall not program an allocation size that + * specifies a size that is a multiple of 3 64B (512-bit) cachelines. 
+ */ + if (compiler->devinfo->gen == 10 && + prog_data->base.urb_entry_size % 3 == 0) + prog_data->base.urb_entry_size++; + } else { prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; + } assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim)); prog_data->output_topology = @@ -853,7 +867,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, g.enable_debug(name); } g.generate_code(v.cfg, 8); - return g.get_assembly(final_assembly_size); + return g.get_assembly(&prog_data->base.base.program_size); } } @@ -868,10 +882,37 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader, mem_ctx, true /* no_spills */, shader_time_index); + + /* Backup 'nr_params' and 'param' as they can be modified by the + * DUAL_OBJECT visitor. If it fails, we will run the fallback + * (DUAL_INSTANCED or SINGLE mode) and we need to restore original + * values. + */ + const unsigned param_count = prog_data->base.base.nr_params; + uint32_t *param = ralloc_array(NULL, uint32_t, param_count); + memcpy(param, prog_data->base.base.param, + sizeof(uint32_t) * param_count); + if (v.run()) { + /* Success! Backup is not needed */ + ralloc_free(param); return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, &prog_data->base, v.cfg, - final_assembly_size); + &prog_data->base.base. + program_size); + } else { + /* These variables could be modified by the execution of the GS + * visitor if it packed the uniforms in the push constant buffer. + * As it failed, we need to restore them so we can start again with + * DUAL_INSTANCED or SINGLE mode. + * + * FIXME: Could more variables be modified by this execution? 
+ */ + memcpy(prog_data->base.base.param, param, + sizeof(uint32_t) * param_count); + prog_data->base.base.nr_params = param_count; + prog_data->base.base.nr_pull_params = 0; + ralloc_free(param); } } } @@ -922,7 +963,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, } else { ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, &prog_data->base, gs->cfg, - final_assembly_size); + &prog_data->base.base.program_size); } delete gs;