X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs.cpp;h=68e73cc5cd801a672bbb73e884444c5f9e6556d6;hb=a497ab6838ae5a9898abfed82f7bc8295b490911;hp=977fd8c35f592ea32591355bce06d7fd410daeff;hpb=9b22a0d295316b7547667ebbfe1e1b6182439186;p=mesa.git

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 977fd8c35f5..68e73cc5cd8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -462,6 +462,8 @@ type_size_scalar(const struct glsl_type *type)
    case GLSL_TYPE_BOOL:
       return type->components();
    case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_UINT64:
+   case GLSL_TYPE_INT64:
       return type->components() * 2;
    case GLSL_TYPE_ARRAY:
       return type_size_scalar(type->fields.array) * type->length;
@@ -492,19 +494,6 @@ type_size_scalar(const struct glsl_type *type)
    return 0;
 }
 
-/* Attribute arrays are loaded as one vec4 per element (or matrix column),
- * except for double-precision types, which are loaded as one dvec4.
- */
-extern "C" int
-type_size_vs_input(const struct glsl_type *type)
-{
-   if (type->is_double()) {
-      return type_size_dvec4(type);
-   } else {
-      return type_size_vec4(type);
-   }
-}
-
 /**
  * Create a MOV to read the timestamp register.
  *
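The type_size_scalar() hunk above sizes the new 64-bit integer types exactly like doubles: each component occupies two 32-bit scalar slots. A minimal standalone sketch of that counting rule, assuming one slot is 32 bits (scalar_slots() is an illustrative name, not a driver function):

#include <cassert>

/* Counting rule from the hunk above: 64-bit types (double, int64_t,
 * uint64_t) take two 32-bit slots per component; 32-bit types take one. */
static unsigned scalar_slots(unsigned components, unsigned bit_size)
{
   assert(bit_size == 32 || bit_size == 64);
   return components * (bit_size / 32);
}

int main()
{
   assert(scalar_slots(4, 32) == 4);  /* vec4, ivec4, uvec4 */
   assert(scalar_slots(4, 64) == 8);  /* dvec4, i64vec4, u64vec4 */
   assert(scalar_slots(2, 64) == 4);  /* dvec2 */
   return 0;
}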
@@ -1864,7 +1853,10 @@ fs_visitor::compact_virtual_grfs()
 }
 
 static void
-set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
+set_push_pull_constant_loc(unsigned uniform, int *chunk_start,
+                           unsigned *max_chunk_bitsize,
+                           bool contiguous, unsigned bitsize,
+                           const unsigned target_bitsize,
                            int *push_constant_loc, int *pull_constant_loc,
                            unsigned *num_push_constants,
                            unsigned *num_pull_constants,
@@ -1876,11 +1868,23 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
    if (*chunk_start < 0)
       *chunk_start = uniform;
 
+   /* Keep track of the maximum bit size access in contiguous uniforms */
+   *max_chunk_bitsize = MAX2(*max_chunk_bitsize, bitsize);
+
    /* If this element does not need to be contiguous with the next, we
     * split at this point and everything between chunk_start and u forms a
     * single chunk.
     */
    if (!contiguous) {
+      /* If bitsize doesn't match the target one, skip it */
+      if (*max_chunk_bitsize != target_bitsize) {
+         /* FIXME: right now we only support 32 and 64-bit accesses */
+         assert(*max_chunk_bitsize == 4 || *max_chunk_bitsize == 8);
+         *max_chunk_bitsize = 0;
+         *chunk_start = -1;
+         return;
+      }
+
       unsigned chunk_size = uniform - *chunk_start + 1;
 
       /* Decide whether we should push or pull this parameter.  In the
@@ -1898,6 +1902,7 @@ set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
             pull_constant_loc[j] = (*num_pull_constants)++;
       }
 
+      *max_chunk_bitsize = 0;
       *chunk_start = -1;
    }
 }
@@ -1920,8 +1925,8 @@ fs_visitor::assign_constant_locations()
    bool is_live[uniforms];
    memset(is_live, 0, sizeof(is_live));
 
-   bool is_live_64bit[uniforms];
-   memset(is_live_64bit, 0, sizeof(is_live_64bit));
+   unsigned bitsize_access[uniforms];
+   memset(bitsize_access, 0, sizeof(bitsize_access));
 
    /* For each uniform slot, a value of true indicates that the given slot and
    * the next slot must remain contiguous.  This is used to keep us from
@@ -1958,20 +1963,18 @@ fs_visitor::assign_constant_locations()
            for (unsigned j = constant_nr; j < last; j++) {
               is_live[j] = true;
               contiguous[j] = true;
-              if (type_sz(inst->src[i].type) == 8) {
-                 is_live_64bit[j] = true;
-              }
+              bitsize_access[j] = MAX2(bitsize_access[j], type_sz(inst->src[i].type));
            }
            is_live[last] = true;
+           bitsize_access[last] = MAX2(bitsize_access[last], type_sz(inst->src[i].type));
         } else {
            if (constant_nr >= 0 && constant_nr < (int) uniforms) {
               int regs_read = inst->components_read(i) *
                  type_sz(inst->src[i].type) / 4;
               for (int j = 0; j < regs_read; j++) {
                  is_live[constant_nr + j] = true;
-                 if (type_sz(inst->src[i].type) == 8) {
-                    is_live_64bit[constant_nr + j] = true;
-                 }
+                 bitsize_access[constant_nr + j] =
+                    MAX2(bitsize_access[constant_nr + j], type_sz(inst->src[i].type));
               }
            }
         }
@@ -2010,13 +2013,17 @@ fs_visitor::assign_constant_locations()
    memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc));
 
    int chunk_start = -1;
+   unsigned max_chunk_bitsize = 0;
 
    /* First push 64-bit uniforms to ensure they are properly aligned */
+   const unsigned uniform_64_bit_size = type_sz(BRW_REGISTER_TYPE_DF);
    for (unsigned u = 0; u < uniforms; u++) {
-      if (!is_live[u] || !is_live_64bit[u])
+      if (!is_live[u])
         continue;
 
-      set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+      set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+                                 contiguous[u], bitsize_access[u],
+                                 uniform_64_bit_size,
                                  push_constant_loc, pull_constant_loc,
                                  &num_push_constants, &num_pull_constants,
                                  max_push_components, max_chunk_size,
@@ -2025,15 +2032,18 @@ fs_visitor::assign_constant_locations()
    }
 
    /* Then push the rest of uniforms */
+   const unsigned uniform_32_bit_size = type_sz(BRW_REGISTER_TYPE_F);
    for (unsigned u = 0; u < uniforms; u++) {
-      if (!is_live[u] || is_live_64bit[u])
+      if (!is_live[u])
         continue;
 
      /* Skip thread_local_id_index to put it in the last push register. */
      if (thread_local_id_index == (int)u)
         continue;
 
-      set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+      set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+                                 contiguous[u], bitsize_access[u],
+                                 uniform_32_bit_size,
                                  push_constant_loc, pull_constant_loc,
                                  &num_push_constants, &num_pull_constants,
                                  max_push_components, max_chunk_size,
@@ -2111,25 +2121,22 @@ fs_visitor::lower_constant_loads()
          if (pull_index == -1)
            continue;
 
-         const unsigned index = stage_prog_data->binding_table.pull_constants_start;
-         fs_reg dst;
-
-         if (type_sz(inst->src[i].type) <= 4)
-            dst = vgrf(glsl_type::float_type);
-         else
-            dst = vgrf(glsl_type::double_type);
-
         assert(inst->src[i].stride == 0);
 
-         const fs_builder ubld = ibld.exec_all().group(4, 0);
-         struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
+         const unsigned index = stage_prog_data->binding_table.pull_constants_start;
+         const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
+         const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0);
+         const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+         const unsigned base = pull_index * 4;
+
         ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                   dst, brw_imm_ud(index), offset);
+                   dst, brw_imm_ud(index), brw_imm_ud(base & ~(block_sz - 1)));
 
         /* Rewrite the instruction to use the temporary VGRF. */
         inst->src[i].file = VGRF;
         inst->src[i].nr = dst.nr;
-         inst->src[i].offset = (pull_index & 3) * 4 + inst->src[i].offset % 4;
+         inst->src[i].offset = (base & (block_sz - 1)) +
+                               inst->src[i].offset % 4;
 
         brw_mark_surface_used(prog_data, index);
      }
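The rewritten lower_constant_loads() above fetches one whole 64-byte cacheline per pull-constant send: base & ~(block_sz - 1) rounds the byte offset down to the start of that cacheline, and base & (block_sz - 1) is the residue inside the fetched block, which becomes the rewritten source's offset. A worked example of the arithmetic (standalone sketch; pull_index = 19 is an arbitrary value chosen for illustration, not from the patch):

#include <cassert>

int main()
{
   const unsigned block_sz = 64;          /* one cacheline, in bytes */
   const unsigned pull_index = 19;        /* hypothetical pull slot */
   const unsigned base = pull_index * 4;  /* byte offset of the slot: 76 */

   /* The send reads the whole cacheline containing 'base'... */
   const unsigned block_base = base & ~(block_sz - 1);   /* 64 */
   /* ...and the source offset keeps only the residue inside it. */
   const unsigned in_block = base & (block_sz - 1);      /* 12 */

   assert(block_base == 64 && in_block == 12);
   assert(block_base + in_block == base);   /* nothing is lost */
   return 0;
}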
@@ -5941,15 +5948,15 @@ fs_visitor::run_tcs_single_patch()
    }
 
    /* Fix the dispatch mask */
-   if (nir->info->tcs.vertices_out % 8) {
+   if (nir->info->tess.tcs_vertices_out % 8) {
       bld.CMP(bld.null_reg_ud(), invocation_id,
-              brw_imm_ud(nir->info->tcs.vertices_out), BRW_CONDITIONAL_L);
+              brw_imm_ud(nir->info->tess.tcs_vertices_out), BRW_CONDITIONAL_L);
       bld.IF(BRW_PREDICATE_NORMAL);
    }
 
    emit_nir_code();
 
-   if (nir->info->tcs.vertices_out % 8) {
+   if (nir->info->tess.tcs_vertices_out % 8) {
       bld.emit(BRW_OPCODE_ENDIF);
    }
 
@@ -6398,15 +6405,21 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
                unsigned *final_assembly_size,
                char **error_str)
 {
+   const struct gen_device_info *devinfo = compiler->devinfo;
+
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
-   shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
-                                      true);
-   brw_nir_lower_fs_inputs(shader, vue_map, prog, compiler->devinfo, key);
+   shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
+   brw_nir_lower_fs_inputs(shader, devinfo, key);
    brw_nir_lower_fs_outputs(shader);
+
+   if (devinfo->gen < 6) {
+      brw_setup_vue_interpolation(vue_map, shader, prog_data, devinfo);
+   }
+
    if (!key->multisample_fbo)
       NIR_PASS_V(shader, demote_sample_qualifiers);
    NIR_PASS_V(shader, move_interpolation_to_top);
-   shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+   shader = brw_postprocess_nir(shader, compiler, true);
 
    /* key->alpha_test_func means simulating alpha testing via discards,
    * so the shader definitely kills pixels.
@@ -6631,8 +6644,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
                char **error_str)
 {
    nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
-   shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
-                                      true);
+   shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
    brw_nir_lower_cs_shared(shader);
    prog_data->base.total_shared += shader->num_shared;
 
@@ -6645,7 +6657,7 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
                                (unsigned)4 * (prog_data->thread_local_id_index + 1));
 
    brw_nir_lower_intrinsics(shader, &prog_data->base);
-   shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+   shader = brw_postprocess_nir(shader, compiler, true);
 
    prog_data->local_size[0] = shader->info->cs.local_size[0];
    prog_data->local_size[1] = shader->info->cs.local_size[1];
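Stepping back to the assign_constant_locations() hunks earlier in the diff: tracking bitsize_access[] and threading max_chunk_bitsize through set_push_pull_constant_loc() lets the pass lay uniforms out in two sweeps, 64-bit chunks first so they land on DF-aligned slots, then the 32-bit rest, with each sweep skipping chunks whose widest access does not match its target size. A much simplified standalone sketch of that two-sweep idea, assuming 4-byte slots, per-slot placement instead of per-chunk, and no push-versus-pull decision (all names are illustrative, not the driver's):

#include <vector>

/* bitsize_access[u] mirrors the driver's array: byte width of the widest
 * access to slot u. loc[u] stays -1 until the slot is placed. */
static void place_pass(const std::vector<unsigned> &bitsize_access,
                       unsigned target_bitsize,
                       std::vector<int> &loc, unsigned &next)
{
   for (unsigned u = 0; u < bitsize_access.size(); u++) {
      if (bitsize_access[u] == target_bitsize && loc[u] < 0)
         loc[u] = next++;   /* place only slots matching this sweep */
   }
}

int main()
{
   /* Slots 0-1 back a double (8-byte accesses); slot 2 backs a float. */
   std::vector<unsigned> bitsize_access = {8, 8, 4};
   std::vector<int> loc(bitsize_access.size(), -1);
   unsigned next = 0;

   place_pass(bitsize_access, 8, loc, next);  /* 64-bit uniforms first */
   place_pass(bitsize_access, 4, loc, next);  /* then the 32-bit rest */

   /* The double ends up on an even, DF-aligned slot pair (0-1). */
   return (loc[0] == 0 && loc[1] == 1 && loc[2] == 2) ? 0 : 1;
}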