X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=9b982569490ce1b529f5b2ec472be07f8e3bb396;hb=a921b215dd9487aef74bec868e4201232d976992;hp=5b7203542f39fa916d3b5af831dafb8c21f04d8e;hpb=8294295dbdc053c92065844f2079aef8da05db9b;p=mesa.git diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 5b7203542f3..9b982569490 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -53,8 +53,6 @@ #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "st_format.h" -#include "st_nir.h" -#include "st_shader_cache.h" #include "st_glsl_to_tgsi_temprename.h" #include "util/hash_table.h" @@ -2717,6 +2715,42 @@ shrink_array_declarations(struct inout_decl *decls, unsigned count, } } + +static void +mark_array_io(struct inout_decl *decls, unsigned count, + GLbitfield64* usage_mask, + GLbitfield64 double_usage_mask, + GLbitfield* patch_usage_mask) +{ + unsigned i; + int j; + + /* Mark every element of each array declaration as used in the usage masks, + * whether or not the shader accesses it. Nothing is trimmed here. + */ + for (i = 0; i < count; i++) { + struct inout_decl *decl = &decls[i]; + if (!decl->array_id) + continue; + + /* When not all entries of an array are accessed, we mark them as used + * here anyway, to ensure that the input/output mapping logic doesn't get + * confused. + * + * TODO This happens when an array isn't used via indirect access, which + * some game ports do (at least eON-based). There is an optimization + * opportunity here by replacing the array declaration with non-array + * declarations of those slots that are actually used. 
+ */ + for (j = 0; j < (int)decl->size; ++j) { + if (decl->mesa_index >= VARYING_SLOT_PATCH0) + *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j); + else + *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j); + } + } +} + void glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) { @@ -3904,6 +3938,21 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) case ir_intrinsic_image_atomic_comp_swap: opcode = TGSI_OPCODE_ATOMCAS; break; + case ir_intrinsic_image_atomic_inc_wrap: { + /* There's a bit of disagreement between GLSL and the hardware. The + * hardware wants to wrap after the given wrap value, while GLSL + * wants to wrap at the value. Subtract 1 to make up the difference. + */ + st_src_reg wrap = get_temp(glsl_type::uint_type); + emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(wrap), + arg1, st_src_reg_for_int(-1)); + arg1 = wrap; + opcode = TGSI_OPCODE_ATOMINC_WRAP; + break; + } + case ir_intrinsic_image_atomic_dec_wrap: + opcode = TGSI_OPCODE_ATOMDEC_WRAP; + break; default: assert(!"Unexpected intrinsic"); return; @@ -4029,6 +4078,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) case ir_intrinsic_image_atomic_comp_swap: case ir_intrinsic_image_size: case ir_intrinsic_image_samples: + case ir_intrinsic_image_atomic_inc_wrap: + case ir_intrinsic_image_atomic_dec_wrap: visit_image_intrinsic(ir); return; @@ -5740,6 +5791,8 @@ _mesa_sysval_to_semantic(unsigned sysval) /* Fragment shader */ case SYSTEM_VALUE_FRAG_COORD: return TGSI_SEMANTIC_POSITION; + case SYSTEM_VALUE_POINT_COORD: + return TGSI_SEMANTIC_PCOORD; case SYSTEM_VALUE_FRONT_FACE: return TGSI_SEMANTIC_FACE; case SYSTEM_VALUE_SAMPLE_ID: @@ -5793,7 +5846,10 @@ _mesa_sysval_to_semantic(unsigned sysval) case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: case SYSTEM_VALUE_VERTEX_CNT: - case SYSTEM_VALUE_VARYING_COORD: + case SYSTEM_VALUE_BARYCENTRIC_PIXEL: + case SYSTEM_VALUE_BARYCENTRIC_SAMPLE: + case SYSTEM_VALUE_BARYCENTRIC_CENTROID: + case 
SYSTEM_VALUE_BARYCENTRIC_SIZE: default: assert(!"Unexpected SYSTEM_VALUE_ enum"); return TGSI_SEMANTIC_COUNT; @@ -6202,6 +6258,8 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_ATOMIMAX: case TGSI_OPCODE_ATOMFADD: case TGSI_OPCODE_IMG2HND: + case TGSI_OPCODE_ATOMINC_WRAP: + case TGSI_OPCODE_ATOMDEC_WRAP: for (i = num_src - 1; i >= 0; i--) src[i + 1] = src[i]; num_src++; @@ -7159,14 +7217,28 @@ get_mesa_program_tgsi(struct gl_context *ctx, } do_set_program_inouts(shader->ir, prog, shader->Stage); + _mesa_copy_linked_program_data(shader_program, shader); - shrink_array_declarations(v->inputs, v->num_inputs, - &prog->info.inputs_read, - prog->DualSlotInputs, - &prog->info.patch_inputs_read); - shrink_array_declarations(v->outputs, v->num_outputs, - &prog->info.outputs_written, 0ULL, - &prog->info.patch_outputs_written); + + if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS)) { + mark_array_io(v->inputs, v->num_inputs, + &prog->info.inputs_read, + prog->DualSlotInputs, + &prog->info.patch_inputs_read); + + mark_array_io(v->outputs, v->num_outputs, + &prog->info.outputs_written, 0ULL, + &prog->info.patch_outputs_written); + } else { + shrink_array_declarations(v->inputs, v->num_inputs, + &prog->info.inputs_read, + prog->DualSlotInputs, + &prog->info.patch_inputs_read); + shrink_array_declarations(v->outputs, v->num_outputs, + &prog->info.outputs_written, 0ULL, + &prog->info.patch_outputs_written); + } + count_resources(v, prog); /* The GLSL IR won't be needed anymore. */ @@ -7196,7 +7268,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, * prog->ParameterValues to get reallocated (e.g., anything that adds a * program constant) has to happen before creating this linkage. 
*/ - _mesa_associate_uniform_storage(ctx, shader_program, prog, true); + _mesa_associate_uniform_storage(ctx, shader_program, prog); if (!shader_program->data->LinkStatus) { free_glsl_to_tgsi_visitor(v); _mesa_reference_program(ctx, &shader->Program, NULL); @@ -7287,131 +7359,29 @@ has_unsupported_control_flow(exec_list *ir, return visitor.unsupported; } -extern "C" { - /** * Link a shader. - * Called via ctx->Driver.LinkShader() * This actually involves converting GLSL IR into an intermediate TGSI-like IR * with code lowering and other optimizations. */ GLboolean -st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) +st_link_tgsi(struct gl_context *ctx, struct gl_shader_program *prog) { struct pipe_screen *pscreen = ctx->st->pipe->screen; - enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir) - pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, - PIPE_SHADER_CAP_PREFERRED_IR); - bool use_nir = preferred_ir == PIPE_SHADER_IR_NIR; - - /* Return early if we are loading the shader from on-disk cache */ - if (st_load_ir_from_disk_cache(ctx, prog, use_nir)) { - return GL_TRUE; - } - - assert(prog->data->LinkStatus); - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] == NULL) + struct gl_linked_shader *shader = prog->_LinkedShaders[i]; + if (shader == NULL) continue; - struct gl_linked_shader *shader = prog->_LinkedShaders[i]; exec_list *ir = shader->ir; gl_shader_stage stage = shader->Stage; + enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage); const struct gl_shader_compiler_options *options = &ctx->Const.ShaderCompilerOptions[stage]; - enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage); - bool have_dround = pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED); - bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget, - PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED); - bool have_ldexp = pscreen->get_shader_param(pscreen, ptarget, - 
PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED); + unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_LOWER_IF_THRESHOLD); - - /* If there are forms of indirect addressing that the driver - * cannot handle, perform the lowering pass. - */ - if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput || - options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) { - lower_variable_index_to_cond_assign(stage, ir, - options->EmitNoIndirectInput, - options->EmitNoIndirectOutput, - options->EmitNoIndirectTemp, - options->EmitNoIndirectUniform); - } - - if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD)) - lower_64bit_integer_instructions(ir, DIV64 | MOD64); - - if (ctx->Extensions.ARB_shading_language_packing) { - unsigned lower_inst = LOWER_PACK_SNORM_2x16 | - LOWER_UNPACK_SNORM_2x16 | - LOWER_PACK_UNORM_2x16 | - LOWER_UNPACK_UNORM_2x16 | - LOWER_PACK_SNORM_4x8 | - LOWER_UNPACK_SNORM_4x8 | - LOWER_UNPACK_UNORM_4x8 | - LOWER_PACK_UNORM_4x8; - - if (ctx->Extensions.ARB_gpu_shader5) - lower_inst |= LOWER_PACK_USE_BFI | - LOWER_PACK_USE_BFE; - if (!ctx->st->has_half_float_packing) - lower_inst |= LOWER_PACK_HALF_2x16 | - LOWER_UNPACK_HALF_2x16; - - lower_packing_builtins(ir, lower_inst); - } - - if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) - lower_offset_arrays(ir); - do_mat_op_to_vec(ir); - - if (stage == MESA_SHADER_FRAGMENT) - lower_blend_equation_advanced( - shader, ctx->Extensions.KHR_blend_equation_advanced_coherent); - - lower_instructions(ir, - MOD_TO_FLOOR | - FDIV_TO_MUL_RCP | - EXP_TO_EXP2 | - LOG_TO_LOG2 | - MUL64_TO_MUL_AND_MUL_HIGH | - (have_ldexp ? 0 : LDEXP_TO_ARITH) | - (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) | - CARRY_TO_ARITH | - BORROW_TO_ARITH | - (have_dround ? 0 : DOPS_TO_DFRAC) | - (options->EmitNoPow ? POW_TO_EXP2 : 0) | - (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | - (options->EmitNoSat ? SAT_TO_CLAMP : 0) | - (ctx->Const.ForceGLSLAbsSqrt ? 
SQRT_TO_ABS_SQRT : 0) | - /* Assume that if ARB_gpu_shader5 is not supported - * then all of the extended integer functions need - * lowering. It may be necessary to add some caps - * for individual instructions. - */ - (!ctx->Extensions.ARB_gpu_shader5 - ? BIT_COUNT_TO_MATH | - EXTRACT_TO_SHIFTS | - INSERT_TO_SHIFTS | - REVERSE_TO_SHIFTS | - FIND_LSB_TO_FLOAT_CAST | - FIND_MSB_TO_FLOAT_CAST | - IMUL_HIGH_TO_MUL - : 0)); - - do_vec_index_to_cond_assign(ir); - lower_vector_insert(ir, true); - lower_quadop_vector(ir, false); - lower_noise(ir); - if (options->MaxIfDepth == 0) { - lower_discard(ir); - } - if (ctx->Const.GLSLOptimizeConservatively) { /* Do it once and repeat only if there's unsupported control flow. */ do { @@ -7437,17 +7407,6 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) do_vec_index_to_cond_assign(ir); validate_ir_tree(ir); - } - - build_program_resource_list(ctx, prog); - - if (use_nir) - return st_link_nir(ctx, prog); - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_linked_shader *shader = prog->_LinkedShaders[i]; - if (shader == NULL) - continue; struct gl_program *linked_prog = get_mesa_program_tgsi(ctx, prog, shader); @@ -7466,6 +7425,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) return GL_TRUE; } +extern "C" { + void st_translate_stream_output_info(struct gl_transform_feedback_info *info, const ubyte outputMapping[],