#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_format.h"
-#include "st_nir.h"
-#include "st_shader_cache.h"
#include "st_glsl_to_tgsi_temprename.h"
#include "util/hash_table.h"
}
}
+
+static void
+mark_array_io(struct inout_decl *decls, unsigned count,
+ GLbitfield64* usage_mask,
+ GLbitfield64 double_usage_mask,
+ GLbitfield* patch_usage_mask)
+{
+ unsigned i;
+ int j;
+
+ /* Mark every element of each array declaration as used, rather than
+ * shrinking the declarations, so that the input/output mapping logic
+ * sees the full declared range of each array.
+ */
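+ /* Note: double_usage_mask is accepted but not read here; presumably it is
+ * kept so callers can pass the same argument list they pass to
+ * shrink_array_declarations().
+ */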
+ for (i = 0; i < count; i++) {
+ struct inout_decl *decl = &decls[i];
+ if (!decl->array_id)
+ continue;
+
+ /* When not all entries of an array are accessed, we mark them as used
+ * here anyway, to ensure that the input/output mapping logic doesn't get
+ * confused.
+ *
+ * TODO: This happens when an array is only accessed with constant indices
+ * (no indirect addressing), which some game ports do (at least eON-based
+ * ones). There is an optimization opportunity here: replace the array
+ * declaration with non-array declarations of just those slots that are
+ * actually used.
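+ *
+ * As a concrete illustration: a four-element array declared at
+ * VARYING_SLOT_VAR0 sets the usage bits for VAR0..VAR3 below, even if the
+ * shader only ever touches one of those slots.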
+ */
+ for (j = 0; j < (int)decl->size; ++j) {
+ if (decl->mesa_index >= VARYING_SLOT_PATCH0)
+ *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j);
+ else
+ *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j);
+ }
+ }
+}
+
void
glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
{
case ir_intrinsic_image_atomic_comp_swap:
opcode = TGSI_OPCODE_ATOMCAS;
break;
+ case ir_intrinsic_image_atomic_inc_wrap: {
+ /* There's a bit of disagreement between GLSL and the hardware. The
+ * hardware wants to wrap after the given wrap value, while GLSL
+ * wants to wrap at the value. Subtract 1 to make up the difference.
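+ * E.g. for a GLSL wrap value of 4 the counter should cycle through
+ * 0,1,2,3; since the opcode only wraps after passing its operand, passing
+ * 3 produces that same cycle.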
+ */
+ st_src_reg wrap = get_temp(glsl_type::uint_type);
+ emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(wrap),
+ arg1, st_src_reg_for_int(-1));
+ arg1 = wrap;
+ opcode = TGSI_OPCODE_ATOMINC_WRAP;
+ break;
+ }
+ case ir_intrinsic_image_atomic_dec_wrap:
+ opcode = TGSI_OPCODE_ATOMDEC_WRAP;
+ break;
default:
assert(!"Unexpected intrinsic");
return;
case ir_intrinsic_image_atomic_comp_swap:
case ir_intrinsic_image_size:
case ir_intrinsic_image_samples:
+ case ir_intrinsic_image_atomic_inc_wrap:
+ case ir_intrinsic_image_atomic_dec_wrap:
visit_image_intrinsic(ir);
return;
/* Fragment shader */
case SYSTEM_VALUE_FRAG_COORD:
return TGSI_SEMANTIC_POSITION;
+ case SYSTEM_VALUE_POINT_COORD:
+ return TGSI_SEMANTIC_PCOORD;
case SYSTEM_VALUE_FRONT_FACE:
return TGSI_SEMANTIC_FACE;
case SYSTEM_VALUE_SAMPLE_ID:
case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
case SYSTEM_VALUE_VERTEX_CNT:
- case SYSTEM_VALUE_VARYING_COORD:
+ case SYSTEM_VALUE_BARYCENTRIC_PIXEL:
+ case SYSTEM_VALUE_BARYCENTRIC_SAMPLE:
+ case SYSTEM_VALUE_BARYCENTRIC_CENTROID:
+ case SYSTEM_VALUE_BARYCENTRIC_SIZE:
default:
assert(!"Unexpected SYSTEM_VALUE_ enum");
return TGSI_SEMANTIC_COUNT;
case TGSI_OPCODE_ATOMIMAX:
case TGSI_OPCODE_ATOMFADD:
case TGSI_OPCODE_IMG2HND:
+ case TGSI_OPCODE_ATOMINC_WRAP:
+ case TGSI_OPCODE_ATOMDEC_WRAP:
for (i = num_src - 1; i >= 0; i--)
src[i + 1] = src[i];
num_src++;
}
do_set_program_inouts(shader->ir, prog, shader->Stage);
+
_mesa_copy_linked_program_data(shader_program, shader);
- shrink_array_declarations(v->inputs, v->num_inputs,
- &prog->info.inputs_read,
- prog->DualSlotInputs,
- &prog->info.patch_inputs_read);
- shrink_array_declarations(v->outputs, v->num_outputs,
- &prog->info.outputs_written, 0ULL,
- &prog->info.patch_outputs_written);
+
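+ /* Drivers that set PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS want the declared
+ * array ranges kept intact, so only mark the covered slots as used;
+ * otherwise trim unused elements from both ends of each array as before.
+ */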
+ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS)) {
+ mark_array_io(v->inputs, v->num_inputs,
+ &prog->info.inputs_read,
+ prog->DualSlotInputs,
+ &prog->info.patch_inputs_read);
+
+ mark_array_io(v->outputs, v->num_outputs,
+ &prog->info.outputs_written, 0ULL,
+ &prog->info.patch_outputs_written);
+ } else {
+ shrink_array_declarations(v->inputs, v->num_inputs,
+ &prog->info.inputs_read,
+ prog->DualSlotInputs,
+ &prog->info.patch_inputs_read);
+ shrink_array_declarations(v->outputs, v->num_outputs,
+ &prog->info.outputs_written, 0ULL,
+ &prog->info.patch_outputs_written);
+ }
+
count_resources(v, prog);
/* The GLSL IR won't be needed anymore. */
* prog->ParameterValues to get reallocated (e.g., anything that adds a
* program constant) has to happen before creating this linkage.
*/
- _mesa_associate_uniform_storage(ctx, shader_program, prog, true);
+ _mesa_associate_uniform_storage(ctx, shader_program, prog);
if (!shader_program->data->LinkStatus) {
free_glsl_to_tgsi_visitor(v);
_mesa_reference_program(ctx, &shader->Program, NULL);
return visitor.unsupported;
}
-extern "C" {
-
/**
* Link a shader.
- * Called via ctx->Driver.LinkShader()
* This actually involves converting GLSL IR into an intermediate TGSI-like IR
* with code lowering and other optimizations.
*/
GLboolean
-st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
+st_link_tgsi(struct gl_context *ctx, struct gl_shader_program *prog)
{
struct pipe_screen *pscreen = ctx->st->pipe->screen;
- enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
- pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX,
- PIPE_SHADER_CAP_PREFERRED_IR);
- bool use_nir = preferred_ir == PIPE_SHADER_IR_NIR;
-
- /* Return early if we are loading the shader from on-disk cache */
- if (st_load_ir_from_disk_cache(ctx, prog, use_nir)) {
- return GL_TRUE;
- }
-
- assert(prog->data->LinkStatus);
-
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- if (prog->_LinkedShaders[i] == NULL)
+ struct gl_linked_shader *shader = prog->_LinkedShaders[i];
+ if (shader == NULL)
continue;
- struct gl_linked_shader *shader = prog->_LinkedShaders[i];
exec_list *ir = shader->ir;
gl_shader_stage stage = shader->Stage;
+ enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage);
const struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[stage];
- enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage);
- bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
- PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
- bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
- PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
- bool have_ldexp = pscreen->get_shader_param(pscreen, ptarget,
- PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED);
+
unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget,
PIPE_SHADER_CAP_LOWER_IF_THRESHOLD);
-
- /* If there are forms of indirect addressing that the driver
- * cannot handle, perform the lowering pass.
- */
- if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
- options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
- lower_variable_index_to_cond_assign(stage, ir,
- options->EmitNoIndirectInput,
- options->EmitNoIndirectOutput,
- options->EmitNoIndirectTemp,
- options->EmitNoIndirectUniform);
- }
-
- if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD))
- lower_64bit_integer_instructions(ir, DIV64 | MOD64);
-
- if (ctx->Extensions.ARB_shading_language_packing) {
- unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
- LOWER_UNPACK_SNORM_2x16 |
- LOWER_PACK_UNORM_2x16 |
- LOWER_UNPACK_UNORM_2x16 |
- LOWER_PACK_SNORM_4x8 |
- LOWER_UNPACK_SNORM_4x8 |
- LOWER_UNPACK_UNORM_4x8 |
- LOWER_PACK_UNORM_4x8;
-
- if (ctx->Extensions.ARB_gpu_shader5)
- lower_inst |= LOWER_PACK_USE_BFI |
- LOWER_PACK_USE_BFE;
- if (!ctx->st->has_half_float_packing)
- lower_inst |= LOWER_PACK_HALF_2x16 |
- LOWER_UNPACK_HALF_2x16;
-
- lower_packing_builtins(ir, lower_inst);
- }
-
- if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
- lower_offset_arrays(ir);
- do_mat_op_to_vec(ir);
-
- if (stage == MESA_SHADER_FRAGMENT)
- lower_blend_equation_advanced(
- shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
-
- lower_instructions(ir,
- MOD_TO_FLOOR |
- FDIV_TO_MUL_RCP |
- EXP_TO_EXP2 |
- LOG_TO_LOG2 |
- MUL64_TO_MUL_AND_MUL_HIGH |
- (have_ldexp ? 0 : LDEXP_TO_ARITH) |
- (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
- CARRY_TO_ARITH |
- BORROW_TO_ARITH |
- (have_dround ? 0 : DOPS_TO_DFRAC) |
- (options->EmitNoPow ? POW_TO_EXP2 : 0) |
- (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
- (options->EmitNoSat ? SAT_TO_CLAMP : 0) |
- (ctx->Const.ForceGLSLAbsSqrt ? SQRT_TO_ABS_SQRT : 0) |
- /* Assume that if ARB_gpu_shader5 is not supported
- * then all of the extended integer functions need
- * lowering. It may be necessary to add some caps
- * for individual instructions.
- */
- (!ctx->Extensions.ARB_gpu_shader5
- ? BIT_COUNT_TO_MATH |
- EXTRACT_TO_SHIFTS |
- INSERT_TO_SHIFTS |
- REVERSE_TO_SHIFTS |
- FIND_LSB_TO_FLOAT_CAST |
- FIND_MSB_TO_FLOAT_CAST |
- IMUL_HIGH_TO_MUL
- : 0));
-
- do_vec_index_to_cond_assign(ir);
- lower_vector_insert(ir, true);
- lower_quadop_vector(ir, false);
- lower_noise(ir);
- if (options->MaxIfDepth == 0) {
- lower_discard(ir);
- }
-
if (ctx->Const.GLSLOptimizeConservatively) {
/* Do it once and repeat only if there's unsupported control flow. */
do {
do_vec_index_to_cond_assign(ir);
validate_ir_tree(ir);
- }
-
- build_program_resource_list(ctx, prog);
-
- if (use_nir)
- return st_link_nir(ctx, prog);
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct gl_linked_shader *shader = prog->_LinkedShaders[i];
- if (shader == NULL)
- continue;
struct gl_program *linked_prog =
get_mesa_program_tgsi(ctx, prog, shader);
return GL_TRUE;
}
+extern "C" {
+
void
st_translate_stream_output_info(struct gl_transform_feedback_info *info,
const ubyte outputMapping[],