X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fmesa%2Fstate_tracker%2Fst_glsl_to_tgsi.cpp;h=1c644163716385c47329edd023f8700ed6d97a8e;hp=be582f5f01ccaf7a1a8df2fa9864ae4060ad665e;hb=9e9d28007a44c664c79cd89549988194e156a39f;hpb=71c34a51c323bcdeaa1897efe065b9676b179a97 diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index be582f5f01c..1c644163716 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -49,7 +49,6 @@ #include "tgsi/tgsi_info.h" #include "util/u_math.h" #include "util/u_memory.h" -#include "st_glsl_types.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" #include "st_format.h" @@ -246,6 +245,7 @@ public: bool has_tex_txf_lz; bool precise; bool need_uarl; + bool tg4_component_in_swizzle; variable_storage *find_variable_storage(ir_variable *var); @@ -292,7 +292,7 @@ public: virtual void visit(ir_barrier *); /*@}*/ - void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE; + void ATTRIBUTE_NOINLINE visit_expression(ir_expression *, st_src_reg *); void visit_atomic_counter_intrinsic(ir_call *); void visit_ssbo_intrinsic(ir_call *); @@ -1572,17 +1572,6 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]); break; - case ir_unop_noise: { - /* At some point, a motivated person could add a better - * implementation of noise. Currently not even the nvidia - * binary drivers do anything more than this. In any case, the - * place to do this is in the GL state tracker, not the poor - * driver. - */ - emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); - break; - } - case ir_binop_add: emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); break; @@ -1949,11 +1938,13 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); break; } + /* fallthrough */ case ir_unop_u2f: if (native_integers) { emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]); break; } + /* fallthrough */ case ir_binop_lshift: case ir_binop_rshift: if (native_integers) { @@ -1975,16 +1966,19 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, opcode, result_dst, op[0], count); break; } + /* fallthrough */ case ir_binop_bit_and: if (native_integers) { emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); break; } + /* fallthrough */ case ir_binop_bit_xor: if (native_integers) { emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); break; } + /* fallthrough */ case ir_binop_bit_or: if (native_integers) { emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); @@ -2237,11 +2231,9 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) case ir_unop_get_buffer_size: { ir_constant *const_offset = ir->operands[0]->as_constant(); - int buf_base = ctx->st->has_hw_atomics - ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers; st_src_reg buffer( PROGRAM_BUFFER, - buf_base + (const_offset ? const_offset->value.u[0] : 0), + const_offset ? const_offset->value.u[0] : 0, GLSL_TYPE_UINT); if (!const_offset) { buffer.reladdr = ralloc(mem_ctx, st_src_reg); @@ -2388,6 +2380,24 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) case ir_binop_carry: case ir_binop_borrow: case ir_unop_ssbo_unsized_array_length: + case ir_unop_atan: + case ir_binop_atan2: + case ir_unop_clz: + case ir_binop_add_sat: + case ir_binop_sub_sat: + case ir_binop_abs_sub: + case ir_binop_avg: + case ir_binop_avg_round: + case ir_binop_mul_32x16: + case ir_unop_f162f: + case ir_unop_f2f16: + case ir_unop_f2fmp: + case ir_unop_f162b: + case ir_unop_b2f16: + case ir_unop_i2i: + case ir_unop_i2imp: + case ir_unop_u2u: + case ir_unop_u2ump: /* This operation is not supported, or should have already been handled. */ assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); @@ -2403,7 +2413,7 @@ glsl_to_tgsi_visitor::visit(ir_swizzle *ir) { st_src_reg src; int i; - int swizzle[4]; + int swizzle[4] = {0}; /* Note that this is only swizzles in expressions, not those on the left * hand side of an assignment, which do write masking. See ir_assignment @@ -2763,12 +2773,12 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) if (handle_bound_deref(ir->as_dereference())) return; - /* We only need the logic provided by st_glsl_storage_type_size() + /* We only need the logic provided by count_vec4_slots() * for arrays of structs. Indirect sampler and image indexing is handled * elsewhere. */ int element_size = ir->type->without_array()->is_struct() ? - st_glsl_storage_type_size(ir->type, var->data.bindless) : + ir->type->count_vec4_slots(false, var->data.bindless) : type_size(ir->type); index = ir->array_index->constant_expression_value(ralloc_parent(ir)); @@ -2873,7 +2883,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) if (i == (unsigned) ir->field_idx) break; const glsl_type *member_type = struct_type->fields.structure[i].type; - offset += st_glsl_storage_type_size(member_type, var->data.bindless); + offset += member_type->count_vec4_slots(false, var->data.bindless); } /* If the type is smaller than a vec4, replicate the last channel out. */ @@ -3447,7 +3457,9 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) resource = buffer; } else { - st_src_reg buffer(PROGRAM_BUFFER, location->data.binding, + st_src_reg buffer(PROGRAM_BUFFER, + prog->info.num_ssbos + + location->data.binding, GLSL_TYPE_ATOMIC_UINT); if (offset.file != PROGRAM_UNDEFINED) { @@ -3535,11 +3547,9 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); ir_constant *const_block = block->as_constant(); - int buf_base = st_context(ctx)->has_hw_atomics - ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers; st_src_reg buffer( PROGRAM_BUFFER, - buf_base + (const_block ? const_block->value.u[0] : 0), + const_block ? const_block->value.u[0] : 0, GLSL_TYPE_UINT); if (!const_block) { @@ -3761,7 +3771,7 @@ static void get_image_qualifiers(ir_dereference *ir, const glsl_type **type, bool *memory_coherent, bool *memory_volatile, bool *memory_restrict, bool *memory_read_only, - unsigned *image_format) + enum pipe_format *image_format) { switch (ir->ir_type) { @@ -3819,7 +3829,7 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) unsigned sampler_array_size = 1, sampler_base = 0; bool memory_coherent = false, memory_volatile = false, memory_restrict = false, memory_read_only = false; - unsigned image_format = 0; + enum pipe_format image_format = PIPE_FORMAT_NONE; const glsl_type *type = NULL; get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile, @@ -3975,8 +3985,7 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) } inst->tex_target = type->sampler_index(); - inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), - _mesa_get_shader_image_format(image_format)); + inst->image_format = image_format; inst->read_only = memory_read_only; if (memory_coherent) @@ -4294,14 +4303,13 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) enum tgsi_opcode opcode = TGSI_OPCODE_NOP; const glsl_type *sampler_type = ir->sampler->type; unsigned sampler_array_size = 1, sampler_base = 0; - bool is_cube_array = false, is_cube_shadow = false; + bool is_cube_array = false; ir_variable *var = ir->sampler->variable_referenced(); unsigned i; /* if we are a cube array sampler or a cube shadow */ if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { is_cube_array = sampler_type->sampler_array; - is_cube_shadow = sampler_type->sampler_shadow; } if (ir->coordinate) { @@ -4339,7 +4347,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) } break; case ir_txb: - if (is_cube_array || is_cube_shadow) { + if (is_cube_array || + (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) { opcode = TGSI_OPCODE_TXB2; } else { @@ -4356,7 +4365,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { opcode = TGSI_OPCODE_TEX_LZ; } else { - opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; + opcode = (is_cube_array || (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; ir->lod_info.lod->accept(this); lod_info = this->result; } @@ -4494,11 +4503,21 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) ir->shadow_comparator->accept(this); if (is_cube_array) { - cube_sc = get_temp(glsl_type::float_type); - cube_sc_dst = st_dst_reg(cube_sc); - cube_sc_dst.writemask = WRITEMASK_X; + if (lod_info.file != PROGRAM_UNDEFINED) { + // If we have both a cube array *and* a bias/lod, stick the + // comparator into the .Y of the second argument. + st_src_reg tmp = get_temp(glsl_type::vec2_type); + cube_sc_dst = st_dst_reg(tmp); + cube_sc_dst.writemask = WRITEMASK_X; + emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, lod_info); + lod_info = tmp; + cube_sc_dst.writemask = WRITEMASK_Y; + } else { + cube_sc = get_temp(glsl_type::float_type); + cube_sc_dst = st_dst_reg(cube_sc); + cube_sc_dst.writemask = WRITEMASK_X; + } emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); - cube_sc_dst.writemask = WRITEMASK_X; } else { if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && @@ -4565,7 +4584,20 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) if (is_cube_array && ir->shadow_comparator) { inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); } else { - inst = emit_asm(ir, opcode, result_dst, coord, component); + if (this->tg4_component_in_swizzle) { + inst = emit_asm(ir, opcode, result_dst, coord); + int idx = 0; + foreach_in_list(immediate_storage, entry, &this->immediates) { + if (component.index == idx) { + gl_constant_value value = entry->values[component.swizzle]; + inst->gather_component = value.i; + break; + } + idx++; + } + } else { + inst = emit_asm(ir, opcode, result_dst, coord, component); + } } } else inst = emit_asm(ir, opcode, result_dst, coord); @@ -4717,6 +4749,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() prog = NULL; precise = 0; need_uarl = false; + tg4_component_in_swizzle = false; shader_program = NULL; shader = NULL; options = NULL; @@ -5774,99 +5807,9 @@ struct st_translate { enum pipe_shader_type procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ bool need_uarl; + bool tg4_component_in_swizzle; }; -/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ -enum tgsi_semantic -_mesa_sysval_to_semantic(unsigned sysval) -{ - switch (sysval) { - /* Vertex shader */ - case SYSTEM_VALUE_VERTEX_ID: - return TGSI_SEMANTIC_VERTEXID; - case SYSTEM_VALUE_INSTANCE_ID: - return TGSI_SEMANTIC_INSTANCEID; - case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: - return TGSI_SEMANTIC_VERTEXID_NOBASE; - case SYSTEM_VALUE_BASE_VERTEX: - return TGSI_SEMANTIC_BASEVERTEX; - case SYSTEM_VALUE_BASE_INSTANCE: - return TGSI_SEMANTIC_BASEINSTANCE; - case SYSTEM_VALUE_DRAW_ID: - return TGSI_SEMANTIC_DRAWID; - - /* Geometry shader */ - case SYSTEM_VALUE_INVOCATION_ID: - return TGSI_SEMANTIC_INVOCATIONID; - - /* Fragment shader */ - case SYSTEM_VALUE_FRAG_COORD: - return TGSI_SEMANTIC_POSITION; - case SYSTEM_VALUE_POINT_COORD: - return TGSI_SEMANTIC_PCOORD; - case SYSTEM_VALUE_FRONT_FACE: - return TGSI_SEMANTIC_FACE; - case SYSTEM_VALUE_SAMPLE_ID: - return TGSI_SEMANTIC_SAMPLEID; - case SYSTEM_VALUE_SAMPLE_POS: - return TGSI_SEMANTIC_SAMPLEPOS; - case SYSTEM_VALUE_SAMPLE_MASK_IN: - return TGSI_SEMANTIC_SAMPLEMASK; - case SYSTEM_VALUE_HELPER_INVOCATION: - return TGSI_SEMANTIC_HELPER_INVOCATION; - - /* Tessellation shader */ - case SYSTEM_VALUE_TESS_COORD: - return TGSI_SEMANTIC_TESSCOORD; - case SYSTEM_VALUE_VERTICES_IN: - return TGSI_SEMANTIC_VERTICESIN; - case SYSTEM_VALUE_PRIMITIVE_ID: - return TGSI_SEMANTIC_PRIMID; - case SYSTEM_VALUE_TESS_LEVEL_OUTER: - return TGSI_SEMANTIC_TESSOUTER; - case SYSTEM_VALUE_TESS_LEVEL_INNER: - return TGSI_SEMANTIC_TESSINNER; - - /* Compute shader */ - case SYSTEM_VALUE_LOCAL_INVOCATION_ID: - return TGSI_SEMANTIC_THREAD_ID; - case SYSTEM_VALUE_WORK_GROUP_ID: - return TGSI_SEMANTIC_BLOCK_ID; - case SYSTEM_VALUE_NUM_WORK_GROUPS: - return TGSI_SEMANTIC_GRID_SIZE; - case SYSTEM_VALUE_LOCAL_GROUP_SIZE: - return TGSI_SEMANTIC_BLOCK_SIZE; - - /* ARB_shader_ballot */ - case SYSTEM_VALUE_SUBGROUP_SIZE: - return TGSI_SEMANTIC_SUBGROUP_SIZE; - case SYSTEM_VALUE_SUBGROUP_INVOCATION: - return TGSI_SEMANTIC_SUBGROUP_INVOCATION; - case SYSTEM_VALUE_SUBGROUP_EQ_MASK: - return TGSI_SEMANTIC_SUBGROUP_EQ_MASK; - case SYSTEM_VALUE_SUBGROUP_GE_MASK: - return TGSI_SEMANTIC_SUBGROUP_GE_MASK; - case SYSTEM_VALUE_SUBGROUP_GT_MASK: - return TGSI_SEMANTIC_SUBGROUP_GT_MASK; - case SYSTEM_VALUE_SUBGROUP_LE_MASK: - return TGSI_SEMANTIC_SUBGROUP_LE_MASK; - case SYSTEM_VALUE_SUBGROUP_LT_MASK: - return TGSI_SEMANTIC_SUBGROUP_LT_MASK; - - /* Unhandled */ - case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: - case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: - case SYSTEM_VALUE_VERTEX_CNT: - case SYSTEM_VALUE_BARYCENTRIC_PIXEL: - case SYSTEM_VALUE_BARYCENTRIC_SAMPLE: - case SYSTEM_VALUE_BARYCENTRIC_CENTROID: - case SYSTEM_VALUE_BARYCENTRIC_SIZE: - default: - assert(!"Unexpected SYSTEM_VALUE_ enum"); - return TGSI_SEMANTIC_COUNT; - } -} - /** * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. */ @@ -6231,6 +6174,8 @@ compile_tgsi_instruction(struct st_translate *t, case TGSI_OPCODE_SAMP2HND: if (inst->resource.file == PROGRAM_SAMPLER) { src[num_src] = t->samplers[inst->resource.index]; + if (t->tg4_component_in_swizzle && inst->op == TGSI_OPCODE_TG4) + src[num_src].SwizzleX = inst->gather_component; } else { /* Bindless samplers. */ src[num_src] = translate_src(t, &inst->resource); @@ -6695,6 +6640,7 @@ st_translate_program( t->procType = procType; t->need_uarl = !screen->get_param(screen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS); + t->tg4_component_in_swizzle = screen->get_param(screen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE); t->inputMapping = inputMapping; t->outputMapping = outputMapping; t->ureg = ureg; @@ -6867,6 +6813,12 @@ st_translate_program( goto out; } } + + if (program->shader->Program->sh.fs.BlendSupport) + ureg_property(ureg, + TGSI_PROPERTY_FS_BLEND_EQUATION_ADVANCED, + program->shader->Program->sh.fs.BlendSupport); + } else if (procType == PIPE_SHADER_VERTEX) { for (i = 0; i < numOutputs; i++) { @@ -6884,6 +6836,9 @@ st_translate_program( emit_compute_block_size(proginfo, ureg); } + if (program->shader->Program->info.layer_viewport_relative) + ureg_property(ureg, TGSI_PROPERTY_LAYER_VIEWPORT_RELATIVE, 1); + /* Declare address register. */ if (program->num_address_regs > 0) { @@ -6899,7 +6854,7 @@ st_translate_program( for (i = 0; sysInputs; i++) { if (sysInputs & (1ull << i)) { - enum tgsi_semantic semName = _mesa_sysval_to_semantic(i); + enum tgsi_semantic semName = tgsi_get_sysval_semantic(i); t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); @@ -7033,8 +6988,10 @@ st_translate_program( if (!st_context(ctx)->has_hw_atomics) { for (i = 0; i < prog->info.num_abos; i++) { - unsigned index = prog->sh.AtomicBuffers[i]->Binding; - assert(index < frag_const->MaxAtomicBuffers); + unsigned index = (prog->info.num_ssbos + + prog->sh.AtomicBuffers[i]->Binding); + assert(prog->sh.AtomicBuffers[i]->Binding < + frag_const->MaxAtomicBuffers); t->buffers[index] = ureg_DECL_buffer(ureg, index, true); } } else { @@ -7049,11 +7006,7 @@ st_translate_program( assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks); for (i = 0; i < prog->info.num_ssbos; i++) { - unsigned index = i; - if (!st_context(ctx)->has_hw_atomics) - index += frag_const->MaxAtomicBuffers; - - t->buffers[index] = ureg_DECL_buffer(ureg, index, false); + t->buffers[i] = ureg_DECL_buffer(ureg, i, false); } } @@ -7148,6 +7101,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, PIPE_CAP_TGSI_TEX_TXF_LZ); v->need_uarl = !pscreen->get_param(pscreen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS); + v->tg4_component_in_swizzle = pscreen->get_param(pscreen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE); v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); skip_merge_registers = @@ -7286,34 +7240,7 @@ get_mesa_program_tgsi(struct gl_context *ctx, return NULL; } - struct st_vertex_program *stvp; - struct st_fragment_program *stfp; - struct st_common_program *stp; - struct st_compute_program *stcp; - - switch (shader->Stage) { - case MESA_SHADER_VERTEX: - stvp = (struct st_vertex_program *)prog; - stvp->glsl_to_tgsi = v; - break; - case MESA_SHADER_FRAGMENT: - stfp = (struct st_fragment_program *)prog; - stfp->glsl_to_tgsi = v; - break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - stp = st_common_program(prog); - stp->glsl_to_tgsi = v; - break; - case MESA_SHADER_COMPUTE: - stcp = (struct st_compute_program *)prog; - stcp->glsl_to_tgsi = v; - break; - default: - assert(!"should not be reached"); - return NULL; - } + st_program(prog)->glsl_to_tgsi = v; PRINT_STATS(v->print_stats()); @@ -7424,6 +7351,14 @@ st_link_tgsi(struct gl_context *ctx, struct gl_shader_program *prog) st_set_prog_affected_state_flags(linked_prog); if (linked_prog) { + /* This is really conservative: */ + linked_prog->info.writes_memory = + linked_prog->info.num_ssbos || + linked_prog->info.num_images || + ctx->Extensions.ARB_bindless_texture || + (linked_prog->sh.LinkedTransformFeedback && + linked_prog->sh.LinkedTransformFeedback->NumVarying); + if (!ctx->Driver.ProgramStringNotify(ctx, _mesa_shader_stage_to_program(i), linked_prog)) { @@ -7435,35 +7370,3 @@ st_link_tgsi(struct gl_context *ctx, struct gl_shader_program *prog) return GL_TRUE; } - -extern "C" { - -void -st_translate_stream_output_info(struct gl_transform_feedback_info *info, - const ubyte outputMapping[], - struct pipe_stream_output_info *so) -{ - unsigned i; - - if (!info) { - so->num_outputs = 0; - return; - } - - for (i = 0; i < info->NumOutputs; i++) { - so->output[i].register_index = - outputMapping[info->Outputs[i].OutputRegister]; - so->output[i].start_component = info->Outputs[i].ComponentOffset; - so->output[i].num_components = info->Outputs[i].NumComponents; - so->output[i].output_buffer = info->Outputs[i].OutputBuffer; - so->output[i].dst_offset = info->Outputs[i].DstOffset; - so->output[i].stream = info->Outputs[i].StreamId; - } - - for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) { - so->stride[i] = info->Buffers[i].Stride; - } - so->num_outputs = info->NumOutputs; -} - -} /* extern "C" */