X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr600%2Fr600_shader.c;h=bdaf28ced2c486ebc528d94a4b770743b8743f75;hb=ab8fb5a082513bb699b224c52ac82779752608fb;hp=df40f94bdcf6c83d720648c11fee4cce31f5f487;hpb=83c86e09a8d7610022eae49731ac1ed03ee4f67b;p=mesa.git

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index df40f94bdcf..bdaf28ced2c 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -21,7 +21,6 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 #include "r600_sq.h"
-#include "r600_llvm.h"
 #include "r600_formats.h"
 #include "r600_opcodes.h"
 #include "r600_shader.h"
@@ -34,6 +33,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
 #include "tgsi/tgsi_dump.h"
+#include "util/u_bitcast.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include <stdio.h>
@@ -137,7 +137,7 @@ static int store_shader(struct pipe_context *ctx,
 
 	if (shader->bo == NULL) {
 		shader->bo = (struct r600_resource*)
-			pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
+			pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
 		if (shader->bo == NULL) {
 			return -ENOMEM;
 		}
@@ -183,21 +183,18 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		R600_ERR("translation from TGSI failed !\n");
 		goto error;
 	}
-	if (shader->shader.processor_type == TGSI_PROCESSOR_VERTEX) {
+	if (shader->shader.processor_type == PIPE_SHADER_VERTEX) {
 		/* only disable for vertex shaders in tess paths */
 		if (key.vs.as_ls)
 			use_sb = 0;
 	}
-	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_TESS_CTRL);
-	use_sb &= (shader->shader.processor_type != TGSI_PROCESSOR_TESS_EVAL);
+	use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_CTRL);
+	use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_EVAL);
 
 	/* disable SB for shaders using doubles */
 	use_sb &= !shader->shader.uses_doubles;
 
-	/* Check if the bytecode has already been built. When using the llvm
-	 * backend, r600_shader_from_tgsi() will take care of building the
-	 * bytecode.
-	 */
+	/* Check if the bytecode has already been built. */
 	if (!shader->shader.bc.bytecode) {
 		r = r600_bytecode_build(&shader->shader.bc);
 		if (r) {
@@ -238,16 +235,16 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 
 	/* Build state.
*/ switch (shader->shader.processor_type) { - case TGSI_PROCESSOR_TESS_CTRL: + case PIPE_SHADER_TESS_CTRL: evergreen_update_hs_state(ctx, shader); break; - case TGSI_PROCESSOR_TESS_EVAL: + case PIPE_SHADER_TESS_EVAL: if (key.tes.as_es) evergreen_update_es_state(ctx, shader); else evergreen_update_vs_state(ctx, shader); break; - case TGSI_PROCESSOR_GEOMETRY: + case PIPE_SHADER_GEOMETRY: if (rctx->b.chip_class >= EVERGREEN) { evergreen_update_gs_state(ctx, shader); evergreen_update_vs_state(ctx, shader->gs_copy_shader); @@ -256,7 +253,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, r600_update_vs_state(ctx, shader->gs_copy_shader); } break; - case TGSI_PROCESSOR_VERTEX: + case PIPE_SHADER_VERTEX: export_shader = key.vs.as_es; if (rctx->b.chip_class >= EVERGREEN) { if (key.vs.as_ls) @@ -272,7 +269,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, r600_update_vs_state(ctx, shader); } break; - case TGSI_PROCESSOR_FRAGMENT: + case PIPE_SHADER_FRAGMENT: if (rctx->b.chip_class >= EVERGREEN) { evergreen_update_ps_state(ctx, shader); } else { @@ -292,7 +289,7 @@ error: void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader) { - pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL); + r600_resource_reference(&shader->bo, NULL); r600_bytecode_clear(&shader->shader.bc); r600_release_command_buffer(&shader->command_buffer); } @@ -332,7 +329,6 @@ struct r600_shader_ctx { uint32_t *literals; uint32_t nliterals; uint32_t max_driver_temp_used; - boolean use_llvm; /* needed for evergreen interpolation */ struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid /* evergreen/cayman also store sample mask in face register */ @@ -401,10 +397,6 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs); return -EINVAL; } - if (i->Instruction.Predicate) { - R600_ERR("predicate unsupported\n"); - return -EINVAL; - } #if 0 if (i->Instruction.Label) { R600_ERR("label unsupported\n"); @@ -417,12 +409,12 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) case TGSI_FILE_CONSTANT: break; case TGSI_FILE_INPUT: - if (ctx->type == TGSI_PROCESSOR_GEOMETRY || - ctx->type == TGSI_PROCESSOR_TESS_CTRL || - ctx->type == TGSI_PROCESSOR_TESS_EVAL) + if (ctx->type == PIPE_SHADER_GEOMETRY || + ctx->type == PIPE_SHADER_TESS_CTRL || + ctx->type == PIPE_SHADER_TESS_EVAL) break; case TGSI_FILE_OUTPUT: - if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) + if (ctx->type == PIPE_SHADER_TESS_CTRL) break; default: R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, @@ -434,7 +426,7 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) } for (j = 0; j < i->Instruction.NumDstRegs; j++) { if (i->Dst[j].Register.Dimension) { - if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) + if (ctx->type == PIPE_SHADER_TESS_CTRL) continue; R600_ERR("unsupported dst (dimension)\n"); return -EINVAL; @@ -661,11 +653,9 @@ static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) ctx->shader->input[index].lds_pos = ctx->shader->nlds++; if (ctx->shader->input[index].interpolate > 0) { evergreen_interp_assign_ij_index(ctx, index); - if (!ctx->use_llvm) - r = evergreen_interp_alu(ctx, index); + r = evergreen_interp_alu(ctx, index); } else { - if (!ctx->use_llvm) - r = evergreen_interp_flat(ctx, index); + r = evergreen_interp_flat(ctx, index); } } return r; @@ -865,13 +855,13 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_INPUT: for (j = 0; j < count; 
j++) { i = ctx->shader->ninput + j; - assert(i < Elements(ctx->shader->input)); + assert(i < ARRAY_SIZE(ctx->shader->input)); ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index + j; ctx->shader->input[i].interpolate = d->Interp.Interpolate; ctx->shader->input[i].interpolate_location = d->Interp.Location; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j; - if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + if (ctx->type == PIPE_SHADER_FRAGMENT) { ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); switch (ctx->shader->input[i].name) { case TGSI_SEMANTIC_FACE: @@ -896,7 +886,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) if ((r = evergreen_interp_input(ctx, i))) return r; } - } else if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { + } else if (ctx->type == PIPE_SHADER_GEOMETRY) { /* FIXME probably skip inputs if they aren't passed in the ring */ ctx->shader->input[i].ring_offset = ctx->next_ring_offset; ctx->next_ring_offset += 16; @@ -909,15 +899,15 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) case TGSI_FILE_OUTPUT: for (j = 0; j < count; j++) { i = ctx->shader->noutput + j; - assert(i < Elements(ctx->shader->output)); + assert(i < ARRAY_SIZE(ctx->shader->output)); ctx->shader->output[i].name = d->Semantic.Name; ctx->shader->output[i].sid = d->Semantic.Index + j; ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j; ctx->shader->output[i].interpolate = d->Interp.Interpolate; ctx->shader->output[i].write_mask = d->Declaration.UsageMask; - if (ctx->type == TGSI_PROCESSOR_VERTEX || - ctx->type == TGSI_PROCESSOR_GEOMETRY || - ctx->type == TGSI_PROCESSOR_TESS_EVAL) { + if (ctx->type == PIPE_SHADER_VERTEX || + ctx->type == PIPE_SHADER_GEOMETRY || + ctx->type == PIPE_SHADER_TESS_EVAL) { ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); switch (d->Semantic.Name) { case TGSI_SEMANTIC_CLIPDIST: @@ -946,10 +936,10 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->cv_output = i; break; } - if (ctx->type == TGSI_PROCESSOR_GEOMETRY) { + if (ctx->type == PIPE_SHADER_GEOMETRY) { ctx->gs_out_ring_offset += 16; } - } else if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + } else if (ctx->type == PIPE_SHADER_FRAGMENT) { switch (d->Semantic.Name) { case TGSI_SEMANTIC_COLOR: ctx->shader->nr_ps_max_color_exports++; @@ -1119,12 +1109,13 @@ static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_off interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index]; k = eg_get_interpolator_index(interpolate, location); - ctx->eg_interpolators[k].enabled = true; + if (k >= 0) + ctx->eg_interpolators[k].enabled = true; } } else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) { struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration; if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { - for (k = 0; k < Elements(inputs); k++) { + for (k = 0; k < ARRAY_SIZE(inputs); k++) { if (d->Semantic.Name == inputs[k].name || d->Semantic.Name == inputs[k].alternate_name) { inputs[k].enabled = true; @@ -1136,7 +1127,7 @@ static int allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_off tgsi_parse_free(&parse); - for (i = 0; i < Elements(inputs); i++) { + for (i = 0; i < ARRAY_SIZE(inputs); i++) { boolean enabled = inputs[i].enabled; int *reg = inputs[i].reg; unsigned name = inputs[i].name; @@ -1215,7 +1206,8 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) interpolate = 
ctx->info.input_interpolate[inst->Src[0].Register.Index]; k = eg_get_interpolator_index(interpolate, location); - ctx->eg_interpolators[k].enabled = true; + if (k >= 0) + ctx->eg_interpolators[k].enabled = true; } } } @@ -1224,7 +1216,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx) /* assign gpr to each interpolator according to priority */ num_baryc = 0; - for (i = 0; i < Elements(ctx->eg_interpolators); i++) { + for (i = 0; i < ARRAY_SIZE(ctx->eg_interpolators); i++) { if (ctx->eg_interpolators[i].enabled) { ctx->eg_interpolators[i].ij_index = num_baryc; num_baryc ++; @@ -1349,7 +1341,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, r600_src->swizzle[2] = 0; r600_src->swizzle[3] = 0; r600_src->sel = 0; - } else if (ctx->type != TGSI_PROCESSOR_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { + } else if (ctx->type != PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) { r600_src->swizzle[0] = 3; r600_src->swizzle[1] = 3; r600_src->swizzle[2] = 3; @@ -1368,7 +1360,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx, } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) { r600_src->sel = 2; } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) { - if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) { + if (ctx->type == PIPE_SHADER_TESS_CTRL) { r600_src->sel = ctx->tess_input_info; r600_src->swizzle[0] = 2; r600_src->swizzle[1] = 2; @@ -1381,13 +1373,13 @@ static void tgsi_src(struct r600_shader_ctx *ctx, r600_src->swizzle[2] = 3; r600_src->swizzle[3] = 3; } - } else if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { + } else if (ctx->type == PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { r600_src->sel = 0; r600_src->swizzle[0] = 0; r600_src->swizzle[1] = 0; r600_src->swizzle[2] = 0; r600_src->swizzle[3] = 0; - } else if (ctx->type == TGSI_PROCESSOR_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { + } else if (ctx->type == PIPE_SHADER_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { r600_src->sel = 0; r600_src->swizzle[0] = 3; r600_src->swizzle[1] = 3; @@ -1860,19 +1852,19 @@ static int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx) for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { struct tgsi_full_src_register *src = &inst->Src[i]; - if (ctx->type == TGSI_PROCESSOR_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) { + if (ctx->type == PIPE_SHADER_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) { int treg = r600_get_temp(ctx); fetch_tes_input(ctx, src, treg); ctx->src[i].sel = treg; ctx->src[i].rel = 0; } - if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) { + if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) { int treg = r600_get_temp(ctx); fetch_tcs_input(ctx, src, treg); ctx->src[i].sel = treg; ctx->src[i].rel = 0; } - if (ctx->type == TGSI_PROCESSOR_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) { + if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) { int treg = r600_get_temp(ctx); fetch_tcs_output(ctx, src, treg); ctx->src[i].sel = treg; @@ -2165,7 +2157,7 @@ static int 
generate_gs_copy_shader(struct r600_context *rctx, ctx.shader = &cshader->shader; ctx.bc = &ctx.shader->bc; - ctx.type = ctx.bc->type = TGSI_PROCESSOR_VERTEX; + ctx.type = ctx.bc->type = PIPE_SHADER_VERTEX; r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family, rctx->screen->has_compressed_msaa_texturing); @@ -2930,28 +2922,22 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, struct pipe_stream_output_info so = pipeshader->selector->so; struct tgsi_full_immediate *immediate; struct r600_shader_ctx ctx; - struct r600_bytecode_output output[32]; + struct r600_bytecode_output output[ARRAY_SIZE(shader->output)]; unsigned output_done, noutput; unsigned opcode; int i, j, k, r = 0; int next_param_base = 0, next_clip_base; int max_color_exports = MAX2(key.ps.nr_cbufs, 1); - /* Declarations used by llvm code */ - bool use_llvm = false; bool indirect_gprs; bool ring_outputs = false; bool lds_outputs = false; bool lds_inputs = false; bool pos_emitted = false; -#ifdef R600_USE_LLVM - use_llvm = rscreen->b.debug_flags & DBG_LLVM; -#endif ctx.bc = &shader->bc; ctx.shader = shader; ctx.native_integers = true; - r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family, rscreen->has_compressed_msaa_texturing); ctx.tokens = tokens; @@ -2967,7 +2953,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.bc->type = shader->processor_type; switch (ctx.type) { - case TGSI_PROCESSOR_VERTEX: + case PIPE_SHADER_VERTEX: shader->vs_as_gs_a = key.vs.as_gs_a; shader->vs_as_es = key.vs.as_es; shader->vs_as_ls = key.vs.as_ls; @@ -2976,21 +2962,21 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, if (shader->vs_as_ls) lds_outputs = true; break; - case TGSI_PROCESSOR_GEOMETRY: + case PIPE_SHADER_GEOMETRY: ring_outputs = true; break; - case TGSI_PROCESSOR_TESS_CTRL: + case PIPE_SHADER_TESS_CTRL: shader->tcs_prim_mode = key.tcs.prim_mode; lds_outputs = true; lds_inputs = true; break; - case TGSI_PROCESSOR_TESS_EVAL: + case PIPE_SHADER_TESS_EVAL: shader->tes_as_es = key.tes.as_es; lds_inputs = true; if (shader->tes_as_es) ring_outputs = true; break; - case TGSI_PROCESSOR_FRAGMENT: + case PIPE_SHADER_FRAGMENT: shader->two_side = key.ps.color_two_side; break; default: @@ -3043,33 +3029,23 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.file_offset[i] = 0; } -#ifdef R600_USE_LLVM - if (use_llvm && ctx.info.indirect_files && (ctx.info.indirect_files & (1 << TGSI_FILE_CONSTANT)) != ctx.info.indirect_files) { - fprintf(stderr, "Warning: R600 LLVM backend does not support " - "indirect adressing. 
Falling back to TGSI " - "backend.\n"); - use_llvm = 0; - } -#endif - if (ctx.type == TGSI_PROCESSOR_VERTEX) { + if (ctx.type == PIPE_SHADER_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (!use_llvm) { - r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); - } + r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS); } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { + if (ctx.type == PIPE_SHADER_FRAGMENT) { if (ctx.bc->chip_class >= EVERGREEN) ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); else ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]); } - if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { + if (ctx.type == PIPE_SHADER_GEOMETRY) { /* FIXME 1 would be enough in some cases (3 or less input vertices) */ ctx.file_offset[TGSI_FILE_INPUT] = 2; } - if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) + if (ctx.type == PIPE_SHADER_TESS_CTRL) ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) { + if (ctx.type == PIPE_SHADER_TESS_EVAL) { bool add_tesscoord = false, add_tess_inout = false; ctx.file_offset[TGSI_FILE_INPUT] = 1; for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) { @@ -3085,16 +3061,10 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, if (add_tess_inout) ctx.file_offset[TGSI_FILE_INPUT]+=2; } - ctx.use_llvm = use_llvm; - if (use_llvm) { - ctx.file_offset[TGSI_FILE_OUTPUT] = - ctx.file_offset[TGSI_FILE_INPUT]; - } else { - ctx.file_offset[TGSI_FILE_OUTPUT] = + ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + ctx.info.file_max[TGSI_FILE_INPUT] + 1; - } ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] + ctx.info.file_max[TGSI_FILE_OUTPUT] + 1; @@ -3108,15 +3078,15 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1; ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2; - if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { + if (ctx.type == PIPE_SHADER_TESS_CTRL) { ctx.tess_input_info = ctx.bc->ar_reg + 3; ctx.tess_output_info = ctx.bc->ar_reg + 4; ctx.temp_reg = ctx.bc->ar_reg + 5; - } else if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) { + } else if (ctx.type == PIPE_SHADER_TESS_EVAL) { ctx.tess_input_info = 0; ctx.tess_output_info = ctx.bc->ar_reg + 3; ctx.temp_reg = ctx.bc->ar_reg + 4; - } else if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { + } else if (ctx.type == PIPE_SHADER_GEOMETRY) { ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3; ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4; ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5; @@ -3155,7 +3125,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, if (shader->vs_as_gs_a) vs_add_primid_output(&ctx, key.vs.prim_id_out); - if (ctx.type == TGSI_PROCESSOR_TESS_EVAL) + if (ctx.type == PIPE_SHADER_TESS_EVAL) r600_fetch_tess_io_info(&ctx); while (!tgsi_parse_end_of_tokens(&ctx.parse)) { @@ -3234,71 +3204,12 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } } -/* LLVM backend setup */ -#ifdef R600_USE_LLVM - if (use_llvm) { - struct radeon_llvm_context radeon_llvm_ctx; - LLVMModuleRef mod; - bool dump = r600_can_dump_shader(&rscreen->b, - tgsi_get_processor_type(tokens)); - boolean use_kill = false; - - memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); - radeon_llvm_ctx.type = ctx.type; - radeon_llvm_ctx.two_side = shader->two_side; - radeon_llvm_ctx.face_gpr = ctx.face_gpr; - radeon_llvm_ctx.inputs_count = ctx.shader->ninput + 1; - radeon_llvm_ctx.r600_inputs = ctx.shader->input; - radeon_llvm_ctx.r600_outputs = ctx.shader->output; - 
radeon_llvm_ctx.color_buffer_count = max_color_exports; - radeon_llvm_ctx.chip_class = ctx.bc->chip_class; - radeon_llvm_ctx.fs_color_all = shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN); - radeon_llvm_ctx.stream_outputs = &so; - radeon_llvm_ctx.alpha_to_one = key.ps.alpha_to_one; - radeon_llvm_ctx.has_compressed_msaa_texturing = - ctx.bc->has_compressed_msaa_texturing; - mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens); - ctx.shader->has_txq_cube_array_z_comp = radeon_llvm_ctx.has_txq_cube_array_z_comp; - ctx.shader->uses_tex_buffers = radeon_llvm_ctx.uses_tex_buffers; - - if (r600_llvm_compile(mod, rscreen->b.family, ctx.bc, &use_kill, - dump, &rctx->b.debug)) { - radeon_llvm_dispose(&radeon_llvm_ctx); - use_llvm = 0; - fprintf(stderr, "R600 LLVM backend failed to compile " - "shader. Falling back to TGSI\n"); - } else { - ctx.file_offset[TGSI_FILE_OUTPUT] = - ctx.file_offset[TGSI_FILE_INPUT]; - } - if (use_kill) - ctx.shader->uses_kill = use_kill; - radeon_llvm_dispose(&radeon_llvm_ctx); - } -#endif -/* End of LLVM backend setup */ - if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) shader->nr_ps_max_color_exports = 8; - if (!use_llvm) { - if (ctx.fragcoord_input >= 0) { - if (ctx.bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { - struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP1_RECIP_IEEE; - alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; - alu.src[0].chan = 3; - - alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; - alu.dst.chan = j; - alu.dst.write = (j == 3); - alu.last = 1; - if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) - return r; - } - } else { + if (ctx.fragcoord_input >= 0) { + if (ctx.bc->chip_class == CAYMAN) { + for (j = 0 ; j < 4; j++) { struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_RECIP_IEEE; @@ -3306,87 +3217,100 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, alu.src[0].chan = 3; alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; - alu.dst.chan = 3; - alu.dst.write = 1; + alu.dst.chan = j; + alu.dst.write = (j == 3); alu.last = 1; if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) return r; } - } - - if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { + } else { struct r600_bytecode_alu alu; - int r; - - /* GS thread with no output workaround - emit a cut at start of GS */ - if (ctx.bc->chip_class == R600) - r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_RECIP_IEEE; + alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; + alu.src[0].chan = 3; - for (j = 0; j < 4; j++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP1_MOV; - alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; - alu.src[0].value = 0; - alu.dst.sel = ctx.gs_export_gpr_tregs[j]; - alu.dst.write = 1; - alu.last = 1; - r = r600_bytecode_add_alu(ctx.bc, &alu); - if (r) - return r; - } + alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) + return r; } + } - if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) - r600_fetch_tess_io_info(&ctx); + if (ctx.type == PIPE_SHADER_GEOMETRY) { + struct r600_bytecode_alu alu; + int r; - if (shader->two_side && ctx.colors_used) { - if ((r = process_twoside_color_inputs(&ctx))) + /* GS thread with no output workaround - emit a cut at start of GS */ + if (ctx.bc->chip_class == R600) + r600_bytecode_add_cfinst(ctx.bc, 
CF_OP_CUT_VERTEX); + + for (j = 0; j < 4; j++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[0].value = 0; + alu.dst.sel = ctx.gs_export_gpr_tregs[j]; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx.bc, &alu); + if (r) return r; } + } - tgsi_parse_init(&ctx.parse, tokens); - while (!tgsi_parse_end_of_tokens(&ctx.parse)) { - tgsi_parse_token(&ctx.parse); - switch (ctx.parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - r = tgsi_is_supported(&ctx); - if (r) - goto out_err; - ctx.max_driver_temp_used = 0; - /* reserve first tmp for everyone */ - r600_get_temp(&ctx); + if (ctx.type == PIPE_SHADER_TESS_CTRL) + r600_fetch_tess_io_info(&ctx); - opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; - if ((r = tgsi_split_constant(&ctx))) + if (shader->two_side && ctx.colors_used) { + if ((r = process_twoside_color_inputs(&ctx))) + return r; + } + + tgsi_parse_init(&ctx.parse, tokens); + while (!tgsi_parse_end_of_tokens(&ctx.parse)) { + tgsi_parse_token(&ctx.parse); + switch (ctx.parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + r = tgsi_is_supported(&ctx); + if (r) + goto out_err; + ctx.max_driver_temp_used = 0; + /* reserve first tmp for everyone */ + r600_get_temp(&ctx); + + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + if ((r = tgsi_split_constant(&ctx))) + goto out_err; + if ((r = tgsi_split_literal_constant(&ctx))) + goto out_err; + if (ctx.type == PIPE_SHADER_GEOMETRY) { + if ((r = tgsi_split_gs_inputs(&ctx))) goto out_err; - if ((r = tgsi_split_literal_constant(&ctx))) + } else if (lds_inputs) { + if ((r = tgsi_split_lds_inputs(&ctx))) goto out_err; - if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { - if ((r = tgsi_split_gs_inputs(&ctx))) - goto out_err; - } else if (lds_inputs) { - if ((r = tgsi_split_lds_inputs(&ctx))) - goto out_err; - } - if (ctx.bc->chip_class == CAYMAN) - ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; - else if (ctx.bc->chip_class >= EVERGREEN) - ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; - else - ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; - r = ctx.inst_info->process(&ctx); + } + if (ctx.bc->chip_class == CAYMAN) + ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; + else if (ctx.bc->chip_class >= EVERGREEN) + ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; + else + ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; + r = ctx.inst_info->process(&ctx); + if (r) + goto out_err; + + if (ctx.type == PIPE_SHADER_TESS_CTRL) { + r = r600_store_tcs_output(&ctx); if (r) goto out_err; - - if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) { - r = r600_store_tcs_output(&ctx); - if (r) - goto out_err; - } - break; - default: - break; } + break; + default: + break; } } @@ -3437,8 +3361,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, alu.dst.write = (j == ochan); if (j == 3) alu.last = 1; - if (!use_llvm) - r = r600_bytecode_add_alu(ctx.bc, &alu); + r = r600_bytecode_add_alu(ctx.bc, &alu); if (r) return r; } @@ -3446,11 +3369,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } /* Add stream outputs. 
*/ - if (!use_llvm && so.num_outputs) { + if (so.num_outputs) { bool emit = false; - if (!lds_outputs && !ring_outputs && ctx.type == TGSI_PROCESSOR_VERTEX) + if (!lds_outputs && !ring_outputs && ctx.type == PIPE_SHADER_VERTEX) emit = true; - if (!ring_outputs && ctx.type == TGSI_PROCESSOR_TESS_EVAL) + if (!ring_outputs && ctx.type == PIPE_SHADER_TESS_EVAL) emit = true; if (emit) emit_streamout(&ctx, &so, -1, NULL); @@ -3458,11 +3381,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; convert_edgeflag_to_int(&ctx); - if (ctx.type == TGSI_PROCESSOR_TESS_CTRL) + if (ctx.type == PIPE_SHADER_TESS_CTRL) r600_emit_tess_factor(&ctx); if (lds_outputs) { - if (ctx.type == TGSI_PROCESSOR_VERTEX) { + if (ctx.type == PIPE_SHADER_VERTEX) { if (ctx.shader->noutput) emit_lds_vs_writes(&ctx); } @@ -3491,8 +3414,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, output[j].type = -1; output[j].op = CF_OP_EXPORT; switch (ctx.type) { - case TGSI_PROCESSOR_VERTEX: - case TGSI_PROCESSOR_TESS_EVAL: + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_TESS_EVAL: switch (shader->output[i].name) { case TGSI_SEMANTIC_POSITION: output[j].array_base = 60; @@ -3582,7 +3505,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } break; - case TGSI_PROCESSOR_FRAGMENT: + case PIPE_SHADER_FRAGMENT: if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { /* never export more colors than the number of CBs */ if (shader->output[i].sid >= max_color_exports) { @@ -3636,7 +3559,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, goto out_err; } break; - case TGSI_PROCESSOR_TESS_CTRL: + case PIPE_SHADER_TESS_CTRL: break; default: R600_ERR("unsupported processor type %d\n", ctx.type); @@ -3651,7 +3574,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } /* add fake position export */ - if ((ctx.type == TGSI_PROCESSOR_VERTEX || ctx.type == TGSI_PROCESSOR_TESS_EVAL) && pos_emitted == false) { + if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && pos_emitted == false) { memset(&output[j], 0, sizeof(struct r600_bytecode_output)); output[j].gpr = 0; output[j].elem_size = 3; @@ -3667,7 +3590,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } /* add fake param output for vertex shader if no param is exported */ - if ((ctx.type == TGSI_PROCESSOR_VERTEX || ctx.type == TGSI_PROCESSOR_TESS_EVAL) && next_param_base == 0) { + if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && next_param_base == 0) { memset(&output[j], 0, sizeof(struct r600_bytecode_output)); output[j].gpr = 0; output[j].elem_size = 3; @@ -3683,7 +3606,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } /* add fake pixel export */ - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && shader->nr_ps_color_exports == 0) { + if (ctx.type == PIPE_SHADER_FRAGMENT && shader->nr_ps_color_exports == 0) { memset(&output[j], 0, sizeof(struct r600_bytecode_output)); output[j].gpr = 0; output[j].elem_size = 3; @@ -3709,31 +3632,27 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, } } /* add output to bytecode */ - if (!use_llvm) { - for (i = 0; i < noutput; i++) { - r = r600_bytecode_add_output(ctx.bc, &output[i]); - if (r) - goto out_err; - } + for (i = 0; i < noutput; i++) { + r = r600_bytecode_add_output(ctx.bc, &output[i]); + if (r) + goto out_err; } } /* add program end */ - if (!use_llvm) { - if (ctx.bc->chip_class == CAYMAN) - cm_bytecode_add_cf_end(ctx.bc); - else { - 
const struct cf_op_info *last = NULL; + if (ctx.bc->chip_class == CAYMAN) + cm_bytecode_add_cf_end(ctx.bc); + else { + const struct cf_op_info *last = NULL; - if (ctx.bc->cf_last) - last = r600_isa_cf(ctx.bc->cf_last->op); + if (ctx.bc->cf_last) + last = r600_isa_cf(ctx.bc->cf_last->op); - /* alu clause instructions don't have EOP bit, so add NOP */ - if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) - r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); + /* alu clause instructions don't have EOP bit, so add NOP */ + if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) + r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); - ctx.bc->cf_last->end_of_program = 1; - } + ctx.bc->cf_last->end_of_program = 1; } /* check GPR limit - we have 124 = 128 - 4 @@ -3744,7 +3663,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, goto out_err; } - if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { + if (ctx.type == PIPE_SHADER_GEOMETRY) { if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) return r; } @@ -3811,7 +3730,7 @@ static void tgsi_dst(struct r600_shader_ctx *ctx, if (inst->Instruction.Saturate) { r600_dst->clamp = 1; } - if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) { + if (ctx->type == PIPE_SHADER_TESS_CTRL) { if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) { return; } @@ -3883,9 +3802,6 @@ static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool /* handle some special cases */ if (i == 1 || i == 3) { switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) { - case TGSI_OPCODE_SUB: - r600_bytecode_src_toggle_neg(&alu.src[1]); - break; case TGSI_OPCODE_DABS: r600_bytecode_src_set_abs(&alu.src[0]); break; @@ -3988,6 +3904,11 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) int i, j, r, lasti = tgsi_last_instruction(write_mask); /* use temp register if trans_only and more than one dst component */ int use_tmp = trans_only && (write_mask ^ (1 << lasti)); + unsigned op = ctx->inst_info->op; + + if (op == ALU_OP2_MUL_IEEE && + ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) + op = ALU_OP2_MUL; for (i = 0; i <= lasti; i++) { if (!(write_mask & (1 << i))) @@ -4001,7 +3922,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) } else tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); - alu.op = ctx->inst_info->op; + alu.op = op; if (!swap) { for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], i); @@ -4010,17 +3931,6 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) r600_bytecode_src(&alu.src[0], &ctx->src[1], i); r600_bytecode_src(&alu.src[1], &ctx->src[0], i); } - /* handle some special cases */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_SUB: - r600_bytecode_src_toggle_neg(&alu.src[1]); - break; - case TGSI_OPCODE_ABS: - r600_bytecode_src_set_abs(&alu.src[0]); - break; - default: - break; - } if (i == lasti || trans_only) { alu.last = 1; } @@ -4278,42 +4188,64 @@ static int egcm_double_to_int(struct r600_shader_ctx *ctx) return 0; } -static int cayman_emit_double_instr(struct r600_shader_ctx *ctx) +static int cayman_emit_unary_double_raw(struct r600_bytecode *bc, + unsigned op, + int dst_reg, + struct r600_shader_src *src, + bool abs) { - struct tgsi_full_instruction 
*inst = &ctx->parse.FullToken.FullInstruction; - int i, r; struct r600_bytecode_alu alu; - int last_slot = 3; - int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); - int t1 = ctx->temp_reg; + const int last_slot = 3; + int r; /* these have to write the result to X/Y by the looks of it */ - for (i = 0 ; i < last_slot; i++) { + for (int i = 0 ; i < last_slot; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ctx->inst_info->op; - - /* should only be one src regs */ - assert (inst->Instruction.NumSrcRegs == 1); + alu.op = op; - r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); - r600_bytecode_src(&alu.src[1], &ctx->src[0], 0); + r600_bytecode_src(&alu.src[0], src, 1); + r600_bytecode_src(&alu.src[1], src, 0); - /* RSQ should take the absolute value of src */ - if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || - ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) { + if (abs) r600_bytecode_src_set_abs(&alu.src[1]); - } - alu.dst.sel = t1; + + alu.dst.sel = dst_reg; alu.dst.chan = i; alu.dst.write = (i == 0 || i == 1); - if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1) + if (bc->chip_class != CAYMAN || i == last_slot - 1) alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); + r = r600_bytecode_add_alu(bc, &alu); if (r) return r; } + return 0; +} + +static int cayman_emit_double_instr(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int i, r; + struct r600_bytecode_alu alu; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + int t1 = ctx->temp_reg; + + /* should only be one src regs */ + assert(inst->Instruction.NumSrcRegs == 1); + + /* only support one double at a time */ + assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || + inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); + + r = cayman_emit_unary_double_raw( + ctx->bc, ctx->inst_info->op, t1, + &ctx->src[0], + ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || + ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT); + if (r) + return r; + for (i = 0 ; i <= lasti; i++) { if (!(inst->Dst[0].Register.WriteMask & (1 << i))) continue; @@ -4419,25 +4351,27 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); int t1 = ctx->temp_reg; - for (k = 0; k < 2; k++) { - if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2)))) - continue; + /* t1 would get overwritten below if we actually tried to + * multiply two pairs of doubles at a time. */ + assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || + inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); - for (i = 0; i < 4; i++) { - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ctx->inst_info->op; - for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { - r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1)); - } - alu.dst.sel = t1; - alu.dst.chan = i; - alu.dst.write = 1; - if (i == 3) - alu.last = 1; - r = r600_bytecode_add_alu(ctx->bc, &alu); - if (r) - return r; + k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1; + + for (i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ctx->inst_info->op; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 
0 : 1)); } + alu.dst.sel = t1; + alu.dst.chan = i; + alu.dst.write = 1; + if (i == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; } for (i = 0; i <= lasti; i++) { @@ -4459,6 +4393,63 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) return 0; } +/* + * Emit RECIP_64 + MUL_64 to implement division. + */ +static int cayman_ddiv_instr(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int r; + struct r600_bytecode_alu alu; + int t1 = ctx->temp_reg; + int k; + + /* Only support one double at a time. This is the same constraint as + * in DMUL lowering. */ + assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || + inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); + + k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1; + + r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false); + if (r) + return r; + + for (int i = 0; i < 4; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MUL_64; + + r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1)); + + alu.src[1].sel = t1; + alu.src[1].chan = (i == 3) ? 0 : 1; + + alu.dst.sel = t1; + alu.dst.chan = i; + alu.dst.write = 1; + if (i == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + for (int i = 0; i < 2; i++) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.src[0].sel = t1; + alu.src[0].chan = i; + tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst); + alu.dst.write = 1; + if (i == 1) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + /* * r600 - trunc to -PI..PI range * r700 - normalize by dividing by 2PI @@ -4466,10 +4457,6 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) */ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) { - static float half_inv_pi = 1.0 /(3.1415926535 * 2); - static float double_pi = 3.1415926535 * 2; - static float neg_pi = -3.1415926535; - int r; struct r600_bytecode_alu alu; @@ -4485,7 +4472,7 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[1].value = *(uint32_t *)&half_inv_pi; + alu.src[1].value = u_bitcast_f2u(0.5f * M_1_PI); alu.src[2].sel = V_SQ_ALU_SRC_0_5; alu.src[2].chan = 0; alu.last = 1; @@ -4524,8 +4511,8 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) alu.src[2].chan = 0; if (ctx->bc->chip_class == R600) { - alu.src[1].value = *(uint32_t *)&double_pi; - alu.src[2].value = *(uint32_t *)&neg_pi; + alu.src[1].value = u_bitcast_f2u(2.0f * M_PI); + alu.src[2].value = u_bitcast_f2u(-M_PI); } else { alu.src[1].sel = V_SQ_ALU_SRC_1; alu.src[2].sel = V_SQ_ALU_SRC_0_5; @@ -6559,6 +6546,11 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) int i, j, r; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); int temp_regs[4]; + unsigned op = ctx->inst_info->op; + + if (op == ALU_OP3_MULADD_IEEE && + ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) + op = ALU_OP3_MULADD; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { temp_regs[j] = 0; @@ -6570,7 +6562,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ctx->inst_info->op; + alu.op = op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]); if (r) @@ -6596,10 
+6588,14 @@ static int tgsi_dp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_alu alu; int i, j, r; + unsigned op = ctx->inst_info->op; + if (op == ALU_OP2_DOT4_IEEE && + ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS]) + op = ALU_OP2_DOT4; for (i = 0; i < 4; i++) { memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ctx->inst_info->op; + alu.op = op; for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], i); } @@ -6649,7 +6645,7 @@ static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx, inst->Src[index].Register.File != TGSI_FILE_INPUT && inst->Src[index].Register.File != TGSI_FILE_OUTPUT) || ctx->src[index].neg || ctx->src[index].abs || - (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == TGSI_PROCESSOR_GEOMETRY); + (inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == PIPE_SHADER_GEOMETRY); } static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx, @@ -6784,7 +6780,6 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) static int tgsi_tex(struct r600_shader_ctx *ctx) { - static float one_point_five = 1.5f; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bytecode_tex tex; struct r600_bytecode_alu alu; @@ -6986,7 +6981,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value = *(uint32_t *)&one_point_five; + alu.src[2].value = u_bitcast_f2u(1.5f); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 0; @@ -7007,7 +7002,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; alu.src[2].chan = 0; - alu.src[2].value = *(uint32_t *)&one_point_five; + alu.src[2].value = u_bitcast_f2u(1.5f); alu.dst.sel = ctx->temp_reg; alu.dst.chan = 1; @@ -7041,7 +7036,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { if (ctx->bc->chip_class >= EVERGREEN) { int mytmp = r600_get_temp(ctx); - static const float eight = 8.0f; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; alu.src[0].sel = ctx->temp_reg; @@ -7061,7 +7055,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; - alu.src[1].value = *(uint32_t *)&eight; + alu.src[1].value = u_bitcast_f2u(8.0f); alu.src[2].sel = mytmp; alu.src[2].chan = 0; alu.dst.sel = ctx->temp_reg; @@ -7787,6 +7781,15 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) int i, r, j; int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); int temp_regs[3]; + unsigned op; + + if (ctx->src[0].abs && ctx->src[0].neg) { + op = ALU_OP3_CNDE; + ctx->src[0].abs = 0; + ctx->src[0].neg = 0; + } else { + op = ALU_OP3_CNDGE; + } for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { temp_regs[j] = 0; @@ -7799,7 +7802,7 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx) continue; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.op = ALU_OP3_CNDGE; + alu.op = op; r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]); if (r) return r; @@ -8641,14 +8644,15 @@ static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) { - ctx->bc->fc_sp++; + assert(ctx->bc->fc_sp < ARRAY_SIZE(ctx->bc->fc_stack)); ctx->bc->fc_stack[ctx->bc->fc_sp].type = type; 
ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; + ctx->bc->fc_sp++; } static void fc_poplevel(struct r600_shader_ctx *ctx) { - struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp]; + struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp - 1]; free(sp->mid); sp->mid = NULL; sp->num_mid = 0; @@ -8744,24 +8748,24 @@ static int tgsi_else(struct r600_shader_ctx *ctx) r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE); ctx->bc->cf_last->pop_count = 1; - fc_set_mid(ctx, ctx->bc->fc_sp); - ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; + fc_set_mid(ctx, ctx->bc->fc_sp - 1); + ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id; return 0; } static int tgsi_endif(struct r600_shader_ctx *ctx) { pops(ctx, 1); - if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { + if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_IF) { R600_ERR("if/endif unbalanced in shader\n"); return -1; } - if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { - ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; - ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; + if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid == NULL) { + ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2; + ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->pop_count = 1; } else { - ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; + ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[0]->cf_addr = ctx->bc->cf_last->id + 2; } fc_poplevel(ctx); @@ -8788,7 +8792,7 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx) r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END); - if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) { + if (ctx->bc->fc_stack[ctx->bc->fc_sp - 1].type != FC_LOOP) { R600_ERR("loop/endloop in shader code are not paired.\n"); return -EINVAL; } @@ -8798,12 +8802,12 @@ static int tgsi_endloop(struct r600_shader_ctx *ctx) LOOP START point to CF after LOOP END BRK/CONT point to LOOP END CF */ - ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2; + ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->id + 2; - ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; + ctx->bc->fc_stack[ctx->bc->fc_sp - 1].start->cf_addr = ctx->bc->cf_last->id + 2; - for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) { - ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id; + for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp - 1].num_mid; i++) { + ctx->bc->fc_stack[ctx->bc->fc_sp - 1].mid[i]->cf_addr = ctx->bc->cf_last->id; } /* XXX add LOOPRET support */ fc_poplevel(ctx); @@ -8818,7 +8822,7 @@ static int tgsi_loop_breakc(struct r600_shader_ctx *ctx) for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) { - if (FC_LOOP == ctx->bc->fc_stack[fscp].type) + if (FC_LOOP == ctx->bc->fc_stack[fscp - 1].type) break; } if (fscp == 0) { @@ -8837,14 +8841,14 @@ static int tgsi_loop_breakc(struct r600_shader_ctx *ctx) r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK); if (r) return r; - fc_set_mid(ctx, fscp); + fc_set_mid(ctx, fscp - 1); return tgsi_endif(ctx); } else { r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK); if (r) return r; - fc_set_mid(ctx, fscp); + fc_set_mid(ctx, fscp - 1); } return 0; @@ -8856,7 +8860,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--) { - if (FC_LOOP == ctx->bc->fc_stack[fscp].type) + if (FC_LOOP == 
ctx->bc->fc_stack[fscp - 1].type) break; } @@ -8867,7 +8871,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx) r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op); - fc_set_mid(ctx, fscp); + fc_set_mid(ctx, fscp - 1); return 0; } @@ -8962,6 +8966,105 @@ static int tgsi_umad(struct r600_shader_ctx *ctx) return 0; } +static int tgsi_pk2h(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r, i; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + + /* temp.xy = f32_to_f16(src) */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_FLT32_TO_FLT16; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + alu.dst.chan = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.x = temp.y * 0x10000 + temp.x */ + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP3_MULADD_UINT24; + alu.is_op3 = 1; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.last = i == lasti; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 0x10000; + alu.src[2].sel = ctx->temp_reg; + alu.src[2].chan = 0; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + return 0; +} + +static int tgsi_up2h(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bytecode_alu alu; + int r, i; + int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); + + /* temp.x = src.x */ + /* note: no need to mask out the high bits */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.dst.chan = 0; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* temp.y = src.x >> 16 */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_LSHR_INT; + alu.dst.chan = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; + alu.src[1].value = 16; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* dst.wz = dst.xy = f16_to_f32(temp.xy) */ + for (i = 0; i < lasti + 1; i++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << i))) + continue; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.op = ALU_OP1_FLT16_TO_FLT32; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = i % 2; + alu.last = i == lasti; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + return 0; +} + static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { [TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl}, [TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2}, @@ -8976,17 +9079,16 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_RSQ] = { ALU_OP0_NOP, tgsi_rsq}, [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, - [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, + [TGSI_OPCODE_MUL] = { 
ALU_OP2_MUL_IEEE, tgsi_op2}, [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, - [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, - [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, + [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, - [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, - [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, + [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, @@ -8994,7 +9096,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, - [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, + [25] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, @@ -9002,9 +9104,9 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, [32] = { ALU_OP0_NOP, tgsi_unsupported}, - [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, + [33] = { ALU_OP0_NOP, tgsi_unsupported}, [34] = { ALU_OP0_NOP, tgsi_unsupported}, - [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, @@ -9040,7 +9142,7 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, [69] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, - [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, @@ -9074,20 +9176,20 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, - [105] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported}, [106] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, - [112] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, [114] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_loop_breakc}, [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ [TGSI_OPCODE_END] = 
{ ALU_OP0_NOP, tgsi_end}, /* aka HALT */ - [118] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_DFMA] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, @@ -9175,25 +9277,24 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq}, [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp}, [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log}, - [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2}, + [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2}, [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2}, - [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp}, - [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, + [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst}, [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2}, [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2}, [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap}, [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2}, - [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3}, - [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2}, + [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3}, [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp}, - [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3}, [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported}, [22] = { ALU_OP0_NOP, tgsi_unsupported}, [23] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2}, - [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported}, + [25] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2}, [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2}, [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate}, @@ -9201,14 +9302,14 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_POW] = { ALU_OP0_NOP, tgsi_pow}, [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd}, [32] = { ALU_OP0_NOP, tgsi_unsupported}, - [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2}, + [33] = { ALU_OP0_NOP, tgsi_unsupported}, [34] = { ALU_OP0_NOP, tgsi_unsupported}, - [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp}, + [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp}, [TGSI_OPCODE_COS] = { ALU_OP1_COS, tgsi_trig}, [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */ - [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_pk2h}, [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported}, @@ -9223,7 +9324,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex}, [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex}, [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex}, - [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported}, + [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_up2h}, [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported}, [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, @@ -9239,7 +9340,7 @@ static const struct r600_shader_tgsi_instruction 
@@ -9239,7 +9340,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
 [69] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
@@ -9273,20 +9374,20 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
- [105] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported},
 [106] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2},
 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- [112] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
 [114] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
- [118] = { ALU_OP0_NOP, tgsi_unsupported},
+ /* Refer below for TGSI_OPCODE_DFMA */
 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_f2i},
 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv},
 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2},
@@ -9369,6 +9470,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
@@ -9378,6 +9480,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64},
+ [TGSI_OPCODE_DFMA] = { ALU_OP3_FMA_64, tgsi_op3_64},
 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64},
 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64},
 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp},
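The two hunks above add DDIV and DFMA next to the existing *_64 double-precision entries in the Evergreen table. On this hardware a double is carried as a pair of 32-bit channel values, which is why the emit helpers work on lo/hi register halves. The sketch below is only a host-side illustration of that split, not driver code; split_double is an invented helper (Mesa's util/u_bitcast.h offers comparable bit-cast routines).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical helper: split a double into the lo/hi 32-bit halves that a
 * 64-bit ALU op would read from two consecutive channels. */
static void split_double(double d, uint32_t *lo, uint32_t *hi)
{
	uint64_t bits;
	memcpy(&bits, &d, sizeof(bits));   /* bit-cast, like u_bitcast_d2u64() */
	*lo = (uint32_t)bits;
	*hi = (uint32_t)(bits >> 32);
}

int main(void)
{
	uint32_t lo, hi;
	split_double(1.0, &lo, &hi);
	printf("1.0 -> lo=0x%08x hi=0x%08x\n", lo, hi); /* lo=0x00000000 hi=0x3ff00000 */
	return 0;
}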
@@ -9397,25 +9500,24 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 [TGSI_OPCODE_RSQ] = { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
 [TGSI_OPCODE_EXP] = { ALU_OP0_NOP, tgsi_exp},
 [TGSI_OPCODE_LOG] = { ALU_OP0_NOP, tgsi_log},
- [TGSI_OPCODE_MUL] = { ALU_OP2_MUL, tgsi_op2},
+ [TGSI_OPCODE_MUL] = { ALU_OP2_MUL_IEEE, tgsi_op2},
 [TGSI_OPCODE_ADD] = { ALU_OP2_ADD, tgsi_op2},
- [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4, tgsi_dp},
- [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
+ [TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
 [TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
 [TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
 [TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
 [TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
 [TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
- [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD, tgsi_op3},
- [TGSI_OPCODE_SUB] = { ALU_OP2_ADD, tgsi_op2},
+ [TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
 [TGSI_OPCODE_LRP] = { ALU_OP0_NOP, tgsi_lrp},
- [TGSI_OPCODE_FMA] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_FMA] = { ALU_OP3_FMA, tgsi_op3},
 [TGSI_OPCODE_SQRT] = { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
 [TGSI_OPCODE_DP2A] = { ALU_OP0_NOP, tgsi_unsupported},
 [22] = { ALU_OP0_NOP, tgsi_unsupported},
 [23] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_FRC] = { ALU_OP1_FRACT, tgsi_op2},
- [TGSI_OPCODE_CLAMP] = { ALU_OP0_NOP, tgsi_unsupported},
+ [25] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_FLR] = { ALU_OP1_FLOOR, tgsi_op2},
 [TGSI_OPCODE_ROUND] = { ALU_OP1_RNDNE, tgsi_op2},
 [TGSI_OPCODE_EX2] = { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
@@ -9423,14 +9525,14 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 [TGSI_OPCODE_POW] = { ALU_OP0_NOP, cayman_pow},
 [TGSI_OPCODE_XPD] = { ALU_OP0_NOP, tgsi_xpd},
 [32] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_ABS] = { ALU_OP1_MOV, tgsi_op2},
+ [33] = { ALU_OP0_NOP, tgsi_unsupported},
 [34] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DPH] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
 [TGSI_OPCODE_COS] = { ALU_OP1_COS, cayman_trig},
 [TGSI_OPCODE_DDX] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
 [TGSI_OPCODE_DDY] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
 [TGSI_OPCODE_KILL] = { ALU_OP2_KILLGT, tgsi_kill}, /* unconditional kill */
- [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_PK2H] = { ALU_OP0_NOP, tgsi_pk2h},
 [TGSI_OPCODE_PK2US] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_PK4B] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_PK4UB] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9445,7 +9547,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 [TGSI_OPCODE_TEX] = { FETCH_OP_SAMPLE, tgsi_tex},
 [TGSI_OPCODE_TXD] = { FETCH_OP_SAMPLE_G, tgsi_tex},
 [TGSI_OPCODE_TXP] = { FETCH_OP_SAMPLE, tgsi_tex},
- [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_UP2H] = { ALU_OP0_NOP, tgsi_up2h},
 [TGSI_OPCODE_UP2US] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_UP4B] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported},
@@ -9461,7 +9563,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex},
 [69] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported},
- [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp},
+ [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex},
 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if},
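Both the Evergreen and Cayman tables now route PK2H and UP2H to real handlers (tgsi_pk2h/tgsi_up2h) instead of tgsi_unsupported. Semantically these pack two floats into one 32-bit word as half-precision values and unpack them again. The sketch below only illustrates that packing on the host, using Mesa's util_float_to_half()/util_half_to_float() helpers from util/u_half.h; it is not the code the driver emits for the GPU.

#include <stdint.h>
#include <stdio.h>
#include "util/u_half.h"   /* util_float_to_half() / util_half_to_float() */

/* PK2H-like packing: x in the low 16 bits, y in the high 16 bits. */
static uint32_t pk2h(float x, float y)
{
	return (uint32_t)util_float_to_half(x) |
	       ((uint32_t)util_float_to_half(y) << 16);
}

/* UP2H-like unpacking. */
static void up2h(uint32_t packed, float *x, float *y)
{
	*x = util_half_to_float(packed & 0xffff);
	*y = util_half_to_float(packed >> 16);
}

int main(void)
{
	float x, y;
	uint32_t w = pk2h(1.0f, -2.5f);
	up2h(w, &x, &y);
	printf("0x%08x -> %f %f\n", w, x, y);
	return 0;
}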
@@ -9495,20 +9597,20 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
- [105] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported},
 [106] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2},
 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
- [112] = { ALU_OP0_NOP, tgsi_unsupported},
+ [TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported},
 [114] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported},
 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */
 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */
- [118] = { ALU_OP0_NOP, tgsi_unsupported},
+ /* Refer below for TGSI_OPCODE_DFMA */
 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2},
 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv},
 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2},
@@ -9591,6 +9693,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
+ [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
@@ -9600,6 +9703,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr},
 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr},
 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64},
+ [TGSI_OPCODE_DFMA] = { ALU_OP3_FMA_64, tgsi_op3_64},
 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64},
 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64},
 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp},
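With DMAD and the new DFMA entries both mapped to ALU_OP3_FMA_64 (and TGSI_OPCODE_FMA to ALU_OP3_FMA above), these opcodes should get the fused multiply-add behaviour: a*b+c is rounded once rather than twice. The C snippet below demonstrates that difference on the host with the standard fma() from <math.h>, purely as an illustration of the semantics rather than of the generated ISA.

#include <math.h>
#include <stdio.h>

int main(void)
{
	/* Pick values whose exact product is not representable, so the extra
	 * rounding of the unfused form becomes visible. */
	double a = 1.0 + 0x1p-27;
	double b = 1.0 - 0x1p-27;
	double c = -1.0;

	double unfused = a * b + c;    /* product rounded, then sum rounded    */
	double fused   = fma(a, b, c); /* single rounding of the exact result  */

	printf("unfused: %a\n", unfused); /* 0x0p+0    */
	printf("fused:   %a\n", fused);   /* -0x1p-54  */
	return 0;
}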