X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr600%2Fr600_shader.c;h=f58f8ccbe8bcdb40e8e0bc4cf9fd95e7c4c17523;hb=HEAD;hp=6655b000aa9a516aa10715913e7ee7fe91d8673c;hpb=626bd455d425beb058ff413dce0b8d990ace7c49;p=mesa.git diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 6655b000aa9..f58f8ccbe8b 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -24,7 +24,9 @@ #include "r600_formats.h" #include "r600_opcodes.h" #include "r600_shader.h" +#include "r600_dump.h" #include "r600d.h" +#include "sfn/sfn_nir.h" #include "sb/sb_public.h" @@ -33,6 +35,10 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_scan.h" #include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_from_mesa.h" +#include "nir/tgsi_to_nir.h" +#include "nir/nir_to_tgsi_info.h" +#include "compiler/nir/nir.h" #include "util/u_bitcast.h" #include "util/u_memory.h" #include "util/u_math.h" @@ -141,7 +147,9 @@ static int store_shader(struct pipe_context *ctx, if (shader->bo == NULL) { return -ENOMEM; } - ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); + ptr = r600_buffer_map_sync_with_rings( + &rctx->b, shader->bo, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (R600_BIG_ENDIAN) { for (i = 0; i < shader->shader.bc.ndw; ++i) { ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); @@ -155,6 +163,8 @@ static int store_shader(struct pipe_context *ctx, return 0; } +extern const struct nir_shader_compiler_options r600_nir_options; +static int nshader = 0; int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, union r600_shader_key key) @@ -162,27 +172,71 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_shader_selector *sel = shader->selector; int r; - bool dump = r600_can_dump_shader(&rctx->screen->b, - 
tgsi_get_processor_type(sel->tokens)); - unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB); + struct r600_screen *rscreen = (struct r600_screen *)ctx->screen; + + int processor = sel->ir_type == PIPE_SHADER_IR_TGSI ? + tgsi_get_processor_type(sel->tokens): + pipe_shader_type_from_mesa(sel->nir->info.stage); + + bool dump = r600_can_dump_shader(&rctx->screen->b, processor); + unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB) && + !(rscreen->b.debug_flags & DBG_NIR); unsigned sb_disasm; unsigned export_shader; - + shader->shader.bc.isa = rctx->isa; + + if (!(rscreen->b.debug_flags & DBG_NIR)) { + assert(sel->ir_type == PIPE_SHADER_IR_TGSI); + r = r600_shader_from_tgsi(rctx, shader, key); + if (r) { + R600_ERR("translation from TGSI failed !\n"); + goto error; + } + } else { + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + sel->nir = tgsi_to_nir(sel->tokens, ctx->screen, true); + /* Lower int64 ops because we have some r600 built-in shaders that use it */ + if (!ctx->screen->get_param(ctx->screen, PIPE_CAP_DOUBLES)) { + NIR_PASS_V(sel->nir, nir_lower_regs_to_ssa); + NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar, NULL, NULL); + NIR_PASS_V(sel->nir, nir_lower_int64); + NIR_PASS_V(sel->nir, nir_opt_vectorize, NULL, NULL); + } + NIR_PASS_V(sel->nir, nir_lower_flrp, ~0, false); + } + nir_tgsi_scan_shader(sel->nir, &sel->info, true); + r = r600_shader_from_nir(rctx, shader, &key); + if (r) { + fprintf(stderr, "--Failed shader--------------------------------------------------\n"); + + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + fprintf(stderr, "--TGSI--------------------------------------------------------\n"); + tgsi_dump(sel->tokens, 0); + } + + if (rscreen->b.debug_flags & DBG_NIR) { + fprintf(stderr, "--NIR --------------------------------------------------------\n"); + nir_print_shader(sel->nir, stderr); + } + + R600_ERR("translation from NIR failed !\n"); + goto error; + } + } + if (dump) { - fprintf(stderr, 
"--------------------------------------------------------------\n"); - tgsi_dump(sel->tokens, 0); - + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + fprintf(stderr, "--TGSI--------------------------------------------------------\n"); + tgsi_dump(sel->tokens, 0); + } + if (sel->so.num_outputs) { r600_dump_streamout(&sel->so); } } - r = r600_shader_from_tgsi(rctx, shader, key); - if (r) { - R600_ERR("translation from TGSI failed !\n"); - goto error; - } + if (shader->shader.processor_type == PIPE_SHADER_VERTEX) { /* only disable for vertex shaders in tess paths */ if (key.vs.as_ls) @@ -214,7 +268,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx, r600_bytecode_disasm(&shader->shader.bc); fprintf(stderr, "______________________________________________________________\n"); } else if ((dump && sb_disasm) || use_sb) { - r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, + r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader, dump, use_sb); if (r) { R600_ERR("r600_sb_bytecode_process failed !\n"); @@ -222,6 +276,30 @@ int r600_pipe_shader_create(struct pipe_context *ctx, } } + if (dump) { + FILE *f; + char fname[1024]; + snprintf(fname, 1024, "shader_from_%s_%d.cpp", + (sel->ir_type == PIPE_SHADER_IR_TGSI ? + (rscreen->b.debug_flags & DBG_NIR ? 
"tgsi-nir" : "tgsi") + : "nir"), nshader); + f = fopen(fname, "w"); + print_shader_info(f, nshader++, &shader->shader); + print_shader_info(stderr, nshader++, &shader->shader); + print_pipe_info(stderr, &sel->info); + if (sel->ir_type == PIPE_SHADER_IR_TGSI) { + fprintf(f, "/****TGSI**********************************\n"); + tgsi_dump_to_file(sel->tokens, 0, f); + } + + if (rscreen->b.debug_flags & DBG_NIR){ + fprintf(f, "/****NIR **********************************\n"); + nir_print_shader(sel->nir, f); + } + fprintf(f, "******************************************/\n"); + fclose(f); + } + if (shader->gs_copy_shader) { if (dump) { // dump copy shader @@ -299,7 +377,8 @@ error: void r600_pipe_shader_destroy(struct pipe_context *ctx UNUSED, struct r600_pipe_shader *shader) { r600_resource_reference(&shader->bo, NULL); - r600_bytecode_clear(&shader->shader.bc); + if (shader->shader.bc.cf.next) + r600_bytecode_clear(&shader->shader.bc); r600_release_command_buffer(&shader->command_buffer); } @@ -431,24 +510,26 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) #endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { if (i->Src[j].Register.Dimension) { - switch (i->Src[j].Register.File) { - case TGSI_FILE_CONSTANT: - case TGSI_FILE_HW_ATOMIC: - break; - case TGSI_FILE_INPUT: - if (ctx->type == PIPE_SHADER_GEOMETRY || - ctx->type == PIPE_SHADER_TESS_CTRL || - ctx->type == PIPE_SHADER_TESS_EVAL) - break; - case TGSI_FILE_OUTPUT: - if (ctx->type == PIPE_SHADER_TESS_CTRL) - break; - default: - R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, - i->Src[j].Register.File, - i->Src[j].Register.Dimension); - return -EINVAL; - } + switch (i->Src[j].Register.File) { + case TGSI_FILE_CONSTANT: + case TGSI_FILE_HW_ATOMIC: + break; + case TGSI_FILE_INPUT: + if (ctx->type == PIPE_SHADER_GEOMETRY || + ctx->type == PIPE_SHADER_TESS_CTRL || + ctx->type == PIPE_SHADER_TESS_EVAL) + break; + /* fallthrough */ + case TGSI_FILE_OUTPUT: + if (ctx->type == PIPE_SHADER_TESS_CTRL) 
+ break; + /* fallthrough */ + default: + R600_ERR("unsupported src %d (file %d, dimension %d)\n", j, + i->Src[j].Register.File, + i->Src[j].Register.Dimension); + return -EINVAL; + } } } for (j = 0; j < i->Instruction.NumDstRegs; j++) { @@ -618,6 +699,8 @@ static int r600_spi_sid(struct r600_shader_io * io) else { if (name == TGSI_SEMANTIC_GENERIC) { /* For generic params simply use sid from tgsi */ + index = 9 + io->sid; + } else if (name == TGSI_SEMANTIC_TEXCOORD) { index = io->sid; } else { /* For non-generic params - pack name and sid into 8 bits */ @@ -644,9 +727,11 @@ int r600_get_lds_unique_index(unsigned semantic_name, unsigned index) case TGSI_SEMANTIC_CLIPDIST: assert(index <= 1); return 2 + index; + case TGSI_SEMANTIC_TEXCOORD: + return 4 + index; case TGSI_SEMANTIC_GENERIC: if (index <= 63-4) - return 4 + index - 9; + return 4 + index; else /* same explanation as in the default statement, * the only user hitting this is st/nine. @@ -1673,19 +1758,11 @@ static void tgsi_src(struct r600_shader_ctx *ctx, } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) { r600_src->sel = 2; } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) { - if (ctx->type == PIPE_SHADER_TESS_CTRL) { - r600_src->sel = ctx->tess_input_info; - r600_src->swizzle[0] = 2; - r600_src->swizzle[1] = 2; - r600_src->swizzle[2] = 2; - r600_src->swizzle[3] = 2; - } else { - r600_src->sel = ctx->tess_input_info; - r600_src->swizzle[0] = 3; - r600_src->swizzle[1] = 3; - r600_src->swizzle[2] = 3; - r600_src->swizzle[3] = 3; - } + r600_src->sel = ctx->tess_input_info; + r600_src->swizzle[0] = 2; + r600_src->swizzle[1] = 2; + r600_src->swizzle[2] = 2; + r600_src->swizzle[3] = 2; } else if (ctx->type == PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) { r600_src->sel = 0; r600_src->swizzle[0] = 0; @@ -2475,9 +2552,9 @@ static void 
convert_edgeflag_to_int(struct r600_shader_ctx *ctx) r600_bytecode_add_alu(ctx->bc, &alu); } -static int generate_gs_copy_shader(struct r600_context *rctx, - struct r600_pipe_shader *gs, - struct pipe_stream_output_info *so) +int generate_gs_copy_shader(struct r600_context *rctx, + struct r600_pipe_shader *gs, + struct pipe_stream_output_info *so) { struct r600_shader_ctx ctx = {}; struct r600_shader *gs_shader = &gs->shader; @@ -2975,7 +3052,8 @@ static int emit_lds_vs_writes(struct r600_shader_ctx *ctx) for (i = 0; i < ctx->shader->noutput; i++) { struct r600_bytecode_alu alu; - int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid); + int param = r600_get_lds_unique_index(ctx->shader->output[i].name, + ctx->shader->output[i].sid); if (param) { r = single_alu_op2(ctx, ALU_OP2_ADD_INT, @@ -3559,7 +3637,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.tess_input_info = ++regno; ctx.tess_output_info = ++regno; } else if (ctx.type == PIPE_SHADER_TESS_EVAL) { - ctx.tess_input_info = 0; + ctx.tess_input_info = ++regno; ctx.tess_output_info = ++regno; } else if (ctx.type == PIPE_SHADER_GEOMETRY) { ctx.gs_export_gpr_tregs[0] = ++regno; @@ -3887,6 +3965,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; + + ctx.bc->precise |= ctx.parse.FullToken.FullInstruction.Instruction.Precise; + r = ctx.inst_info->process(&ctx); if (r) goto out_err; @@ -6976,6 +7057,7 @@ static int tgsi_interp_egcm(struct r600_shader_ctx *ctx) } else { location = TGSI_INTERPOLATE_LOC_CENTROID; + ctx->shader->input[input].uses_interpolate_at_centroid = 1; } k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location); @@ -7478,6 +7560,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) int8_t offset_x = 0, offset_y = 0, offset_z = 0; boolean has_txq_cube_array_z = false; unsigned 
sampler_index_mode; + int array_index_offset_channel = -1; if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || @@ -7719,11 +7802,43 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) if (r) return r; + /* Evaluate the array index according to floor(idx + 0.5). This + * needs to be done before merging the face select value, because + * otherwise the fractional part of the array index will interfere + * with the face select value */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); + alu.op = ALU_OP1_RNDNE; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + + /* Because the array slice index and the cube face index are merged + * into one value we have to make sure the array slice index is >= 0, + * otherwise the face selection will fail */ + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP2_MAX; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; + alu.src[1].sel = V_SQ_ALU_SRC_0; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 3; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP3_MULADD; alu.is_op3 = 1; - r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 3; alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; alu.src[1].chan = 0; alu.src[1].value = u_bitcast_f2u(8.0f); @@ -8273,7 +8388,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) t->src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; t->src_sel_x = inst->TexOffsets[0].SwizzleX; t->src_sel_y = inst->TexOffsets[0].SwizzleY; - t->src_sel_z = inst->TexOffsets[0].SwizzleZ; + if (inst->Texture.Texture == 
TGSI_TEXTURE_2D_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) + /* make sure array index selector is 0, this is just a safety + * precaution because TGSI seems to emit something strange here */ + t->src_sel_z = 4; + else + t->src_sel_z = inst->TexOffsets[0].SwizzleZ; + t->src_sel_w = 4; t->dst_sel_x = 7; @@ -8429,19 +8551,43 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) opcode == FETCH_OP_SAMPLE_C_LB) { /* the array index is read from Y */ tex.coord_type_y = 0; + array_index_offset_channel = tex.src_sel_y; } else { /* the array index is read from Z */ tex.coord_type_z = 0; tex.src_sel_z = tex.src_sel_y; + array_index_offset_channel = tex.src_sel_z; } } else if (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || - inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || - ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || + inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY) { + tex.coord_type_z = 0; + array_index_offset_channel = tex.src_sel_z; + } else if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && - (ctx->bc->chip_class >= EVERGREEN))) - /* the array index is read from Z */ + (ctx->bc->chip_class >= EVERGREEN)) + /* the array index is read from Z, coordinate will be corrected elsewhere */ tex.coord_type_z = 0; + /* We have array access to 1D or 2D ARRAY, the coordinates are not int -> + * evaluate the array index */ + if (array_index_offset_channel >= 0 && + opcode != FETCH_OP_LD && + opcode != FETCH_OP_GET_TEXTURE_RESINFO) { + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.src[0].sel = tex.src_gpr; + alu.src[0].chan = array_index_offset_channel; + alu.src[0].rel = tex.src_rel; + alu.op = ALU_OP1_RNDNE; + alu.dst.sel = tex.src_gpr; + alu.dst.chan = array_index_offset_channel; + alu.dst.rel = tex.src_rel; + alu.dst.write = 1; + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + /* mask unused source components */ if (opcode 
== FETCH_OP_SAMPLE || opcode == FETCH_OP_GATHER4) { switch (inst->Texture.Texture) {