X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fa4xx%2Ffd4_program.c;h=4e98be45999f5fa8f2f5b7e8200e2f65f000842d;hb=75fef41f16c7b028c3c7809eb7f9d8e3ea4e259c;hp=05b0c4f9ae0e38ae9a4779ee9a48f63e20a9d02e;hpb=3c5d309477c2667ad3cbc370ad0566480f39b95d;p=mesa.git diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c index 05b0c4f9ae0..4e98be45999 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -1,5 +1,3 @@ -/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ - /* * Copyright (C) 2014 Rob Clark * @@ -30,7 +28,7 @@ #include "util/u_string.h" #include "util/u_memory.h" #include "util/u_inlines.h" -#include "util/u_format.h" +#include "util/format/u_format.h" #include "freedreno_program.h" @@ -39,58 +37,12 @@ #include "fd4_texture.h" #include "fd4_format.h" -static void -delete_shader_stateobj(struct fd4_shader_stateobj *so) -{ - ir3_shader_destroy(so->shader); - free(so); -} - -static struct fd4_shader_stateobj * -create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, - enum shader_t type) -{ - struct fd_context *ctx = fd_context(pctx); - struct ir3_compiler *compiler = ctx->screen->compiler; - struct fd4_shader_stateobj *so = CALLOC_STRUCT(fd4_shader_stateobj); - so->shader = ir3_shader_create(compiler, cso, type, &ctx->debug); - return so; -} - -static void * -fd4_fp_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) -{ - return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); -} - -static void -fd4_fp_state_delete(struct pipe_context *pctx, void *hwcso) -{ - struct fd4_shader_stateobj *so = hwcso; - delete_shader_stateobj(so); -} - -static void * -fd4_vp_state_create(struct pipe_context *pctx, - const struct pipe_shader_state *cso) -{ - return create_shader_stateobj(pctx, cso, SHADER_VERTEX); -} - -static void -fd4_vp_state_delete(struct pipe_context *pctx, void *hwcso) -{ - struct fd4_shader_stateobj *so = hwcso; - delete_shader_stateobj(so); -} - static void emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { const struct ir3_info *si = &so->info; enum a4xx_state_block sb = fd4_stage2shadersb(so->type); - enum adreno_state_src src; + enum a4xx_state_src src; uint32_t i, sz, *bin; if (fd_mesa_debug & FD_DBG_DIRECT) { @@ -160,7 +112,8 @@ setup_stages(struct fd4_emit *emit, struct stage *s) if (s[i].v) { s[i].i = &s[i].v->info; /* constlen is in units of 4 * vec4: */ - s[i].constlen = align(s[i].v->constlen, 4) / 4; + assert(s[i].v->constlen % 4 == 0); + s[i].constlen = s[i].v->constlen / 4; /* instrlen is already in units of 16 instr.. although * probably we should ditch that and not make the compiler * care about instruction group size of a3xx vs a4xx @@ -211,14 +164,14 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, { struct stage s[MAX_STAGES]; uint32_t pos_regid, posz_regid, psize_regid, color_regid[8]; - uint32_t face_regid, coord_regid, zwcoord_regid; + uint32_t face_regid, coord_regid, zwcoord_regid, vcoord_regid, lcoord_regid; enum a3xx_threadsize fssz; int constmode; int i, j; debug_assert(nr <= ARRAY_SIZE(color_regid)); - if (emit->key.binning_pass) + if (emit->binning_pass) nr = 0; setup_stages(emit, s); @@ -253,10 +206,20 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } - /* TODO get these dynamically: */ - face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); - coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); - zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); + face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); + coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); + zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); + vcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL); + lcoord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL); + + /* XXX since we don't know how to support noperspective varyings on a4xx, + * use this little hack to support u_blitter, which should be the only + * case with noperspective varyings on a4xx: + */ + if (VALIDREG(lcoord_regid)) { + assert(!VALIDREG(vcoord_regid)); + vcoord_regid = lcoord_regid; + } /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. @@ -282,7 +245,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) | 0x3f3f000 | /* XXX */ A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid)); - OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) | + OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(vcoord_regid) | 0xfcfcfc00); OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */ @@ -310,7 +273,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1); OUT_RING(ring, 0x140010 | /* XXX */ - COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS)); + COND(emit->binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS)); OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1); OUT_RING(ring, 0x7f | /* XXX */ @@ -329,7 +292,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | - COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + COND(s[VS].v->need_pixlod, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE)); OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) | A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in)); OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | @@ -337,7 +300,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in)); struct ir3_shader_linkage l = {0}; - ir3_link_shaders(&l, s[VS].v, s[FS].v); + ir3_link_shaders(&l, s[VS].v, s[FS].v, false); for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { uint32_t reg = 0; @@ -373,7 +336,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff)); OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ - if (emit->key.binning_pass) { + if (emit->binning_pass) { OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1); OUT_RING(ring, 0x00000000); /* SP_FS_LENGTH_REG */ @@ -404,12 +367,12 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | - COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE)); + COND(s[FS].v->need_pixlod, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | 0x80000000 | /* XXX */ COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) | COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) | - COND(s[FS].v->frag_coord, A4XX_SP_FS_CTRL_REG1_FRAGCOORD)); + COND(s[FS].v->fragcoord_compmask != 0, A4XX_SP_FS_CTRL_REG1_FRAGCOORD)); OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) | @@ -433,10 +396,8 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) | COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) | COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) | - COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD | - A4XX_RB_RENDER_CONTROL2_YCOORD | - A4XX_RB_RENDER_CONTROL2_ZCOORD | - A4XX_RB_RENDER_CONTROL2_WCOORD)); + COND(s[FS].v->fragcoord_compmask != 0, + A4XX_RB_RENDER_CONTROL2_COORD_MASK(s[FS].v->fragcoord_compmask))); OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1); OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(nr) | @@ -459,11 +420,11 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) | A4XX_SP_FS_MRT_REG_MRTFORMAT(format) | COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) | - COND(emit->key.half_precision, + COND(color_regid[i] & HALF_REG_ID, A4XX_SP_FS_MRT_REG_HALF_PRECISION)); } - if (emit->key.binning_pass) { + if (emit->binning_pass) { OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) | 0x40000000 | /* XXX */ @@ -513,40 +474,32 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, } } - gl_varying_slot slot = s[FS].v->inputs[j].slot; - - /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ - if (slot >= VARYING_SLOT_VAR0) { - unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); - /* Replace the .xy coordinates with S/T from the point sprite. Set - * interpolation bits for .zw such that they become .01 + bool coord_mode = emit->sprite_coord_mode; + if (ir3_point_sprite(s[FS].v, j, emit->sprite_coord_enable, &coord_mode)) { + /* mask is two 2-bit fields, where: + * '01' -> S + * '10' -> T + * '11' -> 1 - T (flip mode) */ - if (emit->sprite_coord_enable & texmask) { - /* mask is two 2-bit fields, where: - * '01' -> S - * '10' -> T - * '11' -> 1 - T (flip mode) - */ - unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001; - uint32_t loc = inloc; - if (compmask & 0x1) { - vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); - loc++; - } - if (compmask & 0x2) { - vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); - loc++; - } - if (compmask & 0x4) { - /* .z <- 0.0f */ - vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); - loc++; - } - if (compmask & 0x8) { - /* .w <- 1.0f */ - vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); - loc++; - } + unsigned mask = coord_mode ? 0b1101 : 0b1001; + uint32_t loc = inloc; + if (compmask & 0x1) { + vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x2) { + vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x4) { + /* .z <- 0.0f */ + vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); + loc++; + } + if (compmask & 0x8) { + /* .w <- 1.0f */ + vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); + loc++; } } } @@ -572,7 +525,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, if (s[VS].instrlen) emit_shader(ring, s[VS].v); - if (!emit->key.binning_pass) + if (!emit->binning_pass) if (s[FS].instrlen) emit_shader(ring, s[FS].v); } @@ -580,11 +533,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, void fd4_prog_init(struct pipe_context *pctx) { - pctx->create_fs_state = fd4_fp_state_create; - pctx->delete_fs_state = fd4_fp_state_delete; - - pctx->create_vs_state = fd4_vp_state_create; - pctx->delete_vs_state = fd4_vp_state_delete; - + ir3_prog_init(pctx); fd_prog_init(pctx); }