freedreno/a4xx: enable A405
[mesa.git] / src / gallium / drivers / freedreno / a2xx / fd2_program.c
index 834a7c7fcd79c2ea6df68816dfc2c649d52d2c87..b735535e7f77698db177c75365dcdef4d7213fa2 100644 (file)
@@ -1,5 +1,3 @@
-/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
-
 /*
  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
  *
  *
  * Authors:
  *    Rob Clark <robclark@freedesktop.org>
+ *    Jonathan Marek <jonathan@marek.ca>
  */
 
 #include "pipe/p_state.h"
 #include "util/u_string.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_parse.h"
+#include "nir/tgsi_to_nir.h"
 
 #include "freedreno_program.h"
 
+#include "ir2.h"
 #include "fd2_program.h"
-#include "fd2_compiler.h"
 #include "fd2_texture.h"
 #include "fd2_util.h"
+#include "instr-a2xx.h"
 
+/* Allocate a shader state object with CALLOC_STRUCT, record its stage and
+ * whether the context's screen is an a20x part (is_a20x()).
+ * Returns NULL if the allocation fails.
+ */
 static struct fd2_shader_stateobj *
-create_shader(enum shader_t type)
+create_shader(struct pipe_context *pctx, gl_shader_stage type)
 {
        struct fd2_shader_stateobj *so = CALLOC_STRUCT(fd2_shader_stateobj);
        if (!so)
                return NULL;
        so->type = type;
+       so->is_a20x = is_a20x(fd_context(pctx)->screen);
        return so;
 }
 
@@ -56,88 +58,67 @@ delete_shader(struct fd2_shader_stateobj *so)
 {
        if (!so)
                return;
-       ir2_shader_destroy(so->ir);
-       free(so->tokens);
-       free(so->bin);
+       ralloc_free(so->nir);
+       for (int i = 0; i < ARRAY_SIZE(so->variant); i++)
+               free(so->variant[i].info.dwords);
        free(so);
 }
 
-static struct fd2_shader_stateobj *
-assemble(struct fd2_shader_stateobj *so)
+/* Write one assembled shader into the ring as a CP_IM_LOAD_IMMEDIATE packet:
+ * a stage dword (1 for MESA_SHADER_FRAGMENT, 0 for anything else), the size
+ * in dwords, then info->dwords[0 .. sizedwords).  info->sizedwords must be
+ * non-zero.
+ *
+ * If patches is non-NULL, the address of ring->cur[info->mem_export_ptr],
+ * taken just before the shader dwords are written, is appended to it.
+ * NOTE(review): presumably this marks a dword to be rewritten later; confirm
+ * against the consumer of the patch list.
+ */
+static void
+emit(struct fd_ringbuffer *ring, gl_shader_stage type,
+       struct ir2_shader_info *info, struct util_dynarray *patches)
 {
-       free(so->bin);
-       so->bin = ir2_shader_assemble(so->ir, &so->info);
-       if (!so->bin)
-               goto fail;
+       unsigned i;
 
-       if (fd_mesa_debug & FD_DBG_DISASM) {
-               DBG("disassemble: type=%d", so->type);
-               disasm_a2xx(so->bin, so->info.sizedwords, 0, so->type);
-       }
+       assert(info->sizedwords);
 
-       return so;
+       OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + info->sizedwords);
+       OUT_RING(ring, type == MESA_SHADER_FRAGMENT);
+       OUT_RING(ring, info->sizedwords);
 
-fail:
-       debug_error("assemble failed!");
-       delete_shader(so);
-       return NULL;
+       /* remember where, inside the ring, the patchable shader dword lands */
+       if (patches)
+               util_dynarray_append(patches, uint32_t*, &ring->cur[info->mem_export_ptr]);
+
+       for (i = 0; i < info->sizedwords; i++)
+               OUT_RING(ring, info->dwords[i]);
 }
 
-static struct fd2_shader_stateobj *
-compile(struct fd_program_stateobj *prog, struct fd2_shader_stateobj *so)
+/* Type-size callback handed to nir_lower_io: reports the size of a type as
+ * its attribute slot count.  The bindless argument is ignored and false is
+ * always passed as the second argument of glsl_count_attribute_slots().
+ */
+static int
+ir2_glsl_type_size(const struct glsl_type *type, bool bindless)
 {
-       int ret;
+       return glsl_count_attribute_slots(type, false);
+}
 
-       if (fd_mesa_debug & FD_DBG_DISASM) {
-               DBG("dump tgsi: type=%d", so->type);
-               tgsi_dump(so->tokens, 0);
-       }
+/* Create the fragment shader state object: take the NIR from the CSO (or
+ * translate its TGSI tokens to NIR), lower I/O, optimize, and compile
+ * variant 0 right away.  The NIR is freed once that variant is compiled.
+ * Returns NULL if the allocation fails or ir2_optimize_nir() returns
+ * non-zero.
+ */
+static void *
+fd2_fp_state_create(struct pipe_context *pctx,
+               const struct pipe_shader_state *cso)
+{
+       struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_FRAGMENT);
+       if (!so)
+               return NULL;
+
+       so->nir = (cso->type == PIPE_SHADER_IR_NIR) ? cso->ir.nir :
+               tgsi_to_nir(cso->tokens, pctx->screen);
+
+       NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size,
+                          (nir_lower_io_options)0);
 
-       ret = fd2_compile_shader(prog, so);
-       if (ret)
+       if (ir2_optimize_nir(so->nir, true))
                goto fail;
 
-       /* NOTE: we don't assemble yet because for VS we don't know the
-        * type information for vertex fetch yet.. so those need to be
-        * patched up later before assembling.
-        */
+       so->first_immediate = so->nir->num_uniforms;
 
-       so->info.sizedwords = 0;
+       ir2_compile(so, 0, NULL);
 
+       /* only variant 0 is ever compiled for a fragment shader in this file,
+        * so the NIR is released here; clearing the pointer keeps
+        * delete_shader() from freeing it a second time
+        */
+       ralloc_free(so->nir);
+       so->nir = NULL;
        return so;
 
 fail:
-       debug_error("compile failed!");
        delete_shader(so);
        return NULL;
 }
 
-static void
-emit(struct fd_ringbuffer *ring, struct fd2_shader_stateobj *so)
-{
-       unsigned i;
-
-       if (so->info.sizedwords == 0)
-               assemble(so);
-
-       OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords);
-       OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1);
-       OUT_RING(ring, so->info.sizedwords);
-       for (i = 0; i < so->info.sizedwords; i++)
-               OUT_RING(ring, so->bin[i]);
-}
-
-static void *
-fd2_fp_state_create(struct pipe_context *pctx,
-               const struct pipe_shader_state *cso)
-{
-       struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
-       if (!so)
-               return NULL;
-       so->tokens = tgsi_dup_tokens(cso->tokens);
-       return so;
-}
-
 static void
 fd2_fp_state_delete(struct pipe_context *pctx, void *hwcso)
 {
@@ -149,11 +130,29 @@ static void *
 fd2_vp_state_create(struct pipe_context *pctx,
                const struct pipe_shader_state *cso)
 {
-       struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
+       struct fd2_shader_stateobj *so = create_shader(pctx, MESA_SHADER_VERTEX);
        if (!so)
                return NULL;
-       so->tokens = tgsi_dup_tokens(cso->tokens);
+
+       /* use the NIR supplied in the CSO, or translate its TGSI tokens */
+       so->nir = (cso->type == PIPE_SHADER_IR_NIR) ? cso->ir.nir :
+               tgsi_to_nir(cso->tokens, pctx->screen);
+
+       NIR_PASS_V(so->nir, nir_lower_io, nir_var_all, ir2_glsl_type_size,
+                          (nir_lower_io_options)0);
+
+       if (ir2_optimize_nir(so->nir, true))
+               goto fail;
+
+       so->first_immediate = so->nir->num_uniforms;
+
+       /* compile binning variant now */
+       ir2_compile(so, 0, NULL);
+
+       /* unlike the fragment shader path, so->nir is kept: fd2_program_emit()
+        * calls ir2_compile() on this object again for further variants, and
+        * delete_shader() frees the NIR
+        */
        return so;
+
+fail:
+       delete_shader(so);
+       return NULL;
 }
 
 static void
@@ -164,304 +163,127 @@ fd2_vp_state_delete(struct pipe_context *pctx, void *hwcso)
 }
 
+/* Fill in the format-dependent fields of one vertex fetch instruction from
+ * the vertex element it reads: destination swizzle, sign/number format,
+ * surface format, exponent adjust, the stride of the vertex buffer the
+ * element refers to, and the element's source offset.
+ */
 static void
-patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
-               struct fd_vertex_stateobj *vtx)
+patch_vtx_fetch(struct fd_context *ctx, struct pipe_vertex_element *elem,
+       instr_fetch_vtx_t *instr, uint16_t dst_swiz)
 {
-       unsigned i;
-
-       assert(so->num_vfetch_instrs == vtx->num_elements);
-
-       /* update vtx fetch instructions: */
-       for (i = 0; i < so->num_vfetch_instrs; i++) {
-               struct ir2_instruction *instr = so->vfetch_instrs[i];
-               struct pipe_vertex_element *elem = &vtx->pipe[i];
-               struct pipe_vertex_buffer *vb =
-                               &ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index];
-               enum pipe_format format = elem->src_format;
-               const struct util_format_description *desc =
-                               util_format_description(format);
-               unsigned j;
-
-               /* Find the first non-VOID channel. */
-               for (j = 0; j < 4; j++)
-                       if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
-                               break;
-
-               /* CI/CIS can probably be set in compiler instead: */
-               instr->fetch.const_idx = 20 + (i / 3);
-               instr->fetch.const_idx_sel = i % 3;
-
-               instr->fetch.fmt = fd2_pipe2surface(format);
-               instr->fetch.is_normalized = desc->channel[j].normalized;
-               instr->fetch.is_signed =
-                               desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
-               instr->fetch.stride = vb->stride ? : 1;
-               instr->fetch.offset = elem->src_offset;
-
-               for (j = 0; j < 4; j++)
-                       instr->regs[0]->swizzle[j] = "xyzw01__"[desc->swizzle[j]];
-
-               assert(instr->fetch.fmt != ~0);
-
-               DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, "
-                               "stride=%d, offset=%d",
-                               i, util_format_name(format),
-                               instr->fetch.fmt,
-                               instr->fetch.const_idx,
-                               instr->fetch.const_idx_sel,
-                               elem->instance_divisor,
-                               instr->regs[0]->swizzle,
-                               instr->fetch.stride,
-                               instr->fetch.offset);
-       }
-
-       /* trigger re-assemble: */
-       so->info.sizedwords = 0;
+       struct surface_format fmt = fd2_pipe2surface(elem->src_format);
+
+       instr->dst_swiz = fd2_vtx_swiz(elem->src_format, dst_swiz);
+       instr->format_comp_all = fmt.sign == SQ_TEX_SIGN_SIGNED;
+       instr->num_format_all = fmt.num_format;
+       instr->format = fmt.format;
+       instr->exp_adjust_all = fmt.exp_adjust;
+       instr->stride = ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index].stride;
+       instr->offset = elem->src_offset;
 }
 
+/* Patch, in place in info->dwords, every fetch instruction recorded in
+ * info->fetch_info.  A vertex fetch is matched to its vertex element by
+ * decoding const_index/const_index_sel (idx = (const_index - 20) * 3 + sel)
+ * and patched from vtx; anything else must be a texture fetch, which gets
+ * its constant index from fd2_get_const_idx() and its source swizzle from
+ * the recorded fetch info.
+ *
+ * vtx is only dereferenced for VTX_FETCH instructions; fd2_program_emit()
+ * passes NULL for it when patching the fragment shader.
+ */
 static void
-patch_tex_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
-               struct fd_texture_stateobj *tex)
+patch_fetches(struct fd_context *ctx, struct ir2_shader_info *info,
+       struct fd_vertex_stateobj *vtx, struct fd_texture_stateobj *tex)
 {
-       unsigned i;
-
-       /* update tex fetch instructions: */
-       for (i = 0; i < so->num_tfetch_instrs; i++) {
-               struct ir2_instruction *instr = so->tfetch_instrs[i].instr;
-               unsigned samp_id = so->tfetch_instrs[i].samp_id;
-               unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
-
-               if (const_idx != instr->fetch.const_idx) {
-                       instr->fetch.const_idx = const_idx;
-                       /* trigger re-assemble: */
-                       so->info.sizedwords = 0;
+       for (int i = 0; i < info->num_fetch_instrs; i++) {
+               struct ir2_fetch_info *fi = &info->fetch_info[i];
+
+               instr_fetch_t *instr = (instr_fetch_t*) &info->dwords[fi->offset];
+               if (instr->opc == VTX_FETCH) {
+                       /* inverse of the 20 + (i / 3), i % 3 constant slot encoding */
+                       unsigned idx = (instr->vtx.const_index - 20) * 3 +
+                               instr->vtx.const_index_sel;
+                       patch_vtx_fetch(ctx, &vtx->pipe[idx], &instr->vtx, fi->vtx.dst_swiz);
+                       continue;
                }
-       }
-}
 
-void
-fd2_program_validate(struct fd_context *ctx)
-{
-       struct fd_program_stateobj *prog = &ctx->prog;
-       bool dirty_fp = !!(ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_PROG);
-       bool dirty_vp = !!(ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_PROG);
-
-       /* if vertex or frag shader is dirty, we may need to recompile. Compile
-        * frag shader first, as that assigns the register slots for exports
-        * from the vertex shader.  And therefore if frag shader has changed we
-        * need to recompile both vert and frag shader.
-        */
-       if (dirty_fp)
-               compile(prog, prog->fp);
-
-       if (dirty_fp || dirty_vp)
-               compile(prog, prog->vp);
-
-       /* if necessary, fix up vertex fetch instructions: */
-       if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
-               patch_vtx_fetches(ctx, prog->vp, ctx->vtx.vtx);
-
-       /* if necessary, fix up texture fetch instructions: */
-       if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
-               patch_tex_fetches(ctx, prog->vp, &ctx->tex[PIPE_SHADER_VERTEX]);
-               patch_tex_fetches(ctx, prog->fp, &ctx->tex[PIPE_SHADER_FRAGMENT]);
+               assert(instr->opc == TEX_FETCH);
+               instr->tex.const_idx = fd2_get_const_idx(ctx, tex, fi->tex.samp_id);
+               instr->tex.src_swiz = fi->tex.src_swiz;
        }
 }
 
+/* Emit the shaders of prog into ring and program the SQ_CONTEXT_MISC and
+ * SQ_PROGRAM_CNTL registers to match.
+ *
+ * When ring is the batch's binning ring only the vertex shader is emitted,
+ * using variant 0, and fp stays NULL.  Otherwise the vertex shader variant
+ * whose fragment linkage matches the bound fragment shader is looked up in
+ * slots 1..N, compiling a new variant into the first unused slot if none
+ * matches.
+ *
+ * NOTE(review): running out of variant slots is only caught by an assert;
+ * with asserts compiled out the index would run past vp->variant[].
+ */
 void
-fd2_program_emit(struct fd_ringbuffer *ring,
+fd2_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
                struct fd_program_stateobj *prog)
 {
-       struct ir2_shader_info *vsi =
-               &((struct fd2_shader_stateobj *)prog->vp)->info;
-       struct ir2_shader_info *fsi =
-               &((struct fd2_shader_stateobj *)prog->fp)->info;
-       uint8_t vs_gprs, fs_gprs, vs_export;
-
-       emit(ring, prog->vp);
-       emit(ring, prog->fp);
-
-       vs_gprs = (vsi->max_reg < 0) ? 0x80 : vsi->max_reg;
-       fs_gprs = (fsi->max_reg < 0) ? 0x80 : fsi->max_reg;
-       vs_export = MAX2(1, prog->num_exports) - 1;
-
-       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
-       OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
-       OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) |
-                       A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
-                       A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
-                       A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
-                       A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
-                       A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs));
-}
-
-/* Creates shader:
- *    EXEC ADDR(0x2) CNT(0x1)
- *       (S)FETCH:     SAMPLE  R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER)
- *    ALLOC PARAM/PIXEL SIZE(0x0)
- *    EXEC_END ADDR(0x3) CNT(0x1)
- *          ALU:       MAXv    export0 = R0, R0        ; gl_FragColor
- *    NOP
- */
-static struct fd2_shader_stateobj *
-create_blit_fp(void)
-{
-       struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
-       struct ir2_cf *cf;
-       struct ir2_instruction *instr;
-
-       if (!so)
-               return NULL;
-
-       so->ir = ir2_shader_create();
-
-       cf = ir2_cf_create(so->ir, EXEC);
-
-       instr = ir2_instr_create_tex_fetch(cf, 0);
-       ir2_reg_create(instr, 0, "xyzw", 0);
-       ir2_reg_create(instr, 0, "xyx", 0);
-       instr->sync = true;
-
-       cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
-       cf = ir2_cf_create(so->ir, EXEC_END);
-
-       instr = ir2_instr_create_alu(cf, MAXv, ~0);
-       ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
-       ir2_reg_create(instr, 0, NULL, 0);
-       ir2_reg_create(instr, 0, NULL, 0);
-
-       return assemble(so);
-}
-
-/* Creates shader:
-*     EXEC ADDR(0x3) CNT(0x2)
-*           FETCH:     VERTEX  R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1)
-*           FETCH:     VERTEX  R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0)
-*     ALLOC POSITION SIZE(0x0)
-*     EXEC ADDR(0x5) CNT(0x1)
-*           ALU:       MAXv    export62 = R2, R2       ; gl_Position
-*     ALLOC PARAM/PIXEL SIZE(0x0)
-*     EXEC_END ADDR(0x6) CNT(0x1)
-*           ALU:       MAXv    export0 = R1, R1
-*     NOP
- */
-static struct fd2_shader_stateobj *
-create_blit_vp(void)
-{
-       struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
-       struct ir2_cf *cf;
-       struct ir2_instruction *instr;
-
-       if (!so)
-               return NULL;
-
-       so->ir = ir2_shader_create();
-
-       cf = ir2_cf_create(so->ir, EXEC);
-
-       instr = ir2_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8);
-       instr->fetch.is_normalized = true;
-       ir2_reg_create(instr, 1, "xy01", 0);
-       ir2_reg_create(instr, 0, "x", 0);
-
-       instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
-       instr->fetch.is_normalized = true;
-       ir2_reg_create(instr, 2, "xyz1", 0);
-       ir2_reg_create(instr, 0, "x", 0);
-
-       cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
-       cf = ir2_cf_create(so->ir, EXEC);
-
-       instr = ir2_instr_create_alu(cf, MAXv, ~0);
-       ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
-       ir2_reg_create(instr, 2, NULL, 0);
-       ir2_reg_create(instr, 2, NULL, 0);
-
-       cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
-       cf = ir2_cf_create(so->ir, EXEC_END);
-
-       instr = ir2_instr_create_alu(cf, MAXv, ~0);
-       ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
-       ir2_reg_create(instr, 1, NULL, 0);
-       ir2_reg_create(instr, 1, NULL, 0);
-
-       return assemble(so);
-}
-
-/* Creates shader:
- *    ALLOC PARAM/PIXEL SIZE(0x0)
- *    EXEC_END ADDR(0x1) CNT(0x1)
- *          ALU:       MAXv    export0 = C0, C0        ; gl_FragColor
- */
-static struct fd2_shader_stateobj *
-create_solid_fp(void)
-{
-       struct fd2_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
-       struct ir2_cf *cf;
-       struct ir2_instruction *instr;
-
-       if (!so)
-               return NULL;
-
-       so->ir = ir2_shader_create();
-
-       cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
-       cf = ir2_cf_create(so->ir, EXEC_END);
-
-       instr = ir2_instr_create_alu(cf, MAXv, ~0);
-       ir2_reg_create(instr, 0, NULL, IR2_REG_EXPORT);
-       ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
-       ir2_reg_create(instr, 0, NULL, IR2_REG_CONST);
-
-       return assemble(so);
-}
+       struct fd2_shader_stateobj *fp = NULL, *vp;
+       struct ir2_shader_info *fpi = NULL, *vpi;
+       struct ir2_frag_linkage *f = NULL;
+       uint8_t vs_gprs, fs_gprs = 0, vs_export = 0;
+       enum a2xx_sq_ps_vtx_mode mode = POSITION_1_VECTOR;
+       bool binning = (ctx->batch && ring == ctx->batch->binning);
+       unsigned variant = 0;
+
+       vp = prog->vs;
+
+       /* find variant matching the linked fragment shader */
+       if (!binning) {
+               fp = prog->fs;
+               for (variant = 1; variant < ARRAY_SIZE(vp->variant); variant++) {
+                       /* if checked all variants, compile a new variant */
+                       if (!vp->variant[variant].info.sizedwords) {
+                               ir2_compile(vp, variant, fp);
+                               break;
+                       }
 
-/* Creates shader:
- *    EXEC ADDR(0x3) CNT(0x1)
- *       (S)FETCH:     VERTEX  R1.xyz1 = R0.x FMT_32_32_32_FLOAT
- *                           UNSIGNED STRIDE(12) CONST(26, 0)
- *    ALLOC POSITION SIZE(0x0)
- *    EXEC ADDR(0x4) CNT(0x1)
- *          ALU:       MAXv    export62 = R1, R1       ; gl_Position
- *    ALLOC PARAM/PIXEL SIZE(0x0)
- *    EXEC_END ADDR(0x5) CNT(0x0)
- */
-static struct fd2_shader_stateobj *
-create_solid_vp(void)
-{
-       struct fd2_shader_stateobj *so = create_shader(SHADER_VERTEX);
-       struct ir2_cf *cf;
-       struct ir2_instruction *instr;
+                       /* check if fragment shader linkage matches */
+                       if (!memcmp(&vp->variant[variant].f, &fp->variant[0].f,
+                                       sizeof(struct ir2_frag_linkage)))
+                               break;
+               }
+               assert(variant < ARRAY_SIZE(vp->variant));
+       }
 
-       if (!so)
-               return NULL;
+       vpi = &vp->variant[variant].info;
+
+       /* fp stays NULL for the binning pass: only form pointers into it when
+        * it exists, rather than computing member addresses off a NULL pointer
+        * (undefined behaviour).  fpi and f are only read under an fp check.
+        */
+       if (fp) {
+               fpi = &fp->variant[0].info;
+               f = &fp->variant[0].f;
+       }
 
-       so->ir = ir2_shader_create();
+       /* clear/gmem2mem/mem2gmem need to be changed to remove this condition */
+       if (prog != &ctx->solid_prog && prog != &ctx->blit_prog[0]) {
+               patch_fetches(ctx, vpi, ctx->vtx.vtx, &ctx->tex[PIPE_SHADER_VERTEX]);
+               if (fp)
+                       patch_fetches(ctx, fpi, NULL, &ctx->tex[PIPE_SHADER_FRAGMENT]);
+       }
 
-       cf = ir2_cf_create(so->ir, EXEC);
+       emit(ring, MESA_SHADER_VERTEX, vpi,
+               binning ? &ctx->batch->shader_patches : NULL);
 
-       instr = ir2_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
-       ir2_reg_create(instr, 1, "xyz1", 0);
-       ir2_reg_create(instr, 0, "x", 0);
+       if (fp) {
+               emit(ring, MESA_SHADER_FRAGMENT, fpi, NULL);
+               fs_gprs = (fpi->max_reg < 0) ? 0x80 : fpi->max_reg;
+               vs_export = MAX2(1, f->inputs_count) - 1;
+       }
 
-       cf = ir2_cf_create_alloc(so->ir, SQ_POSITION, 0);
-       cf = ir2_cf_create(so->ir, EXEC);
+       vs_gprs = (vpi->max_reg < 0) ? 0x80 : vpi->max_reg;
 
-       instr = ir2_instr_create_alu(cf, MAXv, ~0);
-       ir2_reg_create(instr, 62, NULL, IR2_REG_EXPORT);
-       ir2_reg_create(instr, 1, NULL, 0);
-       ir2_reg_create(instr, 1, NULL, 0);
+       if (vp->writes_psize && !binning)
+               mode = POSITION_2_VECTORS_SPRITE;
 
-       cf = ir2_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
-       cf = ir2_cf_create(so->ir, EXEC_END);
+       /* set register to use for param (fragcoord/pointcoord/frontfacing) */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
+       OUT_RING(ring, A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY) |
+               COND(fp, A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(f->inputs_count)) |
+               /* we need SCREEN_XY for both fragcoord and frontfacing */
+               A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY);
 
-       return assemble(so);
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_A2XX_SQ_PROGRAM_CNTL));
+       OUT_RING(ring, A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(2) |
+                       A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(mode) |
+                       A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE |
+                       A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE |
+                       A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
+                       A2XX_SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
+                       A2XX_SQ_PROGRAM_CNTL_VS_REGS(vs_gprs) |
+                       COND(fp && fp->need_param, A2XX_SQ_PROGRAM_CNTL_PARAM_GEN) |
+                       COND(!fp, A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX));
 }
 
+/* Install the a2xx shader state hooks on the context, then prepare the
+ * built-in solid and blit programs: compile variant 1 of each vertex shader
+ * against its fragment shader and fill in its vertex fetch instructions by
+ * hand.  fd2_program_emit() skips patch_fetches() for exactly these two
+ * programs, so the values written here are the ones that get emitted.
+ */
 void
 fd2_prog_init(struct pipe_context *pctx)
 {
        struct fd_context *ctx = fd_context(pctx);
+       struct fd_program_stateobj *prog;
+       struct fd2_shader_stateobj *so;
+       struct ir2_shader_info *info;
+       instr_fetch_vtx_t *instr;
 
        pctx->create_fs_state = fd2_fp_state_create;
        pctx->delete_fs_state = fd2_fp_state_delete;
@@ -471,8 +293,47 @@ fd2_prog_init(struct pipe_context *pctx)
 
        fd_prog_init(pctx);
 
-       ctx->solid_prog.fp = create_solid_fp();
-       ctx->solid_prog.vp = create_solid_vp();
-       ctx->blit_prog[0].fp = create_blit_fp();
-       ctx->blit_prog[0].vp = create_blit_vp();
+       /* XXX maybe it's possible to reuse patch_vtx_fetch somehow? */
+
+       /* NOTE(review): prog->vs / prog->fs are assumed to have been created
+        * by fd_prog_init() above; that is not visible here, confirm.
+        */
+       prog = &ctx->solid_prog;
+       so = prog->vs;
+       ir2_compile(prog->vs, 1, prog->fs);
+
+#define IR2_FETCH_SWIZ_XY01 0xb08
+#define IR2_FETCH_SWIZ_XYZ1 0xa88
+
+       info = &so->variant[1].info;
+
+       /* solid program: a single vertex fetch, constant slot (26, 0) */
+       instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
+       instr->const_index = 26;
+       instr->const_index_sel = 0;
+       instr->format = FMT_32_32_32_FLOAT;
+       instr->format_comp_all = false;
+       instr->stride = 12;
+       instr->num_format_all = true;
+       instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
+
+       prog = &ctx->blit_prog[0];
+       so = prog->vs;
+       ir2_compile(prog->vs, 1, prog->fs);
+
+       info = &so->variant[1].info;
+
+       /* blit program, first fetch: two floats from constant slot (26, 1) */
+       instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[0].offset];
+       instr->const_index = 26;
+       instr->const_index_sel = 1;
+       instr->format = FMT_32_32_FLOAT;
+       instr->format_comp_all = false;
+       instr->stride = 8;
+       instr->num_format_all = false;
+       instr->dst_swiz = IR2_FETCH_SWIZ_XY01;
+
+       /* blit program, second fetch: three floats from constant slot (26, 0) */
+       instr = (instr_fetch_vtx_t*) &info->dwords[info->fetch_info[1].offset];
+       instr->const_index = 26;
+       instr->const_index_sel = 0;
+       instr->format = FMT_32_32_32_FLOAT;
+       instr->format_comp_all = false;
+       instr->stride = 12;
+       instr->num_format_all = false;
+       instr->dst_swiz = IR2_FETCH_SWIZ_XYZ1;
 }