0a7500f1611dd3098d545c3e5bf1c3afde8cbfa5
[mesa.git] / src / gallium / drivers / freedreno / a3xx / fd3_program.c
1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
3 /*
4 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29 #include "pipe/p_state.h"
30 #include "util/u_string.h"
31 #include "util/u_memory.h"
32 #include "util/u_inlines.h"
33 #include "util/u_format.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "tgsi/tgsi_parse.h"
36
37 #include "freedreno_lowering.h"
38 #include "freedreno_program.h"
39
40 #include "fd3_program.h"
41 #include "fd3_compiler.h"
42 #include "fd3_emit.h"
43 #include "fd3_texture.h"
44 #include "fd3_util.h"
45
46 static void
47 delete_variant(struct fd3_shader_variant *v)
48 {
49 ir3_shader_destroy(v->ir);
50 fd_bo_del(v->bo);
51 free(v);
52 }
53
54 static void
55 assemble_variant(struct fd3_shader_variant *so)
56 {
57 struct fd_context *ctx = fd_context(so->so->pctx);
58 uint32_t sz, *bin;
59
60 bin = ir3_shader_assemble(so->ir, &so->info);
61 sz = so->info.sizedwords * 4;
62
63 so->bo = fd_bo_new(ctx->dev, sz,
64 DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
65 DRM_FREEDRENO_GEM_TYPE_KMEM);
66
67 memcpy(fd_bo_map(so->bo), bin, sz);
68
69 free(bin);
70
71 so->instrlen = so->info.sizedwords / 8;
72 so->constlen = so->info.max_const + 1;
73 }
74
75 /* for vertex shader, the inputs are loaded into registers before the shader
76 * is executed, so max_regs from the shader instructions might not properly
77 * reflect the # of registers actually used:
78 */
79 static void
80 fixup_vp_regfootprint(struct fd3_shader_variant *so)
81 {
82 unsigned i;
83 for (i = 0; i < so->inputs_count; i++)
84 so->info.max_reg = MAX2(so->info.max_reg, (so->inputs[i].regid + 3) >> 2);
85 for (i = 0; i < so->outputs_count; i++)
86 so->info.max_reg = MAX2(so->info.max_reg, (so->outputs[i].regid + 3) >> 2);
87 }
88
89 static struct fd3_shader_variant *
90 create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
91 {
92 struct fd3_shader_variant *v = CALLOC_STRUCT(fd3_shader_variant);
93 const struct tgsi_token *tokens = so->tokens;
94 int ret;
95
96 if (!v)
97 return NULL;
98
99 v->so = so;
100 v->key = key;
101 v->type = so->type;
102
103 if (fd_mesa_debug & FD_DBG_DISASM) {
104 DBG("dump tgsi: type=%d", so->type);
105 tgsi_dump(tokens, 0);
106 }
107
108 if (!(fd_mesa_debug & FD_DBG_NOOPT)) {
109 ret = fd3_compile_shader(v, tokens, key);
110 if (ret) {
111 debug_error("new compiler failed, trying fallback!");
112
113 v->inputs_count = 0;
114 v->outputs_count = 0;
115 v->total_in = 0;
116 v->samplers_count = 0;
117 v->immediates_count = 0;
118 }
119 } else {
120 ret = -1; /* force fallback to old compiler */
121 }
122
123 if (ret)
124 ret = fd3_compile_shader_old(v, tokens, key);
125
126 if (ret) {
127 debug_error("compile failed!");
128 goto fail;
129 }
130
131 assemble_variant(v);
132 if (!v->bo) {
133 debug_error("assemble failed!");
134 goto fail;
135 }
136
137 if (so->type == SHADER_VERTEX)
138 fixup_vp_regfootprint(v);
139
140 if (fd_mesa_debug & FD_DBG_DISASM) {
141 DBG("disassemble: type=%d", v->type);
142 disasm_a3xx(fd_bo_map(v->bo), v->info.sizedwords, 0, v->type);
143 }
144
145 return v;
146
147 fail:
148 delete_variant(v);
149 return NULL;
150 }
151
152 struct fd3_shader_variant *
153 fd3_shader_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key)
154 {
155 struct fd3_shader_variant *v;
156
157 /* some shader key values only apply to vertex or frag shader,
158 * so normalize the key to avoid constructing multiple identical
159 * variants:
160 */
161 if (so->type == SHADER_FRAGMENT) {
162 key.binning_pass = false;
163 }
164 if (so->type == SHADER_VERTEX) {
165 key.color_two_side = false;
166 key.half_precision = false;
167 }
168
169 for (v = so->variants; v; v = v->next)
170 if (!memcmp(&key, &v->key, sizeof(key)))
171 return v;
172
173 /* compile new variant if it doesn't exist already: */
174 v = create_variant(so, key);
175 v->next = so->variants;
176 so->variants = v;
177
178 return v;
179 }
180
181
182 static void
183 delete_shader(struct fd3_shader_stateobj *so)
184 {
185 struct fd3_shader_variant *v, *t;
186 for (v = so->variants; v; ) {
187 t = v;
188 v = v->next;
189 delete_variant(t);
190 }
191 free((void *)so->tokens);
192 free(so);
193 }
194
195 static struct fd3_shader_stateobj *
196 create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso,
197 enum shader_t type)
198 {
199 struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
200 so->pctx = pctx;
201 so->type = type;
202 so->tokens = tgsi_dup_tokens(cso->tokens);
203 return so;
204 }
205
206 static void *
207 fd3_fp_state_create(struct pipe_context *pctx,
208 const struct pipe_shader_state *cso)
209 {
210 return create_shader(pctx, cso, SHADER_FRAGMENT);
211 }
212
/* pipe_context::delete_fs_state hook: destroy a fragment shader state
 * object ('pctx' is not used).
 */
static void
fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd3_shader_stateobj *)hwcso);
}
219
220 static void *
221 fd3_vp_state_create(struct pipe_context *pctx,
222 const struct pipe_shader_state *cso)
223 {
224 return create_shader(pctx, cso, SHADER_VERTEX);
225 }
226
/* pipe_context::delete_vs_state hook: destroy a vertex shader state
 * object ('pctx' is not used).
 */
static void
fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader((struct fd3_shader_stateobj *)hwcso);
}
233
234 static void
235 emit_shader(struct fd_ringbuffer *ring, const struct fd3_shader_variant *so)
236 {
237 const struct ir3_shader_info *si = &so->info;
238 enum adreno_state_block sb;
239 enum adreno_state_src src;
240 uint32_t i, sz, *bin;
241
242 if (so->type == SHADER_VERTEX) {
243 sb = SB_VERT_SHADER;
244 } else {
245 sb = SB_FRAG_SHADER;
246 }
247
248 if (fd_mesa_debug & FD_DBG_DIRECT) {
249 sz = si->sizedwords;
250 src = SS_DIRECT;
251 bin = fd_bo_map(so->bo);
252 } else {
253 sz = 0;
254 src = SS_INDIRECT;
255 bin = NULL;
256 }
257
258 OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
259 OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
260 CP_LOAD_STATE_0_STATE_SRC(src) |
261 CP_LOAD_STATE_0_STATE_BLOCK(sb) |
262 CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
263 if (bin) {
264 OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
265 CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
266 } else {
267 OUT_RELOC(ring, so->bo, 0,
268 CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
269 }
270 for (i = 0; i < sz; i++) {
271 OUT_RING(ring, bin[i]);
272 }
273 }
274
275 static int
276 find_output(const struct fd3_shader_variant *so, fd3_semantic semantic)
277 {
278 int j;
279 for (j = 0; j < so->outputs_count; j++)
280 if (so->outputs[j].semantic == semantic)
281 return j;
282 return 0;
283 }
284
285 static uint32_t
286 find_output_regid(const struct fd3_shader_variant *so, fd3_semantic semantic)
287 {
288 int j;
289 for (j = 0; j < so->outputs_count; j++)
290 if (so->outputs[j].semantic == semantic)
291 return so->outputs[j].regid;
292 return regid(63, 0);
293 }
294
295 void
296 fd3_program_emit(struct fd_ringbuffer *ring,
297 struct fd_program_stateobj *prog, struct fd3_shader_key key)
298 {
299 const struct fd3_shader_variant *vp, *fp;
300 const struct ir3_shader_info *vsi, *fsi;
301 uint32_t pos_regid, posz_regid, psize_regid, color_regid;
302 int i;
303
304 vp = fd3_shader_variant(prog->vp, key);
305
306 if (key.binning_pass) {
307 /* use dummy stateobj to simplify binning vs non-binning: */
308 static const struct fd3_shader_variant binning_fp = {};
309 fp = &binning_fp;
310 } else {
311 fp = fd3_shader_variant(prog->fp, key);
312 }
313
314 vsi = &vp->info;
315 fsi = &fp->info;
316
317 pos_regid = find_output_regid(vp,
318 fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
319 posz_regid = find_output_regid(fp,
320 fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
321 psize_regid = find_output_regid(vp,
322 fd3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
323 color_regid = find_output_regid(fp,
324 fd3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
325
326 /* we could probably divide this up into things that need to be
327 * emitted if frag-prog is dirty vs if vert-prog is dirty..
328 */
329
330 OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
331 OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
332 /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
333 * flush some caches? I think we only need to set those
334 * bits if we have updated const or shader..
335 */
336 A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
337 A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
338 OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
339 A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
340 OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
341 OUT_RING(ring, 0x00000000); /* HLSQ_CONTROL_3_REG */
342 OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
343 A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
344 A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vp->instrlen));
345 OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
346 A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
347 A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fp->instrlen));
348
349 OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
350 OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
351 COND(key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
352 A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
353 A3XX_SP_SP_CTRL_REG_L0MODE(0));
354
355 OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
356 OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
357
358 OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
359 OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
360 A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
361 A3XX_SP_VS_CTRL_REG0_CACHEINVALID |
362 A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
363 A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
364 A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
365 A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
366 A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
367 COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
368 A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen));
369 OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
370 A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
371 A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vsi->max_const, 0)));
372 OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
373 A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
374 A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->inputs_count));
375
376 for (i = 0; i < fp->inputs_count; ) {
377 uint32_t reg = 0;
378 int j;
379
380 OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i/2), 1);
381
382 j = find_output(vp, fp->inputs[i].semantic);
383 reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[j].regid);
384 reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[i].compmask);
385 i++;
386
387 j = find_output(vp, fp->inputs[i].semantic);
388 reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[j].regid);
389 reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[i].compmask);
390 i++;
391
392 OUT_RING(ring, reg);
393 }
394
395 for (i = 0; i < fp->inputs_count; ) {
396 uint32_t reg = 0;
397
398 OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i/4), 1);
399
400 reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[i++].inloc);
401 reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[i++].inloc);
402 reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[i++].inloc);
403 reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[i++].inloc);
404
405 OUT_RING(ring, reg);
406 }
407
408 OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
409 OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
410 A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
411 OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
412
413 if (key.binning_pass) {
414 OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
415 OUT_RING(ring, 0x00000000);
416
417 OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
418 OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
419 A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
420 OUT_RING(ring, 0x00000000);
421 } else {
422 OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
423 OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
424
425 OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
426 OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
427 A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
428 A3XX_SP_FS_CTRL_REG0_CACHEINVALID |
429 A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
430 A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
431 A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
432 A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
433 A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
434 COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
435 A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
436 OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
437 A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
438 A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
439 A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
440 OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
441 OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
442 A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
443 OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
444 }
445
446 OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
447 OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
448 OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
449
450 OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
451 if (fp->writes_pos) {
452 OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
453 A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
454 } else {
455 OUT_RING(ring, 0x00000000);
456 }
457
458 OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
459 OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(color_regid) |
460 COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION));
461 OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
462 OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
463 OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
464
465 if (key.binning_pass) {
466 OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
467 OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
468 A3XX_VPC_ATTR_LMSIZE(1));
469 OUT_RING(ring, 0x00000000);
470 } else {
471 OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
472 OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
473 A3XX_VPC_ATTR_THRDASSIGN(1) |
474 A3XX_VPC_ATTR_LMSIZE(1));
475 OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
476 A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
477
478 OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
479 OUT_RING(ring, fp->so->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
480 OUT_RING(ring, fp->so->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
481 OUT_RING(ring, fp->so->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
482 OUT_RING(ring, fp->so->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
483
484 OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
485 OUT_RING(ring, fp->so->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
486 OUT_RING(ring, fp->so->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
487 OUT_RING(ring, fp->so->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
488 OUT_RING(ring, fp->so->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
489 }
490
491 OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
492 OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
493 A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
494
495 emit_shader(ring, vp);
496
497 OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
498 OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
499
500 if (!key.binning_pass) {
501 emit_shader(ring, fp);
502
503 OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
504 OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
505 }
506
507 OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
508 OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
509 A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
510 A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(vp->inputs_count) |
511 A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(vp->inputs_count));
512 OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
513 A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
514 A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
515 }
516
517 /* hack.. until we figure out how to deal w/ vpsrepl properly.. */
518 static void
519 fix_blit_fp(struct pipe_context *pctx)
520 {
521 struct fd_context *ctx = fd_context(pctx);
522 struct fd3_shader_stateobj *so = ctx->blit_prog.fp;
523
524 so->vpsrepl[0] = 0x99999999;
525 so->vpsrepl[1] = 0x99999999;
526 so->vpsrepl[2] = 0x99999999;
527 so->vpsrepl[3] = 0x99999999;
528 }
529
530 void
531 fd3_prog_init(struct pipe_context *pctx)
532 {
533 pctx->create_fs_state = fd3_fp_state_create;
534 pctx->delete_fs_state = fd3_fp_state_delete;
535
536 pctx->create_vs_state = fd3_vp_state_create;
537 pctx->delete_vs_state = fd3_vp_state_delete;
538
539 fd_prog_init(pctx);
540
541 fix_blit_fp(pctx);
542 }