/*
 * Merge commit 'origin/7.8'
 * [mesa.git] / src / gallium / drivers / nvfx / nvfx_fragprog.c
 */
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "util/u_inlines.h"
5
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/tgsi_parse.h"
8 #include "tgsi/tgsi_util.h"
9
10 #include "nvfx_context.h"
11 #include "nvfx_shader.h"
12 #include "nvfx_resource.h"
13
/* Size limits for the compilation context below. */
#define MAX_CONSTS 128	/* constant slots (pipe constants + immediates) */
#define MAX_IMM 32	/* TGSI immediates that can be mapped to slots */
/*
 * Per-compilation state used while translating a TGSI fragment program
 * into NV3x/NV4x hardware instructions.
 */
struct nvfx_fpc {
	struct nvfx_fragment_program *fp;	/* program being assembled */

	/* TGSI input index -> hardware input slot (NVFX_FP_OP_INPUT_SRC_*) */
	uint attrib_map[PIPE_MAX_SHADER_INPUTS];

	/* Bitmask of allocated hardware temps; bits in r_temps_discard are
	 * released again at the end of each TGSI instruction. */
	unsigned r_temps;
	unsigned r_temps_discard;
	struct nvfx_sreg r_result[PIPE_MAX_SHADER_OUTPUTS];	/* output regs */
	struct nvfx_sreg *r_temp;	/* TGSI temp index -> hw temp reg */

	int num_regs;	/* highest hw temp written + 1 (see emit_dst) */

	unsigned inst_offset;	/* word offset of instruction being emitted */
	unsigned have_const;	/* current instruction already has its
				 * trailing 4-word constant slot */

	/* Constant slots: pipe >= 0 references that index in the pipe
	 * constant buffer (patched at validate time); pipe == -1 is an
	 * immediate whose value is embedded from vals[]. */
	struct {
		int pipe;
		float vals[4];
	} consts[MAX_CONSTS];
	int nr_consts;

	/* TGSI immediate index -> constant-slot register */
	struct nvfx_sreg imm[MAX_IMM];
	unsigned nr_imm;
};
40
41 static INLINE struct nvfx_sreg
42 temp(struct nvfx_fpc *fpc)
43 {
44 int idx = ffs(~fpc->r_temps) - 1;
45
46 if (idx < 0) {
47 NOUVEAU_ERR("out of temps!!\n");
48 assert(0);
49 return nvfx_sr(NVFXSR_TEMP, 0);
50 }
51
52 fpc->r_temps |= (1 << idx);
53 fpc->r_temps_discard |= (1 << idx);
54 return nvfx_sr(NVFXSR_TEMP, idx);
55 }
56
57 static INLINE void
58 release_temps(struct nvfx_fpc *fpc)
59 {
60 fpc->r_temps &= ~fpc->r_temps_discard;
61 fpc->r_temps_discard = 0;
62 }
63
64 static INLINE struct nvfx_sreg
65 constant(struct nvfx_fpc *fpc, int pipe, float vals[4])
66 {
67 int idx;
68
69 if (fpc->nr_consts == MAX_CONSTS)
70 assert(0);
71 idx = fpc->nr_consts++;
72
73 fpc->consts[idx].pipe = pipe;
74 if (pipe == -1)
75 memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
76 return nvfx_sr(NVFXSR_CONST, idx);
77 }
78
/* Convenience wrappers around the instruction emitters.  The opcode is
 * given without its NVFX_FP_OP_OPCODE_ prefix.  NOTE: tex() ignores its
 * s1/s2 arguments and expands to `none' instead, so a variable named
 * `none' must be in scope at every call site (macro hygiene hazard). */
#define arith(cc,s,o,d,m,s0,s1,s2) \
	nvfx_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \
			(d), (m), (s0), (s1), (s2))
#define tex(cc,s,o,u,d,m,s0,s1,s2) \
	nvfx_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \
			(d), (m), (s0), none, none)
85
86 static void
87 grow_insns(struct nvfx_fpc *fpc, int size)
88 {
89 struct nvfx_fragment_program *fp = fpc->fp;
90
91 fp->insn_len += size;
92 fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
93 }
94
/*
 * Encode source operand `src' into source slot `pos' (0..2) of the
 * instruction currently being assembled at fpc->inst_offset.
 *
 * A constant source appends one extra 4-word slot right after the 4
 * instruction words; the hardware format allows at most one such slot
 * per instruction (tracked by fpc->have_const).
 */
static void
emit_src(struct nvfx_fpc *fpc, int pos, struct nvfx_sreg src)
{
	struct nvfx_fragment_program *fp = fpc->fp;
	uint32_t *hw = &fp->insn[fpc->inst_offset];
	uint32_t sr = 0;

	switch (src.type) {
	case NVFXSR_INPUT:
		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
		/* the input selector lives in word 0, not the source word */
		hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT);
		break;
	case NVFXSR_OUTPUT:
		sr |= NVFX_FP_REG_SRC_HALF;
		/* fall-through */
	case NVFXSR_TEMP:
		sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT);
		sr |= (src.index << NVFX_FP_REG_SRC_SHIFT);
		break;
	case NVFXSR_CONST:
		/* reserve the trailing constant slot on first use */
		if (!fpc->have_const) {
			grow_insns(fpc, 4);
			fpc->have_const = 1;
		}

		/* grow_insns() may have realloc'd the buffer -- refetch */
		hw = &fp->insn[fpc->inst_offset];
		if (fpc->consts[src.index].pipe >= 0) {
			/* Value comes from the pipe constant buffer:
			 * remember offset/index so validate can patch it,
			 * and zero the slot for now. */
			struct nvfx_fragment_program_data *fpd;

			fp->consts = realloc(fp->consts, ++fp->nr_consts *
					     sizeof(*fpd));
			fpd = &fp->consts[fp->nr_consts - 1];
			fpd->offset = fpc->inst_offset + 4;
			fpd->index = fpc->consts[src.index].pipe;
			memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
		} else {
			/* immediate: embed the value directly */
			memcpy(&fp->insn[fpc->inst_offset + 4],
			       fpc->consts[src.index].vals,
			       sizeof(uint32_t) * 4);
		}

		sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT);
		break;
	case NVFXSR_NONE:
		sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT);
		break;
	default:
		assert(0);
	}

	if (src.negate)
		sr |= NVFX_FP_REG_NEGATE;

	if (src.abs)
		hw[1] |= (1 << (29 + pos));	/* per-source absolute bit */

	sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) |
	       (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) |
	       (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) |
	       (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT));

	/* words 1..3 hold the three source descriptors */
	hw[pos + 1] |= sr;
}
158
/*
 * Encode the destination register of the current instruction.
 * Temp writes update num_regs (used for the TEMP_COUNT field);
 * NVFXSR_NONE sets the "no register write" bit.
 */
static void
emit_dst(struct nvfx_fpc *fpc, struct nvfx_sreg dst)
{
	struct nvfx_fragment_program *fp = fpc->fp;
	uint32_t *hw = &fp->insn[fpc->inst_offset];

	switch (dst.type) {
	case NVFXSR_TEMP:
		/* track the highest temp used */
		if (fpc->num_regs < (dst.index + 1))
			fpc->num_regs = dst.index + 1;
		break;
	case NVFXSR_OUTPUT:
		if (dst.index == 1) {
			/* NOTE(review): presumably enables depth output;
			 * confirm the 0xe bits against NV34TCL_FP_CONTROL docs */
			fp->fp_control |= 0xe;
		} else {
			hw[0] |= NVFX_FP_OP_OUT_REG_HALF;
		}
		break;
	case NVFXSR_NONE:
		hw[0] |= (1 << 30);	/* suppress register write */
		break;
	default:
		assert(0);
	}

	hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT);
}
186
/*
 * Emit one 4-word ALU instruction.
 *
 * `dst' carries extra encoding state: cc_update/cc_test/cc_swz control
 * condition-code writing and predication, dst_scale selects a result
 * scale factor.  A KIL opcode additionally flags the whole program as
 * using fragment kill in fp_control.
 */
static void
nvfx_fp_arith(struct nvfx_fpc *fpc, int sat, int op,
	      struct nvfx_sreg dst, int mask,
	      struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
{
	struct nvfx_fragment_program *fp = fpc->fp;
	uint32_t *hw;

	/* start a fresh 4-word instruction at the end of the buffer */
	fpc->inst_offset = fp->insn_len;
	fpc->have_const = 0;	/* no constant slot reserved yet */
	grow_insns(fpc, 4);
	hw = &fp->insn[fpc->inst_offset];
	memset(hw, 0, sizeof(uint32_t) * 4);

	if (op == NVFX_FP_OP_OPCODE_KIL)
		fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL;
	hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT);
	hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT);
	hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT);

	if (sat)
		hw[0] |= NVFX_FP_OP_OUT_SAT;

	/* condition-code update / predication controls */
	if (dst.cc_update)
		hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE;
	hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT);
	hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) |
		  (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) |
		  (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) |
		  (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT));

	emit_dst(fpc, dst);
	emit_src(fpc, 0, s0);
	emit_src(fpc, 1, s1);
	emit_src(fpc, 2, s2);
}
223
224 static void
225 nvfx_fp_tex(struct nvfx_fpc *fpc, int sat, int op, int unit,
226 struct nvfx_sreg dst, int mask,
227 struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2)
228 {
229 struct nvfx_fragment_program *fp = fpc->fp;
230
231 nvfx_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
232
233 fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT);
234 fp->samplers |= (1 << unit);
235 }
236
237 static INLINE struct nvfx_sreg
238 tgsi_src(struct nvfx_fpc *fpc, const struct tgsi_full_src_register *fsrc)
239 {
240 struct nvfx_sreg src;
241
242 switch (fsrc->Register.File) {
243 case TGSI_FILE_INPUT:
244 src = nvfx_sr(NVFXSR_INPUT,
245 fpc->attrib_map[fsrc->Register.Index]);
246 break;
247 case TGSI_FILE_CONSTANT:
248 src = constant(fpc, fsrc->Register.Index, NULL);
249 break;
250 case TGSI_FILE_IMMEDIATE:
251 assert(fsrc->Register.Index < fpc->nr_imm);
252 src = fpc->imm[fsrc->Register.Index];
253 break;
254 case TGSI_FILE_TEMPORARY:
255 src = fpc->r_temp[fsrc->Register.Index];
256 break;
257 /* NV40 fragprog result regs are just temps, so this is simple */
258 case TGSI_FILE_OUTPUT:
259 src = fpc->r_result[fsrc->Register.Index];
260 break;
261 default:
262 NOUVEAU_ERR("bad src file\n");
263 break;
264 }
265
266 src.abs = fsrc->Register.Absolute;
267 src.negate = fsrc->Register.Negate;
268 src.swz[0] = fsrc->Register.SwizzleX;
269 src.swz[1] = fsrc->Register.SwizzleY;
270 src.swz[2] = fsrc->Register.SwizzleZ;
271 src.swz[3] = fsrc->Register.SwizzleW;
272 return src;
273 }
274
275 static INLINE struct nvfx_sreg
276 tgsi_dst(struct nvfx_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
277 switch (fdst->Register.File) {
278 case TGSI_FILE_OUTPUT:
279 return fpc->r_result[fdst->Register.Index];
280 case TGSI_FILE_TEMPORARY:
281 return fpc->r_temp[fdst->Register.Index];
282 case TGSI_FILE_NULL:
283 return nvfx_sr(NVFXSR_NONE, 0);
284 default:
285 NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
286 return nvfx_sr(NVFXSR_NONE, 0);
287 }
288 }
289
290 static INLINE int
291 tgsi_mask(uint tgsi)
292 {
293 int mask = 0;
294
295 if (tgsi & TGSI_WRITEMASK_X) mask |= NVFX_FP_MASK_X;
296 if (tgsi & TGSI_WRITEMASK_Y) mask |= NVFX_FP_MASK_Y;
297 if (tgsi & TGSI_WRITEMASK_Z) mask |= NVFX_FP_MASK_Z;
298 if (tgsi & TGSI_WRITEMASK_W) mask |= NVFX_FP_MASK_W;
299 return mask;
300 }
301
/*
 * Translate one TGSI instruction into one or more hardware fragment
 * program instructions.
 *
 * Returns TRUE on success (END is a successful no-op), FALSE on an
 * unsupported opcode or operand file.  Per-instruction scratch temps
 * are released at the end.
 */
static boolean
nvfx_fragprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
				const struct tgsi_full_instruction *finst)
{
	const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0);
	struct nvfx_sreg src[3], dst, tmp;
	int mask, sat, unit;
	int ai = -1, ci = -1, ii = -1;
	int i;

	if (finst->Instruction.Opcode == TGSI_OPCODE_END)
		return TRUE;

	/* Temporaries map 1:1 to hardware temps; resolve them first. */
	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		const struct tgsi_full_src_register *fsrc;

		fsrc = &finst->Src[i];
		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
			src[i] = tgsi_src(fpc, fsrc);
		}
	}

	/* One instruction can read at most one distinct input (ai) and one
	 * distinct constant-or-immediate (ci/ii).  Any additional distinct
	 * one is copied into a scratch temp with a MOV first. */
	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
		const struct tgsi_full_src_register *fsrc;

		fsrc = &finst->Src[i];

		switch (fsrc->Register.File) {
		case TGSI_FILE_INPUT:
			if (ai == -1 || ai == fsrc->Register.Index) {
				ai = fsrc->Register.Index;
				src[i] = tgsi_src(fpc, fsrc);
			} else {
				src[i] = temp(fpc);
				arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
				      tgsi_src(fpc, fsrc), none, none);
			}
			break;
		case TGSI_FILE_CONSTANT:
			if ((ci == -1 && ii == -1) ||
			    ci == fsrc->Register.Index) {
				ci = fsrc->Register.Index;
				src[i] = tgsi_src(fpc, fsrc);
			} else {
				src[i] = temp(fpc);
				arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
				      tgsi_src(fpc, fsrc), none, none);
			}
			break;
		case TGSI_FILE_IMMEDIATE:
			if ((ci == -1 && ii == -1) ||
			    ii == fsrc->Register.Index) {
				ii = fsrc->Register.Index;
				src[i] = tgsi_src(fpc, fsrc);
			} else {
				src[i] = temp(fpc);
				arith(fpc, 0, MOV, src[i], NVFX_FP_MASK_ALL,
				      tgsi_src(fpc, fsrc), none, none);
			}
			break;
		case TGSI_FILE_TEMPORARY:
			/* handled above */
			break;
		case TGSI_FILE_SAMPLER:
			/* assumes TGSI supplies a SAMPLER operand for every
			 * texture opcode, otherwise `unit' below would be
			 * uninitialized -- TODO confirm */
			unit = fsrc->Register.Index;
			break;
		case TGSI_FILE_OUTPUT:
			break;
		default:
			NOUVEAU_ERR("bad src file\n");
			return FALSE;
		}
	}

	dst = tgsi_dst(fpc, &finst->Dst[0]);
	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
	sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);

	switch (finst->Instruction.Opcode) {
	case TGSI_OPCODE_ABS:
		arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
		break;
	case TGSI_OPCODE_ADD:
		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_CMP:
		/* Write src0 to the condition codes, then predicate two
		 * MOVs on GE/LT: dst = (src0 < 0) ? src1 : src2. */
		tmp = nvfx_sr(NVFXSR_NONE, 0);
		tmp.cc_update = 1;
		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
		dst.cc_test = NVFX_COND_GE;
		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
		dst.cc_test = NVFX_COND_LT;
		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
		break;
	case TGSI_OPCODE_COS:
		arith(fpc, sat, COS, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_DDX:
		/* DDX only produces X/Y here; Z/W requested components are
		 * computed in a second pass through a swizzled temp. */
		if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
			tmp = temp(fpc);
			arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
			      swz(src[0], Z, W, Z, W), none, none);
			arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
			      swz(tmp, X, Y, X, Y), none, none);
			arith(fpc, sat, DDX, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
			      none, none);
			arith(fpc, 0, MOV, dst, mask, tmp, none, none);
		} else {
			arith(fpc, sat, DDX, dst, mask, src[0], none, none);
		}
		break;
	case TGSI_OPCODE_DDY:
		/* same Z/W expansion as DDX above */
		if (mask & (NVFX_FP_MASK_Z | NVFX_FP_MASK_W)) {
			tmp = temp(fpc);
			arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y,
			      swz(src[0], Z, W, Z, W), none, none);
			arith(fpc, 0, MOV, tmp, NVFX_FP_MASK_Z | NVFX_FP_MASK_W,
			      swz(tmp, X, Y, X, Y), none, none);
			arith(fpc, sat, DDY, tmp, NVFX_FP_MASK_X | NVFX_FP_MASK_Y, src[0],
			      none, none);
			arith(fpc, 0, MOV, dst, mask, tmp, none, none);
		} else {
			arith(fpc, sat, DDY, dst, mask, src[0], none, none);
		}
		break;
	case TGSI_OPCODE_DP3:
		arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_DP4:
		arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_DPH:
		/* DPH = DP3(src0, src1) + src1.w */
		tmp = temp(fpc);
		arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[1], none);
		arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
		      swz(src[1], W, W, W, W), none);
		break;
	case TGSI_OPCODE_DST:
		arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_EX2:
		arith(fpc, sat, EX2, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_FLR:
		arith(fpc, sat, FLR, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_FRC:
		arith(fpc, sat, FRC, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_KILP:
		/* unconditional kill */
		arith(fpc, 0, KIL, none, 0, none, none, none);
		break;
	case TGSI_OPCODE_KIL:
		/* set condition codes from src0, then kill where < 0 */
		dst = nvfx_sr(NVFXSR_NONE, 0);
		dst.cc_update = 1;
		arith(fpc, 0, MOV, dst, NVFX_FP_MASK_ALL, src[0], none, none);
		dst.cc_update = 0; dst.cc_test = NVFX_COND_LT;
		arith(fpc, 0, KIL, dst, 0, none, none, none);
		break;
	case TGSI_OPCODE_LG2:
		arith(fpc, sat, LG2, dst, mask, src[0], none, none);
		break;
//	case TGSI_OPCODE_LIT:
	case TGSI_OPCODE_LRP:
		/* NV30 has a native LRP; NV40 builds it from two MADs:
		 * dst = src0*src1 + (1-src0)*src2 */
		if(!nvfx->is_nv4x)
			arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]);
		else {
			tmp = temp(fpc);
			arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
			arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
		}
		break;
	case TGSI_OPCODE_MAD:
		arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
		break;
	case TGSI_OPCODE_MAX:
		arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_MIN:
		arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_MOV:
		arith(fpc, sat, MOV, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_MUL:
		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_POW:
		/* NV30 is native; NV40 uses pow(x,y) = 2^(y*log2(x)) */
		if(!nvfx->is_nv4x)
			arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none);
		else {
			tmp = temp(fpc);
			arith(fpc, 0, LG2, tmp, NVFX_FP_MASK_X,
			      swz(src[0], X, X, X, X), none, none);
			arith(fpc, 0, MUL, tmp, NVFX_FP_MASK_X, swz(tmp, X, X, X, X),
			      swz(src[1], X, X, X, X), none);
			arith(fpc, sat, EX2, dst, mask,
			      swz(tmp, X, X, X, X), none, none);
		}
		break;
	case TGSI_OPCODE_RCP:
		arith(fpc, sat, RCP, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_RET:
		assert(0);
		break;
	case TGSI_OPCODE_RFL:
		/* reflect: dst = 2*(N.I / N.N)*N - I */
		if(!nvfx->is_nv4x)
			arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none);
		else {
			tmp = temp(fpc);
			arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_X, src[0], src[0], none);
			arith(fpc, 0, DP3, tmp, NVFX_FP_MASK_Y, src[0], src[1], none);
			arith(fpc, 0, DIV, scale(tmp, 2X), NVFX_FP_MASK_Z,
			      swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
			arith(fpc, sat, MAD, dst, mask,
			      swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
		}
		break;
	case TGSI_OPCODE_RSQ:
		/* NV40: rsq(x) = 2^(-0.5 * log2(|x|)) */
		if(!nvfx->is_nv4x)
			arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
		else {
			tmp = temp(fpc);
			arith(fpc, 0, LG2, scale(tmp, INV_2X), NVFX_FP_MASK_X,
			      abs(swz(src[0], X, X, X, X)), none, none);
			arith(fpc, sat, EX2, dst, mask,
			      neg(swz(tmp, X, X, X, X)), none, none);
		}
		break;
	case TGSI_OPCODE_SCS:
		/* avoid overwriting the source */
		if(src[0].swz[NVFX_SWZ_X] != NVFX_SWZ_X)
		{
			if (mask & NVFX_FP_MASK_X) {
				arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
				      swz(src[0], X, X, X, X), none, none);
			}
			if (mask & NVFX_FP_MASK_Y) {
				arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
				      swz(src[0], X, X, X, X), none, none);
			}
		}
		else
		{
			/* reversed order when dst may alias src0.x */
			if (mask & NVFX_FP_MASK_Y) {
				arith(fpc, sat, SIN, dst, NVFX_FP_MASK_Y,
				      swz(src[0], X, X, X, X), none, none);
			}
			if (mask & NVFX_FP_MASK_X) {
				arith(fpc, sat, COS, dst, NVFX_FP_MASK_X,
				      swz(src[0], X, X, X, X), none, none);
			}
		}
		break;
	case TGSI_OPCODE_SEQ:
		arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SFL:
		arith(fpc, sat, SFL, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SGE:
		arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SGT:
		arith(fpc, sat, SGT, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SIN:
		arith(fpc, sat, SIN, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_SLE:
		arith(fpc, sat, SLE, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SLT:
		arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SNE:
		arith(fpc, sat, SNE, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_STR:
		arith(fpc, sat, STR, dst, mask, src[0], src[1], none);
		break;
	case TGSI_OPCODE_SUB:
		arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
		break;
	case TGSI_OPCODE_TEX:
		tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_TXB:
		tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_TXP:
		tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
		break;
	case TGSI_OPCODE_XPD:
		/* cross product via MUL + MAD with rotated swizzles */
		tmp = temp(fpc);
		arith(fpc, 0, MUL, tmp, mask,
		      swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
		arith(fpc, sat, MAD, dst, (mask & ~NVFX_FP_MASK_W),
		      swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
		      neg(tmp));
		break;
	default:
		NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
		return FALSE;
	}

	release_temps(fpc);
	return TRUE;
}
612
613 static boolean
614 nvfx_fragprog_parse_decl_attrib(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
615 const struct tgsi_full_declaration *fdec)
616 {
617 int hw;
618
619 switch (fdec->Semantic.Name) {
620 case TGSI_SEMANTIC_POSITION:
621 hw = NVFX_FP_OP_INPUT_SRC_POSITION;
622 break;
623 case TGSI_SEMANTIC_COLOR:
624 if (fdec->Semantic.Index == 0) {
625 hw = NVFX_FP_OP_INPUT_SRC_COL0;
626 } else
627 if (fdec->Semantic.Index == 1) {
628 hw = NVFX_FP_OP_INPUT_SRC_COL1;
629 } else {
630 NOUVEAU_ERR("bad colour semantic index\n");
631 return FALSE;
632 }
633 break;
634 case TGSI_SEMANTIC_FOG:
635 hw = NVFX_FP_OP_INPUT_SRC_FOGC;
636 break;
637 case TGSI_SEMANTIC_GENERIC:
638 if (fdec->Semantic.Index <= 7) {
639 hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic.
640 Index);
641 } else {
642 NOUVEAU_ERR("bad generic semantic index\n");
643 return FALSE;
644 }
645 break;
646 default:
647 NOUVEAU_ERR("bad input semantic\n");
648 return FALSE;
649 }
650
651 fpc->attrib_map[fdec->Range.First] = hw;
652 return TRUE;
653 }
654
655 static boolean
656 nvfx_fragprog_parse_decl_output(struct nvfx_context* nvfx, struct nvfx_fpc *fpc,
657 const struct tgsi_full_declaration *fdec)
658 {
659 unsigned idx = fdec->Range.First;
660 unsigned hw;
661
662 switch (fdec->Semantic.Name) {
663 case TGSI_SEMANTIC_POSITION:
664 hw = 1;
665 break;
666 case TGSI_SEMANTIC_COLOR:
667 hw = ~0;
668 switch (fdec->Semantic.Index) {
669 case 0: hw = 0; break;
670 case 1: hw = 2; break;
671 case 2: hw = 3; break;
672 case 3: hw = 4; break;
673 }
674 if(hw > ((nvfx->is_nv4x) ? 4 : 2)) {
675 NOUVEAU_ERR("bad rcol index\n");
676 return FALSE;
677 }
678 break;
679 default:
680 NOUVEAU_ERR("bad output semantic\n");
681 return FALSE;
682 }
683
684 fpc->r_result[idx] = nvfx_sr(NVFXSR_OUTPUT, hw);
685 fpc->r_temps |= (1 << hw);
686 return TRUE;
687 }
688
/*
 * First pass over the TGSI tokens: process input/output declarations,
 * turn float immediates into embedded constant slots, and pre-allocate
 * one hardware temp for every declared TGSI temporary.
 *
 * Returns FALSE (after freeing fpc->r_temp) on a bad declaration.
 */
static boolean
nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
{
	struct tgsi_parse_context p;
	int high_temp = -1, i;

	tgsi_parse_init(&p, fpc->fp->pipe.tokens);
	while (!tgsi_parse_end_of_tokens(&p)) {
		const union tgsi_full_token *tok = &p.FullToken;

		tgsi_parse_token(&p);
		switch(tok->Token.Type) {
		case TGSI_TOKEN_TYPE_DECLARATION:
		{
			const struct tgsi_full_declaration *fdec;
			fdec = &p.FullToken.FullDeclaration;
			switch (fdec->Declaration.File) {
			case TGSI_FILE_INPUT:
				if (!nvfx_fragprog_parse_decl_attrib(nvfx, fpc, fdec))
					goto out_err;
				break;
			case TGSI_FILE_OUTPUT:
				if (!nvfx_fragprog_parse_decl_output(nvfx, fpc, fdec))
					goto out_err;
				break;
			case TGSI_FILE_TEMPORARY:
				/* track the highest declared temp index */
				if (fdec->Range.Last > high_temp) {
					high_temp =
						fdec->Range.Last;
				}
				break;
			default:
				break;
			}
		}
			break;
		case TGSI_TOKEN_TYPE_IMMEDIATE:
		{
			struct tgsi_full_immediate *imm;
			float vals[4];

			imm = &p.FullToken.FullImmediate;
			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
			assert(fpc->nr_imm < MAX_IMM);

			vals[0] = imm->u[0].Float;
			vals[1] = imm->u[1].Float;
			vals[2] = imm->u[2].Float;
			vals[3] = imm->u[3].Float;
			/* pipe == -1: value is embedded in the program */
			fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
		}
			break;
		default:
			break;
		}
	}
	tgsi_parse_free(&p);

	/* ++high_temp turns the highest index into a count; zero means no
	 * TGSI temporaries were declared at all. */
	if (++high_temp) {
		/* NOTE(review): CALLOC result is not checked before use --
		 * confirm OOM handling policy */
		fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg));
		for (i = 0; i < high_temp; i++)
			fpc->r_temp[i] = temp(fpc);
		/* keep these temps allocated for the whole program */
		fpc->r_temps_discard = 0;
	}

	return TRUE;

out_err:
	if (fpc->r_temp)
		FREE(fpc->r_temp);
	tgsi_parse_free(&p);
	return FALSE;
}
762
/*
 * Translate a whole TGSI fragment program into hardware instructions.
 *
 * On success fp->translated is set and fp->insn/insn_len hold the
 * encoded program (terminated and padded with a NOP+END); on failure
 * fp->translated stays FALSE and the caller handles the fallback.
 */
static void
nvfx_fragprog_translate(struct nvfx_context *nvfx,
			struct nvfx_fragment_program *fp)
{
	struct tgsi_parse_context parse;
	struct nvfx_fpc *fpc = NULL;

	fpc = CALLOC(1, sizeof(struct nvfx_fpc));
	if (!fpc)
		return;
	fpc->fp = fp;
	fpc->num_regs = 2;	/* NOTE(review): presumably the hw minimum
				 * temp count -- confirm */

	if (!nvfx_fragprog_prepare(nvfx, fpc)) {
		FREE(fpc);
		return;
	}

	tgsi_parse_init(&parse, fp->pipe.tokens);

	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		switch (parse.FullToken.Token.Type) {
		case TGSI_TOKEN_TYPE_INSTRUCTION:
		{
			const struct tgsi_full_instruction *finst;

			finst = &parse.FullToken.FullInstruction;
			if (!nvfx_fragprog_parse_instruction(nvfx, fpc, finst))
				goto out_err;
		}
			break;
		default:
			break;
		}
	}

	/* Pack the temp-register count: NV3x and NV4x encode it differently. */
	if(!nvfx->is_nv4x)
		fp->fp_control |= (fpc->num_regs-1)/2;
	else
		fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;

	/* Terminate final instruction */
	if(fp->insn)
		fp->insn[fpc->inst_offset] |= 0x00000001;

	/* Append NOP + END instruction, may or may not be necessary. */
	fpc->inst_offset = fp->insn_len;
	grow_insns(fpc, 4);
	fp->insn[fpc->inst_offset + 0] = 0x00000001;
	fp->insn[fpc->inst_offset + 1] = 0x00000000;
	fp->insn[fpc->inst_offset + 2] = 0x00000000;
	fp->insn[fpc->inst_offset + 3] = 0x00000000;

	fp->translated = TRUE;
out_err:
	tgsi_parse_free(&parse);
	if (fpc->r_temp)
		FREE(fpc->r_temp);
	FREE(fpc);
}
825
/*
 * Copy the assembled program into its GPU buffer.  Endianness is
 * probed at runtime via `le'; on little-endian hosts this is a plain
 * buffer write, on big-endian hosts each 32-bit word has its 16-bit
 * halves swapped during the copy.
 */
static void
nvfx_fragprog_upload(struct nvfx_context *nvfx,
		     struct nvfx_fragment_program *fp)
{
	struct pipe_context *pipe = &nvfx->pipe;
	const uint32_t le = 1;	/* runtime endianness probe */

#if 0
	for (i = 0; i < fp->insn_len; i++) {
		fflush(stdout); fflush(stderr);
		NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
		fflush(stdout); fflush(stderr);
	}
#endif

	if ((*(const uint8_t *)&le)) {
		/* Can do this with an inline transfer */
		pipe_buffer_write(pipe,
				  fp->buffer,
				  0,
				  fp->insn_len * sizeof fp->insn[0],
				  fp->insn);
	} else {
		struct pipe_transfer *transfer;
		uint32_t *map;
		int i;

		map = pipe_buffer_map(pipe, fp->buffer,
				      PIPE_TRANSFER_WRITE,
				      &transfer);

		/* Weird swapping for big-endian chips */
		for (i = 0; i < fp->insn_len; i++) {
			map[i] = ((fp->insn[i] & 0xffff) << 16) |
				  ((fp->insn[i] >> 16) & 0xffff);
		}

		pipe_buffer_unmap(pipe, fp->buffer, transfer);
	}
}
866
/*
 * Validate the bound fragment program: translate and upload it on
 * first use, build its state object, then patch any pipe constants
 * whose values changed and re-upload if needed.
 *
 * Returns TRUE when the hardware state object must be re-emitted.
 */
static boolean
nvfx_fragprog_validate(struct nvfx_context *nvfx)
{
	struct pipe_context *pipe = &nvfx->pipe;
	struct nvfx_fragment_program *fp = nvfx->fragprog;
	struct pipe_resource *constbuf =
		nvfx->constbuf[PIPE_SHADER_FRAGMENT];
	struct pipe_screen *pscreen = nvfx->pipe.screen;
	struct nouveau_stateobj *so;
	boolean new_consts = FALSE;
	int i;

	if (fp->translated)
		goto update_constants;

	nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG;
	nvfx_fragprog_translate(nvfx, fp);
	if (!fp->translated) {
		/* translation failed: request software rasterization */
		nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG;
		return FALSE;
	}

	fp->buffer = pipe_buffer_create(pscreen,
					/* XXX: no alignment, maybe use a priv bind flag
					 * 0x100,
					 */
					0, fp->insn_len * 4);
	nvfx_fragprog_upload(nvfx, fp);

	/* Build the program's hardware state object once. */
	so = so_new(4, 4, 1);
	so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1);
	so_reloc (so, nvfx_resource(fp->buffer)->bo, 0, NOUVEAU_BO_VRAM |
		  NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
		  NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
		  NV34TCL_FP_ACTIVE_PROGRAM_DMA1);
	so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1);
	so_data (so, fp->fp_control);
	if(!nvfx->is_nv4x) {
		so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1);
		so_data (so, (1<<16)|0x4);
		so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1);
		so_data (so, fp->samplers);
	}

	so_ref(so, &fp->so);
	so_ref(NULL, &so);

update_constants:
	if (fp->nr_consts) {
		struct pipe_transfer *transfer;
		float *map;

		map = pipe_buffer_map(pipe, constbuf,
				      PIPE_TRANSFER_READ,
				      &transfer);

		/* XXX: probably a bad idea to be reading back data
		 * from a buffer the gpu has been using. Not really
		 * sure what this code is doing though, or how to
		 * avoid it - kw.
		 */
		for (i = 0; i < fp->nr_consts; i++) {
			struct nvfx_fragment_program_data *fpd = &fp->consts[i];
			uint32_t *p = &fp->insn[fpd->offset];
			uint32_t *cb = (uint32_t *)&map[fpd->index * 4];

			/* only dirty the program when a value changed */
			if (!memcmp(p, cb, 4 * sizeof(float)))
				continue;
			memcpy(p, cb, 4 * sizeof(float));
			new_consts = TRUE;
		}
		pipe_buffer_unmap(pipe, constbuf, transfer);

		if (new_consts)
			nvfx_fragprog_upload(nvfx, fp);
	}

	if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) {
		so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]);
		return TRUE;
	}

	return FALSE;
}
951
952 void
953 nvfx_fragprog_destroy(struct nvfx_context *nvfx,
954 struct nvfx_fragment_program *fp)
955 {
956 if (fp->buffer)
957 pipe_resource_reference(&fp->buffer, NULL);
958
959 if (fp->so)
960 so_ref(NULL, &fp->so);
961
962 if (fp->insn_len)
963 FREE(fp->insn);
964 }
965
/* State-tracker entry: revalidate the fragment program whenever the
 * NVFX_NEW_FRAGPROG dirty flag is set. */
struct nvfx_state_entry nvfx_state_fragprog = {
	.validate = nvfx_fragprog_validate,
	.dirty = {
		.pipe = NVFX_NEW_FRAGPROG,
		.hw = NVFX_STATE_FRAGPROG
	}
};