nouveau: match gallium code reorganisation.
[mesa.git] / src / gallium / drivers / nv40 / nv40_fragprog.c
1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4
5 #include "pipe/p_shader_tokens.h"
6 #include "tgsi/util/tgsi_parse.h"
7 #include "tgsi/util/tgsi_util.h"
8
9 #include "nv40_context.h"
10
/* Source-swizzle component selectors; swz() token-pastes these names. */
#define SWZ_X 0
#define SWZ_Y 1
#define SWZ_Z 2
#define SWZ_W 3

/* Destination write-mask bits, one per component. */
#define MASK_X (1 << 0)
#define MASK_Y (1 << 1)
#define MASK_Z (1 << 2)
#define MASK_W (1 << 3)
#define MASK_ALL (MASK_X | MASK_Y | MASK_Z | MASK_W)

/*
 * Default destination scale and condition test.  These are defined ahead of
 * the nv40_shader.h include -- presumably because that header uses them;
 * confirm before reordering.
 */
#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X
#define DEF_CTEST NV40_FP_OP_COND_TR
22 #include "nv40_shader.h"
23
/* Shorthands that apply a modifier to a register descriptor. */
#define swz(s, x, y, z, w) \
	nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w)
#define neg(s)      nv40_sr_neg(s)
#define abs(s)      nv40_sr_abs(s)
#define scale(s, v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v)

/* Capacities of the per-translation constant and immediate tables. */
#define MAX_CONSTS 128
#define MAX_IMM    32
31 struct nv40_fpc {
32 struct nv40_fragment_program *fp;
33
34 uint attrib_map[PIPE_MAX_SHADER_INPUTS];
35
36 int high_temp;
37 int temp_temp_count;
38 int num_regs;
39
40 uint depth_id;
41 uint colour_id;
42
43 unsigned inst_offset;
44
45 struct {
46 int pipe;
47 float vals[4];
48 } consts[MAX_CONSTS];
49 int nr_consts;
50
51 struct nv40_sreg imm[MAX_IMM];
52 unsigned nr_imm;
53 };
54
55 static INLINE struct nv40_sreg
56 temp(struct nv40_fpc *fpc)
57 {
58 int idx;
59
60 idx = fpc->temp_temp_count++;
61 idx += fpc->high_temp + 1;
62 return nv40_sr(NV40SR_TEMP, idx);
63 }
64
65 static INLINE struct nv40_sreg
66 constant(struct nv40_fpc *fpc, int pipe, float vals[4])
67 {
68 int idx;
69
70 if (fpc->nr_consts == MAX_CONSTS)
71 assert(0);
72 idx = fpc->nr_consts++;
73
74 fpc->consts[idx].pipe = pipe;
75 if (pipe == -1)
76 memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float));
77 return nv40_sr(NV40SR_CONST, idx);
78 }
79
/*
 * Emit an arithmetic instruction; `o` is the opcode suffix that gets pasted
 * onto NV40_FP_OP_OPCODE_.
 */
#define arith(cc,s,o,d,m,s0,s1,s2) \
	nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \
		      (d), (m), (s0), (s1), (s2))

/*
 * Emit a texture instruction on unit `u`.  Only s0 is forwarded; the second
 * and third sources are always NONE registers (s1/s2 are accepted to keep
 * the call shape parallel to arith()).  The NONE registers are built here
 * so the macro does not depend on a caller-local variable named `none`.
 */
#define tex(cc,s,o,u,d,m,s0,s1,s2) \
	nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \
		    (d), (m), (s0), \
		    nv40_sr(NV40SR_NONE, 0), nv40_sr(NV40SR_NONE, 0))
86
87 static void
88 grow_insns(struct nv40_fpc *fpc, int size)
89 {
90 struct nv40_fragment_program *fp = fpc->fp;
91
92 fp->insn_len += size;
93 fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len);
94 }
95
96 static void
97 emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
98 {
99 struct nv40_fragment_program *fp = fpc->fp;
100 uint32_t *hw = &fp->insn[fpc->inst_offset];
101 uint32_t sr = 0;
102
103 switch (src.type) {
104 case NV40SR_INPUT:
105 sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
106 hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT);
107 break;
108 case NV40SR_OUTPUT:
109 sr |= NV40_FP_REG_SRC_HALF;
110 /* fall-through */
111 case NV40SR_TEMP:
112 sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT);
113 sr |= (src.index << NV40_FP_REG_SRC_SHIFT);
114 break;
115 case NV40SR_CONST:
116 grow_insns(fpc, 4);
117 hw = &fp->insn[fpc->inst_offset];
118 if (fpc->consts[src.index].pipe >= 0) {
119 struct nv40_fragment_program_data *fpd;
120
121 fp->consts = realloc(fp->consts, ++fp->nr_consts *
122 sizeof(*fpd));
123 fpd = &fp->consts[fp->nr_consts - 1];
124 fpd->offset = fpc->inst_offset + 4;
125 fpd->index = fpc->consts[src.index].pipe;
126 memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4);
127 } else {
128 memcpy(&fp->insn[fpc->inst_offset + 4],
129 fpc->consts[src.index].vals,
130 sizeof(uint32_t) * 4);
131 }
132
133 sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
134 break;
135 case NV40SR_NONE:
136 sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
137 break;
138 default:
139 assert(0);
140 }
141
142 if (src.negate)
143 sr |= NV40_FP_REG_NEGATE;
144
145 if (src.abs)
146 hw[1] |= (1 << (29 + pos));
147
148 sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) |
149 (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) |
150 (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) |
151 (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT));
152
153 hw[pos + 1] |= sr;
154 }
155
156 static void
157 emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst)
158 {
159 struct nv40_fragment_program *fp = fpc->fp;
160 uint32_t *hw = &fp->insn[fpc->inst_offset];
161
162 switch (dst.type) {
163 case NV40SR_TEMP:
164 if (fpc->num_regs < (dst.index + 1))
165 fpc->num_regs = dst.index + 1;
166 break;
167 case NV40SR_OUTPUT:
168 if (dst.index == 1) {
169 fp->fp_control |= 0xe;
170 } else {
171 hw[0] |= NV40_FP_OP_OUT_REG_HALF;
172 }
173 break;
174 case NV40SR_NONE:
175 hw[0] |= (1 << 30);
176 break;
177 default:
178 assert(0);
179 }
180
181 hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT);
182 }
183
184 static void
185 nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op,
186 struct nv40_sreg dst, int mask,
187 struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
188 {
189 struct nv40_fragment_program *fp = fpc->fp;
190 uint32_t *hw;
191
192 fpc->inst_offset = fp->insn_len;
193 grow_insns(fpc, 4);
194 hw = &fp->insn[fpc->inst_offset];
195 memset(hw, 0, sizeof(uint32_t) * 4);
196
197 if (op == NV40_FP_OP_OPCODE_KIL)
198 fp->fp_control |= NV40TCL_FP_CONTROL_KIL;
199 hw[0] |= (op << NV40_FP_OP_OPCODE_SHIFT);
200 hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT);
201 hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT);
202
203 if (sat)
204 hw[0] |= NV40_FP_OP_OUT_SAT;
205
206 if (dst.cc_update)
207 hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE;
208 hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT);
209 hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) |
210 (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) |
211 (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) |
212 (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT));
213
214 emit_dst(fpc, dst);
215 emit_src(fpc, 0, s0);
216 emit_src(fpc, 1, s1);
217 emit_src(fpc, 2, s2);
218 }
219
220 static void
221 nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit,
222 struct nv40_sreg dst, int mask,
223 struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2)
224 {
225 struct nv40_fragment_program *fp = fpc->fp;
226
227 nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2);
228
229 fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT);
230 fp->samplers |= (1 << unit);
231 }
232
233 static INLINE struct nv40_sreg
234 tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
235 {
236 struct nv40_sreg src;
237
238 switch (fsrc->SrcRegister.File) {
239 case TGSI_FILE_INPUT:
240 src = nv40_sr(NV40SR_INPUT,
241 fpc->attrib_map[fsrc->SrcRegister.Index]);
242 break;
243 case TGSI_FILE_CONSTANT:
244 src = constant(fpc, fsrc->SrcRegister.Index, NULL);
245 break;
246 case TGSI_FILE_IMMEDIATE:
247 assert(fsrc->SrcRegister.Index < fpc->nr_imm);
248 src = fpc->imm[fsrc->SrcRegister.Index];
249 break;
250 case TGSI_FILE_TEMPORARY:
251 src = nv40_sr(NV40SR_TEMP, fsrc->SrcRegister.Index + 1);
252 if (fpc->high_temp < src.index)
253 fpc->high_temp = src.index;
254 break;
255 /* This is clearly insane, but gallium hands us shaders like this.
256 * Luckily fragprog results are just temp regs..
257 */
258 case TGSI_FILE_OUTPUT:
259 if (fsrc->SrcRegister.Index == fpc->colour_id)
260 return nv40_sr(NV40SR_OUTPUT, 0);
261 else
262 return nv40_sr(NV40SR_OUTPUT, 1);
263 break;
264 default:
265 NOUVEAU_ERR("bad src file\n");
266 break;
267 }
268
269 src.abs = fsrc->SrcRegisterExtMod.Absolute;
270 src.negate = fsrc->SrcRegister.Negate;
271 src.swz[0] = fsrc->SrcRegister.SwizzleX;
272 src.swz[1] = fsrc->SrcRegister.SwizzleY;
273 src.swz[2] = fsrc->SrcRegister.SwizzleZ;
274 src.swz[3] = fsrc->SrcRegister.SwizzleW;
275 return src;
276 }
277
278 static INLINE struct nv40_sreg
279 tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
280 int idx;
281
282 switch (fdst->DstRegister.File) {
283 case TGSI_FILE_OUTPUT:
284 if (fdst->DstRegister.Index == fpc->colour_id)
285 return nv40_sr(NV40SR_OUTPUT, 0);
286 else
287 return nv40_sr(NV40SR_OUTPUT, 1);
288 break;
289 case TGSI_FILE_TEMPORARY:
290 idx = fdst->DstRegister.Index + 1;
291 if (fpc->high_temp < idx)
292 fpc->high_temp = idx;
293 return nv40_sr(NV40SR_TEMP, idx);
294 case TGSI_FILE_NULL:
295 return nv40_sr(NV40SR_NONE, 0);
296 default:
297 NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
298 return nv40_sr(NV40SR_NONE, 0);
299 }
300 }
301
302 static INLINE int
303 tgsi_mask(uint tgsi)
304 {
305 int mask = 0;
306
307 if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X;
308 if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y;
309 if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z;
310 if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W;
311 return mask;
312 }
313
314 static boolean
315 src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
316 struct nv40_sreg *src)
317 {
318 const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
319 struct nv40_sreg tgsi = tgsi_src(fpc, fsrc);
320 uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
321 uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
322 fsrc->SrcRegisterExtSwz.NegateY,
323 fsrc->SrcRegisterExtSwz.NegateZ,
324 fsrc->SrcRegisterExtSwz.NegateW };
325 uint c;
326
327 for (c = 0; c < 4; c++) {
328 switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
329 case TGSI_EXTSWIZZLE_X:
330 case TGSI_EXTSWIZZLE_Y:
331 case TGSI_EXTSWIZZLE_Z:
332 case TGSI_EXTSWIZZLE_W:
333 mask |= (1 << c);
334 break;
335 case TGSI_EXTSWIZZLE_ZERO:
336 zero_mask |= (1 << c);
337 tgsi.swz[c] = SWZ_X;
338 break;
339 case TGSI_EXTSWIZZLE_ONE:
340 one_mask |= (1 << c);
341 tgsi.swz[c] = SWZ_X;
342 break;
343 default:
344 assert(0);
345 }
346
347 if (!tgsi.negate && neg[c])
348 neg_mask |= (1 << c);
349 }
350
351 if (mask == MASK_ALL && !neg_mask)
352 return TRUE;
353
354 *src = temp(fpc);
355
356 if (mask)
357 arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
358
359 if (zero_mask)
360 arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
361
362 if (one_mask)
363 arith(fpc, 0, STR, *src, one_mask, *src, none, none);
364
365 if (neg_mask) {
366 struct nv40_sreg one = temp(fpc);
367 arith(fpc, 0, STR, one, neg_mask, one, none, none);
368 arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
369 }
370
371 return FALSE;
372 }
373
374 static boolean
375 nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
376 const struct tgsi_full_instruction *finst)
377 {
378 const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
379 struct nv40_sreg src[3], dst, tmp;
380 int mask, sat, unit;
381 int ai = -1, ci = -1;
382 int i;
383
384 if (finst->Instruction.Opcode == TGSI_OPCODE_END)
385 return TRUE;
386
387 fpc->temp_temp_count = 0;
388 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
389 const struct tgsi_full_src_register *fsrc;
390
391 fsrc = &finst->FullSrcRegisters[i];
392 if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
393 src[i] = tgsi_src(fpc, fsrc);
394 }
395 }
396
397 for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
398 const struct tgsi_full_src_register *fsrc;
399
400 fsrc = &finst->FullSrcRegisters[i];
401
402 switch (fsrc->SrcRegister.File) {
403 case TGSI_FILE_INPUT:
404 case TGSI_FILE_CONSTANT:
405 case TGSI_FILE_TEMPORARY:
406 if (!src_native_swz(fpc, fsrc, &src[i]))
407 continue;
408 break;
409 default:
410 break;
411 }
412
413 switch (fsrc->SrcRegister.File) {
414 case TGSI_FILE_INPUT:
415 if (ai == -1 || ai == fsrc->SrcRegister.Index) {
416 ai = fsrc->SrcRegister.Index;
417 src[i] = tgsi_src(fpc, fsrc);
418 } else {
419 NOUVEAU_MSG("extra src attr %d\n",
420 fsrc->SrcRegister.Index);
421 src[i] = temp(fpc);
422 arith(fpc, 0, MOV, src[i], MASK_ALL,
423 tgsi_src(fpc, fsrc), none, none);
424 }
425 break;
426 case TGSI_FILE_CONSTANT:
427 case TGSI_FILE_IMMEDIATE:
428 if (ci == -1 || ci == fsrc->SrcRegister.Index) {
429 ci = fsrc->SrcRegister.Index;
430 src[i] = tgsi_src(fpc, fsrc);
431 } else {
432 src[i] = temp(fpc);
433 arith(fpc, 0, MOV, src[i], MASK_ALL,
434 tgsi_src(fpc, fsrc), none, none);
435 }
436 break;
437 case TGSI_FILE_TEMPORARY:
438 /* handled above */
439 break;
440 case TGSI_FILE_SAMPLER:
441 unit = fsrc->SrcRegister.Index;
442 break;
443 case TGSI_FILE_OUTPUT:
444 break;
445 default:
446 NOUVEAU_ERR("bad src file\n");
447 return FALSE;
448 }
449 }
450
451 dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
452 mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
453 sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
454
455 switch (finst->Instruction.Opcode) {
456 case TGSI_OPCODE_ABS:
457 arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none);
458 break;
459 case TGSI_OPCODE_ADD:
460 arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
461 break;
462 case TGSI_OPCODE_CMP:
463 tmp = temp(fpc);
464 arith(fpc, sat, MOV, dst, mask, src[2], none, none);
465 tmp.cc_update = 1;
466 arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
467 dst.cc_test = NV40_VP_INST_COND_LT;
468 arith(fpc, sat, MOV, dst, mask, src[1], none, none);
469 break;
470 case TGSI_OPCODE_COS:
471 arith(fpc, sat, COS, dst, mask, src[0], none, none);
472 break;
473 case TGSI_OPCODE_DP3:
474 arith(fpc, sat, DP3, dst, mask, src[0], src[1], none);
475 break;
476 case TGSI_OPCODE_DP4:
477 arith(fpc, sat, DP4, dst, mask, src[0], src[1], none);
478 break;
479 case TGSI_OPCODE_DPH:
480 tmp = temp(fpc);
481 arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none);
482 arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X),
483 swz(src[1], W, W, W, W), none);
484 break;
485 case TGSI_OPCODE_DST:
486 arith(fpc, sat, DST, dst, mask, src[0], src[1], none);
487 break;
488 case TGSI_OPCODE_EX2:
489 arith(fpc, sat, EX2, dst, mask, src[0], none, none);
490 break;
491 case TGSI_OPCODE_FLR:
492 arith(fpc, sat, FLR, dst, mask, src[0], none, none);
493 break;
494 case TGSI_OPCODE_FRC:
495 arith(fpc, sat, FRC, dst, mask, src[0], none, none);
496 break;
497 case TGSI_OPCODE_KIL:
498 arith(fpc, 0, KIL, none, 0, none, none, none);
499 break;
500 case TGSI_OPCODE_KILP:
501 dst = nv40_sr(NV40SR_NONE, 0);
502 dst.cc_update = 1;
503 arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none);
504 dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT;
505 arith(fpc, 0, KIL, dst, 0, none, none, none);
506 break;
507 case TGSI_OPCODE_LG2:
508 arith(fpc, sat, LG2, dst, mask, src[0], none, none);
509 break;
510 // case TGSI_OPCODE_LIT:
511 case TGSI_OPCODE_LRP:
512 tmp = temp(fpc);
513 arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
514 arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp);
515 break;
516 case TGSI_OPCODE_MAD:
517 arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]);
518 break;
519 case TGSI_OPCODE_MAX:
520 arith(fpc, sat, MAX, dst, mask, src[0], src[1], none);
521 break;
522 case TGSI_OPCODE_MIN:
523 arith(fpc, sat, MIN, dst, mask, src[0], src[1], none);
524 break;
525 case TGSI_OPCODE_MOV:
526 arith(fpc, sat, MOV, dst, mask, src[0], none, none);
527 break;
528 case TGSI_OPCODE_MUL:
529 arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
530 break;
531 case TGSI_OPCODE_POW:
532 tmp = temp(fpc);
533 arith(fpc, 0, LG2, tmp, MASK_X,
534 swz(src[0], X, X, X, X), none, none);
535 arith(fpc, 0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X),
536 swz(src[1], X, X, X, X), none);
537 arith(fpc, sat, EX2, dst, mask,
538 swz(tmp, X, X, X, X), none, none);
539 break;
540 case TGSI_OPCODE_RCP:
541 arith(fpc, sat, RCP, dst, mask, src[0], none, none);
542 break;
543 case TGSI_OPCODE_RET:
544 assert(0);
545 break;
546 case TGSI_OPCODE_RFL:
547 tmp = temp(fpc);
548 arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none);
549 arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none);
550 arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z,
551 swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none);
552 arith(fpc, sat, MAD, dst, mask,
553 swz(tmp, Z, Z, Z, Z), src[0], neg(src[1]));
554 break;
555 case TGSI_OPCODE_RSQ:
556 tmp = temp(fpc);
557 arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X,
558 abs(swz(src[0], X, X, X, X)), none, none);
559 arith(fpc, sat, EX2, dst, mask,
560 neg(swz(tmp, X, X, X, X)), none, none);
561 break;
562 case TGSI_OPCODE_SCS:
563 if (mask & MASK_X) {
564 arith(fpc, sat, COS, dst, MASK_X,
565 swz(src[0], X, X, X, X), none, none);
566 }
567 if (mask & MASK_Y) {
568 arith(fpc, sat, SIN, dst, MASK_Y,
569 swz(src[0], X, X, X, X), none, none);
570 }
571 break;
572 case TGSI_OPCODE_SIN:
573 arith(fpc, sat, SIN, dst, mask, src[0], none, none);
574 break;
575 case TGSI_OPCODE_SGE:
576 arith(fpc, sat, SGE, dst, mask, src[0], src[1], none);
577 break;
578 case TGSI_OPCODE_SLT:
579 arith(fpc, sat, SLT, dst, mask, src[0], src[1], none);
580 break;
581 case TGSI_OPCODE_SUB:
582 arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none);
583 break;
584 case TGSI_OPCODE_TEX:
585 if (finst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide ==
586 TGSI_EXTSWIZZLE_W) {
587 tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none);
588 } else
589 tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none);
590 break;
591 case TGSI_OPCODE_TXB:
592 tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none);
593 break;
594 case TGSI_OPCODE_XPD:
595 tmp = temp(fpc);
596 arith(fpc, 0, MUL, tmp, mask,
597 swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none);
598 arith(fpc, sat, MAD, dst, (mask & ~MASK_W),
599 swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y),
600 neg(tmp));
601 break;
602 default:
603 NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode);
604 return FALSE;
605 }
606
607 return TRUE;
608 }
609
610 static boolean
611 nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
612 const struct tgsi_full_declaration *fdec)
613 {
614 int hw;
615
616 switch (fdec->Semantic.SemanticName) {
617 case TGSI_SEMANTIC_POSITION:
618 hw = NV40_FP_OP_INPUT_SRC_POSITION;
619 break;
620 case TGSI_SEMANTIC_COLOR:
621 if (fdec->Semantic.SemanticIndex == 0) {
622 hw = NV40_FP_OP_INPUT_SRC_COL0;
623 } else
624 if (fdec->Semantic.SemanticIndex == 1) {
625 hw = NV40_FP_OP_INPUT_SRC_COL1;
626 } else {
627 NOUVEAU_ERR("bad colour semantic index\n");
628 return FALSE;
629 }
630 break;
631 case TGSI_SEMANTIC_FOG:
632 hw = NV40_FP_OP_INPUT_SRC_FOGC;
633 break;
634 case TGSI_SEMANTIC_GENERIC:
635 if (fdec->Semantic.SemanticIndex <= 7) {
636 hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic.
637 SemanticIndex);
638 } else {
639 NOUVEAU_ERR("bad generic semantic index\n");
640 return FALSE;
641 }
642 break;
643 default:
644 NOUVEAU_ERR("bad input semantic\n");
645 return FALSE;
646 }
647
648 fpc->attrib_map[fdec->u.DeclarationRange.First] = hw;
649 return TRUE;
650 }
651
652 static boolean
653 nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
654 const struct tgsi_full_declaration *fdec)
655 {
656 switch (fdec->Semantic.SemanticName) {
657 case TGSI_SEMANTIC_POSITION:
658 fpc->depth_id = fdec->u.DeclarationRange.First;
659 break;
660 case TGSI_SEMANTIC_COLOR:
661 fpc->colour_id = fdec->u.DeclarationRange.First;
662 break;
663 default:
664 NOUVEAU_ERR("bad output semantic\n");
665 return FALSE;
666 }
667
668 return TRUE;
669 }
670
671 void
672 nv40_fragprog_translate(struct nv40_context *nv40,
673 struct nv40_fragment_program *fp)
674 {
675 struct tgsi_parse_context parse;
676 struct nv40_fpc *fpc = NULL;
677
678 fpc = CALLOC(1, sizeof(struct nv40_fpc));
679 if (!fpc)
680 return;
681 fpc->fp = fp;
682 fpc->high_temp = -1;
683 fpc->num_regs = 2;
684
685 tgsi_parse_init(&parse, fp->pipe->tokens);
686
687 while (!tgsi_parse_end_of_tokens(&parse)) {
688 tgsi_parse_token(&parse);
689
690 switch (parse.FullToken.Token.Type) {
691 case TGSI_TOKEN_TYPE_DECLARATION:
692 {
693 const struct tgsi_full_declaration *fdec;
694 fdec = &parse.FullToken.FullDeclaration;
695 switch (fdec->Declaration.File) {
696 case TGSI_FILE_INPUT:
697 if (!nv40_fragprog_parse_decl_attrib(fpc, fdec))
698 goto out_err;
699 break;
700 case TGSI_FILE_OUTPUT:
701 if (!nv40_fragprog_parse_decl_output(fpc, fdec))
702 goto out_err;
703 break;
704 default:
705 break;
706 }
707 }
708 break;
709 case TGSI_TOKEN_TYPE_IMMEDIATE:
710 {
711 struct tgsi_full_immediate *imm;
712 float vals[4];
713
714 imm = &parse.FullToken.FullImmediate;
715 assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
716 assert(fpc->nr_imm < MAX_IMM);
717
718 vals[0] = imm->u.ImmediateFloat32[0].Float;
719 vals[1] = imm->u.ImmediateFloat32[1].Float;
720 vals[2] = imm->u.ImmediateFloat32[2].Float;
721 vals[3] = imm->u.ImmediateFloat32[3].Float;
722 fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals);
723 }
724 break;
725 case TGSI_TOKEN_TYPE_INSTRUCTION:
726 {
727 const struct tgsi_full_instruction *finst;
728
729 finst = &parse.FullToken.FullInstruction;
730 if (!nv40_fragprog_parse_instruction(fpc, finst))
731 goto out_err;
732 }
733 break;
734 default:
735 break;
736 }
737 }
738
739 fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT;
740
741 /* Terminate final instruction */
742 fp->insn[fpc->inst_offset] |= 0x00000001;
743
744 /* Append NOP + END instruction, may or may not be necessary. */
745 fpc->inst_offset = fp->insn_len;
746 grow_insns(fpc, 4);
747 fp->insn[fpc->inst_offset + 0] = 0x00000001;
748 fp->insn[fpc->inst_offset + 1] = 0x00000000;
749 fp->insn[fpc->inst_offset + 2] = 0x00000000;
750 fp->insn[fpc->inst_offset + 3] = 0x00000000;
751
752 fp->translated = TRUE;
753 fp->on_hw = FALSE;
754 out_err:
755 tgsi_parse_free(&parse);
756 free(fpc);
757 }
758
759 void
760 nv40_fragprog_bind(struct nv40_context *nv40, struct nv40_fragment_program *fp)
761 {
762 struct pipe_winsys *ws = nv40->pipe.winsys;
763 struct nouveau_stateobj *so;
764 int i;
765
766 if (!fp->translated) {
767 nv40_fragprog_translate(nv40, fp);
768 if (!fp->translated)
769 assert(0);
770 }
771
772 if (fp->nr_consts) {
773 float *map = ws->buffer_map(ws, nv40->fragprog.constant_buf,
774 PIPE_BUFFER_USAGE_CPU_READ);
775 for (i = 0; i < fp->nr_consts; i++) {
776 struct nv40_fragment_program_data *fpd = &fp->consts[i];
777 uint32_t *p = &fp->insn[fpd->offset];
778 uint32_t *cb = (uint32_t *)&map[fpd->index * 4];
779
780 if (!memcmp(p, cb, 4 * sizeof(float)))
781 continue;
782 memcpy(p, cb, 4 * sizeof(float));
783 fp->on_hw = 0;
784 }
785 ws->buffer_unmap(ws, nv40->fragprog.constant_buf);
786 }
787
788 if (!fp->on_hw) {
789 const uint32_t le = 1;
790 uint32_t *map;
791
792 if (!fp->buffer)
793 fp->buffer = ws->buffer_create(ws, 0x100, 0,
794 fp->insn_len * 4);
795 map = ws->buffer_map(ws, fp->buffer,
796 PIPE_BUFFER_USAGE_CPU_WRITE);
797
798 #if 0
799 for (i = 0; i < fp->insn_len; i++) {
800 NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]);
801 }
802 #endif
803
804 if ((*(const uint8_t *)&le)) {
805 for (i = 0; i < fp->insn_len; i++) {
806 map[i] = fp->insn[i];
807 }
808 } else {
809 /* Weird swapping for big-endian chips */
810 for (i = 0; i < fp->insn_len; i++) {
811 map[i] = ((fp->insn[i] & 0xffff) << 16) |
812 ((fp->insn[i] >> 16) & 0xffff);
813 }
814 }
815
816 ws->buffer_unmap(ws, fp->buffer);
817 fp->on_hw = TRUE;
818 }
819
820 so = so_new(4, 1);
821 so_method(so, nv40->hw->curie, NV40TCL_FP_ADDRESS, 1);
822 so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
823 NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
824 NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
825 so_method(so, nv40->hw->curie, NV40TCL_FP_CONTROL, 1);
826 so_data (so, fp->fp_control);
827
828 so_emit(nv40->nvws, so);
829 so_ref(so, &fp->so);
830 so_ref(NULL, &so);
831
832 nv40->fragprog.active = fp;
833 }
834
835 void
836 nv40_fragprog_destroy(struct nv40_context *nv40,
837 struct nv40_fragment_program *fp)
838 {
839 if (fp->insn_len)
840 free(fp->insn);
841 }
842