disable assertion
[mesa.git] / src / mesa / drivers / dri / nouveau / nouveau_shader_0.c
1 /*
2 * Copyright (C) 2006 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /*
29 * Authors:
30 * Ben Skeggs <darktama@iinet.net.au>
31 */
32
33 #include "glheader.h"
34 #include "macros.h"
35 #include "enums.h"
36
37 #include "shader/prog_instruction.h"
38 #include "shader/prog_parameter.h"
39 #include "shader/prog_statevars.h"
40 #include "shader/programopt.h"
41
42 #include "nouveau_context.h"
43 #include "nouveau_shader.h"
44 #include "nouveau_msg.h"
45
46 static nvsFixedReg _tx_mesa_vp_dst_reg[VERT_RESULT_MAX] = {
47 NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD,
48 NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
49 NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7,
50 NVS_FR_POINTSZ, NVS_FR_BFC0, NVS_FR_BFC1, NVS_FR_UNKNOWN /* EDGE */
51 };
52
53 static nvsFixedReg _tx_mesa_fp_dst_reg[FRAG_RESULT_MAX] = {
54 NVS_FR_FRAGDATA0 /* COLR */, NVS_FR_FRAGDATA0 /* COLH */,
55 NVS_FR_UNKNOWN /* DEPR */
56 };
57
58 static nvsFixedReg _tx_mesa_fp_src_reg[FRAG_ATTRIB_MAX] = {
59 NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD,
60 NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
61 NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7
62 };
63
64 static nvsSwzComp _tx_mesa_swizzle[4] = {
65 NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W
66 };
67
68 static nvsOpcode _tx_mesa_opcode[] = {
69 [OPCODE_ABS] = NVS_OP_ABS, [OPCODE_ADD] = NVS_OP_ADD,
70 [OPCODE_ARA] = NVS_OP_ARA, [OPCODE_ARL] = NVS_OP_ARL,
71 [OPCODE_ARL_NV] = NVS_OP_ARL, [OPCODE_ARR] = NVS_OP_ARR,
72 [OPCODE_CMP] = NVS_OP_CMP, [OPCODE_COS] = NVS_OP_COS,
73 [OPCODE_DDX] = NVS_OP_DDX, [OPCODE_DDY] = NVS_OP_DDY,
74 [OPCODE_DP3] = NVS_OP_DP3, [OPCODE_DP4] = NVS_OP_DP4,
75 [OPCODE_DPH] = NVS_OP_DPH, [OPCODE_DST] = NVS_OP_DST,
76 [OPCODE_EX2] = NVS_OP_EX2, [OPCODE_EXP] = NVS_OP_EXP,
77 [OPCODE_FLR] = NVS_OP_FLR, [OPCODE_FRC] = NVS_OP_FRC,
78 [OPCODE_KIL] = NVS_OP_EMUL, [OPCODE_KIL_NV] = NVS_OP_KIL,
79 [OPCODE_LG2] = NVS_OP_LG2, [OPCODE_LIT] = NVS_OP_LIT,
80 [OPCODE_LOG] = NVS_OP_LOG,
81 [OPCODE_LRP] = NVS_OP_LRP,
82 [OPCODE_MAD] = NVS_OP_MAD, [OPCODE_MAX] = NVS_OP_MAX,
83 [OPCODE_MIN] = NVS_OP_MIN, [OPCODE_MOV] = NVS_OP_MOV,
84 [OPCODE_MUL] = NVS_OP_MUL,
85 [OPCODE_PK2H] = NVS_OP_PK2H, [OPCODE_PK2US] = NVS_OP_PK2US,
86 [OPCODE_PK4B] = NVS_OP_PK4B, [OPCODE_PK4UB] = NVS_OP_PK4UB,
87 [OPCODE_POW] = NVS_OP_POW, [OPCODE_POPA] = NVS_OP_POPA,
88 [OPCODE_PUSHA] = NVS_OP_PUSHA,
89 [OPCODE_RCC] = NVS_OP_RCC, [OPCODE_RCP] = NVS_OP_RCP,
90 [OPCODE_RFL] = NVS_OP_RFL, [OPCODE_RSQ] = NVS_OP_RSQ,
91 [OPCODE_SCS] = NVS_OP_SCS, [OPCODE_SEQ] = NVS_OP_SEQ,
92 [OPCODE_SFL] = NVS_OP_SFL, [OPCODE_SGE] = NVS_OP_SGE,
93 [OPCODE_SGT] = NVS_OP_SGT, [OPCODE_SIN] = NVS_OP_SIN,
94 [OPCODE_SLE] = NVS_OP_SLE, [OPCODE_SLT] = NVS_OP_SLT,
95 [OPCODE_SNE] = NVS_OP_SNE, [OPCODE_SSG] = NVS_OP_SSG,
96 [OPCODE_STR] = NVS_OP_STR, [OPCODE_SUB] = NVS_OP_SUB,
97 [OPCODE_SWZ] = NVS_OP_MOV,
98 [OPCODE_TEX] = NVS_OP_TEX, [OPCODE_TXB] = NVS_OP_TXB,
99 [OPCODE_TXD] = NVS_OP_TXD,
100 [OPCODE_TXL] = NVS_OP_TXL, [OPCODE_TXP] = NVS_OP_TXP,
101 [OPCODE_TXP_NV] = NVS_OP_TXP,
102 [OPCODE_UP2H] = NVS_OP_UP2H, [OPCODE_UP2US] = NVS_OP_UP2US,
103 [OPCODE_UP4B] = NVS_OP_UP4B, [OPCODE_UP4UB] = NVS_OP_UP4UB,
104 [OPCODE_X2D] = NVS_OP_X2D,
105 [OPCODE_XPD] = NVS_OP_XPD
106 };
107
108 static nvsCond _tx_mesa_condmask[] = {
109 NVS_COND_TR, /* workaround mesa not filling a valid value */
110 NVS_COND_GT, NVS_COND_LT, NVS_COND_UN, NVS_COND_GE,
111 NVS_COND_LE, NVS_COND_NE, NVS_COND_NE, NVS_COND_TR, NVS_COND_FL
112 };
113
/*
 * Scratch state for pass 0.  Allocated at the start of nouveau_shader_pass0,
 * reachable through nvs->pass_rec while the pass runs, and freed before the
 * pass returns.
 */
struct pass0_rec {
	int nvs_ipos;		/* not referenced by the code in this file */
	int next_temp;		/* next temp index handed out by
				 * pass0_make_reg(..., NVS_FILE_TEMP, -1);
				 * starts at prog->NumTemporaries */

	int mesa_const_base;	/* offset added to every Mesa constant index
				 * (see pass0_rebase_mesa_consts) */
	int mesa_const_last;	/* NumParameters + mesa_const_base, set by
				 * pass0_resolve_mesa_consts */

	int swzconst_done;	/* non-zero once the swizzle helper constant
				 * has been added (pass0_fixup_swizzle) */
	int swzconst_id;	/* parameter index of that helper constant */
	nvsRegister const_half;	/* not referenced by the code in this file */
};
125
/* Short aliases for the swizzle components, used in the nvsSwizzle() calls
 * of the instruction emulation code further down. */
#define X NVS_SWZ_X
#define Y NVS_SWZ_Y
#define Z NVS_SWZ_Z
#define W NVS_SWZ_W
130
/*
 * Copy the condition-code test of the Mesa instruction into `fragment`
 * (an nvsInstruction pointer).
 *
 * Relies on a variable named `inst` (the struct prog_instruction being
 * translated) being in scope at the expansion site.  The translated
 * condition goes into ->cond; ->cond_test is set only when that condition is
 * something other than NVS_COND_TR (it is never cleared here).  ->cond_reg
 * and ->cond_swizzle are always overwritten.
 */
#define FILL_CONDITION_FLAGS(fragment) do {                                    \
	(fragment)->cond =                                                     \
		pass0_make_condmask(inst->DstReg.CondMask);                    \
	if ((fragment)->cond != NVS_COND_TR)                                   \
		(fragment)->cond_test = 1;                                     \
	(fragment)->cond_reg = inst->CondDst;                                  \
	pass0_make_swizzle((fragment)->cond_swizzle, inst->DstReg.CondSwizzle);\
} while(0)
139
/*
 * Emit one instruction and attach the Mesa instruction's condition test.
 *
 * Expects `nvs`, `parent`, `fpos`, `inst` and an lvalue `nvsinst` in scope at
 * the expansion site; the new instruction is left in `nvsinst`.
 * NOTE(review): pass0_emit() can return NULL on allocation failure, which is
 * dereferenced by FILL_CONDITION_FLAGS without a check.
 */
#define ARITH(op,dest,mask,sat,s0,s1,s2) do {                  \
	nvsinst = pass0_emit(nvs, parent, fpos, (op),          \
			(dest), (mask), (sat), (s0), (s1), (s2));\
	FILL_CONDITION_FLAGS(nvsinst);                         \
} while(0)
145
/*
 * "Unconditional" variant of ARITH: emit one instruction and leave it in
 * `nvsinst`, keeping the default condition fields set up by pass0_emit().
 * Expects `nvs`, `parent`, `fpos` and an lvalue `nvsinst` in scope.
 */
#define ARITHu(op,dest,mask,sat,s0,s1,s2) do {                 \
	nvsinst = pass0_emit(nvs, parent, fpos, (op),          \
			(dest), (mask), (sat), (s0), (s1), (s2));\
} while(0)
150
151 static void
152 pass0_append_fragment(nvsFragmentHeader *parent,
153 nvsFragmentHeader *fragment,
154 int pos)
155 {
156 nvsFragmentHeader **head, **tail;
157 assert(parent && fragment);
158
159 switch (parent->type) {
160 case NVS_BRANCH:
161 if (pos == 0) {
162 head = &((nvsBranch *)parent)->target_head;
163 tail = &((nvsBranch *)parent)->target_tail;
164 } else {
165 head = &((nvsBranch *)parent)->else_head;
166 tail = &((nvsBranch *)parent)->else_tail;
167 }
168 break;
169 case NVS_LOOP:
170 head = &((nvsLoop *)parent)->insn_head;
171 tail = &((nvsLoop *)parent)->insn_tail;
172 break;
173 case NVS_SUBROUTINE:
174 head = &((nvsSubroutine *)parent)->insn_head;
175 tail = &((nvsSubroutine *)parent)->insn_tail;
176 break;
177 default:
178 assert(0);
179 break;
180 }
181
182 fragment->parent = parent;
183 fragment->prev = *tail;
184 fragment->next = NULL;
185 if (!(*head))
186 *head = fragment;
187 else
188 (*tail)->next = fragment;
189 *tail = fragment;
190
191 }
192
193 static nvsSubroutine *
194 pass0_create_subroutine(nouveauShader *nvs, const char *label)
195 {
196 nvsSubroutine *sub;
197
198 sub = CALLOC_STRUCT(nvs_subroutine);
199 if (sub) {
200 sub->header.type = NVS_SUBROUTINE;
201 sub->label = strdup(label);
202 if (!nvs->program_tree)
203 nvs->program_tree = &sub->header;
204 else
205 pass0_append_fragment(nvs->program_tree,
206 &sub->header, 0);
207 }
208
209 return sub;
210 }
211
212 static void
213 pass0_make_reg(nouveauShader *nvs, nvsRegister *reg,
214 nvsRegFile file, unsigned int index)
215 {
216 struct pass0_rec *rec = nvs->pass_rec;
217
218 /* defaults */
219 *reg = nvr_unused;
220 /* -1 == quick-and-dirty temp alloc */
221 if (file == NVS_FILE_TEMP && index == -1) {
222 index = rec->next_temp++;
223 assert(index < NVS_MAX_TEMPS);
224 }
225 reg->file = file;
226 reg->index = index;
227 }
228
229 static void
230 pass0_make_swizzle(nvsSwzComp *swz, unsigned int mesa)
231 {
232 int i;
233
234 for (i=0;i<4;i++)
235 swz[i] = _tx_mesa_swizzle[GET_SWZ(mesa, i)];
236 }
237
238 static nvsOpcode
239 pass0_make_opcode(enum prog_opcode op)
240 {
241 if (op > MAX_OPCODE)
242 return NVS_OP_UNKNOWN;
243 return _tx_mesa_opcode[op];
244 }
245
246 static nvsCond
247 pass0_make_condmask(GLuint mesa)
248 {
249 if (mesa > COND_FL)
250 return NVS_COND_UNKNOWN;
251 return _tx_mesa_condmask[mesa];
252 }
253
254 static unsigned int
255 pass0_make_mask(GLuint mesa_mask)
256 {
257 unsigned int mask = 0;
258
259 if (mesa_mask & WRITEMASK_X) mask |= SMASK_X;
260 if (mesa_mask & WRITEMASK_Y) mask |= SMASK_Y;
261 if (mesa_mask & WRITEMASK_Z) mask |= SMASK_Z;
262 if (mesa_mask & WRITEMASK_W) mask |= SMASK_W;
263
264 return mask;
265 }
266
267 static nvsTexTarget
268 pass0_make_tex_target(GLuint mesa)
269 {
270 switch (mesa) {
271 case TEXTURE_1D_INDEX: return NVS_TEX_TARGET_1D;
272 case TEXTURE_2D_INDEX: return NVS_TEX_TARGET_2D;
273 case TEXTURE_3D_INDEX: return NVS_TEX_TARGET_3D;
274 case TEXTURE_CUBE_INDEX: return NVS_TEX_TARGET_CUBE;
275 case TEXTURE_RECT_INDEX: return NVS_TEX_TARGET_RECT;
276 default:
277 return NVS_TEX_TARGET_UNKNOWN;
278 }
279 }
280
281 static void
282 pass0_make_dst_reg(nvsPtr nvs, nvsRegister *reg,
283 struct prog_dst_register *dst)
284 {
285 struct gl_program *mesa = (struct gl_program*)&nvs->mesa.vp;
286 nvsFixedReg sfr;
287
288 switch (dst->File) {
289 case PROGRAM_OUTPUT:
290 if (mesa->Target == GL_VERTEX_PROGRAM_ARB) {
291 sfr = (dst->Index < VERT_RESULT_MAX) ?
292 _tx_mesa_vp_dst_reg[dst->Index] :
293 NVS_FR_UNKNOWN;
294 } else {
295 sfr = (dst->Index < FRAG_RESULT_MAX) ?
296 _tx_mesa_fp_dst_reg[dst->Index] :
297 NVS_FR_UNKNOWN;
298 }
299 pass0_make_reg(nvs, reg, NVS_FILE_RESULT, sfr);
300 break;
301 case PROGRAM_TEMPORARY:
302 pass0_make_reg(nvs, reg, NVS_FILE_TEMP, dst->Index);
303 break;
304 case PROGRAM_ADDRESS:
305 pass0_make_reg(nvs, reg, NVS_FILE_ADDRESS, dst->Index);
306 break;
307 default:
308 fprintf(stderr, "Unknown dest file %d\n", dst->File);
309 assert(0);
310 }
311 }
312
313 static void
314 pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src)
315 {
316 struct pass0_rec *rec = nvs->pass_rec;
317 struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base;
318 int i;
319
320 *reg = nvr_unused;
321
322 switch (src->File) {
323 case PROGRAM_INPUT:
324 reg->file = NVS_FILE_ATTRIB;
325 if (mesa->Target == GL_VERTEX_PROGRAM_ARB) {
326 for (i=0; i<NVS_MAX_ATTRIBS; i++) {
327 if (nvs->vp_attrib_map[i] == src->Index) {
328 reg->index = i;
329 break;
330 }
331 }
332 if (i==NVS_MAX_ATTRIBS)
333 reg->index = NVS_FR_UNKNOWN;
334 } else {
335 reg->index = (src->Index < FRAG_ATTRIB_MAX) ?
336 _tx_mesa_fp_src_reg[src->Index] :
337 NVS_FR_UNKNOWN;
338 }
339 break;
340 case PROGRAM_STATE_VAR:
341 case PROGRAM_NAMED_PARAM:
342 case PROGRAM_CONSTANT:
343 reg->file = NVS_FILE_CONST;
344 reg->index = src->Index + rec->mesa_const_base;
345 reg->indexed = src->RelAddr;
346 if (reg->indexed) {
347 reg->addr_reg = 0;
348 reg->addr_comp = NVS_SWZ_X;
349 }
350 break;
351 case PROGRAM_TEMPORARY:
352 reg->file = NVS_FILE_TEMP;
353 reg->index = src->Index;
354 break;
355 default:
356 fprintf(stderr, "Unknown source type %d\n", src->File);
357 assert(0);
358 }
359
360 /* per-component negate handled elsewhere */
361 reg->negate = src->NegateBase != 0;
362 reg->abs = src->Abs;
363 pass0_make_swizzle(reg->swizzle, src->Swizzle);
364 }
365
366 static nvsInstruction *
367 pass0_emit(nouveauShader *nvs, nvsFragmentHeader *parent, int fpos,
368 nvsOpcode op, nvsRegister dst,
369 unsigned int mask, int saturate,
370 nvsRegister src0, nvsRegister src1, nvsRegister src2)
371 {
372 nvsInstruction *sif;
373
374 sif = CALLOC_STRUCT(nvs_instruction);
375 if (!sif)
376 return NULL;
377
378 /* Seems mesa doesn't explicitly 0 this.. */
379 if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB)
380 saturate = 0;
381
382 sif->op = op;
383 sif->saturate = saturate;
384 sif->dest = dst;
385 sif->mask = mask;
386 sif->dest_scale = NVS_SCALE_1X;
387 sif->src[0] = src0;
388 sif->src[1] = src1;
389 sif->src[2] = src2;
390 sif->cond = COND_TR;
391 sif->cond_reg = 0;
392 sif->cond_test = 0;
393 sif->cond_update= 0;
394 pass0_make_swizzle(sif->cond_swizzle, SWIZZLE_NOOP);
395 pass0_append_fragment(parent, &sif->header, fpos);
396
397 return sif;
398 }
399
400 static void
401 pass0_fixup_swizzle(nvsPtr nvs, nvsFragmentHeader *parent, int fpos,
402 struct prog_src_register *src,
403 unsigned int sm1,
404 unsigned int sm2)
405 {
406 static const float sc[4] = { 1.0, 0.0, -1.0, 0.0 };
407 struct pass0_rec *rec = nvs->pass_rec;
408 int fixup_1, fixup_2;
409 nvsInstruction *nvsinst;
410 nvsRegister sr, dr = nvr_unused;
411 nvsRegister sm1const, sm2const;
412
413 if (!rec->swzconst_done) {
414 struct gl_program *prog = &nvs->mesa.vp.Base;
415 GLuint swizzle;
416 rec->swzconst_id = _mesa_add_unnamed_constant(prog->Parameters,
417 sc, 4, &swizzle);
418 /* XXX what about swizzle? */
419 rec->swzconst_done = 1;
420 COPY_4V(nvs->params[rec->swzconst_id].val, sc);
421 }
422
423 fixup_1 = (sm1 != MAKE_SWIZZLE4(0,0,0,0) &&
424 sm2 != MAKE_SWIZZLE4(2,2,2,2));
425 fixup_2 = (sm2 != MAKE_SWIZZLE4(2,2,2,2));
426
427 if (src->File != PROGRAM_TEMPORARY && src->File != PROGRAM_INPUT) {
428 /* We can't use more than one const in an instruction,
429 * so move the const into a temp, and swizzle from there.
430 *
431 * TODO: should just emit the swizzled const, instead of
432 * swizzling it in the shader.. would need to reswizzle
433 * any state params when they change however..
434 */
435 pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
436 pass0_make_src_reg(nvs, &sr, src);
437 ARITHu(NVS_OP_MOV, dr, SMASK_ALL, 0,
438 sr, nvr_unused, nvr_unused);
439 pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index);
440 } else {
441 if (fixup_1)
442 src->NegateBase = 0;
443 pass0_make_src_reg(nvs, &sr, src);
444 pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
445 }
446
447 pass0_make_reg(nvs, &sm1const, NVS_FILE_CONST, rec->swzconst_id);
448 pass0_make_swizzle(sm1const.swizzle, sm1);
449 if (fixup_1 && fixup_2) {
450 /* Any combination with SWIZZLE_ONE */
451 pass0_make_reg(nvs, &sm2const,
452 NVS_FILE_CONST, rec->swzconst_id);
453 pass0_make_swizzle(sm2const.swizzle, sm2);
454 ARITHu(NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const);
455 } else {
456 /* SWIZZLE_ZERO || arbitrary negate */
457 ARITHu(NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused);
458 }
459
460 src->File = PROGRAM_TEMPORARY;
461 src->Index = dr.index;
462 src->Swizzle = SWIZZLE_NOOP;
463 }
464
/* Replace component `cp` (0..3) of the packed 3-bits-per-component swizzle
 * `fs` with component value `c`.  `fs` must be an lvalue; `fs` and `cp` are
 * evaluated more than once.  Arguments are parenthesised so expressions can
 * be passed safely. */
#define SET_SWZ(fs, cp, c) \
	((fs) = ((fs) & ~(0x7 << ((cp) * 3))) | ((c) << ((cp) * 3)))
466 static void
467 pass0_check_sources(nvsPtr nvs, nvsFragmentHeader *parent, int fpos,
468 struct prog_instruction *inst)
469 {
470 unsigned int insrc = -1, constsrc = -1;
471 int i;
472
473 for (i=0;i<_mesa_num_inst_src_regs(inst->Opcode);i++) {
474 struct prog_src_register *src = &inst->SrcReg[i];
475 unsigned int sm_1 = 0, sm_2 = 0;
476 nvsRegister sr, dr;
477 int do_mov = 0, c;
478
479 /* Build up swizzle masks as if we were going to use
480 * "MAD new, src, const1, const2" to support arbitrary negation
481 * and SWIZZLE_ZERO/SWIZZLE_ONE.
482 */
483 for (c=0;c<4;c++) {
484 if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ZERO) {
485 SET_SWZ(sm_1, c, SWIZZLE_Y); /* 0.0 */
486 SET_SWZ(sm_2, c, SWIZZLE_Y);
487 SET_SWZ(src->Swizzle, c, SWIZZLE_X);
488 } else if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ONE) {
489 SET_SWZ(sm_1, c, SWIZZLE_Y);
490 if (src->NegateBase & (1<<c))
491 SET_SWZ(sm_2, c, SWIZZLE_Z); /* -1.0 */
492 else
493 SET_SWZ(sm_2, c, SWIZZLE_X); /* 1.0 */
494 SET_SWZ(src->Swizzle, c, SWIZZLE_X);
495 } else {
496 if (src->NegateBase & (1<<c))
497 SET_SWZ(sm_1, c, SWIZZLE_Z); /* -[xyzw] */
498 else
499 SET_SWZ(sm_1, c, SWIZZLE_X); /*[xyzw]*/
500 SET_SWZ(sm_2, c, SWIZZLE_Y);
501 }
502 }
503
504 /* Unless we're multiplying by 1.0 or -1.0 on all components,
505 * and we're adding nothing to any component we have to
506 * emulate the swizzle.
507 */
508 if ((sm_1 != MAKE_SWIZZLE4(0,0,0,0) &&
509 sm_1 != MAKE_SWIZZLE4(2,2,2,2)) ||
510 sm_2 != MAKE_SWIZZLE4(1,1,1,1)) {
511 pass0_fixup_swizzle(nvs, parent, fpos, src, sm_1, sm_2);
512 /* The source is definitely in a temp now, so don't
513 * bother checking for multiple ATTRIB/CONST regs.
514 */
515 continue;
516 }
517
518 /* HW can't use more than one ATTRIB or PARAM in a single
519 * instruction */
520 switch (src->File) {
521 case PROGRAM_INPUT:
522 if (insrc != -1 && insrc != src->Index)
523 do_mov = 1;
524 else insrc = src->Index;
525 break;
526 case PROGRAM_STATE_VAR:
527 if (constsrc != -1 && constsrc != src->Index)
528 do_mov = 1;
529 else constsrc = src->Index;
530 break;
531 default:
532 break;
533 }
534
535 /* Emit any extra ATTRIB/CONST to a temp, and modify the Mesa
536 * instruction to point at the temp.
537 */
538 if (do_mov) {
539 pass0_make_src_reg(nvs, &sr, src);
540 pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
541 pass0_emit(nvs, parent, fpos, NVS_OP_MOV,
542 dr, SMASK_ALL, 0,
543 sr, nvr_unused, nvr_unused);
544
545 src->File = PROGRAM_TEMPORARY;
546 src->Index = dr.index;
547 src->Swizzle= SWIZZLE_NOOP;
548 }
549 }
550 }
551
552 static GLboolean
553 pass0_emulate_instruction(nouveauShader *nvs,
554 nvsFragmentHeader *parent, int fpos,
555 struct prog_instruction *inst)
556 {
557 nvsFunc *shader = nvs->func;
558 nvsRegister src[3], dest, temp;
559 nvsInstruction *nvsinst;
560 unsigned int mask = pass0_make_mask(inst->DstReg.WriteMask);
561 int i, sat;
562
563 sat = (inst->SaturateMode == SATURATE_ZERO_ONE);
564
565 /* Build all the "real" regs for the instruction */
566 for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++)
567 pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]);
568 if (inst->Opcode != OPCODE_KIL)
569 pass0_make_dst_reg(nvs, &dest, &inst->DstReg);
570
571 switch (inst->Opcode) {
572 case OPCODE_ABS:
573 if (shader->caps & SCAP_SRC_ABS)
574 ARITH(NVS_OP_MOV, dest, mask, sat,
575 nvsAbs(src[0]), nvr_unused, nvr_unused);
576 else
577 ARITH(NVS_OP_MAX, dest, mask, sat,
578 src[0], nvsNegate(src[0]), nvr_unused);
579 break;
580 case OPCODE_CMP:
581 /*XXX: this will clobber CC0... */
582 ARITH (NVS_OP_MOV, dest, mask, sat,
583 src[2], nvr_unused, nvr_unused);
584 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
585 ARITHu(NVS_OP_MOV, temp, SMASK_ALL, 0,
586 src[0], nvr_unused, nvr_unused);
587 nvsinst->cond_update = 1;
588 nvsinst->cond_reg = 0;
589 ARITH (NVS_OP_MOV, dest, mask, sat,
590 src[1], nvr_unused, nvr_unused);
591 nvsinst->cond = COND_LT;
592 nvsinst->cond_reg = 0;
593 nvsinst->cond_test = 1;
594 break;
595 case OPCODE_DPH:
596 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
597 ARITHu(NVS_OP_DP3, temp, SMASK_X, 0,
598 src[0], src[1], nvr_unused);
599 ARITH (NVS_OP_ADD, dest, mask, sat,
600 nvsSwizzle(temp, X, X, X, X),
601 nvsSwizzle(src[1], W, W, W, W),
602 nvr_unused);
603 break;
604 case OPCODE_KIL:
605 /* This is only in ARB shaders, so we don't have to worry
606 * about clobbering a CC reg as they aren't supported anyway.
607 *XXX: might have to worry with GLSL however...
608 */
609 /* MOVC0 temp, src */
610 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
611 ARITHu(NVS_OP_MOV, temp, SMASK_ALL, 0,
612 src[0], nvr_unused, nvr_unused);
613 nvsinst->cond_update = 1;
614 nvsinst->cond_reg = 0;
615 /* KIL_NV (LT0.xyzw) temp */
616 ARITHu(NVS_OP_KIL, nvr_unused, 0, 0,
617 nvr_unused, nvr_unused, nvr_unused);
618 nvsinst->cond = COND_LT;
619 nvsinst->cond_reg = 0;
620 nvsinst->cond_test = 1;
621 pass0_make_swizzle(nvsinst->cond_swizzle, SWIZZLE_NOOP);
622 break;
623 case OPCODE_LRP:
624 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
625 ARITHu(NVS_OP_MAD, temp, mask, 0,
626 nvsNegate(src[0]), src[2], src[2]);
627 ARITH (NVS_OP_MAD, dest, mask, sat, src[0], src[1], temp);
628 break;
629 case OPCODE_POW:
630 if (shader->SupportsOpcode(shader, NVS_OP_LG2) &&
631 shader->SupportsOpcode(shader, NVS_OP_EX2)) {
632 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
633 /* LG2 temp.x, src0.c */
634 ARITHu(NVS_OP_LG2, temp, SMASK_X, 0,
635 nvsSwizzle(src[0], X, X, X, X),
636 nvr_unused, nvr_unused);
637 /* MUL temp.x, temp.x, src1.c */
638 ARITHu(NVS_OP_MUL, temp, SMASK_X, 0,
639 nvsSwizzle(temp, X, X, X, X),
640 nvsSwizzle(src[1], X, X, X, X),
641 nvr_unused);
642 /* EX2 dest, temp.x */
643 ARITH (NVS_OP_EX2, dest, mask, sat,
644 nvsSwizzle(temp, X, X, X, X),
645 nvr_unused, nvr_unused);
646 } else {
647 /* can we use EXP/LOG instead of EX2/LG2?? */
648 fprintf(stderr, "Implement POW for NV20 vtxprog!\n");
649 return GL_FALSE;
650 }
651 break;
652 case OPCODE_RSQ:
653 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
654 ARITHu(NVS_OP_LG2, temp, SMASK_X, 0,
655 nvsAbs(nvsSwizzle(src[0], X, X, X, X)),
656 nvr_unused, nvr_unused);
657 nvsinst->dest_scale = NVS_SCALE_INV_2X;
658 ARITH (NVS_OP_EX2, dest, mask, sat,
659 nvsNegate(nvsSwizzle(temp, X, X, X, X)),
660 nvr_unused, nvr_unused);
661 break;
662 case OPCODE_SCS:
663 if (mask & SMASK_X)
664 ARITH(NVS_OP_COS, dest, SMASK_X, sat,
665 nvsSwizzle(src[0], X, X, X, X),
666 nvr_unused, nvr_unused);
667 if (mask & SMASK_Y)
668 ARITH(NVS_OP_SIN, dest, SMASK_Y, sat,
669 nvsSwizzle(src[0], X, X, X, X),
670 nvr_unused, nvr_unused);
671 break;
672 case OPCODE_SUB:
673 ARITH(NVS_OP_ADD, dest, mask, sat,
674 src[0], nvsNegate(src[1]), nvr_unused);
675 break;
676 case OPCODE_XPD:
677 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
678 ARITHu(NVS_OP_MUL, temp, SMASK_ALL, 0,
679 nvsSwizzle(src[0], Z, X, Y, Y),
680 nvsSwizzle(src[1], Y, Z, X, X),
681 nvr_unused);
682 ARITH (NVS_OP_MAD, dest, (mask & ~SMASK_W), sat,
683 nvsSwizzle(src[0], Y, Z, X, X),
684 nvsSwizzle(src[1], Z, X, Y, Y),
685 nvsNegate(temp));
686 break;
687 default:
688 WARN_ONCE("hw doesn't support opcode \"%s\","
689 "and no emulation found\n",
690 _mesa_opcode_string(inst->Opcode));
691 return GL_FALSE;
692 }
693
694 return GL_TRUE;
695 }
696
697 static GLboolean
698 pass0_translate_arith(nouveauShader *nvs, struct gl_program *prog,
699 int ipos, int fpos,
700 nvsFragmentHeader *parent)
701 {
702 struct prog_instruction *inst = &prog->Instructions[ipos];
703 nvsFunc *shader = nvs->func;
704 nvsInstruction *nvsinst;
705 GLboolean ret;
706
707 /* Deal with multiple ATTRIB/PARAM in a single instruction */
708 pass0_check_sources(nvs, parent, fpos, inst);
709
710 /* Now it's safe to do the prog_instruction->nvsInstruction
711 * conversion
712 */
713 if (shader->SupportsOpcode(shader,
714 pass0_make_opcode(inst->Opcode))) {
715 nvsRegister src[3], dest;
716 int i;
717
718 for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++)
719 pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]);
720 pass0_make_dst_reg(nvs, &dest, &inst->DstReg);
721
722 ARITH(pass0_make_opcode(inst->Opcode), dest,
723 pass0_make_mask(inst->DstReg.WriteMask),
724 (inst->SaturateMode != SATURATE_OFF),
725 src[0], src[1], src[2]);
726 nvsinst->tex_unit = inst->TexSrcUnit;
727 nvsinst->tex_target = pass0_make_tex_target(inst->TexSrcTarget);
728
729 ret = GL_TRUE;
730 } else
731 ret = pass0_emulate_instruction(nvs, parent, fpos, inst);
732
733 return ret;
734 }
735
736 static GLboolean
737 pass0_translate_instructions(nouveauShader *nvs, int ipos, int fpos,
738 nvsFragmentHeader *parent)
739 {
740 struct gl_program *prog = (struct gl_program *)&nvs->mesa.vp;
741
742 while (1) {
743 struct prog_instruction *inst = &prog->Instructions[ipos];
744
745 switch (inst->Opcode) {
746 case OPCODE_END:
747 return GL_TRUE;
748 case OPCODE_BRA:
749 case OPCODE_CAL:
750 case OPCODE_RET:
751 //case OPCODE_LOOP:
752 //case OPCODE_ENDLOOP:
753 //case OPCODE_IF:
754 //case OPCODE_ELSE:
755 //case OPCODE_ENDIF:
756 WARN_ONCE("branch ops unimplemented\n");
757 return GL_FALSE;
758 break;
759 default:
760 if (!pass0_translate_arith(nvs, prog,
761 ipos, fpos, parent))
762 return GL_FALSE;
763 break;
764 }
765
766 ipos++;
767 }
768
769 return GL_TRUE;
770 }
771
772 static void
773 pass0_build_attrib_map(nouveauShader *nvs, struct gl_vertex_program *vp)
774 {
775 GLuint inputs_read = vp->Base.InputsRead;
776 GLuint input_alloc = ~0xFFFF;
777 int i;
778
779 for (i=0; i<NVS_MAX_ATTRIBS; i++)
780 nvs->vp_attrib_map[i] = -1;
781
782 while (inputs_read) {
783 int in = ffs(inputs_read) - 1;
784 int hw;
785 inputs_read &= ~(1<<in);
786
787 if (vp->IsNVProgram) {
788 /* NVvp: must alias */
789 if (in >= VERT_ATTRIB_GENERIC0)
790 hw = in - VERT_ATTRIB_GENERIC0;
791 else
792 hw = in;
793 } else {
794 /* ARBvp: may alias (but we won't)
795 * GL2.0: must not alias
796 */
797 if (in >= VERT_ATTRIB_GENERIC0)
798 hw = ffs(~input_alloc) - 1;
799 else
800 hw = in;
801 input_alloc |= (1<<hw);
802 }
803
804 nvs->vp_attrib_map[hw] = in;
805 }
806
807 if (NOUVEAU_DEBUG & DEBUG_SHADERS) {
808 printf("vtxprog attrib map:\n");
809 for (i=0; i<NVS_MAX_ATTRIBS; i++) {
810 printf(" hw:%d = attrib:%d\n",
811 i, nvs->vp_attrib_map[i]);
812 }
813 }
814 }
815
816 static void
817 pass0_vp_insert_ff_clip_planes(GLcontext *ctx, nouveauShader *nvs)
818 {
819 struct gl_program *prog = &nvs->mesa.vp.Base;
820 nvsFragmentHeader *parent = nvs->program_tree;
821 nvsInstruction *nvsinst;
822 GLuint fpos = 0;
823 nvsRegister opos, epos, eqn, mv[4];
824 gl_state_index tokens[STATE_LENGTH]
825 = { STATE_MODELVIEW_MATRIX, 0, 0, 0, 0 };
826 GLint id;
827 int i;
828
829 /* modelview transform */
830 pass0_make_reg(nvs, &opos, NVS_FILE_ATTRIB, NVS_FR_POSITION);
831 pass0_make_reg(nvs, &epos, NVS_FILE_TEMP , -1);
832 for (i=0; i<4; i++) {
833 tokens[2] = tokens[3] = i;
834 id = _mesa_add_state_reference(prog->Parameters, tokens);
835 pass0_make_reg(nvs, &mv[i], NVS_FILE_CONST, id);
836 }
837 ARITHu(NVS_OP_DP4, epos, SMASK_X, 0, opos, mv[0], nvr_unused);
838 ARITHu(NVS_OP_DP4, epos, SMASK_Y, 0, opos, mv[1], nvr_unused);
839 ARITHu(NVS_OP_DP4, epos, SMASK_Z, 0, opos, mv[2], nvr_unused);
840 ARITHu(NVS_OP_DP4, epos, SMASK_W, 0, opos, mv[3], nvr_unused);
841
842 /* Emit code to emulate fixed-function glClipPlane */
843 for (i=0; i<6; i++) {
844 GLuint clipmask = SMASK_X;
845 nvsRegister clip;
846
847 if (!(ctx->Transform.ClipPlanesEnabled & (1<<i)))
848 continue;
849
850 /* Point a const at a user clipping plane */
851 tokens[0] = STATE_CLIPPLANE;
852 tokens[1] = i;
853 id = _mesa_add_state_reference(prog->Parameters, tokens);
854 pass0_make_reg(nvs, &eqn , NVS_FILE_CONST , id);
855 pass0_make_reg(nvs, &clip, NVS_FILE_RESULT, NVS_FR_CLIP0 + i);
856
857 /*XXX: something else needs to take care of modifying the
858 * instructions to write to the correct hw clip register.
859 */
860 switch (i) {
861 case 0: case 3: clipmask = SMASK_Y; break;
862 case 1: case 4: clipmask = SMASK_Z; break;
863 case 2: case 5: clipmask = SMASK_W; break;
864 }
865
866 /* Emit transform */
867 ARITHu(NVS_OP_DP4, clip, clipmask, 0, epos, eqn, nvr_unused);
868 }
869 }
870
871 static void
872 pass0_rebase_mesa_consts(nouveauShader *nvs)
873 {
874 struct pass0_rec *rec = nvs->pass_rec;
875 struct gl_program *prog = &nvs->mesa.vp.Base;
876 struct prog_instruction *inst = prog->Instructions;
877 int i;
878
879 /*XXX: not a good idea, params->hw_index is malloc'd */
880 memset(nvs->params, 0x00, sizeof(nvs->params));
881
882 /* When doing relative addressing on constants, the hardware needs us
883 * to fill the "const id" field with a positive value. Determine the
884 * most negative index that is used so that all accesses to a
885 * mesa-provided constant can be rebased to a positive index.
886 */
887 while (inst->Opcode != OPCODE_END) {
888 for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) {
889 struct prog_src_register *src = &inst->SrcReg[i];
890
891 switch (src->File) {
892 case PROGRAM_STATE_VAR:
893 case PROGRAM_CONSTANT:
894 case PROGRAM_NAMED_PARAM:
895 if (src->RelAddr && src->Index < 0) {
896 int base = src->Index * -1;
897 if (rec->mesa_const_base < base)
898 rec->mesa_const_base = base;
899 }
900 break;
901 default:
902 break;
903 }
904 }
905
906 inst++;
907 }
908 }
909
910 static void
911 pass0_resolve_mesa_consts(nouveauShader *nvs)
912 {
913 struct pass0_rec *rec = nvs->pass_rec;
914 struct gl_program *prog = &nvs->mesa.vp.Base;
915 struct gl_program_parameter_list *plist = prog->Parameters;
916 int i;
917
918 /* Init all const tracking/alloc info from the parameter list, rather
919 * than doing it as we translate the program. Otherwise:
920 * 1) we can't get at the correct constant info when relative
921 * addressing is being used due to src->Index not pointing
922 * at the exact const;
923 * 2) as we add extra consts to the program, mesa will call realloc()
924 * and we get invalid pointers to the const data.
925 */
926 rec->mesa_const_last = plist->NumParameters + rec->mesa_const_base;
927 nvs->param_high = rec->mesa_const_last;
928 for (i=0; i<plist->NumParameters; i++) {
929 int hw = rec->mesa_const_base + i;
930
931 switch (plist->Parameters[i].Type) {
932 case PROGRAM_NAMED_PARAM:
933 case PROGRAM_STATE_VAR:
934 nvs->params[hw].in_use = GL_TRUE;
935 nvs->params[hw].source_val = plist->ParameterValues[i];
936 COPY_4V(nvs->params[hw].val, plist->ParameterValues[i]);
937 break;
938 case PROGRAM_CONSTANT:
939 nvs->params[hw].in_use = GL_TRUE;
940 nvs->params[hw].source_val = NULL;
941 COPY_4V(nvs->params[hw].val, plist->ParameterValues[i]);
942 break;
943 default:
944 assert(0);
945 break;
946 }
947 }
948 }
949
950 GLboolean
951 nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs)
952 {
953 nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
954 struct gl_program *prog = (struct gl_program*)nvs;
955 struct gl_vertex_program *vp = (struct gl_vertex_program *)prog;
956 struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
957 struct pass0_rec *rec;
958 int ret = GL_FALSE;
959
960 rec = CALLOC_STRUCT(pass0_rec);
961 if (!rec)
962 return GL_FALSE;
963
964 rec->next_temp = prog->NumTemporaries;
965 nvs->pass_rec = rec;
966
967 nvs->program_tree = (nvsFragmentHeader*)
968 pass0_create_subroutine(nvs, "program body");
969 if (!nvs->program_tree) {
970 FREE(rec);
971 return GL_FALSE;
972 }
973
974 switch (prog->Target) {
975 case GL_VERTEX_PROGRAM_ARB:
976 nvs->func = &nmesa->VPfunc;
977
978 if (vp->IsPositionInvariant)
979 _mesa_insert_mvp_code(ctx, vp);
980 pass0_rebase_mesa_consts(nvs);
981
982 if (!prog->String && ctx->Transform.ClipPlanesEnabled)
983 pass0_vp_insert_ff_clip_planes(ctx, nvs);
984
985 pass0_build_attrib_map(nvs, vp);
986 break;
987 case GL_FRAGMENT_PROGRAM_ARB:
988 nvs->func = &nmesa->FPfunc;
989
990 if (fp->FogOption != GL_NONE)
991 _mesa_append_fog_code(ctx, fp);
992 pass0_rebase_mesa_consts(nvs);
993 break;
994 default:
995 fprintf(stderr, "Unknown program type %d", prog->Target);
996 FREE(rec);
997 /* DESTROY TREE!! */
998 return GL_FALSE;
999 }
1000 nvs->func->card_priv = &nvs->card_priv;
1001
1002 ret = pass0_translate_instructions(nvs, 0, 0, nvs->program_tree);
1003 if (ret)
1004 pass0_resolve_mesa_consts(nvs);
1005 /*XXX: if (!ret) DESTROY TREE!!! */
1006
1007 FREE(rec);
1008 return ret;
1009 }
1010