document 1D convolution fix
[mesa.git] / src / mesa / drivers / dri / nouveau / nouveau_shader_0.c
1 /*
2 * Copyright (C) 2006 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /*
29 * Authors:
30 * Ben Skeggs <darktama@iinet.net.au>
31 */
32
33 #include "glheader.h"
34 #include "macros.h"
35 #include "enums.h"
36
37 #include "program.h"
38 #include "programopt.h"
39 #include "program_instruction.h"
40
41 #include "nouveau_context.h"
42 #include "nouveau_shader.h"
43 #include "nouveau_msg.h"
44
45 static nvsFixedReg _tx_mesa_vp_dst_reg[VERT_RESULT_MAX] = {
46 NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD,
47 NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
48 NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7,
49 NVS_FR_POINTSZ, NVS_FR_BFC0, NVS_FR_BFC1, NVS_FR_UNKNOWN /* EDGE */
50 };
51
52 static nvsFixedReg _tx_mesa_fp_dst_reg[FRAG_RESULT_MAX] = {
53 NVS_FR_FRAGDATA0 /* COLR */, NVS_FR_FRAGDATA0 /* COLH */,
54 NVS_FR_UNKNOWN /* DEPR */
55 };
56
57 static nvsFixedReg _tx_mesa_fp_src_reg[FRAG_ATTRIB_MAX] = {
58 NVS_FR_POSITION, NVS_FR_COL0, NVS_FR_COL1, NVS_FR_FOGCOORD,
59 NVS_FR_TEXCOORD0, NVS_FR_TEXCOORD1, NVS_FR_TEXCOORD2, NVS_FR_TEXCOORD3,
60 NVS_FR_TEXCOORD4, NVS_FR_TEXCOORD5, NVS_FR_TEXCOORD6, NVS_FR_TEXCOORD7
61 };
62
63 static nvsSwzComp _tx_mesa_swizzle[4] = {
64 NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W
65 };
66
67 static nvsOpcode _tx_mesa_opcode[] = {
68 [OPCODE_ABS] = NVS_OP_ABS, [OPCODE_ADD] = NVS_OP_ADD,
69 [OPCODE_ARA] = NVS_OP_ARA, [OPCODE_ARL] = NVS_OP_ARL,
70 [OPCODE_ARL_NV] = NVS_OP_ARL, [OPCODE_ARR] = NVS_OP_ARR,
71 [OPCODE_CMP] = NVS_OP_CMP, [OPCODE_COS] = NVS_OP_COS,
72 [OPCODE_DDX] = NVS_OP_DDX, [OPCODE_DDY] = NVS_OP_DDY,
73 [OPCODE_DP3] = NVS_OP_DP3, [OPCODE_DP4] = NVS_OP_DP4,
74 [OPCODE_DPH] = NVS_OP_DPH, [OPCODE_DST] = NVS_OP_DST,
75 [OPCODE_EX2] = NVS_OP_EX2, [OPCODE_EXP] = NVS_OP_EXP,
76 [OPCODE_FLR] = NVS_OP_FLR, [OPCODE_FRC] = NVS_OP_FRC,
77 [OPCODE_KIL] = NVS_OP_EMUL, [OPCODE_KIL_NV] = NVS_OP_KIL,
78 [OPCODE_LG2] = NVS_OP_LG2, [OPCODE_LIT] = NVS_OP_LIT,
79 [OPCODE_LOG] = NVS_OP_LOG,
80 [OPCODE_LRP] = NVS_OP_LRP,
81 [OPCODE_MAD] = NVS_OP_MAD, [OPCODE_MAX] = NVS_OP_MAX,
82 [OPCODE_MIN] = NVS_OP_MIN, [OPCODE_MOV] = NVS_OP_MOV,
83 [OPCODE_MUL] = NVS_OP_MUL,
84 [OPCODE_PK2H] = NVS_OP_PK2H, [OPCODE_PK2US] = NVS_OP_PK2US,
85 [OPCODE_PK4B] = NVS_OP_PK4B, [OPCODE_PK4UB] = NVS_OP_PK4UB,
86 [OPCODE_POW] = NVS_OP_POW, [OPCODE_POPA] = NVS_OP_POPA,
87 [OPCODE_PUSHA] = NVS_OP_PUSHA,
88 [OPCODE_RCC] = NVS_OP_RCC, [OPCODE_RCP] = NVS_OP_RCP,
89 [OPCODE_RFL] = NVS_OP_RFL, [OPCODE_RSQ] = NVS_OP_RSQ,
90 [OPCODE_SCS] = NVS_OP_SCS, [OPCODE_SEQ] = NVS_OP_SEQ,
91 [OPCODE_SFL] = NVS_OP_SFL, [OPCODE_SGE] = NVS_OP_SGE,
92 [OPCODE_SGT] = NVS_OP_SGT, [OPCODE_SIN] = NVS_OP_SIN,
93 [OPCODE_SLE] = NVS_OP_SLE, [OPCODE_SLT] = NVS_OP_SLT,
94 [OPCODE_SNE] = NVS_OP_SNE, [OPCODE_SSG] = NVS_OP_SSG,
95 [OPCODE_STR] = NVS_OP_STR, [OPCODE_SUB] = NVS_OP_SUB,
96 [OPCODE_SWZ] = NVS_OP_MOV,
97 [OPCODE_TEX] = NVS_OP_TEX, [OPCODE_TXB] = NVS_OP_TXB,
98 [OPCODE_TXD] = NVS_OP_TXD,
99 [OPCODE_TXL] = NVS_OP_TXL, [OPCODE_TXP] = NVS_OP_TXP,
100 [OPCODE_TXP_NV] = NVS_OP_TXP,
101 [OPCODE_UP2H] = NVS_OP_UP2H, [OPCODE_UP2US] = NVS_OP_UP2US,
102 [OPCODE_UP4B] = NVS_OP_UP4B, [OPCODE_UP4UB] = NVS_OP_UP4UB,
103 [OPCODE_X2D] = NVS_OP_X2D,
104 [OPCODE_XPD] = NVS_OP_XPD
105 };
106
107 static nvsCond _tx_mesa_condmask[] = {
108 NVS_COND_TR, /* workaround mesa not filling a valid value */
109 NVS_COND_GT, NVS_COND_LT, NVS_COND_UN, NVS_COND_GE,
110 NVS_COND_LE, NVS_COND_NE, NVS_COND_NE, NVS_COND_TR, NVS_COND_FL
111 };
112
113 struct pass0_rec {
114 int nvs_ipos;
115 int next_temp;
116
117 int mesa_const_base;
118 int mesa_const_last;
119
120 int swzconst_done;
121 int swzconst_id;
122 nvsRegister const_half;
123 };
124
/* Short component names for the nvsSwizzle() calls further down. */
#define X NVS_SWZ_X
#define Y NVS_SWZ_Y
#define Z NVS_SWZ_Z
#define W NVS_SWZ_W

/* Copy the condition-code state of the Mesa instruction `inst` (which must
 * be in scope at the expansion site) onto an emitted fragment.  cond_test
 * is only ever raised here, never cleared.
 */
#define FILL_CONDITION_FLAGS(fragment) do {				\
	(fragment)->cond = pass0_make_condmask(inst->DstReg.CondMask);	\
	if ((fragment)->cond != NVS_COND_TR) {				\
		(fragment)->cond_test = 1;				\
	}								\
	(fragment)->cond_reg = inst->CondDst;				\
	pass0_make_swizzle((fragment)->cond_swizzle,			\
			   inst->DstReg.CondSwizzle);			\
} while(0)

/* Emit one instruction and tag it with the condition state of `inst`.
 * Relies on `nvs`, `parent`, `fpos`, `inst` and `nvsinst` being in scope;
 * the new instruction is left in `nvsinst`.
 */
#define ARITH(op,dest,mask,sat,s0,s1,s2) do {				\
	nvsinst = pass0_emit(nvs, parent, fpos, (op),			\
			     (dest), (mask), (sat),			\
			     (s0), (s1), (s2));				\
	FILL_CONDITION_FLAGS(nvsinst);					\
} while(0)

/* "Unconditional" variant of ARITH: emit only, keeping the default
 * condition state set up by pass0_emit().
 */
#define ARITHu(op,dest,mask,sat,s0,s1,s2) do {				\
	nvsinst = pass0_emit(nvs, parent, fpos, (op),			\
			     (dest), (mask), (sat),			\
			     (s0), (s1), (s2));				\
} while(0)
149
150 static void
151 pass0_append_fragment(nvsFragmentHeader *parent,
152 nvsFragmentHeader *fragment,
153 int pos)
154 {
155 nvsFragmentHeader **head, **tail;
156 assert(parent && fragment);
157
158 switch (parent->type) {
159 case NVS_BRANCH:
160 if (pos == 0) {
161 head = &((nvsBranch *)parent)->target_head;
162 tail = &((nvsBranch *)parent)->target_tail;
163 } else {
164 head = &((nvsBranch *)parent)->else_head;
165 tail = &((nvsBranch *)parent)->else_tail;
166 }
167 break;
168 case NVS_LOOP:
169 head = &((nvsLoop *)parent)->insn_head;
170 tail = &((nvsLoop *)parent)->insn_tail;
171 break;
172 case NVS_SUBROUTINE:
173 head = &((nvsSubroutine *)parent)->insn_head;
174 tail = &((nvsSubroutine *)parent)->insn_tail;
175 break;
176 default:
177 assert(0);
178 break;
179 }
180
181 fragment->parent = parent;
182 fragment->prev = *tail;
183 fragment->next = NULL;
184 if (!(*head))
185 *head = fragment;
186 else
187 (*tail)->next = fragment;
188 *tail = fragment;
189
190 }
191
192 static nvsSubroutine *
193 pass0_create_subroutine(nouveauShader *nvs, const char *label)
194 {
195 nvsSubroutine *sub;
196
197 sub = CALLOC_STRUCT(nvs_subroutine);
198 if (sub) {
199 sub->header.type = NVS_SUBROUTINE;
200 sub->label = strdup(label);
201 if (!nvs->program_tree)
202 nvs->program_tree = &sub->header;
203 else
204 pass0_append_fragment(nvs->program_tree,
205 &sub->header, 0);
206 }
207
208 return sub;
209 }
210
211 static void
212 pass0_make_reg(nouveauShader *nvs, nvsRegister *reg,
213 nvsRegFile file, unsigned int index)
214 {
215 struct pass0_rec *rec = nvs->pass_rec;
216
217 /* defaults */
218 *reg = nvr_unused;
219 /* -1 == quick-and-dirty temp alloc */
220 if (file == NVS_FILE_TEMP && index == -1) {
221 index = rec->next_temp++;
222 assert(index < NVS_MAX_TEMPS);
223 }
224 reg->file = file;
225 reg->index = index;
226 }
227
228 static void
229 pass0_make_swizzle(nvsSwzComp *swz, unsigned int mesa)
230 {
231 int i;
232
233 for (i=0;i<4;i++)
234 swz[i] = _tx_mesa_swizzle[GET_SWZ(mesa, i)];
235 }
236
237 static nvsOpcode
238 pass0_make_opcode(enum prog_opcode op)
239 {
240 if (op > MAX_OPCODE)
241 return NVS_OP_UNKNOWN;
242 return _tx_mesa_opcode[op];
243 }
244
245 static nvsCond
246 pass0_make_condmask(GLuint mesa)
247 {
248 if (mesa > COND_FL)
249 return NVS_COND_UNKNOWN;
250 return _tx_mesa_condmask[mesa];
251 }
252
253 static unsigned int
254 pass0_make_mask(GLuint mesa_mask)
255 {
256 unsigned int mask = 0;
257
258 if (mesa_mask & WRITEMASK_X) mask |= SMASK_X;
259 if (mesa_mask & WRITEMASK_Y) mask |= SMASK_Y;
260 if (mesa_mask & WRITEMASK_Z) mask |= SMASK_Z;
261 if (mesa_mask & WRITEMASK_W) mask |= SMASK_W;
262
263 return mask;
264 }
265
266 static GLboolean
267 pass0_opcode_is_tex(enum prog_opcode op)
268 {
269 switch (op) {
270 case OPCODE_TEX:
271 case OPCODE_TXB:
272 case OPCODE_TXD:
273 case OPCODE_TXL:
274 case OPCODE_TXP:
275 return GL_TRUE;
276 default:
277 break;
278 }
279
280 return GL_FALSE;
281 }
282
283 static nvsTexTarget
284 pass0_make_tex_target(GLuint mesa)
285 {
286 switch (mesa) {
287 case TEXTURE_1D_INDEX: return NVS_TEX_TARGET_1D;
288 case TEXTURE_2D_INDEX: return NVS_TEX_TARGET_2D;
289 case TEXTURE_3D_INDEX: return NVS_TEX_TARGET_3D;
290 case TEXTURE_CUBE_INDEX: return NVS_TEX_TARGET_CUBE;
291 case TEXTURE_RECT_INDEX: return NVS_TEX_TARGET_RECT;
292 default:
293 return NVS_TEX_TARGET_UNKNOWN;
294 }
295 }
296
297 static void
298 pass0_make_dst_reg(nvsPtr nvs, nvsRegister *reg,
299 struct prog_dst_register *dst)
300 {
301 struct gl_program *mesa = (struct gl_program*)&nvs->mesa.vp;
302 nvsFixedReg sfr;
303
304 switch (dst->File) {
305 case PROGRAM_OUTPUT:
306 if (mesa->Target == GL_VERTEX_PROGRAM_ARB) {
307 sfr = (dst->Index < VERT_RESULT_MAX) ?
308 _tx_mesa_vp_dst_reg[dst->Index] :
309 NVS_FR_UNKNOWN;
310 } else {
311 sfr = (dst->Index < FRAG_RESULT_MAX) ?
312 _tx_mesa_fp_dst_reg[dst->Index] :
313 NVS_FR_UNKNOWN;
314 }
315 pass0_make_reg(nvs, reg, NVS_FILE_RESULT, sfr);
316 break;
317 case PROGRAM_TEMPORARY:
318 pass0_make_reg(nvs, reg, NVS_FILE_TEMP, dst->Index);
319 break;
320 case PROGRAM_ADDRESS:
321 pass0_make_reg(nvs, reg, NVS_FILE_ADDRESS, dst->Index);
322 break;
323 default:
324 fprintf(stderr, "Unknown dest file %d\n", dst->File);
325 assert(0);
326 }
327 }
328
329 static void
330 pass0_make_src_reg(nvsPtr nvs, nvsRegister *reg, struct prog_src_register *src)
331 {
332 struct pass0_rec *rec = nvs->pass_rec;
333 struct gl_program *mesa = (struct gl_program *)&nvs->mesa.vp.Base;
334 int i;
335
336 *reg = nvr_unused;
337
338 switch (src->File) {
339 case PROGRAM_INPUT:
340 reg->file = NVS_FILE_ATTRIB;
341 if (mesa->Target == GL_VERTEX_PROGRAM_ARB) {
342 for (i=0; i<NVS_MAX_ATTRIBS; i++) {
343 if (nvs->vp_attrib_map[i] == src->Index) {
344 reg->index = i;
345 break;
346 }
347 }
348 if (i==NVS_MAX_ATTRIBS)
349 reg->index = NVS_FR_UNKNOWN;
350 } else {
351 reg->index = (src->Index < FRAG_ATTRIB_MAX) ?
352 _tx_mesa_fp_src_reg[src->Index] :
353 NVS_FR_UNKNOWN;
354 }
355 break;
356 case PROGRAM_STATE_VAR:
357 case PROGRAM_NAMED_PARAM:
358 case PROGRAM_CONSTANT:
359 reg->file = NVS_FILE_CONST;
360 reg->index = src->Index + rec->mesa_const_base;
361 reg->indexed = src->RelAddr;
362 if (reg->indexed) {
363 reg->addr_reg = 0;
364 reg->addr_comp = NVS_SWZ_X;
365 }
366 break;
367 case PROGRAM_TEMPORARY:
368 reg->file = NVS_FILE_TEMP;
369 reg->index = src->Index;
370 break;
371 default:
372 fprintf(stderr, "Unknown source type %d\n", src->File);
373 assert(0);
374 }
375
376 /* per-component negate handled elsewhere */
377 reg->negate = src->NegateBase != 0;
378 reg->abs = src->Abs;
379 pass0_make_swizzle(reg->swizzle, src->Swizzle);
380 }
381
382 static nvsInstruction *
383 pass0_emit(nouveauShader *nvs, nvsFragmentHeader *parent, int fpos,
384 nvsOpcode op, nvsRegister dst,
385 unsigned int mask, int saturate,
386 nvsRegister src0, nvsRegister src1, nvsRegister src2)
387 {
388 nvsInstruction *sif;
389
390 sif = CALLOC_STRUCT(nvs_instruction);
391 if (!sif)
392 return NULL;
393
394 /* Seems mesa doesn't explicitly 0 this.. */
395 if (nvs->mesa.vp.Base.Target == GL_VERTEX_PROGRAM_ARB)
396 saturate = 0;
397
398 sif->op = op;
399 sif->saturate = saturate;
400 sif->dest = dst;
401 sif->mask = mask;
402 sif->dest_scale = NVS_SCALE_1X;
403 sif->src[0] = src0;
404 sif->src[1] = src1;
405 sif->src[2] = src2;
406 sif->cond = COND_TR;
407 sif->cond_reg = 0;
408 sif->cond_test = 0;
409 sif->cond_update= 0;
410 pass0_make_swizzle(sif->cond_swizzle, SWIZZLE_NOOP);
411 pass0_append_fragment(parent, &sif->header, fpos);
412
413 return sif;
414 }
415
416 static void
417 pass0_fixup_swizzle(nvsPtr nvs, nvsFragmentHeader *parent, int fpos,
418 struct prog_src_register *src,
419 unsigned int sm1,
420 unsigned int sm2)
421 {
422 static const float sc[4] = { 1.0, 0.0, -1.0, 0.0 };
423 struct pass0_rec *rec = nvs->pass_rec;
424 int fixup_1, fixup_2;
425 nvsInstruction *nvsinst;
426 nvsRegister sr, dr = nvr_unused;
427 nvsRegister sm1const, sm2const;
428
429 if (!rec->swzconst_done) {
430 struct gl_program *prog = &nvs->mesa.vp.Base;
431 rec->swzconst_id = _mesa_add_unnamed_constant(prog->Parameters,
432 sc, 4);
433 rec->swzconst_done = 1;
434 COPY_4V(nvs->params[rec->swzconst_id].val, sc);
435 }
436
437 fixup_1 = (sm1 != MAKE_SWIZZLE4(0,0,0,0) &&
438 sm2 != MAKE_SWIZZLE4(2,2,2,2));
439 fixup_2 = (sm2 != MAKE_SWIZZLE4(2,2,2,2));
440
441 if (src->File != PROGRAM_TEMPORARY && src->File != PROGRAM_INPUT) {
442 /* We can't use more than one const in an instruction,
443 * so move the const into a temp, and swizzle from there.
444 *
445 * TODO: should just emit the swizzled const, instead of
446 * swizzling it in the shader.. would need to reswizzle
447 * any state params when they change however..
448 */
449 pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
450 pass0_make_src_reg(nvs, &sr, src);
451 ARITHu(NVS_OP_MOV, dr, SMASK_ALL, 0,
452 sr, nvr_unused, nvr_unused);
453 pass0_make_reg(nvs, &sr, NVS_FILE_TEMP, dr.index);
454 } else {
455 if (fixup_1)
456 src->NegateBase = 0;
457 pass0_make_src_reg(nvs, &sr, src);
458 pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
459 }
460
461 pass0_make_reg(nvs, &sm1const, NVS_FILE_CONST, rec->swzconst_id);
462 pass0_make_swizzle(sm1const.swizzle, sm1);
463 if (fixup_1 && fixup_2) {
464 /* Any combination with SWIZZLE_ONE */
465 pass0_make_reg(nvs, &sm2const,
466 NVS_FILE_CONST, rec->swzconst_id);
467 pass0_make_swizzle(sm2const.swizzle, sm2);
468 ARITHu(NVS_OP_MAD, dr, SMASK_ALL, 0, sr, sm1const, sm2const);
469 } else {
470 /* SWIZZLE_ZERO || arbitrary negate */
471 ARITHu(NVS_OP_MUL, dr, SMASK_ALL, 0, sr, sm1const, nvr_unused);
472 }
473
474 src->File = PROGRAM_TEMPORARY;
475 src->Index = dr.index;
476 src->Swizzle = SWIZZLE_NOOP;
477 }
478
479 #define SET_SWZ(fs, cp, c) fs = (fs & ~(0x7<<(cp*3))) | (c<<(cp*3))
480 static void
481 pass0_check_sources(nvsPtr nvs, nvsFragmentHeader *parent, int fpos,
482 struct prog_instruction *inst)
483 {
484 unsigned int insrc = -1, constsrc = -1;
485 int i;
486
487 for (i=0;i<_mesa_num_inst_src_regs(inst->Opcode);i++) {
488 struct prog_src_register *src = &inst->SrcReg[i];
489 unsigned int sm_1 = 0, sm_2 = 0;
490 nvsRegister sr, dr;
491 int do_mov = 0, c;
492
493 /* Build up swizzle masks as if we were going to use
494 * "MAD new, src, const1, const2" to support arbitrary negation
495 * and SWIZZLE_ZERO/SWIZZLE_ONE.
496 */
497 for (c=0;c<4;c++) {
498 if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ZERO) {
499 SET_SWZ(sm_1, c, SWIZZLE_Y); /* 0.0 */
500 SET_SWZ(sm_2, c, SWIZZLE_Y);
501 SET_SWZ(src->Swizzle, c, SWIZZLE_X);
502 } else if (GET_SWZ(src->Swizzle, c) == SWIZZLE_ONE) {
503 SET_SWZ(sm_1, c, SWIZZLE_Y);
504 if (src->NegateBase & (1<<c))
505 SET_SWZ(sm_2, c, SWIZZLE_Z); /* -1.0 */
506 else
507 SET_SWZ(sm_2, c, SWIZZLE_X); /* 1.0 */
508 SET_SWZ(src->Swizzle, c, SWIZZLE_X);
509 } else {
510 if (src->NegateBase & (1<<c))
511 SET_SWZ(sm_1, c, SWIZZLE_Z); /* -[xyzw] */
512 else
513 SET_SWZ(sm_1, c, SWIZZLE_X); /*[xyzw]*/
514 SET_SWZ(sm_2, c, SWIZZLE_Y);
515 }
516 }
517
518 /* Unless we're multiplying by 1.0 or -1.0 on all components,
519 * and we're adding nothing to any component we have to
520 * emulate the swizzle.
521 */
522 if ((sm_1 != MAKE_SWIZZLE4(0,0,0,0) &&
523 sm_1 != MAKE_SWIZZLE4(2,2,2,2)) ||
524 sm_2 != MAKE_SWIZZLE4(1,1,1,1)) {
525 pass0_fixup_swizzle(nvs, parent, fpos, src, sm_1, sm_2);
526 /* The source is definitely in a temp now, so don't
527 * bother checking for multiple ATTRIB/CONST regs.
528 */
529 continue;
530 }
531
532 /* HW can't use more than one ATTRIB or PARAM in a single
533 * instruction */
534 switch (src->File) {
535 case PROGRAM_INPUT:
536 if (insrc != -1 && insrc != src->Index)
537 do_mov = 1;
538 else insrc = src->Index;
539 break;
540 case PROGRAM_STATE_VAR:
541 if (constsrc != -1 && constsrc != src->Index)
542 do_mov = 1;
543 else constsrc = src->Index;
544 break;
545 default:
546 break;
547 }
548
549 /* Emit any extra ATTRIB/CONST to a temp, and modify the Mesa
550 * instruction to point at the temp.
551 */
552 if (do_mov) {
553 pass0_make_src_reg(nvs, &sr, src);
554 pass0_make_reg(nvs, &dr, NVS_FILE_TEMP, -1);
555 pass0_emit(nvs, parent, fpos, NVS_OP_MOV,
556 dr, SMASK_ALL, 0,
557 sr, nvr_unused, nvr_unused);
558
559 src->File = PROGRAM_TEMPORARY;
560 src->Index = dr.index;
561 src->Swizzle= SWIZZLE_NOOP;
562 }
563 }
564 }
565
566 static GLboolean
567 pass0_emulate_instruction(nouveauShader *nvs,
568 nvsFragmentHeader *parent, int fpos,
569 struct prog_instruction *inst)
570 {
571 nvsFunc *shader = nvs->func;
572 nvsRegister src[3], dest, temp;
573 nvsInstruction *nvsinst;
574 unsigned int mask = pass0_make_mask(inst->DstReg.WriteMask);
575 int i, sat;
576
577 sat = (inst->SaturateMode == SATURATE_ZERO_ONE);
578
579 /* Build all the "real" regs for the instruction */
580 for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++)
581 pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]);
582 if (inst->Opcode != OPCODE_KIL)
583 pass0_make_dst_reg(nvs, &dest, &inst->DstReg);
584
585 switch (inst->Opcode) {
586 case OPCODE_ABS:
587 if (shader->caps & SCAP_SRC_ABS)
588 ARITH(NVS_OP_MOV, dest, mask, sat,
589 nvsAbs(src[0]), nvr_unused, nvr_unused);
590 else
591 ARITH(NVS_OP_MAX, dest, mask, sat,
592 src[0], nvsNegate(src[0]), nvr_unused);
593 break;
594 case OPCODE_CMP:
595 /*XXX: this will clobber CC0... */
596 ARITH (NVS_OP_MOV, dest, mask, sat,
597 src[2], nvr_unused, nvr_unused);
598 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
599 ARITHu(NVS_OP_MOV, temp, SMASK_ALL, 0,
600 src[0], nvr_unused, nvr_unused);
601 nvsinst->cond_update = 1;
602 nvsinst->cond_reg = 0;
603 ARITH (NVS_OP_MOV, dest, mask, sat,
604 src[1], nvr_unused, nvr_unused);
605 nvsinst->cond = COND_LT;
606 nvsinst->cond_reg = 0;
607 nvsinst->cond_test = 1;
608 break;
609 case OPCODE_DPH:
610 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
611 ARITHu(NVS_OP_DP3, temp, SMASK_X, 0,
612 src[0], src[1], nvr_unused);
613 ARITH (NVS_OP_ADD, dest, mask, sat,
614 nvsSwizzle(temp, X, X, X, X),
615 nvsSwizzle(src[1], W, W, W, W),
616 nvr_unused);
617 break;
618 case OPCODE_KIL:
619 /* This is only in ARB shaders, so we don't have to worry
620 * about clobbering a CC reg as they aren't supported anyway.
621 *XXX: might have to worry with GLSL however...
622 */
623 /* MOVC0 temp, src */
624 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
625 ARITHu(NVS_OP_MOV, temp, SMASK_ALL, 0,
626 src[0], nvr_unused, nvr_unused);
627 nvsinst->cond_update = 1;
628 nvsinst->cond_reg = 0;
629 /* KIL_NV (LT0.xyzw) temp */
630 ARITHu(NVS_OP_KIL, nvr_unused, 0, 0,
631 nvr_unused, nvr_unused, nvr_unused);
632 nvsinst->cond = COND_LT;
633 nvsinst->cond_reg = 0;
634 nvsinst->cond_test = 1;
635 pass0_make_swizzle(nvsinst->cond_swizzle, SWIZZLE_NOOP);
636 break;
637 case OPCODE_LRP:
638 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
639 ARITHu(NVS_OP_MAD, temp, mask, 0,
640 nvsNegate(src[0]), src[2], src[2]);
641 ARITH (NVS_OP_MAD, dest, mask, sat, src[0], src[1], temp);
642 break;
643 case OPCODE_POW:
644 if (shader->SupportsOpcode(shader, NVS_OP_LG2) &&
645 shader->SupportsOpcode(shader, NVS_OP_EX2)) {
646 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
647 /* LG2 temp.x, src0.c */
648 ARITHu(NVS_OP_LG2, temp, SMASK_X, 0,
649 nvsSwizzle(src[0], X, X, X, X),
650 nvr_unused, nvr_unused);
651 /* MUL temp.x, temp.x, src1.c */
652 ARITHu(NVS_OP_MUL, temp, SMASK_X, 0,
653 nvsSwizzle(temp, X, X, X, X),
654 nvsSwizzle(src[1], X, X, X, X),
655 nvr_unused);
656 /* EX2 dest, temp.x */
657 ARITH (NVS_OP_EX2, dest, mask, sat,
658 nvsSwizzle(temp, X, X, X, X),
659 nvr_unused, nvr_unused);
660 } else {
661 /* can we use EXP/LOG instead of EX2/LG2?? */
662 fprintf(stderr, "Implement POW for NV20 vtxprog!\n");
663 return GL_FALSE;
664 }
665 break;
666 case OPCODE_RSQ:
667 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
668 ARITHu(NVS_OP_LG2, temp, SMASK_X, 0,
669 nvsAbs(nvsSwizzle(src[0], X, X, X, X)),
670 nvr_unused, nvr_unused);
671 nvsinst->dest_scale = NVS_SCALE_INV_2X;
672 ARITH (NVS_OP_EX2, dest, mask, sat,
673 nvsNegate(nvsSwizzle(temp, X, X, X, X)),
674 nvr_unused, nvr_unused);
675 break;
676 case OPCODE_SCS:
677 if (mask & SMASK_X)
678 ARITH(NVS_OP_COS, dest, SMASK_X, sat,
679 nvsSwizzle(src[0], X, X, X, X),
680 nvr_unused, nvr_unused);
681 if (mask & SMASK_Y)
682 ARITH(NVS_OP_SIN, dest, SMASK_Y, sat,
683 nvsSwizzle(src[0], X, X, X, X),
684 nvr_unused, nvr_unused);
685 break;
686 case OPCODE_SUB:
687 ARITH(NVS_OP_ADD, dest, mask, sat,
688 src[0], nvsNegate(src[1]), nvr_unused);
689 break;
690 case OPCODE_XPD:
691 pass0_make_reg(nvs, &temp, NVS_FILE_TEMP, -1);
692 ARITHu(NVS_OP_MUL, temp, SMASK_ALL, 0,
693 nvsSwizzle(src[0], Z, X, Y, Y),
694 nvsSwizzle(src[1], Y, Z, X, X),
695 nvr_unused);
696 ARITH (NVS_OP_MAD, dest, (mask & ~SMASK_W), sat,
697 nvsSwizzle(src[0], Y, Z, X, X),
698 nvsSwizzle(src[1], Z, X, Y, Y),
699 nvsNegate(temp));
700 break;
701 default:
702 WARN_ONCE("hw doesn't support opcode \"%s\","
703 "and no emulation found\n",
704 _mesa_opcode_string(inst->Opcode));
705 return GL_FALSE;
706 }
707
708 return GL_TRUE;
709 }
710
711 static GLboolean
712 pass0_translate_arith(nouveauShader *nvs, struct gl_program *prog,
713 int ipos, int fpos,
714 nvsFragmentHeader *parent)
715 {
716 struct prog_instruction *inst = &prog->Instructions[ipos];
717 nvsFunc *shader = nvs->func;
718 nvsInstruction *nvsinst;
719 GLboolean ret;
720
721 /* Deal with multiple ATTRIB/PARAM in a single instruction */
722 pass0_check_sources(nvs, parent, fpos, inst);
723
724 /* Now it's safe to do the prog_instruction->nvsInstruction
725 * conversion
726 */
727 if (shader->SupportsOpcode(shader,
728 pass0_make_opcode(inst->Opcode))) {
729 nvsRegister src[3], dest;
730 int i;
731
732 for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++)
733 pass0_make_src_reg(nvs, &src[i], &inst->SrcReg[i]);
734 pass0_make_dst_reg(nvs, &dest, &inst->DstReg);
735
736 ARITH(pass0_make_opcode(inst->Opcode), dest,
737 pass0_make_mask(inst->DstReg.WriteMask),
738 (inst->SaturateMode != SATURATE_OFF),
739 src[0], src[1], src[2]);
740 nvsinst->tex_unit = inst->TexSrcUnit;
741 if (pass0_opcode_is_tex(inst->Opcode))
742 nvsinst->tex_target =
743 pass0_make_tex_target(inst->TexSrcTarget);
744 else
745 nvsinst->tex_target = NVS_TEX_TARGET_UNKNOWN;
746
747 ret = GL_TRUE;
748 } else
749 ret = pass0_emulate_instruction(nvs, parent, fpos, inst);
750
751 return ret;
752 }
753
754 static GLboolean
755 pass0_translate_instructions(nouveauShader *nvs, int ipos, int fpos,
756 nvsFragmentHeader *parent)
757 {
758 struct gl_program *prog = (struct gl_program *)&nvs->mesa.vp;
759
760 while (1) {
761 struct prog_instruction *inst = &prog->Instructions[ipos];
762
763 switch (inst->Opcode) {
764 case OPCODE_END:
765 return GL_TRUE;
766 case OPCODE_BRA:
767 case OPCODE_CAL:
768 case OPCODE_RET:
769 //case OPCODE_LOOP:
770 //case OPCODE_ENDLOOP:
771 //case OPCODE_IF:
772 //case OPCODE_ELSE:
773 //case OPCODE_ENDIF:
774 WARN_ONCE("branch ops unimplemented\n");
775 return GL_FALSE;
776 break;
777 default:
778 if (!pass0_translate_arith(nvs, prog,
779 ipos, fpos, parent))
780 return GL_FALSE;
781 break;
782 }
783
784 ipos++;
785 }
786
787 return GL_TRUE;
788 }
789
790 static void
791 pass0_build_attrib_map(nouveauShader *nvs, struct gl_vertex_program *vp)
792 {
793 GLuint inputs_read = vp->Base.InputsRead;
794 GLuint input_alloc = ~0xFFFF;
795 int i;
796
797 for (i=0; i<NVS_MAX_ATTRIBS; i++)
798 nvs->vp_attrib_map[i] = -1;
799
800 while (inputs_read) {
801 int in = ffs(inputs_read) - 1;
802 int hw;
803 inputs_read &= ~(1<<in);
804
805 if (vp->IsNVProgram) {
806 /* NVvp: must alias */
807 if (in >= VERT_ATTRIB_GENERIC0)
808 hw = in - VERT_ATTRIB_GENERIC0;
809 else
810 hw = in;
811 } else {
812 /* ARBvp: may alias (but we won't)
813 * GL2.0: must not alias
814 */
815 if (in >= VERT_ATTRIB_GENERIC0)
816 hw = ffs(~input_alloc) - 1;
817 else
818 hw = in;
819 input_alloc |= (1<<hw);
820 }
821
822 nvs->vp_attrib_map[hw] = in;
823 }
824
825 if (NOUVEAU_DEBUG & DEBUG_SHADERS) {
826 printf("vtxprog attrib map:\n");
827 for (i=0; i<NVS_MAX_ATTRIBS; i++) {
828 printf(" hw:%d = attrib:%d\n",
829 i, nvs->vp_attrib_map[i]);
830 }
831 }
832 }
833
834 static void
835 pass0_vp_insert_ff_clip_planes(GLcontext *ctx, nouveauShader *nvs)
836 {
837 struct gl_program *prog = &nvs->mesa.vp.Base;
838 nvsFragmentHeader *parent = nvs->program_tree;
839 nvsInstruction *nvsinst;
840 GLuint fpos = 0;
841 nvsRegister opos, epos, eqn, mv[4];
842 GLint tokens[6] = { STATE_MATRIX, STATE_MODELVIEW, 0, 0, 0, 0 };
843 GLint id;
844 int i;
845
846 /* modelview transform */
847 pass0_make_reg(nvs, &opos, NVS_FILE_ATTRIB, NVS_FR_POSITION);
848 pass0_make_reg(nvs, &epos, NVS_FILE_TEMP , -1);
849 for (i=0; i<4; i++) {
850 tokens[3] = tokens[4] = i;
851 id = _mesa_add_state_reference(prog->Parameters, tokens);
852 pass0_make_reg(nvs, &mv[i], NVS_FILE_CONST, id);
853 }
854 ARITHu(NVS_OP_DP4, epos, SMASK_X, 0, opos, mv[0], nvr_unused);
855 ARITHu(NVS_OP_DP4, epos, SMASK_Y, 0, opos, mv[1], nvr_unused);
856 ARITHu(NVS_OP_DP4, epos, SMASK_Z, 0, opos, mv[2], nvr_unused);
857 ARITHu(NVS_OP_DP4, epos, SMASK_W, 0, opos, mv[3], nvr_unused);
858
859 /* Emit code to emulate fixed-function glClipPlane */
860 for (i=0; i<6; i++) {
861 GLuint clipmask = SMASK_X;
862 nvsRegister clip;
863
864 if (!(ctx->Transform.ClipPlanesEnabled & (1<<i)))
865 continue;
866
867 /* Point a const at a user clipping plane */
868 tokens[0] = STATE_CLIPPLANE;
869 tokens[1] = i;
870 id = _mesa_add_state_reference(prog->Parameters, tokens);
871 pass0_make_reg(nvs, &eqn , NVS_FILE_CONST , id);
872 pass0_make_reg(nvs, &clip, NVS_FILE_RESULT, NVS_FR_CLIP0 + i);
873
874 /*XXX: something else needs to take care of modifying the
875 * instructions to write to the correct hw clip register.
876 */
877 switch (i) {
878 case 0: case 3: clipmask = SMASK_Y; break;
879 case 1: case 4: clipmask = SMASK_Z; break;
880 case 2: case 5: clipmask = SMASK_W; break;
881 }
882
883 /* Emit transform */
884 ARITHu(NVS_OP_DP4, clip, clipmask, 0, epos, eqn, nvr_unused);
885 }
886 }
887
888 static void
889 pass0_rebase_mesa_consts(nouveauShader *nvs)
890 {
891 struct pass0_rec *rec = nvs->pass_rec;
892 struct gl_program *prog = &nvs->mesa.vp.Base;
893 struct prog_instruction *inst = prog->Instructions;
894 int i;
895
896 /*XXX: not a good idea, params->hw_index is malloc'd */
897 memset(nvs->params, 0x00, sizeof(nvs->params));
898
899 /* When doing relative addressing on constants, the hardware needs us
900 * to fill the "const id" field with a positive value. Determine the
901 * most negative index that is used so that all accesses to a
902 * mesa-provided constant can be rebased to a positive index.
903 */
904 while (inst->Opcode != OPCODE_END) {
905 for (i=0; i<_mesa_num_inst_src_regs(inst->Opcode); i++) {
906 struct prog_src_register *src = &inst->SrcReg[i];
907
908 switch (src->File) {
909 case PROGRAM_STATE_VAR:
910 case PROGRAM_CONSTANT:
911 case PROGRAM_NAMED_PARAM:
912 if (src->RelAddr && src->Index < 0) {
913 int base = src->Index * -1;
914 if (rec->mesa_const_base < base)
915 rec->mesa_const_base = base;
916 }
917 break;
918 default:
919 break;
920 }
921 }
922
923 inst++;
924 }
925 }
926
927 static GLboolean
928 pass0_resolve_mesa_consts(nouveauShader *nvs)
929 {
930 struct pass0_rec *rec = nvs->pass_rec;
931 struct gl_program *prog = &nvs->mesa.vp.Base;
932 struct gl_program_parameter_list *plist = prog->Parameters;
933 int i;
934
935 /* Init all const tracking/alloc info from the parameter list, rather
936 * than doing it as we translate the program. Otherwise:
937 * 1) we can't get at the correct constant info when relative
938 * addressing is being used due to src->Index not pointing
939 * at the exact const;
940 * 2) as we add extra consts to the program, mesa will call realloc()
941 * and we get invalid pointers to the const data.
942 */
943 rec->mesa_const_last = plist->NumParameters + rec->mesa_const_base;
944 nvs->param_high = rec->mesa_const_last;
945 for (i=0; i<plist->NumParameters; i++) {
946 int hw = rec->mesa_const_base + i;
947
948 if (hw > NVS_MAX_CONSTS) {
949 nvsProgramError(nvs, "hw = %d > NVS_MAX_CONSTS!\n", hw);
950 return GL_FALSE;
951 }
952
953 switch (plist->Parameters[i].Type) {
954 case PROGRAM_NAMED_PARAM:
955 case PROGRAM_STATE_VAR:
956 nvs->params[hw].in_use = GL_TRUE;
957 nvs->params[hw].source_val = plist->ParameterValues[i];
958 COPY_4V(nvs->params[hw].val, plist->ParameterValues[i]);
959 break;
960 case PROGRAM_CONSTANT:
961 nvs->params[hw].in_use = GL_TRUE;
962 nvs->params[hw].source_val = NULL;
963 COPY_4V(nvs->params[hw].val, plist->ParameterValues[i]);
964 break;
965 default:
966 nvsProgramError(nvs, "hit bad type=%d on param %d\n",
967 plist->Parameters[i].Type, i);
968 return GL_FALSE;
969 }
970 }
971
972 return GL_TRUE;
973 }
974
975 GLboolean
976 nouveau_shader_pass0(GLcontext *ctx, nouveauShader *nvs)
977 {
978 nouveauContextPtr nmesa = NOUVEAU_CONTEXT(ctx);
979 struct gl_program *prog = (struct gl_program*)nvs;
980 struct gl_vertex_program *vp = (struct gl_vertex_program *)prog;
981 struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
982 struct pass0_rec *rec;
983 int ret = GL_FALSE;
984
985 NVSDBG("start: nvs=%p\n", nvs);
986
987 /* Previously detected an error, and haven't recieved new program
988 * string, so fail immediately.
989 */
990 if (nvs->error) {
991 NVSDBG("failed previous compile attempt, not retrying\n");
992 return GL_FALSE;
993 }
994
995 rec = CALLOC_STRUCT(pass0_rec);
996 if (!rec)
997 return GL_FALSE;
998
999 rec->next_temp = prog->NumTemporaries;
1000 nvs->pass_rec = rec;
1001
1002 nvs->program_tree = (nvsFragmentHeader*)
1003 pass0_create_subroutine(nvs, "program body");
1004 if (!nvs->program_tree) {
1005 FREE(rec);
1006 return GL_FALSE;
1007 }
1008
1009 switch (prog->Target) {
1010 case GL_VERTEX_PROGRAM_ARB:
1011 nvs->func = &nmesa->VPfunc;
1012
1013 if (vp->IsPositionInvariant)
1014 _mesa_insert_mvp_code(ctx, vp);
1015 pass0_rebase_mesa_consts(nvs);
1016
1017 if (!prog->String && ctx->Transform.ClipPlanesEnabled)
1018 pass0_vp_insert_ff_clip_planes(ctx, nvs);
1019
1020 pass0_build_attrib_map(nvs, vp);
1021 break;
1022 case GL_FRAGMENT_PROGRAM_ARB:
1023 nvs->func = &nmesa->FPfunc;
1024
1025 if (fp->FogOption != GL_NONE)
1026 _mesa_append_fog_code(ctx, fp);
1027 pass0_rebase_mesa_consts(nvs);
1028 break;
1029 default:
1030 fprintf(stderr, "Unknown program type %d", prog->Target);
1031 FREE(rec);
1032 /* DESTROY TREE!! */
1033 return GL_FALSE;
1034 }
1035 nvs->func->card_priv = &nvs->card_priv;
1036
1037 ret = pass0_translate_instructions(nvs, 0, 0, nvs->program_tree);
1038 if (ret)
1039 ret = pass0_resolve_mesa_consts(nvs);
1040
1041 /*XXX: if (!ret) DESTROY TREE!!! */
1042
1043 FREE(rec);
1044 return ret;
1045 }
1046