nv50: fix PSIZ and PRIMID mapping
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_program.h"
24 #include "nv50_pc.h"
25 #include "nv50_context.h"
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_util.h"
30 #include "tgsi/tgsi_dump.h"
31
/*
 * Count the bits set in the low nibble of val.
 * Bits 4 and above are ignored.
 */
static INLINE unsigned
bitcount4(const uint32_t val)
{
   const uint32_t nibble = val & 0xf;

   return (nibble & 1) +
          ((nibble >> 1) & 1) +
          ((nibble >> 2) & 1) +
          (nibble >> 3);
}
39
40 static unsigned
41 nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
42 {
43 unsigned mask = inst->Dst[0].Register.WriteMask;
44
45 switch (inst->Instruction.Opcode) {
46 case TGSI_OPCODE_COS:
47 case TGSI_OPCODE_SIN:
48 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
49 case TGSI_OPCODE_DP3:
50 return 0x7;
51 case TGSI_OPCODE_DP4:
52 case TGSI_OPCODE_DPH:
53 case TGSI_OPCODE_KIL: /* WriteMask ignored */
54 return 0xf;
55 case TGSI_OPCODE_DST:
56 return mask & (c ? 0xa : 0x6);
57 case TGSI_OPCODE_EX2:
58 case TGSI_OPCODE_EXP:
59 case TGSI_OPCODE_LG2:
60 case TGSI_OPCODE_LOG:
61 case TGSI_OPCODE_POW:
62 case TGSI_OPCODE_RCP:
63 case TGSI_OPCODE_RSQ:
64 case TGSI_OPCODE_SCS:
65 return 0x1;
66 case TGSI_OPCODE_IF:
67 return 0x1;
68 case TGSI_OPCODE_LIT:
69 return 0xb;
70 case TGSI_OPCODE_TEX:
71 case TGSI_OPCODE_TXB:
72 case TGSI_OPCODE_TXL:
73 case TGSI_OPCODE_TXP:
74 {
75 const struct tgsi_instruction_texture *tex;
76
77 assert(inst->Instruction.Texture);
78 tex = &inst->Texture;
79
80 mask = 0x7;
81 if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
82 inst->Instruction.Opcode != TGSI_OPCODE_TXD)
83 mask |= 0x8; /* bias, lod or proj */
84
85 switch (tex->Texture) {
86 case TGSI_TEXTURE_1D:
87 mask &= 0x9;
88 break;
89 case TGSI_TEXTURE_SHADOW1D:
90 mask &= 0x5;
91 break;
92 case TGSI_TEXTURE_2D:
93 mask &= 0xb;
94 break;
95 default:
96 break;
97 }
98 }
99 return mask;
100 case TGSI_OPCODE_XPD:
101 {
102 unsigned x = 0;
103 if (mask & 1) x |= 0x6;
104 if (mask & 2) x |= 0x5;
105 if (mask & 4) x |= 0x3;
106 return x;
107 }
108 default:
109 break;
110 }
111
112 return mask;
113 }
114
115 static void
116 nv50_indirect_inputs(struct nv50_translation_info *ti, int id)
117 {
118 int i, c;
119
120 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
121 for (c = 0; c < 4; ++c)
122 ti->input_access[i][c] = id;
123
124 ti->indirect_inputs = TRUE;
125 }
126
127 static void
128 nv50_indirect_outputs(struct nv50_translation_info *ti, int id)
129 {
130 int i, c;
131
132 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
133 for (c = 0; c < 4; ++c)
134 ti->output_access[i][c] = id;
135
136 ti->indirect_outputs = TRUE;
137 }
138
139 static void
140 prog_inst(struct nv50_translation_info *ti,
141 const struct tgsi_full_instruction *inst, int id)
142 {
143 const struct tgsi_dst_register *dst;
144 const struct tgsi_src_register *src;
145 int s, c, k;
146 unsigned mask;
147
148 if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
149 for (c = 0; c < 4; ++c) {
150 dst = &inst->Dst[0].Register;
151 if (inst->Dst[0].Register.Indirect)
152 nv50_indirect_outputs(ti, id);
153 if (!(dst->WriteMask & (1 << c)))
154 continue;
155 ti->output_access[dst->Index][c] = id;
156 }
157
158 if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
159 inst->Src[0].Register.File == TGSI_FILE_INPUT &&
160 dst->Index == ti->edgeflag_out)
161 ti->p->vp.edgeflag = inst->Src[0].Register.Index;
162 }
163
164 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
165 src = &inst->Src[s].Register;
166 if (src->File != TGSI_FILE_INPUT)
167 continue;
168 mask = nv50_tgsi_src_mask(inst, s);
169
170 if (inst->Src[s].Register.Indirect)
171 nv50_indirect_inputs(ti, id);
172
173 for (c = 0; c < 4; ++c) {
174 if (!(mask & (1 << c)))
175 continue;
176 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
177 if (k <= TGSI_SWIZZLE_W)
178 ti->input_access[src->Index][k] = id;
179 }
180 }
181 }
182
183 static void
184 prog_immediate(struct nv50_translation_info *ti,
185 const struct tgsi_full_immediate *imm)
186 {
187 int c;
188 unsigned n = ++ti->immd32_nr;
189
190 tgsi_dump_immediate(imm);
191
192 if (n == (1 << (ffs(n) - 1)))
193 ti->immd32 = REALLOC(ti->immd32, (n / 2) * 16, (n * 2) * 16);
194
195 for (c = 0; c < 4; ++c)
196 ti->immd32[(n - 1) * 4 + c] = imm->u[c].Uint;
197 }
198
199 static INLINE unsigned
200 translate_interpolate(const struct tgsi_full_declaration *decl)
201 {
202 unsigned mode;
203
204 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
205 mode = NV50_INTERP_FLAT;
206 else
207 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
208 mode = 0;
209 else
210 mode = NV50_INTERP_LINEAR;
211
212 if (decl->Declaration.Centroid)
213 mode |= NV50_INTERP_CENTROID;
214
215 return mode;
216 }
217
218 static void
219 prog_decl(struct nv50_translation_info *ti,
220 const struct tgsi_full_declaration *decl)
221 {
222 unsigned i, first, last, sn = 0, si = 0;
223
224 first = decl->Range.First;
225 last = decl->Range.Last;
226
227 if (decl->Declaration.Semantic) {
228 sn = decl->Semantic.Name;
229 si = decl->Semantic.Index;
230 }
231 tgsi_dump_declaration(decl);
232
233 switch (decl->Declaration.File) {
234 case TGSI_FILE_INPUT:
235 for (i = first; i <= last; ++i)
236 ti->interp_mode[i] = translate_interpolate(decl);
237
238 if (!decl->Declaration.Semantic)
239 break;
240
241 for (i = first; i <= last; ++i) {
242 ti->p->in[i].sn = sn;
243 ti->p->in[i].si = si;
244 }
245
246 switch (sn) {
247 case TGSI_SEMANTIC_FACE:
248 break;
249 case TGSI_SEMANTIC_COLOR:
250 if (ti->p->type == PIPE_SHADER_FRAGMENT)
251 ti->p->vp.bfc[si] = first;
252 break;
253 }
254 break;
255 case TGSI_FILE_OUTPUT:
256 if (!decl->Declaration.Semantic)
257 break;
258
259 for (i = first; i <= last; ++i) {
260 ti->p->out[i].sn = sn;
261 ti->p->out[i].si = si;
262 }
263
264 switch (sn) {
265 case TGSI_SEMANTIC_BCOLOR:
266 ti->p->vp.bfc[si] = first;
267 break;
268 case TGSI_SEMANTIC_PSIZE:
269 ti->p->vp.psiz = first;
270 break;
271 case TGSI_SEMANTIC_EDGEFLAG:
272 ti->edgeflag_out = first;
273 break;
274 default:
275 break;
276 }
277 break;
278 case TGSI_FILE_SYSTEM_VALUE:
279 switch (decl->Semantic.Name) {
280 case TGSI_SEMANTIC_FACE:
281 break;
282 case TGSI_SEMANTIC_INSTANCEID:
283 break;
284 case TGSI_SEMANTIC_PRIMID:
285 break;
286 /*
287 case TGSI_SEMANTIC_PRIMIDIN:
288 break;
289 case TGSI_SEMANTIC_VERTEXID:
290 break;
291 */
292 default:
293 break;
294 }
295 break;
296 case TGSI_FILE_CONSTANT:
297 ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16);
298 break;
299 case TGSI_FILE_ADDRESS:
300 case TGSI_FILE_SAMPLER:
301 case TGSI_FILE_TEMPORARY:
302 break;
303 default:
304 assert(0);
305 break;
306 }
307 }
308
309 static int
310 nv50_vertprog_prepare(struct nv50_translation_info *ti)
311 {
312 struct nv50_program *p = ti->p;
313 int i, c;
314 unsigned num_inputs = 0;
315
316 ti->input_file = NV_FILE_MEM_S;
317 ti->output_file = NV_FILE_OUT;
318
319 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
320 p->in[i].id = i;
321 p->in[i].hw = num_inputs;
322
323 for (c = 0; c < 4; ++c) {
324 if (!ti->input_access[i][c])
325 continue;
326 ti->input_map[i][c] = num_inputs++;
327 p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32);
328 }
329 }
330
331 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
332 p->out[i].id = i;
333 p->out[i].hw = p->max_out;
334
335 for (c = 0; c < 4; ++c) {
336 if (!ti->output_access[i][c])
337 continue;
338 ti->output_map[i][c] = p->max_out++;
339 p->out[i].mask |= 1 << c;
340 }
341 }
342
343 if (p->vp.psiz < 0x40)
344 p->vp.psiz = p->out[p->vp.psiz].hw;
345
346 return 0;
347 }
348
349 static int
350 nv50_fragprog_prepare(struct nv50_translation_info *ti)
351 {
352 struct nv50_program *p = ti->p;
353 int i, j, c;
354 unsigned nvary, nintp, depr;
355 unsigned n = 0, m = 0, skip = 0;
356 ubyte sn[16], si[16];
357
358 /* FP flags */
359
360 if (ti->scan.writes_z) {
361 p->fp.flags[1] = 0x11;
362 p->fp.flags[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z;
363 }
364
365 if (ti->scan.uses_kill)
366 p->fp.flags[0] |= NV50TCL_FP_CONTROL_USES_KIL;
367
368 /* FP inputs */
369
370 ti->input_file = NV_FILE_MEM_V;
371 ti->output_file = NV_FILE_GPR;
372
373 /* count non-flat inputs, save semantic info */
374 for (i = 0; i < p->in_nr; ++i) {
375 m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1;
376 sn[i] = p->in[i].sn;
377 si[i] = p->in[i].si;
378 }
379
380 /* reorder p->in[] so that non-flat inputs are first and
381 * kick out special inputs that don't use VP/GP_RESULT_MAP
382 */
383 nintp = 0;
384 for (i = 0; i < p->in_nr; ++i) {
385 if (sn[i] == TGSI_SEMANTIC_POSITION) {
386 for (c = 0; c < 4; ++c) {
387 ti->input_map[i][c] = nintp;
388 if (ti->input_access[i][c]) {
389 p->fp.interp |= 1 << (24 + c);
390 ++nintp;
391 }
392 }
393 skip++;
394 continue;
395 } else
396 if (sn[i] == TGSI_SEMANTIC_FACE) {
397 ti->input_map[i][0] = 255;
398 skip++;
399 continue;
400 }
401
402 j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++;
403
404 if (sn[i] == TGSI_SEMANTIC_COLOR)
405 p->vp.bfc[si[i]] = j;
406
407 p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0;
408 p->in[j].id = i;
409 p->in[j].sn = sn[i];
410 p->in[j].si = si[i];
411 }
412 assert(n <= m);
413 p->in_nr -= skip;
414
415 if (!(p->fp.interp & (8 << 24))) {
416 p->fp.interp |= (8 << 24);
417 ++nintp;
418 }
419
420 p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */
421
422 for (i = 0; i < p->in_nr; ++i) {
423 int j = p->in[i].id;
424 p->in[i].hw = nintp;
425
426 for (c = 0; c < 4; ++c) {
427 if (!ti->input_access[j][c])
428 continue;
429 p->in[i].mask |= 1 << c;
430 ti->input_map[j][c] = nintp++;
431 }
432 /* count color inputs */
433 if (i == p->vp.bfc[0] || i == p->vp.bfc[1])
434 p->fp.colors += bitcount4(p->in[i].mask) << 16;
435 }
436 nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */
437 nvary = nintp;
438 if (n < m)
439 nvary -= p->in[n].hw;
440
441 p->fp.interp |= nvary << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT;
442 p->fp.interp |= nintp << NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT;
443
444 /* FP outputs */
445
446 if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0)))
447 p->fp.flags[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS;
448
449 depr = p->out_nr;
450 for (i = 0; i < p->out_nr; ++i) {
451 p->out[i].id = i;
452 if (p->out[i].sn == TGSI_SEMANTIC_POSITION) {
453 depr = i;
454 continue;
455 }
456 p->out[i].hw = p->max_out;
457 p->out[i].mask = 0xf;
458
459 for (c = 0; c < 4; ++c)
460 ti->output_map[i][c] = p->max_out++;
461 }
462 if (depr < p->out_nr) {
463 p->out[depr].mask = 0x4;
464 p->out[depr].hw = p->max_out++;
465 }
466
467 return 0;
468 }
469
470 static int
471 nv50_geomprog_prepare(struct nv50_translation_info *ti)
472 {
473 ti->input_file = NV_FILE_MEM_S;
474 ti->output_file = NV_FILE_OUT;
475
476 assert(0);
477 return 1;
478 }
479
480 static int
481 nv50_prog_scan(struct nv50_translation_info *ti)
482 {
483 struct nv50_program *p = ti->p;
484 struct tgsi_parse_context parse;
485 int ret;
486
487 p->vp.psiz = 0x40;
488 p->vp.bfc[0] = 0x40;
489 p->vp.bfc[1] = 0x40;
490 p->gp.primid = 0x80;
491
492 tgsi_scan_shader(p->pipe.tokens, &ti->scan);
493
494 tgsi_parse_init(&parse, p->pipe.tokens);
495 while (!tgsi_parse_end_of_tokens(&parse)) {
496 tgsi_parse_token(&parse);
497
498 switch (parse.FullToken.Token.Type) {
499 case TGSI_TOKEN_TYPE_IMMEDIATE:
500 prog_immediate(ti, &parse.FullToken.FullImmediate);
501 break;
502 case TGSI_TOKEN_TYPE_DECLARATION:
503 prog_decl(ti, &parse.FullToken.FullDeclaration);
504 break;
505 case TGSI_TOKEN_TYPE_INSTRUCTION:
506 prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr);
507 break;
508 }
509 }
510
511 p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
512 p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
513
514 switch (p->type) {
515 case PIPE_SHADER_VERTEX:
516 ret = nv50_vertprog_prepare(ti);
517 break;
518 case PIPE_SHADER_FRAGMENT:
519 ret = nv50_fragprog_prepare(ti);
520 break;
521 case PIPE_SHADER_GEOMETRY:
522 ret = nv50_geomprog_prepare(ti);
523 break;
524 default:
525 assert(!"unsupported program type");
526 ret = -1;
527 break;
528 }
529
530 assert(!ret);
531 return ret;
532 }
533
534 boolean
535 nv50_program_tx(struct nv50_program *p)
536 {
537 struct nv50_translation_info *ti;
538 int ret;
539
540 ti = CALLOC_STRUCT(nv50_translation_info);
541 ti->p = p;
542
543 ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
544
545 ret = nv50_prog_scan(ti);
546 if (ret) {
547 NOUVEAU_ERR("unsupported shader program\n");
548 goto out;
549 }
550
551 ret = nv50_generate_code(ti);
552 if (ret) {
553 NOUVEAU_ERR("error during shader translation\n");
554 goto out;
555 }
556
557 out:
558 if (ti->immd32)
559 FREE(ti->immd32);
560 FREE(ti);
561 return ret ? FALSE : TRUE;
562 }
563
564 void
565 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
566 {
567 nouveau_bo_ref(NULL, &p->bo);
568
569 so_ref(NULL, &p->so);
570
571 if (p->code)
572 FREE(p->code);
573
574 p->translated = FALSE;
575 }