Merge remote branch 'origin/nvc0'
[mesa.git] / src / gallium / drivers / nv50 / nv50_program.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 /* #define NV50_PROGRAM_DEBUG */
24
25 #include "nv50_program.h"
26 #include "nv50_pc.h"
27 #include "nv50_context.h"
28
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_parse.h"
31 #include "tgsi/tgsi_util.h"
32 #include "tgsi/tgsi_dump.h"
33
34 static INLINE unsigned
35 bitcount4(const uint32_t val)
36 {
37 static const unsigned cnt[16]
38 = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
39 return cnt[val & 0xf];
40 }
41
42 static unsigned
43 nv50_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
44 {
45 unsigned mask = inst->Dst[0].Register.WriteMask;
46
47 switch (inst->Instruction.Opcode) {
48 case TGSI_OPCODE_COS:
49 case TGSI_OPCODE_SIN:
50 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
51 case TGSI_OPCODE_DP3:
52 return 0x7;
53 case TGSI_OPCODE_DP4:
54 case TGSI_OPCODE_DPH:
55 case TGSI_OPCODE_KIL: /* WriteMask ignored */
56 return 0xf;
57 case TGSI_OPCODE_DST:
58 return mask & (c ? 0xa : 0x6);
59 case TGSI_OPCODE_EX2:
60 case TGSI_OPCODE_EXP:
61 case TGSI_OPCODE_LG2:
62 case TGSI_OPCODE_LOG:
63 case TGSI_OPCODE_POW:
64 case TGSI_OPCODE_RCP:
65 case TGSI_OPCODE_RSQ:
66 case TGSI_OPCODE_SCS:
67 return 0x1;
68 case TGSI_OPCODE_IF:
69 return 0x1;
70 case TGSI_OPCODE_LIT:
71 return 0xb;
72 case TGSI_OPCODE_TEX:
73 case TGSI_OPCODE_TXB:
74 case TGSI_OPCODE_TXL:
75 case TGSI_OPCODE_TXP:
76 {
77 const struct tgsi_instruction_texture *tex;
78
79 assert(inst->Instruction.Texture);
80 tex = &inst->Texture;
81
82 mask = 0x7;
83 if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
84 inst->Instruction.Opcode != TGSI_OPCODE_TXD)
85 mask |= 0x8; /* bias, lod or proj */
86
87 switch (tex->Texture) {
88 case TGSI_TEXTURE_1D:
89 mask &= 0x9;
90 break;
91 case TGSI_TEXTURE_SHADOW1D:
92 mask &= 0x5;
93 break;
94 case TGSI_TEXTURE_2D:
95 mask &= 0xb;
96 break;
97 default:
98 break;
99 }
100 }
101 return mask;
102 case TGSI_OPCODE_XPD:
103 {
104 unsigned x = 0;
105 if (mask & 1) x |= 0x6;
106 if (mask & 2) x |= 0x5;
107 if (mask & 4) x |= 0x3;
108 return x;
109 }
110 default:
111 break;
112 }
113
114 return mask;
115 }
116
117 static void
118 nv50_indirect_inputs(struct nv50_translation_info *ti, int id)
119 {
120 int i, c;
121
122 for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
123 for (c = 0; c < 4; ++c)
124 ti->input_access[i][c] = id;
125
126 ti->indirect_inputs = TRUE;
127 }
128
129 static void
130 nv50_indirect_outputs(struct nv50_translation_info *ti, int id)
131 {
132 int i, c;
133
134 for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
135 for (c = 0; c < 4; ++c)
136 ti->output_access[i][c] = id;
137
138 ti->indirect_outputs = TRUE;
139 }
140
141 static void
142 prog_inst(struct nv50_translation_info *ti,
143 const struct tgsi_full_instruction *inst, int id)
144 {
145 const struct tgsi_dst_register *dst;
146 const struct tgsi_src_register *src;
147 int s, c, k;
148 unsigned mask;
149
150 if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
151 ti->subr[ti->subr_nr].pos = id - 1;
152 ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */
153 ++ti->subr_nr;
154 }
155
156 if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
157 dst = &inst->Dst[0].Register;
158
159 for (c = 0; c < 4; ++c) {
160 if (dst->Indirect)
161 nv50_indirect_outputs(ti, id);
162 if (!(dst->WriteMask & (1 << c)))
163 continue;
164 ti->output_access[dst->Index][c] = id;
165 }
166
167 if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
168 inst->Src[0].Register.File == TGSI_FILE_INPUT &&
169 dst->Index == ti->edgeflag_out)
170 ti->p->vp.edgeflag = inst->Src[0].Register.Index;
171 } else
172 if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
173 if (inst->Dst[0].Register.Indirect)
174 ti->store_to_memory = TRUE;
175 }
176
177 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
178 src = &inst->Src[s].Register;
179 if (src->File == TGSI_FILE_TEMPORARY)
180 if (inst->Src[s].Register.Indirect)
181 ti->store_to_memory = TRUE;
182 if (src->File != TGSI_FILE_INPUT)
183 continue;
184 mask = nv50_tgsi_src_mask(inst, s);
185
186 if (inst->Src[s].Register.Indirect)
187 nv50_indirect_inputs(ti, id);
188
189 for (c = 0; c < 4; ++c) {
190 if (!(mask & (1 << c)))
191 continue;
192 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
193 if (k <= TGSI_SWIZZLE_W)
194 ti->input_access[src->Index][k] = id;
195 }
196 }
197 }
198
199 /* Probably should introduce something like struct tgsi_function_declaration
200 * instead of trying to guess inputs/outputs.
201 */
202 static void
203 prog_subroutine_inst(struct nv50_subroutine *subr,
204 const struct tgsi_full_instruction *inst)
205 {
206 const struct tgsi_dst_register *dst;
207 const struct tgsi_src_register *src;
208 int s, c, k;
209 unsigned mask;
210
211 for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
212 src = &inst->Src[s].Register;
213 if (src->File != TGSI_FILE_TEMPORARY)
214 continue;
215 mask = nv50_tgsi_src_mask(inst, s);
216
217 assert(!inst->Src[s].Register.Indirect);
218
219 for (c = 0; c < 4; ++c) {
220 k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
221
222 if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
223 if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
224 subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
225 }
226 }
227
228 if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
229 dst = &inst->Dst[0].Register;
230
231 for (c = 0; c < 4; ++c)
232 if (dst->WriteMask & (1 << c))
233 subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
234 }
235 }
236
237 static void
238 prog_immediate(struct nv50_translation_info *ti,
239 const struct tgsi_full_immediate *imm)
240 {
241 int c;
242 unsigned n = ti->immd32_nr++;
243
244 assert(ti->immd32_nr <= ti->scan.immediate_count);
245
246 for (c = 0; c < 4; ++c)
247 ti->immd32[n * 4 + c] = imm->u[c].Uint;
248
249 ti->immd32_ty[n] = imm->Immediate.DataType;
250 }
251
252 static INLINE unsigned
253 translate_interpolate(const struct tgsi_full_declaration *decl)
254 {
255 unsigned mode;
256
257 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
258 mode = NV50_INTERP_FLAT;
259 else
260 if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
261 mode = 0;
262 else
263 mode = NV50_INTERP_LINEAR;
264
265 if (decl->Declaration.Centroid)
266 mode |= NV50_INTERP_CENTROID;
267
268 return mode;
269 }
270
271 static void
272 prog_decl(struct nv50_translation_info *ti,
273 const struct tgsi_full_declaration *decl)
274 {
275 unsigned i, first, last, sn = 0, si = 0;
276
277 first = decl->Range.First;
278 last = decl->Range.Last;
279
280 if (decl->Declaration.Semantic) {
281 sn = decl->Semantic.Name;
282 si = decl->Semantic.Index;
283 }
284
285 switch (decl->Declaration.File) {
286 case TGSI_FILE_INPUT:
287 for (i = first; i <= last; ++i)
288 ti->interp_mode[i] = translate_interpolate(decl);
289
290 if (!decl->Declaration.Semantic)
291 break;
292
293 for (i = first; i <= last; ++i) {
294 ti->p->in[i].sn = sn;
295 ti->p->in[i].si = si;
296 }
297
298 switch (sn) {
299 case TGSI_SEMANTIC_FACE:
300 break;
301 case TGSI_SEMANTIC_COLOR:
302 if (ti->p->type == PIPE_SHADER_FRAGMENT)
303 ti->p->vp.bfc[si] = first;
304 break;
305 }
306 break;
307 case TGSI_FILE_OUTPUT:
308 if (!decl->Declaration.Semantic)
309 break;
310
311 for (i = first; i <= last; ++i) {
312 ti->p->out[i].sn = sn;
313 ti->p->out[i].si = si;
314 }
315
316 switch (sn) {
317 case TGSI_SEMANTIC_BCOLOR:
318 ti->p->vp.bfc[si] = first;
319 break;
320 case TGSI_SEMANTIC_PSIZE:
321 ti->p->vp.psiz = first;
322 break;
323 case TGSI_SEMANTIC_EDGEFLAG:
324 ti->edgeflag_out = first;
325 break;
326 default:
327 break;
328 }
329 break;
330 case TGSI_FILE_SYSTEM_VALUE:
331 switch (decl->Semantic.Name) {
332 case TGSI_SEMANTIC_FACE:
333 break;
334 case TGSI_SEMANTIC_INSTANCEID:
335 break;
336 case TGSI_SEMANTIC_PRIMID:
337 break;
338 /*
339 case TGSI_SEMANTIC_PRIMIDIN:
340 break;
341 case TGSI_SEMANTIC_VERTEXID:
342 break;
343 */
344 default:
345 break;
346 }
347 break;
348 case TGSI_FILE_CONSTANT:
349 ti->p->parm_size = MAX2(ti->p->parm_size, (last + 1) * 16);
350 break;
351 case TGSI_FILE_ADDRESS:
352 case TGSI_FILE_SAMPLER:
353 case TGSI_FILE_TEMPORARY:
354 break;
355 default:
356 assert(0);
357 break;
358 }
359 }
360
361 static int
362 nv50_vertprog_prepare(struct nv50_translation_info *ti)
363 {
364 struct nv50_program *p = ti->p;
365 int i, c;
366 unsigned num_inputs = 0;
367
368 ti->input_file = NV_FILE_MEM_S;
369 ti->output_file = NV_FILE_OUT;
370
371 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
372 p->in[i].id = i;
373 p->in[i].hw = num_inputs;
374
375 for (c = 0; c < 4; ++c) {
376 if (!ti->input_access[i][c])
377 continue;
378 ti->input_map[i][c] = num_inputs++;
379 p->vp.attrs[(4 * i + c) / 32] |= 1 << ((i * 4 + c) % 32);
380 }
381 }
382
383 for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
384 p->out[i].id = i;
385 p->out[i].hw = p->max_out;
386
387 for (c = 0; c < 4; ++c) {
388 if (!ti->output_access[i][c])
389 continue;
390 ti->output_map[i][c] = p->max_out++;
391 p->out[i].mask |= 1 << c;
392 }
393 }
394
395 if (p->vp.psiz < 0x40)
396 p->vp.psiz = p->out[p->vp.psiz].hw;
397
398 return 0;
399 }
400
401 static int
402 nv50_fragprog_prepare(struct nv50_translation_info *ti)
403 {
404 struct nv50_program *p = ti->p;
405 int i, j, c;
406 unsigned nvary, nintp, depr;
407 unsigned n = 0, m = 0, skip = 0;
408 ubyte sn[16], si[16];
409
410 /* FP flags */
411
412 if (ti->scan.writes_z) {
413 p->fp.flags[1] = 0x11;
414 p->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z;
415 }
416
417 if (ti->scan.uses_kill)
418 p->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
419
420 /* FP inputs */
421
422 ti->input_file = NV_FILE_MEM_V;
423 ti->output_file = NV_FILE_GPR;
424
425 /* count non-flat inputs, save semantic info */
426 for (i = 0; i < p->in_nr; ++i) {
427 m += (ti->interp_mode[i] & NV50_INTERP_FLAT) ? 0 : 1;
428 sn[i] = p->in[i].sn;
429 si[i] = p->in[i].si;
430 }
431
432 /* reorder p->in[] so that non-flat inputs are first and
433 * kick out special inputs that don't use VP/GP_RESULT_MAP
434 */
435 nintp = 0;
436 for (i = 0; i < p->in_nr; ++i) {
437 if (sn[i] == TGSI_SEMANTIC_POSITION) {
438 for (c = 0; c < 4; ++c) {
439 ti->input_map[i][c] = nintp;
440 if (ti->input_access[i][c]) {
441 p->fp.interp |= 1 << (24 + c);
442 ++nintp;
443 }
444 }
445 skip++;
446 continue;
447 } else
448 if (sn[i] == TGSI_SEMANTIC_FACE) {
449 ti->input_map[i][0] = 255;
450 skip++;
451 continue;
452 }
453
454 j = (ti->interp_mode[i] & NV50_INTERP_FLAT) ? m++ : n++;
455
456 if (sn[i] == TGSI_SEMANTIC_COLOR)
457 p->vp.bfc[si[i]] = j;
458
459 p->in[j].linear = (ti->interp_mode[i] & NV50_INTERP_LINEAR) ? 1 : 0;
460 p->in[j].id = i;
461 p->in[j].sn = sn[i];
462 p->in[j].si = si[i];
463 }
464 assert(n <= m);
465 p->in_nr -= skip;
466
467 if (!(p->fp.interp & (8 << 24))) {
468 p->fp.interp |= (8 << 24);
469 ++nintp;
470 }
471
472 p->fp.colors = (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */
473
474 for (i = 0; i < p->in_nr; ++i) {
475 int j = p->in[i].id;
476 p->in[i].hw = nintp;
477
478 for (c = 0; c < 4; ++c) {
479 if (!ti->input_access[j][c])
480 continue;
481 p->in[i].mask |= 1 << c;
482 ti->input_map[j][c] = nintp++;
483 }
484 /* count color inputs */
485 if (i == p->vp.bfc[0] || i == p->vp.bfc[1])
486 p->fp.colors += bitcount4(p->in[i].mask) << 16;
487 }
488 nintp -= bitcount4(p->fp.interp >> 24); /* subtract position inputs */
489 nvary = nintp;
490 if (n < m)
491 nvary -= p->in[n].hw;
492
493 p->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT;
494 p->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT;
495
496 /* FP outputs */
497
498 if (p->out_nr > (1 + (ti->scan.writes_z ? 1 : 0)))
499 p->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS;
500
501 depr = p->out_nr;
502 for (i = 0; i < p->out_nr; ++i) {
503 p->out[i].id = i;
504 if (p->out[i].sn == TGSI_SEMANTIC_POSITION) {
505 depr = i;
506 continue;
507 }
508 p->out[i].hw = p->max_out;
509 p->out[i].mask = 0xf;
510
511 for (c = 0; c < 4; ++c)
512 ti->output_map[i][c] = p->max_out++;
513 }
514 if (depr < p->out_nr) {
515 p->out[depr].mask = 0x4;
516 p->out[depr].hw = ti->output_map[depr][2] = p->max_out++;
517 } else {
518 /* allowed values are 1, 4, 5, 8, 9, ... */
519 p->max_out = MAX2(4, p->max_out);
520 }
521
522 return 0;
523 }
524
525 static int
526 nv50_geomprog_prepare(struct nv50_translation_info *ti)
527 {
528 ti->input_file = NV_FILE_MEM_S;
529 ti->output_file = NV_FILE_OUT;
530
531 assert(0);
532 return 1;
533 }
534
535 static int
536 nv50_prog_scan(struct nv50_translation_info *ti)
537 {
538 struct nv50_program *p = ti->p;
539 struct tgsi_parse_context parse;
540 int ret, i;
541
542 p->vp.edgeflag = 0x40;
543 p->vp.psiz = 0x40;
544 p->vp.bfc[0] = 0x40;
545 p->vp.bfc[1] = 0x40;
546 p->gp.primid = 0x80;
547
548 tgsi_scan_shader(p->pipe.tokens, &ti->scan);
549
550 #ifdef NV50_PROGRAM_DEBUG
551 tgsi_dump(p->pipe.tokens, 0);
552 #endif
553
554 ti->subr =
555 CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
556
557 ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
558 ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
559
560 ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
561
562 tgsi_parse_init(&parse, p->pipe.tokens);
563 while (!tgsi_parse_end_of_tokens(&parse)) {
564 tgsi_parse_token(&parse);
565
566 switch (parse.FullToken.Token.Type) {
567 case TGSI_TOKEN_TYPE_IMMEDIATE:
568 prog_immediate(ti, &parse.FullToken.FullImmediate);
569 break;
570 case TGSI_TOKEN_TYPE_DECLARATION:
571 prog_decl(ti, &parse.FullToken.FullDeclaration);
572 break;
573 case TGSI_TOKEN_TYPE_INSTRUCTION:
574 ti->insns[ti->inst_nr] = parse.FullToken.FullInstruction;
575 prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->inst_nr);
576 break;
577 }
578 }
579
580 /* Scan to determine which registers are inputs/outputs of a subroutine. */
581 for (i = 0; i < ti->subr_nr; ++i) {
582 int pc = ti->subr[i].id;
583 while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
584 prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
585 }
586
587 p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
588 p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
589
590 switch (p->type) {
591 case PIPE_SHADER_VERTEX:
592 ret = nv50_vertprog_prepare(ti);
593 break;
594 case PIPE_SHADER_FRAGMENT:
595 ret = nv50_fragprog_prepare(ti);
596 break;
597 case PIPE_SHADER_GEOMETRY:
598 ret = nv50_geomprog_prepare(ti);
599 break;
600 default:
601 assert(!"unsupported program type");
602 ret = -1;
603 break;
604 }
605
606 assert(!ret);
607 return ret;
608 }
609
610 boolean
611 nv50_program_translate(struct nv50_program *p)
612 {
613 struct nv50_translation_info *ti;
614 int ret;
615
616 ti = CALLOC_STRUCT(nv50_translation_info);
617 ti->p = p;
618
619 ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
620
621 ret = nv50_prog_scan(ti);
622 if (ret) {
623 NOUVEAU_ERR("unsupported shader program\n");
624 goto out;
625 }
626
627 ret = nv50_generate_code(ti);
628 if (ret) {
629 NOUVEAU_ERR("error during shader translation\n");
630 goto out;
631 }
632
633 out:
634 if (ti->immd32)
635 FREE(ti->immd32);
636 if (ti->immd32_ty)
637 FREE(ti->immd32_ty);
638 if (ti->insns)
639 FREE(ti->insns);
640 if (ti->subr)
641 FREE(ti->subr);
642 FREE(ti);
643 return ret ? FALSE : TRUE;
644 }
645
646 void
647 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
648 {
649 if (p->res)
650 nouveau_resource_free(&p->res);
651
652 if (p->code)
653 FREE(p->code);
654
655 if (p->fixups)
656 FREE(p->fixups);
657
658 p->translated = FALSE;
659 }