Merge branch 'master' of ssh://git.freedesktop.org/git/mesa/mesa into pipe-video
[mesa.git] / src / gallium / drivers / nv50 / nv50_pc_emit.c
1 /*
2 * Copyright 2010 Christoph Bumiller
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 #include "nv50_context.h"
24 #include "nv50_pc.h"
25
/* Field positions for the condition-code / flags-register bits.
 * The shift values match the literal shifts used by set_pred() and
 * set_pred_wr() on the second instruction word.
 */
#define FLAGS_CC_SHIFT    7
#define FLAGS_ID_SHIFT    12
#define FLAGS_WR_ID_SHIFT 4

/* Masks derived from the shifts above. */
#define FLAGS_CC_MASK    (0x1f << FLAGS_CC_SHIFT)
#define FLAGS_ID_MASK    (0x03 << FLAGS_ID_SHIFT)
#define FLAGS_WR_ID_MASK (0x3 << FLAGS_WR_ID_SHIFT)
#define FLAGS_WR_EN      (1 << 6)

/* Fixup kinds: which base address nv50_relocate_program() adds. */
#define NV50_FIXUP_CODE_RELOC 0
#define NV50_FIXUP_DATA_RELOC 1
36
/* One pending patch to an emitted code word; recorded by new_fixup() and
 * applied by nv50_relocate_program().
 */
struct nv50_fixup {
   uint8_t type;    /* NV50_FIXUP_*: selects the base added to data */
   int8_t shift;    /* > 0: shift value left, < 0: shift right by -shift */
   uint32_t mask;   /* bits of the code word that get replaced */
   uint32_t data;   /* value the base address is added to */
   uint32_t offset; /* byte offset of the code word to patch */
};
44
45 void
46 nv50_relocate_program(struct nv50_program *p,
47 uint32_t code_base,
48 uint32_t data_base)
49 {
50 struct nv50_fixup *f = (struct nv50_fixup *)p->fixups;
51 unsigned i;
52
53 for (i = 0; i < p->num_fixups; ++i) {
54 uint32_t data;
55
56 switch (f[i].type) {
57 case NV50_FIXUP_CODE_RELOC: data = code_base + f[i].data; break;
58 case NV50_FIXUP_DATA_RELOC: data = data_base + f[i].data; break;
59 default:
60 data = f[i].data;
61 break;
62 }
63 data = (f[i].shift < 0) ? (data >> -f[i].shift) : (data << f[i].shift);
64
65 p->code[f[i].offset / 4] &= ~f[i].mask;
66 p->code[f[i].offset / 4] |= data & f[i].mask;
67 }
68 }
69
70 static void
71 new_fixup(struct nv_pc *pc, uint8_t ty, int w, uint32_t data, uint32_t m, int s)
72 {
73 struct nv50_fixup *f;
74
75 const unsigned size = sizeof(struct nv50_fixup);
76 const unsigned n = pc->num_fixups;
77
78 if (!(n % 8))
79 pc->fixups = REALLOC(pc->fixups, n * size, (n + 8) * size);
80
81 f = (struct nv50_fixup *)pc->fixups;
82
83 f[n].offset = (pc->bin_pos + w) * 4;
84 f[n].type = ty;
85 f[n].data = data;
86 f[n].mask = m;
87 f[n].shift = s;
88
89 ++pc->num_fixups;
90 }
91
92 const ubyte nv50_inst_min_size_tab[NV_OP_COUNT] =
93 {
94 0, 0, 0, 8, 8, 4, 4, 4, 8, 4, 4, 8, 8, 8, 8, 8, /* 15 */
95 8, 8, 8, 4, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 31 */
96 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, /* 47 */
97 4, 8, 8, 8, 8, 8, 0, 0, 8
98 };
99
100 unsigned
101 nv50_inst_min_size(struct nv_instruction *i)
102 {
103 int n;
104
105 if (nv50_inst_min_size_tab[i->opcode] > 4)
106 return 8;
107
108 if (i->def[0] && i->def[0]->reg.file != NV_FILE_GPR)
109 return 8;
110 if (i->def[0]->join->reg.id > 63)
111 return 8;
112
113 for (n = 0; n < 3; ++n) {
114 if (!i->src[n])
115 break;
116 if (i->src[n]->value->reg.file != NV_FILE_GPR &&
117 i->src[n]->value->reg.file != NV_FILE_MEM_V)
118 return 8;
119 if (i->src[n]->value->reg.id > 63)
120 return 8;
121 }
122
123 if (i->flags_def || i->flags_src || i->src[4])
124 return 8;
125
126 if (i->is_join)
127 return 8;
128
129 if (i->src[2]) {
130 if (i->saturate || i->src[2]->mod)
131 return 8;
132 if (i->src[0]->mod ^ i->src[1]->mod)
133 return 8;
134 if ((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS)
135 return 8;
136 if (i->def[0]->join->reg.id < 0 ||
137 i->def[0]->join->reg.id != i->src[2]->value->join->reg.id)
138 return 8;
139 }
140
141 return nv50_inst_min_size_tab[i->opcode];
142 }
143
144 static INLINE ubyte
145 STYPE(struct nv_instruction *nvi, int s)
146 {
147 return nvi->src[s]->typecast;
148 }
149
150 static INLINE ubyte
151 DTYPE(struct nv_instruction *nvi, int d)
152 {
153 return nvi->def[d]->reg.type;
154 }
155
156 static INLINE struct nv_reg *
157 SREG(struct nv_ref *ref)
158 {
159 return &ref->value->join->reg;
160 }
161
162 static INLINE struct nv_reg *
163 DREG(struct nv_value *val)
164 {
165 return &val->join->reg;
166 }
167
168 static INLINE ubyte
169 SFILE(struct nv_instruction *nvi, int s)
170 {
171 return nvi->src[s]->value->reg.file;
172 }
173
174 static INLINE ubyte
175 DFILE(struct nv_instruction *nvi, int d)
176 {
177 return nvi->def[0]->reg.file;
178 }
179
180 static INLINE void
181 SID(struct nv_pc *pc, struct nv_ref *ref, int pos)
182 {
183 pc->emit[pos / 32] |= SREG(ref)->id << (pos % 32);
184 }
185
186 static INLINE void
187 DID(struct nv_pc *pc, struct nv_value *val, int pos)
188 {
189 pc->emit[pos / 32] |= DREG(val)->id << (pos % 32);
190 }
191
192 static INLINE uint32_t
193 get_immd_u32(struct nv_ref *ref)
194 {
195 assert(ref->value->reg.file == NV_FILE_IMM);
196 return ref->value->reg.imm.u32;
197 }
198
199 static INLINE void
200 set_immd_u32(struct nv_pc *pc, uint32_t u32)
201 {
202 pc->emit[1] |= 3;
203 pc->emit[0] |= (u32 & 0x3f) << 16;
204 pc->emit[1] |= (u32 >> 6) << 2;
205 }
206
207 static INLINE void
208 set_immd(struct nv_pc *pc, struct nv_ref *ref)
209 {
210 assert(ref->value->reg.file == NV_FILE_IMM);
211 set_immd_u32(pc, get_immd_u32(ref));
212 }
213
214 /* Allocate data in immediate buffer, if we want to load the immediate
215 * for a constant buffer instead of inlining it into the code.
216 */
217 static void
218 nv_pc_alloc_immd(struct nv_pc *pc, struct nv_ref *ref)
219 {
220 uint32_t i, val = get_immd_u32(ref);
221
222 for (i = 0; i < pc->immd_count; ++i)
223 if (pc->immd_buf[i] == val)
224 break;
225
226 if (i == pc->immd_count) {
227 if (!(pc->immd_count % 8))
228 pc->immd_buf = REALLOC(pc->immd_buf,
229 pc->immd_count * 4, (pc->immd_count + 8) * 4);
230 pc->immd_buf[pc->immd_count++] = val;
231 }
232
233 SREG(ref)->id = i;
234 }
235
236 static INLINE void
237 set_pred(struct nv_pc *pc, struct nv_instruction *i)
238 {
239 assert(!(pc->emit[1] & 0x00003f80));
240
241 pc->emit[1] |= i->cc << 7;
242 if (i->flags_src)
243 pc->emit[1] |= SREG(i->flags_src)->id << 12;
244 }
245
246 static INLINE void
247 set_pred_wr(struct nv_pc *pc, struct nv_instruction *i)
248 {
249 assert(!(pc->emit[1] & 0x00000070));
250
251 if (i->flags_def)
252 pc->emit[1] |= (DREG(i->flags_def)->id << 4) | 0x40;
253 }
254
255 static INLINE void
256 set_a16_bits(struct nv_pc *pc, uint id)
257 {
258 ++id; /* $a0 is always 0 */
259 pc->emit[0] |= (id & 3) << 26;
260 pc->emit[1] |= id & 4;
261 }
262
263 static INLINE void
264 set_addr(struct nv_pc *pc, struct nv_instruction *i)
265 {
266 if (i->src[4])
267 set_a16_bits(pc, SREG(i->src[4])->id);
268 }
269
270 static void
271 set_dst(struct nv_pc *pc, struct nv_value *value)
272 {
273 struct nv_reg *reg = &value->join->reg;
274
275 if (reg->id < 0) {
276 pc->emit[0] |= (127 << 2) | 1; /* set 'long'-bit to catch bugs */
277 pc->emit[1] |= 0x8;
278 return;
279 }
280
281 if (reg->file == NV_FILE_OUT)
282 pc->emit[1] |= 0x8;
283 else
284 if (reg->file == NV_FILE_ADDR)
285 assert(0);
286
287 pc->emit[0] |= reg->id << 2;
288 }
289
290 static void
291 set_src_0(struct nv_pc *pc, struct nv_ref *ref)
292 {
293 struct nv_reg *reg = SREG(ref);
294
295 if (reg->file == NV_FILE_MEM_S)
296 pc->emit[1] |= 0x00200000;
297 else
298 if (reg->file == NV_FILE_MEM_P)
299 pc->emit[0] |= 0x01800000;
300 else
301 if (reg->file != NV_FILE_GPR)
302 NOUVEAU_ERR("invalid src0 register file: %d\n", reg->file);
303
304 assert(reg->id < 128);
305 pc->emit[0] |= reg->id << 9;
306 }
307
308 static void
309 set_src_1(struct nv_pc *pc, struct nv_ref *ref)
310 {
311 struct nv_reg *reg = SREG(ref);
312
313 if (reg->file >= NV_FILE_MEM_C(0) &&
314 reg->file <= NV_FILE_MEM_C(15)) {
315 assert(!(pc->emit[1] & 0x01800000));
316
317 pc->emit[0] |= 0x00800000;
318 pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22;
319 } else
320 if (reg->file != NV_FILE_GPR)
321 NOUVEAU_ERR("invalid src1 register file: %d\n", reg->file);
322
323 assert(reg->id < 128);
324 pc->emit[0] |= reg->id << 16;
325 }
326
327 static void
328 set_src_2(struct nv_pc *pc, struct nv_ref *ref)
329 {
330 struct nv_reg *reg = SREG(ref);
331
332 if (reg->file >= NV_FILE_MEM_C(0) &&
333 reg->file <= NV_FILE_MEM_C(15)) {
334 assert(!(pc->emit[1] & 0x01800000));
335
336 pc->emit[0] |= 0x01000000;
337 pc->emit[1] |= (reg->file - NV_FILE_MEM_C(0)) << 22;
338 } else
339 if (reg->file != NV_FILE_GPR)
340 NOUVEAU_ERR("invalid src2 register file: %d\n", reg->file);
341
342 assert(reg->id < 128);
343 pc->emit[1] |= reg->id << 14;
344 }
345
346 /* the default form:
347 * - long instruction
348 * - 1 to 3 sources in slots 0, 1, 2
349 * - address & flags
350 */
351 static void
352 emit_form_MAD(struct nv_pc *pc, struct nv_instruction *i)
353 {
354 pc->emit[0] |= 1;
355
356 set_pred(pc, i);
357 set_pred_wr(pc, i);
358
359 if (i->def[0])
360 set_dst(pc, i->def[0]);
361 else {
362 pc->emit[0] |= 0x01fc;
363 pc->emit[1] |= 0x0008;
364 }
365
366 if (i->src[0])
367 set_src_0(pc, i->src[0]);
368
369 if (i->src[1])
370 set_src_1(pc, i->src[1]);
371
372 if (i->src[2])
373 set_src_2(pc, i->src[2]);
374
375 set_addr(pc, i);
376 }
377
378 /* like default form, but 2nd source in slot 2, no 3rd source */
379 static void
380 emit_form_ADD(struct nv_pc *pc, struct nv_instruction *i)
381 {
382 pc->emit[0] |= 1;
383
384 if (i->def[0])
385 set_dst(pc, i->def[0]);
386 else {
387 pc->emit[0] |= 0x01fc;
388 pc->emit[1] |= 0x0008;
389 }
390
391 set_pred(pc, i);
392 set_pred_wr(pc, i);
393
394 if (i->src[0])
395 set_src_0(pc, i->src[0]);
396
397 if (i->src[1])
398 set_src_2(pc, i->src[1]);
399
400 set_addr(pc, i);
401 }
402
403 /* short mul */
404 static void
405 emit_form_MUL(struct nv_pc *pc, struct nv_instruction *i)
406 {
407 assert(!i->is_long && !(pc->emit[0] & 1));
408
409 assert(i->def[0]);
410 set_dst(pc, i->def[0]);
411
412 if (i->src[0])
413 set_src_0(pc, i->src[0]);
414
415 if (i->src[1])
416 set_src_1(pc, i->src[1]);
417 }
418
419 /* default immediate form
420 * - 1 to 3 sources where last is immediate
421 * - no address or predicate possible
422 */
423 static void
424 emit_form_IMM(struct nv_pc *pc, struct nv_instruction *i, ubyte mod_mask)
425 {
426 pc->emit[0] |= 1;
427
428 assert(i->def[0]);
429 assert(i->src[0]);
430 set_dst(pc, i->def[0]);
431
432 assert(!i->src[4] && !i->flags_src && !i->flags_def);
433
434 if (i->src[2]) {
435 set_immd(pc, i->src[2]);
436 set_src_0(pc, i->src[1]);
437 set_src_1(pc, i->src[0]);
438 } else
439 if (i->src[1]) {
440 set_immd(pc, i->src[1]);
441 set_src_0(pc, i->src[0]);
442 } else
443 set_immd(pc, i->src[0]);
444
445 assert(!mod_mask);
446 }
447
448 static void
449 set_ld_st_size(struct nv_pc *pc, int s, ubyte type)
450 {
451 switch (type) {
452 case NV_TYPE_F64:
453 pc->emit[1] |= 0x8000 << s;
454 break;
455 case NV_TYPE_F32:
456 case NV_TYPE_S32:
457 case NV_TYPE_U32:
458 pc->emit[1] |= 0xc000 << s;
459 break;
460 case NV_TYPE_S16:
461 pc->emit[1] |= 0x6000 << s;
462 break;
463 case NV_TYPE_U16:
464 pc->emit[1] |= 0x4000 << s;
465 break;
466 case NV_TYPE_S8:
467 pc->emit[1] |= 0x2000 << s;
468 break;
469 default:
470 break;
471 }
472 }
473
474 static void
475 emit_ld(struct nv_pc *pc, struct nv_instruction *i)
476 {
477 ubyte sf = SFILE(i, 0);
478
479 if (sf == NV_FILE_IMM) {
480 sf = NV_FILE_MEM_C(0);
481 nv_pc_alloc_immd(pc, i->src[0]);
482
483 new_fixup(pc, NV50_FIXUP_DATA_RELOC, 0, SREG(i->src[0])->id, 0xffff, 9);
484 }
485
486 if (sf == NV_FILE_MEM_S ||
487 sf == NV_FILE_MEM_P) {
488 pc->emit[0] = 0x10000001;
489 pc->emit[1] = 0x04200000 | (0x3c << 12);
490 if (sf == NV_FILE_MEM_P)
491 pc->emit[0] |= 0x01800000;
492 } else
493 if (sf >= NV_FILE_MEM_C(0) &&
494 sf <= NV_FILE_MEM_C(15)) {
495 pc->emit[0] = 0x10000001;
496 pc->emit[1] = 0x24000000;
497 pc->emit[1] |= (sf - NV_FILE_MEM_C(0)) << 22;
498 } else
499 if (sf >= NV_FILE_MEM_G(0) &&
500 sf <= NV_FILE_MEM_G(15)) {
501 pc->emit[0] = 0xd0000001 | ((sf - NV_FILE_MEM_G(0)) << 16);
502 pc->emit[1] = 0xa0000000;
503
504 assert(i->src[4] && SREG(i->src[4])->file == NV_FILE_GPR);
505 SID(pc, i->src[4], 9);
506 } else
507 if (sf == NV_FILE_MEM_L) {
508 pc->emit[0] = 0xd0000001;
509 pc->emit[1] = 0x40000000;
510
511 set_addr(pc, i);
512 } else {
513 NOUVEAU_ERR("invalid ld source file\n");
514 abort();
515 }
516
517 set_ld_st_size(pc, (sf == NV_FILE_MEM_L) ? 8 : 0, STYPE(i, 0));
518
519 set_dst(pc, i->def[0]);
520 set_pred_wr(pc, i);
521
522 set_pred(pc, i);
523
524 if (sf < NV_FILE_MEM_G(0) ||
525 sf > NV_FILE_MEM_G(15)) {
526 SID(pc, i->src[0], 9);
527 set_addr(pc, i);
528 }
529 }
530
531 static void
532 emit_st(struct nv_pc *pc, struct nv_instruction *i)
533 {
534 assert(SFILE(i, 1) == NV_FILE_GPR);
535 assert(SFILE(i, 0) == NV_FILE_MEM_L);
536
537 pc->emit[0] = 0xd0000001;
538 pc->emit[1] = 0x60000000;
539
540 SID(pc, i->src[1], 2);
541 SID(pc, i->src[0], 9);
542
543 set_ld_st_size(pc, 8, STYPE(i, 1));
544
545 set_addr(pc, i);
546 set_pred(pc, i);
547 }
548
549 static int
550 verify_mov(struct nv_instruction *i)
551 {
552 ubyte sf = SFILE(i, 0);
553 ubyte df = DFILE(i, 0);
554
555 if (df == NV_FILE_GPR)
556 return 0;
557
558 if (df != NV_FILE_OUT &&
559 df != NV_FILE_FLAGS &&
560 df != NV_FILE_ADDR)
561 return 1;
562
563 if (sf == NV_FILE_FLAGS)
564 return 2;
565 if (sf == NV_FILE_ADDR)
566 return 3;
567 if (sf == NV_FILE_IMM && df != NV_FILE_OUT)
568 return 4;
569
570 return 0;
571 }
572
573 static void
574 emit_mov(struct nv_pc *pc, struct nv_instruction *i)
575 {
576 assert(!verify_mov(i));
577
578 if (SFILE(i, 0) >= NV_FILE_MEM_S)
579 emit_ld(pc, i);
580 else
581 if (SFILE(i, 0) == NV_FILE_FLAGS) {
582 pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2);
583 pc->emit[1] = 0x20000780 | (SREG(i->src[0])->id << 12);
584 } else
585 if (SFILE(i, 0) == NV_FILE_ADDR) {
586 pc->emit[0] = 0x00000001 | (DREG(i->def[0])->id << 2);
587 pc->emit[1] = 0x40000780;
588 set_a16_bits(pc, SREG(i->src[0])->id);
589 } else
590 if (DFILE(i, 0) == NV_FILE_FLAGS) {
591 pc->emit[0] = 0x00000001;
592 pc->emit[1] = 0xa0000000 | (1 << 6);
593 set_pred(pc, i);
594 pc->emit[0] |= SREG(i->src[0])->id << 9;
595 pc->emit[1] |= DREG(i->def[0])->id << 4;
596 } else
597 if (SFILE(i, 0) == NV_FILE_IMM) {
598 if (i->opcode == NV_OP_LDA) {
599 emit_ld(pc, i);
600 } else {
601 pc->emit[0] = 0x10008001;
602 pc->emit[1] = 0x00000003;
603
604 emit_form_IMM(pc, i, 0);
605 }
606 } else {
607 pc->emit[0] = 0x10000000;
608 pc->emit[0] |= DREG(i->def[0])->id << 2;
609 pc->emit[0] |= SREG(i->src[0])->id << 9;
610
611 if (!i->is_long) {
612 pc->emit[0] |= 0x8000;
613 } else {
614 pc->emit[0] |= 0x00000001;
615 pc->emit[1] = 0x0403c000;
616
617 set_pred(pc, i);
618 }
619 }
620
621 if (DFILE(i, 0) == NV_FILE_OUT)
622 pc->emit[1] |= 0x8;
623 }
624
625 static void
626 emit_interp(struct nv_pc *pc, struct nv_instruction *i)
627 {
628 pc->emit[0] = 0x80000000;
629
630 assert(DFILE(i, 0) == NV_FILE_GPR);
631 assert(SFILE(i, 0) == NV_FILE_MEM_V);
632
633 DID(pc, i->def[0], 2);
634 SID(pc, i->src[0], 16);
635
636 if (i->flat)
637 pc->emit[0] |= 1 << 8;
638 else
639 if (i->opcode == NV_OP_PINTERP) {
640 pc->emit[0] |= 1 << 25;
641 pc->emit[0] |= SREG(i->src[1])->id << 9;
642 }
643
644 if (i->centroid)
645 pc->emit[0] |= 1 << 24;
646
647 assert(i->is_long || !i->flags_src);
648
649 if (i->is_long) {
650 set_pred(pc, i);
651
652 pc->emit[1] |=
653 (pc->emit[0] & (3 << 24)) >> (24 - 16) |
654 (pc->emit[0] & (1 << 8)) >> (18 - 8);
655
656 pc->emit[0] |= 1;
657 pc->emit[0] &= ~0x03000100;
658 }
659 }
660
661 static void
662 emit_minmax(struct nv_pc *pc, struct nv_instruction *i)
663 {
664 pc->emit[0] = 0x30000000;
665 pc->emit[1] = (i->opcode == NV_OP_MIN) ? (2 << 28) : 0;
666
667 switch (DTYPE(i, 0)) {
668 case NV_TYPE_F32:
669 pc->emit[0] |= 0x80000000;
670 pc->emit[1] |= 0x80000000;
671 break;
672 case NV_TYPE_S32:
673 pc->emit[1] |= 0x8c000000;
674 break;
675 case NV_TYPE_U32:
676 pc->emit[1] |= 0x84000000;
677 break;
678 }
679
680 emit_form_MAD(pc, i);
681
682 if (i->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000;
683 if (i->src[1]->mod & NV_MOD_ABS) pc->emit[1] |= 0x00080000;
684 }
685
686 static void
687 emit_add_f32(struct nv_pc *pc, struct nv_instruction *i)
688 {
689 pc->emit[0] = 0xb0000000;
690
691 assert(!((i->src[0]->mod | i->src[1]->mod) & NV_MOD_ABS));
692
693 if (SFILE(i, 1) == NV_FILE_IMM) {
694 emit_form_IMM(pc, i, 0);
695
696 if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 0x8000;
697 if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22;
698 } else
699 if (i->is_long) {
700 emit_form_ADD(pc, i);
701
702 if (i->src[0]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 26;
703 if (i->src[1]->mod & NV_MOD_NEG) pc->emit[1] |= 1 << 27;
704
705 if (i->saturate)
706 pc->emit[1] |= 0x20000000;
707 } else {
708 emit_form_MUL(pc, i);
709
710 if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 0x8000;
711 if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22;
712 }
713 }
714
715 static void
716 emit_add_b32(struct nv_pc *pc, struct nv_instruction *i)
717 {
718 pc->emit[0] = 0x20008000;
719
720 if (SFILE(i, 1) == NV_FILE_IMM) {
721 emit_form_IMM(pc, i, 0);
722 } else
723 if (i->is_long) {
724 pc->emit[0] = 0x20000000;
725 pc->emit[1] = 0x04000000;
726 emit_form_ADD(pc, i);
727 } else {
728 emit_form_MUL(pc, i);
729 }
730
731 if (i->src[0]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 28;
732 if (i->src[1]->mod & NV_MOD_NEG) pc->emit[0] |= 1 << 22;
733 }
734
735 static void
736 emit_add_a16(struct nv_pc *pc, struct nv_instruction *i)
737 {
738 int s = (i->opcode == NV_OP_MOV) ? 0 : 1;
739
740 pc->emit[0] = 0xd0000001 | ((uint16_t)get_immd_u32(i->src[s]) << 9);
741 pc->emit[1] = 0x20000000;
742
743 pc->emit[0] |= (DREG(i->def[0])->id + 1) << 2;
744
745 set_pred(pc, i);
746
747 if (i->src[1])
748 set_a16_bits(pc, SREG(i->src[1])->id + 1);
749 }
750
751 static void
752 emit_flow(struct nv_pc *pc, struct nv_instruction *i, ubyte flow_op)
753 {
754 pc->emit[0] = 0x00000003 | (flow_op << 28);
755 pc->emit[1] = 0x00000000;
756
757 set_pred(pc, i);
758
759 if (i->target && (i->opcode != NV_OP_BREAK)) {
760 uint32_t pos = i->target->bin_pos;
761
762 new_fixup(pc, NV50_FIXUP_CODE_RELOC, 0, pos, 0xffff << 11, 9);
763 new_fixup(pc, NV50_FIXUP_CODE_RELOC, 1, pos, 0x3f << 14, -4);
764
765 pc->emit[0] |= (pos / 4) << 11;
766 }
767 }
768
769 static INLINE void
770 emit_add(struct nv_pc *pc, struct nv_instruction *i)
771 {
772 if (DFILE(i, 0) == NV_FILE_ADDR)
773 emit_add_a16(pc, i);
774 else {
775 switch (DTYPE(i, 0)) {
776 case NV_TYPE_F32:
777 emit_add_f32(pc, i);
778 break;
779 case NV_TYPE_U32:
780 case NV_TYPE_S32:
781 emit_add_b32(pc, i);
782 break;
783 }
784 }
785 }
786
787 static void
788 emit_bitop2(struct nv_pc *pc, struct nv_instruction *i)
789 {
790 pc->emit[0] = 0xd0000000;
791
792 if (SFILE(i, 1) == NV_FILE_IMM) {
793 emit_form_IMM(pc, i, 0);
794
795 if (i->opcode == NV_OP_OR)
796 pc->emit[0] |= 0x0100;
797 else
798 if (i->opcode == NV_OP_XOR)
799 pc->emit[0] |= 0x8000;
800 } else {
801 emit_form_MAD(pc, i);
802
803 pc->emit[1] |= 0x04000000;
804
805 if (i->opcode == NV_OP_OR)
806 pc->emit[1] |= 0x4000;
807 else
808 if (i->opcode == NV_OP_XOR)
809 pc->emit[1] |= 0x8000;
810 }
811 }
812
813 static void
814 emit_arl(struct nv_pc *pc, struct nv_instruction *i)
815 {
816 assert(SFILE(i, 0) == NV_FILE_GPR);
817 assert(SFILE(i, 1) == NV_FILE_IMM);
818
819 assert(!i->flags_def);
820
821 pc->emit[0] = 0x00000001;
822 pc->emit[1] = 0xc0000000;
823
824 pc->emit[0] |= (i->def[0]->reg.id + 1) << 2;
825 set_pred(pc, i);
826 set_src_0(pc, i->src[0]);
827 pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x3f) << 16;
828 }
829
830 static void
831 emit_shift(struct nv_pc *pc, struct nv_instruction *i)
832 {
833 if (DFILE(i, 0) == NV_FILE_ADDR) {
834 emit_arl(pc, i);
835 return;
836 }
837
838 pc->emit[0] = 0x30000001;
839 pc->emit[1] = 0xc4000000;
840
841 if (i->opcode == NV_OP_SHR)
842 pc->emit[1] |= 1 << 29;
843
844 if (SFILE(i, 1) == NV_FILE_IMM) {
845 pc->emit[1] |= 1 << 20;
846 pc->emit[0] |= (get_immd_u32(i->src[1]) & 0x7f) << 16;
847
848 set_pred(pc, i);
849 } else
850 emit_form_MAD(pc, i);
851
852 if (STYPE(i, 0) == NV_TYPE_S32)
853 pc->emit[1] |= 1 << 27;
854 }
855
856 static void
857 emit_flop(struct nv_pc *pc, struct nv_instruction *i)
858 {
859 struct nv_ref *src0 = i->src[0];
860
861 pc->emit[0] = 0x90000000;
862
863 assert(STYPE(i, 0) == NV_TYPE_F32);
864 assert(SFILE(i, 0) == NV_FILE_GPR);
865
866 if (!i->is_long) {
867 emit_form_MUL(pc, i);
868 assert(i->opcode == NV_OP_RCP && !src0->mod);
869 return;
870 }
871
872 pc->emit[1] = (i->opcode - NV_OP_RCP) << 29;
873
874 emit_form_MAD(pc, i);
875
876 if (src0->mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000;
877 if (src0->mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000;
878 }
879
880 static void
881 emit_mad_f32(struct nv_pc *pc, struct nv_instruction *i)
882 {
883 const boolean neg_mul = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG;
884 const boolean neg_add = (i->src[2]->mod & NV_MOD_NEG);
885
886 pc->emit[0] = 0xe0000000;
887
888 if (!i->is_long) {
889 emit_form_MUL(pc, i);
890 assert(!neg_mul && !neg_add);
891 return;
892 }
893
894 emit_form_MAD(pc, i);
895
896 if (neg_mul) pc->emit[1] |= 0x04000000;
897 if (neg_add) pc->emit[1] |= 0x08000000;
898
899 if (i->saturate)
900 pc->emit[1] |= 0x20000000;
901 }
902
903 static INLINE void
904 emit_mad(struct nv_pc *pc, struct nv_instruction *i)
905 {
906 emit_mad_f32(pc, i);
907 }
908
909 static void
910 emit_mul_f32(struct nv_pc *pc, struct nv_instruction *i)
911 {
912 boolean neg = (i->src[0]->mod ^ i->src[1]->mod) & NV_MOD_NEG;
913
914 pc->emit[0] = 0xc0000000;
915
916 if (SFILE(i, 1) == NV_FILE_IMM) {
917 emit_form_IMM(pc, i, 0);
918
919 if (neg)
920 pc->emit[0] |= 0x8000;
921 } else
922 if (i->is_long) {
923 emit_form_MAD(pc, i);
924
925 if (neg)
926 pc->emit[1] |= 0x08 << 24;
927 } else {
928 emit_form_MUL(pc, i);
929
930 if (neg)
931 pc->emit[0] |= 0x8000;
932 }
933 }
934
935 static void
936 emit_set(struct nv_pc *pc, struct nv_instruction *nvi)
937 {
938 assert(nvi->is_long);
939
940 pc->emit[0] = 0x30000000;
941 pc->emit[1] = 0x60000000;
942
943 pc->emit[1] |= nvi->set_cond << 14;
944
945 switch (STYPE(nvi, 0)) {
946 case NV_TYPE_U32: pc->emit[1] |= 0x04000000; break;
947 case NV_TYPE_S32: pc->emit[1] |= 0x0c000000; break;
948 case NV_TYPE_F32: pc->emit[0] |= 0x80000000; break;
949 default:
950 assert(0);
951 break;
952 }
953
954 emit_form_MAD(pc, nvi);
955 }
956
/* Bit patterns OR'ed into the second instruction word by emit_cvt().
 *
 * Fix: all constants are now unsigned. The original '0xc0 << 24',
 * '0x88 << 24' and '0x80 << 24' shifted a signed int into its sign bit,
 * which is undefined behaviour. The resulting 32-bit patterns are
 * unchanged.
 */

/* rounding / modifier bits */
#define CVT_RN    (0x00u << 16)
#define CVT_FLOOR (0x02u << 16)
#define CVT_CEIL  (0x04u << 16)
#define CVT_TRUNC (0x06u << 16)
#define CVT_SAT   (0x08u << 16)
#define CVT_ABS   (0x10u << 16)

/* base patterns; CVT_<dst>_<src> names destination type then source type */
#define CVT_X32_X32 0x04004000u
#define CVT_X32_S32 0x04014000u
#define CVT_F32_F32 ((0xc0u << 24) | CVT_X32_X32)
#define CVT_S32_F32 ((0x88u << 24) | CVT_X32_X32)
#define CVT_U32_F32 ((0x80u << 24) | CVT_X32_X32)
#define CVT_F32_S32 ((0x40u << 24) | CVT_X32_S32)
#define CVT_F32_U32 ((0x40u << 24) | CVT_X32_X32)
#define CVT_S32_S32 ((0x08u << 24) | CVT_X32_S32)
#define CVT_S32_U32 ((0x08u << 24) | CVT_X32_X32)
#define CVT_U32_S32 ((0x00u << 24) | CVT_X32_S32)
#define CVT_U32_U32 ((0x00u << 24) | CVT_X32_X32)

#define CVT_NEG 0x20000000u
#define CVT_RI  0x08000000u
978
979 static void
980 emit_cvt(struct nv_pc *pc, struct nv_instruction *nvi)
981 {
982 ubyte dst_type = nvi->def[0] ? DTYPE(nvi, 0) : STYPE(nvi, 0);
983
984 pc->emit[0] = 0xa0000000;
985
986 switch (dst_type) {
987 case NV_TYPE_F32:
988 switch (STYPE(nvi, 0)) {
989 case NV_TYPE_F32: pc->emit[1] = CVT_F32_F32; break;
990 case NV_TYPE_S32: pc->emit[1] = CVT_F32_S32; break;
991 case NV_TYPE_U32: pc->emit[1] = CVT_F32_U32; break;
992 }
993 break;
994 case NV_TYPE_S32:
995 switch (STYPE(nvi, 0)) {
996 case NV_TYPE_F32: pc->emit[1] = CVT_S32_F32; break;
997 case NV_TYPE_S32: pc->emit[1] = CVT_S32_S32; break;
998 case NV_TYPE_U32: pc->emit[1] = CVT_S32_U32; break;
999 }
1000 break;
1001 case NV_TYPE_U32:
1002 switch (STYPE(nvi, 0)) {
1003 case NV_TYPE_F32: pc->emit[1] = CVT_U32_F32; break;
1004 case NV_TYPE_S32: pc->emit[1] = CVT_U32_S32; break;
1005 case NV_TYPE_U32: pc->emit[1] = CVT_U32_U32; break;
1006 }
1007 break;
1008 }
1009 if (pc->emit[1] == CVT_F32_F32 &&
1010 (nvi->opcode == NV_OP_CEIL || nvi->opcode == NV_OP_FLOOR ||
1011 nvi->opcode == NV_OP_TRUNC))
1012 pc->emit[1] |= CVT_RI;
1013
1014 switch (nvi->opcode) {
1015 case NV_OP_CEIL: pc->emit[1] |= CVT_CEIL; break;
1016 case NV_OP_FLOOR: pc->emit[1] |= CVT_FLOOR; break;
1017 case NV_OP_TRUNC: pc->emit[1] |= CVT_TRUNC; break;
1018
1019 case NV_OP_ABS: pc->emit[1] |= CVT_ABS; break;
1020 case NV_OP_SAT: pc->emit[1] |= CVT_SAT; break;
1021 case NV_OP_NEG: pc->emit[1] |= CVT_NEG; break;
1022 default:
1023 assert(nvi->opcode == NV_OP_CVT);
1024 break;
1025 }
1026 assert(nvi->opcode != NV_OP_ABS || !(nvi->src[0]->mod & NV_MOD_NEG));
1027
1028 if (nvi->src[0]->mod & NV_MOD_NEG) pc->emit[1] ^= CVT_NEG;
1029 if (nvi->src[0]->mod & NV_MOD_ABS) pc->emit[1] |= CVT_ABS;
1030
1031 emit_form_MAD(pc, nvi);
1032 }
1033
1034 static void
1035 emit_tex(struct nv_pc *pc, struct nv_instruction *i)
1036 {
1037 pc->emit[0] = 0xf0000001;
1038 pc->emit[1] = 0x00000000;
1039
1040 DID(pc, i->def[0], 2);
1041
1042 set_pred(pc, i);
1043
1044 pc->emit[0] |= i->tex_t << 9;
1045 pc->emit[0] |= i->tex_s << 17;
1046
1047 pc->emit[0] |= (i->tex_argc - 1) << 22;
1048
1049 pc->emit[0] |= (i->tex_mask & 0x3) << 25;
1050 pc->emit[1] |= (i->tex_mask & 0xc) << 12;
1051
1052 if (i->tex_live)
1053 pc->emit[1] |= 4;
1054
1055 if (i->tex_cube)
1056 pc->emit[0] |= 0x08000000;
1057
1058 if (i->opcode == NV_OP_TXB)
1059 pc->emit[1] |= 0x20000000;
1060 else
1061 if (i->opcode == NV_OP_TXL)
1062 pc->emit[1] |= 0x40000000;
1063 }
1064
1065 static void
1066 emit_cvt2fixed(struct nv_pc *pc, struct nv_instruction *i)
1067 {
1068 ubyte mod = i->src[0]->mod;
1069
1070 pc->emit[0] = 0xb0000000;
1071 pc->emit[1] = 0xc0000000;
1072
1073 if (i->opcode == NV_OP_PREEX2)
1074 pc->emit[1] |= 0x4000;
1075
1076 emit_form_MAD(pc, i);
1077
1078 if (mod & NV_MOD_NEG) pc->emit[1] |= 0x04000000;
1079 if (mod & NV_MOD_ABS) pc->emit[1] |= 0x00100000;
1080 }
1081
1082 static void
1083 emit_ddx(struct nv_pc *pc, struct nv_instruction *i)
1084 {
1085 assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR);
1086
1087 pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0240001 : 0xc0140001;
1088 pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 0x86400000 : 0x89800000;
1089
1090 DID(pc, i->def[0], 2);
1091 SID(pc, i->src[0], 9);
1092 SID(pc, i->src[0], 32 + 14);
1093
1094 set_pred(pc, i);
1095 set_pred_wr(pc, i);
1096 }
1097
1098 static void
1099 emit_ddy(struct nv_pc *pc, struct nv_instruction *i)
1100 {
1101 assert(i->is_long && SFILE(i, 0) == NV_FILE_GPR);
1102
1103 pc->emit[0] = (i->src[0]->mod & NV_MOD_NEG) ? 0xc0250001 : 0xc0150001;
1104 pc->emit[1] = (i->src[0]->mod & NV_MOD_NEG) ? 0x85800000 : 0x8a400000;
1105
1106 DID(pc, i->def[0], 2);
1107 SID(pc, i->src[0], 9);
1108 SID(pc, i->src[0], 32 + 14);
1109
1110 set_pred(pc, i);
1111 set_pred_wr(pc, i);
1112 }
1113
1114 static void
1115 emit_quadop(struct nv_pc *pc, struct nv_instruction *i)
1116 {
1117 pc->emit[0] = 0xc0000000;
1118 pc->emit[1] = 0x80000000;
1119
1120 emit_form_ADD(pc, i);
1121
1122 pc->emit[0] |= i->lanes << 16;
1123
1124 pc->emit[0] |= (i->quadop & 0x03) << 20;
1125 pc->emit[1] |= (i->quadop & 0xfc) << 20;
1126 }
1127
1128 void
1129 nv50_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
1130 {
1131 /* nv_print_instruction(i); */
1132
1133 switch (i->opcode) {
1134 case NV_OP_MOV:
1135 if (DFILE(i, 0) == NV_FILE_ADDR)
1136 emit_add_a16(pc, i);
1137 else
1138 emit_mov(pc, i);
1139 break;
1140 case NV_OP_LDA:
1141 emit_mov(pc, i);
1142 break;
1143 case NV_OP_STA:
1144 emit_st(pc, i);
1145 break;
1146 case NV_OP_LINTERP:
1147 case NV_OP_PINTERP:
1148 emit_interp(pc, i);
1149 break;
1150 case NV_OP_ADD:
1151 emit_add(pc, i);
1152 break;
1153 case NV_OP_AND:
1154 case NV_OP_OR:
1155 case NV_OP_XOR:
1156 emit_bitop2(pc, i);
1157 break;
1158 case NV_OP_CVT:
1159 case NV_OP_ABS:
1160 case NV_OP_NEG:
1161 case NV_OP_SAT:
1162 case NV_OP_CEIL:
1163 case NV_OP_FLOOR:
1164 case NV_OP_TRUNC:
1165 emit_cvt(pc, i);
1166 break;
1167 case NV_OP_DFDX:
1168 emit_ddx(pc, i);
1169 break;
1170 case NV_OP_DFDY:
1171 emit_ddy(pc, i);
1172 break;
1173 case NV_OP_RCP:
1174 case NV_OP_RSQ:
1175 case NV_OP_LG2:
1176 case NV_OP_SIN:
1177 case NV_OP_COS:
1178 case NV_OP_EX2:
1179 emit_flop(pc, i);
1180 break;
1181 case NV_OP_PRESIN:
1182 case NV_OP_PREEX2:
1183 emit_cvt2fixed(pc, i);
1184 break;
1185 case NV_OP_MAD:
1186 emit_mad(pc, i);
1187 break;
1188 case NV_OP_MAX:
1189 case NV_OP_MIN:
1190 emit_minmax(pc, i);
1191 break;
1192 case NV_OP_MUL:
1193 emit_mul_f32(pc, i);
1194 break;
1195 case NV_OP_SET:
1196 emit_set(pc, i);
1197 break;
1198 case NV_OP_SHL:
1199 case NV_OP_SHR:
1200 emit_shift(pc, i);
1201 break;
1202 case NV_OP_TEX:
1203 case NV_OP_TXB:
1204 case NV_OP_TXL:
1205 emit_tex(pc, i);
1206 break;
1207 case NV_OP_QUADOP:
1208 emit_quadop(pc, i);
1209 break;
1210 case NV_OP_KIL:
1211 emit_flow(pc, i, 0x0);
1212 break;
1213 case NV_OP_BRA:
1214 emit_flow(pc, i, 0x1);
1215 break;
1216 case NV_OP_CALL:
1217 emit_flow(pc, i, 0x2);
1218 break;
1219 case NV_OP_RET:
1220 emit_flow(pc, i, 0x3);
1221 break;
1222 case NV_OP_BREAKADDR:
1223 emit_flow(pc, i, 0x4);
1224 break;
1225 case NV_OP_BREAK:
1226 emit_flow(pc, i, 0x5);
1227 break;
1228 case NV_OP_JOINAT:
1229 emit_flow(pc, i, 0xa);
1230 break;
1231 case NV_OP_NOP:
1232 case NV_OP_JOIN:
1233 pc->emit[0] = 0xf0000001;
1234 pc->emit[1] = 0xe0000000;
1235 break;
1236 case NV_OP_PHI:
1237 case NV_OP_UNDEF:
1238 case NV_OP_SUB:
1239 NOUVEAU_ERR("operation \"%s\" should have been eliminated\n",
1240 nv_opcode_name(i->opcode));
1241 break;
1242 default:
1243 NOUVEAU_ERR("unhandled NV_OP: %d\n", i->opcode);
1244 abort();
1245 break;
1246 }
1247
1248 if (i->is_join) {
1249 assert(i->is_long && !(pc->emit[1] & 1));
1250 pc->emit[1] |= 2;
1251 }
1252
1253 assert((pc->emit[0] & 1) == i->is_long);
1254 }