gallium: Remove every double semi-colon
[mesa.git] / src / gallium / drivers / ilo / shader / ilo_shader_gs.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_dump.h"
29 #include "toy_compiler.h"
30 #include "toy_tgsi.h"
31 #include "toy_legalize.h"
32 #include "toy_optimize.h"
33 #include "toy_helpers.h"
34 #include "ilo_shader_internal.h"
35
36 /* XXX Below is proof-of-concept code. Skip this file! */
37
38 /*
39 * TODO
40 * - primitive id is in r0.1. FS receives PID as a flat attribute.
41 * - set VUE header m0.1 for layered rendering
42 */
/*
 * Per-compile state for the GS code generator: compiler handles, TGSI
 * translation results, payload layout, and the bookkeeping variables
 * described below.
 */
struct gs_compile_context {
   struct ilo_shader *shader;                     /* shader being compiled */
   const struct ilo_shader_variant *variant;      /* variant-specific state */
   const struct pipe_stream_output_info *so_info; /* stream output layout */

   struct toy_compiler tc;                        /* instruction builder */
   struct toy_tgsi tgsi;                          /* translated TGSI shader */
   int output_map[PIPE_MAX_SHADER_OUTPUTS];       /* VUE slot -> TGSI output slot */

   bool write_so;   /* emit stream output (SVB) writes */
   bool write_vue;  /* emit URB writes for the rest of the pipeline */

   int in_vue_size;   /* size of an input VUE */
   int in_vue_count;  /* vertices per input primitive */

   int out_vue_size;       /* size of an output VUE */
   int out_vue_min_count;  /* vertices needed to form an output primitive */

   /* true when the emitted vertex/prim counts are known at compile time */
   bool is_static;

   struct {
      struct toy_src header;  /* r0: thread payload header */
      struct toy_src svbi;    /* streamed vertex buffer indices */
      struct toy_src vues[6]; /* input vertices */
   } payload;

   struct {
      struct toy_dst urb_write_header; /* header reused by URB_WRITE messages */
      bool prim_start;                 /* next vertex starts a primitive */
      bool prim_end;                   /* current vertex ends a primitive */
      int prim_type;                   /* GEN6_3DPRIM_* of the output */

      struct toy_dst tmp;              /* scratch register */

      /* buffered tgsi_outs */
      struct toy_dst buffers[3];
      int buffer_needed, buffer_cur;

      struct toy_dst so_written;       /* SO primitives written so far */
      struct toy_dst so_index;         /* current SVB write index */

      struct toy_src tgsi_outs[PIPE_MAX_SHADER_OUTPUTS];
   } vars;

   /* register-resident counters, used when is_static is false */
   struct {
      struct toy_dst total_vertices;
      struct toy_dst total_prims;

      struct toy_dst num_vertices;
      struct toy_dst num_vertices_in_prim;
   } dynamic_data;

   /* compile-time counters, used when is_static is true */
   struct {
      int total_vertices;
      int total_prims;
      /* one bit per vertex; this limits the max vertex count to 256 */
      uint32_t last_vertex[8];

      int num_vertices;
      int num_vertices_in_prim;
   } static_data;

   int first_free_grf;
   int last_free_grf;
   int first_free_mrf;
   int last_free_mrf;
};
110
111 static void
112 gs_COPY8(struct toy_compiler *tc, struct toy_dst dst, struct toy_src src)
113 {
114 struct toy_inst *inst;
115
116 inst = tc_MOV(tc, dst, src);
117 inst->exec_size = GEN6_EXECSIZE_8;
118 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
119 }
120
121 static void
122 gs_COPY4(struct toy_compiler *tc,
123 struct toy_dst dst, int dst_ch,
124 struct toy_src src, int src_ch)
125 {
126 struct toy_inst *inst;
127
128 inst = tc_MOV(tc,
129 tdst_offset(dst, 0, dst_ch),
130 tsrc_offset(src, 0, src_ch));
131 inst->exec_size = GEN6_EXECSIZE_4;
132 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
133 }
134
135 static void
136 gs_COPY1(struct toy_compiler *tc,
137 struct toy_dst dst, int dst_ch,
138 struct toy_src src, int src_ch)
139 {
140 struct toy_inst *inst;
141
142 inst = tc_MOV(tc,
143 tdst_offset(dst, 0, dst_ch),
144 tsrc_rect(tsrc_offset(src, 0, src_ch), TOY_RECT_010));
145 inst->exec_size = GEN6_EXECSIZE_1;
146 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
147 }
148
149 static void
150 gs_init_vars(struct gs_compile_context *gcc)
151 {
152 struct toy_compiler *tc = &gcc->tc;
153 struct toy_dst dst;
154
155 /* init URB_WRITE header */
156 dst = gcc->vars.urb_write_header;
157
158 gs_COPY8(tc, dst, gcc->payload.header);
159
160 gcc->vars.prim_start = true;
161 gcc->vars.prim_end = false;
162 switch (gcc->out_vue_min_count) {
163 case 1:
164 gcc->vars.prim_type = GEN6_3DPRIM_POINTLIST;
165 break;
166 case 2:
167 gcc->vars.prim_type = GEN6_3DPRIM_LINESTRIP;
168 break;
169 case 3:
170 gcc->vars.prim_type = GEN6_3DPRIM_TRISTRIP;
171 break;
172 }
173
174 if (gcc->write_so)
175 tc_MOV(tc, gcc->vars.so_written, tsrc_imm_d(0));
176 }
177
178 static void
179 gs_save_output(struct gs_compile_context *gcc, const struct toy_src *outs)
180 {
181 struct toy_compiler *tc = &gcc->tc;
182 const struct toy_dst buf = gcc->vars.buffers[gcc->vars.buffer_cur];
183 int i;
184
185 for (i = 0; i < gcc->shader->out.count; i++)
186 tc_MOV(tc, tdst_offset(buf, i, 0), outs[i]);
187
188 /* advance the cursor */
189 gcc->vars.buffer_cur++;
190 gcc->vars.buffer_cur %= gcc->vars.buffer_needed;
191 }
192
/*
 * Emit a data port SEND that writes one output (up to RGBA) to a streamed
 * vertex buffer.
 *
 * index is the destination element index within the SVB.  When
 * send_write_commit_message is set, the message requests a write commit
 * and dst receives it.
 */
static void
gs_write_so(struct gs_compile_context *gcc,
            struct toy_dst dst,
            struct toy_src index, struct toy_src out,
            bool send_write_commit_message,
            int binding_table_index)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));

   /* m0.5: destination index */
   gs_COPY1(tc, mrf_header, 5, index, 0);

   /* m0.0 - m0.3: RGBA */
   gs_COPY4(tc, mrf_header, 0, tsrc_type(out, mrf_header.type), 0);

   desc = tsrc_imm_mdesc_data_port(tc, false,
         1, send_write_commit_message,
         true, send_write_commit_message,
         GEN6_MSG_DP_SVB_WRITE, 0,
         binding_table_index);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc,
         GEN6_SFID_RC);
}
221
/*
 * Emit one or more URB_WRITEs that output num_outs attributes, splitting
 * into multiple messages when the attributes do not fit into the free
 * MRFs.  Two attributes are packed per MRF (channels 0-3 and 4-7).  When
 * eot is set, the final message ends the thread.
 */
static void
gs_write_vue(struct gs_compile_context *gcc,
             struct toy_dst dst, struct toy_src msg_header,
             const struct toy_src *outs, int num_outs,
             bool eot)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;
   int sent = 0;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   gs_COPY8(tc, mrf_header, msg_header);

   while (sent < num_outs) {
      int mrf = gcc->first_free_mrf + 1;
      const int mrf_avail = gcc->last_free_mrf - mrf + 1;
      int msg_len, num_entries, i;
      bool complete;

      /* an entry is one MRF holding a pair of attributes */
      num_entries = (num_outs - sent + 1) / 2;
      complete = true;
      if (num_entries > mrf_avail) {
         num_entries = mrf_avail;
         complete = false;
      }

      for (i = 0; i < num_entries; i++) {
         /*
          * NOTE(review): mrf is incremented every iteration AND the
          * destination adds "i / 2", so registers are skipped once i >= 2.
          * Presumably only one of the two advancing schemes was intended —
          * confirm against a shader with more than four outputs.
          */
         gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 0,
               outs[sent + 2 * i], 0);
         if (sent + i * 2 + 1 < gcc->shader->out.count) {
            gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 4,
                  outs[sent + 2 * i + 1], 0);
         }
         mrf++;
      }

      /* do not forget the header */
      msg_len = num_entries + 1;

      if (complete) {
         desc = tsrc_imm_mdesc_urb(tc,
               eot, msg_len, !eot, true, true, !eot,
               false, sent, 0);
      }
      else {
         desc = tsrc_imm_mdesc_urb(tc,
               false, msg_len, 0, false, true, false,
               false, sent, 0);
      }

      /* only the final, complete message writes back to dst */
      tc_add2(tc, TOY_OPCODE_URB_WRITE,
            (complete) ? dst : tdst_null(), tsrc_from(mrf_header), desc);

      sent += num_entries * 2;
   }
}
279
/*
 * Emit the FF_SYNC URB message, which allocates the initial URB handle and
 * programs the SVB/GS counters.  dst receives the message response (the
 * handle is in its first DWord).
 */
static void
gs_ff_sync(struct gs_compile_context *gcc, struct toy_dst dst,
           struct toy_src num_prims)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header =
      tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   struct toy_src desc;
   bool allocate;

   gs_COPY8(tc, mrf_header, gcc->payload.header);

   /* set NumSOVertsToWrite and NumSOPrimsNeeded */
   if (gcc->write_so) {
      if (num_prims.file == TOY_FILE_IMM) {
         /* both counts are known: pack them into m0.0 at compile time */
         const uint32_t v =
            (num_prims.val32 * gcc->in_vue_count) << 16 | num_prims.val32;

         gs_COPY1(tc, mrf_header, 0, tsrc_imm_d(v), 0);
      }
      else {
         /* compute (num_prims * in_vue_count) << 16 | num_prims at runtime */
         struct toy_dst m0_0 = tdst_d(gcc->vars.tmp);

         tc_MUL(tc, m0_0, num_prims, tsrc_imm_d(gcc->in_vue_count << 16));
         tc_OR(tc, m0_0, tsrc_from(m0_0), num_prims);

         gs_COPY1(tc, mrf_header, 0, tsrc_from(m0_0), 0);
      }
   }

   /* set NumGSPrimsGenerated */
   if (gcc->write_vue)
      gs_COPY1(tc, mrf_header, 1, num_prims, 0);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must
    *      always allocate at least one GS URB Entry.  This is true even if
    *      the GS thread never needs to output vertices to the pipeline,
    *      e.g., when only performing stream output.  This is an artifact
    *      of the need to pass the GS thread an initial destination URB
    *      handle."
    */
   allocate = true;
   desc = tsrc_imm_mdesc_urb(tc, false, 1, 1,
         false, false, allocate,
         false, 0, 1);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc, GEN6_SFID_URB);
}
330
331 static void
332 gs_discard(struct gs_compile_context *gcc)
333 {
334 struct toy_compiler *tc = &gcc->tc;
335 struct toy_dst mrf_header;
336 struct toy_src desc;
337
338 mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
339
340 gs_COPY8(tc, mrf_header, tsrc_from(gcc->vars.urb_write_header));
341
342 desc = tsrc_imm_mdesc_urb(tc,
343 true, 1, 0, true, false, false,
344 false, 0, 0);
345
346 tc_add2(tc, TOY_OPCODE_URB_WRITE,
347 tdst_null(), tsrc_from(mrf_header), desc);
348 }
349
/*
 * Lower TOY_OPCODE_ENDPRIM.  Currently a no-op: without control flow
 * support, PrimEnd is derived at compile time from static_data.last_vertex
 * in gs_lower_opcode_emit_vue_static().
 */
static void
gs_lower_opcode_endprim(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   /* if has control flow, set PrimEnd on the last vertex and URB_WRITE */
}
355
/*
 * Emit the URB_WRITE for the current vertex in the dynamic case.  Not
 * implemented: gs_compile() fails with "no control flow support" before
 * the dynamic path is ever lowered.
 */
static void
gs_lower_opcode_emit_vue_dynamic(struct gs_compile_context *gcc)
{
   /* TODO similar to the static version */

   /*
    * When SO is enabled and the inputs are lines or triangles, vertices are
    * always buffered.  We can defer the emission of the current vertex until
    * the next EMIT or ENDPRIM.  Or, we can emit two URB_WRITEs with the
    * later patching the former.
    */
}
368
/*
 * Emit stream output writes for the dynamic case.  Incomplete: only the
 * guard and the SVB index bookkeeping are in place (see the TODO below).
 */
static void
gs_lower_opcode_emit_so_dynamic(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;

   /* write only once a complete primitive has been seen */
   tc_IF(tc, tdst_null(),
         tsrc_from(gcc->dynamic_data.num_vertices_in_prim),
         tsrc_imm_d(gcc->out_vue_min_count),
         GEN6_COND_GE);

   {
      /* per-vertex SVB indices — presumably mirrors the static path's
       * so_index + {0, 1, 2, 3}; confirm when this TODO is completed */
      tc_ADD(tc, gcc->vars.tmp, tsrc_from(gcc->vars.so_index), tsrc_imm_d(0x03020100));

      /* TODO same as static version */
   }

   tc_ENDIF(tc);

   tc_ADD(tc, gcc->vars.so_index,
         tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
}
390
/*
 * Emit the URB_WRITE for the current vertex in the static case.  The
 * PrimType/PrimStart/PrimEnd bits of m0.2 are computed at compile time;
 * the last vertex also ends the thread (EOT) and, with SO enabled, merges
 * SONumPrimsWritten into the header.
 */
static void
gs_lower_opcode_emit_vue_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst2;
   bool eot;

   /* the last EMIT ends the thread */
   eot = (gcc->static_data.num_vertices == gcc->static_data.total_vertices);

   /* PrimEnd is set when ENDPRIM marked this vertex in last_vertex[] */
   gcc->vars.prim_end =
      ((gcc->static_data.last_vertex[(gcc->static_data.num_vertices - 1) / 32] &
        1 << ((gcc->static_data.num_vertices - 1) % 32)) != 0);

   if (eot && gcc->write_so) {
      /* merge SONumPrimsWritten (upper word of m0.2) with the prim bits */
      inst2 = tc_OR(tc, tdst_offset(gcc->vars.urb_write_header, 0, 2),
            tsrc_from(gcc->vars.so_written),
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end));
      inst2->exec_size = GEN6_EXECSIZE_1;
      inst2->src[0] = tsrc_rect(inst2->src[0], TOY_RECT_010);
      inst2->src[1] = tsrc_rect(inst2->src[1], TOY_RECT_010);
   }
   else {
      gs_COPY1(tc, gcc->vars.urb_write_header, 2,
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end), 0);
   }

   gs_write_vue(gcc, tdst_d(gcc->vars.tmp),
         tsrc_from(gcc->vars.urb_write_header),
         gcc->vars.tgsi_outs,
         gcc->shader->out.count, eot);

   if (!eot) {
      /* the URB handle for the next vertex is returned in tmp.0 */
      gs_COPY1(tc, gcc->vars.urb_write_header, 0,
            tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   }

   gcc->vars.prim_start = gcc->vars.prim_end;
   gcc->vars.prim_end = false;
}
434
/*
 * Write the vertices of the just-completed primitive to the streamed
 * vertex buffers, guarded against writing past the maximum SVBI.
 */
static void
gs_lower_opcode_emit_so_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;
   int i, j;

   /* no complete primitive yet */
   if (gcc->static_data.num_vertices_in_prim < gcc->out_vue_min_count)
      return;

   /* tmp = so_index + {0, 1, 2, 3}: one SVB index per vertex */
   inst = tc_MOV(tc, tdst_w(gcc->vars.tmp), tsrc_imm_v(0x03020100));
   inst->exec_size = GEN6_EXECSIZE_8;
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;

   tc_ADD(tc, tdst_d(gcc->vars.tmp), tsrc_from(tdst_d(gcc->vars.tmp)),
         tsrc_rect(tsrc_from(gcc->vars.so_index), TOY_RECT_010));

   /* write only when the last vertex index is within the maximum SVBI */
   tc_IF(tc, tdst_null(),
         tsrc_rect(tsrc_offset(tsrc_from(tdst_d(gcc->vars.tmp)), 0, gcc->out_vue_min_count - 1), TOY_RECT_010),
         tsrc_rect(tsrc_offset(gcc->payload.svbi, 0, 4), TOY_RECT_010),
         GEN6_COND_LE);
   {
      for (i = 0; i < gcc->out_vue_min_count; i++) {
         for (j = 0; j < gcc->so_info->num_outputs; j++) {
            const int idx = gcc->so_info->output[j].register_index;
            struct toy_src index, out;
            int binding_table_index;
            bool write_commit;

            index = tsrc_d(tsrc_offset(tsrc_from(gcc->vars.tmp), 0, i));

            if (i == gcc->out_vue_min_count - 1) {
               /* the last vertex of the primitive is the current one */
               out = gcc->vars.tgsi_outs[idx];
            }
            else {
               /* gcc->vars.buffer_cur also points to the first vertex */
               const int buf =
                  (gcc->vars.buffer_cur + i) % gcc->vars.buffer_needed;

               out = tsrc_offset(tsrc_from(gcc->vars.buffers[buf]), idx, 0);
            }

            out = tsrc_offset(out, 0, gcc->so_info->output[j].start_component);

            /*
             * From the Sandy Bridge PRM, volume 4 part 2, page 19:
             *
             *     "The Kernel must do a write commit on the last write to
             *      DAP prior to a URB_WRITE with End of Thread."
             */
            write_commit =
               (gcc->static_data.num_vertices == gcc->static_data.total_vertices &&
                i == gcc->out_vue_min_count - 1 &&
                j == gcc->so_info->num_outputs - 1);

            binding_table_index = gcc->shader->bt.gen6_so_base + j;

            gs_write_so(gcc, gcc->vars.tmp, index,
                  out, write_commit, binding_table_index);

            /*
             * From the Sandy Bridge PRM, volume 4 part 1, page 168:
             *
             *     "The write commit does not modify the destination
             *      register, but merely clears the dependency associated
             *      with the destination register.  Thus, a simple "mov"
             *      instruction using the register as a source is
             *      sufficient to wait for the write commit to occur."
             */
            if (write_commit)
               tc_MOV(tc, gcc->vars.tmp, tsrc_from(gcc->vars.tmp));
         }
      }

      /* SONumPrimsWritten occupies the higher word of m0.2 of URB_WRITE */
      tc_ADD(tc, gcc->vars.so_written,
            tsrc_from(gcc->vars.so_written), tsrc_imm_d(1 << 16));
      tc_ADD(tc, gcc->vars.so_index,
            tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
   }
   tc_ENDIF(tc);
}
517
518 static void
519 gs_lower_opcode_emit_static(struct gs_compile_context *gcc,
520 struct toy_inst *inst)
521 {
522 gcc->static_data.num_vertices++;
523 gcc->static_data.num_vertices_in_prim++;
524
525 if (gcc->write_so) {
526 gs_lower_opcode_emit_so_static(gcc);
527
528 if (gcc->out_vue_min_count > 1 &&
529 gcc->static_data.num_vertices != gcc->static_data.total_vertices)
530 gs_save_output(gcc, gcc->vars.tgsi_outs);
531 }
532
533 if (gcc->write_vue)
534 gs_lower_opcode_emit_vue_static(gcc);
535 }
536
537 static void
538 gs_lower_opcode_emit_dynamic(struct gs_compile_context *gcc,
539 struct toy_inst *inst)
540 {
541 struct toy_compiler *tc = &gcc->tc;
542
543 tc_ADD(tc, gcc->dynamic_data.num_vertices,
544 tsrc_from(gcc->dynamic_data.num_vertices), tsrc_imm_d(1));
545 tc_ADD(tc, gcc->dynamic_data.num_vertices_in_prim,
546 tsrc_from(gcc->dynamic_data.num_vertices_in_prim), tsrc_imm_d(1));
547
548 if (gcc->write_so) {
549 gs_lower_opcode_emit_so_dynamic(gcc);
550
551 if (gcc->out_vue_min_count > 1)
552 gs_save_output(gcc, gcc->vars.tgsi_outs);
553 }
554
555 if (gcc->write_vue)
556 gs_lower_opcode_emit_vue_dynamic(gcc);
557 }
558
559 static void
560 gs_lower_opcode_emit(struct gs_compile_context *gcc, struct toy_inst *inst)
561 {
562 if (gcc->is_static)
563 gs_lower_opcode_emit_static(gcc, inst);
564 else
565 gs_lower_opcode_emit_dynamic(gcc, inst);
566 }
567
568 static void
569 gs_lower_opcode_tgsi_in(struct gs_compile_context *gcc,
570 struct toy_dst dst, int dim, int idx)
571 {
572 struct toy_compiler *tc = &gcc->tc;
573 struct toy_src attr;
574 int slot, reg = -1, subreg;
575
576 slot = toy_tgsi_find_input(&gcc->tgsi, idx);
577 if (slot >= 0) {
578 int i;
579
580 for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
581 if (gcc->variant->u.gs.semantic_names[i] ==
582 gcc->tgsi.inputs[slot].semantic_name &&
583 gcc->variant->u.gs.semantic_indices[i] ==
584 gcc->tgsi.inputs[slot].semantic_index) {
585 reg = i / 2;
586 subreg = (i % 2) * 4;
587 break;
588 }
589 }
590 }
591
592 if (reg < 0) {
593 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
594 return;
595 }
596
597 /* fix vertex ordering for GEN6_3DPRIM_TRISTRIP_REVERSE */
598 if (gcc->in_vue_count == 3 && dim < 2) {
599 struct toy_inst *inst;
600
601 /* get PrimType */
602 inst = tc_AND(tc, tdst_d(gcc->vars.tmp),
603 tsrc_offset(gcc->payload.header, 0, 2), tsrc_imm_d(0x1f));
604 inst->exec_size = GEN6_EXECSIZE_1;
605 inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);
606 inst->src[1] = tsrc_rect(inst->src[1], TOY_RECT_010);
607
608 inst = tc_CMP(tc, tdst_null(), tsrc_from(tdst_d(gcc->vars.tmp)),
609 tsrc_imm_d(GEN6_3DPRIM_TRISTRIP_REVERSE), GEN6_COND_NZ);
610 inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);
611
612 attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
613 inst = tc_MOV(tc, dst, attr);
614 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
615
616 /* swap IN[0] and IN[1] for GEN6_3DPRIM_TRISTRIP_REVERSE */
617 dim = !dim;
618
619 attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
620 inst = tc_MOV(tc, dst, attr);
621 inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
622 inst->pred_inv = true;
623 }
624 else {
625 attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
626 tc_MOV(tc, dst, attr);
627 }
628
629
630 }
631
632 static void
633 gs_lower_opcode_tgsi_imm(struct gs_compile_context *gcc,
634 struct toy_dst dst, int idx)
635 {
636 const uint32_t *imm;
637 int ch;
638
639 imm = toy_tgsi_get_imm(&gcc->tgsi, idx, NULL);
640
641 for (ch = 0; ch < 4; ch++) {
642 struct toy_inst *inst;
643
644 /* raw moves */
645 inst = tc_MOV(&gcc->tc,
646 tdst_writemask(tdst_ud(dst), 1 << ch),
647 tsrc_imm_ud(imm[ch]));
648 inst->access_mode = GEN6_ALIGN_16;
649 }
650 }
651
/*
 * Lower a direct (non-indirect) TGSI fetch opcode.  The virtual
 * instruction is discarded after lowering.
 */
static void
gs_lower_opcode_tgsi_direct(struct gs_compile_context *gcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &gcc->tc;
   int dim, idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   idx = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IN:
      gs_lower_opcode_tgsi_in(gcc, inst->dst, dim, idx);
      /* fetch all dimensions */
      if (dim == 0) {
         int i;

         for (i = 1; i < gcc->in_vue_count; i++) {
            const int vrf = toy_tgsi_get_vrf(&gcc->tgsi, TGSI_FILE_INPUT, i, idx);
            struct toy_dst dst;

            /* the input is not read for this vertex */
            if (vrf < 0)
               continue;

            dst = tdst(TOY_FILE_VRF, vrf, 0);
            gs_lower_opcode_tgsi_in(gcc, dst, i, idx);
         }
      }
      break;
   case TOY_OPCODE_TGSI_IMM:
      assert(!dim);
      gs_lower_opcode_tgsi_imm(gcc, inst->dst, idx);
      break;
   case TOY_OPCODE_TGSI_CONST:
   case TOY_OPCODE_TGSI_SV:
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   tc_discard_inst(tc, inst);
}
697
/*
 * Lower all virtual opcodes to hardware instructions.  The first pass
 * handles GS-specific opcodes (TGSI fetches, EMIT/ENDPRIM); the second
 * pass lowers generic math opcodes and URB_WRITE.
 */
static void
gs_lower_virtual_opcodes(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         gs_lower_opcode_tgsi_direct(gcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         /* TODO similar to VS */
         tc_fail(tc, "no indirection support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         /* TODO similar to VS */
         tc_fail(tc, "no sampling support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_EMIT:
         gs_lower_opcode_emit(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_ENDPRIM:
         gs_lower_opcode_endprim(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      default:
         break;
      }
   }

   /* second pass: math and URB_WRITE lowering */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_URB_WRITE:
         toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_URB);
         break;
      default:
         /* opcodes above 127 are virtual and must have been lowered */
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
785
/**
 * Scan the instructions and count the vertices and primitives the shader
 * will emit.  The counts are only meaningful when no EMIT/ENDPRIM occurs
 * inside control flow; gcc->is_static records whether that holds.
 */
static void
get_num_prims_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   const struct toy_inst *inst;
   int num_vertices_in_prim = 0, if_depth = 0, do_depth = 0;
   bool is_static = true;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      switch (inst->opcode) {
      case GEN6_OPCODE_IF:
         if_depth++;
         break;
      case GEN6_OPCODE_ENDIF:
         if_depth--;
         break;
      case TOY_OPCODE_DO:
         do_depth++;
         break;
      case GEN6_OPCODE_WHILE:
         do_depth--;
         break;
      case TOY_OPCODE_EMIT:
         /* an EMIT inside control flow makes the counts dynamic */
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            gcc->static_data.total_vertices++;

            /* each additional vertex after the first out_vue_min_count - 1
             * completes another (strip) primitive */
            num_vertices_in_prim++;
            if (num_vertices_in_prim >= gcc->out_vue_min_count)
               gcc->static_data.total_prims++;
         }
         break;
      case TOY_OPCODE_ENDPRIM:
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            /* mark the last emitted vertex as ending a primitive */
            const int vertidx = gcc->static_data.total_vertices - 1;
            const int idx = vertidx / 32;
            const int subidx = vertidx % 32;

            gcc->static_data.last_vertex[idx] |= 1 << subidx;
            num_vertices_in_prim = 0;
         }
         break;
      default:
         break;
      }

      if (!is_static)
         break;
   }

   gcc->is_static = is_static;
}
848
/**
 * Compile the shader.  Only shaders with static vertex counts are
 * supported; dynamic counts would require control flow support.
 */
static bool
gs_compile(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   get_num_prims_static(gcc);

   if (gcc->is_static) {
      /* prepend variable setup and FF_SYNC before the translated code */
      tc_head(tc);

      gs_init_vars(gcc);
      gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
      /* FF_SYNC returns the initial URB handle in tmp.0 */
      gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
      if (gcc->write_so)
         gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

      tc_tail(tc);
   }
   else {
      tc_fail(tc, "no control flow support");
      return false;
   }

   /* still must release the URB handle even when nothing is rasterized */
   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel)
      return false;

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}
911
/**
 * Compile a passthrough GS: forward the incoming vertices unmodified,
 * performing stream output and/or discarding as the variant requires.
 */
static bool
gs_compile_passthrough(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   /* the whole input primitive is emitted as-is */
   gcc->is_static = true;
   gcc->static_data.total_vertices = gcc->in_vue_count;
   gcc->static_data.total_prims = 1;
   gcc->static_data.last_vertex[0] = 1 << (gcc->in_vue_count - 1);

   gs_init_vars(gcc);
   gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
   /* FF_SYNC returns the initial URB handle in tmp.0 */
   gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   if (gcc->write_so)
      gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

   {
      int vert, attr;

      /* copy each input vertex to the outputs and EMIT it */
      for (vert = 0; vert < gcc->out_vue_min_count; vert++) {
         for (attr = 0; attr < gcc->shader->out.count; attr++) {
            tc_MOV(tc, tdst_from(gcc->vars.tgsi_outs[attr]),
                  tsrc_offset(gcc->payload.vues[vert], attr / 2, (attr % 2) * 4));
         }

         gs_lower_opcode_emit(gcc, NULL);
      }

      gs_lower_opcode_endprim(gcc, NULL);
   }

   /* still must release the URB handle even when nothing is rasterized */
   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);

   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);

   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to translate GS TGSI tokens: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      int i;

      ilo_printf("VUE count %d, VUE size %d\n",
            gcc->in_vue_count, gcc->in_vue_size);
      ilo_printf("%srasterizer discard\n",
            (gcc->variant->u.gs.rasterizer_discard) ? "" : "no ");

      for (i = 0; i < gcc->so_info->num_outputs; i++) {
         ilo_printf("SO[%d] = OUT[%d]\n", i,
               gcc->so_info->output[i].register_index);
      }

      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel) {
      ilo_err("failed to compile GS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}
995
996 /**
997 * Translate the TGSI tokens.
998 */
999 static bool
1000 gs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1001 struct toy_tgsi *tgsi)
1002 {
1003 if (ilo_debug & ILO_DEBUG_GS) {
1004 ilo_printf("dumping geometry shader\n");
1005 ilo_printf("\n");
1006
1007 tgsi_dump(tokens, 0);
1008 ilo_printf("\n");
1009 }
1010
1011 toy_compiler_translate_tgsi(tc, tokens, true, tgsi);
1012 if (tc->fail)
1013 return false;
1014
1015 if (ilo_debug & ILO_DEBUG_GS) {
1016 ilo_printf("TGSI translator:\n");
1017 toy_tgsi_dump(tgsi);
1018 ilo_printf("\n");
1019 toy_compiler_dump(tc);
1020 ilo_printf("\n");
1021 }
1022
1023 return true;
1024 }
1025
1026 /**
1027 * Set up shader inputs for fixed-function units.
1028 */
1029 static void
1030 gs_setup_shader_in(struct ilo_shader *sh,
1031 const struct ilo_shader_variant *variant)
1032 {
1033 int i;
1034
1035 for (i = 0; i < variant->u.gs.num_inputs; i++) {
1036 sh->in.semantic_names[i] = variant->u.gs.semantic_names[i];
1037 sh->in.semantic_indices[i] = variant->u.gs.semantic_indices[i];
1038 sh->in.interp[i] = TGSI_INTERPOLATE_CONSTANT;
1039 sh->in.centroid[i] = false;
1040 }
1041
1042 sh->in.count = variant->u.gs.num_inputs;
1043
1044 sh->in.has_pos = false;
1045 sh->in.has_linear_interp = false;
1046 sh->in.barycentric_interpolation_mode = 0;
1047 }
1048
/**
 * Set up shader outputs for fixed-function units.
 *
 * Orders the VUE as: PSIZE, POSITION, optional clip distances, COLOR and
 * BCOLOR pairs, then everything else.  output_map[] records which TGSI
 * output slot (or -1) backs each VUE slot.
 *
 * XXX share the code with VS
 */
static void
gs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                    bool output_clipdist, int *output_map)
{
   int psize_slot = -1, pos_slot = -1;
   int clipdist_slot[2] = { -1, -1 };
   int color_slot[4] = { -1, -1, -1, -1 };
   int num_outs, i;

   /* find out the slots of outputs that need special care */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
         psize_slot = i;
         break;
      case TGSI_SEMANTIC_POSITION:
         pos_slot = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         if (tgsi->outputs[i].semantic_index)
            clipdist_slot[1] = i;
         else
            clipdist_slot[0] = i;
         break;
      case TGSI_SEMANTIC_COLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[2] = i;
         else
            color_slot[0] = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[3] = i;
         else
            color_slot[1] = i;
         break;
      default:
         break;
      }
   }

   /* the first two VUEs are always PSIZE and POSITION */
   num_outs = 2;
   output_map[0] = psize_slot;
   output_map[1] = pos_slot;

   sh->out.register_indices[0] =
      (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
   sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
   sh->out.semantic_indices[0] = 0;

   sh->out.register_indices[1] =
      (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
   sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
   sh->out.semantic_indices[1] = 0;

   sh->out.has_pos = true;

   /* followed by optional clip distances */
   if (output_clipdist) {
      sh->out.register_indices[num_outs] =
         (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 0;
      output_map[num_outs++] = clipdist_slot[0];

      sh->out.register_indices[num_outs] =
         (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 1;
      output_map[num_outs++] = clipdist_slot[1];
   }

   /*
    * make BCOLOR follow COLOR so that we can make use of
    * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
    */
   for (i = 0; i < 4; i++) {
      const int slot = color_slot[i];

      if (slot < 0)
         continue;

      sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
      sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
      sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;

      output_map[num_outs++] = slot;
   }

   /* add the rest of the outputs */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
      case TGSI_SEMANTIC_POSITION:
      case TGSI_SEMANTIC_CLIPDIST:
      case TGSI_SEMANTIC_COLOR:
      case TGSI_SEMANTIC_BCOLOR:
         /* already placed above */
         break;
      default:
         sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
         sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
         sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
         output_map[num_outs++] = i;
         break;
      }
   }

   sh->out.count = num_outs;
}
1164
1165 static void
1166 gs_setup_vars(struct gs_compile_context *gcc)
1167 {
1168 int grf = gcc->first_free_grf;
1169 int i;
1170
1171 gcc->vars.urb_write_header = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
1172 grf++;
1173
1174 gcc->vars.tmp = tdst(TOY_FILE_GRF, grf, 0);
1175 grf++;
1176
1177 if (gcc->write_so) {
1178 gcc->vars.buffer_needed = gcc->out_vue_min_count - 1;
1179 for (i = 0; i < gcc->vars.buffer_needed; i++) {
1180 gcc->vars.buffers[i] = tdst(TOY_FILE_GRF, grf, 0);
1181 grf += gcc->shader->out.count;
1182 }
1183
1184 gcc->vars.so_written = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
1185 grf++;
1186
1187 gcc->vars.so_index = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
1188 grf++;
1189 }
1190
1191 gcc->first_free_grf = grf;
1192
1193 if (!gcc->tgsi.reg_mapping) {
1194 for (i = 0; i < gcc->shader->out.count; i++)
1195 gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_GRF, grf++, 0);
1196
1197 gcc->first_free_grf = grf;
1198 return;
1199 }
1200
1201 for (i = 0; i < gcc->shader->out.count; i++) {
1202 const int slot = gcc->output_map[i];
1203 const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(&gcc->tgsi,
1204 TGSI_FILE_OUTPUT, 0, gcc->tgsi.outputs[slot].index) : -1;
1205
1206 if (vrf >= 0)
1207 gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_VRF, vrf, 0);
1208 else
1209 gcc->vars.tgsi_outs[i] = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
1210 }
1211 }
1212
1213 static void
1214 gs_setup_payload(struct gs_compile_context *gcc)
1215 {
1216 int grf, i;
1217
1218 grf = 0;
1219
1220 /* r0: payload header */
1221 gcc->payload.header = tsrc_d(tsrc(TOY_FILE_GRF, grf, 0));
1222 grf++;
1223
1224 /* r1: SVBI */
1225 if (gcc->write_so) {
1226 gcc->payload.svbi = tsrc_ud(tsrc(TOY_FILE_GRF, grf, 0));
1227 grf++;
1228 }
1229
1230 /* URB data */
1231 gcc->shader->in.start_grf = grf;
1232
1233 /* no pull constants */
1234
1235 /* VUEs */
1236 for (i = 0; i < gcc->in_vue_count; i++) {
1237 gcc->payload.vues[i] = tsrc(TOY_FILE_GRF, grf, 0);
1238 grf += gcc->in_vue_size;
1239 }
1240
1241 gcc->first_free_grf = grf;
1242 gcc->last_free_grf = 127;
1243 }
1244
1245 /**
1246 * Set up GS compile context. This includes translating the TGSI tokens.
1247 */
1248 static bool
1249 gs_setup(struct gs_compile_context *gcc,
1250 const struct ilo_shader_state *state,
1251 const struct ilo_shader_variant *variant,
1252 int num_verts)
1253 {
1254 memset(gcc, 0, sizeof(*gcc));
1255
1256 gcc->shader = CALLOC_STRUCT(ilo_shader);
1257 if (!gcc->shader)
1258 return false;
1259
1260 gcc->variant = variant;
1261 gcc->so_info = &state->info.stream_output;
1262
1263 toy_compiler_init(&gcc->tc, state->info.dev);
1264
1265 gcc->write_so = (state->info.stream_output.num_outputs > 0);
1266 gcc->write_vue = !gcc->variant->u.gs.rasterizer_discard;
1267
1268 gcc->tc.templ.access_mode = GEN6_ALIGN_16;
1269 gcc->tc.templ.exec_size = GEN6_EXECSIZE_4;
1270 gcc->tc.rect_linear_width = 4;
1271
1272 if (state->info.tokens) {
1273 if (!gs_setup_tgsi(&gcc->tc, state->info.tokens, &gcc->tgsi)) {
1274 toy_compiler_cleanup(&gcc->tc);
1275 FREE(gcc->shader);
1276 return false;
1277 }
1278
1279 switch (gcc->tgsi.props.gs_input_prim) {
1280 case PIPE_PRIM_POINTS:
1281 gcc->in_vue_count = 1;
1282 break;
1283 case PIPE_PRIM_LINES:
1284 gcc->in_vue_count = 2;
1285 gcc->shader->in.discard_adj = true;
1286 break;
1287 case PIPE_PRIM_TRIANGLES:
1288 gcc->in_vue_count = 3;
1289 gcc->shader->in.discard_adj = true;
1290 break;
1291 case PIPE_PRIM_LINES_ADJACENCY:
1292 gcc->in_vue_count = 4;
1293 break;
1294 case PIPE_PRIM_TRIANGLES_ADJACENCY:
1295 gcc->in_vue_count = 6;
1296 break;
1297 default:
1298 tc_fail(&gcc->tc, "unsupported GS input type");
1299 gcc->in_vue_count = 0;
1300 break;
1301 }
1302
1303 switch (gcc->tgsi.props.gs_output_prim) {
1304 case PIPE_PRIM_POINTS:
1305 gcc->out_vue_min_count = 1;
1306 break;
1307 case PIPE_PRIM_LINE_STRIP:
1308 gcc->out_vue_min_count = 2;
1309 break;
1310 case PIPE_PRIM_TRIANGLE_STRIP:
1311 gcc->out_vue_min_count = 3;
1312 break;
1313 default:
1314 tc_fail(&gcc->tc, "unsupported GS output type");
1315 gcc->out_vue_min_count = 0;
1316 break;
1317 }
1318 }
1319 else {
1320 int i;
1321
1322 gcc->in_vue_count = num_verts;
1323 gcc->out_vue_min_count = num_verts;
1324
1325 gcc->tgsi.num_outputs = gcc->variant->u.gs.num_inputs;
1326 for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
1327 gcc->tgsi.outputs[i].semantic_name =
1328 gcc->variant->u.gs.semantic_names[i];
1329 gcc->tgsi.outputs[i].semantic_index =
1330 gcc->variant->u.gs.semantic_indices[i];
1331 }
1332 }
1333
1334 gcc->tc.templ.access_mode = GEN6_ALIGN_1;
1335
1336 gs_setup_shader_in(gcc->shader, gcc->variant);
1337 gs_setup_shader_out(gcc->shader, &gcc->tgsi, false, gcc->output_map);
1338
1339 gcc->in_vue_size = (gcc->shader->in.count + 1) / 2;
1340
1341 gcc->out_vue_size = (gcc->shader->out.count + 1) / 2;
1342
1343 gs_setup_payload(gcc);
1344 gs_setup_vars(gcc);
1345
1346 /* m0 is reserved for system routines */
1347 gcc->first_free_mrf = 1;
1348 gcc->last_free_mrf = 15;
1349
1350 gcc->shader->bt.gen6_so_base = 0;
1351 gcc->shader->bt.gen6_so_count = gcc->so_info->num_outputs;
1352
1353 gcc->shader->bt.total_count = gcc->shader->bt.gen6_so_count;
1354
1355 return true;
1356 }
1357
1358 /**
1359 * Compile the geometry shader.
1360 */
1361 struct ilo_shader *
1362 ilo_shader_compile_gs(const struct ilo_shader_state *state,
1363 const struct ilo_shader_variant *variant)
1364 {
1365 struct gs_compile_context gcc;
1366
1367 if (!gs_setup(&gcc, state, variant, 0))
1368 return NULL;
1369
1370 if (!gs_compile(&gcc)) {
1371 FREE(gcc.shader);
1372 gcc.shader = NULL;
1373 }
1374
1375 toy_tgsi_cleanup(&gcc.tgsi);
1376 toy_compiler_cleanup(&gcc.tc);
1377
1378 return gcc.shader;
1379 }
1380
1381 static bool
1382 append_gs_to_vs(struct ilo_shader *vs, struct ilo_shader *gs, int num_verts)
1383 {
1384 void *combined;
1385 int gs_offset;
1386
1387 if (!gs)
1388 return false;
1389
1390 /* kernels must be aligned to 64-byte */
1391 gs_offset = align(vs->kernel_size, 64);
1392 combined = REALLOC(vs->kernel, vs->kernel_size,
1393 gs_offset + gs->kernel_size);
1394 if (!combined)
1395 return false;
1396
1397 memcpy(combined + gs_offset, gs->kernel, gs->kernel_size);
1398
1399 vs->kernel = combined;
1400 vs->kernel_size = gs_offset + gs->kernel_size;
1401
1402 vs->stream_output = true;
1403 vs->gs_offsets[num_verts - 1] = gs_offset;
1404 vs->gs_start_grf = gs->in.start_grf;
1405 vs->gs_bt_so_count = gs->bt.gen6_so_count;
1406
1407 ilo_shader_destroy_kernel(gs);
1408
1409 return true;
1410 }
1411
1412 bool
1413 ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state,
1414 const struct ilo_shader_variant *vs_variant,
1415 const int *so_mapping,
1416 struct ilo_shader *vs)
1417 {
1418 struct gs_compile_context gcc;
1419 struct ilo_shader_state state;
1420 struct ilo_shader_variant variant;
1421 const int num_verts = 3;
1422 int i;
1423
1424 /* init GS state and variant */
1425 state = *vs_state;
1426 state.info.tokens = NULL;
1427 for (i = 0; i < state.info.stream_output.num_outputs; i++) {
1428 const int reg = state.info.stream_output.output[i].register_index;
1429
1430 state.info.stream_output.output[i].register_index = so_mapping[reg];
1431 }
1432
1433 variant = *vs_variant;
1434 variant.u.gs.rasterizer_discard = vs_variant->u.vs.rasterizer_discard;
1435 variant.u.gs.num_inputs = vs->out.count;
1436 for (i = 0; i < vs->out.count; i++) {
1437 variant.u.gs.semantic_names[i] =
1438 vs->out.semantic_names[i];
1439 variant.u.gs.semantic_indices[i] =
1440 vs->out.semantic_indices[i];
1441 }
1442
1443 if (!gs_setup(&gcc, &state, &variant, num_verts))
1444 return false;
1445
1446 if (!gs_compile_passthrough(&gcc)) {
1447 FREE(gcc.shader);
1448 gcc.shader = NULL;
1449 }
1450
1451 /* no need to call toy_tgsi_cleanup() */
1452 toy_compiler_cleanup(&gcc.tc);
1453
1454 return append_gs_to_vs(vs, gcc.shader, num_verts);
1455 }