ilo: expose register indices of OUTs in ilo_shader
[mesa.git] / src / gallium / drivers / ilo / shader / ilo_shader_fs.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_dump.h"
29 #include "toy_compiler.h"
30 #include "toy_tgsi.h"
31 #include "toy_legalize.h"
32 #include "toy_optimize.h"
33 #include "toy_helpers.h"
34 #include "ilo_context.h"
35 #include "ilo_shader.h"
36
37 struct fs_compile_context {
38 struct ilo_shader *shader;
39 const struct ilo_shader_variant *variant;
40
41 struct toy_compiler tc;
42 struct toy_tgsi tgsi;
43
44 enum brw_message_target const_cache;
45 int dispatch_mode;
46
47 struct {
48 int barycentric_interps[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
49 int source_depth;
50 int source_w;
51 int pos_offset;
52 } payloads[2];
53
54 int first_const_grf;
55 int first_attr_grf;
56 int first_free_grf;
57 int last_free_grf;
58
59 int num_grf_per_vrf;
60
61 int first_free_mrf;
62 int last_free_mrf;
63 };
64
65 static void
66 fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
67 {
68 struct toy_compiler *tc = &fcc->tc;
69 const struct toy_src src_z =
70 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
71 const struct toy_src src_w =
72 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
73 const int fb_height =
74 (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
75 const bool origin_upper_left =
76 (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
77 const bool pixel_center_integer =
78 (fcc->tgsi.props.fs_coord_pixel_center ==
79 TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
80 struct toy_src subspan_x, subspan_y;
81 struct toy_dst tmp, tmp_uw;
82 struct toy_dst real_dst[4];
83
84 tdst_transpose(dst, real_dst);
85
86 subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
87 subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);
88
89 subspan_y = tsrc_offset(subspan_x, 0, 1);
90
91 tmp_uw = tdst_uw(tc_alloc_tmp(tc));
92 tmp = tc_alloc_tmp(tc);
93
94 /* X */
95 tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
96 tc_MOV(tc, tmp, tsrc_from(tmp_uw));
97 if (pixel_center_integer)
98 tc_MOV(tc, real_dst[0], tsrc_from(tmp));
99 else
100 tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));
101
102 /* Y */
103 tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
104 tc_MOV(tc, tmp, tsrc_from(tmp_uw));
105 if (origin_upper_left && pixel_center_integer) {
106 tc_MOV(tc, real_dst[1], tsrc_from(tmp));
107 }
108 else {
109 struct toy_src y = tsrc_from(tmp);
110 float offset = 0.0f;
111
112 if (!pixel_center_integer)
113 offset += 0.5f;
114
115 if (!origin_upper_left) {
116 offset += (float) (fb_height - 1);
117 y = tsrc_negate(y);
118 }
119
120 tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
121 }
122
123 /* Z and W */
124 tc_MOV(tc, real_dst[2], src_z);
125 tc_INV(tc, real_dst[3], src_w);
126 }
127
128 static void
129 fetch_face(struct fs_compile_context *fcc, struct toy_dst dst)
130 {
131 struct toy_compiler *tc = &fcc->tc;
132 const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0));
133 struct toy_dst tmp_f, tmp;
134 struct toy_dst real_dst[4];
135
136 tdst_transpose(dst, real_dst);
137
138 tmp_f = tc_alloc_tmp(tc);
139 tmp = tdst_d(tmp_f);
140 tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15));
141 tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1));
142 tc_MOV(tc, tmp_f, tsrc_from(tmp));
143
144 /* convert to 1.0 and -1.0 */
145 tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f));
146 tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f));
147
148 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
149 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
150 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
151 }
152
153 static void
154 fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
155 {
156 struct toy_compiler *tc = &fcc->tc;
157 struct toy_dst real_dst[4];
158 bool is_const = false;
159 int grf, mode, ch;
160
161 tdst_transpose(dst, real_dst);
162
163 grf = fcc->first_attr_grf + slot * 2;
164
165 switch (fcc->tgsi.inputs[slot].interp) {
166 case TGSI_INTERPOLATE_CONSTANT:
167 is_const = true;
168 break;
169 case TGSI_INTERPOLATE_LINEAR:
170 if (fcc->tgsi.inputs[slot].centroid)
171 mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
172 else
173 mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
174 break;
175 case TGSI_INTERPOLATE_COLOR:
176 if (fcc->variant->u.fs.flatshade) {
177 is_const = true;
178 break;
179 }
180 /* fall through */
181 case TGSI_INTERPOLATE_PERSPECTIVE:
182 if (fcc->tgsi.inputs[slot].centroid)
183 mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
184 else
185 mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
186 break;
187 default:
188 assert(!"unexpected FS interpolation");
189 mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
190 break;
191 }
192
193 if (is_const) {
194 struct toy_src a0[4];
195
196 a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
197 a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
198 a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
199 a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);
200
201 for (ch = 0; ch < 4; ch++)
202 tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
203 }
204 else {
205 struct toy_src attr[4], uv;
206
207 attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
208 attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
209 attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
210 attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);
211
212 uv = tsrc(TOY_FILE_GRF, fcc->payloads[0].barycentric_interps[mode], 0);
213
214 for (ch = 0; ch < 4; ch++) {
215 tc_add2(tc, BRW_OPCODE_PLN, real_dst[ch],
216 tsrc_rect(attr[ch], TOY_RECT_010), uv);
217 }
218 }
219
220 if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
221 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
222 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
223 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
224 }
225 }
226
227 static void
228 fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc,
229 struct toy_dst dst, int dim, int idx)
230 {
231 int slot;
232
233 assert(!dim);
234
235 slot = toy_tgsi_find_input(&fcc->tgsi, idx);
236 if (slot < 0)
237 return;
238
239 switch (fcc->tgsi.inputs[slot].semantic_name) {
240 case TGSI_SEMANTIC_POSITION:
241 fetch_position(fcc, dst);
242 break;
243 case TGSI_SEMANTIC_FACE:
244 fetch_face(fcc, dst);
245 break;
246 default:
247 fetch_attr(fcc, dst, slot);
248 break;
249 }
250 }
251
252 static void
253 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
254 struct toy_dst dst, int dim, struct toy_src idx)
255 {
256 const struct toy_dst header =
257 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
258 const struct toy_dst global_offset =
259 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
260 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
261 struct toy_compiler *tc = &fcc->tc;
262 unsigned msg_type, msg_ctrl, msg_len;
263 struct toy_inst *inst;
264 struct toy_src desc;
265 struct toy_dst tmp, real_dst[4];
266 int i;
267
268 /* set message header */
269 inst = tc_MOV(tc, header, r0);
270 inst->mask_ctrl = BRW_MASK_DISABLE;
271
272 /* set global offset */
273 inst = tc_MOV(tc, global_offset, idx);
274 inst->mask_ctrl = BRW_MASK_DISABLE;
275 inst->exec_size = BRW_EXECUTE_1;
276 inst->src[0].rect = TOY_RECT_010;
277
278 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
279 msg_ctrl = BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW << 8;
280 msg_len = 1;
281
282 desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
283 msg_type, msg_ctrl, ILO_WM_CONST_SURFACE(dim));
284
285 tmp = tc_alloc_tmp(tc);
286
287 tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);
288
289 tdst_transpose(dst, real_dst);
290 for (i = 0; i < 4; i++) {
291 const struct toy_src src =
292 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
293
294 /* cast to type D to make sure these are raw moves */
295 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
296 }
297 }
298
299 static void
300 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
301 struct toy_dst dst, int dim, struct toy_src idx)
302 {
303 struct toy_compiler *tc = &fcc->tc;
304 const struct toy_dst offset =
305 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
306 struct toy_src desc;
307 struct toy_inst *inst;
308 struct toy_dst tmp, real_dst[4];
309 int i;
310
311 /*
312 * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
313 * changed from OWord Block Read to ld to increase performance in the
314 * classic driver. Since we use the constant cache instead of the data
315 * cache, I wonder if we still want to follow the classic driver.
316 */
317
318 /* set offset */
319 inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
320 inst->exec_size = BRW_EXECUTE_8;
321 inst->mask_ctrl = BRW_MASK_DISABLE;
322
323 desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
324 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
325 GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
326 0,
327 ILO_WM_CONST_SURFACE(dim));
328
329 tmp = tc_alloc_tmp(tc);
330 inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
331 inst->exec_size = BRW_EXECUTE_8;
332 inst->mask_ctrl = BRW_MASK_DISABLE;
333
334 tdst_transpose(dst, real_dst);
335 for (i = 0; i < 4; i++) {
336 const struct toy_src src =
337 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
338
339 /* cast to type D to make sure these are raw moves */
340 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
341 }
342 }
343
344 static void
345 fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
346 struct toy_dst dst, int idx)
347 {
348 const uint32_t *imm;
349 struct toy_dst real_dst[4];
350 int ch;
351
352 imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
353
354 tdst_transpose(dst, real_dst);
355 /* raw moves */
356 for (ch = 0; ch < 4; ch++)
357 tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch]));
358 }
359
360 static void
361 fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
362 struct toy_dst dst, int dim, int idx)
363 {
364 struct toy_compiler *tc = &fcc->tc;
365 const struct toy_tgsi *tgsi = &fcc->tgsi;
366 int slot;
367
368 assert(!dim);
369
370 slot = toy_tgsi_find_system_value(tgsi, idx);
371 if (slot < 0)
372 return;
373
374 switch (tgsi->system_values[slot].semantic_name) {
375 case TGSI_SEMANTIC_PRIMID:
376 case TGSI_SEMANTIC_INSTANCEID:
377 case TGSI_SEMANTIC_VERTEXID:
378 default:
379 tc_fail(tc, "unhandled system value");
380 tc_MOV(tc, dst, tsrc_imm_d(0));
381 break;
382 }
383 }
384
385 static void
386 fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
387 struct toy_inst *inst)
388 {
389 struct toy_compiler *tc = &fcc->tc;
390 int dim, idx;
391
392 assert(inst->src[0].file == TOY_FILE_IMM);
393 dim = inst->src[0].val32;
394
395 assert(inst->src[1].file == TOY_FILE_IMM);
396 idx = inst->src[1].val32;
397
398 switch (inst->opcode) {
399 case TOY_OPCODE_TGSI_IN:
400 fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
401 break;
402 case TOY_OPCODE_TGSI_CONST:
403 if (tc->dev->gen >= ILO_GEN(7))
404 fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
405 else
406 fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
407 break;
408 case TOY_OPCODE_TGSI_SV:
409 fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
410 break;
411 case TOY_OPCODE_TGSI_IMM:
412 assert(!dim);
413 fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
414 break;
415 default:
416 tc_fail(tc, "unhandled TGSI fetch");
417 break;
418 }
419
420 tc_discard_inst(tc, inst);
421 }
422
423 static void
424 fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
425 struct toy_inst *inst)
426 {
427 tc_fail(&fcc->tc, "no TGSI indirection support");
428 }
429
430 /**
431 * Emit instructions to move sampling parameters to the message registers.
432 */
433 static int
434 fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
435 int base_mrf, int param_size,
436 struct toy_src *coords, int num_coords,
437 struct toy_src bias_or_lod, struct toy_src ref_or_si,
438 struct toy_src *ddx, struct toy_src *ddy,
439 int num_derivs)
440 {
441 int num_params, i;
442
443 assert(num_coords <= 4);
444 assert(num_derivs <= 3 && num_derivs <= num_coords);
445
446 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
447 switch (msg_type) {
448 case GEN5_SAMPLER_MESSAGE_SAMPLE:
449 for (i = 0; i < num_coords; i++)
450 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
451 num_params = num_coords;
452 break;
453 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
454 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
455 for (i = 0; i < num_coords; i++)
456 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
457 tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
458 num_params = 5;
459 break;
460 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
461 for (i = 0; i < num_coords; i++)
462 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
463 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
464 num_params = 5;
465 break;
466 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
467 for (i = 0; i < num_coords; i++)
468 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
469 for (i = 0; i < num_derivs; i++) {
470 tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
471 tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
472 }
473 num_params = 4 + num_derivs * 2;
474 break;
475 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
476 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
477 for (i = 0; i < num_coords; i++)
478 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
479 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
480 tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
481 num_params = 6;
482 break;
483 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
484 assert(num_coords <= 3);
485
486 for (i = 0; i < num_coords; i++)
487 tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
488 tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
489 tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
490 num_params = 5;
491 break;
492 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
493 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
494 num_params = 1;
495 break;
496 default:
497 tc_fail(tc, "unknown sampler opcode");
498 num_params = 0;
499 break;
500 }
501 #undef SAMPLER_PARAM
502
503 return num_params * param_size;
504 }
505
506 static int
507 fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
508 int base_mrf, int param_size,
509 struct toy_src *coords, int num_coords,
510 struct toy_src bias_or_lod, struct toy_src ref_or_si,
511 struct toy_src *ddx, struct toy_src *ddy,
512 int num_derivs)
513 {
514 int num_params, i;
515
516 assert(num_coords <= 4);
517 assert(num_derivs <= 3 && num_derivs <= num_coords);
518
519 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
520 switch (msg_type) {
521 case GEN5_SAMPLER_MESSAGE_SAMPLE:
522 for (i = 0; i < num_coords; i++)
523 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
524 num_params = num_coords;
525 break;
526 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
527 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
528 tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
529 for (i = 0; i < num_coords; i++)
530 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
531 num_params = 1 + num_coords;
532 break;
533 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
534 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
535 for (i = 0; i < num_coords; i++)
536 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
537 num_params = 1 + num_coords;
538 break;
539 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
540 for (i = 0; i < num_coords; i++) {
541 tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
542 if (i < num_derivs) {
543 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
544 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
545 }
546 }
547 num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
548 break;
549 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
550 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
551 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
552 tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
553 for (i = 0; i < num_coords; i++)
554 tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
555 num_params = 2 + num_coords;
556 break;
557 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
558 assert(num_coords >= 1 && num_coords <= 3);
559
560 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
561 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
562 for (i = 1; i < num_coords; i++)
563 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
564 num_params = 1 + num_coords;
565 break;
566 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
567 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
568 num_params = 1;
569 break;
570 default:
571 tc_fail(tc, "unknown sampler opcode");
572 num_params = 0;
573 break;
574 }
575 #undef SAMPLER_PARAM
576
577 return num_params * param_size;
578 }
579
580 /**
581 * Set up message registers and return the message descriptor for sampling.
582 */
583 static struct toy_src
584 fs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
585 int base_mrf, const uint32_t *saturate_coords,
586 unsigned *ret_sampler_index)
587 {
588 unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
589 struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
590 int num_coords, ref_pos, num_derivs;
591 int sampler_src, param_size, i;
592
593 switch (inst->exec_size) {
594 case BRW_EXECUTE_8:
595 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
596 param_size = 1;
597 break;
598 case BRW_EXECUTE_16:
599 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
600 param_size = 2;
601 break;
602 default:
603 tc_fail(tc, "unsupported execute size for sampling");
604 return tsrc_null();
605 break;
606 }
607
608 num_coords = toy_tgsi_get_texture_coord_dim(inst->tex.target, &ref_pos);
609 tsrc_transpose(inst->src[0], coords);
610 bias_or_lod = tsrc_null();
611 ref_or_si = tsrc_null();
612 num_derivs = 0;
613 sampler_src = 1;
614
615 /*
616 * For TXD,
617 *
618 * src0 := (x, y, z, w)
619 * src1 := ddx
620 * src2 := ddy
621 * src3 := sampler
622 *
623 * For TEX2, TXB2, and TXL2,
624 *
625 * src0 := (x, y, z, w)
626 * src1 := (v or bias or lod, ...)
627 * src2 := sampler
628 *
629 * For TEX, TXB, TXL, and TXP,
630 *
631 * src0 := (x, y, z, w or bias or lod or projection)
632 * src1 := sampler
633 *
634 * For TXQ,
635 *
636 * src0 := (lod, ...)
637 * src1 := sampler
638 *
639 * For TXQ_LZ,
640 *
641 * src0 := sampler
642 *
643 * And for TXF,
644 *
645 * src0 := (x, y, z, w or lod)
646 * src1 := sampler
647 *
648 * State trackers should not generate opcode+texture combinations with
649 * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
650 */
651 switch (inst->opcode) {
652 case TOY_OPCODE_TGSI_TEX:
653 if (ref_pos >= 0) {
654 assert(ref_pos < 4);
655
656 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
657 ref_or_si = coords[ref_pos];
658 }
659 else {
660 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
661 }
662 break;
663 case TOY_OPCODE_TGSI_TXD:
664 if (ref_pos >= 0)
665 tc_fail(tc, "TXD with shadow sampler not supported");
666
667 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
668 tsrc_transpose(inst->src[1], ddx);
669 tsrc_transpose(inst->src[2], ddy);
670 num_derivs = num_coords;
671 sampler_src = 3;
672 break;
673 case TOY_OPCODE_TGSI_TXP:
674 if (ref_pos >= 0) {
675 assert(ref_pos < 3);
676
677 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
678 ref_or_si = coords[ref_pos];
679 }
680 else {
681 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
682 }
683
684 /* project the coordinates */
685 {
686 struct toy_dst tmp[4];
687
688 tc_alloc_tmp4(tc, tmp);
689
690 tc_INV(tc, tmp[3], coords[3]);
691 for (i = 0; i < num_coords && i < 3; i++) {
692 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
693 coords[i] = tsrc_from(tmp[i]);
694 }
695
696 if (ref_pos >= i) {
697 tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
698 ref_or_si = tsrc_from(tmp[ref_pos]);
699 }
700 }
701 break;
702 case TOY_OPCODE_TGSI_TXB:
703 if (ref_pos >= 0) {
704 assert(ref_pos < 3);
705
706 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
707 ref_or_si = coords[ref_pos];
708 }
709 else {
710 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
711 }
712
713 bias_or_lod = coords[3];
714 break;
715 case TOY_OPCODE_TGSI_TXL:
716 if (ref_pos >= 0) {
717 assert(ref_pos < 3);
718
719 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
720 ref_or_si = coords[ref_pos];
721 }
722 else {
723 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
724 }
725
726 bias_or_lod = coords[3];
727 break;
728 case TOY_OPCODE_TGSI_TXF:
729 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
730
731 switch (inst->tex.target) {
732 case TGSI_TEXTURE_2D_MSAA:
733 case TGSI_TEXTURE_2D_ARRAY_MSAA:
734 assert(ref_pos >= 0 && ref_pos < 4);
735 /* lod is always 0 */
736 bias_or_lod = tsrc_imm_d(0);
737 ref_or_si = coords[ref_pos];
738 break;
739 default:
740 bias_or_lod = coords[3];
741 break;
742 }
743
744 /* offset the coordinates */
745 if (!tsrc_is_null(inst->tex.offsets[0])) {
746 struct toy_dst tmp[4];
747 struct toy_src offsets[4];
748
749 tc_alloc_tmp4(tc, tmp);
750 tsrc_transpose(inst->tex.offsets[0], offsets);
751
752 for (i = 0; i < num_coords; i++) {
753 tc_ADD(tc, tmp[i], coords[i], offsets[i]);
754 coords[i] = tsrc_from(tmp[i]);
755 }
756 }
757
758 sampler_src = 1;
759 break;
760 case TOY_OPCODE_TGSI_TXQ:
761 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
762 num_coords = 0;
763 bias_or_lod = coords[0];
764 break;
765 case TOY_OPCODE_TGSI_TXQ_LZ:
766 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
767 num_coords = 0;
768 sampler_src = 0;
769 break;
770 case TOY_OPCODE_TGSI_TEX2:
771 if (ref_pos >= 0) {
772 assert(ref_pos < 5);
773
774 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
775
776 if (ref_pos >= 4) {
777 struct toy_src src1[4];
778 tsrc_transpose(inst->src[1], src1);
779 ref_or_si = src1[ref_pos - 4];
780 }
781 else {
782 ref_or_si = coords[ref_pos];
783 }
784 }
785 else {
786 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
787 }
788
789 sampler_src = 2;
790 break;
791 case TOY_OPCODE_TGSI_TXB2:
792 if (ref_pos >= 0) {
793 assert(ref_pos < 4);
794
795 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
796 ref_or_si = coords[ref_pos];
797 }
798 else {
799 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
800 }
801
802 {
803 struct toy_src src1[4];
804 tsrc_transpose(inst->src[1], src1);
805 bias_or_lod = src1[0];
806 }
807
808 sampler_src = 2;
809 break;
810 case TOY_OPCODE_TGSI_TXL2:
811 if (ref_pos >= 0) {
812 assert(ref_pos < 4);
813
814 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
815 ref_or_si = coords[ref_pos];
816 }
817 else {
818 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
819 }
820
821 {
822 struct toy_src src1[4];
823 tsrc_transpose(inst->src[1], src1);
824 bias_or_lod = src1[0];
825 }
826
827 sampler_src = 2;
828 break;
829 default:
830 assert(!"unhandled sampling opcode");
831 return tsrc_null();
832 break;
833 }
834
835 assert(inst->src[sampler_src].file == TOY_FILE_IMM);
836 sampler_index = inst->src[sampler_src].val32;
837 binding_table_index = ILO_WM_TEXTURE_SURFACE(sampler_index);
838
839 /*
840 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
841 *
842 * "Note that the (cube map) coordinates delivered to the sampling
843 * engine must already have been divided by the component with the
844 * largest absolute value."
845 */
846 switch (inst->tex.target) {
847 case TGSI_TEXTURE_CUBE:
848 case TGSI_TEXTURE_SHADOWCUBE:
849 case TGSI_TEXTURE_CUBE_ARRAY:
850 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
851 /* TXQ does not need coordinates */
852 if (num_coords >= 3) {
853 struct toy_dst tmp[4];
854
855 tc_alloc_tmp4(tc, tmp);
856
857 tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
858 tsrc_absolute(coords[1]), BRW_CONDITIONAL_GE);
859 tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
860 tsrc_absolute(coords[2]), BRW_CONDITIONAL_GE);
861 tc_INV(tc, tmp[3], tsrc_from(tmp[3]));
862
863 for (i = 0; i < 3; i++) {
864 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
865 coords[i] = tsrc_from(tmp[i]);
866 }
867 }
868 break;
869 }
870
871 /*
872 * Saturate (s, t, r). saturate_coords is set for sampler and coordinate
873 * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is
874 * so that sampling outside the border gets the correct colors.
875 */
876 for (i = 0; i < MIN2(num_coords, 3); i++) {
877 bool is_rect;
878
879 if (!(saturate_coords[i] & (1 << sampler_index)))
880 continue;
881
882 switch (inst->tex.target) {
883 case TGSI_TEXTURE_RECT:
884 case TGSI_TEXTURE_SHADOWRECT:
885 is_rect = true;
886 break;
887 default:
888 is_rect = false;
889 break;
890 }
891
892 if (is_rect) {
893 struct toy_src min, max;
894 struct toy_dst tmp;
895
896 tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
897 tmp = tc_alloc_tmp(tc);
898
899 /* saturate to [0, width] or [0, height] */
900 /* TODO TXQ? */
901 min = tsrc_imm_f(0.0f);
902 max = tsrc_imm_f(2048.0f);
903
904 tc_SEL(tc, tmp, coords[i], min, BRW_CONDITIONAL_G);
905 tc_SEL(tc, tmp, tsrc_from(tmp), max, BRW_CONDITIONAL_L);
906
907 coords[i] = tsrc_from(tmp);
908 }
909 else {
910 struct toy_dst tmp;
911 struct toy_inst *inst2;
912
913 tmp = tc_alloc_tmp(tc);
914
915 /* saturate to [0.0f, 1.0f] */
916 inst2 = tc_MOV(tc, tmp, coords[i]);
917 inst2->saturate = true;
918
919 coords[i] = tsrc_from(tmp);
920 }
921 }
922
923 /* set up sampler parameters */
924 if (tc->dev->gen >= ILO_GEN(7)) {
925 msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
926 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
927 }
928 else {
929 msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
930 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
931 }
932
933 /*
934 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
935 *
936 * "The maximum message length allowed to the sampler is 11. This would
937 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
938 * SIMD16."
939 */
940 if (msg_len > 11)
941 tc_fail(tc, "maximum length for messages to the sampler is 11");
942
943 if (ret_sampler_index)
944 *ret_sampler_index = sampler_index;
945
946 return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
947 false, simd_mode, msg_type, sampler_index, binding_table_index);
948 }
949
950 static void
951 fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
952 struct toy_inst *inst)
953 {
954 struct toy_compiler *tc = &fcc->tc;
955 struct toy_dst dst[4], tmp[4];
956 struct toy_src desc;
957 unsigned sampler_index;
958 int swizzles[4], i;
959 bool need_filter;
960
961 desc = fs_prepare_tgsi_sampling(tc, inst,
962 fcc->first_free_mrf,
963 fcc->variant->saturate_tex_coords,
964 &sampler_index);
965
966 switch (inst->opcode) {
967 case TOY_OPCODE_TGSI_TXF:
968 case TOY_OPCODE_TGSI_TXQ:
969 case TOY_OPCODE_TGSI_TXQ_LZ:
970 need_filter = false;
971 break;
972 default:
973 need_filter = true;
974 break;
975 }
976
977 toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER);
978 inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
979 inst->src[1] = desc;
980 for (i = 2; i < Elements(inst->src); i++)
981 inst->src[i] = tsrc_null();
982
983 /* write to temps first */
984 tc_alloc_tmp4(tc, tmp);
985 tdst_transpose(inst->dst, dst);
986 inst->dst = tmp[0];
987
988 tc_move_inst(tc, inst);
989
990 if (need_filter) {
991 assert(sampler_index < fcc->variant->num_sampler_views);
992 swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
993 swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
994 swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
995 swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
996 }
997 else {
998 swizzles[0] = PIPE_SWIZZLE_RED;
999 swizzles[1] = PIPE_SWIZZLE_GREEN;
1000 swizzles[2] = PIPE_SWIZZLE_BLUE;
1001 swizzles[3] = PIPE_SWIZZLE_ALPHA;
1002 }
1003
1004 /* swizzle the results */
1005 for (i = 0; i < 4; i++) {
1006 switch (swizzles[i]) {
1007 case PIPE_SWIZZLE_ZERO:
1008 tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
1009 break;
1010 case PIPE_SWIZZLE_ONE:
1011 tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
1012 break;
1013 default:
1014 tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
1015 break;
1016 }
1017 }
1018 }
1019
1020 static void
1021 fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
1022 {
1023 struct toy_dst dst[4];
1024 struct toy_src src[4];
1025 int i;
1026
1027 tdst_transpose(inst->dst, dst);
1028 tsrc_transpose(inst->src[0], src);
1029
1030 /*
1031 * Every four fragments are from a 2x2 subspan, with
1032 *
1033 * fragment 1 on the top-left,
1034 * fragment 2 on the top-right,
1035 * fragment 3 on the bottom-left,
1036 * fragment 4 on the bottom-right.
1037 *
1038 * DDX should thus produce
1039 *
1040 * dst = src.yyww - src.xxzz
1041 *
1042 * and DDY should produce
1043 *
1044 * dst = src.zzww - src.xxyy
1045 *
1046 * But since we are in BRW_ALIGN_1, swizzling does not work and we have to
1047 * play with the region parameters.
1048 */
1049 if (inst->opcode == TOY_OPCODE_DDX) {
1050 for (i = 0; i < 4; i++) {
1051 struct toy_src left, right;
1052
1053 left = tsrc_rect(src[i], TOY_RECT_220);
1054 right = tsrc_offset(left, 0, 1);
1055
1056 tc_ADD(tc, dst[i], right, tsrc_negate(left));
1057 }
1058 }
1059 else {
1060 for (i = 0; i < 4; i++) {
1061 struct toy_src top, bottom;
1062
1063 /* approximate with dst = src.zzzz - src.xxxx */
1064 top = tsrc_rect(src[i], TOY_RECT_440);
1065 bottom = tsrc_offset(top, 0, 2);
1066
1067 tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
1068 }
1069 }
1070
1071 tc_discard_inst(tc, inst);
1072 }
1073
1074 static void
1075 fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
1076 {
1077 /* fs_write_fb() has set up the message registers */
1078 toy_compiler_lower_to_send(tc, inst, true,
1079 GEN6_SFID_DATAPORT_RENDER_CACHE);
1080 }
1081
1082 static void
1083 fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
1084 {
1085 struct toy_dst pixel_mask_dst;
1086 struct toy_src f0, pixel_mask;
1087 struct toy_inst *tmp;
1088
1089 /* lower half of r1.7:ud */
1090 pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
1091 pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);
1092
1093 f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, BRW_ARF_FLAG, 0)), TOY_RECT_010);
1094
1095 /* KILP or KIL */
1096 if (tsrc_is_null(inst->src[0])) {
1097 struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
1098 struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, BRW_ARF_FLAG, 0));
1099
1100 /* create a mask that masks out all pixels */
1101 tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
1102 tmp->exec_size = BRW_EXECUTE_1;
1103 tmp->mask_ctrl = BRW_MASK_DISABLE;
1104
1105 tc_CMP(tc, tdst_null(), dummy, dummy, BRW_CONDITIONAL_NEQ);
1106
1107 /* swapping the two src operands breaks glBitmap()!? */
1108 tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1109 tmp->exec_size = BRW_EXECUTE_1;
1110 tmp->mask_ctrl = BRW_MASK_DISABLE;
1111 }
1112 else {
1113 struct toy_src src[4];
1114 int i;
1115
1116 tsrc_transpose(inst->src[0], src);
1117 /* mask out killed pixels */
1118 for (i = 0; i < 4; i++) {
1119 tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
1120 BRW_CONDITIONAL_GE);
1121
1122 /* swapping the two src operands breaks glBitmap()!? */
1123 tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1124 tmp->exec_size = BRW_EXECUTE_1;
1125 tmp->mask_ctrl = BRW_MASK_DISABLE;
1126 }
1127 }
1128
1129 tc_discard_inst(tc, inst);
1130 }
1131
1132 static void
1133 fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
1134 {
1135 struct toy_compiler *tc = &fcc->tc;
1136 struct toy_inst *inst;
1137
1138 /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1139 tc_head(tc);
1140 while ((inst = tc_next(tc)) != NULL) {
1141 switch (inst->opcode) {
1142 case TOY_OPCODE_TGSI_IN:
1143 case TOY_OPCODE_TGSI_CONST:
1144 case TOY_OPCODE_TGSI_SV:
1145 case TOY_OPCODE_TGSI_IMM:
1146 fs_lower_opcode_tgsi_direct(fcc, inst);
1147 break;
1148 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
1149 case TOY_OPCODE_TGSI_INDIRECT_STORE:
1150 fs_lower_opcode_tgsi_indirect(fcc, inst);
1151 break;
1152 case TOY_OPCODE_TGSI_TEX:
1153 case TOY_OPCODE_TGSI_TXB:
1154 case TOY_OPCODE_TGSI_TXD:
1155 case TOY_OPCODE_TGSI_TXL:
1156 case TOY_OPCODE_TGSI_TXP:
1157 case TOY_OPCODE_TGSI_TXF:
1158 case TOY_OPCODE_TGSI_TXQ:
1159 case TOY_OPCODE_TGSI_TXQ_LZ:
1160 case TOY_OPCODE_TGSI_TEX2:
1161 case TOY_OPCODE_TGSI_TXB2:
1162 case TOY_OPCODE_TGSI_TXL2:
1163 case TOY_OPCODE_TGSI_SAMPLE:
1164 case TOY_OPCODE_TGSI_SAMPLE_I:
1165 case TOY_OPCODE_TGSI_SAMPLE_I_MS:
1166 case TOY_OPCODE_TGSI_SAMPLE_B:
1167 case TOY_OPCODE_TGSI_SAMPLE_C:
1168 case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
1169 case TOY_OPCODE_TGSI_SAMPLE_D:
1170 case TOY_OPCODE_TGSI_SAMPLE_L:
1171 case TOY_OPCODE_TGSI_GATHER4:
1172 case TOY_OPCODE_TGSI_SVIEWINFO:
1173 case TOY_OPCODE_TGSI_SAMPLE_POS:
1174 case TOY_OPCODE_TGSI_SAMPLE_INFO:
1175 fs_lower_opcode_tgsi_sampling(fcc, inst);
1176 break;
1177 }
1178 }
1179
1180 tc_head(tc);
1181 while ((inst = tc_next(tc)) != NULL) {
1182 switch (inst->opcode) {
1183 case TOY_OPCODE_INV:
1184 case TOY_OPCODE_LOG:
1185 case TOY_OPCODE_EXP:
1186 case TOY_OPCODE_SQRT:
1187 case TOY_OPCODE_RSQ:
1188 case TOY_OPCODE_SIN:
1189 case TOY_OPCODE_COS:
1190 case TOY_OPCODE_FDIV:
1191 case TOY_OPCODE_POW:
1192 case TOY_OPCODE_INT_DIV_QUOTIENT:
1193 case TOY_OPCODE_INT_DIV_REMAINDER:
1194 toy_compiler_lower_math(tc, inst);
1195 break;
1196 case TOY_OPCODE_DDX:
1197 case TOY_OPCODE_DDY:
1198 fs_lower_opcode_derivative(tc, inst);
1199 break;
1200 case TOY_OPCODE_FB_WRITE:
1201 fs_lower_opcode_fb_write(tc, inst);
1202 break;
1203 case TOY_OPCODE_KIL:
1204 fs_lower_opcode_kil(tc, inst);
1205 break;
1206 default:
1207 if (inst->opcode > 127)
1208 tc_fail(tc, "unhandled virtual opcode");
1209 break;
1210 }
1211 }
1212 }
1213
1214 /**
1215 * Compile the shader.
1216 */
1217 static bool
1218 fs_compile(struct fs_compile_context *fcc)
1219 {
1220 struct toy_compiler *tc = &fcc->tc;
1221 struct ilo_shader *sh = fcc->shader;
1222
1223 fs_lower_virtual_opcodes(fcc);
1224 toy_compiler_legalize_for_ra(tc);
1225 toy_compiler_optimize(tc);
1226 toy_compiler_allocate_registers(tc,
1227 fcc->first_free_grf,
1228 fcc->last_free_grf,
1229 fcc->num_grf_per_vrf);
1230 toy_compiler_legalize_for_asm(tc);
1231
1232 if (tc->fail) {
1233 ilo_err("failed to legalize FS instructions: %s\n", tc->reason);
1234 return false;
1235 }
1236
1237 if (ilo_debug & ILO_DEBUG_FS) {
1238 ilo_printf("legalized instructions:\n");
1239 toy_compiler_dump(tc);
1240 ilo_printf("\n");
1241 }
1242
1243 if (true) {
1244 sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
1245 }
1246 else {
1247 static const uint32_t microcode[] = {
1248 /* fill in the microcode here */
1249 0x0, 0x0, 0x0, 0x0,
1250 };
1251 const bool swap = true;
1252
1253 sh->kernel_size = sizeof(microcode);
1254 sh->kernel = MALLOC(sh->kernel_size);
1255
1256 if (sh->kernel) {
1257 const int num_dwords = sizeof(microcode) / 4;
1258 const uint32_t *src = microcode;
1259 uint32_t *dst = (uint32_t *) sh->kernel;
1260 int i;
1261
1262 for (i = 0; i < num_dwords; i += 4) {
1263 if (swap) {
1264 dst[i + 0] = src[i + 3];
1265 dst[i + 1] = src[i + 2];
1266 dst[i + 2] = src[i + 1];
1267 dst[i + 3] = src[i + 0];
1268 }
1269 else {
1270 memcpy(dst, src, 16);
1271 }
1272 }
1273 }
1274 }
1275
1276 if (!sh->kernel) {
1277 ilo_err("failed to compile FS: %s\n", tc->reason);
1278 return false;
1279 }
1280
1281 if (ilo_debug & ILO_DEBUG_FS) {
1282 ilo_printf("disassembly:\n");
1283 toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
1284 ilo_printf("\n");
1285 }
1286
1287 return true;
1288 }
1289
1290 /**
1291 * Emit instructions to write the color buffers (and the depth buffer).
1292 */
1293 static void
1294 fs_write_fb(struct fs_compile_context *fcc)
1295 {
1296 struct toy_compiler *tc = &fcc->tc;
1297 int base_mrf = fcc->first_free_mrf;
1298 const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
1299 bool header_present = false;
1300 struct toy_src desc;
1301 unsigned msg_type, ctrl;
1302 int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
1303 int pos_slot = -1, cbuf, i;
1304
1305 for (i = 0; i < Elements(color_slots); i++)
1306 color_slots[i] = -1;
1307
1308 for (i = 0; i < fcc->tgsi.num_outputs; i++) {
1309 if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
1310 assert(fcc->tgsi.outputs[i].semantic_index < Elements(color_slots));
1311 color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
1312 }
1313 else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1314 pos_slot = i;
1315 }
1316 }
1317
1318 num_cbufs = fcc->variant->u.fs.num_cbufs;
1319 /* still need to send EOT (and probably depth) */
1320 if (!num_cbufs)
1321 num_cbufs = 1;
1322
1323 /* we need the header to specify the pixel mask or render target */
1324 if (fcc->tgsi.uses_kill || num_cbufs > 1) {
1325 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
1326 struct toy_inst *inst;
1327
1328 inst = tc_MOV(tc, header, r0);
1329 inst->mask_ctrl = BRW_MASK_DISABLE;
1330 base_mrf += fcc->num_grf_per_vrf;
1331
1332 /* this is a two-register header */
1333 if (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) {
1334 inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
1335 inst->mask_ctrl = BRW_MASK_DISABLE;
1336 base_mrf += fcc->num_grf_per_vrf;
1337 }
1338
1339 header_present = true;
1340 }
1341
1342 for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
1343 const int slot =
1344 color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
1345 int mrf = base_mrf, vrf;
1346 struct toy_src src[4];
1347
1348 if (slot >= 0) {
1349 const unsigned undefined_mask =
1350 fcc->tgsi.outputs[slot].undefined_mask;
1351 const int index = fcc->tgsi.outputs[slot].index;
1352
1353 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1354 if (vrf >= 0) {
1355 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1356 tsrc_transpose(tmp, src);
1357 }
1358 else {
1359 /* use (0, 0, 0, 0) */
1360 tsrc_transpose(tsrc_imm_f(0.0f), src);
1361 }
1362
1363 for (i = 0; i < 4; i++) {
1364 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1365
1366 if (undefined_mask & (1 << i))
1367 src[i] = tsrc_imm_f(0.0f);
1368
1369 tc_MOV(tc, dst, src[i]);
1370
1371 mrf += fcc->num_grf_per_vrf;
1372 }
1373 }
1374 else {
1375 /* use (0, 0, 0, 0) */
1376 for (i = 0; i < 4; i++) {
1377 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1378
1379 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
1380 mrf += fcc->num_grf_per_vrf;
1381 }
1382 }
1383
1384 /* select BLEND_STATE[rt] */
1385 if (cbuf > 0) {
1386 struct toy_inst *inst;
1387
1388 inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
1389 inst->mask_ctrl = BRW_MASK_DISABLE;
1390 inst->exec_size = BRW_EXECUTE_1;
1391 inst->src[0].rect = TOY_RECT_010;
1392 }
1393
1394 if (cbuf == 0 && pos_slot >= 0) {
1395 const int index = fcc->tgsi.outputs[pos_slot].index;
1396 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1397 struct toy_src src[4];
1398 int vrf;
1399
1400 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1401 if (vrf >= 0) {
1402 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1403 tsrc_transpose(tmp, src);
1404 }
1405 else {
1406 /* use (0, 0, 0, 0) */
1407 tsrc_transpose(tsrc_imm_f(0.0f), src);
1408 }
1409
1410 /* only Z */
1411 tc_MOV(tc, dst, src[2]);
1412
1413 mrf += fcc->num_grf_per_vrf;
1414 }
1415
1416 msg_type = (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ?
1417 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE :
1418 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1419
1420 ctrl = (cbuf == num_cbufs - 1) << 12 |
1421 msg_type << 8;
1422
1423 desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
1424 mrf - fcc->first_free_mrf, 0,
1425 header_present, false,
1426 GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
1427 ctrl, ILO_WM_DRAW_SURFACE(cbuf));
1428
1429 tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
1430 tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
1431 }
1432 }
1433
1434 /**
1435 * Set up shader outputs for fixed-function units.
1436 */
1437 static void
1438 fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
1439 {
1440 int i;
1441
1442 sh->out.count = tgsi->num_outputs;
1443 for (i = 0; i < tgsi->num_outputs; i++) {
1444 sh->out.register_indices[i] = tgsi->outputs[i].index;
1445 sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name;
1446 sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index;
1447
1448 if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION)
1449 sh->out.has_pos = true;
1450 }
1451 }
1452
1453 /**
1454 * Set up shader inputs for fixed-function units.
1455 */
1456 static void
1457 fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
1458 bool flatshade)
1459 {
1460 int i;
1461
1462 sh->in.count = tgsi->num_inputs;
1463 for (i = 0; i < tgsi->num_inputs; i++) {
1464 sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
1465 sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
1466 sh->in.interp[i] = tgsi->inputs[i].interp;
1467 sh->in.centroid[i] = tgsi->inputs[i].centroid;
1468
1469 if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1470 sh->in.has_pos = true;
1471 continue;
1472 }
1473 else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
1474 continue;
1475 }
1476
1477 switch (tgsi->inputs[i].interp) {
1478 case TGSI_INTERPOLATE_LINEAR:
1479 sh->in.has_linear_interp = true;
1480
1481 if (tgsi->inputs[i].centroid) {
1482 sh->in.barycentric_interpolation_mode |=
1483 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
1484 }
1485 else {
1486 sh->in.barycentric_interpolation_mode |=
1487 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
1488 }
1489 break;
1490 case TGSI_INTERPOLATE_COLOR:
1491 if (flatshade)
1492 break;
1493 /* fall through */
1494 case TGSI_INTERPOLATE_PERSPECTIVE:
1495 if (tgsi->inputs[i].centroid) {
1496 sh->in.barycentric_interpolation_mode |=
1497 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
1498 }
1499 else {
1500 sh->in.barycentric_interpolation_mode |=
1501 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
1502 }
1503 break;
1504 default:
1505 break;
1506 }
1507 }
1508 }
1509
1510 static int
1511 fs_setup_payloads(struct fs_compile_context *fcc)
1512 {
1513 const struct ilo_shader *sh = fcc->shader;
1514 int grf, i;
1515
1516 grf = 0;
1517
1518 /* r0: header */
1519 grf++;
1520
1521 /* r1-r2: coordinates and etc. */
1522 grf += (fcc->dispatch_mode == GEN6_WM_32_DISPATCH_ENABLE) ? 2 : 1;
1523
1524 for (i = 0; i < Elements(fcc->payloads); i++) {
1525 int interp;
1526
1527 /* r3-r26 or r32-r55: barycentric interpolation parameters */
1528 for (interp = 0; interp < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; interp++) {
1529 if (!(sh->in.barycentric_interpolation_mode & (1 << interp)))
1530 continue;
1531
1532 fcc->payloads[i].barycentric_interps[interp] = grf;
1533 grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 2 : 4;
1534 }
1535
1536 /* r27-r28 or r56-r57: interpoloated depth */
1537 if (sh->in.has_pos) {
1538 fcc->payloads[i].source_depth = grf;
1539 grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
1540 }
1541
1542 /* r29-r30 or r58-r59: interpoloated w */
1543 if (sh->in.has_pos) {
1544 fcc->payloads[i].source_w = grf;
1545 grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
1546 }
1547
1548 /* r31 or r60: position offset */
1549 if (false) {
1550 fcc->payloads[i].pos_offset = grf;
1551 grf++;
1552 }
1553
1554 if (fcc->dispatch_mode != GEN6_WM_32_DISPATCH_ENABLE)
1555 break;
1556 }
1557
1558 return grf;
1559 }
1560
1561 /**
1562 * Translate the TGSI tokens.
1563 */
1564 static bool
1565 fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1566 struct toy_tgsi *tgsi)
1567 {
1568 if (ilo_debug & ILO_DEBUG_FS) {
1569 ilo_printf("dumping fragment shader\n");
1570 ilo_printf("\n");
1571
1572 tgsi_dump(tokens, 0);
1573 ilo_printf("\n");
1574 }
1575
1576 toy_compiler_translate_tgsi(tc, tokens, false, tgsi);
1577 if (tc->fail) {
1578 ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason);
1579 return false;
1580 }
1581
1582 if (ilo_debug & ILO_DEBUG_FS) {
1583 ilo_printf("TGSI translator:\n");
1584 toy_tgsi_dump(tgsi);
1585 ilo_printf("\n");
1586 toy_compiler_dump(tc);
1587 ilo_printf("\n");
1588 }
1589
1590 return true;
1591 }
1592
1593 /**
1594 * Set up FS compile context. This includes translating the TGSI tokens.
1595 */
1596 static bool
1597 fs_setup(struct fs_compile_context *fcc,
1598 const struct ilo_shader_state *state,
1599 const struct ilo_shader_variant *variant)
1600 {
1601 int num_consts;
1602
1603 memset(fcc, 0, sizeof(*fcc));
1604
1605 fcc->shader = CALLOC_STRUCT(ilo_shader);
1606 if (!fcc->shader)
1607 return false;
1608
1609 fcc->variant = variant;
1610
1611 toy_compiler_init(&fcc->tc, state->info.dev);
1612
1613 fcc->dispatch_mode = GEN6_WM_8_DISPATCH_ENABLE;
1614
1615 fcc->tc.templ.access_mode = BRW_ALIGN_1;
1616 if (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) {
1617 fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1H;
1618 fcc->tc.templ.exec_size = BRW_EXECUTE_16;
1619 }
1620 else {
1621 fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1Q;
1622 fcc->tc.templ.exec_size = BRW_EXECUTE_8;
1623 }
1624
1625 fcc->tc.rect_linear_width = 8;
1626
1627 /*
1628 * The classic driver uses the sampler cache (gen6) or the data cache
1629 * (gen7). Why?
1630 */
1631 fcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;
1632
1633 if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
1634 toy_compiler_cleanup(&fcc->tc);
1635 FREE(fcc->shader);
1636 return false;
1637 }
1638
1639 fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
1640 fs_setup_shader_out(fcc->shader, &fcc->tgsi);
1641
1642 /* we do not make use of push constant buffers yet */
1643 num_consts = 0;
1644
1645 fcc->first_const_grf = fs_setup_payloads(fcc);
1646 fcc->first_attr_grf = fcc->first_const_grf + num_consts;
1647 fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
1648 fcc->last_free_grf = 127;
1649
1650 /* m0 is reserved for system routines */
1651 fcc->first_free_mrf = 1;
1652 fcc->last_free_mrf = 15;
1653
1654 /* instructions are compressed with BRW_EXECUTE_16 */
1655 fcc->num_grf_per_vrf =
1656 (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ? 2 : 1;
1657
1658 if (fcc->tc.dev->gen >= ILO_GEN(7)) {
1659 fcc->last_free_grf -= 15;
1660 fcc->first_free_mrf = fcc->last_free_grf + 1;
1661 fcc->last_free_mrf = fcc->first_free_mrf + 14;
1662 }
1663
1664 fcc->shader->in.start_grf = fcc->first_const_grf;
1665 fcc->shader->has_kill = fcc->tgsi.uses_kill;
1666 fcc->shader->dispatch_16 =
1667 (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE);
1668
1669 return true;
1670 }
1671
1672 /**
1673 * Compile the fragment shader.
1674 */
1675 struct ilo_shader *
1676 ilo_shader_compile_fs(const struct ilo_shader_state *state,
1677 const struct ilo_shader_variant *variant)
1678 {
1679 struct fs_compile_context fcc;
1680
1681 if (!fs_setup(&fcc, state, variant))
1682 return NULL;
1683
1684 fs_write_fb(&fcc);
1685
1686 if (!fs_compile(&fcc)) {
1687 FREE(fcc.shader);
1688 fcc.shader = NULL;
1689 }
1690
1691 toy_tgsi_cleanup(&fcc.tgsi);
1692 toy_compiler_cleanup(&fcc.tc);
1693
1694 return fcc.shader;
1695 }