48d5721631c40c443f628cc77f5714c5f533259d
[mesa.git] / src / gallium / drivers / ilo / shader / ilo_shader_fs.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_util.h"
30 #include "toy_compiler.h"
31 #include "toy_tgsi.h"
32 #include "toy_legalize.h"
33 #include "toy_optimize.h"
34 #include "toy_helpers.h"
35 #include "ilo_context.h"
36 #include "ilo_shader_internal.h"
37
38 struct fs_compile_context {
39 struct ilo_shader *shader;
40 const struct ilo_shader_variant *variant;
41
42 struct toy_compiler tc;
43 struct toy_tgsi tgsi;
44
45 enum brw_message_target const_cache;
46 int dispatch_mode;
47
48 struct {
49 int barycentric_interps[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
50 int source_depth;
51 int source_w;
52 int pos_offset;
53 } payloads[2];
54
55 int first_const_grf;
56 int first_attr_grf;
57 int first_free_grf;
58 int last_free_grf;
59
60 int num_grf_per_vrf;
61
62 int first_free_mrf;
63 int last_free_mrf;
64 };
65
66 static void
67 fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
68 {
69 struct toy_compiler *tc = &fcc->tc;
70 const struct toy_src src_z =
71 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
72 const struct toy_src src_w =
73 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
74 const int fb_height =
75 (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
76 const bool origin_upper_left =
77 (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
78 const bool pixel_center_integer =
79 (fcc->tgsi.props.fs_coord_pixel_center ==
80 TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
81 struct toy_src subspan_x, subspan_y;
82 struct toy_dst tmp, tmp_uw;
83 struct toy_dst real_dst[4];
84
85 tdst_transpose(dst, real_dst);
86
87 subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
88 subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);
89
90 subspan_y = tsrc_offset(subspan_x, 0, 1);
91
92 tmp_uw = tdst_uw(tc_alloc_tmp(tc));
93 tmp = tc_alloc_tmp(tc);
94
95 /* X */
96 tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
97 tc_MOV(tc, tmp, tsrc_from(tmp_uw));
98 if (pixel_center_integer)
99 tc_MOV(tc, real_dst[0], tsrc_from(tmp));
100 else
101 tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));
102
103 /* Y */
104 tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
105 tc_MOV(tc, tmp, tsrc_from(tmp_uw));
106 if (origin_upper_left && pixel_center_integer) {
107 tc_MOV(tc, real_dst[1], tsrc_from(tmp));
108 }
109 else {
110 struct toy_src y = tsrc_from(tmp);
111 float offset = 0.0f;
112
113 if (!pixel_center_integer)
114 offset += 0.5f;
115
116 if (!origin_upper_left) {
117 offset += (float) (fb_height - 1);
118 y = tsrc_negate(y);
119 }
120
121 tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
122 }
123
124 /* Z and W */
125 tc_MOV(tc, real_dst[2], src_z);
126 tc_INV(tc, real_dst[3], src_w);
127 }
128
129 static void
130 fetch_face(struct fs_compile_context *fcc, struct toy_dst dst)
131 {
132 struct toy_compiler *tc = &fcc->tc;
133 const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0));
134 struct toy_dst tmp_f, tmp;
135 struct toy_dst real_dst[4];
136
137 tdst_transpose(dst, real_dst);
138
139 tmp_f = tc_alloc_tmp(tc);
140 tmp = tdst_d(tmp_f);
141 tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15));
142 tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1));
143 tc_MOV(tc, tmp_f, tsrc_from(tmp));
144
145 /* convert to 1.0 and -1.0 */
146 tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f));
147 tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f));
148
149 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
150 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
151 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
152 }
153
154 static void
155 fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
156 {
157 struct toy_compiler *tc = &fcc->tc;
158 struct toy_dst real_dst[4];
159 bool is_const = false;
160 int grf, mode, ch;
161
162 tdst_transpose(dst, real_dst);
163
164 grf = fcc->first_attr_grf + slot * 2;
165
166 switch (fcc->tgsi.inputs[slot].interp) {
167 case TGSI_INTERPOLATE_CONSTANT:
168 is_const = true;
169 break;
170 case TGSI_INTERPOLATE_LINEAR:
171 if (fcc->tgsi.inputs[slot].centroid)
172 mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
173 else
174 mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
175 break;
176 case TGSI_INTERPOLATE_COLOR:
177 if (fcc->variant->u.fs.flatshade) {
178 is_const = true;
179 break;
180 }
181 /* fall through */
182 case TGSI_INTERPOLATE_PERSPECTIVE:
183 if (fcc->tgsi.inputs[slot].centroid)
184 mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
185 else
186 mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
187 break;
188 default:
189 assert(!"unexpected FS interpolation");
190 mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
191 break;
192 }
193
194 if (is_const) {
195 struct toy_src a0[4];
196
197 a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
198 a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
199 a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
200 a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);
201
202 for (ch = 0; ch < 4; ch++)
203 tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
204 }
205 else {
206 struct toy_src attr[4], uv;
207
208 attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
209 attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
210 attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
211 attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);
212
213 uv = tsrc(TOY_FILE_GRF, fcc->payloads[0].barycentric_interps[mode], 0);
214
215 for (ch = 0; ch < 4; ch++) {
216 tc_add2(tc, BRW_OPCODE_PLN, real_dst[ch],
217 tsrc_rect(attr[ch], TOY_RECT_010), uv);
218 }
219 }
220
221 if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
222 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
223 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
224 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
225 }
226 }
227
228 static void
229 fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc,
230 struct toy_dst dst, int dim, int idx)
231 {
232 int slot;
233
234 assert(!dim);
235
236 slot = toy_tgsi_find_input(&fcc->tgsi, idx);
237 if (slot < 0)
238 return;
239
240 switch (fcc->tgsi.inputs[slot].semantic_name) {
241 case TGSI_SEMANTIC_POSITION:
242 fetch_position(fcc, dst);
243 break;
244 case TGSI_SEMANTIC_FACE:
245 fetch_face(fcc, dst);
246 break;
247 default:
248 fetch_attr(fcc, dst, slot);
249 break;
250 }
251 }
252
253 static void
254 fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
255 struct toy_dst dst, int dim,
256 struct toy_src idx)
257 {
258 const struct toy_dst offset =
259 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
260 struct toy_compiler *tc = &fcc->tc;
261 unsigned simd_mode, param_size;
262 struct toy_inst *inst;
263 struct toy_src desc, real_src[4];
264 struct toy_dst tmp, real_dst[4];
265 int i;
266
267 tsrc_transpose(idx, real_src);
268
269 /* set offset */
270 inst = tc_MOV(tc, offset, real_src[0]);
271 inst->mask_ctrl = BRW_MASK_DISABLE;
272
273 switch (inst->exec_size) {
274 case BRW_EXECUTE_8:
275 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
276 param_size = 1;
277 break;
278 case BRW_EXECUTE_16:
279 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
280 param_size = 2;
281 break;
282 default:
283 assert(!"unsupported execution size");
284 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
285 return;
286 break;
287 }
288
289 desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false,
290 simd_mode,
291 GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
292 0,
293 ILO_WM_CONST_SURFACE(dim));
294
295 tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0);
296 inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
297 inst->mask_ctrl = BRW_MASK_DISABLE;
298
299 tdst_transpose(dst, real_dst);
300 for (i = 0; i < 4; i++) {
301 const struct toy_src src =
302 tsrc_offset(tsrc_from(tmp), param_size * i, 0);
303
304 /* cast to type D to make sure these are raw moves */
305 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
306 }
307 }
308
309 static bool
310 fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
311 struct toy_dst dst, int dim,
312 struct toy_src idx)
313 {
314 const int grf = fcc->first_const_grf + idx.val32 / 2;
315 const int grf_subreg = (idx.val32 & 1) * 16;
316 struct toy_src src;
317 struct toy_dst real_dst[4];
318 int i;
319
320 if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
321 grf >= fcc->first_attr_grf)
322 return false;
323
324 src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010);
325
326 tdst_transpose(dst, real_dst);
327 for (i = 0; i < 4; i++) {
328 /* cast to type D to make sure these are raw moves */
329 tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i)));
330 }
331
332 return true;
333 }
334
335 static void
336 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
337 struct toy_dst dst, int dim, struct toy_src idx)
338 {
339 const struct toy_dst header =
340 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
341 const struct toy_dst global_offset =
342 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
343 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
344 struct toy_compiler *tc = &fcc->tc;
345 unsigned msg_type, msg_ctrl, msg_len;
346 struct toy_inst *inst;
347 struct toy_src desc;
348 struct toy_dst tmp, real_dst[4];
349 int i;
350
351 if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
352 return;
353
354 /* set message header */
355 inst = tc_MOV(tc, header, r0);
356 inst->mask_ctrl = BRW_MASK_DISABLE;
357
358 /* set global offset */
359 inst = tc_MOV(tc, global_offset, idx);
360 inst->mask_ctrl = BRW_MASK_DISABLE;
361 inst->exec_size = BRW_EXECUTE_1;
362 inst->src[0].rect = TOY_RECT_010;
363
364 msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
365 msg_ctrl = BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW << 8;
366 msg_len = 1;
367
368 desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
369 msg_type, msg_ctrl, ILO_WM_CONST_SURFACE(dim));
370
371 tmp = tc_alloc_tmp(tc);
372
373 tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);
374
375 tdst_transpose(dst, real_dst);
376 for (i = 0; i < 4; i++) {
377 const struct toy_src src =
378 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
379
380 /* cast to type D to make sure these are raw moves */
381 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
382 }
383 }
384
385 static void
386 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
387 struct toy_dst dst, int dim, struct toy_src idx)
388 {
389 struct toy_compiler *tc = &fcc->tc;
390 const struct toy_dst offset =
391 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
392 struct toy_src desc;
393 struct toy_inst *inst;
394 struct toy_dst tmp, real_dst[4];
395 int i;
396
397 if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
398 return;
399
400 /*
401 * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
402 * changed from OWord Block Read to ld to increase performance in the
403 * classic driver. Since we use the constant cache instead of the data
404 * cache, I wonder if we still want to follow the classic driver.
405 */
406
407 /* set offset */
408 inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
409 inst->exec_size = BRW_EXECUTE_8;
410 inst->mask_ctrl = BRW_MASK_DISABLE;
411
412 desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
413 BRW_SAMPLER_SIMD_MODE_SIMD4X2,
414 GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
415 0,
416 ILO_WM_CONST_SURFACE(dim));
417
418 tmp = tc_alloc_tmp(tc);
419 inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
420 inst->exec_size = BRW_EXECUTE_8;
421 inst->mask_ctrl = BRW_MASK_DISABLE;
422
423 tdst_transpose(dst, real_dst);
424 for (i = 0; i < 4; i++) {
425 const struct toy_src src =
426 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
427
428 /* cast to type D to make sure these are raw moves */
429 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
430 }
431 }
432
433 static void
434 fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
435 struct toy_dst dst, int idx)
436 {
437 const uint32_t *imm;
438 struct toy_dst real_dst[4];
439 int ch;
440
441 imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
442
443 tdst_transpose(dst, real_dst);
444 /* raw moves */
445 for (ch = 0; ch < 4; ch++)
446 tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch]));
447 }
448
449 static void
450 fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
451 struct toy_dst dst, int dim, int idx)
452 {
453 struct toy_compiler *tc = &fcc->tc;
454 const struct toy_tgsi *tgsi = &fcc->tgsi;
455 int slot;
456
457 assert(!dim);
458
459 slot = toy_tgsi_find_system_value(tgsi, idx);
460 if (slot < 0)
461 return;
462
463 switch (tgsi->system_values[slot].semantic_name) {
464 case TGSI_SEMANTIC_PRIMID:
465 case TGSI_SEMANTIC_INSTANCEID:
466 case TGSI_SEMANTIC_VERTEXID:
467 default:
468 tc_fail(tc, "unhandled system value");
469 tc_MOV(tc, dst, tsrc_imm_d(0));
470 break;
471 }
472 }
473
474 static void
475 fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
476 struct toy_inst *inst)
477 {
478 struct toy_compiler *tc = &fcc->tc;
479 int dim, idx;
480
481 assert(inst->src[0].file == TOY_FILE_IMM);
482 dim = inst->src[0].val32;
483
484 assert(inst->src[1].file == TOY_FILE_IMM);
485 idx = inst->src[1].val32;
486
487 switch (inst->opcode) {
488 case TOY_OPCODE_TGSI_IN:
489 fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
490 break;
491 case TOY_OPCODE_TGSI_CONST:
492 if (tc->dev->gen >= ILO_GEN(7))
493 fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
494 else
495 fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
496 break;
497 case TOY_OPCODE_TGSI_SV:
498 fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
499 break;
500 case TOY_OPCODE_TGSI_IMM:
501 assert(!dim);
502 fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
503 break;
504 default:
505 tc_fail(tc, "unhandled TGSI fetch");
506 break;
507 }
508
509 tc_discard_inst(tc, inst);
510 }
511
512 static void
513 fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
514 struct toy_inst *inst)
515 {
516 struct toy_compiler *tc = &fcc->tc;
517 enum tgsi_file_type file;
518 int dim, idx;
519 struct toy_src indirect_dim, indirect_idx;
520
521 assert(inst->src[0].file == TOY_FILE_IMM);
522 file = inst->src[0].val32;
523
524 assert(inst->src[1].file == TOY_FILE_IMM);
525 dim = inst->src[1].val32;
526 indirect_dim = inst->src[2];
527
528 assert(inst->src[3].file == TOY_FILE_IMM);
529 idx = inst->src[3].val32;
530 indirect_idx = inst->src[4];
531
532 /* no dimension indirection */
533 assert(indirect_dim.file == TOY_FILE_IMM);
534 dim += indirect_dim.val32;
535
536 switch (inst->opcode) {
537 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
538 if (file == TGSI_FILE_CONSTANT) {
539 if (idx) {
540 struct toy_dst tmp = tc_alloc_tmp(tc);
541
542 tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
543 indirect_idx = tsrc_from(tmp);
544 }
545
546 fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx);
547 break;
548 }
549 /* fall through */
550 case TOY_OPCODE_TGSI_INDIRECT_STORE:
551 default:
552 tc_fail(tc, "unhandled TGSI indirection");
553 break;
554 }
555
556 tc_discard_inst(tc, inst);
557 }
558
559 /**
560 * Emit instructions to move sampling parameters to the message registers.
561 */
562 static int
563 fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
564 int base_mrf, int param_size,
565 struct toy_src *coords, int num_coords,
566 struct toy_src bias_or_lod, struct toy_src ref_or_si,
567 struct toy_src *ddx, struct toy_src *ddy,
568 int num_derivs)
569 {
570 int num_params, i;
571
572 assert(num_coords <= 4);
573 assert(num_derivs <= 3 && num_derivs <= num_coords);
574
575 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
576 switch (msg_type) {
577 case GEN5_SAMPLER_MESSAGE_SAMPLE:
578 for (i = 0; i < num_coords; i++)
579 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
580 num_params = num_coords;
581 break;
582 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
583 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
584 for (i = 0; i < num_coords; i++)
585 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
586 tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
587 num_params = 5;
588 break;
589 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
590 for (i = 0; i < num_coords; i++)
591 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
592 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
593 num_params = 5;
594 break;
595 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
596 for (i = 0; i < num_coords; i++)
597 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
598 for (i = 0; i < num_derivs; i++) {
599 tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
600 tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
601 }
602 num_params = 4 + num_derivs * 2;
603 break;
604 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
605 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
606 for (i = 0; i < num_coords; i++)
607 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
608 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
609 tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
610 num_params = 6;
611 break;
612 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
613 assert(num_coords <= 3);
614
615 for (i = 0; i < num_coords; i++)
616 tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
617 tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
618 tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
619 num_params = 5;
620 break;
621 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
622 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
623 num_params = 1;
624 break;
625 default:
626 tc_fail(tc, "unknown sampler opcode");
627 num_params = 0;
628 break;
629 }
630 #undef SAMPLER_PARAM
631
632 return num_params * param_size;
633 }
634
635 static int
636 fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
637 int base_mrf, int param_size,
638 struct toy_src *coords, int num_coords,
639 struct toy_src bias_or_lod, struct toy_src ref_or_si,
640 struct toy_src *ddx, struct toy_src *ddy,
641 int num_derivs)
642 {
643 int num_params, i;
644
645 assert(num_coords <= 4);
646 assert(num_derivs <= 3 && num_derivs <= num_coords);
647
648 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
649 switch (msg_type) {
650 case GEN5_SAMPLER_MESSAGE_SAMPLE:
651 for (i = 0; i < num_coords; i++)
652 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
653 num_params = num_coords;
654 break;
655 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
656 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
657 tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
658 for (i = 0; i < num_coords; i++)
659 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
660 num_params = 1 + num_coords;
661 break;
662 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
663 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
664 for (i = 0; i < num_coords; i++)
665 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
666 num_params = 1 + num_coords;
667 break;
668 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
669 for (i = 0; i < num_coords; i++) {
670 tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
671 if (i < num_derivs) {
672 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
673 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
674 }
675 }
676 num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
677 break;
678 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
679 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
680 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
681 tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
682 for (i = 0; i < num_coords; i++)
683 tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
684 num_params = 2 + num_coords;
685 break;
686 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
687 assert(num_coords >= 1 && num_coords <= 3);
688
689 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
690 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
691 for (i = 1; i < num_coords; i++)
692 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
693 num_params = 1 + num_coords;
694 break;
695 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
696 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
697 num_params = 1;
698 break;
699 default:
700 tc_fail(tc, "unknown sampler opcode");
701 num_params = 0;
702 break;
703 }
704 #undef SAMPLER_PARAM
705
706 return num_params * param_size;
707 }
708
709 /**
710 * Set up message registers and return the message descriptor for sampling.
711 */
712 static struct toy_src
713 fs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
714 int base_mrf, const uint32_t *saturate_coords,
715 unsigned *ret_sampler_index)
716 {
717 unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
718 struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
719 int num_coords, ref_pos, num_derivs;
720 int sampler_src, param_size, i;
721
722 switch (inst->exec_size) {
723 case BRW_EXECUTE_8:
724 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
725 param_size = 1;
726 break;
727 case BRW_EXECUTE_16:
728 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
729 param_size = 2;
730 break;
731 default:
732 tc_fail(tc, "unsupported execute size for sampling");
733 return tsrc_null();
734 break;
735 }
736
737 num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
738 tsrc_transpose(inst->src[0], coords);
739 bias_or_lod = tsrc_null();
740 ref_or_si = tsrc_null();
741 num_derivs = 0;
742 sampler_src = 1;
743
744 /*
745 * For TXD,
746 *
747 * src0 := (x, y, z, w)
748 * src1 := ddx
749 * src2 := ddy
750 * src3 := sampler
751 *
752 * For TEX2, TXB2, and TXL2,
753 *
754 * src0 := (x, y, z, w)
755 * src1 := (v or bias or lod, ...)
756 * src2 := sampler
757 *
758 * For TEX, TXB, TXL, and TXP,
759 *
760 * src0 := (x, y, z, w or bias or lod or projection)
761 * src1 := sampler
762 *
763 * For TXQ,
764 *
765 * src0 := (lod, ...)
766 * src1 := sampler
767 *
768 * For TXQ_LZ,
769 *
770 * src0 := sampler
771 *
772 * And for TXF,
773 *
774 * src0 := (x, y, z, w or lod)
775 * src1 := sampler
776 *
777 * State trackers should not generate opcode+texture combinations with
778 * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
779 */
780 switch (inst->opcode) {
781 case TOY_OPCODE_TGSI_TEX:
782 if (ref_pos >= 0) {
783 assert(ref_pos < 4);
784
785 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
786 ref_or_si = coords[ref_pos];
787 }
788 else {
789 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
790 }
791 break;
792 case TOY_OPCODE_TGSI_TXD:
793 if (ref_pos >= 0)
794 tc_fail(tc, "TXD with shadow sampler not supported");
795
796 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
797 tsrc_transpose(inst->src[1], ddx);
798 tsrc_transpose(inst->src[2], ddy);
799 num_derivs = num_coords;
800 sampler_src = 3;
801 break;
802 case TOY_OPCODE_TGSI_TXP:
803 if (ref_pos >= 0) {
804 assert(ref_pos < 3);
805
806 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
807 ref_or_si = coords[ref_pos];
808 }
809 else {
810 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
811 }
812
813 /* project the coordinates */
814 {
815 struct toy_dst tmp[4];
816
817 tc_alloc_tmp4(tc, tmp);
818
819 tc_INV(tc, tmp[3], coords[3]);
820 for (i = 0; i < num_coords && i < 3; i++) {
821 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
822 coords[i] = tsrc_from(tmp[i]);
823 }
824
825 if (ref_pos >= i) {
826 tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
827 ref_or_si = tsrc_from(tmp[ref_pos]);
828 }
829 }
830 break;
831 case TOY_OPCODE_TGSI_TXB:
832 if (ref_pos >= 0) {
833 assert(ref_pos < 3);
834
835 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
836 ref_or_si = coords[ref_pos];
837 }
838 else {
839 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
840 }
841
842 bias_or_lod = coords[3];
843 break;
844 case TOY_OPCODE_TGSI_TXL:
845 if (ref_pos >= 0) {
846 assert(ref_pos < 3);
847
848 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
849 ref_or_si = coords[ref_pos];
850 }
851 else {
852 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
853 }
854
855 bias_or_lod = coords[3];
856 break;
857 case TOY_OPCODE_TGSI_TXF:
858 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
859
860 switch (inst->tex.target) {
861 case TGSI_TEXTURE_2D_MSAA:
862 case TGSI_TEXTURE_2D_ARRAY_MSAA:
863 assert(ref_pos >= 0 && ref_pos < 4);
864 /* lod is always 0 */
865 bias_or_lod = tsrc_imm_d(0);
866 ref_or_si = coords[ref_pos];
867 break;
868 default:
869 bias_or_lod = coords[3];
870 break;
871 }
872
873 /* offset the coordinates */
874 if (!tsrc_is_null(inst->tex.offsets[0])) {
875 struct toy_dst tmp[4];
876 struct toy_src offsets[4];
877
878 tc_alloc_tmp4(tc, tmp);
879 tsrc_transpose(inst->tex.offsets[0], offsets);
880
881 for (i = 0; i < num_coords; i++) {
882 tc_ADD(tc, tmp[i], coords[i], offsets[i]);
883 coords[i] = tsrc_from(tmp[i]);
884 }
885 }
886
887 sampler_src = 1;
888 break;
889 case TOY_OPCODE_TGSI_TXQ:
890 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
891 num_coords = 0;
892 bias_or_lod = coords[0];
893 break;
894 case TOY_OPCODE_TGSI_TXQ_LZ:
895 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
896 num_coords = 0;
897 sampler_src = 0;
898 break;
899 case TOY_OPCODE_TGSI_TEX2:
900 if (ref_pos >= 0) {
901 assert(ref_pos < 5);
902
903 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
904
905 if (ref_pos >= 4) {
906 struct toy_src src1[4];
907 tsrc_transpose(inst->src[1], src1);
908 ref_or_si = src1[ref_pos - 4];
909 }
910 else {
911 ref_or_si = coords[ref_pos];
912 }
913 }
914 else {
915 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
916 }
917
918 sampler_src = 2;
919 break;
920 case TOY_OPCODE_TGSI_TXB2:
921 if (ref_pos >= 0) {
922 assert(ref_pos < 4);
923
924 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
925 ref_or_si = coords[ref_pos];
926 }
927 else {
928 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
929 }
930
931 {
932 struct toy_src src1[4];
933 tsrc_transpose(inst->src[1], src1);
934 bias_or_lod = src1[0];
935 }
936
937 sampler_src = 2;
938 break;
939 case TOY_OPCODE_TGSI_TXL2:
940 if (ref_pos >= 0) {
941 assert(ref_pos < 4);
942
943 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
944 ref_or_si = coords[ref_pos];
945 }
946 else {
947 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
948 }
949
950 {
951 struct toy_src src1[4];
952 tsrc_transpose(inst->src[1], src1);
953 bias_or_lod = src1[0];
954 }
955
956 sampler_src = 2;
957 break;
958 default:
959 assert(!"unhandled sampling opcode");
960 return tsrc_null();
961 break;
962 }
963
964 assert(inst->src[sampler_src].file == TOY_FILE_IMM);
965 sampler_index = inst->src[sampler_src].val32;
966 binding_table_index = ILO_WM_TEXTURE_SURFACE(sampler_index);
967
968 /*
969 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
970 *
971 * "Note that the (cube map) coordinates delivered to the sampling
972 * engine must already have been divided by the component with the
973 * largest absolute value."
974 */
975 switch (inst->tex.target) {
976 case TGSI_TEXTURE_CUBE:
977 case TGSI_TEXTURE_SHADOWCUBE:
978 case TGSI_TEXTURE_CUBE_ARRAY:
979 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
980 /* TXQ does not need coordinates */
981 if (num_coords >= 3) {
982 struct toy_dst tmp[4];
983
984 tc_alloc_tmp4(tc, tmp);
985
986 tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
987 tsrc_absolute(coords[1]), BRW_CONDITIONAL_GE);
988 tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
989 tsrc_absolute(coords[2]), BRW_CONDITIONAL_GE);
990 tc_INV(tc, tmp[3], tsrc_from(tmp[3]));
991
992 for (i = 0; i < 3; i++) {
993 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
994 coords[i] = tsrc_from(tmp[i]);
995 }
996 }
997 break;
998 }
999
1000 /*
1001 * Saturate (s, t, r). saturate_coords is set for sampler and coordinate
1002 * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is
1003 * so that sampling outside the border gets the correct colors.
1004 */
1005 for (i = 0; i < MIN2(num_coords, 3); i++) {
1006 bool is_rect;
1007
1008 if (!(saturate_coords[i] & (1 << sampler_index)))
1009 continue;
1010
1011 switch (inst->tex.target) {
1012 case TGSI_TEXTURE_RECT:
1013 case TGSI_TEXTURE_SHADOWRECT:
1014 is_rect = true;
1015 break;
1016 default:
1017 is_rect = false;
1018 break;
1019 }
1020
1021 if (is_rect) {
1022 struct toy_src min, max;
1023 struct toy_dst tmp;
1024
1025 tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
1026 tmp = tc_alloc_tmp(tc);
1027
1028 /* saturate to [0, width] or [0, height] */
1029 /* TODO TXQ? */
1030 min = tsrc_imm_f(0.0f);
1031 max = tsrc_imm_f(2048.0f);
1032
1033 tc_SEL(tc, tmp, coords[i], min, BRW_CONDITIONAL_G);
1034 tc_SEL(tc, tmp, tsrc_from(tmp), max, BRW_CONDITIONAL_L);
1035
1036 coords[i] = tsrc_from(tmp);
1037 }
1038 else {
1039 struct toy_dst tmp;
1040 struct toy_inst *inst2;
1041
1042 tmp = tc_alloc_tmp(tc);
1043
1044 /* saturate to [0.0f, 1.0f] */
1045 inst2 = tc_MOV(tc, tmp, coords[i]);
1046 inst2->saturate = true;
1047
1048 coords[i] = tsrc_from(tmp);
1049 }
1050 }
1051
1052 /* set up sampler parameters */
1053 if (tc->dev->gen >= ILO_GEN(7)) {
1054 msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
1055 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1056 }
1057 else {
1058 msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
1059 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1060 }
1061
1062 /*
1063 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
1064 *
1065 * "The maximum message length allowed to the sampler is 11. This would
1066 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
1067 * SIMD16."
1068 */
1069 if (msg_len > 11)
1070 tc_fail(tc, "maximum length for messages to the sampler is 11");
1071
1072 if (ret_sampler_index)
1073 *ret_sampler_index = sampler_index;
1074
1075 return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
1076 false, simd_mode, msg_type, sampler_index, binding_table_index);
1077 }
1078
1079 static void
1080 fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
1081 struct toy_inst *inst)
1082 {
1083 struct toy_compiler *tc = &fcc->tc;
1084 struct toy_dst dst[4], tmp[4];
1085 struct toy_src desc;
1086 unsigned sampler_index;
1087 int swizzles[4], i;
1088 bool need_filter;
1089
1090 desc = fs_prepare_tgsi_sampling(tc, inst,
1091 fcc->first_free_mrf,
1092 fcc->variant->saturate_tex_coords,
1093 &sampler_index);
1094
1095 switch (inst->opcode) {
1096 case TOY_OPCODE_TGSI_TXF:
1097 case TOY_OPCODE_TGSI_TXQ:
1098 case TOY_OPCODE_TGSI_TXQ_LZ:
1099 need_filter = false;
1100 break;
1101 default:
1102 need_filter = true;
1103 break;
1104 }
1105
1106 toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER);
1107 inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
1108 inst->src[1] = desc;
1109 for (i = 2; i < Elements(inst->src); i++)
1110 inst->src[i] = tsrc_null();
1111
1112 /* write to temps first */
1113 tc_alloc_tmp4(tc, tmp);
1114 for (i = 0; i < 4; i++)
1115 tmp[i].type = inst->dst.type;
1116 tdst_transpose(inst->dst, dst);
1117 inst->dst = tmp[0];
1118
1119 tc_move_inst(tc, inst);
1120
1121 if (need_filter) {
1122 assert(sampler_index < fcc->variant->num_sampler_views);
1123 swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
1124 swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
1125 swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
1126 swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
1127 }
1128 else {
1129 swizzles[0] = PIPE_SWIZZLE_RED;
1130 swizzles[1] = PIPE_SWIZZLE_GREEN;
1131 swizzles[2] = PIPE_SWIZZLE_BLUE;
1132 swizzles[3] = PIPE_SWIZZLE_ALPHA;
1133 }
1134
1135 /* swizzle the results */
1136 for (i = 0; i < 4; i++) {
1137 switch (swizzles[i]) {
1138 case PIPE_SWIZZLE_ZERO:
1139 tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
1140 break;
1141 case PIPE_SWIZZLE_ONE:
1142 tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
1143 break;
1144 default:
1145 tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
1146 break;
1147 }
1148 }
1149 }
1150
1151 static void
1152 fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
1153 {
1154 struct toy_dst dst[4];
1155 struct toy_src src[4];
1156 int i;
1157
1158 tdst_transpose(inst->dst, dst);
1159 tsrc_transpose(inst->src[0], src);
1160
1161 /*
1162 * Every four fragments are from a 2x2 subspan, with
1163 *
1164 * fragment 1 on the top-left,
1165 * fragment 2 on the top-right,
1166 * fragment 3 on the bottom-left,
1167 * fragment 4 on the bottom-right.
1168 *
1169 * DDX should thus produce
1170 *
1171 * dst = src.yyww - src.xxzz
1172 *
1173 * and DDY should produce
1174 *
1175 * dst = src.zzww - src.xxyy
1176 *
1177 * But since we are in BRW_ALIGN_1, swizzling does not work and we have to
1178 * play with the region parameters.
1179 */
1180 if (inst->opcode == TOY_OPCODE_DDX) {
1181 for (i = 0; i < 4; i++) {
1182 struct toy_src left, right;
1183
1184 left = tsrc_rect(src[i], TOY_RECT_220);
1185 right = tsrc_offset(left, 0, 1);
1186
1187 tc_ADD(tc, dst[i], right, tsrc_negate(left));
1188 }
1189 }
1190 else {
1191 for (i = 0; i < 4; i++) {
1192 struct toy_src top, bottom;
1193
1194 /* approximate with dst = src.zzzz - src.xxxx */
1195 top = tsrc_rect(src[i], TOY_RECT_440);
1196 bottom = tsrc_offset(top, 0, 2);
1197
1198 tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
1199 }
1200 }
1201
1202 tc_discard_inst(tc, inst);
1203 }
1204
1205 static void
1206 fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
1207 {
1208 /* fs_write_fb() has set up the message registers */
1209 toy_compiler_lower_to_send(tc, inst, true,
1210 GEN6_SFID_DATAPORT_RENDER_CACHE);
1211 }
1212
1213 static void
1214 fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
1215 {
1216 struct toy_dst pixel_mask_dst;
1217 struct toy_src f0, pixel_mask;
1218 struct toy_inst *tmp;
1219
1220 /* lower half of r1.7:ud */
1221 pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
1222 pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);
1223
1224 f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, BRW_ARF_FLAG, 0)), TOY_RECT_010);
1225
1226 /* KILL or KILL_IF */
1227 if (tsrc_is_null(inst->src[0])) {
1228 struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
1229 struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, BRW_ARF_FLAG, 0));
1230
1231 /* create a mask that masks out all pixels */
1232 tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
1233 tmp->exec_size = BRW_EXECUTE_1;
1234 tmp->mask_ctrl = BRW_MASK_DISABLE;
1235
1236 tc_CMP(tc, tdst_null(), dummy, dummy, BRW_CONDITIONAL_NEQ);
1237
1238 /* swapping the two src operands breaks glBitmap()!? */
1239 tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1240 tmp->exec_size = BRW_EXECUTE_1;
1241 tmp->mask_ctrl = BRW_MASK_DISABLE;
1242 }
1243 else {
1244 struct toy_src src[4];
1245 int i;
1246
1247 tsrc_transpose(inst->src[0], src);
1248 /* mask out killed pixels */
1249 for (i = 0; i < 4; i++) {
1250 tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
1251 BRW_CONDITIONAL_GE);
1252
1253 /* swapping the two src operands breaks glBitmap()!? */
1254 tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1255 tmp->exec_size = BRW_EXECUTE_1;
1256 tmp->mask_ctrl = BRW_MASK_DISABLE;
1257 }
1258 }
1259
1260 tc_discard_inst(tc, inst);
1261 }
1262
1263 static void
1264 fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
1265 {
1266 struct toy_compiler *tc = &fcc->tc;
1267 struct toy_inst *inst;
1268
1269 /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1270 tc_head(tc);
1271 while ((inst = tc_next(tc)) != NULL) {
1272 switch (inst->opcode) {
1273 case TOY_OPCODE_TGSI_IN:
1274 case TOY_OPCODE_TGSI_CONST:
1275 case TOY_OPCODE_TGSI_SV:
1276 case TOY_OPCODE_TGSI_IMM:
1277 fs_lower_opcode_tgsi_direct(fcc, inst);
1278 break;
1279 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
1280 case TOY_OPCODE_TGSI_INDIRECT_STORE:
1281 fs_lower_opcode_tgsi_indirect(fcc, inst);
1282 break;
1283 case TOY_OPCODE_TGSI_TEX:
1284 case TOY_OPCODE_TGSI_TXB:
1285 case TOY_OPCODE_TGSI_TXD:
1286 case TOY_OPCODE_TGSI_TXL:
1287 case TOY_OPCODE_TGSI_TXP:
1288 case TOY_OPCODE_TGSI_TXF:
1289 case TOY_OPCODE_TGSI_TXQ:
1290 case TOY_OPCODE_TGSI_TXQ_LZ:
1291 case TOY_OPCODE_TGSI_TEX2:
1292 case TOY_OPCODE_TGSI_TXB2:
1293 case TOY_OPCODE_TGSI_TXL2:
1294 case TOY_OPCODE_TGSI_SAMPLE:
1295 case TOY_OPCODE_TGSI_SAMPLE_I:
1296 case TOY_OPCODE_TGSI_SAMPLE_I_MS:
1297 case TOY_OPCODE_TGSI_SAMPLE_B:
1298 case TOY_OPCODE_TGSI_SAMPLE_C:
1299 case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
1300 case TOY_OPCODE_TGSI_SAMPLE_D:
1301 case TOY_OPCODE_TGSI_SAMPLE_L:
1302 case TOY_OPCODE_TGSI_GATHER4:
1303 case TOY_OPCODE_TGSI_SVIEWINFO:
1304 case TOY_OPCODE_TGSI_SAMPLE_POS:
1305 case TOY_OPCODE_TGSI_SAMPLE_INFO:
1306 fs_lower_opcode_tgsi_sampling(fcc, inst);
1307 break;
1308 }
1309 }
1310
1311 tc_head(tc);
1312 while ((inst = tc_next(tc)) != NULL) {
1313 switch (inst->opcode) {
1314 case TOY_OPCODE_INV:
1315 case TOY_OPCODE_LOG:
1316 case TOY_OPCODE_EXP:
1317 case TOY_OPCODE_SQRT:
1318 case TOY_OPCODE_RSQ:
1319 case TOY_OPCODE_SIN:
1320 case TOY_OPCODE_COS:
1321 case TOY_OPCODE_FDIV:
1322 case TOY_OPCODE_POW:
1323 case TOY_OPCODE_INT_DIV_QUOTIENT:
1324 case TOY_OPCODE_INT_DIV_REMAINDER:
1325 toy_compiler_lower_math(tc, inst);
1326 break;
1327 case TOY_OPCODE_DDX:
1328 case TOY_OPCODE_DDY:
1329 fs_lower_opcode_derivative(tc, inst);
1330 break;
1331 case TOY_OPCODE_FB_WRITE:
1332 fs_lower_opcode_fb_write(tc, inst);
1333 break;
1334 case TOY_OPCODE_KIL:
1335 fs_lower_opcode_kil(tc, inst);
1336 break;
1337 default:
1338 if (inst->opcode > 127)
1339 tc_fail(tc, "unhandled virtual opcode");
1340 break;
1341 }
1342 }
1343 }
1344
1345 /**
1346 * Compile the shader.
1347 */
1348 static bool
1349 fs_compile(struct fs_compile_context *fcc)
1350 {
1351 struct toy_compiler *tc = &fcc->tc;
1352 struct ilo_shader *sh = fcc->shader;
1353
1354 fs_lower_virtual_opcodes(fcc);
1355 toy_compiler_legalize_for_ra(tc);
1356 toy_compiler_optimize(tc);
1357 toy_compiler_allocate_registers(tc,
1358 fcc->first_free_grf,
1359 fcc->last_free_grf,
1360 fcc->num_grf_per_vrf);
1361 toy_compiler_legalize_for_asm(tc);
1362
1363 if (tc->fail) {
1364 ilo_err("failed to legalize FS instructions: %s\n", tc->reason);
1365 return false;
1366 }
1367
1368 if (ilo_debug & ILO_DEBUG_FS) {
1369 ilo_printf("legalized instructions:\n");
1370 toy_compiler_dump(tc);
1371 ilo_printf("\n");
1372 }
1373
1374 if (true) {
1375 sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
1376 }
1377 else {
1378 static const uint32_t microcode[] = {
1379 /* fill in the microcode here */
1380 0x0, 0x0, 0x0, 0x0,
1381 };
1382 const bool swap = true;
1383
1384 sh->kernel_size = sizeof(microcode);
1385 sh->kernel = MALLOC(sh->kernel_size);
1386
1387 if (sh->kernel) {
1388 const int num_dwords = sizeof(microcode) / 4;
1389 const uint32_t *src = microcode;
1390 uint32_t *dst = (uint32_t *) sh->kernel;
1391 int i;
1392
1393 for (i = 0; i < num_dwords; i += 4) {
1394 if (swap) {
1395 dst[i + 0] = src[i + 3];
1396 dst[i + 1] = src[i + 2];
1397 dst[i + 2] = src[i + 1];
1398 dst[i + 3] = src[i + 0];
1399 }
1400 else {
1401 memcpy(dst, src, 16);
1402 }
1403 }
1404 }
1405 }
1406
1407 if (!sh->kernel) {
1408 ilo_err("failed to compile FS: %s\n", tc->reason);
1409 return false;
1410 }
1411
1412 if (ilo_debug & ILO_DEBUG_FS) {
1413 ilo_printf("disassembly:\n");
1414 toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
1415 ilo_printf("\n");
1416 }
1417
1418 return true;
1419 }
1420
1421 /**
1422 * Emit instructions to write the color buffers (and the depth buffer).
1423 */
1424 static void
1425 fs_write_fb(struct fs_compile_context *fcc)
1426 {
1427 struct toy_compiler *tc = &fcc->tc;
1428 int base_mrf = fcc->first_free_mrf;
1429 const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
1430 bool header_present = false;
1431 struct toy_src desc;
1432 unsigned msg_type, ctrl;
1433 int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
1434 int pos_slot = -1, cbuf, i;
1435
1436 for (i = 0; i < Elements(color_slots); i++)
1437 color_slots[i] = -1;
1438
1439 for (i = 0; i < fcc->tgsi.num_outputs; i++) {
1440 if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
1441 assert(fcc->tgsi.outputs[i].semantic_index < Elements(color_slots));
1442 color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
1443 }
1444 else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1445 pos_slot = i;
1446 }
1447 }
1448
1449 num_cbufs = fcc->variant->u.fs.num_cbufs;
1450 /* still need to send EOT (and probably depth) */
1451 if (!num_cbufs)
1452 num_cbufs = 1;
1453
1454 /* we need the header to specify the pixel mask or render target */
1455 if (fcc->tgsi.uses_kill || num_cbufs > 1) {
1456 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
1457 struct toy_inst *inst;
1458
1459 inst = tc_MOV(tc, header, r0);
1460 inst->mask_ctrl = BRW_MASK_DISABLE;
1461 base_mrf += fcc->num_grf_per_vrf;
1462
1463 /* this is a two-register header */
1464 if (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) {
1465 inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
1466 inst->mask_ctrl = BRW_MASK_DISABLE;
1467 base_mrf += fcc->num_grf_per_vrf;
1468 }
1469
1470 header_present = true;
1471 }
1472
1473 for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
1474 const int slot =
1475 color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
1476 int mrf = base_mrf, vrf;
1477 struct toy_src src[4];
1478
1479 if (slot >= 0) {
1480 const unsigned undefined_mask =
1481 fcc->tgsi.outputs[slot].undefined_mask;
1482 const int index = fcc->tgsi.outputs[slot].index;
1483
1484 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1485 if (vrf >= 0) {
1486 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1487 tsrc_transpose(tmp, src);
1488 }
1489 else {
1490 /* use (0, 0, 0, 0) */
1491 tsrc_transpose(tsrc_imm_f(0.0f), src);
1492 }
1493
1494 for (i = 0; i < 4; i++) {
1495 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1496
1497 if (undefined_mask & (1 << i))
1498 src[i] = tsrc_imm_f(0.0f);
1499
1500 tc_MOV(tc, dst, src[i]);
1501
1502 mrf += fcc->num_grf_per_vrf;
1503 }
1504 }
1505 else {
1506 /* use (0, 0, 0, 0) */
1507 for (i = 0; i < 4; i++) {
1508 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1509
1510 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
1511 mrf += fcc->num_grf_per_vrf;
1512 }
1513 }
1514
1515 /* select BLEND_STATE[rt] */
1516 if (cbuf > 0) {
1517 struct toy_inst *inst;
1518
1519 inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
1520 inst->mask_ctrl = BRW_MASK_DISABLE;
1521 inst->exec_size = BRW_EXECUTE_1;
1522 inst->src[0].rect = TOY_RECT_010;
1523 }
1524
1525 if (cbuf == 0 && pos_slot >= 0) {
1526 const int index = fcc->tgsi.outputs[pos_slot].index;
1527 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1528 struct toy_src src[4];
1529 int vrf;
1530
1531 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1532 if (vrf >= 0) {
1533 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1534 tsrc_transpose(tmp, src);
1535 }
1536 else {
1537 /* use (0, 0, 0, 0) */
1538 tsrc_transpose(tsrc_imm_f(0.0f), src);
1539 }
1540
1541 /* only Z */
1542 tc_MOV(tc, dst, src[2]);
1543
1544 mrf += fcc->num_grf_per_vrf;
1545 }
1546
1547 msg_type = (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ?
1548 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE :
1549 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
1550
1551 ctrl = (cbuf == num_cbufs - 1) << 12 |
1552 msg_type << 8;
1553
1554 desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
1555 mrf - fcc->first_free_mrf, 0,
1556 header_present, false,
1557 GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
1558 ctrl, ILO_WM_DRAW_SURFACE(cbuf));
1559
1560 tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
1561 tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
1562 }
1563 }
1564
1565 /**
1566 * Set up shader outputs for fixed-function units.
1567 */
1568 static void
1569 fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
1570 {
1571 int i;
1572
1573 sh->out.count = tgsi->num_outputs;
1574 for (i = 0; i < tgsi->num_outputs; i++) {
1575 sh->out.register_indices[i] = tgsi->outputs[i].index;
1576 sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name;
1577 sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index;
1578
1579 if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION)
1580 sh->out.has_pos = true;
1581 }
1582 }
1583
1584 /**
1585 * Set up shader inputs for fixed-function units.
1586 */
1587 static void
1588 fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
1589 bool flatshade)
1590 {
1591 int i;
1592
1593 sh->in.count = tgsi->num_inputs;
1594 for (i = 0; i < tgsi->num_inputs; i++) {
1595 sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
1596 sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
1597 sh->in.interp[i] = tgsi->inputs[i].interp;
1598 sh->in.centroid[i] = tgsi->inputs[i].centroid;
1599
1600 if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1601 sh->in.has_pos = true;
1602 continue;
1603 }
1604 else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
1605 continue;
1606 }
1607
1608 switch (tgsi->inputs[i].interp) {
1609 case TGSI_INTERPOLATE_CONSTANT:
1610 sh->in.const_interp_enable |= 1 << i;
1611 break;
1612 case TGSI_INTERPOLATE_LINEAR:
1613 sh->in.has_linear_interp = true;
1614
1615 if (tgsi->inputs[i].centroid) {
1616 sh->in.barycentric_interpolation_mode |=
1617 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
1618 }
1619 else {
1620 sh->in.barycentric_interpolation_mode |=
1621 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
1622 }
1623 break;
1624 case TGSI_INTERPOLATE_COLOR:
1625 if (flatshade) {
1626 sh->in.const_interp_enable |= 1 << i;
1627 break;
1628 }
1629 /* fall through */
1630 case TGSI_INTERPOLATE_PERSPECTIVE:
1631 if (tgsi->inputs[i].centroid) {
1632 sh->in.barycentric_interpolation_mode |=
1633 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
1634 }
1635 else {
1636 sh->in.barycentric_interpolation_mode |=
1637 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
1638 }
1639 break;
1640 default:
1641 break;
1642 }
1643 }
1644 }
1645
1646 static int
1647 fs_setup_payloads(struct fs_compile_context *fcc)
1648 {
1649 const struct ilo_shader *sh = fcc->shader;
1650 int grf, i;
1651
1652 grf = 0;
1653
1654 /* r0: header */
1655 grf++;
1656
1657 /* r1-r2: coordinates and etc. */
1658 grf += (fcc->dispatch_mode == GEN6_WM_32_DISPATCH_ENABLE) ? 2 : 1;
1659
1660 for (i = 0; i < Elements(fcc->payloads); i++) {
1661 int interp;
1662
1663 /* r3-r26 or r32-r55: barycentric interpolation parameters */
1664 for (interp = 0; interp < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; interp++) {
1665 if (!(sh->in.barycentric_interpolation_mode & (1 << interp)))
1666 continue;
1667
1668 fcc->payloads[i].barycentric_interps[interp] = grf;
1669 grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 2 : 4;
1670 }
1671
1672 /* r27-r28 or r56-r57: interpoloated depth */
1673 if (sh->in.has_pos) {
1674 fcc->payloads[i].source_depth = grf;
1675 grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
1676 }
1677
1678 /* r29-r30 or r58-r59: interpoloated w */
1679 if (sh->in.has_pos) {
1680 fcc->payloads[i].source_w = grf;
1681 grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
1682 }
1683
1684 /* r31 or r60: position offset */
1685 if (false) {
1686 fcc->payloads[i].pos_offset = grf;
1687 grf++;
1688 }
1689
1690 if (fcc->dispatch_mode != GEN6_WM_32_DISPATCH_ENABLE)
1691 break;
1692 }
1693
1694 return grf;
1695 }
1696
1697 /**
1698 * Translate the TGSI tokens.
1699 */
1700 static bool
1701 fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1702 struct toy_tgsi *tgsi)
1703 {
1704 if (ilo_debug & ILO_DEBUG_FS) {
1705 ilo_printf("dumping fragment shader\n");
1706 ilo_printf("\n");
1707
1708 tgsi_dump(tokens, 0);
1709 ilo_printf("\n");
1710 }
1711
1712 toy_compiler_translate_tgsi(tc, tokens, false, tgsi);
1713 if (tc->fail) {
1714 ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason);
1715 return false;
1716 }
1717
1718 if (ilo_debug & ILO_DEBUG_FS) {
1719 ilo_printf("TGSI translator:\n");
1720 toy_tgsi_dump(tgsi);
1721 ilo_printf("\n");
1722 toy_compiler_dump(tc);
1723 ilo_printf("\n");
1724 }
1725
1726 return true;
1727 }
1728
1729 /**
1730 * Set up FS compile context. This includes translating the TGSI tokens.
1731 */
1732 static bool
1733 fs_setup(struct fs_compile_context *fcc,
1734 const struct ilo_shader_state *state,
1735 const struct ilo_shader_variant *variant)
1736 {
1737 int num_consts;
1738
1739 memset(fcc, 0, sizeof(*fcc));
1740
1741 fcc->shader = CALLOC_STRUCT(ilo_shader);
1742 if (!fcc->shader)
1743 return false;
1744
1745 fcc->variant = variant;
1746
1747 toy_compiler_init(&fcc->tc, state->info.dev);
1748
1749 fcc->dispatch_mode = GEN6_WM_8_DISPATCH_ENABLE;
1750
1751 fcc->tc.templ.access_mode = BRW_ALIGN_1;
1752 if (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) {
1753 fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1H;
1754 fcc->tc.templ.exec_size = BRW_EXECUTE_16;
1755 }
1756 else {
1757 fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1Q;
1758 fcc->tc.templ.exec_size = BRW_EXECUTE_8;
1759 }
1760
1761 fcc->tc.rect_linear_width = 8;
1762
1763 /*
1764 * The classic driver uses the sampler cache (gen6) or the data cache
1765 * (gen7). Why?
1766 */
1767 fcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;
1768
1769 if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
1770 toy_compiler_cleanup(&fcc->tc);
1771 FREE(fcc->shader);
1772 return false;
1773 }
1774
1775 fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
1776 fs_setup_shader_out(fcc->shader, &fcc->tgsi);
1777
1778 if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) {
1779 num_consts = (fcc->tgsi.const_count + 1) / 2;
1780
1781 /*
1782 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1783 *
1784 * "The sum of all four read length fields (each incremented to
1785 * represent the actual read length) must be less than or equal to
1786 * 64"
1787 *
1788 * Since we are usually under a high register pressure, do not allow
1789 * for more than 8.
1790 */
1791 if (num_consts > 8)
1792 num_consts = 0;
1793 }
1794 else {
1795 num_consts = 0;
1796 }
1797
1798 fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts);
1799 fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
1800
1801 fcc->first_const_grf = fs_setup_payloads(fcc);
1802 fcc->first_attr_grf = fcc->first_const_grf + num_consts;
1803 fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
1804 fcc->last_free_grf = 127;
1805
1806 /* m0 is reserved for system routines */
1807 fcc->first_free_mrf = 1;
1808 fcc->last_free_mrf = 15;
1809
1810 /* instructions are compressed with BRW_EXECUTE_16 */
1811 fcc->num_grf_per_vrf =
1812 (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ? 2 : 1;
1813
1814 if (fcc->tc.dev->gen >= ILO_GEN(7)) {
1815 fcc->last_free_grf -= 15;
1816 fcc->first_free_mrf = fcc->last_free_grf + 1;
1817 fcc->last_free_mrf = fcc->first_free_mrf + 14;
1818 }
1819
1820 fcc->shader->in.start_grf = fcc->first_const_grf;
1821 fcc->shader->has_kill = fcc->tgsi.uses_kill;
1822 fcc->shader->dispatch_16 =
1823 (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE);
1824
1825 return true;
1826 }
1827
1828 /**
1829 * Compile the fragment shader.
1830 */
1831 struct ilo_shader *
1832 ilo_shader_compile_fs(const struct ilo_shader_state *state,
1833 const struct ilo_shader_variant *variant)
1834 {
1835 struct fs_compile_context fcc;
1836
1837 if (!fs_setup(&fcc, state, variant))
1838 return NULL;
1839
1840 fs_write_fb(&fcc);
1841
1842 if (!fs_compile(&fcc)) {
1843 FREE(fcc.shader);
1844 fcc.shader = NULL;
1845 }
1846
1847 toy_tgsi_cleanup(&fcc.tgsi);
1848 toy_compiler_cleanup(&fcc.tc);
1849
1850 return fcc.shader;
1851 }