/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_util.h"
30 #include "toy_compiler.h"
32 #include "toy_legalize.h"
33 #include "toy_optimize.h"
34 #include "toy_helpers.h"
35 #include "ilo_context.h"
36 #include "ilo_shader_internal.h"
38 struct fs_compile_context
{
39 struct ilo_shader
*shader
;
40 const struct ilo_shader_variant
*variant
;
42 struct toy_compiler tc
;
45 enum brw_message_target const_cache
;
49 int barycentric_interps
[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT
];
67 fetch_position(struct fs_compile_context
*fcc
, struct toy_dst dst
)
69 struct toy_compiler
*tc
= &fcc
->tc
;
70 const struct toy_src src_z
=
71 tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].source_depth
, 0);
72 const struct toy_src src_w
=
73 tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].source_w
, 0);
75 (fcc
->variant
->u
.fs
.fb_height
) ? fcc
->variant
->u
.fs
.fb_height
: 1;
76 const bool origin_upper_left
=
77 (fcc
->tgsi
.props
.fs_coord_origin
== TGSI_FS_COORD_ORIGIN_UPPER_LEFT
);
78 const bool pixel_center_integer
=
79 (fcc
->tgsi
.props
.fs_coord_pixel_center
==
80 TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
81 struct toy_src subspan_x
, subspan_y
;
82 struct toy_dst tmp
, tmp_uw
;
83 struct toy_dst real_dst
[4];
85 tdst_transpose(dst
, real_dst
);
87 subspan_x
= tsrc_uw(tsrc(TOY_FILE_GRF
, 1, 2 * 4));
88 subspan_x
= tsrc_rect(subspan_x
, TOY_RECT_240
);
90 subspan_y
= tsrc_offset(subspan_x
, 0, 1);
92 tmp_uw
= tdst_uw(tc_alloc_tmp(tc
));
93 tmp
= tc_alloc_tmp(tc
);
96 tc_ADD(tc
, tmp_uw
, subspan_x
, tsrc_imm_v(0x10101010));
97 tc_MOV(tc
, tmp
, tsrc_from(tmp_uw
));
98 if (pixel_center_integer
)
99 tc_MOV(tc
, real_dst
[0], tsrc_from(tmp
));
101 tc_ADD(tc
, real_dst
[0], tsrc_from(tmp
), tsrc_imm_f(0.5f
));
104 tc_ADD(tc
, tmp_uw
, subspan_y
, tsrc_imm_v(0x11001100));
105 tc_MOV(tc
, tmp
, tsrc_from(tmp_uw
));
106 if (origin_upper_left
&& pixel_center_integer
) {
107 tc_MOV(tc
, real_dst
[1], tsrc_from(tmp
));
110 struct toy_src y
= tsrc_from(tmp
);
113 if (!pixel_center_integer
)
116 if (!origin_upper_left
) {
117 offset
+= (float) (fb_height
- 1);
121 tc_ADD(tc
, real_dst
[1], y
, tsrc_imm_f(offset
));
125 tc_MOV(tc
, real_dst
[2], src_z
);
126 tc_INV(tc
, real_dst
[3], src_w
);
130 fetch_face(struct fs_compile_context
*fcc
, struct toy_dst dst
)
132 struct toy_compiler
*tc
= &fcc
->tc
;
133 const struct toy_src r0
= tsrc_d(tsrc(TOY_FILE_GRF
, 0, 0));
134 struct toy_dst tmp_f
, tmp
;
135 struct toy_dst real_dst
[4];
137 tdst_transpose(dst
, real_dst
);
139 tmp_f
= tc_alloc_tmp(tc
);
141 tc_SHR(tc
, tmp
, tsrc_rect(r0
, TOY_RECT_010
), tsrc_imm_d(15));
142 tc_AND(tc
, tmp
, tsrc_from(tmp
), tsrc_imm_d(1));
143 tc_MOV(tc
, tmp_f
, tsrc_from(tmp
));
145 /* convert to 1.0 and -1.0 */
146 tc_MUL(tc
, tmp_f
, tsrc_from(tmp_f
), tsrc_imm_f(-2.0f
));
147 tc_ADD(tc
, real_dst
[0], tsrc_from(tmp_f
), tsrc_imm_f(1.0f
));
149 tc_MOV(tc
, real_dst
[1], tsrc_imm_f(0.0f
));
150 tc_MOV(tc
, real_dst
[2], tsrc_imm_f(0.0f
));
151 tc_MOV(tc
, real_dst
[3], tsrc_imm_f(1.0f
));
155 fetch_attr(struct fs_compile_context
*fcc
, struct toy_dst dst
, int slot
)
157 struct toy_compiler
*tc
= &fcc
->tc
;
158 struct toy_dst real_dst
[4];
159 bool is_const
= false;
162 tdst_transpose(dst
, real_dst
);
164 grf
= fcc
->first_attr_grf
+ slot
* 2;
166 switch (fcc
->tgsi
.inputs
[slot
].interp
) {
167 case TGSI_INTERPOLATE_CONSTANT
:
170 case TGSI_INTERPOLATE_LINEAR
:
171 if (fcc
->tgsi
.inputs
[slot
].centroid
)
172 mode
= BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
174 mode
= BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
176 case TGSI_INTERPOLATE_COLOR
:
177 if (fcc
->variant
->u
.fs
.flatshade
) {
182 case TGSI_INTERPOLATE_PERSPECTIVE
:
183 if (fcc
->tgsi
.inputs
[slot
].centroid
)
184 mode
= BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
186 mode
= BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
189 assert(!"unexpected FS interpolation");
190 mode
= BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
195 struct toy_src a0
[4];
197 a0
[0] = tsrc(TOY_FILE_GRF
, grf
+ 0, 3 * 4);
198 a0
[1] = tsrc(TOY_FILE_GRF
, grf
+ 0, 7 * 4);
199 a0
[2] = tsrc(TOY_FILE_GRF
, grf
+ 1, 3 * 4);
200 a0
[3] = tsrc(TOY_FILE_GRF
, grf
+ 1, 7 * 4);
202 for (ch
= 0; ch
< 4; ch
++)
203 tc_MOV(tc
, real_dst
[ch
], tsrc_rect(a0
[ch
], TOY_RECT_010
));
206 struct toy_src attr
[4], uv
;
208 attr
[0] = tsrc(TOY_FILE_GRF
, grf
+ 0, 0);
209 attr
[1] = tsrc(TOY_FILE_GRF
, grf
+ 0, 4 * 4);
210 attr
[2] = tsrc(TOY_FILE_GRF
, grf
+ 1, 0);
211 attr
[3] = tsrc(TOY_FILE_GRF
, grf
+ 1, 4 * 4);
213 uv
= tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].barycentric_interps
[mode
], 0);
215 for (ch
= 0; ch
< 4; ch
++) {
216 tc_add2(tc
, BRW_OPCODE_PLN
, real_dst
[ch
],
217 tsrc_rect(attr
[ch
], TOY_RECT_010
), uv
);
221 if (fcc
->tgsi
.inputs
[slot
].semantic_name
== TGSI_SEMANTIC_FOG
) {
222 tc_MOV(tc
, real_dst
[1], tsrc_imm_f(0.0f
));
223 tc_MOV(tc
, real_dst
[2], tsrc_imm_f(0.0f
));
224 tc_MOV(tc
, real_dst
[3], tsrc_imm_f(1.0f
));
229 fs_lower_opcode_tgsi_in(struct fs_compile_context
*fcc
,
230 struct toy_dst dst
, int dim
, int idx
)
236 slot
= toy_tgsi_find_input(&fcc
->tgsi
, idx
);
240 switch (fcc
->tgsi
.inputs
[slot
].semantic_name
) {
241 case TGSI_SEMANTIC_POSITION
:
242 fetch_position(fcc
, dst
);
244 case TGSI_SEMANTIC_FACE
:
245 fetch_face(fcc
, dst
);
248 fetch_attr(fcc
, dst
, slot
);
254 fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context
*fcc
,
255 struct toy_dst dst
, int dim
,
258 const struct toy_dst offset
=
259 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
260 struct toy_compiler
*tc
= &fcc
->tc
;
261 unsigned simd_mode
, param_size
;
262 struct toy_inst
*inst
;
263 struct toy_src desc
, real_src
[4];
264 struct toy_dst tmp
, real_dst
[4];
267 tsrc_transpose(idx
, real_src
);
270 inst
= tc_MOV(tc
, offset
, real_src
[0]);
271 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
273 switch (inst
->exec_size
) {
275 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
279 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
283 assert(!"unsupported execution size");
284 tc_MOV(tc
, dst
, tsrc_imm_f(0.0f
));
289 desc
= tsrc_imm_mdesc_sampler(tc
, param_size
, param_size
* 4, false,
291 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
293 ILO_WM_CONST_SURFACE(dim
));
295 tmp
= tdst(TOY_FILE_VRF
, tc_alloc_vrf(tc
, param_size
* 4), 0);
296 inst
= tc_SEND(tc
, tmp
, tsrc_from(offset
), desc
, BRW_SFID_SAMPLER
);
297 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
299 tdst_transpose(dst
, real_dst
);
300 for (i
= 0; i
< 4; i
++) {
301 const struct toy_src src
=
302 tsrc_offset(tsrc_from(tmp
), param_size
* i
, 0);
304 /* cast to type D to make sure these are raw moves */
305 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
310 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context
*fcc
,
311 struct toy_dst dst
, int dim
, struct toy_src idx
)
313 const struct toy_dst header
=
314 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
315 const struct toy_dst global_offset
=
316 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 2 * 4));
317 const struct toy_src r0
= tsrc_ud(tsrc(TOY_FILE_GRF
, 0, 0));
318 struct toy_compiler
*tc
= &fcc
->tc
;
319 unsigned msg_type
, msg_ctrl
, msg_len
;
320 struct toy_inst
*inst
;
322 struct toy_dst tmp
, real_dst
[4];
325 /* set message header */
326 inst
= tc_MOV(tc
, header
, r0
);
327 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
329 /* set global offset */
330 inst
= tc_MOV(tc
, global_offset
, idx
);
331 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
332 inst
->exec_size
= BRW_EXECUTE_1
;
333 inst
->src
[0].rect
= TOY_RECT_010
;
335 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
;
336 msg_ctrl
= BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
<< 8;
339 desc
= tsrc_imm_mdesc_data_port(tc
, false, msg_len
, 1, true, false,
340 msg_type
, msg_ctrl
, ILO_WM_CONST_SURFACE(dim
));
342 tmp
= tc_alloc_tmp(tc
);
344 tc_SEND(tc
, tmp
, tsrc_from(header
), desc
, fcc
->const_cache
);
346 tdst_transpose(dst
, real_dst
);
347 for (i
= 0; i
< 4; i
++) {
348 const struct toy_src src
=
349 tsrc_offset(tsrc_rect(tsrc_from(tmp
), TOY_RECT_010
), 0, i
);
351 /* cast to type D to make sure these are raw moves */
352 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
357 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context
*fcc
,
358 struct toy_dst dst
, int dim
, struct toy_src idx
)
360 struct toy_compiler
*tc
= &fcc
->tc
;
361 const struct toy_dst offset
=
362 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
364 struct toy_inst
*inst
;
365 struct toy_dst tmp
, real_dst
[4];
369 * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
370 * changed from OWord Block Read to ld to increase performance in the
371 * classic driver. Since we use the constant cache instead of the data
372 * cache, I wonder if we still want to follow the classic driver.
376 inst
= tc_MOV(tc
, offset
, tsrc_rect(idx
, TOY_RECT_010
));
377 inst
->exec_size
= BRW_EXECUTE_8
;
378 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
380 desc
= tsrc_imm_mdesc_sampler(tc
, 1, 1, false,
381 BRW_SAMPLER_SIMD_MODE_SIMD4X2
,
382 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
384 ILO_WM_CONST_SURFACE(dim
));
386 tmp
= tc_alloc_tmp(tc
);
387 inst
= tc_SEND(tc
, tmp
, tsrc_from(offset
), desc
, BRW_SFID_SAMPLER
);
388 inst
->exec_size
= BRW_EXECUTE_8
;
389 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
391 tdst_transpose(dst
, real_dst
);
392 for (i
= 0; i
< 4; i
++) {
393 const struct toy_src src
=
394 tsrc_offset(tsrc_rect(tsrc_from(tmp
), TOY_RECT_010
), 0, i
);
396 /* cast to type D to make sure these are raw moves */
397 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
402 fs_lower_opcode_tgsi_imm(struct fs_compile_context
*fcc
,
403 struct toy_dst dst
, int idx
)
406 struct toy_dst real_dst
[4];
409 imm
= toy_tgsi_get_imm(&fcc
->tgsi
, idx
, NULL
);
411 tdst_transpose(dst
, real_dst
);
413 for (ch
= 0; ch
< 4; ch
++)
414 tc_MOV(&fcc
->tc
, tdst_ud(real_dst
[ch
]), tsrc_imm_ud(imm
[ch
]));
418 fs_lower_opcode_tgsi_sv(struct fs_compile_context
*fcc
,
419 struct toy_dst dst
, int dim
, int idx
)
421 struct toy_compiler
*tc
= &fcc
->tc
;
422 const struct toy_tgsi
*tgsi
= &fcc
->tgsi
;
427 slot
= toy_tgsi_find_system_value(tgsi
, idx
);
431 switch (tgsi
->system_values
[slot
].semantic_name
) {
432 case TGSI_SEMANTIC_PRIMID
:
433 case TGSI_SEMANTIC_INSTANCEID
:
434 case TGSI_SEMANTIC_VERTEXID
:
436 tc_fail(tc
, "unhandled system value");
437 tc_MOV(tc
, dst
, tsrc_imm_d(0));
443 fs_lower_opcode_tgsi_direct(struct fs_compile_context
*fcc
,
444 struct toy_inst
*inst
)
446 struct toy_compiler
*tc
= &fcc
->tc
;
449 assert(inst
->src
[0].file
== TOY_FILE_IMM
);
450 dim
= inst
->src
[0].val32
;
452 assert(inst
->src
[1].file
== TOY_FILE_IMM
);
453 idx
= inst
->src
[1].val32
;
455 switch (inst
->opcode
) {
456 case TOY_OPCODE_TGSI_IN
:
457 fs_lower_opcode_tgsi_in(fcc
, inst
->dst
, dim
, idx
);
459 case TOY_OPCODE_TGSI_CONST
:
460 if (tc
->dev
->gen
>= ILO_GEN(7))
461 fs_lower_opcode_tgsi_const_gen7(fcc
, inst
->dst
, dim
, inst
->src
[1]);
463 fs_lower_opcode_tgsi_const_gen6(fcc
, inst
->dst
, dim
, inst
->src
[1]);
465 case TOY_OPCODE_TGSI_SV
:
466 fs_lower_opcode_tgsi_sv(fcc
, inst
->dst
, dim
, idx
);
468 case TOY_OPCODE_TGSI_IMM
:
470 fs_lower_opcode_tgsi_imm(fcc
, inst
->dst
, idx
);
473 tc_fail(tc
, "unhandled TGSI fetch");
477 tc_discard_inst(tc
, inst
);
481 fs_lower_opcode_tgsi_indirect(struct fs_compile_context
*fcc
,
482 struct toy_inst
*inst
)
484 struct toy_compiler
*tc
= &fcc
->tc
;
485 enum tgsi_file_type file
;
487 struct toy_src indirect_dim
, indirect_idx
;
489 assert(inst
->src
[0].file
== TOY_FILE_IMM
);
490 file
= inst
->src
[0].val32
;
492 assert(inst
->src
[1].file
== TOY_FILE_IMM
);
493 dim
= inst
->src
[1].val32
;
494 indirect_dim
= inst
->src
[2];
496 assert(inst
->src
[3].file
== TOY_FILE_IMM
);
497 idx
= inst
->src
[3].val32
;
498 indirect_idx
= inst
->src
[4];
500 /* no dimension indirection */
501 assert(indirect_dim
.file
== TOY_FILE_IMM
);
502 dim
+= indirect_dim
.val32
;
504 switch (inst
->opcode
) {
505 case TOY_OPCODE_TGSI_INDIRECT_FETCH
:
506 if (file
== TGSI_FILE_CONSTANT
) {
508 struct toy_dst tmp
= tc_alloc_tmp(tc
);
510 tc_ADD(tc
, tmp
, indirect_idx
, tsrc_imm_d(idx
));
511 indirect_idx
= tsrc_from(tmp
);
514 fs_lower_opcode_tgsi_indirect_const(fcc
, inst
->dst
, dim
, indirect_idx
);
518 case TOY_OPCODE_TGSI_INDIRECT_STORE
:
520 tc_fail(tc
, "unhandled TGSI indirection");
524 tc_discard_inst(tc
, inst
);
528 * Emit instructions to move sampling parameters to the message registers.
531 fs_add_sampler_params_gen6(struct toy_compiler
*tc
, int msg_type
,
532 int base_mrf
, int param_size
,
533 struct toy_src
*coords
, int num_coords
,
534 struct toy_src bias_or_lod
, struct toy_src ref_or_si
,
535 struct toy_src
*ddx
, struct toy_src
*ddy
,
540 assert(num_coords
<= 4);
541 assert(num_derivs
<= 3 && num_derivs
<= num_coords
);
543 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
545 case GEN5_SAMPLER_MESSAGE_SAMPLE
:
546 for (i
= 0; i
< num_coords
; i
++)
547 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
548 num_params
= num_coords
;
550 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
:
551 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
:
552 for (i
= 0; i
< num_coords
; i
++)
553 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
554 tc_MOV(tc
, SAMPLER_PARAM(4), bias_or_lod
);
557 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
:
558 for (i
= 0; i
< num_coords
; i
++)
559 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
560 tc_MOV(tc
, SAMPLER_PARAM(4), ref_or_si
);
563 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
:
564 for (i
= 0; i
< num_coords
; i
++)
565 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
566 for (i
= 0; i
< num_derivs
; i
++) {
567 tc_MOV(tc
, SAMPLER_PARAM(4 + i
* 2), ddx
[i
]);
568 tc_MOV(tc
, SAMPLER_PARAM(5 + i
* 2), ddy
[i
]);
570 num_params
= 4 + num_derivs
* 2;
572 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
:
573 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
:
574 for (i
= 0; i
< num_coords
; i
++)
575 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
576 tc_MOV(tc
, SAMPLER_PARAM(4), ref_or_si
);
577 tc_MOV(tc
, SAMPLER_PARAM(5), bias_or_lod
);
580 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD
:
581 assert(num_coords
<= 3);
583 for (i
= 0; i
< num_coords
; i
++)
584 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(i
)), coords
[i
]);
585 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(3)), bias_or_lod
);
586 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(4)), ref_or_si
);
589 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
:
590 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), bias_or_lod
);
594 tc_fail(tc
, "unknown sampler opcode");
600 return num_params
* param_size
;
604 fs_add_sampler_params_gen7(struct toy_compiler
*tc
, int msg_type
,
605 int base_mrf
, int param_size
,
606 struct toy_src
*coords
, int num_coords
,
607 struct toy_src bias_or_lod
, struct toy_src ref_or_si
,
608 struct toy_src
*ddx
, struct toy_src
*ddy
,
613 assert(num_coords
<= 4);
614 assert(num_derivs
<= 3 && num_derivs
<= num_coords
);
616 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
618 case GEN5_SAMPLER_MESSAGE_SAMPLE
:
619 for (i
= 0; i
< num_coords
; i
++)
620 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
621 num_params
= num_coords
;
623 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
:
624 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
:
625 tc_MOV(tc
, SAMPLER_PARAM(0), bias_or_lod
);
626 for (i
= 0; i
< num_coords
; i
++)
627 tc_MOV(tc
, SAMPLER_PARAM(1 + i
), coords
[i
]);
628 num_params
= 1 + num_coords
;
630 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
:
631 tc_MOV(tc
, SAMPLER_PARAM(0), ref_or_si
);
632 for (i
= 0; i
< num_coords
; i
++)
633 tc_MOV(tc
, SAMPLER_PARAM(1 + i
), coords
[i
]);
634 num_params
= 1 + num_coords
;
636 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
:
637 for (i
= 0; i
< num_coords
; i
++) {
638 tc_MOV(tc
, SAMPLER_PARAM(i
* 3), coords
[i
]);
639 if (i
< num_derivs
) {
640 tc_MOV(tc
, SAMPLER_PARAM(i
* 3 + 1), ddx
[i
]);
641 tc_MOV(tc
, SAMPLER_PARAM(i
* 3 + 2), ddy
[i
]);
644 num_params
= num_coords
* 3 - ((num_coords
> num_derivs
) ? 2 : 0);
646 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
:
647 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
:
648 tc_MOV(tc
, SAMPLER_PARAM(0), ref_or_si
);
649 tc_MOV(tc
, SAMPLER_PARAM(1), bias_or_lod
);
650 for (i
= 0; i
< num_coords
; i
++)
651 tc_MOV(tc
, SAMPLER_PARAM(2 + i
), coords
[i
]);
652 num_params
= 2 + num_coords
;
654 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD
:
655 assert(num_coords
>= 1 && num_coords
<= 3);
657 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), coords
[0]);
658 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(1)), bias_or_lod
);
659 for (i
= 1; i
< num_coords
; i
++)
660 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(1 + i
)), coords
[i
]);
661 num_params
= 1 + num_coords
;
663 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
:
664 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), bias_or_lod
);
668 tc_fail(tc
, "unknown sampler opcode");
674 return num_params
* param_size
;
678 * Set up message registers and return the message descriptor for sampling.
680 static struct toy_src
681 fs_prepare_tgsi_sampling(struct toy_compiler
*tc
, const struct toy_inst
*inst
,
682 int base_mrf
, const uint32_t *saturate_coords
,
683 unsigned *ret_sampler_index
)
685 unsigned simd_mode
, msg_type
, msg_len
, sampler_index
, binding_table_index
;
686 struct toy_src coords
[4], ddx
[4], ddy
[4], bias_or_lod
, ref_or_si
;
687 int num_coords
, ref_pos
, num_derivs
;
688 int sampler_src
, param_size
, i
;
690 switch (inst
->exec_size
) {
692 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
696 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
700 tc_fail(tc
, "unsupported execute size for sampling");
705 num_coords
= tgsi_util_get_texture_coord_dim(inst
->tex
.target
, &ref_pos
);
706 tsrc_transpose(inst
->src
[0], coords
);
707 bias_or_lod
= tsrc_null();
708 ref_or_si
= tsrc_null();
715 * src0 := (x, y, z, w)
720 * For TEX2, TXB2, and TXL2,
722 * src0 := (x, y, z, w)
723 * src1 := (v or bias or lod, ...)
726 * For TEX, TXB, TXL, and TXP,
728 * src0 := (x, y, z, w or bias or lod or projection)
742 * src0 := (x, y, z, w or lod)
745 * State trackers should not generate opcode+texture combinations with
746 * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
748 switch (inst
->opcode
) {
749 case TOY_OPCODE_TGSI_TEX
:
753 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
754 ref_or_si
= coords
[ref_pos
];
757 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
760 case TOY_OPCODE_TGSI_TXD
:
762 tc_fail(tc
, "TXD with shadow sampler not supported");
764 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
765 tsrc_transpose(inst
->src
[1], ddx
);
766 tsrc_transpose(inst
->src
[2], ddy
);
767 num_derivs
= num_coords
;
770 case TOY_OPCODE_TGSI_TXP
:
774 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
775 ref_or_si
= coords
[ref_pos
];
778 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
781 /* project the coordinates */
783 struct toy_dst tmp
[4];
785 tc_alloc_tmp4(tc
, tmp
);
787 tc_INV(tc
, tmp
[3], coords
[3]);
788 for (i
= 0; i
< num_coords
&& i
< 3; i
++) {
789 tc_MUL(tc
, tmp
[i
], coords
[i
], tsrc_from(tmp
[3]));
790 coords
[i
] = tsrc_from(tmp
[i
]);
794 tc_MUL(tc
, tmp
[ref_pos
], ref_or_si
, tsrc_from(tmp
[3]));
795 ref_or_si
= tsrc_from(tmp
[ref_pos
]);
799 case TOY_OPCODE_TGSI_TXB
:
803 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
;
804 ref_or_si
= coords
[ref_pos
];
807 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
;
810 bias_or_lod
= coords
[3];
812 case TOY_OPCODE_TGSI_TXL
:
816 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
817 ref_or_si
= coords
[ref_pos
];
820 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
823 bias_or_lod
= coords
[3];
825 case TOY_OPCODE_TGSI_TXF
:
826 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
828 switch (inst
->tex
.target
) {
829 case TGSI_TEXTURE_2D_MSAA
:
830 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
831 assert(ref_pos
>= 0 && ref_pos
< 4);
832 /* lod is always 0 */
833 bias_or_lod
= tsrc_imm_d(0);
834 ref_or_si
= coords
[ref_pos
];
837 bias_or_lod
= coords
[3];
841 /* offset the coordinates */
842 if (!tsrc_is_null(inst
->tex
.offsets
[0])) {
843 struct toy_dst tmp
[4];
844 struct toy_src offsets
[4];
846 tc_alloc_tmp4(tc
, tmp
);
847 tsrc_transpose(inst
->tex
.offsets
[0], offsets
);
849 for (i
= 0; i
< num_coords
; i
++) {
850 tc_ADD(tc
, tmp
[i
], coords
[i
], offsets
[i
]);
851 coords
[i
] = tsrc_from(tmp
[i
]);
857 case TOY_OPCODE_TGSI_TXQ
:
858 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
860 bias_or_lod
= coords
[0];
862 case TOY_OPCODE_TGSI_TXQ_LZ
:
863 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
867 case TOY_OPCODE_TGSI_TEX2
:
871 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
874 struct toy_src src1
[4];
875 tsrc_transpose(inst
->src
[1], src1
);
876 ref_or_si
= src1
[ref_pos
- 4];
879 ref_or_si
= coords
[ref_pos
];
883 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
888 case TOY_OPCODE_TGSI_TXB2
:
892 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
;
893 ref_or_si
= coords
[ref_pos
];
896 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
;
900 struct toy_src src1
[4];
901 tsrc_transpose(inst
->src
[1], src1
);
902 bias_or_lod
= src1
[0];
907 case TOY_OPCODE_TGSI_TXL2
:
911 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
912 ref_or_si
= coords
[ref_pos
];
915 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
919 struct toy_src src1
[4];
920 tsrc_transpose(inst
->src
[1], src1
);
921 bias_or_lod
= src1
[0];
927 assert(!"unhandled sampling opcode");
932 assert(inst
->src
[sampler_src
].file
== TOY_FILE_IMM
);
933 sampler_index
= inst
->src
[sampler_src
].val32
;
934 binding_table_index
= ILO_WM_TEXTURE_SURFACE(sampler_index
);
937 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
939 * "Note that the (cube map) coordinates delivered to the sampling
940 * engine must already have been divided by the component with the
941 * largest absolute value."
943 switch (inst
->tex
.target
) {
944 case TGSI_TEXTURE_CUBE
:
945 case TGSI_TEXTURE_SHADOWCUBE
:
946 case TGSI_TEXTURE_CUBE_ARRAY
:
947 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
948 /* TXQ does not need coordinates */
949 if (num_coords
>= 3) {
950 struct toy_dst tmp
[4];
952 tc_alloc_tmp4(tc
, tmp
);
954 tc_SEL(tc
, tmp
[3], tsrc_absolute(coords
[0]),
955 tsrc_absolute(coords
[1]), BRW_CONDITIONAL_GE
);
956 tc_SEL(tc
, tmp
[3], tsrc_from(tmp
[3]),
957 tsrc_absolute(coords
[2]), BRW_CONDITIONAL_GE
);
958 tc_INV(tc
, tmp
[3], tsrc_from(tmp
[3]));
960 for (i
= 0; i
< 3; i
++) {
961 tc_MUL(tc
, tmp
[i
], coords
[i
], tsrc_from(tmp
[3]));
962 coords
[i
] = tsrc_from(tmp
[i
]);
969 * Saturate (s, t, r). saturate_coords is set for sampler and coordinate
970 * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is
971 * so that sampling outside the border gets the correct colors.
973 for (i
= 0; i
< MIN2(num_coords
, 3); i
++) {
976 if (!(saturate_coords
[i
] & (1 << sampler_index
)))
979 switch (inst
->tex
.target
) {
980 case TGSI_TEXTURE_RECT
:
981 case TGSI_TEXTURE_SHADOWRECT
:
990 struct toy_src min
, max
;
993 tc_fail(tc
, "GL_CLAMP with rectangle texture unsupported");
994 tmp
= tc_alloc_tmp(tc
);
996 /* saturate to [0, width] or [0, height] */
998 min
= tsrc_imm_f(0.0f
);
999 max
= tsrc_imm_f(2048.0f
);
1001 tc_SEL(tc
, tmp
, coords
[i
], min
, BRW_CONDITIONAL_G
);
1002 tc_SEL(tc
, tmp
, tsrc_from(tmp
), max
, BRW_CONDITIONAL_L
);
1004 coords
[i
] = tsrc_from(tmp
);
1008 struct toy_inst
*inst2
;
1010 tmp
= tc_alloc_tmp(tc
);
1012 /* saturate to [0.0f, 1.0f] */
1013 inst2
= tc_MOV(tc
, tmp
, coords
[i
]);
1014 inst2
->saturate
= true;
1016 coords
[i
] = tsrc_from(tmp
);
1020 /* set up sampler parameters */
1021 if (tc
->dev
->gen
>= ILO_GEN(7)) {
1022 msg_len
= fs_add_sampler_params_gen7(tc
, msg_type
, base_mrf
, param_size
,
1023 coords
, num_coords
, bias_or_lod
, ref_or_si
, ddx
, ddy
, num_derivs
);
1026 msg_len
= fs_add_sampler_params_gen6(tc
, msg_type
, base_mrf
, param_size
,
1027 coords
, num_coords
, bias_or_lod
, ref_or_si
, ddx
, ddy
, num_derivs
);
1031 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
1033 * "The maximum message length allowed to the sampler is 11. This would
1034 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
1038 tc_fail(tc
, "maximum length for messages to the sampler is 11");
1040 if (ret_sampler_index
)
1041 *ret_sampler_index
= sampler_index
;
1043 return tsrc_imm_mdesc_sampler(tc
, msg_len
, 4 * param_size
,
1044 false, simd_mode
, msg_type
, sampler_index
, binding_table_index
);
1048 fs_lower_opcode_tgsi_sampling(struct fs_compile_context
*fcc
,
1049 struct toy_inst
*inst
)
1051 struct toy_compiler
*tc
= &fcc
->tc
;
1052 struct toy_dst dst
[4], tmp
[4];
1053 struct toy_src desc
;
1054 unsigned sampler_index
;
1058 desc
= fs_prepare_tgsi_sampling(tc
, inst
,
1059 fcc
->first_free_mrf
,
1060 fcc
->variant
->saturate_tex_coords
,
1063 switch (inst
->opcode
) {
1064 case TOY_OPCODE_TGSI_TXF
:
1065 case TOY_OPCODE_TGSI_TXQ
:
1066 case TOY_OPCODE_TGSI_TXQ_LZ
:
1067 need_filter
= false;
1074 toy_compiler_lower_to_send(tc
, inst
, false, BRW_SFID_SAMPLER
);
1075 inst
->src
[0] = tsrc(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0);
1076 inst
->src
[1] = desc
;
1077 for (i
= 2; i
< Elements(inst
->src
); i
++)
1078 inst
->src
[i
] = tsrc_null();
1080 /* write to temps first */
1081 tc_alloc_tmp4(tc
, tmp
);
1082 for (i
= 0; i
< 4; i
++)
1083 tmp
[i
].type
= inst
->dst
.type
;
1084 tdst_transpose(inst
->dst
, dst
);
1087 tc_move_inst(tc
, inst
);
1090 assert(sampler_index
< fcc
->variant
->num_sampler_views
);
1091 swizzles
[0] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].r
;
1092 swizzles
[1] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].g
;
1093 swizzles
[2] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].b
;
1094 swizzles
[3] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].a
;
1097 swizzles
[0] = PIPE_SWIZZLE_RED
;
1098 swizzles
[1] = PIPE_SWIZZLE_GREEN
;
1099 swizzles
[2] = PIPE_SWIZZLE_BLUE
;
1100 swizzles
[3] = PIPE_SWIZZLE_ALPHA
;
1103 /* swizzle the results */
1104 for (i
= 0; i
< 4; i
++) {
1105 switch (swizzles
[i
]) {
1106 case PIPE_SWIZZLE_ZERO
:
1107 tc_MOV(tc
, dst
[i
], tsrc_imm_f(0.0f
));
1109 case PIPE_SWIZZLE_ONE
:
1110 tc_MOV(tc
, dst
[i
], tsrc_imm_f(1.0f
));
1113 tc_MOV(tc
, dst
[i
], tsrc_from(tmp
[swizzles
[i
]]));
1120 fs_lower_opcode_derivative(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1122 struct toy_dst dst
[4];
1123 struct toy_src src
[4];
1126 tdst_transpose(inst
->dst
, dst
);
1127 tsrc_transpose(inst
->src
[0], src
);
1130 * Every four fragments are from a 2x2 subspan, with
1132 * fragment 1 on the top-left,
1133 * fragment 2 on the top-right,
1134 * fragment 3 on the bottom-left,
1135 * fragment 4 on the bottom-right.
1137 * DDX should thus produce
1139 * dst = src.yyww - src.xxzz
1141 * and DDY should produce
1143 * dst = src.zzww - src.xxyy
1145 * But since we are in BRW_ALIGN_1, swizzling does not work and we have to
1146 * play with the region parameters.
1148 if (inst
->opcode
== TOY_OPCODE_DDX
) {
1149 for (i
= 0; i
< 4; i
++) {
1150 struct toy_src left
, right
;
1152 left
= tsrc_rect(src
[i
], TOY_RECT_220
);
1153 right
= tsrc_offset(left
, 0, 1);
1155 tc_ADD(tc
, dst
[i
], right
, tsrc_negate(left
));
1159 for (i
= 0; i
< 4; i
++) {
1160 struct toy_src top
, bottom
;
1162 /* approximate with dst = src.zzzz - src.xxxx */
1163 top
= tsrc_rect(src
[i
], TOY_RECT_440
);
1164 bottom
= tsrc_offset(top
, 0, 2);
1166 tc_ADD(tc
, dst
[i
], bottom
, tsrc_negate(top
));
1170 tc_discard_inst(tc
, inst
);
1174 fs_lower_opcode_fb_write(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1176 /* fs_write_fb() has set up the message registers */
1177 toy_compiler_lower_to_send(tc
, inst
, true,
1178 GEN6_SFID_DATAPORT_RENDER_CACHE
);
1182 fs_lower_opcode_kil(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1184 struct toy_dst pixel_mask_dst
;
1185 struct toy_src f0
, pixel_mask
;
1186 struct toy_inst
*tmp
;
1188 /* lower half of r1.7:ud */
1189 pixel_mask_dst
= tdst_uw(tdst(TOY_FILE_GRF
, 1, 7 * 4));
1190 pixel_mask
= tsrc_rect(tsrc_from(pixel_mask_dst
), TOY_RECT_010
);
1192 f0
= tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF
, BRW_ARF_FLAG
, 0)), TOY_RECT_010
);
1194 /* KILL or KILL_IF */
1195 if (tsrc_is_null(inst
->src
[0])) {
1196 struct toy_src dummy
= tsrc_uw(tsrc(TOY_FILE_GRF
, 0, 0));
1197 struct toy_dst f0_dst
= tdst_uw(tdst(TOY_FILE_ARF
, BRW_ARF_FLAG
, 0));
1199 /* create a mask that masks out all pixels */
1200 tmp
= tc_MOV(tc
, f0_dst
, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010
));
1201 tmp
->exec_size
= BRW_EXECUTE_1
;
1202 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1204 tc_CMP(tc
, tdst_null(), dummy
, dummy
, BRW_CONDITIONAL_NEQ
);
1206 /* swapping the two src operands breaks glBitmap()!? */
1207 tmp
= tc_AND(tc
, pixel_mask_dst
, f0
, pixel_mask
);
1208 tmp
->exec_size
= BRW_EXECUTE_1
;
1209 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1212 struct toy_src src
[4];
1215 tsrc_transpose(inst
->src
[0], src
);
1216 /* mask out killed pixels */
1217 for (i
= 0; i
< 4; i
++) {
1218 tc_CMP(tc
, tdst_null(), src
[i
], tsrc_imm_f(0.0f
),
1219 BRW_CONDITIONAL_GE
);
1221 /* swapping the two src operands breaks glBitmap()!? */
1222 tmp
= tc_AND(tc
, pixel_mask_dst
, f0
, pixel_mask
);
1223 tmp
->exec_size
= BRW_EXECUTE_1
;
1224 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1228 tc_discard_inst(tc
, inst
);
1232 fs_lower_virtual_opcodes(struct fs_compile_context
*fcc
)
1234 struct toy_compiler
*tc
= &fcc
->tc
;
1235 struct toy_inst
*inst
;
1237 /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1239 while ((inst
= tc_next(tc
)) != NULL
) {
1240 switch (inst
->opcode
) {
1241 case TOY_OPCODE_TGSI_IN
:
1242 case TOY_OPCODE_TGSI_CONST
:
1243 case TOY_OPCODE_TGSI_SV
:
1244 case TOY_OPCODE_TGSI_IMM
:
1245 fs_lower_opcode_tgsi_direct(fcc
, inst
);
1247 case TOY_OPCODE_TGSI_INDIRECT_FETCH
:
1248 case TOY_OPCODE_TGSI_INDIRECT_STORE
:
1249 fs_lower_opcode_tgsi_indirect(fcc
, inst
);
1251 case TOY_OPCODE_TGSI_TEX
:
1252 case TOY_OPCODE_TGSI_TXB
:
1253 case TOY_OPCODE_TGSI_TXD
:
1254 case TOY_OPCODE_TGSI_TXL
:
1255 case TOY_OPCODE_TGSI_TXP
:
1256 case TOY_OPCODE_TGSI_TXF
:
1257 case TOY_OPCODE_TGSI_TXQ
:
1258 case TOY_OPCODE_TGSI_TXQ_LZ
:
1259 case TOY_OPCODE_TGSI_TEX2
:
1260 case TOY_OPCODE_TGSI_TXB2
:
1261 case TOY_OPCODE_TGSI_TXL2
:
1262 case TOY_OPCODE_TGSI_SAMPLE
:
1263 case TOY_OPCODE_TGSI_SAMPLE_I
:
1264 case TOY_OPCODE_TGSI_SAMPLE_I_MS
:
1265 case TOY_OPCODE_TGSI_SAMPLE_B
:
1266 case TOY_OPCODE_TGSI_SAMPLE_C
:
1267 case TOY_OPCODE_TGSI_SAMPLE_C_LZ
:
1268 case TOY_OPCODE_TGSI_SAMPLE_D
:
1269 case TOY_OPCODE_TGSI_SAMPLE_L
:
1270 case TOY_OPCODE_TGSI_GATHER4
:
1271 case TOY_OPCODE_TGSI_SVIEWINFO
:
1272 case TOY_OPCODE_TGSI_SAMPLE_POS
:
1273 case TOY_OPCODE_TGSI_SAMPLE_INFO
:
1274 fs_lower_opcode_tgsi_sampling(fcc
, inst
);
1280 while ((inst
= tc_next(tc
)) != NULL
) {
1281 switch (inst
->opcode
) {
1282 case TOY_OPCODE_INV
:
1283 case TOY_OPCODE_LOG
:
1284 case TOY_OPCODE_EXP
:
1285 case TOY_OPCODE_SQRT
:
1286 case TOY_OPCODE_RSQ
:
1287 case TOY_OPCODE_SIN
:
1288 case TOY_OPCODE_COS
:
1289 case TOY_OPCODE_FDIV
:
1290 case TOY_OPCODE_POW
:
1291 case TOY_OPCODE_INT_DIV_QUOTIENT
:
1292 case TOY_OPCODE_INT_DIV_REMAINDER
:
1293 toy_compiler_lower_math(tc
, inst
);
1295 case TOY_OPCODE_DDX
:
1296 case TOY_OPCODE_DDY
:
1297 fs_lower_opcode_derivative(tc
, inst
);
1299 case TOY_OPCODE_FB_WRITE
:
1300 fs_lower_opcode_fb_write(tc
, inst
);
1302 case TOY_OPCODE_KIL
:
1303 fs_lower_opcode_kil(tc
, inst
);
1306 if (inst
->opcode
> 127)
1307 tc_fail(tc
, "unhandled virtual opcode");
1314 * Compile the shader.
1317 fs_compile(struct fs_compile_context
*fcc
)
1319 struct toy_compiler
*tc
= &fcc
->tc
;
1320 struct ilo_shader
*sh
= fcc
->shader
;
1322 fs_lower_virtual_opcodes(fcc
);
1323 toy_compiler_legalize_for_ra(tc
);
1324 toy_compiler_optimize(tc
);
1325 toy_compiler_allocate_registers(tc
,
1326 fcc
->first_free_grf
,
1328 fcc
->num_grf_per_vrf
);
1329 toy_compiler_legalize_for_asm(tc
);
1332 ilo_err("failed to legalize FS instructions: %s\n", tc
->reason
);
1336 if (ilo_debug
& ILO_DEBUG_FS
) {
1337 ilo_printf("legalized instructions:\n");
1338 toy_compiler_dump(tc
);
1343 sh
->kernel
= toy_compiler_assemble(tc
, &sh
->kernel_size
);
1346 static const uint32_t microcode
[] = {
1347 /* fill in the microcode here */
1350 const bool swap
= true;
1352 sh
->kernel_size
= sizeof(microcode
);
1353 sh
->kernel
= MALLOC(sh
->kernel_size
);
1356 const int num_dwords
= sizeof(microcode
) / 4;
1357 const uint32_t *src
= microcode
;
1358 uint32_t *dst
= (uint32_t *) sh
->kernel
;
1361 for (i
= 0; i
< num_dwords
; i
+= 4) {
1363 dst
[i
+ 0] = src
[i
+ 3];
1364 dst
[i
+ 1] = src
[i
+ 2];
1365 dst
[i
+ 2] = src
[i
+ 1];
1366 dst
[i
+ 3] = src
[i
+ 0];
1369 memcpy(dst
, src
, 16);
1376 ilo_err("failed to compile FS: %s\n", tc
->reason
);
1380 if (ilo_debug
& ILO_DEBUG_FS
) {
1381 ilo_printf("disassembly:\n");
1382 toy_compiler_disassemble(tc
, sh
->kernel
, sh
->kernel_size
);
1390 * Emit instructions to write the color buffers (and the depth buffer).
1393 fs_write_fb(struct fs_compile_context
*fcc
)
1395 struct toy_compiler
*tc
= &fcc
->tc
;
1396 int base_mrf
= fcc
->first_free_mrf
;
1397 const struct toy_dst header
= tdst_ud(tdst(TOY_FILE_MRF
, base_mrf
, 0));
1398 bool header_present
= false;
1399 struct toy_src desc
;
1400 unsigned msg_type
, ctrl
;
1401 int color_slots
[ILO_MAX_DRAW_BUFFERS
], num_cbufs
;
1402 int pos_slot
= -1, cbuf
, i
;
1404 for (i
= 0; i
< Elements(color_slots
); i
++)
1405 color_slots
[i
] = -1;
1407 for (i
= 0; i
< fcc
->tgsi
.num_outputs
; i
++) {
1408 if (fcc
->tgsi
.outputs
[i
].semantic_name
== TGSI_SEMANTIC_COLOR
) {
1409 assert(fcc
->tgsi
.outputs
[i
].semantic_index
< Elements(color_slots
));
1410 color_slots
[fcc
->tgsi
.outputs
[i
].semantic_index
] = i
;
1412 else if (fcc
->tgsi
.outputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
) {
1417 num_cbufs
= fcc
->variant
->u
.fs
.num_cbufs
;
1418 /* still need to send EOT (and probably depth) */
1422 /* we need the header to specify the pixel mask or render target */
1423 if (fcc
->tgsi
.uses_kill
|| num_cbufs
> 1) {
1424 const struct toy_src r0
= tsrc_ud(tsrc(TOY_FILE_GRF
, 0, 0));
1425 struct toy_inst
*inst
;
1427 inst
= tc_MOV(tc
, header
, r0
);
1428 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1429 base_mrf
+= fcc
->num_grf_per_vrf
;
1431 /* this is a two-register header */
1432 if (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) {
1433 inst
= tc_MOV(tc
, tdst_offset(header
, 1, 0), tsrc_offset(r0
, 1, 0));
1434 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1435 base_mrf
+= fcc
->num_grf_per_vrf
;
1438 header_present
= true;
1441 for (cbuf
= 0; cbuf
< num_cbufs
; cbuf
++) {
1443 color_slots
[(fcc
->tgsi
.props
.fs_color0_writes_all_cbufs
) ? 0 : cbuf
];
1444 int mrf
= base_mrf
, vrf
;
1445 struct toy_src src
[4];
1448 const unsigned undefined_mask
=
1449 fcc
->tgsi
.outputs
[slot
].undefined_mask
;
1450 const int index
= fcc
->tgsi
.outputs
[slot
].index
;
1452 vrf
= toy_tgsi_get_vrf(&fcc
->tgsi
, TGSI_FILE_OUTPUT
, 0, index
);
1454 const struct toy_src tmp
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1455 tsrc_transpose(tmp
, src
);
1458 /* use (0, 0, 0, 0) */
1459 tsrc_transpose(tsrc_imm_f(0.0f
), src
);
1462 for (i
= 0; i
< 4; i
++) {
1463 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1465 if (undefined_mask
& (1 << i
))
1466 src
[i
] = tsrc_imm_f(0.0f
);
1468 tc_MOV(tc
, dst
, src
[i
]);
1470 mrf
+= fcc
->num_grf_per_vrf
;
1474 /* use (0, 0, 0, 0) */
1475 for (i
= 0; i
< 4; i
++) {
1476 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1478 tc_MOV(tc
, dst
, tsrc_imm_f(0.0f
));
1479 mrf
+= fcc
->num_grf_per_vrf
;
1483 /* select BLEND_STATE[rt] */
1485 struct toy_inst
*inst
;
1487 inst
= tc_MOV(tc
, tdst_offset(header
, 0, 2), tsrc_imm_ud(cbuf
));
1488 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1489 inst
->exec_size
= BRW_EXECUTE_1
;
1490 inst
->src
[0].rect
= TOY_RECT_010
;
1493 if (cbuf
== 0 && pos_slot
>= 0) {
1494 const int index
= fcc
->tgsi
.outputs
[pos_slot
].index
;
1495 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1496 struct toy_src src
[4];
1499 vrf
= toy_tgsi_get_vrf(&fcc
->tgsi
, TGSI_FILE_OUTPUT
, 0, index
);
1501 const struct toy_src tmp
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1502 tsrc_transpose(tmp
, src
);
1505 /* use (0, 0, 0, 0) */
1506 tsrc_transpose(tsrc_imm_f(0.0f
), src
);
1510 tc_MOV(tc
, dst
, src
[2]);
1512 mrf
+= fcc
->num_grf_per_vrf
;
1515 msg_type
= (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) ?
1516 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
:
1517 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1519 ctrl
= (cbuf
== num_cbufs
- 1) << 12 |
1522 desc
= tsrc_imm_mdesc_data_port(tc
, cbuf
== num_cbufs
- 1,
1523 mrf
- fcc
->first_free_mrf
, 0,
1524 header_present
, false,
1525 GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
,
1526 ctrl
, ILO_WM_DRAW_SURFACE(cbuf
));
1528 tc_add2(tc
, TOY_OPCODE_FB_WRITE
, tdst_null(),
1529 tsrc(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0), desc
);
1534 * Set up shader outputs for fixed-function units.
1537 fs_setup_shader_out(struct ilo_shader
*sh
, const struct toy_tgsi
*tgsi
)
1541 sh
->out
.count
= tgsi
->num_outputs
;
1542 for (i
= 0; i
< tgsi
->num_outputs
; i
++) {
1543 sh
->out
.register_indices
[i
] = tgsi
->outputs
[i
].index
;
1544 sh
->out
.semantic_names
[i
] = tgsi
->outputs
[i
].semantic_name
;
1545 sh
->out
.semantic_indices
[i
] = tgsi
->outputs
[i
].semantic_index
;
1547 if (tgsi
->outputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
)
1548 sh
->out
.has_pos
= true;
1553 * Set up shader inputs for fixed-function units.
1556 fs_setup_shader_in(struct ilo_shader
*sh
, const struct toy_tgsi
*tgsi
,
1561 sh
->in
.count
= tgsi
->num_inputs
;
1562 for (i
= 0; i
< tgsi
->num_inputs
; i
++) {
1563 sh
->in
.semantic_names
[i
] = tgsi
->inputs
[i
].semantic_name
;
1564 sh
->in
.semantic_indices
[i
] = tgsi
->inputs
[i
].semantic_index
;
1565 sh
->in
.interp
[i
] = tgsi
->inputs
[i
].interp
;
1566 sh
->in
.centroid
[i
] = tgsi
->inputs
[i
].centroid
;
1568 if (tgsi
->inputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
) {
1569 sh
->in
.has_pos
= true;
1572 else if (tgsi
->inputs
[i
].semantic_name
== TGSI_SEMANTIC_FACE
) {
1576 switch (tgsi
->inputs
[i
].interp
) {
1577 case TGSI_INTERPOLATE_CONSTANT
:
1578 sh
->in
.const_interp_enable
|= 1 << i
;
1580 case TGSI_INTERPOLATE_LINEAR
:
1581 sh
->in
.has_linear_interp
= true;
1583 if (tgsi
->inputs
[i
].centroid
) {
1584 sh
->in
.barycentric_interpolation_mode
|=
1585 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
1588 sh
->in
.barycentric_interpolation_mode
|=
1589 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
1592 case TGSI_INTERPOLATE_COLOR
:
1594 sh
->in
.const_interp_enable
|= 1 << i
;
1598 case TGSI_INTERPOLATE_PERSPECTIVE
:
1599 if (tgsi
->inputs
[i
].centroid
) {
1600 sh
->in
.barycentric_interpolation_mode
|=
1601 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
1604 sh
->in
.barycentric_interpolation_mode
|=
1605 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
1615 fs_setup_payloads(struct fs_compile_context
*fcc
)
1617 const struct ilo_shader
*sh
= fcc
->shader
;
1625 /* r1-r2: coordinates and etc. */
1626 grf
+= (fcc
->dispatch_mode
== GEN6_WM_32_DISPATCH_ENABLE
) ? 2 : 1;
1628 for (i
= 0; i
< Elements(fcc
->payloads
); i
++) {
1631 /* r3-r26 or r32-r55: barycentric interpolation parameters */
1632 for (interp
= 0; interp
< BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT
; interp
++) {
1633 if (!(sh
->in
.barycentric_interpolation_mode
& (1 << interp
)))
1636 fcc
->payloads
[i
].barycentric_interps
[interp
] = grf
;
1637 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 2 : 4;
1640 /* r27-r28 or r56-r57: interpoloated depth */
1641 if (sh
->in
.has_pos
) {
1642 fcc
->payloads
[i
].source_depth
= grf
;
1643 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 1 : 2;
1646 /* r29-r30 or r58-r59: interpoloated w */
1647 if (sh
->in
.has_pos
) {
1648 fcc
->payloads
[i
].source_w
= grf
;
1649 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 1 : 2;
1652 /* r31 or r60: position offset */
1654 fcc
->payloads
[i
].pos_offset
= grf
;
1658 if (fcc
->dispatch_mode
!= GEN6_WM_32_DISPATCH_ENABLE
)
1666 * Translate the TGSI tokens.
1669 fs_setup_tgsi(struct toy_compiler
*tc
, const struct tgsi_token
*tokens
,
1670 struct toy_tgsi
*tgsi
)
1672 if (ilo_debug
& ILO_DEBUG_FS
) {
1673 ilo_printf("dumping fragment shader\n");
1676 tgsi_dump(tokens
, 0);
1680 toy_compiler_translate_tgsi(tc
, tokens
, false, tgsi
);
1682 ilo_err("failed to translate FS TGSI tokens: %s\n", tc
->reason
);
1686 if (ilo_debug
& ILO_DEBUG_FS
) {
1687 ilo_printf("TGSI translator:\n");
1688 toy_tgsi_dump(tgsi
);
1690 toy_compiler_dump(tc
);
1698 * Set up FS compile context. This includes translating the TGSI tokens.
1701 fs_setup(struct fs_compile_context
*fcc
,
1702 const struct ilo_shader_state
*state
,
1703 const struct ilo_shader_variant
*variant
)
1707 memset(fcc
, 0, sizeof(*fcc
));
1709 fcc
->shader
= CALLOC_STRUCT(ilo_shader
);
1713 fcc
->variant
= variant
;
1715 toy_compiler_init(&fcc
->tc
, state
->info
.dev
);
1717 fcc
->dispatch_mode
= GEN6_WM_8_DISPATCH_ENABLE
;
1719 fcc
->tc
.templ
.access_mode
= BRW_ALIGN_1
;
1720 if (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) {
1721 fcc
->tc
.templ
.qtr_ctrl
= GEN6_COMPRESSION_1H
;
1722 fcc
->tc
.templ
.exec_size
= BRW_EXECUTE_16
;
1725 fcc
->tc
.templ
.qtr_ctrl
= GEN6_COMPRESSION_1Q
;
1726 fcc
->tc
.templ
.exec_size
= BRW_EXECUTE_8
;
1729 fcc
->tc
.rect_linear_width
= 8;
1732 * The classic driver uses the sampler cache (gen6) or the data cache
1735 fcc
->const_cache
= GEN6_SFID_DATAPORT_CONSTANT_CACHE
;
1737 if (!fs_setup_tgsi(&fcc
->tc
, state
->info
.tokens
, &fcc
->tgsi
)) {
1738 toy_compiler_cleanup(&fcc
->tc
);
1743 fs_setup_shader_in(fcc
->shader
, &fcc
->tgsi
, fcc
->variant
->u
.fs
.flatshade
);
1744 fs_setup_shader_out(fcc
->shader
, &fcc
->tgsi
);
1746 /* we do not make use of push constant buffers yet */
1749 fcc
->first_const_grf
= fs_setup_payloads(fcc
);
1750 fcc
->first_attr_grf
= fcc
->first_const_grf
+ num_consts
;
1751 fcc
->first_free_grf
= fcc
->first_attr_grf
+ fcc
->shader
->in
.count
* 2;
1752 fcc
->last_free_grf
= 127;
1754 /* m0 is reserved for system routines */
1755 fcc
->first_free_mrf
= 1;
1756 fcc
->last_free_mrf
= 15;
1758 /* instructions are compressed with BRW_EXECUTE_16 */
1759 fcc
->num_grf_per_vrf
=
1760 (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) ? 2 : 1;
1762 if (fcc
->tc
.dev
->gen
>= ILO_GEN(7)) {
1763 fcc
->last_free_grf
-= 15;
1764 fcc
->first_free_mrf
= fcc
->last_free_grf
+ 1;
1765 fcc
->last_free_mrf
= fcc
->first_free_mrf
+ 14;
1768 fcc
->shader
->in
.start_grf
= fcc
->first_const_grf
;
1769 fcc
->shader
->has_kill
= fcc
->tgsi
.uses_kill
;
1770 fcc
->shader
->dispatch_16
=
1771 (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
);
1777 * Compile the fragment shader.
1780 ilo_shader_compile_fs(const struct ilo_shader_state
*state
,
1781 const struct ilo_shader_variant
*variant
)
1783 struct fs_compile_context fcc
;
1785 if (!fs_setup(&fcc
, state
, variant
))
1790 if (!fs_compile(&fcc
)) {
1795 toy_tgsi_cleanup(&fcc
.tgsi
);
1796 toy_compiler_cleanup(&fcc
.tc
);