2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_util.h"
30 #include "toy_compiler.h"
32 #include "toy_legalize.h"
33 #include "toy_optimize.h"
34 #include "toy_helpers.h"
35 #include "ilo_context.h"
36 #include "ilo_shader_internal.h"
38 struct fs_compile_context
{
39 struct ilo_shader
*shader
;
40 const struct ilo_shader_variant
*variant
;
42 struct toy_compiler tc
;
45 enum brw_message_target const_cache
;
49 int barycentric_interps
[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT
];
67 fetch_position(struct fs_compile_context
*fcc
, struct toy_dst dst
)
69 struct toy_compiler
*tc
= &fcc
->tc
;
70 const struct toy_src src_z
=
71 tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].source_depth
, 0);
72 const struct toy_src src_w
=
73 tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].source_w
, 0);
75 (fcc
->variant
->u
.fs
.fb_height
) ? fcc
->variant
->u
.fs
.fb_height
: 1;
76 const bool origin_upper_left
=
77 (fcc
->tgsi
.props
.fs_coord_origin
== TGSI_FS_COORD_ORIGIN_UPPER_LEFT
);
78 const bool pixel_center_integer
=
79 (fcc
->tgsi
.props
.fs_coord_pixel_center
==
80 TGSI_FS_COORD_PIXEL_CENTER_INTEGER
);
81 struct toy_src subspan_x
, subspan_y
;
82 struct toy_dst tmp
, tmp_uw
;
83 struct toy_dst real_dst
[4];
85 tdst_transpose(dst
, real_dst
);
87 subspan_x
= tsrc_uw(tsrc(TOY_FILE_GRF
, 1, 2 * 4));
88 subspan_x
= tsrc_rect(subspan_x
, TOY_RECT_240
);
90 subspan_y
= tsrc_offset(subspan_x
, 0, 1);
92 tmp_uw
= tdst_uw(tc_alloc_tmp(tc
));
93 tmp
= tc_alloc_tmp(tc
);
96 tc_ADD(tc
, tmp_uw
, subspan_x
, tsrc_imm_v(0x10101010));
97 tc_MOV(tc
, tmp
, tsrc_from(tmp_uw
));
98 if (pixel_center_integer
)
99 tc_MOV(tc
, real_dst
[0], tsrc_from(tmp
));
101 tc_ADD(tc
, real_dst
[0], tsrc_from(tmp
), tsrc_imm_f(0.5f
));
104 tc_ADD(tc
, tmp_uw
, subspan_y
, tsrc_imm_v(0x11001100));
105 tc_MOV(tc
, tmp
, tsrc_from(tmp_uw
));
106 if (origin_upper_left
&& pixel_center_integer
) {
107 tc_MOV(tc
, real_dst
[1], tsrc_from(tmp
));
110 struct toy_src y
= tsrc_from(tmp
);
113 if (!pixel_center_integer
)
116 if (!origin_upper_left
) {
117 offset
+= (float) (fb_height
- 1);
121 tc_ADD(tc
, real_dst
[1], y
, tsrc_imm_f(offset
));
125 tc_MOV(tc
, real_dst
[2], src_z
);
126 tc_INV(tc
, real_dst
[3], src_w
);
130 fetch_face(struct fs_compile_context
*fcc
, struct toy_dst dst
)
132 struct toy_compiler
*tc
= &fcc
->tc
;
133 const struct toy_src r0
= tsrc_d(tsrc(TOY_FILE_GRF
, 0, 0));
134 struct toy_dst tmp_f
, tmp
;
135 struct toy_dst real_dst
[4];
137 tdst_transpose(dst
, real_dst
);
139 tmp_f
= tc_alloc_tmp(tc
);
141 tc_SHR(tc
, tmp
, tsrc_rect(r0
, TOY_RECT_010
), tsrc_imm_d(15));
142 tc_AND(tc
, tmp
, tsrc_from(tmp
), tsrc_imm_d(1));
143 tc_MOV(tc
, tmp_f
, tsrc_from(tmp
));
145 /* convert to 1.0 and -1.0 */
146 tc_MUL(tc
, tmp_f
, tsrc_from(tmp_f
), tsrc_imm_f(-2.0f
));
147 tc_ADD(tc
, real_dst
[0], tsrc_from(tmp_f
), tsrc_imm_f(1.0f
));
149 tc_MOV(tc
, real_dst
[1], tsrc_imm_f(0.0f
));
150 tc_MOV(tc
, real_dst
[2], tsrc_imm_f(0.0f
));
151 tc_MOV(tc
, real_dst
[3], tsrc_imm_f(1.0f
));
155 fetch_attr(struct fs_compile_context
*fcc
, struct toy_dst dst
, int slot
)
157 struct toy_compiler
*tc
= &fcc
->tc
;
158 struct toy_dst real_dst
[4];
159 bool is_const
= false;
162 tdst_transpose(dst
, real_dst
);
164 grf
= fcc
->first_attr_grf
+ slot
* 2;
166 switch (fcc
->tgsi
.inputs
[slot
].interp
) {
167 case TGSI_INTERPOLATE_CONSTANT
:
170 case TGSI_INTERPOLATE_LINEAR
:
171 if (fcc
->tgsi
.inputs
[slot
].centroid
)
172 mode
= BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
174 mode
= BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
176 case TGSI_INTERPOLATE_COLOR
:
177 if (fcc
->variant
->u
.fs
.flatshade
) {
182 case TGSI_INTERPOLATE_PERSPECTIVE
:
183 if (fcc
->tgsi
.inputs
[slot
].centroid
)
184 mode
= BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
186 mode
= BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
189 assert(!"unexpected FS interpolation");
190 mode
= BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
195 struct toy_src a0
[4];
197 a0
[0] = tsrc(TOY_FILE_GRF
, grf
+ 0, 3 * 4);
198 a0
[1] = tsrc(TOY_FILE_GRF
, grf
+ 0, 7 * 4);
199 a0
[2] = tsrc(TOY_FILE_GRF
, grf
+ 1, 3 * 4);
200 a0
[3] = tsrc(TOY_FILE_GRF
, grf
+ 1, 7 * 4);
202 for (ch
= 0; ch
< 4; ch
++)
203 tc_MOV(tc
, real_dst
[ch
], tsrc_rect(a0
[ch
], TOY_RECT_010
));
206 struct toy_src attr
[4], uv
;
208 attr
[0] = tsrc(TOY_FILE_GRF
, grf
+ 0, 0);
209 attr
[1] = tsrc(TOY_FILE_GRF
, grf
+ 0, 4 * 4);
210 attr
[2] = tsrc(TOY_FILE_GRF
, grf
+ 1, 0);
211 attr
[3] = tsrc(TOY_FILE_GRF
, grf
+ 1, 4 * 4);
213 uv
= tsrc(TOY_FILE_GRF
, fcc
->payloads
[0].barycentric_interps
[mode
], 0);
215 for (ch
= 0; ch
< 4; ch
++) {
216 tc_add2(tc
, BRW_OPCODE_PLN
, real_dst
[ch
],
217 tsrc_rect(attr
[ch
], TOY_RECT_010
), uv
);
221 if (fcc
->tgsi
.inputs
[slot
].semantic_name
== TGSI_SEMANTIC_FOG
) {
222 tc_MOV(tc
, real_dst
[1], tsrc_imm_f(0.0f
));
223 tc_MOV(tc
, real_dst
[2], tsrc_imm_f(0.0f
));
224 tc_MOV(tc
, real_dst
[3], tsrc_imm_f(1.0f
));
229 fs_lower_opcode_tgsi_in(struct fs_compile_context
*fcc
,
230 struct toy_dst dst
, int dim
, int idx
)
236 slot
= toy_tgsi_find_input(&fcc
->tgsi
, idx
);
240 switch (fcc
->tgsi
.inputs
[slot
].semantic_name
) {
241 case TGSI_SEMANTIC_POSITION
:
242 fetch_position(fcc
, dst
);
244 case TGSI_SEMANTIC_FACE
:
245 fetch_face(fcc
, dst
);
248 fetch_attr(fcc
, dst
, slot
);
254 fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context
*fcc
,
255 struct toy_dst dst
, int dim
,
258 const struct toy_dst offset
=
259 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
260 struct toy_compiler
*tc
= &fcc
->tc
;
261 unsigned simd_mode
, param_size
;
262 struct toy_inst
*inst
;
263 struct toy_src desc
, real_src
[4];
264 struct toy_dst tmp
, real_dst
[4];
267 tsrc_transpose(idx
, real_src
);
270 inst
= tc_MOV(tc
, offset
, real_src
[0]);
271 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
273 switch (inst
->exec_size
) {
275 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
279 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
283 assert(!"unsupported execution size");
284 tc_MOV(tc
, dst
, tsrc_imm_f(0.0f
));
289 desc
= tsrc_imm_mdesc_sampler(tc
, param_size
, param_size
* 4, false,
291 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
293 ILO_WM_CONST_SURFACE(dim
));
295 tmp
= tdst(TOY_FILE_VRF
, tc_alloc_vrf(tc
, param_size
* 4), 0);
296 inst
= tc_SEND(tc
, tmp
, tsrc_from(offset
), desc
, BRW_SFID_SAMPLER
);
297 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
299 tdst_transpose(dst
, real_dst
);
300 for (i
= 0; i
< 4; i
++) {
301 const struct toy_src src
=
302 tsrc_offset(tsrc_from(tmp
), param_size
* i
, 0);
304 /* cast to type D to make sure these are raw moves */
305 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
310 fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context
*fcc
,
311 struct toy_dst dst
, int dim
,
314 const int grf
= fcc
->first_const_grf
+ idx
.val32
/ 2;
315 const int grf_subreg
= (idx
.val32
& 1) * 16;
317 struct toy_dst real_dst
[4];
320 if (!fcc
->variant
->use_pcb
|| dim
!= 0 || idx
.file
!= TOY_FILE_IMM
||
321 grf
>= fcc
->first_attr_grf
)
324 src
= tsrc_rect(tsrc(TOY_FILE_GRF
, grf
, grf_subreg
), TOY_RECT_010
);
326 tdst_transpose(dst
, real_dst
);
327 for (i
= 0; i
< 4; i
++) {
328 /* cast to type D to make sure these are raw moves */
329 tc_MOV(&fcc
->tc
, tdst_d(real_dst
[i
]), tsrc_d(tsrc_offset(src
, 0, i
)));
336 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context
*fcc
,
337 struct toy_dst dst
, int dim
, struct toy_src idx
)
339 const struct toy_dst header
=
340 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
341 const struct toy_dst global_offset
=
342 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 2 * 4));
343 const struct toy_src r0
= tsrc_ud(tsrc(TOY_FILE_GRF
, 0, 0));
344 struct toy_compiler
*tc
= &fcc
->tc
;
345 unsigned msg_type
, msg_ctrl
, msg_len
;
346 struct toy_inst
*inst
;
348 struct toy_dst tmp
, real_dst
[4];
351 if (fs_lower_opcode_tgsi_const_pcb(fcc
, dst
, dim
, idx
))
354 /* set message header */
355 inst
= tc_MOV(tc
, header
, r0
);
356 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
358 /* set global offset */
359 inst
= tc_MOV(tc
, global_offset
, idx
);
360 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
361 inst
->exec_size
= BRW_EXECUTE_1
;
362 inst
->src
[0].rect
= TOY_RECT_010
;
364 msg_type
= BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ
;
365 msg_ctrl
= BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW
<< 8;
368 desc
= tsrc_imm_mdesc_data_port(tc
, false, msg_len
, 1, true, false,
369 msg_type
, msg_ctrl
, ILO_WM_CONST_SURFACE(dim
));
371 tmp
= tc_alloc_tmp(tc
);
373 tc_SEND(tc
, tmp
, tsrc_from(header
), desc
, fcc
->const_cache
);
375 tdst_transpose(dst
, real_dst
);
376 for (i
= 0; i
< 4; i
++) {
377 const struct toy_src src
=
378 tsrc_offset(tsrc_rect(tsrc_from(tmp
), TOY_RECT_010
), 0, i
);
380 /* cast to type D to make sure these are raw moves */
381 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
386 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context
*fcc
,
387 struct toy_dst dst
, int dim
, struct toy_src idx
)
389 struct toy_compiler
*tc
= &fcc
->tc
;
390 const struct toy_dst offset
=
391 tdst_ud(tdst(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0));
393 struct toy_inst
*inst
;
394 struct toy_dst tmp
, real_dst
[4];
397 if (fs_lower_opcode_tgsi_const_pcb(fcc
, dst
, dim
, idx
))
401 * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
402 * changed from OWord Block Read to ld to increase performance in the
403 * classic driver. Since we use the constant cache instead of the data
404 * cache, I wonder if we still want to follow the classic driver.
408 inst
= tc_MOV(tc
, offset
, tsrc_rect(idx
, TOY_RECT_010
));
409 inst
->exec_size
= BRW_EXECUTE_8
;
410 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
412 desc
= tsrc_imm_mdesc_sampler(tc
, 1, 1, false,
413 BRW_SAMPLER_SIMD_MODE_SIMD4X2
,
414 GEN5_SAMPLER_MESSAGE_SAMPLE_LD
,
416 ILO_WM_CONST_SURFACE(dim
));
418 tmp
= tc_alloc_tmp(tc
);
419 inst
= tc_SEND(tc
, tmp
, tsrc_from(offset
), desc
, BRW_SFID_SAMPLER
);
420 inst
->exec_size
= BRW_EXECUTE_8
;
421 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
423 tdst_transpose(dst
, real_dst
);
424 for (i
= 0; i
< 4; i
++) {
425 const struct toy_src src
=
426 tsrc_offset(tsrc_rect(tsrc_from(tmp
), TOY_RECT_010
), 0, i
);
428 /* cast to type D to make sure these are raw moves */
429 tc_MOV(tc
, tdst_d(real_dst
[i
]), tsrc_d(src
));
434 fs_lower_opcode_tgsi_imm(struct fs_compile_context
*fcc
,
435 struct toy_dst dst
, int idx
)
438 struct toy_dst real_dst
[4];
441 imm
= toy_tgsi_get_imm(&fcc
->tgsi
, idx
, NULL
);
443 tdst_transpose(dst
, real_dst
);
445 for (ch
= 0; ch
< 4; ch
++)
446 tc_MOV(&fcc
->tc
, tdst_ud(real_dst
[ch
]), tsrc_imm_ud(imm
[ch
]));
450 fs_lower_opcode_tgsi_sv(struct fs_compile_context
*fcc
,
451 struct toy_dst dst
, int dim
, int idx
)
453 struct toy_compiler
*tc
= &fcc
->tc
;
454 const struct toy_tgsi
*tgsi
= &fcc
->tgsi
;
459 slot
= toy_tgsi_find_system_value(tgsi
, idx
);
463 switch (tgsi
->system_values
[slot
].semantic_name
) {
464 case TGSI_SEMANTIC_PRIMID
:
465 case TGSI_SEMANTIC_INSTANCEID
:
466 case TGSI_SEMANTIC_VERTEXID
:
468 tc_fail(tc
, "unhandled system value");
469 tc_MOV(tc
, dst
, tsrc_imm_d(0));
475 fs_lower_opcode_tgsi_direct(struct fs_compile_context
*fcc
,
476 struct toy_inst
*inst
)
478 struct toy_compiler
*tc
= &fcc
->tc
;
481 assert(inst
->src
[0].file
== TOY_FILE_IMM
);
482 dim
= inst
->src
[0].val32
;
484 assert(inst
->src
[1].file
== TOY_FILE_IMM
);
485 idx
= inst
->src
[1].val32
;
487 switch (inst
->opcode
) {
488 case TOY_OPCODE_TGSI_IN
:
489 fs_lower_opcode_tgsi_in(fcc
, inst
->dst
, dim
, idx
);
491 case TOY_OPCODE_TGSI_CONST
:
492 if (tc
->dev
->gen
>= ILO_GEN(7))
493 fs_lower_opcode_tgsi_const_gen7(fcc
, inst
->dst
, dim
, inst
->src
[1]);
495 fs_lower_opcode_tgsi_const_gen6(fcc
, inst
->dst
, dim
, inst
->src
[1]);
497 case TOY_OPCODE_TGSI_SV
:
498 fs_lower_opcode_tgsi_sv(fcc
, inst
->dst
, dim
, idx
);
500 case TOY_OPCODE_TGSI_IMM
:
502 fs_lower_opcode_tgsi_imm(fcc
, inst
->dst
, idx
);
505 tc_fail(tc
, "unhandled TGSI fetch");
509 tc_discard_inst(tc
, inst
);
513 fs_lower_opcode_tgsi_indirect(struct fs_compile_context
*fcc
,
514 struct toy_inst
*inst
)
516 struct toy_compiler
*tc
= &fcc
->tc
;
517 enum tgsi_file_type file
;
519 struct toy_src indirect_dim
, indirect_idx
;
521 assert(inst
->src
[0].file
== TOY_FILE_IMM
);
522 file
= inst
->src
[0].val32
;
524 assert(inst
->src
[1].file
== TOY_FILE_IMM
);
525 dim
= inst
->src
[1].val32
;
526 indirect_dim
= inst
->src
[2];
528 assert(inst
->src
[3].file
== TOY_FILE_IMM
);
529 idx
= inst
->src
[3].val32
;
530 indirect_idx
= inst
->src
[4];
532 /* no dimension indirection */
533 assert(indirect_dim
.file
== TOY_FILE_IMM
);
534 dim
+= indirect_dim
.val32
;
536 switch (inst
->opcode
) {
537 case TOY_OPCODE_TGSI_INDIRECT_FETCH
:
538 if (file
== TGSI_FILE_CONSTANT
) {
540 struct toy_dst tmp
= tc_alloc_tmp(tc
);
542 tc_ADD(tc
, tmp
, indirect_idx
, tsrc_imm_d(idx
));
543 indirect_idx
= tsrc_from(tmp
);
546 fs_lower_opcode_tgsi_indirect_const(fcc
, inst
->dst
, dim
, indirect_idx
);
550 case TOY_OPCODE_TGSI_INDIRECT_STORE
:
552 tc_fail(tc
, "unhandled TGSI indirection");
556 tc_discard_inst(tc
, inst
);
560 * Emit instructions to move sampling parameters to the message registers.
563 fs_add_sampler_params_gen6(struct toy_compiler
*tc
, int msg_type
,
564 int base_mrf
, int param_size
,
565 struct toy_src
*coords
, int num_coords
,
566 struct toy_src bias_or_lod
, struct toy_src ref_or_si
,
567 struct toy_src
*ddx
, struct toy_src
*ddy
,
572 assert(num_coords
<= 4);
573 assert(num_derivs
<= 3 && num_derivs
<= num_coords
);
575 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
577 case GEN5_SAMPLER_MESSAGE_SAMPLE
:
578 for (i
= 0; i
< num_coords
; i
++)
579 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
580 num_params
= num_coords
;
582 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
:
583 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
:
584 for (i
= 0; i
< num_coords
; i
++)
585 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
586 tc_MOV(tc
, SAMPLER_PARAM(4), bias_or_lod
);
589 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
:
590 for (i
= 0; i
< num_coords
; i
++)
591 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
592 tc_MOV(tc
, SAMPLER_PARAM(4), ref_or_si
);
595 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
:
596 for (i
= 0; i
< num_coords
; i
++)
597 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
598 for (i
= 0; i
< num_derivs
; i
++) {
599 tc_MOV(tc
, SAMPLER_PARAM(4 + i
* 2), ddx
[i
]);
600 tc_MOV(tc
, SAMPLER_PARAM(5 + i
* 2), ddy
[i
]);
602 num_params
= 4 + num_derivs
* 2;
604 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
:
605 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
:
606 for (i
= 0; i
< num_coords
; i
++)
607 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
608 tc_MOV(tc
, SAMPLER_PARAM(4), ref_or_si
);
609 tc_MOV(tc
, SAMPLER_PARAM(5), bias_or_lod
);
612 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD
:
613 assert(num_coords
<= 3);
615 for (i
= 0; i
< num_coords
; i
++)
616 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(i
)), coords
[i
]);
617 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(3)), bias_or_lod
);
618 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(4)), ref_or_si
);
621 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
:
622 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), bias_or_lod
);
626 tc_fail(tc
, "unknown sampler opcode");
632 return num_params
* param_size
;
636 fs_add_sampler_params_gen7(struct toy_compiler
*tc
, int msg_type
,
637 int base_mrf
, int param_size
,
638 struct toy_src
*coords
, int num_coords
,
639 struct toy_src bias_or_lod
, struct toy_src ref_or_si
,
640 struct toy_src
*ddx
, struct toy_src
*ddy
,
645 assert(num_coords
<= 4);
646 assert(num_derivs
<= 3 && num_derivs
<= num_coords
);
648 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
650 case GEN5_SAMPLER_MESSAGE_SAMPLE
:
651 for (i
= 0; i
< num_coords
; i
++)
652 tc_MOV(tc
, SAMPLER_PARAM(i
), coords
[i
]);
653 num_params
= num_coords
;
655 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
:
656 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
:
657 tc_MOV(tc
, SAMPLER_PARAM(0), bias_or_lod
);
658 for (i
= 0; i
< num_coords
; i
++)
659 tc_MOV(tc
, SAMPLER_PARAM(1 + i
), coords
[i
]);
660 num_params
= 1 + num_coords
;
662 case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
:
663 tc_MOV(tc
, SAMPLER_PARAM(0), ref_or_si
);
664 for (i
= 0; i
< num_coords
; i
++)
665 tc_MOV(tc
, SAMPLER_PARAM(1 + i
), coords
[i
]);
666 num_params
= 1 + num_coords
;
668 case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
:
669 for (i
= 0; i
< num_coords
; i
++) {
670 tc_MOV(tc
, SAMPLER_PARAM(i
* 3), coords
[i
]);
671 if (i
< num_derivs
) {
672 tc_MOV(tc
, SAMPLER_PARAM(i
* 3 + 1), ddx
[i
]);
673 tc_MOV(tc
, SAMPLER_PARAM(i
* 3 + 2), ddy
[i
]);
676 num_params
= num_coords
* 3 - ((num_coords
> num_derivs
) ? 2 : 0);
678 case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
:
679 case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
:
680 tc_MOV(tc
, SAMPLER_PARAM(0), ref_or_si
);
681 tc_MOV(tc
, SAMPLER_PARAM(1), bias_or_lod
);
682 for (i
= 0; i
< num_coords
; i
++)
683 tc_MOV(tc
, SAMPLER_PARAM(2 + i
), coords
[i
]);
684 num_params
= 2 + num_coords
;
686 case GEN5_SAMPLER_MESSAGE_SAMPLE_LD
:
687 assert(num_coords
>= 1 && num_coords
<= 3);
689 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), coords
[0]);
690 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(1)), bias_or_lod
);
691 for (i
= 1; i
< num_coords
; i
++)
692 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(1 + i
)), coords
[i
]);
693 num_params
= 1 + num_coords
;
695 case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
:
696 tc_MOV(tc
, tdst_d(SAMPLER_PARAM(0)), bias_or_lod
);
700 tc_fail(tc
, "unknown sampler opcode");
706 return num_params
* param_size
;
710 * Set up message registers and return the message descriptor for sampling.
712 static struct toy_src
713 fs_prepare_tgsi_sampling(struct toy_compiler
*tc
, const struct toy_inst
*inst
,
714 int base_mrf
, const uint32_t *saturate_coords
,
715 unsigned *ret_sampler_index
)
717 unsigned simd_mode
, msg_type
, msg_len
, sampler_index
, binding_table_index
;
718 struct toy_src coords
[4], ddx
[4], ddy
[4], bias_or_lod
, ref_or_si
;
719 int num_coords
, ref_pos
, num_derivs
;
720 int sampler_src
, param_size
, i
;
722 switch (inst
->exec_size
) {
724 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD8
;
728 simd_mode
= BRW_SAMPLER_SIMD_MODE_SIMD16
;
732 tc_fail(tc
, "unsupported execute size for sampling");
737 num_coords
= tgsi_util_get_texture_coord_dim(inst
->tex
.target
, &ref_pos
);
738 tsrc_transpose(inst
->src
[0], coords
);
739 bias_or_lod
= tsrc_null();
740 ref_or_si
= tsrc_null();
747 * src0 := (x, y, z, w)
752 * For TEX2, TXB2, and TXL2,
754 * src0 := (x, y, z, w)
755 * src1 := (v or bias or lod, ...)
758 * For TEX, TXB, TXL, and TXP,
760 * src0 := (x, y, z, w or bias or lod or projection)
774 * src0 := (x, y, z, w or lod)
777 * State trackers should not generate opcode+texture combinations with
778 * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
780 switch (inst
->opcode
) {
781 case TOY_OPCODE_TGSI_TEX
:
785 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
786 ref_or_si
= coords
[ref_pos
];
789 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
792 case TOY_OPCODE_TGSI_TXD
:
796 msg_type
= HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE
;
797 ref_or_si
= coords
[ref_pos
];
799 if (tc
->dev
->gen
< ILO_GEN(7.5))
800 tc_fail(tc
, "TXD with shadow sampler not supported");
803 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS
;
806 tsrc_transpose(inst
->src
[1], ddx
);
807 tsrc_transpose(inst
->src
[2], ddy
);
808 num_derivs
= num_coords
;
811 case TOY_OPCODE_TGSI_TXP
:
815 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
816 ref_or_si
= coords
[ref_pos
];
819 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
822 /* project the coordinates */
824 struct toy_dst tmp
[4];
826 tc_alloc_tmp4(tc
, tmp
);
828 tc_INV(tc
, tmp
[3], coords
[3]);
829 for (i
= 0; i
< num_coords
&& i
< 3; i
++) {
830 tc_MUL(tc
, tmp
[i
], coords
[i
], tsrc_from(tmp
[3]));
831 coords
[i
] = tsrc_from(tmp
[i
]);
835 tc_MUL(tc
, tmp
[ref_pos
], ref_or_si
, tsrc_from(tmp
[3]));
836 ref_or_si
= tsrc_from(tmp
[ref_pos
]);
840 case TOY_OPCODE_TGSI_TXB
:
844 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
;
845 ref_or_si
= coords
[ref_pos
];
848 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
;
851 bias_or_lod
= coords
[3];
853 case TOY_OPCODE_TGSI_TXL
:
857 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
858 ref_or_si
= coords
[ref_pos
];
861 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
864 bias_or_lod
= coords
[3];
866 case TOY_OPCODE_TGSI_TXF
:
867 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LD
;
869 switch (inst
->tex
.target
) {
870 case TGSI_TEXTURE_2D_MSAA
:
871 case TGSI_TEXTURE_2D_ARRAY_MSAA
:
872 assert(ref_pos
>= 0 && ref_pos
< 4);
873 /* lod is always 0 */
874 bias_or_lod
= tsrc_imm_d(0);
875 ref_or_si
= coords
[ref_pos
];
878 bias_or_lod
= coords
[3];
882 /* offset the coordinates */
883 if (!tsrc_is_null(inst
->tex
.offsets
[0])) {
884 struct toy_dst tmp
[4];
885 struct toy_src offsets
[4];
887 tc_alloc_tmp4(tc
, tmp
);
888 tsrc_transpose(inst
->tex
.offsets
[0], offsets
);
890 for (i
= 0; i
< num_coords
; i
++) {
891 tc_ADD(tc
, tmp
[i
], coords
[i
], offsets
[i
]);
892 coords
[i
] = tsrc_from(tmp
[i
]);
898 case TOY_OPCODE_TGSI_TXQ
:
899 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
901 bias_or_lod
= coords
[0];
903 case TOY_OPCODE_TGSI_TXQ_LZ
:
904 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO
;
908 case TOY_OPCODE_TGSI_TEX2
:
912 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE
;
915 struct toy_src src1
[4];
916 tsrc_transpose(inst
->src
[1], src1
);
917 ref_or_si
= src1
[ref_pos
- 4];
920 ref_or_si
= coords
[ref_pos
];
924 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE
;
929 case TOY_OPCODE_TGSI_TXB2
:
933 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE
;
934 ref_or_si
= coords
[ref_pos
];
937 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS
;
941 struct toy_src src1
[4];
942 tsrc_transpose(inst
->src
[1], src1
);
943 bias_or_lod
= src1
[0];
948 case TOY_OPCODE_TGSI_TXL2
:
952 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE
;
953 ref_or_si
= coords
[ref_pos
];
956 msg_type
= GEN5_SAMPLER_MESSAGE_SAMPLE_LOD
;
960 struct toy_src src1
[4];
961 tsrc_transpose(inst
->src
[1], src1
);
962 bias_or_lod
= src1
[0];
968 assert(!"unhandled sampling opcode");
973 assert(inst
->src
[sampler_src
].file
== TOY_FILE_IMM
);
974 sampler_index
= inst
->src
[sampler_src
].val32
;
975 binding_table_index
= ILO_WM_TEXTURE_SURFACE(sampler_index
);
978 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
980 * "Note that the (cube map) coordinates delivered to the sampling
981 * engine must already have been divided by the component with the
982 * largest absolute value."
984 switch (inst
->tex
.target
) {
985 case TGSI_TEXTURE_CUBE
:
986 case TGSI_TEXTURE_SHADOWCUBE
:
987 case TGSI_TEXTURE_CUBE_ARRAY
:
988 case TGSI_TEXTURE_SHADOWCUBE_ARRAY
:
989 /* TXQ does not need coordinates */
990 if (num_coords
>= 3) {
991 struct toy_dst tmp
[4];
993 tc_alloc_tmp4(tc
, tmp
);
995 tc_SEL(tc
, tmp
[3], tsrc_absolute(coords
[0]),
996 tsrc_absolute(coords
[1]), BRW_CONDITIONAL_GE
);
997 tc_SEL(tc
, tmp
[3], tsrc_from(tmp
[3]),
998 tsrc_absolute(coords
[2]), BRW_CONDITIONAL_GE
);
999 tc_INV(tc
, tmp
[3], tsrc_from(tmp
[3]));
1001 for (i
= 0; i
< 3; i
++) {
1002 tc_MUL(tc
, tmp
[i
], coords
[i
], tsrc_from(tmp
[3]));
1003 coords
[i
] = tsrc_from(tmp
[i
]);
1010 * Saturate (s, t, r). saturate_coords is set for sampler and coordinate
1011 * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is
1012 * so that sampling outside the border gets the correct colors.
1014 for (i
= 0; i
< MIN2(num_coords
, 3); i
++) {
1017 if (!(saturate_coords
[i
] & (1 << sampler_index
)))
1020 switch (inst
->tex
.target
) {
1021 case TGSI_TEXTURE_RECT
:
1022 case TGSI_TEXTURE_SHADOWRECT
:
1031 struct toy_src min
, max
;
1034 tc_fail(tc
, "GL_CLAMP with rectangle texture unsupported");
1035 tmp
= tc_alloc_tmp(tc
);
1037 /* saturate to [0, width] or [0, height] */
1039 min
= tsrc_imm_f(0.0f
);
1040 max
= tsrc_imm_f(2048.0f
);
1042 tc_SEL(tc
, tmp
, coords
[i
], min
, BRW_CONDITIONAL_G
);
1043 tc_SEL(tc
, tmp
, tsrc_from(tmp
), max
, BRW_CONDITIONAL_L
);
1045 coords
[i
] = tsrc_from(tmp
);
1049 struct toy_inst
*inst2
;
1051 tmp
= tc_alloc_tmp(tc
);
1053 /* saturate to [0.0f, 1.0f] */
1054 inst2
= tc_MOV(tc
, tmp
, coords
[i
]);
1055 inst2
->saturate
= true;
1057 coords
[i
] = tsrc_from(tmp
);
1061 /* set up sampler parameters */
1062 if (tc
->dev
->gen
>= ILO_GEN(7)) {
1063 msg_len
= fs_add_sampler_params_gen7(tc
, msg_type
, base_mrf
, param_size
,
1064 coords
, num_coords
, bias_or_lod
, ref_or_si
, ddx
, ddy
, num_derivs
);
1067 msg_len
= fs_add_sampler_params_gen6(tc
, msg_type
, base_mrf
, param_size
,
1068 coords
, num_coords
, bias_or_lod
, ref_or_si
, ddx
, ddy
, num_derivs
);
1072 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
1074 * "The maximum message length allowed to the sampler is 11. This would
1075 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
1079 tc_fail(tc
, "maximum length for messages to the sampler is 11");
1081 if (ret_sampler_index
)
1082 *ret_sampler_index
= sampler_index
;
1084 return tsrc_imm_mdesc_sampler(tc
, msg_len
, 4 * param_size
,
1085 false, simd_mode
, msg_type
, sampler_index
, binding_table_index
);
1089 fs_lower_opcode_tgsi_sampling(struct fs_compile_context
*fcc
,
1090 struct toy_inst
*inst
)
1092 struct toy_compiler
*tc
= &fcc
->tc
;
1093 struct toy_dst dst
[4], tmp
[4];
1094 struct toy_src desc
;
1095 unsigned sampler_index
;
1099 desc
= fs_prepare_tgsi_sampling(tc
, inst
,
1100 fcc
->first_free_mrf
,
1101 fcc
->variant
->saturate_tex_coords
,
1104 switch (inst
->opcode
) {
1105 case TOY_OPCODE_TGSI_TXF
:
1106 case TOY_OPCODE_TGSI_TXQ
:
1107 case TOY_OPCODE_TGSI_TXQ_LZ
:
1108 need_filter
= false;
1115 toy_compiler_lower_to_send(tc
, inst
, false, BRW_SFID_SAMPLER
);
1116 inst
->src
[0] = tsrc(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0);
1117 inst
->src
[1] = desc
;
1118 for (i
= 2; i
< Elements(inst
->src
); i
++)
1119 inst
->src
[i
] = tsrc_null();
1121 /* write to temps first */
1122 tc_alloc_tmp4(tc
, tmp
);
1123 for (i
= 0; i
< 4; i
++)
1124 tmp
[i
].type
= inst
->dst
.type
;
1125 tdst_transpose(inst
->dst
, dst
);
1128 tc_move_inst(tc
, inst
);
1131 assert(sampler_index
< fcc
->variant
->num_sampler_views
);
1132 swizzles
[0] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].r
;
1133 swizzles
[1] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].g
;
1134 swizzles
[2] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].b
;
1135 swizzles
[3] = fcc
->variant
->sampler_view_swizzles
[sampler_index
].a
;
1138 swizzles
[0] = PIPE_SWIZZLE_RED
;
1139 swizzles
[1] = PIPE_SWIZZLE_GREEN
;
1140 swizzles
[2] = PIPE_SWIZZLE_BLUE
;
1141 swizzles
[3] = PIPE_SWIZZLE_ALPHA
;
1144 /* swizzle the results */
1145 for (i
= 0; i
< 4; i
++) {
1146 switch (swizzles
[i
]) {
1147 case PIPE_SWIZZLE_ZERO
:
1148 tc_MOV(tc
, dst
[i
], tsrc_imm_f(0.0f
));
1150 case PIPE_SWIZZLE_ONE
:
1151 tc_MOV(tc
, dst
[i
], tsrc_imm_f(1.0f
));
1154 tc_MOV(tc
, dst
[i
], tsrc_from(tmp
[swizzles
[i
]]));
1161 fs_lower_opcode_derivative(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1163 struct toy_dst dst
[4];
1164 struct toy_src src
[4];
1167 tdst_transpose(inst
->dst
, dst
);
1168 tsrc_transpose(inst
->src
[0], src
);
1171 * Every four fragments are from a 2x2 subspan, with
1173 * fragment 1 on the top-left,
1174 * fragment 2 on the top-right,
1175 * fragment 3 on the bottom-left,
1176 * fragment 4 on the bottom-right.
1178 * DDX should thus produce
1180 * dst = src.yyww - src.xxzz
1182 * and DDY should produce
1184 * dst = src.zzww - src.xxyy
1186 * But since we are in BRW_ALIGN_1, swizzling does not work and we have to
1187 * play with the region parameters.
1189 if (inst
->opcode
== TOY_OPCODE_DDX
) {
1190 for (i
= 0; i
< 4; i
++) {
1191 struct toy_src left
, right
;
1193 left
= tsrc_rect(src
[i
], TOY_RECT_220
);
1194 right
= tsrc_offset(left
, 0, 1);
1196 tc_ADD(tc
, dst
[i
], right
, tsrc_negate(left
));
1200 for (i
= 0; i
< 4; i
++) {
1201 struct toy_src top
, bottom
;
1203 /* approximate with dst = src.zzzz - src.xxxx */
1204 top
= tsrc_rect(src
[i
], TOY_RECT_440
);
1205 bottom
= tsrc_offset(top
, 0, 2);
1207 tc_ADD(tc
, dst
[i
], bottom
, tsrc_negate(top
));
1211 tc_discard_inst(tc
, inst
);
1215 fs_lower_opcode_fb_write(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1217 /* fs_write_fb() has set up the message registers */
1218 toy_compiler_lower_to_send(tc
, inst
, true,
1219 GEN6_SFID_DATAPORT_RENDER_CACHE
);
1223 fs_lower_opcode_kil(struct toy_compiler
*tc
, struct toy_inst
*inst
)
1225 struct toy_dst pixel_mask_dst
;
1226 struct toy_src f0
, pixel_mask
;
1227 struct toy_inst
*tmp
;
1229 /* lower half of r1.7:ud */
1230 pixel_mask_dst
= tdst_uw(tdst(TOY_FILE_GRF
, 1, 7 * 4));
1231 pixel_mask
= tsrc_rect(tsrc_from(pixel_mask_dst
), TOY_RECT_010
);
1233 f0
= tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF
, BRW_ARF_FLAG
, 0)), TOY_RECT_010
);
1235 /* KILL or KILL_IF */
1236 if (tsrc_is_null(inst
->src
[0])) {
1237 struct toy_src dummy
= tsrc_uw(tsrc(TOY_FILE_GRF
, 0, 0));
1238 struct toy_dst f0_dst
= tdst_uw(tdst(TOY_FILE_ARF
, BRW_ARF_FLAG
, 0));
1240 /* create a mask that masks out all pixels */
1241 tmp
= tc_MOV(tc
, f0_dst
, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010
));
1242 tmp
->exec_size
= BRW_EXECUTE_1
;
1243 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1245 tc_CMP(tc
, tdst_null(), dummy
, dummy
, BRW_CONDITIONAL_NEQ
);
1247 /* swapping the two src operands breaks glBitmap()!? */
1248 tmp
= tc_AND(tc
, pixel_mask_dst
, f0
, pixel_mask
);
1249 tmp
->exec_size
= BRW_EXECUTE_1
;
1250 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1253 struct toy_src src
[4];
1256 tsrc_transpose(inst
->src
[0], src
);
1257 /* mask out killed pixels */
1258 for (i
= 0; i
< 4; i
++) {
1259 tc_CMP(tc
, tdst_null(), src
[i
], tsrc_imm_f(0.0f
),
1260 BRW_CONDITIONAL_GE
);
1262 /* swapping the two src operands breaks glBitmap()!? */
1263 tmp
= tc_AND(tc
, pixel_mask_dst
, f0
, pixel_mask
);
1264 tmp
->exec_size
= BRW_EXECUTE_1
;
1265 tmp
->mask_ctrl
= BRW_MASK_DISABLE
;
1269 tc_discard_inst(tc
, inst
);
1273 fs_lower_virtual_opcodes(struct fs_compile_context
*fcc
)
1275 struct toy_compiler
*tc
= &fcc
->tc
;
1276 struct toy_inst
*inst
;
1278 /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1280 while ((inst
= tc_next(tc
)) != NULL
) {
1281 switch (inst
->opcode
) {
1282 case TOY_OPCODE_TGSI_IN
:
1283 case TOY_OPCODE_TGSI_CONST
:
1284 case TOY_OPCODE_TGSI_SV
:
1285 case TOY_OPCODE_TGSI_IMM
:
1286 fs_lower_opcode_tgsi_direct(fcc
, inst
);
1288 case TOY_OPCODE_TGSI_INDIRECT_FETCH
:
1289 case TOY_OPCODE_TGSI_INDIRECT_STORE
:
1290 fs_lower_opcode_tgsi_indirect(fcc
, inst
);
1292 case TOY_OPCODE_TGSI_TEX
:
1293 case TOY_OPCODE_TGSI_TXB
:
1294 case TOY_OPCODE_TGSI_TXD
:
1295 case TOY_OPCODE_TGSI_TXL
:
1296 case TOY_OPCODE_TGSI_TXP
:
1297 case TOY_OPCODE_TGSI_TXF
:
1298 case TOY_OPCODE_TGSI_TXQ
:
1299 case TOY_OPCODE_TGSI_TXQ_LZ
:
1300 case TOY_OPCODE_TGSI_TEX2
:
1301 case TOY_OPCODE_TGSI_TXB2
:
1302 case TOY_OPCODE_TGSI_TXL2
:
1303 case TOY_OPCODE_TGSI_SAMPLE
:
1304 case TOY_OPCODE_TGSI_SAMPLE_I
:
1305 case TOY_OPCODE_TGSI_SAMPLE_I_MS
:
1306 case TOY_OPCODE_TGSI_SAMPLE_B
:
1307 case TOY_OPCODE_TGSI_SAMPLE_C
:
1308 case TOY_OPCODE_TGSI_SAMPLE_C_LZ
:
1309 case TOY_OPCODE_TGSI_SAMPLE_D
:
1310 case TOY_OPCODE_TGSI_SAMPLE_L
:
1311 case TOY_OPCODE_TGSI_GATHER4
:
1312 case TOY_OPCODE_TGSI_SVIEWINFO
:
1313 case TOY_OPCODE_TGSI_SAMPLE_POS
:
1314 case TOY_OPCODE_TGSI_SAMPLE_INFO
:
1315 fs_lower_opcode_tgsi_sampling(fcc
, inst
);
1321 while ((inst
= tc_next(tc
)) != NULL
) {
1322 switch (inst
->opcode
) {
1323 case TOY_OPCODE_INV
:
1324 case TOY_OPCODE_LOG
:
1325 case TOY_OPCODE_EXP
:
1326 case TOY_OPCODE_SQRT
:
1327 case TOY_OPCODE_RSQ
:
1328 case TOY_OPCODE_SIN
:
1329 case TOY_OPCODE_COS
:
1330 case TOY_OPCODE_FDIV
:
1331 case TOY_OPCODE_POW
:
1332 case TOY_OPCODE_INT_DIV_QUOTIENT
:
1333 case TOY_OPCODE_INT_DIV_REMAINDER
:
1334 toy_compiler_lower_math(tc
, inst
);
1336 case TOY_OPCODE_DDX
:
1337 case TOY_OPCODE_DDY
:
1338 fs_lower_opcode_derivative(tc
, inst
);
1340 case TOY_OPCODE_FB_WRITE
:
1341 fs_lower_opcode_fb_write(tc
, inst
);
1343 case TOY_OPCODE_KIL
:
1344 fs_lower_opcode_kil(tc
, inst
);
1347 if (inst
->opcode
> 127)
1348 tc_fail(tc
, "unhandled virtual opcode");
1355 * Compile the shader.
1358 fs_compile(struct fs_compile_context
*fcc
)
1360 struct toy_compiler
*tc
= &fcc
->tc
;
1361 struct ilo_shader
*sh
= fcc
->shader
;
1363 fs_lower_virtual_opcodes(fcc
);
1364 toy_compiler_legalize_for_ra(tc
);
1365 toy_compiler_optimize(tc
);
1366 toy_compiler_allocate_registers(tc
,
1367 fcc
->first_free_grf
,
1369 fcc
->num_grf_per_vrf
);
1370 toy_compiler_legalize_for_asm(tc
);
1373 ilo_err("failed to legalize FS instructions: %s\n", tc
->reason
);
1377 if (ilo_debug
& ILO_DEBUG_FS
) {
1378 ilo_printf("legalized instructions:\n");
1379 toy_compiler_dump(tc
);
1384 sh
->kernel
= toy_compiler_assemble(tc
, &sh
->kernel_size
);
1387 static const uint32_t microcode
[] = {
1388 /* fill in the microcode here */
1391 const bool swap
= true;
1393 sh
->kernel_size
= sizeof(microcode
);
1394 sh
->kernel
= MALLOC(sh
->kernel_size
);
1397 const int num_dwords
= sizeof(microcode
) / 4;
1398 const uint32_t *src
= microcode
;
1399 uint32_t *dst
= (uint32_t *) sh
->kernel
;
1402 for (i
= 0; i
< num_dwords
; i
+= 4) {
1404 dst
[i
+ 0] = src
[i
+ 3];
1405 dst
[i
+ 1] = src
[i
+ 2];
1406 dst
[i
+ 2] = src
[i
+ 1];
1407 dst
[i
+ 3] = src
[i
+ 0];
1410 memcpy(dst
, src
, 16);
1417 ilo_err("failed to compile FS: %s\n", tc
->reason
);
1421 if (ilo_debug
& ILO_DEBUG_FS
) {
1422 ilo_printf("disassembly:\n");
1423 toy_compiler_disassemble(tc
, sh
->kernel
, sh
->kernel_size
);
1431 * Emit instructions to write the color buffers (and the depth buffer).
1434 fs_write_fb(struct fs_compile_context
*fcc
)
1436 struct toy_compiler
*tc
= &fcc
->tc
;
1437 int base_mrf
= fcc
->first_free_mrf
;
1438 const struct toy_dst header
= tdst_ud(tdst(TOY_FILE_MRF
, base_mrf
, 0));
1439 bool header_present
= false;
1440 struct toy_src desc
;
1441 unsigned msg_type
, ctrl
;
1442 int color_slots
[ILO_MAX_DRAW_BUFFERS
], num_cbufs
;
1443 int pos_slot
= -1, cbuf
, i
;
1445 for (i
= 0; i
< Elements(color_slots
); i
++)
1446 color_slots
[i
] = -1;
1448 for (i
= 0; i
< fcc
->tgsi
.num_outputs
; i
++) {
1449 if (fcc
->tgsi
.outputs
[i
].semantic_name
== TGSI_SEMANTIC_COLOR
) {
1450 assert(fcc
->tgsi
.outputs
[i
].semantic_index
< Elements(color_slots
));
1451 color_slots
[fcc
->tgsi
.outputs
[i
].semantic_index
] = i
;
1453 else if (fcc
->tgsi
.outputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
) {
1458 num_cbufs
= fcc
->variant
->u
.fs
.num_cbufs
;
1459 /* still need to send EOT (and probably depth) */
1463 /* we need the header to specify the pixel mask or render target */
1464 if (fcc
->tgsi
.uses_kill
|| num_cbufs
> 1) {
1465 const struct toy_src r0
= tsrc_ud(tsrc(TOY_FILE_GRF
, 0, 0));
1466 struct toy_inst
*inst
;
1468 inst
= tc_MOV(tc
, header
, r0
);
1469 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1470 base_mrf
+= fcc
->num_grf_per_vrf
;
1472 /* this is a two-register header */
1473 if (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) {
1474 inst
= tc_MOV(tc
, tdst_offset(header
, 1, 0), tsrc_offset(r0
, 1, 0));
1475 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1476 base_mrf
+= fcc
->num_grf_per_vrf
;
1479 header_present
= true;
1482 for (cbuf
= 0; cbuf
< num_cbufs
; cbuf
++) {
1484 color_slots
[(fcc
->tgsi
.props
.fs_color0_writes_all_cbufs
) ? 0 : cbuf
];
1485 int mrf
= base_mrf
, vrf
;
1486 struct toy_src src
[4];
1489 const unsigned undefined_mask
=
1490 fcc
->tgsi
.outputs
[slot
].undefined_mask
;
1491 const int index
= fcc
->tgsi
.outputs
[slot
].index
;
1493 vrf
= toy_tgsi_get_vrf(&fcc
->tgsi
, TGSI_FILE_OUTPUT
, 0, index
);
1495 const struct toy_src tmp
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1496 tsrc_transpose(tmp
, src
);
1499 /* use (0, 0, 0, 0) */
1500 tsrc_transpose(tsrc_imm_f(0.0f
), src
);
1503 for (i
= 0; i
< 4; i
++) {
1504 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1506 if (undefined_mask
& (1 << i
))
1507 src
[i
] = tsrc_imm_f(0.0f
);
1509 tc_MOV(tc
, dst
, src
[i
]);
1511 mrf
+= fcc
->num_grf_per_vrf
;
1515 /* use (0, 0, 0, 0) */
1516 for (i
= 0; i
< 4; i
++) {
1517 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1519 tc_MOV(tc
, dst
, tsrc_imm_f(0.0f
));
1520 mrf
+= fcc
->num_grf_per_vrf
;
1524 /* select BLEND_STATE[rt] */
1526 struct toy_inst
*inst
;
1528 inst
= tc_MOV(tc
, tdst_offset(header
, 0, 2), tsrc_imm_ud(cbuf
));
1529 inst
->mask_ctrl
= BRW_MASK_DISABLE
;
1530 inst
->exec_size
= BRW_EXECUTE_1
;
1531 inst
->src
[0].rect
= TOY_RECT_010
;
1534 if (cbuf
== 0 && pos_slot
>= 0) {
1535 const int index
= fcc
->tgsi
.outputs
[pos_slot
].index
;
1536 const struct toy_dst dst
= tdst(TOY_FILE_MRF
, mrf
, 0);
1537 struct toy_src src
[4];
1540 vrf
= toy_tgsi_get_vrf(&fcc
->tgsi
, TGSI_FILE_OUTPUT
, 0, index
);
1542 const struct toy_src tmp
= tsrc(TOY_FILE_VRF
, vrf
, 0);
1543 tsrc_transpose(tmp
, src
);
1546 /* use (0, 0, 0, 0) */
1547 tsrc_transpose(tsrc_imm_f(0.0f
), src
);
1551 tc_MOV(tc
, dst
, src
[2]);
1553 mrf
+= fcc
->num_grf_per_vrf
;
1556 msg_type
= (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) ?
1557 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
:
1558 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01
;
1560 ctrl
= (cbuf
== num_cbufs
- 1) << 12 |
1563 desc
= tsrc_imm_mdesc_data_port(tc
, cbuf
== num_cbufs
- 1,
1564 mrf
- fcc
->first_free_mrf
, 0,
1565 header_present
, false,
1566 GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE
,
1567 ctrl
, ILO_WM_DRAW_SURFACE(cbuf
));
1569 tc_add2(tc
, TOY_OPCODE_FB_WRITE
, tdst_null(),
1570 tsrc(TOY_FILE_MRF
, fcc
->first_free_mrf
, 0), desc
);
1575 * Set up shader outputs for fixed-function units.
1578 fs_setup_shader_out(struct ilo_shader
*sh
, const struct toy_tgsi
*tgsi
)
1582 sh
->out
.count
= tgsi
->num_outputs
;
1583 for (i
= 0; i
< tgsi
->num_outputs
; i
++) {
1584 sh
->out
.register_indices
[i
] = tgsi
->outputs
[i
].index
;
1585 sh
->out
.semantic_names
[i
] = tgsi
->outputs
[i
].semantic_name
;
1586 sh
->out
.semantic_indices
[i
] = tgsi
->outputs
[i
].semantic_index
;
1588 if (tgsi
->outputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
)
1589 sh
->out
.has_pos
= true;
1594 * Set up shader inputs for fixed-function units.
1597 fs_setup_shader_in(struct ilo_shader
*sh
, const struct toy_tgsi
*tgsi
,
1602 sh
->in
.count
= tgsi
->num_inputs
;
1603 for (i
= 0; i
< tgsi
->num_inputs
; i
++) {
1604 sh
->in
.semantic_names
[i
] = tgsi
->inputs
[i
].semantic_name
;
1605 sh
->in
.semantic_indices
[i
] = tgsi
->inputs
[i
].semantic_index
;
1606 sh
->in
.interp
[i
] = tgsi
->inputs
[i
].interp
;
1607 sh
->in
.centroid
[i
] = tgsi
->inputs
[i
].centroid
;
1609 if (tgsi
->inputs
[i
].semantic_name
== TGSI_SEMANTIC_POSITION
) {
1610 sh
->in
.has_pos
= true;
1613 else if (tgsi
->inputs
[i
].semantic_name
== TGSI_SEMANTIC_FACE
) {
1617 switch (tgsi
->inputs
[i
].interp
) {
1618 case TGSI_INTERPOLATE_CONSTANT
:
1619 sh
->in
.const_interp_enable
|= 1 << i
;
1621 case TGSI_INTERPOLATE_LINEAR
:
1622 sh
->in
.has_linear_interp
= true;
1624 if (tgsi
->inputs
[i
].centroid
) {
1625 sh
->in
.barycentric_interpolation_mode
|=
1626 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
;
1629 sh
->in
.barycentric_interpolation_mode
|=
1630 1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
;
1633 case TGSI_INTERPOLATE_COLOR
:
1635 sh
->in
.const_interp_enable
|= 1 << i
;
1639 case TGSI_INTERPOLATE_PERSPECTIVE
:
1640 if (tgsi
->inputs
[i
].centroid
) {
1641 sh
->in
.barycentric_interpolation_mode
|=
1642 1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC
;
1645 sh
->in
.barycentric_interpolation_mode
|=
1646 1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC
;
1656 fs_setup_payloads(struct fs_compile_context
*fcc
)
1658 const struct ilo_shader
*sh
= fcc
->shader
;
1666 /* r1-r2: coordinates and etc. */
1667 grf
+= (fcc
->dispatch_mode
== GEN6_WM_32_DISPATCH_ENABLE
) ? 2 : 1;
1669 for (i
= 0; i
< Elements(fcc
->payloads
); i
++) {
1672 /* r3-r26 or r32-r55: barycentric interpolation parameters */
1673 for (interp
= 0; interp
< BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT
; interp
++) {
1674 if (!(sh
->in
.barycentric_interpolation_mode
& (1 << interp
)))
1677 fcc
->payloads
[i
].barycentric_interps
[interp
] = grf
;
1678 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 2 : 4;
1681 /* r27-r28 or r56-r57: interpoloated depth */
1682 if (sh
->in
.has_pos
) {
1683 fcc
->payloads
[i
].source_depth
= grf
;
1684 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 1 : 2;
1687 /* r29-r30 or r58-r59: interpoloated w */
1688 if (sh
->in
.has_pos
) {
1689 fcc
->payloads
[i
].source_w
= grf
;
1690 grf
+= (fcc
->dispatch_mode
== GEN6_WM_8_DISPATCH_ENABLE
) ? 1 : 2;
1693 /* r31 or r60: position offset */
1695 fcc
->payloads
[i
].pos_offset
= grf
;
1699 if (fcc
->dispatch_mode
!= GEN6_WM_32_DISPATCH_ENABLE
)
1707 * Translate the TGSI tokens.
1710 fs_setup_tgsi(struct toy_compiler
*tc
, const struct tgsi_token
*tokens
,
1711 struct toy_tgsi
*tgsi
)
1713 if (ilo_debug
& ILO_DEBUG_FS
) {
1714 ilo_printf("dumping fragment shader\n");
1717 tgsi_dump(tokens
, 0);
1721 toy_compiler_translate_tgsi(tc
, tokens
, false, tgsi
);
1723 ilo_err("failed to translate FS TGSI tokens: %s\n", tc
->reason
);
1727 if (ilo_debug
& ILO_DEBUG_FS
) {
1728 ilo_printf("TGSI translator:\n");
1729 toy_tgsi_dump(tgsi
);
1731 toy_compiler_dump(tc
);
1739 * Set up FS compile context. This includes translating the TGSI tokens.
1742 fs_setup(struct fs_compile_context
*fcc
,
1743 const struct ilo_shader_state
*state
,
1744 const struct ilo_shader_variant
*variant
)
1748 memset(fcc
, 0, sizeof(*fcc
));
1750 fcc
->shader
= CALLOC_STRUCT(ilo_shader
);
1754 fcc
->variant
= variant
;
1756 toy_compiler_init(&fcc
->tc
, state
->info
.dev
);
1758 fcc
->dispatch_mode
= GEN6_WM_8_DISPATCH_ENABLE
;
1760 fcc
->tc
.templ
.access_mode
= BRW_ALIGN_1
;
1761 if (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) {
1762 fcc
->tc
.templ
.qtr_ctrl
= GEN6_COMPRESSION_1H
;
1763 fcc
->tc
.templ
.exec_size
= BRW_EXECUTE_16
;
1766 fcc
->tc
.templ
.qtr_ctrl
= GEN6_COMPRESSION_1Q
;
1767 fcc
->tc
.templ
.exec_size
= BRW_EXECUTE_8
;
1770 fcc
->tc
.rect_linear_width
= 8;
1773 * The classic driver uses the sampler cache (gen6) or the data cache
1776 fcc
->const_cache
= GEN6_SFID_DATAPORT_CONSTANT_CACHE
;
1778 if (!fs_setup_tgsi(&fcc
->tc
, state
->info
.tokens
, &fcc
->tgsi
)) {
1779 toy_compiler_cleanup(&fcc
->tc
);
1784 fs_setup_shader_in(fcc
->shader
, &fcc
->tgsi
, fcc
->variant
->u
.fs
.flatshade
);
1785 fs_setup_shader_out(fcc
->shader
, &fcc
->tgsi
);
1787 if (fcc
->variant
->use_pcb
&& !fcc
->tgsi
.const_indirect
) {
1788 num_consts
= (fcc
->tgsi
.const_count
+ 1) / 2;
1791 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1793 * "The sum of all four read length fields (each incremented to
1794 * represent the actual read length) must be less than or equal to
1797 * Since we are usually under a high register pressure, do not allow
1807 fcc
->shader
->skip_cbuf0_upload
= (!fcc
->tgsi
.const_count
|| num_consts
);
1808 fcc
->shader
->pcb
.cbuf0_size
= num_consts
* (sizeof(float) * 8);
1810 fcc
->first_const_grf
= fs_setup_payloads(fcc
);
1811 fcc
->first_attr_grf
= fcc
->first_const_grf
+ num_consts
;
1812 fcc
->first_free_grf
= fcc
->first_attr_grf
+ fcc
->shader
->in
.count
* 2;
1813 fcc
->last_free_grf
= 127;
1815 /* m0 is reserved for system routines */
1816 fcc
->first_free_mrf
= 1;
1817 fcc
->last_free_mrf
= 15;
1819 /* instructions are compressed with BRW_EXECUTE_16 */
1820 fcc
->num_grf_per_vrf
=
1821 (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
) ? 2 : 1;
1823 if (fcc
->tc
.dev
->gen
>= ILO_GEN(7)) {
1824 fcc
->last_free_grf
-= 15;
1825 fcc
->first_free_mrf
= fcc
->last_free_grf
+ 1;
1826 fcc
->last_free_mrf
= fcc
->first_free_mrf
+ 14;
1829 fcc
->shader
->in
.start_grf
= fcc
->first_const_grf
;
1830 fcc
->shader
->has_kill
= fcc
->tgsi
.uses_kill
;
1831 fcc
->shader
->dispatch_16
=
1832 (fcc
->dispatch_mode
== GEN6_WM_16_DISPATCH_ENABLE
);
1838 * Compile the fragment shader.
1841 ilo_shader_compile_fs(const struct ilo_shader_state
*state
,
1842 const struct ilo_shader_variant
*variant
)
1844 struct fs_compile_context fcc
;
1846 if (!fs_setup(&fcc
, state
, variant
))
1851 if (!fs_compile(&fcc
)) {
1856 toy_tgsi_cleanup(&fcc
.tgsi
);
1857 toy_compiler_cleanup(&fcc
.tc
);