/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file gen8_instruction.c
 *
 * A representation of a Gen8+ EU instruction, with helper methods to get
 * and set various fields.  This is the actual hardware format.
 */

31 #include "main/compiler.h"
32 #include "brw_defines.h"
33 #include "gen8_instruction.h"
36 gen8_convert_mrf_to_grf(struct brw_reg
*reg
)
38 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
39 * "The send with EOT should use register space R112-R127 for <src>. This is
40 * to enable loading of a new thread into the same slot while the message
41 * with EOT for current thread is pending dispatch."
43 * Since we're pretending to have 16 MRFs anyway, we may as well use the
44 * registers required for messages with EOT.
46 if (reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
47 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
48 reg
->nr
+= GEN7_MRF_HACK_START
;
53 gen8_set_dst(const struct brw_context
*brw
,
54 struct gen8_instruction
*inst
,
57 gen8_convert_mrf_to_grf(®
);
59 if (reg
.file
== BRW_GENERAL_REGISTER_FILE
)
60 assert(reg
.nr
< BRW_MAX_GRF
);
62 gen8_set_dst_reg_file(inst
, reg
.file
);
63 gen8_set_dst_reg_type(inst
, brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
));
64 gen8_set_dst_address_mode(inst
, reg
.address_mode
);
66 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
67 gen8_set_dst_da_reg_nr(inst
, reg
.nr
);
69 if (gen8_access_mode(inst
) == BRW_ALIGN_1
) {
70 /* Set Dst.SubRegNum[4:0] */
71 gen8_set_dst_da1_subreg_nr(inst
, reg
.subnr
);
73 /* Set Dst.HorzStride */
74 if (reg
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
75 reg
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
76 gen8_set_dst_da1_hstride(inst
, reg
.hstride
);
78 /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
79 assert(reg
.subnr
== 0 || reg
.subnr
== 16);
80 gen8_set_dst_da16_subreg_nr(inst
, reg
.subnr
>> 4);
81 gen8_set_da16_writemask(inst
, reg
.dw1
.bits
.writemask
);
84 /* Indirect addressing */
85 assert(gen8_access_mode(inst
) == BRW_ALIGN_1
);
87 /* Set Dst.HorzStride */
88 if (reg
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
89 reg
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
90 gen8_set_dst_da1_hstride(inst
, reg
.hstride
);
91 gen8_set_dst_ia1_subreg_nr(inst
, reg
.subnr
);
92 gen8_set_dst_ia1_addr_imm(inst
, reg
.dw1
.bits
.indirect_offset
);
95 /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
96 * or 16 (SIMD16), as that's normally correct. However, when dealing with
97 * small registers, we automatically reduce it to match the register size.
99 if (reg
.width
< BRW_EXECUTE_8
)
100 gen8_set_exec_size(inst
, reg
.width
);
104 gen8_validate_reg(struct gen8_instruction
*inst
, struct brw_reg reg
)
106 int hstride_for_reg
[] = {0, 1, 2, 4};
107 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
108 int width_for_reg
[] = {1, 2, 4, 8, 16};
109 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
110 int width
, hstride
, vstride
, execsize
;
112 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
113 /* TODO: check immediate vectors */
117 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
)
120 assert(reg
.hstride
>= 0 && reg
.hstride
< ARRAY_SIZE(hstride_for_reg
));
121 hstride
= hstride_for_reg
[reg
.hstride
];
123 if (reg
.vstride
== 0xf) {
126 assert(reg
.vstride
>= 0 && reg
.vstride
< ARRAY_SIZE(vstride_for_reg
));
127 vstride
= vstride_for_reg
[reg
.vstride
];
130 assert(reg
.width
>= 0 && reg
.width
< ARRAY_SIZE(width_for_reg
));
131 width
= width_for_reg
[reg
.width
];
133 assert(gen8_exec_size(inst
) >= 0 &&
134 gen8_exec_size(inst
) < ARRAY_SIZE(execsize_for_reg
));
135 execsize
= execsize_for_reg
[gen8_exec_size(inst
)];
137 /* Restrictions from 3.3.10: Register Region Restrictions. */
139 assert(execsize
>= width
);
142 if (execsize
== width
&& hstride
!= 0) {
143 assert(vstride
== -1 || vstride
== width
* hstride
);
147 if (execsize
== width
&& hstride
== 0) {
148 /* no restriction on vstride. */
153 assert(hstride
== 0);
157 if (execsize
== 1 && width
== 1) {
158 assert(hstride
== 0);
159 assert(vstride
== 0);
163 if (vstride
== 0 && hstride
== 0) {
167 /* 10. Check destination issues. */
171 gen8_set_src0(const struct brw_context
*brw
,
172 struct gen8_instruction
*inst
,
175 gen8_convert_mrf_to_grf(®
);
177 if (reg
.file
== BRW_GENERAL_REGISTER_FILE
)
178 assert(reg
.nr
< BRW_MAX_GRF
);
180 gen8_validate_reg(inst
, reg
);
182 gen8_set_src0_reg_file(inst
, reg
.file
);
183 gen8_set_src0_reg_type(inst
,
184 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
));
185 gen8_set_src0_abs(inst
, reg
.abs
);
186 gen8_set_src0_negate(inst
, reg
.negate
);
188 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
189 inst
->data
[3] = reg
.dw1
.ud
;
191 /* Required to set some fields in src1 as well: */
192 gen8_set_src1_reg_file(inst
, BRW_ARCHITECTURE_REGISTER_FILE
);
193 gen8_set_src1_reg_type(inst
,
194 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
));
198 gen8_set_src0_address_mode(inst
, reg
.address_mode
);
200 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
201 gen8_set_src0_da_reg_nr(inst
, reg
.nr
);
203 if (gen8_access_mode(inst
) == BRW_ALIGN_1
) {
204 /* Set Src0.SubRegNum[4:0] */
205 gen8_set_src0_da1_subreg_nr(inst
, reg
.subnr
);
207 if (reg
.width
== BRW_WIDTH_1
&& gen8_exec_size(inst
) == BRW_EXECUTE_1
) {
208 gen8_set_src0_da1_hstride(inst
, BRW_HORIZONTAL_STRIDE_0
);
209 gen8_set_src0_vert_stride(inst
, BRW_VERTICAL_STRIDE_0
);
211 gen8_set_src0_da1_hstride(inst
, reg
.hstride
);
212 gen8_set_src0_vert_stride(inst
, reg
.vstride
);
214 gen8_set_src0_da1_width(inst
, reg
.width
);
217 /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
218 assert(reg
.subnr
== 0 || reg
.subnr
== 16);
219 gen8_set_src0_da16_subreg_nr(inst
, reg
.subnr
>> 4);
221 gen8_set_src0_da16_swiz_x(inst
,
222 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
224 gen8_set_src0_da16_swiz_y(inst
,
225 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
227 gen8_set_src0_da16_swiz_z(inst
,
228 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
230 gen8_set_src0_da16_swiz_w(inst
,
231 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
234 /* This is an oddity of the fact that we're using the same
235 * descriptions for registers in both Align16 and Align1 modes.
237 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
238 gen8_set_src0_vert_stride(inst
, BRW_VERTICAL_STRIDE_4
);
240 gen8_set_src0_vert_stride(inst
, reg
.vstride
);
243 /* Indirect addressing */
244 assert(gen8_access_mode(inst
) == BRW_ALIGN_1
);
245 if (reg
.width
== BRW_WIDTH_1
&&
246 gen8_exec_size(inst
) == BRW_EXECUTE_1
) {
247 gen8_set_src0_da1_hstride(inst
, BRW_HORIZONTAL_STRIDE_0
);
248 gen8_set_src0_vert_stride(inst
, BRW_VERTICAL_STRIDE_0
);
250 gen8_set_src0_da1_hstride(inst
, reg
.hstride
);
251 gen8_set_src0_vert_stride(inst
, reg
.vstride
);
254 gen8_set_src0_da1_width(inst
, reg
.width
);
255 gen8_set_src0_ia1_subreg_nr(inst
, reg
.subnr
);
256 gen8_set_src0_ia1_addr_imm(inst
, reg
.dw1
.bits
.indirect_offset
);
261 gen8_set_src1(const struct brw_context
*brw
,
262 struct gen8_instruction
*inst
,
265 gen8_convert_mrf_to_grf(®
);
267 if (reg
.file
== BRW_GENERAL_REGISTER_FILE
)
268 assert(reg
.nr
< BRW_MAX_GRF
);
270 gen8_validate_reg(inst
, reg
);
272 gen8_set_src1_reg_file(inst
, reg
.file
);
273 gen8_set_src1_reg_type(inst
,
274 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
));
275 gen8_set_src1_abs(inst
, reg
.abs
);
276 gen8_set_src1_negate(inst
, reg
.negate
);
278 /* Only src1 can be an immediate in two-argument instructions. */
279 assert(gen8_src0_reg_file(inst
) != BRW_IMMEDIATE_VALUE
);
281 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
282 inst
->data
[3] = reg
.dw1
.ud
;
286 gen8_set_src1_address_mode(inst
, reg
.address_mode
);
288 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
289 gen8_set_src1_da_reg_nr(inst
, reg
.nr
);
291 if (gen8_access_mode(inst
) == BRW_ALIGN_1
) {
292 /* Set Src0.SubRegNum[4:0] */
293 gen8_set_src1_da1_subreg_nr(inst
, reg
.subnr
);
295 if (reg
.width
== BRW_WIDTH_1
&& gen8_exec_size(inst
) == BRW_EXECUTE_1
) {
296 gen8_set_src1_da1_hstride(inst
, BRW_HORIZONTAL_STRIDE_0
);
297 gen8_set_src1_vert_stride(inst
, BRW_VERTICAL_STRIDE_0
);
299 gen8_set_src1_da1_hstride(inst
, reg
.hstride
);
300 gen8_set_src1_vert_stride(inst
, reg
.vstride
);
302 gen8_set_src1_da1_width(inst
, reg
.width
);
304 /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
305 assert(reg
.subnr
== 0 || reg
.subnr
== 16);
306 gen8_set_src1_da16_subreg_nr(inst
, reg
.subnr
>> 4);
308 gen8_set_src1_da16_swiz_x(inst
,
309 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
311 gen8_set_src1_da16_swiz_y(inst
,
312 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
314 gen8_set_src1_da16_swiz_z(inst
,
315 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
317 gen8_set_src1_da16_swiz_w(inst
,
318 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
321 /* This is an oddity of the fact that we're using the same
322 * descriptions for registers in both Align16 and Align1 modes.
324 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
325 gen8_set_src1_vert_stride(inst
, BRW_VERTICAL_STRIDE_4
);
327 gen8_set_src1_vert_stride(inst
, reg
.vstride
);
330 /* Indirect addressing */
331 assert(gen8_access_mode(inst
) == BRW_ALIGN_1
);
332 if (reg
.width
== BRW_WIDTH_1
&& gen8_exec_size(inst
) == BRW_EXECUTE_1
) {
333 gen8_set_src1_da1_hstride(inst
, BRW_HORIZONTAL_STRIDE_0
);
334 gen8_set_src1_vert_stride(inst
, BRW_VERTICAL_STRIDE_0
);
336 gen8_set_src1_da1_hstride(inst
, reg
.hstride
);
337 gen8_set_src1_vert_stride(inst
, reg
.vstride
);
340 gen8_set_src1_da1_width(inst
, reg
.width
);
341 gen8_set_src1_ia1_subreg_nr(inst
, reg
.subnr
);
342 gen8_set_src1_ia1_addr_imm(inst
, reg
.dw1
.bits
.indirect_offset
);
347 * Set the Message Descriptor and Extended Message Descriptor fields
350 * \note This zeroes out the Function Control bits, so it must be called
351 * \b before filling out any message-specific data. Callers can
352 * choose not to fill in irrelevant bits; they will be zero.
355 gen8_set_message_descriptor(const struct brw_context
*brw
,
356 struct gen8_instruction
*inst
,
357 enum brw_message_target sfid
,
359 unsigned response_length
,
363 gen8_set_src1(brw
, inst
, brw_imm_d(0));
365 gen8_set_sfid(inst
, sfid
);
366 gen8_set_mlen(inst
, msg_length
);
367 gen8_set_rlen(inst
, response_length
);
368 gen8_set_header_present(inst
, header_present
);
369 gen8_set_eot(inst
, end_of_thread
);
373 gen8_set_urb_message(const struct brw_context
*brw
,
374 struct gen8_instruction
*inst
,
375 enum brw_urb_write_flags flags
,
377 unsigned response_length
,
381 gen8_set_message_descriptor(brw
, inst
, BRW_SFID_URB
,
382 msg_length
, response_length
,
383 true, flags
& BRW_URB_WRITE_EOT
);
384 gen8_set_src0(brw
, inst
, brw_vec8_grf(GEN7_MRF_HACK_START
+ 1, 0));
385 if (flags
& BRW_URB_WRITE_OWORD
) {
386 assert(msg_length
== 2);
387 gen8_set_urb_opcode(inst
, BRW_URB_OPCODE_WRITE_OWORD
);
389 gen8_set_urb_opcode(inst
, BRW_URB_OPCODE_WRITE_HWORD
);
391 gen8_set_urb_global_offset(inst
, offset
);
392 gen8_set_urb_interleave(inst
, interleave
);
393 gen8_set_urb_per_slot_offset(inst
,
394 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0);
398 gen8_set_sampler_message(const struct brw_context
*brw
,
399 struct gen8_instruction
*inst
,
400 unsigned binding_table_index
,
403 unsigned response_length
,
408 gen8_set_message_descriptor(brw
, inst
, BRW_SFID_SAMPLER
, msg_length
,
409 response_length
, header_present
, false);
411 gen8_set_binding_table_index(inst
, binding_table_index
);
412 gen8_set_sampler(inst
, sampler
);
413 gen8_set_sampler_msg_type(inst
, msg_type
);
414 gen8_set_sampler_simd_mode(inst
, simd_mode
);
418 gen8_set_dp_message(const struct brw_context
*brw
,
419 struct gen8_instruction
*inst
,
420 enum brw_message_target sfid
,
421 unsigned binding_table_index
,
423 unsigned msg_control
,
429 gen8_set_message_descriptor(brw
, inst
, sfid
, mlen
, rlen
, header_present
,
431 gen8_set_binding_table_index(inst
, binding_table_index
);
432 gen8_set_dp_message_type(inst
, msg_type
);
433 gen8_set_dp_message_control(inst
, msg_control
);
437 gen8_set_dp_scratch_message(const struct brw_context
*brw
,
438 struct gen8_instruction
*inst
,
441 bool invalidate_after_read
,
443 unsigned addr_offset
,
449 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4 || num_regs
== 8);
450 gen8_set_message_descriptor(brw
, inst
, GEN7_SFID_DATAPORT_DATA_CACHE
,
451 mlen
, rlen
, header_present
, end_of_thread
);
452 gen8_set_dp_category(inst
, 1); /* Scratch Block Read/Write messages */
453 gen8_set_scratch_read_write(inst
, write
);
454 gen8_set_scratch_type(inst
, dword
);
455 gen8_set_scratch_invalidate_after_read(inst
, invalidate_after_read
);
456 gen8_set_scratch_block_size(inst
, ffs(num_regs
) - 1);
457 gen8_set_scratch_addr_offset(inst
, addr_offset
);