/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file gen8_instruction.c
 *
 * A representation of a Gen8+ EU instruction, with helper methods to get
 * and set various fields.  This is the actual hardware format.
 */

#include "brw_defines.h"
#include "gen8_instruction.h"

35 gen8_convert_mrf_to_grf(struct brw_reg
*reg
)
37 /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
38 * "The send with EOT should use register space R112-R127 for <src>. This is
39 * to enable loading of a new thread into the same slot while the message
40 * with EOT for current thread is pending dispatch."
42 * Since we're pretending to have 16 MRFs anyway, we may as well use the
43 * registers required for messages with EOT.
45 if (reg
->file
== BRW_MESSAGE_REGISTER_FILE
) {
46 reg
->file
= BRW_GENERAL_REGISTER_FILE
;
47 reg
->nr
+= GEN7_MRF_HACK_START
;
52 gen8_set_dst(const struct brw_context
*brw
,
53 struct gen8_instruction
*inst
,
56 gen8_convert_mrf_to_grf(®
);
58 if (reg
.file
== BRW_GENERAL_REGISTER_FILE
)
59 assert(reg
.nr
< BRW_MAX_GRF
);
61 gen8_set_dst_reg_file(inst
, reg
.file
);
62 gen8_set_dst_reg_type(inst
, brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
));
63 gen8_set_dst_address_mode(inst
, reg
.address_mode
);
65 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
66 gen8_set_dst_da_reg_nr(inst
, reg
.nr
);
68 if (gen8_access_mode(inst
) == BRW_ALIGN_1
) {
69 /* Set Dst.SubRegNum[4:0] */
70 gen8_set_dst_da1_subreg_nr(inst
, reg
.subnr
);
72 /* Set Dst.HorzStride */
73 if (reg
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
74 reg
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
75 gen8_set_dst_da1_hstride(inst
, reg
.hstride
);
77 /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
78 assert(reg
.subnr
== 0 || reg
.subnr
== 16);
79 gen8_set_dst_da16_subreg_nr(inst
, reg
.subnr
>> 4);
80 gen8_set_da16_writemask(inst
, reg
.dw1
.bits
.writemask
);
83 /* Indirect addressing */
84 assert(gen8_access_mode(inst
) == BRW_ALIGN_1
);
86 /* Set Dst.HorzStride */
87 if (reg
.hstride
== BRW_HORIZONTAL_STRIDE_0
)
88 reg
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
89 gen8_set_dst_da1_hstride(inst
, reg
.hstride
);
90 gen8_set_dst_ia1_subreg_nr(inst
, reg
.subnr
);
91 gen8_set_dst_ia1_addr_imm(inst
, reg
.dw1
.bits
.indirect_offset
);
94 /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
95 * or 16 (SIMD16), as that's normally correct. However, when dealing with
96 * small registers, we automatically reduce it to match the register size.
98 if (reg
.width
< BRW_EXECUTE_8
)
99 gen8_set_exec_size(inst
, reg
.width
);
103 gen8_validate_reg(struct gen8_instruction
*inst
, struct brw_reg reg
)
105 int hstride_for_reg
[] = {0, 1, 2, 4};
106 int vstride_for_reg
[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
107 int width_for_reg
[] = {1, 2, 4, 8, 16};
108 int execsize_for_reg
[] = {1, 2, 4, 8, 16};
109 int width
, hstride
, vstride
, execsize
;
111 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
112 /* TODO: check immediate vectors */
116 if (reg
.file
== BRW_ARCHITECTURE_REGISTER_FILE
)
119 assert(reg
.hstride
>= 0 && reg
.hstride
< ARRAY_SIZE(hstride_for_reg
));
120 hstride
= hstride_for_reg
[reg
.hstride
];
122 if (reg
.vstride
== 0xf) {
125 assert(reg
.vstride
>= 0 && reg
.vstride
< ARRAY_SIZE(vstride_for_reg
));
126 vstride
= vstride_for_reg
[reg
.vstride
];
129 assert(reg
.width
>= 0 && reg
.width
< ARRAY_SIZE(width_for_reg
));
130 width
= width_for_reg
[reg
.width
];
132 assert(gen8_exec_size(inst
) >= 0 &&
133 gen8_exec_size(inst
) < ARRAY_SIZE(execsize_for_reg
));
134 execsize
= execsize_for_reg
[gen8_exec_size(inst
)];
136 /* Restrictions from 3.3.10: Register Region Restrictions. */
138 assert(execsize
>= width
);
141 if (execsize
== width
&& hstride
!= 0) {
142 assert(vstride
== -1 || vstride
== width
* hstride
);
146 if (execsize
== width
&& hstride
== 0) {
147 /* no restriction on vstride. */
152 assert(hstride
== 0);
156 if (execsize
== 1 && width
== 1) {
157 assert(hstride
== 0);
158 assert(vstride
== 0);
162 if (vstride
== 0 && hstride
== 0) {
166 /* 10. Check destination issues. */
170 gen8_set_src0(const struct brw_context
*brw
,
171 struct gen8_instruction
*inst
,
174 gen8_convert_mrf_to_grf(®
);
176 if (reg
.file
== BRW_GENERAL_REGISTER_FILE
)
177 assert(reg
.nr
< BRW_MAX_GRF
);
179 gen8_validate_reg(inst
, reg
);
181 gen8_set_src0_reg_file(inst
, reg
.file
);
182 gen8_set_src0_reg_type(inst
,
183 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
));
184 gen8_set_src0_abs(inst
, reg
.abs
);
185 gen8_set_src0_negate(inst
, reg
.negate
);
187 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
188 inst
->data
[3] = reg
.dw1
.ud
;
190 /* Required to set some fields in src1 as well: */
191 gen8_set_src1_reg_file(inst
, BRW_ARCHITECTURE_REGISTER_FILE
);
192 gen8_set_src1_reg_type(inst
,
193 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
));
197 gen8_set_src0_address_mode(inst
, reg
.address_mode
);
199 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
200 gen8_set_src0_da_reg_nr(inst
, reg
.nr
);
202 if (gen8_access_mode(inst
) == BRW_ALIGN_1
) {
203 /* Set Src0.SubRegNum[4:0] */
204 gen8_set_src0_da1_subreg_nr(inst
, reg
.subnr
);
206 if (reg
.width
== BRW_WIDTH_1
&& gen8_exec_size(inst
) == BRW_EXECUTE_1
) {
207 gen8_set_src0_da1_hstride(inst
, BRW_HORIZONTAL_STRIDE_0
);
208 gen8_set_src0_vert_stride(inst
, BRW_VERTICAL_STRIDE_0
);
210 gen8_set_src0_da1_hstride(inst
, reg
.hstride
);
211 gen8_set_src0_vert_stride(inst
, reg
.vstride
);
213 gen8_set_src0_da1_width(inst
, reg
.width
);
216 /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
217 assert(reg
.subnr
== 0 || reg
.subnr
== 16);
218 gen8_set_src0_da16_subreg_nr(inst
, reg
.subnr
>> 4);
220 gen8_set_src0_da16_swiz_x(inst
,
221 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
223 gen8_set_src0_da16_swiz_y(inst
,
224 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
226 gen8_set_src0_da16_swiz_z(inst
,
227 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
229 gen8_set_src0_da16_swiz_w(inst
,
230 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
233 /* This is an oddity of the fact that we're using the same
234 * descriptions for registers in both Align16 and Align1 modes.
236 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
237 gen8_set_src0_vert_stride(inst
, BRW_VERTICAL_STRIDE_4
);
239 gen8_set_src0_vert_stride(inst
, reg
.vstride
);
242 /* Indirect addressing */
243 assert(gen8_access_mode(inst
) == BRW_ALIGN_1
);
244 if (reg
.width
== BRW_WIDTH_1
&&
245 gen8_exec_size(inst
) == BRW_EXECUTE_1
) {
246 gen8_set_src0_da1_hstride(inst
, BRW_HORIZONTAL_STRIDE_0
);
247 gen8_set_src0_vert_stride(inst
, BRW_VERTICAL_STRIDE_0
);
249 gen8_set_src0_da1_hstride(inst
, reg
.hstride
);
250 gen8_set_src0_vert_stride(inst
, reg
.vstride
);
253 gen8_set_src0_da1_width(inst
, reg
.width
);
254 gen8_set_src0_ia1_subreg_nr(inst
, reg
.subnr
);
255 gen8_set_src0_ia1_addr_imm(inst
, reg
.dw1
.bits
.indirect_offset
);
260 gen8_set_src1(const struct brw_context
*brw
,
261 struct gen8_instruction
*inst
,
264 gen8_convert_mrf_to_grf(®
);
266 if (reg
.file
== BRW_GENERAL_REGISTER_FILE
)
267 assert(reg
.nr
< BRW_MAX_GRF
);
269 gen8_validate_reg(inst
, reg
);
271 gen8_set_src1_reg_file(inst
, reg
.file
);
272 gen8_set_src1_reg_type(inst
,
273 brw_reg_type_to_hw_type(brw
, reg
.type
, reg
.file
));
274 gen8_set_src1_abs(inst
, reg
.abs
);
275 gen8_set_src1_negate(inst
, reg
.negate
);
277 /* Only src1 can be an immediate in two-argument instructions. */
278 assert(gen8_src0_reg_file(inst
) != BRW_IMMEDIATE_VALUE
);
280 if (reg
.file
== BRW_IMMEDIATE_VALUE
) {
281 inst
->data
[3] = reg
.dw1
.ud
;
285 gen8_set_src1_address_mode(inst
, reg
.address_mode
);
287 if (reg
.address_mode
== BRW_ADDRESS_DIRECT
) {
288 gen8_set_src1_da_reg_nr(inst
, reg
.nr
);
290 if (gen8_access_mode(inst
) == BRW_ALIGN_1
) {
291 /* Set Src0.SubRegNum[4:0] */
292 gen8_set_src1_da1_subreg_nr(inst
, reg
.subnr
);
294 if (reg
.width
== BRW_WIDTH_1
&& gen8_exec_size(inst
) == BRW_EXECUTE_1
) {
295 gen8_set_src1_da1_hstride(inst
, BRW_HORIZONTAL_STRIDE_0
);
296 gen8_set_src1_vert_stride(inst
, BRW_VERTICAL_STRIDE_0
);
298 gen8_set_src1_da1_hstride(inst
, reg
.hstride
);
299 gen8_set_src1_vert_stride(inst
, reg
.vstride
);
301 gen8_set_src1_da1_width(inst
, reg
.width
);
303 /* Align16 SubRegNum only has a single bit (bit 4; bits 3:0 MBZ). */
304 assert(reg
.subnr
== 0 || reg
.subnr
== 16);
305 gen8_set_src1_da16_subreg_nr(inst
, reg
.subnr
>> 4);
307 gen8_set_src1_da16_swiz_x(inst
,
308 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
310 gen8_set_src1_da16_swiz_y(inst
,
311 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
313 gen8_set_src1_da16_swiz_z(inst
,
314 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
316 gen8_set_src1_da16_swiz_w(inst
,
317 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
,
320 /* This is an oddity of the fact that we're using the same
321 * descriptions for registers in both Align16 and Align1 modes.
323 if (reg
.vstride
== BRW_VERTICAL_STRIDE_8
)
324 gen8_set_src1_vert_stride(inst
, BRW_VERTICAL_STRIDE_4
);
326 gen8_set_src1_vert_stride(inst
, reg
.vstride
);
329 /* Indirect addressing */
330 assert(gen8_access_mode(inst
) == BRW_ALIGN_1
);
331 if (reg
.width
== BRW_WIDTH_1
&& gen8_exec_size(inst
) == BRW_EXECUTE_1
) {
332 gen8_set_src1_da1_hstride(inst
, BRW_HORIZONTAL_STRIDE_0
);
333 gen8_set_src1_vert_stride(inst
, BRW_VERTICAL_STRIDE_0
);
335 gen8_set_src1_da1_hstride(inst
, reg
.hstride
);
336 gen8_set_src1_vert_stride(inst
, reg
.vstride
);
339 gen8_set_src1_da1_width(inst
, reg
.width
);
340 gen8_set_src1_ia1_subreg_nr(inst
, reg
.subnr
);
341 gen8_set_src1_ia1_addr_imm(inst
, reg
.dw1
.bits
.indirect_offset
);
346 * Set the Message Descriptor and Extended Message Descriptor fields
349 * \note This zeroes out the Function Control bits, so it must be called
350 * \b before filling out any message-specific data. Callers can
351 * choose not to fill in irrelevant bits; they will be zero.
354 gen8_set_message_descriptor(const struct brw_context
*brw
,
355 struct gen8_instruction
*inst
,
356 enum brw_message_target sfid
,
358 unsigned response_length
,
362 gen8_set_src1(brw
, inst
, brw_imm_d(0));
364 gen8_set_sfid(inst
, sfid
);
365 gen8_set_mlen(inst
, msg_length
);
366 gen8_set_rlen(inst
, response_length
);
367 gen8_set_header_present(inst
, header_present
);
368 gen8_set_eot(inst
, end_of_thread
);
372 gen8_set_urb_message(const struct brw_context
*brw
,
373 struct gen8_instruction
*inst
,
374 enum brw_urb_write_flags flags
,
376 unsigned response_length
,
380 gen8_set_message_descriptor(brw
, inst
, BRW_SFID_URB
,
381 msg_length
, response_length
,
382 true, flags
& BRW_URB_WRITE_EOT
);
383 gen8_set_src0(brw
, inst
, brw_vec8_grf(GEN7_MRF_HACK_START
+ 1, 0));
384 if (flags
& BRW_URB_WRITE_OWORD
) {
385 assert(msg_length
== 2);
386 gen8_set_urb_opcode(inst
, BRW_URB_OPCODE_WRITE_OWORD
);
388 gen8_set_urb_opcode(inst
, BRW_URB_OPCODE_WRITE_HWORD
);
390 gen8_set_urb_global_offset(inst
, offset
);
391 gen8_set_urb_interleave(inst
, interleave
);
392 gen8_set_urb_per_slot_offset(inst
,
393 flags
& BRW_URB_WRITE_PER_SLOT_OFFSET
? 1 : 0);
397 gen8_set_sampler_message(const struct brw_context
*brw
,
398 struct gen8_instruction
*inst
,
399 unsigned binding_table_index
,
402 unsigned response_length
,
407 gen8_set_message_descriptor(brw
, inst
, BRW_SFID_SAMPLER
, msg_length
,
408 response_length
, header_present
, false);
410 gen8_set_binding_table_index(inst
, binding_table_index
);
411 gen8_set_sampler(inst
, sampler
);
412 gen8_set_sampler_msg_type(inst
, msg_type
);
413 gen8_set_sampler_simd_mode(inst
, simd_mode
);
417 gen8_set_dp_message(const struct brw_context
*brw
,
418 struct gen8_instruction
*inst
,
419 enum brw_message_target sfid
,
420 unsigned binding_table_index
,
422 unsigned msg_control
,
428 gen8_set_message_descriptor(brw
, inst
, sfid
, mlen
, rlen
, header_present
,
430 gen8_set_binding_table_index(inst
, binding_table_index
);
431 gen8_set_dp_message_type(inst
, msg_type
);
432 gen8_set_dp_message_control(inst
, msg_control
);
436 gen8_set_dp_scratch_message(const struct brw_context
*brw
,
437 struct gen8_instruction
*inst
,
440 bool invalidate_after_read
,
442 unsigned addr_offset
,
448 assert(num_regs
== 1 || num_regs
== 2 || num_regs
== 4 || num_regs
== 8);
449 gen8_set_message_descriptor(brw
, inst
, GEN7_SFID_DATAPORT_DATA_CACHE
,
450 mlen
, rlen
, header_present
, end_of_thread
);
451 gen8_set_dp_category(inst
, 1); /* Scratch Block Read/Write messages */
452 gen8_set_scratch_read_write(inst
, write
);
453 gen8_set_scratch_type(inst
, dword
);
454 gen8_set_scratch_invalidate_after_read(inst
, invalidate_after_read
);
455 gen8_set_scratch_block_size(inst
, ffs(num_regs
) - 1);
456 gen8_set_scratch_addr_offset(inst
, addr_offset
);