2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
28 #include "main/macros.h"
29 #include "program/program.h"
30 #include "program/prog_print.h"
31 #include "brw_context.h"
32 #include "brw_defines.h"
40 } inst_opcode
[128] = {
41 [BRW_OPCODE_MOV
] = { .name
= "mov", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
42 [BRW_OPCODE_FRC
] = { .name
= "frc", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
43 [BRW_OPCODE_RNDU
] = { .name
= "rndu", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
44 [BRW_OPCODE_RNDD
] = { .name
= "rndd", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
45 [BRW_OPCODE_RNDE
] = { .name
= "rnde", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
46 [BRW_OPCODE_RNDZ
] = { .name
= "rndz", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
47 [BRW_OPCODE_NOT
] = { .name
= "not", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
48 [BRW_OPCODE_LZD
] = { .name
= "lzd", .nsrc
= 1, .ndst
= 1 },
50 [BRW_OPCODE_MUL
] = { .name
= "mul", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
51 [BRW_OPCODE_MAC
] = { .name
= "mac", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
52 [BRW_OPCODE_MACH
] = { .name
= "mach", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
53 [BRW_OPCODE_LINE
] = { .name
= "line", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
54 [BRW_OPCODE_PLN
] = { .name
= "pln", .nsrc
= 2, .ndst
= 1 },
55 [BRW_OPCODE_SAD2
] = { .name
= "sad2", .nsrc
= 2, .ndst
= 1 },
56 [BRW_OPCODE_SADA2
] = { .name
= "sada2", .nsrc
= 2, .ndst
= 1 },
57 [BRW_OPCODE_DP4
] = { .name
= "dp4", .nsrc
= 2, .ndst
= 1 },
58 [BRW_OPCODE_DPH
] = { .name
= "dph", .nsrc
= 2, .ndst
= 1 },
59 [BRW_OPCODE_DP3
] = { .name
= "dp3", .nsrc
= 2, .ndst
= 1 },
60 [BRW_OPCODE_DP2
] = { .name
= "dp2", .nsrc
= 2, .ndst
= 1 },
61 [BRW_OPCODE_MATH
] = { .name
= "math", .nsrc
= 2, .ndst
= 1 },
63 [BRW_OPCODE_AVG
] = { .name
= "avg", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
64 [BRW_OPCODE_ADD
] = { .name
= "add", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
65 [BRW_OPCODE_SEL
] = { .name
= "sel", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
66 [BRW_OPCODE_AND
] = { .name
= "and", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
67 [BRW_OPCODE_OR
] = { .name
= "or", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
68 [BRW_OPCODE_XOR
] = { .name
= "xor", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
69 [BRW_OPCODE_SHR
] = { .name
= "shr", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
70 [BRW_OPCODE_SHL
] = { .name
= "shl", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
71 [BRW_OPCODE_ASR
] = { .name
= "asr", .nsrc
= 2, .ndst
= 1 },
72 [BRW_OPCODE_CMP
] = { .name
= "cmp", .nsrc
= 2, .ndst
= 1 },
73 [BRW_OPCODE_CMPN
] = { .name
= "cmpn", .nsrc
= 2, .ndst
= 1 },
75 [BRW_OPCODE_SEND
] = { .name
= "send", .nsrc
= 1, .ndst
= 1 },
76 [BRW_OPCODE_NOP
] = { .name
= "nop", .nsrc
= 0, .ndst
= 0 },
77 [BRW_OPCODE_JMPI
] = { .name
= "jmpi", .nsrc
= 1, .ndst
= 0 },
78 [BRW_OPCODE_IF
] = { .name
= "if", .nsrc
= 2, .ndst
= 0 },
79 [BRW_OPCODE_IFF
] = { .name
= "iff", .nsrc
= 2, .ndst
= 1 },
80 [BRW_OPCODE_WHILE
] = { .name
= "while", .nsrc
= 2, .ndst
= 0 },
81 [BRW_OPCODE_ELSE
] = { .name
= "else", .nsrc
= 2, .ndst
= 0 },
82 [BRW_OPCODE_BREAK
] = { .name
= "break", .nsrc
= 2, .ndst
= 0 },
83 [BRW_OPCODE_CONTINUE
] = { .name
= "cont", .nsrc
= 1, .ndst
= 0 },
84 [BRW_OPCODE_HALT
] = { .name
= "halt", .nsrc
= 1, .ndst
= 0 },
85 [BRW_OPCODE_MSAVE
] = { .name
= "msave", .nsrc
= 1, .ndst
= 1 },
86 [BRW_OPCODE_PUSH
] = { .name
= "push", .nsrc
= 1, .ndst
= 1 },
87 [BRW_OPCODE_MRESTORE
] = { .name
= "mrest", .nsrc
= 1, .ndst
= 1 },
88 [BRW_OPCODE_POP
] = { .name
= "pop", .nsrc
= 2, .ndst
= 0 },
89 [BRW_OPCODE_WAIT
] = { .name
= "wait", .nsrc
= 1, .ndst
= 0 },
90 [BRW_OPCODE_DO
] = { .name
= "do", .nsrc
= 0, .ndst
= 0 },
91 [BRW_OPCODE_ENDIF
] = { .name
= "endif", .nsrc
= 2, .ndst
= 0 },
95 GLboolean
brw_is_arithmetic_inst(const struct brw_instruction
*inst
)
97 return inst_opcode
[inst
->header
.opcode
].is_arith
;
100 static const GLuint inst_stride
[7] = {
110 static const GLuint inst_type_size
[8] = {
111 [BRW_REGISTER_TYPE_UD
] = 4,
112 [BRW_REGISTER_TYPE_D
] = 4,
113 [BRW_REGISTER_TYPE_UW
] = 2,
114 [BRW_REGISTER_TYPE_W
] = 2,
115 [BRW_REGISTER_TYPE_UB
] = 1,
116 [BRW_REGISTER_TYPE_B
] = 1,
117 [BRW_REGISTER_TYPE_F
] = 4
120 static INLINE GLboolean
121 brw_is_grf_written(const struct brw_instruction
*inst
,
122 int reg_index
, int size
,
125 if (inst_opcode
[inst
->header
.opcode
].ndst
== 0)
128 if (inst
->bits1
.da1
.dest_address_mode
!= BRW_ADDRESS_DIRECT
)
129 if (inst
->bits1
.ia1
.dest_reg_file
== BRW_GENERAL_REGISTER_FILE
)
132 if (inst
->bits1
.da1
.dest_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
135 const int reg_start
= reg_index
* REG_SIZE
;
136 const int reg_end
= reg_start
+ size
;
138 const int type_size
= inst_type_size
[inst
->bits1
.da1
.dest_reg_type
];
139 const int write_start
= inst
->bits1
.da1
.dest_reg_nr
*REG_SIZE
140 + inst
->bits1
.da1
.dest_subreg_nr
;
141 int length
, write_end
;
143 /* SEND is specific */
144 if (inst
->header
.opcode
== BRW_OPCODE_SEND
) {
146 length
= inst
->bits3
.generic_gen5
.response_length
*REG_SIZE
;
148 length
= inst
->bits3
.generic
.response_length
*REG_SIZE
;
151 length
= 1 << inst
->header
.execution_size
;
153 length
*= inst
->bits1
.da1
.dest_horiz_stride
;
156 /* If the two intervals intersect, we overwrite the register */
157 write_end
= write_start
+ length
;
158 const int left
= MAX2(write_start
, reg_start
);
159 const int right
= MIN2(write_end
, reg_end
);
164 /* Specific path for message register since we need to handle the compr4 case */
165 static INLINE GLboolean
166 brw_is_mrf_written(const struct brw_instruction
*inst
, int reg_index
, int size
)
168 if (inst_opcode
[inst
->header
.opcode
].ndst
== 0)
171 if (inst
->bits1
.da1
.dest_address_mode
!= BRW_ADDRESS_DIRECT
)
172 if (inst
->bits1
.ia1
.dest_reg_file
== BRW_MESSAGE_REGISTER_FILE
)
175 if (inst
->bits1
.da1
.dest_reg_file
!= BRW_MESSAGE_REGISTER_FILE
)
178 const int reg_start
= reg_index
* REG_SIZE
;
179 const int reg_end
= reg_start
+ size
;
181 const int mrf_index
= inst
->bits1
.da1
.dest_reg_nr
& 0x0f;
182 const int is_compr4
= inst
->bits1
.da1
.dest_reg_nr
& BRW_MRF_COMPR4
;
183 const int type_size
= inst_type_size
[inst
->bits1
.da1
.dest_reg_type
];
185 /* We use compr4 with a size != 16 elements. Strange, we conservatively
186 * consider that we are writing the register.
188 if (is_compr4
&& inst
->header
.execution_size
!= BRW_EXECUTE_16
)
191 GLboolean is_written
= GL_FALSE
;
193 /* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */
195 const int length
= 8 * type_size
* inst
->bits1
.da1
.dest_horiz_stride
;
197 /* First 8-way register */
198 const int write_start0
= mrf_index
*REG_SIZE
199 + inst
->bits1
.da1
.dest_subreg_nr
;
200 const int write_end0
= write_start0
+ length
;
202 /* Second 8-way register */
203 const int write_start1
= (mrf_index
+4)*REG_SIZE
204 + inst
->bits1
.da1
.dest_subreg_nr
;
205 const int write_end1
= write_start1
+ length
;
207 /* If the two intervals intersect, we overwrite the register */
208 const int left0
= MAX2(write_start0
, reg_start
);
209 const int right0
= MIN2(write_end0
, reg_end
);
210 const int left1
= MAX2(write_start1
, reg_start
);
211 const int right1
= MIN2(write_end1
, reg_end
);
213 is_written
= left0
< right0
|| left1
< right1
;
217 length
= 1 << inst
->header
.execution_size
;
219 length
*= inst
->bits1
.da1
.dest_horiz_stride
;
221 /* If the two intervals intersect, we write into the register */
222 const int write_start
= inst
->bits1
.da1
.dest_reg_nr
*REG_SIZE
223 + inst
->bits1
.da1
.dest_subreg_nr
;
224 const int write_end
= write_start
+ length
;
225 const int left
= MAX2(write_start
, reg_start
);
226 const int right
= MIN2(write_end
, reg_end
);;
228 is_written
= left
< right
;
231 /* SEND may perform an implicit mov to a mrf register */
232 if (is_written
== GL_FALSE
&&
233 inst
->header
.opcode
== BRW_OPCODE_SEND
&&
234 inst
->bits1
.da1
.src0_reg_file
!= 0) {
236 const int mrf_start
= inst
->header
.destreg__conditionalmod
;
237 const int write_start
= mrf_start
* REG_SIZE
;
238 const int write_end
= write_start
+ REG_SIZE
;
239 const int left
= MAX2(write_start
, reg_start
);
240 const int right
= MIN2(write_end
, reg_end
);;
241 is_written
= left
< right
;
247 static INLINE GLboolean
248 brw_is_mrf_read(const struct brw_instruction
*inst
,
249 int reg_index
, int size
, int gen
)
251 if (inst
->header
.opcode
!= BRW_OPCODE_SEND
)
253 if (inst
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
)
256 const int reg_start
= reg_index
*REG_SIZE
;
257 const int reg_end
= reg_start
+ size
;
259 int length
, read_start
, read_end
;
261 length
= inst
->bits3
.generic_gen5
.msg_length
*REG_SIZE
;
263 length
= inst
->bits3
.generic
.msg_length
*REG_SIZE
;
265 /* Look if SEND uses an implicit mov. In that case, we read one less register
268 if (inst
->bits1
.da1
.src0_reg_file
!= 0)
269 read_start
= inst
->header
.destreg__conditionalmod
;
272 read_start
= inst
->header
.destreg__conditionalmod
+ 1;
274 read_start
*= REG_SIZE
;
275 read_end
= read_start
+ length
;
277 const int left
= MAX2(read_start
, reg_start
);
278 const int right
= MIN2(read_end
, reg_end
);
283 static INLINE GLboolean
284 brw_is_grf_read(const struct brw_instruction
*inst
, int reg_index
, int size
)
287 if (inst_opcode
[inst
->header
.opcode
].nsrc
== 0)
290 /* Look at first source. We must take into account register regions to
291 * monitor carefully the read. Note that we are a bit too conservative here
292 * since we do not take into account the fact that some complete registers
295 if (inst_opcode
[inst
->header
.opcode
].nsrc
>= 1) {
297 if (inst
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
)
298 if (inst
->bits1
.ia1
.src0_reg_file
== BRW_GENERAL_REGISTER_FILE
)
300 if (inst
->bits1
.da1
.src0_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
303 const int reg_start
= reg_index
*REG_SIZE
;
304 const int reg_end
= reg_start
+ size
;
306 /* See if at least one of this element intersects the interval */
307 const int type_size
= inst_type_size
[inst
->bits1
.da1
.src0_reg_type
];
308 const int elem_num
= 1 << inst
->header
.execution_size
;
309 const int width
= 1 << inst
->bits2
.da1
.src0_width
;
310 const int row_num
= elem_num
>> inst
->bits2
.da1
.src0_width
;
311 const int hs
= type_size
*inst_stride
[inst
->bits2
.da1
.src0_horiz_stride
];
312 const int vs
= type_size
*inst_stride
[inst
->bits2
.da1
.src0_vert_stride
];
313 int row_start
= inst
->bits2
.da1
.src0_reg_nr
*REG_SIZE
314 + inst
->bits2
.da1
.src0_subreg_nr
;
315 for (j
= 0; j
< row_num
; ++j
) {
316 int write_start
= row_start
;
317 for (i
= 0; i
< width
; ++i
) {
318 const int write_end
= write_start
+ type_size
;
319 const int left
= write_start
> reg_start
? write_start
: reg_start
;
320 const int right
= write_end
< reg_end
? write_end
: reg_end
;
329 /* Second src register */
330 if (inst_opcode
[inst
->header
.opcode
].nsrc
>= 2) {
332 if (inst
->bits3
.da1
.src1_address_mode
!= BRW_ADDRESS_DIRECT
)
333 if (inst
->bits1
.ia1
.src1_reg_file
== BRW_GENERAL_REGISTER_FILE
)
335 if (inst
->bits1
.da1
.src1_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
338 const int reg_start
= reg_index
*REG_SIZE
;
339 const int reg_end
= reg_start
+ size
;
341 /* See if at least one of this element intersects the interval */
342 const int type_size
= inst_type_size
[inst
->bits1
.da1
.src1_reg_type
];
343 const int elem_num
= 1 << inst
->header
.execution_size
;
344 const int width
= 1 << inst
->bits3
.da1
.src1_width
;
345 const int row_num
= elem_num
>> inst
->bits3
.da1
.src1_width
;
346 const int hs
= type_size
*inst_stride
[inst
->bits3
.da1
.src1_horiz_stride
];
347 const int vs
= type_size
*inst_stride
[inst
->bits3
.da1
.src1_vert_stride
];
348 int row_start
= inst
->bits3
.da1
.src1_reg_nr
*REG_SIZE
349 + inst
->bits3
.da1
.src1_subreg_nr
;
350 for (j
= 0; j
< row_num
; ++j
) {
351 int write_start
= row_start
;
352 for (i
= 0; i
< width
; ++i
) {
353 const int write_end
= write_start
+ type_size
;
354 const int left
= write_start
> reg_start
? write_start
: reg_start
;
355 const int right
= write_end
< reg_end
? write_end
: reg_end
;
367 static INLINE GLboolean
368 brw_is_control_done(const struct brw_instruction
*mov
) {
370 mov
->header
.dependency_control
!= 0 ||
371 mov
->header
.thread_control
!= 0 ||
372 mov
->header
.mask_control
!= 0 ||
373 mov
->header
.saturate
!= 0 ||
374 mov
->header
.debug_control
!= 0;
377 static INLINE GLboolean
378 brw_is_predicated(const struct brw_instruction
*mov
) {
379 return mov
->header
.predicate_control
!= 0;
382 static INLINE GLboolean
383 brw_is_grf_to_mrf_mov(const struct brw_instruction
*mov
,
386 GLboolean
*is_compr4
)
388 if (brw_is_predicated(mov
) ||
389 brw_is_control_done(mov
) ||
390 mov
->header
.debug_control
!= 0)
393 if (mov
->bits1
.da1
.dest_address_mode
!= BRW_ADDRESS_DIRECT
||
394 mov
->bits1
.da1
.dest_reg_file
!= BRW_MESSAGE_REGISTER_FILE
||
395 mov
->bits1
.da1
.dest_reg_type
!= BRW_REGISTER_TYPE_F
||
396 mov
->bits1
.da1
.dest_horiz_stride
!= BRW_HORIZONTAL_STRIDE_1
||
397 mov
->bits1
.da1
.dest_subreg_nr
!= 0)
400 if (mov
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
||
401 mov
->bits1
.da1
.src0_reg_file
!= BRW_GENERAL_REGISTER_FILE
||
402 mov
->bits1
.da1
.src0_reg_type
!= BRW_REGISTER_TYPE_F
||
403 mov
->bits2
.da1
.src0_width
!= BRW_WIDTH_8
||
404 mov
->bits2
.da1
.src0_horiz_stride
!= BRW_HORIZONTAL_STRIDE_1
||
405 mov
->bits2
.da1
.src0_vert_stride
!= BRW_VERTICAL_STRIDE_8
||
406 mov
->bits2
.da1
.src0_subreg_nr
!= 0 ||
407 mov
->bits2
.da1
.src0_abs
!= 0 ||
408 mov
->bits2
.da1
.src0_negate
!= 0)
411 *grf_index
= mov
->bits2
.da1
.src0_reg_nr
;
412 *mrf_index
= mov
->bits1
.da1
.dest_reg_nr
& 0x0f;
413 *is_compr4
= (mov
->bits1
.da1
.dest_reg_nr
& BRW_MRF_COMPR4
) != 0;
417 static INLINE GLboolean
418 brw_is_grf_straight_write(const struct brw_instruction
*inst
, int grf_index
)
420 /* remark: no problem to predicate a SEL instruction */
421 if ((!brw_is_predicated(inst
) || inst
->header
.opcode
== BRW_OPCODE_SEL
) &&
422 brw_is_control_done(inst
) == GL_FALSE
&&
423 inst
->header
.execution_size
== 4 &&
424 inst
->header
.access_mode
== BRW_ALIGN_1
&&
425 inst
->bits1
.da1
.dest_address_mode
== BRW_ADDRESS_DIRECT
&&
426 inst
->bits1
.da1
.dest_reg_file
== BRW_GENERAL_REGISTER_FILE
&&
427 inst
->bits1
.da1
.dest_reg_type
== BRW_REGISTER_TYPE_F
&&
428 inst
->bits1
.da1
.dest_horiz_stride
== BRW_HORIZONTAL_STRIDE_1
&&
429 inst
->bits1
.da1
.dest_reg_nr
== grf_index
&&
430 inst
->bits1
.da1
.dest_subreg_nr
== 0 &&
431 brw_is_arithmetic_inst(inst
))
437 static INLINE GLboolean
438 brw_inst_are_equal(const struct brw_instruction
*src0
,
439 const struct brw_instruction
*src1
)
441 const GLuint
*field0
= (GLuint
*) src0
;
442 const GLuint
*field1
= (GLuint
*) src1
;
443 return field0
[0] == field1
[0] &&
444 field0
[1] == field1
[1] &&
445 field0
[2] == field1
[2] &&
446 field0
[3] == field1
[3];
450 brw_inst_copy(struct brw_instruction
*dst
,
451 const struct brw_instruction
*src
)
453 GLuint
*field_dst
= (GLuint
*) dst
;
454 const GLuint
*field_src
= (GLuint
*) src
;
455 field_dst
[0] = field_src
[0];
456 field_dst
[1] = field_src
[1];
457 field_dst
[2] = field_src
[2];
458 field_dst
[3] = field_src
[3];
461 static void brw_remove_inst(struct brw_compile
*p
, const GLboolean
*removeInst
)
463 int i
, nr_insn
= 0, to
= 0, from
= 0;
465 for (from
= 0; from
< p
->nr_insn
; ++from
) {
466 if (removeInst
[from
])
469 brw_inst_copy(p
->store
+ to
, p
->store
+ from
);
473 for (i
= 0; i
< p
->nr_insn
; ++i
)
474 if (removeInst
[i
] == GL_FALSE
)
476 p
->nr_insn
= nr_insn
;
479 /* The gen code emitter generates a lot of duplications in the
480 * grf-to-mrf moves, for example when texture sampling with the same
481 * coordinates from multiple textures.. Here, we monitor same mov
482 * grf-to-mrf instrutions and remove repeated ones where the operands
483 * and dst ahven't changed in between.
485 void brw_remove_duplicate_mrf_moves(struct brw_compile
*p
)
487 const int gen
= p
->brw
->intel
.gen
;
490 GLboolean
*removeInst
= calloc(sizeof(GLboolean
), p
->nr_insn
);
491 for (i
= 0; i
< p
->nr_insn
; i
++) {
495 const struct brw_instruction
*mov
= p
->store
+ i
;
496 int mrf_index
, grf_index
;
499 /* Only consider _straight_ grf-to-mrf moves */
500 if (!brw_is_grf_to_mrf_mov(mov
, &mrf_index
, &grf_index
, &is_compr4
))
503 const int mrf_index0
= mrf_index
;
504 const int mrf_index1
= is_compr4
? mrf_index0
+4 : mrf_index0
+1;
505 const int simd16_size
= 2 * REG_SIZE
;
507 for (j
= i
+ 1; j
< p
->nr_insn
; j
++) {
508 const struct brw_instruction
*inst
= p
->store
+ j
;
510 if (brw_inst_are_equal(mov
, inst
)) {
511 removeInst
[j
] = GL_TRUE
;
515 if (brw_is_grf_written(inst
, grf_index
, simd16_size
, gen
) ||
516 brw_is_mrf_written(inst
, mrf_index0
, REG_SIZE
) ||
517 brw_is_mrf_written(inst
, mrf_index1
, REG_SIZE
))
522 brw_remove_inst(p
, removeInst
);
526 /* Replace moves to MRFs where the value moved is the result of a
527 * normal arithmetic operation with computation right into the MRF.
529 void brw_remove_grf_to_mrf_moves(struct brw_compile
*p
)
532 struct brw_context
*brw
= p
->brw
;
533 const int gen
= brw
->intel
.gen
;
534 const int simd16_size
= 2*REG_SIZE
;
536 GLboolean
*removeInst
= calloc(sizeof(GLboolean
), p
->nr_insn
);
539 for (i
= 0; i
< p
->nr_insn
; i
++) {
543 struct brw_instruction
*grf_inst
= NULL
;
544 const struct brw_instruction
*mov
= p
->store
+ i
;
545 int mrf_index
, grf_index
;
548 /* Only consider _straight_ grf-to-mrf moves */
549 if (!brw_is_grf_to_mrf_mov(mov
, &mrf_index
, &grf_index
, &is_compr4
))
552 /* Using comp4 enables a stride of 4 for this instruction */
553 const int mrf_index0
= mrf_index
;
554 const int mrf_index1
= is_compr4
? mrf_index
+4 : mrf_index
+1;
556 /* Look where the register has been set */
558 GLboolean potential_remove
= GL_FALSE
;
561 /* If _one_ instruction writes the grf, we try to remove the mov */
562 struct brw_instruction
*inst
= p
->store
+ prev
;
563 if (brw_is_grf_straight_write(inst
, grf_index
)) {
564 potential_remove
= GL_TRUE
;
571 if (potential_remove
== GL_FALSE
)
573 removeInst
[i
] = GL_TRUE
;
575 /* Monitor first the section of code between the grf computation and the
576 * mov. Here we cannot read or write both mrf and grf register
578 for (j
= prev
+ 1; j
< i
; ++j
) {
579 struct brw_instruction
*inst
= p
->store
+ j
;
582 if (brw_is_grf_written(inst
, grf_index
, simd16_size
, gen
) ||
583 brw_is_grf_read(inst
, grf_index
, simd16_size
) ||
584 brw_is_mrf_written(inst
, mrf_index0
, REG_SIZE
) ||
585 brw_is_mrf_written(inst
, mrf_index1
, REG_SIZE
) ||
586 brw_is_mrf_read(inst
, mrf_index0
, REG_SIZE
, gen
) ||
587 brw_is_mrf_read(inst
, mrf_index1
, REG_SIZE
, gen
)) {
588 removeInst
[i
] = GL_FALSE
;
593 /* After the mov, we can read or write the mrf. If the grf is overwritten,
596 for (j
= i
+ 1; j
< p
->nr_insn
; ++j
) {
597 struct brw_instruction
*inst
= p
->store
+ j
;
601 if (brw_is_grf_read(inst
, grf_index
, simd16_size
)) {
602 removeInst
[i
] = GL_FALSE
;
606 if (brw_is_grf_straight_write(inst
, grf_index
))
610 /* Note that with the top down traversal, we can safely pacth the mov
614 grf_inst
->bits1
.da1
.dest_reg_file
= mov
->bits1
.da1
.dest_reg_file
;
615 grf_inst
->bits1
.da1
.dest_reg_nr
= mov
->bits1
.da1
.dest_reg_nr
;
619 brw_remove_inst(p
, removeInst
);
624 is_single_channel_dp4(struct brw_instruction
*insn
)
626 if (insn
->header
.opcode
!= BRW_OPCODE_DP4
||
627 insn
->header
.execution_size
!= BRW_EXECUTE_8
||
628 insn
->header
.access_mode
!= BRW_ALIGN_16
||
629 insn
->bits1
.da1
.dest_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
632 if (!is_power_of_two(insn
->bits1
.da16
.dest_writemask
))
639 * Sets the dependency control fields on DP4 instructions.
641 * The hardware only tracks dependencies on a register basis, so when
644 * DP4 dst.x src1 src2
645 * DP4 dst.y src1 src3
646 * DP4 dst.z src1 src4
647 * DP4 dst.w src1 src5
649 * It will wait to do the DP4 dst.y until the dst.x is resolved, etc.
650 * We can examine our instruction stream and set the dependency
651 * control fields to tell the hardware when to do it.
653 * We may want to extend this to other instructions that are used to
654 * fill in a channel at a time of the destination register.
657 brw_set_dp4_dependency_control(struct brw_compile
*p
)
661 for (i
= 1; i
< p
->nr_insn
; i
++) {
662 struct brw_instruction
*insn
= &p
->store
[i
];
663 struct brw_instruction
*prev
= &p
->store
[i
- 1];
665 if (!is_single_channel_dp4(prev
))
668 if (!is_single_channel_dp4(insn
)) {
673 /* Only avoid hw dep control if the write masks are different
674 * channels of one reg.
676 if (insn
->bits1
.da16
.dest_writemask
== prev
->bits1
.da16
.dest_writemask
)
678 if (insn
->bits1
.da16
.dest_reg_nr
!= prev
->bits1
.da16
.dest_reg_nr
)
681 /* Check if the second instruction depends on the previous one
684 if (insn
->bits1
.da1
.src0_reg_file
== BRW_GENERAL_REGISTER_FILE
&&
685 (insn
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
||
686 insn
->bits2
.da1
.src0_reg_nr
== insn
->bits1
.da16
.dest_reg_nr
))
688 if (insn
->bits1
.da1
.src1_reg_file
== BRW_GENERAL_REGISTER_FILE
&&
689 (insn
->bits3
.da1
.src1_address_mode
!= BRW_ADDRESS_DIRECT
||
690 insn
->bits3
.da1
.src1_reg_nr
== insn
->bits1
.da16
.dest_reg_nr
))
693 prev
->header
.dependency_control
|= BRW_DEPENDENCY_NOTCLEARED
;
694 insn
->header
.dependency_control
|= BRW_DEPENDENCY_NOTCHECKED
;
/* Entry point of the post-emission optimizer: runs the DP4 dependency
 * control pass over the instructions stored in `p`.
 *
 * NOTE(review): the return type is restored from the body, which returns
 * nothing.
 */
void
brw_optimize(struct brw_compile *p)
{
   brw_set_dp4_dependency_control(p);
}