2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
28 #include "main/macros.h"
29 #include "program/program.h"
30 #include "program/prog_print.h"
31 #include "brw_context.h"
32 #include "brw_defines.h"
35 const struct brw_instruction_info brw_opcodes
[128] = {
36 [BRW_OPCODE_MOV
] = { .name
= "mov", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
37 [BRW_OPCODE_FRC
] = { .name
= "frc", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
38 [BRW_OPCODE_RNDU
] = { .name
= "rndu", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
39 [BRW_OPCODE_RNDD
] = { .name
= "rndd", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
40 [BRW_OPCODE_RNDE
] = { .name
= "rnde", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
41 [BRW_OPCODE_RNDZ
] = { .name
= "rndz", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
42 [BRW_OPCODE_NOT
] = { .name
= "not", .nsrc
= 1, .ndst
= 1, .is_arith
= 1 },
43 [BRW_OPCODE_LZD
] = { .name
= "lzd", .nsrc
= 1, .ndst
= 1 },
45 [BRW_OPCODE_MUL
] = { .name
= "mul", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
46 [BRW_OPCODE_MAC
] = { .name
= "mac", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
47 [BRW_OPCODE_MACH
] = { .name
= "mach", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
48 [BRW_OPCODE_LINE
] = { .name
= "line", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
49 [BRW_OPCODE_PLN
] = { .name
= "pln", .nsrc
= 2, .ndst
= 1 },
50 [BRW_OPCODE_SAD2
] = { .name
= "sad2", .nsrc
= 2, .ndst
= 1 },
51 [BRW_OPCODE_SADA2
] = { .name
= "sada2", .nsrc
= 2, .ndst
= 1 },
52 [BRW_OPCODE_DP4
] = { .name
= "dp4", .nsrc
= 2, .ndst
= 1 },
53 [BRW_OPCODE_DPH
] = { .name
= "dph", .nsrc
= 2, .ndst
= 1 },
54 [BRW_OPCODE_DP3
] = { .name
= "dp3", .nsrc
= 2, .ndst
= 1 },
55 [BRW_OPCODE_DP2
] = { .name
= "dp2", .nsrc
= 2, .ndst
= 1 },
56 [BRW_OPCODE_MATH
] = { .name
= "math", .nsrc
= 2, .ndst
= 1 },
58 [BRW_OPCODE_AVG
] = { .name
= "avg", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
59 [BRW_OPCODE_ADD
] = { .name
= "add", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
60 [BRW_OPCODE_SEL
] = { .name
= "sel", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
61 [BRW_OPCODE_AND
] = { .name
= "and", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
62 [BRW_OPCODE_OR
] = { .name
= "or", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
63 [BRW_OPCODE_XOR
] = { .name
= "xor", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
64 [BRW_OPCODE_SHR
] = { .name
= "shr", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
65 [BRW_OPCODE_SHL
] = { .name
= "shl", .nsrc
= 2, .ndst
= 1, .is_arith
= 1 },
66 [BRW_OPCODE_ASR
] = { .name
= "asr", .nsrc
= 2, .ndst
= 1 },
67 [BRW_OPCODE_CMP
] = { .name
= "cmp", .nsrc
= 2, .ndst
= 1 },
68 [BRW_OPCODE_CMPN
] = { .name
= "cmpn", .nsrc
= 2, .ndst
= 1 },
70 [BRW_OPCODE_SEND
] = { .name
= "send", .nsrc
= 1, .ndst
= 1 },
71 [BRW_OPCODE_NOP
] = { .name
= "nop", .nsrc
= 0, .ndst
= 0 },
72 [BRW_OPCODE_JMPI
] = { .name
= "jmpi", .nsrc
= 1, .ndst
= 0 },
73 [BRW_OPCODE_IF
] = { .name
= "if", .nsrc
= 2, .ndst
= 0 },
74 [BRW_OPCODE_IFF
] = { .name
= "iff", .nsrc
= 2, .ndst
= 1 },
75 [BRW_OPCODE_WHILE
] = { .name
= "while", .nsrc
= 2, .ndst
= 0 },
76 [BRW_OPCODE_ELSE
] = { .name
= "else", .nsrc
= 2, .ndst
= 0 },
77 [BRW_OPCODE_BREAK
] = { .name
= "break", .nsrc
= 2, .ndst
= 0 },
78 [BRW_OPCODE_CONTINUE
] = { .name
= "cont", .nsrc
= 1, .ndst
= 0 },
79 [BRW_OPCODE_HALT
] = { .name
= "halt", .nsrc
= 1, .ndst
= 0 },
80 [BRW_OPCODE_MSAVE
] = { .name
= "msave", .nsrc
= 1, .ndst
= 1 },
81 [BRW_OPCODE_PUSH
] = { .name
= "push", .nsrc
= 1, .ndst
= 1 },
82 [BRW_OPCODE_MRESTORE
] = { .name
= "mrest", .nsrc
= 1, .ndst
= 1 },
83 [BRW_OPCODE_POP
] = { .name
= "pop", .nsrc
= 2, .ndst
= 0 },
84 [BRW_OPCODE_WAIT
] = { .name
= "wait", .nsrc
= 1, .ndst
= 0 },
85 [BRW_OPCODE_DO
] = { .name
= "do", .nsrc
= 0, .ndst
= 0 },
86 [BRW_OPCODE_ENDIF
] = { .name
= "endif", .nsrc
= 2, .ndst
= 0 },
90 bool brw_is_arithmetic_inst(const struct brw_instruction
*inst
)
92 return brw_opcodes
[inst
->header
.opcode
].is_arith
;
95 static const GLuint inst_stride
[7] = {
105 static const GLuint inst_type_size
[8] = {
106 [BRW_REGISTER_TYPE_UD
] = 4,
107 [BRW_REGISTER_TYPE_D
] = 4,
108 [BRW_REGISTER_TYPE_UW
] = 2,
109 [BRW_REGISTER_TYPE_W
] = 2,
110 [BRW_REGISTER_TYPE_UB
] = 1,
111 [BRW_REGISTER_TYPE_B
] = 1,
112 [BRW_REGISTER_TYPE_F
] = 4
116 brw_is_grf_written(const struct brw_instruction
*inst
,
117 int reg_index
, int size
,
120 if (brw_opcodes
[inst
->header
.opcode
].ndst
== 0)
123 if (inst
->bits1
.da1
.dest_address_mode
!= BRW_ADDRESS_DIRECT
)
124 if (inst
->bits1
.ia1
.dest_reg_file
== BRW_GENERAL_REGISTER_FILE
)
127 if (inst
->bits1
.da1
.dest_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
130 const int reg_start
= reg_index
* REG_SIZE
;
131 const int reg_end
= reg_start
+ size
;
133 const int type_size
= inst_type_size
[inst
->bits1
.da1
.dest_reg_type
];
134 const int write_start
= inst
->bits1
.da1
.dest_reg_nr
*REG_SIZE
135 + inst
->bits1
.da1
.dest_subreg_nr
;
136 int length
, write_end
;
138 /* SEND is specific */
139 if (inst
->header
.opcode
== BRW_OPCODE_SEND
) {
141 length
= inst
->bits3
.generic_gen5
.response_length
*REG_SIZE
;
143 length
= inst
->bits3
.generic
.response_length
*REG_SIZE
;
146 length
= 1 << inst
->header
.execution_size
;
148 length
*= inst
->bits1
.da1
.dest_horiz_stride
;
151 /* If the two intervals intersect, we overwrite the register */
152 write_end
= write_start
+ length
;
153 const int left
= MAX2(write_start
, reg_start
);
154 const int right
= MIN2(write_end
, reg_end
);
160 brw_is_mrf_written_alu(const struct brw_instruction
*inst
,
161 int reg_index
, int size
)
163 if (brw_opcodes
[inst
->header
.opcode
].ndst
== 0)
166 if (inst
->bits1
.da1
.dest_reg_file
!= BRW_MESSAGE_REGISTER_FILE
)
169 if (inst
->bits1
.da1
.dest_address_mode
!= BRW_ADDRESS_DIRECT
)
172 const int reg_start
= reg_index
* REG_SIZE
;
173 const int reg_end
= reg_start
+ size
;
175 const int mrf_index
= inst
->bits1
.da1
.dest_reg_nr
& 0x0f;
176 const int is_compr4
= inst
->bits1
.da1
.dest_reg_nr
& BRW_MRF_COMPR4
;
177 const int type_size
= inst_type_size
[inst
->bits1
.da1
.dest_reg_type
];
179 /* We use compr4 with a size != 16 elements. Strange, we conservatively
180 * consider that we are writing the register.
182 if (is_compr4
&& inst
->header
.execution_size
!= BRW_EXECUTE_16
)
185 /* Here we write mrf_{i} and mrf_{i+4}. So we write two times 8 elements */
187 const int length
= 8 * type_size
* inst
->bits1
.da1
.dest_horiz_stride
;
189 /* First 8-way register */
190 const int write_start0
= mrf_index
*REG_SIZE
191 + inst
->bits1
.da1
.dest_subreg_nr
;
192 const int write_end0
= write_start0
+ length
;
194 /* Second 8-way register */
195 const int write_start1
= (mrf_index
+4)*REG_SIZE
196 + inst
->bits1
.da1
.dest_subreg_nr
;
197 const int write_end1
= write_start1
+ length
;
199 /* If the two intervals intersect, we overwrite the register */
200 const int left0
= MAX2(write_start0
, reg_start
);
201 const int right0
= MIN2(write_end0
, reg_end
);
202 const int left1
= MAX2(write_start1
, reg_start
);
203 const int right1
= MIN2(write_end1
, reg_end
);
205 if (left0
< right0
|| left1
< right1
)
210 length
= 1 << inst
->header
.execution_size
;
212 length
*= inst
->bits1
.da1
.dest_horiz_stride
;
214 /* If the two intervals intersect, we write into the register */
215 const int write_start
= inst
->bits1
.da1
.dest_reg_nr
*REG_SIZE
216 + inst
->bits1
.da1
.dest_subreg_nr
;
217 const int write_end
= write_start
+ length
;
218 const int left
= MAX2(write_start
, reg_start
);
219 const int right
= MIN2(write_end
, reg_end
);
228 /* SEND may perform an implicit mov to a mrf register */
230 brw_is_mrf_written_send(const struct brw_instruction
*inst
,
231 int reg_index
, int size
)
234 const int reg_start
= reg_index
* REG_SIZE
;
235 const int reg_end
= reg_start
+ size
;
236 const int mrf_start
= inst
->header
.destreg__conditionalmod
;
237 const int write_start
= mrf_start
* REG_SIZE
;
238 const int write_end
= write_start
+ REG_SIZE
;
239 const int left
= MAX2(write_start
, reg_start
);
240 const int right
= MIN2(write_end
, reg_end
);
242 if (inst
->header
.opcode
!= BRW_OPCODE_SEND
||
243 inst
->bits1
.da1
.src0_reg_file
== 0)
249 /* Specific path for message register since we need to handle the compr4 case */
/**
 * Tell whether an instruction writes the given message register range,
 * either through its regular destination (which handles the compr4 case)
 * or through the implicit move a SEND may perform.
 *
 * NOTE(review): the return-type line is not visible in this view;
 * `static bool` is assumed -- confirm against the full file.
 *
 * \param inst       instruction to inspect
 * \param reg_index  index of the first message register of interest
 * \param size       size of the monitored range, forwarded unchanged
 * \return true when either helper reports a write to the range
 */
static bool
brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
{
   if (brw_is_mrf_written_alu(inst, reg_index, size))
      return true;

   if (brw_is_mrf_written_send(inst, reg_index, size))
      return true;

   return false;
}
258 brw_is_mrf_read(const struct brw_instruction
*inst
,
259 int reg_index
, int size
, int gen
)
261 if (inst
->header
.opcode
!= BRW_OPCODE_SEND
)
263 if (inst
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
)
266 const int reg_start
= reg_index
*REG_SIZE
;
267 const int reg_end
= reg_start
+ size
;
269 int length
, read_start
, read_end
;
271 length
= inst
->bits3
.generic_gen5
.msg_length
*REG_SIZE
;
273 length
= inst
->bits3
.generic
.msg_length
*REG_SIZE
;
275 /* Look if SEND uses an implicit mov. In that case, we read one less register
278 if (inst
->bits1
.da1
.src0_reg_file
!= 0)
279 read_start
= inst
->header
.destreg__conditionalmod
;
282 read_start
= inst
->header
.destreg__conditionalmod
+ 1;
284 read_start
*= REG_SIZE
;
285 read_end
= read_start
+ length
;
287 const int left
= MAX2(read_start
, reg_start
);
288 const int right
= MIN2(read_end
, reg_end
);
294 brw_is_grf_read(const struct brw_instruction
*inst
, int reg_index
, int size
)
297 if (brw_opcodes
[inst
->header
.opcode
].nsrc
== 0)
300 /* Look at first source. We must take into account register regions to
301 * monitor carefully the read. Note that we are a bit too conservative here
302 * since we do not take into account the fact that some complete registers
305 if (brw_opcodes
[inst
->header
.opcode
].nsrc
>= 1) {
307 if (inst
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
)
308 if (inst
->bits1
.ia1
.src0_reg_file
== BRW_GENERAL_REGISTER_FILE
)
310 if (inst
->bits1
.da1
.src0_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
313 const int reg_start
= reg_index
*REG_SIZE
;
314 const int reg_end
= reg_start
+ size
;
316 /* See if at least one of these elements intersects the interval */
317 const int type_size
= inst_type_size
[inst
->bits1
.da1
.src0_reg_type
];
318 const int elem_num
= 1 << inst
->header
.execution_size
;
319 const int width
= 1 << inst
->bits2
.da1
.src0_width
;
320 const int row_num
= elem_num
>> inst
->bits2
.da1
.src0_width
;
321 const int hs
= type_size
*inst_stride
[inst
->bits2
.da1
.src0_horiz_stride
];
322 const int vs
= type_size
*inst_stride
[inst
->bits2
.da1
.src0_vert_stride
];
323 int row_start
= inst
->bits2
.da1
.src0_reg_nr
*REG_SIZE
324 + inst
->bits2
.da1
.src0_subreg_nr
;
325 for (j
= 0; j
< row_num
; ++j
) {
326 int write_start
= row_start
;
327 for (i
= 0; i
< width
; ++i
) {
328 const int write_end
= write_start
+ type_size
;
329 const int left
= write_start
> reg_start
? write_start
: reg_start
;
330 const int right
= write_end
< reg_end
? write_end
: reg_end
;
339 /* Second src register */
340 if (brw_opcodes
[inst
->header
.opcode
].nsrc
>= 2) {
342 if (inst
->bits3
.da1
.src1_address_mode
!= BRW_ADDRESS_DIRECT
)
343 if (inst
->bits1
.ia1
.src1_reg_file
== BRW_GENERAL_REGISTER_FILE
)
345 if (inst
->bits1
.da1
.src1_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
348 const int reg_start
= reg_index
*REG_SIZE
;
349 const int reg_end
= reg_start
+ size
;
351 /* See if at least one of these elements intersects the interval */
352 const int type_size
= inst_type_size
[inst
->bits1
.da1
.src1_reg_type
];
353 const int elem_num
= 1 << inst
->header
.execution_size
;
354 const int width
= 1 << inst
->bits3
.da1
.src1_width
;
355 const int row_num
= elem_num
>> inst
->bits3
.da1
.src1_width
;
356 const int hs
= type_size
*inst_stride
[inst
->bits3
.da1
.src1_horiz_stride
];
357 const int vs
= type_size
*inst_stride
[inst
->bits3
.da1
.src1_vert_stride
];
358 int row_start
= inst
->bits3
.da1
.src1_reg_nr
*REG_SIZE
359 + inst
->bits3
.da1
.src1_subreg_nr
;
360 for (j
= 0; j
< row_num
; ++j
) {
361 int write_start
= row_start
;
362 for (i
= 0; i
< width
; ++i
) {
363 const int write_end
= write_start
+ type_size
;
364 const int left
= write_start
> reg_start
? write_start
: reg_start
;
365 const int right
= write_end
< reg_end
? write_end
: reg_end
;
378 brw_is_control_done(const struct brw_instruction
*mov
) {
380 mov
->header
.dependency_control
!= 0 ||
381 mov
->header
.thread_control
!= 0 ||
382 mov
->header
.mask_control
!= 0 ||
383 mov
->header
.saturate
!= 0 ||
384 mov
->header
.debug_control
!= 0;
388 brw_is_predicated(const struct brw_instruction
*mov
) {
389 return mov
->header
.predicate_control
!= 0;
393 brw_is_grf_to_mrf_mov(const struct brw_instruction
*mov
,
398 if (brw_is_predicated(mov
) ||
399 brw_is_control_done(mov
) ||
400 mov
->header
.debug_control
!= 0)
403 if (mov
->bits1
.da1
.dest_address_mode
!= BRW_ADDRESS_DIRECT
||
404 mov
->bits1
.da1
.dest_reg_file
!= BRW_MESSAGE_REGISTER_FILE
||
405 mov
->bits1
.da1
.dest_reg_type
!= BRW_REGISTER_TYPE_F
||
406 mov
->bits1
.da1
.dest_horiz_stride
!= BRW_HORIZONTAL_STRIDE_1
||
407 mov
->bits1
.da1
.dest_subreg_nr
!= 0)
410 if (mov
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
||
411 mov
->bits1
.da1
.src0_reg_file
!= BRW_GENERAL_REGISTER_FILE
||
412 mov
->bits1
.da1
.src0_reg_type
!= BRW_REGISTER_TYPE_F
||
413 mov
->bits2
.da1
.src0_width
!= BRW_WIDTH_8
||
414 mov
->bits2
.da1
.src0_horiz_stride
!= BRW_HORIZONTAL_STRIDE_1
||
415 mov
->bits2
.da1
.src0_vert_stride
!= BRW_VERTICAL_STRIDE_8
||
416 mov
->bits2
.da1
.src0_subreg_nr
!= 0 ||
417 mov
->bits2
.da1
.src0_abs
!= 0 ||
418 mov
->bits2
.da1
.src0_negate
!= 0)
421 *grf_index
= mov
->bits2
.da1
.src0_reg_nr
;
422 *mrf_index
= mov
->bits1
.da1
.dest_reg_nr
& 0x0f;
423 *is_compr4
= (mov
->bits1
.da1
.dest_reg_nr
& BRW_MRF_COMPR4
) != 0;
428 brw_is_grf_straight_write(const struct brw_instruction
*inst
, int grf_index
)
430 /* remark: no problem to predicate a SEL instruction */
431 if ((!brw_is_predicated(inst
) || inst
->header
.opcode
== BRW_OPCODE_SEL
) &&
432 brw_is_control_done(inst
) == false &&
433 inst
->header
.execution_size
== 4 &&
434 inst
->header
.access_mode
== BRW_ALIGN_1
&&
435 inst
->bits1
.da1
.dest_address_mode
== BRW_ADDRESS_DIRECT
&&
436 inst
->bits1
.da1
.dest_reg_file
== BRW_GENERAL_REGISTER_FILE
&&
437 inst
->bits1
.da1
.dest_reg_type
== BRW_REGISTER_TYPE_F
&&
438 inst
->bits1
.da1
.dest_horiz_stride
== BRW_HORIZONTAL_STRIDE_1
&&
439 inst
->bits1
.da1
.dest_reg_nr
== grf_index
&&
440 inst
->bits1
.da1
.dest_subreg_nr
== 0 &&
441 brw_is_arithmetic_inst(inst
))
448 brw_inst_are_equal(const struct brw_instruction
*src0
,
449 const struct brw_instruction
*src1
)
451 const GLuint
*field0
= (GLuint
*) src0
;
452 const GLuint
*field1
= (GLuint
*) src1
;
453 return field0
[0] == field1
[0] &&
454 field0
[1] == field1
[1] &&
455 field0
[2] == field1
[2] &&
456 field0
[3] == field1
[3];
460 brw_inst_copy(struct brw_instruction
*dst
,
461 const struct brw_instruction
*src
)
463 GLuint
*field_dst
= (GLuint
*) dst
;
464 const GLuint
*field_src
= (GLuint
*) src
;
465 field_dst
[0] = field_src
[0];
466 field_dst
[1] = field_src
[1];
467 field_dst
[2] = field_src
[2];
468 field_dst
[3] = field_src
[3];
471 static void brw_remove_inst(struct brw_compile
*p
, const bool *removeInst
)
473 int i
, nr_insn
= 0, to
= 0, from
= 0;
475 for (from
= 0; from
< p
->nr_insn
; ++from
) {
476 if (removeInst
[from
])
479 brw_inst_copy(p
->store
+ to
, p
->store
+ from
);
483 for (i
= 0; i
< p
->nr_insn
; ++i
)
484 if (removeInst
[i
] == false)
486 p
->nr_insn
= nr_insn
;
489 /* The gen code emitter generates a lot of duplications in the
490 * grf-to-mrf moves, for example when texture sampling with the same
491 * coordinates from multiple textures. Here, we monitor identical
492 * grf-to-mrf mov instructions and remove repeated ones where the operands
493 * and dst haven't changed in between.
495 void brw_remove_duplicate_mrf_moves(struct brw_compile
*p
)
497 const int gen
= p
->brw
->intel
.gen
;
500 bool *removeInst
= calloc(sizeof(bool), p
->nr_insn
);
501 for (i
= 0; i
< p
->nr_insn
; i
++) {
505 const struct brw_instruction
*mov
= p
->store
+ i
;
506 int mrf_index
, grf_index
;
509 /* Only consider _straight_ grf-to-mrf moves */
510 if (!brw_is_grf_to_mrf_mov(mov
, &mrf_index
, &grf_index
, &is_compr4
))
513 const int mrf_index0
= mrf_index
;
514 const int mrf_index1
= is_compr4
? mrf_index0
+4 : mrf_index0
+1;
515 const int simd16_size
= 2 * REG_SIZE
;
517 for (j
= i
+ 1; j
< p
->nr_insn
; j
++) {
518 const struct brw_instruction
*inst
= p
->store
+ j
;
520 if (brw_inst_are_equal(mov
, inst
)) {
521 removeInst
[j
] = true;
525 if (brw_is_grf_written(inst
, grf_index
, simd16_size
, gen
) ||
526 brw_is_mrf_written(inst
, mrf_index0
, REG_SIZE
) ||
527 brw_is_mrf_written(inst
, mrf_index1
, REG_SIZE
))
532 brw_remove_inst(p
, removeInst
);
536 /* Replace moves to MRFs where the value moved is the result of a
537 * normal arithmetic operation with computation right into the MRF.
539 void brw_remove_grf_to_mrf_moves(struct brw_compile
*p
)
542 struct brw_context
*brw
= p
->brw
;
543 const int gen
= brw
->intel
.gen
;
544 const int simd16_size
= 2*REG_SIZE
;
546 bool *removeInst
= calloc(sizeof(bool), p
->nr_insn
);
549 for (i
= 0; i
< p
->nr_insn
; i
++) {
553 struct brw_instruction
*grf_inst
= NULL
;
554 const struct brw_instruction
*mov
= p
->store
+ i
;
555 int mrf_index
, grf_index
;
558 /* Only consider _straight_ grf-to-mrf moves */
559 if (!brw_is_grf_to_mrf_mov(mov
, &mrf_index
, &grf_index
, &is_compr4
))
562 /* Using compr4 enables a stride of 4 for this instruction */
563 const int mrf_index0
= mrf_index
;
564 const int mrf_index1
= is_compr4
? mrf_index
+4 : mrf_index
+1;
566 /* Look where the register has been set */
568 bool potential_remove
= false;
571 /* If _one_ instruction writes the grf, we try to remove the mov */
572 struct brw_instruction
*inst
= p
->store
+ prev
;
573 if (brw_is_grf_straight_write(inst
, grf_index
)) {
574 potential_remove
= true;
581 if (potential_remove
== false)
583 removeInst
[i
] = true;
585 /* Monitor first the section of code between the grf computation and the
586 * mov. Here we cannot read or write both mrf and grf register
588 for (j
= prev
+ 1; j
< i
; ++j
) {
589 struct brw_instruction
*inst
= p
->store
+ j
;
592 if (brw_is_grf_written(inst
, grf_index
, simd16_size
, gen
) ||
593 brw_is_grf_read(inst
, grf_index
, simd16_size
) ||
594 brw_is_mrf_written(inst
, mrf_index0
, REG_SIZE
) ||
595 brw_is_mrf_written(inst
, mrf_index1
, REG_SIZE
) ||
596 brw_is_mrf_read(inst
, mrf_index0
, REG_SIZE
, gen
) ||
597 brw_is_mrf_read(inst
, mrf_index1
, REG_SIZE
, gen
)) {
598 removeInst
[i
] = false;
603 /* After the mov, we can read or write the mrf. If the grf is overwritten,
606 for (j
= i
+ 1; j
< p
->nr_insn
; ++j
) {
607 struct brw_instruction
*inst
= p
->store
+ j
;
611 if (brw_is_grf_read(inst
, grf_index
, simd16_size
)) {
612 removeInst
[i
] = false;
616 if (brw_is_grf_straight_write(inst
, grf_index
))
620 /* Note that with the top down traversal, we can safely patch the mov
624 grf_inst
->bits1
.da1
.dest_reg_file
= mov
->bits1
.da1
.dest_reg_file
;
625 grf_inst
->bits1
.da1
.dest_reg_nr
= mov
->bits1
.da1
.dest_reg_nr
;
629 brw_remove_inst(p
, removeInst
);
634 is_single_channel_dp4(struct brw_instruction
*insn
)
636 if (insn
->header
.opcode
!= BRW_OPCODE_DP4
||
637 insn
->header
.execution_size
!= BRW_EXECUTE_8
||
638 insn
->header
.access_mode
!= BRW_ALIGN_16
||
639 insn
->bits1
.da1
.dest_reg_file
!= BRW_GENERAL_REGISTER_FILE
)
642 if (!is_power_of_two(insn
->bits1
.da16
.dest_writemask
))
649 * Sets the dependency control fields on DP4 instructions.
651 * The hardware only tracks dependencies on a register basis, so when
654 * DP4 dst.x src1 src2
655 * DP4 dst.y src1 src3
656 * DP4 dst.z src1 src4
657 * DP4 dst.w src1 src5
659 * It will wait to do the DP4 dst.y until the dst.x is resolved, etc.
660 * We can examine our instruction stream and set the dependency
661 * control fields to tell the hardware when to do it.
663 * We may want to extend this to other instructions that are used to
664 * fill in a channel at a time of the destination register.
667 brw_set_dp4_dependency_control(struct brw_compile
*p
)
671 for (i
= 1; i
< p
->nr_insn
; i
++) {
672 struct brw_instruction
*insn
= &p
->store
[i
];
673 struct brw_instruction
*prev
= &p
->store
[i
- 1];
675 if (!is_single_channel_dp4(prev
))
678 if (!is_single_channel_dp4(insn
)) {
683 /* Only avoid hw dep control if the write masks are different
684 * channels of one reg.
686 if (insn
->bits1
.da16
.dest_writemask
== prev
->bits1
.da16
.dest_writemask
)
688 if (insn
->bits1
.da16
.dest_reg_nr
!= prev
->bits1
.da16
.dest_reg_nr
)
691 /* Check if the second instruction depends on the previous one
694 if (insn
->bits1
.da1
.src0_reg_file
== BRW_GENERAL_REGISTER_FILE
&&
695 (insn
->bits2
.da1
.src0_address_mode
!= BRW_ADDRESS_DIRECT
||
696 insn
->bits2
.da1
.src0_reg_nr
== insn
->bits1
.da16
.dest_reg_nr
))
698 if (insn
->bits1
.da1
.src1_reg_file
== BRW_GENERAL_REGISTER_FILE
&&
699 (insn
->bits3
.da1
.src1_address_mode
!= BRW_ADDRESS_DIRECT
||
700 insn
->bits3
.da1
.src1_reg_nr
== insn
->bits1
.da16
.dest_reg_nr
))
703 prev
->header
.dependency_control
|= BRW_DEPENDENCY_NOTCLEARED
;
704 insn
->header
.dependency_control
|= BRW_DEPENDENCY_NOTCHECKED
;
709 brw_optimize(struct brw_compile
*p
)
711 brw_set_dp4_dependency_control(p
);