/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #ifndef GEN_MI_BUILDER_H
25 #define GEN_MI_BUILDER_H
27 #include "genxml/genX_bits.h"
28 #include "util/bitscan.h"
29 #include "util/fast_idiv_by_const.h"
30 #include "util/u_math.h"
#ifndef GEN_MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API so that it can reserve some GPRs at
 * the top end for its own use.
 */
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
/** These must be defined by the user of the builder
 *
 * void *__gen_get_batch_dwords(__gen_user_data *user_data,
 *                              unsigned num_dwords);
 *
 * __gen_address_type
 * __gen_address_offset(__gen_address_type addr, uint64_t offset);
 *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s so pinning as well as batch chaining or some
 * other mechanism for ensuring batch pointers remain valid during building is
 * required. The following function must also be defined, it returns an
 * address in canonical form:
 *
 * uint64_t
 * __gen_get_batch_address(__gen_user_data *user_data, void *location);
 *
 * Also, __gen_combine_address must accept a location value of NULL and return
 * a fully valid 64-bit address.
 */
/*
 * Start of the actual MI builder
 */
/* Paste a genxml command name onto its generated _length/_header/_pack
 * companions (e.g. GENX(MI_MATH) -> GENX(MI_MATH)_pack).
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack
/* Declare a genxml command struct `name`, run the for-body exactly once so
 * the caller can fill in fields, then pack it into `dst`.  The final
 * `_dst = NULL` (restored here — it was truncated) terminates the loop and
 * makes `_dst` available to the body for self-modifying-batch tricks.
 */
#define gen_mi_builder_pack(b, cmd, dst, name)                          \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)
/* Allocate space for `cmd` in the batch and pack it there; the for-body
 * fills in the command's fields via `name`.
 */
#define gen_mi_builder_emit(b, cmd, name) \
   gen_mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)
81 enum gen_mi_value_type
{
82 GEN_MI_VALUE_TYPE_IMM
,
83 GEN_MI_VALUE_TYPE_MEM32
,
84 GEN_MI_VALUE_TYPE_MEM64
,
85 GEN_MI_VALUE_TYPE_REG32
,
86 GEN_MI_VALUE_TYPE_REG64
,
90 enum gen_mi_value_type type
;
94 __gen_address_type addr
;
98 #if GEN_GEN >= 7 || GEN_IS_HASWELL
104 #define GEN_MI_BUILDER_MAX_MATH_DWORDS 256
106 #define GEN_MI_BUILDER_MAX_MATH_DWORDS 64
109 struct gen_mi_builder
{
110 __gen_user_data
*user_data
;
112 #if GEN_GEN >= 8 || GEN_IS_HASWELL
114 uint8_t gpr_refs
[GEN_MI_BUILDER_NUM_ALLOC_GPRS
];
116 unsigned num_math_dwords
;
117 uint32_t math_dwords
[GEN_MI_BUILDER_MAX_MATH_DWORDS
];
122 gen_mi_builder_init(struct gen_mi_builder
*b
, __gen_user_data
*user_data
)
124 memset(b
, 0, sizeof(*b
));
125 b
->user_data
= user_data
;
127 #if GEN_GEN >= 8 || GEN_IS_HASWELL
129 b
->num_math_dwords
= 0;
/** Emit any pending MI_MATH dwords as a single MI_MATH command.
 *
 * No-op when nothing is pending (and on gens without MI_MATH buffering).
 */
static inline void
gen_mi_builder_flush_math(struct gen_mi_builder *b)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   if (b->num_math_dwords == 0)
      return;

   /* One header dword plus the buffered ALU dwords. */
   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   gen_mi_builder_pack(b, GENX(MI_MATH), dw, math) {
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}
/* MMIO offset of the first command-streamer GPR; each GPR is 8 bytes wide
 * (see the `* 8` arithmetic in the helpers below). */
#define _GEN_MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _GEN_MI_BUILDER_NUM_HW_GPRS 16
154 #if GEN_GEN >= 8 || GEN_IS_HASWELL
157 gen_mi_value_is_gpr(struct gen_mi_value val
)
159 return (val
.type
== GEN_MI_VALUE_TYPE_REG32
||
160 val
.type
== GEN_MI_VALUE_TYPE_REG64
) &&
161 val
.reg
>= _GEN_MI_BUILDER_GPR_BASE
&&
162 val
.reg
< _GEN_MI_BUILDER_GPR_BASE
+
163 _GEN_MI_BUILDER_NUM_HW_GPRS
* 8;
167 _gen_mi_value_is_allocated_gpr(struct gen_mi_value val
)
169 return (val
.type
== GEN_MI_VALUE_TYPE_REG32
||
170 val
.type
== GEN_MI_VALUE_TYPE_REG64
) &&
171 val
.reg
>= _GEN_MI_BUILDER_GPR_BASE
&&
172 val
.reg
< _GEN_MI_BUILDER_GPR_BASE
+
173 GEN_MI_BUILDER_NUM_ALLOC_GPRS
* 8;
176 static inline uint32_t
177 _gen_mi_value_as_gpr(struct gen_mi_value val
)
179 assert(gen_mi_value_is_gpr(val
));
180 assert(val
.reg
% 8 == 0);
181 return (val
.reg
- _GEN_MI_BUILDER_GPR_BASE
) / 8;
184 static inline struct gen_mi_value
185 gen_mi_new_gpr(struct gen_mi_builder
*b
)
187 unsigned gpr
= ffs(~b
->gprs
) - 1;
188 assert(gpr
< GEN_MI_BUILDER_NUM_ALLOC_GPRS
);
189 assert(b
->gpr_refs
[gpr
] == 0);
190 b
->gprs
|= (1u << gpr
);
191 b
->gpr_refs
[gpr
] = 1;
193 return (struct gen_mi_value
) {
194 .type
= GEN_MI_VALUE_TYPE_REG64
,
195 .reg
= _GEN_MI_BUILDER_GPR_BASE
+ gpr
* 8,
198 #endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
/** Take a reference to a gen_mi_value
 *
 * The MI builder uses reference counting to automatically free ALU GPRs for
 * re-use in calculations. All gen_mi_* math functions consume the reference
 * they are handed for each source and return a reference to a value which the
 * caller must consume. In particular, if you pass the same value into a
 * single gen_mi_* math function twice (say to add a number to itself), you
 * are responsible for calling gen_mi_value_ref() to get a second reference
 * because the gen_mi_* math function will consume it twice.
 */
210 static inline struct gen_mi_value
211 gen_mi_value_ref(struct gen_mi_builder
*b
, struct gen_mi_value val
)
213 #if GEN_GEN >= 8 || GEN_IS_HASWELL
214 if (_gen_mi_value_is_allocated_gpr(val
)) {
215 unsigned gpr
= _gen_mi_value_as_gpr(val
);
216 assert(gpr
< GEN_MI_BUILDER_NUM_ALLOC_GPRS
);
217 assert(b
->gprs
& (1u << gpr
));
218 assert(b
->gpr_refs
[gpr
] < UINT8_MAX
);
221 #endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
/** Drop a reference to a gen_mi_value
 *
 * See also gen_mi_value_ref.
 */
231 gen_mi_value_unref(struct gen_mi_builder
*b
, struct gen_mi_value val
)
233 #if GEN_GEN >= 8 || GEN_IS_HASWELL
234 if (_gen_mi_value_is_allocated_gpr(val
)) {
235 unsigned gpr
= _gen_mi_value_as_gpr(val
);
236 assert(gpr
< GEN_MI_BUILDER_NUM_ALLOC_GPRS
);
237 assert(b
->gprs
& (1u << gpr
));
238 assert(b
->gpr_refs
[gpr
] > 0);
239 if (--b
->gpr_refs
[gpr
] == 0)
240 b
->gprs
&= ~(1u << gpr
);
242 #endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
245 static inline struct gen_mi_value
246 gen_mi_imm(uint64_t imm
)
248 return (struct gen_mi_value
) {
249 .type
= GEN_MI_VALUE_TYPE_IMM
,
254 static inline struct gen_mi_value
255 gen_mi_reg32(uint32_t reg
)
257 struct gen_mi_value val
= {
258 .type
= GEN_MI_VALUE_TYPE_REG32
,
261 #if GEN_GEN >= 8 || GEN_IS_HASWELL
262 assert(!_gen_mi_value_is_allocated_gpr(val
));
267 static inline struct gen_mi_value
268 gen_mi_reg64(uint32_t reg
)
270 struct gen_mi_value val
= {
271 .type
= GEN_MI_VALUE_TYPE_REG64
,
274 #if GEN_GEN >= 8 || GEN_IS_HASWELL
275 assert(!_gen_mi_value_is_allocated_gpr(val
));
280 static inline struct gen_mi_value
281 gen_mi_mem32(__gen_address_type addr
)
283 return (struct gen_mi_value
) {
284 .type
= GEN_MI_VALUE_TYPE_MEM32
,
289 static inline struct gen_mi_value
290 gen_mi_mem64(__gen_address_type addr
)
292 return (struct gen_mi_value
) {
293 .type
= GEN_MI_VALUE_TYPE_MEM64
,
298 static inline struct gen_mi_value
299 gen_mi_value_half(struct gen_mi_value value
, bool top_32_bits
)
301 switch (value
.type
) {
302 case GEN_MI_VALUE_TYPE_IMM
:
306 value
.imm
&= 0xffffffffu
;
309 case GEN_MI_VALUE_TYPE_MEM32
:
310 assert(!top_32_bits
);
313 case GEN_MI_VALUE_TYPE_MEM64
:
315 value
.addr
= __gen_address_offset(value
.addr
, 4);
316 value
.type
= GEN_MI_VALUE_TYPE_MEM32
;
319 case GEN_MI_VALUE_TYPE_REG32
:
320 assert(!top_32_bits
);
323 case GEN_MI_VALUE_TYPE_REG64
:
326 value
.type
= GEN_MI_VALUE_TYPE_REG32
;
330 unreachable("Invalid gen_mi_value type");
334 _gen_mi_copy_no_unref(struct gen_mi_builder
*b
,
335 struct gen_mi_value dst
, struct gen_mi_value src
)
337 #if GEN_GEN >= 7 || GEN_IS_HASWELL
338 /* TODO: We could handle src.invert by emitting a bit of math if we really
341 assert(!dst
.invert
&& !src
.invert
);
343 gen_mi_builder_flush_math(b
);
346 case GEN_MI_VALUE_TYPE_IMM
:
347 unreachable("Cannot copy to an immediate");
349 case GEN_MI_VALUE_TYPE_MEM64
:
350 case GEN_MI_VALUE_TYPE_REG64
:
351 /* If the destination is 64 bits, we have to copy in two halves */
352 _gen_mi_copy_no_unref(b
, gen_mi_value_half(dst
, false),
353 gen_mi_value_half(src
, false));
355 case GEN_MI_VALUE_TYPE_IMM
:
356 case GEN_MI_VALUE_TYPE_MEM64
:
357 case GEN_MI_VALUE_TYPE_REG64
:
358 /* TODO: Use MI_STORE_DATA_IMM::StoreQWord when we have it */
359 _gen_mi_copy_no_unref(b
, gen_mi_value_half(dst
, true),
360 gen_mi_value_half(src
, true));
363 _gen_mi_copy_no_unref(b
, gen_mi_value_half(dst
, true),
369 case GEN_MI_VALUE_TYPE_MEM32
:
371 case GEN_MI_VALUE_TYPE_IMM
:
372 gen_mi_builder_emit(b
, GENX(MI_STORE_DATA_IMM
), sdi
) {
373 sdi
.Address
= dst
.addr
;
375 sdi
.ForceWriteCompletionCheck
= true;
377 sdi
.ImmediateData
= src
.imm
;
381 case GEN_MI_VALUE_TYPE_MEM32
:
382 case GEN_MI_VALUE_TYPE_MEM64
:
384 gen_mi_builder_emit(b
, GENX(MI_COPY_MEM_MEM
), cmm
) {
385 cmm
.DestinationMemoryAddress
= dst
.addr
;
386 cmm
.SourceMemoryAddress
= src
.addr
;
390 struct gen_mi_value tmp
= gen_mi_new_gpr(b
);
391 _gen_mi_copy_no_unref(b
, tmp
, src
);
392 _gen_mi_copy_no_unref(b
, dst
, tmp
);
393 gen_mi_value_unref(b
, tmp
);
396 unreachable("Cannot do mem <-> mem copy on IVB and earlier");
400 case GEN_MI_VALUE_TYPE_REG32
:
401 case GEN_MI_VALUE_TYPE_REG64
:
402 gen_mi_builder_emit(b
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
403 srm
.RegisterAddress
= src
.reg
;
404 srm
.MemoryAddress
= dst
.addr
;
409 unreachable("Invalid gen_mi_value type");
413 case GEN_MI_VALUE_TYPE_REG32
:
415 case GEN_MI_VALUE_TYPE_IMM
:
416 gen_mi_builder_emit(b
, GENX(MI_LOAD_REGISTER_IMM
), lri
) {
417 lri
.RegisterOffset
= dst
.reg
;
418 lri
.DataDWord
= src
.imm
;
422 case GEN_MI_VALUE_TYPE_MEM32
:
423 case GEN_MI_VALUE_TYPE_MEM64
:
424 gen_mi_builder_emit(b
, GENX(MI_LOAD_REGISTER_MEM
), lrm
) {
425 lrm
.RegisterAddress
= dst
.reg
;
426 lrm
.MemoryAddress
= src
.addr
;
430 case GEN_MI_VALUE_TYPE_REG32
:
431 case GEN_MI_VALUE_TYPE_REG64
:
432 #if GEN_GEN >= 8 || GEN_IS_HASWELL
433 if (src
.reg
!= dst
.reg
) {
434 gen_mi_builder_emit(b
, GENX(MI_LOAD_REGISTER_REG
), lrr
) {
435 lrr
.SourceRegisterAddress
= src
.reg
;
436 lrr
.DestinationRegisterAddress
= dst
.reg
;
440 unreachable("Cannot do reg <-> reg copy on IVB and earlier");
445 unreachable("Invalid gen_mi_value type");
450 unreachable("Invalid gen_mi_value type");
/** Store the value in src to the value represented by dst
 *
 * If the bit size of src and dst mismatch, this function does an unsigned
 * integer cast. If src has more bits than dst, it takes the bottom bits. If
 * src has fewer bits than dst, it fills the top bits with zeros.
 *
 * This function consumes one reference for each of src and dst.
 */
463 gen_mi_store(struct gen_mi_builder
*b
,
464 struct gen_mi_value dst
, struct gen_mi_value src
)
466 _gen_mi_copy_no_unref(b
, dst
, src
);
467 gen_mi_value_unref(b
, src
);
468 gen_mi_value_unref(b
, dst
);
472 gen_mi_memset(struct gen_mi_builder
*b
, __gen_address_type dst
,
473 uint32_t value
, uint32_t size
)
475 #if GEN_GEN >= 8 || GEN_IS_HASWELL
476 assert(b
->num_math_dwords
== 0);
479 /* This memset operates in units of dwords. */
480 assert(size
% 4 == 0);
482 for (uint32_t i
= 0; i
< size
; i
+= 4) {
483 gen_mi_store(b
, gen_mi_mem32(__gen_address_offset(dst
, i
)),
488 /* NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX */
490 gen_mi_memcpy(struct gen_mi_builder
*b
, __gen_address_type dst
,
491 __gen_address_type src
, uint32_t size
)
493 #if GEN_GEN >= 8 || GEN_IS_HASWELL
494 assert(b
->num_math_dwords
== 0);
497 /* This memcpy operates in units of dwords. */
498 assert(size
% 4 == 0);
500 for (uint32_t i
= 0; i
< size
; i
+= 4) {
501 struct gen_mi_value dst_val
= gen_mi_mem32(__gen_address_offset(dst
, i
));
502 struct gen_mi_value src_val
= gen_mi_mem32(__gen_address_offset(src
, i
));
503 #if GEN_GEN >= 8 || GEN_IS_HASWELL
504 gen_mi_store(b
, dst_val
, src_val
);
506 /* IVB does not have a general purpose register for command streamer
507 * commands. Therefore, we use an alternate temporary register.
509 struct gen_mi_value tmp_reg
= gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */
510 gen_mi_store(b
, tmp_reg
, src_val
);
511 gen_mi_store(b
, dst_val
, tmp_reg
);
/*
 * MI_MATH Section. Only available on Haswell+
 */
520 #if GEN_GEN >= 8 || GEN_IS_HASWELL
/*
 * Perform a predicated store (assuming the condition is already loaded
 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
 * location specified by dst. Non-memory destinations are not supported.
 *
 * This function consumes one reference for each of src and dst.
 */
530 gen_mi_store_if(struct gen_mi_builder
*b
,
531 struct gen_mi_value dst
,
532 struct gen_mi_value src
)
534 assert(!dst
.invert
&& !src
.invert
);
536 gen_mi_builder_flush_math(b
);
538 /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
539 * destination to be memory, and resolve the source to a temporary
540 * register if it isn't in one already.
542 assert(dst
.type
== GEN_MI_VALUE_TYPE_MEM64
||
543 dst
.type
== GEN_MI_VALUE_TYPE_MEM32
);
545 if (src
.type
!= GEN_MI_VALUE_TYPE_REG32
&&
546 src
.type
!= GEN_MI_VALUE_TYPE_REG64
) {
547 struct gen_mi_value tmp
= gen_mi_new_gpr(b
);
548 _gen_mi_copy_no_unref(b
, tmp
, src
);
552 if (dst
.type
== GEN_MI_VALUE_TYPE_MEM64
) {
553 gen_mi_builder_emit(b
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
554 srm
.RegisterAddress
= src
.reg
;
555 srm
.MemoryAddress
= dst
.addr
;
556 srm
.PredicateEnable
= true;
558 gen_mi_builder_emit(b
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
559 srm
.RegisterAddress
= src
.reg
+ 4;
560 srm
.MemoryAddress
= __gen_address_offset(dst
.addr
, 4);
561 srm
.PredicateEnable
= true;
564 gen_mi_builder_emit(b
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
565 srm
.RegisterAddress
= src
.reg
;
566 srm
.MemoryAddress
= dst
.addr
;
567 srm
.PredicateEnable
= true;
571 gen_mi_value_unref(b
, src
);
572 gen_mi_value_unref(b
, dst
);
576 _gen_mi_builder_push_math(struct gen_mi_builder
*b
,
577 const uint32_t *dwords
,
580 assert(num_dwords
< GEN_MI_BUILDER_MAX_MATH_DWORDS
);
581 if (b
->num_math_dwords
+ num_dwords
> GEN_MI_BUILDER_MAX_MATH_DWORDS
)
582 gen_mi_builder_flush_math(b
);
584 memcpy(&b
->math_dwords
[b
->num_math_dwords
],
585 dwords
, num_dwords
* sizeof(*dwords
));
586 b
->num_math_dwords
+= num_dwords
;
589 static inline uint32_t
590 _gen_mi_pack_alu(uint32_t opcode
, uint32_t operand1
, uint32_t operand2
)
592 struct GENX(MI_MATH_ALU_INSTRUCTION
) instr
= {
593 .Operand2
= operand2
,
594 .Operand1
= operand1
,
599 GENX(MI_MATH_ALU_INSTRUCTION_pack
)(NULL
, &dw
, &instr
);
604 static inline struct gen_mi_value
605 gen_mi_value_to_gpr(struct gen_mi_builder
*b
, struct gen_mi_value val
)
607 if (gen_mi_value_is_gpr(val
))
610 /* Save off the invert flag because it makes copy() grumpy */
611 bool invert
= val
.invert
;
614 struct gen_mi_value tmp
= gen_mi_new_gpr(b
);
615 _gen_mi_copy_no_unref(b
, tmp
, val
);
621 static inline uint32_t
622 _gen_mi_math_load_src(struct gen_mi_builder
*b
,
623 unsigned src
, struct gen_mi_value
*val
)
625 if (val
->type
== GEN_MI_VALUE_TYPE_IMM
&&
626 (val
->imm
== 0 || val
->imm
== UINT64_MAX
)) {
627 uint64_t imm
= val
->invert
? ~val
->imm
: val
->imm
;
628 return _gen_mi_pack_alu(imm
? MI_ALU_LOAD1
: MI_ALU_LOAD0
, src
, 0);
630 *val
= gen_mi_value_to_gpr(b
, *val
);
631 return _gen_mi_pack_alu(val
->invert
? MI_ALU_LOADINV
: MI_ALU_LOAD
,
632 src
, _gen_mi_value_as_gpr(*val
));
636 static inline struct gen_mi_value
637 gen_mi_math_binop(struct gen_mi_builder
*b
, uint32_t opcode
,
638 struct gen_mi_value src0
, struct gen_mi_value src1
,
639 uint32_t store_op
, uint32_t store_src
)
641 struct gen_mi_value dst
= gen_mi_new_gpr(b
);
644 dw
[0] = _gen_mi_math_load_src(b
, MI_ALU_SRCA
, &src0
);
645 dw
[1] = _gen_mi_math_load_src(b
, MI_ALU_SRCB
, &src1
);
646 dw
[2] = _gen_mi_pack_alu(opcode
, 0, 0);
647 dw
[3] = _gen_mi_pack_alu(store_op
, _gen_mi_value_as_gpr(dst
), store_src
);
648 _gen_mi_builder_push_math(b
, dw
, 4);
650 gen_mi_value_unref(b
, src0
);
651 gen_mi_value_unref(b
, src1
);
656 static inline struct gen_mi_value
657 gen_mi_inot(struct gen_mi_builder
*b
, struct gen_mi_value val
)
659 /* TODO These currently can't be passed into gen_mi_copy */
660 val
.invert
= !val
.invert
;
664 static inline struct gen_mi_value
665 gen_mi_iadd(struct gen_mi_builder
*b
,
666 struct gen_mi_value src0
, struct gen_mi_value src1
)
668 return gen_mi_math_binop(b
, MI_ALU_ADD
, src0
, src1
,
669 MI_ALU_STORE
, MI_ALU_ACCU
);
672 static inline struct gen_mi_value
673 gen_mi_iadd_imm(struct gen_mi_builder
*b
,
674 struct gen_mi_value src
, uint64_t N
)
679 return gen_mi_iadd(b
, src
, gen_mi_imm(N
));
682 static inline struct gen_mi_value
683 gen_mi_isub(struct gen_mi_builder
*b
,
684 struct gen_mi_value src0
, struct gen_mi_value src1
)
686 return gen_mi_math_binop(b
, MI_ALU_SUB
, src0
, src1
,
687 MI_ALU_STORE
, MI_ALU_ACCU
);
690 static inline struct gen_mi_value
691 gen_mi_ult(struct gen_mi_builder
*b
,
692 struct gen_mi_value src0
, struct gen_mi_value src1
)
694 /* Compute "less than" by subtracting and storing the carry bit */
695 return gen_mi_math_binop(b
, MI_ALU_SUB
, src0
, src1
,
696 MI_ALU_STORE
, MI_ALU_CF
);
699 static inline struct gen_mi_value
700 gen_mi_uge(struct gen_mi_builder
*b
,
701 struct gen_mi_value src0
, struct gen_mi_value src1
)
703 /* Compute "less than" by subtracting and storing the carry bit */
704 return gen_mi_math_binop(b
, MI_ALU_SUB
, src0
, src1
,
705 MI_ALU_STOREINV
, MI_ALU_CF
);
708 static inline struct gen_mi_value
709 gen_mi_iand(struct gen_mi_builder
*b
,
710 struct gen_mi_value src0
, struct gen_mi_value src1
)
712 return gen_mi_math_binop(b
, MI_ALU_AND
, src0
, src1
,
713 MI_ALU_STORE
, MI_ALU_ACCU
);
/**
 * Returns (src != 0) ? 1 : 0.
 */
719 static inline struct gen_mi_value
720 gen_mi_nz(struct gen_mi_builder
*b
, struct gen_mi_value src
)
722 return gen_mi_math_binop(b
, MI_ALU_ADD
, src
, gen_mi_imm(0),
723 MI_ALU_STOREINV
, MI_ALU_ZF
);
/**
 * Returns (src == 0) ? 1 : 0.
 */
729 static inline struct gen_mi_value
730 gen_mi_z(struct gen_mi_builder
*b
, struct gen_mi_value src
)
732 return gen_mi_math_binop(b
, MI_ALU_ADD
, src
, gen_mi_imm(0),
733 MI_ALU_STORE
, MI_ALU_ZF
);
736 static inline struct gen_mi_value
737 gen_mi_ior(struct gen_mi_builder
*b
,
738 struct gen_mi_value src0
, struct gen_mi_value src1
)
740 return gen_mi_math_binop(b
, MI_ALU_OR
, src0
, src1
,
741 MI_ALU_STORE
, MI_ALU_ACCU
);
744 static inline struct gen_mi_value
745 gen_mi_imul_imm(struct gen_mi_builder
*b
,
746 struct gen_mi_value src
, uint32_t N
)
749 gen_mi_value_unref(b
, src
);
750 return gen_mi_imm(0);
756 src
= gen_mi_value_to_gpr(b
, src
);
758 struct gen_mi_value res
= gen_mi_value_ref(b
, src
);
760 unsigned top_bit
= 31 - __builtin_clz(N
);
761 for (int i
= top_bit
- 1; i
>= 0; i
--) {
762 res
= gen_mi_iadd(b
, res
, gen_mi_value_ref(b
, res
));
764 res
= gen_mi_iadd(b
, res
, gen_mi_value_ref(b
, src
));
767 gen_mi_value_unref(b
, src
);
772 static inline struct gen_mi_value
773 gen_mi_ishl_imm(struct gen_mi_builder
*b
,
774 struct gen_mi_value src
, uint32_t shift
)
776 struct gen_mi_value res
= gen_mi_value_to_gpr(b
, src
);
778 for (unsigned i
= 0; i
< shift
; i
++)
779 res
= gen_mi_iadd(b
, res
, gen_mi_value_ref(b
, res
));
784 static inline struct gen_mi_value
785 gen_mi_ushr32_imm(struct gen_mi_builder
*b
,
786 struct gen_mi_value src
, uint32_t shift
)
788 /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
789 * of the result. This assumes the top 32 bits are zero.
792 return gen_mi_imm(0);
795 struct gen_mi_value tmp
= gen_mi_new_gpr(b
);
796 _gen_mi_copy_no_unref(b
, gen_mi_value_half(tmp
, false),
797 gen_mi_value_half(src
, true));
798 _gen_mi_copy_no_unref(b
, gen_mi_value_half(tmp
, true), gen_mi_imm(0));
799 gen_mi_value_unref(b
, src
);
804 struct gen_mi_value tmp
= gen_mi_ishl_imm(b
, src
, 32 - shift
);
805 struct gen_mi_value dst
= gen_mi_new_gpr(b
);
806 _gen_mi_copy_no_unref(b
, gen_mi_value_half(dst
, false),
807 gen_mi_value_half(tmp
, true));
808 _gen_mi_copy_no_unref(b
, gen_mi_value_half(dst
, true), gen_mi_imm(0));
809 gen_mi_value_unref(b
, tmp
);
813 static inline struct gen_mi_value
814 gen_mi_udiv32_imm(struct gen_mi_builder
*b
,
815 struct gen_mi_value N
, uint32_t D
)
817 /* We implicitly assume that N is only a 32-bit value */
819 /* This is invalid but we should do something */
820 return gen_mi_imm(0);
821 } else if (util_is_power_of_two_or_zero(D
)) {
822 return gen_mi_ushr32_imm(b
, N
, util_logbase2(D
));
824 struct util_fast_udiv_info m
= util_compute_fast_udiv_info(D
, 32, 32);
825 assert(m
.multiplier
<= UINT32_MAX
);
828 N
= gen_mi_ushr32_imm(b
, N
, m
.pre_shift
);
830 /* Do the 32x32 multiply into gpr0 */
831 N
= gen_mi_imul_imm(b
, N
, m
.multiplier
);
834 N
= gen_mi_iadd(b
, N
, gen_mi_imm(m
.multiplier
));
836 N
= gen_mi_ushr32_imm(b
, N
, 32);
839 N
= gen_mi_ushr32_imm(b
, N
, m
.post_shift
);
845 #endif /* MI_MATH section */
/* This assumes addresses of strictly more than 32bits (aka. Gen8+). */
848 #if GEN_MI_BUILDER_CAN_WRITE_BATCH
struct gen_mi_address_token {
   /* Pointers to address memory fields in the batch.  One per 32-bit half
    * of the 64-bit address (see _gen_mi_resolve_address_token). */
   uint64_t *ptrs[2];
};
855 static inline struct gen_mi_address_token
856 gen_mi_store_address(struct gen_mi_builder
*b
,
857 struct gen_mi_value addr_reg
)
859 gen_mi_builder_flush_math(b
);
861 assert(addr_reg
.type
== GEN_MI_VALUE_TYPE_REG64
);
863 struct gen_mi_address_token token
= {};
865 for (unsigned i
= 0; i
< 2; i
++) {
866 gen_mi_builder_emit(b
, GENX(MI_STORE_REGISTER_MEM
), srm
) {
867 srm
.RegisterAddress
= addr_reg
.reg
+ (i
* 4);
869 const unsigned addr_dw
=
870 GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start
) / 8;
871 token
.ptrs
[i
] = (void *)_dst
+ addr_dw
;
875 gen_mi_value_unref(b
, addr_reg
);
880 gen_mi_self_mod_barrier(struct gen_mi_builder
*b
)
882 /* Documentation says Gen11+ should be able to invalidate the command cache
883 * but experiment show it doesn't work properly, so for now just get over
886 for (uint32_t i
= 0; i
< 128; i
++)
887 gen_mi_builder_emit(b
, GENX(MI_NOOP
), noop
);
891 _gen_mi_resolve_address_token(struct gen_mi_builder
*b
,
892 struct gen_mi_address_token token
,
893 void *batch_location
)
895 uint64_t addr_addr_u64
= __gen_get_batch_address(b
->user_data
,
897 *(token
.ptrs
[0]) = addr_addr_u64
;
898 *(token
.ptrs
[1]) = addr_addr_u64
+ 4;
901 #endif /* GEN_MI_BUILDER_CAN_WRITE_BATCH */
903 #endif /* GEN_MI_BUILDER_H */