freedreno/computerator: add computerator
[mesa.git] / src / freedreno / computerator / main.h
1 /*
2 * Copyright © 2020 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef __MAIN_H__
25 #define __MAIN_H__
26
27 #include <err.h>
28 #include <stdint.h>
29 #include <stdio.h>
30
31 #include "drm/freedreno_drmif.h"
32 #include "drm/freedreno_ringbuffer.h"
33
34 #include "registers/adreno_pm4.xml.h"
35 #include "registers/adreno_common.xml.h"
36
/* Number of slots in the per-kernel buffer arrays declared below. */
#define MAX_BUFS 4

/*
 * State describing a single kernel.  Part of it is written by the backend
 * and part of it by the frontend, as noted on each group of fields.
 */
struct kernel {
	/* Set by the backend once the shader has been assembled: */
	uint32_t local_size[3];
	uint32_t num_bufs;
	uint32_t buf_sizes[MAX_BUFS];   /* each entry is a size in dwords */

	/* Set by the frontend ahead of launching the grid: */
	struct fd_bo *bufs[MAX_BUFS];
};
48
/*
 * Entry points supplied separately for each GPU generation:
 */
struct backend {
	/* Receives the backend itself plus an input stream; yields a kernel. */
	struct kernel *(*assemble)(struct backend *b, FILE *in);

	/* Receives a kernel plus an output stream. */
	void (*disassemble)(struct kernel *kernel, FILE *out);

	/* Receives a kernel, a three-element grid and the submit to use. */
	void (*emit_grid)(struct kernel *kernel, uint32_t grid[3],
			struct fd_submit *submit);
};
56
/*
 * define_cast(from, to) expands to a static inline function called
 * to_<to>() which takes a 'struct <from> *' and hands back that same
 * pointer converted to 'struct <to> *'.
 */
#define define_cast(from_tag, to_tag) \
	static inline struct to_tag * \
	to_ ## to_tag(struct from_tag *p) \
	{ \
		return (struct to_tag *)p; \
	}
61
/*
 * Declared here, defined elsewhere: takes the device and a gpu_id and
 * returns a pointer to a struct backend.
 * NOTE(review): presumably the returned backend carries the a6xx
 * implementations of the per-generation entry points -- confirm against
 * the definition.
 */
struct backend *a6xx_init(struct fd_device *dev, uint32_t gpu_id);
63
64 /*
65 * cmdstream helpers:
66 */
67
68 static inline void
69 BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
70 {
71 if (ring->cur + ndwords > ring->end)
72 fd_ringbuffer_grow(ring, ndwords);
73 }
74
/*
 * Hand a single 32-bit value to fd_ringbuffer_emit() for the given ring.
 */
static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
	fd_ringbuffer_emit(ring, data);
}
80
/*
 * Compute the odd-parity bit for val.
 *
 * Returns 1 when val contains an even number of set bits and 0 when it
 * contains an odd number, so that val together with the returned bit always
 * has an odd number of ones.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 * XOR-fold the upper bits down so the low nibble has the same parity
	 * as the original value.
	 */
	val ^= val >> 16;
	val ^= val >> 8;
	val ^= val >> 4;
	val &= 0xf;

	/* Bit n of 0x6996 is the (even) parity of nibble n; we want odd parity
	 * so the table is inverted.  The constant is unsigned so that neither
	 * the complement nor the shift operates on a negative signed value
	 * (right-shifting a negative int is implementation-defined).
	 */
	return (~0x6996u >> val) & 1;
}
93
94 static inline void
95 OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
96 {
97 BEGIN_RING(ring, cnt+1);
98 OUT_RING(ring, CP_TYPE4_PKT | cnt |
99 (_odd_parity_bit(cnt) << 7) |
100 ((regindx & 0x3ffff) << 8) |
101 ((_odd_parity_bit(regindx) << 27)));
102 }
103
104 static inline void
105 OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
106 {
107 BEGIN_RING(ring, cnt+1);
108 OUT_RING(ring, CP_TYPE7_PKT | cnt |
109 (_odd_parity_bit(cnt) << 15) |
110 ((opcode & 0x7f) << 16) |
111 ((_odd_parity_bit(opcode) << 23)));
112 }
113
114 /*
115 * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+
116 */
117
118 static inline void
119 __out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo,
120 uint32_t offset, uint64_t or, int32_t shift, uint32_t flags)
121 {
122 debug_assert(offset < fd_bo_size(bo));
123 fd_ringbuffer_reloc(ring, &(struct fd_reloc){
124 .bo = bo,
125 .flags = flags,
126 .offset = offset,
127 .or = or,
128 .shift = shift,
129 .orhi = or >> 32,
130 });
131 }
132
133 static inline void
134 OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
135 uint32_t offset, uint64_t or, int32_t shift)
136 {
137 __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ);
138 }
139
140 static inline void
141 OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
142 uint32_t offset, uint64_t or, int32_t shift)
143 {
144 __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE);
145 }
146
147 static inline void
148 OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo,
149 uint32_t offset, uint64_t or, int32_t shift)
150 {
151 __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP);
152 }
153
/*
 * Pass ring and target to fd_ringbuffer_emit_reloc_ring_full(), with 0 as
 * the final argument.
 */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
	fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}
159
/* evaluates to val when the condition holds and to 0 otherwise; handy for
 * conditionally setting boolean flag(s):
 */
#define COND(cond, val) (!(cond) ? 0 : (val))
162
163 #endif /* __MAIN_H__ */