pan/midgard: Lower bitfield extract to shifts
[mesa.git] / src / panfrost / midgard / midgard_compile.h
1 /*
2 * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef __MIDGARD_H_
25 #define __MIDGARD_H_
26
27 #include "compiler/nir/nir.h"
28 #include "util/u_dynarray.h"
29 #include "panfrost-job.h"
30
31 /* Define the general compiler entry point */
32
/* Capacity of the per-program sysval table (see midgard_program) */

#define MAX_SYSVAL_COUNT 32

/* A sysval is packed into a single integer: the class, one of the
 * PAN_SYSVAL_* enumerators, sits in the low 16 bits and a class-specific ID
 * sits in the bits above it. A sysval that takes no parameter has an ID of
 * zero, so its packed value compares equal to the bare class enumerator. */

#define PAN_SYSVAL(kind, id) (PAN_SYSVAL_##kind | ((id) << 16))
#define PAN_SYSVAL_TYPE(packed) (0xffff & (packed))
#define PAN_SYSVAL_ID(packed) ((packed) >> 16)
41
/* Sysval classes, used as the low 16 bits of a packed sysval (see
 * PAN_SYSVAL / PAN_SYSVAL_TYPE). Numbering starts at one for easy indexing
 * of hash tables internal to the compiler.
 *
 * The enum deliberately has no declarator after the closing brace: a name
 * there defines a variable, not a type, and would do so in every file that
 * includes this header. */

enum {
        PAN_SYSVAL_VIEWPORT_SCALE = 1,
        PAN_SYSVAL_VIEWPORT_OFFSET = 2,
        PAN_SYSVAL_TEXTURE_SIZE = 3,
        PAN_SYSVAL_SSBO = 4,
        PAN_SYSVAL_NUM_WORK_GROUPS = 5,
        PAN_SYSVAL_SAMPLER = 7,
};
53
/* ID encoding for texture-size (TXS) sysvals. Going by the decoders below,
 * the layout is:
 *
 *    bits 0-6: texture index
 *    bits 7-8: dim
 *    bit    9: set when the texture is an array
 *
 * The encoder does not mask its arguments, so texidx and dim must already
 * fit their fields or they will bleed into the neighbouring ones. */

#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \
        ((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))

/* Decoders. Each expansion is fully parenthesised so it can be dropped into
 * any larger expression. */

#define PAN_SYSVAL_ID_TO_TXS_TEX_IDX(id) ((id) & 0x7f)
#define PAN_SYSVAL_ID_TO_TXS_DIM(id) (((id) >> 7) & 0x3)
#define PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(id) (!!((id) & (1 << 9)))
60
/* Special attribute slots for vertex builtins. Sort of arbitrary but let's be
 * consistent with the blob so we can compare traces easier.
 *
 * PAN_MAX_ATTRIBUTE takes the next value after the last builtin slot. The
 * enum deliberately has no declarator after the closing brace: a name there
 * defines a variable, not a type, and would do so in every file that
 * includes this header. */

enum {
        PAN_VERTEX_ID = 16,
        PAN_INSTANCE_ID = 17,
        PAN_MAX_ATTRIBUTE
};
69
70 typedef struct {
71 int work_register_count;
72 int uniform_count;
73 int uniform_cutoff;
74
75 /* Prepended before uniforms, mapping to SYSVAL_ names for the
76 * sysval */
77
78 unsigned sysval_count;
79 unsigned sysvals[MAX_SYSVAL_COUNT];
80
81 unsigned varyings[32];
82 enum mali_format varying_type[32];
83
84 /* Boolean properties of the program */
85 bool writes_point_size;
86
87 int first_tag;
88
89 struct util_dynarray compiled;
90
91 /* For a blend shader using a constant color -- patch point. If
92 * negative, there's no constant. */
93
94 int blend_patch_offset;
95
96 /* The number of bytes to allocate per-thread for Thread Local Storage
97 * (register spilling), or zero if no spilling is used */
98 unsigned tls_size;
99
100 /* IN: For a fragment shader with a lowered alpha test, the ref value */
101 float alpha_ref;
102 } midgard_program;
103
104 int
105 midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend, unsigned blend_rt, unsigned gpu_id, bool shaderdb);
106
107 /* NIR options are shared between the standalone compiler and the online
108 * compiler. Defining it here is the simplest, though maybe not the Right
109 * solution. */
110
111 static const nir_shader_compiler_options midgard_nir_options = {
112 .lower_ffma = true,
113 .lower_sub = true,
114 .lower_scmp = true,
115 .lower_flrp32 = true,
116 .lower_flrp64 = true,
117 .lower_ffract = true,
118 .lower_fmod = true,
119 .lower_fdiv = true,
120 .lower_idiv = true,
121 .lower_isign = true,
122 .lower_fpow = true,
123 .lower_find_lsb = true,
124 .lower_fdph = true,
125
126 .lower_wpos_pntc = true,
127
128 /* TODO: We have native ops to help here, which we'll want to look into
129 * eventually */
130 .lower_fsign = true,
131
132 .lower_extract_byte = true,
133 .lower_extract_word = true,
134 .lower_rotate = true,
135
136 .lower_pack_half_2x16 = true,
137 .lower_pack_half_2x16_split = true,
138 .lower_pack_unorm_2x16 = true,
139 .lower_pack_snorm_2x16 = true,
140 .lower_pack_unorm_4x8 = true,
141 .lower_pack_snorm_4x8 = true,
142 .lower_unpack_half_2x16 = true,
143 .lower_unpack_half_2x16_split = true,
144 .lower_unpack_unorm_2x16 = true,
145 .lower_unpack_snorm_2x16 = true,
146 .lower_unpack_unorm_4x8 = true,
147 .lower_unpack_snorm_4x8 = true,
148
149 .lower_doubles_options = nir_lower_dmod,
150
151 .lower_bitfield_extract_to_shifts = true,
152 .vectorize_io = true,
153 .use_interpolated_input_intrinsics = true
154 };
155
156 #endif