(c-file-style . "stroustrup")
(fill-column . 78)
(eval . (progn
+ (c-set-offset 'case-label '0)
(c-set-offset 'innamespace '0)
(c-set-offset 'inline-open '0)))
)
# - Select Git and fill in the Git clone URL
# - Setup a Git hook as explained in
# https://github.com/appveyor/webhooks#installing-git-hook
-# - Check 'Settings > General > Skip branches without appveyor'
+# - Check 'Settings > General > Skip branches without appveyor.yml'
# - Check 'Settings > General > Rolling builds'
# - Setup the global or project notifications to your liking
#
except:
- /^travis.*$/
-clone_depth: 5
+# Don't download the full Mesa history to speed up cloning. However the clone
+# depth must not be too small, otherwise builds might fail when lots of patches
+# are committed in succession, because the desired commit is not found in the
+# truncated history.
+#
+# See also:
+# - https://www.appveyor.com/blog/2014/06/04/shallow-clone-for-git-repositories
+clone_depth: 100
cache:
- win_flex_bison-2.4.5.zip
fi
}
+dnl This is for Glamor. Skip this if OpenGL is disabled.
require_egl_drm() {
+ if test "x$enable_opengl" = xno; then
+ return 0
+ fi
+
case "$with_egl_platforms" in
*drm*)
;;
GL_ARB_texture_compression_bptc DONE (i965, nvc0, r600, radeonsi)
GL_ARB_compressed_texture_pixel_storage DONE (all drivers)
- GL_ARB_shader_atomic_counters DONE (i965)
+ GL_ARB_shader_atomic_counters DONE (i965, nvc0)
GL_ARB_texture_storage DONE (all drivers)
GL_ARB_transform_feedback_instanced DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_base_instance DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_program_interface_query DONE (all drivers)
GL_ARB_robust_buffer_access_behavior not started
GL_ARB_shader_image_size DONE (i965)
- GL_ARB_shader_storage_buffer_object DONE (i965)
+ GL_ARB_shader_storage_buffer_object DONE (i965, nvc0)
GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_texture_buffer_range DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
- specified transform/feedback layout in progress
- input/output block locations DONE
GL_ARB_multi_bind DONE (all drivers)
- GL_ARB_query_buffer_object not started
+ GL_ARB_query_buffer_object DONE (nvc0)
GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_texture_stencil8 DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
GL_ARB_vertex_type_10f_11f_11f_rev DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
"130". Mesa will not really implement all the features of the given language version
if it's higher than what's normally reported. (for developers only)
<li>MESA_GLSL - <a href="shading.html#envvars">shading language compiler options</a>
+<li>MESA_NO_MINMAX_CACHE - when set, the minmax index cache is globally disabled.
</ul>
<li>GL_ARB_compute_shader on i965</li>
<li>GL_ARB_copy_image on r600</li>
<li>GL_ARB_indirect_parameters on nvc0</li>
+<li>GL_ARB_query_buffer_object on nvc0</li>
+<li>GL_ARB_shader_atomic_counters on nvc0</li>
<li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
+<li>GL_ARB_shader_storage_buffer_object on nvc0</li>
<li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>
<li>GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx</li>
<li>GL_KHR_texture_compression_astc_ldr on freedreno/a4xx</li>
<li>GL_AMD_performance_monitor on radeonsi (CIK+ only)</li>
+<li>GL_ATI_meminfo on r600, radeonsi</li>
+<li>GL_NVX_gpu_memory_info on r600, radeonsi</li>
<li>New OSMesaCreateContextAttribs() function (for creating core profile
contexts)</li>
</ul>
#define D3DERR_DRIVERINVALIDCALL MAKE_D3DHRESULT(2157)
#define D3DERR_DEVICEREMOVED MAKE_D3DHRESULT(2160)
#define D3DERR_DEVICEHUNG MAKE_D3DHRESULT(2164)
+#define S_PRESENT_OCCLUDED MAKE_D3DSTATUS(2168)
/********************************************************
* Bitmasks *
HRESULT (WINAPI *SetCursor)(ID3DPresent *This, void *pBitmap, POINT *pHotspot, BOOL bShow);
HRESULT (WINAPI *SetGammaRamp)(ID3DPresent *This, const D3DGAMMARAMP *pRamp, HWND hWndOverride);
HRESULT (WINAPI *GetWindowInfo)(ID3DPresent *This, HWND hWnd, int *width, int *height, int *depth);
+ /* Available since version 1.1 */
+ BOOL (WINAPI *GetWindowOccluded)(ID3DPresent *This);
} ID3DPresentVtbl;
struct ID3DPresent
#define ID3DPresent_SetCursor(p,a,b,c) (p)->lpVtbl->SetCursor(p,a,b,c)
#define ID3DPresent_SetGammaRamp(p,a,b) (p)->lpVtbl->SetGammaRamp(p,a,b)
#define ID3DPresent_GetWindowInfo(p,a,b,c,d) (p)->lpVtbl->GetWindowInfo(p,a,b,c,d)
+#define ID3DPresent_GetWindowOccluded(p) (p)->lpVtbl->GetWindowOccluded(p)
typedef struct ID3DPresentGroupVtbl
{
--- /dev/null
+glsl_compiler
LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
+ $(MKDIR_GEN)
$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
+ $(MKDIR_GEN)
$(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y
-glsl_compiler
glsl_lexer.cpp
glsl_parser.cpp
glsl_parser.h
if (!state->is_version(120, 0))
return false;
+ /* ESSL does not allow implicit conversions */
+ if (state->es_shader)
+ return false;
+
/* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec:
*
* "There are no implicit array or structure conversions. For
BA1(roundEven)
BA1(ceil)
BA1(fract)
- B2(mod)
+ BA2(mod)
BA1(modf)
BA2(min)
BA2(max)
FD(fract)
add_function("mod",
- _mod(glsl_type::float_type, glsl_type::float_type),
- _mod(glsl_type::vec2_type, glsl_type::float_type),
- _mod(glsl_type::vec3_type, glsl_type::float_type),
- _mod(glsl_type::vec4_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::float_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::vec2_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::vec3_type, glsl_type::float_type),
+ _mod(always_available, glsl_type::vec4_type, glsl_type::float_type),
- _mod(glsl_type::vec2_type, glsl_type::vec2_type),
- _mod(glsl_type::vec3_type, glsl_type::vec3_type),
- _mod(glsl_type::vec4_type, glsl_type::vec4_type),
+ _mod(always_available, glsl_type::vec2_type, glsl_type::vec2_type),
+ _mod(always_available, glsl_type::vec3_type, glsl_type::vec3_type),
+ _mod(always_available, glsl_type::vec4_type, glsl_type::vec4_type),
- _mod(glsl_type::double_type, glsl_type::double_type),
- _mod(glsl_type::dvec2_type, glsl_type::double_type),
- _mod(glsl_type::dvec3_type, glsl_type::double_type),
- _mod(glsl_type::dvec4_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::double_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::dvec2_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::dvec3_type, glsl_type::double_type),
+ _mod(fp64, glsl_type::dvec4_type, glsl_type::double_type),
- _mod(glsl_type::dvec2_type, glsl_type::dvec2_type),
- _mod(glsl_type::dvec3_type, glsl_type::dvec3_type),
- _mod(glsl_type::dvec4_type, glsl_type::dvec4_type),
+ _mod(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type),
+ _mod(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type),
+ _mod(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type),
NULL);
FD(modf)
UNOPA(fract, ir_unop_fract)
ir_function_signature *
-builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
+builtin_builder::_mod(builtin_available_predicate avail,
+ const glsl_type *x_type, const glsl_type *y_type)
{
- return binop(always_available, ir_binop_mod, x_type, x_type, y_type);
+ return binop(avail, ir_binop_mod, x_type, x_type, y_type);
}
ir_function_signature *
this->fields[this->num_fields].sample = 0;
this->fields[this->num_fields].patch = 0;
this->fields[this->num_fields].precision = GLSL_PRECISION_NONE;
+ this->fields[this->num_fields].image_read_only = 0;
+ this->fields[this->num_fields].image_write_only = 0;
+ this->fields[this->num_fields].image_coherent = 0;
+ this->fields[this->num_fields].image_volatile = 0;
+ this->fields[this->num_fields].image_restrict = 0;
this->num_fields++;
}
/* gl_Position and gl_PointSize are not visible from fragment shaders. */
if (state->stage != MESA_SHADER_FRAGMENT) {
add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position");
- add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
+ if (!state->es_shader ||
+ state->stage == MESA_SHADER_VERTEX ||
+ (state->stage == MESA_SHADER_GEOMETRY &&
+ state->OES_geometry_point_size_enable)) {
+ add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize");
+ }
}
if (state->is_version(130, 0)) {
add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
if (extensions->ARB_blend_func_extended)
add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
+
+ if (version >= 310) {
+ if (extensions->OES_geometry_shader) {
+ add_builtin_define(parser, "GL_OES_geometry_point_size", 1);
+ add_builtin_define(parser, "GL_OES_geometry_shader", 1);
+ }
+ }
}
} else {
add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
/* OES extensions go here, sorted alphabetically.
*/
EXT(OES_EGL_image_external, false, true, OES_EGL_image_external),
+ EXT(OES_geometry_point_size, false, true, OES_geometry_shader),
EXT(OES_geometry_shader, false, true, OES_geometry_shader),
EXT(OES_standard_derivatives, false, true, OES_standard_derivatives),
EXT(OES_texture_3D, false, true, dummy_true),
const struct gl_shader_compiler_options *options,
bool native_integers)
{
+ const bool debug = false;
GLboolean progress = GL_FALSE;
- progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress;
+#define OPT(PASS, ...) do { \
+ if (debug) { \
+ fprintf(stderr, "START GLSL optimization %s\n", #PASS); \
+ const bool opt_progress = PASS(__VA_ARGS__); \
+ progress = opt_progress || progress; \
+ if (opt_progress) \
+ _mesa_print_ir(stderr, ir, NULL); \
+ fprintf(stderr, "GLSL optimization %s: %s progress\n", \
+ #PASS, opt_progress ? "made" : "no"); \
+ } else { \
+ progress = PASS(__VA_ARGS__) || progress; \
+ } \
+ } while (false)
+
+ OPT(lower_instructions, ir, SUB_TO_ADD_NEG);
if (linked) {
- progress = do_function_inlining(ir) || progress;
- progress = do_dead_functions(ir) || progress;
- progress = do_structure_splitting(ir) || progress;
+ OPT(do_function_inlining, ir);
+ OPT(do_dead_functions, ir);
+ OPT(do_structure_splitting, ir);
}
- progress = do_if_simplification(ir) || progress;
- progress = opt_flatten_nested_if_blocks(ir) || progress;
- progress = opt_conditional_discard(ir) || progress;
- progress = do_copy_propagation(ir) || progress;
- progress = do_copy_propagation_elements(ir) || progress;
+ OPT(do_if_simplification, ir);
+ OPT(opt_flatten_nested_if_blocks, ir);
+ OPT(opt_conditional_discard, ir);
+ OPT(do_copy_propagation, ir);
+ OPT(do_copy_propagation_elements, ir);
if (options->OptimizeForAOS && !linked)
- progress = opt_flip_matrices(ir) || progress;
+ OPT(opt_flip_matrices, ir);
if (linked && options->OptimizeForAOS) {
- progress = do_vectorize(ir) || progress;
+ OPT(do_vectorize, ir);
}
if (linked)
- progress = do_dead_code(ir, uniform_locations_assigned) || progress;
+ OPT(do_dead_code, ir, uniform_locations_assigned);
else
- progress = do_dead_code_unlinked(ir) || progress;
- progress = do_dead_code_local(ir) || progress;
- progress = do_tree_grafting(ir) || progress;
- progress = do_constant_propagation(ir) || progress;
+ OPT(do_dead_code_unlinked, ir);
+ OPT(do_dead_code_local, ir);
+ OPT(do_tree_grafting, ir);
+ OPT(do_constant_propagation, ir);
if (linked)
- progress = do_constant_variable(ir) || progress;
+ OPT(do_constant_variable, ir);
else
- progress = do_constant_variable_unlinked(ir) || progress;
- progress = do_constant_folding(ir) || progress;
- progress = do_minmax_prune(ir) || progress;
- progress = do_rebalance_tree(ir) || progress;
- progress = do_algebraic(ir, native_integers, options) || progress;
- progress = do_lower_jumps(ir) || progress;
- progress = do_vec_index_to_swizzle(ir) || progress;
- progress = lower_vector_insert(ir, false) || progress;
- progress = do_swizzle_swizzle(ir) || progress;
- progress = do_noop_swizzle(ir) || progress;
-
- progress = optimize_split_arrays(ir, linked) || progress;
- progress = optimize_redundant_jumps(ir) || progress;
+ OPT(do_constant_variable_unlinked, ir);
+ OPT(do_constant_folding, ir);
+ OPT(do_minmax_prune, ir);
+ OPT(do_rebalance_tree, ir);
+ OPT(do_algebraic, ir, native_integers, options);
+ OPT(do_lower_jumps, ir);
+ OPT(do_vec_index_to_swizzle, ir);
+ OPT(lower_vector_insert, ir, false);
+ OPT(do_swizzle_swizzle, ir);
+ OPT(do_noop_swizzle, ir);
+
+ OPT(optimize_split_arrays, ir, linked);
+ OPT(optimize_redundant_jumps, ir);
loop_state *ls = analyze_loop_variables(ir);
if (ls->loop_found) {
- progress = set_loop_controls(ir, ls) || progress;
- progress = unroll_loops(ir, ls, options) || progress;
+ OPT(set_loop_controls, ir, ls);
+ OPT(unroll_loops, ir, ls, options);
}
delete ls;
+#undef OPT
+
return progress;
}
*/
bool OES_EGL_image_external_enable;
bool OES_EGL_image_external_warn;
+ bool OES_geometry_point_size_enable;
+ bool OES_geometry_point_size_warn;
bool OES_geometry_shader_enable;
bool OES_geometry_shader_warn;
bool OES_standard_derivatives_enable;
*/
class parcel_out_uniform_storage : public program_resource_visitor {
public:
- parcel_out_uniform_storage(struct string_to_uint_map *map,
+ parcel_out_uniform_storage(struct gl_shader_program *prog,
+ struct string_to_uint_map *map,
struct gl_uniform_storage *uniforms,
union gl_constant_value *values)
- : map(map), uniforms(uniforms), values(values)
+ : prog(prog), map(map), uniforms(uniforms), values(values)
{
}
memset(this->targets, 0, sizeof(this->targets));
}
- void set_and_process(struct gl_shader_program *prog,
- ir_variable *var)
+ void set_and_process(ir_variable *var)
{
current_var = var;
field_counter = 0;
uniform->opaque[shader_type].index = this->next_image;
uniform->opaque[shader_type].active = true;
+ /* Set image access qualifiers */
+ const GLenum access =
+ (current_var->data.image_read_only ? GL_READ_ONLY :
+ current_var->data.image_write_only ? GL_WRITE_ONLY :
+ GL_READ_WRITE);
+
+ for (unsigned j = 0; j < MAX2(1, uniform->array_elements); ++j)
+ prog->_LinkedShaders[shader_type]->
+ ImageAccess[this->next_image + j] = access;
+
/* Increment the image index by 1 for non-arrays and by the
* number of array elements for arrays.
*/
this->values += values_for_type(type);
}
+ /**
+ * Current program being processed.
+ */
+ struct gl_shader_program *prog;
+
struct string_to_uint_map *map;
struct gl_uniform_storage *uniforms;
}
}
-static void
-link_set_image_access_qualifiers(struct gl_shader_program *prog,
- gl_shader *sh, unsigned shader_stage,
- ir_variable *var, const glsl_type *type,
- char **name, size_t name_length)
-{
- /* Handle arrays of arrays */
- if (type->is_array() && type->fields.array->is_array()) {
- for (unsigned i = 0; i < type->length; i++) {
- size_t new_length = name_length;
-
- /* Append the subscript to the current variable name */
- ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
-
- link_set_image_access_qualifiers(prog, sh, shader_stage, var,
- type->fields.array, name,
- new_length);
- }
- } else {
- unsigned id = 0;
- bool found = prog->UniformHash->get(id, *name);
- assert(found);
- (void) found;
- const gl_uniform_storage *storage = &prog->UniformStorage[id];
- const unsigned index = storage->opaque[shader_stage].index;
- const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
- var->data.image_write_only ? GL_WRITE_ONLY :
- GL_READ_WRITE);
-
- for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j)
- sh->ImageAccess[index + j] = access;
- }
-}
-
/**
* Combine the hidden uniform hash map with the uniform hash map so that the
* hidden uniforms will be given indices at the end of the uniform storage
union gl_constant_value *data_end = &data[num_data_slots];
#endif
- parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data);
+ parcel_out_uniform_storage parcel(prog, prog->UniformHash, uniforms, data);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
var->data.mode != ir_var_shader_storage))
continue;
- parcel.set_and_process(prog, var);
+ parcel.set_and_process(var);
}
prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used;
prog->NumHiddenUniforms = hidden_uniforms;
prog->UniformStorage = uniforms;
- /**
- * Scan the program for image uniforms and store image unit access
- * information into the gl_shader data structure.
- */
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- gl_shader *sh = prog->_LinkedShaders[i];
-
- if (sh == NULL)
- continue;
-
- foreach_in_list(ir_instruction, node, sh->ir) {
- ir_variable *var = node->as_variable();
-
- if (var && var->data.mode == ir_var_uniform &&
- var->type->contains_image()) {
- char *name_copy = ralloc_strdup(NULL, var->name);
- link_set_image_access_qualifiers(prog, sh, i, var, var->type,
- &name_copy, strlen(var->name));
- ralloc_free(name_copy);
- }
- }
- }
-
link_set_uniform_initializers(prog, boolean_true);
return;
return;
}
- if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
+ bool needs_flat_qualifier = consumer_var == NULL &&
+ (producer_var->type->contains_integer() ||
+ producer_var->type->contains_double());
+
+ if (needs_flat_qualifier ||
(consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) {
/* Since this varying is not being consumed by the fragment shader, its
* interpolation type cannot possibly affect rendering.
- * Also, this variable is non-flat and is (or contains) an integer.
+ * Also, this variable is non-flat and is (or contains) an integer
+ * or a double.
* If the consumer stage is unknown, don't modify the interpolation
* type as it could affect rendering later with separate shaders.
*
&prog->NumShaderStorageBlocks,
&prog->SsboInterfaceBlockIndex);
- /* FINISHME: Assign fragment shader output locations. */
-
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;
unsigned *const_offset,
bool *row_major,
int *matrix_columns,
+ const glsl_struct_field **struct_field,
unsigned packing)
{
*offset = new(mem_ctx) ir_constant(0u);
intra_struct_offset = glsl_align(intra_struct_offset, field_align);
if (strcmp(struct_type->fields.structure[i].name,
- deref_record->field) == 0)
+ deref_record->field) == 0) {
+ if (struct_field)
+ *struct_field = &struct_type->fields.structure[i];
break;
+ }
if (packing == GLSL_INTERFACE_PACKING_STD430)
intra_struct_offset += type->std430_size(field_row_major);
void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref,
ir_rvalue **offset, unsigned *const_offset,
bool *row_major, int *matrix_columns,
+ const glsl_struct_field **struct_field,
unsigned packing);
};
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns, packing);
+ &row_major, &matrix_columns, NULL, packing);
/* Now that we've calculated the offset to the start of the
* dereference, walk over the type and emit loads into a temporary.
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns, packing);
+ &row_major, &matrix_columns, NULL, packing);
deref = new(mem_ctx) ir_dereference_variable(store_var);
setup_buffer_access(mem_ctx, var, deref,
&offset, &const_offset,
- &row_major, &matrix_columns, packing);
+ &row_major, &matrix_columns, NULL, packing);
assert(offset);
assert(!row_major);
public lower_buffer_access::lower_buffer_access {
public:
lower_ubo_reference_visitor(struct gl_shader *shader)
- : shader(shader)
+ : shader(shader), struct_field(NULL), variable(NULL)
{
}
bool *row_major,
int *matrix_columns,
unsigned packing);
+ uint32_t ssbo_access_params();
ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
ir_rvalue *offset);
ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
struct gl_shader *shader;
struct gl_uniform_buffer_variable *ubo_var;
+ const struct glsl_struct_field *struct_field;
+ ir_variable *variable;
ir_rvalue *uniform_block;
bool progress;
};
*const_offset = ubo_var->Offset;
+ this->struct_field = NULL;
setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
- matrix_columns, packing);
+ matrix_columns, &this->struct_field, packing);
}
void
this->buffer_access_type =
var->is_in_shader_storage_block() ?
ssbo_load_access : ubo_load_access;
+ this->variable = var;
/* Compute the offset to the start of the dereference as well as other
* information we need to configure the write
return state->ARB_shader_storage_buffer_object_enable;
}
+uint32_t
+lower_ubo_reference_visitor::ssbo_access_params()
+{ /* Returns an OR of ACCESS_COHERENT / ACCESS_RESTRICT / ACCESS_VOLATILE for the recorded variable. */
+ assert(variable);
+
+ if (variable->is_interface_instance()) { /* qualifiers are read from the recorded struct field */
+ assert(struct_field);
+
+ return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) |
+ (struct_field->image_restrict ? ACCESS_RESTRICT : 0) |
+ (struct_field->image_volatile ? ACCESS_VOLATILE : 0));
+ } else { /* otherwise the qualifiers are read from the variable's own data */
+ return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) |
+ (variable->data.image_restrict ? ACCESS_RESTRICT : 0) |
+ (variable->data.image_volatile ? ACCESS_VOLATILE : 0));
+ }
+}
+
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
ir_rvalue *deref,
ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
sig_params.push_tail(writemask_ref);
+ ir_variable *access_ref = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
+ sig_params.push_tail(access_ref);
+
ir_function_signature *sig = new(mem_ctx)
ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
assert(sig);
call_params.push_tail(offset->clone(mem_ctx, NULL));
call_params.push_tail(deref->clone(mem_ctx, NULL));
call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
+ call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
return new(mem_ctx) ir_call(sig, NULL, &call_params);
}
ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
sig_params.push_tail(offset_ref);
+ ir_variable *access_ref = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
+ sig_params.push_tail(access_ref);
+
ir_function_signature *sig =
new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
assert(sig);
exec_list call_params;
call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
call_params.push_tail(offset->clone(mem_ctx, NULL));
+ call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
unsigned packing = var->get_interface_type()->interface_packing;
this->buffer_access_type = ssbo_store_access;
+ this->variable = var;
/* Compute the offset to the start of the dereference as well as other
* information we need to configure the write
int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
this->buffer_access_type = ssbo_unsized_array_length_access;
+ this->variable = var;
/* Compute the offset to the start of the dereference as well as other
* information we need to calculate the length.
unsigned packing = var->get_interface_type()->interface_packing;
this->buffer_access_type = ssbo_atomic_access;
+ this->variable = var;
setup_for_load_or_store(mem_ctx, var, deref,
&offset, &const_offset,
if (!lhs_var)
continue;
- if (lhs_var->data.mode == ir_var_function_out ||
- lhs_var->data.mode == ir_var_function_inout ||
- lhs_var->data.mode == ir_var_shader_out ||
- lhs_var->data.mode == ir_var_shader_storage)
- continue;
+ if (lhs_var->data.mode == ir_var_function_out ||
+ lhs_var->data.mode == ir_var_function_inout ||
+ lhs_var->data.mode == ir_var_shader_out ||
+ lhs_var->data.mode == ir_var_shader_storage ||
+ lhs_var->data.mode == ir_var_shader_shared)
+ continue;
ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
this->fields.structure[i].sample = fields[i].sample;
this->fields.structure[i].matrix_layout = fields[i].matrix_layout;
this->fields.structure[i].patch = fields[i].patch;
+ this->fields.structure[i].image_read_only = fields[i].image_read_only;
+ this->fields.structure[i].image_write_only = fields[i].image_write_only;
+ this->fields.structure[i].image_coherent = fields[i].image_coherent;
+ this->fields.structure[i].image_volatile = fields[i].image_volatile;
+ this->fields.structure[i].image_restrict = fields[i].image_restrict;
this->fields.structure[i].precision = fields[i].precision;
}
if (this == desired)
return true;
+ /* ESSL does not allow implicit conversions. If there is no state, we're
+ * doing intra-stage function linking where these checks have already been
+ * done.
+ */
+ if (state && state->es_shader)
+ return false;
+
/* There is no conversion among matrix types. */
if (this->matrix_columns > 1 || desired->matrix_columns > 1)
return false;
glsl_struct_field(const struct glsl_type *_type, const char *_name)
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
- precision(GLSL_PRECISION_NONE)
+ precision(GLSL_PRECISION_NONE), image_read_only(0), image_write_only(0),
+ image_coherent(0), image_volatile(0), image_restrict(0)
{
/* empty */
}
b->shader->options->lower_pack_unorm_2x16);
nir_ssa_def *word =
- nir_extract_uword(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
nir_channel(b, word, 0));
b->shader->options->lower_pack_unorm_4x8);
nir_ssa_def *byte =
- nir_extract_ubyte(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
nir_ssa_def *val =
nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
unpack_4x8("unorm")
unpack_2x16("half")
-unop_horiz("pack_uvec2_to_uint", 0, tuint, 2, tuint, """
-dst = (src0.x & 0xffff) | (src0.y >> 16);
+unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
+dst.x = (src0.x & 0xffff) | (src0.y << 16);
+""")
-unop_horiz("pack_uvec4_to_uint", 0, tuint, 4, tuint, """
-dst = (src0.x << 0) |
- (src0.y << 8) |
- (src0.z << 16) |
- (src0.w << 24);
+unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
+dst.x = (src0.x << 0) |
+ (src0.y << 8) |
+ (src0.z << 16) |
+ (src0.w << 24);
""")
# Lowered floating point unpacking operations.
""")
# Byte extraction
-binop("extract_ubyte", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
-binop("extract_ibyte", tint, "", "(int8_t)(src0 >> (src1 * 8))")
+binop("extract_u8", tuint, "", "(uint8_t)(src0 >> (src1 * 8))")
+binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))")
# Word extraction
-binop("extract_uword", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
-binop("extract_iword", tint, "", "(int16_t)(src0 >> (src1 * 16))")
+binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
+binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
def triop(name, ty, const_expr):
('ubfe', 'value', 'offset', 'bits')),
'options->lower_bitfield_extract'),
- (('extract_ibyte', a, b),
- ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 8),
+ (('extract_i8', a, b),
+ ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
'options->lower_extract_byte'),
- (('extract_ubyte', a, b),
+ (('extract_u8', a, b),
('iand', ('ushr', a, ('imul', b, 8)), 0xff),
'options->lower_extract_byte'),
- (('extract_iword', a, b),
+ (('extract_i16', a, b),
('ishr', ('ishl', a, ('imul', ('isub', 1, b), 16)), 16),
'options->lower_extract_word'),
- (('extract_uword', a, b),
+ (('extract_u16', a, b),
('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
'options->lower_extract_word'),
'options->lower_pack_snorm_4x8'),
(('unpack_unorm_2x16', 'v'),
- ('fdiv', ('u2f', ('vec4', ('extract_uword', 'v', 0),
- ('extract_uword', 'v', 1), 0, 0)),
+ ('fdiv', ('u2f', ('vec2', ('extract_u16', 'v', 0),
+ ('extract_u16', 'v', 1))),
65535.0),
'options->lower_unpack_unorm_2x16'),
(('unpack_unorm_4x8', 'v'),
- ('fdiv', ('u2f', ('vec4', ('extract_ubyte', 'v', 0),
- ('extract_ubyte', 'v', 1),
- ('extract_ubyte', 'v', 2),
- ('extract_ubyte', 'v', 3))),
+ ('fdiv', ('u2f', ('vec4', ('extract_u8', 'v', 0),
+ ('extract_u8', 'v', 1),
+ ('extract_u8', 'v', 2),
+ ('extract_u8', 'v', 3))),
255.0),
'options->lower_unpack_unorm_4x8'),
(('unpack_snorm_2x16', 'v'),
- ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_iword', 'v', 0),
- ('extract_iword', 'v', 1), 0, 0)),
+ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec2', ('extract_i16', 'v', 0),
+ ('extract_i16', 'v', 1))),
32767.0))),
'options->lower_unpack_snorm_2x16'),
(('unpack_snorm_4x8', 'v'),
- ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_ibyte', 'v', 0),
- ('extract_ibyte', 'v', 1),
- ('extract_ibyte', 'v', 2),
- ('extract_ibyte', 'v', 3))),
+ ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0),
+ ('extract_i8', 'v', 1),
+ ('extract_i8', 'v', 2),
+ ('extract_i8', 'v', 3))),
127.0))),
'options->lower_unpack_snorm_4x8'),
]
FRAG_DEPTH_LAYOUT_UNCHANGED
};
+/**
+ * \brief Buffer access qualifiers
+ *
+ * Bit flags: each value is a distinct power of two, so several
+ * qualifiers can be OR-ed together into a single mask.
+ */
+enum gl_buffer_access_qualifier
+{
+ ACCESS_COHERENT = 1,
+ ACCESS_RESTRICT = 2,
+ ACCESS_VOLATILE = 4,
+};
+
#ifdef __cplusplus
} /* extern "C" */
#endif
# virgl
ifneq ($(filter virgl, $(MESA_GPU_DRIVERS)),)
-SUBDIRS += winsys/virgl/drm drivers/virgl
+SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl
endif
# vmwgfx
*
* Convert float32 to half floats, preserving Infs and NaNs,
* with rounding towards zero (trunc).
+ * XXX: For GL, would prefer rounding towards nearest(-even).
*/
LLVMValueRef
lp_build_float_to_half(struct gallivm_state *gallivm,
struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
LLVMValueRef result;
+ /*
+ * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
+ * directly, without any (x86 or generic) intrinsics.
+ * However, the rounding mode cannot be specified and is undefined
+ * (in practice x86 seems to round to nearest-even, but that may
+ * depend on instruction set support), so it is essentially
+ * useless.
+ */
+
if (util_cpu_caps.has_f16c &&
(length == 4 || length == 8)) {
struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
LLVMValueRef index = LLVMConstInt(i32t, i, 0);
LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
#if 0
- /* XXX: not really supported by backends */
+ /*
+ * XXX: not really supported by backends.
+ * Even if they supported it now, the rounding mode cannot be specified
+ * and is undefined.
+ */
LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
#else
LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
}
+/**
+ * Un-interleave a vector.
+ *
+ * Emits a shuffle of \p a with itself that selects the elements at
+ * indices lo_hi, lo_hi + 2, lo_hi + 4, ..., i.e. every second element
+ * of the source, beginning at 0 or 1 depending on lo_hi.
+ *
+ * \param gallivm    state whose builder is used to emit the shuffle
+ * \param num_elems  number of elements in the source vector; asserted
+ *                   not to exceed LP_MAX_VECTOR_LENGTH
+ * \param a          source vector
+ * \param lo_hi      index of the first selected element (0 or 1)
+ *
+ * \return a vector of num_elems / 2 elements, so both the element count
+ *         and the total width are half those of the source vector.
+ */
+LLVMValueRef
+lp_build_uninterleave1(struct gallivm_state *gallivm,
+ unsigned num_elems,
+ LLVMValueRef a,
+ unsigned lo_hi)
+{
+ LLVMValueRef shuffle, elems[LP_MAX_VECTOR_LENGTH];
+ unsigned i;
+ assert(num_elems <= LP_MAX_VECTOR_LENGTH);
+
+ for (i = 0; i < num_elems / 2; ++i)
+ elems[i] = lp_build_const_int32(gallivm, 2*i + lo_hi);
+
+ shuffle = LLVMConstVector(elems, num_elems / 2);
+
+ return LLVMBuildShuffleVector(gallivm->builder, a, a, shuffle, "");
+}
+
+
/**
* Interleave vector elements.
*
LLVMValueRef b,
unsigned lo_hi);
+LLVMValueRef
+lp_build_uninterleave1(struct gallivm_state *gallivm,
+ unsigned num_elems,
+ LLVMValueRef a,
+ unsigned lo_hi);
void
lp_build_unpack2(struct gallivm_state *gallivm,
/* Ignore deprecated instructions */
switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_UP2H:
case TGSI_OPCODE_UP2US:
case TGSI_OPCODE_UP4B:
case TGSI_OPCODE_UP4UB:
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
+#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
+#include "lp_bld_pack.h"
#include "tgsi/tgsi_exec.h"
log_emit /* emit */
};
+/* TGSI_OPCODE_PK2H */
+
+/**
+ * Fetch the operands of PK2H: the X and Y channels of source 0 are
+ * stored in args[0] and args[1] respectively.
+ */
+static void
+pk2h_fetch_args(
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef src_x, src_y;
+
+   src_x = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+   src_y = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
+
+   emit_data->args[0] = src_x;   /* src0.x */
+   emit_data->args[1] = src_y;   /* src0.y */
+}
+
+/**
+ * Emit PK2H: convert args[0] and args[1] to half floats and interleave
+ * the two results element-wise into one vector of 16-bit values, which
+ * becomes the output for the current channel.
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context (supplies gallivm and vector length)
+ * \param emit_data  args[0]/args[1] in, output[chan] out
+ */
+static void
+pk2h_emit(
+   const struct lp_build_tgsi_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   const unsigned length = bld_base->base.type.length;
+   /* 16-bit unsigned vector type spanning length * 32 bits in total */
+   struct lp_type half_vec_type = lp_type_uint_vec(16, length * 32);
+   LLVMValueRef half_x, half_y;
+
+   half_x = lp_build_float_to_half(gallivm, emit_data->args[0]);
+   half_y = lp_build_float_to_half(gallivm, emit_data->args[1]);
+
+   /* maybe some interleave doubling vector width would be useful... */
+   half_x = lp_build_pad_vector(gallivm, half_x, length * 2);
+   half_y = lp_build_pad_vector(gallivm, half_y, length * 2);
+
+   emit_data->output[emit_data->chan] =
+      lp_build_interleave2(gallivm, half_vec_type, half_x, half_y, 0);
+}
+
+static struct lp_build_tgsi_action pk2h_action = {
+ pk2h_fetch_args, /* fetch_args: loads src0.x and src0.y into args[0] and args[1] */
+ pk2h_emit /* emit: packs both args as half floats into output[chan] */
+};
+
+/* TGSI_OPCODE_UP2H */
+
+/**
+ * Emit UP2H: reinterpret args[0] as a vector of 16-bit values, split it
+ * into its even-indexed and odd-indexed halves and convert each to float.
+ * The even result is written to outputs 0 and 2, the odd result to
+ * outputs 1 and 3.
+ *
+ * \param action     unused
+ * \param bld_base   TGSI build context (supplies gallivm and vector length)
+ * \param emit_data  args[0] in, output[0..3] out
+ */
+static void
+up2h_emit(
+   const struct lp_build_tgsi_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   const unsigned num_halves = bld_base->base.type.length * 2;
+   LLVMTypeRef half_vec_type =
+      LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), num_halves);
+   LLVMValueRef packed, even, odd;
+
+   packed = LLVMBuildBitCast(gallivm->builder, emit_data->args[0],
+                             half_vec_type, "");
+
+   even = lp_build_uninterleave1(gallivm, num_halves, packed, 0);
+   odd = lp_build_uninterleave1(gallivm, num_halves, packed, 1);
+   even = lp_build_half_to_float(gallivm, even);
+   odd = lp_build_half_to_float(gallivm, odd);
+
+   emit_data->output[0] = even;
+   emit_data->output[1] = odd;
+   emit_data->output[2] = even;
+   emit_data->output[3] = odd;
+}
+
+static struct lp_build_tgsi_action up2h_action = {
+ scalar_unary_fetch_args, /* fetch_args: up2h_emit only reads args[0] */
+ up2h_emit /* emit: writes all four output channels */
+};
+
/* TGSI_OPCODE_LRP */
static void
bld_base->op_actions[TGSI_OPCODE_EXP] = exp_action;
bld_base->op_actions[TGSI_OPCODE_LIT] = lit_action;
bld_base->op_actions[TGSI_OPCODE_LOG] = log_action;
+ bld_base->op_actions[TGSI_OPCODE_PK2H] = pk2h_action;
bld_base->op_actions[TGSI_OPCODE_RSQ] = rsq_action;
bld_base->op_actions[TGSI_OPCODE_SQRT] = sqrt_action;
bld_base->op_actions[TGSI_OPCODE_POW] = pow_action;
bld_base->op_actions[TGSI_OPCODE_SCS] = scs_action;
+ bld_base->op_actions[TGSI_OPCODE_UP2H] = up2h_action;
bld_base->op_actions[TGSI_OPCODE_XPD] = xpd_action;
bld_base->op_actions[TGSI_OPCODE_BREAKC].fetch_args = scalar_unary_fetch_args;
struct pipe_screen *
pipe_virgl_create_screen(int fd)
{
- struct virgl_winsys *vws;
struct pipe_screen *screen;
- vws = virgl_drm_winsys_create(fd);
- if (!vws)
- return NULL;
-
- screen = virgl_create_screen(vws);
+ screen = virgl_drm_screen_create(fd); /* may be NULL; checked before wrapping below */
return screen ? debug_screen_wrap(screen) : NULL;
}
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
+#include "util/u_half.h"
#include "util/u_memory.h"
#include "util/u_math.h"
}
}
+/**
+ * Execute PK2H: convert the X and Y channels of source 0 to half floats
+ * and pack them into one 32-bit value per quad lane, X in bits 0..15 and
+ * Y in bits 16..31.  The packed value is stored to every destination
+ * channel enabled in the write mask.
+ *
+ * \param mach  execution machine
+ * \param inst  the PK2H instruction being executed
+ */
+static void
+exec_pk2h(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   unsigned chan;
+   union tgsi_exec_channel arg[2], dst;
+
+   fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
+   fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+   for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
+      /*
+       * Widen to unsigned before shifting.  A 16-bit half value would
+       * otherwise be promoted to (signed) int, and a half with its sign
+       * bit set would be shifted into bit 31 of that int, which is
+       * undefined behaviour in C.
+       */
+      const unsigned lo = util_float_to_half(arg[0].f[chan]);
+      const unsigned hi = util_float_to_half(arg[1].f[chan]);
+
+      dst.u[chan] = lo | (hi << 16);
+   }
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
+      }
+   }
+}
+
+/**
+ * Execute UP2H: read the X channel of source 0 as unsigned, convert its
+ * low 16 bits and its high 16 bits from half to float, and store the low
+ * result to even destination channels and the high result to odd ones
+ * (subject to the write mask).
+ *
+ * \param mach  execution machine
+ * \param inst  the UP2H instruction being executed
+ */
+static void
+exec_up2h(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel packed, unpacked[2];
+   unsigned lane, chan;
+
+   fetch_source(mach, &packed, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
+
+   for (lane = 0; lane < TGSI_QUAD_SIZE; lane++) {
+      const unsigned bits = packed.u[lane];
+
+      unpacked[0].f[lane] = util_half_to_float(bits & 0xffff);
+      unpacked[1].f[lane] = util_half_to_float(bits >> 16);
+   }
+
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
+         continue;
+
+      /* even channels get the low half, odd channels the high half */
+      store_dest(mach, &unpacked[chan & 1], &inst->Dst[0], inst, chan,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+}
+
static void
exec_scs(struct tgsi_exec_machine *mach,
const struct tgsi_full_instruction *inst)
break;
case TGSI_OPCODE_PK2H:
- assert (0);
+ exec_pk2h(mach, inst);
break;
case TGSI_OPCODE_PK2US:
break;
case TGSI_OPCODE_UP2H:
- assert (0);
+ exec_up2h(mach, inst);
break;
case TGSI_OPCODE_UP2US:
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
- { 0, 1, 0, 0, 0, 0, 1, NONE, "", 112 }, /* removed */
+ { 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
case TGSI_OPCODE_SAMPLE_I:
case TGSI_OPCODE_SAMPLE_I_MS:
case TGSI_OPCODE_UMUL_HI:
+ case TGSI_OPCODE_UP2H:
return TGSI_TYPE_UNSIGNED;
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_I2F:
info->reads_position = TRUE;
else if (semName == TGSI_SEMANTIC_FACE)
info->uses_frontface = TRUE;
+ else if (semName == TGSI_SEMANTIC_SAMPLEMASK)
+ info->reads_samplemask = TRUE;
}
else if (file == TGSI_FILE_OUTPUT) {
info->output_semantic_name[reg] = (ubyte) semName;
ubyte colors_written;
boolean reads_position; /**< does fragment shader read position? */
boolean reads_z; /**< does fragment shader read depth? */
+ boolean reads_samplemask; /**< does fragment shader read sample mask? */
boolean writes_z; /**< does fragment shader write Z value? */
boolean writes_stencil; /**< does fragment shader write stencil value? */
boolean writes_samplemask; /**< does fragment shader write sample mask? */
dst->height = MAX2(src->height >> l, 1);
}
+/**
+ * Minify a 3D box by l levels.
+ *
+ * Each origin coordinate is shifted right by l.  Each extent is shifted
+ * right by l as well, but is never allowed to drop below 1.
+ *
+ * \param dst  receives the minified box
+ * \param src  box to minify
+ * \param l    number of levels to minify by (used as the shift count)
+ */
+static inline void
+u_box_minify_3d(struct pipe_box *dst,
+                const struct pipe_box *src, unsigned l)
+{
+   /* extents: halved l times, clamped to at least one element */
+   dst->depth = MAX2(src->depth >> l, 1);
+   dst->height = MAX2(src->height >> l, 1);
+   dst->width = MAX2(src->width >> l, 1);
+
+   /* origin: halved l times, no clamping */
+   dst->z = src->z >> l;
+   dst->y = src->y >> l;
+   dst->x = src->x >> l;
+}
+
#endif
#include <machine/cpu.h>
#endif
-#if defined(PIPE_OS_FREEBSD)
+#if defined(PIPE_OS_FREEBSD) || defined(PIPE_OS_DRAGONFLY)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
return channels
def parse(filename):
- '''Parse the format descrition in CSV format in terms of the
+ '''Parse the format description in CSV format in terms of the
Channel and Format classes above.'''
stream = open(filename)
f32.ui &= round_mask;
f32.f *= magic.f;
f32.ui -= round_mask;
-
+ /*
+ * XXX: The magic mul relies on denorms being available, otherwise
+ * all f16 denorms get flushed to zero - hence when this is used
+ * for tgsi_exec in softpipe we won't get f16 denorms.
+ */
/*
* Clamp to max finite value if overflowed.
* OpenGL has completely undefined rounding behavior for float to
/* Adjust */
f32.f *= magic.f;
+ /* XXX: The magic mul relies on denorms being available */
/* Inf / NaN */
if (f32.f >= infnan.f)
VS_O_VTEX = 0
};
+const int vl_zscan_normal_16[] =
+{
+ /* Zig-Zag scan pattern with 16 entries (a permutation of 0..15), presumably for 4x4 blocks */
+ 0, 1, 4, 8, 5, 2, 3, 6,
+ 9,12,13,10, 7,11,14,15
+};
+
const int vl_zscan_linear[] =
{
/* Linear scan pattern */
struct pipe_surface *dst;
};
+extern const int vl_zscan_normal_16[];
extern const int vl_zscan_linear[];
extern const int vl_zscan_normal[];
extern const int vl_zscan_alternate[];
will not block and the return value will be TRUE if the query has
completed or FALSE otherwise.
+``get_query_result_resource`` is used to store the result of a query into
+a resource without synchronizing with the CPU. This write will optionally
+wait for the query to complete, and will optionally write whether the value
+is available instead of the value itself.
+
The interface currently includes the following types of queries:
``PIPE_QUERY_OCCLUSION_COUNTER`` counts the number of fragments which
* ``PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT``: Describes the required
alignment for pipe_sampler_view::u.buf.first_element, in bytes.
If a driver does not support first/last_element, it should return 0.
+* ``PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY``: Whether the driver only
+ supports R, RG, RGB and RGBA formats for PIPE_BUFFER sampler views.
+ When this is the case it should be assumed that the swizzle parameters
+ in the sampler view have no effect.
* ``PIPE_CAP_TGSI_TEXCOORD``: This CAP describes a hw limitation.
If true, the hardware cannot replace arbitrary shader inputs with sprite
coordinates and hence the inputs that are desired to be replaceable must
view it is intended to be used with, or herein undefined results may occur
for permutational swizzles.
* ``PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE``: The maximum accessible size with
- a buffer sampler view, in bytes.
+ a buffer sampler view, in texels.
* ``PIPE_CAP_MAX_VIEWPORTS``: The maximum number of viewports (and scissors
since they are linked) a driver can support. Returning 0 is equivalent
to returning 1 because every driver has to support at least a single
* ``PIPE_CAP_GENERATE_MIPMAP``: Indicates whether pipe_context::generate_mipmap
is supported.
* ``PIPE_CAP_STRING_MARKER``: Whether pipe->emit_string_marker() is supported.
+* ``PIPE_CAP_SURFACE_REINTERPRET_BLOCKS``: Indicates whether
+ pipe_context::create_surface supports reinterpreting a texture as a surface
+ of a format with different block width/height (but same block size in bits).
+ For example, a compressed texture image can be interpreted as a
+ non-compressed surface whose texels are the same number of bits as the
+ compressed blocks, and vice versa. The width and height of the surface is
+ adjusted appropriately.
+* ``PIPE_CAP_QUERY_BUFFER_OBJECT``: Whether the driver supports the
+ pipe_context::get_query_result_resource callback.
.. _pipe_capf:
the program. Results are unspecified if any of the remaining
threads terminates or never reaches an executed BARRIER instruction.
+.. opcode:: MEMBAR - Memory barrier
+
+ ``MEMBAR type``
+
+ This opcode waits for the completion of all memory accesses based on
+ the type passed in. The type is an immediate bitfield with the following
+ meaning:
+
+ Bit 0: Shader storage buffers
+ Bit 1: Atomic buffers
+ Bit 2: Images
+ Bit 3: Shared memory
+ Bit 4: Thread group
+
+ These bits may be passed in any combination. An implementation is free to
+ not distinguish between them as it sees fit. However, together they cover
+ all the possibilities made available by GLSL.
.. _atomopcodes:
struct fd_ringbuffer *ring = ctx->ring;
const uint32_t *buf = (const void *)string;
+ /* max packet size is 0x3fff dwords: */
+ len = MIN2(len, 0x3fff * 4);
+
OUT_PKT3(ring, CP_NOP, align(len, 4) / 4);
while (len >= 4) {
OUT_RING(ring, *buf);
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_COMPUTE:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_SM3:
case PIPE_CAP_CLIP_HALFZ:
return is_a3xx(screen) || is_a4xx(screen);
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
if (is_a3xx(screen)) return 16;
if (is_a4xx(screen)) return 32;
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
return 0;
case PIPE_CAP_MAX_VIEWPORTS:
/* Queries. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
return is_a3xx(screen) || is_a4xx(screen);
}
}
+/* NOTE: this creates the "TGSI" style fragface (ie. input slot
+ * VARYING_SLOT_FACE). For NIR style nir_intrinsic_load_front_face
+ * we can just use the value from hw directly (since it is boolean)
+ */
static struct ir3_instruction *
create_frag_face(struct ir3_compile *ctx, unsigned comp)
{
break;
case nir_intrinsic_load_vertex_id_zero_base:
if (!ctx->vertex_id) {
- ctx->vertex_id = create_input(ctx->block, 0);
+ ctx->vertex_id = create_input(b, 0);
add_sysval_input(ctx, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
ctx->vertex_id);
}
break;
case nir_intrinsic_load_instance_id:
if (!ctx->instance_id) {
- ctx->instance_id = create_input(ctx->block, 0);
+ ctx->instance_id = create_input(b, 0);
add_sysval_input(ctx, SYSTEM_VALUE_INSTANCE_ID,
ctx->instance_id);
}
dst[i] = create_driver_param(ctx, IR3_DP_UCP0_X + n);
}
break;
+ case nir_intrinsic_load_front_face:
+ if (!ctx->frag_face) {
+ ctx->so->frag_face = true;
+ ctx->frag_face = create_input(b, 0);
+ ctx->frag_face->regs[0]->flags |= IR3_REG_HALF;
+ }
+ dst[0] = ir3_ADD_S(b, ctx->frag_face, 0, create_immed(b, 1), 0);
+ break;
case nir_intrinsic_discard_if:
case nir_intrinsic_discard: {
struct ir3_instruction *cond, *kill;
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
+ struct ir3_instruction *const_off[4];
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;
ddy = get_src(ctx, &tex->src[i].src);
break;
default:
- compile_error(ctx, "Unhandled NIR tex serc type: %d\n",
+ compile_error(ctx, "Unhandled NIR tex src type: %d\n",
tex->src[i].src_type);
return;
}
tex_info(tex, &flags, &coords);
+ if (!has_off) {
+ /* could still have a constant offset: */
+ if (tex->const_offset[0] || tex->const_offset[1] ||
+ tex->const_offset[2] || tex->const_offset[3]) {
+ off = const_off;
+
+ off[0] = create_immed(b, tex->const_offset[0]);
+ off[1] = create_immed(b, tex->const_offset[1]);
+ off[2] = create_immed(b, tex->const_offset[2]);
+ off[3] = create_immed(b, tex->const_offset[3]);
+
+ has_off = true;
+ }
+ }
+
/* scale up integer coords for TXF based on the LOD */
if (ctx->unminify_coords && (opc == OPC_ISAML)) {
assert(has_lod);
case VARYING_SLOT_CLIP_DIST0:
case VARYING_SLOT_CLIP_DIST1:
break;
+ case VARYING_SLOT_CLIP_VERTEX:
+ /* handled entirely in nir_lower_clip: */
+ return;
default:
if (slot >= VARYING_SLOT_VAR0)
break;
setup_output(ctx, var);
}
- /* Setup variables (which should only be arrays): */
+ /* Setup global variables (which should only be arrays): */
nir_foreach_variable(var, &ctx->s->globals) {
declare_var(ctx, var);
}
+ /* Setup local variables (which should only be arrays): */
+ /* NOTE: need to do something more clever when we support >1 fxn */
+ nir_foreach_variable(var, &fxn->locals) {
+ declare_var(ctx, var);
+ }
+
/* And emit the body: */
ctx->impl = fxn;
emit_function(ctx, fxn);
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return true;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 1;
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
lp_debug_draw_bins_by_coverage( struct lp_scene *scene );
-#ifdef PIPE_ARCH_SSE
-#include <emmintrin.h>
-#include "util/u_sse.h"
-
-static inline __m128i
-lp_plane_to_m128i(const struct lp_rast_plane *plane)
-{
- return _mm_setr_epi32((int32_t)plane->c, (int32_t)plane->dcdx,
- (int32_t)plane->dcdy, (int32_t)plane->eo);
-}
-
-#endif
-
#endif
void
lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+ const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = GET_PLANES(tri);
struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16];
unsigned nr = 0;
- __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
- __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
- __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
+ /* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
+ __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
__m128i zero = _mm_setzero_si128();
- __m128i c;
- __m128i dcdx;
- __m128i dcdy;
- __m128i rej4;
-
- __m128i dcdx2;
- __m128i dcdx3;
+ __m128i c, dcdx, dcdy, rej4;
+ __m128i dcdx_neg_mask, dcdy_neg_mask;
+ __m128i dcdx2, dcdx3;
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
__m128i unused;
-
+
transpose4_epi32(&p0, &p1, &p2, &zero,
- &c, &dcdx, &dcdy, &rej4);
+ &c, &unused, &dcdx, &dcdy);
+
+ /* recalc eo - easier than trying to load as scalars / shuffle... */
+ dcdx_neg_mask = _mm_srai_epi32(dcdx, 31);
+ dcdy_neg_mask = _mm_srai_epi32(dcdy, 31);
+ rej4 = _mm_sub_epi32(_mm_andnot_si128(dcdy_neg_mask, dcdy),
+ _mm_and_si128(dcdx_neg_mask, dcdx));
/* Adjust dcdx;
*/
void
lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task,
- const union lp_rast_cmd_arg arg)
+ const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle.tri;
const struct lp_rast_plane *plane = GET_PLANES(tri);
unsigned x = (arg.triangle.plane_mask & 0xff) + task->x;
unsigned y = (arg.triangle.plane_mask >> 8) + task->y;
- __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */
- __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */
- __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */
+ /* p0 and p2 are aligned, p1 is not (plane size 24 bytes). */
+ __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* clo, chi, dcdx, dcdy */
+ __m128i p1 = _mm_loadu_si128((__m128i *)&plane[1]);
+ __m128i p2 = _mm_load_si128((__m128i *)&plane[2]);
__m128i zero = _mm_setzero_si128();
- __m128i c;
- __m128i dcdx;
- __m128i dcdy;
+ __m128i c, dcdx, dcdy;
+ __m128i dcdx2, dcdx3;
- __m128i dcdx2;
- __m128i dcdx3;
-
__m128i span_0; /* 0,dcdx,2dcdx,3dcdx for plane 0 */
__m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */
__m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */
__m128i unused;
transpose4_epi32(&p0, &p1, &p2, &zero,
- &c, &dcdx, &dcdy, &unused);
+ &c, &unused, &dcdx, &dcdy);
/* Adjust dcdx;
*/
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
}
/* should only get here on unhandled cases */
const float (*v2)[4]);
};
+/**
+ * Determine which scissor edges are actually needed for a primitive.
+ *
+ * An edge is needed only if the bounding box extends beyond it; edges the
+ * box lies fully inside of are reported as not needed.
+ *
+ * \param scis_planes  out: one flag per edge, in the order
+ *                     left, right, top, bottom
+ * \param bbox         bounding box of the primitive (not modified)
+ * \param scissor      scissor rectangle (not modified)
+ */
+static inline void
+scissor_planes_needed(boolean scis_planes[4], const struct u_rect *bbox,
+                      const struct u_rect *scissor)
+{
+   /* left */
+   scis_planes[0] = (bbox->x0 < scissor->x0);
+   /* right */
+   scis_planes[1] = (bbox->x1 > scissor->x1);
+   /* top */
+   scis_planes[2] = (bbox->y0 < scissor->y0);
+   /* bottom */
+   scis_planes[3] = (bbox->y1 > scissor->y1);
+}
+
+
void lp_setup_choose_triangle( struct lp_setup_context *setup );
void lp_setup_choose_line( struct lp_setup_context *setup );
void lp_setup_choose_point( struct lp_setup_context *setup );
layer = MIN2(layer, scene->fb_max_layer);
}
- if (setup->scissor_test) {
- nr_planes = 8;
- }
- else {
- nr_planes = 4;
- }
-
dx = v1[0][0] - v2[0][0];
dy = v1[0][1] - v2[0][1];
area = (dx * dx + dy * dy);
bbox.x0 = MAX2(bbox.x0, 0);
bbox.y0 = MAX2(bbox.y0, 0);
+ nr_planes = 4;
+ /*
+ * Determine how many scissor planes we need, that is drop scissor
+ * edges if the bounding box of the tri is fully inside that edge.
+ */
+ if (setup->scissor_test) {
+ /* why not just use draw_regions */
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
+ nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+ }
+
line = lp_setup_alloc_triangle(scene,
key->num_inputs,
nr_planes,
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
+ * (Or only store the c value together with a bit indicating which
+ * scissor edge this is, so rasterization would treat them differently
+ * (easier to evaluate) to ordinary planes.)
*/
- if (nr_planes == 8) {
- const struct u_rect *scissor =
- &setup->scissors[viewport_index];
-
- plane[4].dcdx = -1 << 8;
- plane[4].dcdy = 0;
- plane[4].c = (1-scissor->x0) << 8;
- plane[4].eo = 1 << 8;
-
- plane[5].dcdx = 1 << 8;
- plane[5].dcdy = 0;
- plane[5].c = (scissor->x1+1) << 8;
- plane[5].eo = 0;
-
- plane[6].dcdx = 0;
- plane[6].dcdy = 1 << 8;
- plane[6].c = (1-scissor->y0) << 8;
- plane[6].eo = 1 << 8;
-
- plane[7].dcdx = 0;
- plane[7].dcdy = -1 << 8;
- plane[7].c = (scissor->y1+1) << 8;
- plane[7].eo = 0;
+ if (nr_planes > 4) {
+ /* why not just use draw_regions */
+ struct u_rect *scissor = &setup->scissors[viewport_index];
+ struct lp_rast_plane *plane_s = &plane[4];
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, scissor);
+
+ if (s_planes[0]) {
+ plane_s->dcdx = -1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (1-scissor->x0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[1]) {
+ plane_s->dcdx = 1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (scissor->x1+1) << 8;
+ plane_s->eo = 0 << 8;
+ plane_s++;
+ }
+ if (s_planes[2]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = 1 << 8;
+ plane_s->c = (1-scissor->y0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[3]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = -1 << 8;
+ plane_s->c = (scissor->y1+1) << 8;
+ plane_s->eo = 0;
+ plane_s++;
+ }
+ assert(plane_s == &plane[nr_planes]);
}
return lp_setup_bin_triangle(setup, line, &bbox, nr_planes, viewport_index);
layer = MIN2(layer, scene->fb_max_layer);
}
- if (setup->scissor_test) {
- nr_planes = 7;
- }
- else {
- nr_planes = 3;
- }
-
/* Bounding rectangle (in pixels) */
{
/* Yes this is necessary to accurately calculate bounding boxes
bbox.x0 = MAX2(bbox.x0, 0);
bbox.y0 = MAX2(bbox.y0, 0);
+ nr_planes = 3;
+ /*
+ * Determine how many scissor planes we need, that is drop scissor
+ * edges if the bounding box of the tri is fully inside that edge.
+ */
+ if (setup->scissor_test) {
+ /* why not just use draw_regions */
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, &setup->scissors[viewport_index]);
+ nr_planes += s_planes[0] + s_planes[1] + s_planes[2] + s_planes[3];
+ }
+
tri = lp_setup_alloc_triangle(scene,
key->num_inputs,
nr_planes,
/* Setup parameter interpolants:
*/
- setup->setup.variant->jit_function( v0,
- v1,
- v2,
- frontfacing,
- GET_A0(&tri->inputs),
- GET_DADX(&tri->inputs),
- GET_DADY(&tri->inputs) );
+ setup->setup.variant->jit_function(v0, v1, v2,
+ frontfacing,
+ GET_A0(&tri->inputs),
+ GET_DADX(&tri->inputs),
+ GET_DADY(&tri->inputs));
tri->inputs.frontfacing = frontfacing;
tri->inputs.disable = FALSE;
if (0)
lp_dump_setup_coef(&setup->setup.variant->key,
- (const float (*)[4])GET_A0(&tri->inputs),
- (const float (*)[4])GET_DADX(&tri->inputs),
- (const float (*)[4])GET_DADY(&tri->inputs));
+ (const float (*)[4])GET_A0(&tri->inputs),
+ (const float (*)[4])GET_DADX(&tri->inputs),
+ (const float (*)[4])GET_DADY(&tri->inputs));
plane = GET_PLANES(tri);
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
+ * (Or only store the c value together with a bit indicating which
+ * scissor edge this is, so rasterization would treat them differently
+ * (easier to evaluate) to ordinary planes.)
*/
- if (nr_planes == 7) {
- const struct u_rect *scissor = &setup->scissors[viewport_index];
-
- plane[3].dcdx = -1 << 8;
- plane[3].dcdy = 0;
- plane[3].c = (1-scissor->x0) << 8;
- plane[3].eo = 1 << 8;
-
- plane[4].dcdx = 1 << 8;
- plane[4].dcdy = 0;
- plane[4].c = (scissor->x1+1) << 8;
- plane[4].eo = 0;
-
- plane[5].dcdx = 0;
- plane[5].dcdy = 1 << 8;
- plane[5].c = (1-scissor->y0) << 8;
- plane[5].eo = 1 << 8;
-
- plane[6].dcdx = 0;
- plane[6].dcdy = -1 << 8;
- plane[6].c = (scissor->y1+1) << 8;
- plane[6].eo = 0;
+ if (nr_planes > 3) {
+ /* why not just use draw_regions */
+ struct u_rect *scissor = &setup->scissors[viewport_index];
+ struct lp_rast_plane *plane_s = &plane[3];
+ boolean s_planes[4];
+ scissor_planes_needed(s_planes, &bbox, scissor);
+
+ if (s_planes[0]) {
+ plane_s->dcdx = -1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (1-scissor->x0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[1]) {
+ plane_s->dcdx = 1 << 8;
+ plane_s->dcdy = 0;
+ plane_s->c = (scissor->x1+1) << 8;
+ plane_s->eo = 0 << 8;
+ plane_s++;
+ }
+ if (s_planes[2]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = 1 << 8;
+ plane_s->c = (1-scissor->y0) << 8;
+ plane_s->eo = 1 << 8;
+ plane_s++;
+ }
+ if (s_planes[3]) {
+ plane_s->dcdx = 0;
+ plane_s->dcdy = -1 << 8;
+ plane_s->c = (scissor->y1+1) << 8;
+ plane_s->eo = 0;
+ plane_s++;
+ }
+ assert(plane_s == &plane[nr_planes]);
}
return lp_setup_bin_triangle(setup, tri, &bbox, nr_planes, viewport_index);
* Both should be acceptable, I think.
*/
#if defined(PIPE_ARCH_SSE)
- __m128d v0r, v1r, v2r;
+ __m128 v0r, v1r;
__m128 vxy0xy2, vxy1xy0;
__m128i vxy0xy2i, vxy1xy0i;
__m128i dxdy0120, x0x2y0y2, x1x0y1y0, x0120, y0120;
__m128 pix_offset = _mm_set1_ps(setup->pixel_offset);
__m128 fixed_one = _mm_set1_ps((float)FIXED_ONE);
- v0r = _mm_load_sd((const double *)v0[0]);
- v1r = _mm_load_sd((const double *)v1[0]);
- v2r = _mm_load_sd((const double *)v2[0]);
- vxy0xy2 = _mm_castpd_ps(_mm_unpacklo_pd(v0r, v2r));
- vxy1xy0 = _mm_castpd_ps(_mm_unpacklo_pd(v1r, v0r));
+ v0r = _mm_castpd_ps(_mm_load_sd((double *)v0[0]));
+ vxy0xy2 = _mm_loadh_pi(v0r, (__m64 *)v2[0]);
+ v1r = _mm_castpd_ps(_mm_load_sd((double *)v1[0]));
+ vxy1xy0 = _mm_movelh_ps(v1r, vxy0xy2);
vxy0xy2 = _mm_sub_ps(vxy0xy2, pix_offset);
vxy1xy0 = _mm_sub_ps(vxy1xy0, pix_offset);
vxy0xy2 = _mm_mul_ps(vxy0xy2, fixed_one);
case TYPE_S32:
case TYPE_U32:
return reg.data.s32 == i; // as if ...
+ case TYPE_S64:
+ case TYPE_U64:
+ return reg.data.s64 == i; // as if ...
case TYPE_F32:
return reg.data.f32 == static_cast<float>(i);
case TYPE_F64:
OP_SUBFM, // surface bitfield manipulation
OP_SUCLAMP, // clamp surface coordinates
OP_SUEAU, // surface effective address
+ OP_SUQ, // surface query
OP_MADSP, // special integer multiply-add
OP_TEXBAR, // texture dependency barrier
OP_DFDX,
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
- case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
+ case OP_CVT:
+ if (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)
+ emitMOV(insn);
+ else
+ emitCVT(insn);
+ break;
case OP_RSQ:
emitSFnOp(insn, 5 + 2 * insn->subOp);
break;
(insn->sType != TYPE_F32 && !longIMMD(insn->src(0)))) {
switch (insn->src(0).getFile()) {
case FILE_GPR:
- emitInsn(0x5c980000);
+ if (insn->def(0).getFile() == FILE_PREDICATE) {
+ emitInsn(0x5b6a0000);
+ emitGPR (0x08);
+ } else {
+ emitInsn(0x5c980000);
+ }
emitGPR (0x14, insn->src(0));
break;
case FILE_MEMORY_CONST:
emitInsn(0x38980000);
emitIMMD(0x14, 19, insn->src(0));
break;
+ case FILE_PREDICATE:
+ emitInsn(0x50880000);
+ emitPRED(0x0c, insn->src(0));
+ emitPRED(0x1d);
+ emitPRED(0x27);
+ break;
default:
assert(!"bad src file");
break;
}
- emitField(0x27, 4, insn->lanes);
+ if (insn->def(0).getFile() != FILE_PREDICATE &&
+ insn->src(0).getFile() != FILE_PREDICATE)
+ emitField(0x27, 4, insn->lanes);
} else {
emitInsn (0x01000000);
emitIMMD (0x14, 32, insn->src(0));
emitField(0x0c, 4, insn->lanes);
}
- emitGPR(0x00, insn->def(0));
+ if (insn->def(0).getFile() == FILE_PREDICATE) {
+ emitPRED(0x27);
+ emitPRED(0x03, insn->def(0));
+ emitPRED(0x00);
+ } else {
+ emitGPR(0x00, insn->def(0));
+ }
}
void
emitRAM();
break;
case OP_MOV:
- if (insn->def(0).getFile() == FILE_GPR &&
- insn->src(0).getFile() != FILE_PREDICATE)
- emitMOV();
- else
- assert(!"R2P/P2R");
+ emitMOV();
break;
case OP_RDSV:
emitS2R();
case OP_CEIL:
case OP_TRUNC:
case OP_CVT:
- if (isFloatType(insn->dType)) {
+ if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)) {
+ emitMOV();
+ } else if (isFloatType(insn->dType)) {
if (isFloatType(insn->sType))
emitF2F();
else
code[0] |= 63 << 20;
}
- if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
- srcId(i->src(2), 32 + 17);
+ if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ assert(i->src(1).getSize() == 2 * typeSizeof(i->sType));
+ code[1] |= (SDATA(i->src(1)).id + 1) << 17;
+ }
}
void
case OP_CEIL:
case OP_FLOOR:
case OP_TRUNC:
- case OP_CVT:
case OP_SAT:
emitCVT(insn);
break;
+ case OP_CVT:
+ if (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)
+ emitMOV(insn);
+ else
+ emitCVT(insn);
+ break;
case OP_RSQ:
emitSFnOp(insn, 5 + 2 * insn->subOp);
break;
static nv50_ir::DataFile translateFile(uint file);
static nv50_ir::TexTarget translateTexture(uint texTarg);
static nv50_ir::SVSemantic translateSysVal(uint sysval);
+static nv50_ir::CacheMode translateCacheMode(uint qualifier);
class Instruction
{
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const;
+ // Cache mode for this instruction: translated from the memory qualifier
+ // when Instruction.Memory is set, CACHE_CA otherwise.
+ nv50_ir::CacheMode getCacheMode() const {
+    return insn->Instruction.Memory ?
+       translateCacheMode(insn->Memory.Qualifier) : nv50_ir::CACHE_CA;
+ }
+
inline uint getLabel() { return insn->Label.Label; }
unsigned getSaturate() const { return insn->Instruction.Saturate; }
case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
- //case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL;
+ case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
case TGSI_FILE_SAMPLER:
case TGSI_FILE_NULL:
default:
}
}
+// Map TGSI memory qualifier bits to an nv50_ir cache mode.
+// VOLATILE takes precedence over COHERENT; with neither bit set the
+// result is CACHE_CA.
+static nv50_ir::CacheMode translateCacheMode(uint qualifier)
+{
+   const bool isVolatile = (qualifier & TGSI_MEMORY_VOLATILE) != 0;
+   const bool isCoherent = (qualifier & TGSI_MEMORY_COHERENT) != 0;
+
+   return isVolatile ? nv50_ir::CACHE_CV :
+          isCoherent ? nv50_ir::CACHE_CG :
+                       nv50_ir::CACHE_CA;
+}
+
nv50_ir::DataType Instruction::inferSrcType() const
{
switch (getOpcode()) {
case TGSI_FILE_IMMEDIATE:
case TGSI_FILE_PREDICATE:
case TGSI_FILE_SAMPLER:
+ case TGSI_FILE_BUFFER:
break;
default:
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) {
if (insn.getDst(0).isIndirect(0))
indirectTempArrays.insert(insn.getDst(0).getArrayId());
+ } else
+ if (insn.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ info->io.globalAccess |= 0x2;
}
}
if (src.isIndirect(0))
indirectTempArrays.insert(src.getArrayId());
} else
-/*
- if (src.getFile() == TGSI_FILE_RESOURCE) {
- if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL)
- info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+ if (src.getFile() == TGSI_FILE_BUFFER) {
+ info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
0x1 : 0x2;
} else
-*/
if (src.getFile() == TGSI_FILE_OUTPUT) {
if (src.isIndirect(0)) {
// We don't know which one is accessed, just mark everything for
int idx = dst.getIndex(0);
int idx2d = dst.is2D() ? dst.getIndex(1) : 0;
- if (dst.isMasked(c)/* || f == TGSI_FILE_RESOURCE*/)
+ if (dst.isMasked(c) || f == TGSI_FILE_BUFFER)
return NULL;
if (dst.isIndirect(0) ||
int c;
std::vector<Value *> off, src, ldv, def;
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ for (c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Value *off = fetchSrc(1, c);
+ Symbol *sym;
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c);
+ } else {
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ }
+
+ Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
+ ld->cache = tgsi.getCacheMode();
+ if (tgsi.getSrc(0).isIndirect(0))
+ ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ }
+ return;
+ }
+
getResourceCoords(off, r, 1);
if (isResourceRaw(code, r)) {
int c;
std::vector<Value *> off, src, dummy;
+ if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) {
+ for (c = 0; c < 4; ++c) {
+ if (!(tgsi.getDst(0).getMask() & (1 << c)))
+ continue;
+
+ Symbol *sym;
+ Value *off;
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) {
+ off = NULL;
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c,
+ tgsi.getSrc(0).getValueU32(0, info) + 4 * c);
+ } else {
+ off = fetchSrc(0, 0);
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c);
+ }
+
+ Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
+ st->cache = tgsi.getCacheMode();
+ if (tgsi.getDst(0).isIndirect(0))
+ st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
+ }
+ return;
+ }
+
getResourceCoords(off, r, 0);
src = off;
const int s = src.size();
std::vector<Value *> defv;
LValue *dst = getScratch();
+ if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
+ for (int c = 0; c < 4; ++c) {
+ if (!dst0[c])
+ continue;
+
+ Instruction *insn;
+ Value *off = fetchSrc(1, c), *off2 = NULL;
+ Value *sym;
+ if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info));
+ else
+ sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0);
+ if (tgsi.getSrc(0).isIndirect(0))
+ off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
+ if (subOp == NV50_IR_SUBOP_ATOM_CAS)
+ insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
+ else
+ insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
+ if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
+ insn->setIndirect(0, 0, off);
+ if (off2)
+ insn->setIndirect(0, 1, off2);
+ insn->subOp = subOp;
+ }
+ for (int c = 0; c < 4; ++c)
+ if (dst0[c])
+ dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov
+ return;
+ }
+
+
getResourceCoords(srcv, r, 1);
if (isResourceSpecial(r)) {
geni->fixed = 1;
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode());
break;
+ case TGSI_OPCODE_MEMBAR:
+ geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL);
+ geni->fixed = 1;
+ if (tgsi.getSrc(0).getValueU32(0, info) & TGSI_MEMBAR_THREAD_GROUP)
+ geni->subOp = NV50_IR_SUBOP_MEMBAR(M, CTA);
+ else
+ geni->subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
+ break;
case TGSI_OPCODE_ATOMUADD:
case TGSI_OPCODE_ATOMXCHG:
case TGSI_OPCODE_ATOMCAS:
case TGSI_OPCODE_ATOMIMAX:
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode()));
break;
+ case TGSI_OPCODE_RESQ:
+ geni = mkOp1(OP_SUQ, TYPE_U32, dst0[0],
+ makeSym(TGSI_FILE_BUFFER, tgsi.getSrc(0).getIndex(0), -1, 0, 0));
+ if (tgsi.getSrc(0).isIndirect(0))
+ geni->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
+ break;
case TGSI_OPCODE_IBFE:
case TGSI_OPCODE_UBFE:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
return true;
}
+// Lower OP_SUQ: turn the query into an OP_MOV whose source is the value
+// returned by loadResLength32(). The info record is addressed at
+// fileIndex * 16 and the instruction's (0, 1) indirect, if any, is forwarded
+// as the dynamic index. Both indirects are cleared afterwards.
+// Always returns true.
+bool
+NVC0LoweringPass::handleSUQ(Instruction *suq)
+{
+   suq->op = OP_MOV;
+
+   Value *bufIndirect = suq->getIndirect(0, 1);
+   const uint32_t infoOffset = suq->getSrc(0)->reg.fileIndex * 16;
+   Value *length = loadResLength32(bufIndirect, infoOffset);
+
+   suq->setSrc(0, length);
+   suq->setIndirect(0, 0, NULL);
+   suq->setIndirect(0, 1, NULL);
+   return true;
+}
bool
NVC0LoweringPass::handleATOM(Instruction *atom)
{
SVSemantic sv;
+ Value *ptr = atom->getIndirect(0, 0), *ind = atom->getIndirect(0, 1), *base;
switch (atom->src(0).getFile()) {
case FILE_MEMORY_LOCAL:
break;
default:
assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
+ base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
+ assert(base->reg.size == 8);
+ if (ptr)
+ base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
+ assert(base->reg.size == 8);
+ atom->setIndirect(0, 0, base);
return true;
}
- Value *base =
+ base =
bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0));
- Value *ptr = atom->getIndirect(0, 0);
atom->setSrc(0, cloneShallow(func, atom->getSrc(0)));
atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
if (ptr)
base = bld.mkOp2v(OP_ADD, TYPE_U32, base, base, ptr);
+ atom->setIndirect(0, 1, NULL);
atom->setIndirect(0, 0, base);
return true;
cctl->setPredicate(cas->cc, cas->getPredicate());
}
- if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
// CAS is crazy. It's 2nd source is a double reg, and the 3rd source
// should be set to the high part of the double reg or bad things will
// happen elsewhere in the universe.
bld.setPosition(cas, false);
bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2));
cas->setSrc(1, dreg);
+ cas->setSrc(2, dreg);
}
return true;
mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), ptr);
}
+// Emit a 64-bit load from the constant buffer slot io.resInfoCBSlot.
+// off is added to io.suInfoBase to form the symbol offset; ptr, when
+// non-NULL, is shifted left by 4 and used as the load's indirect address.
+// Returns the loaded value.
+inline Value *
+NVC0LoweringPass::loadResInfo64(Value *ptr, uint32_t off)
+{
+   const uint8_t cbSlot = prog->driver->io.resInfoCBSlot;
+   const uint32_t cbOffset = off + prog->driver->io.suInfoBase;
+
+   Value *indirect = NULL;
+   if (ptr) {
+      indirect =
+         bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
+   }
+
+   Symbol *sym = bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U64, cbOffset);
+   return bld.mkLoadv(TYPE_U64, sym, indirect);
+}
+
+// Emit a 32-bit load from the constant buffer slot io.resInfoCBSlot at
+// io.suInfoBase + off + 8. ptr, when non-NULL, is shifted left by 4 and used
+// as the load's indirect address. Returns the loaded value.
+inline Value *
+NVC0LoweringPass::loadResLength32(Value *ptr, uint32_t off)
+{
+   const uint8_t cbSlot = prog->driver->io.resInfoCBSlot;
+   const uint32_t cbOffset = off + prog->driver->io.suInfoBase;
+
+   Value *indirect = NULL;
+   if (ptr) {
+      indirect =
+         bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(), ptr, bld.mkImm(4));
+   }
+
+   // The symbol keeps the TYPE_U64 tag while the load itself is TYPE_U32.
+   Symbol *sym =
+      bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U64, cbOffset + 8);
+   return bld.mkLoadv(TYPE_U32, sym, indirect);
+}
+
inline Value *
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
{
return handleRDSV(i);
case OP_WRSV:
return handleWRSV(i);
+ case OP_STORE:
case OP_LOAD:
if (i->src(0).getFile() == FILE_SHADER_INPUT) {
if (prog->getType() == Program::TYPE_COMPUTE) {
} else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
i->op = OP_VFETCH;
+ } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
+ Value *ind = i->getIndirect(0, 1);
+ Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16);
+ // XXX come up with a way not to do this for EVERY little access but
+ // rather to batch these up somehow. Unfortunately we've lost the
+ // information about the field width by the time we get here.
+ Value *offset = bld.loadImm(NULL, i->getSrc(0)->reg.data.offset + typeSizeof(i->sType));
+ Value *length = loadResLength32(ind, i->getSrc(0)->reg.fileIndex * 16);
+ Value *pred = new_LValue(func, FILE_PREDICATE);
+ if (i->src(0).isIndirect(0)) {
+ bld.mkOp2(OP_ADD, TYPE_U64, ptr, ptr, i->getIndirect(0, 0));
+ bld.mkOp2(OP_ADD, TYPE_U32, offset, offset, i->getIndirect(0, 0));
+ }
+ i->setIndirect(0, 1, NULL);
+ i->setIndirect(0, 0, ptr);
+ bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
+ i->setPredicate(CC_NOT_P, pred);
+ if (i->defExists(0)) {
+ bld.mkMov(i->getDef(0), bld.mkImm(0));
+ }
}
break;
case OP_ATOM:
if (targ->getChipset() >= NVISA_GK104_CHIPSET)
handleSurfaceOpNVE4(i->asTex());
break;
+ case OP_SUQ:
+ handleSUQ(i);
+ break;
default:
break;
}
bool handleTXQ(TexInstruction *);
virtual bool handleManualTXD(TexInstruction *);
bool handleTXLQ(TexInstruction *);
+ bool handleSUQ(Instruction *);
bool handleATOM(Instruction *);
bool handleCasExch(Instruction *, bool needCctl);
void handleSurfaceOpNVE4(TexInstruction *);
void readTessCoord(LValue *dst, int c);
Value *loadResInfo32(Value *ptr, uint32_t off);
+ Value *loadResInfo64(Value *ptr, uint32_t off);
+ Value *loadResLength32(Value *ptr, uint32_t off);
Value *loadMsInfo32(Value *ptr, uint32_t off);
Value *loadTexHandle(Value *ptr, unsigned int slot);
void expr(Instruction *, ImmediateValue&, ImmediateValue&);
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
void opnd(Instruction *, ImmediateValue&, int s);
+ void opnd3(Instruction *, ImmediateValue&);
void unary(Instruction *, const ImmediateValue&);
else
if (i->srcExists(1) && i->src(1).getImmediate(src1))
opnd(i, src1, 1);
+ if (i->srcExists(2) && i->src(2).getImmediate(src2))
+ opnd3(i, src2);
}
return true;
}
}
}
+// Fold on an immediate third source. For OP_MAD / OP_FMA whose third
+// operand satisfies imm2.isInteger(0), drop that operand and turn the
+// instruction into OP_MUL, counting one fold. Every other case leaves the
+// instruction untouched.
+void
+ConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
+{
+   const bool isMulAdd = (i->op == OP_MAD || i->op == OP_FMA);
+
+   if (!isMulAdd || !imm2.isInteger(0))
+      return;
+
+   i->op = OP_MUL;
+   i->setSrc(2, NULL);
+   foldCount++;
+}
+
void
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
{
i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
}
break;
+ case OP_SHR:
+ if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) {
+ bld.setPosition(i, false);
+ i->op = OP_AND;
+ i->setSrc(0, si->getSrc(0));
+ i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1)));
+ }
+ break;
case OP_MUL:
int muls;
if (isFloatType(si->dType))
}
} else
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
+ if (typeSizeof(ldst->dType) == 4 &&
+ ldst->src(1).getFile() == FILE_GPR &&
+ ldst->getSrc(1)->getInsn()->op == OP_NOP) {
+ delete_Instruction(prog, ldst);
+ continue;
+ }
isLoad = false;
} else {
// TODO: maybe have all fixed ops act as barrier ?
if (that->srcExists(s))
return false;
- if (op == OP_LOAD || op == OP_VFETCH) {
+ if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) {
switch (src(0).getFile()) {
case FILE_MEMORY_CONST:
case FILE_SHADER_INPUT:
ik = phi->getSrc(0)->getInsn();
if (!ik)
continue; // probably a function input
+ if (ik->defCount(0xff) > 1)
+ continue; // too painful to check if we can really push this forward
for (s = 1; phi->srcExists(s); ++s) {
if (phi->getSrc(s)->refCount() > 1)
break;
bool
DeadCodeElim::visit(BasicBlock *bb)
{
- Instruction *next;
+ Instruction *prev;
- for (Instruction *i = bb->getFirst(); i; i = next) {
- next = i->next;
+ for (Instruction *i = bb->getExit(); i; i = prev) {
+ prev = i->prev;
if (i->isDead()) {
++deadCount;
delete_Instruction(prog, i);
"subfm",
"suclamp",
"sueau",
+ "suq",
"madsp",
"texbar",
"dfdx",
delete[] nodes;
nodes = NULL;
+ hi.next = hi.prev = &hi;
+ lo[0].next = lo[0].prev = &lo[0];
+ lo[1].next = lo[1].prev = &lo[1];
}
Symbol *
1, 1, 1, // TEX, TXB, TXL,
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
- 3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP
+ 3, 3, 3, 1, 3, // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
// SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE,
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE,
- // SUBFM, SUCLAMP, SUEAU, MADSP
- OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
+ // SUBFM, SUCLAMP, SUEAU, SUQ, MADSP
+ OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH,
// TEXBAR
OPCLASS_OTHER,
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
int i;
for (i = 0; i < num_buffers; ++i) {
+#ifndef NDEBUG
assert(bsp_bo->size >= str_bsp->w0[0] + num_bytes[i]);
+#endif
memcpy(dec->bsp_ptr, data[i], num_bytes[i]);
dec->bsp_ptr += num_bytes[i];
str_bsp->w0[0] += num_bytes[i];
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
NOUVEAU_ERR("shader translation failed: %i\n", ret);
goto out;
}
- FREE(info->bin.syms);
prog->code = info->bin.code;
prog->code_size = info->bin.codeSize;
break;
}
prog->gp.vert_count = info->prop.gp.maxVertices;
- } else
+ }
+
if (prog->type == PIPE_SHADER_COMPUTE) {
prog->cp.syms = info->bin.syms;
prog->cp.num_syms = info->bin.numSyms;
+ } else {
+ FREE(info->bin.syms);
}
if (prog->pipe.stream_output.num_outputs)
FREE(p->interps);
FREE(p->so);
+ if (type == PIPE_SHADER_COMPUTE)
+ FREE(p->cp.syms);
+
memset(p, 0, sizeof(*p));
p->pipe = pipe;
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
PUSH_DATA (push, nv50->rt_array_mode);
}
+/* Fill `size` bytes of buffer `res`, starting at byte `offset`, with the
+ * repeating pattern `data` (`data_size` bytes) by pushing the pattern words
+ * inline through the NV50_2D SIFC_DATA method.
+ *
+ * 1- and 2-byte patterns are replicated into a single 32-bit word first.
+ */
 static void
+nv50_clear_buffer_push(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(res);
+ /* Number of 32-bit words to push, rounded up. */
+ unsigned count = (size + 3) / 4;
+ /* Low 8 bits of the offset; the destination address below uses the
+  * offset rounded down to 256 bytes and this remainder is passed
+  * separately in the SIFC parameter block (presumably as the destination
+  * X position - confirm against the 2D class method layout).
+  */
+ unsigned xcoord = offset & 0xff;
+ unsigned tmp, i;
+
+ /* Widen sub-word patterns so that the pattern is always whole words. */
+ if (data_size == 1) {
+ tmp = *(unsigned char *)data;
+ tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
+ data = &tmp;
+ data_size = 4;
+ } else if (data_size == 2) {
+ tmp = *(unsigned short *)data;
+ tmp = (tmp << 16) | tmp;
+ data = &tmp;
+ data_size = 4;
+ }
+
+ unsigned data_words = data_size / 4;
+
+ nouveau_bufctx_refn(nv50->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nv50->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ offset &= ~0xff;
+
+ BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_2D(DST_PITCH), 5);
+ PUSH_DATA (push, 262144);
+ PUSH_DATA (push, 65536);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM);
+ BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10);
+ PUSH_DATA (push, size);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, xcoord);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ /* Emit the data in packets of at most NV04_PFIFO_MAX_PACKET_LEN words,
+  * each holding a whole number of pattern repetitions.
+  * NOTE(review): if count were ever smaller than data_words, nr would be 0
+  * and this loop would not terminate; this relies on the caller passing a
+  * size that is a multiple of the pattern size - confirm.
+  */
+ while (count) {
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
+
+ BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ }
+
+ /* Only when buf->mm is set: point the buffer's fence and write fence at
+  * the screen's current fence.
+  */
+ if (buf->mm) {
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nv50->bufctx, 0);
+}
+
static void
nv50_clear_buffer(struct pipe_context *pipe,
struct pipe_resource *res,
assert(size % data_size == 0);
+ if (offset & 0xff) {
+ unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
+ assert(fixup_size % data_size == 0);
+ nv50_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
+ offset += fixup_size;
+ size -= fixup_size;
+ if (!size)
+ return;
+ }
+
elements = size / data_size;
height = (elements + 8191) / 8192;
width = elements / height;
+ if (height > 1)
+ width &= ~0xff;
+ assert(width > 0);
BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
PUSH_DATAf(push, color.f[0]);
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
PUSH_DATA (push, 1);
BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
- PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
- PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
PUSH_DATA (push, nv50_format_table[dst_fmt].rt);
PUSH_DATA (push, 0);
PUSH_DATA (push, 0);
BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
- PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
+ PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | align(width * data_size, 0x100));
PUSH_DATA (push, height);
BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
PUSH_DATA (push, 0);
BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
PUSH_DATA (push, 0x3c);
+ BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
+ PUSH_DATA (push, nv50->cond_condmode);
+
+ if (buf->mm) {
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
+ }
+
if (width * height != elements) {
offset += width * height * data_size;
width = elements - width * height;
- height = 1;
- BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 2);
- PUSH_DATAh(push, buf->bo->offset + buf->offset + offset);
- PUSH_DATA (push, buf->bo->offset + buf->offset + offset);
- BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
- PUSH_DATA (push, NV50_3D_RT_HORIZ_LINEAR | (width * data_size));
- PUSH_DATA (push, height);
- BEGIN_NI04(push, NV50_3D(CLEAR_BUFFERS), 1);
- PUSH_DATA (push, 0x3c);
+ nv50_clear_buffer_push(pipe, res, offset, width * data_size,
+ data, data_size);
}
- BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
- PUSH_DATA (push, nv50->cond_condmode);
-
- nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence);
- nouveau_fence_ref(nv50->screen->base.fence.current, &buf->fence_wr);
-
nv50->dirty |= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
}
daic_runout_check:
branz annul $r7 #daic_runout
bra annul #daic_restore
+
+/* NVC0_3D_MACRO_QUERY_BUFFER_WRITE:
+ *
+ * This is a combination macro for all of our query buffer object needs.
+ * It has the option to clamp results to a configurable amount, as well as
+ * to write out one or two words.
+ *
+ * We use the query engine to write out the values, and expect the query
+ * address to point to the right place.
+ *
+ * arg = clamp value (0 means unclamped). clamped means just 1 written value.
+ * parm[0] = LSB of end value
+ * parm[1] = MSB of end value
+ * parm[2] = LSB of start value
+ * parm[3] = MSB of start value
+ * parm[4] = desired sequence
+ * parm[5] = actual sequence
+ */
+.section #mme9097_query_buffer_write
+ /* Fetch the six parameters in order: end value ($r2 = LSB, $r3 = MSB),
+  * start value ($r4 = LSB, $r5 = MSB), desired sequence ($r6) and actual
+  * sequence ($r7). The clamp argument is tested through $r1 below.
+  */
+ parm $r2
+ parm $r3
+ parm $r4
+ parm $r5 maddr 0x16c2 /* QUERY_SEQUENCE */
+ parm $r6
+ parm $r7
+ mov $r6 (sub $r7 $r6) /* actual - desired */
+ mov $r6 (sbb 0x0 0x0) /* if there was underflow, not reached yet */
+ braz annul $r6 #qbw_ready
+ exit
+qbw_ready:
+ /* low word of (end - start) */
+ mov $r2 (sub $r2 $r4)
+ /* a zero clamp value skips the clamping sequence */
+ braz $r1 #qbw_postclamp
+ /* high word of (end - start), including the borrow */
+ mov $r3 (sbb $r3 $r5)
+ branz annul $r3 #qbw_clamp
+ /* clamp - low word; the borrow is captured in $r4 by the next sbb */
+ mov $r4 (sub $r1 $r2)
+ mov $r4 (sbb 0x0 0x0)
+ braz annul $r4 #qbw_postclamp
+qbw_clamp:
+ /* replace the result with the clamp value */
+ mov $r2 $r1
+qbw_postclamp:
+ send $r2
+ mov $r4 0x1000
+ branz annul $r1 #qbw_done
+ send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+ maddr 0x16c2 /* QUERY_SEQUENCE */
+ send $r3
+qbw_done:
+ exit send (extrinsrt 0x0 $r4 0x0 0x10 0x10)
+ nop
0xfffef837,
0xfffdc027,
};
+
+uint32_t mme9097_query_buffer_write[] = {
+ 0x00000201,
+ 0x00000301,
+/* 0x000a: qbw_ready */
+ 0x00000401,
+ 0x05b08551,
+/* 0x0011: qbw_clamp */
+/* 0x0012: qbw_postclamp */
+ 0x00000601,
+ 0x00000701,
+/* 0x0018: qbw_done */
+ 0x0005be10,
+ 0x00060610,
+ 0x0000b027,
+ 0x00000091,
+ 0x00051210,
+ 0x0001c807,
+ 0x00075b10,
+ 0x00011837,
+ 0x00048c10,
+ 0x00060410,
+ 0x0000a027,
+ 0x00000a11,
+ 0x00001041,
+ 0x04000411,
+ 0x00010837,
+ 0x84010042,
+ 0x05b08021,
+ 0x00001841,
+ 0x840100c2,
+ 0x00000011,
+};
nvc0_memory_barrier(struct pipe_context *pipe, unsigned flags)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
int i, s;
if (flags & PIPE_BARRIER_MAPPED_BUFFER) {
}
}
}
+ if (flags & PIPE_BARRIER_SHADER_BUFFER) {
+ IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
+ }
}
static void
pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
}
+ for (s = 0; s < 6; ++s)
+ for (i = 0; i < NVC0_MAX_BUFFERS; ++i)
+ pipe_resource_reference(&nvc0->buffers[s][i].buffer, NULL);
+
for (i = 0; i < nvc0->num_tfbbufs; ++i)
pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
int ref)
{
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
- unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
unsigned s, i;
- if (bind & PIPE_BIND_RENDER_TARGET) {
+ if (res->bind & PIPE_BIND_RENDER_TARGET) {
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
if (nvc0->framebuffer.cbufs[i] &&
nvc0->framebuffer.cbufs[i]->texture == res) {
}
}
}
- if (bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
if (nvc0->framebuffer.zsbuf &&
nvc0->framebuffer.zsbuf->texture == res) {
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
}
- if (bind & (PIPE_BIND_VERTEX_BUFFER |
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER |
- PIPE_BIND_STREAM_OUTPUT |
- PIPE_BIND_COMMAND_ARGS_BUFFER |
- PIPE_BIND_SAMPLER_VIEW)) {
+ if (res->target == PIPE_BUFFER) {
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
if (nvc0->vtxbuf[i].buffer == res) {
nvc0->dirty |= NVC0_NEW_ARRAYS;
}
}
}
+
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < NVC0_MAX_BUFFERS; ++i) {
+ if (nvc0->buffers[s][i].buffer == res) {
+ nvc0->buffers_dirty[s] |= 1 << i;
+ nvc0->dirty |= NVC0_NEW_BUFFERS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
}
return ref;
#define NVC0_NEW_SURFACES (1 << 23)
#define NVC0_NEW_MIN_SAMPLES (1 << 24)
#define NVC0_NEW_TESSFACTOR (1 << 25)
+#define NVC0_NEW_BUFFERS (1 << 26)
#define NVC0_NEW_CP_PROGRAM (1 << 0)
#define NVC0_NEW_CP_SURFACES (1 << 1)
#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
#define NVC0_BIND_TFB 244
#define NVC0_BIND_SUF 245
-#define NVC0_BIND_SCREEN 246
-#define NVC0_BIND_TLS 247
-#define NVC0_BIND_3D_COUNT 248
+#define NVC0_BIND_BUF 246
+#define NVC0_BIND_SCREEN 247
+#define NVC0_BIND_TLS 249
+#define NVC0_BIND_3D_COUNT 250
/* compute bufctx (during launch_grid) */
#define NVC0_BIND_CP_CB(i) ( 0 + (i))
struct nvc0_blitctx *blit;
+ /* NOTE: some of these surfaces may reference buffers */
struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
uint16_t surfaces_dirty[2];
uint16_t surfaces_valid[2];
+ struct pipe_shader_buffer buffers[6][NVC0_MAX_BUFFERS];
+ uint32_t buffers_dirty[6];
+ uint32_t buffers_valid[6];
+
struct util_dynarray global_residents;
};
#define NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT 0x00003850
+#define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858
+
#endif /* __NVC0_MACROS_H__ */
}
info->io.resInfoCBSlot = 15;
info->io.sampleInfoBase = 256 + 128;
+ info->io.suInfoBase = 512;
info->io.msInfoCBSlot = 15;
info->io.msInfoBase = 0; /* TODO */
}
}
*/
if (info->io.globalAccess)
+ prog->hdr[0] |= 1 << 26;
+ if (info->io.globalAccess & 0x2)
prog->hdr[0] |= 1 << 16;
if (info->io.fp64)
prog->hdr[0] |= 1 << 27;
return q->funcs->get_query_result(nvc0_context(pipe), q, wait, result);
}
+/* pipe_context::get_query_result_resource hook: forwards the request to
+ * the query's own get_query_result_resource implementation. A query whose
+ * function table lacks one trips an assert and is otherwise ignored.
+ */
+static void
+nvc0_get_query_result_resource(struct pipe_context *pipe,
+                               struct pipe_query *pq,
+                               boolean wait,
+                               enum pipe_query_value_type result_type,
+                               int index,
+                               struct pipe_resource *resource,
+                               unsigned offset)
+{
+   struct nvc0_query *query = nvc0_query(pq);
+
+   if (query->funcs->get_query_result_resource) {
+      query->funcs->get_query_result_resource(nvc0_context(pipe), query, wait,
+                                              result_type, index, resource,
+                                              offset);
+      return;
+   }
+
+   assert(!"Unexpected lack of get_query_result_resource");
+}
+
static void
nvc0_render_condition(struct pipe_context *pipe,
struct pipe_query *pq,
}
if (wait)
- nvc0_hw_query_fifo_wait(push, q);
+ nvc0_hw_query_fifo_wait(nvc0, q);
PUSH_SPACE(push, 7);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
pipe->begin_query = nvc0_begin_query;
pipe->end_query = nvc0_end_query;
pipe->get_query_result = nvc0_get_query_result;
+ pipe->get_query_result_resource = nvc0_get_query_result_resource;
pipe->render_condition = nvc0_render_condition;
nvc0->cond_condmode = NVC0_3D_COND_MODE_ALWAYS;
}
void (*end_query)(struct nvc0_context *, struct nvc0_query *);
boolean (*get_query_result)(struct nvc0_context *, struct nvc0_query *,
boolean, union pipe_query_result *);
+ void (*get_query_result_resource)(struct nvc0_context *nvc0,
+ struct nvc0_query *q,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset);
};
struct nvc0_query {
return true;
}
+/* Write the result of hardware query `q` into `resource` at byte `offset`
+ * from the GPU side.
+ *
+ * index == -1 writes the query's availability instead of a result.
+ * Otherwise the MACRO_QUERY_BUFFER_WRITE macro is invoked with seven data
+ * words: a clamp value, the two 64-bit counter values, and a pair of
+ * sequence values.
+ */
 static void
+nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
+ struct nvc0_query *q,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_hw_query *hq = nvc0_hw_query(q);
+ struct nv04_resource *buf = nv04_resource(resource);
+ unsigned stride;
+
+ assert(!hq->funcs || !hq->funcs->get_query_result);
+
+ if (index == -1) {
+ /* TODO: Use a macro to write the availability of the query */
+ if (hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_update(nvc0->screen->base.client, q);
+ /* ready[1] is zero-initialized; it is the upper word when two words
+  * are written for the 64-bit result types.
+  */
+ uint32_t ready[2] = {hq->state == NVC0_HW_QUERY_STATE_READY};
+ nvc0->base.push_cb(&nvc0->base, buf, offset,
+ result_type >= PIPE_QUERY_TYPE_I64 ? 2 : 1,
+ ready);
+ return;
+ }
+
+ /* If the fence guarding this query has not been emitted, that makes a lot
+ * of the following logic more complicated.
+ */
+ if (hq->is64bit && hq->fence->state < NOUVEAU_FENCE_STATE_EMITTED)
+ nouveau_fence_emit(hq->fence);
+
+ /* We either need to compute a 32- or 64-bit difference between 2 values,
+ * and then store the result as either a 32- or 64-bit value. As such let's
+ * treat all inputs as 64-bit (and just push an extra 0 for the 32-bit
+ * ones), and have one macro that clamps result to i32, u32, or just
+ * outputs the difference (no need to worry about 64-bit clamping).
+ */
+ if (hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_update(nvc0->screen->base.client, q);
+
+ if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
+ nvc0_hw_query_fifo_wait(nvc0, q);
+
+ /* NOTE(review): up to three nouveau_pushbuf_data() calls follow, while the
+  * last argument here is 0 - confirm whether push segments need to be
+  * reserved for them.
+  */
+ nouveau_pushbuf_space(push, 16, 2, 0);
+ PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_1IC0(push, NVC0_3D(MACRO_QUERY_BUFFER_WRITE), 7);
+ /* Word 1: clamp value (0 leaves the result unclamped). */
+ if (q->type == PIPE_QUERY_OCCLUSION_PREDICATE) /* XXX what if 64-bit? */
+ PUSH_DATA(push, 0x00000001);
+ else if (result_type == PIPE_QUERY_TYPE_I32)
+ PUSH_DATA(push, 0x7fffffff);
+ else if (result_type == PIPE_QUERY_TYPE_U32)
+ PUSH_DATA(push, 0xffffffff);
+ else
+ PUSH_DATA(push, 0x00000000);
+
+ /* stride is the distance, in 16-byte slots, between the two values of one
+  * result index in the 64-bit path below.
+  */
+ switch (q->type) {
+ case PIPE_QUERY_SO_STATISTICS:
+ stride = 2;
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ stride = 12;
+ break;
+ default:
+ assert(index == 0);
+ stride = 1;
+ break;
+ }
+
+ /* Words 2-5: two 64-bit values taken directly from the query buffer. The
+  * 32-bit path reads one word per value and pushes a zero upper word.
+  */
+ if (hq->is64bit) {
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index,
+ 8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride),
+ 8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ } else {
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ PUSH_DATA(push, 0);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 + 4,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ PUSH_DATA(push, 0);
+ }
+
+ /* Words 6-7: sequence pair. Both are zero when the result is already known
+  * to be available (or was waited for); otherwise the expected sequence is
+  * pushed and the second word is read from the fence / query buffer.
+  */
+ if (wait || hq->state == NVC0_HW_QUERY_STATE_READY) {
+ PUSH_DATA(push, 0);
+ PUSH_DATA(push, 0);
+ } else if (hq->is64bit) {
+ PUSH_DATA(push, hq->fence->sequence);
+ nouveau_pushbuf_data(push, nvc0->screen->fence.bo, 0,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ } else {
+ PUSH_DATA(push, hq->sequence);
+ nouveau_pushbuf_data(push, hq->bo, hq->offset,
+ 4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+ }
+
+ /* Only when buf->mm is set: point the buffer's fence and write fence at
+  * the screen's current fence.
+  */
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+}
+
static const struct nvc0_query_funcs hw_query_funcs = {
.destroy_query = nvc0_hw_destroy_query,
.begin_query = nvc0_hw_begin_query,
.end_query = nvc0_hw_end_query,
.get_query_result = nvc0_hw_get_query_result,
+ .get_query_result_resource = nvc0_hw_get_query_result_resource,
};
struct nvc0_query *
}
void
-nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nvc0_query *q)
+nvc0_hw_query_fifo_wait(struct nvc0_context *nvc0, struct nvc0_query *q)
{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_hw_query *hq = nvc0_hw_query(q);
unsigned offset = hq->offset;
PUSH_SPACE(push, 5);
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
- PUSH_DATAh(push, hq->bo->offset + offset);
- PUSH_DATA (push, hq->bo->offset + offset);
- PUSH_DATA (push, hq->sequence);
+ if (hq->is64bit) {
+ PUSH_DATAh(push, nvc0->screen->fence.bo->offset);
+ PUSH_DATA (push, nvc0->screen->fence.bo->offset);
+ PUSH_DATA (push, hq->fence->sequence);
+ } else {
+ PUSH_DATAh(push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->bo->offset + offset);
+ PUSH_DATA (push, hq->sequence);
+ }
PUSH_DATA (push, (1 << 12) |
NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
}
nvc0_hw_query_pushbuf_submit(struct nouveau_pushbuf *, struct nvc0_query *,
unsigned);
void
-nvc0_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nvc0_query *);
+nvc0_hw_query_fifo_wait(struct nvc0_context *, struct nvc0_query *);
#endif
return 256;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 1; /* 256 for binding as RT, but that's not possible in GL */
+ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+ return 16;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
case PIPE_CAP_MAX_VIEWPORTS:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 1;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
- case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_VENDOR_ID:
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
- case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
return 0;
+ case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ return NVC0_MAX_BUFFERS;
case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
return 16; /* would be 32 in linked (OpenGL-style) mode */
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
push->rsvd_kick = 5;
screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_SHADER_BUFFER |
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_COMMAND_ARGS_BUFFER;
+ PIPE_BIND_COMMAND_ARGS_BUFFER | PIPE_BIND_QUERY_BUFFER;
screen->base.sysmem_bindings |=
PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
/* TIC and TSC entries for each unit (nve4+ only) */
/* auxiliary constants (6 user clip planes, base instance id) */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10));
BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
PUSH_DATA (push, (15 << 4) | 1);
if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
/* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 256);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
PUSH_DATA (push, 0);
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
PUSH_DATAf(push, 0.0f);
BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
- PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10));
if (screen->base.drm->version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
PUSH_DATA (push, screen->tls->size);
BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
PUSH_DATA (push, 0);
+ /* Reduce likelihood of collision with real buffers by placing the hole at
+ * the top of the 4G area. This will have to be dealt with for real
+ * eventually by blocking off that area from the VM.
+ */
BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
- PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0xff << 24);
if (screen->eng3d->oclass < GM107_3D_CLASS) {
ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 17, 1 << 20, NULL,
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
#define NVC0_MAX_VIEWPORTS 16
+#define NVC0_MAX_BUFFERS 32
+
struct nvc0_context;
continue;
if (!targ->clean)
- nvc0_hw_query_fifo_wait(push, nvc0_query(targ->pq));
+ nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
nouveau_pushbuf_space(push, 0, 0, 1);
BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
PUSH_DATA (push, 1);
unsigned start_slot, unsigned count,
struct pipe_image_view **views)
{
-#if 0
- nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
+}
+
+/**
+ * Bind or unbind a contiguous range of shader buffer slots of one stage.
+ *
+ * \param nvc0      context whose buffer bindings are updated
+ * \param t         shader stage index, must be below 5
+ * \param start     first slot to update
+ * \param nr        number of consecutive slots to update
+ * \param pbuffers  nr bindings to install, or NULL to drop the references
+ *                  held by the slots in the range
+ *
+ * buffers_valid[t] ends up with a bit set for every slot in the range that
+ * holds a resource, every slot in the range is flagged in buffers_dirty[t],
+ * and the NVC0_BIND_BUF bin of the 3D bufctx is reset.
+ */
+static void
+nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t,
+                        unsigned start, unsigned nr,
+                        struct pipe_shader_buffer *pbuffers)
+{
+   const unsigned end = start + nr;
+   /* Use unsigned arithmetic and special-case a full-width range: shifting
+    * an int by 32, or into its sign bit, is undefined behaviour.
+    */
+   const unsigned mask = (nr >= 32 ? ~0u : (1u << nr) - 1) << start;
+   unsigned i;
+
+   assert(t < 5);
+
+   if (pbuffers) {
+      for (i = start; i < end; ++i) {
+         const unsigned p = i - start;
+         if (pbuffers[p].buffer)
+            nvc0->buffers_valid[t] |= (1u << i);
+         else
+            nvc0->buffers_valid[t] &= ~(1u << i);
+         nvc0->buffers[t][i].buffer_offset = pbuffers[p].buffer_offset;
+         nvc0->buffers[t][i].buffer_size = pbuffers[p].buffer_size;
+         pipe_resource_reference(&nvc0->buffers[t][i].buffer, pbuffers[p].buffer);
+      }
+   } else {
+      /* Unbind: only the resource references are released, the stored
+       * offset/size of the slots are left as they were.
+       */
+      for (i = start; i < end; ++i)
+         pipe_resource_reference(&nvc0->buffers[t][i].buffer, NULL);
+      nvc0->buffers_valid[t] &= ~mask;
+   }
+   nvc0->buffers_dirty[t] |= mask;
+
+   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF);
+}
+
+static void
+nvc0_set_shader_buffers(struct pipe_context *pipe,
+ unsigned shader,
+ unsigned start, unsigned nr,
+ struct pipe_shader_buffer *buffers)
+{
+ const unsigned s = nvc0_shader_stage(shader);
+ nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers);
- nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
-#endif
+ nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS;
}
static inline void
pipe->set_global_binding = nvc0_set_global_bindings;
pipe->set_compute_resources = nvc0_set_compute_resources;
pipe->set_shader_images = nvc0_set_shader_images;
+ pipe->set_shader_buffers = nvc0_set_shader_buffers;
nvc0->sample_mask = ~0;
nvc0->min_samples = 1;
ms = 1 << ms_mode;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
- PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
- PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 9));
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
+ PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
PUSH_DATA (push, 256 + 128);
for (i = 0; i < ms; i++) {
struct nouveau_bo *bo = nvc0->screen->uniform_bo;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
- PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9));
- PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9));
+ PUSH_DATA (push, 1024);
+ PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10));
+ PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10));
BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
PUSH_DATA (push, 256);
PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
}
}
+/* Re-emit the shader buffer descriptors of all 5 stages.
+ *
+ * Per stage, the 1024-byte constant buffer located at
+ * uniform_bo->offset + (5 << 16) + (stage << 10) is selected and, starting at
+ * position 512, four words are written for each of the NVC0_MAX_BUFFERS
+ * slots: the address (PUSH_DATA word followed by the PUSH_DATAh word), the
+ * buffer size and a zero word.  Slots without a resource get four zero
+ * words.  Every bound resource is also passed to BCTX_REFN on the 3D bufctx
+ * (BUF, RDWR).
+ */
+static void
+nvc0_validate_buffers(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   int stage;
+
+   for (stage = 0; stage < 5; stage++) {
+      int slot;
+
+      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+      PUSH_DATA (push, 1024);
+      PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (stage << 10));
+      PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (stage << 10));
+      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS);
+      PUSH_DATA (push, 512);
+
+      for (slot = 0; slot < NVC0_MAX_BUFFERS; slot++) {
+         struct nv04_resource *res;
+
+         if (!nvc0->buffers[stage][slot].buffer) {
+            /* empty slot: all-zero descriptor */
+            PUSH_DATA (push, 0);
+            PUSH_DATA (push, 0);
+            PUSH_DATA (push, 0);
+            PUSH_DATA (push, 0);
+            continue;
+         }
+
+         res = nv04_resource(nvc0->buffers[stage][slot].buffer);
+         PUSH_DATA (push, res->address + nvc0->buffers[stage][slot].buffer_offset);
+         PUSH_DATAh(push, res->address + nvc0->buffers[stage][slot].buffer_offset);
+         PUSH_DATA (push, nvc0->buffers[stage][slot].buffer_size);
+         PUSH_DATA (push, 0);
+         BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR);
+      }
+   }
+
+}
+
+
static void
nvc0_validate_sample_mask(struct nvc0_context *nvc0)
{
{ nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
{ nvc0_validate_surfaces, NVC0_NEW_SURFACES },
+ { nvc0_validate_buffers, NVC0_NEW_BUFFERS },
{ nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
{ nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG },
{ nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES },
}
static void
-nvc0_clear_buffer_cpu(struct pipe_context *pipe,
- struct pipe_resource *res,
- unsigned offset, unsigned size,
- const void *data, int data_size)
+nvc0_clear_buffer_push_nvc0(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nv04_resource *buf = nv04_resource(res);
- struct pipe_transfer *pt;
- struct pipe_box box;
- unsigned elements, i;
+ unsigned i;
- elements = size / data_size;
+ nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
- u_box_1d(offset, size, &box);
+ unsigned count = (size + 3) / 4;
+ unsigned data_words = data_size / 4;
- uint8_t *map = buf->vtbl->transfer_map(pipe, res, 0, PIPE_TRANSFER_WRITE,
- &box, &pt);
+ while (count) {
+ unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+ unsigned nr = nr_data * data_words;
- for (i = 0; i < elements; ++i)
- memcpy(&map[i*data_size], data, data_size);
+ if (!PUSH_SPACE(push, nr + 9))
+ break;
- buf->vtbl->transfer_unmap(pipe, pt);
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
+
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
+ for (i = 0; i < nr_data; i++)
+ PUSH_DATAp(push, data, data_words);
+
+ count -= nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+/**
+ * Fill a buffer range with a repeated pattern using inline P2MF uploads.
+ *
+ * \param pipe       context whose pushbuf receives the commands
+ * \param res        destination buffer
+ * \param offset     byte offset of the range inside \p res
+ * \param size       number of bytes to write
+ * \param data       pattern to replicate
+ * \param data_size  pattern size in bytes, consumed as whole 32-bit words
+ *
+ * The range is written in chunks of at most NV04_PFIFO_MAX_PACKET_LEN words,
+ * each chunk carrying a whole number of patterns.  The loop stops early when
+ * pushbuf space cannot be obtained, or when the remaining words no longer
+ * hold a complete pattern (callers are expected to pass a size that is a
+ * multiple of data_size, but only assert it).
+ */
+static void
+nvc0_clear_buffer_push_nve4(struct pipe_context *pipe,
+                            struct pipe_resource *res,
+                            unsigned offset, unsigned size,
+                            const void *data, int data_size)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nv04_resource *buf = nv04_resource(res);
+   unsigned i;
+
+   nouveau_bufctx_refn(nvc0->bufctx, 0, buf->bo, buf->domain | NOUVEAU_BO_WR);
+   nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+   nouveau_pushbuf_validate(push);
+
+   unsigned count = (size + 3) / 4;
+   unsigned data_words = data_size / 4;
+
+   while (count) {
+      unsigned nr_data = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN) / data_words;
+      unsigned nr = nr_data * data_words;
+
+      /* Without this a trailing partial pattern would make no progress and
+       * the loop would never terminate.
+       */
+      if (!nr_data)
+         break;
+
+      if (!PUSH_SPACE(push, nr + 10))
+         break;
+
+      BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
+      PUSH_DATAh(push, buf->address + offset);
+      PUSH_DATA (push, buf->address + offset);
+      BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_LINE_LENGTH_IN), 2);
+      PUSH_DATA (push, MIN2(size, nr * 4));
+      PUSH_DATA (push, 1);
+      /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+      BEGIN_1IC0(push, NVE4_P2MF(UPLOAD_EXEC), nr + 1);
+      PUSH_DATA (push, 0x1001);
+      for (i = 0; i < nr_data; i++)
+         PUSH_DATAp(push, data, data_words);
+
+      count -= nr;
+      offset += nr * 4;
+      size -= nr * 4;
+   }
+
+   /* Only sub-allocated buffers (buf->mm set) get their fences updated. */
+   if (buf->mm) {
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+      nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+   }
+
+   nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+
+/**
+ * Fill a buffer range with a repeated pattern through the pushbuf.
+ *
+ * \param pipe       context used for the upload
+ * \param res        destination buffer
+ * \param offset     byte offset of the range inside \p res
+ * \param size       number of bytes to write
+ * \param data       pattern to replicate
+ * \param data_size  pattern size in bytes
+ *
+ * 1- and 2-byte patterns are first replicated into one 32-bit word, then the
+ * request is forwarded to the pre-NVE4 or the NVE4+ uploader depending on the
+ * screen's 3D class.
+ */
+static void
+nvc0_clear_buffer_push(struct pipe_context *pipe,
+                       struct pipe_resource *res,
+                       unsigned offset, unsigned size,
+                       const void *data, int data_size)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   unsigned tmp;
+
+   if (data_size == 1) {
+      tmp = *(const unsigned char *)data;
+      tmp = (tmp << 24) | (tmp << 16) | (tmp << 8) | tmp;
+      data = &tmp;
+      data_size = 4;
+   } else if (data_size == 2) {
+      /* memcpy instead of a pointer cast: the source has no alignment or
+       * effective-type guarantee, and the cast would discard const.
+       */
+      uint16_t half;
+      memcpy(&half, data, sizeof(half));
+      tmp = half;
+      tmp = (tmp << 16) | tmp;
+      data = &tmp;
+      data_size = 4;
+   }
+
+   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
+      nvc0_clear_buffer_push_nvc0(pipe, res, offset, size, data, data_size);
+   else
+      nvc0_clear_buffer_push_nve4(pipe, res, offset, size, data, data_size);
}
static void
memcpy(&color.ui, data, 16);
break;
case 12:
- /* This doesn't work, RGB32 is not a valid RT format.
- * dst_fmt = PIPE_FORMAT_R32G32B32_UINT;
- * memcpy(&color.ui, data, 12);
- * memset(&color.ui[3], 0, 4);
+ /* RGB32 is not a valid RT format. This will be handled by the pushbuf
+ * uploader.
*/
break;
case 8:
assert(size % data_size == 0);
if (data_size == 12) {
- /* TODO: Find a way to do this with the GPU! */
- nvc0_clear_buffer_cpu(pipe, res, offset, size, data, data_size);
+ nvc0_clear_buffer_push(pipe, res, offset, size, data, data_size);
return;
}
+ if (offset & 0xff) {
+ unsigned fixup_size = MIN2(size, align(offset, 0x100) - offset);
+ assert(fixup_size % data_size == 0);
+ nvc0_clear_buffer_push(pipe, res, offset, fixup_size, data, data_size);
+ offset += fixup_size;
+ size -= fixup_size;
+ if (!size)
+ return;
+ }
+
elements = size / data_size;
height = (elements + 16383) / 16384;
width = elements / height;
+ if (height > 1)
+ width &= ~0xff;
+ assert(width > 0);
if (!PUSH_SPACE(push, 40))
return;
BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
- PUSH_DATA (push, width * data_size);
+ PUSH_DATA (push, align(width * data_size, 0x100));
PUSH_DATA (push, height);
PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
PUSH_DATA (push, NVC0_3D_RT_TILE_MODE_LINEAR);
IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
+
+ if (buf->mm) {
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+ }
+
if (width * height != elements) {
offset += width * height * data_size;
width = elements - width * height;
- height = 1;
-
- BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 4);
- PUSH_DATAh(push, buf->address + offset);
- PUSH_DATA (push, buf->address + offset);
- PUSH_DATA (push, width * data_size);
- PUSH_DATA (push, height);
-
- IMMED_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 0x3c);
+ nvc0_clear_buffer_push(pipe, res, offset, width * data_size,
+ data, data_size);
}
- IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode);
-
- nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
- nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
}
return;
address = nvc0->screen->uniform_bo->offset + (5 << 16);
- for (s = 0; s < 5; ++s, address += (1 << 9)) {
+ for (s = 0; s < 5; ++s, address += (1 << 10)) {
uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
if (!dirty)
continue;
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
- PUSH_DATA (push, 512);
+ PUSH_DATA (push, 1024);
PUSH_DATAh(push, address);
PUSH_DATA (push, address);
do {
b = ve->pipe.vertex_buffer_index;
vb = &nvc0->vtxbuf[b];
- if (!vb->buffer) {
+ if (nvc0->vbo_user & (1 << b)) {
if (!(nvc0->constant_vbos & (1 << b))) {
if (ve->pipe.instance_divisor) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
if (unlikely(ve->pipe.instance_divisor)) {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
- PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
PUSH_DATA (push, ve->pipe.instance_divisor);
} else {
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
- PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
}
unsigned b;
const uint32_t mask = nvc0->vbo_user;
- PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
+ PUSH_SPACE(push, nvc0->num_vtxbufs * 8 + nvc0->vertex->num_elements);
for (b = 0; b < nvc0->num_vtxbufs; ++b) {
struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
struct nv04_resource *buf;
}
/* address/value set in nvc0_update_user_vbufs_shared */
continue;
+ } else if (!vb->buffer) {
+ /* there can be holes in the vertex buffer lists */
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
+ continue;
}
buf = nv04_resource(vb->buffer);
offset = vb->buffer_offset;
BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
}
+ /* If there are more elements than buffers, we might not have unset
+ * fetching on the later elements.
+ */
+ for (; b < nvc0->vertex->num_elements; ++b)
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 0);
+
if (nvc0->vbo_user)
nvc0_update_user_vbufs_shared(nvc0);
}
if (count & 1) {
count--;
- PUSH_SPACE(push, 1);
+ PUSH_SPACE(push, 2);
BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
PUSH_DATA (push, *map++);
}
res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
PUSH_SPACE(push, 2);
IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
- nvc0_hw_query_fifo_wait(push, nvc0_query(so->pq));
+ nvc0_hw_query_fifo_wait(nvc0, nvc0_query(so->pq));
if (nvc0->screen->eng3d->oclass < GM107_3D_CLASS)
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
unsigned size, macro, count = info->indirect_count, drawid = info->drawid;
uint32_t offset = buf->offset + info->indirect_offset;
+ PUSH_SPACE(push, 7);
+
/* must make FIFO wait for engines idle before continuing to process */
if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) ||
(buf_count && buf_count->fence_wr &&
if (info->mode == PIPE_PRIM_PATCHES &&
nvc0->state.patch_vertices != info->vertices_per_patch) {
nvc0->state.patch_vertices = info->vertices_per_patch;
+ PUSH_SPACE(push, 1);
IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), nvc0->state.patch_vertices);
}
nvc0_state_validate(nvc0, ~0, 8);
if (nvc0->vertprog->vp.need_draw_parameters) {
+ PUSH_SPACE(push, 9);
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
PUSH_DATA (push, 512);
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9));
}
if (nvc0->cb_dirty) {
+ PUSH_SPACE(push, 1);
IMMED_NVC0(push, NVC0_3D(MEM_BARRIER), 0x1011);
nvc0->cb_dirty = false;
}
if (!nvc0->textures_coherent[s])
continue;
+ PUSH_SPACE(push, nvc0->num_textures[s] * 2);
+
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
if (!(nvc0->textures_coherent[s] & (1 << i)))
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
/* SWTCL-only features. */
}
}
#else
- memset(&shader->binary, 0, sizeof(shader->binary));
+ radeon_shader_binary_init(&shader->binary);
radeon_elf_read(code, header->num_bytes, &shader->binary);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
return shader;
}
-void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
+/**
+ * Destroy a compute state object.
+ *
+ * A NULL \p state is ignored.  With OpenCL support built in, LLVM older than
+ * 3.6 disposes each kernel's module, the kernel array and the LLVM context;
+ * newer LLVM releases the shader binary and the bytecode instead.  The
+ * object itself is freed in every configuration.
+ */
+void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state)
{
- struct r600_pipe_compute *shader = (struct r600_pipe_compute *)state;
+	struct r600_context *ctx = (struct r600_context *)ctx_;
+	COMPUTE_DBG(ctx->screen, "*** evergreen_delete_compute_state\n");
+	struct r600_pipe_compute *shader = state;
if (!shader)
return;
+#ifdef HAVE_OPENCL
+#if HAVE_LLVM < 0x0306
+	for (unsigned i = 0; i < shader->num_kernels; i++) {
+		struct r600_kernel *kernel = &shader->kernels[i];
+		/* dispose the module owned by this kernel; there is no
+		 * free-standing 'module' variable in this function */
+		LLVMDisposeModule(kernel->llvm_module);
+	}
+	FREE(shader->kernels);
+	LLVMContextDispose(shader->llvm_ctx);
+#else
+	radeon_shader_binary_clean(&shader->binary);
+	r600_destroy_shader(&shader->bc);
+
+	/* TODO destroy shader->code_bo, shader->const_bo
+	 * we'll need something like r600_buffer_free */
+#endif
+#endif
FREE(shader);
}
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
unsigned num_waves;
- unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
+ unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
unsigned wave_divisor = (16 * num_pipes);
int group_size = 1;
int grid_size = 1;
* command stream by the start_cs_cmd atom. However, since the SET_CONTEXT_REG
* packet requires that the shader type bit be set, we must initialize all
* context registers needed for compute in this function. The registers
- * intialized by the start_cs_cmd atom can be found in evereen_state.c in the
+ * initialized by the start_cs_cmd atom can be found in evergreen_state.c in the
* functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs depending
* on the GPU family.
*/
int num_threads;
int num_stack_entries;
- /* since all required registers are initialised in the
+ /* since all required registers are initialized in the
* start_compute_cs_cmd atom, we can EMIT_EARLY here.
*/
r600_init_command_buffer(cb, 256);
* R_008E28_SQ_STATIC_THREAD_MGMT3
*/
- /* XXX: We may need to adjust the thread and stack resouce
+ /* XXX: We may need to adjust the thread and stack resource
* values for 3D/compute interop */
r600_store_config_reg_seq(cb, R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 5);
if (util_format_get_blocksize(pipe_format) >= 16)
non_disp_tiling = 1;
}
- nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
if (state->target == PIPE_TEXTURE_1D_ARRAY) {
height = 1;
unsigned block_size =
align(util_format_get_blocksize(pipe_buffer->format), 4);
unsigned pitch_alignment =
- MAX2(64, rctx->screen->b.tiling_info.group_bytes / block_size);
+ MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
unsigned pitch = align(pipe_buffer->width0, pitch_alignment);
/* XXX: This is copied from evergreen_init_color_surface(). I don't
if (util_format_get_blocksize(surf->base.format) >= 16)
non_disp_tiling = 1;
}
- nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
desc = util_format_description(surf->base.format);
for (i = 0; i < 4; i++) {
if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
macro_aspect = eg_macro_tile_aspect(macro_aspect);
bankw = eg_bank_wh(bankw);
bankh = eg_bank_wh(bankh);
- nbanks = eg_num_banks(rscreen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rscreen->b.info.r600_num_banks);
offset >>= 8;
surf->db_z_info = S_028040_ARRAY_MODE(array_mode) |
sub_cmd = EG_DMA_COPY_TILED;
lbpp = util_logbase2(bpp);
pitch_tile_max = ((pitch / bpp) / 8) - 1;
- nbanks = eg_num_banks(rctx->screen->b.tiling_info.num_banks);
+ nbanks = eg_num_banks(rctx->screen->b.info.r600_num_banks);
if (dst_mode == RADEON_SURF_MODE_LINEAR) {
/* T2L */
unsigned id = 1;
unsigned i;
/* !!!
- * To avoid GPU lockup registers must be emited in a specific order
+ * To avoid GPU lockup registers must be emitted in a specific order
* (no kidding ...). The order below is important and have been
- * partialy infered from analyzing fglrx command stream.
+ * partially inferred from analyzing fglrx command stream.
*
* Don't reorder atom without carefully checking the effect (GPU lockup
* or piglit regression).
unsigned output_patch0_offset, perpatch_output_offset, lds_size;
uint32_t values[16];
unsigned num_waves;
- unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
+ unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes;
unsigned wave_divisor = (16 * num_pipes);
*num_patches = 1;
unsigned ar_chan;
unsigned ar_handling;
unsigned r6xx_nop_after_rel_dst;
- bool index_loaded[2];
- unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
+ bool index_loaded[2];
+ unsigned index_reg[2]; /* indexing register CF_INDEX_[01] */
unsigned debug_id;
struct r600_isa* isa;
};
lp_build_tgsi_llvm(bld_base, tokens);
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
radeon_llvm_finalize_module(ctx);
return ctx->gallivm.module;
return 0;
}
+void r600_destroy_shader(struct r600_bytecode *bc)
+{
+ FREE(bc->bytecode);
+}
+
unsigned r600_llvm_compile(
LLVMModuleRef mod,
enum radeon_family family,
struct radeon_shader_binary binary;
const char * gpu_family = r600_get_llvm_processor_name(family);
- memset(&binary, 0, sizeof(struct radeon_shader_binary));
+ radeon_shader_binary_init(&binary);
if (dump)
LLVMDumpModule(mod);
r = radeon_llvm_compile(mod, &binary, gpu_family, NULL, debug);
r = r600_create_shader(bc, &binary, use_kill);
- FREE(binary.code);
- FREE(binary.config);
- FREE(binary.rodata);
- FREE(binary.global_symbol_offsets);
+ radeon_shader_binary_clean(&binary);
return r;
}
const struct radeon_shader_binary *binary,
boolean *use_kill);
+void r600_destroy_shader(struct r600_bytecode *bc);
+
void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
struct r600_bytecode *bc,
uint64_t symbol_offset,
case PIPE_CAP_TGSI_TXQS:
case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 1;
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
/* kernel command checker support is also required */
return family >= CHIP_CEDAR && rscreen->b.info.drm_minor >= 41;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return family >= CHIP_CEDAR ? 0 : 1;
+
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIME_ELAPSED:
- return rscreen->b.info.r600_clock_crystal_freq != 0;
+ return rscreen->b.info.clock_crystal_freq != 0;
case PIPE_CAP_QUERY_TIMESTAMP:
return rscreen->b.info.drm_minor >= 20 &&
- rscreen->b.info.r600_clock_crystal_freq != 0;
+ rscreen->b.info.clock_crystal_freq != 0;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
struct r600_texture *chroma = (struct r600_texture *)buf->resources[1];
msg->body.decode.dt_field_mode = buf->base.interlaced;
- msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.tiling_info.num_banks));
+ msg->body.decode.dt_surf_tile_config |= RUVD_NUM_BANKS(eg_num_banks(rscreen->b.info.r600_num_banks));
ruvd_set_dt_surfaces(msg, &luma->surface, &chroma->surface);
old_buf = res->buf;
res->buf = new_buf; /* should be atomic */
- if (rscreen->info.r600_virtual_address)
+ if (rscreen->info.has_virtual_memory)
res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf);
else
res->gpu_address = 0;
return NULL;
}
- if (rscreen->info.r600_virtual_address)
+ if (rscreen->info.has_virtual_memory)
rbuffer->gpu_address =
ws->buffer_get_virtual_address(rbuffer->buf);
else
enum radeon_bo_priority priority)
{
struct radeon_winsys_cs *cs = ring->cs;
- bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.r600_virtual_address;
+ bool has_vm = ((struct r600_common_screen*)rctx->b.screen)->info.has_virtual_memory;
unsigned reloc = radeon_add_to_buffer_list(rctx, ring, rbo, usage, priority);
if (!has_vm) {
/* Max counters per HW block */
#define R600_QUERY_MAX_COUNTERS 16
-static const char * const r600_pc_shader_suffix[] = {
- "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
-};
-
static struct r600_perfcounter_block *
lookup_counter(struct r600_perfcounters *pc, unsigned index,
unsigned *base_gid, unsigned *sub_index)
unsigned stride;
};
+/* Marker stored in r600_query_pc::shaders when only shader windowing was
+ * requested.  Unsigned literal: (1 << 31) would overflow a signed int. */
+#define R600_PC_SHADERS_WINDOWING (1u << 31)
+
struct r600_query_pc {
struct r600_query_hw b;
if (block->flags & R600_PC_BLOCK_SHADER) {
unsigned sub_gids = block->num_instances;
unsigned shader_id;
- unsigned shader_mask;
- unsigned query_shader_mask;
+ unsigned shaders;
+ unsigned query_shaders;
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
sub_gids = sub_gids * screen->info.max_se;
shader_id = sub_gid / sub_gids;
sub_gid = sub_gid % sub_gids;
- if (shader_id == 0)
- shader_mask = R600_PC_SHADER_ALL;
- else
- shader_mask = 1 << (shader_id - 1);
+ shaders = screen->perfcounters->shader_type_bits[shader_id];
- query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
- if (query_shader_mask && query_shader_mask != shader_mask) {
+ query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+ if (query_shaders && query_shaders != shaders) {
fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
FREE(group);
return NULL;
}
- query->shaders |= shader_mask;
+ query->shaders = shaders;
}
- if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
+ if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
// A non-zero value in query->shaders ensures that the shader
// masking is reset unless the user explicitly requests one.
- query->shaders |= R600_PC_SHADER_WINDOWING;
+ query->shaders = R600_PC_SHADERS_WINDOWING;
}
if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
}
if (query->shaders) {
- if ((query->shaders & R600_PC_SHADER_ALL) == 0)
- query->shaders |= R600_PC_SHADER_ALL;
+ if (query->shaders == R600_PC_SHADERS_WINDOWING)
+ query->shaders = 0xffffffff;
query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
}
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
groups_se = screen->info.max_se;
if (block->flags & R600_PC_BLOCK_SHADER)
- groups_shader = ARRAY_SIZE(r600_pc_shader_suffix);
+ groups_shader = screen->perfcounters->num_shader_types;
namelen = strlen(block->basename);
block->group_name_stride = namelen + 1;
groupname = block->group_names;
for (i = 0; i < groups_shader; ++i) {
- unsigned shaderlen = strlen(r600_pc_shader_suffix[i]);
+ const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
+ unsigned shaderlen = strlen(shader_suffix);
for (j = 0; j < groups_se; ++j) {
for (k = 0; k < groups_instance; ++k) {
strcpy(groupname, block->basename);
p = groupname + namelen;
if (block->flags & R600_PC_BLOCK_SHADER) {
- strcpy(p, r600_pc_shader_suffix[i]);
+ strcpy(p, shader_suffix);
p += shaderlen;
}
if (block->flags & R600_PC_BLOCK_SE_GROUPS)
block->num_groups *= rscreen->info.max_se;
if (block->flags & R600_PC_BLOCK_SHADER)
- block->num_groups *= ARRAY_SIZE(r600_pc_shader_suffix);
+ block->num_groups *= pc->num_shader_types;
++pc->num_blocks;
pc->num_groups += block->num_groups;
struct pipe_fence_handle *sdma;
};
+/*
+ * shader binary helpers.
+ */
+/* Zero-fill *b so that every pointer member starts out NULL. */
+void radeon_shader_binary_init(struct radeon_shader_binary *b)
+{
+	memset(b, 0, sizeof *b);
+}
+
+/* Release the heap members of *b (code, config, rodata, symbol offsets,
+ * relocations, disassembly string).  The struct itself is not freed and a
+ * NULL b is accepted. */
+void radeon_shader_binary_clean(struct radeon_shader_binary *b)
+{
+	if (b) {
+		FREE(b->code);
+		FREE(b->config);
+		FREE(b->rodata);
+		FREE(b->global_symbol_offsets);
+		FREE(b->relocs);
+		FREE(b->disasm_string);
+	}
+}
+
/*
* pipe_context
*/
rctx->chip_class = rscreen->chip_class;
if (rscreen->chip_class >= CIK)
- rctx->max_db = MAX2(8, rscreen->info.r600_num_backends);
+ rctx->max_db = MAX2(8, rscreen->info.num_render_backends);
else if (rscreen->chip_class >= EVERGREEN)
rctx->max_db = 8;
else
if (!rctx->ctx)
return false;
- if (rscreen->info.r600_has_dma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
+ if (rscreen->info.has_sdma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
r600_flush_dma_ring,
rctx, NULL);
{ "noir", DBG_NO_IR, "Don't print the LLVM IR"},
{ "notgsi", DBG_NO_TGSI, "Don't print the TGSI"},
{ "noasm", DBG_NO_ASM, "Don't print disassembled shaders"},
+ { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" },
/* features */
{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
{ "nodcc", DBG_NO_DCC, "Disable DCC." },
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
+ { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
DEBUG_NAMED_VALUE_END /* must be last */
};
case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
if (ret) {
uint32_t *max_clock_frequency = ret;
- *max_clock_frequency = rscreen->info.max_sclk;
+ *max_clock_frequency = rscreen->info.max_shader_clock;
}
return sizeof(uint32_t);
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
- rscreen->info.r600_clock_crystal_freq;
+ rscreen->info.clock_crystal_freq;
}
static void r600_fence_reference(struct pipe_screen *screen,
return rws->fence_wait(rws, rfence->gfx, timeout);
}
-static bool r600_interpret_tiling(struct r600_common_screen *rscreen,
- uint32_t tiling_config)
+static void r600_query_memory_info(struct pipe_screen *screen,
+ struct pipe_memory_info *info)
{
- switch ((tiling_config & 0xe) >> 1) {
- case 0:
- rscreen->tiling_info.num_channels = 1;
- break;
- case 1:
- rscreen->tiling_info.num_channels = 2;
- break;
- case 2:
- rscreen->tiling_info.num_channels = 4;
- break;
- case 3:
- rscreen->tiling_info.num_channels = 8;
- break;
- default:
- return false;
- }
-
- switch ((tiling_config & 0x30) >> 4) {
- case 0:
- rscreen->tiling_info.num_banks = 4;
- break;
- case 1:
- rscreen->tiling_info.num_banks = 8;
- break;
- default:
- return false;
-
- }
- switch ((tiling_config & 0xc0) >> 6) {
- case 0:
- rscreen->tiling_info.group_bytes = 256;
- break;
- case 1:
- rscreen->tiling_info.group_bytes = 512;
- break;
- default:
- return false;
- }
- return true;
-}
-
-static bool evergreen_interpret_tiling(struct r600_common_screen *rscreen,
- uint32_t tiling_config)
-{
- switch (tiling_config & 0xf) {
- case 0:
- rscreen->tiling_info.num_channels = 1;
- break;
- case 1:
- rscreen->tiling_info.num_channels = 2;
- break;
- case 2:
- rscreen->tiling_info.num_channels = 4;
- break;
- case 3:
- rscreen->tiling_info.num_channels = 8;
- break;
- default:
- return false;
- }
-
- switch ((tiling_config & 0xf0) >> 4) {
- case 0:
- rscreen->tiling_info.num_banks = 4;
- break;
- case 1:
- rscreen->tiling_info.num_banks = 8;
- break;
- case 2:
- rscreen->tiling_info.num_banks = 16;
- break;
- default:
- return false;
- }
-
- switch ((tiling_config & 0xf00) >> 8) {
- case 0:
- rscreen->tiling_info.group_bytes = 256;
- break;
- case 1:
- rscreen->tiling_info.group_bytes = 512;
- break;
- default:
- return false;
- }
- return true;
-}
-
-static bool r600_init_tiling(struct r600_common_screen *rscreen)
-{
- uint32_t tiling_config = rscreen->info.r600_tiling_config;
-
- /* set default group bytes, overridden by tiling info ioctl */
- if (rscreen->chip_class <= R700) {
- rscreen->tiling_info.group_bytes = 256;
- } else {
- rscreen->tiling_info.group_bytes = 512;
- }
-
- if (!tiling_config)
- return true;
-
- if (rscreen->chip_class <= R700) {
- return r600_interpret_tiling(rscreen, tiling_config);
- } else {
- return evergreen_interpret_tiling(rscreen, tiling_config);
- }
+ struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+ struct radeon_winsys *ws = rscreen->ws;
+ unsigned vram_usage, gtt_usage;
+
+ info->total_device_memory = rscreen->info.vram_size / 1024;
+ info->total_staging_memory = rscreen->info.gart_size / 1024;
+
+ /* The real TTM memory usage is somewhat random, because:
+ *
+ * 1) TTM delays freeing memory, because it can only free it after
+ * fences expire.
+ *
+ * 2) The memory usage can be really low if big VRAM evictions are
+ * taking place, but the real usage is well above the size of VRAM.
+ *
+ * Instead, return statistics of this process.
+ */
+ vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
+ gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
+
+ info->avail_device_memory =
+ vram_usage <= info->total_device_memory ?
+ info->total_device_memory - vram_usage : 0;
+ info->avail_staging_memory =
+ gtt_usage <= info->total_staging_memory ?
+ info->total_staging_memory - gtt_usage : 0;
+
+ info->device_memory_evicted =
+ ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
+ /* Just return the number of evicted 64KB pages. */
+ info->nr_device_memory_evictions = info->device_memory_evicted / 64;
}
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
rscreen->b.fence_reference = r600_fence_reference;
rscreen->b.resource_destroy = u_resource_destroy_vtbl;
rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory;
+ rscreen->b.query_memory_info = r600_query_memory_info;
if (rscreen->info.has_uvd) {
rscreen->b.get_video_param = rvid_get_video_param;
rscreen->chip_class = rscreen->info.chip_class;
rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
- if (!r600_init_tiling(rscreen)) {
- return false;
- }
util_format_s3tc_init();
pipe_mutex_init(rscreen->aux_context_lock);
pipe_mutex_init(rscreen->gpu_load_mutex);
if (rscreen->debug_flags & DBG_INFO) {
printf("pci_id = 0x%x\n", rscreen->info.pci_id);
- printf("family = %i\n", rscreen->info.family);
+ printf("family = %i (%s)\n", rscreen->info.family,
+ r600_get_chip_name(rscreen));
printf("chip_class = %i\n", rscreen->info.chip_class);
- printf("gart_size = %i MB\n", (int)(rscreen->info.gart_size >> 20));
- printf("vram_size = %i MB\n", (int)(rscreen->info.vram_size >> 20));
- printf("max_sclk = %i\n", rscreen->info.max_sclk);
+ printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024));
+ printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024));
+ printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory);
+ printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2);
+ printf("has_sdma = %i\n", rscreen->info.has_sdma);
+ printf("has_uvd = %i\n", rscreen->info.has_uvd);
+ printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
+ printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config);
+ printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq);
+ printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
+ rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
+ printf("has_userptr = %i\n", rscreen->info.has_userptr);
+
+ printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes);
+ printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock);
printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units);
printf("max_se = %i\n", rscreen->info.max_se);
printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se);
- printf("drm = %i.%i.%i\n", rscreen->info.drm_major,
- rscreen->info.drm_minor, rscreen->info.drm_patchlevel);
- printf("has_uvd = %i\n", rscreen->info.has_uvd);
- printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version);
- printf("r600_num_backends = %i\n", rscreen->info.r600_num_backends);
- printf("r600_clock_crystal_freq = %i\n", rscreen->info.r600_clock_crystal_freq);
- printf("r600_tiling_config = 0x%x\n", rscreen->info.r600_tiling_config);
- printf("r600_num_tile_pipes = %i\n", rscreen->info.r600_num_tile_pipes);
- printf("r600_max_pipes = %i\n", rscreen->info.r600_max_pipes);
- printf("r600_virtual_address = %i\n", rscreen->info.r600_virtual_address);
- printf("r600_has_dma = %i\n", rscreen->info.r600_has_dma);
- printf("r600_backend_map = %i\n", rscreen->info.r600_backend_map);
- printf("r600_backend_map_valid = %i\n", rscreen->info.r600_backend_map_valid);
+
+ printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map);
+ printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid);
+ printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks);
+ printf("num_render_backends = %i\n", rscreen->info.num_render_backends);
+ printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes);
+ printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes);
printf("si_tile_mode_array_valid = %i\n", rscreen->info.si_tile_mode_array_valid);
printf("cik_macrotile_mode_array_valid = %i\n", rscreen->info.cik_macrotile_mode_array_valid);
}
#define DBG_NO_IR (1 << 12)
#define DBG_NO_TGSI (1 << 13)
#define DBG_NO_ASM (1 << 14)
+#define DBG_PREOPT_IR (1 << 15)
/* Bits 21-31 are reserved for the r600g driver. */
/* features */
#define DBG_NO_ASYNC_DMA (1llu << 32)
#define DBG_NO_DCC (1llu << 43)
#define DBG_NO_DCC_CLEAR (1llu << 44)
#define DBG_NO_RB_PLUS (1llu << 45)
+#define DBG_SI_SCHED (1llu << 46)
#define R600_MAP_BUFFER_ALIGNMENT 64
char *disasm_string;
};
+void radeon_shader_binary_init(struct radeon_shader_binary *b);
+void radeon_shader_binary_clean(struct radeon_shader_binary *b);
+
struct r600_resource {
struct u_resource b;
unsigned spi_shader_col_format_alpha; /* SI+, alpha-to-coverage */
unsigned spi_shader_col_format_blend; /* SI+, blending without alpha. */
unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
- unsigned sx_ps_downconvert; /* Stoney only */
- unsigned sx_blend_opt_epsilon; /* Stoney only */
struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
unsigned pa_su_poly_offset_db_fmt_cntl;
};
-struct r600_tiling_info {
- unsigned num_channels;
- unsigned num_banks;
- unsigned group_bytes;
-};
-
struct r600_common_screen {
struct pipe_screen b;
struct radeon_winsys *ws;
enum radeon_family family;
enum chip_class chip_class;
struct radeon_info info;
- struct r600_tiling_info tiling_info;
uint64_t debug_flags;
bool has_cp_dma;
bool has_streamout;
case R600_QUERY_NUM_SHADERS_CREATED:
query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
break;
+ case R600_QUERY_GPIN_ASIC_ID:
+ case R600_QUERY_GPIN_NUM_SIMD:
+ case R600_QUERY_GPIN_NUM_RB:
+ case R600_QUERY_GPIN_NUM_SPI:
+ case R600_QUERY_GPIN_NUM_SE:
+ break;
default:
unreachable("r600_query_sw_begin: bad query type");
}
case R600_QUERY_NUM_SHADERS_CREATED:
query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
break;
+ case R600_QUERY_GPIN_ASIC_ID:
+ case R600_QUERY_GPIN_NUM_SIMD:
+ case R600_QUERY_GPIN_NUM_RB:
+ case R600_QUERY_GPIN_NUM_SPI:
+ case R600_QUERY_GPIN_NUM_SE:
+ break;
default:
unreachable("r600_query_sw_end: bad query type");
}
case PIPE_QUERY_TIMESTAMP_DISJOINT:
/* Convert from cycles per millisecond to cycles per second (Hz). */
result->timestamp_disjoint.frequency =
- (uint64_t)rctx->screen->info.r600_clock_crystal_freq * 1000;
+ (uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
result->timestamp_disjoint.disjoint = FALSE;
return TRUE;
case PIPE_QUERY_GPU_FINISHED: {
wait ? PIPE_TIMEOUT_INFINITE : 0);
return result->b;
}
+
+ case R600_QUERY_GPIN_ASIC_ID:
+ result->u32 = 0;
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_SIMD:
+ result->u32 = rctx->screen->info.num_good_compute_units;
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_RB:
+ result->u32 = rctx->screen->info.num_render_backends;
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_SPI:
+ result->u32 = 1; /* all supported chips have one SPI per SE */
+ return TRUE;
+ case R600_QUERY_GPIN_NUM_SE:
+ result->u32 = rctx->screen->info.max_se;
+ return TRUE;
}
result->u64 = query->end_result - query->begin_result;
/* Convert the time to expected units. */
if (rquery->type == PIPE_QUERY_TIME_ELAPSED ||
rquery->type == PIPE_QUERY_TIMESTAMP) {
- result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq;
+ result->u64 = (1000000 * result->u64) / rctx->screen->info.clock_crystal_freq;
}
return TRUE;
}
struct radeon_winsys_cs *cs = ctx->gfx.cs;
struct r600_resource *buffer;
uint32_t *results;
- unsigned num_backends = ctx->screen->info.r600_num_backends;
+ unsigned num_backends = ctx->screen->info.num_render_backends;
unsigned i, mask = 0;
/* if backend_map query is supported by the kernel */
- if (ctx->screen->info.r600_backend_map_valid) {
- unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
- unsigned backend_map = ctx->screen->info.r600_backend_map;
+ if (ctx->screen->info.r600_gb_backend_map_valid) {
+ unsigned num_tile_pipes = ctx->screen->info.num_tile_pipes;
+ unsigned backend_map = ctx->screen->info.r600_gb_backend_map;
unsigned item_width, item_mask;
if (ctx->chip_class >= EVERGREEN) {
return;
}
+/* XFULL builds one pipe_driver_query_info initializer with an explicit
+ * group_id. It is a file-local helper for the r600_driver_query_list table
+ * and is #undef'd right after it. */
-#define X(name_, query_type_, type_, result_type_) \
+#define XFULL(name_, query_type_, type_, result_type_, group_id_) \
{ \
.name = name_, \
.query_type = R600_QUERY_##query_type_, \
.type = PIPE_DRIVER_QUERY_TYPE_##type_, \
.result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
- .group_id = ~(unsigned)0 \
+ .group_id = group_id_ \
}
+/* X: query that belongs to no group (group_id is ~0). */
+#define X(name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, ~(unsigned)0)
+
+/* XG: query that belongs to the software group R600_QUERY_GROUP_<group_>. */
+#define XG(group_, name_, query_type_, type_, result_type_) \
+ XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)
+
static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
+
+ /* GPIN queries are for the benefit of old versions of GPUPerfStudio,
+ * which use them as a fallback path to detect the GPU type.
+ *
+ * Note: The names of these queries are significant for GPUPerfStudio
+ * (and possibly their order as well). */
+ XG(GPIN, "GPIN_000", GPIN_ASIC_ID, UINT, AVERAGE),
+ XG(GPIN, "GPIN_001", GPIN_NUM_SIMD, UINT, AVERAGE),
+ XG(GPIN, "GPIN_002", GPIN_NUM_RB, UINT, AVERAGE),
+ XG(GPIN, "GPIN_003", GPIN_NUM_SPI, UINT, AVERAGE),
+ XG(GPIN, "GPIN_004", GPIN_NUM_SE, UINT, AVERAGE),
+
+ /* The following queries must be at the end of the list because their
+ * availability is adjusted dynamically based on the DRM version. */
X("GPU-load", GPU_LOAD, UINT64, AVERAGE),
X("temperature", GPU_TEMPERATURE, UINT64, AVERAGE),
X("shader-clock", CURRENT_GPU_SCLK, HZ, AVERAGE),
};
#undef X
+#undef XG
+#undef XFULL
static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
{
break;
}
+ if (info->group_id != ~(unsigned)0 && rscreen->perfcounters)
+ info->group_id += rscreen->perfcounters->num_groups;
+
return 1;
}
+/* Report driver query groups: the hardware performance counter groups come
+ * first, followed by the software groups (currently only "GPIN").
+ *
+ * With a NULL info, returns the total number of groups. Otherwise fills in
+ * info for group `index` and returns 1, or returns 0 if index is past the
+ * last software group. For hardware groups the result of
+ * r600_get_perfcounter_group_info is returned as-is.
+ *
+ * Note: Unfortunately, GPUPerfStudio hardcodes the order of hardware
+ * performance counter groups, so be careful when changing this and related
+ * functions.
+ */
static int r600_get_driver_query_group_info(struct pipe_screen *screen,
unsigned index,
struct pipe_driver_query_group_info *info)
{
struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+ unsigned num_pc_groups = 0;
- return r600_get_perfcounter_group_info(rscreen, index, info);
+ /* Without perfcounter support only the software groups are exposed. */
+ if (rscreen->perfcounters)
+ num_pc_groups = rscreen->perfcounters->num_groups;
+
+ /* NULL info: the caller only wants the number of groups. */
+ if (!info)
+ return num_pc_groups + R600_NUM_SW_QUERY_GROUPS;
+
+ /* Indices [0, num_pc_groups) are hardware perfcounter groups. */
+ if (index < num_pc_groups)
+ return r600_get_perfcounter_group_info(rscreen, index, info);
+
+ /* Rebase the index so that 0 is the first software group. */
+ index -= num_pc_groups;
+ if (index >= R600_NUM_SW_QUERY_GROUPS)
+ return 0;
+
+ /* The only software group; 5 matches the number of GPIN_* entries in
+ * r600_driver_query_list. */
+ info->name = "GPIN";
+ info->max_active_queries = 5;
+ info->num_queries = 5;
+ return 1;
}
void r600_query_init(struct r600_common_context *rctx)
rctx->b.get_query_result = r600_get_query_result;
rctx->render_cond_atom.emit = r600_emit_query_predication;
- if (((struct r600_common_screen*)rctx->b.screen)->info.r600_num_backends > 0)
+ if (((struct r600_common_screen*)rctx->b.screen)->info.num_render_backends > 0)
rctx->b.render_condition = r600_render_condition;
LIST_INITHEAD(&rctx->active_nontimer_queries);
#define R600_QUERY_GPU_LOAD (PIPE_QUERY_DRIVER_SPECIFIC + 11)
#define R600_QUERY_NUM_COMPILATIONS (PIPE_QUERY_DRIVER_SPECIFIC + 12)
#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
+#define R600_QUERY_GPIN_ASIC_ID (PIPE_QUERY_DRIVER_SPECIFIC + 14)
+#define R600_QUERY_GPIN_NUM_SIMD (PIPE_QUERY_DRIVER_SPECIFIC + 15)
+#define R600_QUERY_GPIN_NUM_RB (PIPE_QUERY_DRIVER_SPECIFIC + 16)
+#define R600_QUERY_GPIN_NUM_SPI (PIPE_QUERY_DRIVER_SPECIFIC + 17)
+#define R600_QUERY_GPIN_NUM_SE (PIPE_QUERY_DRIVER_SPECIFIC + 18)
#define R600_QUERY_FIRST_PERFCOUNTER (PIPE_QUERY_DRIVER_SPECIFIC + 100)
+/* Software (non-perfcounter) driver query groups. These IDs are relative to
+ * the software groups only: when perfcounters are present, the number of
+ * hardware perfcounter groups is added to a query's group_id before it is
+ * reported, and subtracted from the index when a group is looked up. */
+enum {
+ R600_QUERY_GROUP_GPIN = 0,
+ R600_NUM_SW_QUERY_GROUPS
+};
+
struct r600_query_ops {
void (*destroy)(struct r600_common_context *, struct r600_query *);
boolean (*begin)(struct r600_common_context *, struct r600_query *);
R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
};
-/* Shader enable bits. Chosen to coincide with SQ_PERFCOUNTER_CTRL values */
-enum {
- R600_PC_SHADER_PS = (1 << 0),
- R600_PC_SHADER_VS = (1 << 1),
- R600_PC_SHADER_GS = (1 << 2),
- R600_PC_SHADER_ES = (1 << 3),
- R600_PC_SHADER_HS = (1 << 4),
- R600_PC_SHADER_LS = (1 << 5),
- R600_PC_SHADER_CS = (1 << 6),
-
- R600_PC_SHADER_ALL = R600_PC_SHADER_PS | R600_PC_SHADER_VS |
- R600_PC_SHADER_GS | R600_PC_SHADER_ES |
- R600_PC_SHADER_HS | R600_PC_SHADER_LS |
- R600_PC_SHADER_CS,
-
- R600_PC_SHADER_WINDOWING = (1 << 31),
-};
-
/* Describes a hardware block with performance counters. Multiple instances of
* each block, possibly per-SE, may exist on the chip. Depending on the block
* and on the user's configuration, we either
unsigned num_instance_cs_dwords;
unsigned num_shaders_cs_dwords;
+ unsigned num_shader_types;
+ const char * const *shader_type_suffixes;
+ const unsigned *shader_type_bits;
+
void (*get_size)(struct r600_perfcounter_block *,
unsigned count, unsigned *selectors,
unsigned *num_select_dw, unsigned *num_read_dw);
unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
unsigned element_bits = 4;
unsigned cmask_cache_bits = 1024;
- unsigned num_pipes = rscreen->tiling_info.num_channels;
- unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
+ unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
struct r600_texture *rtex,
struct r600_cmask_info *out)
{
- unsigned pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
- unsigned num_pipes = rscreen->tiling_info.num_channels;
+ unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
unsigned cl_width, cl_height;
switch (num_pipes) {
{
unsigned cl_width, cl_height, width, height;
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
- unsigned num_pipes = rscreen->tiling_info.num_channels;
+ unsigned num_pipes = rscreen->info.num_tile_pipes;
if (rscreen->chip_class <= EVERGREEN &&
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
return 0;
+ /* Overalign HTILE on Stoney to fix piglit/depthstencil-render-miplevels 585. */
+ if (rscreen->family == CHIP_STONEY)
+ num_pipes = 4;
+
switch (num_pipes) {
case 1:
cl_width = 32;
slice_elements = (width * height) / (8 * 8);
slice_bytes = slice_elements * 4;
- pipe_interleave_bytes = rscreen->tiling_info.group_bytes;
+ pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
base_align = num_pipes * pipe_interleave_bytes;
rtex->htile.pitch = width;
const struct pipe_surface *templ)
{
unsigned level = templ->u.tex.level;
+ unsigned width = u_minify(tex->width0, level);
+ unsigned height = u_minify(tex->height0, level);
+
+ if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
+ const struct util_format_description *tex_desc
+ = util_format_description(tex->format);
+ const struct util_format_description *templ_desc
+ = util_format_description(templ->format);
+
+ assert(tex_desc->block.bits == templ_desc->block.bits);
+
+ /* Adjust size of surface if and only if the block width or
+ * height is changed. */
+ if (tex_desc->block.width != templ_desc->block.width ||
+ tex_desc->block.height != templ_desc->block.height) {
+ unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
+ unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
+
+ width = nblks_x * templ_desc->block.width;
+ height = nblks_y * templ_desc->block.height;
+ }
+ }
- return r600_create_surface_custom(pipe, tex, templ,
- u_minify(tex->width0, level),
- u_minify(tex->height0, level));
+ return r600_create_surface_custom(pipe, tex, templ, width, height);
}
static void r600_surface_destroy(struct pipe_context *pipe,
return;
for (i = 0; i < fb->nr_cbufs; i++) {
- struct r600_surface *surf;
struct r600_texture *tex;
unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
if (!(*buffers & clear_bit))
continue;
- surf = (struct r600_surface *)fb->cbufs[i];
tex = (struct r600_texture *)fb->cbufs[i]->texture;
/* 128-bit formats are unusupported */
if (clear_words_needed)
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
} else {
- /* RB+ doesn't work with CMASK fast clear. */
- if (surf->sx_ps_downconvert)
+ /* Stoney/RB+ doesn't work with CMASK fast clear. */
+ if (rctx->family == CHIP_STONEY)
continue;
/* ensure CMASK is enabled */
emit_data->args[1], "");
}
+/* Argument fetcher for TGSI PK2H: loads channels X and Y of source operand 0
+ * into args[0] and args[1] respectively. */
+static void pk2h_fetch_args(struct lp_build_tgsi_context * bld_base,
+			    struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef src_x, src_y;
+
+	src_x = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+	src_y = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
+
+	emit_data->args[0] = src_x;
+	emit_data->args[1] = src_y;
+}
+
+/* Emit TGSI PK2H: convert args[0] and args[1] to half precision and pack
+ * their 16-bit patterns into one integer value, args[0] in the low 16 bits
+ * and args[1] shifted left by 16. The result is stored to the current
+ * output channel. */
+static void emit_pk2h(const struct lp_build_tgsi_action *action,
+		      struct lp_build_tgsi_context *bld_base,
+		      struct lp_build_emit_data *emit_data)
+{
+	struct lp_build_context *uint_bld = &bld_base->uint_bld;
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMContextRef context = bld_base->base.gallivm->context;
+	LLVMTypeRef half_type = LLVMHalfTypeInContext(context);
+	LLVMTypeRef short_type = LLVMInt16TypeInContext(context);
+	LLVMValueRef shift = lp_build_const_int32(uint_bld->gallivm, 16);
+	LLVMValueRef lo, hi;
+
+	/* Low half: float -> half -> i16 bits -> zero-extended uint element. */
+	lo = LLVMBuildFPTrunc(builder, emit_data->args[0], half_type, "");
+	lo = LLVMBuildBitCast(builder, lo, short_type, "");
+	lo = LLVMBuildZExt(builder, lo, uint_bld->elem_type, "");
+
+	/* High half: same conversion for the second argument. */
+	hi = LLVMBuildFPTrunc(builder, emit_data->args[1], half_type, "");
+	hi = LLVMBuildBitCast(builder, hi, short_type, "");
+	hi = LLVMBuildZExt(builder, hi, uint_bld->elem_type, "");
+
+	/* Move the second half above the first and merge them. */
+	hi = LLVMBuildShl(builder, hi, shift, "");
+	emit_data->output[emit_data->chan] = LLVMBuildOr(builder, lo, hi, "");
+}
+
+/* Argument fetcher for TGSI UP2H: only channel X of source operand 0 is
+ * needed; it is placed in args[0]. */
+static void up2h_fetch_args(struct lp_build_tgsi_context * bld_base,
+			    struct lp_build_emit_data * emit_data)
+{
+	LLVMValueRef packed;
+
+	packed = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+	emit_data->args[0] = packed;
+}
+
+/* Emit TGSI UP2H: split args[0] into two 16-bit half-float patterns and
+ * extend each to the base element type. The low 16 bits go to output[0],
+ * the value shifted right by 16 goes to output[1]. */
+static void emit_up2h(const struct lp_build_tgsi_action *action,
+		      struct lp_build_tgsi_context *bld_base,
+		      struct lp_build_emit_data *emit_data)
+{
+	struct lp_build_context *uint_bld = &bld_base->uint_bld;
+	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+	LLVMContextRef context = bld_base->base.gallivm->context;
+	LLVMTypeRef half_type = LLVMHalfTypeInContext(context);
+	LLVMTypeRef short_type = LLVMInt16TypeInContext(context);
+	LLVMValueRef shift = lp_build_const_int32(uint_bld->gallivm, 16);
+	LLVMValueRef packed = emit_data->args[0];
+	LLVMValueRef half;
+
+	/* Low half: truncate to i16, reinterpret as half, extend. */
+	half = LLVMBuildTrunc(builder, packed, short_type, "");
+	half = LLVMBuildBitCast(builder, half, half_type, "");
+	emit_data->output[0] =
+		LLVMBuildFPExt(builder, half, bld_base->base.elem_type, "");
+
+	/* High half: shift it down first, then convert the same way. */
+	half = LLVMBuildLShr(builder, packed, shift, "");
+	half = LLVMBuildTrunc(builder, half, short_type, "");
+	half = LLVMBuildBitCast(builder, half, half_type, "");
+	emit_data->output[1] =
+		LLVMBuildFPExt(builder, half, bld_base->base.elem_type, "");
+}
+
void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
{
struct lp_type type;
bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
+ bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
+ bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
}
void radeon_llvm_create_func(struct radeon_llvm_context * ctx,
void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx)
{
struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm;
- /* End the main function with Return*/
- LLVMBuildRetVoid(gallivm->builder);
/* Create the pass manager */
- ctx->gallivm.passmgr = LLVMCreateFunctionPassManagerForModule(
+ gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
gallivm->module);
/* This pass should eliminate all the load and store instructions */
};
struct radeon_info {
+ /* Device info. */
uint32_t pci_id;
enum radeon_family family;
enum chip_class chip_class;
uint64_t gart_size;
uint64_t vram_size;
- uint32_t max_sclk;
- uint32_t num_good_compute_units;
- uint32_t max_se;
- uint32_t max_sh_per_se;
+ boolean has_virtual_memory;
+ bool gfx_ib_pad_with_type2;
+ boolean has_sdma;
+ boolean has_uvd;
+ uint32_t vce_fw_version;
+ uint32_t vce_harvest_config;
+ uint32_t clock_crystal_freq;
+ /* Kernel info. */
uint32_t drm_major; /* version */
uint32_t drm_minor;
uint32_t drm_patchlevel;
-
- boolean has_uvd;
- uint32_t vce_fw_version;
boolean has_userptr;
- bool gfx_ib_pad_with_type2;
+ /* Shader cores. */
+ uint32_t r600_max_quad_pipes; /* wave size / 16 */
+ uint32_t max_shader_clock;
+ uint32_t num_good_compute_units;
+ uint32_t max_se; /* shader engines */
+ uint32_t max_sh_per_se; /* shader arrays per shader engine */
+
+ /* Render backends (color + depth blocks). */
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
-
- uint32_t r600_num_backends;
- uint32_t r600_clock_crystal_freq;
- uint32_t r600_tiling_config;
- uint32_t r600_num_tile_pipes;
- uint32_t r600_max_pipes;
- boolean r600_virtual_address;
- boolean r600_has_dma;
-
- uint32_t r600_backend_map;
- boolean r600_backend_map_valid;
-
+ uint32_t r600_gb_backend_map; /* R600 harvest config */
+ boolean r600_gb_backend_map_valid;
+ uint32_t r600_num_banks;
+ uint32_t num_render_backends;
+ uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
+ uint32_t pipe_interleave_bytes;
+ uint32_t enabled_rb_mask; /* GCN harvest config */
+
+ /* Tile modes. */
boolean si_tile_mode_array_valid;
uint32_t si_tile_mode_array[32];
- uint32_t si_backend_enabled_mask;
-
boolean cik_macrotile_mode_array_valid;
uint32_t cik_macrotile_mode_array[16];
- uint32_t vce_harvest_config;
};
enum radeon_feature_id {
}
mtilew = (8 * rsrc->surface.bankw *
- sctx->screen->b.tiling_info.num_channels) *
+ sctx->screen->b.info.num_tile_pipes) *
rsrc->surface.mtilea;
assert(!(mtilew & (mtilew - 1)));
mtileh = (8 * rsrc->surface.bankh * num_banks) /
LLVMContextDispose(program->llvm_ctx);
}
#else
- FREE(program->shader.binary.config);
- FREE(program->shader.binary.rodata);
- FREE(program->shader.binary.global_symbol_offsets);
si_shader_destroy(&program->shader);
#endif
si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
- si_mark_atom_dirty(ctx, &ctx->cb_target_mask);
+ si_mark_atom_dirty(ctx, &ctx->cb_render_state);
si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
si_mark_atom_dirty(ctx, &ctx->db_render_state);
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
/* Registers are laid out in decreasing rather than increasing order. */
SI_PC_REG_REVERSE = 4,
+
+ SI_PC_FAKE = 8,
};
struct si_pc_block_base {
unsigned instances;
};
+/* Name suffixes for the per-shader-type variants of a performance counter
+ * group. Handed to the common perfcounter code via pc->shader_type_suffixes.
+ *
+ * The order is chosen to be compatible with GPUPerfStudio's hardcoding of
+ * performance counter group IDs.
+ */
+static const char * const si_pc_shader_type_suffixes[] = {
+ "", "_ES", "_GS", "_VS", "_PS", "_LS", "_HS", "_CS"
+};
+
+/* Shader-enable masks, handed over via pc->shader_type_bits. The table has
+ * the same number of entries, in the same stage order, as
+ * si_pc_shader_type_suffixes; its size defines pc->num_shader_types.
+ * NOTE(review): 0x7f for the unsuffixed entry presumably enables all seven
+ * stages -- confirm against the S_036780_*_EN field definitions. */
+static const unsigned si_pc_shader_type_bits[] = {
+ 0x7f,
+ S_036780_ES_EN(1),
+ S_036780_GS_EN(1),
+ S_036780_VS_EN(1),
+ S_036780_PS_EN(1),
+ S_036780_LS_EN(1),
+ S_036780_HS_EN(1),
+ S_036780_CS_EN(1),
+};
static struct si_pc_block_base cik_CB = {
.name = "CB",
.counter0_lo = R_034200_WD_PERFCOUNTER0_LO,
};
+/* "MC" block flagged SI_PC_FAKE: si_pc_get_size reports zero select dwords
+ * for it and the counter read path stores immediate zeros instead of copying
+ * hardware counter registers.
+ * NOTE(review): presumably present only to keep the group order expected by
+ * GPUPerfStudio -- confirm. */
+static struct si_pc_block_base cik_MC = {
+ .name = "MC",
+ .num_counters = 4,
+
+ .layout = SI_PC_FAKE,
+};
+
+/* "SRBM" block flagged SI_PC_FAKE: si_pc_get_size reports zero select dwords
+ * for it and the counter read path stores immediate zeros instead of copying
+ * hardware counter registers.
+ * NOTE(review): presumably present only to keep the group order expected by
+ * GPUPerfStudio -- confirm. */
+static struct si_pc_block_base cik_SRBM = {
+ .name = "SRBM",
+ .num_counters = 2,
+
+ .layout = SI_PC_FAKE,
+};
+
/* Both the number of instances and selectors varies between chips of the same
* class. We only differentiate by class here and simply expose the maximum
* number over all chips in a class.
+ *
+ * Unfortunately, GPUPerfStudio uses the order of performance counter groups
+ * blindly once it believes it has identified the hardware, so the order of
+ * blocks here matters.
*/
static struct si_pc_block groups_CIK[] = {
{ &cik_CB, 226, 4 },
- { &cik_CPC, 22 },
{ &cik_CPF, 17 },
- { &cik_CPG, 46 },
{ &cik_DB, 257, 4 },
- { &cik_GDS, 121 },
{ &cik_GRBM, 34 },
{ &cik_GRBMSE, 15 },
- { &cik_IA, 22 },
- { &cik_PA_SC, 395 },
{ &cik_PA_SU, 153 },
+ { &cik_PA_SC, 395 },
{ &cik_SPI, 186 },
{ &cik_SQ, 252 },
{ &cik_SX, 32 },
{ &cik_TA, 111, 11 },
{ &cik_TCA, 39, 2 },
{ &cik_TCC, 160, 16 },
- { &cik_TCP, 154, 11 },
{ &cik_TD, 55, 11 },
+ { &cik_TCP, 154, 11 },
+ { &cik_GDS, 121 },
{ &cik_VGT, 140 },
+ { &cik_IA, 22 },
+ { &cik_MC, 22 },
+ { &cik_SRBM, 19 },
{ &cik_WD, 22 },
+ { &cik_CPG, 46 },
+ { &cik_CPC, 22 },
+
};
static struct si_pc_block groups_VI[] = {
{ &cik_CB, 396, 4 },
- { &cik_CPC, 24 },
{ &cik_CPF, 19 },
- { &cik_CPG, 48 },
{ &cik_DB, 257, 4 },
- { &cik_GDS, 121 },
{ &cik_GRBM, 34 },
{ &cik_GRBMSE, 15 },
- { &cik_IA, 24 },
- { &cik_PA_SC, 397 },
{ &cik_PA_SU, 153 },
+ { &cik_PA_SC, 397 },
{ &cik_SPI, 197 },
{ &cik_SQ, 273 },
{ &cik_SX, 34 },
{ &cik_TA, 119, 16 },
{ &cik_TCA, 35, 2 },
{ &cik_TCC, 192, 16 },
- { &cik_TCP, 180, 16 },
{ &cik_TD, 55, 16 },
+ { &cik_TCP, 180, 16 },
+ { &cik_GDS, 121 },
{ &cik_VGT, 147 },
+ { &cik_IA, 24 },
+ { &cik_MC, 22 },
+ { &cik_SRBM, 27 },
{ &cik_WD, 37 },
+ { &cik_CPG, 48 },
+ { &cik_CPC, 24 },
+
};
static void si_pc_get_size(struct r600_perfcounter_block *group,
struct si_pc_block_base *regs = sigroup->b;
unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK;
- if (layout_multi == SI_PC_MULTI_BLOCK) {
+ if (regs->layout & SI_PC_FAKE) {
+ *num_select_dw = 0;
+ } else if (layout_multi == SI_PC_MULTI_BLOCK) {
if (count < regs->num_multi)
*num_select_dw = 2 * (count + 2) + regs->num_prelude;
else
assert(count <= regs->num_counters);
+ if (regs->layout & SI_PC_FAKE)
+ return;
+
if (layout_multi == SI_PC_MULTI_BLOCK) {
assert(!(regs->layout & SI_PC_REG_REVERSE));
unsigned reg = regs->counter0_lo;
unsigned reg_delta = 8;
- if (regs->layout & SI_PC_REG_REVERSE)
- reg_delta = -reg_delta;
+ if (!(regs->layout & SI_PC_FAKE)) {
+ if (regs->layout & SI_PC_REG_REVERSE)
+ reg_delta = -reg_delta;
- for (idx = 0; idx < count; ++idx) {
- if (regs->counters)
- reg = regs->counters[idx];
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM));
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- va += 4;
- reg += reg_delta;
+ for (idx = 0; idx < count; ++idx) {
+ if (regs->counters)
+ reg = regs->counters[idx];
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
+ COPY_DATA_DST_SEL(COPY_DATA_MEM));
+ radeon_emit(cs, reg >> 2);
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ va += 4;
+ reg += reg_delta;
+ }
+ } else {
+ for (idx = 0; idx < count; ++idx) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
+ COPY_DATA_DST_SEL(COPY_DATA_MEM));
+ radeon_emit(cs, 0); /* immediate */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ va += 4;
+ }
}
}
pc->num_stop_cs_dwords += 6;
}
+ pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
+ pc->shader_type_suffixes = si_pc_shader_type_suffixes;
+ pc->shader_type_bits = si_pc_shader_type_bits;
+
pc->get_size = si_pc_get_size;
pc->emit_instance = si_pc_emit_instance;
pc->emit_shaders = si_pc_emit_shaders;
r600_target = radeon_llvm_get_r600_target(triple);
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
r600_get_llvm_processor_name(sscreen->b.family),
- "+DumpCode,+vgpr-spilling",
+#if HAVE_LLVM >= 0x0308
+ sscreen->b.debug_flags & DBG_SI_SCHED ?
+ "+DumpCode,+vgpr-spilling,+si-scheduler" :
+#endif
+ "+DumpCode,+vgpr-spilling",
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
LLVMCodeModelDefault);
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_INVALIDATE_BUFFER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 1;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return 4;
+ case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+ return HAVE_LLVM >= 0x0306;
+
case PIPE_CAP_GLSL_FEATURE_LEVEL:
return HAVE_LLVM >= 0x0307 ? 410 : 330;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
+
/* Unsupported features. */
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_CLEAR_TEXTURE:
case PIPE_CAP_DRAW_PARAMETERS:
- case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
/* Timer queries, present when the clock frequency is non zero. */
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_TIME_ELAPSED:
- return sscreen->b.info.r600_clock_crystal_freq != 0;
+ return sscreen->b.info.clock_crystal_freq != 0;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
r600_destroy_common_screen(&sscreen->b);
}
-#define SI_TILE_MODE_COLOR_2D_8BPP 14
-
-/* Initialize pipe config. This is especially important for GPUs
- * with 16 pipes and more where it's initialized incorrectly by
- * the TILING_CONFIG ioctl. */
-static bool si_initialize_pipe_config(struct si_screen *sscreen)
-{
- unsigned mode2d;
-
- /* This is okay, because there can be no 2D tiling without
- * the tile mode array, so we won't need the pipe config.
- * Return "success".
- */
- if (!sscreen->b.info.si_tile_mode_array_valid)
- return true;
-
- /* The same index is used for the 2D mode on CIK too. */
- mode2d = sscreen->b.info.si_tile_mode_array[SI_TILE_MODE_COLOR_2D_8BPP];
-
- switch (G_009910_PIPE_CONFIG(mode2d)) {
- case V_02803C_ADDR_SURF_P2:
- sscreen->b.tiling_info.num_channels = 2;
- break;
- case V_02803C_X_ADDR_SURF_P4_8X16:
- case V_02803C_X_ADDR_SURF_P4_16X16:
- case V_02803C_X_ADDR_SURF_P4_16X32:
- case V_02803C_X_ADDR_SURF_P4_32X32:
- sscreen->b.tiling_info.num_channels = 4;
- break;
- case V_02803C_X_ADDR_SURF_P8_16X16_8X16:
- case V_02803C_X_ADDR_SURF_P8_16X32_8X16:
- case V_02803C_X_ADDR_SURF_P8_32X32_8X16:
- case V_02803C_X_ADDR_SURF_P8_16X32_16X16:
- case V_02803C_X_ADDR_SURF_P8_32X32_16X16:
- case V_02803C_X_ADDR_SURF_P8_32X32_16X32:
- case V_02803C_X_ADDR_SURF_P8_32X64_32X32:
- sscreen->b.tiling_info.num_channels = 8;
- break;
- case V_02803C_X_ADDR_SURF_P16_32X32_8X16:
- case V_02803C_X_ADDR_SURF_P16_32X32_16X16:
- sscreen->b.tiling_info.num_channels = 16;
- break;
- default:
- assert(0);
- fprintf(stderr, "radeonsi: Unknown pipe config %i.\n",
- G_009910_PIPE_CONFIG(mode2d));
- return false;
- }
- return true;
-}
-
static bool si_init_gs_info(struct si_screen *sscreen)
{
switch (sscreen->b.family) {
sscreen->b.b.resource_create = r600_resource_create_common;
if (!r600_common_screen_init(&sscreen->b, ws) ||
- !si_initialize_pipe_config(sscreen) ||
!si_init_gs_info(sscreen)) {
FREE(sscreen);
return NULL;
struct r600_atom db_render_state;
struct r600_atom msaa_config;
struct si_sample_mask sample_mask;
- struct r600_atom cb_target_mask;
+ struct r600_atom cb_render_state;
struct si_blend_color blend_color;
struct r600_atom clip_regs;
struct si_clip_state clip_state;
si_shader_dump_disassembly(&shader->binary, debug);
si_shader_dump_stats(sscreen, &shader->config,
- shader->selector->info.num_inputs,
+ shader->selector ? shader->selector->info.num_inputs : 0,
shader->binary.code_size, debug, processor);
}
if (r600_can_dump_shader(&sscreen->b, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
- if (!(sscreen->b.debug_flags & DBG_NO_IR))
+ if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR)))
LLVMDumpModule(mod);
}
si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+
+ /* Dump LLVM IR before any optimization passes */
+ if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+ r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
+ LLVMDumpModule(bld_base->base.gallivm->module);
+
radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
if (dump)
goto out;
}
+ LLVMBuildRetVoid(bld_base->base.gallivm->builder);
+ mod = bld_base->base.gallivm->module;
+
+ /* Dump LLVM IR before any optimization passes */
+ if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
+ r600_can_dump_shader(&sscreen->b, si_shader_ctx.type))
+ LLVMDumpModule(mod);
+
radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
- mod = bld_base->base.gallivm->module;
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
mod, debug, si_shader_ctx.type);
if (r) {
return r;
}
-void si_shader_destroy_binary(struct radeon_shader_binary *binary)
-{
- FREE(binary->code);
- FREE(binary->rodata);
- FREE(binary->relocs);
- FREE(binary->disasm_string);
-}
-
void si_shader_destroy(struct si_shader *shader)
{
if (shader->gs_copy_shader) {
r600_resource_reference(&shader->scratch_bo, NULL);
r600_resource_reference(&shader->bo, NULL);
- si_shader_destroy_binary(&shader->binary);
+
+ radeon_shader_binary_clean(&shader->binary);
}
struct pipe_debug_callback *debug,
unsigned processor);
void si_shader_destroy(struct si_shader *shader);
-void si_shader_destroy_binary(struct radeon_shader_binary *binary);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
}
/* The old way. */
- switch (sscreen->b.tiling_info.num_banks) {
+ switch (sscreen->b.info.r600_num_banks) {
case 2:
return V_02803C_ADDR_SURF_2_BANK;
case 4:
/* This is probably broken for a lot of chips, but it's only used
* if the kernel cannot return the tile mode array for CIK. */
- switch (sscreen->b.info.r600_num_tile_pipes) {
+ switch (sscreen->b.info.num_tile_pipes) {
case 16:
return V_02803C_X_ADDR_SURF_P16_32X32_16X16;
case 8:
return V_02803C_X_ADDR_SURF_P8_32X32_16X16;
case 4:
default:
- if (sscreen->b.info.r600_num_backends == 4)
+ if (sscreen->b.info.num_render_backends == 4)
return V_02803C_X_ADDR_SURF_P4_16X16;
else
return V_02803C_X_ADDR_SURF_P4_8X16;
/*
* Inferred framebuffer and blender state.
*
- * One of the reasons this must be derived from the framebuffer state is that:
+ * One of the reasons CB_TARGET_MASK must be derived from the framebuffer state
+ * is that:
* - The blend state mask is 0xf most of the time.
* - The COLOR1 format isn't INVALID because of possible dual-source blending,
* so COLOR1 is enabled pretty much all the time.
*
* Another reason is to avoid a hang with dual source blending.
*/
-static void si_emit_cb_target_mask(struct si_context *sctx, struct r600_atom *atom)
+static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_state_blend *blend = sctx->queued.named.blend;
- uint32_t mask = 0, i;
+ uint32_t cb_target_mask = 0, i;
for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++)
if (sctx->framebuffer.state.cbufs[i])
- mask |= 0xf << (4*i);
+ cb_target_mask |= 0xf << (4*i);
if (blend)
- mask &= blend->cb_target_mask;
+ cb_target_mask &= blend->cb_target_mask;
/* Avoid a hang that happens when dual source blending is enabled
* but there is not enough color outputs. This is undefined behavior,
if (blend && blend->dual_src_blend &&
sctx->ps_shader.cso &&
(sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
- mask = 0;
+ cb_target_mask = 0;
- radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, mask);
+ radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
+
+ /* STONEY-specific register settings. */
+ if (sctx->b.family == CHIP_STONEY) {
+ unsigned spi_shader_col_format =
+ sctx->ps_shader.cso ?
+ sctx->ps_shader.current->key.ps.spi_shader_col_format : 0;
+ unsigned sx_ps_downconvert = 0;
+ unsigned sx_blend_opt_epsilon = 0;
+ unsigned sx_blend_opt_control = 0;
+
+ for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+ struct r600_surface *surf =
+ (struct r600_surface*)sctx->framebuffer.state.cbufs[i];
+ unsigned format, swap, spi_format, colormask;
+ bool has_alpha, has_rgb;
+
+ if (!surf)
+ continue;
+
+ format = G_028C70_FORMAT(surf->cb_color_info);
+ swap = G_028C70_COMP_SWAP(surf->cb_color_info);
+ spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
+ colormask = (cb_target_mask >> (i * 4)) & 0xf;
+
+ /* Set if RGB and A are present. */
+ has_alpha = !G_028C74_FORCE_DST_ALPHA_1(surf->cb_color_attrib);
+
+ if (format == V_028C70_COLOR_8 ||
+ format == V_028C70_COLOR_16 ||
+ format == V_028C70_COLOR_32)
+ has_rgb = !has_alpha;
+ else
+ has_rgb = true;
+
+ /* Check the colormask and export format. */
+ if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A)))
+ has_rgb = false;
+ if (!(colormask & PIPE_MASK_A))
+ has_alpha = false;
+
+ if (spi_format == V_028714_SPI_SHADER_ZERO) {
+ has_rgb = false;
+ has_alpha = false;
+ }
+
+ /* Disable value checking for disabled channels. */
+ if (!has_rgb)
+ sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
+ if (!has_alpha)
+ sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
+
+ /* Enable down-conversion for 32bpp and smaller formats. */
+ switch (format) {
+ case V_028C70_COLOR_8:
+ case V_028C70_COLOR_8_8:
+ case V_028C70_COLOR_8_8_8_8:
+ /* For 1 and 2-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_5_6_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_1_5_5_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_4_4_4_4:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_32:
+ if (swap == V_0280A0_SWAP_STD &&
+ spi_format == V_028714_SPI_SHADER_32_R)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
+ else if (swap == V_0280A0_SWAP_ALT_REV &&
+ spi_format == V_028714_SPI_SHADER_32_AR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
+ break;
+
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_16_16:
+ /* For 1-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ if (swap == V_0280A0_SWAP_STD ||
+ swap == V_0280A0_SWAP_STD_REV)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
+ else
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_10_11_11:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_2_10_10_10:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
+ }
+ break;
+ }
+ }
+
+ if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
+ sx_ps_downconvert = 0;
+ sx_blend_opt_epsilon = 0;
+ sx_blend_opt_control = 0;
+ }
+
+ radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
+ radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */
+ radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */
+ radeon_emit(cs, sx_blend_opt_control); /* R_02875C_SX_BLEND_OPT_CONTROL */
+ }
}
/*
}
}
+/**
+ * Rewrite one blend equation so the source factor no longer refers to DST.
+ *
+ * If the source factor equals \p expected_dst and the destination factor is
+ * ZERO, the operands are commuted:
+ *   func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+ * Any other factor combination is left untouched.
+ *
+ * \param func             blend function, updated in place
+ * \param src_factor       source blend factor, updated in place
+ * \param dst_factor       destination blend factor, updated in place
+ * \param expected_dst     DST factor to look for in \p src_factor
+ * \param replacement_src  factor written to \p dst_factor on a match
+ */
+static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
+				unsigned *dst_factor, unsigned expected_dst,
+				unsigned replacement_src)
+{
+	/* Only the exact (expected_dst, ZERO) pair can be commuted. */
+	if (*src_factor != expected_dst ||
+	    *dst_factor != PIPE_BLENDFACTOR_ZERO)
+		return;
+
+	*src_factor = PIPE_BLENDFACTOR_ZERO;
+	*dst_factor = replacement_src;
+
+	/* The operands traded places, so a subtraction changes direction. */
+	switch (*func) {
+	case PIPE_BLEND_SUBTRACT:
+		*func = PIPE_BLEND_REVERSE_SUBTRACT;
+		break;
+	case PIPE_BLEND_REVERSE_SUBTRACT:
+		*func = PIPE_BLEND_SUBTRACT;
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * Return true if \p factor is one of DST_COLOR, DST_ALPHA,
+ * SRC_ALPHA_SATURATE, INV_DST_ALPHA or INV_DST_COLOR, and false for every
+ * other value.
+ */
+static bool si_blend_factor_uses_dst(unsigned factor)
+{
+	switch (factor) {
+	case PIPE_BLENDFACTOR_DST_COLOR:
+	case PIPE_BLENDFACTOR_DST_ALPHA:
+	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+	case PIPE_BLENDFACTOR_INV_DST_COLOR:
+		return true;
+	default:
+		return false;
+	}
+}
+
static void *si_create_blend_state_mode(struct pipe_context *ctx,
const struct pipe_blend_state *state,
unsigned mode)
struct si_context *sctx = (struct si_context*)ctx;
struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
struct si_pm4_state *pm4 = &blend->pm4;
-
+ uint32_t sx_mrt_blend_opt[8] = {0};
uint32_t color_control = 0;
if (!blend)
unsigned srcA = state->rt[j].alpha_src_factor;
unsigned dstA = state->rt[j].alpha_dst_factor;
+ unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
unsigned blend_cntl = 0;
+ sx_mrt_blend_opt[i] =
+ S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+
if (!state->rt[j].colormask)
continue;
- /* we pretend 8 buffer are used, CB_SHADER_MASK will disable unused one */
+ /* cb_render_state will disable unused ones */
blend->cb_target_mask |= state->rt[j].colormask << (4 * i);
if (!state->rt[j].blend_enable) {
continue;
}
+ /* Blending optimizations for Stoney.
+ * These transformations don't change the behavior.
+ *
+ * First, get rid of DST in the blend factors:
+ * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+ */
+ si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
+ PIPE_BLENDFACTOR_DST_COLOR,
+ PIPE_BLENDFACTOR_SRC_COLOR);
+ si_blend_remove_dst(&eqA, &srcA, &dstA,
+ PIPE_BLENDFACTOR_DST_COLOR,
+ PIPE_BLENDFACTOR_SRC_COLOR);
+ si_blend_remove_dst(&eqA, &srcA, &dstA,
+ PIPE_BLENDFACTOR_DST_ALPHA,
+ PIPE_BLENDFACTOR_SRC_ALPHA);
+
+ /* Look up the ideal settings from tables. */
+ srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
+ dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
+ srcA_opt = si_translate_blend_opt_factor(srcA, true);
+ dstA_opt = si_translate_blend_opt_factor(dstA, true);
+
+ /* Handle interdependencies. */
+ if (si_blend_factor_uses_dst(srcRGB))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+ if (si_blend_factor_uses_dst(srcA))
+ dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+
+ if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
+ (dstRGB == PIPE_BLENDFACTOR_ZERO ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+
+ /* Set the final value. */
+ sx_mrt_blend_opt[i] =
+ S_028760_COLOR_SRC_OPT(srcRGB_opt) |
+ S_028760_COLOR_DST_OPT(dstRGB_opt) |
+ S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
+ S_028760_ALPHA_SRC_OPT(srcA_opt) |
+ S_028760_ALPHA_DST_OPT(dstA_opt) |
+ S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
+
+ /* Set blend state. */
blend_cntl |= S_028780_ENABLE(1);
blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
}
if (sctx->b.family == CHIP_STONEY) {
- uint32_t sx_blend_opt_control = 0;
-
- for (int i = 0; i < 8; i++) {
- const int j = state->independent_blend_enable ? i : 0;
-
- /* TODO: We can also set this if the surface doesn't contain RGB. */
- if (!state->rt[j].blend_enable ||
- !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B)))
- sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i);
-
- /* TODO: We can also set this if the surface doesn't contain alpha. */
- if (!state->rt[j].blend_enable ||
- !(state->rt[j].colormask & PIPE_MASK_A))
- sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i);
-
- if (!state->rt[j].blend_enable) {
- si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
- S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
- S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED));
- continue;
- }
-
+ for (int i = 0; i < 8; i++)
si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
- S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) |
- S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) |
- S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) |
- S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) |
- S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) |
- S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func)));
- }
-
- si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control);
+ sx_mrt_blend_opt[i]);
- /* RB+ doesn't work with dual source blending */
- if (blend->dual_src_blend)
+ /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
+ if (blend->dual_src_blend || state->logicop_enable ||
+ mode == V_028808_CB_RESOLVE)
color_control |= S_028808_DISABLE_DUAL_QUAD(1);
}
{
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_bind_state(sctx, blend, (struct si_state_blend *)state);
- si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
}
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
color_pitch = S_028C64_TILE_MAX(pitch);
+ /* Intensity is implemented as Red, so treat it that way. */
color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
- S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
+ S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1 ||
+ util_format_is_intensity(surf->base.format));
if (rtex->resource.b.b.nr_samples > 1) {
unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
/* Determine pixel shader export format */
si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
- if (sctx->b.family == CHIP_STONEY &&
- !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
- switch (desc->channel[0].size) {
- case 32:
- if (desc->nr_channels == 1) {
- if (swap == V_0280A0_SWAP_STD)
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
- else if (swap == V_0280A0_SWAP_ALT_REV)
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A;
- }
- break;
- case 16:
- /* For 1-channel formats, use the superset thereof. */
- if (desc->nr_channels <= 2) {
- if (swap == V_0280A0_SWAP_STD ||
- swap == V_0280A0_SWAP_STD_REV)
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR;
- else
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR;
- }
- break;
- case 11:
- if (desc->nr_channels == 3) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11;
- surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT;
- }
- break;
- case 10:
- if (desc->nr_channels == 4) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10;
- surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT;
- }
- break;
- case 8:
- /* For 1 and 2-channel formats, use the superset thereof. */
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8;
- surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT;
- break;
- case 5:
- if (desc->nr_channels == 3) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5;
- surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT;
- } else if (desc->nr_channels == 4) {
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5;
- surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT;
- }
- break;
- case 4:
- /* For 1 nad 2-channel formats, use the superset thereof. */
- surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4;
- surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT;
- break;
- }
- }
-
surf->color_initialized = true;
}
}
si_update_poly_offset_state(sctx);
- si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
if (sctx->framebuffer.nr_samples != old_nr_samples) {
unsigned i, nr_cbufs = state->nr_cbufs;
struct r600_texture *tex = NULL;
struct r600_surface *cb = NULL;
- uint32_t sx_ps_downconvert = 0;
- uint32_t sx_blend_opt_epsilon = 0;
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
if (sctx->b.chip_class >= VI)
radeon_emit(cs, cb->cb_dcc_base); /* R_028C94_CB_COLOR0_DCC_BASE */
-
- sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
- sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0] &&
sctx->framebuffer.dirty_cbufs & (1 << 0)) {
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
cb->cb_color_info | tex->cb_color_info);
- sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
- sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
i++;
}
for (; i < 8 ; i++)
if (sctx->framebuffer.dirty_cbufs & (1 << i))
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
- if (sctx->b.family == CHIP_STONEY) {
- radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2);
- radeon_emit(cs, sx_ps_downconvert); /* R_028754_SX_PS_DOWNCONVERT */
- radeon_emit(cs, sx_blend_opt_epsilon); /* R_028758_SX_BLEND_OPT_EPSILON */
- }
-
/* ZS buffer. */
if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
si_init_atom(sctx, &sctx->db_render_state, &sctx->atoms.s.db_render_state, si_emit_db_render_state);
si_init_atom(sctx, &sctx->msaa_config, &sctx->atoms.s.msaa_config, si_emit_msaa_config);
si_init_atom(sctx, &sctx->sample_mask.atom, &sctx->atoms.s.sample_mask, si_emit_sample_mask);
- si_init_atom(sctx, &sctx->cb_target_mask, &sctx->atoms.s.cb_target_mask, si_emit_cb_target_mask);
+ si_init_atom(sctx, &sctx->cb_render_state, &sctx->atoms.s.cb_render_state, si_emit_cb_render_state);
si_init_atom(sctx, &sctx->blend_color.atom, &sctx->atoms.s.blend_color, si_emit_blend_color);
si_init_atom(sctx, &sctx->clip_regs, &sctx->atoms.s.clip_regs, si_emit_clip_regs);
si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
{
unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1);
unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1);
- unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
- unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
+ unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
+ unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
unsigned rb_per_pkr = MIN2(num_rb / num_se / sh_per_se, 2);
unsigned rb_per_se = num_rb / num_se;
unsigned se_mask[4];
static void si_init_config(struct si_context *sctx)
{
struct si_screen *sscreen = sctx->screen;
- unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
- unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
+ unsigned num_rb = MIN2(sctx->screen->b.info.num_render_backends, 16);
+ unsigned rb_mask = sctx->screen->b.info.enabled_rb_mask;
unsigned raster_config, raster_config_1;
uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
struct r600_atom *db_render_state;
struct r600_atom *msaa_config;
struct r600_atom *sample_mask;
- struct r600_atom *cb_target_mask;
+ struct r600_atom *cb_render_state;
struct r600_atom *blend_color;
struct r600_atom *clip_regs;
struct r600_atom *clip_state;
}
/* Select the hw shader variant depending on the current state. */
-static int si_shader_select(struct pipe_context *ctx,
- struct si_shader_ctx_state *state)
+static int si_shader_select_with_key(struct pipe_context *ctx,
+ struct si_shader_ctx_state *state,
+ union si_shader_key *key)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_selector *sel = state->cso;
struct si_shader *current = state->current;
- union si_shader_key key;
struct si_shader *iter, *shader = NULL;
int r;
- si_shader_selector_key(ctx, sel, &key);
-
/* Check if we don't need to change anything.
* This path is also used for most shaders that don't need multiple
* variants, it will cost just a computation of the key and this
* test. */
- if (likely(current && memcmp(¤t->key, &key, sizeof(key)) == 0))
+ if (likely(current && memcmp(¤t->key, key, sizeof(*key)) == 0))
return 0;
pipe_mutex_lock(sel->mutex);
for (iter = sel->first_variant; iter; iter = iter->next_variant) {
/* Don't check the "current" shader. We checked it above. */
if (current != iter &&
- memcmp(&iter->key, &key, sizeof(key)) == 0) {
+ memcmp(&iter->key, key, sizeof(*key)) == 0) {
state->current = iter;
pipe_mutex_unlock(sel->mutex);
return 0;
return -ENOMEM;
}
shader->selector = sel;
- shader->key = key;
+ shader->key = *key;
r = si_shader_create(sctx->screen, sctx->tm, shader, &sctx->b.debug);
if (unlikely(r)) {
return 0;
}
+/**
+ * Select a shader variant for \p state using a key produced by
+ * si_shader_selector_key() for the selector in state->cso.
+ *
+ * \return whatever si_shader_select_with_key() returns for that key
+ */
+static int si_shader_select(struct pipe_context *ctx,
+			    struct si_shader_ctx_state *state)
+{
+	struct si_shader_selector *sel = state->cso;
+	union si_shader_key derived_key;
+
+	si_shader_selector_key(ctx, sel, &derived_key);
+
+	return si_shader_select_with_key(ctx, state, &derived_key);
+}
+
static void *si_create_shader_selector(struct pipe_context *ctx,
const struct pipe_shader_state *state)
{
/* Pre-compilation. */
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
+ union si_shader_key key;
- if (si_shader_select(ctx, &state)) {
+ memset(&key, 0, sizeof(key));
+
+ /* Set reasonable defaults, so that the shader key doesn't
+ * cause any code to be eliminated.
+ */
+ switch (sel->type) {
+ case PIPE_SHADER_TESS_CTRL:
+ key.tcs.prim_mode = PIPE_PRIM_TRIANGLES;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ key.ps.alpha_func = PIPE_FUNC_ALWAYS;
+ for (i = 0; i < 8; i++)
+ if (sel->info.colors_written & (1 << i))
+ key.ps.spi_shader_col_format |=
+ V_028710_SPI_SHADER_FP16_ABGR << (i * 4);
+ break;
+ }
+
+ if (si_shader_select_with_key(ctx, &state, &key)) {
fprintf(stderr, "radeonsi: can't create a shader\n");
tgsi_free_tokens(sel->tokens);
FREE(sel);
sctx->ps_shader.cso = sel;
sctx->ps_shader.current = sel ? sel->first_variant : NULL;
- si_mark_atom_dirty(sctx, &sctx->cb_target_mask);
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
}
static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
si_mark_atom_dirty(sctx, &sctx->spi_ps_input);
}
+ if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
+ si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
if (sctx->ps_db_shader_control != db_shader_control) {
sctx->ps_db_shader_control = db_shader_control;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
return 1;
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
return 1;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
return 65536;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
}
/* should only get here on unhandled cases */
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
return 64;
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
}
}
+/**
+ * Trace wrapper around the wrapped context's set_shader_buffers hook.
+ *
+ * The call and its arguments are dumped first. When \p buffers is non-NULL,
+ * a temporary copy of the \p nr entries is made in which every buffer
+ * resource has been passed through trace_resource_unwrap(), and that copy
+ * is handed to the wrapped context. A NULL \p buffers is forwarded as NULL.
+ *
+ * If the temporary array cannot be allocated, the function returns without
+ * forwarding the call.
+ *
+ * \param _context  trace context the call was made on
+ * \param shader    forwarded unchanged
+ * \param start     forwarded unchanged
+ * \param nr        number of entries in \p buffers
+ * \param buffers   array of \p nr shader buffers, or NULL
+ */
+static void trace_context_set_shader_buffers(struct pipe_context *_context,
+                                             unsigned shader,
+                                             unsigned start, unsigned nr,
+                                             struct pipe_shader_buffer *buffers)
+{
+   struct trace_context *tr_context = trace_context(_context);
+   struct pipe_context *context = tr_context->pipe;
+   struct pipe_shader_buffer *_buffers = NULL;
+
+   trace_dump_call_begin("pipe_context", "set_shader_buffers");
+   trace_dump_arg(ptr, context);
+   trace_dump_arg(uint, shader);
+   trace_dump_arg(uint, start);
+   trace_dump_arg_begin("buffers");
+   trace_dump_struct_array(shader_buffer, buffers, nr);
+   trace_dump_arg_end();
+   trace_dump_call_end();
+
+   if (buffers) {
+      /* Same type as nr, so the loop bound is compared without a
+       * signed/unsigned conversion. */
+      unsigned i;
+
+      _buffers = MALLOC(nr * sizeof(*_buffers));
+      if (!_buffers)
+         return;
+
+      /* Copy each entry, then swap the wrapped resource for the real one. */
+      for (i = 0; i < nr; i++) {
+         _buffers[i] = buffers[i];
+         _buffers[i].buffer = trace_resource_unwrap(
+            tr_context, _buffers[i].buffer);
+      }
+   }
+
+   context->set_shader_buffers(context, shader, start, nr, _buffers);
+
+   if (_buffers)
+      FREE(_buffers);
+}
+
+
static const struct debug_named_value rbug_blocker_flags[] = {
{"before", 1, NULL},
{"after", 2, NULL},
TR_CTX_INIT(texture_barrier);
TR_CTX_INIT(memory_barrier);
TR_CTX_INIT(set_tess_state);
+ TR_CTX_INIT(set_shader_buffers);
TR_CTX_INIT(transfer_map);
TR_CTX_INIT(transfer_unmap);
}
+/**
+ * Dump a pipe_shader_buffer to the trace as a struct named
+ * "pipe_shader_buffer" with the members buffer, buffer_offset and
+ * buffer_size, in that order.
+ *
+ * Nothing is emitted unless trace_dumping_enabled_locked() returns true.
+ *
+ * \param state  shader buffer to dump; NULL is dumped via trace_dump_null()
+ */
+void trace_dump_shader_buffer(const struct pipe_shader_buffer *state)
+{
+ if (!trace_dumping_enabled_locked())
+ return;
+
+ /* A missing buffer is recorded as a null value rather than a struct. */
+ if(!state) {
+ trace_dump_null();
+ return;
+ }
+
+ trace_dump_struct_begin("pipe_shader_buffer");
+ trace_dump_member(resource_ptr, state, buffer);
+ trace_dump_member(uint, state, buffer_offset);
+ trace_dump_member(uint, state, buffer_size);
+ trace_dump_struct_end();
+}
+
+
void trace_dump_draw_info(const struct pipe_draw_info *state)
{
if (!trace_dumping_enabled_locked())
void trace_dump_constant_buffer(const struct pipe_constant_buffer *state);
+void trace_dump_shader_buffer(const struct pipe_shader_buffer *buffer);
+
void trace_dump_draw_info(const struct pipe_draw_info *state);
void trace_dump_blit_info(const struct pipe_blit_info *);
fprintf(stderr, "Draw call returned %s. "
"Expect corruption.\n", strerror(errno));
warned = true;
+ } else if (!ret) {
+ vc4->last_emit_seqno = submit.seqno;
}
}
- vc4->last_emit_seqno = submit.seqno;
+ if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
+ if (!vc4_wait_seqno(vc4->screen,
+ vc4->last_emit_seqno - 5,
+ PIPE_TIMEOUT_INFINITE,
+ "job throttling")) {
+ fprintf(stderr, "Job throttling failed\n");
+ }
+ }
if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
/* Unsupported features. */
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_CUBE_MAP_ARRAY:
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
+ case PIPE_CAP_QUERY_MEMORY_INFO:
return 0;
/* Stream output. */
return vscreen->caps.caps.v1.max_tbo_size > 0;
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
return 0;
+ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+ return 0;
case PIPE_CAP_CUBE_MAP_ARRAY:
return vscreen->caps.caps.v1.bset.cube_map_array;
case PIPE_CAP_TEXTURE_MULTISAMPLE:
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
case PIPE_CAP_INVALIDATE_BUFFER:
case PIPE_CAP_GENERATE_MIPMAP:
+ case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+ case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_VENDOR_ID:
return 0x1af4;
vws->get_caps(vws, &screen->caps);
+ screen->refcnt = 1;
util_format_s3tc_init();
return &screen->base;
struct virgl_screen {
struct pipe_screen base;
+
+ int refcnt;
+
+ /* place for winsys to stash its own stuff: */
+ void *winsys_priv;
+
struct virgl_winsys *vws;
struct virgl_drm_caps caps;
struct pipe_query *q,
boolean wait,
union pipe_query_result *result);
+
+ /**
+ * Get results of a query, storing into resource. Note that this may not
+ * be used with batch queries.
+ *
+ * \param wait if true, this query will block until the result is ready
+ * \param result_type the type of the value being stored
+ * \param index for queries that return multiple pieces of data, which
+ * item of that data to store (e.g. for
+ * PIPE_QUERY_PIPELINE_STATISTICS).
+ * When the index is -1, the driver should not write the
+ * value of the query but a 1/0 to the appropriate location,
+ * with 1 meaning that the query result is available.
+ */
+ void (*get_query_result_resource)(struct pipe_context *pipe,
+ struct pipe_query *q,
+ boolean wait,
+ enum pipe_query_value_type result_type,
+ int index,
+ struct pipe_resource *resource,
+ unsigned offset);
+
/*@}*/
/**
* Flags for pipe_context::memory_barrier.
*/
#define PIPE_BARRIER_MAPPED_BUFFER (1 << 0)
+#define PIPE_BARRIER_SHADER_BUFFER (1 << 1)
+#define PIPE_BARRIER_QUERY_BUFFER (1 << 2)
/**
* Resource binding flags -- state tracker must specify in advance all
#define PIPE_BIND_SHADER_IMAGE (1 << 15) /* set_shader_images */
#define PIPE_BIND_COMPUTE_RESOURCE (1 << 16) /* set_compute_resources */
#define PIPE_BIND_COMMAND_ARGS_BUFFER (1 << 17) /* pipe_draw_info.indirect */
+#define PIPE_BIND_QUERY_BUFFER (1 << 18) /* get_query_result_resource */
/**
* The first two flags above were previously part of the amorphous
PIPE_CAP_CUBE_MAP_ARRAY,
PIPE_CAP_TEXTURE_BUFFER_OBJECTS,
PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT,
+ PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY,
PIPE_CAP_TGSI_TEXCOORD,
PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER,
PIPE_CAP_QUERY_PIPELINE_STATISTICS,
PIPE_CAP_INVALIDATE_BUFFER,
PIPE_CAP_GENERATE_MIPMAP,
PIPE_CAP_STRING_MARKER,
+ PIPE_CAP_SURFACE_REINTERPRET_BLOCKS,
+ PIPE_CAP_QUERY_BUFFER_OBJECT,
+ PIPE_CAP_QUERY_MEMORY_INFO,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
union pipe_numeric_type_union batch[1];
};
+/**
+ * Type of the value being stored by the get_query_result_resource hook,
+ * which takes one of these as its result_type parameter.
+ *
+ * NOTE(review): the names suggest signed/unsigned 32-bit and 64-bit
+ * integers; confirm against the drivers that implement the hook.
+ */
+enum pipe_query_value_type
+{
+ PIPE_QUERY_TYPE_I32,
+ PIPE_QUERY_TYPE_U32,
+ PIPE_QUERY_TYPE_I64,
+ PIPE_QUERY_TYPE_U64,
+};
+
union pipe_color_union
{
float f[4];
struct pipe_surface;
struct pipe_transfer;
struct pipe_box;
+struct pipe_memory_info;
/**
unsigned index,
struct pipe_driver_query_group_info *info);
+ /**
+ * Query information about memory usage.
+ */
+ void (*query_memory_info)(struct pipe_screen *screen,
+ struct pipe_memory_info *info);
};
#define TGSI_OPCODE_FSLT 110
#define TGSI_OPCODE_FSNE 111
- /* gap */
+#define TGSI_OPCODE_MEMBAR 112
#define TGSI_OPCODE_CALLNZ 113
/* gap */
#define TGSI_OPCODE_BREAKC 115
unsigned Padding : 29;
};
+#define TGSI_MEMBAR_SHADER_BUFFER (1 << 0)
+#define TGSI_MEMBAR_ATOMIC_BUFFER (1 << 1)
+#define TGSI_MEMBAR_SHADER_IMAGE (1 << 2)
+#define TGSI_MEMBAR_SHARED (1 << 3)
+#define TGSI_MEMBAR_THREAD_GROUP (1 << 4)
#ifdef __cplusplus
}
void *data;
};
+/**
+ * Information about memory usage, as taken by the query_memory_info
+ * screen hook. All sizes are in kilobytes; nr_device_memory_evictions is
+ * a count, not a size.
+ */
+struct pipe_memory_info
+{
+ unsigned total_device_memory; /**< size of device memory, e.g. VRAM */
+ unsigned avail_device_memory; /**< free device memory at the moment */
+ unsigned total_staging_memory; /**< size of staging memory, e.g. GART */
+ unsigned avail_staging_memory; /**< free staging memory at the moment */
+ unsigned device_memory_evicted; /**< size of memory evicted (monotonic counter) */
+ unsigned nr_device_memory_evictions; /**< number of evictions, not a size (monotonic counter) */
+};
+
#ifdef __cplusplus
}
#endif
authenticatedchannel9.h \
basetexture9.c \
basetexture9.h \
+ buffer9.c \
+ buffer9.h \
cryptosession9.c \
cryptosession9.h \
cubetexture9.c \
D3DPIPECAP(INDEP_BLEND_ENABLE, D3DPMISCCAPS_INDEPENDENTWRITEMASKS) |
/*D3DPMISCCAPS_PERSTAGECONSTANT |*/ /* TODO */
/*D3DPMISCCAPS_POSTBLENDSRGBCONVERT |*/ /* TODO */
- D3DPMISCCAPS_FOGANDSPECULARALPHA |
+ D3DPMISCCAPS_FOGANDSPECULARALPHA | /* Note: documentation of the flag is wrong */
D3DPIPECAP(BLEND_EQUATION_SEPARATE, D3DPMISCCAPS_SEPARATEALPHABLEND) |
D3DPIPECAP(MIXED_COLORBUFFER_FORMATS, D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS) |
D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING |
pCaps->DestBlendCaps = pCaps->SrcBlendCaps;
- pCaps->AlphaCmpCaps = D3DPCMPCAPS_LESS |
+ pCaps->AlphaCmpCaps = D3DPCMPCAPS_NEVER |
+ D3DPCMPCAPS_LESS |
D3DPCMPCAPS_EQUAL |
D3DPCMPCAPS_LESSEQUAL |
D3DPCMPCAPS_GREATER |
hr = NineDevice9_new(screen, ¶ms, &caps, pPresentationParameters,
pD3D9, pPresentationGroup, This->ctx, FALSE, NULL,
- (struct NineDevice9 **)ppReturnedDeviceInterface);
+ (struct NineDevice9 **)ppReturnedDeviceInterface,
+ minor);
if (FAILED(hr)) {
DBG("Failed to create device.\n");
return hr;
hr = NineDevice9Ex_new(screen, ¶ms, &caps, pPresentationParameters,
pFullscreenDisplayMode,
pD3D9Ex, pPresentationGroup, This->ctx,
- (struct NineDevice9Ex **)ppReturnedDeviceInterface);
+ (struct NineDevice9Ex **)ppReturnedDeviceInterface,
+ minor);
if (FAILED(hr)) {
DBG("Failed to create device.\n");
return hr;
if (tex->dirty_box.width) {
for (l = min_level_dirty; l <= last_level; ++l) {
- u_box_minify_2d(&box, &tex->dirty_box, l);
+ u_box_minify_3d(&box, &tex->dirty_box, l);
NineVolume9_UploadSelf(tex->volumes[l], &box);
}
memset(&tex->dirty_box, 0, sizeof(tex->dirty_box));
--- /dev/null
+/*
+ * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
+ * Copyright 2015 Patrick Rudolph <siro@das-labor.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "buffer9.h"
+#include "device9.h"
+#include "nine_helpers.h"
+#include "nine_pipe.h"
+
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_format.h"
+#include "util/u_box.h"
+
+#define DBG_CHANNEL (DBG_INDEXBUFFER|DBG_VERTEXBUFFER)
+
+/**
+ * Initialise the state shared by vertex and index buffers.
+ *
+ * Allocates the initial (single-slot) table of outstanding transfers, fills
+ * in the pipe_resource template stored in This->base.info and then hands
+ * over to NineResource9_ctor.
+ *
+ * \param This    buffer to initialise
+ * \param pParams construction parameters; supplies the owning device
+ * \param Type    D3DRTYPE_VERTEXBUFFER or D3DRTYPE_INDEXBUFFER; selects the
+ *                gallium bind flag of the resource
+ * \param Usage   D3DUSAGE_* flags requested by the application
+ * \param Size    size of the buffer in bytes
+ * \param Pool    D3D memory pool; D3DPOOL_SCRATCH is rejected
+ *
+ * \return D3DERR_INVALIDCALL for D3DPOOL_SCRATCH, E_OUTOFMEMORY if the
+ *         transfer table cannot be allocated, otherwise the result of
+ *         NineResource9_ctor.
+ */
+HRESULT
+NineBuffer9_ctor( struct NineBuffer9 *This,
+                        struct NineUnknownParams *pParams,
+                        D3DRESOURCETYPE Type,
+                        DWORD Usage,
+                        UINT Size,
+                        D3DPOOL Pool )
+{
+    struct pipe_resource *info = &This->base.info;
+    HRESULT hr;
+
+    DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This, Size, Usage, Pool);
+
+    user_assert(Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);
+
+    /* Start with room for one outstanding transfer; NineBuffer9_Lock grows
+     * the table on demand. */
+    This->maps = MALLOC(sizeof(struct pipe_transfer *));
+    if (!This->maps)
+        return E_OUTOFMEMORY;
+    This->nmaps = 0;
+    This->maxmaps = 1;
+    This->size = Size;
+
+    This->pipe = pParams->device->pipe;
+
+    info->screen = pParams->device->screen;
+    info->target = PIPE_BUFFER;
+    info->format = PIPE_FORMAT_R8_UNORM;
+    info->width0 = Size;
+    info->flags = 0;
+
+    /* This constructor is shared by vertex and index buffers, so the bind
+     * flag has to follow the resource type: index buffers must keep
+     * PIPE_BIND_INDEX_BUFFER, as they had before the code was unified. */
+    info->bind = (Type == D3DRTYPE_INDEXBUFFER) ? PIPE_BIND_INDEX_BUFFER
+                                                : PIPE_BIND_VERTEX_BUFFER;
+    info->bind |= PIPE_BIND_TRANSFER_WRITE;
+    if (!(Usage & D3DUSAGE_WRITEONLY))
+        info->bind |= PIPE_BIND_TRANSFER_READ;
+
+    info->usage = PIPE_USAGE_DEFAULT;
+    if (Usage & D3DUSAGE_DYNAMIC)
+        info->usage = PIPE_USAGE_STREAM;
+    else if (Pool == D3DPOOL_SYSTEMMEM)
+        info->usage = PIPE_USAGE_STAGING;
+
+    /* D3DUSAGE_DONOTCLIP, D3DUSAGE_NONSECURE, D3DUSAGE_NPATCHES,
+     * D3DUSAGE_POINTS, D3DUSAGE_RTPATCHES and D3DUSAGE_TEXTAPI are
+     * currently not acted upon. */
+    if (Usage & D3DUSAGE_SOFTWAREPROCESSING)
+        DBG("Application asked for Software Vertex Processing, "
+            "but this is unimplemented\n");
+
+    info->height0 = 1;
+    info->depth0 = 1;
+    info->array_size = 1;
+    info->last_level = 0;
+    info->nr_samples = 0;
+
+    hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
+                            Type, Pool, Usage);
+    return hr;
+}
+
+/**
+ * Tear down a buffer: release every transfer that is still mapped, free the
+ * transfer table and then destroy the underlying resource object.
+ */
+void
+NineBuffer9_dtor( struct NineBuffer9 *This )
+{
+    struct pipe_transfer **table = This->maps;
+
+    if (table != NULL) {
+        /* Each Unlock pops exactly one outstanding transfer. */
+        while (This->nmaps != 0)
+            NineBuffer9_Unlock(This);
+        FREE(table);
+    }
+
+    NineResource9_dtor(&This->base);
+}
+
+/**
+ * Return the pipe_resource of the buffer, as reported by
+ * NineResource9_GetResource for the embedded base object.
+ */
+struct pipe_resource *
+NineBuffer9_GetResource( struct NineBuffer9 *This )
+{
+    struct NineResource9 *base = &This->base;
+
+    return NineResource9_GetResource(base);
+}
+
+/**
+ * Map a byte range of the buffer and return a CPU pointer to it.
+ *
+ * Every successful call pushes one transfer onto This->maps, so a buffer may
+ * be locked several times; each lock is released by one NineBuffer9_Unlock.
+ *
+ * \param OffsetToLock first byte of the range
+ * \param SizeToLock   length of the range; 0 means "up to the end of the
+ *                     buffer"
+ * \param ppbData      receives the mapped pointer on success
+ * \param Flags        D3DLOCK_* flags
+ *
+ * \return D3D_OK on success; E_POINTER if ppbData is NULL;
+ *         D3DERR_INVALIDCALL for unknown flags or a failed map;
+ *         D3DERR_WASSTILLDRAWING for a failed map with D3DLOCK_DONOTWAIT;
+ *         E_OUTOFMEMORY if the transfer table cannot grow.
+ */
+HRESULT WINAPI
+NineBuffer9_Lock( struct NineBuffer9 *This,
+                        UINT OffsetToLock,
+                        UINT SizeToLock,
+                        void **ppbData,
+                        DWORD Flags )
+{
+    unsigned map_flags = d3dlock_buffer_to_pipe_transfer_usage(Flags);
+    struct pipe_box region;
+    void *ptr;
+
+    DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
+        This, This->base.resource,
+        OffsetToLock, SizeToLock, Flags);
+
+    user_assert(ppbData, E_POINTER);
+    user_assert(!(Flags & ~(D3DLOCK_DISCARD |
+                            D3DLOCK_DONOTWAIT |
+                            D3DLOCK_NO_DIRTY_UPDATE |
+                            D3DLOCK_NOSYSLOCK |
+                            D3DLOCK_READONLY |
+                            D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL);
+
+    /* The transfer table is full: double its capacity before mapping. */
+    if (This->nmaps == This->maxmaps) {
+        const int grown = This->maxmaps << 1;
+        struct pipe_transfer **table =
+            REALLOC(This->maps, sizeof(struct pipe_transfer *) * This->maxmaps,
+                    sizeof(struct pipe_transfer *) * grown);
+
+        if (!table)
+            return E_OUTOFMEMORY;
+
+        This->maps = table;
+        This->maxmaps = grown;
+    }
+
+    /* A zero size selects everything from the offset to the end. */
+    if (!SizeToLock) {
+        SizeToLock = This->size - OffsetToLock;
+        user_warn(OffsetToLock != 0);
+    }
+
+    u_box_1d(OffsetToLock, SizeToLock, &region);
+
+    /* The new transfer lands in the first free slot; the slot only becomes
+     * "used" once nmaps is bumped below. */
+    ptr = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
+                                   map_flags, &region,
+                                   &This->maps[This->nmaps]);
+    if (ptr) {
+        DBG("returning pointer %p\n", ptr);
+        This->nmaps++;
+        *ppbData = ptr;
+        return D3D_OK;
+    }
+
+    DBG("pipe::transfer_map failed\n"
+        " usage = %x\n"
+        " box.x = %u\n"
+        " box.width = %u\n",
+        map_flags, region.x, region.width);
+
+    /* not sure what to return, msdn suggests this */
+    return (Flags & D3DLOCK_DONOTWAIT) ? D3DERR_WASSTILLDRAWING
+                                       : D3DERR_INVALIDCALL;
+}
+
+/**
+ * Release the most recently mapped transfer of the buffer.
+ *
+ * \return D3D_OK, or D3DERR_INVALIDCALL when no lock is outstanding.
+ */
+HRESULT WINAPI
+NineBuffer9_Unlock( struct NineBuffer9 *This )
+{
+    struct pipe_transfer *last;
+
+    DBG("This=%p\n", This);
+
+    user_assert(This->nmaps > 0, D3DERR_INVALIDCALL);
+
+    /* Pop the top of the transfer stack, then unmap it. */
+    This->nmaps -= 1;
+    last = This->maps[This->nmaps];
+    This->pipe->transfer_unmap(This->pipe, last);
+
+    return D3D_OK;
+}
--- /dev/null
+/*
+ * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
+ * Copyright 2015 Patrick Rudolph <siro@das-labor.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef _NINE_BUFFER9_H_
+#define _NINE_BUFFER9_H_
+
+#include "resource9.h"
+
+struct pipe_screen;
+struct pipe_context;
+struct pipe_transfer;
+
+/* State shared by the D3D9 vertex and index buffer objects.
+ * NineIndexBuffer9 embeds this struct as its own "base" member. */
+struct NineBuffer9
+{
+ struct NineResource9 base;
+
+ /* G3D */
+ struct pipe_context *pipe; /* context used to map/unmap the buffer */
+ struct pipe_transfer **maps; /* outstanding transfers, one per active Lock */
+ int nmaps, maxmaps; /* used slots in maps / allocated slots in maps */
+ UINT size; /* size passed to the constructor */
+};
+/* Cast an opaque pointer to a NineBuffer9 pointer; no checking is done. */
+static inline struct NineBuffer9 *
+NineBuffer9( void *data )
+{
+ return (struct NineBuffer9 *)data;
+}
+
+/* Initialise a buffer of Size bytes for the device found in pParams.
+ * D3DPOOL_SCRATCH is rejected with D3DERR_INVALIDCALL; E_OUTOFMEMORY is
+ * returned when the internal transfer table cannot be allocated. */
+HRESULT
+NineBuffer9_ctor( struct NineBuffer9 *This,
+ struct NineUnknownParams *pParams,
+ D3DRESOURCETYPE Type,
+ DWORD Usage,
+ UINT Size,
+ D3DPOOL Pool );
+
+/* Unlock every outstanding lock, free the transfer table and destroy the
+ * base resource. */
+void
+NineBuffer9_dtor( struct NineBuffer9 *This );
+
+/* Return the pipe_resource associated with the buffer. */
+struct pipe_resource *
+NineBuffer9_GetResource( struct NineBuffer9 *This );
+
+/* Map SizeToLock bytes starting at OffsetToLock (SizeToLock == 0 selects
+ * everything up to the end of the buffer) and store the pointer in *ppbData.
+ * Locks nest: each successful call must be paired with one Unlock. */
+HRESULT WINAPI
+NineBuffer9_Lock( struct NineBuffer9 *This,
+ UINT OffsetToLock,
+ UINT SizeToLock,
+ void **ppbData,
+ DWORD Flags );
+
+/* Release the most recent lock; D3DERR_INVALIDCALL if none is outstanding. */
+HRESULT WINAPI
+NineBuffer9_Unlock( struct NineBuffer9 *This );
+
+#endif /* _NINE_BUFFER9_H_ */
}
if (This->managed_buffer)
- FREE(This->managed_buffer);
+ align_free(This->managed_buffer);
NineBaseTexture9_dtor(&This->base);
}
#include "nine_pipe.h"
#include "nine_ff.h"
#include "nine_dump.h"
+#include "nine_limits.h"
#include "pipe/p_screen.h"
#include "pipe/p_context.h"
#endif
-static void
+void
NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset )
{
struct NineSurface9 *refSurf = NULL;
This->state.scissor.maxy = refSurf->desc.Height;
}
- if (This->nswapchains && This->swapchains[0]->params.EnableAutoDepthStencil)
+ if (This->nswapchains && This->swapchains[0]->params.EnableAutoDepthStencil) {
This->state.rs[D3DRS_ZENABLE] = TRUE;
+ This->state.rs_advertised[D3DRS_ZENABLE] = TRUE;
+ }
if (This->state.rs[D3DRS_ZENABLE])
NineDevice9_SetDepthStencilSurface(
This, (IDirect3DSurface9 *)This->swapchains[0]->zsbuf);
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
boolean ex,
- D3DDISPLAYMODEEX *pFullscreenDisplayMode )
+ D3DDISPLAYMODEEX *pFullscreenDisplayMode,
+ int minorVersionNum )
{
unsigned i;
HRESULT hr = NineUnknown_ctor(&This->base, pParams);
This->params = *pCreationParameters;
This->ex = ex;
This->present = pPresentationGroup;
+ This->minor_version_num = minorVersionNum;
+
IDirect3D9_AddRef(This->d3d9);
ID3DPresentGroup_AddRef(This->present);
/* Create first, it messes up our state. */
This->hud = hud_create(This->pipe, This->cso); /* NULL result is fine */
+ /* Available memory counter. Updated only for allocations with this device
+ * instance. This is the Win 7 behavior.
+ * Win XP shares this counter across multiple devices. */
+ This->available_texture_mem = This->screen->get_param(This->screen, PIPE_CAP_VIDEO_MEMORY);
+ if (This->available_texture_mem < 4096)
+ This->available_texture_mem <<= 20;
+ else
+ This->available_texture_mem = UINT_MAX;
+ /* We cap texture memory usage to 80% of what is initially reported as free.
+ * This gets us closer to the Windows behaviour: for example, VertexBuffer
+ * allocation still succeeds when texture allocation fails. */
+ This->available_texture_limit = This->available_texture_mem * 20LL / 100LL;
+
/* create implicit swapchains */
This->nswapchains = ID3DPresentGroup_GetMultiheadCount(This->present);
This->swapchains = CALLOC(This->nswapchains,
if (This->swapchains) {
for (i = 0; i < This->nswapchains; ++i)
- NineUnknown_Unbind(NineUnknown(This->swapchains[i]));
+ if (This->swapchains[i])
+ NineUnknown_Unbind(NineUnknown(This->swapchains[i]));
FREE(This->swapchains);
}
HRESULT WINAPI
NineDevice9_TestCooperativeLevel( struct NineDevice9 *This )
{
- return D3D_OK; /* TODO */
+ if (NineSwapChain9_GetOccluded(This->swapchains[0])) {
+ This->device_needs_reset = TRUE;
+ return D3DERR_DEVICELOST;
+ } else if (This->device_needs_reset) {
+ return D3DERR_DEVICENOTRESET;
+ }
+
+ return D3D_OK;
}
UINT WINAPI
NineDevice9_GetAvailableTextureMem( struct NineDevice9 *This )
{
- const unsigned mem = This->screen->get_param(This->screen, PIPE_CAP_VIDEO_MEMORY);
- if (mem < 4096)
- return mem << 20;
- else
- return UINT_MAX;
+ return This->available_texture_mem;
}
HRESULT WINAPI
"pCursorBitmap=%p\n", This, XHotSpot, YHotSpot, pCursorBitmap);
user_assert(pCursorBitmap, D3DERR_INVALIDCALL);
+ user_assert(surf->desc.Format == D3DFMT_A8R8G8B8, D3DERR_INVALIDCALL);
if (This->swapchains[0]->params.Windowed) {
This->cursor.w = MIN2(surf->desc.Width, 32);
This, pPresentationParameters, pSwapChain);
user_assert(pPresentationParameters, D3DERR_INVALIDCALL);
+ user_assert(tmplt->params.Windowed && pPresentationParameters->Windowed, D3DERR_INVALIDCALL);
+
+ /* TODO: this deserves more tests */
+ if (!pPresentationParameters->hDeviceWindow)
+ pPresentationParameters->hDeviceWindow = This->params.hFocusWindow;
hr = ID3DPresentGroup_CreateAdditionalPresent(This->present, pPresentationParameters, &present);
DBG("This=%p pPresentationParameters=%p\n", This, pPresentationParameters);
+ if (NineSwapChain9_GetOccluded(This->swapchains[0])) {
+ This->device_needs_reset = TRUE;
+ return D3DERR_DEVICELOST;
+ }
+
for (i = 0; i < This->nswapchains; ++i) {
D3DPRESENT_PARAMETERS *params = &pPresentationParameters[i];
hr = NineSwapChain9_Resize(This->swapchains[i], params, NULL);
if (hr != D3D_OK)
- return hr;
+ break;
}
nine_pipe_context_clear(This);
This, 0, (IDirect3DSurface9 *)This->swapchains[0]->buffers[0]);
/* XXX: better use GetBackBuffer here ? */
+ This->device_needs_reset = (hr != D3D_OK);
return hr;
}
IDirect3DSurface9 **ppBackBuffer )
{
user_assert(ppBackBuffer != NULL, D3DERR_INVALIDCALL);
+ /* return NULL on error */
+ *ppBackBuffer = NULL;
user_assert(iSwapChain < This->nswapchains, D3DERR_INVALIDCALL);
return NineSwapChain9_GetBackBuffer(This->swapchains[iSwapChain],
struct NineSurface9 *src = NineSurface9(pSourceSurface);
struct pipe_resource *dst_res = NineSurface9_GetResource(dst);
struct pipe_resource *src_res = NineSurface9_GetResource(src);
- const boolean zs = util_format_is_depth_or_stencil(dst_res->format);
+ boolean zs;
struct pipe_blit_info blit;
boolean scaled, clamped, ms, flip_x = FALSE, flip_y = FALSE;
DBG("pDestRect=(%u,%u)-(%u,%u)\n", pDestRect->left, pDestRect->top,
pDestRect->right, pDestRect->bottom);
+ user_assert(dst->base.pool == D3DPOOL_DEFAULT &&
+ src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
+ zs = util_format_is_depth_or_stencil(dst_res->format);
user_assert(!zs || !This->in_scene, D3DERR_INVALIDCALL);
user_assert(!zs || !pSourceRect ||
(pSourceRect->left == 0 &&
src_res->nr_samples,
PIPE_BIND_SAMPLER_VIEW),
D3DERR_INVALIDCALL);
- user_assert(dst->base.pool == D3DPOOL_DEFAULT &&
- src->base.pool == D3DPOOL_DEFAULT, D3DERR_INVALIDCALL);
/* We might want to permit these, but wine thinks we shouldn't. */
user_assert(!pDestRect ||
user_assert((surf->base.usage & D3DUSAGE_RENDERTARGET) ||
NineSurface9_IsOffscreenPlain(surf), D3DERR_INVALIDCALL);
+ user_assert(surf->desc.Format != D3DFMT_NULL, D3D_OK);
+
if (pRect) {
x = pRect->left;
y = pRect->top;
Count = 0;
#endif
+ nine_update_state_framebuffer_clear(This);
+
if (Flags & D3DCLEAR_TARGET) bufs |= PIPE_CLEAR_COLOR;
- if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
- if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
+ /* Ignore Z buffer if not bound */
+ if (This->state.fb.zsbuf != NULL) {
+ if (Flags & D3DCLEAR_ZBUFFER) bufs |= PIPE_CLEAR_DEPTH;
+ if (Flags & D3DCLEAR_STENCIL) bufs |= PIPE_CLEAR_STENCIL;
+ }
if (!bufs)
return D3D_OK;
d3dcolor_to_pipe_color_union(&rgba, Color);
- nine_update_state_framebuffer(This);
-
rect.x1 = This->state.viewport.X;
rect.y1 = This->state.viewport.Y;
rect.x2 = This->state.viewport.Width + rect.x1;
/* Case we clear depth buffer (and eventually rt too).
* depth buffer size is always >= rt size. Compare to clear region */
((bufs & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) &&
- This->state.fb.zsbuf != NULL &&
rect.x2 >= zsbuf_surf->desc.Width &&
rect.y2 >= zsbuf_surf->desc.Height))) {
DBG("Clear fast path\n");
DBG("This=%p State=%u(%s) Value=%08x\n", This,
State, nine_d3drs_to_string(State), Value);
+ user_assert(State < D3DRS_COUNT, D3DERR_INVALIDCALL);
+
+ if (state->rs_advertised[State] == Value && likely(!This->is_recording))
+ return D3D_OK;
+
+ state->rs_advertised[State] = Value;
+
/* Amd hacks (equivalent to GL extensions) */
- if (State == D3DRS_POINTSIZE) {
+ if (unlikely(State == D3DRS_POINTSIZE)) {
if (Value == RESZ_CODE)
return NineDevice9_ResolveZ(This);
}
/* NV hack */
- if (State == D3DRS_ADAPTIVETESS_Y &&
- (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE]))) {
+ if (unlikely(State == D3DRS_ADAPTIVETESS_Y)) {
+ if (Value == D3DFMT_ATOC || (Value == D3DFMT_UNKNOWN && state->rs[NINED3DRS_ALPHACOVERAGE])) {
state->rs[NINED3DRS_ALPHACOVERAGE] = (Value == D3DFMT_ATOC);
state->changed.group |= NINE_STATE_BLEND;
return D3D_OK;
+ }
}
- user_assert(State < Elements(state->rs), D3DERR_INVALIDCALL);
-
- if (likely(state->rs[State] != Value) || unlikely(This->is_recording)) {
- state->rs[State] = Value;
- state->changed.rs[State / 32] |= 1 << (State % 32);
- state->changed.group |= nine_render_state_group[State];
- }
+ state->rs[State] = nine_fix_render_state_value(State, Value);
+ state->changed.rs[State / 32] |= 1 << (State % 32);
+ state->changed.group |= nine_render_state_group[State];
return D3D_OK;
}
D3DRENDERSTATETYPE State,
DWORD *pValue )
{
- user_assert(State < Elements(This->state.rs), D3DERR_INVALIDCALL);
+ user_assert(State < D3DRS_COUNT, D3DERR_INVALIDCALL);
- *pValue = This->state.rs[State];
+ *pValue = This->state.rs_advertised[State];
return D3D_OK;
}
buffer_offset = 0;
} else {
/* SO matches vertex declaration */
- resource = dst->base.resource;
+ resource = NineVertexBuffer9_GetResource(dst);
buffer_offset = DestIndex * vs->so->stride[0];
}
target = This->pipe->create_stream_output_target(This->pipe, resource,
IDirect3DVertexDeclaration9 *pDecl )
{
struct nine_state *state = This->update;
+ BOOL was_programmable_vs = This->state.programmable_vs;
DBG("This=%p pDecl=%p\n", This, pDecl);
if (likely(!This->is_recording) && state->vdecl == NineVertexDeclaration9(pDecl))
return D3D_OK;
+
nine_bind(&state->vdecl, pDecl);
+ This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+ if (likely(!This->is_recording) && was_programmable_vs != This->state.programmable_vs) {
+ state->commit |= NINE_STATE_COMMIT_CONST_VS;
+ state->changed.group |= NINE_STATE_VS;
+ }
+
state->changed.group |= NINE_STATE_VDECL;
return D3D_OK;
IDirect3DVertexShader9 *pShader )
{
struct nine_state *state = This->update;
+ BOOL was_programmable_vs = This->state.programmable_vs;
DBG("This=%p pShader=%p\n", This, pShader);
if (!This->is_recording && state->vs == (struct NineVertexShader9*)pShader)
return D3D_OK;
+ nine_bind(&state->vs, pShader);
+
+ This->state.programmable_vs = This->state.vs && !(This->state.vdecl && This->state.vdecl->position_t);
+
/* ff -> non-ff: commit back non-ff constants */
- if (!state->vs && pShader)
+ if (!was_programmable_vs && This->state.programmable_vs)
state->commit |= NINE_STATE_COMMIT_CONST_VS;
- nine_bind(&state->vs, pShader);
-
state->changed.group |= NINE_STATE_VS;
return D3D_OK;
state->vtxbuf[i].stride = Stride;
state->vtxbuf[i].buffer_offset = OffsetInBytes;
}
- state->vtxbuf[i].buffer = pStreamData ? pVBuf9->base.resource : NULL;
+ pipe_resource_reference(&state->vtxbuf[i].buffer,
+ pStreamData ? NineVertexBuffer9_GetResource(pVBuf9) : NULL);
return D3D_OK;
}
(Setting & D3DSTREAMSOURCE_INDEXEDDATA)), D3DERR_INVALIDCALL);
user_assert(Setting, D3DERR_INVALIDCALL);
+ if (likely(!This->is_recording) && state->stream_freq[StreamNumber] == Setting)
+ return D3D_OK;
+
state->stream_freq[StreamNumber] = Setting;
if (Setting & D3DSTREAMSOURCE_INSTANCEDATA)
else
state->stream_instancedata_mask &= ~(1 << StreamNumber);
- state->changed.stream_freq |= 1 << StreamNumber;
+ state->changed.stream_freq |= 1 << StreamNumber; /* Used for stateblocks */
+ if (StreamNumber != 0)
+ state->changed.group |= NINE_STATE_STREAMFREQ;
return D3D_OK;
}
struct d3dadapter9_context *pCTX,
boolean ex,
D3DDISPLAYMODEEX *pFullscreenDisplayMode,
- struct NineDevice9 **ppOut )
+ struct NineDevice9 **ppOut,
+ int minorVersionNum )
{
BOOL lock;
lock = !!(pCreationParameters->BehaviorFlags & D3DCREATE_MULTITHREADED);
NINE_NEW(Device9, ppOut, lock, /* args */
pScreen, pCreationParameters, pCaps,
pPresentationParameters, pD3D9, pPresentationGroup, pCTX,
- ex, pFullscreenDisplayMode);
+ ex, pFullscreenDisplayMode, minorVersionNum );
}
/* dummy vbo (containing 0 0 0 0) to bind if vertex shader input
* is not bound to anything by the vertex declaration */
struct pipe_resource *dummy_vbo;
+ BOOL device_needs_reset;
+ int minor_version_num;
+ long long available_texture_mem;
+ long long available_texture_limit;
};
static inline struct NineDevice9 *
NineDevice9( void *data )
struct d3dadapter9_context *pCTX,
boolean ex,
D3DDISPLAYMODEEX *pFullscreenDisplayMode,
- struct NineDevice9 **ppOut );
+ struct NineDevice9 **ppOut,
+ int minorVersionNum );
HRESULT
NineDevice9_ctor( struct NineDevice9 *This,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
boolean ex,
- D3DDISPLAYMODEEX *pFullscreenDisplayMode );
+ D3DDISPLAYMODEEX *pFullscreenDisplayMode,
+ int minorVersionNum );
void
NineDevice9_dtor( struct NineDevice9 *This );
/*** Nine private ***/
+void
+NineDevice9_SetDefaultState( struct NineDevice9 *This, boolean is_reset );
struct pipe_screen *
NineDevice9_GetScreen( struct NineDevice9 *This );
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "device9.h"
#include "device9ex.h"
+#include "nine_pipe.h"
#include "swapchain9ex.h"
#include "nine_helpers.h"
D3DDISPLAYMODEEX *pFullscreenDisplayMode,
IDirect3D9Ex *pD3D9Ex,
ID3DPresentGroup *pPresentationGroup,
- struct d3dadapter9_context *pCTX )
+ struct d3dadapter9_context *pCTX,
+ int minorVersionNum )
{
DBG("This=%p pParams=%p pScreen=%p pCreationParameters=%p pCaps=%p "
"pPresentationParameters=%p pFullscreenDisplayMode=%p "
pScreen, pCreationParameters, pCaps,
pPresentationParameters,
(IDirect3D9 *)pD3D9Ex, pPresentationGroup, pCTX,
- TRUE, pFullscreenDisplayMode);
+ TRUE, pFullscreenDisplayMode, minorVersionNum);
}
static void
DBG("This=%p hDestinationWindow=%p\n",
This, hDestinationWindow);
+ user_assert(!This->base.swapchains[0]->params.Windowed, D3D_OK);
+
+ if (This->base.params.hFocusWindow == hDestinationWindow) {
+ if (NineSwapChain9_GetOccluded(This->base.swapchains[0]))
+ return S_PRESENT_OCCLUDED;
+ } else if(!NineSwapChain9_GetOccluded(This->base.swapchains[0])) {
+ return S_PRESENT_OCCLUDED;
+ }
/* TODO: handle the other return values */
return D3D_OK;
}
if (pFullscreenDisplayMode) mode = &(pFullscreenDisplayMode[i]);
hr = NineSwapChain9_Resize(This->base.swapchains[i], params, mode);
if (FAILED(hr))
- return (hr == D3DERR_OUTOFVIDEOMEMORY) ? hr : D3DERR_DEVICELOST;
+ break;
}
NineDevice9_SetRenderTarget(
(struct NineDevice9 *)This, 0, (IDirect3DSurface9 *)This->base.swapchains[0]->buffers[0]);
+ return hr;
+}
+
+/**
+ * Reset entry point of the Ex device.
+ *
+ * Resizes every implicit swapchain with the matching element of
+ * pPresentationParameters, stopping at the first failure. The pipe context
+ * and the device state are then cleared, the default state is restored and
+ * back buffer 0 of swapchain 0 is bound as render target 0; this happens
+ * whether or not the resize succeeded.
+ *
+ * \return the result of the last NineSwapChain9_Resize call.
+ */
+HRESULT WINAPI
+NineDevice9Ex_Reset( struct NineDevice9Ex *This,
+                     D3DPRESENT_PARAMETERS *pPresentationParameters )
+{
+    HRESULT hr = D3D_OK;
+    unsigned chain = 0;
+
+    DBG("This=%p pPresentationParameters=%p\n", This, pPresentationParameters);
+
+    while (chain < This->base.nswapchains) {
+        hr = NineSwapChain9_Resize(This->base.swapchains[chain],
+                                   &pPresentationParameters[chain], NULL);
+        if (FAILED(hr))
+            break;
+        ++chain;
+    }
+
+    nine_pipe_context_clear((struct NineDevice9 *)This);
+    nine_state_clear(&This->base.state, TRUE);
+
+    NineDevice9_SetDefaultState((struct NineDevice9 *)This, TRUE);
+    NineDevice9_SetRenderTarget(
+        (struct NineDevice9 *)This, 0,
+        (IDirect3DSurface9 *)This->base.swapchains[0]->buffers[0]);
+
+    return hr;
+}
return NineSwapChain9Ex_GetDisplayModeEx(swapchain, pMode, pRotation);
}
+/**
+ * TestCooperativeLevel entry point of the Ex device: unconditionally
+ * reports D3D_OK.
+ */
+HRESULT WINAPI
+NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This )
+{
+    (void)This; /* the device state is not consulted */
+
+    return D3D_OK;
+}
+
+
IDirect3DDevice9ExVtbl NineDevice9Ex_vtable = {
(void *)NineUnknown_QueryInterface,
(void *)NineUnknown_AddRef,
(void *)NineUnknown_Release,
- (void *)NineDevice9_TestCooperativeLevel,
+ (void *)NineDevice9Ex_TestCooperativeLevel,
(void *)NineDevice9_GetAvailableTextureMem,
(void *)NineDevice9_EvictManagedResources,
(void *)NineDevice9_GetDirect3D,
(void *)NineDevice9_CreateAdditionalSwapChain,
(void *)NineDevice9_GetSwapChain,
(void *)NineDevice9_GetNumberOfSwapChains,
- (void *)NineDevice9_Reset,
+ (void *)NineDevice9Ex_Reset,
(void *)NineDevice9_Present,
(void *)NineDevice9_GetBackBuffer,
(void *)NineDevice9_GetRasterStatus,
IDirect3D9Ex *pD3D9Ex,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
- struct NineDevice9Ex **ppOut )
+ struct NineDevice9Ex **ppOut,
+ int minorVersionNum )
{
BOOL lock;
lock = !!(pCreationParameters->BehaviorFlags & D3DCREATE_MULTITHREADED);
NINE_NEW(Device9Ex, ppOut, lock,
pScreen, pCreationParameters, pCaps, pPresentationParameters,
- pFullscreenDisplayMode, pD3D9Ex, pPresentationGroup, pCTX);
+ pFullscreenDisplayMode, pD3D9Ex, pPresentationGroup, pCTX, minorVersionNum );
}
IDirect3D9Ex *pD3D9Ex,
ID3DPresentGroup *pPresentationGroup,
struct d3dadapter9_context *pCTX,
- struct NineDevice9Ex **ppOut );
+ struct NineDevice9Ex **ppOut,
+ int minorVersionNum );
HRESULT WINAPI
NineDevice9Ex_SetConvolutionMonoKernel( struct NineDevice9Ex *This,
const RGNDATA *pDirtyRegion,
DWORD dwFlags );
+HRESULT WINAPI
+NineDevice9Ex_Present( struct NineDevice9Ex *This,
+ const RECT *pSourceRect,
+ const RECT *pDestRect,
+ HWND hDestWindowOverride,
+ const RGNDATA *pDirtyRegion );
+
HRESULT WINAPI
NineDevice9Ex_GetGPUThreadPriority( struct NineDevice9Ex *This,
INT *pPriority );
D3DPRESENT_PARAMETERS *pPresentationParameters,
D3DDISPLAYMODEEX *pFullscreenDisplayMode );
+HRESULT WINAPI
+NineDevice9Ex_Reset( struct NineDevice9Ex *This,
+ D3DPRESENT_PARAMETERS *pPresentationParameters );
+
HRESULT WINAPI
NineDevice9Ex_GetDisplayModeEx( struct NineDevice9Ex *This,
UINT iSwapChain,
D3DDISPLAYMODEEX *pMode,
D3DDISPLAYROTATION *pRotation );
+HRESULT WINAPI
+NineDevice9Ex_TestCooperativeLevel( struct NineDevice9Ex *This );
+
#endif /* _NINE_DEVICE9EX_H_ */
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include <stdio.h>
#include "guid.h"
const GUID IID_IUnknown = { 0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 } };
}
return TRUE;
}
+
+/**
+ * Format a GUID as "{XXXXXXXX,XXXX,XXXX,XXXXXXXXXXXXXXXX}" in upper-case hex.
+ *
+ * \param guid_str destination buffer supplied by the caller; the write is
+ *                 unbounded. NOTE(review): 38 bytes (37 characters plus NUL)
+ *                 suffice if Data1 is 32 bits wide, Data2/Data3 16 bits and
+ *                 the Data4 elements 8 bits; confirm against the GUID type.
+ * \param id       GUID to format
+ *
+ * \return guid_str
+ */
+char* GUID_sprintf(char *guid_str, REFGUID id) {
+    char *out = guid_str;
+
+    sprintf(out,
+            "{%08X,%04X,%04X,"
+            "%02X%02X%02X%02X%02X%02X%02X%02X}",
+            id->Data1, id->Data2, id->Data3,
+            id->Data4[0], id->Data4[1], id->Data4[2], id->Data4[3],
+            id->Data4[4], id->Data4[5], id->Data4[6], id->Data4[7]);
+
+    return out;
+}
GUID_equal( const GUID *a,
const GUID *b );
+char*
+GUID_sprintf( char *guid_str,
+ REFGUID id );
+
#endif /* _NINE_GUID_H_ */
struct NineUnknownParams *pParams,
D3DINDEXBUFFER_DESC *pDesc )
{
- struct pipe_resource *info = &This->base.info;
HRESULT hr;
DBG("This=%p pParams=%p pDesc=%p Usage=%s\n",
This, pParams, pDesc, nine_D3DUSAGE_to_str(pDesc->Usage));
- This->pipe = pParams->device->pipe;
-
- info->screen = pParams->device->screen;
- info->target = PIPE_BUFFER;
- info->format = PIPE_FORMAT_R8_UNORM;
- info->width0 = pDesc->Size;
- info->flags = 0;
-
- info->bind = PIPE_BIND_INDEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
- if (!(pDesc->Usage & D3DUSAGE_WRITEONLY))
- info->bind |= PIPE_BIND_TRANSFER_READ;
-
- info->usage = PIPE_USAGE_DEFAULT;
- if (pDesc->Usage & D3DUSAGE_DYNAMIC)
- info->usage = PIPE_USAGE_STREAM;
- if (pDesc->Pool == D3DPOOL_SYSTEMMEM)
- info->usage = PIPE_USAGE_STAGING;
-
- /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
- /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
- /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
- /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
- /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
- if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
- DBG("Application asked for Software Vertex Processing, "
- "but this is unimplemented\n");
-
- info->height0 = 1;
- info->depth0 = 1;
- info->array_size = 1;
- info->last_level = 0;
- info->nr_samples = 0;
-
- hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE, D3DRTYPE_INDEXBUFFER,
- pDesc->Pool, pDesc->Usage);
+ hr = NineBuffer9_ctor(&This->base, pParams, D3DRTYPE_INDEXBUFFER,
+ pDesc->Usage, pDesc->Size, pDesc->Pool);
if (FAILED(hr))
return hr;
- This->buffer.buffer = This->base.resource;
+ This->buffer.buffer = NineIndexBuffer9_GetResource(This);
This->buffer.offset = 0;
- This->map_count = 0;
switch (pDesc->Format) {
case D3DFMT_INDEX16: This->buffer.index_size = 2; break;
void
NineIndexBuffer9_dtor( struct NineIndexBuffer9 *This )
{
- if (This->transfer) { NineIndexBuffer9_Unlock(This); }
-
- NineResource9_dtor(&This->base);
+ NineBuffer9_dtor(&This->base);
}
const struct pipe_index_buffer *
return &This->buffer;
}
+/**
+ * Return the pipe_resource of the index buffer by delegating to the
+ * embedded NineBuffer9.
+ */
+struct pipe_resource *
+NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This )
+{
+    struct NineBuffer9 *buffer = &This->base;
+
+    return NineBuffer9_GetResource(buffer);
+}
+
HRESULT WINAPI
NineIndexBuffer9_Lock( struct NineIndexBuffer9 *This,
UINT OffsetToLock,
void **ppbData,
DWORD Flags )
{
- struct pipe_box box;
- void *data;
- UINT count;
- const unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags);
-
- DBG("This=%p OffsetToLock=%u SizeToLock=%u ppbData=%p Flags=%i "
- "transfer=%p map_count=%u\n", This, OffsetToLock,
- SizeToLock, ppbData, Flags, This->transfer, This->map_count);
-
- count = ++This->map_count;
-
- if (SizeToLock == 0) {
- SizeToLock = This->desc.Size - OffsetToLock;
- user_warn(OffsetToLock != 0);
- }
-
- u_box_1d(OffsetToLock, SizeToLock, &box);
-
- if (unlikely(count != 1)) {
- DBG("Lock has been called on already locked buffer."
- "Unmapping before mapping again.");
- This->pipe->transfer_unmap(This->pipe, This->transfer);
- }
- data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
- usage, &box, &This->transfer);
- if (!This->transfer) {
- DBG("pipe::transfer_map failed\n"
- " usage = %u\n"
- " box.x = %u\n"
- " box.width = %u\n",
- usage, box.x, box.width);
- }
- *ppbData = data;
- DBG("Returning memory at %p at address %p\n", *ppbData, ppbData);
-
- return D3D_OK;
+ return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags);
}
HRESULT WINAPI
NineIndexBuffer9_Unlock( struct NineIndexBuffer9 *This )
{
- DBG("This=%p\n", This);
- if (!This->map_count) {
- DBG("Unmap called without a previous map call.\n");
- return D3D_OK;
- }
- if (--This->map_count) {
- DBG("Ignoring unmap.\n");
- return D3D_OK;
- }
- This->pipe->transfer_unmap(This->pipe, This->transfer);
- This->transfer = NULL;
- return D3D_OK;
+ return NineBuffer9_Unlock(&This->base);
}
HRESULT WINAPI
#define _NINE_INDEXBUFFER9_H_
#include "resource9.h"
-
+#include "buffer9.h"
#include "pipe/p_state.h"
struct pipe_screen;
struct NineIndexBuffer9
{
- struct NineResource9 base;
+ struct NineBuffer9 base;
/* g3d stuff */
- struct pipe_context *pipe;
struct pipe_index_buffer buffer;
- struct pipe_transfer *transfer;
- UINT map_count;
D3DINDEXBUFFER_DESC desc;
};
const struct pipe_index_buffer *
NineIndexBuffer9_GetBuffer( struct NineIndexBuffer9 *This );
+struct pipe_resource *
+NineIndexBuffer9_GetResource( struct NineIndexBuffer9 *This );
/*** Direct3D public ***/
HRESULT WINAPI
uint32_t color0in_one : 1;
uint32_t color1in_one : 1;
uint32_t fog : 1;
- uint32_t pad1 : 7;
+ uint32_t specular_enable : 1;
+ uint32_t pad1 : 6;
uint32_t tc_dim_input: 16; /* 8 * 2 bits */
uint32_t pad2 : 16;
uint32_t tc_dim_output: 24; /* 8 * 3 bits */
ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 224.0f));
ureg_ARL(ureg, AR, ureg_src(tmp));
}
+
+ ureg_MOV(ureg, r[2], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
+ ureg_MOV(ureg, r[3], ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
+
for (i = 0; i < key->vertexblend; ++i) {
for (c = 0; c < 4; ++c) {
cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (224 + i * 4) * !key->vertexblend_indexed + c);
cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
}
/* multiply by WORLD(index) */
- ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), cWM[0]);
- ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), cWM[1], ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), cWM[2], ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _WWWW(vs->aVtx), cWM[3], ureg_src(r[0]));
-
- /* accumulate weighted position value */
- if (i)
- ureg_MAD(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
- else
- ureg_MUL(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, 0));
+ ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
+ ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
+
+ if (i < (key->vertexblend - 1)) {
+ /* accumulate weighted position value */
+ ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
+ /* subtract weighted position value for last value */
+ ureg_SUB(ureg, r[3], ureg_src(r[3]), ureg_scalar(vs->aWgt, i));
+ }
}
+
+ /* the last weighted position is always 1 - sum_of_previous_weights */
+ ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(ureg_src(r[3]), key->vertexblend - 1), ureg_src(r[2]));
+
/* multiply by VIEW_PROJ */
- ureg_MUL(ureg, r[0], _X(r[2]), _CONST(8));
- ureg_MAD(ureg, r[0], _Y(r[2]), _CONST(9), ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _Z(r[2]), _CONST(10), ureg_src(r[0]));
- ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(r[0]));
+ ureg_MUL(ureg, tmp, _X(r[2]), _CONST(8));
+ ureg_MAD(ureg, tmp, _Y(r[2]), _CONST(9), ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _Z(r[2]), _CONST(10), ureg_src(tmp));
+ ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(tmp));
if (need_rVtx)
vs->aVtx = ureg_src(r[2]);
ureg_MOV(ureg, oPos, ureg_src(tmp));
} else {
/* position = vertex * WORLD_VIEW_PROJ */
- ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), _CONST(0));
- ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), _CONST(1), ureg_src(r[0]));
- ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), _CONST(2), ureg_src(r[0]));
- ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(r[0]));
+ ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
+ ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
+ ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
+ ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
}
if (need_rVtx) {
{
/* hitDir = light.position - eyeVtx
* d = length(hitDir)
- * hitDir /= d
*/
ureg_SUB(ureg, rHit, cLPos, ureg_src(rVtx));
ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
ureg_RSQ(ureg, tmp_y, _X(tmp));
- ureg_MUL(ureg, rHit, ureg_src(rHit), _Y(tmp)); /* normalize */
ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
/* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
ureg_ENDIF(ureg);
+ /* normalize hitDir */
+ ureg_normalize3(ureg, rHit, ureg_src(rHit), tmp);
+
/* if (SPOT light) */
ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
ureg_IF(ureg, _X(tmp), &label[l++]);
/* midVec = normalize(hitDir + eyeDir) */
if (key->localviewer) {
ureg_normalize3(ureg, rMid, ureg_src(rVtx), tmp);
- ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
+ ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
} else {
- ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
+ ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
}
ureg_normalize3(ureg, rMid, ureg_src(rMid), tmp);
ureg_DP3(ureg, ureg_saturate(tmp_y), ureg_src(rNrm), ureg_src(rMid));
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
}
- ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
+
+ if (key->specular_enable) {
+ /* add oCol[1] to oCol[0] */
+ ureg_MAD(ureg, tmp, ureg_src(rD), vs->mtlD, ureg_src(tmp));
+ ureg_MAD(ureg, oCol[0], ureg_src(rS), vs->mtlS, ureg_src(tmp));
+ } else {
+ ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
+ }
ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
} else
/* COLOR */
reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
break;
case D3DTA_DIFFUSE:
- reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
+ reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
break;
case D3DTA_SPECULAR:
- reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
break;
case D3DTA_TEMP:
reg = ps->rTmpSrc;
ps.ureg = ureg;
ps.stage.index_pre_mod = -1;
- ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
/* Declare all TEMPs we might need, serious drivers have a register allocator. */
for (i = 0; i < Elements(ps.r); ++i)
if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
key->ts[s].colorarg1 == D3DTA_SPECULAR ||
key->ts[s].colorarg2 == D3DTA_SPECULAR)
- ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
key->ts[s].colorarg1 == D3DTA_TEXTURE ||
if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
key->ts[s].alphaarg2 == D3DTA_SPECULAR)
- ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
}
}
if (key->specular)
- ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
+ ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
if (key.fog_mode)
key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE];
+ key.localviewer = !!state->rs[D3DRS_LOCALVIEWER];
+ key.specular_enable = !!state->rs[D3DRS_SPECULARENABLE];
+
if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE];
DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
/* NOTE: the only reference belongs to the hash table */
- if (!device->state.vs) {
+ if (!state->programmable_vs) {
device->ff.vs = nine_ff_get_vs(device);
device->state.changed.group |= NINE_STATE_VS;
}
device->state.changed.group |= NINE_STATE_PS;
}
- if (!device->state.vs) {
+ if (!state->programmable_vs) {
nine_ff_load_vs_transforms(device);
nine_ff_load_tex_matrices(device);
nine_ff_load_lights(device);
--- /dev/null
+/*
+ * Copyright 2015 Axel Davy <axel.davy@ens.fr>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef _NINE_LIMITS_H_
+#define _NINE_LIMITS_H_
+
+#include "assert.h"
+#include "d3d9types.h"
+
+// state can be any value
+#define NINE_STATE_NO_LIMIT 0
+// value is clamped to the [min, max] range
+#define NINE_STATE_CLAMP 1
+// boolean: 0 -> false; any other value -> true
+#define NINE_STATE_BOOL 2
+// a mask is applied on the value
+#define NINE_STATE_MASK 3
+// if outside a range, state value is changed to a default value
+#define NINE_STATE_RANGE_DEF_VAL 4
+
+struct nine_state_behaviour { /* rule applied by nine_fix_render_state_value() to one render state */
+ unsigned state_value_behaviour; /* one of the NINE_STATE_* behaviour codes; selects which member of u is read */
+ union {
+ struct {
+ unsigned min; /* values below min are raised to min */
+ unsigned max; /* values above max are lowered to max */
+ } clamp; /* read for NINE_STATE_CLAMP */
+ unsigned mask; /* read for NINE_STATE_MASK: result is Value & mask */
+ struct {
+ unsigned min; /* lowest value accepted unchanged */
+ unsigned max; /* highest value accepted unchanged */
+ unsigned default_val; /* replaces any value outside [min, max] */
+ } range_def_val; /* read for NINE_STATE_RANGE_DEF_VAL */
+ } u;
+};
+
+#define __NO_LIMIT_RS(o) \
+ [D3DRS_##o] = {NINE_STATE_NO_LIMIT} /* table entry: D3DRS_<o> is stored unchanged */
+
+#define __CLAMP_RS(o, m, M) \
+ [D3DRS_##o] = {NINE_STATE_CLAMP, {.clamp = {m, M}}} /* table entry: D3DRS_<o> is clamped to [m, M] */
+
+#define __BOOLEAN_RS(o) \
+ [D3DRS_##o] = {NINE_STATE_BOOL} /* table entry: D3DRS_<o> is normalized to 0 or 1 */
+
+#define __MASK_RS(o, m) \
+ [D3DRS_##o] = {NINE_STATE_MASK, {.mask = m}} /* table entry: D3DRS_<o> is ANDed with m */
+
+#define __RANGE_DEF_VAL_RS(o, m, M, d) \
+ [D3DRS_##o] = {NINE_STATE_RANGE_DEF_VAL, {.range_def_val = {m, M, d}}} /* table entry: D3DRS_<o> becomes d when outside [m, M] */
+
+#define __TO_DETERMINE_RS(o, m, M) \
+ [D3DRS_##o] = {NINE_STATE_NO_LIMIT} /* m and M are currently unused: D3DRS_<o> is treated as NINE_STATE_NO_LIMIT */
+
+static const struct nine_state_behaviour
+render_state_limits_table[D3DRS_BLENDOPALPHA + 1] = { /* indexed by D3DRS_* value; entries not listed below are zero-initialized, i.e. NINE_STATE_NO_LIMIT */
+ __TO_DETERMINE_RS(ZENABLE, 0, 3),
+ __TO_DETERMINE_RS(FILLMODE, 1, 3),
+ __CLAMP_RS(SHADEMODE, 1, 3),
+ __BOOLEAN_RS(ZWRITEENABLE),
+ __BOOLEAN_RS(ALPHATESTENABLE),
+ __BOOLEAN_RS(LASTPIXEL),
+ __RANGE_DEF_VAL_RS(SRCBLEND, 1, 17, D3DBLEND_ZERO),
+ __RANGE_DEF_VAL_RS(DESTBLEND, 1, 17, D3DBLEND_ZERO),
+ __CLAMP_RS(CULLMODE, 1, 3),
+ __CLAMP_RS(ZFUNC, 1, 8),
+ __MASK_RS(ALPHAREF, 0x000000FF),
+ __CLAMP_RS(ALPHAFUNC, 1, 8),
+ __BOOLEAN_RS(DITHERENABLE),
+ __BOOLEAN_RS(ALPHABLENDENABLE),
+ __BOOLEAN_RS(FOGENABLE),
+ __BOOLEAN_RS(SPECULARENABLE),
+ __NO_LIMIT_RS(FOGCOLOR),
+ __MASK_RS(FOGTABLEMODE, 0x00000007),
+ __NO_LIMIT_RS(FOGSTART), /* a bit more complex than that, let's ignore */
+ __NO_LIMIT_RS(FOGEND),
+ __NO_LIMIT_RS(FOGDENSITY), /* actually should be between 0.0 and 1.0 */
+ __BOOLEAN_RS(RANGEFOGENABLE),
+ __BOOLEAN_RS(STENCILENABLE),
+ __CLAMP_RS(STENCILFAIL, 1, 8),
+ __CLAMP_RS(STENCILZFAIL, 1, 8),
+ __CLAMP_RS(STENCILPASS, 1, 8),
+ __CLAMP_RS(STENCILFUNC, 1, 8),
+ __NO_LIMIT_RS(STENCILREF),
+ __NO_LIMIT_RS(STENCILMASK),
+ __NO_LIMIT_RS(STENCILWRITEMASK),
+ __NO_LIMIT_RS(TEXTUREFACTOR),
+ __TO_DETERMINE_RS(WRAP0, 0, 15),
+ __TO_DETERMINE_RS(WRAP1, 0, 15),
+ __TO_DETERMINE_RS(WRAP2, 0, 15),
+ __TO_DETERMINE_RS(WRAP3, 0, 15),
+ __TO_DETERMINE_RS(WRAP4, 0, 15),
+ __TO_DETERMINE_RS(WRAP5, 0, 15),
+ __TO_DETERMINE_RS(WRAP6, 0, 15),
+ __TO_DETERMINE_RS(WRAP7, 0, 15),
+ __BOOLEAN_RS(CLIPPING),
+ __BOOLEAN_RS(LIGHTING),
+ __NO_LIMIT_RS(AMBIENT),
+ __MASK_RS(FOGVERTEXMODE, 0x00000007),
+ __BOOLEAN_RS(COLORVERTEX),
+ __BOOLEAN_RS(LOCALVIEWER),
+ __BOOLEAN_RS(NORMALIZENORMALS),
+ __TO_DETERMINE_RS(DIFFUSEMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(SPECULARMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(AMBIENTMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(EMISSIVEMATERIALSOURCE, 0, 2),
+ __TO_DETERMINE_RS(VERTEXBLEND, 0, 256), /* values between 4 and 254 -both included- are forbidden too */
+ __NO_LIMIT_RS(CLIPPLANEENABLE), /* expected check seems complex */
+ __TO_DETERMINE_RS(POINTSIZE, 0, 0xFFFFFFFF),
+ __TO_DETERMINE_RS(POINTSIZE_MIN, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __BOOLEAN_RS(POINTSPRITEENABLE),
+ __BOOLEAN_RS(POINTSCALEENABLE),
+ __TO_DETERMINE_RS(POINTSCALE_A, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __TO_DETERMINE_RS(POINTSCALE_B, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __TO_DETERMINE_RS(POINTSCALE_C, 0, 0x7FFFFFFF), /* float >= 0.0 */
+ __BOOLEAN_RS(MULTISAMPLEANTIALIAS),
+ __NO_LIMIT_RS(MULTISAMPLEMASK),
+ __TO_DETERMINE_RS(PATCHEDGESTYLE, 0, 1),
+ __TO_DETERMINE_RS(DEBUGMONITORTOKEN, 0, 1),
+ __TO_DETERMINE_RS(POINTSIZE_MAX, 0, 0x7FFFFFFF), /* check more complex than that */
+ __BOOLEAN_RS(INDEXEDVERTEXBLENDENABLE),
+ __TO_DETERMINE_RS(COLORWRITEENABLE, 0, 15),
+ __NO_LIMIT_RS(TWEENFACTOR),
+ __CLAMP_RS(BLENDOP, 1, 5),
+ __TO_DETERMINE_RS(POSITIONDEGREE, 1, 5), /* can actually be only 1 or 5 */
+ __TO_DETERMINE_RS(NORMALDEGREE, 1, 2),
+ __BOOLEAN_RS(SCISSORTESTENABLE),
+ __NO_LIMIT_RS(SLOPESCALEDEPTHBIAS),
+ __BOOLEAN_RS(ANTIALIASEDLINEENABLE),
+ __NO_LIMIT_RS(MINTESSELLATIONLEVEL),
+ __NO_LIMIT_RS(MAXTESSELLATIONLEVEL),
+ __NO_LIMIT_RS(ADAPTIVETESS_X),
+ __NO_LIMIT_RS(ADAPTIVETESS_Y),
+ __NO_LIMIT_RS(ADAPTIVETESS_Z),
+ __NO_LIMIT_RS(ADAPTIVETESS_W),
+ __BOOLEAN_RS(ENABLEADAPTIVETESSELLATION),
+ __BOOLEAN_RS(TWOSIDEDSTENCILMODE),
+ __CLAMP_RS(CCW_STENCILFAIL, 1, 8),
+ __CLAMP_RS(CCW_STENCILZFAIL, 1, 8),
+ __CLAMP_RS(CCW_STENCILPASS, 1, 8),
+ __CLAMP_RS(CCW_STENCILFUNC, 1, 8),
+ __TO_DETERMINE_RS(COLORWRITEENABLE1, 0, 15),
+ __TO_DETERMINE_RS(COLORWRITEENABLE2, 0, 15),
+ __TO_DETERMINE_RS(COLORWRITEENABLE3, 0, 15),
+ __NO_LIMIT_RS(BLENDFACTOR),
+ __BOOLEAN_RS(SRGBWRITEENABLE),
+ __NO_LIMIT_RS(DEPTHBIAS),
+ __TO_DETERMINE_RS(WRAP8, 0, 15),
+ __TO_DETERMINE_RS(WRAP9, 0, 15),
+ __TO_DETERMINE_RS(WRAP10, 0, 15),
+ __TO_DETERMINE_RS(WRAP11, 0, 15),
+ __TO_DETERMINE_RS(WRAP12, 0, 15),
+ __TO_DETERMINE_RS(WRAP13, 0, 15),
+ __TO_DETERMINE_RS(WRAP14, 0, 15),
+ __TO_DETERMINE_RS(WRAP15, 0, 15),
+ __BOOLEAN_RS(SEPARATEALPHABLENDENABLE),
+ __RANGE_DEF_VAL_RS(SRCBLENDALPHA, 1, 17, D3DBLEND_ZERO),
+ __RANGE_DEF_VAL_RS(DESTBLENDALPHA, 1, 17, D3DBLEND_ZERO),
+ __CLAMP_RS(BLENDOPALPHA, 1, 5)
+};
+
+/**
+ * Sanitize a render state value according to render_state_limits_table.
+ *
+ * @param State  render state being set; used as the index into the table.
+ * @param Value  raw value supplied for that state.
+ * @return the value to store: clamped, normalized to 0/1, masked, or
+ *         replaced by the entry's default, depending on the entry's
+ *         behaviour code. States that fall outside the table are returned
+ *         unchanged.
+ */
+static inline DWORD
+nine_fix_render_state_value(D3DRENDERSTATETYPE State,
+                            DWORD Value)
+{
+    const struct nine_state_behaviour *behaviour;
+
+    /* The table only has D3DRS_BLENDOPALPHA + 1 entries. Anything beyond it
+     * (or a negative enum value, caught by the unsigned cast) has no rule,
+     * so pass it through instead of reading out of bounds. */
+    if ((unsigned)State > (unsigned)D3DRS_BLENDOPALPHA)
+        return Value;
+
+    behaviour = &render_state_limits_table[State];
+
+    switch (behaviour->state_value_behaviour) {
+    case NINE_STATE_NO_LIMIT:
+        break;
+    case NINE_STATE_CLAMP:
+        if (Value < behaviour->u.clamp.min)
+            Value = behaviour->u.clamp.min;
+        else if (Value > behaviour->u.clamp.max)
+            Value = behaviour->u.clamp.max;
+        break;
+    case NINE_STATE_BOOL:
+        Value = Value ? 1 : 0;
+        break;
+    case NINE_STATE_MASK:
+        Value = Value & behaviour->u.mask;
+        break;
+    case NINE_STATE_RANGE_DEF_VAL:
+        if (Value < behaviour->u.range_def_val.min ||
+            Value > behaviour->u.range_def_val.max)
+            Value = behaviour->u.range_def_val.default_val;
+        break;
+    default:
+        /* Unknown behaviour code: leave the value untouched. */
+        break;
+    }
+
+    return Value;
+}
+
+#endif /* _NINE_LIMITS_H_ */
struct pheader
{
boolean unknown;
+ GUID guid;
DWORD size;
char data[1];
};
}
nine_convert_blend_state_fixup(&blend, rs); /* for BOTH[INV]SRCALPHA */
}
+
blend.rt[0].colormask = rs[D3DRS_COLORWRITEENABLE];
if (rs[D3DRS_COLORWRITEENABLE1] != rs[D3DRS_COLORWRITEENABLE] ||
samp.wrap_s = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSU]);
samp.wrap_t = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSV]);
samp.wrap_r = d3dtextureaddress_to_pipe_tex_wrap(ss[D3DSAMP_ADDRESSW]);
- samp.min_img_filter = ss[D3DSAMP_MINFILTER] == D3DTEXF_POINT ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
- samp.mag_img_filter = ss[D3DSAMP_MAGFILTER] == D3DTEXF_POINT ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+ samp.min_img_filter = (ss[D3DSAMP_MINFILTER] == D3DTEXF_POINT && !ss[NINED3DSAMP_SHADOW]) ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
+ samp.mag_img_filter = (ss[D3DSAMP_MAGFILTER] == D3DTEXF_POINT && !ss[NINED3DSAMP_SHADOW]) ? PIPE_TEX_FILTER_NEAREST : PIPE_TEX_FILTER_LINEAR;
if (ss[D3DSAMP_MINFILTER] == D3DTEXF_ANISOTROPIC ||
ss[D3DSAMP_MAGFILTER] == D3DTEXF_ANISOTROPIC)
samp.max_anisotropy = ss[D3DSAMP_MAXANISOTROPY];
const enum pipe_format nine_d3d9_to_pipe_format_map[120] =
{
[D3DFMT_UNKNOWN] = PIPE_FORMAT_NONE,
- [D3DFMT_R8G8B8] = PIPE_FORMAT_NONE,
+ [D3DFMT_R8G8B8] = PIPE_FORMAT_R8G8B8_UNORM,
[D3DFMT_A8R8G8B8] = PIPE_FORMAT_B8G8R8A8_UNORM,
[D3DFMT_X8R8G8B8] = PIPE_FORMAT_B8G8R8X8_UNORM,
[D3DFMT_R5G6B5] = PIPE_FORMAT_B5G6R5_UNORM,
const D3DFORMAT nine_pipe_to_d3d9_format_map[PIPE_FORMAT_COUNT] =
{
[PIPE_FORMAT_NONE] = D3DFMT_UNKNOWN,
-
-/* [PIPE_FORMAT_B8G8R8_UNORM] = D3DFMT_R8G8B8, */
+ /* TODO: rename PIPE_FORMAT_R8G8B8_UNORM to PIPE_FORMAT_B8G8R8_UNORM */
+ [PIPE_FORMAT_R8G8B8_UNORM] = D3DFMT_R8G8B8,
[PIPE_FORMAT_B8G8R8A8_UNORM] = D3DFMT_A8R8G8B8,
[PIPE_FORMAT_B8G8R8X8_UNORM] = D3DFMT_X8R8G8B8,
[PIPE_FORMAT_B5G6R5_UNORM] = D3DFMT_R5G6B5,
/* the address register (vs only) must be
* assigned before use */
assert(!ureg_dst_is_undef(tx->regs.a0));
- ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+ /* Round to lowest for vs1.1 (contrary to the doc), else
+ * round to nearest */
+ if (tx->version.major < 2 && tx->version.minor < 2)
+ ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
+ else
+ ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
src = ureg_src(tx->regs.address);
} else {
if (tx->version.major < 2 && tx->version.minor < 4) {
} else {
if (tx->version.major < 3) {
assert(!param->rel);
- src = ureg_DECL_fs_input(tx->ureg, TGSI_SEMANTIC_COLOR,
- param->idx,
- TGSI_INTERPOLATE_PERSPECTIVE);
+ src = ureg_DECL_fs_input_cyl_centroid(
+ ureg, TGSI_SEMANTIC_COLOR, param->idx,
+ TGSI_INTERPOLATE_COLOR, 0,
+ tx->info->force_color_in_centroid ?
+ TGSI_INTERPOLATE_LOC_CENTROID : 0,
+ 0, 1);
} else {
assert(!param->rel); /* TODO */
assert(param->idx < Elements(tx->regs.v));
assert(!param->rel);
tx->info->rt_mask |= 1 << param->idx;
if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
- /* ps < 3: oCol[0] will have fog blending afterward
- * vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
+ /* ps < 3: oCol[0] will have fog blending afterward */
if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
- } else if (IS_VS && tx->version.major < 3 && param->idx == 1) {
- tx->regs.oCol[1] = ureg_DECL_temporary(tx->ureg);
} else {
tx->regs.oCol[param->idx] =
ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
return D3D_OK;
}
-DECL_SPECIAL(MOV_vs1x)
-{
- if (tx->insn.dst[0].file == D3DSPR_ADDR) {
- /* Implementation note: We don't write directly
- * to the addr register, but to an intermediate
- * float register.
- * Contrary to the doc, when writing to ADDR here,
- * the rounding is not to nearest, but to lowest
- * (wine test).
- * Since we use ARR next, substract 0.5. */
- ureg_SUB(tx->ureg,
- tx_dst_param(tx, &tx->insn.dst[0]),
- tx_src_param(tx, &tx->insn.src[0]),
- ureg_imm1f(tx->ureg, 0.5f));
- return D3D_OK;
- }
- return NineTranslateInstruction_Generic(tx);
-}
-
DECL_SPECIAL(LOOP)
{
struct ureg_program *ureg = tx->ureg;
return TGSI_INTERPOLATE_LINEAR;
case TGSI_SEMANTIC_BCOLOR:
case TGSI_SEMANTIC_COLOR:
+ return TGSI_INTERPOLATE_COLOR;
case TGSI_SEMANTIC_FOG:
case TGSI_SEMANTIC_GENERIC:
case TGSI_SEMANTIC_TEXCOORD:
}
} else {
if (is_input && tx->version.major >= 3) {
+ unsigned interp_location = 0;
/* SM3 only, SM2 input semantic determined by file */
assert(sem.reg.idx < Elements(tx->regs.v));
+ if (sem.reg.mod & NINED3DSPDM_CENTROID ||
+ (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
+ interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
ureg, tgsi.Name, tgsi.Index,
nine_tgsi_to_interp_mode(&tgsi),
0, /* cylwrap */
- sem.reg.mod & NINED3DSPDM_CENTROID, 0, 1);
+ interp_location, 0, 1);
} else
if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
/* FragColor or FragDepth */
struct sm1_op_info inst_table[] =
{
_OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, NULL), /* 0 */
- _OPI(MOV, MOV, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, SPECIAL(MOV_vs1x)),
- _OPI(MOV, MOV, V(2,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
+ _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
_OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
_OPI(SUB, SUB, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 3 */
_OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
}
- /* vs < 3: oD1.w (D3DPMISCCAPS_FOGANDSPECULARALPHA) set to 0 even if set */
- if (IS_VS && tx->version.major < 3 && !ureg_dst_is_undef(tx->regs.oCol[1])) {
- struct ureg_dst dst = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 1);
- ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oCol[1]));
- ureg_MOV(tx->ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 0.0f));
- }
-
if (info->position_t)
ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
uint8_t fog_enable;
uint8_t fog_mode;
+ uint8_t force_color_in_centroid;
uint16_t projected; /* ps 1.1 to 1.3 */
unsigned const_i_base; /* in vec4 (16 byte) units */
uint32_t changed_group = 0;
int has_key_changed = 0;
- if (likely(vs))
+ if (likely(state->programmable_vs))
has_key_changed = NineVertexShader9_UpdateKey(vs, state);
if (!shader_changed && !has_key_changed)
return 0;
/* likely because we dislike FF */
- if (likely(vs)) {
+ if (likely(state->programmable_vs)) {
state->cso.vs = NineVertexShader9_GetVariant(vs);
} else {
vs = device->ff.vs;
/* State preparation + State commit */
-static uint32_t
-update_framebuffer(struct NineDevice9 *device)
+static void
+update_framebuffer(struct NineDevice9 *device, bool is_clear)
{
struct pipe_context *pipe = device->pipe;
struct nine_state *state = &device->state;
unsigned w = rt0->desc.Width;
unsigned h = rt0->desc.Height;
D3DMULTISAMPLE_TYPE nr_samples = rt0->desc.MultiSampleType;
- unsigned mask = state->ps ? state->ps->rt_mask : 1;
+ unsigned ps_mask = state->ps ? state->ps->rt_mask : 1;
+ unsigned mask = is_clear ? 0xf : ps_mask;
const int sRGB = state->rs[D3DRS_SRGBWRITEENABLE] ? 1 : 0;
DBG("\n");
pipe->set_framebuffer_state(pipe, fb); /* XXX: cso ? */
- return state->changed.group;
+ if (is_clear && state->rt_mask == ps_mask)
+ state->changed.group &= ~NINE_STATE_FB;
}
static void
update_viewport(struct NineDevice9 *device)
{
- struct pipe_context *pipe = device->pipe;
const D3DVIEWPORT9 *vport = &device->state.viewport;
struct pipe_viewport_state pvport;
pvport.translate[1] -= 1.0f / 128.0f;
}
- pipe->set_viewport_states(pipe, 0, 1, &pvport);
+ cso_set_viewport(device->cso, &pvport);
}
/* Loop through VS inputs and pick the vertex elements with the declared
state->stream_usage_mask = 0;
memset(vdecl_index_map, -1, 16);
memset(used_streams, 0, device->caps.MaxStreams);
- vs = device->state.vs ? device->state.vs : device->ff.vs;
+ vs = state->programmable_vs ? device->state.vs : device->ff.vs;
if (vdecl) {
for (n = 0; n < vs->num_inputs; ++n) {
cso_single_sampler_done(device->cso, PIPE_SHADER_FRAGMENT);
commit_samplers = FALSE;
- sampler_mask = state->vs ? state->vs->sampler_mask : 0;
+ sampler_mask = state->programmable_vs ? state->vs->sampler_mask : 0;
state->bound_samplers_mask_vs = 0;
for (num_textures = 0, i = 0; i < NINE_MAX_SAMPLERS_VS; ++i) {
const unsigned s = NINE_SAMPLER_VS(i);
{
struct pipe_context *pipe = device->pipe;
- if (unlikely(!device->state.vs))
+ if (unlikely(!device->state.programmable_vs))
pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs_ff);
else
pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &device->state.pipe.cb_vs);
NINE_STATE_DSA | \
NINE_STATE_VIEWPORT | \
NINE_STATE_VDECL | \
- NINE_STATE_IDXBUF)
+ NINE_STATE_IDXBUF | \
+ NINE_STATE_STREAMFREQ)
#define NINE_STATE_RARE \
(NINE_STATE_SCISSOR | \
}
void
-nine_update_state_framebuffer(struct NineDevice9 *device)
+nine_update_state_framebuffer_clear(struct NineDevice9 *device)
{
struct nine_state *state = &device->state;
validate_textures(device);
if (state->changed.group & NINE_STATE_FB)
- update_framebuffer(device);
-
- state->changed.group &= ~NINE_STATE_FB;
+ update_framebuffer(device, TRUE);
}
boolean
validate_textures(device); /* may clobber state */
/* ff_update may change VS/PS dirty bits */
- if (unlikely(!state->vs || !state->ps))
+ if (unlikely(!state->programmable_vs || !state->ps))
nine_ff_update(device);
group = state->changed.group;
if (group & (NINE_STATE_COMMON | NINE_STATE_VS)) {
if (group & NINE_STATE_FB)
- group |= update_framebuffer(device); /* may set NINE_STATE_RASTERIZER */
+ update_framebuffer(device, FALSE);
if (group & NINE_STATE_BLEND)
prepare_blend(device);
if (group & NINE_STATE_DSA)
prepare_dsa(device);
if (group & NINE_STATE_VIEWPORT)
update_viewport(device);
- if ((group & (NINE_STATE_VDECL | NINE_STATE_VS)) ||
- state->changed.stream_freq & ~1)
+ if (group & (NINE_STATE_VDECL | NINE_STATE_VS | NINE_STATE_STREAMFREQ))
update_vertex_elements(device);
if (group & NINE_STATE_IDXBUF)
commit_index_buffer(device);
if (group & (NINE_STATE_TEXTURE | NINE_STATE_SAMPLER))
update_textures_and_samplers(device);
if (device->prefer_user_constbuf) {
- if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->vs)
+ if ((group & (NINE_STATE_VS_CONST | NINE_STATE_VS)) && state->programmable_vs)
prepare_vs_constants_userbuf(device);
if ((group & (NINE_STATE_PS_CONST | NINE_STATE_PS)) && state->ps)
prepare_ps_constants_userbuf(device);
} else {
- if ((group & NINE_STATE_VS_CONST) && state->vs)
+ if ((group & NINE_STATE_VS_CONST) && state->programmable_vs)
upload_constants(device, PIPE_SHADER_VERTEX);
if ((group & NINE_STATE_PS_CONST) && state->ps)
upload_constants(device, PIPE_SHADER_FRAGMENT);
*/
state->rs[D3DRS_POINTSIZE_MAX] = fui(caps->MaxPointSize);
+ memcpy(state->rs_advertised, state->rs, sizeof(state->rs));
+
/* Set changed flags to initialize driver.
*/
state->changed.group = NINE_STATE_ALL;
nine_bind(&state->vs, NULL);
nine_bind(&state->ps, NULL);
nine_bind(&state->vdecl, NULL);
- for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
+ for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) {
nine_bind(&state->stream[i], NULL);
+ pipe_resource_reference(&state->vtxbuf[i].buffer, NULL);
+ }
nine_bind(&state->idxbuf, NULL);
for (i = 0; i < NINE_MAX_SAMPLERS; ++i) {
if (device &&
#define NINE_STATE_SAMPLER (1 << 11)
#define NINE_STATE_VDECL (1 << 12)
#define NINE_STATE_IDXBUF (1 << 13)
-#define NINE_STATE_PRIM (1 << 14)
-#define NINE_STATE_MATERIAL (1 << 15)
-#define NINE_STATE_BLEND_COLOR (1 << 16)
-#define NINE_STATE_STENCIL_REF (1 << 17)
-#define NINE_STATE_SAMPLE_MASK (1 << 18)
-#define NINE_STATE_FF (0x1f << 19)
-#define NINE_STATE_FF_VS (0x17 << 19)
-#define NINE_STATE_FF_PS (0x18 << 19)
-#define NINE_STATE_FF_LIGHTING (1 << 19)
-#define NINE_STATE_FF_MATERIAL (1 << 20)
-#define NINE_STATE_FF_VSTRANSF (1 << 21)
-#define NINE_STATE_FF_PSSTAGES (1 << 22)
-#define NINE_STATE_FF_OTHER (1 << 23)
-#define NINE_STATE_FOG_SHADER (1 << 24)
-#define NINE_STATE_PS1X_SHADER (1 << 25)
-#define NINE_STATE_ALL 0x3ffffff
-#define NINE_STATE_UNHANDLED (1 << 26)
+#define NINE_STATE_STREAMFREQ (1 << 14)
+#define NINE_STATE_PRIM (1 << 15)
+#define NINE_STATE_MATERIAL (1 << 16)
+#define NINE_STATE_BLEND_COLOR (1 << 17)
+#define NINE_STATE_STENCIL_REF (1 << 18)
+#define NINE_STATE_SAMPLE_MASK (1 << 19)
+#define NINE_STATE_FF (0x1f << 20)
+#define NINE_STATE_FF_VS (0x17 << 20)
+#define NINE_STATE_FF_PS (0x18 << 20)
+#define NINE_STATE_FF_LIGHTING (1 << 20)
+#define NINE_STATE_FF_MATERIAL (1 << 21)
+#define NINE_STATE_FF_VSTRANSF (1 << 22)
+#define NINE_STATE_FF_PSSTAGES (1 << 23)
+#define NINE_STATE_FF_OTHER (1 << 24)
+#define NINE_STATE_FOG_SHADER (1 << 25)
+#define NINE_STATE_PS1X_SHADER (1 << 26)
+#define NINE_STATE_ALL 0x7ffffff
+#define NINE_STATE_UNHANDLED (1 << 27)
#define NINE_STATE_COMMIT_DSA (1 << 0)
#define NINE_STATE_COMMIT_RASTERIZER (1 << 1)
int vs_const_i[NINE_MAX_CONST_I][4];
BOOL vs_const_b[NINE_MAX_CONST_B];
float *vs_lconstf_temp;
+ BOOL programmable_vs;
struct NinePixelShader9 *ps;
float *ps_const_f;
uint8_t rt_mask;
DWORD rs[NINED3DRS_COUNT];
+ DWORD rs_advertised[NINED3DRS_COUNT]; /* the ones apps get with GetRenderState */
struct NineBaseTexture9 *texture[NINE_MAX_SAMPLERS]; /* PS, DMAP, VS */
struct NineDevice9;
-void nine_update_state_framebuffer(struct NineDevice9 *);
+void nine_update_state_framebuffer_clear(struct NineDevice9 *);
boolean nine_update_state(struct NineDevice9 *);
void nine_state_restore_non_cso(struct NineDevice9 *device);
info.sampler_ps1xtypes = key;
info.fog_enable = device->state.rs[D3DRS_FOGENABLE];
info.fog_mode = device->state.rs[D3DRS_FOGTABLEMODE];
+ info.force_color_in_centroid = key >> 34 & 1;
info.projected = (key >> 48) & 0xffff;
hr = nine_translate_shader(This->base.device, &info);
#include "nine_state.h"
#include "basetexture9.h"
#include "nine_ff.h"
+#include "surface9.h"
struct nine_lconstf;
key |= ((uint64_t)state->rs[D3DRS_FOGTABLEMODE]) << 33;
}
+ /* centroid interpolation automatically used for color ps inputs */
+ if (state->rt[0]->desc.MultiSampleType > 1)
+ key |= ((uint64_t)1) << 34;
+
if (unlikely(ps->byte_code.version < 0x14)) {
projected = nine_ff_get_projected_key(state);
key |= ((uint64_t) projected) << 48;
#include "util/u_hash_table.h"
#include "util/u_inlines.h"
+#include "util/u_resource.h"
#include "nine_pdata.h"
#define DBG_CHANNEL DBG_RESOURCE
-
HRESULT
NineResource9_ctor( struct NineResource9 *This,
struct NineUnknownParams *pParams,
if (Allocate) {
assert(!initResource);
+
+ /* On Windows it is possible allocation fails when
+ * IDirect3DDevice9::GetAvailableTextureMem() still reports
+ * enough free space.
+ *
+ * Some games allocate surfaces
+ * in a loop until they receive D3DERR_OUTOFVIDEOMEMORY to measure
+ * the available texture memory size.
+ *
+ * We are not using the driver's VRAM statistics because:
+ * * This would add overhead to each resource allocation.
+ * * Freeing memory is lazy and takes some time, but applications
+ * expect the memory counter to change immediately after allocating
+ * or freeing memory.
+ *
+ * Vertex buffers and index buffers are not accounted for!
+ */
+ if (This->info.target != PIPE_BUFFER) {
+ This->size = util_resource_size(&This->info);
+
+ This->base.device->available_texture_mem -= This->size;
+ if (This->base.device->available_texture_mem <=
+ This->base.device->available_texture_limit) {
+ return D3DERR_OUTOFVIDEOMEMORY;
+ }
+ }
+
DBG("(%p) Creating pipe_resource.\n", This);
This->resource = screen->resource_create(screen, &This->info);
if (!This->resource)
* still hold a reference. */
pipe_resource_reference(&This->resource, NULL);
+ /* NOTE: size is 0, unless something has actually been allocated */
+ if (This->base.device)
+ This->base.device->available_texture_mem += This->size;
+
NineUnknown_dtor(&This->base);
}
enum pipe_error err;
struct pheader *header;
const void *user_data = pData;
+ char guid_str[64];
- DBG("This=%p refguid=%p pData=%p SizeOfData=%u Flags=%x\n",
- This, refguid, pData, SizeOfData, Flags);
+ DBG("This=%p GUID=%s pData=%p SizeOfData=%u Flags=%x\n",
+ This, GUID_sprintf(guid_str, refguid), pData, SizeOfData, Flags);
if (Flags & D3DSPD_IUNKNOWN)
user_assert(SizeOfData == sizeof(IUnknown *), D3DERR_INVALIDCALL);
header->size = SizeOfData;
memcpy(header->data, user_data, header->size);
+ memcpy(&header->guid, refguid, sizeof(header->guid));
- err = util_hash_table_set(This->pdata, refguid, header);
+ err = util_hash_table_set(This->pdata, &header->guid, header);
if (err == PIPE_OK) {
if (header->unknown) { IUnknown_AddRef(*(IUnknown **)header->data); }
return D3D_OK;
{
struct pheader *header;
DWORD sizeofdata;
+ char guid_str[64];
- DBG("This=%p refguid=%p pData=%p pSizeOfData=%p\n",
- This, refguid, pData, pSizeOfData);
+ DBG("This=%p GUID=%s pData=%p pSizeOfData=%p\n",
+ This, GUID_sprintf(guid_str, refguid), pData, pSizeOfData);
header = util_hash_table_get(This->pdata, refguid);
if (!header) { return D3DERR_NOTFOUND; }
REFGUID refguid )
{
struct pheader *header;
+ char guid_str[64];
- DBG("This=%p refguid=%p\n", This, refguid);
+ DBG("This=%p GUID=%s\n", This, GUID_sprintf(guid_str, refguid));
header = util_hash_table_get(This->pdata, refguid);
if (!header)
/* for [GS]etPrivateData/FreePrivateData */
struct util_hash_table *pdata;
+
+ long long size;
};
static inline struct NineResource9 *
NineResource9( void *data )
#include "device9.h"
#include "basetexture9.h"
#include "nine_helpers.h"
+#include "vertexdeclaration9.h"
#define DBG_CHANNEL DBG_STATEBLOCK
const int r = ffs(m) - 1;
m &= ~(1 << r);
dst->rs[i * 32 + r] = src->rs[i * 32 + r];
+ dst->rs_advertised[i * 32 + r] = src->rs_advertised[i * 32 + r];
}
}
nine_bind(&dst->stream[i], src->stream[i]);
if (src->stream[i]) {
dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset;
- dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer;
+ pipe_resource_reference(&dst->vtxbuf[i].buffer, src->vtxbuf[i].buffer);
dst->vtxbuf[i].stride = src->vtxbuf[i].stride;
}
}
dst->ff.light = REALLOC(dst->ff.light,
dst->ff.num_lights * sizeof(D3DLIGHT9),
mask->ff.num_lights * sizeof(D3DLIGHT9));
+ for (i = dst->ff.num_lights; i < mask->ff.num_lights; ++i) {
+ memset(&dst->ff.light[i], 0, sizeof(D3DLIGHT9));
+ dst->ff.light[i].Type = (D3DLIGHTTYPE)NINED3DLIGHT_INVALID;
+ }
dst->ff.num_lights = mask->ff.num_lights;
}
for (i = 0; i < mask->ff.num_lights; ++i)
/* Render states. */
memcpy(dst->rs, src->rs, sizeof(dst->rs));
+ memcpy(dst->rs_advertised, src->rs_advertised, sizeof(dst->rs_advertised));
if (apply)
memcpy(dst->changed.rs, src->changed.rs, sizeof(dst->changed.rs));
nine_bind(&dst->stream[i], src->stream[i]);
if (src->stream[i]) {
dst->vtxbuf[i].buffer_offset = src->vtxbuf[i].buffer_offset;
- dst->vtxbuf[i].buffer = src->vtxbuf[i].buffer;
+ pipe_resource_reference(&dst->vtxbuf[i].buffer, src->vtxbuf[i].buffer);
dst->vtxbuf[i].stride = src->vtxbuf[i].stride;
}
dst->stream_freq[i] = src->stream_freq[i];
nine_state_copy_common(dst, src, src, TRUE, pool);
if ((src->changed.group & NINE_STATE_VDECL) && src->vdecl)
- nine_bind(&dst->vdecl, src->vdecl);
+ NineDevice9_SetVertexDeclaration(This->base.device, (IDirect3DVertexDeclaration9 *)src->vdecl);
+
+ /* Recomputing it is needed if we changed vs but not vdecl */
+ dst->programmable_vs = dst->vs && !(dst->vdecl && dst->vdecl->position_t);
/* Textures */
if (src->changed.texture) {
D3DSURFACE_DESC *pDesc )
{
HRESULT hr;
+ union pipe_color_union rgba = {0};
+ struct pipe_surface *surf;
+ struct pipe_context *pipe = pParams->device->pipe;
DBG("This=%p pDevice=%p pResource=%p Level=%u Layer=%u pDesc=%p\n",
This, pParams->device, pResource, Level, Layer, pDesc);
if (pResource && NineSurface9_IsOffscreenPlain(This))
pResource->flags |= NINE_RESOURCE_FLAG_LOCKABLE;
+ /* TODO: investigate what else exactly needs to be cleared */
+ if (This->base.resource && (pDesc->Usage & D3DUSAGE_RENDERTARGET)) {
+ surf = NineSurface9_GetSurface(This, 0);
+ pipe->clear_render_target(pipe, surf, &rgba, 0, 0, pDesc->Width, pDesc->Height);
+ }
+
NineSurface9_Dump(This);
return D3D_OK;
/* Release system memory when we have to manage it (no parent) */
if (!This->base.base.container && This->data)
- FREE(This->data);
+ align_free(This->data);
NineResource9_dtor(&This->base);
}
D3DERR_INVALIDCALL);
if (pRect && This->desc.Pool == D3DPOOL_DEFAULT &&
- compressed_format (This->desc.Format)) {
+ util_format_is_compressed(This->base.info.format)) {
const unsigned w = util_format_get_blockwidth(This->base.info.format);
const unsigned h = util_format_get_blockheight(This->base.info.format);
user_assert((pRect->left == 0 && pRect->right == This->desc.Width &&
* and bpp 8, and the app has a workaround to work with the fact
* that it is actually compressed. */
if (is_ATI1_ATI2(This->base.info.format)) {
- pLockedRect->Pitch = This->desc.Height;
- pLockedRect->pBits = This->data + box.y * This->desc.Height + box.x;
+ pLockedRect->Pitch = This->desc.Width;
+ pLockedRect->pBits = This->data + box.y * This->desc.Width + box.x;
} else {
pLockedRect->Pitch = This->stride;
pLockedRect->pBits = NineSurface9_GetSystemMemPointer(This,
DBG("This=%p pParams=%p\n", This, pParams);
user_assert(pParams != NULL, E_POINTER);
+ user_assert(pParams->SwapEffect, D3DERR_INVALIDCALL);
+ user_assert((pParams->SwapEffect != D3DSWAPEFFECT_COPY) ||
+ (pParams->BackBufferCount <= 1), D3DERR_INVALIDCALL);
+ user_assert(pDevice->ex || pParams->BackBufferCount <= 3, D3DERR_INVALIDCALL);
+ user_assert(pDevice->ex ||
+ (pParams->SwapEffect == D3DSWAPEFFECT_FLIP) ||
+ (pParams->SwapEffect == D3DSWAPEFFECT_COPY) ||
+ (pParams->SwapEffect == D3DSWAPEFFECT_DISCARD), D3DERR_INVALIDCALL);
DBG("pParams(%p):\n"
"BackBufferWidth: %u\n"
pParams->FullScreen_RefreshRateInHz,
pParams->PresentationInterval);
- if (pParams->SwapEffect == D3DSWAPEFFECT_COPY &&
- pParams->BackBufferCount > 1) {
- pParams->BackBufferCount = 1;
- }
-
if (pParams->BackBufferCount > 3) {
pParams->BackBufferCount = 3;
}
This->pipe->blit(This->pipe, &blit);
}
+ /* The resource we present has to resolve fast clears
+ * if needed (and other things) */
+ This->pipe->flush_resource(This->pipe, resource);
+
if (This->params.SwapEffect != D3DSWAPEFFECT_DISCARD)
handle_draw_cursor_and_hud(This, resource);
return D3DERR_WASSTILLDRAWING;
}
- if (This->present_buffers)
- resource = This->present_buffers[0];
- else
- resource = This->buffers[0]->base.resource;
- This->pipe->flush_resource(This->pipe, resource);
-
if (!This->enable_threadpool) {
This->tasks[0]=NULL;
fence = swap_fences_pop_front(This);
if (hr == D3DERR_WASSTILLDRAWING)
return hr;
+ if (This->base.device->ex) {
+ if (NineSwapChain9_GetOccluded(This)) {
+ return S_PRESENT_OCCLUDED;
+ }
+ } else {
+ if (NineSwapChain9_GetOccluded(This)) {
+ This->base.device->device_needs_reset = TRUE;
+ }
+ if (This->base.device->device_needs_reset) {
+ return D3DERR_DEVICELOST;
+ }
+ }
+
switch (This->params.SwapEffect) {
case D3DSWAPEFFECT_FLIP:
UNTESTED(4);
ID3DPresent_WaitBufferReleased(This->present, This->present_handles[0]);
This->base.device->state.changed.group |= NINE_STATE_FB;
- nine_update_state_framebuffer(This->base.device);
return hr;
}
DBG("GetBackBuffer: This=%p iBackBuffer=%d Type=%d ppBackBuffer=%p\n",
This, iBackBuffer, Type, ppBackBuffer);
(void)user_error(Type == D3DBACKBUFFER_TYPE_MONO);
+ /* don't touch ppBackBuffer on error */
+ user_assert(ppBackBuffer != NULL, D3DERR_INVALIDCALL);
user_assert(iBackBuffer < This->params.BackBufferCount, D3DERR_INVALIDCALL);
- user_assert(ppBackBuffer != NULL, E_POINTER);
NineUnknown_AddRef(NineUnknown(This->buffers[iBackBuffer]));
*ppBackBuffer = (IDirect3DSurface9 *)This->buffers[iBackBuffer];
implicit, pPresent, pPresentationParameters,
pCTX, hFocusWindow, NULL);
}
+
+/* Report whether the presentation window is occluded.
+ *
+ * The ID3DPresent backend is only asked when the device's
+ * minor_version_num is greater than 0; otherwise FALSE is returned
+ * without calling into the backend.
+ */
+BOOL
+NineSwapChain9_GetOccluded( struct NineSwapChain9 *This )
+{
+    if (!(This->base.device->minor_version_num > 0))
+        return FALSE;
+
+    return ID3DPresent_GetWindowOccluded(This->present);
+}
NineSwapChain9_GetPresentParameters( struct NineSwapChain9 *This,
D3DPRESENT_PARAMETERS *pPresentationParameters );
+BOOL
+NineSwapChain9_GetOccluded( struct NineSwapChain9 *This );
+
#endif /* _NINE_SWAPCHAIN9_H_ */
}
if (This->managed_buffer)
- FREE(This->managed_buffer);
+ align_free(This->managed_buffer);
NineBaseTexture9_dtor(&This->base);
}
struct NineUnknownParams *pParams,
D3DVERTEXBUFFER_DESC *pDesc )
{
- struct pipe_resource *info = &This->base.info;
HRESULT hr;
DBG("This=%p Size=0x%x Usage=%x Pool=%u\n", This,
pDesc->Size, pDesc->Usage, pDesc->Pool);
- user_assert(pDesc->Pool != D3DPOOL_SCRATCH, D3DERR_INVALIDCALL);
-
- This->maps = MALLOC(sizeof(struct pipe_transfer *));
- if (!This->maps)
- return E_OUTOFMEMORY;
- This->nmaps = 0;
- This->maxmaps = 1;
-
- This->pipe = pParams->device->pipe;
-
- info->screen = pParams->device->screen;
- info->target = PIPE_BUFFER;
- info->format = PIPE_FORMAT_R8_UNORM;
- info->width0 = pDesc->Size;
- info->flags = 0;
-
- info->bind = PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_TRANSFER_WRITE;
- if (!(pDesc->Usage & D3DUSAGE_WRITEONLY))
- info->bind |= PIPE_BIND_TRANSFER_READ;
-
- info->usage = PIPE_USAGE_DEFAULT;
- if (pDesc->Usage & D3DUSAGE_DYNAMIC)
- info->usage = PIPE_USAGE_STREAM;
- if (pDesc->Pool == D3DPOOL_SYSTEMMEM)
- info->usage = PIPE_USAGE_STAGING;
-
- /* if (pDesc->Usage & D3DUSAGE_DONOTCLIP) { } */
- /* if (pDesc->Usage & D3DUSAGE_NONSECURE) { } */
- /* if (pDesc->Usage & D3DUSAGE_NPATCHES) { } */
- /* if (pDesc->Usage & D3DUSAGE_POINTS) { } */
- /* if (pDesc->Usage & D3DUSAGE_RTPATCHES) { } */
- if (pDesc->Usage & D3DUSAGE_SOFTWAREPROCESSING)
- DBG("Application asked for Software Vertex Processing, "
- "but this is unimplemented\n");
- /* if (pDesc->Usage & D3DUSAGE_TEXTAPI) { } */
-
- info->height0 = 1;
- info->depth0 = 1;
- info->array_size = 1;
- info->last_level = 0;
- info->nr_samples = 0;
-
- hr = NineResource9_ctor(&This->base, pParams, NULL, TRUE,
- D3DRTYPE_VERTEXBUFFER, pDesc->Pool, pDesc->Usage);
+ hr = NineBuffer9_ctor(&This->base, pParams, D3DRTYPE_VERTEXBUFFER,
+ pDesc->Usage, pDesc->Size, pDesc->Pool);
if (FAILED(hr))
return hr;
void
NineVertexBuffer9_dtor( struct NineVertexBuffer9 *This )
{
- if (This->maps) {
- while (This->nmaps) {
- NineVertexBuffer9_Unlock(This);
- }
- FREE(This->maps);
- }
-
- NineResource9_dtor(&This->base);
+ NineBuffer9_dtor(&This->base);
+}
+
+/* Return whatever NineBuffer9_GetResource() yields for the NineBuffer9
+ * base object embedded in this vertex buffer. */
+struct pipe_resource *
+NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This )
+{
+    struct NineBuffer9 *buffer = &This->base;
+
+    return NineBuffer9_GetResource(buffer);
}
HRESULT WINAPI
NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
- UINT OffsetToLock,
- UINT SizeToLock,
- void **ppbData,
- DWORD Flags )
+ UINT OffsetToLock,
+ UINT SizeToLock,
+ void **ppbData,
+ DWORD Flags )
{
- struct pipe_box box;
- void *data;
- const unsigned usage = d3dlock_buffer_to_pipe_transfer_usage(Flags);
-
- DBG("This=%p(pipe=%p) OffsetToLock=0x%x, SizeToLock=0x%x, Flags=0x%x\n",
- This, This->base.resource,
- OffsetToLock, SizeToLock, Flags);
-
- user_assert(ppbData, E_POINTER);
- user_assert(!(Flags & ~(D3DLOCK_DISCARD |
- D3DLOCK_DONOTWAIT |
- D3DLOCK_NO_DIRTY_UPDATE |
- D3DLOCK_NOSYSLOCK |
- D3DLOCK_READONLY |
- D3DLOCK_NOOVERWRITE)), D3DERR_INVALIDCALL);
-
- if (This->nmaps == This->maxmaps) {
- struct pipe_transfer **newmaps =
- REALLOC(This->maps, sizeof(struct pipe_transfer *)*This->maxmaps,
- sizeof(struct pipe_transfer *)*(This->maxmaps << 1));
- if (newmaps == NULL)
- return E_OUTOFMEMORY;
-
- This->maxmaps <<= 1;
- This->maps = newmaps;
- }
-
- if (SizeToLock == 0) {
- SizeToLock = This->desc.Size - OffsetToLock;
- user_warn(OffsetToLock != 0);
- }
-
- u_box_1d(OffsetToLock, SizeToLock, &box);
-
- data = This->pipe->transfer_map(This->pipe, This->base.resource, 0,
- usage, &box, &This->maps[This->nmaps]);
- if (!data) {
- DBG("pipe::transfer_map failed\n"
- " usage = %x\n"
- " box.x = %u\n"
- " box.width = %u\n",
- usage, box.x, box.width);
- /* not sure what to return, msdn suggests this */
- if (Flags & D3DLOCK_DONOTWAIT)
- return D3DERR_WASSTILLDRAWING;
- return D3DERR_INVALIDCALL;
- }
-
- This->nmaps++;
- *ppbData = data;
-
- return D3D_OK;
+ return NineBuffer9_Lock(&This->base, OffsetToLock, SizeToLock, ppbData, Flags);
}
HRESULT WINAPI
NineVertexBuffer9_Unlock( struct NineVertexBuffer9 *This )
{
- DBG("This=%p\n", This);
-
- user_assert(This->nmaps > 0, D3DERR_INVALIDCALL);
- This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]);
- return D3D_OK;
+ return NineBuffer9_Unlock(&This->base);
}
HRESULT WINAPI
#ifndef _NINE_VERTEXBUFFER9_H_
#define _NINE_VERTEXBUFFER9_H_
-
#include "resource9.h"
+#include "buffer9.h"
struct pipe_screen;
struct pipe_context;
struct NineVertexBuffer9
{
- struct NineResource9 base;
+ struct NineBuffer9 base;
/* G3D */
struct pipe_context *pipe;
- struct pipe_transfer **maps;
- int nmaps, maxmaps;
-
D3DVERTEXBUFFER_DESC desc;
};
static inline struct NineVertexBuffer9 *
void
NineVertexBuffer9_dtor( struct NineVertexBuffer9 *This );
+/*** Nine private ***/
+
+struct pipe_resource *
+NineVertexBuffer9_GetResource( struct NineVertexBuffer9 *This );
+
+/*** Direct3D public ***/
HRESULT WINAPI
NineVertexBuffer9_Lock( struct NineVertexBuffer9 *This,
const D3DVERTEXELEMENT9 *pElements )
{
const D3DCAPS9 *caps;
- unsigned i;
-
+ unsigned i, nelems;
DBG("This=%p pParams=%p pElements=%p\n", This, pParams, pElements);
- HRESULT hr = NineUnknown_ctor(&This->base, pParams);
- if (FAILED(hr)) { return hr; }
-
/* wine */
- for (This->nelems = 0;
- pElements[This->nelems].Stream != 0xFF;
- ++This->nelems) {
- user_assert(pElements[This->nelems].Type != D3DDECLTYPE_UNUSED, E_FAIL);
- user_assert(!(pElements[This->nelems].Offset & 3), E_FAIL);
+ for (nelems = 0;
+ pElements[nelems].Stream != 0xFF;
+ ++nelems) {
+ user_assert(pElements[nelems].Type != D3DDECLTYPE_UNUSED, E_FAIL);
+ user_assert(!(pElements[nelems].Offset & 3), E_FAIL);
}
- caps = NineDevice9_GetCaps(This->base.device);
- user_assert(This->nelems <= caps->MaxStreams, D3DERR_INVALIDCALL);
+ caps = NineDevice9_GetCaps(pParams->device);
+ user_assert(nelems <= caps->MaxStreams, D3DERR_INVALIDCALL);
+ HRESULT hr = NineUnknown_ctor(&This->base, pParams);
+ if (FAILED(hr)) { return hr; }
+
+ This->nelems = nelems;
This->decls = CALLOC(This->nelems+1, sizeof(D3DVERTEXELEMENT9));
This->elems = CALLOC(This->nelems, sizeof(struct pipe_vertex_element));
This->usage_map = CALLOC(This->nelems, sizeof(uint16_t));
This->decls[i].UsageIndex);
This->usage_map[i] = usage;
+ if (This->decls[i].Usage == D3DDECLUSAGE_POSITIONT)
+ This->position_t = TRUE;
+
This->elems[i].src_offset = This->decls[i].Offset;
This->elems[i].instance_divisor = 0;
This->elems[i].vertex_buffer_index = This->decls[i].Stream;
D3DVERTEXELEMENT9 *decls;
DWORD fvf;
+
+ BOOL position_t;
};
static inline struct NineVertexDeclaration9 *
NineVertexDeclaration9( void *data )
NineVolume9_UnlockBox(This);
if (This->data)
- FREE(This->data);
+ align_free(This->data);
pipe_resource_reference(&This->resource, NULL);
usage |= PIPE_TRANSFER_DONTBLOCK;
if (pBox) {
+ user_assert(pBox->Right > pBox->Left, D3DERR_INVALIDCALL);
+ user_assert(pBox->Bottom > pBox->Top, D3DERR_INVALIDCALL);
+ user_assert(pBox->Back > pBox->Front, D3DERR_INVALIDCALL);
+ user_assert(pBox->Right <= This->desc.Width, D3DERR_INVALIDCALL);
+ user_assert(pBox->Bottom <= This->desc.Height, D3DERR_INVALIDCALL);
+ user_assert(pBox->Back <= This->desc.Depth, D3DERR_INVALIDCALL);
+
d3dbox_to_pipe_box(&box, pBox);
if (u_box_clip_2d(&box, &box, This->desc.Width, This->desc.Height) < 0) {
DBG("Locked volume intersection empty.\n");
#include "util/u_memory.h"
#include "util/u_video.h"
#include "vl/vl_rbsp.h"
+#include "vl/vl_zscan.h"
#include "entrypoint.h"
#include "vid_dec.h"
const uint8_t *defaultList, const uint8_t *fallbackList)
{
unsigned lastScale = 8, nextScale = 8;
+ const int *list;
unsigned i;
/* (pic|seq)_scaling_list_present_flag[i] */
return;
}
+ list = (sizeOfScalingList == 16) ? vl_zscan_normal_16 : vl_zscan_normal;
for (i = 0; i < sizeOfScalingList; ++i ) {
if (nextScale != 0) {
return;
}
}
- scalingList[i] = nextScale == 0 ? lastScale : nextScale;
- lastScale = scalingList[i];
+ scalingList[list[i]] = nextScale == 0 ? lastScale : nextScale;
+ lastScale = scalingList[list[i]];
}
}
DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1)
DRI_CONF_SECTION_END
DRI_CONF_SECTION_NINE
+ DRI_CONF_NINE_OVERRIDEVENDOR(-1)
DRI_CONF_NINE_THROTTLE(-2)
DRI_CONF_NINE_THREADSUBMIT("false")
DRI_CONF_SECTION_END
DRI_CONF_END;
-/* define fallback value here: NVIDIA GeForce GTX 970 */
-#define FALLBACK_NAME "NV124"
-#define FALLBACK_DEVID 0x13C2
-#define FALLBACK_VENID 0x10de
+/* Cards that can be reported to applications instead of the detected one.
+ *
+ * Entry 0 is the default used when the real card cannot be identified.
+ * The other entries are looked up by vendor id when the override_vendorid
+ * option is set.
+ *
+ * The table is read-only and private to this file, hence static const.
+ */
+static const struct fallback_card_config {
+    const char *name;
+    unsigned vendor_id;
+    unsigned device_id;
+} fallback_cards[] = {
+        {"NV124", 0x10de, 0x13C2}, /* NVIDIA GeForce GTX 970 */
+        {"HAWAII", 0x1002, 0x67b1}, /* AMD Radeon R9 290 */
+        /* 0x13C2 is the GTX 970 id above; 0x0416 is a Haswell Mobile GT2 id */
+        {"Haswell Mobile", 0x8086, 0x0416}, /* Intel Haswell Mobile */
+        {"SVGA3D", 0x15ad, 0x0405}, /* VMware SVGA 3D */
+};
/* prototypes */
void
d3d_match_vendor_id( D3DADAPTER_IDENTIFIER9* drvid,
- unsigned fallback_ven,
- unsigned fallback_dev,
- const char* fallback_name );
+ unsigned fallback_ven,
+ unsigned fallback_dev,
+ const char* fallback_name );
void d3d_fill_driver_version(D3DADAPTER_IDENTIFIER9* drvid);
*subsysid = 0;
*revision = 0;
} else {
- DBG("Unable to detect card. Faking %s\n", FALLBACK_NAME);
- *vendorid = FALLBACK_VENID;
- *deviceid = FALLBACK_DEVID;
+ DBG("Unable to detect card. Faking %s\n", fallback_cards[0].name);
+ *vendorid = fallback_cards[0].vendor_id;
+ *deviceid = fallback_cards[0].device_id;
*subsysid = 0;
*revision = 0;
}
static inline void
read_descriptor( struct d3dadapter9_context *ctx,
- int fd )
+ int fd, int override_vendorid )
{
+ unsigned i;
+ BOOL found;
D3DADAPTER_IDENTIFIER9 *drvid = &ctx->identifier;
memset(drvid, 0, sizeof(*drvid));
strncpy(drvid->Description, ctx->hal->get_name(ctx->hal),
sizeof(drvid->Description));
+ if (override_vendorid > 0) {
+ found = FALSE;
+ /* fill in device_id and card name for fake vendor */
+ for (i = 0; i < sizeof(fallback_cards)/sizeof(fallback_cards[0]); i++) {
+ if (fallback_cards[i].vendor_id == override_vendorid) {
+ DBG("Faking card '%s' vendor 0x%04x, device 0x%04x\n",
+ fallback_cards[i].name,
+ fallback_cards[i].vendor_id,
+ fallback_cards[i].device_id);
+ drvid->VendorId = fallback_cards[i].vendor_id;
+ drvid->DeviceId = fallback_cards[i].device_id;
+ strncpy(drvid->Description, fallback_cards[i].name,
+ sizeof(drvid->Description));
+ found = TRUE;
+ break;
+ }
+ }
+ if (!found) {
+ DBG("Unknown fake vendor 0x%04x! Using detected vendor !\n", override_vendorid);
+ }
+ }
/* choose fall-back vendor if necessary to allow
* the following functions to return sane results */
- d3d_match_vendor_id(drvid, FALLBACK_VENID, FALLBACK_DEVID, FALLBACK_NAME);
+ d3d_match_vendor_id(drvid, fallback_cards[0].vendor_id, fallback_cards[0].device_id, fallback_cards[0].name);
/* fill in driver name and version info */
d3d_fill_driver_version(drvid);
/* override Description field with Windows like names */
driOptionCache defaultInitOptions;
driOptionCache userInitOptions;
int throttling_value_user = -2;
+ int override_vendorid = -1;
if (!ctx) { return E_OUTOFMEMORY; }
"You should not expect any benefit.");
}
+ if (driCheckOption(&userInitOptions, "override_vendorid", DRI_INT)) {
+ override_vendorid = driQueryOptioni(&userInitOptions, "override_vendorid");
+ }
+
driDestroyOptionCache(&userInitOptions);
driDestroyOptionInfo(&defaultInitOptions);
}
/* read out PCI info */
- read_descriptor(&ctx->base, fd);
+ read_descriptor(&ctx->base, fd, override_vendorid);
/* create and return new ID3DAdapter9 */
hr = NineAdapter9_new(&ctx->base, (struct NineAdapter9 **)ppAdapter);
endif
ifneq ($(filter virgl,$(MESA_GPU_DRIVERS)),)
LOCAL_CFLAGS += -DGALLIUM_VIRGL
-gallium_DRIVERS += libmesa_winsys_virgl libmesa_pipe_virgl
+gallium_DRIVERS += libmesa_winsys_virgl libmesa_winsys_virgl_vtest libmesa_pipe_virgl
endif
ifneq ($(filter vmwgfx,$(MESA_GPU_DRIVERS)),)
gallium_DRIVERS += libmesa_winsys_svga libmesa_pipe_svga
request.alloc_size = size;
request.phys_alignment = alignment;
- if (initial_domain & RADEON_DOMAIN_VRAM) {
+ if (initial_domain & RADEON_DOMAIN_VRAM)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
- if (flags & RADEON_FLAG_CPU_ACCESS)
- request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- }
- if (initial_domain & RADEON_DOMAIN_GTT) {
+ if (initial_domain & RADEON_DOMAIN_GTT)
request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
- if (flags & RADEON_FLAG_GTT_WC)
- request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- }
+
+ if (flags & RADEON_FLAG_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ if (flags & RADEON_FLAG_GTT_WC)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
if (r) {
switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) {
case CIK__PIPE_CONFIG__ADDR_SURF_P2:
- default:
return 2;
case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
return 16;
+ default:
+ fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n");
+ assert(!"this should never occur");
+ return 2;
}
}
-/* Convert Sea Islands register values GB_ADDR_CFG and MC_ADDR_CFG
- * into GB_TILING_CONFIG register which is only present on R600-R700. */
-static unsigned r600_get_gb_tiling_config(struct amdgpu_gpu_info *info)
-{
- unsigned num_pipes = info->gb_addr_cfg & 0x7;
- unsigned num_banks = info->mc_arb_ramcfg & 0x3;
- unsigned pipe_interleave_bytes = (info->gb_addr_cfg >> 4) & 0x7;
- unsigned row_size = (info->gb_addr_cfg >> 28) & 0x3;
-
- return num_pipes | (num_banks << 4) |
- (pipe_interleave_bytes << 8) |
- (row_size << 12);
-}
-
/* Helper function to do the ioctls needed for setup and init. */
static boolean do_winsys_init(struct amdgpu_winsys *ws)
{
ws->info.gart_size = gtt.heap_size;
ws->info.vram_size = vram.heap_size;
/* convert the shader clock from KHz to MHz */
- ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
+ ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000;
ws->info.max_se = ws->amdinfo.num_shader_engines;
ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
ws->info.has_uvd = uvd.available_rings != 0;
ws->info.vce_fw_version =
vce.available_rings ? vce_version : 0;
ws->info.has_userptr = TRUE;
- ws->info.r600_num_backends = ws->amdinfo.rb_pipes;
- ws->info.r600_clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
- ws->info.r600_tiling_config = r600_get_gb_tiling_config(&ws->amdinfo);
- ws->info.r600_num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
- ws->info.r600_max_pipes = ws->amdinfo.max_quad_shader_pipes; /* TODO: is this correct? */
- ws->info.r600_virtual_address = TRUE;
- ws->info.r600_has_dma = dma.available_rings != 0;
+ ws->info.num_render_backends = ws->amdinfo.rb_pipes;
+ ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
+ ws->info.num_tile_pipes = cik_get_num_tile_pipes(&ws->amdinfo);
+ ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
+ ws->info.has_virtual_memory = TRUE;
+ ws->info.has_sdma = dma.available_rings != 0;
/* Get the number of good compute units. */
ws->info.num_good_compute_units = 0;
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
sizeof(ws->amdinfo.gb_tile_mode));
ws->info.si_tile_mode_array_valid = TRUE;
- ws->info.si_backend_enabled_mask = ws->amdinfo.enabled_rb_pipes_mask;
+ ws->info.enabled_rb_mask = ws->amdinfo.enabled_rb_pipes_mask;
memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode,
sizeof(ws->amdinfo.gb_macro_tile_mode));
if (bo->ptr)
os_munmap(bo->ptr, bo->base.size);
- if (rws->info.r600_virtual_address) {
+ if (rws->info.has_virtual_memory) {
if (rws->va_unmap_working) {
struct drm_radeon_gem_va va;
pipe_mutex_init(bo->map_mutex);
pb_cache_init_entry(&rws->bo_cache, &bo->cache_entry, &bo->base);
- if (rws->info.r600_virtual_address) {
+ if (rws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
bo->va = radeon_bomgr_find_va(rws, size, alignment);
pipe_mutex_unlock(ws->bo_handles_mutex);
- if (ws->info.r600_virtual_address) {
+ if (ws->info.has_virtual_memory) {
struct drm_radeon_gem_va va;
bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
if (stride)
*stride = whandle->stride;
- if (ws->info.r600_virtual_address && !bo->va) {
+ if (ws->info.has_virtual_memory && !bo->va) {
struct drm_radeon_gem_va va;
bo->va = radeon_bomgr_find_va(ws, bo->base.size, 1 << 20);
* This doesn't have to be done if virtual memory is enabled,
* because there is no offset patching with virtual memory.
*/
- if (cs->base.ring_type != RING_DMA || cs->ws->info.r600_virtual_address) {
+ if (cs->base.ring_type != RING_DMA || cs->ws->info.has_virtual_memory) {
return i;
}
}
cs->cst->flags[0] = 0;
cs->cst->flags[1] = RADEON_CS_RING_DMA;
cs->cst->cs.num_chunks = 3;
- if (cs->ws->info.r600_virtual_address) {
+ if (cs->ws->info.has_virtual_memory) {
cs->cst->flags[0] |= RADEON_CS_USE_VM;
}
break;
cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
cs->cst->cs.num_chunks = 3;
}
- if (cs->ws->info.r600_virtual_address) {
+ if (cs->ws->info.has_virtual_memory) {
cs->cst->flags[0] |= RADEON_CS_USE_VM;
cs->cst->cs.num_chunks = 3;
}
}
/* Check for dma */
- ws->info.r600_has_dma = FALSE;
+ ws->info.has_sdma = FALSE;
/* DMA is disabled on R700. There is IB corruption and hangs. */
if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) {
- ws->info.r600_has_dma = TRUE;
+ ws->info.has_sdma = TRUE;
}
/* Check for UVD and VCE */
/* Get max clock frequency info and convert it to MHz */
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
- &ws->info.max_sclk);
- ws->info.max_sclk /= 1000;
+ &ws->info.max_shader_clock);
+ ws->info.max_shader_clock /= 1000;
radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL,
- &ws->info.si_backend_enabled_mask);
+ &ws->info.enabled_rb_mask);
ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
return FALSE;
}
else if (ws->gen >= DRV_R600) {
+ uint32_t tiling_config = 0;
+
if (ws->info.drm_minor >= 9 &&
!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS,
"num backends",
- &ws->info.r600_num_backends))
+ &ws->info.num_render_backends))
return FALSE;
/* get the GPU counter frequency, failure is not fatal */
radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL,
- &ws->info.r600_clock_crystal_freq);
+ &ws->info.clock_crystal_freq);
radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL,
- &ws->info.r600_tiling_config);
+ &tiling_config);
+
+ ws->info.r600_num_banks =
+ ws->info.chip_class >= EVERGREEN ?
+ 4 << ((tiling_config & 0xf0) >> 4) :
+ 4 << ((tiling_config & 0x30) >> 4);
+
+ ws->info.pipe_interleave_bytes =
+ ws->info.chip_class >= EVERGREEN ?
+ 256 << ((tiling_config & 0xf00) >> 8) :
+ 256 << ((tiling_config & 0xc0) >> 6);
+
+ if (!ws->info.pipe_interleave_bytes)
+ ws->info.pipe_interleave_bytes =
+ ws->info.chip_class >= EVERGREEN ? 512 : 256;
if (ws->info.drm_minor >= 11) {
radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL,
- &ws->info.r600_num_tile_pipes);
+ &ws->info.num_tile_pipes);
if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL,
- &ws->info.r600_backend_map))
- ws->info.r600_backend_map_valid = TRUE;
+ &ws->info.r600_gb_backend_map))
+ ws->info.r600_gb_backend_map_valid = TRUE;
+ } else {
+ ws->info.num_tile_pipes =
+ ws->info.chip_class >= EVERGREEN ?
+ 1 << (tiling_config & 0xf) :
+ 1 << ((tiling_config & 0xe) >> 1);
}
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
if (ws->info.drm_minor >= 13) {
uint32_t ib_vm_max_size;
- ws->info.r600_virtual_address = TRUE;
+ ws->info.has_virtual_memory = TRUE;
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
&ws->va_start))
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
&ib_vm_max_size))
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
&ws->va_unmap_working);
}
if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", FALSE))
- ws->info.r600_virtual_address = FALSE;
+ ws->info.has_virtual_memory = FALSE;
}
/* Get max pipes, this is only needed for compute shaders. All evergreen+
* chips have at least 2 pipes, so we use 2 as a default. */
- ws->info.r600_max_pipes = 2;
+ ws->info.r600_max_quad_pipes = 2;
radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL,
- &ws->info.r600_max_pipes);
+ &ws->info.r600_max_quad_pipes);
/* All GPUs have at least one compute unit */
ws->info.num_good_compute_units = 1;
ws->fd = dup(fd);
if (!do_winsys_init(ws))
- goto fail;
+ goto fail1;
pb_cache_init(&ws->bo_cache, 500000, 2.0f, 0,
MIN2(ws->info.vram_size, ws->info.gart_size),
return &ws->base;
fail:
- pipe_mutex_unlock(fd_tab_mutex);
pb_cache_deinit(&ws->bo_cache);
+fail1:
+ pipe_mutex_unlock(fd_tab_mutex);
if (ws->surf_man)
radeon_surface_manager_free(ws->surf_man);
if (ws->fd >= 0)
#ifndef VIRGL_DRM_PUBLIC_H
#define VIRGL_DRM_PUBLIC_H
-struct virgl_winsys;
+struct pipe_screen;
-struct virgl_winsys *virgl_drm_winsys_create(int drmFD);
+struct pipe_screen *virgl_drm_screen_create(int fd);
#endif
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
+#include <sys/stat.h>
#include "os/os_mman.h"
#include "os/os_time.h"
#include "util/u_hash_table.h"
#include "util/u_inlines.h"
#include "state_tracker/drm_driver.h"
+#include "virgl/virgl_screen.h"
+#include "virgl/virgl_public.h"
#include <xf86drm.h>
#include "virtgpu_drm.h"
{
struct drm_gem_close args;
- if (res->name) {
+ if (res->flinked) {
+ pipe_mutex_lock(qdws->bo_handles_mutex);
+ util_hash_table_remove(qdws->bo_names,
+ (void *)(uintptr_t)res->flink);
+ pipe_mutex_unlock(qdws->bo_handles_mutex);
+ }
+
+ if (res->bo_handle) {
pipe_mutex_lock(qdws->bo_handles_mutex);
util_hash_table_remove(qdws->bo_handles,
- (void *)(uintptr_t)res->name);
+ (void *)(uintptr_t)res->bo_handle);
pipe_mutex_unlock(qdws->bo_handles_mutex);
}
virgl_cache_flush(qdws);
util_hash_table_destroy(qdws->bo_handles);
+ util_hash_table_destroy(qdws->bo_names);
pipe_mutex_destroy(qdws->bo_handles_mutex);
pipe_mutex_destroy(qdws->mutex);
struct drm_gem_open open_arg = {};
struct drm_virtgpu_resource_info info_arg = {};
struct virgl_hw_res *res;
+ uint32_t handle = whandle->handle;
pipe_mutex_lock(qdws->bo_handles_mutex);
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
- res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)whandle->handle);
+ res = util_hash_table_get(qdws->bo_names, (void*)(uintptr_t)handle);
if (res) {
struct virgl_hw_res *r = NULL;
virgl_drm_resource_reference(qdws, &r, res);
}
}
- res = CALLOC_STRUCT(virgl_hw_res);
- if (!res)
- goto done;
-
if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
int r;
- uint32_t handle;
r = drmPrimeFDToHandle(qdws->fd, whandle->handle, &handle);
if (r) {
- FREE(res);
res = NULL;
goto done;
}
+ }
+
+ res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)handle);
+ fprintf(stderr, "resource %p for handle %d, pfd=%d\n", res, handle, whandle->handle);
+ if (res) {
+ struct virgl_hw_res *r = NULL;
+ virgl_drm_resource_reference(qdws, &r, res);
+ goto done;
+ }
+
+ res = CALLOC_STRUCT(virgl_hw_res);
+ if (!res)
+ goto done;
+
+ if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
res->bo_handle = handle;
} else {
+ fprintf(stderr, "gem open handle %d\n", handle);
memset(&open_arg, 0, sizeof(open_arg));
open_arg.name = whandle->handle;
if (drmIoctl(qdws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
}
res->bo_handle = open_arg.handle;
}
- res->name = whandle->handle;
+ res->name = handle;
memset(&info_arg, 0, sizeof(info_arg));
info_arg.bo_handle = res->bo_handle;
pipe_reference_init(&res->reference, 1);
res->num_cs_references = 0;
- util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)whandle->handle, res);
+ util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)handle, res);
done:
pipe_mutex_unlock(qdws->bo_handles_mutex);
res->flink = flink.name;
pipe_mutex_lock(qdws->bo_handles_mutex);
- util_hash_table_set(qdws->bo_handles, (void *)(uintptr_t)res->flink, res);
+ util_hash_table_set(qdws->bo_names, (void *)(uintptr_t)res->flink, res);
pipe_mutex_unlock(qdws->bo_handles_mutex);
}
whandle->handle = res->flink;
}
-struct virgl_winsys *
+static struct virgl_winsys *
virgl_drm_winsys_create(int drmFD)
{
struct virgl_drm_winsys *qdws;
pipe_mutex_init(qdws->mutex);
pipe_mutex_init(qdws->bo_handles_mutex);
qdws->bo_handles = util_hash_table_create(handle_hash, handle_compare);
+ qdws->bo_names = util_hash_table_create(handle_hash, handle_compare);
qdws->base.destroy = virgl_drm_winsys_destroy;
qdws->base.transfer_put = virgl_bo_transfer_put;
return &qdws->base;
}
+
+static struct util_hash_table *fd_tab = NULL;
+pipe_static_mutex(virgl_screen_mutex);
+
+/**
+ * pipe_screen::destroy hook installed by virgl_drm_screen_create().
+ *
+ * Drops one reference on the shared screen.  When the count reaches zero the
+ * screen's fd is removed from fd_tab while virgl_screen_mutex is held, and
+ * then -- outside the lock -- the destroy callback that was saved in
+ * winsys_priv is restored and invoked.
+ */
+static void
+virgl_drm_screen_destroy(struct pipe_screen *pscreen)
+{
+   struct virgl_screen *vscreen = virgl_screen(pscreen);
+   boolean last_ref;
+
+   pipe_mutex_lock(virgl_screen_mutex);
+   vscreen->refcnt--;
+   last_ref = (vscreen->refcnt == 0);
+   if (last_ref) {
+      /* Nobody may look this screen up any more. */
+      util_hash_table_remove(fd_tab,
+                             intptr_to_pointer(virgl_drm_winsys(vscreen->vws)->fd));
+   }
+   pipe_mutex_unlock(virgl_screen_mutex);
+
+   if (!last_ref)
+      return;
+
+   /* Put the saved destroy callback back and chain to it. */
+   pscreen->destroy = vscreen->winsys_priv;
+   pscreen->destroy(pscreen);
+}
+
+/**
+ * Hash callback for fd_tab.
+ *
+ * The key is a file descriptor stored in a pointer.  The hash is derived from
+ * the st_dev, st_ino and st_rdev fields reported by fstat(), not from the
+ * descriptor number itself.
+ *
+ * \return the XOR of the three fields, or 0 when fstat() fails so that the
+ *         result never depends on uninitialised stack contents.
+ */
+static unsigned hash_fd(void *key)
+{
+   int fd = pointer_to_intptr(key);
+   struct stat st;
+
+   if (fstat(fd, &st) != 0)
+      return 0;
+
+   return st.st_dev ^ st.st_ino ^ st.st_rdev;
+}
+
+/**
+ * Key comparison callback for fd_tab.
+ *
+ * Both keys are file descriptors stored in pointers.  They compare equal when
+ * fstat() reports the same st_dev, st_ino and st_rdev for both.
+ *
+ * \return 0 when the keys match, non-zero otherwise.  If fstat() fails on
+ *         either descriptor the keys are reported as different instead of
+ *         comparing uninitialised data.
+ */
+static int compare_fd(void *key1, void *key2)
+{
+   int fd1 = pointer_to_intptr(key1);
+   int fd2 = pointer_to_intptr(key2);
+   struct stat stat1, stat2;
+
+   if (fstat(fd1, &stat1) != 0 || fstat(fd2, &stat2) != 0)
+      return 1;
+
+   return stat1.st_dev != stat2.st_dev ||
+          stat1.st_ino != stat2.st_ino ||
+          stat1.st_rdev != stat2.st_rdev;
+}
+
+/**
+ * Return the pipe_screen for the given DRM fd, creating it on first use.
+ *
+ * Screens are looked up in fd_tab (created lazily here) under
+ * virgl_screen_mutex.  On a hit the existing screen's refcnt is incremented
+ * and that screen is returned.  On a miss the fd is duplicated, a winsys and
+ * screen are created on the duplicate, and the screen is registered in fd_tab
+ * keyed by the duplicated fd.
+ *
+ * \param fd  DRM file descriptor; it is dup()ed, the caller keeps its own.
+ * \return the screen, or NULL if the table, the duplicate fd, the winsys or
+ *         the screen could not be created.
+ */
+struct pipe_screen *
+virgl_drm_screen_create(int fd)
+{
+   struct pipe_screen *pscreen = NULL;
+
+   pipe_mutex_lock(virgl_screen_mutex);
+   if (!fd_tab) {
+      fd_tab = util_hash_table_create(hash_fd, compare_fd);
+      if (!fd_tab)
+         goto unlock;
+   }
+
+   pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
+   if (pscreen) {
+      virgl_screen(pscreen)->refcnt++;
+   } else {
+      struct virgl_winsys *vws;
+      int dup_fd = dup(fd);
+
+      /* pscreen is still NULL on both early exits below. */
+      if (dup_fd < 0)
+         goto unlock;
+
+      vws = virgl_drm_winsys_create(dup_fd);
+      if (!vws) {
+         /* Nothing took ownership of the duplicate, so release it here. */
+         close(dup_fd);
+         goto unlock;
+      }
+
+      pscreen = virgl_create_screen(vws);
+      if (pscreen) {
+         util_hash_table_set(fd_tab, intptr_to_pointer(dup_fd), pscreen);
+
+         /* Bit of a hack, to avoid circular linkage dependency,
+          * ie. pipe driver having to call in to winsys, we
+          * override the pipe drivers screen->destroy():
+          */
+         virgl_screen(pscreen)->winsys_priv = pscreen->destroy;
+         pscreen->destroy = virgl_drm_screen_destroy;
+      }
+   }
+
+unlock:
+   pipe_mutex_unlock(virgl_screen_mutex);
+   return pscreen;
+}
pipe_mutex mutex;
struct util_hash_table *bo_handles;
+ struct util_hash_table *bo_names;
pipe_mutex bo_handles_mutex;
};
--- /dev/null
+# Copyright (C) 2014 Emil Velikov <emil.l.velikov@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+LOCAL_PATH := $(call my-dir)
+
+# get C_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(C_SOURCES)
+
+LOCAL_MODULE := libmesa_winsys_virgl_vtest
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
- if ((mask & (1 << __DRI_API_GLES2)) != 0)
- __glXEnableDirectExtension(&psc->base,
- "GLX_EXT_create_context_es2_profile");
+ if ((mask & ((1 << __DRI_API_GLES) |
+ (1 << __DRI_API_GLES2) |
+ (1 << __DRI_API_GLES3))) != 0) {
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es_profile");
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es2_profile");
+ }
}
for (i = 0; extensions[i]; i++) {
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
- if ((mask & (1 << __DRI_API_GLES2)) != 0)
+ if ((mask & ((1 << __DRI_API_GLES) |
+ (1 << __DRI_API_GLES2) |
+ (1 << __DRI_API_GLES3))) != 0) {
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es_profile");
__glXEnableDirectExtension(&psc->base,
"GLX_EXT_create_context_es2_profile");
+ }
for (i = 0; extensions[i]; i++) {
/* when on a different gpu than the server, the server pixmaps
case GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB:
*api = __DRI_API_OPENGL;
break;
- case GLX_CONTEXT_ES2_PROFILE_BIT_EXT:
- *api = __DRI_API_GLES2;
- break;
+ case GLX_CONTEXT_ES_PROFILE_BIT_EXT:
+ if (*major_ver >= 3)
+ *api = __DRI_API_GLES3;
+ else if (*major_ver == 2 && *minor_ver == 0)
+ *api = __DRI_API_GLES2;
+ else if (*major_ver == 1 && *minor_ver < 2)
+ *api = __DRI_API_GLES;
+ else {
+ *error = __DRI_CTX_ERROR_BAD_API;
+ return false;
+ }
+ break;
default:
*error = __DRI_CTX_ERROR_BAD_API;
return false;
return false;
}
- /* The GLX_EXT_create_context_es2_profile spec says:
- *
- * "... If the version requested is 2.0, and the
- * GLX_CONTEXT_ES2_PROFILE_BIT_EXT bit is set in the
- * GLX_CONTEXT_PROFILE_MASK_ARB attribute (see below), then the context
- * returned will implement OpenGL ES 2.0. This is the only way in which
- * an implementation may request an OpenGL ES 2.0 context."
- */
- if (*api == __DRI_API_GLES2 && (*major_ver != 2 || *minor_ver != 0)) {
- *error = __DRI_CTX_ERROR_BAD_API;
- return false;
- }
-
*error = __DRI_CTX_ERROR_SUCCESS;
return true;
}
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context");
__glXEnableDirectExtension(&psc->base, "GLX_ARB_create_context_profile");
- /* DRISW version >= 2 implies support for OpenGL ES 2.0.
+ /* DRISW version >= 2 implies support for OpenGL ES.
*/
+ __glXEnableDirectExtension(&psc->base,
+ "GLX_EXT_create_context_es_profile");
__glXEnableDirectExtension(&psc->base,
"GLX_EXT_create_context_es2_profile");
}
{ GLX(EXT_fbconfig_packed_float), VER(0,0), Y, Y, N, N },
{ GLX(EXT_framebuffer_sRGB), VER(0,0), Y, Y, N, N },
{ GLX(EXT_create_context_es2_profile), VER(0,0), Y, N, N, N },
+ { GLX(EXT_create_context_es_profile), VER(0,0), Y, N, N, N },
{ GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N },
{ GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N },
{ GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y },
EXT_import_context_bit,
EXT_framebuffer_sRGB_bit,
EXT_fbconfig_packed_float_bit,
+ EXT_create_context_es_profile_bit,
EXT_create_context_es2_profile_bit,
MESA_copy_sub_buffer_bit,
MESA_depth_float_bit,
<xi:include href="ARB_multi_bind.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
-<!-- ARB extensions 148 - 153 -->
+<category name="GL_ARB_query_buffer_object" number="148">
+ <enum name="QUERY_RESULT_NO_WAIT" value="0x9194"/>
+ <enum name="QUERY_BUFFER" value="0x9192"/>
+ <enum name="QUERY_BUFFER_BINDING" value="0x9193"/>
+ <enum name="QUERY_BUFFER_BARRIER_BIT" value="0x00008000"/>
+</category>
+
+<!-- ARB extensions 149 - 153 -->
<xi:include href="ARB_indirect_parameters.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<enum name="FRAMEBUFFER_SRGB_CAPABLE_EXT" value="0x8DBA"/>
</category>
+<category name="GL_ATI_meminfo" number="359">
+ <enum name="VBO_FREE_MEMORY_ATI" value="0x87FB" />
+ <enum name="TEXTURE_FREE_MEMORY_ATI" value="0x87FC" />
+ <enum name="RENDERBUFFER_FREE_MEMORY_ATI" value="0x87FD" />
+</category>
+
<xi:include href="AMD_performance_monitor.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<category name="GL_APPLE_texture_range" number="367">
<enum name="EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD" value="0x9160"/>
</category>
+<category name="GL_NVX_gpu_memory_info" number="438">
+ <enum name="GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX" value="0x9047" />
+ <enum name="GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX" value="0x9048" />
+ <enum name="GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX" value="0x9049" />
+ <enum name="GPU_MEMORY_INFO_EVICTION_COUNT_NVX" value="0x904A" />
+ <enum name="GPU_MEMORY_INFO_EVICTED_MEMORY_NVX" value="0x904B" />
+</category>
+
<xi:include href="INTEL_performance_query.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<category name="GL_EXT_polygon_offset_clamp" number="460">
vbo/vbo_exec_eval.c \
vbo/vbo_exec.h \
vbo/vbo.h \
+ vbo/vbo_minmax_index.c \
vbo/vbo_noop.c \
vbo/vbo_noop.h \
vbo/vbo_primitive_restart.c \
STATETRACKER_FILES = \
state_tracker/st_atom_array.c \
+ state_tracker/st_atom_atomicbuf.c \
state_tracker/st_atom_blend.c \
state_tracker/st_atom.c \
state_tracker/st_atom_clip.c \
state_tracker/st_atom_shader.c \
state_tracker/st_atom_shader.h \
state_tracker/st_atom_stipple.c \
+ state_tracker/st_atom_storagebuf.c \
state_tracker/st_atom_tess.c \
state_tracker/st_atom_texture.c \
state_tracker/st_atom_viewport.c \
DRI_CONF_OPT_BEGIN_B(thread_submit, def) \
DRI_CONF_DESC(en,gettext("Use an additional thread to submit buffers.")) \
DRI_CONF_OPT_END
+
+/**
+ * Declares the integer driconf option "override_vendorid" with default
+ * value \p def; see the option description string below for its meaning.
+ */
+#define DRI_CONF_NINE_OVERRIDEVENDOR(def) \
+DRI_CONF_OPT_BEGIN(override_vendorid, int, def) \
+ DRI_CONF_DESC(en,"Define the vendor_id to report. This allows faking another hardware vendor.") \
+DRI_CONF_OPT_END
#include "brw_compiler.h"
#include "brw_context.h"
-#include "nir.h"
+#include "compiler/nir/nir.h"
#include "main/errors.h"
#include "util/debug.h"
}
}
+/**
+ * Recompute the derived default sample count of a framebuffer.
+ *
+ * DefaultGeometry.NumSamples is passed through
+ * intel_quantize_num_samples() for this context's screen and the result is
+ * stored in DefaultGeometry._NumSamples.
+ *
+ * \param ctx  GL context that owns the i965 driver state
+ * \param fb   framebuffer whose derived sample count is updated
+ */
+static void
+intel_update_framebuffer(struct gl_context *ctx,
+                         struct gl_framebuffer *fb)
+{
+   fb->DefaultGeometry._NumSamples =
+      intel_quantize_num_samples(brw_context(ctx)->intelScreen,
+                                 fb->DefaultGeometry.NumSamples);
+}
+
static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
}
_mesa_lock_context_textures(ctx);
+
+ if (new_state & _NEW_BUFFERS) {
+ intel_update_framebuffer(ctx, ctx->DrawBuffer);
+ if (ctx->DrawBuffer != ctx->ReadBuffer)
+ intel_update_framebuffer(ctx, ctx->ReadBuffer);
+ }
}
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
qsort(table.imm, table.len, sizeof(struct imm), compare);
/* Insert MOVs to load the constant values into GRFs. */
- fs_reg reg(VGRF, alloc.allocate(dispatch_width / 8));
+ fs_reg reg(VGRF, alloc.allocate(1));
reg.stride = 0;
for (int i = 0; i < table.len; i++) {
struct imm *imm = &table.imm[i];
imm->subreg_offset = reg.subreg_offset;
reg.subreg_offset += sizeof(float);
- if ((unsigned)reg.subreg_offset == dispatch_width * sizeof(float)) {
- reg.nr = alloc.allocate(dispatch_width / 8);
+ if ((unsigned)reg.subreg_offset == 8 * sizeof(float)) {
+ reg.nr = alloc.allocate(1);
reg.subreg_offset = 0;
}
}
inst->predicate = BRW_PREDICATE_NORMAL;
break;
- case nir_op_extract_ubyte:
- case nir_op_extract_ibyte: {
+ case nir_op_extract_u8:
+ case nir_op_extract_i8: {
nir_const_value *byte = nir_src_as_const_value(instr->src[1].src);
bld.emit(SHADER_OPCODE_EXTRACT_BYTE,
result, op[0], brw_imm_ud(byte->u[0]));
break;
}
- case nir_op_extract_uword:
- case nir_op_extract_iword: {
+ case nir_op_extract_u16:
+ case nir_op_extract_i16: {
nir_const_value *word = nir_src_as_const_value(instr->src[1].src);
bld.emit(SHADER_OPCODE_EXTRACT_WORD,
result, op[0], brw_imm_ud(word->u[0]));
/* Clear the message payload */
pbld.MOV(payload, brw_imm_ud(0u));
- /* Copy bits 27:24 of r0.2 (barrier id) to the message payload reg.2 */
+ /* Copy the barrier id from r0.2 to the message payload reg.2 */
fs_reg r0_2 = fs_reg(retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD));
pbld.AND(component(payload, 2), r0_2, brw_imm_ud(barrier_id_mask));
static bool
try_copy_propagate(const struct brw_device_info *devinfo,
- vec4_instruction *inst,
- int arg, struct copy_entry *entry)
+ vec4_instruction *inst, int arg,
+ struct copy_entry *entry, int attributes_per_reg)
{
/* Build up the value we are propagating as if it were the source of a
* single MOV
unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
value.swizzle);
if (inst->is_3src() &&
- value.file == UNIFORM &&
+ (value.file == UNIFORM ||
+ (value.file == ATTR && attributes_per_reg != 1)) &&
!brw_is_single_value_swizzle(composed_swizzle))
return false;
bool
vec4_visitor::opt_copy_propagation(bool do_constant_prop)
{
+ /* If we are in dual instanced or single mode, then attributes are going
+ * to be interleaved, so one register contains two attribute slots.
+ */
+ const int attributes_per_reg =
+ prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
bool progress = false;
struct copy_entry entries[alloc.total_size];
if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
progress = true;
- if (try_copy_propagate(devinfo, inst, i, &entry))
+ if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg))
progress = true;
}
{
const unsigned depth = max_layer - min_layer;
struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = 0;
+ uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
int surf_index = surf_offset - &brw->wm.base.surf_offset[0];
unsigned tiling_mode, pitch;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;
struct intel_mipmap_tree *aux_mt = NULL;
- uint32_t aux_mode = 0;
+ uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
unsigned width = mt->logical_width0;
unsigned height = mt->logical_height0;
unsigned pitch = mt->pitch;
#include <stdbool.h>
#include <inttypes.h> /* for PRId64 macro */
+#include "util/debug.h"
#include "glheader.h"
#include "enums.h"
#include "hash.h"
return &ctx->CopyReadBuffer;
case GL_COPY_WRITE_BUFFER:
return &ctx->CopyWriteBuffer;
+ case GL_QUERY_BUFFER:
+ if (_mesa_has_ARB_query_buffer_object(ctx))
+ return &ctx->QueryBuffer;
+ break;
case GL_DRAW_INDIRECT_BUFFER:
if ((ctx->API == API_OPENGL_CORE &&
ctx->Extensions.ARB_draw_indirect) ||
{
(void) ctx;
+ vbo_delete_minmax_cache(bufObj);
_mesa_align_free(bufObj->Data);
/* assign strange values here to help w/ debugging */
}
+/**
+ * Get the value of MESA_NO_MINMAX_CACHE.
+ *
+ * The environment variable is read with env_var_as_boolean() on the first
+ * call only; the result is kept in function-local statics and returned by
+ * every later call.
+ *
+ * \return true if the variable requests disabling the min/max index cache,
+ *         false otherwise (including when it is unset).
+ */
+static bool
+get_no_minmax_cache(void)
+{
+   static bool read = false;
+   static bool disable = false;
+
+   if (!read) {
+      disable = env_var_as_boolean("MESA_NO_MINMAX_CACHE", false);
+      read = true;
+   }
+
+   return disable;
+}
+
+
/**
* Initialize a buffer object to default values.
*/
obj->RefCount = 1;
obj->Name = name;
obj->Usage = GL_STATIC_DRAW_ARB;
+
+ if (get_no_minmax_cache())
+ obj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
}
_mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer,
ctx->Shared->NullBufferObj);
+ _mesa_reference_buffer_object(ctx, &ctx->QueryBuffer,
+ ctx->Shared->NullBufferObj);
+
for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
_mesa_reference_buffer_object(ctx, &ctx->DispatchIndirectBuffer, NULL);
+ _mesa_reference_buffer_object(ctx, &ctx->QueryBuffer, NULL);
+
for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
_mesa_reference_buffer_object(ctx,
&ctx->UniformBufferBindings[i].BufferObject,
return;
}
+ /* record usage history */
+ switch (target) {
+ case GL_PIXEL_PACK_BUFFER:
+ newBufObj->UsageHistory |= USAGE_PIXEL_PACK_BUFFER;
+ break;
+ default:
+ break;
+ }
+
/* bind new buffer */
_mesa_reference_buffer_object(ctx, bindTarget, newBufObj);
}
_mesa_BindBuffer(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0);
}
+ /* unbind query buffer binding point */
+ if (ctx->QueryBuffer == bufObj) {
+ _mesa_BindBuffer(GL_QUERY_BUFFER, 0);
+ }
+
/* The ID is immediately freed for re-use */
_mesa_HashRemove(ctx->Shared->BufferObjects, ids[i]);
/* Make sure we do not run into the classic ABA problem on bind.
bufObj->Written = GL_TRUE;
bufObj->Immutable = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
assert(ctx->Driver.BufferData);
if (!ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW,
FLUSH_VERTICES(ctx, _NEW_BUFFER_OBJECT);
bufObj->Written = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
#ifdef VBO_DEBUG
printf("glBufferDataARB(%u, sz %ld, from %p, usage 0x%x)\n",
}
bufObj->Written = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
assert(ctx->Driver.BufferSubData);
ctx->Driver.BufferSubData(ctx, offset, size, data, bufObj);
return;
}
+ /* Bail early. Negative size has already been checked. */
+ if (size == 0)
+ return;
+
+ bufObj->MinMaxCacheDirty = true;
+
if (data == NULL) {
/* clear to zeros, per the spec */
- if (size > 0) {
- ctx->Driver.ClearBufferSubData(ctx, offset, size,
- NULL, clearValueSize, bufObj);
- }
+ ctx->Driver.ClearBufferSubData(ctx, offset, size,
+ NULL, clearValueSize, bufObj);
return;
}
return;
}
- if (size > 0) {
- ctx->Driver.ClearBufferSubData(ctx, offset, size,
- clearValue, clearValueSize, bufObj);
- }
+ ctx->Driver.ClearBufferSubData(ctx, offset, size,
+ clearValue, clearValueSize, bufObj);
}
void GLAPIENTRY
}
}
+ dst->MinMaxCacheDirty = true;
+
ctx->Driver.CopyBufferSubData(ctx, src, dst, readOffset, writeOffset, size);
}
assert(bufObj->Mappings[MAP_USER].AccessFlags == access);
}
- if (access & GL_MAP_WRITE_BIT)
+ if (access & GL_MAP_WRITE_BIT) {
bufObj->Written = GL_TRUE;
+ bufObj->MinMaxCacheDirty = true;
+ }
#ifdef VBO_DEBUG
if (strstr(func, "Range") == NULL) { /* If not MapRange */
struct gl_shader_program;
struct gl_texture_image;
struct gl_texture_object;
+struct gl_memory_info;
/* GL_ARB_vertex_buffer_object */
/* Modifies GL_MAP_UNSYNCHRONIZED_BIT to allow driver to fail (return
void (*EndQuery)(struct gl_context *ctx, struct gl_query_object *q);
void (*CheckQuery)(struct gl_context *ctx, struct gl_query_object *q);
void (*WaitQuery)(struct gl_context *ctx, struct gl_query_object *q);
+ /**
+ * \param pname the value requested to be written (GL_QUERY_RESULT, etc)
+ * \param ptype the type of the value requested to be written:
+ * GL_UNSIGNED_INT, GL_UNSIGNED_INT64_ARB,
+ * GL_INT, GL_INT64_ARB
+ */
+ void (*StoreQueryResult)(struct gl_context *ctx, struct gl_query_object *q,
+ struct gl_buffer_object *buf, intptr_t offset,
+ GLenum pname, GLenum ptype);
/*@}*/
/**
void (*DispatchCompute)(struct gl_context *ctx, const GLuint *num_groups);
void (*DispatchComputeIndirect)(struct gl_context *ctx, GLintptr indirect);
/*@}*/
+
+ /**
+ * Query information about memory. Device memory is e.g. VRAM. Staging
+ * memory is e.g. GART. All sizes are in kilobytes.
+ */
+ void (*QueryMemoryInfo)(struct gl_context *ctx,
+ struct gl_memory_info *info);
};
EXT(ARB_point_sprite , ARB_point_sprite , GLL, GLC, x , x , 2003)
EXT(ARB_program_interface_query , dummy_true , GLL, GLC, x , x , 2012)
EXT(ARB_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009)
+EXT(ARB_query_buffer_object , ARB_query_buffer_object , GLL, GLC, x , x , 2013)
EXT(ARB_robustness , dummy_true , GLL, GLC, x , x , 2010)
EXT(ARB_sample_shading , ARB_sample_shading , GLL, GLC, x , x , 2009)
EXT(ARB_sampler_objects , dummy_true , GLL, GLC, x , x , 2009)
EXT(ATI_blend_equation_separate , EXT_blend_equation_separate , GLL, GLC, x , x , 2003)
EXT(ATI_draw_buffers , dummy_true , GLL, x , x , x , 2002)
EXT(ATI_fragment_shader , ATI_fragment_shader , GLL, x , x , x , 2001)
+EXT(ATI_meminfo , ATI_meminfo , GLL, GLC, x , x , 2009)
EXT(ATI_separate_stencil , ATI_separate_stencil , GLL, x , x , x , 2006)
EXT(ATI_texture_compression_3dc , ATI_texture_compression_3dc , GLL, x , x , x , 2004)
EXT(ATI_texture_env_combine3 , ATI_texture_env_combine3 , GLL, x , x , x , 2002)
EXT(NV_texture_env_combine4 , NV_texture_env_combine4 , GLL, x , x , x , 1999)
EXT(NV_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2000)
EXT(NV_vdpau_interop , NV_vdpau_interop , GLL, GLC, x , x , 2010)
+EXT(NVX_gpu_memory_info , NVX_gpu_memory_info , GLL, GLC, x , x , 2013)
EXT(OES_EGL_image , OES_EGL_image , GLL, GLC, ES1, ES2, 2006) /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */
EXT(OES_EGL_image_external , OES_EGL_image_external , x , x , ES1, ES2, 2010)
EXT(OES_fbo_render_mipmap , dummy_true , x , x , ES1, ES2, 2005)
EXT(OES_fixed_point , dummy_true , x , x , ES1, x , 2002)
EXT(OES_framebuffer_object , dummy_true , x , x , ES1, x , 2005)
+EXT(OES_geometry_point_size , OES_geometry_shader , x , x , x , 31, 2015)
EXT(OES_geometry_shader , OES_geometry_shader , x , x , x , 31, 2015)
EXT(OES_get_program_binary , dummy_true , x , x , x , ES2, 2008)
EXT(OES_mapbuffer , dummy_true , x , x , ES1, ES2, 2005)
_mesa_error(ctx, GL_INVALID_ENUM,
"%s(pname=0x%x)", func, pname);
}
+
+ invalidate_framebuffer(fb);
+ ctx->NewState |= _NEW_BUFFERS;
}
void GLAPIENTRY
return channels
def parse(filename):
- """Parse a format descrition in CSV format.
+ """Parse a format description in CSV format.
This function parses the given CSV file and returns an iterable of
channels."""
#ifndef FRAMEBUFFER_H
#define FRAMEBUFFER_H
-#include "glheader.h"
+#include "mtypes.h"
struct gl_config;
struct gl_context;
_mesa_geometric_samples(const struct gl_framebuffer *buffer)
{
return buffer->_HasAttachments ?
- buffer->Visual.samples : buffer->DefaultGeometry.NumSamples;
+ buffer->Visual.samples :
+ buffer->DefaultGeometry._NumSamples;
}
static inline GLuint
EXTRA_VALID_CLIP_DISTANCE,
EXTRA_FLUSH_CURRENT,
EXTRA_GLSL_130,
- EXTRA_EXT_UBO_GS4,
- EXTRA_EXT_ATOMICS_GS4,
- EXTRA_EXT_SHADER_IMAGE_GS4,
+ EXTRA_EXT_UBO_GS,
+ EXTRA_EXT_ATOMICS_GS,
+ EXTRA_EXT_SHADER_IMAGE_GS,
EXTRA_EXT_ATOMICS_TESS,
EXTRA_EXT_SHADER_IMAGE_TESS,
+ EXTRA_EXT_SSBO_GS,
+ EXTRA_EXT_FB_NO_ATTACH_GS,
+ EXTRA_EXT_ES_GS,
};
#define NO_EXTRA NULL
};
static const int extra_ARB_uniform_buffer_object_and_geometry_shader[] = {
- EXTRA_EXT_UBO_GS4,
+ EXTRA_EXT_UBO_GS,
EXTRA_END
};
};
static const int extra_ARB_shader_atomic_counters_and_geometry_shader[] = {
- EXTRA_EXT_ATOMICS_GS4,
+ EXTRA_EXT_ATOMICS_GS,
EXTRA_END
};
static const int extra_ARB_shader_image_load_store_and_geometry_shader[] = {
- EXTRA_EXT_SHADER_IMAGE_GS4,
+ EXTRA_EXT_SHADER_IMAGE_GS,
EXTRA_END
};
EXTRA_END
};
+static const int extra_ARB_shader_storage_buffer_object_and_geometry_shader[] = {
+ EXTRA_EXT_SSBO_GS,
+ EXTRA_END
+};
+
+static const int extra_ARB_framebuffer_no_attachments_and_geometry_shader[] = {
+ EXTRA_EXT_FB_NO_ATTACH_GS,
+ EXTRA_END
+};
+
+static const int extra_ARB_viewport_array_or_oes_geometry_shader[] = {
+ EXT(ARB_viewport_array),
+ EXTRA_EXT_ES_GS,
+ EXTRA_END
+};
+
+static const int extra_ARB_gpu_shader5_or_oes_geometry_shader[] = {
+ EXT(ARB_gpu_shader5),
+ EXTRA_EXT_ES_GS,
+ EXTRA_END
+};
+
EXTRA_EXT(ARB_texture_cube_map);
EXTRA_EXT(EXT_texture_array);
EXTRA_EXT(NV_fog_distance);
EXTRA_EXT(ARB_viewport_array);
EXTRA_EXT(ARB_compute_shader);
EXTRA_EXT(ARB_gpu_shader5);
+EXTRA_EXT(ARB_query_buffer_object);
EXTRA_EXT2(ARB_transform_feedback3, ARB_gpu_shader5);
EXTRA_EXT(INTEL_performance_query);
EXTRA_EXT(ARB_explicit_uniform_location);
EXTRA_EXT(ARB_shader_subroutine);
EXTRA_EXT(ARB_shader_storage_buffer_object);
EXTRA_EXT(ARB_indirect_parameters);
+EXTRA_EXT(ATI_meminfo);
+EXTRA_EXT(NVX_gpu_memory_info);
static const int
extra_ARB_color_buffer_float_or_glcore[] = {
EXTRA_END,
};
+static const int extra_version_32_OES_geometry_shader[] = {
+ EXTRA_VERSION_32,
+ EXTRA_EXT_ES_GS,
+ EXTRA_END
+};
+
static const int extra_gl40_ARB_sample_shading[] = {
EXTRA_VERSION_40,
EXT(ARB_sample_shading),
case GL_SHADER_STORAGE_BUFFER_BINDING:
v->value_int = ctx->ShaderStorageBuffer->Name;
break;
+ /* GL_ARB_query_buffer_object */
+ case GL_QUERY_BUFFER_BINDING:
+ v->value_int = ctx->QueryBuffer->Name;
+ break;
/* GL_ARB_timer_query */
case GL_TIMESTAMP:
if (ctx->Driver.GetTimestamp) {
case GL_DISPATCH_INDIRECT_BUFFER_BINDING:
v->value_int = ctx->DispatchIndirectBuffer->Name;
break;
+ /* GL_ARB_multisample */
+ case GL_SAMPLES:
+ v->value_int = _mesa_geometric_samples(ctx->DrawBuffer);
+ break;
+ case GL_SAMPLE_BUFFERS:
+ v->value_int = _mesa_geometric_samples(ctx->DrawBuffer) > 0;
+ break;
+ /* GL_ATI_meminfo & GL_NVX_gpu_memory_info */
+ case GL_VBO_FREE_MEMORY_ATI:
+ case GL_TEXTURE_FREE_MEMORY_ATI:
+ case GL_RENDERBUFFER_FREE_MEMORY_ATI:
+ case GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX:
+ case GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX:
+ case GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX:
+ case GL_GPU_MEMORY_INFO_EVICTION_COUNT_NVX:
+ case GL_GPU_MEMORY_INFO_EVICTED_MEMORY_NVX:
+ {
+ struct gl_memory_info info;
+
+ ctx->Driver.QueryMemoryInfo(ctx, &info);
+
+ if (d->pname == GL_GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX)
+ v->value_int = info.total_device_memory;
+ else if (d->pname == GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX)
+ v->value_int = info.total_device_memory +
+ info.total_staging_memory;
+ else if (d->pname == GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX)
+ v->value_int = info.avail_device_memory;
+ else if (d->pname == GL_GPU_MEMORY_INFO_EVICTION_COUNT_NVX)
+ v->value_int = info.nr_device_memory_evictions;
+ else if (d->pname == GL_GPU_MEMORY_INFO_EVICTED_MEMORY_NVX)
+ v->value_int = info.device_memory_evicted;
+ else {
+ /* ATI free memory enums.
+ *
+ * Since the GPU memory is (usually) page-table based, every two
+ * consecutive elements are equal. From the GL_ATI_meminfo
+ * specification:
+ *
+ * "param[0] - total memory free in the pool
+ * param[1] - largest available free block in the pool
+ * param[2] - total auxiliary memory free
+ * param[3] - largest auxiliary free block"
+ *
+ * All three (VBO, TEXTURE, RENDERBUFFER) queries return
+ * the same numbers here.
+ */
+ v->value_int_4[0] = info.avail_device_memory;
+ v->value_int_4[1] = info.avail_device_memory;
+ v->value_int_4[2] = info.avail_staging_memory;
+ v->value_int_4[3] = info.avail_staging_memory;
+ }
+ }
+ break;
}
}
if (ctx->Const.GLSLVersion >= 130)
api_found = GL_TRUE;
break;
- case EXTRA_EXT_UBO_GS4:
+ case EXTRA_EXT_UBO_GS:
api_check = GL_TRUE;
- api_found = (ctx->Extensions.ARB_uniform_buffer_object &&
- _mesa_has_geometry_shaders(ctx));
+ if (ctx->Extensions.ARB_uniform_buffer_object &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
break;
- case EXTRA_EXT_ATOMICS_GS4:
+ case EXTRA_EXT_ATOMICS_GS:
api_check = GL_TRUE;
- api_found = (ctx->Extensions.ARB_shader_atomic_counters &&
- _mesa_has_geometry_shaders(ctx));
+ if (ctx->Extensions.ARB_shader_atomic_counters &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
break;
- case EXTRA_EXT_SHADER_IMAGE_GS4:
+ case EXTRA_EXT_SHADER_IMAGE_GS:
api_check = GL_TRUE;
- api_found = (ctx->Extensions.ARB_shader_image_load_store &&
- _mesa_has_geometry_shaders(ctx));
+ if (ctx->Extensions.ARB_shader_image_load_store &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
break;
case EXTRA_EXT_ATOMICS_TESS:
api_check = GL_TRUE;
api_found = ctx->Extensions.ARB_shader_image_load_store &&
_mesa_has_tessellation(ctx);
break;
+ case EXTRA_EXT_SSBO_GS:
+ api_check = GL_TRUE;
+ if (ctx->Extensions.ARB_shader_storage_buffer_object &&
+ _mesa_has_geometry_shaders(ctx))
+ api_found = GL_TRUE;
+ break;
+ case EXTRA_EXT_FB_NO_ATTACH_GS:
+ api_check = GL_TRUE;
+ if (ctx->Extensions.ARB_framebuffer_no_attachments &&
+ (_mesa_is_desktop_gl(ctx) ||
+ _mesa_has_OES_geometry_shader(ctx)))
+ api_found = GL_TRUE;
+ break;
+ case EXTRA_EXT_ES_GS:
+ api_check = GL_TRUE;
+ if (_mesa_has_OES_geometry_shader(ctx))
+ api_found = GL_TRUE;
+ break;
case EXTRA_END:
break;
default: /* *e is a offset into the extension struct */
[ "SAMPLE_COVERAGE_ARB", "CONTEXT_BOOL(Multisample.SampleCoverage), NO_EXTRA" ],
[ "SAMPLE_COVERAGE_VALUE_ARB", "CONTEXT_FLOAT(Multisample.SampleCoverageValue), NO_EXTRA" ],
[ "SAMPLE_COVERAGE_INVERT_ARB", "CONTEXT_BOOL(Multisample.SampleCoverageInvert), NO_EXTRA" ],
- [ "SAMPLE_BUFFERS_ARB", "BUFFER_INT(Visual.sampleBuffers), extra_new_buffers" ],
- [ "SAMPLES_ARB", "BUFFER_INT(Visual.samples), extra_new_buffers" ],
+ [ "SAMPLE_BUFFERS_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_new_buffers" ],
+ [ "SAMPLES_ARB", "LOC_CUSTOM, TYPE_INT, 0, extra_new_buffers" ],
# GL_ARB_sample_shading
[ "SAMPLE_SHADING_ARB", "CONTEXT_BOOL(Multisample.SampleShading), extra_gl40_ARB_sample_shading" ],
["MAX_FRAMEBUFFER_HEIGHT", "CONTEXT_INT(Const.MaxFramebufferHeight), extra_ARB_framebuffer_no_attachments"],
["MAX_FRAMEBUFFER_SAMPLES", "CONTEXT_INT(Const.MaxFramebufferSamples), extra_ARB_framebuffer_no_attachments"],
+# GL_ARB_framebuffer_no_attachments / geometry shader
+ [ "MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments_and_geometry_shader" ],
+
# GL_ARB_explicit_uniform_location / GLES 3.1
[ "MAX_UNIFORM_LOCATIONS", "CONTEXT_INT(Const.MaxUserAssignableUniformLocations), extra_ARB_explicit_uniform_location" ],
{ "apis": ["GL_CORE", "GLES31"], "params": [
# GL_ARB_draw_indirect / GLES 3.1
[ "DRAW_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_draw_indirect" ],
+
+# GL 3.2 / GL OES_geometry_shader
+ [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_OUTPUT_VERTICES", "CONTEXT_INT(Const.MaxGeometryOutputVertices), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxGeometryTotalOutputComponents), extra_version_32_OES_geometry_shader" ],
+ [ "MAX_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents), extra_version_32_OES_geometry_shader" ],
+
+# GL_ARB_shader_image_load_store / geometry shader
+ [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader" ],
+
+# GL_ARB_shader_atomic_counters / geometry shader
+ [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader " ],
+ [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
+
+# GL_ARB_shader_storage_buffer_object / geometry shader
+ [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ],
+
+# GL_ARB_uniform_buffer_object / geometry shader
+ [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
+ [ "MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
+
+# GL_ARB_viewport_array / GL_OES_geometry_shader
+ [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Light.ProvokingVertex), extra_ARB_viewport_array_or_oes_geometry_shader" ],
+
+# GL_ARB_gpu_shader5 / GL_OES_geometry_shader
+ [ "MAX_GEOMETRY_SHADER_INVOCATIONS", "CONST(MAX_GEOMETRY_SHADER_INVOCATIONS), extra_ARB_gpu_shader5_or_oes_geometry_shader" ],
]},
# Remaining enums are only in OpenGL
# GL 3.2
[ "CONTEXT_PROFILE_MASK", "CONTEXT_INT(Const.ProfileMask), extra_version_32" ],
- [ "MAX_GEOMETRY_INPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents), extra_version_32" ],
- [ "MAX_GEOMETRY_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents), extra_version_32" ],
- [ "MAX_GEOMETRY_TEXTURE_IMAGE_UNITS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits), extra_version_32" ],
- [ "MAX_GEOMETRY_OUTPUT_VERTICES", "CONTEXT_INT(Const.MaxGeometryOutputVertices), extra_version_32" ],
- [ "MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS", "CONTEXT_INT(Const.MaxGeometryTotalOutputComponents), extra_version_32" ],
- [ "MAX_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents), extra_version_32" ],
# GL_ARB_robustness
[ "RESET_NOTIFICATION_STRATEGY_ARB", "CONTEXT_ENUM(Const.ResetStrategy), NO_EXTRA" ],
-
-# GL_ARB_uniform_buffer_object
- [ "MAX_GEOMETRY_UNIFORM_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
- [ "MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxCombinedUniformComponents), extra_ARB_uniform_buffer_object_and_geometry_shader" ],
-
# GL_ARB_timer_query
[ "TIMESTAMP", "LOC_CUSTOM, TYPE_INT64, 0, extra_ARB_timer_query" ],
# GL_ARB_texture_gather
[ "MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB", "CONTEXT_INT(Const.MaxProgramTextureGatherComponents), extra_ARB_texture_gather"],
-# GL_ARB_shader_atomic_counters
- [ "MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
- [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ],
-
# GL_ARB_shader_image_load_store
[ "MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS", "CONTEXT_INT(Const.MaxCombinedShaderOutputResources), extra_ARB_shader_image_load_store" ],
[ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ],
- [ "MAX_GEOMETRY_IMAGE_UNIFORMS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms), extra_ARB_shader_image_load_store_and_geometry_shader"],
-
-# GL_ARB_framebuffer_no_attachments
- ["MAX_FRAMEBUFFER_LAYERS", "CONTEXT_INT(Const.MaxFramebufferLayers), extra_ARB_framebuffer_no_attachments"],
# GL_EXT_polygon_offset_clamp
[ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ],
# GL_ARB_shader_storage_buffer_object
- [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
[ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
[ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ],
+
+# GL_ARB_query_buffer_object
+ [ "QUERY_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_query_buffer_object" ],
+
+# GL_ATI_meminfo
+ [ "VBO_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ],
+ [ "TEXTURE_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ],
+ [ "RENDERBUFFER_FREE_MEMORY_ATI", "LOC_CUSTOM, TYPE_INT_4, NO_OFFSET, extra_ATI_meminfo" ],
+
+# GL_NVX_gpu_memory_info
+ [ "GPU_MEMORY_INFO_DEDICATED_VIDMEM_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_EVICTION_COUNT_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
+ [ "GPU_MEMORY_INFO_EVICTED_MEMORY_NVX", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, extra_NVX_gpu_memory_info" ],
]},
# Enums restricted to OpenGL Core profile
[ "MAX_VIEWPORTS", "CONTEXT_INT(Const.MaxViewports), extra_ARB_viewport_array" ],
[ "VIEWPORT_SUBPIXEL_BITS", "CONTEXT_INT(Const.ViewportSubpixelBits), extra_ARB_viewport_array" ],
[ "VIEWPORT_BOUNDS_RANGE", "CONTEXT_FLOAT2(Const.ViewportBounds), extra_ARB_viewport_array" ],
- [ "LAYER_PROVOKING_VERTEX", "CONTEXT_ENUM(Const.LayerAndVPIndexProvokingVertex), extra_ARB_viewport_array" ],
[ "VIEWPORT_INDEX_PROVOKING_VERTEX", "CONTEXT_ENUM(Const.LayerAndVPIndexProvokingVertex), extra_ARB_viewport_array" ],
# GL_ARB_gpu_shader5
GLuint
_mesa_HashNumEntries(const struct _mesa_HashTable *table)
{
- struct hash_entry *entry;
GLuint count = 0;
if (table->deleted_key_data)
count++;
- hash_table_foreach(table->ht, entry)
- count++;
+ count += _mesa_hash_table_num_entries(table->ht);
return count;
}
USAGE_TEXTURE_BUFFER = 0x2,
USAGE_ATOMIC_COUNTER_BUFFER = 0x4,
USAGE_SHADER_STORAGE_BUFFER = 0x8,
+ USAGE_TRANSFORM_FEEDBACK_BUFFER = 0x10,
+ USAGE_PIXEL_PACK_BUFFER = 0x20,
+ USAGE_DISABLE_MINMAX_CACHE = 0x40,
} gl_buffer_usage;
GLuint NumMapBufferWriteCalls;
struct gl_buffer_mapping Mappings[MAP_COUNT];
+
+ /** Memoization of min/max index computations for static index buffers */
+ struct hash_table *MinMaxCache;
+ unsigned MinMaxCacheHitIndices;
+ unsigned MinMaxCacheMissIndices;
+ bool MinMaxCacheDirty;
};
PROGRAM_SAMPLER, /**< for shader samplers, compile-time only */
PROGRAM_SYSTEM_VALUE,/**< InstanceId, PrimitiveID, etc. */
PROGRAM_UNDEFINED, /**< Invalid/TBD value */
+ PROGRAM_IMMEDIATE, /**< Immediate value, used by TGSI */
+ PROGRAM_BUFFER, /**< for shader buffers, compile-time only */
PROGRAM_FILE_MAX
} gl_register_file;
struct {
GLuint Width, Height, Layers, NumSamples;
GLboolean FixedSampleLocations;
+ /* Derived from NumSamples by the driver so that it can choose a valid
+ * value for the hardware.
+ */
+ GLuint _NumSamples;
} DefaultGeometry;
/** \name Drawing bounds (Intersection of buffer size and scissor box)
GLboolean ARB_occlusion_query2;
GLboolean ARB_pipeline_statistics_query;
GLboolean ARB_point_sprite;
+ GLboolean ARB_query_buffer_object;
GLboolean ARB_sample_shading;
GLboolean ARB_seamless_cube_map;
GLboolean ARB_shader_atomic_counters;
GLboolean AMD_vertex_shader_layer;
GLboolean AMD_vertex_shader_viewport_index;
GLboolean APPLE_object_purgeable;
+ GLboolean ATI_meminfo;
GLboolean ATI_texture_compression_3dc;
GLboolean ATI_texture_mirror_once;
GLboolean ATI_texture_env_combine3;
GLboolean NV_texture_env_combine4;
GLboolean NV_texture_rectangle;
GLboolean NV_vdpau_interop;
+ GLboolean NVX_gpu_memory_info;
GLboolean TDFX_texture_compression_FXT1;
GLboolean OES_EGL_image;
GLboolean OES_draw_texture;
struct gl_buffer_object *CopyReadBuffer; /**< GL_ARB_copy_buffer */
struct gl_buffer_object *CopyWriteBuffer; /**< GL_ARB_copy_buffer */
+ struct gl_buffer_object *QueryBuffer; /**< GL_ARB_query_buffer_object */
+
/**
* Current GL_ARB_uniform_buffer_object binding referenced by
* GL_UNIFORM_BUFFER target for glBufferData, glMapBuffer, etc.
GLboolean ShareGroupReset;
};
+/**
+ * Information about memory usage. All sizes are in kilobytes; the one
+ * exception is nr_device_memory_evictions, which is a count of events
+ * rather than a size.
+ */
+struct gl_memory_info
+{
+ unsigned total_device_memory; /**< total size of device memory (e.g. VRAM), in KB */
+ unsigned avail_device_memory; /**< device memory free at the moment, in KB */
+ unsigned total_staging_memory; /**< total size of staging memory (e.g. GART), in KB */
+ unsigned avail_staging_memory; /**< staging memory free at the moment, in KB */
+ unsigned device_memory_evicted; /**< size of memory evicted, in KB (monotonic counter) */
+ unsigned nr_device_memory_evictions; /**< number of evictions (monotonic counter) */
+};
#ifdef DEBUG
extern int MESA_VERBOSE;
_mesa_ObjectPtrLabel(const void *ptr, GLsizei length, const GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+ struct gl_sync_object *syncObj;
const char *callerstr;
char **labelPtr;
+ syncObj = _mesa_get_and_ref_sync(ctx, (void*)ptr, true);
+
if (_mesa_is_desktop_gl(ctx))
callerstr = "glObjectPtrLabel";
else
callerstr = "glObjectPtrLabelKHR";
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
callerstr);
return;
labelPtr = &syncObj->Label;
set_label(ctx, labelPtr, label, length, callerstr);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
void GLAPIENTRY
GLchar *label)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) ptr;
+ struct gl_sync_object *syncObj;
const char *callerstr;
char **labelPtr;
return;
}
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ syncObj = _mesa_get_and_ref_sync(ctx, (void*)ptr, true);
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "%s (not a valid sync object)",
callerstr);
return;
labelPtr = &syncObj->Label;
copy_label(*labelPtr, label, length, bufSize);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
*/
+#include "bufferobj.h"
#include "glheader.h"
#include "context.h"
#include "enums.h"
_mesa_GetQueryIndexediv(target, 0, pname, params);
}
-void GLAPIENTRY
-_mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params)
+static void
+get_query_object(struct gl_context *ctx, const char *func,
+ GLuint id, GLenum pname, GLenum ptype,
+ struct gl_buffer_object *buf, intptr_t offset)
{
struct gl_query_object *q = NULL;
- GET_CURRENT_CONTEXT(ctx);
+ uint64_t value;
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjectiv(%u, %s)\n", id,
+ _mesa_debug(ctx, "%s(%u, %s)\n", func, id,
_mesa_enum_to_string(pname));
if (id)
if (!q || q->Active || !q->EverBound) {
_mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectivARB(id=%d is invalid or active)", id);
+ "%s(id=%d is invalid or active)", func, id);
return;
}
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- /* if result is too large for returned type, clamp to max value */
- if (q->Target == GL_ANY_SAMPLES_PASSED
- || q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE) {
- if (q->Result)
- *params = GL_TRUE;
- else
- *params = GL_FALSE;
- } else {
- if (q->Result > 0x7fffffff) {
- *params = 0x7fffffff;
- }
- else {
- *params = (GLint)q->Result;
- }
- }
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
+ if (buf && buf != ctx->Shared->NullBufferObj) {
+ bool is_64bit = ptype == GL_INT64_ARB ||
+ ptype == GL_UNSIGNED_INT64_ARB;
+ if (!ctx->Extensions.ARB_query_buffer_object) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(not supported)", func);
+ return;
+ }
+ /* offset is a signed intptr_t, so a negative value can slip past the
+  * size comparison below and would then be handed to
+  * Driver.StoreQueryResult as a write position before the start of the
+  * buffer. Reject it explicitly first.
+  */
+ if (offset < 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "%s(offset is negative)", func);
+ return;
+ }
+ /* The result occupies 4 bytes, or 8 for the 64-bit result types, and
+  * must fit entirely inside the buffer.
+  */
+ if (buf->Size < offset + 4 * (is_64bit ? 2 : 1)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION, "%s(out of bounds)", func);
+ return;
+ }
+
+ switch (pname) {
+ case GL_QUERY_RESULT:
+ case GL_QUERY_RESULT_NO_WAIT:
+ case GL_QUERY_RESULT_AVAILABLE:
case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectivARB(pname)");
+ ctx->Driver.StoreQueryResult(ctx, q, buf, offset, pname, ptype);
return;
+ }
+
+ /* fall through to get error below */
}
-}
+ switch (pname) {
+ case GL_QUERY_RESULT:
+ if (!q->Ready)
+ ctx->Driver.WaitQuery(ctx, q);
+ value = q->Result;
+ break;
+ case GL_QUERY_RESULT_NO_WAIT:
+ if (!ctx->Extensions.ARB_query_buffer_object)
+ goto invalid_enum;
+ ctx->Driver.CheckQuery(ctx, q);
+ if (!q->Ready)
+ return;
+ value = q->Result;
+ break;
+ case GL_QUERY_RESULT_AVAILABLE:
+ if (!q->Ready)
+ ctx->Driver.CheckQuery(ctx, q);
+ value = q->Ready;
+ break;
+ case GL_QUERY_TARGET:
+ value = q->Target;
+ break;
+ default:
+invalid_enum:
+ _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)",
+ func, _mesa_enum_to_string(pname));
+ return;
+ }
+
+ /* TODO: Have the driver be required to handle this fixup. */
+ if (q->Target == GL_ANY_SAMPLES_PASSED ||
+ q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE)
+ value = !!value;
+
+ switch (ptype) {
+ case GL_INT: {
+ GLint *param = (GLint *)offset;
+ if (value > 0x7fffffff)
+ *param = 0x7fffffff;
+ else
+ *param = value;
+ break;
+ }
+ case GL_UNSIGNED_INT: {
+ GLuint *param = (GLuint *)offset;
+ if (value > 0xffffffff)
+ *param = 0xffffffff;
+ else
+ *param = value;
+ break;
+ }
+ case GL_INT64_ARB:
+ case GL_UNSIGNED_INT64_ARB: {
+ GLuint64EXT *param = (GLuint64EXT *)offset;
+ *param = value;
+ break;
+ }
+ default:
+ unreachable("unexpected ptype");
+ }
+}
void GLAPIENTRY
-_mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params)
+_mesa_GetQueryObjectiv(GLuint id, GLenum pname, GLint *params)
{
- struct gl_query_object *q = NULL;
GET_CURRENT_CONTEXT(ctx);
- if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjectuiv(%u, %s)\n", id,
- _mesa_enum_to_string(pname));
+ get_query_object(ctx, "glGetQueryObjectiv",
+ id, pname, GL_INT, ctx->QueryBuffer, (intptr_t)params);
+}
- if (id)
- q = _mesa_lookup_query_object(ctx, id);
- if (!q || q->Active || !q->EverBound) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectuivARB(id=%d is invalid or active)", id);
- return;
- }
+void GLAPIENTRY
+_mesa_GetQueryObjectuiv(GLuint id, GLenum pname, GLuint *params)
+{
+ GET_CURRENT_CONTEXT(ctx);
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- /* if result is too large for returned type, clamp to max value */
- if (q->Target == GL_ANY_SAMPLES_PASSED
- || q->Target == GL_ANY_SAMPLES_PASSED_CONSERVATIVE) {
- if (q->Result)
- *params = GL_TRUE;
- else
- *params = GL_FALSE;
- } else {
- if (q->Result > 0xffffffff) {
- *params = 0xffffffff;
- }
- else {
- *params = (GLuint)q->Result;
- }
- }
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectuivARB(pname)");
- return;
- }
+ get_query_object(ctx, "glGetQueryObjectuiv",
+ id, pname, GL_UNSIGNED_INT,
+ ctx->QueryBuffer, (intptr_t)params);
}
void GLAPIENTRY
_mesa_GetQueryObjecti64v(GLuint id, GLenum pname, GLint64EXT *params)
{
- struct gl_query_object *q = NULL;
GET_CURRENT_CONTEXT(ctx);
- if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjecti64v(%u, %s)\n", id,
- _mesa_enum_to_string(pname));
-
- if (id)
- q = _mesa_lookup_query_object(ctx, id);
-
- if (!q || q->Active || !q->EverBound) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectui64vARB(id=%d is invalid or active)", id);
- return;
- }
-
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- *params = q->Result;
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjecti64vARB(pname)");
- return;
- }
+ get_query_object(ctx, "glGetQueryObjecti64v",
+ id, pname, GL_INT64_ARB,
+ ctx->QueryBuffer, (intptr_t)params);
}
void GLAPIENTRY
_mesa_GetQueryObjectui64v(GLuint id, GLenum pname, GLuint64EXT *params)
{
- struct gl_query_object *q = NULL;
GET_CURRENT_CONTEXT(ctx);
- if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glGetQueryObjectui64v(%u, %s)\n", id,
- _mesa_enum_to_string(pname));
-
- if (id)
- q = _mesa_lookup_query_object(ctx, id);
-
- if (!q || q->Active || !q->EverBound) {
- _mesa_error(ctx, GL_INVALID_OPERATION,
- "glGetQueryObjectuui64vARB(id=%d is invalid or active)", id);
- return;
- }
-
- switch (pname) {
- case GL_QUERY_RESULT_ARB:
- if (!q->Ready)
- ctx->Driver.WaitQuery(ctx, q);
- *params = q->Result;
- break;
- case GL_QUERY_RESULT_AVAILABLE_ARB:
- if (!q->Ready)
- ctx->Driver.CheckQuery( ctx, q );
- *params = q->Ready;
- break;
- case GL_QUERY_TARGET:
- *params = q->Target;
- break;
- default:
- _mesa_error(ctx, GL_INVALID_ENUM, "glGetQueryObjectui64vARB(pname)");
- return;
- }
+ get_query_object(ctx, "glGetQueryObjectui64v",
+ id, pname, GL_UNSIGNED_INT64_ARB,
+ ctx->QueryBuffer, (intptr_t)params);
}
/**
_mesa_GetQueryBufferObjectiv(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectiv");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectiv");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjectiv",
+ id, pname, GL_INT, buf, offset);
}
_mesa_GetQueryBufferObjectuiv(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectuiv");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectuiv");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjectuiv",
+ id, pname, GL_UNSIGNED_INT, buf, offset);
}
_mesa_GetQueryBufferObjecti64v(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjecti64v");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjecti64v");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjecti64v",
+ id, pname, GL_INT64_ARB, buf, offset);
}
_mesa_GetQueryBufferObjectui64v(GLuint id, GLuint buffer, GLenum pname,
GLintptr offset)
{
+ struct gl_buffer_object *buf;
GET_CURRENT_CONTEXT(ctx);
- _mesa_error(ctx, GL_INVALID_OPERATION, "glGetQueryBufferObjectui64v");
+
+ buf = _mesa_lookup_bufferobj_err(ctx, buffer, "glGetQueryBufferObjectui64v");
+ if (!buf)
+ return;
+
+ get_query_object(ctx, "glGetQueryBufferObjectui64v",
+ id, pname, GL_UNSIGNED_INT64_ARB, buf, offset);
}
struct set_entry *entry;
set_foreach(shared->SyncObjects, entry) {
- _mesa_unref_sync_object(ctx, (struct gl_sync_object *) entry->key);
+ _mesa_unref_sync_object(ctx, (struct gl_sync_object *) entry->key, 1);
}
}
_mesa_set_destroy(shared->SyncObjects, NULL);
ctx->Multisample._Enabled = GL_FALSE;
if (ctx->Multisample.Enabled &&
ctx->DrawBuffer &&
- ctx->DrawBuffer->Visual.sampleBuffers)
+ _mesa_geometric_samples(ctx->DrawBuffer) > 0)
ctx->Multisample._Enabled = GL_TRUE;
}
* - not in sync objects hash table
* - type is GL_SYNC_FENCE
* - not marked as deleted
+ *
+ * Returns the internal gl_sync_object pointer if the sync object is valid
+ * or NULL if it isn't.
+ *
+ * If "incRefCount" is true, the reference count is incremented, which is
+ * normally what you want; otherwise, a glDeleteSync from another thread
+ * could delete the sync object while you are still working on it.
*/
-bool
-_mesa_validate_sync(struct gl_context *ctx,
- const struct gl_sync_object *syncObj)
+struct gl_sync_object *
+_mesa_get_and_ref_sync(struct gl_context *ctx, GLsync sync, bool incRefCount)
{
- return (syncObj != NULL)
+ struct gl_sync_object *syncObj = (struct gl_sync_object *) sync;
+ mtx_lock(&ctx->Shared->Mutex);
+ if (syncObj != NULL
&& _mesa_set_search(ctx->Shared->SyncObjects, syncObj) != NULL
&& (syncObj->Type == GL_SYNC_FENCE)
- && !syncObj->DeletePending;
-}
-
-
-void
-_mesa_ref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj)
-{
- mtx_lock(&ctx->Shared->Mutex);
- syncObj->RefCount++;
+ && !syncObj->DeletePending) {
+ if (incRefCount) {
+ syncObj->RefCount++;
+ }
+ } else {
+ syncObj = NULL;
+ }
mtx_unlock(&ctx->Shared->Mutex);
+ return syncObj;
}
void
-_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj)
+_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj,
+ int amount)
{
struct set_entry *entry;
mtx_lock(&ctx->Shared->Mutex);
- syncObj->RefCount--;
+ syncObj->RefCount -= amount;
if (syncObj->RefCount == 0) {
entry = _mesa_set_search(ctx->Shared->SyncObjects, syncObj);
assert (entry != NULL);
_mesa_IsSync(GLsync sync)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_FALSE);
- return _mesa_validate_sync(ctx, syncObj) ? GL_TRUE : GL_FALSE;
+ return _mesa_get_and_ref_sync(ctx, sync, false) ? GL_TRUE : GL_FALSE;
}
_mesa_DeleteSync(GLsync sync)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
+ struct gl_sync_object *syncObj;
/* From the GL_ARB_sync spec:
*
return;
}
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "glDeleteSync (not a valid sync object)");
return;
}
/* If there are no client-waits or server-waits pending on this sync, delete
- * the underlying object.
+ * the underlying object. Note that we double-unref the object, as
+ * _mesa_get_and_ref_sync above took an extra refcount to make sure the pointer
+ * is valid for us to manipulate.
*/
syncObj->DeletePending = GL_TRUE;
- _mesa_unref_sync_object(ctx, syncObj);
+ _mesa_unref_sync_object(ctx, syncObj, 2);
}
_mesa_ClientWaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
+ struct gl_sync_object *syncObj;
GLenum ret;
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, GL_WAIT_FAILED);
- if (!_mesa_validate_sync(ctx, syncObj)) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync (not a valid sync object)");
- return GL_WAIT_FAILED;
- }
-
if ((flags & ~GL_SYNC_FLUSH_COMMANDS_BIT) != 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync(flags=0x%x)", flags);
return GL_WAIT_FAILED;
}
- _mesa_ref_sync_object(ctx, syncObj);
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glClientWaitSync (not a valid sync object)");
+ return GL_WAIT_FAILED;
+ }
/* From the GL_ARB_sync spec:
*
}
}
- _mesa_unref_sync_object(ctx, syncObj);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
return ret;
}
_mesa_WaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
-
- if (!_mesa_validate_sync(ctx, syncObj)) {
- _mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync (not a valid sync object)");
- return;
- }
+ struct gl_sync_object *syncObj;
if (flags != 0) {
_mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync(flags=0x%x)", flags);
return;
}
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glWaitSync (not a valid sync object)");
+ return;
+ }
+
ctx->Driver.ServerWaitSync(ctx, syncObj, flags, timeout);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
GLint *values)
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_sync_object *const syncObj = (struct gl_sync_object *) sync;
+ struct gl_sync_object *syncObj;
GLsizei size = 0;
GLint v[1];
- if (!_mesa_validate_sync(ctx, syncObj)) {
+ syncObj = _mesa_get_and_ref_sync(ctx, sync, true);
+ if (!syncObj) {
_mesa_error(ctx, GL_INVALID_VALUE, "glGetSynciv (not a valid sync object)");
return;
}
default:
_mesa_error(ctx, GL_INVALID_ENUM, "glGetSynciv(pname=0x%x)\n", pname);
+ _mesa_unref_sync_object(ctx, syncObj, 1);
return;
}
if (length != NULL) {
*length = size;
}
+
+ _mesa_unref_sync_object(ctx, syncObj, 1);
}
extern void
_mesa_free_sync_data(struct gl_context *);
-extern void
-_mesa_ref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj);
+struct gl_sync_object *
+_mesa_get_and_ref_sync(struct gl_context *ctx, GLsync sync, bool incRefCount);
extern void
-_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj);
-
-extern bool
-_mesa_validate_sync(struct gl_context *ctx,
- const struct gl_sync_object *syncObj);
+_mesa_unref_sync_object(struct gl_context *ctx, struct gl_sync_object *syncObj,
+ int amount);
extern GLboolean GLAPIENTRY
_mesa_IsSync(GLsync sync);
tfObj->BufferNames[index] = bufObj->Name;
tfObj->Offset[index] = offset;
tfObj->RequestedSize[index] = size;
+
+ if (bufObj != ctx->Shared->NullBufferObj)
+ bufObj->UsageHistory |= USAGE_TRANSFORM_FEEDBACK_BUFFER;
}
/*** GL_ARB_direct_state_access ***/
(void) row_major;
+ /* atomics don't get real storage */
+ if (type->contains_atomic())
+ return;
+
if (type->is_vector() || type->is_scalar()) {
size = type->vector_elements;
if (type->is_double())
*posOut = -1;
return GL_FALSE;
}
-
-
-struct gl_program_parameter_list *
-_mesa_clone_parameter_list(const struct gl_program_parameter_list *list)
-{
- struct gl_program_parameter_list *clone;
- GLuint i;
-
- clone = _mesa_new_parameter_list();
- if (!clone)
- return NULL;
-
- /** Not too efficient, but correct */
- for (i = 0; i < list->NumParameters; i++) {
- struct gl_program_parameter *p = list->Parameters + i;
- struct gl_program_parameter *pCopy;
- GLuint size = MIN2(p->Size, 4);
- GLint j = _mesa_add_parameter(clone, p->Type, p->Name, size, p->DataType,
- list->ParameterValues[i], NULL);
- assert(j >= 0);
- pCopy = clone->Parameters + j;
- /* copy state indexes */
- if (p->Type == PROGRAM_STATE_VAR) {
- GLint k;
- for (k = 0; k < STATE_LENGTH; k++) {
- pCopy->StateIndexes[k] = p->StateIndexes[k];
- }
- }
- else {
- clone->Parameters[j].Size = p->Size;
- }
-
- }
-
- clone->StateFlags = list->StateFlags;
-
- return clone;
-}
-
-
-/**
- * Return a new parameter list which is listA + listB.
- */
-struct gl_program_parameter_list *
-_mesa_combine_parameter_lists(const struct gl_program_parameter_list *listA,
- const struct gl_program_parameter_list *listB)
-{
- struct gl_program_parameter_list *list;
-
- if (listA) {
- list = _mesa_clone_parameter_list(listA);
- if (list && listB) {
- GLuint i;
- for (i = 0; i < listB->NumParameters; i++) {
- struct gl_program_parameter *param = listB->Parameters + i;
- _mesa_add_parameter(list, param->Type, param->Name, param->Size,
- param->DataType,
- listB->ParameterValues[i],
- param->StateIndexes);
- }
- }
- }
- else if (listB) {
- list = _mesa_clone_parameter_list(listB);
- }
- else {
- list = NULL;
- }
- return list;
-}
extern void
_mesa_free_parameter_list(struct gl_program_parameter_list *paramList);
-extern struct gl_program_parameter_list *
-_mesa_clone_parameter_list(const struct gl_program_parameter_list *list);
-
-extern struct gl_program_parameter_list *
-_mesa_combine_parameter_lists(const struct gl_program_parameter_list *a,
- const struct gl_program_parameter_list *b);
-
static inline GLuint
_mesa_num_parameters(const struct gl_program_parameter_list *list)
{
#include "prog_statevars.h"
#include "prog_parameter.h"
#include "main/samplerobj.h"
+#include "framebuffer.h"
#define ONE_DIV_SQRT_LN2 (1.201122408786449815)
}
return;
case STATE_NUM_SAMPLES:
- ((int *)value)[0] = ctx->DrawBuffer->Visual.samples;
+ ((int *)value)[0] = _mesa_geometric_samples(ctx->DrawBuffer);
return;
case STATE_DEPTH_RANGE:
value[0] = ctx->ViewportArray[0].Near; /* near */
#include "main/glheader.h"
#include "main/context.h"
+#include "main/framebuffer.h"
#include "main/hash.h"
#include "main/macros.h"
#include "program.h"
* forces per-sample shading"
*/
if (prog->IsSample && !ignore_sample_qualifier)
- return MAX2(ctx->DrawBuffer->Visual.samples, 1);
+ return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
if (prog->Base.SystemValuesRead & (SYSTEM_BIT_SAMPLE_ID |
SYSTEM_BIT_SAMPLE_POS))
- return MAX2(ctx->DrawBuffer->Visual.samples, 1);
+ return MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);
else if (ctx->Multisample.SampleShading)
return MAX2(ceil(ctx->Multisample.MinSampleShadingValue *
- ctx->DrawBuffer->Visual.samples), 1);
+ _mesa_geometric_samples(ctx->DrawBuffer)), 1);
else
return 1;
}
&st_bind_tes_ubos,
&st_bind_fs_ubos,
&st_bind_gs_ubos,
+ &st_bind_vs_atomics,
+ &st_bind_tcs_atomics,
+ &st_bind_tes_atomics,
+ &st_bind_fs_atomics,
+ &st_bind_gs_atomics,
+ &st_bind_vs_ssbos,
+ &st_bind_tcs_ssbos,
+ &st_bind_tes_ssbos,
+ &st_bind_fs_ssbos,
+ &st_bind_gs_ssbos,
&st_update_pixel_transfer,
&st_update_tess,
extern const struct st_tracked_state st_bind_gs_ubos;
extern const struct st_tracked_state st_bind_tcs_ubos;
extern const struct st_tracked_state st_bind_tes_ubos;
+extern const struct st_tracked_state st_bind_fs_atomics;
+extern const struct st_tracked_state st_bind_vs_atomics;
+extern const struct st_tracked_state st_bind_gs_atomics;
+extern const struct st_tracked_state st_bind_tcs_atomics;
+extern const struct st_tracked_state st_bind_tes_atomics;
+extern const struct st_tracked_state st_bind_fs_ssbos;
+extern const struct st_tracked_state st_bind_vs_ssbos;
+extern const struct st_tracked_state st_bind_gs_ssbos;
+extern const struct st_tracked_state st_bind_tcs_ssbos;
+extern const struct st_tracked_state st_bind_tes_ssbos;
extern const struct st_tracked_state st_update_pixel_transfer;
extern const struct st_tracked_state st_update_tess;
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2014 Ilia Mirkin. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/imports.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "compiler/glsl/ir_uniform.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+
+#include "st_debug.h"
+#include "st_cb_bufferobjects.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_program.h"
+
+/**
+ * Bind the atomic counter buffers used by a linked program to one shader
+ * stage of the pipe context.
+ *
+ * \param st           state tracker context
+ * \param prog         program whose AtomicBuffers[] list is walked (may be
+ *                     NULL, in which case nothing is bound)
+ * \param shader_type  PIPE_SHADER_x stage handed to set_shader_buffers()
+ *
+ * Each active atomic buffer is bound individually at the slot given by its
+ * GL binding point.  Nothing happens if the driver does not implement
+ * set_shader_buffers().
+ */
+static void
+st_bind_atomics(struct st_context *st,
+                struct gl_shader_program *prog,
+                unsigned shader_type)
+{
+   unsigned i;
+
+   if (!prog || !st->pipe->set_shader_buffers)
+      return;
+
+   for (i = 0; i < prog->NumAtomicBuffers; i++) {
+      struct gl_active_atomic_buffer *atomic = &prog->AtomicBuffers[i];
+      struct gl_atomic_buffer_binding *binding =
+         &st->ctx->AtomicBufferBindings[atomic->Binding];
+      struct st_buffer_object *st_obj =
+         st_buffer_object(binding->BufferObject);
+      struct pipe_shader_buffer sb = { 0 };
+
+      /* A binding point without a backing resource must not be
+       * dereferenced; leave sb zeroed so the slot is set with a NULL
+       * buffer, mirroring what st_bind_ssbos() does for empty bindings.
+       */
+      if (st_obj && st_obj->buffer) {
+         sb.buffer = st_obj->buffer;
+         sb.buffer_offset = binding->Offset;
+         sb.buffer_size = st_obj->buffer->width0 - binding->Offset;
+      }
+
+      st->pipe->set_shader_buffers(st->pipe, shader_type,
+                                   atomic->Binding, 1, &sb);
+   }
+}
+
+/**
+ * Update callback: bind the atomic counter buffers of the program currently
+ * installed for the vertex stage.
+ */
+static void
+bind_vs_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX],
+                   PIPE_SHADER_VERTEX);
+}
+
+/* Tracked-state descriptor for bind_vs_atomics().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_vs_atomics = {
+   "st_bind_vs_atomics",
+   {
+      0,
+      ST_NEW_VERTEX_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+   },
+   bind_vs_atomics
+};
+
+/**
+ * Update callback: bind the atomic counter buffers of the program currently
+ * installed for the fragment stage.
+ */
+static void
+bind_fs_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT],
+                   PIPE_SHADER_FRAGMENT);
+}
+
+/* Tracked-state descriptor for bind_fs_atomics().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_fs_atomics = {
+   "st_bind_fs_atomics",
+   {
+      0,
+      ST_NEW_FRAGMENT_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+   },
+   bind_fs_atomics
+};
+
+/**
+ * Update callback: bind the atomic counter buffers of the program currently
+ * installed for the geometry stage.
+ */
+static void
+bind_gs_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY],
+                   PIPE_SHADER_GEOMETRY);
+}
+
+/* Tracked-state descriptor for bind_gs_atomics().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_gs_atomics = {
+   "st_bind_gs_atomics",
+   {
+      0,
+      ST_NEW_GEOMETRY_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+   },
+   bind_gs_atomics
+};
+
+/**
+ * Update callback: bind the atomic counter buffers of the program currently
+ * installed for the tessellation control stage.
+ */
+static void
+bind_tcs_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL],
+                   PIPE_SHADER_TESS_CTRL);
+}
+
+/* Tracked-state descriptor for bind_tcs_atomics().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_tcs_atomics = {
+   "st_bind_tcs_atomics",
+   {
+      0,
+      ST_NEW_TESSCTRL_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+   },
+   bind_tcs_atomics
+};
+
+/**
+ * Update callback: bind the atomic counter buffers of the program currently
+ * installed for the tessellation evaluation stage.
+ */
+static void
+bind_tes_atomics(struct st_context *st)
+{
+   st_bind_atomics(st,
+                   st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL],
+                   PIPE_SHADER_TESS_EVAL);
+}
+
+/* Tracked-state descriptor for bind_tes_atomics().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_tes_atomics = {
+   "st_bind_tes_atomics",
+   {
+      0,
+      ST_NEW_TESSEVAL_PROGRAM | ST_NEW_ATOMIC_BUFFER,
+   },
+   bind_tes_atomics
+};
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2014 Ilia Mirkin. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/imports.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "compiler/glsl/ir_uniform.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_surface.h"
+
+#include "st_debug.h"
+#include "st_cb_bufferobjects.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_program.h"
+
+/**
+ * Hand the shader storage buffer bindings used by one linked shader stage
+ * to the pipe driver.
+ *
+ * \param st           state tracker context
+ * \param shader       linked shader for the stage; NULL means nothing to do
+ * \param shader_type  PIPE_SHADER_x stage handed to set_shader_buffers()
+ *
+ * Storage blocks are bound starting at slot MaxAtomicBuffers of the stage.
+ * Slots between the last used block and MaxShaderStorageBlocks are then
+ * reset by passing a NULL buffer array.
+ */
+static void
+st_bind_ssbos(struct st_context *st, struct gl_shader *shader,
+              unsigned shader_type)
+{
+   struct pipe_shader_buffer buffers[MAX_SHADER_STORAGE_BUFFERS];
+   struct gl_program_constants *limits;
+   unsigned blk;
+
+   if (!shader || !st->pipe->set_shader_buffers)
+      return;
+
+   limits = &st->ctx->Const.Program[shader->Stage];
+
+   for (blk = 0; blk < shader->NumShaderStorageBlocks; blk++) {
+      struct gl_shader_storage_buffer_binding *bound =
+         &st->ctx->ShaderStorageBufferBindings[
+            shader->ShaderStorageBlocks[blk]->Binding];
+      struct st_buffer_object *obj = st_buffer_object(bound->BufferObject);
+      struct pipe_shader_buffer *desc = &buffers[blk];
+
+      desc->buffer = obj->buffer;
+
+      /* Binding point without a resource: describe an empty range. */
+      if (!desc->buffer) {
+         desc->buffer_offset = 0;
+         desc->buffer_size = 0;
+         continue;
+      }
+
+      desc->buffer_offset = bound->Offset;
+      desc->buffer_size = desc->buffer->width0 - bound->Offset;
+
+      /* AutomaticSize is FALSE when the range came from BindBufferRange;
+       * clamp to the requested size in that case.
+       */
+      if (!bound->AutomaticSize)
+         desc->buffer_size = MIN2(desc->buffer_size, (unsigned) bound->Size);
+   }
+
+   st->pipe->set_shader_buffers(st->pipe, shader_type,
+                                limits->MaxAtomicBuffers,
+                                shader->NumShaderStorageBlocks, buffers);
+
+   /* Reset whatever slots a previous shader may have left bound. */
+   if (shader->NumShaderStorageBlocks < limits->MaxShaderStorageBlocks) {
+      st->pipe->set_shader_buffers(
+         st->pipe, shader_type,
+         limits->MaxAtomicBuffers + shader->NumShaderStorageBlocks,
+         limits->MaxShaderStorageBlocks - shader->NumShaderStorageBlocks,
+         NULL);
+   }
+}
+
+/**
+ * Update callback: bind the shader storage buffers of the linked vertex
+ * shader of the current program, if there is a program.
+ */
+static void bind_vs_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_VERTEX],
+                    PIPE_SHADER_VERTEX);
+   }
+}
+
+/* Tracked-state descriptor for bind_vs_ssbos().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_vs_ssbos = {
+   "st_bind_vs_ssbos",
+   {
+      0,
+      ST_NEW_VERTEX_PROGRAM | ST_NEW_STORAGE_BUFFER,
+   },
+   bind_vs_ssbos
+};
+
+/**
+ * Update callback: bind the shader storage buffers of the linked fragment
+ * shader of the current program, if there is a program.
+ */
+static void bind_fs_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_FRAGMENT];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_FRAGMENT],
+                    PIPE_SHADER_FRAGMENT);
+   }
+}
+
+/* Tracked-state descriptor for bind_fs_ssbos().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_fs_ssbos = {
+   "st_bind_fs_ssbos",
+   {
+      0,
+      ST_NEW_FRAGMENT_PROGRAM | ST_NEW_STORAGE_BUFFER,
+   },
+   bind_fs_ssbos
+};
+
+/**
+ * Update callback: bind the shader storage buffers of the linked geometry
+ * shader of the current program, if there is a program.
+ */
+static void bind_gs_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_GEOMETRY],
+                    PIPE_SHADER_GEOMETRY);
+   }
+}
+
+/* Tracked-state descriptor for bind_gs_ssbos().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_gs_ssbos = {
+   "st_bind_gs_ssbos",
+   {
+      0,
+      ST_NEW_GEOMETRY_PROGRAM | ST_NEW_STORAGE_BUFFER,
+   },
+   bind_gs_ssbos
+};
+
+/**
+ * Update callback: bind the shader storage buffers of the linked
+ * tessellation control shader of the current program, if there is a program.
+ */
+static void bind_tcs_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_CTRL];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_TESS_CTRL],
+                    PIPE_SHADER_TESS_CTRL);
+   }
+}
+
+/* Tracked-state descriptor for bind_tcs_ssbos().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_tcs_ssbos = {
+   "st_bind_tcs_ssbos",
+   {
+      0,
+      ST_NEW_TESSCTRL_PROGRAM | ST_NEW_STORAGE_BUFFER,
+   },
+   bind_tcs_ssbos
+};
+
+/**
+ * Update callback: bind the shader storage buffers of the linked
+ * tessellation evaluation shader of the current program, if there is a
+ * program.
+ */
+static void bind_tes_ssbos(struct st_context *st)
+{
+   struct gl_shader_program *current =
+      st->ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
+
+   if (current) {
+      st_bind_ssbos(st, current->_LinkedShaders[MESA_SHADER_TESS_EVAL],
+                    PIPE_SHADER_TESS_EVAL);
+   }
+}
+
+/* Tracked-state descriptor for bind_tes_ssbos().
+ * NOTE(review): presumably the callback is re-run whenever one of the
+ * ST_NEW_* bits below is dirty -- confirm against struct st_tracked_state.
+ */
+const struct st_tracked_state st_bind_tes_ssbos = {
+   "st_bind_tes_ssbos",
+   {
+      0,
+      ST_NEW_TESSEVAL_PROGRAM | ST_NEW_STORAGE_BUFFER,
+   },
+   bind_tes_ssbos
+};
case GL_PARAMETER_BUFFER_ARB:
bind = PIPE_BIND_COMMAND_ARGS_BUFFER;
break;
+ case GL_ATOMIC_COUNTER_BUFFER:
+ case GL_SHADER_STORAGE_BUFFER:
+ bind = PIPE_BIND_SHADER_BUFFER;
+ break;
+ case GL_QUERY_BUFFER:
+ bind = PIPE_BIND_QUERY_BUFFER;
+ break;
default:
bind = 0;
}
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
+#include "util/u_inlines.h"
#include "st_context.h"
#include "st_cb_queryobj.h"
#include "st_cb_bitmap.h"
+#include "st_cb_bufferobjects.h"
static struct gl_query_object *
{
/* nothing */
}
-
+
q->Ready = GL_TRUE;
}
}
}
+/**
+ * Translate the GL result type requested by the application into the
+ * pipe query value type.  Any other enum is a caller bug.
+ */
+static enum pipe_query_value_type
+query_value_type_from_gl(GLenum ptype)
+{
+   switch (ptype) {
+   case GL_INT:
+      return PIPE_QUERY_TYPE_I32;
+   case GL_UNSIGNED_INT:
+      return PIPE_QUERY_TYPE_U32;
+   case GL_INT64_ARB:
+      return PIPE_QUERY_TYPE_I64;
+   case GL_UNSIGNED_INT64_ARB:
+      return PIPE_QUERY_TYPE_U64;
+   default:
+      unreachable("Unexpected result type");
+   }
+}
+
+/**
+ * Map a pipeline-statistics query target to the index of the counter that
+ * is passed to get_query_result_resource().
+ */
+static int
+pipeline_statistics_index(GLenum target)
+{
+   switch (target) {
+   case GL_VERTICES_SUBMITTED_ARB:
+      return 0;
+   case GL_PRIMITIVES_SUBMITTED_ARB:
+      return 1;
+   case GL_VERTEX_SHADER_INVOCATIONS_ARB:
+      return 2;
+   case GL_GEOMETRY_SHADER_INVOCATIONS:
+      return 3;
+   case GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB:
+      return 4;
+   case GL_CLIPPING_INPUT_PRIMITIVES_ARB:
+      return 5;
+   case GL_CLIPPING_OUTPUT_PRIMITIVES_ARB:
+      return 6;
+   case GL_FRAGMENT_SHADER_INVOCATIONS_ARB:
+      return 7;
+   case GL_TESS_CONTROL_SHADER_PATCHES_ARB:
+      return 8;
+   case GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB:
+      return 9;
+   case GL_COMPUTE_SHADER_INVOCATIONS_ARB:
+      return 10;
+   default:
+      unreachable("Unexpected target");
+   }
+}
+
+/**
+ * Driver hook: write one property of a query object into a buffer object.
+ *
+ * \param ctx     GL context
+ * \param q       query object to read
+ * \param buf     destination buffer object
+ * \param offset  byte offset into \p buf
+ * \param pname   GL_QUERY_TARGET, GL_QUERY_RESULT_AVAILABLE, GL_QUERY_RESULT
+ *                or another result pname (only GL_QUERY_RESULT waits)
+ * \param ptype   GL_INT / GL_UNSIGNED_INT / GL_INT64_ARB /
+ *                GL_UNSIGNED_INT64_ARB, selecting the width of the value
+ */
+static void
+st_StoreQueryResult(struct gl_context *ctx, struct gl_query_object *q,
+                    struct gl_buffer_object *buf, intptr_t offset,
+                    GLenum pname, GLenum ptype)
+{
+   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct st_query_object *stq = st_query_object(q);
+   struct st_buffer_object *stObj = st_buffer_object(buf);
+   const boolean wait = pname == GL_QUERY_RESULT;
+   enum pipe_query_value_type result_type;
+   int index;
+
+   /* GL_QUERY_TARGET has nothing to do with the GPU side of the query, so
+    * it is written from the CPU.  The data is assumed to be little-endian:
+    * the vast majority of GPUs are LE, and a BE one will need some form of
+    * resolution here.
+    */
+   if (pname == GL_QUERY_TARGET) {
+      unsigned data[2] = { CPU_TO_LE32(q->Target), 0 };
+      unsigned nbytes = 4;
+
+      if (ptype == GL_INT64_ARB || ptype == GL_UNSIGNED_INT64_ARB)
+         nbytes = 8;
+
+      pipe_buffer_write(pipe, stObj->buffer, offset, nbytes, data);
+      return;
+   }
+
+   result_type = query_value_type_from_gl(ptype);
+
+   /* Index -1 asks for availability; pipeline statistics queries select
+    * one counter; every other query uses index 0.
+    */
+   if (pname == GL_QUERY_RESULT_AVAILABLE)
+      index = -1;
+   else if (stq->type == PIPE_QUERY_PIPELINE_STATISTICS)
+      index = pipeline_statistics_index(q->Target);
+   else
+      index = 0;
+
+   pipe->get_query_result_resource(pipe, stq->pq, wait, result_type, index,
+                                   stObj->buffer, offset);
+}
void st_init_query_functions(struct dd_function_table *functions)
{
functions->WaitQuery = st_WaitQuery;
functions->CheckQuery = st_CheckQuery;
functions->GetTimestamp = st_GetTimestamp;
+ functions->StoreQueryResult = st_StoreQueryResult;
}
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "util/u_inlines.h"
+#include "util/u_upload_mgr.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_tile.h"
#include "util/u_format.h"
#include "util/u_sampler.h"
#include "util/u_math.h"
#include "util/u_box.h"
+#include "util/u_simple_shaders.h"
+#include "cso_cache/cso_context.h"
+#include "tgsi/tgsi_ureg.h"
#define DBG if (0) printf
}
}
+/**
+ * Probe the screen for the capabilities the PBO upload path relies on and
+ * initialise the constant blend / rasterizer state kept in st->pbo_upload.
+ *
+ * The path is enabled only when texture buffer objects, a texture buffer
+ * offset alignment of at least 1 and fragment shader integers are all
+ * reported.  Layered uploads additionally need instance IDs plus either
+ * layer output from the vertex shader or a geometry shader that can emit
+ * at least three vertices.
+ */
+void
+st_init_pbo_upload(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+
+   if (!screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OBJECTS) ||
+       screen->get_param(screen, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT) < 1 ||
+       !screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
+                                 PIPE_SHADER_CAP_INTEGERS)) {
+      st->pbo_upload.enabled = false;
+      return;
+   }
+   st->pbo_upload.enabled = true;
+
+   st->pbo_upload.rgba_only =
+      screen->get_param(screen, PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY);
+
+   if (screen->get_param(screen, PIPE_CAP_TGSI_INSTANCEID)) {
+      if (!screen->get_param(screen, PIPE_CAP_TGSI_VS_LAYER_VIEWPORT)) {
+         /* No layer output from the VS: fall back to a geometry shader. */
+         if (screen->get_param(screen,
+                               PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES) >= 3) {
+            st->pbo_upload.use_gs = true;
+            st->pbo_upload.upload_layers = true;
+         }
+      } else {
+         st->pbo_upload.upload_layers = true;
+      }
+   }
+
+   /* Rasterizer state: everything zero except half_pixel_center. */
+   memset(&st->pbo_upload.raster, 0, sizeof(struct pipe_rasterizer_state));
+   st->pbo_upload.raster.half_pixel_center = 1;
+
+   /* Blend state: everything zero, all four channels of RT 0 writable. */
+   memset(&st->pbo_upload.blend, 0, sizeof(struct pipe_blend_state));
+   st->pbo_upload.blend.rt[0].colormask = PIPE_MASK_RGBA;
+}
+
+/**
+ * Delete the fragment, geometry and vertex shaders cached in
+ * st->pbo_upload (those that were created) and clear the cached pointers.
+ */
+void
+st_destroy_pbo_upload(struct st_context *st)
+{
+   if (st->pbo_upload.fs)
+      cso_delete_fragment_shader(st->cso_context, st->pbo_upload.fs);
+   st->pbo_upload.fs = NULL;
+
+   if (st->pbo_upload.gs)
+      cso_delete_geometry_shader(st->cso_context, st->pbo_upload.gs);
+   st->pbo_upload.gs = NULL;
+
+   if (st->pbo_upload.vs)
+      cso_delete_vertex_shader(st->cso_context, st->pbo_upload.vs);
+   st->pbo_upload.vs = NULL;
+}
+
+/**
+ * Return the RGBA-ordered counterpart of a swizzled format: same
+ * components, types and sizes, components in R, G, B, A order.
+ * Formats that are not listed are returned unchanged.
+ */
+static enum pipe_format
+unswizzle_format(enum pipe_format format)
+{
+   static const struct {
+      enum pipe_format swizzled;
+      enum pipe_format rgba;
+   } map[] = {
+      { PIPE_FORMAT_B8G8R8A8_UNORM,    PIPE_FORMAT_R8G8B8A8_UNORM },
+      { PIPE_FORMAT_A8R8G8B8_UNORM,    PIPE_FORMAT_R8G8B8A8_UNORM },
+      { PIPE_FORMAT_A8B8G8R8_UNORM,    PIPE_FORMAT_R8G8B8A8_UNORM },
+      { PIPE_FORMAT_B10G10R10A2_UNORM, PIPE_FORMAT_R10G10B10A2_UNORM },
+      { PIPE_FORMAT_B10G10R10A2_SNORM, PIPE_FORMAT_R10G10B10A2_SNORM },
+      { PIPE_FORMAT_B10G10R10A2_UINT,  PIPE_FORMAT_R10G10B10A2_UINT },
+   };
+   unsigned i;
+
+   for (i = 0; i < ARRAY_SIZE(map); i++) {
+      if (map[i].swizzled == format)
+         return map[i].rgba;
+   }
+
+   return format;
+}
+
+/**
+ * Map a single-channel PIPE_FORMAT_A* format to the PIPE_FORMAT_R* format
+ * of the same size and type.  Formats that are not listed are returned
+ * unchanged.
+ */
+static enum pipe_format
+alpha_to_red(enum pipe_format format)
+{
+   static const struct {
+      enum pipe_format alpha;
+      enum pipe_format red;
+   } map[] = {
+      { PIPE_FORMAT_A8_UNORM,  PIPE_FORMAT_R8_UNORM },
+      { PIPE_FORMAT_A8_SNORM,  PIPE_FORMAT_R8_SNORM },
+      { PIPE_FORMAT_A8_UINT,   PIPE_FORMAT_R8_UINT },
+      { PIPE_FORMAT_A8_SINT,   PIPE_FORMAT_R8_SINT },
+
+      { PIPE_FORMAT_A16_UNORM, PIPE_FORMAT_R16_UNORM },
+      { PIPE_FORMAT_A16_SNORM, PIPE_FORMAT_R16_SNORM },
+      { PIPE_FORMAT_A16_UINT,  PIPE_FORMAT_R16_UINT },
+      { PIPE_FORMAT_A16_SINT,  PIPE_FORMAT_R16_SINT },
+      { PIPE_FORMAT_A16_FLOAT, PIPE_FORMAT_R16_FLOAT },
+
+      { PIPE_FORMAT_A32_UINT,  PIPE_FORMAT_R32_UINT },
+      { PIPE_FORMAT_A32_SINT,  PIPE_FORMAT_R32_SINT },
+      { PIPE_FORMAT_A32_FLOAT, PIPE_FORMAT_R32_FLOAT },
+   };
+   unsigned i;
+
+   for (i = 0; i < ARRAY_SIZE(map); i++) {
+      if (map[i].alpha == format)
+         return map[i].red;
+   }
+
+   return format;
+}
+
+/**
+ * Map a two-channel PIPE_FORMAT_R*A* format to the PIPE_FORMAT_R*G* format
+ * of the same size and type.  Formats that are not listed are returned
+ * unchanged.
+ */
+static enum pipe_format
+red_alpha_to_red_green(enum pipe_format format)
+{
+   static const struct {
+      enum pipe_format red_alpha;
+      enum pipe_format red_green;
+   } map[] = {
+      { PIPE_FORMAT_R8A8_UNORM,   PIPE_FORMAT_R8G8_UNORM },
+      { PIPE_FORMAT_R8A8_SNORM,   PIPE_FORMAT_R8G8_SNORM },
+      { PIPE_FORMAT_R8A8_UINT,    PIPE_FORMAT_R8G8_UINT },
+      { PIPE_FORMAT_R8A8_SINT,    PIPE_FORMAT_R8G8_SINT },
+
+      { PIPE_FORMAT_R16A16_UNORM, PIPE_FORMAT_R16G16_UNORM },
+      { PIPE_FORMAT_R16A16_SNORM, PIPE_FORMAT_R16G16_SNORM },
+      { PIPE_FORMAT_R16A16_UINT,  PIPE_FORMAT_R16G16_UINT },
+      { PIPE_FORMAT_R16A16_SINT,  PIPE_FORMAT_R16G16_SINT },
+      { PIPE_FORMAT_R16A16_FLOAT, PIPE_FORMAT_R16G16_FLOAT },
+
+      { PIPE_FORMAT_R32A32_UINT,  PIPE_FORMAT_R32G32_UINT },
+      { PIPE_FORMAT_R32A32_SINT,  PIPE_FORMAT_R32G32_SINT },
+      { PIPE_FORMAT_R32A32_FLOAT, PIPE_FORMAT_R32G32_FLOAT },
+   };
+   unsigned i;
+
+   for (i = 0; i < ARRAY_SIZE(map); i++) {
+      if (map[i].red_alpha == format)
+         return map[i].red_green;
+   }
+
+   return format;
+}
+
+/**
+ * Map a two-channel PIPE_FORMAT_L*A* format to the PIPE_FORMAT_R*G* format
+ * of the same size and type.  Formats that are not listed are returned
+ * unchanged.
+ */
+static enum pipe_format
+luminance_alpha_to_red_green(enum pipe_format format)
+{
+   static const struct {
+      enum pipe_format lum_alpha;
+      enum pipe_format red_green;
+   } map[] = {
+      { PIPE_FORMAT_L8A8_UNORM,   PIPE_FORMAT_R8G8_UNORM },
+      { PIPE_FORMAT_L8A8_SNORM,   PIPE_FORMAT_R8G8_SNORM },
+      { PIPE_FORMAT_L8A8_UINT,    PIPE_FORMAT_R8G8_UINT },
+      { PIPE_FORMAT_L8A8_SINT,    PIPE_FORMAT_R8G8_SINT },
+
+      { PIPE_FORMAT_L16A16_UNORM, PIPE_FORMAT_R16G16_UNORM },
+      { PIPE_FORMAT_L16A16_SNORM, PIPE_FORMAT_R16G16_SNORM },
+      { PIPE_FORMAT_L16A16_UINT,  PIPE_FORMAT_R16G16_UINT },
+      { PIPE_FORMAT_L16A16_SINT,  PIPE_FORMAT_R16G16_SINT },
+      { PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16_FLOAT },
+
+      { PIPE_FORMAT_L32A32_UINT,  PIPE_FORMAT_R32G32_UINT },
+      { PIPE_FORMAT_L32A32_SINT,  PIPE_FORMAT_R32G32_SINT },
+      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32_FLOAT },
+   };
+   unsigned i;
+
+   for (i = 0; i < ARRAY_SIZE(map); i++) {
+      if (map[i].lum_alpha == format)
+         return map[i].red_green;
+   }
+
+   return format;
+}
+
+/**
+ * Tell whether \p format is a PIPE_FORMAT_A* format: one channel, with
+ * R, G and B reading constant 0 and A reading that channel.
+ */
+static bool
+format_is_alpha(enum pipe_format format)
+{
+   const struct util_format_description *info = util_format_description(format);
+
+   return info->nr_channels == 1 &&
+          info->swizzle[0] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[3] == UTIL_FORMAT_SWIZZLE_X;
+}
+
+/**
+ * Tell whether \p format is a PIPE_FORMAT_R* format: one channel, with
+ * R reading that channel, G and B reading constant 0 and A constant 1.
+ */
+static bool
+format_is_red(enum pipe_format format)
+{
+   const struct util_format_description *info = util_format_description(format);
+
+   return info->nr_channels == 1 &&
+          info->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[3] == UTIL_FORMAT_SWIZZLE_1;
+}
+
+
+/**
+ * Tell whether \p format is a PIPE_FORMAT_L* format: one channel that is
+ * replicated into R, G and B, with A reading constant 1.
+ */
+static bool
+format_is_luminance(enum pipe_format format)
+{
+   const struct util_format_description *info = util_format_description(format);
+
+   return info->nr_channels == 1 &&
+          info->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[1] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[2] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[3] == UTIL_FORMAT_SWIZZLE_1;
+}
+
+/**
+ * Tell whether \p format is a PIPE_FORMAT_R*A* format: two channels, with
+ * R reading the first, A reading the second and G, B reading constant 0.
+ */
+static bool
+format_is_red_alpha(enum pipe_format format)
+{
+   const struct util_format_description *info = util_format_description(format);
+
+   return info->nr_channels == 2 &&
+          info->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+          info->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
+          info->swizzle[3] == UTIL_FORMAT_SWIZZLE_Y;
+}
+
+/**
+ * Returns true if the components of \p format are NOT stored in plain
+ * R, G, B, A order.
+ *
+ * A format counts as unswizzled when each of R, G, B reads either its own
+ * channel (X, Y, Z respectively) or constant 0, and A reads either W or
+ * constant 1.  Anything else is reported as swizzled.
+ *
+ * The description's swizzle[] holds util_format swizzle values, so they
+ * are compared against the UTIL_FORMAT_SWIZZLE_* enumerators throughout
+ * rather than against the (numerically equal) TGSI_SWIZZLE_* ones.
+ */
+static bool
+format_is_swizzled_rgba(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   if ((desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X || desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0) &&
+       (desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_Y || desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0) &&
+       (desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_Z || desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0) &&
+       (desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_W || desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1))
+      return false;
+
+   return true;
+}
+
+struct format_table /* one row of a swizzle -> pipe format lookup table */
+{
+ unsigned char swizzle[4]; /* four-component swizzle compared by swizzle_format() */
+ enum pipe_format format; /* format returned when the swizzle matches */
+};
+
+static const struct format_table table_8888_unorm[] = { /* searched by swizzle_format() for the four 8888 UNORM orderings */
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R8G8B8A8_UNORM },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B8G8R8A8_UNORM },
+ { { 3, 0, 1, 2 }, PIPE_FORMAT_A8R8G8B8_UNORM },
+ { { 3, 2, 1, 0 }, PIPE_FORMAT_A8B8G8R8_UNORM }
+};
+
+static const struct format_table table_1010102_unorm[] = { /* searched for the RGB10_A2 / BGR10_A2 UNORM pair */
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UNORM },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UNORM }
+};
+
+static const struct format_table table_1010102_snorm[] = { /* searched for the RGB10_A2 / BGR10_A2 SNORM pair */
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_SNORM },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_SNORM }
+};
+
+static const struct format_table table_1010102_uint[] = { /* searched for the RGB10_A2 / BGR10_A2 UINT pair */
+ { { 0, 1, 2, 3 }, PIPE_FORMAT_R10G10B10A2_UINT },
+ { { 2, 1, 0, 3 }, PIPE_FORMAT_B10G10R10A2_UINT }
+};
+
+/**
+ * Search \p table (of \p count rows) for the row whose four-component
+ * swizzle equals \p swizzle.
+ *
+ * \return the format of the first matching row, or PIPE_FORMAT_NONE.
+ */
+static enum pipe_format
+find_swizzled_format(const struct format_table *table, unsigned count,
+                     const int * const swizzle)
+{
+   unsigned i;
+
+   for (i = 0; i < count; i++) {
+      if (swizzle[0] == table[i].swizzle[0] &&
+          swizzle[1] == table[i].swizzle[1] &&
+          swizzle[2] == table[i].swizzle[2] &&
+          swizzle[3] == table[i].swizzle[3])
+         return table[i].format;
+   }
+
+   return PIPE_FORMAT_NONE;
+}
+
+/**
+ * Return the member of \p format's family (8888 UNORM, or 10-10-10-2
+ * UNORM / SNORM / UINT) whose table entry carries the given swizzle.
+ *
+ * \param format   any format of a supported family; selects the table
+ * \param swizzle  array of four swizzle values to look up
+ *
+ * \return the matching format, or PIPE_FORMAT_NONE when \p format belongs
+ *         to no supported family or no row has that swizzle.
+ */
+static enum pipe_format
+swizzle_format(enum pipe_format format, const int * const swizzle)
+{
+   switch (format) {
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_A8B8G8R8_UNORM:
+      return find_swizzled_format(table_8888_unorm,
+                                  ARRAY_SIZE(table_8888_unorm), swizzle);
+
+   case PIPE_FORMAT_R10G10B10A2_UNORM:
+   case PIPE_FORMAT_B10G10R10A2_UNORM:
+      return find_swizzled_format(table_1010102_unorm,
+                                  ARRAY_SIZE(table_1010102_unorm), swizzle);
+
+   case PIPE_FORMAT_R10G10B10A2_SNORM:
+   case PIPE_FORMAT_B10G10R10A2_SNORM:
+      return find_swizzled_format(table_1010102_snorm,
+                                  ARRAY_SIZE(table_1010102_snorm), swizzle);
+
+   case PIPE_FORMAT_R10G10B10A2_UINT:
+   case PIPE_FORMAT_B10G10R10A2_UINT:
+      return find_swizzled_format(table_1010102_uint,
+                                  ARRAY_SIZE(table_1010102_uint), swizzle);
+
+   default:
+      return PIPE_FORMAT_NONE;
+   }
+}
+
+/**
+ * Rewrite a (source, destination) format pair into an equivalent pair
+ * expressed with red / red-green / RGBA-ordered formats.
+ *
+ * \param src_format  in: source format; out: its reinterpretation
+ * \param dst_format  in: destination format; out: its reinterpretation
+ *
+ * \return false when the pair cannot be reinterpreted, in which case
+ *         neither output is modified; true otherwise.
+ *
+ * Note: dst_format has already been transformed from luminance/intensity
+ * to red when this function is called.  The source format will never be an
+ * intensity format, because GL_INTENSITY is not a legal value for the
+ * format parameter in glTex(Sub)Image().
+ */
+static bool
+reinterpret_formats(enum pipe_format *src_format, enum pipe_format *dst_format)
+{
+   enum pipe_format from = *src_format;
+   enum pipe_format to = *dst_format;
+
+   if (format_is_alpha(from)) {
+      /* Alpha can only go to alpha; treat both sides as red. */
+      if (!format_is_alpha(to))
+         return false;
+
+      from = alpha_to_red(from);
+      to = alpha_to_red(to);
+   } else if (format_is_luminance(from)) {
+      if (!(format_is_red(to) || format_is_red_alpha(to)))
+         return false;
+
+      from = util_format_luminance_to_red(from);
+   } else if (util_format_is_luminance_alpha(from)) {
+      if (format_is_red_alpha(to))
+         to = red_alpha_to_red_green(to);
+      else if (!format_is_red(to))
+         return false;
+
+      from = luminance_alpha_to_red_green(from);
+   } else if (format_is_swizzled_rgba(from)) {
+      const struct util_format_description *from_desc =
+         util_format_description(from);
+      const struct util_format_description *to_desc =
+         util_format_description(to);
+      int composed[4];
+      unsigned c;
+
+      /* Both sides must be true RGBA formats, not RGBX ones. */
+      if (from_desc->nr_channels != 4 ||
+          from_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1 ||
+          to_desc->nr_channels != 4 ||
+          to_desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1)
+         return false;
+
+      /* Push the source swizzle through the destination swizzle ... */
+      for (c = 0; c < 4; c++)
+         composed[c] = to_desc->swizzle[from_desc->swizzle[c]];
+
+      /* ... and pick the destination-family format that carries it. */
+      to = swizzle_format(to, composed);
+      if (to == PIPE_FORMAT_NONE)
+         return false;
+
+      from = unswizzle_format(from);
+   }
+
+   *src_format = from;
+   *dst_format = to;
+   return true;
+}
+
+/**
+ * Build the vertex shader used by the PBO upload path.
+ *
+ * The shader copies the input position to the output position.  When
+ * layered uploads are enabled it also forwards the instance ID: written
+ * straight to the layer output, or, when a geometry shader is in use,
+ * converted to float and stored in position.z.
+ *
+ * \return the driver shader handle, or NULL if the ureg program could not
+ *         be created.
+ */
+static void *
+create_pbo_upload_vs(struct st_context *st)
+{
+   struct ureg_program *ureg;
+   struct ureg_src in_pos;
+   struct ureg_src in_instanceid;
+   struct ureg_dst out_pos;
+   struct ureg_dst out_layer;
+
+   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+   /* Same failure handling as the GS and FS builders. */
+   if (!ureg)
+      return NULL;
+
+   in_pos = ureg_DECL_vs_input(ureg, TGSI_SEMANTIC_POSITION);
+
+   out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+
+   if (st->pbo_upload.upload_layers) {
+      in_instanceid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0);
+
+      if (!st->pbo_upload.use_gs)
+         out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+   }
+
+   /* out_pos = in_pos */
+   ureg_MOV(ureg, out_pos, in_pos);
+
+   if (st->pbo_upload.upload_layers) {
+      if (st->pbo_upload.use_gs) {
+         /* out_pos.z = i2f(gl_InstanceID) */
+         ureg_I2F(ureg, ureg_writemask(out_pos, TGSI_WRITEMASK_Z),
+                        ureg_scalar(in_instanceid, TGSI_SWIZZLE_X));
+      } else {
+         /* out_layer = gl_InstanceID */
+         ureg_MOV(ureg, out_layer, in_instanceid);
+      }
+   }
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+/**
+ * Build the geometry shader used for layered PBO uploads.
+ *
+ * It takes triangles in and emits a three-vertex triangle strip: every
+ * input vertex is passed through unchanged, and its position.z is
+ * converted to an integer and written to the layer output.
+ *
+ * \return the driver shader handle, or NULL if the ureg program could not
+ *         be created.
+ */
+static void *
+create_pbo_upload_gs(struct st_context *st)
+{
+   static const int zero = 0;
+   struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+   struct ureg_dst pos_out;
+   struct ureg_dst layer_out;
+   struct ureg_src pos_in;
+   struct ureg_src emit_arg;
+   unsigned vtx;
+
+   if (!ureg)
+      return NULL;
+
+   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES);
+   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP);
+   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3);
+
+   pos_out = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+   layer_out = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0);
+
+   pos_in = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1);
+
+   emit_arg = ureg_DECL_immediate_int(ureg, &zero, 1);
+
+   for (vtx = 0; vtx < 3; vtx++) {
+      struct ureg_src this_pos = ureg_src_dimension(pos_in, vtx);
+
+      /* pos_out = pos_in[vtx] */
+      ureg_MOV(ureg, pos_out, this_pos);
+
+      /* layer_out.x = f2i(pos_in[vtx].z) */
+      ureg_F2I(ureg, ureg_writemask(layer_out, TGSI_WRITEMASK_X),
+                     ureg_scalar(this_pos, TGSI_SWIZZLE_Z));
+
+      ureg_EMIT(ureg, ureg_scalar(emit_arg, TGSI_SWIZZLE_X));
+   }
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, st->pipe);
+}
+
+/**
+ * Build the fragment shader used by the PBO upload path.
+ *
+ * For each fragment the shader turns the window position (and, for layered
+ * uploads, the layer) into a linear element index and fetches that element
+ * from the buffer texture bound to sampler 0.
+ *
+ * Constant 0 is laid out as
+ *    [ -xoffset + skip_pixels, -yoffset, stride, image_height ].
+ *
+ * \return the driver shader handle, or NULL if the ureg program could not
+ *         be created.
+ */
+static void *
+create_pbo_upload_fs(struct st_context *st)
+{
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   struct ureg_dst color_out;
+   struct ureg_dst addr;
+   struct ureg_src tbo;
+   struct ureg_src frag_pos;
+   struct ureg_src layer_in;
+   struct ureg_src params;
+
+   if (!ureg)
+      return NULL;
+
+   color_out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+   tbo = ureg_DECL_sampler(ureg, 0);
+
+   /* The fragment position is a system value on some drivers and a
+    * regular input on the others.
+    */
+   if (!screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
+      frag_pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
+                                    TGSI_INTERPOLATE_LINEAR);
+   } else {
+      frag_pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
+   }
+
+   if (st->pbo_upload.upload_layers) {
+      layer_in = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0,
+                                    TGSI_INTERPOLATE_CONSTANT);
+   }
+
+   params = ureg_DECL_constant(ureg, 0);
+   addr = ureg_DECL_temporary(ureg);
+
+   /* addr.xy = f2i(frag_pos.xy) */
+   ureg_F2I(ureg, ureg_writemask(addr, TGSI_WRITEMASK_XY),
+                  ureg_swizzle(frag_pos,
+                               TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                               TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+   /* addr.xy = addr.xy + params.xy */
+   ureg_UADD(ureg, ureg_writemask(addr, TGSI_WRITEMASK_XY),
+                   ureg_swizzle(ureg_src(addr),
+                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
+                   ureg_swizzle(params,
+                                TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                                TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
+
+   /* addr.x = params.z * addr.y + addr.x */
+   ureg_UMAD(ureg, ureg_writemask(addr, TGSI_WRITEMASK_X),
+                   ureg_scalar(params, TGSI_SWIZZLE_Z),
+                   ureg_scalar(ureg_src(addr), TGSI_SWIZZLE_Y),
+                   ureg_scalar(ureg_src(addr), TGSI_SWIZZLE_X));
+
+   if (st->pbo_upload.upload_layers) {
+      /* addr.x = params.w * layer + addr.x */
+      ureg_UMAD(ureg, ureg_writemask(addr, TGSI_WRITEMASK_X),
+                      ureg_scalar(params, TGSI_SWIZZLE_W),
+                      ureg_scalar(layer_in, TGSI_SWIZZLE_X),
+                      ureg_scalar(ureg_src(addr), TGSI_SWIZZLE_X));
+   }
+
+   /* color_out = txf(tbo, addr.x) */
+   ureg_TXF(ureg, color_out, TGSI_TEXTURE_BUFFER,
+                  ureg_scalar(ureg_src(addr), TGSI_SWIZZLE_X),
+                  tbo);
+
+   ureg_release_temporary(ureg, addr);
+
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, pipe);
+}
+/**
+ * Draw a quad into \p surface whose fragment shader fetches its texels from
+ * \p buffer through a buffer sampler view. The quad covers the rectangle
+ * (xoffset, yoffset) .. (xoffset + upload_width, yoffset + upload_height).
+ *
+ * buf_offset and stride are used as element (texel) counts, not bytes: they
+ * feed first_element / last_element of the sampler view and are multiplied
+ * by bytes_per_pixel for the alignment and buffer-size checks.
+ *
+ * Every piece of CSO state changed here is saved first and restored before
+ * returning.
+ *
+ * \return true if the draw was issued; false if the offset cannot be
+ * aligned, the element range is too large, or a shader, sampler view or
+ * upload allocation could not be obtained (nothing is drawn then).
+ */
+static bool
+try_pbo_upload_common(struct gl_context *ctx,
+ struct pipe_surface *surface,
+ int xoffset, int yoffset,
+ unsigned upload_width, unsigned upload_height,
+ struct pipe_resource *buffer,
+ enum pipe_format src_format,
+ intptr_t buf_offset,
+ unsigned bytes_per_pixel,
+ unsigned stride,
+ unsigned image_height)
+{
+ struct st_context *st = st_context(ctx);
+ struct pipe_context *pipe = st->pipe;
+ unsigned depth = surface->u.tex.last_layer - surface->u.tex.first_layer + 1; /* layers covered by the surface */
+ unsigned skip_pixels = 0;
+ bool success = false;
+
+ /* Check alignment. */
+ {
+ unsigned ofs = (buf_offset * bytes_per_pixel) % ctx->Const.TextureBufferOffsetAlignment;
+ if (ofs != 0) {
+ if (ofs % bytes_per_pixel != 0)
+ return false;
+ /* Start the view skip_pixels texels earlier; the same amount is
+ * added back to the x constant handed to the fragment shader. */
+ skip_pixels = ofs / bytes_per_pixel;
+ buf_offset -= skip_pixels;
+ }
+ }
+
+ /* Create the shaders */
+ if (!st->pbo_upload.vs) {
+ st->pbo_upload.vs = create_pbo_upload_vs(st);
+ if (!st->pbo_upload.vs)
+ return false;
+ }
+ /* Only built when several layers are uploaded and pbo_upload.use_gs is set. */
+ if (depth != 1 && st->pbo_upload.use_gs && !st->pbo_upload.gs) {
+ st->pbo_upload.gs = create_pbo_upload_gs(st);
+ if (!st->pbo_upload.gs)
+ return false;
+ }
+
+ if (!st->pbo_upload.fs) {
+ st->pbo_upload.fs = create_pbo_upload_fs(st);
+ if (!st->pbo_upload.fs)
+ return false;
+ }
+
+ /* Set up the sampler_view */
+ {
+ unsigned first_element = buf_offset;
+ unsigned last_element = buf_offset + skip_pixels + upload_width - 1
+ + (upload_height - 1 + (depth - 1) * image_height) * stride; /* last texel of the last row of the last layer */
+ struct pipe_sampler_view templ;
+ struct pipe_sampler_view *sampler_view;
+
+ /* This should be ensured by Mesa before calling our callbacks */
+ assert((last_element + 1) * bytes_per_pixel <= buffer->width0);
+
+ if (last_element - first_element > ctx->Const.MaxTextureBufferSize - 1)
+ return false;
+
+ memset(&templ, 0, sizeof(templ));
+ templ.format = src_format;
+ templ.u.buf.first_element = first_element;
+ templ.u.buf.last_element = last_element;
+ templ.swizzle_r = PIPE_SWIZZLE_RED;
+ templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+ templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+ templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+ sampler_view = pipe->create_sampler_view(pipe, buffer, &templ);
+ if (sampler_view == NULL)
+ return false;
+
+ cso_save_fragment_sampler_views(st->cso_context);
+ cso_set_sampler_views(st->cso_context, PIPE_SHADER_FRAGMENT, 1,
+ &sampler_view);
+ /* Drop the local reference; the view was just handed to the CSO context. */
+ pipe_sampler_view_reference(&sampler_view, NULL);
+ }
+
+ /* Upload vertices */
+ {
+ struct pipe_vertex_buffer vbo;
+ struct pipe_vertex_element velem;
+
+ float x0 = (float) xoffset / surface->width * 2.0f - 1.0f;
+ float y0 = (float) yoffset / surface->height * 2.0f - 1.0f;
+ float x1 = (float) (xoffset + upload_width) / surface->width * 2.0f - 1.0f;
+ float y1 = (float) (yoffset + upload_height) / surface->height * 2.0f - 1.0f;
+ /* x0..y1: destination rectangle rescaled from surface pixels to [-1, 1]. */
+ float *verts = NULL;
+
+ vbo.user_buffer = NULL;
+ vbo.buffer = NULL;
+ vbo.stride = 2 * sizeof(float);
+
+ u_upload_alloc(st->uploader, 0, 8 * sizeof(float), 4,
+ &vbo.buffer_offset, &vbo.buffer, (void **) &verts);
+ if (!verts)
+ goto fail_vertex_upload;
+ /* Four 2D vertices, ordered for the triangle strip drawn below. */
+ verts[0] = x0;
+ verts[1] = y0;
+ verts[2] = x0;
+ verts[3] = y1;
+ verts[4] = x1;
+ verts[5] = y0;
+ verts[6] = x1;
+ verts[7] = y1;
+
+ u_upload_unmap(st->uploader);
+
+ velem.src_offset = 0;
+ velem.instance_divisor = 0;
+ velem.vertex_buffer_index = cso_get_aux_vertex_buffer_slot(st->cso_context);
+ velem.src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+ cso_save_vertex_elements(st->cso_context);
+ cso_set_vertex_elements(st->cso_context, 1, &velem);
+
+ cso_save_aux_vertex_buffer_slot(st->cso_context);
+ cso_set_vertex_buffers(st->cso_context, velem.vertex_buffer_index,
+ 1, &vbo);
+
+ pipe_resource_reference(&vbo.buffer, NULL);
+ }
+
+ /* Upload constants */
+ {
+ struct pipe_constant_buffer cb;
+
+ struct {
+ int32_t xoffset;
+ int32_t yoffset;
+ int32_t stride;
+ int32_t image_size;
+ } constants;
+ /* Read by the fragment shader as const0.xyzw. image_size is the texel
+ * distance between consecutive layers (stride * image_height). */
+ constants.xoffset = -xoffset + skip_pixels;
+ constants.yoffset = -yoffset;
+ constants.stride = stride;
+ constants.image_size = stride * image_height;
+
+ if (st->constbuf_uploader) {
+ cb.buffer = NULL;
+ cb.user_buffer = NULL;
+ u_upload_data(st->constbuf_uploader, 0, sizeof(constants),
+ st->ctx->Const.UniformBufferOffsetAlignment,
+ &constants, &cb.buffer_offset, &cb.buffer);
+ if (!cb.buffer)
+ goto fail_constant_upload;
+
+ u_upload_unmap(st->constbuf_uploader);
+ } else {
+ cb.buffer = NULL;
+ cb.user_buffer = &constants;
+ cb.buffer_offset = 0;
+ }
+ cb.buffer_size = sizeof(constants);
+
+ cso_save_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+ cso_set_constant_buffer(st->cso_context, PIPE_SHADER_FRAGMENT, 0, &cb);
+
+ pipe_resource_reference(&cb.buffer, NULL);
+ }
+
+ /* Framebuffer_state */
+ {
+ struct pipe_framebuffer_state fb;
+ memset(&fb, 0, sizeof(fb));
+ fb.width = surface->width;
+ fb.height = surface->height;
+ fb.nr_cbufs = 1;
+ pipe_surface_reference(&fb.cbufs[0], surface);
+
+ cso_save_framebuffer(st->cso_context);
+ cso_set_framebuffer(st->cso_context, &fb);
+
+ pipe_surface_reference(&fb.cbufs[0], NULL);
+ }
+
+ /* Viewport state */
+ {
+ struct pipe_viewport_state vp;
+ vp.scale[0] = 0.5f * surface->width;
+ vp.scale[1] = 0.5f * surface->height;
+ vp.scale[2] = 1.0f;
+ vp.translate[0] = 0.5f * surface->width;
+ vp.translate[1] = 0.5f * surface->height;
+ vp.translate[2] = 0.0f;
+
+ cso_save_viewport(st->cso_context);
+ cso_set_viewport(st->cso_context, &vp);
+ }
+
+ /* Blend state */
+ cso_save_blend(st->cso_context);
+ cso_set_blend(st->cso_context, &st->pbo_upload.blend);
+
+ /* Rasterizer state */
+ cso_save_rasterizer(st->cso_context);
+ cso_set_rasterizer(st->cso_context, &st->pbo_upload.raster);
+
+ /* Set up the shaders */
+ cso_save_vertex_shader(st->cso_context);
+ cso_set_vertex_shader_handle(st->cso_context, st->pbo_upload.vs);
+
+ cso_save_geometry_shader(st->cso_context);
+ cso_set_geometry_shader_handle(st->cso_context,
+ depth != 1 ? st->pbo_upload.gs : NULL);
+
+ cso_save_tessctrl_shader(st->cso_context);
+ cso_set_tessctrl_shader_handle(st->cso_context, NULL);
+
+ cso_save_tesseval_shader(st->cso_context);
+ cso_set_tesseval_shader_handle(st->cso_context, NULL);
+
+ cso_save_fragment_shader(st->cso_context);
+ cso_set_fragment_shader_handle(st->cso_context, st->pbo_upload.fs);
+
+ /* Disable stream output */
+ cso_save_stream_outputs(st->cso_context);
+ cso_set_stream_outputs(st->cso_context, 0, NULL, 0);
+ /* A plain strip for one layer, otherwise an instanced draw sized by depth. */
+ if (depth == 1) {
+ cso_draw_arrays(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+ } else {
+ cso_draw_arrays_instanced(st->cso_context, PIPE_PRIM_TRIANGLE_STRIP,
+ 0, 4, 0, depth);
+ }
+
+ success = true;
+ /* Unwind. The labels below are entered by the upload failure paths,
+ * which had saved only part of the state at that point. */
+ cso_restore_framebuffer(st->cso_context);
+ cso_restore_viewport(st->cso_context);
+ cso_restore_blend(st->cso_context);
+ cso_restore_rasterizer(st->cso_context);
+ cso_restore_vertex_shader(st->cso_context);
+ cso_restore_geometry_shader(st->cso_context);
+ cso_restore_tessctrl_shader(st->cso_context);
+ cso_restore_tesseval_shader(st->cso_context);
+ cso_restore_fragment_shader(st->cso_context);
+ cso_restore_stream_outputs(st->cso_context);
+ cso_restore_constant_buffer_slot0(st->cso_context, PIPE_SHADER_FRAGMENT);
+fail_constant_upload:
+ cso_restore_vertex_elements(st->cso_context);
+ cso_restore_aux_vertex_buffer_slot(st->cso_context);
+fail_vertex_upload:
+ cso_restore_fragment_sampler_views(st->cso_context);
+
+ return success;
+}
+/**
+ * Try to service an uncompressed texture sub-image upload whose source is
+ * the pixel unpack buffer object by drawing into the texture (see
+ * try_pbo_upload_common()).
+ *
+ * \p pixels is used as a byte offset into unpack->BufferObj rather than as
+ * a client pointer. The GL offsets and sizes are converted to their gallium
+ * meaning before use.
+ *
+ * \return true if the upload was done. false if any check failed; nothing
+ * has been drawn in that case and the caller has to use another path.
+ */
+static bool
+try_pbo_upload(struct gl_context *ctx, GLuint dims,
+ struct gl_texture_image *texImage,
+ GLenum format, GLenum type,
+ enum pipe_format dst_format,
+ GLint xoffset, GLint yoffset, GLint zoffset,
+ GLint width, GLint height, GLint depth,
+ const void *pixels,
+ const struct gl_pixelstore_attrib *unpack)
+{
+ struct st_context *st = st_context(ctx);
+ struct st_texture_image *stImage = st_texture_image(texImage);
+ struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
+ struct pipe_resource *texture = stImage->pt;
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_screen *screen = pipe->screen;
+ struct pipe_surface *surface = NULL;
+ enum pipe_format src_format;
+ const struct util_format_description *desc;
+ GLenum gl_target = texImage->TexObject->Target;
+ intptr_t buf_offset;
+ unsigned bytes_per_pixel;
+ unsigned stride, image_height;
+ bool success;
+
+ if (!st->pbo_upload.enabled)
+ return false;
+
+ /* From now on, we need the gallium representation of dimensions. */
+ if (gl_target == GL_TEXTURE_1D_ARRAY) {
+ depth = height; /* the GL height / y offset of a 1D array become layer count / first layer */
+ height = 1;
+ zoffset = yoffset;
+ yoffset = 0;
+ image_height = 1;
+ } else {
+ image_height = unpack->ImageHeight > 0 ? unpack->ImageHeight : height;
+ }
+ /* Multi-layer uploads are only attempted when pbo_upload.upload_layers is set. */
+ if (depth != 1 && !st->pbo_upload.upload_layers)
+ return false;
+
+ /* Choose the source format. Initially, we do so without checking driver
+ * support at all because of the remapping we later perform and because
+ * at least the Radeon driver actually supports some formats for texture
+ * buffers which it doesn't support for regular textures. */
+ src_format = st_choose_matching_format(st, 0, format, type, unpack->SwapBytes);
+ if (!src_format) {
+ return false;
+ }
+
+ src_format = util_format_linear(src_format);
+ desc = util_format_description(src_format);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return false;
+
+ if (desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB)
+ return false;
+ /* In rgba_only mode reinterpret_formats() may rewrite both formats; a
+ * rewritten destination format must still be usable as a render target. */
+ if (st->pbo_upload.rgba_only) {
+ enum pipe_format orig_dst_format = dst_format;
+
+ if (!reinterpret_formats(&src_format, &dst_format)) {
+ return false;
+ }
+
+ if (dst_format != orig_dst_format &&
+ !screen->is_format_supported(screen, dst_format, PIPE_TEXTURE_2D, 0,
+ PIPE_BIND_RENDER_TARGET)) {
+ return false;
+ }
+ }
+
+ if (!src_format ||
+ !screen->is_format_supported(screen, src_format, PIPE_BUFFER, 0,
+ PIPE_BIND_SAMPLER_VIEW)) {
+ return false;
+ }
+
+ /* Check if the offset satisfies the alignment requirements */
+ buf_offset = (intptr_t) pixels;
+ bytes_per_pixel = desc->block.bits / 8; /* NOTE(review): desc predates the optional reinterpret_formats() rewrite; presumably the rewrite keeps the texel size - confirm */
+
+ if (buf_offset % bytes_per_pixel) {
+ return false;
+ }
+
+ /* Convert to texels */
+ buf_offset = buf_offset / bytes_per_pixel;
+
+ /* Compute the stride, taking unpack->Alignment into account */
+ {
+ unsigned pixels_per_row = unpack->RowLength > 0 ?
+ unpack->RowLength : width;
+ unsigned bytes_per_row = pixels_per_row * bytes_per_pixel;
+ unsigned remainder = bytes_per_row % unpack->Alignment;
+ unsigned offset_rows;
+
+ if (remainder > 0)
+ bytes_per_row += (unpack->Alignment - remainder);
+
+ if (bytes_per_row % bytes_per_pixel) {
+ return false;
+ }
+
+ stride = bytes_per_row / bytes_per_pixel;
+ /* Fold SkipRows, SkipImages (3D calls only) and SkipPixels into the
+ * texel offset. */
+ offset_rows = unpack->SkipRows;
+ if (dims == 3)
+ offset_rows += image_height * unpack->SkipImages;
+
+ buf_offset += unpack->SkipPixels + stride * offset_rows;
+ }
+
+ /* Set up the surface */
+ {
+ unsigned level = stObj->pt != stImage->pt ? 0 : texImage->TexObject->MinLevel + texImage->Level;
+ unsigned max_layer = util_max_layer(texture, level);
+ /* Make the layer offset absolute within the resource. */
+ zoffset += texImage->Face + texImage->TexObject->MinLayer;
+
+ struct pipe_surface templ;
+ memset(&templ, 0, sizeof(templ));
+ templ.format = dst_format;
+ templ.u.tex.level = level;
+ templ.u.tex.first_layer = MIN2(zoffset, max_layer);
+ templ.u.tex.last_layer = MIN2(zoffset + depth - 1, max_layer);
+
+ surface = pipe->create_surface(pipe, texture, &templ);
+ if (!surface)
+ return false;
+ }
+
+ success = try_pbo_upload_common(ctx, surface,
+ xoffset, yoffset, width, height,
+ st_buffer_object(unpack->BufferObj)->buffer,
+ src_format,
+ buf_offset,
+ bytes_per_pixel, stride, image_height);
+
+ pipe_surface_reference(&surface, NULL);
+
+ return success;
+}
static void
st_TexSubImage(struct gl_context *ctx, GLuint dims,
goto fallback;
}
- /* See if the texture format already matches the format and type,
- * in which case the memcpy-based fast path will likely be used and
- * we don't have to blit. */
- if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
- type, unpack->SwapBytes, NULL)) {
- goto fallback;
- }
+ /* See if the destination format is supported. */
if (format == GL_DEPTH_COMPONENT || format == GL_DEPTH_STENCIL)
bind = PIPE_BIND_DEPTH_STENCIL;
else
bind = PIPE_BIND_RENDER_TARGET;
- /* See if the destination format is supported.
- * For luminance and intensity, only the red channel is stored there. */
+ /* For luminance and intensity, only the red channel is stored
+ * in the destination. */
dst_format = util_format_linear(dst->format);
dst_format = util_format_luminance_to_red(dst_format);
dst_format = util_format_intensity_to_red(dst_format);
goto fallback;
}
+ if (_mesa_is_bufferobj(unpack->BufferObj)) {
+ if (try_pbo_upload(ctx, dims, texImage, format, type, dst_format,
+ xoffset, yoffset, zoffset,
+ width, height, depth, pixels, unpack))
+ return;
+ }
+
+ /* See if the texture format already matches the format and type,
+ * in which case the memcpy-based fast path will likely be used and
+ * we don't have to blit. */
+ if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
+ type, unpack->SwapBytes, NULL)) {
+ goto fallback;
+ }
+
/* Choose the source format. */
src_format = st_choose_matching_format(st, PIPE_BIND_SAMPLER_VIEW,
format, type, unpack->SwapBytes);
/* 1D array textures.
* We need to convert gallium coords to GL coords.
*/
- GLvoid *src = _mesa_image_address3d(unpack, pixels,
+ GLvoid *src = _mesa_image_address2d(unpack, pixels,
width, depth, format,
- type, 0, slice, 0);
+ type, slice, 0);
memcpy(map, src, bytesPerRow);
}
else {
ubyte *slice_map = map;
for (row = 0; row < (unsigned) height; row++) {
- GLvoid *src = _mesa_image_address3d(unpack, pixels,
- width, height, format,
- type, slice, row, 0);
+ GLvoid *src = _mesa_image_address(dims, unpack, pixels,
+ width, height, format,
+ type, slice, row, 0);
memcpy(slice_map, src, bytesPerRow);
slice_map += transfer->stride;
}
}
+/**
+ * Driver hook for glCompressedTex(Sub)Image: store a region of compressed
+ * blocks into \p texImage.
+ *
+ * When the data comes from a pixel unpack buffer and the driver reports
+ * PIPE_CAP_SURFACE_REINTERPRET_BLOCKS, every compressed block is uploaded as
+ * one texel of an equally sized integer format (8 or 16 bytes) through
+ * try_pbo_upload_common(). Every other case, and any failure on the way,
+ * is handed to _mesa_store_compressed_texsubimage().
+ *
+ * \param x, y, z  destination offset as passed by the caller
+ * \param w, h, d  size of the updated region
+ * \param data     used as a byte offset into the unpack buffer on the PBO
+ *                 path; forwarded unchanged to the fallback
+ */
+static void
+st_CompressedTexSubImage(struct gl_context *ctx, GLuint dims,
+                         struct gl_texture_image *texImage,
+                         GLint x, GLint y, GLint z,
+                         GLsizei w, GLsizei h, GLsizei d,
+                         GLenum format, GLsizei imageSize, const GLvoid *data)
+{
+   struct st_context *st = st_context(ctx);
+   struct st_texture_image *stImage = st_texture_image(texImage);
+   struct st_texture_object *stObj = st_texture_object(texImage->TexObject);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_resource *dst = stImage->pt;
+   struct pipe_surface *surface = NULL;
+   struct compressed_pixelstore store;
+   enum pipe_format copy_format;
+   unsigned bytes_per_block;
+   unsigned bw, bh;
+   intptr_t buf_offset;
+   bool success = false;
+
+   /* Check basic pre-conditions for PBO upload */
+   if (!st->prefer_blit_based_texture_transfer) {
+      goto fallback;
+   }
+
+   if (!_mesa_is_bufferobj(ctx->Unpack.BufferObj))
+      goto fallback;
+
+   if ((_mesa_is_format_etc2(texImage->TexFormat) && !st->has_etc2) ||
+       (texImage->TexFormat == MESA_FORMAT_ETC1_RGB8 && !st->has_etc1)) {
+      /* ETC isn't supported and is represented by uncompressed formats. */
+      goto fallback;
+   }
+
+   if (!dst) {
+      goto fallback;
+   }
+
+   if (!st->pbo_upload.enabled ||
+       !screen->get_param(screen, PIPE_CAP_SURFACE_REINTERPRET_BLOCKS)) {
+      goto fallback;
+   }
+
+   /* Choose the pipe format for the upload: an integer format whose texel
+    * is exactly as large as one compressed block. */
+   bytes_per_block = util_format_get_blocksize(dst->format);
+   bw = util_format_get_blockwidth(dst->format);
+   bh = util_format_get_blockheight(dst->format);
+
+   switch (bytes_per_block) {
+   case 8:
+      copy_format = PIPE_FORMAT_R16G16B16A16_UINT;
+      break;
+   case 16:
+      copy_format = PIPE_FORMAT_R32G32B32A32_UINT;
+      break;
+   default:
+      goto fallback;
+   }
+
+   /* The copy format must be fetchable from a buffer and renderable on the
+    * destination's target. */
+   if (!screen->is_format_supported(screen, copy_format, PIPE_BUFFER, 0,
+                                    PIPE_BIND_SAMPLER_VIEW)) {
+      goto fallback;
+   }
+
+   if (!screen->is_format_supported(screen, copy_format, dst->target,
+                                    dst->nr_samples, PIPE_BIND_RENDER_TARGET)) {
+      goto fallback;
+   }
+
+   /* Interpret the pixelstore settings. */
+   _mesa_compute_compressed_pixelstore(dims, texImage->TexFormat, w, h, d,
+                                       &ctx->Unpack, &store);
+   assert(store.CopyBytesPerRow % bytes_per_block == 0);
+   assert(store.SkipBytes % bytes_per_block == 0);
+
+   /* Compute the offset into the buffer */
+   buf_offset = (intptr_t)data + store.SkipBytes;
+
+   if (buf_offset % bytes_per_block) {
+      goto fallback;
+   }
+
+   /* Convert to blocks */
+   buf_offset = buf_offset / bytes_per_block;
+
+   /* Set up the surface. */
+   {
+      unsigned level = stObj->pt != stImage->pt ? 0 : texImage->TexObject->MinLevel + texImage->Level;
+      unsigned max_layer = util_max_layer(dst, level);
+      /* Absolute layer inside the resource. z itself must stay untouched:
+       * the fallback below still needs the caller's original offset. */
+      GLint layer = z + texImage->Face + texImage->TexObject->MinLayer;
+      struct pipe_surface templ;
+
+      memset(&templ, 0, sizeof(templ));
+      templ.format = copy_format;
+      templ.u.tex.level = level;
+      templ.u.tex.first_layer = MIN2(layer, max_layer);
+      templ.u.tex.last_layer = MIN2(layer + d - 1, max_layer);
+
+      surface = pipe->create_surface(pipe, dst, &templ);
+      if (!surface)
+         goto fallback;
+   }
+
+   /* Offsets, sizes and strides are passed in units of blocks. */
+   success = try_pbo_upload_common(ctx, surface,
+                                   x / bw, y / bh,
+                                   store.CopyBytesPerRow / bytes_per_block,
+                                   store.CopyRowsPerSlice,
+                                   st_buffer_object(ctx->Unpack.BufferObj)->buffer,
+                                   copy_format,
+                                   buf_offset,
+                                   bytes_per_block,
+                                   store.TotalBytesPerRow / bytes_per_block,
+                                   store.TotalRowsPerSlice);
+
+   pipe_surface_reference(&surface, NULL);
+
+   if (success)
+      return;
+
+fallback:
+   _mesa_store_compressed_texsubimage(ctx, dims, texImage,
+                                      x, y, z, w, h, d,
+                                      format, imageSize, data);
+}
+
static void
st_CompressedTexImage(struct gl_context *ctx, GLuint dims,
struct gl_texture_image *texImage,
GLsizei imageSize, const GLvoid *data)
{
prep_teximage(ctx, texImage, GL_NONE, GL_NONE);
- _mesa_store_compressed_teximage(ctx, dims, texImage, imageSize, data);
+ /* Whole-image upload: validate, allocate storage, then reuse the
+ * sub-image path for the full extent of the image. */
+ /* only 2D and 3D compressed images are supported at this time */
+ if (dims == 1) {
+ _mesa_problem(ctx, "Unexpected glCompressedTexImage1D call");
+ return;
+ }
+
+ /* This is pretty simple, because unlike the general texstore path we don't
+ * have to worry about the usual image unpacking or image transfer
+ * operations.
+ */
+ assert(texImage);
+ assert(texImage->Width > 0);
+ assert(texImage->Height > 0);
+ assert(texImage->Depth > 0);
+
+ /* allocate storage for texture data */
+ if (!st_AllocTextureImageBuffer(ctx, texImage)) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage%uD", dims);
+ return;
+ }
+ /* Offset (0, 0, 0) and the image's own size; texImage->TexFormat is what
+ * gets passed in the format argument. */
+ st_CompressedTexSubImage(ctx, dims, texImage,
+ 0, 0, 0,
+ texImage->Width, texImage->Height, texImage->Depth,
+ texImage->TexFormat,
+ imageSize, data);
}
functions->QuerySamplesForFormat = st_QuerySamplesForFormat;
functions->TexImage = st_TexImage;
functions->TexSubImage = st_TexSubImage;
- functions->CompressedTexSubImage = _mesa_store_compressed_texsubimage;
+ functions->CompressedTexSubImage = st_CompressedTexSubImage;
functions->CopyTexSubImage = st_CopyTexSubImage;
functions->GenerateMipmap = st_generate_mipmap;
extern void
st_init_texture_functions(struct dd_function_table *functions);
+extern void
+st_init_pbo_upload(struct st_context *st);
+
+extern void
+st_destroy_pbo_upload(struct st_context *st);
#endif /* ST_CB_TEXTURE_H */
if (barriers & GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT)
flags |= PIPE_BARRIER_MAPPED_BUFFER;
+ if (barriers & GL_ATOMIC_COUNTER_BARRIER_BIT)
+ flags |= PIPE_BARRIER_SHADER_BUFFER;
+ if (barriers & GL_SHADER_STORAGE_BARRIER_BIT)
+ flags |= PIPE_BARRIER_SHADER_BUFFER;
+
+ if (barriers & GL_QUERY_BUFFER_BARRIER_BIT)
+ flags |= PIPE_BARRIER_QUERY_BUFFER;
if (flags && pipe->memory_barrier)
pipe->memory_barrier(pipe, flags);
}
+/**
+ * Called via ctx->Driver.QueryMemoryInfo()
+ *
+ * Copies the memory statistics reported by the pipe screen into \p out,
+ * field by field. \p out is left untouched when the screen provides no
+ * query_memory_info hook (which the assert flags in debug builds).
+ */
+static void
+st_query_memory_info(struct gl_context *ctx, struct gl_memory_info *out)
+{
+   struct st_context *st = st_context(ctx);
+   struct pipe_screen *screen = st->pipe->screen;
+   struct pipe_memory_info stats;
+
+   assert(screen->query_memory_info);
+
+   if (screen->query_memory_info) {
+      screen->query_memory_info(screen, &stats);
+
+      /* device memory */
+      out->total_device_memory = stats.total_device_memory;
+      out->avail_device_memory = stats.avail_device_memory;
+      /* staging memory */
+      out->total_staging_memory = stats.total_staging_memory;
+      out->avail_staging_memory = stats.avail_staging_memory;
+      /* eviction counters */
+      out->device_memory_evicted = stats.device_memory_evicted;
+      out->nr_device_memory_evictions = stats.nr_device_memory_evictions;
+   }
+}
+
+
/**
* Called via ctx->Driver.UpdateState()
*/
st_destroy_drawpix(st);
st_destroy_drawtex(st);
st_destroy_perfmon(st);
+ st_destroy_pbo_upload(st);
for (shader = 0; shader < ARRAY_SIZE(st->state.sampler_views); shader++) {
for (i = 0; i < ARRAY_SIZE(st->state.sampler_views[0]); i++) {
st_init_bitmap(st);
st_init_clear(st);
st_init_draw( st );
+ st_init_pbo_upload(st);
/* Choose texture target for glDrawPixels, glBitmap, renderbuffers */
if (pipe->screen->get_param(pipe->screen, PIPE_CAP_NPOT_TEXTURES))
f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER;
f->NewDefaultTessLevels = ST_NEW_TESS_STATE;
f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
+ f->NewAtomicBuffer = ST_NEW_ATOMIC_BUFFER;
+ f->NewShaderStorageBuffer = ST_NEW_STORAGE_BUFFER;
}
struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
functions->Enable = st_Enable;
functions->UpdateState = st_invalidate_state;
+ functions->QueryMemoryInfo = st_query_memory_info;
}
#define ST_NEW_TESSCTRL_PROGRAM (1 << 9)
#define ST_NEW_TESSEVAL_PROGRAM (1 << 10)
#define ST_NEW_SAMPLER_VIEWS (1 << 11)
+#define ST_NEW_ATOMIC_BUFFER (1 << 12)
+#define ST_NEW_STORAGE_BUFFER (1 << 13)
struct st_state_flags {
void *gs_layered;
} clear;
+ /* For gl(Compressed)Tex(Sub)Image */
+ struct {
+ struct pipe_rasterizer_state raster;
+ struct pipe_blend_state blend;
+ void *vs;
+ void *gs;
+ void *fs;
+ bool enabled;
+ bool rgba_only;
+ bool upload_layers;
+ bool use_gs;
+ } pbo_upload;
+
/** used for anything using util_draw_vertex_buffer */
struct pipe_vertex_element velems_util_draw[3];
c->MaxUniformBlockSize / 4 *
pc->MaxUniformBlocks);
+ pc->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
+ pc->MaxAtomicBuffers = screen->get_shader_param(
+ screen, sh, PIPE_SHADER_CAP_MAX_SHADER_BUFFERS) / 2;
+ pc->MaxShaderStorageBlocks = pc->MaxAtomicBuffers;
+
/* Gallium doesn't really care about local vs. env parameters so use the
* same limits.
*/
screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL);
c->GLSLFrontFacingIsSysVal =
screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);
+
+ c->MaxAtomicBufferBindings =
+ c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+ c->MaxCombinedAtomicBuffers =
+ c->Program[MESA_SHADER_VERTEX].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers +
+ c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
+ assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS);
+
+ if (c->MaxCombinedAtomicBuffers > 0)
+ extensions->ARB_shader_atomic_counters = GL_TRUE;
+
+ c->MaxCombinedShaderOutputResources = c->MaxDrawBuffers;
+ c->ShaderStorageBufferOffsetAlignment =
+ screen->get_param(screen, PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT);
+ if (c->ShaderStorageBufferOffsetAlignment) {
+ c->MaxCombinedShaderStorageBlocks = c->MaxShaderStorageBufferBindings =
+ c->MaxCombinedAtomicBuffers;
+ c->MaxCombinedShaderOutputResources +=
+ c->MaxCombinedShaderStorageBlocks;
+ c->MaxShaderStorageBlockSize = 1 << 27;
+ extensions->ARB_shader_storage_buffer_object = GL_TRUE;
+ }
}
{ o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY },
{ o(ARB_pipeline_statistics_query), PIPE_CAP_QUERY_PIPELINE_STATISTICS },
{ o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE },
+ { o(ARB_query_buffer_object), PIPE_CAP_QUERY_BUFFER_OBJECT },
{ o(ARB_sample_shading), PIPE_CAP_SAMPLE_SHADING },
{ o(ARB_seamless_cube_map), PIPE_CAP_SEAMLESS_CUBE_MAP },
{ o(ARB_shader_draw_parameters), PIPE_CAP_DRAW_PARAMETERS },
{ o(EXT_transform_feedback), PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS },
{ o(AMD_pinned_memory), PIPE_CAP_RESOURCE_FROM_USER_MEMORY },
+ { o(ATI_meminfo), PIPE_CAP_QUERY_MEMORY_INFO },
{ o(AMD_seamless_cubemap_per_texture), PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE },
{ o(ATI_separate_stencil), PIPE_CAP_TWO_SIDED_STENCIL },
{ o(ATI_texture_mirror_once), PIPE_CAP_TEXTURE_MIRROR_CLAMP },
{ o(NV_conditional_render), PIPE_CAP_CONDITIONAL_RENDER },
{ o(NV_primitive_restart), PIPE_CAP_PRIMITIVE_RESTART },
{ o(NV_texture_barrier), PIPE_CAP_TEXTURE_BARRIER },
+ { o(NVX_gpu_memory_info), PIPE_CAP_QUERY_MEMORY_INFO },
/* GL_NV_point_sprite is not supported by gallium because we don't
* support the GL_POINT_SPRITE_R_MODE_NV option. */
#include "st_mesa_to_tgsi.h"
-#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
(1 << PROGRAM_CONSTANT) | \
(1 << PROGRAM_UNIFORM))
unsigned tex_offset_num_offset;
int dead_mask; /**< Used in dead code elimination */
+ st_src_reg buffer; /**< buffer register */
+ unsigned buffer_access; /**< buffer access type */
+
class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
const struct tgsi_opcode_info *info;
};
int samplers_used;
glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
int sampler_targets[PIPE_MAX_SAMPLERS]; /**< One of TGSI_TEXTURE_* */
+ int buffers_used;
bool indirect_addr_consts;
int wpos_transform_const;
virtual void visit(ir_barrier *);
/*@}*/
+ void visit_atomic_counter_intrinsic(ir_call *);
+ void visit_ssbo_intrinsic(ir_call *);
+ void visit_membar_intrinsic(ir_call *);
+
st_src_reg result;
/** List of variable_storage */
return size_swizzles[size - 1];
}
+/**
+ * Report whether \p opcode is one of the TGSI opcodes this file treats as
+ * a resource instruction: RESQ, LOAD and the ATOM* family.
+ */
+static bool
+is_resource_instruction(unsigned opcode)
+{
+   /* resource query and load */
+   if (opcode == TGSI_OPCODE_RESQ || opcode == TGSI_OPCODE_LOAD)
+      return true;
+
+   /* atomic read-modify-write operations */
+   return opcode == TGSI_OPCODE_ATOMUADD ||
+          opcode == TGSI_OPCODE_ATOMXCHG ||
+          opcode == TGSI_OPCODE_ATOMCAS ||
+          opcode == TGSI_OPCODE_ATOMAND ||
+          opcode == TGSI_OPCODE_ATOMOR ||
+          opcode == TGSI_OPCODE_ATOMXOR ||
+          opcode == TGSI_OPCODE_ATOMUMIN ||
+          opcode == TGSI_OPCODE_ATOMUMAX ||
+          opcode == TGSI_OPCODE_ATOMIMIN ||
+          opcode == TGSI_OPCODE_ATOMIMAX;
+}
+
static unsigned
num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
{
static unsigned
num_inst_src_regs(const glsl_to_tgsi_instruction *op)
{
- return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src;
+ return op->info->is_tex || is_resource_instruction(op->op) ?
+ op->info->num_src - 1 : op->info->num_src; /* texture and resource opcodes count one source fewer than info->num_src */
}
glsl_to_tgsi_instruction *
}
}
- this->instructions.push_tail(inst);
-
/*
* This section contains the double processing.
* GLSL just represents doubles as single channel values,
int initial_src_swz[4], initial_src_idx[4];
int initial_dst_idx[2], initial_dst_writemask[2];
/* select the writemask for dst0 or dst1 */
- unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? inst->dst[1].writemask : inst->dst[0].writemask;
+ unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;
/* copy out the writemask, index and swizzles for all src/dsts. */
for (j = 0; j < 2; j++) {
* scan all the components in the dst writemask
* generate an instruction for each of them if required.
*/
+ st_src_reg addr;
while (writemask) {
int i = u_bit_scan(&writemask);
+ /* before emitting the instruction, see if we have to adjust store
+ * address */
+ if (i > 1 && inst->op == TGSI_OPCODE_STORE &&
+ addr.file == PROGRAM_UNDEFINED) {
+ /* We have to advance the buffer address by 16 */
+ addr = get_temp(glsl_type::uint_type);
+ emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr),
+ inst->src[0], st_src_reg_for_int(16));
+ }
+
+
/* first time use previous instruction */
if (dinst == NULL) {
dinst = inst;
*dinst = *inst;
dinst->next = NULL;
dinst->prev = NULL;
- this->instructions.push_tail(dinst);
}
+ this->instructions.push_tail(dinst);
/* modify the destination if we are splitting */
for (j = 0; j < 2; j++) {
if (dst_is_double[j]) {
dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
dinst->dst[j].index = initial_dst_idx[j];
- if (i > 1)
+ if (i > 1) {
+ if (dinst->op == TGSI_OPCODE_STORE) {
+ dinst->src[0] = addr;
+ } else {
dinst->dst[j].index++;
+ }
+ }
} else {
/* if we aren't writing to a double, just get the bit of the initial writemask
for this channel */
}
}
inst = dinst;
+ } else {
+ this->instructions.push_tail(inst);
}
assert(src1.type != GLSL_TYPE_ARRAY);
assert(src1.type != GLSL_TYPE_STRUCT);
- if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
+ if (is_resource_instruction(op))
+ type = src1.type;
+ else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
type = GLSL_TYPE_DOUBLE;
else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
type = GLSL_TYPE_FLOAT;
case3fid(FLR, FLR, DFLR);
case3fid(ROUND, ROUND, DROUND);
+ case2iu(ATOMIMAX, ATOMUMAX);
+ case2iu(ATOMIMIN, ATOMUMIN);
+
default: break;
}
emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]);
break;
+ case ir_unop_get_buffer_size: {
+ ir_constant *const_offset = ir->operands[0]->as_constant();
+ st_src_reg buffer(
+ PROGRAM_BUFFER,
+ ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
+ (const_offset ? const_offset->value.u[0] : 0),
+ GLSL_TYPE_UINT);
+ if (!const_offset) {
+ buffer.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
+ emit_arl(ir, sampler_reladdr, op[0]);
+ }
+ emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->buffer = buffer;
+ break;
+ }
+
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_snorm_4x8:
*/
assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
break;
-
- case ir_unop_get_buffer_size:
- assert(!"Not implemented yet");
- break;
}
this->result = result_src;
return entry;
}
+/**
+ * Translate a call to one of the atomic counter intrinsics
+ * (__intrinsic_atomic_read / _increment / _predecrement) into a TGSI
+ * LOAD or ATOMUADD on the PROGRAM_BUFFER slot given by the counter's
+ * binding. Calls to any other callee emit nothing beyond the offset
+ * computation.
+ */
+void
+glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
+{
+   const char *name = ir->callee->function_name();
+   ir_dereference *counter = static_cast<ir_dereference *>(
+      ir->actual_parameters.get_head());
+   ir_variable *var = counter->variable_referenced();
+
+   st_src_reg buffer(
+      PROGRAM_BUFFER, var->data.binding, GLSL_TYPE_ATOMIC_UINT);
+
+   /* Offset of the counter inside the buffer */
+   st_src_reg offset;
+   ir_dereference_array *element = counter->as_dereference_array();
+
+   if (!element) {
+      /* Plain counter: the declared offset is all there is. */
+      offset = st_src_reg_for_int(var->data.offset);
+   } else {
+      /* Array element: index * ATOMIC_COUNTER_SIZE + declared offset. */
+      offset = get_temp(glsl_type::uint_type);
+
+      element->array_index->accept(this);
+
+      emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
+               this->result, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
+      emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
+               offset, st_src_reg_for_int(var->data.offset));
+   }
+
+   /* The result goes to the x channel of the call's return storage. */
+   ir->return_deref->accept(this);
+   st_dst_reg dst(this->result);
+   dst.writemask = WRITEMASK_X;
+
+   if (strcmp(name, "__intrinsic_atomic_read") == 0) {
+      emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset)->buffer = buffer;
+   } else if (strcmp(name, "__intrinsic_atomic_increment") == 0) {
+      emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
+               st_src_reg_for_int(1))->buffer = buffer;
+   } else if (strcmp(name, "__intrinsic_atomic_predecrement") == 0) {
+      emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
+               st_src_reg_for_int(-1))->buffer = buffer;
+      /* Subtract one more from the value the atomic add wrote (presumably
+       * because ATOMUADD yields the pre-decrement value - confirm). */
+      emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
+   }
+}
+
+/**
+ * Translate a call to one of the __intrinsic_*_ssbo functions (load, store
+ * and the atomic operations) into TGSI memory instructions.
+ *
+ * The actual parameters are consumed in order: block index, offset, then
+ * (depending on the intrinsic) the value(s) and the store write mask. An
+ * optional trailing constant parameter is copied into buffer_access.
+ */
+void
+glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
+{
+ const char *callee = ir->callee->function_name();
+ exec_node *param = ir->actual_parameters.get_head();
+
+ ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ ir_constant *const_block = block->as_constant();
+
+ /* The buffer index is the (constant) block index placed after this
+ * stage's MaxAtomicBuffers slots; a non-constant block index
+ * contributes 0 here and is applied through reladdr below.
+ */
+ st_src_reg buffer(
+ PROGRAM_BUFFER,
+ ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
+ (const_block ? const_block->value.u[0] : 0),
+ GLSL_TYPE_UINT);
+
+ if (!const_block) {
+ /* Dynamic block index: evaluate it and load it into sampler_reladdr,
+ * then give the buffer register its own copy as relative address.
+ */
+ block->accept(this);
+ emit_arl(ir, sampler_reladdr, this->result);
+ buffer.reladdr = ralloc(mem_ctx, st_src_reg);
+ memcpy(buffer.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
+ }
+
+ /* Calculate the surface offset */
+ offset->accept(this);
+ st_src_reg off = this->result;
+
+ /* The destination stays undefined when the call has no return value;
+ * otherwise the write mask covers every component of the return type.
+ */
+ st_dst_reg dst = undef_dst;
+ if (ir->return_deref) {
+ ir->return_deref->accept(this);
+ dst = st_dst_reg(this->result);
+ dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
+ }
+
+ glsl_to_tgsi_instruction *inst;
+
+ if (!strcmp("__intrinsic_load_ssbo", callee)) {
+ inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
+ /* For a boolean destination, follow the load with a != 0 compare. */
+ if (dst.type == GLSL_TYPE_BOOL)
+ emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
+ } else if (!strcmp("__intrinsic_store_ssbo", callee)) {
+ /* Third parameter: the value to store. */
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ /* Fourth parameter: the write mask, which must be a constant. */
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+ dst.writemask = write_mask->value.u[0];
+
+ dst.type = this->result.type;
+ inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
+ } else {
+ /* Atomic operation: third parameter is the data operand. */
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+
+ st_src_reg data = this->result, data2 = undef_src;
+ unsigned opcode;
+ if (!strcmp("__intrinsic_atomic_add_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMUADD;
+ else if (!strcmp("__intrinsic_atomic_min_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMIMIN;
+ else if (!strcmp("__intrinsic_atomic_max_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMIMAX;
+ else if (!strcmp("__intrinsic_atomic_and_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMAND;
+ else if (!strcmp("__intrinsic_atomic_or_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMOR;
+ else if (!strcmp("__intrinsic_atomic_xor_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMXOR;
+ else if (!strcmp("__intrinsic_atomic_exchange_ssbo", callee))
+ opcode = TGSI_OPCODE_ATOMXCHG;
+ else if (!strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
+ /* Compare-and-swap takes one more operand than the other atomics. */
+ opcode = TGSI_OPCODE_ATOMCAS;
+ param = param->get_next();
+ val = ((ir_instruction *)param)->as_rvalue();
+ val->accept(this);
+ data2 = this->result;
+ } else {
+ assert(!"Unexpected intrinsic");
+ return;
+ }
+
+ inst = emit_asm(ir, opcode, dst, off, data, data2);
+ }
+
+ /* Any parameter left after the ones consumed above must be a constant;
+ * its value is recorded as the buffer access flags.
+ */
+ param = param->get_next();
+ ir_constant *access = NULL;
+ if (!param->is_tail_sentinel()) {
+ access = ((ir_instruction *)param)->as_constant();
+ assert(access);
+ }
+
+ /* The emit_asm() might have actually split the op into pieces, e.g. for
+ * double stores. We have to go back and fix up all the generated ops.
+ */
+ unsigned op = inst->op;
+ do {
+ inst->buffer = buffer;
+ if (access)
+ inst->buffer_access = access->value.u[0];
+ inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+ /* Step over a UADD sitting between the pieces. */
+ if (inst->op == TGSI_OPCODE_UADD)
+ inst = (glsl_to_tgsi_instruction *)inst->get_prev();
+ } while (inst && inst->buffer.file == PROGRAM_UNDEFINED && inst->op == op);
+}
+
+/**
+ * Emit a TGSI MEMBAR instruction for a call to one of the memory barrier
+ * intrinsics. The callee name selects which TGSI_MEMBAR_* bits are passed
+ * as the instruction's immediate operand; an unknown name asserts and
+ * emits nothing.
+ */
+void
+glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
+{
+   const char *name = ir->callee->function_name();
+   const int all_memory = TGSI_MEMBAR_SHADER_BUFFER |
+                          TGSI_MEMBAR_ATOMIC_BUFFER |
+                          TGSI_MEMBAR_SHADER_IMAGE |
+                          TGSI_MEMBAR_SHARED;
+   int barrier_bits;
+
+   if (strcmp("__intrinsic_memory_barrier", name) == 0) {
+      barrier_bits = all_memory;
+   } else if (strcmp("__intrinsic_memory_barrier_atomic_counter", name) == 0) {
+      barrier_bits = TGSI_MEMBAR_ATOMIC_BUFFER;
+   } else if (strcmp("__intrinsic_memory_barrier_buffer", name) == 0) {
+      barrier_bits = TGSI_MEMBAR_SHADER_BUFFER;
+   } else if (strcmp("__intrinsic_memory_barrier_image", name) == 0) {
+      barrier_bits = TGSI_MEMBAR_SHADER_IMAGE;
+   } else if (strcmp("__intrinsic_memory_barrier_shared", name) == 0) {
+      barrier_bits = TGSI_MEMBAR_SHARED;
+   } else if (strcmp("__intrinsic_group_memory_barrier", name) == 0) {
+      barrier_bits = all_memory | TGSI_MEMBAR_THREAD_GROUP;
+   } else {
+      assert(!"Unexpected memory barrier intrinsic");
+      return;
+   }
+
+   emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
+            st_src_reg_for_int(barrier_bits));
+}
+
void
glsl_to_tgsi_visitor::visit(ir_call *ir)
{
glsl_to_tgsi_instruction *call_inst;
ir_function_signature *sig = ir->callee;
- function_entry *entry = get_function_signature(sig);
+ const char *callee = sig->function_name();
+ function_entry *entry;
int i;
+ /* Filter out intrinsics */
+ if (!strcmp("__intrinsic_atomic_read", callee) ||
+ !strcmp("__intrinsic_atomic_increment", callee) ||
+ !strcmp("__intrinsic_atomic_predecrement", callee)) {
+ visit_atomic_counter_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_load_ssbo", callee) ||
+ !strcmp("__intrinsic_store_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_add_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_min_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_max_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_and_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_or_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_xor_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_exchange_ssbo", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap_ssbo", callee)) {
+ visit_ssbo_intrinsic(ir);
+ return;
+ }
+
+ if (!strcmp("__intrinsic_memory_barrier", callee) ||
+ !strcmp("__intrinsic_memory_barrier_atomic_counter", callee) ||
+ !strcmp("__intrinsic_memory_barrier_buffer", callee) ||
+ !strcmp("__intrinsic_memory_barrier_image", callee) ||
+ !strcmp("__intrinsic_memory_barrier_shared", callee) ||
+ !strcmp("__intrinsic_group_memory_barrier", callee)) {
+ visit_membar_intrinsic(ir);
+ return;
+ }
+
+ entry = get_function_signature(sig);
/* Process in parameters. */
foreach_two_lists(formal_node, &sig->parameters,
actual_node, &ir->actual_parameters) {
current_function = NULL;
num_address_regs = 0;
samplers_used = 0;
+ buffers_used = 0;
indirect_addr_consts = false;
wpos_transform_const = -1;
glsl_version = 0;
count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
{
v->samplers_used = 0;
+ v->buffers_used = 0;
foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
if (inst->info->is_tex) {
}
}
}
+ if (inst->buffer.file != PROGRAM_UNDEFINED && (
+ is_resource_instruction(inst->op) ||
+ inst->op == TGSI_OPCODE_STORE)) {
+ if (inst->buffer.file == PROGRAM_BUFFER)
+ v->buffers_used |= 1 << inst->buffer.index;
+ }
}
prog->SamplersUsed = v->samplers_used;
last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
}
for (j = 0; j < num_inst_dst_regs(inst); j++) {
- if (inst->dst[j].file == PROGRAM_TEMPORARY)
+ if (inst->dst[j].file == PROGRAM_TEMPORARY) {
if (first_writes[inst->dst[j].index] == -1)
first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
+ last_reads[inst->dst[j].index] = (depth == 0) ? i : -2;
+ }
}
for (j = 0; j < inst->tex_offset_num_offset; j++) {
if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
if (!inst->dead_mask || !inst->dst[0].writemask)
continue;
- else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
+ /* No amount of dead masks should remove memory stores */
+ if (inst->info->is_store)
+ continue;
+
+ if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
inst->remove();
delete inst;
removed++;
/* Update the first_writes and last_reads arrays with the new
* values for the merged register index, and mark the newly unused
* register index as such. */
+ assert(last_reads[j] >= last_reads[i]);
last_reads[i] = last_reads[j];
first_writes[j] = -1;
last_reads[j] = -1;
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
struct ureg_dst address[3];
struct ureg_src samplers[PIPE_MAX_SAMPLERS];
+ struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
struct ureg_src systemValues[SYSTEM_VALUE_MAX];
struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
unsigned *array_sizes;
const glsl_to_tgsi_instruction *inst)
{
struct ureg_program *ureg = t->ureg;
- GLuint i;
+ int i;
struct ureg_dst dst[2];
struct ureg_src src[4];
struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
- unsigned num_dst;
- unsigned num_src;
+ int num_dst;
+ int num_src;
unsigned tex_target;
num_dst = num_inst_dst_regs(inst);
src[num_src] =
ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
num_src++;
- for (i = 0; i < inst->tex_offset_num_offset; i++) {
+ for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
}
tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
src, num_src);
return;
+ case TGSI_OPCODE_RESQ:
+ case TGSI_OPCODE_LOAD:
+ case TGSI_OPCODE_ATOMUADD:
+ case TGSI_OPCODE_ATOMXCHG:
+ case TGSI_OPCODE_ATOMCAS:
+ case TGSI_OPCODE_ATOMAND:
+ case TGSI_OPCODE_ATOMOR:
+ case TGSI_OPCODE_ATOMXOR:
+ case TGSI_OPCODE_ATOMUMIN:
+ case TGSI_OPCODE_ATOMUMAX:
+ case TGSI_OPCODE_ATOMIMIN:
+ case TGSI_OPCODE_ATOMIMAX:
+ for (i = num_src - 1; i >= 0; i--)
+ src[i + 1] = src[i];
+ num_src++;
+ src[0] = t->buffers[inst->buffer.index];
+ if (inst->buffer.reladdr)
+ src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
+ assert(src[0].File != TGSI_FILE_NULL);
+ ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
+ inst->buffer_access);
+ break;
+
+ case TGSI_OPCODE_STORE:
+ dst[0] = ureg_writemask(ureg_dst(t->buffers[inst->buffer.index]), inst->dst[0].writemask);
+ if (inst->buffer.reladdr)
+ dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
+ assert(dst[0].File != TGSI_FILE_NULL);
+ ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
+ inst->buffer_access);
+ break;
+
case TGSI_OPCODE_SCS:
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
{
struct st_translate *t;
unsigned i;
+ struct gl_program_constants *frag_const =
+ &ctx->Const.Program[MESA_SHADER_FRAGMENT];
enum pipe_error ret = PIPE_OK;
assert(numInputs <= ARRAY_SIZE(t->inputs));
assert(i == program->num_immediates);
/* texture samplers */
- for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
+ for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
if (program->samplers_used & (1 << i)) {
unsigned type;
}
}
+ for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
+ if (program->buffers_used & (1 << i)) {
+ t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
+ }
+ }
+
+ for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
+ i++) {
+ if (program->buffers_used & (1 << i)) {
+ t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
+ }
+ }
+
+
+
/* Emit each instruction in turn:
*/
foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
}
}
+void
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj);
+
void
vbo_get_minmax_indices(struct gl_context *ctx, const struct _mesa_prim *prim,
const struct _mesa_index_buffer *ib,
#include "main/enums.h"
#include "main/macros.h"
#include "main/transformfeedback.h"
-#include "main/sse_minmax.h"
-#include "x86/common_x86_asm.h"
#include "vbo_context.h"
}
-
-/**
- * Compute min and max elements by scanning the index buffer for
- * glDraw[Range]Elements() calls.
- * If primitive restart is enabled, we need to ignore restart
- * indexes when computing min/max.
- */
-static void
-vbo_get_minmax_index(struct gl_context *ctx,
- const struct _mesa_prim *prim,
- const struct _mesa_index_buffer *ib,
- GLuint *min_index, GLuint *max_index,
- const GLuint count)
-{
- const GLboolean restart = ctx->Array._PrimitiveRestart;
- const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type);
- const int index_size = vbo_sizeof_ib_type(ib->type);
- const char *indices;
- GLuint i;
-
- indices = (char *) ib->ptr + prim->start * index_size;
- if (_mesa_is_bufferobj(ib->obj)) {
- GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
- indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
- GL_MAP_READ_BIT, ib->obj,
- MAP_INTERNAL);
- }
-
- switch (ib->type) {
- case GL_UNSIGNED_INT: {
- const GLuint *ui_indices = (const GLuint *)indices;
- GLuint max_ui = 0;
- GLuint min_ui = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ui_indices[i] != restartIndex) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- }
- else {
-#if defined(USE_SSE41)
- if (cpu_has_sse4_1) {
- _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
- }
- else
-#endif
- for (i = 0; i < count; i++) {
- if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
- if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
- }
- }
- *min_index = min_ui;
- *max_index = max_ui;
- break;
- }
- case GL_UNSIGNED_SHORT: {
- const GLushort *us_indices = (const GLushort *)indices;
- GLuint max_us = 0;
- GLuint min_us = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (us_indices[i] != restartIndex) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (us_indices[i] > max_us) max_us = us_indices[i];
- if (us_indices[i] < min_us) min_us = us_indices[i];
- }
- }
- *min_index = min_us;
- *max_index = max_us;
- break;
- }
- case GL_UNSIGNED_BYTE: {
- const GLubyte *ub_indices = (const GLubyte *)indices;
- GLuint max_ub = 0;
- GLuint min_ub = ~0U;
- if (restart) {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] != restartIndex) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- }
- else {
- for (i = 0; i < count; i++) {
- if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
- if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
- }
- }
- *min_index = min_ub;
- *max_index = max_ub;
- break;
- }
- default:
- unreachable("not reached");
- }
-
- if (_mesa_is_bufferobj(ib->obj)) {
- ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
- }
-}
-
-/**
- * Compute min and max elements for nr_prims
- */
-void
-vbo_get_minmax_indices(struct gl_context *ctx,
- const struct _mesa_prim *prims,
- const struct _mesa_index_buffer *ib,
- GLuint *min_index,
- GLuint *max_index,
- GLuint nr_prims)
-{
- GLuint tmp_min, tmp_max;
- GLuint i;
- GLuint count;
-
- *min_index = ~0;
- *max_index = 0;
-
- for (i = 0; i < nr_prims; i++) {
- const struct _mesa_prim *start_prim;
-
- start_prim = &prims[i];
- count = start_prim->count;
- /* Do combination if possible to reduce map/unmap count */
- while ((i + 1 < nr_prims) &&
- (prims[i].start + prims[i].count == prims[i+1].start)) {
- count += prims[i+1].count;
- i++;
- }
- vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
- *min_index = MIN2(*min_index, tmp_min);
- *max_index = MAX2(*max_index, tmp_max);
- }
-}
-
-
/**
* Check that element 'j' of the array has reasonable data.
* Map VBO if needed.
--- /dev/null
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright 2003 VMware, Inc.
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/varray.h"
+#include "main/macros.h"
+#include "main/sse_minmax.h"
+#include "x86/common_x86_asm.h"
+#include "util/hash_table.h"
+
+
+/* Lookup key of the per-buffer-object min/max cache: identifies one
+ * scanned range of indices by where it starts, how many indices it holds
+ * and the index type. The whole struct is hashed byte-wise.
+ */
+struct minmax_cache_key {
+ GLintptr offset; /* start of the index range, as supplied by the caller */
+ GLuint count; /* number of indices in the range */
+ GLenum type; /* index type (ib->type of the draw) */
+};
+
+
+/* One cached result. The hash table's key pointer refers to the embedded
+ * 'key' member, so key and data are freed together with the entry.
+ */
+struct minmax_cache_entry {
+ struct minmax_cache_key key;
+ GLuint min; /* smallest index found in the range */
+ GLuint max; /* largest index found in the range */
+};
+
+
+/* Hash callback for the min/max cache: hashes the raw bytes of the key. */
+static uint32_t
+vbo_minmax_cache_hash(const struct minmax_cache_key *key)
+{
+   const uint32_t digest =
+      _mesa_hash_data(key, sizeof(struct minmax_cache_key));
+
+   return digest;
+}
+
+
+/* Equality callback for the min/max cache: two keys match when offset,
+ * count and type are all identical.
+ */
+static bool
+vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
+                           const struct minmax_cache_key *b)
+{
+   if (a->offset != b->offset)
+      return false;
+
+   if (a->count != b->count)
+      return false;
+
+   return a->type == b->type;
+}
+
+
+/* Per-entry destructor handed to the hash table: releases the heap
+ * allocated minmax_cache_entry stored as the entry's data.
+ */
+static void
+vbo_minmax_cache_delete_entry(struct hash_entry *entry)
+{
+   void *cached = entry->data;
+
+   free(cached);
+}
+
+
+/* Decide whether min/max results may be cached for this buffer object.
+ *
+ * Returns GL_FALSE when the usage history carries any of the listed
+ * USAGE_* bits (including the explicit USAGE_DISABLE_MINMAX_CACHE), or
+ * when the user mapping has both the persistent and the write bit set.
+ * Returns GL_TRUE otherwise.
+ */
+static GLboolean
+vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
+{
+   const bool excluded_usage =
+      (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
+                                  USAGE_ATOMIC_COUNTER_BUFFER |
+                                  USAGE_SHADER_STORAGE_BUFFER |
+                                  USAGE_TRANSFORM_FEEDBACK_BUFFER |
+                                  USAGE_PIXEL_PACK_BUFFER |
+                                  USAGE_DISABLE_MINMAX_CACHE)) != 0;
+   const bool persistent_write_map =
+      (bufferObj->Mappings[MAP_USER].AccessFlags &
+       (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
+      (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT);
+
+   return (excluded_usage || persistent_write_map) ? GL_FALSE : GL_TRUE;
+}
+
+
+/* Destroy the buffer object's min/max cache, freeing every cached entry,
+ * and leave the MinMaxCache pointer NULL.
+ */
+void
+vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
+{
+   struct hash_table *cache = bufferObj->MinMaxCache;
+
+   bufferObj->MinMaxCache = NULL;
+   _mesa_hash_table_destroy(cache, vbo_minmax_cache_delete_entry);
+}
+
+
+/* Look up a previously computed min/max pair for the index range
+ * (type, offset, count) of the given buffer object.
+ *
+ * Returns GL_TRUE and writes *min_index / *max_index on a hit. Returns
+ * GL_FALSE when the buffer has no cache, caching is not allowed for it,
+ * the cache was dirty, or the range is not cached; the outputs are left
+ * untouched in that case.
+ *
+ * Apart from the two early-outs, the work is done with the buffer
+ * object's mutex held, and the hit/miss index counters are updated.
+ */
+static GLboolean
+vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
+ GLenum type, GLintptr offset, GLuint count,
+ GLuint *min_index, GLuint *max_index)
+{
+ GLboolean found = GL_FALSE;
+ struct minmax_cache_key key;
+ uint32_t hash;
+ struct hash_entry *result;
+
+ if (!bufferObj->MinMaxCache)
+ return GL_FALSE;
+ if (!vbo_use_minmax_cache(bufferObj))
+ return GL_FALSE;
+
+ mtx_lock(&bufferObj->Mutex);
+
+ if (bufferObj->MinMaxCacheDirty) {
+ /* Disable the cache permanently for this BO if the number of hits
+ * is asymptotically less than the number of misses. This happens when
+ * applications use the BO for streaming.
+ *
+ * However, some initial optimism allows applications that interleave
+ * draw calls with glBufferSubData during warmup.
+ */
+ unsigned optimism = bufferObj->Size;
+ if (bufferObj->MinMaxCacheMissIndices > optimism &&
+ bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
+ bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
+ vbo_delete_minmax_cache(bufferObj);
+ goto out_disable;
+ }
+
+ /* Otherwise drop all stale entries but keep the table; this lookup
+ * is then accounted as a miss.
+ */
+ _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
+ bufferObj->MinMaxCacheDirty = false;
+ goto out_invalidate;
+ }
+
+ key.type = type;
+ key.offset = offset;
+ key.count = count;
+ hash = vbo_minmax_cache_hash(&key);
+ result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
+ if (result) {
+ struct minmax_cache_entry *entry = result->data;
+ *min_index = entry->min;
+ *max_index = entry->max;
+ found = GL_TRUE;
+ }
+
+out_invalidate:
+ if (found) {
+ /* The hit counter saturates so that we don't accidentally disable the
+ * cache in a long-running program.
+ */
+ unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
+
+ /* Unsigned wrap-around shows up as a sum smaller than the old value. */
+ if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
+ bufferObj->MinMaxCacheHitIndices = new_hit_count;
+ else
+ bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
+ } else {
+ bufferObj->MinMaxCacheMissIndices += count;
+ }
+
+out_disable:
+ mtx_unlock(&bufferObj->Mutex);
+ return found;
+}
+
+
+/* Record the min/max pair computed for the index range (type, offset,
+ * count) in the buffer object's cache, creating the hash table on first
+ * use.
+ *
+ * Does nothing when caching is not allowed for the buffer. Allocation
+ * failures and already-present keys are handled by silently not storing
+ * the result. The table is modified with the buffer object's mutex held.
+ */
+static void
+vbo_minmax_cache_store(struct gl_context *ctx,
+ struct gl_buffer_object *bufferObj,
+ GLenum type, GLintptr offset, GLuint count,
+ GLuint min, GLuint max)
+{
+ struct minmax_cache_entry *entry;
+ struct hash_entry *table_entry;
+ uint32_t hash;
+
+ if (!vbo_use_minmax_cache(bufferObj))
+ return;
+
+ mtx_lock(&bufferObj->Mutex);
+
+ if (!bufferObj->MinMaxCache) {
+ bufferObj->MinMaxCache =
+ _mesa_hash_table_create(NULL,
+ (uint32_t (*)(const void *))vbo_minmax_cache_hash,
+ (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
+ if (!bufferObj->MinMaxCache)
+ goto out;
+ }
+
+ entry = MALLOC_STRUCT(minmax_cache_entry);
+ if (!entry)
+ goto out;
+
+ entry->key.offset = offset;
+ entry->key.count = count;
+ entry->key.type = type;
+ entry->min = min;
+ entry->max = max;
+ hash = vbo_minmax_cache_hash(&entry->key);
+
+ /* Search before inserting so an existing entry is never overwritten. */
+ table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
+ hash, &entry->key);
+ if (table_entry) {
+ /* It seems like this could happen when two contexts are rendering using
+ * the same buffer object from multiple threads.
+ */
+ _mesa_debug(ctx, "duplicate entry in minmax cache\n");
+ free(entry);
+ goto out;
+ }
+
+ /* The table keeps a pointer to the key embedded in the entry itself. */
+ table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
+ hash, &entry->key, entry);
+ if (!table_entry)
+ free(entry);
+
+out:
+ mtx_unlock(&bufferObj->Mutex);
+}
+
+
+/**
+ * Compute min and max elements by scanning the index buffer for
+ * glDraw[Range]Elements() calls.
+ * If primitive restart is enabled, we need to ignore restart
+ * indexes when computing min/max.
+ *
+ * When the indices live in a buffer object, the per-buffer min/max cache
+ * is consulted first and the scan result is stored in it afterwards. Both
+ * operations use the same key: the byte offset of the range inside the
+ * buffer, the index count and the index type.
+ *
+ * \param ctx        GL context (restart state, driver map/unmap hooks)
+ * \param prim       primitive whose 'start' selects the first index
+ * \param ib         index buffer description (type, pointer/offset, object)
+ * \param min_index  receives the smallest index found
+ * \param max_index  receives the largest index found
+ * \param count      number of indices to scan, starting at prim->start
+ */
+static void
+vbo_get_minmax_index(struct gl_context *ctx,
+                     const struct _mesa_prim *prim,
+                     const struct _mesa_index_buffer *ib,
+                     GLuint *min_index, GLuint *max_index,
+                     const GLuint count)
+{
+   const GLboolean restart = ctx->Array._PrimitiveRestart;
+   const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type);
+   const int index_size = vbo_sizeof_ib_type(ib->type);
+   const char *indices;
+   GLintptr offset = 0;
+   GLuint i;
+
+   indices = (char *) ib->ptr + prim->start * index_size;
+   if (_mesa_is_bufferobj(ib->obj)) {
+      GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
+
+      /* For a buffer object 'indices' is used as a byte offset into the
+       * buffer. Remember it before it is replaced by the mapped pointer so
+       * that the cache store below uses the same key as this lookup.
+       */
+      offset = (GLintptr) indices;
+
+      /* NOTE(review): the cache key does not include the primitive restart
+       * state, although the scan result depends on it - confirm whether
+       * callers can toggle restart between draws from the same range.
+       */
+      if (vbo_get_minmax_cached(ib->obj, ib->type, offset, count,
+                                min_index, max_index))
+         return;
+
+      indices = ctx->Driver.MapBufferRange(ctx, offset, size,
+                                           GL_MAP_READ_BIT, ib->obj,
+                                           MAP_INTERNAL);
+   }
+
+   switch (ib->type) {
+   case GL_UNSIGNED_INT: {
+      const GLuint *ui_indices = (const GLuint *)indices;
+      GLuint max_ui = 0;
+      GLuint min_ui = ~0U;
+      if (restart) {
+         for (i = 0; i < count; i++) {
+            if (ui_indices[i] != restartIndex) {
+               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
+               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+            }
+         }
+      }
+      else {
+#if defined(USE_SSE41)
+         if (cpu_has_sse4_1) {
+            _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
+         }
+         else
+#endif
+            for (i = 0; i < count; i++) {
+               if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
+               if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
+            }
+      }
+      *min_index = min_ui;
+      *max_index = max_ui;
+      break;
+   }
+   case GL_UNSIGNED_SHORT: {
+      const GLushort *us_indices = (const GLushort *)indices;
+      GLuint max_us = 0;
+      GLuint min_us = ~0U;
+      if (restart) {
+         for (i = 0; i < count; i++) {
+            if (us_indices[i] != restartIndex) {
+               if (us_indices[i] > max_us) max_us = us_indices[i];
+               if (us_indices[i] < min_us) min_us = us_indices[i];
+            }
+         }
+      }
+      else {
+         for (i = 0; i < count; i++) {
+            if (us_indices[i] > max_us) max_us = us_indices[i];
+            if (us_indices[i] < min_us) min_us = us_indices[i];
+         }
+      }
+      *min_index = min_us;
+      *max_index = max_us;
+      break;
+   }
+   case GL_UNSIGNED_BYTE: {
+      const GLubyte *ub_indices = (const GLubyte *)indices;
+      GLuint max_ub = 0;
+      GLuint min_ub = ~0U;
+      if (restart) {
+         for (i = 0; i < count; i++) {
+            if (ub_indices[i] != restartIndex) {
+               if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
+               if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
+            }
+         }
+      }
+      else {
+         for (i = 0; i < count; i++) {
+            if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
+            if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
+         }
+      }
+      *min_index = min_ub;
+      *max_index = max_ub;
+      break;
+   }
+   default:
+      unreachable("not reached");
+   }
+
+   if (_mesa_is_bufferobj(ib->obj)) {
+      /* Store under the byte offset used for the lookup above; keying on
+       * prim->start here would make the two keys disagree and the cache
+       * would never hit.
+       */
+      vbo_minmax_cache_store(ctx, ib->obj, ib->type, offset, count,
+                             *min_index, *max_index);
+      ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
+   }
+}
+
+/**
+ * Compute the overall min and max index across nr_prims primitives.
+ *
+ * Primitives whose index ranges are directly adjacent (start + count of
+ * one equals start of the next) are merged into a single scan. With
+ * nr_prims == 0 the outputs keep their initial values (~0 and 0).
+ */
+void
+vbo_get_minmax_indices(struct gl_context *ctx,
+                       const struct _mesa_prim *prims,
+                       const struct _mesa_index_buffer *ib,
+                       GLuint *min_index,
+                       GLuint *max_index,
+                       GLuint nr_prims)
+{
+   GLuint cur = 0;
+
+   *min_index = ~0;
+   *max_index = 0;
+
+   while (cur < nr_prims) {
+      const struct _mesa_prim *run_start = &prims[cur];
+      GLuint run_count = run_start->count;
+      GLuint run_min, run_max;
+
+      /* Extend the run over contiguous primitives to reduce the number
+       * of map/unmap operations.
+       */
+      for (; cur + 1 < nr_prims &&
+             prims[cur].start + prims[cur].count == prims[cur + 1].start;
+           cur++) {
+         run_count += prims[cur + 1].count;
+      }
+
+      vbo_get_minmax_index(ctx, run_start, ib, &run_min, &run_max, run_count);
+      *min_index = MIN2(*min_index, run_min);
+      *max_index = MAX2(*max_index, run_max);
+
+      cur++;
+   }
+}
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
p4_general_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
- prefetchw 16(%rdi)
+ prefetcht1 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
p4_3d_loop:
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
- prefetchw 16(%rdi)
+ prefetcht1 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
- prefetch 16(%rdx)
+ prefetcht1 16(%rdx)
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
movq V4F_START(%rdx), %rsi /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch 64(%rsi)
- prefetchw 64(%rdi)
+ prefetcht1 64(%rsi)
+ prefetcht1 64(%rdi)
add %ecx, %ecx
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
movd (%rsi), %mm0 /* | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
p4_3d_no_rot_loop:
- prefetchw 32(%rdi)
+ prefetcht1 32(%rdi)
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
addq $16, %rdi
decl %ecx
- prefetch 32(%rdx)
+ prefetcht1 32(%rdx)
jnz p4_3d_no_rot_loop
p4_3d_no_rot_done:
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 32(%rsi), %mm2 /* m21 | m20 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
movd 40(%rsi), %mm1 /* | m22 */
p4_perspective_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
addq $16, %rdi
decl %ecx
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
jnz p4_perspective_loop
p4_perspective_done:
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movd (%rsi), %mm0 /* | m00 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 48(%rsi), %mm1 /* m31 | m30 */
p4_2d_no_rot_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
addq %rax, %rdx
pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
pfadd %mm4, %mm6 /* x1*m11+x3*m31 | x0*m00+x3*m30 */
movq %mm6, (%rdi) /* write r0, r1 */
movd (%rsi), %mm0 /* | m00 */
movd 4(%rsi), %mm1 /* | m01 */
- prefetch (%rdx)
+ prefetcht1 (%rdx)
punpckldq 16(%rsi), %mm0 /* m10 | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 4 */
p4_2d_loop:
- prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
+ prefetcht1 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm3 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
pfacc %mm4, %mm3 /* x0*m01+x1*m11 | x0*m00+x1*m10 */
pfmul %mm2, %mm6 /* x3*m31 | x3*m30 */
- prefetch 32(%rdx) /* hopefully stride is zero */
+ prefetcht1 32(%rdx) /* hopefully stride is zero */
pfadd %mm6, %mm3 /* r1 | r0 */
ralloc_free(ht);
}
+/**
+ * Empties the given hash table: every slot's key is reset to NULL and the
+ * entry counters are zeroed, while the table itself and its allocated size
+ * are left as they are.
+ *
+ * If delete_function is non-NULL, it is invoked for each live entry (slots
+ * holding the deleted-key marker are skipped) before the slot is reset.
+ */
+void
+_mesa_hash_table_clear(struct hash_table *ht,
+                       void (*delete_function)(struct hash_entry *entry))
+{
+   struct hash_entry *slot = ht->table;
+
+   while (slot != ht->table + ht->size) {
+      if (slot->key != NULL) {
+         if (delete_function != NULL && slot->key != ht->deleted_key)
+            delete_function(slot);
+
+         slot->key = NULL;
+      }
+      slot++;
+   }
+
+   ht->deleted_entries = 0;
+   ht->entries = 0;
+}
+
/** Sets the value of the key pointer used for deleted entries in the table.
*
* The assumption is that usually keys are actual pointers, so we use a
* required to avoid memory leaks, perform a search
* before inserting.
*/
- if (entry->hash == hash &&
+ if (!entry_is_deleted(ht, entry) &&
+ entry->hash == hash &&
ht->key_equals_function(key, entry->key)) {
entry->key = key;
entry->data = data;
const void *b));
void _mesa_hash_table_destroy(struct hash_table *ht,
void (*delete_function)(struct hash_entry *entry));
+void _mesa_hash_table_clear(struct hash_table *ht,
+ void (*delete_function)(struct hash_entry *entry));
void _mesa_hash_table_set_deleted_key(struct hash_table *ht,
const void *deleted_key);
+/** Returns the table's current 'entries' counter. */
+static inline uint32_t
+_mesa_hash_table_num_entries(struct hash_table *ht)
+{
+   const uint32_t live_entries = ht->entries;
+
+   return live_entries;
+}
+
struct hash_entry *
_mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data);
struct hash_entry *
* If freeing of old keys is required to avoid memory leaks,
* perform a search before inserting.
*/
- if (entry->hash == hash &&
+ if (!entry_is_deleted(entry) &&
+ entry->hash == hash &&
ht->key_equals_function(key, entry->key)) {
entry->key = key;
return entry;
$(DLOPEN_LIBS)
TESTS = \
+ clear \
collision \
delete_and_lookup \
delete_management \
--- /dev/null
+/*
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "hash_table.h"
+
+/* Builds the test key for index i: a pointer whose integer value is i + 1,
+ * so index 0 yields a non-NULL key. */
+static void *make_key(uint32_t i)
+{
+   const uintptr_t bits = (uintptr_t)(i + 1);
+
+   return (void *)bits;
+}
+
+/* Recovers the index from a key built by make_key(): the key's integer
+ * value minus one, narrowed to 32 bits. */
+static uint32_t key_id(const void *key)
+{
+   const uintptr_t bits = (uintptr_t)key;
+
+   return (uint32_t)(bits - 1);
+}
+
+/* Hash callback: the key's pointer value converted to uint32_t. */
+static uint32_t key_hash(const void *key)
+{
+   const uintptr_t bits = (uintptr_t)key;
+
+   return (uint32_t)bits;
+}
+
+/* Equality callback: two keys match only when they are the same pointer. */
+static bool key_equal(const void *a, const void *b)
+{
+   if (a != b)
+      return false;
+
+   return true;
+}
+
+/* Deletion callback passed to _mesa_hash_table_clear().  entry->data is
+ * treated as a pointer to a bool flag: the flag must still be false (so a
+ * second call for the same entry trips the assert) and is then set. */
+static void delete_function(struct hash_entry *entry)
+{
+   bool *const flag = (bool *)entry->data;
+
+   assert(*flag == false);
+   *flag = true;
+}
+
+/**
+ * Test driver for _mesa_hash_table_clear().
+ *
+ * Fills a table with `size` entries whose data pointers refer to per-entry
+ * flags, clears it with delete_function (which sets each flag) and checks
+ * that the table then iterates as empty.  Afterwards it verifies that every
+ * flag was set, repopulates the table, and checks that each key can be found
+ * again and that iteration only yields keys in the expected range.  The
+ * entry count reported by _mesa_hash_table_num_entries() is checked after
+ * each phase.
+ *
+ * Returns 0 on success.  All checks go through assert(), so they are only
+ * active when NDEBUG is not defined.
+ */
+int main(void)
+{
+   /* An enum constant keeps flags[] a fixed-size array instead of a VLA. */
+   enum { size = 1000 };
+   struct hash_table *ht;
+   struct hash_entry *entry;
+   bool flags[size];
+   uint32_t i;
+
+   ht = _mesa_hash_table_create(NULL, key_hash, key_equal);
+   /* NOTE(review): presumably creation returns NULL on failure - confirm. */
+   assert(ht != NULL);
+
+   for (i = 0; i < size; ++i) {
+      flags[i] = false;
+      _mesa_hash_table_insert(ht, make_key(i), &flags[i]);
+   }
+   assert(_mesa_hash_table_num_entries(ht) == size);
+
+   _mesa_hash_table_clear(ht, delete_function);
+   assert(_mesa_hash_table_next_entry(ht, NULL) == NULL);
+   assert(_mesa_hash_table_num_entries(ht) == 0);
+
+   /* Check that delete_function was called and that repopulating the table
+    * works. */
+   for (i = 0; i < size; ++i) {
+      assert(flags[i]);
+      flags[i] = false;
+      _mesa_hash_table_insert(ht, make_key(i), &flags[i]);
+   }
+   assert(_mesa_hash_table_num_entries(ht) == size);
+
+   /* Check that exactly the right set of entries is in the table. */
+   for (i = 0; i < size; ++i) {
+      assert(_mesa_hash_table_search(ht, make_key(i)));
+   }
+
+   hash_table_foreach(ht, entry) {
+      assert(key_id(entry->key) < size);
+   }
+
+   _mesa_hash_table_destroy(ht, NULL);
+
+   return 0;
+}