working on implementing device memory
[kazan.git] / src / pipeline / pipeline.cpp
1 /*
2 * Copyright 2017 Jacob Lifshay
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 *
22 */
#include "pipeline.h"
#include "spirv_to_llvm/spirv_to_llvm.h"
#include "llvm_wrapper/llvm_wrapper.h"
#include "llvm_wrapper/orc_compile_stack.h"
#include "vulkan/util.h"
#include "util/soft_float.h"
#include "json/json.h"
#include <cassert>
#include <cmath>
#include <cstring>
#include <iostream>
#include <stdexcept>
#include <vector>
34
35 namespace kazan
36 {
37 namespace pipeline
38 {
39 llvm_wrapper::Module Pipeline::optimize_module(llvm_wrapper::Module module,
40 ::LLVMTargetMachineRef target_machine)
41 {
42 switch(llvm_wrapper::Target_machine::get_code_gen_opt_level(target_machine))
43 {
44 case ::LLVMCodeGenLevelNone:
45 case ::LLVMCodeGenLevelLess:
46 break;
47 case ::LLVMCodeGenLevelDefault:
48 case ::LLVMCodeGenLevelAggressive:
49 {
50 #warning finish implementing module optimizations
51 {
52 auto manager = llvm_wrapper::Pass_manager::create_function_pass_manager(module.get());
53 ::LLVMAddAnalysisPasses(target_machine, manager.get());
54 ::LLVMAddPromoteMemoryToRegisterPass(manager.get());
55 ::LLVMAddScalarReplAggregatesPass(manager.get());
56 ::LLVMAddScalarizerPass(manager.get());
57 ::LLVMAddEarlyCSEMemSSAPass(manager.get());
58 ::LLVMAddSCCPPass(manager.get());
59 ::LLVMAddAggressiveDCEPass(manager.get());
60 ::LLVMAddLICMPass(manager.get());
61 ::LLVMAddIndVarSimplifyPass(manager.get());
62 ::LLVMAddCFGSimplificationPass(manager.get());
63 ::LLVMAddReassociatePass(manager.get());
64 ::LLVMAddInstructionCombiningPass(manager.get());
65 ::LLVMAddNewGVNPass(manager.get());
66 ::LLVMAddCorrelatedValuePropagationPass(manager.get());
67 ::LLVMInitializeFunctionPassManager(manager.get());
68 for(auto fn = ::LLVMGetFirstFunction(module.get()); fn; fn = ::LLVMGetNextFunction(fn))
69 ::LLVMRunFunctionPassManager(manager.get(), fn);
70 ::LLVMFinalizeFunctionPassManager(manager.get());
71 }
72 {
73 auto manager = llvm_wrapper::Pass_manager::create_module_pass_manager();
74 ::LLVMAddAnalysisPasses(target_machine, manager.get());
75 ::LLVMAddIPSCCPPass(manager.get());
76 ::LLVMAddFunctionInliningPass(manager.get());
77 ::LLVMAddDeadArgEliminationPass(manager.get());
78 ::LLVMAddGlobalDCEPass(manager.get());
79 ::LLVMRunPassManager(manager.get(), module.get());
80 }
81 {
82 auto manager = llvm_wrapper::Pass_manager::create_function_pass_manager(module.get());
83 ::LLVMAddAnalysisPasses(target_machine, manager.get());
84 ::LLVMAddCFGSimplificationPass(manager.get());
85 ::LLVMAddPromoteMemoryToRegisterPass(manager.get());
86 ::LLVMAddScalarReplAggregatesPass(manager.get());
87 ::LLVMAddLICMPass(manager.get());
88 ::LLVMAddIndVarSimplifyPass(manager.get());
89 ::LLVMAddReassociatePass(manager.get());
90 ::LLVMAddInstructionCombiningPass(manager.get());
91 ::LLVMAddLoopUnrollPass(manager.get());
92 ::LLVMAddSLPVectorizePass(manager.get());
93 ::LLVMAddAggressiveDCEPass(manager.get());
94 ::LLVMInitializeFunctionPassManager(manager.get());
95 for(auto fn = ::LLVMGetFirstFunction(module.get()); fn; fn = ::LLVMGetNextFunction(fn))
96 ::LLVMRunFunctionPassManager(manager.get(), fn);
97 ::LLVMFinalizeFunctionPassManager(manager.get());
98 }
99 std::cerr << "optimized module:" << std::endl;
100 ::LLVMDumpModule(module.get());
101 break;
102 }
103 }
104 return module;
105 }
106
107 struct Graphics_pipeline::Implementation
108 {
109 llvm_wrapper::Context llvm_context = llvm_wrapper::Context::create();
110 spirv_to_llvm::Jit_symbol_resolver jit_symbol_resolver;
111 llvm_wrapper::Orc_compile_stack jit_stack;
112 llvm_wrapper::Target_data data_layout;
113 std::vector<spirv_to_llvm::Converted_module> compiled_shaders;
114 std::shared_ptr<spirv_to_llvm::Struct_type_descriptor> vertex_shader_output_struct;
115 std::string append_value_to_string(std::string str,
116 spirv_to_llvm::Type_descriptor &type,
117 const void *value) const
118 {
119 struct Visitor : public spirv_to_llvm::Type_descriptor::Type_visitor
120 {
121 const Implementation *this_;
122 std::string &str;
123 const void *value;
124 Visitor(const Implementation *this_, std::string &str, const void *value) noexcept
125 : this_(this_),
126 str(str),
127 value(value)
128 {
129 }
130 virtual void visit(spirv_to_llvm::Simple_type_descriptor &type) override
131 {
132 auto llvm_type = type.get_or_make_type().type;
133 switch(::LLVMGetTypeKind(llvm_type))
134 {
135 case ::LLVMVoidTypeKind:
136 case ::LLVMX86_FP80TypeKind:
137 case ::LLVMFP128TypeKind:
138 case ::LLVMPPC_FP128TypeKind:
139 case ::LLVMLabelTypeKind:
140 case ::LLVMFunctionTypeKind:
141 case ::LLVMStructTypeKind:
142 case ::LLVMArrayTypeKind:
143 case ::LLVMPointerTypeKind:
144 case ::LLVMVectorTypeKind:
145 case ::LLVMMetadataTypeKind:
146 case ::LLVMX86_MMXTypeKind:
147 case ::LLVMTokenTypeKind:
148 break;
149 case ::LLVMHalfTypeKind:
150 {
151 auto integer_value = *static_cast<const std::uint16_t *>(value);
152 auto float_value =
153 util::soft_float::ExtendedFloat::fromHalfPrecision(integer_value);
154 str = json::ast::Number_value::append_double_to_string(
155 static_cast<double>(float_value), std::move(str));
156 if(float_value.isNaN())
157 {
158 str += " (0x";
159 str = json::ast::Number_value::append_unsigned_integer_to_string(
160 integer_value, std::move(str), 0x10);
161 str += ")";
162 }
163 return;
164 }
165 case ::LLVMFloatTypeKind:
166 {
167 static_assert(sizeof(std::uint32_t) == sizeof(float)
168 && alignof(std::uint32_t) == alignof(float),
169 "");
170 union
171 {
172 std::uint32_t integer_value;
173 float float_value;
174 };
175 integer_value = *static_cast<const std::uint32_t *>(value);
176 str = json::ast::Number_value::append_double_to_string(float_value,
177 std::move(str));
178 if(std::isnan(float_value))
179 {
180 str += " (0x";
181 str = json::ast::Number_value::append_unsigned_integer_to_string(
182 integer_value, std::move(str), 0x10);
183 str += ")";
184 }
185 return;
186 }
187 case ::LLVMDoubleTypeKind:
188 {
189 static_assert(sizeof(std::uint64_t) == sizeof(double)
190 && alignof(std::uint64_t) == alignof(double),
191 "");
192 union
193 {
194 std::uint64_t integer_value;
195 double float_value;
196 };
197 integer_value = *static_cast<const std::uint64_t *>(value);
198 str = json::ast::Number_value::append_double_to_string(float_value,
199 std::move(str));
200 if(std::isnan(float_value))
201 {
202 str += " (0x";
203 str = json::ast::Number_value::append_unsigned_integer_to_string(
204 integer_value, std::move(str), 0x10);
205 str += ")";
206 }
207 return;
208 }
209 case ::LLVMIntegerTypeKind:
210 {
211 switch(::LLVMGetIntTypeWidth(llvm_type))
212 {
213 case 8:
214 {
215 auto integer_value = *static_cast<const std::uint8_t *>(value);
216 str += "0x";
217 str = json::ast::Number_value::append_unsigned_integer_to_string(
218 integer_value, std::move(str), 0x10);
219 str += " ";
220 str = json::ast::Number_value::append_unsigned_integer_to_string(
221 integer_value, std::move(str));
222 str += " ";
223 str = json::ast::Number_value::append_signed_integer_to_string(
224 static_cast<std::int8_t>(integer_value), std::move(str));
225 return;
226 }
227 case 16:
228 {
229 auto integer_value = *static_cast<const std::uint16_t *>(value);
230 str += "0x";
231 str = json::ast::Number_value::append_unsigned_integer_to_string(
232 integer_value, std::move(str), 0x10);
233 str += " ";
234 str = json::ast::Number_value::append_unsigned_integer_to_string(
235 integer_value, std::move(str));
236 str += " ";
237 str = json::ast::Number_value::append_signed_integer_to_string(
238 static_cast<std::int16_t>(integer_value), std::move(str));
239 return;
240 }
241 case 32:
242 {
243 auto integer_value = *static_cast<const std::uint32_t *>(value);
244 str += "0x";
245 str = json::ast::Number_value::append_unsigned_integer_to_string(
246 integer_value, std::move(str), 0x10);
247 str += " ";
248 str = json::ast::Number_value::append_unsigned_integer_to_string(
249 integer_value, std::move(str));
250 str += " ";
251 str = json::ast::Number_value::append_signed_integer_to_string(
252 static_cast<std::int32_t>(integer_value), std::move(str));
253 return;
254 }
255 case 64:
256 {
257 auto integer_value = *static_cast<const std::uint64_t *>(value);
258 str += "0x";
259 str = json::ast::Number_value::append_unsigned_integer_to_string(
260 integer_value, std::move(str), 0x10);
261 str += " ";
262 str = json::ast::Number_value::append_unsigned_integer_to_string(
263 integer_value, std::move(str));
264 str += " ";
265 str = json::ast::Number_value::append_signed_integer_to_string(
266 static_cast<std::int64_t>(integer_value), std::move(str));
267 return;
268 }
269 }
270 break;
271 }
272 }
273 assert(!"unhandled type");
274 throw std::runtime_error("unhandled type");
275 }
276 virtual void visit(spirv_to_llvm::Vector_type_descriptor &type) override
277 {
278 auto llvm_element_type = type.get_element_type()->get_or_make_type().type;
279 std::size_t element_size =
280 ::LLVMABISizeOfType(this_->data_layout.get(), llvm_element_type);
281 std::size_t element_count = type.get_element_count();
282 str += "<";
283 auto separator = "";
284 for(std::size_t i = 0; i < element_count; i++)
285 {
286 str += separator;
287 separator = ", ";
288 str = this_->append_value_to_string(
289 std::move(str),
290 *type.get_element_type(),
291 static_cast<const char *>(value) + i * element_size);
292 }
293 str += ">";
294 }
295 virtual void visit(spirv_to_llvm::Matrix_type_descriptor &type) override
296 {
297 assert(!"dumping matrix not implemented");
298 throw std::runtime_error("dumping matrix not implemented");
299 #warning dumping matrix not implemented
300 }
301 virtual void visit(spirv_to_llvm::Array_type_descriptor &type) override
302 {
303 auto llvm_element_type = type.get_element_type()->get_or_make_type().type;
304 std::size_t element_size =
305 ::LLVMABISizeOfType(this_->data_layout.get(), llvm_element_type);
306 std::size_t element_count = type.get_element_count();
307 str += "[";
308 auto separator = "";
309 for(std::size_t i = 0; i < element_count; i++)
310 {
311 str += separator;
312 separator = ", ";
313 str = this_->append_value_to_string(
314 std::move(str),
315 *type.get_element_type(),
316 static_cast<const char *>(value) + i * element_size);
317 }
318 str += "]";
319 }
320 virtual void visit(spirv_to_llvm::Pointer_type_descriptor &type) override
321 {
322 str += "pointer:0x";
323 str = json::ast::Number_value::append_unsigned_integer_to_string(
324 reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value)),
325 std::move(str),
326 0x10);
327 }
328 virtual void visit(spirv_to_llvm::Function_type_descriptor &type) override
329 {
330 str += "function:0x";
331 str = json::ast::Number_value::append_unsigned_integer_to_string(
332 reinterpret_cast<std::uint64_t>(*static_cast<const void *const *>(value)),
333 std::move(str),
334 0x10);
335 }
336 virtual void visit(spirv_to_llvm::Struct_type_descriptor &type) override
337 {
338 auto &&members = type.get_members(true);
339 auto llvm_type = type.get_or_make_type().type;
340 str += "{";
341 auto separator = "";
342 for(auto &member : members)
343 {
344 str += separator;
345 separator = ", ";
346 str = this_->append_value_to_string(
347 std::move(str),
348 *member.type,
349 static_cast<const char *>(value)
350 + ::LLVMOffsetOfElement(
351 this_->data_layout.get(), llvm_type, member.llvm_member_index));
352 }
353 str += "}";
354 }
355 };
356 type.visit(Visitor(this, str, value));
357 return str;
358 }
359 };
360
361 void Graphics_pipeline::dump_vertex_shader_output_struct(const void *output_struct) const
362 {
363 std::cerr << "output: "
364 << implementation->append_value_to_string(
365 {}, *implementation->vertex_shader_output_struct, output_struct)
366 << std::endl;
367 }
368
369 void Graphics_pipeline::run(std::uint32_t vertex_start_index,
370 std::uint32_t vertex_end_index,
371 std::uint32_t instance_id,
372 const vulkan::Vulkan_image &color_attachment,
373 void *const *bindings)
374 {
375 typedef std::uint32_t Pixel_type;
376 assert(color_attachment.descriptor.tiling == VK_IMAGE_TILING_LINEAR);
377 auto color_attachment_memory_properties = color_attachment.descriptor.get_memory_properties();
378 auto color_attachment_memory_properties_color_component =
379 color_attachment_memory_properties.get_color_component();
380 std::size_t color_attachment_stride = color_attachment_memory_properties_color_component.stride;
381 std::size_t color_attachment_pixel_size =
382 color_attachment_memory_properties_color_component.pixel_size;
383 assert(color_attachment_memory_properties_color_component.offset_from_array_layer_start == 0);
384 void *color_attachment_memory = color_attachment.memory.get();
385 float viewport_x_scale, viewport_x_offset, viewport_y_scale, viewport_y_offset,
386 viewport_z_scale, viewport_z_offset;
387 {
388 float px = viewport.width;
389 float ox = viewport.x + 0.5f * viewport.width;
390 float py = viewport.height;
391 float oy = viewport.y + 0.5f * viewport.height;
392 float pz = viewport.maxDepth - viewport.minDepth;
393 float oz = viewport.minDepth;
394 viewport_x_scale = px * 0.5f;
395 viewport_x_offset = ox;
396 viewport_y_scale = py * 0.5f;
397 viewport_y_offset = oy;
398 viewport_z_scale = pz;
399 viewport_z_offset = oz;
400 }
401 constexpr std::size_t vec4_native_alignment = alignof(float) * 4;
402 constexpr std::size_t max_alignment = alignof(std::max_align_t);
403 constexpr std::size_t vec4_alignment =
404 vec4_native_alignment > max_alignment ? max_alignment : vec4_native_alignment;
405 constexpr std::size_t ivec4_native_alignment = alignof(std::int32_t) * 4;
406 constexpr std::size_t ivec4_alignment =
407 ivec4_native_alignment > max_alignment ? max_alignment : ivec4_native_alignment;
408 struct alignas(vec4_alignment) Vec4
409 {
410 float x;
411 float y;
412 float z;
413 float w;
414 constexpr Vec4() noexcept : x(), y(), z(), w()
415 {
416 }
417 constexpr explicit Vec4(float x, float y, float z, float w) noexcept : x(x),
418 y(y),
419 z(z),
420 w(w)
421 {
422 }
423 };
424 struct alignas(ivec4_alignment) Ivec4
425 {
426 std::int32_t x;
427 std::int32_t y;
428 std::int32_t z;
429 std::int32_t w;
430 constexpr Ivec4() noexcept : x(), y(), z(), w()
431 {
432 }
433 constexpr explicit Ivec4(std::int32_t x,
434 std::int32_t y,
435 std::int32_t z,
436 std::int32_t w) noexcept : x(x),
437 y(y),
438 z(z),
439 w(w)
440 {
441 }
442 };
443 auto interpolate_float = [](float t, float v0, float v1) noexcept->float
444 {
445 return t * v1 + (1.0f - t) * v0;
446 };
447 auto interpolate_vec4 = [interpolate_float](
448 float t, const Vec4 &v0, const Vec4 &v1) noexcept->Vec4
449 {
450 return Vec4(interpolate_float(t, v0.x, v1.x),
451 interpolate_float(t, v0.y, v1.y),
452 interpolate_float(t, v0.z, v1.z),
453 interpolate_float(t, v0.w, v1.w));
454 };
455 static constexpr std::size_t triangle_vertex_count = 3;
456 struct Triangle
457 {
458 Vec4 vertexes[triangle_vertex_count];
459 constexpr Triangle() noexcept : vertexes{}
460 {
461 }
462 constexpr Triangle(const Vec4 &v0, const Vec4 &v1, const Vec4 &v2) noexcept
463 : vertexes{v0, v1, v2}
464 {
465 }
466 };
467 auto solve_for_t = [](float v0, float v1) noexcept->float
468 {
469 // solves interpolate_float(t, v0, v1) == 0
470 return v0 / (v0 - v1);
471 };
472 auto clip_edge = [solve_for_t, interpolate_vec4](const Vec4 &start_vertex,
473 const Vec4 &end_vertex,
474 Vec4 *output_vertexes,
475 std::size_t &output_vertex_count,
476 auto eval_vertex) -> bool
477 {
478 // eval_vertex returns a non-negative number if the vertex is inside the clip volume
479 float start_vertex_signed_distance = eval_vertex(start_vertex);
480 float end_vertex_signed_distance = eval_vertex(end_vertex);
481 if(start_vertex_signed_distance != start_vertex_signed_distance)
482 return false; // triangle has a NaN coordinate; skip it
483 if(start_vertex_signed_distance < 0)
484 {
485 // start_vertex is outside
486 if(end_vertex_signed_distance < 0)
487 {
488 // end_vertex is outside; do nothing
489 }
490 else
491 {
492 // end_vertex is inside
493 output_vertexes[output_vertex_count++] = interpolate_vec4(
494 solve_for_t(start_vertex_signed_distance, end_vertex_signed_distance),
495 start_vertex,
496 end_vertex);
497 output_vertexes[output_vertex_count++] = end_vertex;
498 }
499 }
500 else
501 {
502 // start_vertex is inside
503 if(end_vertex_signed_distance < 0)
504 {
505 // end_vertex is outside
506 output_vertexes[output_vertex_count++] = interpolate_vec4(
507 solve_for_t(start_vertex_signed_distance, end_vertex_signed_distance),
508 start_vertex,
509 end_vertex);
510 }
511 else
512 {
513 // end_vertex is inside
514 output_vertexes[output_vertex_count++] = end_vertex;
515 }
516 }
517 return true;
518 };
519 auto clip_triangles = [clip_edge](
520 std::vector<Triangle> &triangles, std::vector<Triangle> &temp_triangles, auto eval_vertex)
521 {
522 temp_triangles.clear();
523 for(auto &input_ref : triangles)
524 {
525 Triangle input = input_ref; // copy to enable compiler optimizations
526 constexpr std::size_t max_clipped_output_vertex_count = 4;
527 Vec4 output_vertexes[max_clipped_output_vertex_count];
528 std::size_t output_vertex_count = 0;
529 bool skip_triangle = false;
530 std::size_t end_vertex_index = 1;
531 for(std::size_t start_vertex_index = 0; start_vertex_index < triangle_vertex_count;
532 start_vertex_index++)
533 {
534 if(!clip_edge(input.vertexes[start_vertex_index],
535 input.vertexes[end_vertex_index],
536 output_vertexes,
537 output_vertex_count,
538 eval_vertex))
539 {
540 skip_triangle = true;
541 break;
542 }
543 if(++end_vertex_index >= triangle_vertex_count)
544 end_vertex_index = 0;
545 }
546 if(skip_triangle)
547 continue;
548 switch(output_vertex_count)
549 {
550 case 0:
551 case 1:
552 case 2:
553 continue;
554 case 3:
555 temp_triangles.push_back(
556 Triangle(output_vertexes[0], output_vertexes[1], output_vertexes[2]));
557 continue;
558 case 4:
559 temp_triangles.push_back(
560 Triangle(output_vertexes[0], output_vertexes[1], output_vertexes[2]));
561 temp_triangles.push_back(
562 Triangle(output_vertexes[0], output_vertexes[2], output_vertexes[3]));
563 continue;
564 }
565 assert(!"clipping algorithm failed");
566 }
567 temp_triangles.swap(triangles);
568 };
569 std::vector<Triangle> triangles;
570 std::vector<Triangle> temp_triangles;
571 constexpr std::size_t chunk_max_size = 96;
572 static_assert(chunk_max_size % triangle_vertex_count == 0, "");
573 std::unique_ptr<unsigned char[]> chunk_vertex_buffer(
574 new unsigned char[get_vertex_shader_output_struct_size() * chunk_max_size]);
575 while(vertex_start_index < vertex_end_index)
576 {
577 std::uint32_t chunk_size = vertex_end_index - vertex_start_index;
578 if(chunk_size > chunk_max_size)
579 chunk_size = chunk_max_size;
580 auto current_vertex_start_index = vertex_start_index;
581 vertex_start_index += chunk_size;
582 run_vertex_shader(current_vertex_start_index,
583 current_vertex_start_index + chunk_size,
584 instance_id,
585 chunk_vertex_buffer.get(),
586 bindings);
587 const unsigned char *current_vertex =
588 chunk_vertex_buffer.get() + vertex_shader_position_output_offset;
589 triangles.clear();
590 for(std::uint32_t i = 0; i + triangle_vertex_count <= chunk_size;
591 i += triangle_vertex_count)
592 {
593 Triangle triangle;
594 for(std::size_t j = 0; j < triangle_vertex_count; j++)
595 {
596 triangle.vertexes[j] = *reinterpret_cast<const Vec4 *>(current_vertex);
597 current_vertex += vertex_shader_output_struct_size;
598 }
599 triangles.push_back(triangle);
600 }
601 // clip to 0 <= vertex.z
602 clip_triangles(triangles,
603 temp_triangles,
604 [](const Vec4 &vertex) noexcept->float
605 {
606 return vertex.z;
607 });
608 // clip to vertex.z <= vertex.w
609 clip_triangles(triangles,
610 temp_triangles,
611 [](const Vec4 &vertex) noexcept->float
612 {
613 return vertex.w - vertex.z;
614 });
615 // clip to -vertex.w <= vertex.x
616 clip_triangles(triangles,
617 temp_triangles,
618 [](const Vec4 &vertex) noexcept->float
619 {
620 return vertex.x + vertex.w;
621 });
622 // clip to vertex.x <= vertex.w
623 clip_triangles(triangles,
624 temp_triangles,
625 [](const Vec4 &vertex) noexcept->float
626 {
627 return vertex.w - vertex.x;
628 });
629 // clip to -vertex.w <= vertex.y
630 clip_triangles(triangles,
631 temp_triangles,
632 [](const Vec4 &vertex) noexcept->float
633 {
634 return vertex.y + vertex.w;
635 });
636 // clip to vertex.y <= vertex.w
637 clip_triangles(triangles,
638 temp_triangles,
639 [](const Vec4 &vertex) noexcept->float
640 {
641 return vertex.w - vertex.y;
642 });
643 VkOffset2D clipped_scissor_rect_min = scissor_rect.offset;
644 VkOffset2D clipped_scissor_rect_end = {
645 .x = scissor_rect.offset.x + static_cast<std::int32_t>(scissor_rect.extent.width),
646 .y = scissor_rect.offset.y + static_cast<std::int32_t>(scissor_rect.extent.height),
647 };
648 if(clipped_scissor_rect_min.x < 0)
649 clipped_scissor_rect_min.x = 0;
650 if(clipped_scissor_rect_min.y < 0)
651 clipped_scissor_rect_min.y = 0;
652 if(clipped_scissor_rect_end.x > color_attachment.descriptor.extent.width)
653 clipped_scissor_rect_end.x = color_attachment.descriptor.extent.width;
654 if(clipped_scissor_rect_end.y < color_attachment.descriptor.extent.height)
655 clipped_scissor_rect_end.y = color_attachment.descriptor.extent.height;
656 if(clipped_scissor_rect_end.x <= clipped_scissor_rect_min.x)
657 continue;
658 if(clipped_scissor_rect_end.y <= clipped_scissor_rect_min.y)
659 continue;
660 for(std::size_t triangle_index = 0; triangle_index < triangles.size(); triangle_index++)
661 {
662 Triangle triangle = triangles[triangle_index];
663 Vec4 projected_triangle_and_inv_w[triangle_vertex_count];
664 Vec4 framebuffer_coordinates[triangle_vertex_count];
665 for(std::size_t i = 0; i < triangle_vertex_count; i++)
666 {
667 projected_triangle_and_inv_w[i].w = 1.0f / triangle.vertexes[i].w;
668 projected_triangle_and_inv_w[i].x =
669 triangle.vertexes[i].x * projected_triangle_and_inv_w[i].w;
670 projected_triangle_and_inv_w[i].y =
671 triangle.vertexes[i].y * projected_triangle_and_inv_w[i].w;
672 projected_triangle_and_inv_w[i].z =
673 triangle.vertexes[i].z * projected_triangle_and_inv_w[i].w;
674 framebuffer_coordinates[i] =
675 Vec4(projected_triangle_and_inv_w[i].x * viewport_x_scale + viewport_x_offset,
676 projected_triangle_and_inv_w[i].y * viewport_y_scale + viewport_y_offset,
677 projected_triangle_and_inv_w[i].z * viewport_z_scale + viewport_z_offset,
678 0);
679 }
680 float orientation = 0;
681 for(std::size_t start_vertex_index = 0, end_vertex_index = 1;
682 start_vertex_index < triangle_vertex_count;
683 start_vertex_index++)
684 {
685 float x1 = framebuffer_coordinates[start_vertex_index].x;
686 float y1 = framebuffer_coordinates[start_vertex_index].y;
687 float x2 = framebuffer_coordinates[end_vertex_index].x;
688 float y2 = framebuffer_coordinates[end_vertex_index].y;
689 orientation += x2 * y1 - x1 * y2;
690 if(++end_vertex_index >= triangle_vertex_count)
691 end_vertex_index = 0;
692 }
693 if(!(orientation < 0)
694 && !(orientation > 0)) // zero area triangle or triangle coordinate is NaN
695 continue;
696 // orientation > 0 for counter-clockwise triangle
697 // orientation < 0 for clockwise triangle
698 std::int32_t min_x, end_x, min_y, end_y;
699 bool first = true;
700 for(std::size_t i = 0; i < triangle_vertex_count; i++)
701 {
702 // x and y will be >= 0 so we can use truncate instead of floor for speed
703 auto current_min_x = static_cast<std::int32_t>(framebuffer_coordinates[i].x);
704 auto current_min_y = static_cast<std::int32_t>(framebuffer_coordinates[i].y);
705 std::int32_t current_end_x = current_min_x + 1;
706 std::int32_t current_end_y = current_min_y + 1;
707 if(first || current_min_x < min_x)
708 min_x = current_min_x;
709 if(first || current_end_x > end_x)
710 end_x = current_end_x;
711 if(first || current_min_y < min_y)
712 min_y = current_min_y;
713 if(first || current_end_y > end_y)
714 end_y = current_end_y;
715 first = false;
716 }
717 if(min_x < clipped_scissor_rect_min.x)
718 min_x = clipped_scissor_rect_min.x;
719 if(end_x > clipped_scissor_rect_end.x)
720 end_x = clipped_scissor_rect_end.x;
721 if(min_y < clipped_scissor_rect_min.y)
722 min_y = clipped_scissor_rect_min.y;
723 if(end_y > clipped_scissor_rect_end.y)
724 end_y = clipped_scissor_rect_end.y;
725 constexpr int log2_scale = 16;
726 constexpr auto scale = 1LL << log2_scale;
727 typedef std::int64_t Edge_equation_integer_type;
728 struct Edge_equation
729 {
730 Edge_equation_integer_type a;
731 Edge_equation_integer_type b;
732 Edge_equation_integer_type c;
733 Edge_equation_integer_type padding;
734 constexpr Edge_equation() noexcept : a(), b(), c(), padding()
735 {
736 }
737 constexpr Edge_equation(Edge_equation_integer_type a,
738 Edge_equation_integer_type b,
739 Edge_equation_integer_type c) noexcept : a(a),
740 b(b),
741 c(c),
742 padding()
743 {
744 }
745 constexpr bool inside(std::int32_t x, std::int32_t y) const noexcept
746 {
747 return a * x + b * y + c >= 0;
748 }
749 };
750 Edge_equation edge_equations[triangle_vertex_count];
751 bool skip_triangle = false;
752 for(std::size_t start_vertex_index = 0, end_vertex_index = 1, other_vertex_index = 2;
753 start_vertex_index < triangle_vertex_count;
754 start_vertex_index++)
755 {
756 float x1_float = framebuffer_coordinates[start_vertex_index].x;
757 float y1_float = framebuffer_coordinates[start_vertex_index].y;
758 float x2_float = framebuffer_coordinates[end_vertex_index].x;
759 float y2_float = framebuffer_coordinates[end_vertex_index].y;
760 [[gnu::unused]] float x3_float = framebuffer_coordinates[other_vertex_index].x;
761 [[gnu::unused]] float y3_float = framebuffer_coordinates[other_vertex_index].y;
762 auto x1_fixed = static_cast<Edge_equation_integer_type>(x1_float * scale);
763 auto y1_fixed = static_cast<Edge_equation_integer_type>(y1_float * scale);
764 auto x2_fixed = static_cast<Edge_equation_integer_type>(x2_float * scale);
765 auto y2_fixed = static_cast<Edge_equation_integer_type>(y2_float * scale);
766 [[gnu::unused]] auto x3_fixed =
767 static_cast<Edge_equation_integer_type>(x3_float * scale);
768 [[gnu::unused]] auto y3_fixed =
769 static_cast<Edge_equation_integer_type>(y3_float * scale);
770 Edge_equation_integer_type a;
771 Edge_equation_integer_type b;
772 Edge_equation_integer_type c;
773 {
774 // solve a * x1 + b * y1 + c == 0 &&
775 // a * x2 + b * y2 + c == 0 &&
776 // a * x3 + b * y3 + c >= 0
777 if(x1_fixed == x2_fixed && y1_fixed == y2_fixed)
778 {
779 // rounded to a zero-area triangle
780 skip_triangle = true;
781 break;
782 }
783 Edge_equation_integer_type a_fixed = (y1_fixed - y2_fixed) * scale;
784 Edge_equation_integer_type b_fixed = (x2_fixed - x1_fixed) * scale;
785 Edge_equation_integer_type c_fixed =
786 (x1_fixed * y2_fixed - x2_fixed * y1_fixed);
787
788 // offset to end up checking at pixel center instead of top-left pixel corner
789 c_fixed += (a_fixed + b_fixed) / 2;
790
791 a = a_fixed;
792 b = b_fixed;
793 c = c_fixed;
794 if(orientation > 0)
795 {
796 // fix sign
797 a = -a;
798 b = -b;
799 c = -c;
800 }
801 }
802 // handle top-left fill rule
803 if(a < 0 || (a == 0 && b < 0))
804 {
805 // not a top-left edge, fixup c
806 // effectively changes the '>=' to '>' in Edge_equation::inside
807 c--;
808 }
809
810 edge_equations[start_vertex_index] = Edge_equation(a, b, c);
811 if(++end_vertex_index >= triangle_vertex_count)
812 end_vertex_index = 0;
813 if(++other_vertex_index >= triangle_vertex_count)
814 other_vertex_index = 0;
815 }
816 if(skip_triangle)
817 continue;
818 auto fs = this->fragment_shader_function;
819 for(std::int32_t y = min_y; y < end_y; y++)
820 {
821 for(std::int32_t x = min_x; x < end_x; x++)
822 {
823 bool inside = true;
824 for(auto &edge_equation : edge_equations)
825 {
826 inside &= edge_equation.inside(x, y);
827 }
828 if(inside)
829 {
830 auto *pixel = reinterpret_cast<Pixel_type *>(
831 static_cast<unsigned char *>(color_attachment_memory)
832 + (static_cast<std::size_t>(x) * color_attachment_pixel_size
833 + static_cast<std::size_t>(y) * color_attachment_stride));
834 fs(pixel);
835 }
836 }
837 }
838 };
839 }
840 }
841
842 std::unique_ptr<Graphics_pipeline> Graphics_pipeline::make(
843 Pipeline_cache *pipeline_cache, const VkGraphicsPipelineCreateInfo &create_info)
844 {
845 assert(create_info.sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
846 auto *render_pass = Render_pass::from_handle(create_info.renderPass);
847 assert(render_pass);
848 auto *pipeline_layout = Pipeline_layout::from_handle(create_info.layout);
849 assert(pipeline_layout);
850 if(create_info.flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT)
851 {
852 #warning implement creating derived pipelines
853 throw std::runtime_error("creating derived pipelines is not implemented");
854 }
855 auto implementation = std::make_shared<Implementation>();
856 auto optimization_level = ::LLVMCodeGenLevelDefault;
857 if(create_info.flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
858 optimization_level = ::LLVMCodeGenLevelNone;
859 auto llvm_target_machine =
860 llvm_wrapper::Target_machine::create_native_target_machine(optimization_level);
861 implementation->compiled_shaders.reserve(create_info.stageCount);
862 util::Enum_set<spirv::Execution_model> found_shader_stages;
863 for(std::size_t i = 0; i < create_info.stageCount; i++)
864 {
865 auto &stage_info = create_info.pStages[i];
866 auto execution_models =
867 vulkan::get_execution_models_from_shader_stage_flags(stage_info.stage);
868 assert(execution_models.size() == 1);
869 auto execution_model = *execution_models.begin();
870 bool added_to_found_shader_stages =
871 std::get<1>(found_shader_stages.insert(execution_model));
872 if(!added_to_found_shader_stages)
873 throw std::runtime_error("duplicate shader stage");
874 auto *shader_module = Shader_module::from_handle(stage_info.module);
875 assert(shader_module);
876 {
877 spirv::Dump_callbacks dump_callbacks;
878 try
879 {
880 spirv::parse(dump_callbacks, shader_module->words(), shader_module->word_count());
881 }
882 catch(spirv::Parser_error &e)
883 {
884 std::cerr << dump_callbacks.ss.str() << std::endl;
885 throw;
886 }
887 std::cerr << dump_callbacks.ss.str() << std::endl;
888 }
889 assert(create_info.pVertexInputState);
890 assert(create_info.pVertexInputState->sType
891 == VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO);
892 auto compiled_shader = spirv_to_llvm::spirv_to_llvm(implementation->llvm_context.get(),
893 llvm_target_machine.get(),
894 shader_module->words(),
895 shader_module->word_count(),
896 implementation->compiled_shaders.size(),
897 execution_model,
898 stage_info.pName,
899 create_info.pVertexInputState);
900 std::cerr << "Translation to LLVM succeeded." << std::endl;
901 ::LLVMDumpModule(compiled_shader.module.get());
902 bool failed =
903 ::LLVMVerifyModule(compiled_shader.module.get(), ::LLVMPrintMessageAction, nullptr);
904 if(failed)
905 throw std::runtime_error("LLVM module verification failed");
906 implementation->compiled_shaders.push_back(std::move(compiled_shader));
907 }
908 implementation->data_layout = llvm_target_machine.create_target_data_layout();
909 implementation->jit_stack =
910 llvm_wrapper::Orc_compile_stack::create(std::move(llvm_target_machine), optimize_module);
911 Vertex_shader_function vertex_shader_function = nullptr;
912 std::size_t vertex_shader_output_struct_size = 0;
913 util::optional<std::size_t> vertex_shader_position_output_offset;
914 Fragment_shader_function fragment_shader_function = nullptr;
915 for(auto &compiled_shader : implementation->compiled_shaders)
916 {
917 implementation->jit_stack.add_eagerly_compiled_ir(
918 std::move(compiled_shader.module),
919 &spirv_to_llvm::Jit_symbol_resolver::resolve,
920 static_cast<void *>(&implementation->jit_symbol_resolver));
921 auto shader_entry_point_address = implementation->jit_stack.get_symbol_address(
922 compiled_shader.entry_function_name.c_str());
923 std::cerr << "shader entry: " << compiled_shader.entry_function_name << ": "
924 << reinterpret_cast<void *>(shader_entry_point_address) << std::endl;
925 assert(shader_entry_point_address);
926 switch(compiled_shader.execution_model)
927 {
928 case spirv::Execution_model::fragment:
929 fragment_shader_function =
930 reinterpret_cast<Fragment_shader_function>(shader_entry_point_address);
931 #warning finish implementing Graphics_pipeline::make
932 continue;
933 #warning finish implementing Graphics_pipeline::make
934 throw std::runtime_error("creating fragment shaders is not implemented");
935 case spirv::Execution_model::geometry:
936 #warning finish implementing Graphics_pipeline::make
937 throw std::runtime_error("creating geometry shaders is not implemented");
938 case spirv::Execution_model::gl_compute:
939 case spirv::Execution_model::kernel:
940 throw std::runtime_error("can't create compute shaders from Graphics_pipeline::make");
941 case spirv::Execution_model::tessellation_control:
942 case spirv::Execution_model::tessellation_evaluation:
943 #warning finish implementing Graphics_pipeline::make
944 throw std::runtime_error("creating tessellation shaders is not implemented");
945 case spirv::Execution_model::vertex:
946 {
947 vertex_shader_function =
948 reinterpret_cast<Vertex_shader_function>(shader_entry_point_address);
949 implementation->vertex_shader_output_struct = compiled_shader.outputs_struct;
950 auto llvm_vertex_shader_output_struct =
951 implementation->vertex_shader_output_struct->get_or_make_type().type;
952 vertex_shader_output_struct_size = ::LLVMABISizeOfType(
953 implementation->data_layout.get(), llvm_vertex_shader_output_struct);
954 for(auto &member : implementation->vertex_shader_output_struct->get_members(true))
955 {
956 for(auto &decoration : member.decorations)
957 {
958 if(decoration.value == spirv::Decoration::built_in)
959 {
960 auto &builtin =
961 util::get<spirv::Decoration_built_in_parameters>(decoration.parameters);
962 if(builtin.built_in == spirv::Built_in::position)
963 {
964 vertex_shader_position_output_offset =
965 ::LLVMOffsetOfElement(implementation->data_layout.get(),
966 llvm_vertex_shader_output_struct,
967 member.llvm_member_index);
968 break;
969 }
970 }
971 }
972 if(vertex_shader_position_output_offset)
973 break;
974 if(auto *struct_type =
975 dynamic_cast<spirv_to_llvm::Struct_type_descriptor *>(member.type.get()))
976 {
977 std::size_t struct_offset =
978 ::LLVMOffsetOfElement(implementation->data_layout.get(),
979 llvm_vertex_shader_output_struct,
980 member.llvm_member_index);
981 auto llvm_struct_type = struct_type->get_or_make_type().type;
982 for(auto &submember : struct_type->get_members(true))
983 {
984 for(auto &decoration : submember.decorations)
985 {
986 if(decoration.value == spirv::Decoration::built_in)
987 {
988 auto &builtin = util::get<spirv::Decoration_built_in_parameters>(
989 decoration.parameters);
990 if(builtin.built_in == spirv::Built_in::position)
991 {
992 vertex_shader_position_output_offset =
993 struct_offset
994 + ::LLVMOffsetOfElement(implementation->data_layout.get(),
995 llvm_struct_type,
996 submember.llvm_member_index);
997 break;
998 }
999 }
1000 }
1001 if(vertex_shader_position_output_offset)
1002 break;
1003 }
1004 }
1005 if(vertex_shader_position_output_offset)
1006 break;
1007 }
1008 if(!vertex_shader_position_output_offset)
1009 throw std::runtime_error("can't find vertex shader Position output");
1010 #warning finish implementing Graphics_pipeline::make
1011 continue;
1012 }
1013 }
1014 throw std::runtime_error("unknown shader kind");
1015 }
1016 #warning finish implementing Graphics_pipeline::make
1017 if(!vertex_shader_function)
1018 throw std::runtime_error("graphics pipeline doesn't have vertex shader");
1019 if(!create_info.pViewportState)
1020 throw std::runtime_error("missing viewport state");
1021 if(create_info.pViewportState->viewportCount != 1)
1022 throw std::runtime_error("unimplemented viewport count");
1023 if(!create_info.pViewportState->pViewports)
1024 throw std::runtime_error("missing viewport list");
1025 if(!create_info.pViewportState->pScissors)
1026 throw std::runtime_error("missing scissor rectangle list");
1027 assert(vertex_shader_position_output_offset);
1028 return std::unique_ptr<Graphics_pipeline>(
1029 new Graphics_pipeline(std::move(implementation),
1030 vertex_shader_function,
1031 vertex_shader_output_struct_size,
1032 *vertex_shader_position_output_offset,
1033 fragment_shader_function,
1034 create_info.pViewportState->pViewports[0],
1035 create_info.pViewportState->pScissors[0]));
1036 }
1037 }
1038 }