mirror of
https://github.com/raysan5/raylib.git
synced 2026-01-10 21:48:46 +01:00
REVIEWED: SIMD intrinsics must be explicitly enabled by the developer, only SSE supported at the moment #5316
This commit is contained in:
@ -19,17 +19,22 @@
|
|||||||
*
|
*
|
||||||
* CONFIGURATION:
|
* CONFIGURATION:
|
||||||
* #define RAYMATH_IMPLEMENTATION
|
* #define RAYMATH_IMPLEMENTATION
|
||||||
* Generates the implementation of the library into the included file.
|
* Generates the implementation of the library into the included file
|
||||||
* If not defined, the library is in header only mode and can be included in other headers
|
* If not defined, the library is in header only mode and can be included in other headers
|
||||||
* or source files without problems. But only ONE file should hold the implementation.
|
* or source files without problems. But only ONE file should hold the implementation
|
||||||
*
|
*
|
||||||
* #define RAYMATH_STATIC_INLINE
|
* #define RAYMATH_STATIC_INLINE
|
||||||
* Define static inline functions code, so #include header suffices for use.
|
* Define static inline functions code, so #include header suffices for use
|
||||||
* This may use up lots of memory.
|
* This may use up lots of memory
|
||||||
*
|
*
|
||||||
* #define RAYMATH_DISABLE_CPP_OPERATORS
|
* #define RAYMATH_DISABLE_CPP_OPERATORS
|
||||||
* Disables C++ operator overloads for raymath types.
|
* Disables C++ operator overloads for raymath types.
|
||||||
*
|
*
|
||||||
|
* #define RAYMATH_USE_SIMD_INTRINSICS
|
||||||
|
* Try to enable SIMD intrinsics for MatrixMultiply()
|
||||||
|
* Note that users enabling it must be aware of the target platform where the application will
|
||||||
|
* run to support the selected SIMD intrinsics; for now, only SSE is supported
|
||||||
|
*
|
||||||
* LICENSE: zlib/libpng
|
* LICENSE: zlib/libpng
|
||||||
*
|
*
|
||||||
* Copyright (c) 2015-2025 Ramon Santamaria (@raysan5)
|
* Copyright (c) 2015-2025 Ramon Santamaria (@raysan5)
|
||||||
@ -79,7 +84,6 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
//----------------------------------------------------------------------------------
|
//----------------------------------------------------------------------------------
|
||||||
// Defines and Macros
|
// Defines and Macros
|
||||||
//----------------------------------------------------------------------------------
|
//----------------------------------------------------------------------------------
|
||||||
@ -170,9 +174,35 @@ typedef struct float16 {
|
|||||||
|
|
||||||
#include <math.h> // Required for: sinf(), cosf(), tan(), atan2f(), sqrtf(), floor(), fminf(), fmaxf(), fabsf()
|
#include <math.h> // Required for: sinf(), cosf(), tan(), atan2f(), sqrtf(), floor(), fminf(), fmaxf(), fabsf()
|
||||||
|
|
||||||
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
|
#if defined(RAYMATH_USE_SIMD_INTRINSICS)
|
||||||
#include <xmmintrin.h>
|
// SIMD is used on the most costly raymath function MatrixMultiply()
|
||||||
#define RAYMATH_SSE_ENABLED
|
// NOTE: Only SSE intrinsics support implemented
|
||||||
|
// TODO: Consider support for other SIMD intrinsics
|
||||||
|
/*
|
||||||
|
#if defined(__SSE4_2__)
|
||||||
|
#define SW_HAS_SSE42
|
||||||
|
#include <nmmintrin.h>
|
||||||
|
#elif defined(__SSE4_1__)
|
||||||
|
#define SW_HAS_SSE41
|
||||||
|
#include <smmintrin.h>
|
||||||
|
#elif defined(__SSSE3__)
|
||||||
|
#define SW_HAS_SSSE3
|
||||||
|
#include <tmmintrin.h>
|
||||||
|
#elif defined(__SSE3__)
|
||||||
|
#define SW_HAS_SSE3
|
||||||
|
#include <pmmintrin.h>
|
||||||
|
#elif defined(__SSE2__) || (defined(_M_AMD64) || defined(_M_X64)) // SSE2 x64
|
||||||
|
#define SW_HAS_SSE2
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#elif defined(__SSE__)
|
||||||
|
#define SW_HAS_SSE
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#define RAYMATH_SSE_ENABLED
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//----------------------------------------------------------------------------------
|
//----------------------------------------------------------------------------------
|
||||||
@ -1652,18 +1682,20 @@ RMAPI Matrix MatrixSubtract(Matrix left, Matrix right)
|
|||||||
RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
|
RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
|
||||||
{
|
{
|
||||||
Matrix result = { 0 };
|
Matrix result = { 0 };
|
||||||
#ifdef RAYMATH_SSE_ENABLED
|
|
||||||
// Load left side and right side.
|
#if defined(RAYMATH_SSE_ENABLED)
|
||||||
|
// Load left side and right side
|
||||||
__m128 c0 = _mm_set_ps(right.m12, right.m8, right.m4, right.m0);
|
__m128 c0 = _mm_set_ps(right.m12, right.m8, right.m4, right.m0);
|
||||||
__m128 c1 = _mm_set_ps(right.m13, right.m9, right.m5, right.m1);
|
__m128 c1 = _mm_set_ps(right.m13, right.m9, right.m5, right.m1);
|
||||||
__m128 c2 = _mm_set_ps(right.m14, right.m10, right.m6, right.m2);
|
__m128 c2 = _mm_set_ps(right.m14, right.m10, right.m6, right.m2);
|
||||||
__m128 c3 = _mm_set_ps(right.m15, right.m11, right.m7, right.m3);
|
__m128 c3 = _mm_set_ps(right.m15, right.m11, right.m7, right.m3);
|
||||||
// Transpose so c0..c3 become *rows* of the right matrix in semantic order.
|
|
||||||
|
// Transpose so c0..c3 become *rows* of the right matrix in semantic order
|
||||||
_MM_TRANSPOSE4_PS(c0, c1, c2, c3);
|
_MM_TRANSPOSE4_PS(c0, c1, c2, c3);
|
||||||
|
|
||||||
|
float tmp[4] = { 0 };
|
||||||
__m128 row;
|
__m128 row;
|
||||||
float tmp[4];
|
|
||||||
|
|
||||||
// Row 0 of result: [m0, m1, m2, m3]
|
// Row 0 of result: [m0, m1, m2, m3]
|
||||||
row = _mm_mul_ps(_mm_set1_ps(left.m0), c0);
|
row = _mm_mul_ps(_mm_set1_ps(left.m0), c0);
|
||||||
row = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(left.m1), c1));
|
row = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(left.m1), c1));
|
||||||
@ -1707,7 +1739,6 @@ RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
|
|||||||
result.m13 = tmp[1];
|
result.m13 = tmp[1];
|
||||||
result.m14 = tmp[2];
|
result.m14 = tmp[2];
|
||||||
result.m15 = tmp[3];
|
result.m15 = tmp[3];
|
||||||
|
|
||||||
#else
|
#else
|
||||||
result.m0 = left.m0*right.m0 + left.m1*right.m4 + left.m2*right.m8 + left.m3*right.m12;
|
result.m0 = left.m0*right.m0 + left.m1*right.m4 + left.m2*right.m8 + left.m3*right.m12;
|
||||||
result.m1 = left.m0*right.m1 + left.m1*right.m5 + left.m2*right.m9 + left.m3*right.m13;
|
result.m1 = left.m0*right.m1 + left.m1*right.m5 + left.m2*right.m9 + left.m3*right.m13;
|
||||||
@ -1726,6 +1757,7 @@ RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
|
|||||||
result.m14 = left.m12*right.m2 + left.m13*right.m6 + left.m14*right.m10 + left.m15*right.m14;
|
result.m14 = left.m12*right.m2 + left.m13*right.m6 + left.m14*right.m10 + left.m15*right.m14;
|
||||||
result.m15 = left.m12*right.m3 + left.m13*right.m7 + left.m14*right.m11 + left.m15*right.m15;
|
result.m15 = left.m12*right.m3 + left.m13*right.m7 + left.m14*right.m11 + left.m15*right.m15;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user