* [PATCH olang v3] arena: optimization: ensure alignment memory access
@ 2024-02-28 14:25 Carlos Maniero
2024-02-28 14:26 ` [olang/patches/.build.yml] build success builds.sr.ht
2024-02-28 18:25 ` [PATCH olang v3] arena: optimization: ensure alignment memory access Johnny Richard
0 siblings, 2 replies; 5+ messages in thread
From: Carlos Maniero @ 2024-02-28 14:25 UTC (permalink / raw)
To: ~johnnyrichard/olang-devel; +Cc: Carlos Maniero
This commit changes the pointers returned by *arena_alloc* to always be
16-byte aligned. Non-aligned data structures can have a huge impact on
performance. Take the example below:
int main() {
void *pointer = malloc(1024);
int offset = 0;
long long* data = pointer + offset;
for (int i = 0; i < INT_MAX; i++) {
*data += i;
}
printf("result = %lld", *data);
}
These are the execution times on my machine:
+----------+----------------+
| Offset | Execution time |
+----------+----------------+
| 0 bytes | 0m1.655s |
| 1 bytes | 0m2.286s |
| 2 bytes | 0m2.282s |
| 4 bytes | 0m1.716s |
| 8 bytes | 0m1.712s |
| 16 bytes | 0m1.665s |
+----------+----------------+
The reason for the performance degradation can be found in Intel's manual
[1]:
> To improve the performance of programs, data structures (especially
> stacks) should be aligned on natural boundaries whenever possible. The
> reason for this is that the processor requires two memory accesses to
> make an unaligned memory access; aligned accesses require only one
> memory access.
Double Quadwords have a 16-byte natural boundary, and this is the highest
natural boundary possible on an x86_64 architecture. Also, all other
natural boundaries are powers of two, meaning that any other word will
also be aligned when using 16-byte alignment.
You can learn more about memory alignment on Drake's and Berg's
"Unaligned Memory Accesses" article [2].
[1]: Intel® 64 and IA-32 Architectures Software Developer’s Manual
Combined Volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4,
Chapter 4.1.1 "Alignment of Words, Doublewords, Quadwords, and
Double Quadwords"
https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html
[2]: https://www.kernel.org/doc/html/next/_sources/core-api/unaligned-memory-access.rst.txt
Signed-off-by: Carlos Maniero <carlos@maniero.me>
---
v3:
- Include more details in the commit message explaining why it matters.
- Enforce 16-byte alignment on all platforms.
- Remove duplicated test scenario (overflow test).
src/arena.c | 16 +++++++++++---
src/arena.h | 3 +++
tests/unit/arena_test.c | 48 +++++++++++++++++++++++++++++++++++------
3 files changed, 58 insertions(+), 9 deletions(-)
diff --git a/src/arena.c b/src/arena.c
index ae33e6a..ad2e535 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -28,14 +28,18 @@ arena_new(size_t size)
return arena;
}
+static uint8_t
+arena_padding(size_t bytes);
+
void *
-arena_alloc(arena_t *arena, size_t size)
+arena_alloc(arena_t *arena, size_t bytes)
{
- if ((arena->offset + size) > arena->size) {
+ if ((arena->offset + bytes) > arena->size) {
return NULL;
}
void *pointer = arena->region + arena->offset;
- arena->offset += size;
+ arena->offset += bytes + arena_padding(bytes);
+
return pointer;
}
@@ -51,3 +55,9 @@ arena_free(arena_t *arena)
arena->size = 0;
free(arena->region);
}
+
+static uint8_t
+arena_padding(size_t bytes)
+{
+ return (ARENA_ALIGNMENT_BYTES - bytes) & ARENA_ALIGNMENT_BYTES_MASK;
+}
diff --git a/src/arena.h b/src/arena.h
index 157165c..03fd803 100644
--- a/src/arena.h
+++ b/src/arena.h
@@ -19,6 +19,9 @@
#include <stdint.h>
#include <stdlib.h>
+#define ARENA_ALIGNMENT_BYTES 16
+#define ARENA_ALIGNMENT_BYTES_MASK (ARENA_ALIGNMENT_BYTES - 1)
+
typedef struct arena
{
size_t offset;
diff --git a/tests/unit/arena_test.c b/tests/unit/arena_test.c
index 13f406f..a471572 100644
--- a/tests/unit/arena_test.c
+++ b/tests/unit/arena_test.c
@@ -19,14 +19,14 @@
#include "munit.h"
static MunitResult
-arena_test(const MunitParameter params[], void *user_data_or_fixture)
+arena_alloc_test(const MunitParameter params[], void *user_data_or_fixture)
{
- arena_t arena = arena_new(sizeof(int) * 2);
+ arena_t arena = arena_new(ARENA_ALIGNMENT_BYTES * 2);
- int *a = arena_alloc(&arena, sizeof(int));
+ uint8_t *a = arena_alloc(&arena, sizeof(uint8_t));
*a = 1;
- int *b = arena_alloc(&arena, sizeof(int));
+ uint8_t *b = arena_alloc(&arena, sizeof(uint8_t));
*b = 2;
munit_assert_int(*a, ==, 1);
@@ -34,7 +34,7 @@ arena_test(const MunitParameter params[], void *user_data_or_fixture)
arena_release(&arena);
- int *c = arena_alloc(&arena, sizeof(int));
+ uint8_t *c = arena_alloc(&arena, sizeof(uint8_t));
*c = 3;
munit_assert_int(*c, ==, 3);
@@ -49,7 +49,43 @@ arena_test(const MunitParameter params[], void *user_data_or_fixture)
return MUNIT_OK;
}
-static MunitTest tests[] = { { "/arena_test", arena_test, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+static MunitResult
+arena_padding_test(const MunitParameter params[], void *user_data_or_fixture)
+{
+ arena_t arena = arena_new(512);
+
+ // Allocated bytes is < ARENA_ALIGNMENT_BYTES
+ uint8_t *a = arena_alloc(&arena, sizeof(uint8_t));
+ uint8_t *b = arena_alloc(&arena, sizeof(uint8_t));
+
+ munit_assert_int((b - a) % ARENA_ALIGNMENT_BYTES, ==, 0);
+ munit_assert_int(b - a, ==, ARENA_ALIGNMENT_BYTES);
+
+ arena_release(&arena);
+
+ // Allocated bytes is == ARENA_ALIGNMENT_BYTES
+ a = arena_alloc(&arena, ARENA_ALIGNMENT_BYTES);
+ b = arena_alloc(&arena, sizeof(uint8_t));
+
+ munit_assert_int((b - a) % ARENA_ALIGNMENT_BYTES, ==, 0);
+ munit_assert_int(b - a, ==, ARENA_ALIGNMENT_BYTES);
+
+ arena_release(&arena);
+
+ // Allocated bytes is > ARENA_ALIGNMENT_BYTES
+ a = arena_alloc(&arena, ARENA_ALIGNMENT_BYTES + 1);
+ b = arena_alloc(&arena, sizeof(uint8_t));
+
+ munit_assert_int((b - a) % ARENA_ALIGNMENT_BYTES, ==, 0);
+ munit_assert_int(b - a, ==, ARENA_ALIGNMENT_BYTES * 2);
+
+ arena_free(&arena);
+
+ return MUNIT_OK;
+}
+
+static MunitTest tests[] = { { "/arena_alloc_test", arena_alloc_test, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
+ { "/arena_padding_test", arena_padding_test, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL },
{ NULL, NULL, NULL, NULL, MUNIT_TEST_OPTION_NONE, NULL } };
static const MunitSuite suite = { "/arena", tests, NULL, 1, MUNIT_SUITE_OPTION_NONE };
--
2.34.1
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-02-28 21:39 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-28 14:25 [PATCH olang v3] arena: optimization: ensure alignment memory access Carlos Maniero
2024-02-28 14:26 ` [olang/patches/.build.yml] build success builds.sr.ht
2024-02-28 18:25 ` [PATCH olang v3] arena: optimization: ensure alignment memory access Johnny Richard
2024-02-28 19:59 ` Carlos Maniero
2024-02-28 22:39 ` Johnny Richard
Code repositories for project(s) associated with this public inbox
https://git.johnnyrichard.com/olang.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox