libs/vulkan-compute/test_cosine.c

279 lines

1/* test_cosine.c - Test cosine distance shader against CPU reference
2 *
3 * Validates that the GPU cosine distance computation matches CPU results
4 * for 768-dimensional poem embeddings.
5 */
6
7#include "include/vk_compute.h"
8#include <stdio.h>
9#include <stdlib.h>
10#include <math.h>
11#include <time.h>
12
13#define EMBEDDING_DIM 768
14#define NUM_EMBEDDINGS 7793 /* Test with real dataset size */
15#define TOLERANCE 0.0001f /* Floating-point comparison tolerance */
16
17/* {{{ CPU reference implementation
18 */
19
20/* CPU cosine distance calculation (reference implementation) */
21float cpu_cosine_distance(const float* embedding, const float* centroid, int dim) {
22 float dot_product = 0.0f;
23 float norm_embedding = 0.0f;
24 float norm_centroid = 0.0f;
25
26 for (int i = 0; i < dim; i++) {
27 dot_product += embedding[i] * centroid[i];
28 norm_embedding += embedding[i] * embedding[i];
29 norm_centroid += centroid[i] * centroid[i];
30 }
31
32 float norm_product = sqrtf(norm_embedding) * sqrtf(norm_centroid);
33
34 if (norm_product == 0.0f) {
35 return 0.0f;
36 }
37
38 float similarity = dot_product / norm_product;
39 return 1.0f - similarity;
40}
41
/* Compute the cosine distance from every embedding to one centroid on the CPU.
 *
 * embeddings:     row-major matrix, num_embeddings rows of embedding_dim floats.
 * centroid:       vector of embedding_dim floats compared against every row.
 * distances:      output array; element i receives the distance for row i.
 * num_embeddings: number of rows to process (nothing is written if <= 0).
 * embedding_dim:  number of floats per row.
 */
void cpu_calculate_distances(const float* embeddings,
                             const float* centroid,
                             float* distances,
                             int num_embeddings,
                             int embedding_dim) {
    /* Walk a row pointer instead of indexing with i * embedding_dim: that
     * product is evaluated in int and overflows once the matrix holds more
     * than INT_MAX floats. */
    const float* row = embeddings;

    for (int i = 0; i < num_embeddings; i++) {
        distances[i] = cpu_cosine_distance(row, centroid, embedding_dim);
        row += embedding_dim;
    }
}
53
54/* }}} */
55
56/* {{{ Test data generation
57 */
58
/* Fill a row-major embedding matrix with pseudo-random test values.
 *
 * embeddings:     output buffer with room for num_embeddings * dim floats.
 * num_embeddings: number of rows to generate.
 * dim:            number of floats per row.
 *
 * Each element is taken from rand() and scaled into [-1.0, 1.0], so the
 * output is determined by the current srand() seed. Values are drawn in
 * memory order, one rand() call per element. Nothing is written when either
 * count is <= 0.
 */
void generate_test_embeddings(float* embeddings, int num_embeddings, int dim) {
    float* out = embeddings;

    /* Row/column loops avoid forming num_embeddings * dim as an int, which
     * would overflow for matrices larger than INT_MAX elements. */
    for (int row = 0; row < num_embeddings; row++) {
        for (int col = 0; col < dim; col++) {
            /* Random values between -1.0 and 1.0 */
            *out++ = ((float)rand() / RAND_MAX) * 2.0f - 1.0f;
        }
    }
}
66
/* Fill `centroid` with `dim` pseudo-random floats.
 *
 * Every element comes from one rand() call scaled into [-1.0, 1.0], so the
 * result depends on the current srand() seed. Nothing is written when
 * dim <= 0.
 */
void generate_test_centroid(float* centroid, int dim) {
    float* cursor = centroid;
    int remaining = dim;

    while (remaining > 0) {
        const float unit = (float)rand() / RAND_MAX; /* in [0, 1] */
        *cursor = unit * 2.0f - 1.0f;
        cursor++;
        remaining--;
    }
}
73
74/* }}} */
75
76int main(void) {
77 printf("=== Cosine Distance Shader Test ===\n\n");
78 printf("Configuration:\n");
79 printf(" Embedding dimension: %d\n", EMBEDDING_DIM);
80 printf(" Number of embeddings: %d\n", NUM_EMBEDDINGS);
81 printf(" Total data: %.2f MB\n",
82 (NUM_EMBEDDINGS * EMBEDDING_DIM * sizeof(float)) / (1024.0f * 1024.0f));
83
84 /* Seed RNG for reproducible tests */
85 srand(42);
86
87 /* Allocate test data */
88 printf("\n[1] Generating test data...\n");
89 float* embeddings = malloc(NUM_EMBEDDINGS * EMBEDDING_DIM * sizeof(float));
90 float* centroid = malloc(EMBEDDING_DIM * sizeof(float));
91 float* cpu_distances = malloc(NUM_EMBEDDINGS * sizeof(float));
92 float* gpu_distances = malloc(NUM_EMBEDDINGS * sizeof(float));
93
94 if (!embeddings || !centroid || !cpu_distances || !gpu_distances) {
95 fprintf(stderr, "ERROR: Memory allocation failed\n");
96 return 1;
97 }
98
99 generate_test_embeddings(embeddings, NUM_EMBEDDINGS, EMBEDDING_DIM);
100 generate_test_centroid(centroid, EMBEDDING_DIM);
101 printf(" [OK] Generated %d random embeddings\n", NUM_EMBEDDINGS);
102
103 /* CPU reference calculation */
104 printf("\n[2] Computing distances on CPU...\n");
105 clock_t cpu_start = clock();
106 cpu_calculate_distances(embeddings, centroid, cpu_distances,
107 NUM_EMBEDDINGS, EMBEDDING_DIM);
108 clock_t cpu_end = clock();
109 double cpu_time = ((double)(cpu_end - cpu_start)) / CLOCKS_PER_SEC * 1000.0;
110 printf(" [OK] CPU time: %.2f ms\n", cpu_time);
111
112 /* Initialize Vulkan */
113 printf("\n[3] Initializing Vulkan...\n");
114 VkComputeContext* ctx = vkc_init(false);
115 if (!ctx) {
116 fprintf(stderr, "ERROR: Failed to initialize Vulkan\n");
117 return 1;
118 }
119 printf(" [OK] Device: %s\n", vkc_get_device_name(ctx));
120
121 /* Create GPU buffers */
122 printf("\n[4] Creating GPU buffers...\n");
123 VkComputeBuffer* embeddings_buf = vkc_create_buffer(ctx,
124 NUM_EMBEDDINGS * EMBEDDING_DIM * sizeof(float),
125 VKC_BUFFER_DEVICE_LOCAL);
126 VkComputeBuffer* centroid_buf = vkc_create_buffer(ctx,
127 EMBEDDING_DIM * sizeof(float),
128 VKC_BUFFER_DEVICE_LOCAL);
129 VkComputeBuffer* distances_buf = vkc_create_buffer(ctx,
130 NUM_EMBEDDINGS * sizeof(float),
131 VKC_BUFFER_DEVICE_LOCAL);
132
133 if (!embeddings_buf || !centroid_buf || !distances_buf) {
134 fprintf(stderr, "ERROR: Failed to create GPU buffers\n");
135 vkc_destroy(ctx);
136 return 1;
137 }
138 printf(" [OK] Created 3 buffers\n");
139
140 /* Upload data to GPU */
141 printf("\n[5] Uploading data to GPU...\n");
142 vkc_upload_buffer(ctx, embeddings_buf, embeddings,
143 NUM_EMBEDDINGS * EMBEDDING_DIM * sizeof(float));
144 vkc_upload_buffer(ctx, centroid_buf, centroid,
145 EMBEDDING_DIM * sizeof(float));
146 printf(" [OK] Uploaded %.2f MB\n",
147 ((NUM_EMBEDDINGS * EMBEDDING_DIM + EMBEDDING_DIM) * sizeof(float)) / (1024.0f * 1024.0f));
148
149 /* Create pipeline */
150 printf("\n[6] Loading cosine distance shader...\n");
151
152 /* Push constants structure */
153 struct {
154 uint32_t num_embeddings;
155 uint32_t embedding_dim;
156 } push_constants = {
157 .num_embeddings = NUM_EMBEDDINGS,
158 .embedding_dim = EMBEDDING_DIM,
159 };
160
161 VkComputePipeline* pipeline = vkc_create_pipeline(ctx,
162 "build/cosine_distance.spv",
163 sizeof(push_constants));
164 if (!pipeline) {
165 fprintf(stderr, "ERROR: Failed to create pipeline\n");
166 vkc_destroy_buffer(ctx, embeddings_buf);
167 vkc_destroy_buffer(ctx, centroid_buf);
168 vkc_destroy_buffer(ctx, distances_buf);
169 vkc_destroy(ctx);
170 return 1;
171 }
172 printf(" [OK] Pipeline created\n");
173
174 /* Bind buffers */
175 printf("\n[7] Binding buffers to pipeline...\n");
176 vkc_bind_buffer(ctx, pipeline, 0, embeddings_buf);
177 vkc_bind_buffer(ctx, pipeline, 1, centroid_buf);
178 vkc_bind_buffer(ctx, pipeline, 2, distances_buf);
179 printf(" [OK] Buffers bound\n");
180
181 /* Dispatch shader */
182 printf("\n[8] Dispatching compute shader...\n");
183 uint32_t workgroup_count = (NUM_EMBEDDINGS + 255) / 256;
184 printf(" Workgroups: %u (256 threads each)\n", workgroup_count);
185
186 clock_t gpu_start = clock();
187 VkComputeResult result = vkc_dispatch(ctx, pipeline, workgroup_count, 1, 1,
188 &push_constants);
189 clock_t gpu_end = clock();
190
191 if (result != VKC_SUCCESS) {
192 fprintf(stderr, "ERROR: Failed to dispatch shader\n");
193 vkc_destroy_pipeline(ctx, pipeline);
194 vkc_destroy_buffer(ctx, embeddings_buf);
195 vkc_destroy_buffer(ctx, centroid_buf);
196 vkc_destroy_buffer(ctx, distances_buf);
197 vkc_destroy(ctx);
198 return 1;
199 }
200
201 double gpu_time = ((double)(gpu_end - gpu_start)) / CLOCKS_PER_SEC * 1000.0;
202 printf(" [OK] GPU time: %.2f ms\n", gpu_time);
203 printf(" Speedup: %.2fx\n", cpu_time / gpu_time);
204
205 /* Download results */
206 printf("\n[9] Downloading results from GPU...\n");
207 vkc_download_buffer(ctx, distances_buf, gpu_distances,
208 NUM_EMBEDDINGS * sizeof(float));
209 printf(" [OK] Downloaded %d distance values\n", NUM_EMBEDDINGS);
210
211 /* Compare results */
212 printf("\n[10] Comparing CPU vs GPU results...\n");
213 int mismatches = 0;
214 float max_error = 0.0f;
215 float total_error = 0.0f;
216
217 for (int i = 0; i < NUM_EMBEDDINGS; i++) {
218 float error = fabsf(cpu_distances[i] - gpu_distances[i]);
219 total_error += error;
220
221 if (error > max_error) {
222 max_error = error;
223 }
224
225 if (error > TOLERANCE) {
226 if (mismatches < 5) {
227 printf(" MISMATCH [%d]: CPU=%.6f, GPU=%.6f, error=%.6f\n",
228 i, cpu_distances[i], gpu_distances[i], error);
229 }
230 mismatches++;
231 }
232 }
233
234 float avg_error = total_error / NUM_EMBEDDINGS;
235
236 printf("\n Results:\n");
237 printf(" Matches: %d / %d\n", NUM_EMBEDDINGS - mismatches, NUM_EMBEDDINGS);
238 printf(" Average error: %.8f\n", avg_error);
239 printf(" Max error: %.8f\n", max_error);
240 printf(" Tolerance: %.8f\n", TOLERANCE);
241
242 bool passed = (mismatches == 0);
243
244 if (passed) {
245 printf("\n [SUCCESS] All results match within tolerance!\n");
246 } else {
247 printf("\n [FAILED] %d mismatches found\n", mismatches);
248 }
249
250 /* Print sample values */
251 printf("\n Sample values:\n");
252 for (int i = 0; i < 5 && i < NUM_EMBEDDINGS; i++) {
253 printf(" [%d] CPU: %.6f, GPU: %.6f\n",
254 i, cpu_distances[i], gpu_distances[i]);
255 }
256
257 /* Cleanup */
258 printf("\n[11] Cleaning up...\n");
259 free(embeddings);
260 free(centroid);
261 free(cpu_distances);
262 free(gpu_distances);
263 vkc_destroy_pipeline(ctx, pipeline);
264 vkc_destroy_buffer(ctx, embeddings_buf);
265 vkc_destroy_buffer(ctx, centroid_buf);
266 vkc_destroy_buffer(ctx, distances_buf);
267 vkc_destroy(ctx);
268
269 if (passed) {
270 printf("\n[SUCCESS] Cosine distance shader validated!\n");
271 return 0;
272 } else {
273 printf("\n[FAILED] Validation failed\n");
274 return 1;
275 }
276}
277
278/* }}} */
279