The source files datastore.cpp and linmod_diabetes.cpp showcase loading, selecting and using data with a da_datastore handle.
1/*
2 * Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 * 1. Redistributions of source code must retain the above copyright notice,
7 * this list of conditions and the following disclaimer.
8 * 2. Redistributions in binary form must reproduce the above copyright notice,
9 * this list of conditions and the following disclaimer in the documentation
10 * and/or other materials provided with the distribution.
11 * 3. Neither the name of the copyright holder nor the names of its contributors
12 * may be used to endorse or promote products derived from this software without
13 * specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28#include "aoclda.h"
29#include <iostream>
30#include <string>
31#include <vector>
32
33#ifndef DATA_DIR
34#define DATA_DIR "data"
35#endif
36
/* Data store example
 * This example demonstrates on a small data set how to:
 * - load data from a CSV file
 * - select and extract different subsets of the data
 * - pass the extracted data to an AOCL-DA algorithm and compute a model
 */
43
44int main() {
45 std::cout << "----------------------------------------" << std::endl;
46 std::cout << "Load data from a CSV file" << std::endl;
47
48 da_datastore store;
49 std::string filename = std::string(DATA_DIR) + "/" + "datastore_ex.csv";
50 da_status status;
51
52 // Load data
53 da_datastore_init(&store);
54 da_datastore_options_set_string(store, "datatype", "double");
55 da_datastore_options_set_int(store, "use header row", 1);
56 status = da_data_load_from_csv(store, filename.c_str());
57 if (status != da_status_success) {
58 std::cout << "Data loading unsuccessful" << std::endl;
59 return 1;
60 }
61
62 // Select the first 2 columns as the feature matrix and the last one as the response
63 std::vector<double> features(10), rhs(5);
64 da_data_select_columns(store, "features", 0, 1);
65 da_data_select_columns(store, "rhs", 2, 2);
66 da_data_extract_selection_real_d(store, "features", column_major, features.data(), 5);
67 da_data_extract_selection_real_d(store, "rhs", column_major, rhs.data(), 5);
68
69 // Define the regression problem to solve
70 da_handle handle;
71 da_handle_init_d(&handle, da_handle_linmod);
72 da_linmod_select_model_d(handle, linmod_model_mse);
73 da_linmod_define_features_d(handle, 5, 2, features.data(), 5, rhs.data());
74
75 // Solve the problem
76 status = da_linmod_fit_d(handle);
77
78 int exit = 0;
79 if (status == da_status_success) {
80 std::cout << "Regression computed successfully!" << std::endl;
81 da_int nx = 2;
82 std::vector<double> x(2);
83 da_handle_get_result_d(handle, da_result::da_linmod_coef, &nx, x.data());
84 std::cout << "Coefficients: " << x[0] << " " << x[1] << std::endl;
85 std::cout << "(Expected : " << 0.199256 << " " << 0.130354 << ")" << std::endl;
86 } else
87 exit = 1;
88
89 da_datastore_destroy(&store);
90 da_handle_destroy(&handle);
91
92 return exit;
93}
1/*
2 * Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without modification,
5 * are permitted provided that the following conditions are met:
6 * 1. Redistributions of source code must retain the above copyright notice,
7 * this list of conditions and the following disclaimer.
8 * 2. Redistributions in binary form must reproduce the above copyright notice,
9 * this list of conditions and the following disclaimer in the documentation
10 * and/or other materials provided with the distribution.
11 * 3. Neither the name of the copyright holder nor the names of its contributors
12 * may be used to endorse or promote products derived from this software without
13 * specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28#include "aoclda.h"
29#include <assert.h>
30#include <cmath>
31#include <iomanip>
32#include <iostream>
33#include <vector>
34
/*
 * Linear model elastic net regression example
 * using the data set from
 *
 * EFRON, HASTIE, JOHNSTONE, and TIBSHIRANI (2004).
 * Least angle regression (with discussion).
 * Ann. Statist. 32 407–499. MR2060166
 * https://hastie.su.domains/Papers/LARS/data64.txt
 *
 * The "diabetes data set" consists of 442 observations
 * and 10 features, while the model chosen is linear and
 * fitted with both L1 and L2 penalty terms.
 *
 * The example showcases how to use the datastore framework to
 * extract data, but the data can also be loaded directly into
 * dense matrices using, e.g., the da_read_csv_d API.
 */
52#ifndef DATA_DIR
53#define DATA_DIR "data"
54#endif
55
56int main() {
57
58 std::cout << "---------------------------------------------------" << std::endl;
59 std::cout << "Elastic net regression example using diabetes data" << std::endl;
60 std::cout << "---------------------------------------------------" << std::endl
61 << std::endl;
62 std::cout << std::fixed;
63 std::cout.precision(5);
64
65 // Problem data
66 // m: observations; n: features
67 da_int m = 442, n = 10;
68 da_int rhs_pos = 10;
69 std::vector<double> features;
70 std::vector<double> rhs;
71 std::vector<double> x;
72 // n features
73 features.resize(m * n);
74 rhs.resize(m);
75 x.resize(n + 1);
76 // Initial parameter estimates: n + intercept
77 // x.assign({0, 0, 700, 200, 100, 80, 160, 0, 300, 0});
78 x.assign(m + 1, 0);
79
80 // Reference solution
81 std::vector<double> x_ref(n + 1);
82 x_ref.assign({0, -19.4574064435, 4.3253307426, 0.6585289836, 0, 0, -1.6904339251, 0,
83 19.3039496667, 0});
84
85 da_status status;
86
87 // Load data from file
88 da_datastore csv;
89 const char filename[](DATA_DIR "/diabetes.csv");
90 da_datastore_init(&csv);
91 da_datastore_options_set_int(csv, "whitespace delimiter", 1);
92 da_datastore_options_set_string(csv, "comment", "#");
93 da_datastore_options_set_int(csv, "use header row", 1);
94 status = da_data_load_from_csv(csv, filename);
95 if (status != da_status_success) {
96 da_datastore_print_error_message(csv);
97 return 1;
98 }
99 da_int nr, nc;
100 status = da_data_get_n_rows(csv, &nr);
101 status = da_data_get_n_cols(csv, &nc);
102 if (nr != 442 || nc != 11) {
103 std::cout << "Unexpected size for the loaded data: "
104 "(rows="
105 << nr << ", cols=" << nc << ")" << std::endl;
106 return 2;
107 }
108
109 // Extract the 10 features into a dense matrix
110 bool pass = true;
111 pass = pass && da_data_select_columns(csv, "features", 0, n - 1) == da_status_success;
112 pass =
113 pass && da_data_extract_selection_real_d(csv, "features", column_major,
114 features.data(), m) == da_status_success;
115 // Extract response variable
116 pass = pass &&
117 da_data_select_columns(csv, "response", rhs_pos, rhs_pos) == da_status_success;
118 pass = pass && da_data_extract_selection_real_d(csv, "response", column_major,
119 rhs.data(), m) == da_status_success;
120 da_datastore_destroy(&csv);
121 if (!pass) {
122 std::cout
123 << "Unexpected error in the feature and response matrices extraction.\n";
124 return 1;
125 }
126
127 // Initialize the linear regression
128 pass = true;
129 da_int nx = 0;
130 da_handle handle = nullptr;
131 pass = pass && da_handle_init_d(&handle, da_handle_linmod) == da_status_success;
132 pass =
133 pass && da_linmod_select_model_d(handle, linmod_model_mse) == da_status_success;
134 pass = pass &&
135 da_options_set_string(handle, "scaling", "standardise") == da_status_success;
136 pass = pass && da_options_set_real_d(handle, "alpha", 1) == da_status_success;
137 pass = pass && da_options_set_real_d(handle, "lambda", 4) == da_status_success;
138 pass = pass &&
139 da_options_set_string(handle, "print options", "yes") == da_status_success;
140 pass = pass && da_options_set_int(handle, "intercept", 0) == da_status_success;
141 pass = pass && da_options_set_int(handle, "print level", 1) == da_status_success;
142 pass = pass && da_linmod_define_features_d(handle, m, n, features.data(), m,
143 rhs.data()) == da_status_success;
144 if (!pass) {
145 std::cout << "Unexpected error in the model definition.\n";
146 da_handle_destroy(&handle);
147 return 1;
148 }
149
150 // Compute regression
151 status = da_linmod_fit_start_d(handle, n + 1, x.data());
152 bool ok = false;
153 if (status == da_status_success) {
154 std::cout << "Regression computed" << std::endl;
155 // Query the amount of coefficient in the model (n+intercept)
156 da_handle_get_result_d(handle, da_linmod_coef, &nx, x.data());
157 x.resize(nx);
158 da_handle_get_result_d(handle, da_linmod_coef, &nx, x.data());
159 std::cout << "Coefficients: " << std::endl;
160 std::cout.precision(3);
161
162 bool oki;
163 ok = !ok;
164 for (da_int i = 0; i < nx; i++) {
165 oki = std::abs(x[i] - x_ref[i]) <= 1.0e-3;
166 std::cout << " x[" << std::setw(2) << i << "] = " << std::setw(9) << x[i]
167 << " expecting " << std::setw(9) << x_ref[i]
168 << (oki ? " (OK)" : " [WRONG]") << std::endl;
169 ok &= oki;
170 }
171 } else {
172 std::cout << "Unexpected error:" << std::endl;
173 da_handle_print_error_message(handle);
174 }
175 std::cout << "----------------------------------------" << std::endl;
176
177 da_handle_destroy(&handle);
178
179 return ok ? 0 : 7;
180}