Usage of da_datastore - 5.2 English - 68552

AOCL API Guide (68552)

Document ID
68552
Release Date
2025-12-29
Version
5.2 English

The source files datastore.cpp and linmod_diabetes.cpp showcase loading, selecting and using data with a da_datastore handle.

 1/*
 2 * Copyright (C) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
 3 *
 4 * Redistribution and use in source and binary forms, with or without modification,
 5 * are permitted provided that the following conditions are met:
 6 * 1. Redistributions of source code must retain the above copyright notice,
 7 *    this list of conditions and the following disclaimer.
 8 * 2. Redistributions in binary form must reproduce the above copyright notice,
 9 *    this list of conditions and the following disclaimer in the documentation
10 *    and/or other materials provided with the distribution.
11 * 3. Neither the name of the copyright holder nor the names of its contributors
12 *    may be used to endorse or promote products derived from this software without
13 *    specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28#include "aoclda.h"
29#include <iostream>
30#include <string>
31#include <vector>
32
33#ifndef DATA_DIR
34#define DATA_DIR "data"
35#endif
36
37/* Data store example
38 * This example demonstrates on a small data set how to:
 39 * - load data from a CSV file
40 * - select and extract different subsets of the data
 41 * - pass the extracted data to an AOCL-DA algorithm and compute a model
42 */
43
44int main() {
45    std::cout << "----------------------------------------" << std::endl;
46    std::cout << "Load data from a CSV file" << std::endl;
47
48    da_datastore store;
49    std::string filename = std::string(DATA_DIR) + "/" + "datastore_ex.csv";
50    da_status status;
51
52    // Load data
53    da_datastore_init(&store);
54    da_datastore_options_set_string(store, "datatype", "double");
55    da_datastore_options_set_int(store, "use header row", 1);
56    status = da_data_load_from_csv(store, filename.c_str());
57    if (status != da_status_success) {
58        std::cout << "Data loading unsuccessful" << std::endl;
59        return 1;
60    }
61
62    // Select the first 2 columns as the feature matrix and the last one as the response
63    std::vector<double> features(10), rhs(5);
64    da_data_select_columns(store, "features", 0, 1);
65    da_data_select_columns(store, "rhs", 2, 2);
66    da_data_extract_selection_real_d(store, "features", column_major, features.data(), 5);
67    da_data_extract_selection_real_d(store, "rhs", column_major, rhs.data(), 5);
68
69    // Define the regression problem to solve
70    da_handle handle;
71    da_handle_init_d(&handle, da_handle_linmod);
72    da_linmod_select_model_d(handle, linmod_model_mse);
73    da_linmod_define_features_d(handle, 5, 2, features.data(), 5, rhs.data());
74
75    // Solve the problem
76    status = da_linmod_fit_d(handle);
77
78    int exit = 0;
79    if (status == da_status_success) {
80        std::cout << "Regression computed successfully!" << std::endl;
81        da_int nx = 2;
82        std::vector<double> x(2);
83        da_handle_get_result_d(handle, da_result::da_linmod_coef, &nx, x.data());
84        std::cout << "Coefficients: " << x[0] << " " << x[1] << std::endl;
85        std::cout << "(Expected   : " << 0.199256 << " " << 0.130354 << ")" << std::endl;
86    } else
87        exit = 1;
88
89    da_datastore_destroy(&store);
90    da_handle_destroy(&handle);
91
92    return exit;
93}
  1/*
  2 * Copyright (C) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
  3 *
  4 * Redistribution and use in source and binary forms, with or without modification,
  5 * are permitted provided that the following conditions are met:
  6 * 1. Redistributions of source code must retain the above copyright notice,
  7 *    this list of conditions and the following disclaimer.
  8 * 2. Redistributions in binary form must reproduce the above copyright notice,
  9 *    this list of conditions and the following disclaimer in the documentation
 10 *    and/or other materials provided with the distribution.
 11 * 3. Neither the name of the copyright holder nor the names of its contributors
 12 *    may be used to endorse or promote products derived from this software without
 13 *    specific prior written permission.
 14 *
 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 18 * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 21 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 23 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 24 * POSSIBILITY OF SUCH DAMAGE.
 25 *
 26 */
 27
 28#include "aoclda.h"
 29#include <assert.h>
 30#include <cmath>
 31#include <iomanip>
 32#include <iostream>
 33#include <vector>
 34
 35/*
 36 * Linear model elastic net regression example
 37 * using the data set from
 38 *
 39 * EFRON, HASTIE, JOHNSTONE, and TIBSHIRANI (2004).
 40 * Least angle regression (with discussion).
 41 * Ann. Statist. 32 407–499. MR2060166
 42 * https://hastie.su.domains/Papers/LARS/data64.txt
 43 *
  44 * The "diabetes data set" consists of 442 observations
 45 * and 10 features, while the model chosen is linear and
 46 * fitted with both L1 and L2 penalty terms.
 47 *
  48 * The example showcases how to use the datastore framework to
  49 * extract data; the data could also be loaded directly into
  50 * dense matrices using, e.g., the da_read_csv_d API.
 51 */
 52#ifndef DATA_DIR
 53#define DATA_DIR "data"
 54#endif
 55
 56int main() {
 57
 58    std::cout << "---------------------------------------------------" << std::endl;
 59    std::cout << "Elastic net regression example using diabetes data" << std::endl;
 60    std::cout << "---------------------------------------------------" << std::endl
 61              << std::endl;
 62    std::cout << std::fixed;
 63    std::cout.precision(5);
 64
 65    // Problem data
 66    // m: observations; n: features
 67    da_int m = 442, n = 10;
 68    da_int rhs_pos = 10;
 69    std::vector<double> features;
 70    std::vector<double> rhs;
 71    std::vector<double> x;
 72    // n features
 73    features.resize(m * n);
 74    rhs.resize(m);
 75    x.resize(n + 1);
 76    // Initial parameter estimates: n + intercept
 77    // x.assign({0, 0, 700, 200, 100, 80, 160, 0, 300, 0});
 78    x.assign(m + 1, 0);
 79
 80    // Reference solution
 81    std::vector<double> x_ref(n + 1);
 82    x_ref.assign({0, -19.4574064435, 4.3253307426, 0.6585289836, 0, 0, -1.6904339251, 0,
 83                  19.3039496667, 0});
 84
 85    da_status status;
 86
 87    // Load data from file
 88    da_datastore csv;
 89    const char filename[](DATA_DIR "/diabetes.csv");
 90    da_datastore_init(&csv);
 91    da_datastore_options_set_int(csv, "whitespace delimiter", 1);
 92    da_datastore_options_set_string(csv, "comment", "#");
 93    da_datastore_options_set_int(csv, "use header row", 1);
 94    status = da_data_load_from_csv(csv, filename);
 95    if (status != da_status_success) {
 96        da_datastore_print_error_message(csv);
 97        return 1;
 98    }
 99    da_int nr, nc;
100    status = da_data_get_n_rows(csv, &nr);
101    status = da_data_get_n_cols(csv, &nc);
102    if (nr != 442 || nc != 11) {
103        std::cout << "Unexpected size for the loaded data: "
104                     "(rows="
105                  << nr << ", cols=" << nc << ")" << std::endl;
106        return 2;
107    }
108
109    // Extract the 10 features into a dense matrix
110    bool pass = true;
111    pass = pass && da_data_select_columns(csv, "features", 0, n - 1) == da_status_success;
112    pass =
113        pass && da_data_extract_selection_real_d(csv, "features", column_major,
114                                                 features.data(), m) == da_status_success;
115    // Extract response variable
116    pass = pass &&
117           da_data_select_columns(csv, "response", rhs_pos, rhs_pos) == da_status_success;
118    pass = pass && da_data_extract_selection_real_d(csv, "response", column_major,
119                                                    rhs.data(), m) == da_status_success;
120    da_datastore_destroy(&csv);
121    if (!pass) {
122        std::cout
123            << "Unexpected error in the feature and response matrices extraction.\n";
124        return 1;
125    }
126
127    // Initialize the linear regression
128    pass = true;
129    da_int nx = 0;
130    da_handle handle = nullptr;
131    pass = pass && da_handle_init_d(&handle, da_handle_linmod) == da_status_success;
132    pass =
133        pass && da_linmod_select_model_d(handle, linmod_model_mse) == da_status_success;
134    pass = pass &&
135           da_options_set_string(handle, "scaling", "standardise") == da_status_success;
136    pass = pass && da_options_set_real_d(handle, "alpha", 1) == da_status_success;
137    pass = pass && da_options_set_real_d(handle, "lambda", 4) == da_status_success;
138    pass = pass &&
139           da_options_set_string(handle, "print options", "yes") == da_status_success;
140    pass = pass && da_options_set_int(handle, "intercept", 0) == da_status_success;
141    pass = pass && da_options_set_int(handle, "print level", 1) == da_status_success;
142    pass = pass && da_linmod_define_features_d(handle, m, n, features.data(), m,
143                                               rhs.data()) == da_status_success;
144    if (!pass) {
145        std::cout << "Unexpected error in the model definition.\n";
146        da_handle_destroy(&handle);
147        return 1;
148    }
149
150    // Compute regression
151    status = da_linmod_fit_start_d(handle, n + 1, x.data());
152    bool ok = false;
153    if (status == da_status_success) {
154        std::cout << "Regression computed" << std::endl;
155        // Query the amount of coefficient in the model (n+intercept)
156        da_handle_get_result_d(handle, da_linmod_coef, &nx, x.data());
157        x.resize(nx);
158        da_handle_get_result_d(handle, da_linmod_coef, &nx, x.data());
159        std::cout << "Coefficients: " << std::endl;
160        std::cout.precision(3);
161
162        bool oki;
163        ok = !ok;
164        for (da_int i = 0; i < nx; i++) {
165            oki = std::abs(x[i] - x_ref[i]) <= 1.0e-3;
166            std::cout << " x[" << std::setw(2) << i << "] = " << std::setw(9) << x[i]
167                      << " expecting " << std::setw(9) << x_ref[i]
168                      << (oki ? " (OK)" : " [WRONG]") << std::endl;
169            ok &= oki;
170        }
171    } else {
172        std::cout << "Unexpected error:" << std::endl;
173        da_handle_print_error_message(handle);
174    }
175    std::cout << "----------------------------------------" << std::endl;
176
177    da_handle_destroy(&handle);
178
179    return ok ? 0 : 7;
180}