15. AOCL-Utils#
AOCL-Utils is an effort to factor out functionality that is common across libraries. Its main features include:
Au_cpuid module
Core details
Flags available/usable
ISA available/usable
Au_core_module
Thread pinning.
Note
This library detects only CPUs based on the AMD “Zen” architecture; there are no plans to add support for x86 implementations from other CPU vendors. Some of the utilities may fail or behave in an unexpected manner on predecessors of the AMD “Zen” architecture.
The Core module is internal to AOCL-Utils. To use its features, link to libaoclutils, which is a combined library of all the available utils modules.
15.1. Requirements#
For more information on CMake versions validated, refer to Build Utilities
GCC v8.5 or later
Clang v12 or later
AOCC 4.2 or later
Refer to the supported_package_matrix.md file in the repository for more information. stdc++ library must be linked when using the AOCL-Utils static binary.
15.2. Clone and Build the AOCL-Utils Library#
Complete the following steps to clone and build the AOCL-Utils library:
Download the latest release of AOCL-Utils (amd/aocl-utils).
Clone the Git repository (amd/aocl-utils.git).
Run the command:
$ cd aocl-utils
For more information on the detailed steps to build and install AOCL-Utils (based on OS, compilers, and so on), refer to the aocl-utils/Readme.md file.
Note
For installing the AOCL-Utils library with Spack on Linux-based environment, refer to AMD Developer Central (https://www.amd.com/en/developer/zen-software-studio/applications/spack/spack-aocl.html).
15.3. Using AOCL-Utils#
AOCL-Utils is a C++ Library. This library also provides C interfaces for the calls from other C programs/libraries. After installing the AOCL-Utils library:
Depending on the functionality used, include the corresponding header files.
15.3.1. Core Module#
Functionality |
Headerfiles(C++) |
Headerfiles(C) |
---|---|---|
thread pinning |
ThreadPinning.hh |
threadpinning.h |
15.3.2. CpuId Module#
Functionality |
Headerfiles(C) |
Headerfiles(C++) |
---|---|---|
cpu architecture detection |
cpuid.h |
X86Cpu.hh |
cpuid feature flag detection |
cpuid.h |
X86Cpu.hh |
Deprecated APIs |
arch.h |
cpu.hh |
15.3.3. Examples#
15.3.3.1. C API Example (cpuid Module)#
#include "Capi/au/cpuid/cpuid.h"
#include <stdio.h>
// Asks the AOCL-Utils C API whether core 0 is an AMD CPU and prints the
// verdict. The C API exposes no Intel query, so every non-AMD result is
// reported as "Unknown".
int main(void)
{
    const int core_num = 0; // core handed to au_cpuid_is_amd()

    const char* verdict = au_cpuid_is_amd(core_num)
                              ? "AMD CPU detected...\n"
                              : "Unknown CPU detected...\n";
    printf("%s", verdict);

    return 0;
}
15.3.3.2. C++ API Example (cpuid Module)#
#include "Au/Cpuid/X86Cpu.hh"
#include <iostream> // std::cout and std::endl are used below
using namespace Au;
// Builds an X86Cpu object for core 0 and prints which vendor it reports:
// AMD, Intel, or unknown.
int main(void)
{
    X86Cpu cpu{ 0 };
    if (cpu.isAMD()) {
        std::cout << "AMD CPU detected..." << std::endl;
    } else if (cpu.isIntel()) {
        std::cout << "Intel CPU detected..." << std::endl;
    } else {
        std::cout << "Unknown CPU detected..." << std::endl;
    }
    return 0;
}
15.3.3.3. C API Example (Thread Pinning)#
#include "Capi/au/threadpinning.h"
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#define NUM_THREADS 5
// Thread entry point. `arg` must point to an int holding the thread's ID.
// Prints the ID, then sleeps for three seconds (presumably so the thread is
// still running while main() pins it and reads back its affinity).
// Always returns NULL.
void* threadFunction(void* arg)
{
    const int* id_ptr = (const int*)arg;

    printf("Thread started with ID: %d\n", *id_ptr);
    sleep(3);

    return NULL;
}
void checkThreadAffinity(pthread_t thread)
{
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset) != 0) {
printf("Error reading thread affinity on Linux\n");
} else {
printf("Affinity mask: %lu\n", cpuset.__bits[0]);
}
}
// Prints the affinity mask of each of the `count` threads, in array order.
static void reportAffinities(const pthread_t* threads, int count)
{
    for (int idx = 0; idx < count; ++idx) {
        checkThreadAffinity(threads[idx]);
    }
}

// Starts NUM_THREADS worker threads, then pins them with each AOCL-Utils
// strategy in turn (CORE, LOGICAL, SPREAD, CUSTOM), printing every thread's
// affinity mask after each pinning call. Returns 1 if a thread cannot be
// created, 0 otherwise.
int main()
{
    pthread_t threads[NUM_THREADS];
    int       thread_ids[NUM_THREADS];

    for (int i = 0; i < NUM_THREADS; ++i) {
        // Each worker gets a pointer to its own slot, so the ID stays valid
        // for the lifetime of main().
        thread_ids[i] = i;
        const int rc =
            pthread_create(&threads[i], NULL, threadFunction, &thread_ids[i]);
        if (rc != 0) {
            printf("Error creating thread %d\n", i);
            return 1;
        }
    }

    // Pin threads to the processor group using different strategies
    // CORE, LOGICAL, SPREAD, CUSTOM
    printf(
        "Pinning threads to the processor group using different strategies\n");

    printf("CORE\n");
    au_pin_threads_core(threads, NUM_THREADS);
    reportAffinities(threads, NUM_THREADS);

    printf("LOGICAL\n");
    au_pin_threads_logical(threads, NUM_THREADS);
    reportAffinities(threads, NUM_THREADS);

    printf("SPREAD\n");
    au_pin_threads_spread(threads, NUM_THREADS);
    reportAffinities(threads, NUM_THREADS);

    printf("CUSTOM\n");
    // One entry per thread; presumably thread i is pinned to CPU
    // affinityVector[i] -- confirm against the au_pin_threads_custom docs.
    int affinityVector[NUM_THREADS] = { 0, 1, 4, 6, 7 };
    au_pin_threads_custom(threads, NUM_THREADS, affinityVector, NUM_THREADS);
    reportAffinities(threads, NUM_THREADS);

    // Wait for every worker to finish before exiting.
    for (int i = 0; i < NUM_THREADS; ++i) {
        pthread_join(threads[i], NULL);
    }
    return 0;
}
15.3.3.4. C++ API Example (Thread Pinning)#
#include "Au/ThreadPinning.hh"
#include <iostream>
#include <thread>
#include <vector>
#ifdef _WIN32
#include <windows.h>
#else
#include <pthread.h>
#include <sched.h>
#endif
using namespace Au;
// Function for the thread to execute
// Thread body: announces the given ID on standard output, then sleeps for
// one millisecond.
void printThreadId(int id)
{
    const std::chrono::milliseconds pause{ 1 };

    std::cout << "Thread started with ID: " << id << std::endl;
    std::this_thread::sleep_for(pause);
}
// Prints the CPU affinity of `thread` to std::cout, or an error message to
// std::cerr if the affinity cannot be read.
//  - Windows: prints the Mask field of the thread's GROUP_AFFINITY.
//  - Otherwise: prints an unsigned long bit mask in which bit i is set when
//    CPU i is in the thread's affinity set (only as many CPUs as there are
//    bits in an unsigned long are shown).
void checkThreadAffinity(std::thread& thread)
{
#ifdef _WIN32
    // Get the native handle of the thread
    HANDLE handle = (HANDLE)thread.native_handle();
    // Get thread group affinity
    GROUP_AFFINITY groupAffinity;
    BOOL result = GetThreadGroupAffinity(handle, &groupAffinity);
    if (!result) {
        std::cerr << "Error reading thread affinity for thread on Windows"
                  << std::endl;
    } else {
        // Separate the label from the value so the output is readable.
        std::cout << "groupAffinity.mask: " << groupAffinity.Mask << std::endl;
    }
#else
    // Get the native handle of the thread
    pthread_t handle = thread.native_handle();
    // Create a CPU set and get the current affinity
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    const int result =
        pthread_getaffinity_np(handle, sizeof(cpu_set_t), &cpuset);
    if (result != 0) {
        std::cerr << "Error reading thread affinity for thread on Linux"
                  << std::endl;
        return;
    }

    // cpu_set_t is an opaque type: build the mask through CPU_ISSET rather
    // than reading the C library's private __bits member directly.
    unsigned long mask     = 0;
    const int     maskBits = static_cast<int>(8 * sizeof(mask));
    for (int cpu = 0; cpu < maskBits; ++cpu) {
        if (CPU_ISSET(cpu, &cpuset)) {
            mask |= 1UL << cpu;
        }
    }
    std::cout << " affinity mask: " << mask << std::endl;
#endif
}
// Starts five threads running printThreadId, then pins them through
// ThreadPinning::pinThreads four times -- with an explicit CPU list and with
// strategy arguments 0, 1 and 2 -- printing every thread's affinity after each
// call. Joins all threads before returning 0.
int main()
{
    const int n = 5; // Number of threads

    // Create n threads
    std::vector<std::thread> threads;
    for (int id = 0; id < n; ++id) {
        threads.emplace_back(printThreadId, id);
    }

    ThreadPinning tp;

    // pinThreads is given native handles, not std::thread objects.
    std::vector<pthread_t> threadList;
    for (auto& worker : threads) {
        threadList.push_back(worker.native_handle());
    }

    // Prints the current affinity of every thread, in creation order.
    const auto reportAffinities = [&threads] {
        for (auto& worker : threads) {
            checkThreadAffinity(worker);
        }
    };

    // pin threads using the custom strategy.
    std::cout << "Pinning threads using custom pin strategy" << std::endl;
    tp.pinThreads(threadList, { 0, 5, 2, 10, 4 });
    reportAffinities();

    // pin threads using the spread strategy
    std::cout << "Pinning threads using spread pin strategy" << std::endl;
    tp.pinThreads(threadList, 0);
    reportAffinities();

    // pin threads using the core strategy
    std::cout << "Pinning threads using core pin strategy" << std::endl;
    tp.pinThreads(threadList, 1);
    reportAffinities();

    // pin threads using the Logical strategy
    std::cout << "Pinning threads using logical pin strategy" << std::endl;
    tp.pinThreads(threadList, 2);
    reportAffinities();

    // Join all threads
    for (auto& worker : threads) {
        worker.join();
    }
    return 0;
}
15.3.4. Output#
Finally, run build/Release/cpuid_example_c on the system; it produces the following output:
$ ./Release/cpuid_example_c
AMD CPU detected...
Run any of the examples in the examples folder similarly.
15.3.5. Integrate with Other Libraries/Applications#
The following build systems can be used to integrate AOCL-Utils into a library/application:
CMAKE:
In the CMake file, use the following:
TARGET_INCLUDE_DIRECTORIES() - path of libaoclutils include directory
TARGET_LINK_LIBRARIES() - path to link libaoclutils binaries
Make:
In the compiler flags of Make file, use the following:
-I - path of libaoclutils include directory
-l, -L - path to link libaoclutils binaries
From the command line (on Linux):
# Export the libaoclutils binaries path into LD_LIBRARY_PATH variable.
$ export LD_LIBRARY_PATH=<path of libaoclutils binaries>:${LD_LIBRARY_PATH}
**Using Static Library:**
$ gcc -std=gnu11 test_c_application.c -o test_c_application.exe -L<path
of libaoclutils binaries>
-l:libaoclutils.a -lstdc++ -I<path of libaoclutils include directory>
$ g++ -std=gnu++17 test_cpp_application.cc -o test_cpp_application.exe
-L<path of libaoclutils binaries> -l:libaoclutils.a -I<path of
libaoclutils include directory>
**Using Dynamic/Shared Library:**
$ gcc -std=gnu11 test_c_application.c -o test_c_application.exe -L<path
of libaoclutils binaries>
-l:libaoclutils.so -I<path of libaoclutils include directory>
$ g++ -std=gnu++17 test_cpp_application.cc -o test_cpp_application.exe
-L<path of libaoclutils binaries> -l:libaoclutils.so -I<path of
libaoclutils include directory>
On Windows, you can build an application with the AOCL-Utils library using Clang/Clang++ Compilers as follows:
Create a 64-bit console app C++ project in Microsoft Visual Studio 17 2022.
To select Clang-cl compiler, navigate to Project > Properties > Configuration Properties > General > Platform Toolset > LLVM(Clang-cl) or llvm.
Add the sources into project using:
Project > Add Existing item > select the source from the project source directory.
Update the include path in:
Project > Properties > C/C++ > General > Additional Include Directories
Update the AOCL-Utils library path (where libaoclutils.lib or libaoclutils_static.lib exists) in:
Project > Properties > Linker > General > Additional Library Directories
Update the AOCL-Utils library name in:
Project > Properties > Linker > Input > Additional Dependencies (libaoclutils.lib or libaoclutils_static.lib)
If AOCL-Utils dynamic library is used, copy the AOCL-Utils DLL library (libaoclutils.dll) to the same project application folder.
Compile the project and run the application.