# Declare the required CMake version before anything else so that policy
# defaults are in place for every following command.
cmake_minimum_required(VERSION 3.28)

# User-facing build switches (override with -D<NAME>=ON/OFF at configure time).
option(USE_CUDA "Support NVIDIA CUDA" OFF)
option(PROFILE_MODE "ENABLE PROFILE MODE" OFF)
option(USE_OMP "Use OpenMP as backend for Eigen" ON)
option(USE_NCCL "Build project for distributed running" ON)

project(infini_train
    VERSION 0.5.0
    LANGUAGES CXX
)

# Build everything as strict C++20 with compiler extensions disabled.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Generate compile_commands.json
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# ------------------------------------------------------------------------------
# Third-party deps
# ------------------------------------------------------------------------------
# Each dependency is built from the copy under third_party/ and its headers are
# added to the directory-wide include path. The order of the statements below
# is significant (include search order, and what each subdirectory inherits).

# gflags
add_subdirectory(third_party/gflags)
include_directories(${gflags_SOURCE_DIR}/include)

# glog -- the two cache entries must be set before glog's own CMakeLists runs.
set(WITH_GFLAGS OFF CACHE BOOL "Disable glog finding system gflags" FORCE)
set(WITH_GTEST OFF CACHE BOOL "Disable glog finding system gtest" FORCE)
add_subdirectory(third_party/glog)
include_directories(${glog_SOURCE_DIR}/src)

# eigen -- OpenMP is located first when it was requested via USE_OMP.
if(USE_OMP)
    find_package(OpenMP REQUIRED)
endif()
add_subdirectory(third_party/eigen)
include_directories(${PROJECT_SOURCE_DIR}/third_party/eigen)

# Project root on the include path, so headers resolve relative to the repo root.
include_directories(${PROJECT_SOURCE_DIR})

# Expose PROFILE_MODE to the sources as a preprocessor definition.
if(PROFILE_MODE)
    add_compile_definitions(PROFILE_MODE=1)
endif()

# ------------------------------------------------------------------------------
# Sources
# ------------------------------------------------------------------------------
# CONFIGURE_DEPENDS makes the build system re-check the globs on every build and
# re-run CMake when the set of matching files changes, so newly added or removed
# .cc files are picked up without a manual re-configure.

# Framework core sources (*.cc), excluding cpu kernels (they are built separately)
file(GLOB_RECURSE SRC CONFIGURE_DEPENDS
    "${PROJECT_SOURCE_DIR}/infini_train/src/*.cc"
)
list(FILTER SRC EXCLUDE REGEX ".*kernels/cpu/.*")

# CPU kernels (*.cc)
file(GLOB_RECURSE CPU_KERNELS CONFIGURE_DEPENDS
    "${PROJECT_SOURCE_DIR}/infini_train/src/kernels/cpu/*.cc"
)

# ------------------------------------------------------------------------------
# CPU kernels library
# ------------------------------------------------------------------------------
# Static archive built from the sources collected in CPU_KERNELS. glog and Eigen
# are PUBLIC so that anything linking this archive inherits them.

add_library(infini_train_cpu_kernels STATIC ${CPU_KERNELS})
target_link_libraries(infini_train_cpu_kernels
    PUBLIC
        glog
        Eigen3::Eigen
)

# With USE_OMP on, define USE_OMP=1 for the sources and link the OpenMP runtime.
if(USE_OMP)
    add_compile_definitions(USE_OMP=1)
    target_link_libraries(infini_train_cpu_kernels
        PUBLIC
            OpenMP::OpenMP_CXX
    )
endif()

# ------------------------------------------------------------------------------
# CUDA kernels library (optional)
# ------------------------------------------------------------------------------
# Only configured when USE_CUDA is ON: enables the CUDA language, locates the
# toolkit and builds every *.cu file under infini_train/src into a static archive.

if(USE_CUDA)
    add_compile_definitions(USE_CUDA=1)
    enable_language(CUDA)
    find_package(CUDAToolkit REQUIRED)
    include_directories(${CUDAToolkit_INCLUDE_DIRS})

    # Extra CUDA compiler flags, appended to whatever is already configured.
    string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda --expt-relaxed-constexpr")

    # All CUDA sources (*.cu) of the framework.
    file(GLOB_RECURSE CUDA_KERNELS ${PROJECT_SOURCE_DIR}/infini_train/src/*.cu)

    add_library(infini_train_cuda_kernels STATIC ${CUDA_KERNELS})
    set_property(TARGET infini_train_cuda_kernels PROPERTY CUDA_ARCHITECTURES 75 80 90)

    target_link_libraries(infini_train_cuda_kernels PUBLIC
        glog
        CUDA::cudart
        CUDA::cublas
        CUDA::cuda_driver
    )

    # NCCL is only considered together with CUDA; the find module is looked up
    # in the project's own cmake/ directory.
    if(USE_NCCL)
        message(STATUS "Add USE_NCCL, use NCCL with CUDA")
        list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
        find_package(NCCL REQUIRED)
        add_compile_definitions(USE_NCCL=1)
        target_link_libraries(infini_train_cuda_kernels PUBLIC nccl)
    endif()
endif()

# ------------------------------------------------------------------------------
# Main framework library
# ------------------------------------------------------------------------------
# Static archive of the core sources in SRC. Whole-archive / group linking is
# deliberately NOT applied here; executables get it via link_infini_train_exe().

add_library(infini_train STATIC ${SRC})
target_link_libraries(infini_train PUBLIC glog gflags infini_train_cpu_kernels)

if(USE_CUDA)
    # The core library also contains CUDA runtime wrappers written as *.cc
    # (e.g. cuda_blas_handle.cc, cuda_guard.cc). They may need the CUDA
    # runtime/driver/cuBLAS symbols at final link, so those are attached here
    # in addition to the CUDA kernels archive.
    target_link_libraries(infini_train
        PUBLIC
            infini_train_cuda_kernels
            CUDA::cudart
            CUDA::cublas
            CUDA::cuda_driver
    )

    # Needed if core code references NCCL symbols directly (not only the
    # kernels); harmless otherwise.
    if(USE_NCCL)
        target_link_libraries(infini_train PUBLIC nccl)
    endif()
endif()

# ------------------------------------------------------------------------------
# Helper: link an executable against the framework archives
# ------------------------------------------------------------------------------
# link_infini_train_exe(<target_name>)
#
#   target_name  Name of an existing executable target.
#
# Links the framework static archives into <target_name> (PRIVATE), wrapped in
#   --start-group / --end-group   so the linker rescans the archives until no
#                                 new undefined references remain (resolves the
#                                 cuda_kernels -> core "undefined reference"
#                                 errors caused by one-pass archive resolution)
#   --whole-archive               so every object of each archive is pulled in.
# The CUDA kernels archive is part of the set only when USE_CUDA is ON.
#
# Example:
#   add_executable(my_tool main.cc)
#   link_infini_train_exe(my_tool)
function(link_infini_train_exe target_name)
    set(framework_archives
        infini_train
        infini_train_cpu_kernels
    )
    if(USE_CUDA)
        list(APPEND framework_archives infini_train_cuda_kernels)
    endif()

    target_link_libraries(${target_name} PRIVATE
        "-Wl,--start-group"
        "-Wl,--whole-archive"
        ${framework_archives}
        "-Wl,--no-whole-archive"
        "-Wl,--end-group"
    )
endfunction()

# ------------------------------------------------------------------------------
# Examples
# ------------------------------------------------------------------------------
# Every example executable is linked through link_infini_train_exe().

add_executable(mnist
    example/mnist/main.cc
    example/mnist/dataset.cc
    example/mnist/net.cc
)
link_infini_train_exe(mnist)

# gpt2 and llama3 share the same layout: a model-specific main.cc / net.cc plus
# the common dataset, utils and tokenizer sources.
foreach(model IN ITEMS gpt2 llama3)
    add_executable(${model}
        example/${model}/main.cc
        example/common/tiny_shakespeare_dataset.cc
        example/common/utils.cc
        example/${model}/net.cc
        example/common/tokenizer.cc
    )
    link_infini_train_exe(${model})
endforeach()

# Tools
# tools/infini_run is expected to define the `infini_run` target; its binary is
# redirected to the top of the build tree.
add_subdirectory(tools/infini_run)
set_property(TARGET infini_run PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})

# Tests
# Link through link_infini_train_exe() like every other executable. The
# infini_train target no longer carries --whole-archive in its own link
# interface, so linking it directly would leave the tests without the
# whole-archive / group linking of the kernel archives.
add_executable(test_hook test/hook/test_hook.cc)
link_infini_train_exe(test_hook)

add_executable(test_precision_check test/hook/test_precision_check.cc)
link_infini_train_exe(test_precision_check)
0 commit comments