Skip to content

Commit fcbc262

Browse files

Merge 'origin/master' into hipblas (2 parents: c73def1 + f4cef87)

29 files changed

+1282
-469
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ models/*
3232
/vdot
3333
/Pipfile
3434

35+
build-info.h
3536
arm_neon.h
3637
compile_commands.json
3738

CMakeLists.txt

+49-1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,41 @@ option(LLAMA_HIPBLAS "llama: use hipBLAS"
7373
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
7474
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
7575

76+
#
77+
# Build info header
78+
#
79+
80+
# Write header template to binary dir to keep source directory clean
81+
file(WRITE "${CMAKE_BINARY_DIR}/BUILD_INFO.h.in" "\
82+
#ifndef BUILD_INFO_H\n\
83+
#define BUILD_INFO_H\n\
84+
\n\
85+
#define BUILD_NUMBER @BUILD_NUMBER@\n\
86+
#define BUILD_COMMIT \"@BUILD_COMMIT@\"\n\
87+
\n\
88+
#endif // BUILD_INFO_H\n\
89+
")
90+
91+
# Generate initial build-info.h
92+
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
93+
94+
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
95+
# Add a custom target for build-info.h
96+
add_custom_target(BUILD_INFO ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h")
97+
98+
# Add a custom command to rebuild build-info.h when .git/index changes
99+
add_custom_command(
100+
OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.h"
101+
COMMENT "Generating build details from Git"
102+
COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake"
103+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
104+
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/.git/index"
105+
VERBATIM
106+
)
107+
else()
108+
message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
109+
endif()
110+
76111
#
77112
# Compile flags
78113
#
@@ -288,9 +323,22 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
288323
# TODO: arm msvc?
289324
else()
290325
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
326+
# Apple M1, M2, etc.
327+
# Raspberry Pi 3, 4, Zero 2 (64-bit)
291328
add_compile_options(-mcpu=native)
292329
endif()
293-
# TODO: armv6,7,8 version specific flags
330+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
331+
# Raspberry Pi 1, Zero
332+
add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access)
333+
endif()
334+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7")
335+
# Raspberry Pi 2
336+
add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations)
337+
endif()
338+
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8")
339+
# Raspberry Pi 3, 4, Zero 2 (32-bit)
340+
add_compile_options(-mfp16-format=ieee -mno-unaligned-access)
341+
endif()
294342
endif()
295343
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
296344
message(STATUS "x86 detected")

Makefile

+37-20
Original file line numberDiff line numberDiff line change
@@ -148,19 +148,21 @@ ifdef LLAMA_PERF
148148
CXXFLAGS += -DGGML_PERF
149149
endif
150150
ifneq ($(filter aarch64%,$(UNAME_M)),)
151+
# Apple M1, M2, etc.
152+
# Raspberry Pi 3, 4, Zero 2 (64-bit)
151153
CFLAGS += -mcpu=native
152154
CXXFLAGS += -mcpu=native
153155
endif
154156
ifneq ($(filter armv6%,$(UNAME_M)),)
155-
# Raspberry Pi 1, 2, 3
157+
# Raspberry Pi 1, Zero
156158
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
157159
endif
158160
ifneq ($(filter armv7%,$(UNAME_M)),)
159-
# Raspberry Pi 4
161+
# Raspberry Pi 2
160162
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
161163
endif
162164
ifneq ($(filter armv8%,$(UNAME_M)),)
163-
# Raspberry Pi 4
165+
# Raspberry Pi 3, 4, Zero 2 (32-bit)
164166
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
165167
endif
166168

@@ -192,41 +194,56 @@ llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h
192194
common.o: examples/common.cpp examples/common.h
193195
$(CXX) $(CXXFLAGS) -c $< -o $@
194196

197+
libllama.so: llama.o ggml.o $(OBJS)
198+
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
199+
195200
clean:
196-
rm -vf *.o main quantize quantize-stats perplexity embedding benchmark-matmult
201+
rm -vf *.o main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state build-info.h
197202

198-
main: examples/main/main.cpp ggml.o llama.o common.o $(OBJS)
199-
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
203+
#
204+
# Examples
205+
#
206+
207+
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS)
208+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
200209
@echo
201210
@echo '==== Run ./main -h for help. ===='
202211
@echo
203212

204-
quantize: examples/quantize/quantize.cpp ggml.o llama.o $(OBJS)
205-
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
213+
quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS)
214+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
206215

207-
quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o $(OBJS)
208-
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
216+
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o $(OBJS)
217+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
209218

210-
perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o $(OBJS)
211-
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
219+
perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS)
220+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
212221

213-
embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o $(OBJS)
214-
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
222+
embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS)
223+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
215224

216-
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
217-
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
225+
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
226+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
218227

219-
libllama.so: llama.o ggml.o $(OBJS)
220-
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
228+
build-info.h: $(wildcard .git/index) scripts/build-info.sh
229+
@scripts/build-info.sh > $@.tmp
230+
@if ! cmp -s $@.tmp $@; then \
231+
mv $@.tmp $@; \
232+
else \
233+
rm $@.tmp; \
234+
fi
221235

222236
#
223237
# Tests
224238
#
225239

226-
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp ggml.o $(OBJS)
227-
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
240+
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
241+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
228242
./$@
229243

244+
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
245+
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
246+
230247
.PHONY: tests
231248
tests:
232249
bash ./tests/run-tests.sh

examples/benchmark/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@ set(TARGET benchmark)
22
add_executable(${TARGET} benchmark-matmult.cpp)
33
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
44
target_compile_features(${TARGET} PRIVATE cxx_std_11)
5+
if(TARGET BUILD_INFO)
6+
add_dependencies(${TARGET} BUILD_INFO)
7+
endif()

examples/benchmark/benchmark-matmult.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include <locale.h>
22
#include "ggml.h"
3+
#include "build-info.h"
34
#include <assert.h>
45
#include <math.h>
56
#include <cstring>
@@ -90,9 +91,10 @@ int main(int argc, char ** argv) {
9091
}
9192
}
9293

93-
// create the ggml context
94+
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
9495
printf("Starting Test\n");
9596

97+
// create the ggml context
9698
struct ggml_context * ctx;
9799
//const int sizex = 4096;
98100
//const int sizey = 11008;

examples/common.cpp

+39-10
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
#include "common.h"
22

33
#include <cassert>
4+
#include <iostream>
45
#include <cstring>
56
#include <fstream>
67
#include <string>
78
#include <iterator>
89
#include <algorithm>
910
#include <sstream>
10-
#include <iostream>
11+
12+
#if defined(__APPLE__) && defined(__MACH__)
13+
#include <sys/types.h>
14+
#include <sys/sysctl.h>
15+
#endif
1116

1217
#if defined (_WIN32)
1318
#include <fcntl.h>
@@ -25,19 +30,43 @@ extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int
2530
#define CP_UTF8 65001
2631
#endif
2732

28-
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
29-
// determine sensible default number of threads.
30-
// std::thread::hardware_concurrency may not be equal to the number of cores, or may return 0.
33+
int32_t get_num_physical_cores() {
3134
#ifdef __linux__
3235
std::ifstream cpuinfo("/proc/cpuinfo");
33-
params.n_threads = std::count(std::istream_iterator<std::string>(cpuinfo),
34-
std::istream_iterator<std::string>(),
35-
std::string("processor"));
36-
#endif
37-
if (params.n_threads == 0) {
38-
params.n_threads = std::max(1, (int32_t) std::thread::hardware_concurrency());
36+
std::string line;
37+
while (std::getline(cpuinfo, line)) {
38+
std::size_t pos = line.find("cpu cores");
39+
if (pos != std::string::npos) {
40+
pos = line.find(": ", pos);
41+
if (pos != std::string::npos) {
42+
try {
43+
// Extract the number and return it
44+
return static_cast<int32_t>(std::stoul(line.substr(pos + 2)));
45+
} catch (const std::invalid_argument &) {
46+
// Ignore if we could not parse
47+
}
48+
}
49+
}
50+
}
51+
#elif defined(__APPLE__) && defined(__MACH__)
52+
int32_t num_physical_cores;
53+
size_t len = sizeof(num_physical_cores);
54+
int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
55+
if (result == 0) {
56+
return num_physical_cores;
57+
}
58+
result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
59+
if (result == 0) {
60+
return num_physical_cores;
3961
}
62+
#elif defined(_WIN32)
63+
//TODO: Implement
64+
#endif
65+
unsigned int n_threads = std::thread::hardware_concurrency();
66+
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
67+
}
4068

69+
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
4170
bool invalid_param = false;
4271
std::string arg;
4372
gpt_params default_params;

examples/common.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@
1313
//
1414
// CLI argument parsing
1515
//
16+
int32_t get_num_physical_cores();
1617

1718
struct gpt_params {
1819
int32_t seed = -1; // RNG seed
19-
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
20-
int32_t n_predict = -1; // new tokens to predict
20+
int32_t n_threads = get_num_physical_cores();
21+
int32_t n_predict = -1; // new tokens to predict
2122
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
2223
int32_t n_ctx = 512; // context size
2324
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)

examples/embedding/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@ set(TARGET embedding)
22
add_executable(${TARGET} embedding.cpp)
33
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
44
target_compile_features(${TARGET} PRIVATE cxx_std_11)
5+
if(TARGET BUILD_INFO)
6+
add_dependencies(${TARGET} BUILD_INFO)
7+
endif()

examples/embedding/embedding.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "common.h"
22
#include "llama.h"
3+
#include "build-info.h"
34

45
#include <ctime>
56

@@ -18,11 +19,13 @@ int main(int argc, char ** argv) {
1819
"expect poor results\n", __func__, params.n_ctx);
1920
}
2021

22+
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
23+
2124
if (params.seed <= 0) {
2225
params.seed = time(NULL);
2326
}
2427

25-
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
28+
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
2629

2730
std::mt19937 rng(params.seed);
2831
if (params.random_prompt) {

examples/main/CMakeLists.txt

+3
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@ set(TARGET main)
22
add_executable(${TARGET} main.cpp)
33
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
44
target_compile_features(${TARGET} PRIVATE cxx_std_11)
5+
if(TARGET BUILD_INFO)
6+
add_dependencies(${TARGET} BUILD_INFO)
7+
endif()

Comments (0)