reorganize: consolidate demo directories

- Consolidate _demo, _pydemo, _embdemo into single _demo directory structure
- Organize demos by language: _demo/{go,py,c,embed}/
- Categorize demos based on imports:
- Python library demos (py imports) → _demo/py/
- C/C++ library demos (c/cpp imports) → _demo/c/
- Go-specific demos → _demo/go/
- Embedded demos → _demo/embed/
- Move C-related demos (asm*, cabi*, cgo*, linkname, targetsbuild) from go/ to c/
- Update all path references in README.md and GitHub workflows
- Improve demo organization and navigation as requested in #1256

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Li Jie
2025-09-09 15:06:55 +08:00
parent 849b23079b
commit 64df39b3c5
96 changed files with 61 additions and 52 deletions

View File

@@ -0,0 +1,18 @@
package main
import (
"fmt"
"time"
)
//llgo:link asm llgo.asm
func asm(instruction string) {}
// main issues the "nop" instruction 100000 times through the asm intrinsic
// and prints how long the whole loop took.
func main() {
	const iterations = 100000
	begin := time.Now()
	for n := 0; n != iterations; n++ {
		asm("nop")
	}
	elapsed := time.Since(begin)
	fmt.Println("Duration:", elapsed)
}

View File

@@ -0,0 +1,21 @@
package main
import (
"fmt"
)
//llgo:link asmFull llgo.asm
func asmFull(instruction string, regs map[string]any) uintptr { return 0 }
var testVar = 0
func main() {
verify()
}
// check compares actual against expected. On a match it reports the verified
// value on standard output; on a mismatch it panics with a message that names
// both values.
func check(expected, actual int) {
	if expected == actual {
		fmt.Println("asm check passed:", actual)
		return
	}
	msg := fmt.Sprintf("Expected: %d, Got: %d\n", expected, actual)
	panic(msg)
}

View File

@@ -0,0 +1,31 @@
//go:build darwin && arm64
package main
import "unsafe"
// verify is the darwin/arm64 variant of the inline-assembly checks. It runs
// asmFull with a bare instruction, with a store through an address operand,
// and with instructions whose result is returned, validating every observable
// value through check (which panics on a mismatch).
func verify() {
// 0 output & 0 input
asmFull("nop", nil)
// 0 output & 1 input with memory address
addr := uintptr(unsafe.Pointer(&testVar))
asmFull("str {value}, [{addr}]", map[string]any{
"addr": addr,
"value": 43,
})
check(43, testVar)
// 1 output & 1 input
// NOTE(review): the unnamed "{}" placeholder appears to stand for the result
// returned by asmFull — confirm against the llgo.asm documentation.
res1 := asmFull("mov {}, {value}", map[string]any{
"value": 41,
})
check(41, int(res1))
// 1 output & 2 inputs
res2 := asmFull("add {}, {a}, {b}", map[string]any{
"a": 25,
"b": 17,
})
check(42, int(res2))
}

View File

@@ -0,0 +1,30 @@
//go:build linux && amd64
package main
import "unsafe"
// verify is the linux/amd64 variant of the inline-assembly checks. It runs
// asmFull with a bare instruction, with a store through an address operand,
// and with instructions whose result is returned, validating every observable
// value through check (which panics on a mismatch).
func verify() {
// 0 output & 0 input
asmFull("nop", nil)
// 0 output & 1 input with memory address
addr := uintptr(unsafe.Pointer(&testVar))
asmFull("movq {value}, ({addr})", map[string]any{
"addr": addr,
"value": 43,
})
check(43, testVar)
// 1 output & 1 input
// NOTE(review): the unnamed "{}" placeholder appears to stand for the result
// returned by asmFull — confirm against the llgo.asm documentation.
res1 := asmFull("movq {value}, {}", map[string]any{
"value": 41,
})
check(41, int(res1))
// 1 output & 2 inputs
res2 := asmFull("leaq ({a},{b}), {}", map[string]any{
"a": 25,
"b": 17,
})
check(42, int(res2))
}

395
_demo/c/cabi/main.go Normal file
View File

@@ -0,0 +1,395 @@
package main
import (
_ "unsafe"
)
const (
LLGoFiles = "wrap/wrap.c"
)
type point struct {
x int32
y int32
}
//go:linkname pt C.pt
func pt(pt point) point
type point1 struct {
x int32
y int32
z int32
}
//go:linkname pt1 C.pt1
func pt1(pt point1) point1
type point2 struct {
x int8
y int32
z int32
}
//go:linkname pt2 C.pt2
func pt2(pt point2) point2
type point3 struct {
x int8
y int8
z int8
}
//go:linkname pt3 C.pt3
func pt3(pt point3) point3
type point4 struct {
x int8
y int8
z int8
m int32
}
//go:linkname pt4 C.pt4
func pt4(pt point4) point4
type point5 struct {
x int8
y int8
z int8
m int8
n int8
}
//go:linkname pt5 C.pt5
func pt5(pt point5) point5
type point6 struct {
x int8
y int8
z int8
m int8
n int8
k int32
}
//go:linkname pt6 C.pt6
func pt6(pt point6) point6
type point7 struct {
x int8
y int8
z int8
m int8
n int8
k int32
o int8
}
//go:linkname pt7 C.pt7
func pt7(pt point7) point7
type data1 struct {
x int8
y int64
}
//go:linkname fn1 C.fn1
func fn1(data1) data1
type data2 struct {
x int32
y int64
}
//go:linkname fn2 C.fn2
func fn2(data2) data2
type data3 struct {
x int64
y int8
}
//go:linkname fn3 C.fn3
func fn3(data3) data3
type fdata1 struct {
x float32
}
//go:linkname ff1 C.ff1
func ff1(fdata1) fdata1
type fdata2 struct {
x float32
y float32
}
//go:linkname ff2 C.ff2
func ff2(fdata2) fdata2
type fdata2i struct {
x float32
y int32
}
//go:linkname ff2i C.ff2i
func ff2i(fdata2i) fdata2i
type fdata3 struct {
x float32
y float32
z float32
}
//go:linkname ff3 C.ff3
func ff3(fdata3) fdata3
type fdata4 struct {
x float32
y float32
z float32
m float32
}
//go:linkname ff4 C.ff4
func ff4(fdata4) fdata4
type fdata5 struct {
x float32
y float32
z float32
m float32
n float32
}
//go:linkname ff5 C.ff5
func ff5(fdata5) fdata5
type fdata2id struct {
x int8
y int8
z float64
}
//go:linkname ff2id C.ff2id
func ff2id(fdata2id) fdata2id
type fdata7if struct {
x [7]int8
y float32
}
//go:linkname ff7if C.ff7if
func ff7if(fdata7if) fdata7if
type fdata4if struct {
x float32
y int8
z float32
m float32
}
//go:linkname ff4if C.ff4if
func ff4if(fdata4if) fdata4if
type array struct {
x [8]int32
}
//go:linkname demo64 C.demo64
func demo64(n int64) int64
//go:linkname demo32 C.demo32
func demo32(n int32) int32
type struct32 struct {
v int32
}
//go:linkname demo32s C.demo32s
func demo32s(v struct32) struct32
type point64 struct {
x int64
y int64
}
//go:linkname pt64 C.pt64
func pt64(pt point64) point64
//go:linkname demo C.demo
func demo(a array) array
//go:linkname demo2 C.demo2
func demo2(x int32) array
type ddata1 struct {
x float64
}
//go:linkname dd1 C.dd1
func dd1(d ddata1) ddata1
type ddata2 struct {
x float64
y float64
}
//go:linkname dd2 C.dd2
func dd2(d ddata2) ddata2
type ddata3 struct {
x float64
y float64
z float64
}
//go:linkname dd3 C.dd3
func dd3(d ddata3) ddata3
//llgo:type C
type Callback func(array, point, point1) array
//go:linkname callback C.callback
func callback(fn Callback, ar array)
//llgo:type C
type Callback1 func(array, point, point1) point
//go:linkname callback1 C.callback1
func callback1(fn Callback1, ar array)
//go:linkname mycallback C.mycallback
func mycallback(ar array, pt point, pt1 point1) point
func myfn1(ar array, pt point, pt1 point1) point {
println("=>", ar.x[0], ar.x[1], ar.x[7], pt.x, pt.y, pt1.x, pt1.y, pt1.z)
return point{100, 200}
}
//export export_demo
func export_demo(ar array) array {
println("=> export", ar.x[0], ar.x[1], ar.x[7])
return ar
}
func main() {
cabi_demo()
callback_demo()
}
func callback_demo() {
export_demo(array{x: [8]int32{1, 2, 3, 4, 5, 6, 7, 8}})
callback(func(ar array, pt point, pt1 point1) array {
println("=> callback", ar.x[0], ar.x[1], ar.x[7], pt.x, pt.y, pt1.x, pt1.y, pt1.z)
return array{x: [8]int32{8, 7, 6, 5, 4, 3, 2, 1}}
}, array{x: [8]int32{1, 2, 3, 4, 5, 6, 7, 8}})
callback1(func(ar array, pt point, pt1 point1) point {
println("=> callback1", ar.x[0], ar.x[1], ar.x[7], pt.x, pt.y, pt1.x, pt1.y, pt1.z)
return point{100, 200}
}, array{x: [8]int32{1, 2, 3, 4, 5, 6, 7, 8}})
ret := mycallback(array{x: [8]int32{1, 2, 3, 4, 5, 6, 7, 8}}, point{1, 2}, point1{1, 2, 3})
println("=> mycallback", ret.x, ret.y)
callback1(myfn1, array{x: [8]int32{1, 2, 3, 4, 5, 6, 7, 8}})
callback1(myfn1, array{x: [8]int32{8, 7, 6, 5, 4, 3, 2, 1}})
callback1(mycallback, array{x: [8]int32{10, 20, 30, 40, 50, 60, 70, 80}})
}
func cabi_demo() {
i32 := demo32(1024)
println("=> demo32", i32)
s32 := demo32s(struct32{100})
println("=> demo32s", s32.v)
i64 := demo64(1024)
println("=> demo64", i64)
p64 := pt64(point64{1024, -1024})
println("=> pt64", p64.x, p64.y)
r := demo(array{x: [8]int32{1, 2, 3, 4, 5, 6, 7, 8}})
println("=> demo", r.x[0], r.x[1])
r2 := demo2(100)
println("=> demo2", r2.x[0], r2.x[1], r2.x[7])
p0 := pt(point{1, 2})
println("=> pt0", p0.x, p0.y)
p1 := pt1(point1{1, 2, 3})
println("=> pt1", p1.x, p1.y, p1.z)
p2 := pt2(point2{1, 2, 3})
println("=> pt2", p2.x, p2.y, p2.z)
p3 := pt3(point3{1, 2, 3})
println("=> pt3", p3.x, p3.y, p3.z)
p4 := pt4(point4{1, 2, 3, 4})
println("=> pt4", p4.x, p4.y, p4.z, p4.m)
p5 := pt5(point5{1, 2, 3, 4, 5})
println("=> pt5", p5.x, p5.y, p5.z, p5.m, p5.n)
p6 := pt6(point6{1, 2, 3, 4, 5, 6})
println("=> pt6", p6.x, p6.y, p6.z, p6.m, p6.n, p6.k)
p7 := pt7(point7{1, 2, 3, 4, 5, 6, 7})
println("=> pt7", p7.x, p7.y, p7.z, p7.m, p7.n, p7.k, p7.o)
// skip wrap
fd1 := fn1(data1{1, 2})
println("=> fd1", fd1.x, fd1.y)
fd2 := fn2(data2{1, 2})
println("=> fd2", fd2.x, fd2.y)
fd3 := fn3(data3{1, 2})
println("=> fd3", fd3.x, fd3.y)
// float
f1 := ff1(fdata1{1.1})
println("=> f1", f1.x)
// float
f2 := ff2(fdata2{1.1, 2.1})
println("=> f2", f2.x, f2.y)
// float
f2i := ff2i(fdata2i{1.1, 2})
println("=> f2i", f2i.x, f2i.y)
// float
f3 := ff3(fdata3{1.1, 2.1, 3.1})
println("=> f3", f3.x, f3.y, f3.z)
// float
f4 := ff4(fdata4{1.1, 2.1, 3.1, 4.1})
println("=> f4", f4.x, f4.y, f4.z, f4.m)
// float
f5 := ff5(fdata5{1.1, 2.1, 3.1, 4.1, 5.1})
println("=> f5", f5.x, f5.y, f5.z, f5.m, f5.n)
f2id := ff2id(fdata2id{1, 2, 3.1})
println("=> f2id", f2id.x, f2id.y, f2id.z)
f7if := ff7if(fdata7if{[7]int8{1, 2, 3, 4, 5, 6, 7}, 3.1})
println("=> f7if", f7if.x[0], f7if.x[1], f7if.y)
f4if := ff4if(fdata4if{1.1, 2, 3.1, 4.1})
println("=> f4if", f4if.x, f4if.y, f4if.z, f4if.m)
d1 := dd1(ddata1{1.1})
println("=> dd1", d1.x)
d2 := dd2(ddata2{1.1, 2.1})
println("=> dd2", d2.x, d2.y)
d3 := dd3(ddata3{1.1, 2.1, 3.1})
println("=> dd3", d3.x, d3.y, d3.z)
}

325
_demo/c/cabi/wrap/wrap.c Normal file
View File

@@ -0,0 +1,325 @@
extern int printf(const char *format, ...);
int demo32(int v) {
return v+100;
}
long long demo64(long long v) {
return v+100;
}
struct struct32 {
int v;
};
struct point64 {
long long x;
long long y;
};
struct point64 pt64(struct point64 pt) {
printf("point64: %lld %lld\n",pt.x,pt.y);
return pt;
}
struct struct32 demo32s(struct struct32 v) {
printf("struct32: %d\n",v.v);
struct struct32 v2 = {v.v+100};
return v2;
}
struct point {
int x;
int y;
};
struct point pt(struct point pt) {
printf("point: %d %d\n",pt.x,pt.y);
return pt;
}
struct point1 {
int x;
int y;
int z;
};
struct point1 pt1(struct point1 pt) {
printf("point1: %d %d %d\n",pt.x,pt.y,pt.z);
return pt;
}
struct point2 {
char x;
int y;
int z;
};
struct point2 pt2(struct point2 pt) {
printf("point2: %d %d %d\n",pt.x,pt.y,pt.z);
return pt;
}
struct point3 {
char x;
char y;
char z;
};
struct point3 pt3(struct point3 pt) {
printf("point3: %d %d %d\n",pt.x,pt.y,pt.z);
return pt;
}
struct point4 {
char x;
char y;
char z;
int m;
};
struct point4 pt4(struct point4 pt) {
printf("point4: %d %d %d %d\n",pt.x,pt.y,pt.z,pt.m);
return pt;
}
struct point5 {
char x;
char y;
char z;
char m;
char n;
};
struct point5 pt5(struct point5 pt) {
printf("point5: %d %d %d %d %d\n",pt.x,pt.y,pt.z,pt.m,pt.n);
return pt;
}
struct point6 {
char x;
char y;
char z;
char m;
char n;
int k;
};
struct point6 pt6(struct point6 pt) {
printf("point6: %d %d %d %d %d %d\n",pt.x,pt.y,pt.z,pt.m,pt.n,pt.k);
return pt;
}
struct point7 {
char x;
char y;
char z;
char m;
char n;
int k;
char o;
};
struct point7 pt7(struct point7 pt) {
printf("point7: %d %d %d %d %d %d %d\n",pt.x,pt.y,pt.z,pt.m,pt.n,pt.k,pt.o);
return pt;
}
struct data1 {
char x;
long long y;
};
struct data1 fn1(struct data1 pt) {
printf("data1: %d %lld\n",pt.x,pt.y);
return pt;
}
struct data2 {
int x;
long long y;
};
struct data2 fn2(struct data2 pt) {
printf("data2: %d %lld\n",pt.x,pt.y);
return pt;
}
struct data3 {
long long x;
char y;
};
struct data3 fn3(struct data3 pt) {
printf("data3: %lld %d\n",pt.x,pt.y);
return pt;
}
struct fdata1 {
float x;
};
struct fdata1 ff1(struct fdata1 pt) {
printf("ff1: %f\n",pt.x);
return pt;
}
struct ddata1 {
double x;
};
struct ddata1 dd1(struct ddata1 pt) {
printf("dd1: %f\n",pt.x);
return pt;
}
struct ddata2 {
double x;
double y;
};
struct ddata2 dd2(struct ddata2 pt) {
printf("dd2: %f %f\n",pt.x,pt.y);
return pt;
}
struct ddata3 {
double x;
double y;
double z;
};
struct ddata3 dd3(struct ddata3 pt) {
printf("dd3: %f %f %f\n",pt.x,pt.y,pt.z);
return pt;
}
struct fdata2i {
float x;
int y;
};
struct fdata2i ff2i(struct fdata2i pt) {
printf("ff2i: %f %d\n",pt.x,pt.y);
return pt;
}
struct fdata2 {
float x;
float y;
};
struct fdata2 ff2(struct fdata2 pt) {
printf("ff2: %f %f\n",pt.x,pt.y);
return pt;
}
struct fdata3 {
float x;
float y;
float z;
};
struct fdata3 ff3(struct fdata3 pt) {
printf("ff3: %f %f %f\n",pt.x,pt.y,pt.z);
return pt;
}
struct fdata4 {
float x;
float y;
float z;
float m;
};
struct fdata4 ff4(struct fdata4 pt) {
printf("ff4: %f %f %f %f\n",pt.x,pt.y,pt.z,pt.m);
return pt;
}
struct fdata5 {
float x;
float y;
float z;
float m;
float n;
};
struct fdata5 ff5(struct fdata5 pt) {
printf("ff5: %f %f %f %f %f\n",pt.x,pt.y,pt.z,pt.m,pt.n);
return pt;
}
struct fdata2id {
char x;
char y;
double z;
};
// ff2id prints the three fields of pt (two chars promoted to int, then the
// double) and returns pt unchanged.
// NOTE(review): the printed label is "ff6", not the function name "ff2id" —
// confirm nothing depends on that text before renaming it.
struct fdata2id ff2id(struct fdata2id pt) {
printf("ff6: %d %d %f\n",pt.x,pt.y,pt.z);
return pt;
}
struct fdata7if {
char x[7];
float z;
};
struct fdata7if ff7if(struct fdata7if pt) {
printf("ff7if: %d %d %f\n",pt.x[0],pt.x[1],pt.z);
return pt;
}
struct fdata4if {
float x;
char y;
float z;
float m;
};
struct fdata4if ff4if(struct fdata4if pt) {
printf("ff4if: %f %d %f %f\n",pt.x,pt.y,pt.z,pt.m);
return pt;
}
struct array {
int x[8];
};
struct array demo(struct array a) {
printf("demo: %d %d %d\n",a.x[0],a.x[1],a.x[2]);
return a;
}
struct array demo2(int a1){
struct array x;
for (int i = 0; i < 8; i++) {
x.x[i] = i+a1;
}
return x;
}
void callback(struct array (*fn)(struct array ar, struct point pt, struct point1 pt1), struct array ar) {
demo(ar);
struct point pt = {1,2};
struct point1 pt1 = {1,2,3};
struct array ret = fn(ar,pt,pt1);
demo(ret);
}
void callback1(struct point (*fn)(struct array ar, struct point pt, struct point1 pt1), struct array ar) {
printf("callback1 array: %d %d %d\n",ar.x[0],ar.x[1],ar.x[7]);
struct point pt = {1,2};
struct point1 pt1 = {1,2,3};
struct point ret = fn(ar,pt,pt1);
printf("callback1 ret: %d,%d\n",ret.x,ret.y);
}
struct point mycallback(struct array ar, struct point pt, struct point1 pt1) {
printf("mycallback array: %d %d %d\n",ar.x[0],ar.x[1],ar.x[7]);
printf("mycallback pt: %d %d\n",pt.x,pt.y);
printf("mycallback pt1: %d %d %d\n",pt1.x,pt1.y,pt1.z);
struct point ret = {pt.x+pt1.x, pt.y+pt1.y};
return ret;
}

39
_demo/c/cabisret/main.go Normal file
View File

@@ -0,0 +1,39 @@
package main
type array9 struct {
x [9]float32
}
// demo1 returns a copy of a whose first element has been increased by one.
// array9 is passed by value, so the caller's own value is left untouched.
func demo1(a array9) array9 {
	bumped := a
	bumped.x[0]++
	return bumped
}
// demo2 feeds a through demo1 1024*128 times in a row, passing the struct by
// value on every call, and returns the final result.
func demo2(a array9) array9 {
	const rounds = 1024 * 128
	for left := rounds; left > 0; left-- {
		a = demo1(a)
	}
	return a
}
func testDemo() {
ar := array9{x: [9]float32{1, 2, 3, 4, 5, 6, 7, 8, 9}}
for i := 0; i < 1024*128; i++ {
ar = demo1(ar)
}
ar = demo2(ar)
println(ar.x[0], ar.x[1])
}
// testSlice grows a byte slice from nil one element at a time, performing
// 1024*128 appends in total, and then discards the result.
func testSlice() {
	const total = 1024 * 128
	var buf []byte
	for n := 0; n != total; n++ {
		buf = append(buf, byte(n))
	}
	_ = buf
}
func main() {
testDemo()
testSlice()
}

144
_demo/c/cargs/demo.go Normal file
View File

@@ -0,0 +1,144 @@
package main
import (
"fmt"
"os"
_ "unsafe"
"github.com/goplus/lib/c"
)
const LLGoPackage string = "link: $(pkg-config --libs cargs);"
type Option struct {
Identifier c.Char
AccessLetters *c.Char
AccessName *c.Char
ValueName *c.Char
Description *c.Char
}
type OptionContext struct {
Options *Option
OptionCount c.SizeT
Argc c.Int
Argv **c.Char
Index c.Int
InnerIndex c.Int
ErrorIndex c.Int
ErrorLetter c.Char
ForcedEnd bool
Identifier c.Char
Value *c.Char
}
// llgo:type C
type Printer func(__llgo_arg_0 c.Pointer, __llgo_arg_1 *c.Char, __llgo_va_list ...interface{}) c.Int
// llgo:link (*OptionContext).OptionInit C.cag_option_init
func (recv_ *OptionContext) OptionInit(options *Option, option_count c.SizeT, argc c.Int, argv **c.Char) {
}
// llgo:link (*OptionContext).OptionFetch C.cag_option_fetch
func (recv_ *OptionContext) OptionFetch() bool {
return false
}
// llgo:link (*OptionContext).OptionGetIdentifier C.cag_option_get_identifier
func (recv_ *OptionContext) OptionGetIdentifier() c.Char {
return 0
}
// llgo:link (*OptionContext).OptionGetValue C.cag_option_get_value
func (recv_ *OptionContext) OptionGetValue() *c.Char {
return nil
}
// llgo:link (*OptionContext).OptionGetIndex C.cag_option_get_index
func (recv_ *OptionContext) OptionGetIndex() c.Int {
return 0
}
// llgo:link (*OptionContext).OptionGetErrorIndex C.cag_option_get_error_index
func (recv_ *OptionContext) OptionGetErrorIndex() c.Int {
return 0
}
// llgo:link (*OptionContext).OptionGetErrorLetter C.cag_option_get_error_letter
func (recv_ *OptionContext) OptionGetErrorLetter() c.Char {
return 0
}
// llgo:link (*OptionContext).OptionPrintError C.cag_option_print_error
func (recv_ *OptionContext) OptionPrintError(destination *c.FILE) {
}
// llgo:link (*OptionContext).OptionPrinterError C.cag_option_printer_error
func (recv_ *OptionContext) OptionPrinterError(printer Printer, printer_ctx c.Pointer) {
}
// llgo:link (*Option).OptionPrint C.cag_option_print
func (recv_ *Option) OptionPrint(option_count c.SizeT, destination *c.FILE) {
}
// llgo:link (*Option).OptionPrinter C.cag_option_printer
func (recv_ *Option) OptionPrinter(option_count c.SizeT, printer Printer, printer_ctx c.Pointer) {
}
// llgo:link (*OptionContext).OptionPrepare C.cag_option_prepare
func (recv_ *OptionContext) OptionPrepare(options *Option, option_count c.SizeT, argc c.Int, argv **c.Char) {
}
// llgo:link (*OptionContext).OptionGet C.cag_option_get
func (recv_ *OptionContext) OptionGet() c.Char {
return 0
}
func main() {
options := []Option{
{
Identifier: 'h',
AccessLetters: c.Str("h"),
AccessName: c.Str("help"),
ValueName: nil,
Description: c.Str("Show help information"),
},
{
Identifier: 'v',
AccessLetters: c.Str("v"),
AccessName: c.Str("version"),
ValueName: nil,
Description: c.Str("Show version information"),
},
}
args := os.Args
// Convert Go string array to C-style argv
argv := make([]*int8, len(args))
for i, arg := range args {
argv[i] = c.AllocaCStr(arg)
}
// Initialize option context
var context OptionContext
context.OptionInit(&options[0], uintptr(len(options)), c.Int(len(args)), &argv[0])
// Process all options
identifierFound := false
for context.OptionFetch() {
identifierFound = true
identifier := context.OptionGetIdentifier()
switch identifier {
case 'h':
fmt.Println("Help: This is a simple command-line parser demo")
case 'v':
fmt.Println("Version: 1.0.0")
}
}
// Default output if no identifier is found
if !identifierFound {
fmt.Println("Demo Command-line Tool\nIdentifier:\n\t-h: Help\n\t-v: Version")
}
}

24
_demo/c/catomic/atomic.go Normal file
View File

@@ -0,0 +1,24 @@
package main
import (
"github.com/goplus/lib/c/sync/atomic"
)
func main() {
var v int64
atomic.Store(&v, 100)
println("store:", atomic.Load(&v))
ret := atomic.Add(&v, 1)
println("ret:", ret, "v:", v)
ret, _ = atomic.CompareAndExchange(&v, 100, 102)
println("ret:", ret, "vs 100, v:", v)
ret, _ = atomic.CompareAndExchange(&v, 101, 102)
println("ret:", ret, "vs 101, v:", v)
ret = atomic.Sub(&v, 1)
println("ret:", ret, "v:", v)
}

11
_demo/c/cexec/exec.go Normal file
View File

@@ -0,0 +1,11 @@
package main
import (
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/os"
)
func main() {
ls := c.Str("ls")
os.Execlp(ls, ls, c.Str("-l"), nil)
}

16
_demo/c/cgofull/bar.go Normal file
View File

@@ -0,0 +1,16 @@
package main
/*
#cgo CFLAGS: -DBAR
#include <stdio.h>
#include "foo.h"
static void foo(Foo* f) {
printf("foo in bar: %d\n", f->a);
}
*/
import "C"
func Bar(f *C.Foo) {
C.print_foo(f)
C.foo(f)
}

157
_demo/c/cgofull/cgofull.go Normal file
View File

@@ -0,0 +1,157 @@
package main
/*
#cgo windows,!amd64 CFLAGS: -D_WIN32
#cgo !windows CFLAGS: -D_POSIX
#cgo windows,amd64 CFLAGS: -D_WIN64
#cgo linux,amd64 CFLAGS: -D_LINUX64
#cgo !windows,amd64 CFLAGS: -D_UNIX64
#cgo pkg-config: python3-embed
#include <stdio.h>
#include <Python.h>
#include "foo.h"
typedef struct {
int a;
} s4;
typedef struct {
int a;
int b;
} s8;
typedef struct {
int a;
int b;
int c;
} s12;
typedef struct {
int a;
int b;
int c;
int d;
} s16;
typedef struct {
int a;
int b;
int c;
int d;
int e;
} s20;
static int test_structs(s4* s4, s8* s8, s12* s12, s16* s16, s20* s20) {
printf("s4.a: %d\n", s4->a);
printf("s8.a: %d, s8.b: %d\n", s8->a, s8->b);
printf("s12.a: %d, s12.b: %d, s12.c: %d\n", s12->a, s12->b, s12->c);
printf("s16.a: %d, s16.b: %d, s16.c: %d, s16.d: %d\n", s16->a, s16->b, s16->c, s16->d);
printf("s20.a: %d, s20.b: %d, s20.c: %d, s20.d: %d, s20.e: %d\n", s20->a, s20->b, s20->c, s20->d, s20->e);
return s4->a + s8->a + s8->b + s12->a + s12->b + s12->c + s16->a + s16->b + s16->c + s16->d + s20->a + s20->b + s20->c + s20->d + s20->e;
}
static void test_macros() {
#ifdef FOO
printf("FOO is defined\n");
#endif
#ifdef BAR
printf("BAR is defined\n");
#endif
#ifdef _WIN32
printf("WIN32 is defined\n");
#endif
#ifdef _POSIX
printf("POSIX is defined\n");
#endif
#ifdef _WIN64
printf("WIN64 is defined\n");
#endif
#ifdef _LINUX64
printf("LINUX64 is defined\n");
#endif
#ifdef _UNIX64
printf("UNIX64 is defined\n");
#endif
}
#define MY_VERSION "1.0.0"
#define MY_CODE 0x12345678
static void test_void() {
printf("test_void\n");
}
typedef int (*Cb)(int);
extern int go_callback(int);
extern int c_callback(int i);
static void test_callback(Cb cb) {
printf("test_callback, cb: %p, go_callback: %p, c_callback: %p\n", cb, go_callback, c_callback);
printf("test_callback, *cb: %p, *go_callback: %p, *c_callback: %p\n", *(void**)cb, *(void**)(go_callback), *(void**)(c_callback));
printf("cb result: %d\n", cb(123));
printf("done\n");
}
extern int go_callback_not_use_in_go(int);
static void run_callback() {
test_callback(c_callback);
test_callback(go_callback_not_use_in_go);
}
*/
import "C"
import (
"fmt"
"unsafe"
"github.com/goplus/llgo/_demo/c/cgofull/pymod1"
"github.com/goplus/llgo/_demo/c/cgofull/pymod2"
)
//export go_callback_not_use_in_go
func go_callback_not_use_in_go(i C.int) C.int {
return i + 1
}
//export go_callback
func go_callback(i C.int) C.int {
return i + 1
}
func main() {
runPy()
f := &C.Foo{a: 1}
Foo(f)
Bar(f)
C.test_macros()
r := C.test_structs(&C.s4{a: 1}, &C.s8{a: 1, b: 2}, &C.s12{a: 1, b: 2, c: 3}, &C.s16{a: 1, b: 2, c: 3, d: 4}, &C.s20{a: 1, b: 2, c: 3, d: 4, e: 5})
fmt.Println(r)
if r != 35 {
panic("test_structs failed")
}
fmt.Println(C.MY_VERSION)
fmt.Println(int(C.MY_CODE))
C.test_void()
println("call run_callback")
C.run_callback()
// test _Cgo_ptr and _cgoCheckResult
println("call with go_callback")
C.test_callback((C.Cb)(C.go_callback))
println("call with c_callback")
C.test_callback((C.Cb)(C.c_callback))
}
// runPy starts the embedded Python interpreter, runs a one-line script and
// prints three Python objects created from Go: a float from pymod1 (to
// stderr), an int from pymod2 and a complex number built directly through
// cgo (both to stdout). The interpreter is finalized when runPy returns.
//
// A failure to run the script is reported by panicking instead of being
// silently dropped.
func runPy() {
	Initialize()
	defer Finalize()
	if err := Run("print('Hello, Python!')"); err != nil {
		panic(err)
	}
	C.PyObject_Print((*C.PyObject)(unsafe.Pointer(pymod1.Float(1.23))), C.stderr, 0)
	C.PyObject_Print((*C.PyObject)(unsafe.Pointer(pymod2.Long(123))), C.stdout, 0)
	// test _Cgo_use
	C.PyObject_Print((*C.PyObject)(unsafe.Pointer(C.PyComplex_FromDoubles(C.double(1.23), C.double(4.56)))), C.stdout, 0)
}

12
_demo/c/cgofull/foo.c Normal file
View File

@@ -0,0 +1,12 @@
#include <stdio.h>
#include "foo.h"
void print_foo(Foo *f)
{
printf("print_foo: %d\n", f->a);
}
int c_callback(int i)
{
return i + 1;
}

16
_demo/c/cgofull/foo.go Normal file
View File

@@ -0,0 +1,16 @@
package main
/*
#cgo CFLAGS: -DFOO
#include <stdio.h>
#include "foo.h"
// foo prints the value stored in f. The label names this file so the output
// can be told apart from the identically named static helper in bar.go.
static void foo(Foo* f) {
	printf("foo in foo: %d\n", f->a);
}
*/
import "C"
func Foo(f *C.Foo) {
C.print_foo(f)
C.foo(f)
}

7
_demo/c/cgofull/foo.h Normal file
View File

@@ -0,0 +1,7 @@
#pragma once
typedef struct {
int a;
} Foo;
extern void print_foo(Foo* f);

24
_demo/c/cgofull/py.go Normal file
View File

@@ -0,0 +1,24 @@
package main

/*
#cgo pkg-config: python3-embed
#include <Python.h>
#include <stdlib.h>
*/
import "C"

import (
	"fmt"
	"unsafe"
)

// Initialize starts the embedded Python interpreter by calling Py_Initialize.
func Initialize() {
	C.Py_Initialize()
}

// Finalize shuts the embedded Python interpreter down by calling Py_Finalize.
func Finalize() {
	C.Py_Finalize()
}

// Run executes code with PyRun_SimpleString. When Python reports a failure,
// the pending Python error is printed with PyErr_Print and a non-nil error is
// returned; otherwise Run returns nil.
func Run(code string) error {
	src := C.CString(code)
	// C.CString allocates the copy with malloc; release it once Python is
	// done with it so repeated calls do not leak.
	defer C.free(unsafe.Pointer(src))

	if C.PyRun_SimpleString(src) != 0 {
		C.PyErr_Print()
		return fmt.Errorf("failed to run code")
	}
	return nil
}

View File

@@ -0,0 +1,11 @@
package pymod1
/*
#cgo pkg-config: python3-embed
#include <Python.h>
*/
import "C"
func Float(f float64) *C.PyObject {
return C.PyFloat_FromDouble(C.double(f))
}

View File

@@ -0,0 +1,11 @@
package pymod2
/*
#cgo pkg-config: python3-embed
#include <Python.h>
*/
import "C"
func Long(l int64) *C.PyObject {
return C.PyLong_FromLongLong(C.longlong(l))
}

21
_demo/c/concat/concat.go Normal file
View File

@@ -0,0 +1,21 @@
package main
import (
"github.com/goplus/lib/c"
)
// concat joins all args, in order, into a single string. Called without
// arguments it returns the empty string.
func concat(args ...string) (ret string) {
	for i := 0; i < len(args); i++ {
		ret = ret + args[i]
	}
	return ret
}
func main() {
result := concat("Hello", " ", "World")
c.Fprintf(c.Stderr, c.Str("Hi, %s\n"), c.AllocaCStr(result))
}
/* Expected output (stderr):
Hi, Hello World
*/

View File

@@ -0,0 +1,38 @@
package main
import (
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/math"
"github.com/goplus/llgo/_demo/c/cppintf/foo"
)
type Bar struct {
foo.Callback
a c.Int
}
func NewBar(a c.Int) *Bar {
return &Bar{
Callback: foo.Callback{
Vptr: &foo.CallbackVtbl{
Val: c.Func((*Bar).getA),
Calc: c.Func((*Bar).sqrt),
},
},
a: a,
}
}
func (p *Bar) getA() c.Int {
return p.a
}
func (p *Bar) sqrt(v float64) float64 {
return math.Sqrt(v)
}
func main() {
bar := NewBar(1)
foo.F(&bar.Callback)
foo.G(&bar.Callback)
}

View File

@@ -0,0 +1,15 @@
#include <stdio.h>
#define interface struct
interface ICallback {
virtual int val() = 0;
virtual double calc(double v) = 0;
};
extern "C" void f(ICallback* cb) {
printf("val: %d\ncalc(2): %lf\n", cb->val(), cb->calc(2));
}
void g(ICallback* cb) {
f(cb);
}

View File

@@ -0,0 +1,25 @@
package foo
import (
"unsafe"
)
const (
LLGoFiles = "bar/bar.cpp"
LLGoPackage = "link"
)
type Callback struct {
Vptr *CallbackVtbl
}
type CallbackVtbl struct {
Val unsafe.Pointer
Calc unsafe.Pointer
}
//go:linkname F C.f
func F(cb *Callback)
//go:linkname G C._Z1gP9ICallback
func G(cb *Callback)

View File

@@ -0,0 +1,50 @@
package main
import (
"unsafe"
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/math"
"github.com/goplus/llgo/_demo/c/cppmintf/foo"
)
type Bar struct {
foo.Callback
a c.Int
}
func NewBar(a c.Int) *Bar {
return &Bar{
Callback: foo.Callback{
ICalc: foo.ICalc{
Vptr: &foo.ICalcVtbl{
Calc: c.Func((*Bar).sqrt),
},
},
IVal: foo.IVal{
Vptr: &foo.IValVtbl{
Val: c.Func(bar_IVal_getA),
},
},
},
a: a,
}
}
func (p *Bar) getA() c.Int {
return p.a
}
// bar_IVal_getA is the function installed as the Val entry of the IVal
// vtable in NewBar. It recovers the enclosing *Bar from this and returns the
// result of its getA method.
// NOTE(review): this is presumably the address of the IVal sub-object as
// passed by the C++ caller — confirm against bar.cpp.
func bar_IVal_getA(this c.Pointer) c.Int {
// Stepping back by the offset of IVal inside foo.Callback yields the start of
// the Callback, which is also the start of Bar because Callback is Bar's
// first field.
const delta = -int(unsafe.Offsetof(foo.Callback{}.IVal))
return (*Bar)(c.Advance(this, delta)).getA()
}
func (p *Bar) sqrt(v float64) float64 {
return math.Sqrt(v)
}
func main() {
bar := NewBar(1)
foo.F(&bar.Callback)
}

View File

@@ -0,0 +1,17 @@
#include <stdio.h>
#define interface struct
interface ICalc {
virtual double calc(double v) = 0;
};
interface IVal {
virtual int val() = 0;
};
class Callback : public ICalc, public IVal {
};
extern "C" void f(Callback* cb) {
printf("val: %d\ncalc(2): %lf\n", cb->val(), cb->calc(2));
}

View File

@@ -0,0 +1,42 @@
package foo
import (
"unsafe"
)
const (
LLGoFiles = "bar/bar.cpp"
LLGoPackage = "link"
)
// -----------------------------------------------------------------------------
type ICalc struct {
Vptr *ICalcVtbl
}
type ICalcVtbl struct {
Calc unsafe.Pointer
}
// -----------------------------------------------------------------------------
type IVal struct {
Vptr *IValVtbl
}
type IValVtbl struct {
Val unsafe.Pointer
}
// -----------------------------------------------------------------------------
type Callback struct {
ICalc
IVal
}
//go:linkname F C.f
func F(cb *Callback)
// -----------------------------------------------------------------------------

12
_demo/c/cppstr/cppstr.go Normal file
View File

@@ -0,0 +1,12 @@
package main
import (
"github.com/goplus/lib/c"
"github.com/goplus/lib/cpp/std"
)
func main() {
s := std.Str("Hello world\n")
c.Printf(s.CStr())
print(s.Str(), s.Size(), "\n")
}

26
_demo/c/crand/rand.go Normal file
View File

@@ -0,0 +1,26 @@
package main
import (
"fmt"
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/math/rand"
"github.com/goplus/lib/c/time"
)
// fastrand64 builds a 64-bit value from two consecutive rand.Random results:
// the first is used as is and the second is shifted left by 32 bits before
// the two are XORed together.
func fastrand64() uint64 {
	low := uint64(rand.Random())
	high := uint64(rand.Random()) << 32
	return low ^ high
}
func main() {
rand.Srand(c.Uint(time.Time(nil)))
fmt.Printf("%x\n", rand.Rand())
fmt.Printf("%x\n", rand.Rand())
rand.Srandom(c.Uint(time.Time(nil)))
fmt.Printf("%x\n", rand.Random())
fmt.Printf("%x\n", rand.Random())
fmt.Printf("%x\n", fastrand64())
}

12
_demo/c/ctime/time.go Normal file
View File

@@ -0,0 +1,12 @@
package main
import "github.com/goplus/lib/c/time"
func main() {
var tv time.Timespec
time.ClockGettime(time.CLOCK_REALTIME, &tv)
println("REALTIME sec:", tv.Sec, "nsec:", tv.Nsec)
time.ClockGettime(time.CLOCK_MONOTONIC, &tv)
println("MONOTONIC sec:", tv.Sec, "nsec:", tv.Nsec)
}

76
_demo/c/fcntl/fcntl.go Normal file
View File

@@ -0,0 +1,76 @@
package main
import (
"unsafe"
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/os"
)
// main writes a short message to testfile.txt, reads and updates the
// descriptor's status flags with fcntl, then reopens the file read-only and
// prints the message it reads back. Every failure is reported through
// c.Printf and ends the program early after closing the descriptor.
func main() {
	filename := c.Str("testfile.txt")
	data := c.Str("Hello, os!")
	var buffer [20]c.Char

	// O_CREAT|O_WRONLY|O_TRUNC: create the file if it is missing, open it
	// write-only and discard any previous content.
	fd := os.Open(filename, os.O_CREAT|os.O_WRONLY|os.O_TRUNC, 0644)
	if fd == -1 {
		c.Printf(c.Str("open error\n"))
		return
	}

	// Write the message to the file.
	bytesWritten := os.Write(fd, c.Pointer(data), c.Strlen(data))
	if bytesWritten == -1 {
		c.Printf(c.Str("write error\n"))
		os.Close(fd)
		return
	}
	c.Printf(c.Str("Written %ld bytes to %s\n"), bytesWritten, filename)

	// Get the file status flags.
	flags := os.Fcntl(fd, os.F_GETFL)
	if flags == -1 {
		c.Printf(c.Str("fcntl error\n"))
		os.Close(fd)
		return
	}
	c.Printf(c.Str("File flags: %d\n"), flags)

	// Add non-blocking mode to the file status flags.
	if os.Fcntl(fd, os.F_SETFL, flags|os.O_NONBLOCK) == -1 {
		c.Printf(c.Str("fcntl error\n"))
		os.Close(fd)
		return
	}
	c.Printf(c.Str("set file status successfully\n"))

	// Close the write descriptor before reopening the file for reading.
	os.Close(fd)

	// Reopen the file read-only.
	fd = os.Open(filename, os.O_RDONLY)
	if fd == -1 {
		c.Printf(c.Str("open error\n"))
		return
	}

	// Read at most len(buffer)-1 bytes so one slot is always left for the
	// terminating NUL written below.
	bytesRead := os.Read(fd, c.Pointer(unsafe.SliceData(buffer[:])), unsafe.Sizeof(buffer)-1)
	if bytesRead == -1 {
		c.Printf(c.Str("read error\n"))
		os.Close(fd)
		return
	}

	// Ensure that the buffer is NUL-terminated before printing it with %s.
	buffer[bytesRead] = c.Char(0)
	c.Printf(c.Str("Read %ld bytes: %s\n"), bytesRead, &buffer[0])

	// Close the file.
	os.Close(fd)
}

View File

@@ -0,0 +1,63 @@
package main
import (
"github.com/goplus/lib/c"
)
type generator struct {
val c.Int
}
func (g *generator) next() c.Int {
g.val++
return g.val
}
// genInts returns a slice of n values obtained by calling gen once per
// element, in index order.
func genInts(n int, gen func() c.Int) []c.Int {
	out := make([]c.Int, n)
	for idx := 0; idx < len(out); idx++ {
		out[idx] = gen()
	}
	return out
}
func main() {
// generate 5 random integers
for _, v := range genInts(5, c.Rand) {
c.Printf(c.Str("%d\n"), v)
}
// generate 5 integers, each is double of the previous one
initVal := c.Int(1)
ints := genInts(5, func() c.Int {
initVal *= 2
return initVal
})
for _, v := range ints {
c.Printf(c.Str("%d\n"), v)
}
// generate 5 integers, each is incremented by 1
g := &generator{val: 1}
for _, v := range genInts(5, g.next) {
c.Printf(c.Str("%d\n"), v)
}
}
/* Possible output:
16807
282475249
1622650073
984943658
1144108930
2
4
8
16
32
2
3
4
5
6
*/

11
_demo/c/getcwd/getcwd.go Normal file
View File

@@ -0,0 +1,11 @@
package main
import (
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/os"
)
func main() {
wd := os.Getcwd(c.Alloca(os.PATH_MAX), os.PATH_MAX)
c.Printf(c.Str("cwd: %s\n"), wd)
}

5
_demo/c/go.mod Normal file
View File

@@ -0,0 +1,5 @@
module github.com/goplus/llgo/_demo/c
go 1.20
require github.com/goplus/lib v0.3.0

2
_demo/c/go.sum Normal file
View File

@@ -0,0 +1,2 @@
github.com/goplus/lib v0.3.0 h1:y0ZGb5Q/RikW1oMMB4Di7XIZIpuzh/7mlrR8HNbxXCA=
github.com/goplus/lib v0.3.0/go.mod h1:SgJv3oPqLLHCu0gcL46ejOP3x7/2ry2Jtxu7ta32kp0=

13
_demo/c/hello/hello.go Normal file
View File

@@ -0,0 +1,13 @@
package main
import (
"fmt"
"github.com/goplus/lib/c"
)
func main() {
println("hello world by println")
fmt.Println("hello world by fmt.Println")
c.Printf(c.Str("Hello world by c.Printf\n"))
}

15
_demo/c/helloc/helloc.go Normal file
View File

@@ -0,0 +1,15 @@
package main
import (
"unsafe"
"github.com/goplus/lib/c"
)
func main() {
c.Printf(c.Str("Hello world by c.Printf\n"))
c.Printf(c.Str("%ld\n"), unsafe.Sizeof(int(0)))
c.Printf(c.Str("%ld\n"), unsafe.Sizeof(uintptr(0)))
// var v any = int(0)
// c.Printf(c.Str("%ld\n"), unsafe.Sizeof(v))
}

View File

@@ -0,0 +1,10 @@
package main
import _ "unsafe" // for go:linkname
//go:linkname Sqrt C.sqrt
func Sqrt(x float64) float64
// main calls the linknamed Sqrt on 2 and prints the result.
func main() {
	root := Sqrt(2)
	println("sqrt(2) =", root)
}

137
_demo/c/llama2-c/README.md Normal file
View File

@@ -0,0 +1,137 @@
llama2 - Inference Llama 2 in LLGo
=====
<p align="center">
<img src="assets/llama_cute.jpg" width="300" height="300" alt="Cute Llama">
</p>
Have you ever wanted to inference a baby [Llama 2](https://ai.meta.com/llama/) model in Go? No? Well, now you can!
This is based on [llama2.c](https://github.com/karpathy/llama2.c), we didn't port anything! So it's very different from these Go implementations:
* https://github.com/nikolaydubina/llama2.go
* https://github.com/tmc/go-llama2
llgo plays a great role as a bridge, allowing the C ecosystem to be seamlessly connected to Go.
You might think that you need many billion parameter LLMs to do anything useful, but in fact very small LLMs can have surprisingly strong performance if you make the domain narrow enough (ref: [TinyStories](https://huggingface.co/datasets/roneneldan/TinyStories) paper). This repo is a "fullstack" train + inference solution for Llama 2 LLM, with focus on minimalism and simplicity.
As the architecture is identical, you can also load and inference Meta's Llama 2 models. However, the current code only inferences models in fp32, so you will most likely not be able to productively load models larger than 7B.
## feel the magic
How to run this example? The simplest way is to run it without any arguments:
```bash
llgo run .
```
This means it uses the default model checkpoint file (`stories15M.bin`), and the default prompt (`Once upon a time`).
You need to download the model checkpoint file first. Download this 15M parameter model trained on the [TinyStories](https://huggingface.co/datasets/roneneldan/TinyStories) dataset (~60MB download):
```bash
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
```
If you want to specify a prompt (eg. `Long long ago`):
```bash
llgo run . 'Long long ago'
```
We can also try a bit bigger 42M parameter model (ie. `stories42M.bin`):
```bash
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories42M.bin
llgo run . -m stories42M.bin 'Long long ago'
```
There is also an even better 110M param model available, see [models](#models).
Quick note on sampling, the recommendation for ~best results is to sample with `-t 1.0 -p 0.9`, i.e. temperature 1.0 (default) but also top-p sampling at 0.9 (default). Intuitively, top-p ensures that tokens with tiny probabilities do not get sampled, so we can't get "unlucky" during sampling, and we are less likely to go "off the rails" afterwards. More generally, to control the diversity of samples use either the temperature (i.e. vary `-t` between 0 and 1 and keep top-p off with `-p 0`) or the top-p value (i.e. vary `-p` between 0 and 1 and keep `-t 1`), but not both. Nice explainers on LLM sampling strategies include [this](https://peterchng.com/blog/2023/05/02/token-selection-strategies-top-k-top-p-and-temperature/), [this](https://docs.cohere.com/docs/controlling-generation-with-top-k-top-p) or [this](https://huggingface.co/blog/how-to-generate).
## Meta's Llama 2 models
As the neural net architecture is identical, we can also inference the Llama 2 models released by Meta. Sadly there is a bit of friction here due to licensing (I can't directly upload the checkpoints, I think). So Step 1, get the Llama 2 checkpoints by following the [Meta instructions](https://github.com/facebookresearch/llama). Once we have those checkpoints, we have to convert them into the llama2.c format.
For this we need to install the python dependencies (`pip install -r requirements.txt`) and then use the `export.py` file, e.g. for 7B model:
```bash
python export.py llama2_7b.bin --meta-llama path/to/llama/model/7B
```
The export will take ~10 minutes or so and generate a 26GB file (the weights of the 7B model in float32) called `llama2_7b.bin` in the current directory. It has been [reported](https://github.com/karpathy/llama2.c/pull/85) that despite efforts. I would not attempt to run anything above 7B right now for two reasons: first, 13B+ currently doesn't work because of integer overflow in pointer arithmetic, which is yet to be fixed, and second, even if it were fixed, this repo is doing float32 inference right now, so it would be fairly unusably slow. Once the export is done, we can run it:
```bash
./run llama2_7b.bin
```
This ran at about 4 tokens/s compiled with [OpenMP](#OpenMP) on 96 threads on my CPU Linux box in the cloud. (On my MacBook Air M1, currently it's closer to 30 seconds per token if you just build with `make runfast`.) Example output:
> The purpose of this document is to highlight the state-of-the-art of CoO generation technologies, both recent developments and those in commercial use. The focus is on the technologies with the highest merit to become the dominating processes of the future and therefore to be technologies of interest to S&amp;T ... R&amp;D. As such, CoO generation technologies developed in Russia, Japan and Europe are described in some depth. The document starts with an introduction to cobalt oxides as complex products and a short view on cobalt as an essential material. The document continues with the discussion of the available CoO generation processes with respect to energy and capital consumption as well as to environmental damage.
base models... ¯\\_(ツ)_/¯. Since we can inference the base model, it should be possible to also inference the chat model quite easily, and have a conversation with it. And if we can find a way to run 7B more efficiently, we can start adding LoRA to our training script, and going wild with finetunes all within the repo!
You can also try Meta's Code Llama models even if support for them is incomplete. In particular, some hyperparameters changed (e.g. the constant in RoPE layer), so the inference is not exactly correct and a bit buggy right now. Looking into fixes. Make sure to build the tokenizer for the plain and instruct variants and pass it when doing inference.
```bash
python export.py codellama2_7b.bin --meta-llama /path/to/CodeLlama-7b
python tokenizer.py --tokenizer-model=/path/to/CodeLlama-7b/tokenizer.model
./run codellama2_7b.bin -z /path/to/CodeLlama-7b/tokenizer.bin
```
Chat with Code Llama Instruct:
```bash
python export.py codellama2_7b_instruct.bin --meta-llama /path/to/CodeLlama-7b-Instruct
python tokenizer.py --tokenizer-model=/path/to/CodeLlama-7b-Instruct/tokenizer.model
./run codellama2_7b_instruct.bin -m chat -z /path/to/CodeLlama-7b-Instruct/tokenizer.bin
```
## huggingface models
We can load any huggingface models that use the Llama 2 architecture. See the script [export.py](export.py) and the `--hf` flag to export the model .bin file.
## models
For the sake of examples of smaller, from-scratch models, I trained a small model series on TinyStories. All of these trained in a few hours on my training setup (4X A100 40GB GPUs). The 110M took around 24 hours. I am hosting them on huggingface hub [tinyllamas](https://huggingface.co/karpathy/tinyllamas), both in the original PyTorch .pt, and also in the llama2.c format .bin:
| model | dim | n_layers | n_heads | n_kv_heads | max context length | parameters | val loss | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 260K | 64 | 5 | 8 | 4 | 512 | 260K | 1.297 | [stories260K](https://huggingface.co/karpathy/tinyllamas/tree/main/stories260K) |
| OG | 288 | 6 | 6 | 6 | 256 | 15M | 1.072 | [stories15M.bin](https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin) |
| 42M| 512 | 8 | 8 | 8 | 1024 | 42M | 0.847 | [stories42M.bin](https://huggingface.co/karpathy/tinyllamas/resolve/main/stories42M.bin) |
| 110M| 768 | 12 | 12 | 12 | 1024 | 110M | 0.760 | [stories110M.bin](https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.bin) |
You'll notice that the 110M model is equivalent to GPT-1 in size. Alternatively, this is also the smallest model in the GPT-2 series (`GPT-2 small`), except the max context length is only 1024 instead of 2048. The only notable changes from the GPT-1/2 architecture are that Llama uses RoPE relative positional embeddings instead of absolute/learned positional embeddings, a bit more fancy SwiGLU non-linearity in the MLP, RMSNorm instead of LayerNorm, bias=False on all Linear layers, and is optionally multiquery.
## training
Let's see how we can train a baby Llama 2 from scratch using the code in this repo. First let's download and pretokenize some source dataset, e.g. I like [TinyStories](https://huggingface.co/datasets/roneneldan/TinyStories) so this is the only example currently available in this repo. But it should be very easy to add datasets, see the code.
```bash
python tinystories.py download
python tinystories.py pretokenize
```
Then train our model:
```bash
python train.py
```
**brief training guide**. See the train.py script for more exotic launches and hyperparameter overrides. Here is a brief guide to how to set the parameters. Look at the table at the very end of the [Chinchilla paper](https://arxiv.org/abs/2203.15556) to get a sense of how the Transformer parameters (dim, n_layers, n_heads) grow or shrink together. Extrapolate/interpolate this pattern to get bigger or smaller transformers. Set the max context length however you wish, depending on the problem: this should be the max number of tokens that matter to predict the next token. E.g. Llama 2 uses 2048. Next, you want the _total_ batch size per update (printed by the script as "tokens per iteration will be:") to be somewhere around 100K tokens for medium-sized applications. For tiny applications it could be lower, for large training (e.g. GPTs/LLamas) it is usually ~0.5M, or even more. You get there by first maxing out the batch_size to whatever your system allows (e.g. mine was 16 in a recent run because after that my GPU runs out of memory), and then you want to increase gradient_accumulation_steps to be as high as necessary to reach the total batch size of ~100K. Finally, you want to tune your learning_rate (LR). You want this to be as high as your training allows. Very small networks can get away with a large LR (e.g. 1e-3 or even higher). Large networks need lower LRs. 3e-4 is a safe choice in most medium-sized applications, but can be too low for small networks, so try to increase it! Finally, max_iters is the length of training. Play with different settings. I mostly only ever tune these parameters and leave most of the others unchanged. 
Here is an example of how I trained the 110M model, which I don't think is anywhere near optimal, but looked sensible to me: dim 768, n_layers 12, n_heads 12 (so size of each head is 768 / 12 = 64 channels), seq len of 1024, batch size 16 (this is the most that fit my A100 40GB GPU), gradient_accumulation_steps = 8 was needed to get total tokens batch size to be 16 batch size * 1024 tokens in sequence * 8 grad_accum = 131,072 tokens per update. Good. Learning rate 4e-4 (probably a little too low). max_iters 200K (probably a bit too high). Dropout 0.1, as that usually helps a bit at medium size. That was it. I ran using Distributed Data Parallel (DDP) on 4 GPUs on my cloud machine, training took ~day or so.
Totally understand if you want to skip model training, for simple demo just download one of the pretrained models (see [models](#models) section), e.g.:
```bash
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.bin
```
Once we have the model.bin file, we can run inference with LLGo:
```bash
llgo run . -m stories15M.bin
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 183 KiB

50
_demo/c/llama2-c/run.go Normal file
View File

@@ -0,0 +1,50 @@
package main
import (
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/llama2"
"github.com/goplus/lib/c/time"
)
// main drives the llama2 binding end to end: it starts from the defaults
// declared below, lets the command line override the checkpoint path (-m) and
// the prompt (first non-option argument), builds the transformer, tokenizer
// and sampler, calls llama2.Generate, and finally frees all three objects.
func main() {
// defaults used when the command line does not override them
var prompt *c.Char = c.Str("Once upon a time")
var checkpointPath *c.Char = c.Str("stories15M.bin")
var tokenizerPath *c.Char = c.Str("tokenizer.bin")
var temperature, topp c.Float = 1.0, 0.9
var steps c.Int = 256
// the sampler seed is taken from the value returned by time.Time(nil)
var rngSeed uint64 = uint64(time.Time(nil))
loop: // parse command line arguments
for {
// "m:" declares a single option, -m, that takes an argument
switch c.Getopt(c.Argc, c.Argv, c.Str("m:")) {
case 'm':
checkpointPath = c.Optarg
c.Fprintf(c.Stderr, c.Str("==> use model: %s\n"), checkpointPath)
case -1:
// -1 ends option parsing; any other return value is ignored and
// the loop simply asks for the next option
break loop
}
}
// an argument left at c.Optind after option parsing replaces the default prompt
if c.Optind < c.Argc {
prompt = c.Index(c.Argv, c.Optind)
c.Fprintf(c.Stderr, c.Str("==> prompt: %s\n"), prompt)
}
// build the Transformer via the model .bin file
var transformer llama2.Transformer
llama2.BuildTransformer(&transformer, checkpointPath)
// build the Tokenizer via the tokenizer .bin file
var tokenizer llama2.Tokenizer
llama2.BuildTokenizer(&tokenizer, tokenizerPath, transformer.Config.VocabSize)
// build the Sampler
var sampler llama2.Sampler
llama2.BuildSampler(&sampler, transformer.Config.VocabSize, temperature, topp, rngSeed)
// run!
llama2.Generate(&transformer, &tokenizer, &sampler, prompt, steps)
// memory and file handles cleanup
llama2.FreeSampler(&sampler)
llama2.FreeTokenizer(&tokenizer)
llama2.FreeTransformer(&transformer)
}

Binary file not shown.

View File

@@ -0,0 +1,20 @@
package main
import (
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/net"
)
// main looks up httpbin.org on port 80 with net.Getaddrinfo, asking for a
// stream socket of unspecified address family, prints the lookup's return
// code, then frees the result list and prints that call's return value too.
func main() {
	hints := net.AddrInfo{}
	hints.SockType = net.SOCK_STREAM
	hints.Family = net.AF_UNSPEC

	var res *net.AddrInfo
	status := net.Getaddrinfo(c.Str("httpbin.org"), c.Str("80"), &hints, &res)
	c.Printf(c.Str("%d\n"), status)

	freed := net.Freeaddrinfo(res)
	c.Printf(c.Str("%d\n"), freed)
}

25
_demo/c/qsort/qsort.go Normal file
View File

@@ -0,0 +1,25 @@
package main
import (
"unsafe"
"github.com/goplus/lib/c"
)
// main sorts a small int array in ascending order with the C qsort binding
// and prints one element per line.
func main() {
	a := [...]int{100, 8, 23, 2, 7}
	// Derive the element count and size from the array itself so they cannot
	// drift from its declaration.
	c.Qsort(c.Pointer(&a), uintptr(len(a)), unsafe.Sizeof(a[0]), func(x, y c.Pointer) c.Int {
		// Compare explicitly rather than returning l-r: the difference of two
		// ints can overflow or be truncated when narrowed to c.Int, which
		// would flip the sign and corrupt the ordering.
		l, r := *(*int)(x), *(*int)(y)
		switch {
		case l < r:
			return -1
		case l > r:
			return 1
		}
		return 0
	})
	for _, v := range a {
		c.Printf(c.Str("%d\n"), v)
	}
}
/* Expected output:
2
7
8
23
100
*/

16
_demo/c/setjmp/setjmp.go Normal file
View File

@@ -0,0 +1,16 @@
package main
import (
"github.com/goplus/lib/c/setjmp"
)
// main demonstrates a non-local jump with the sigsetjmp/siglongjmp bindings.
// The switch dispatches on the value returned by setjmp.Sigsetjmp.
func main() {
var jb setjmp.SigjmpBuf
switch ret := setjmp.Sigsetjmp(&jb, 0); ret {
case 0:
// NOTE(review): per C setjmp semantics 0 is the direct-call return;
// the Siglongjmp below should resume at the Sigsetjmp call with ret == 1.
println("Hello, setjmp!")
setjmp.Siglongjmp(&jb, 1)
default:
// any non-zero return value is reported as an "exception"
println("exception:", ret)
}
}

View File

@@ -0,0 +1,32 @@
package main
import (
"unsafe"
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/net"
"github.com/goplus/lib/c/os"
)
// main is a minimal TCP client: it resolves "localhost", connects to port
// 1234 and sends a single greeting. Every failing step is reported through
// perror and ends the program.
func main() {
	sockfd := net.Socket(net.AF_INET, net.SOCK_STREAM, 0)
	if sockfd < 0 {
		c.Perror(c.Str("socket error"))
		return
	}
	defer os.Close(sockfd)

	msg := c.Str("Hello, World!")
	server := net.GetHostByName(c.Str("localhost"))
	if server == nil {
		c.Perror(c.Str("hostname get error"))
		return
	}

	servAddr := &net.SockaddrIn{}
	servAddr.Family = net.AF_INET
	servAddr.Port = net.Htons(uint16(1234))
	// copy the first resolved address into the socket address
	c.Memcpy(unsafe.Pointer(&servAddr.Addr.Addr), unsafe.Pointer(*server.AddrList), uintptr(server.Length))

	// Take the address length from the struct instead of hard-coding 16.
	addrLen := c.Uint(unsafe.Sizeof(*servAddr))
	if res := net.Connect(sockfd, (*net.SockAddr)(unsafe.Pointer(servAddr)), addrLen); res < 0 {
		c.Perror(c.Str("connect error"))
		return
	}
	if os.Write(sockfd, unsafe.Pointer(msg), c.Strlen(msg)) < 0 {
		c.Perror(c.Str("write error"))
	}
}

View File

@@ -0,0 +1,43 @@
package main
import (
"unsafe"
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/net"
"github.com/goplus/lib/c/os"
)
// main is a minimal TCP server: it binds to port 1234 on the zero address,
// accepts one connection, reads one message from it and prints that message.
// Every failing setup step is reported through perror and ends the program.
func main() {
	var buffer [256]c.Char

	sockfd := net.Socket(net.AF_INET, net.SOCK_STREAM, 0)
	if sockfd < 0 {
		c.Perror(c.Str("socket error"))
		return
	}
	defer os.Close(sockfd)

	servAddr := &net.SockaddrIn{
		Family: net.AF_INET,
		Port:   net.Htons(uint16(1234)),
		Addr:   net.InAddr{Addr: 0x00000000},
		Zero:   [8]c.Char{0, 0, 0, 0, 0, 0, 0, 0},
	}
	if res := net.Bind(sockfd, servAddr, c.Uint(unsafe.Sizeof(*servAddr))); res < 0 {
		c.Perror(c.Str("bind error"))
		return
	}
	if net.Listen(sockfd, 5) < 0 {
		c.Perror(c.Str("listen error"))
		return
	}
	c.Printf(c.Str("Listening on port 1234...\n"))

	cliAddr := &net.SockaddrIn{}
	// The length passed to Accept must be the size of the address struct;
	// unsafe.Sizeof on the pointer variable would give the pointer size.
	clilen := c.Uint(unsafe.Sizeof(*cliAddr))
	newsockfd := net.Accept(sockfd, cliAddr, &clilen)
	if newsockfd < 0 {
		c.Perror(c.Str("accept error"))
		return
	}
	defer os.Close(newsockfd)
	c.Printf(c.Str("Connection accepted.\n"))

	// Read at most len(buffer)-1 bytes so the zero-initialised buffer always
	// keeps a terminating NUL for the "%s" below.
	os.Read(newsockfd, unsafe.Pointer(unsafe.SliceData(buffer[:])), uintptr(len(buffer)-1))
	c.Printf(c.Str("Received: %s\n"), &buffer[0])
}

View File

@@ -0,0 +1,31 @@
package main
import (
"io"
"os"
"sync"
"unsafe"
llsync "github.com/goplus/lib/c/pthread/sync"
)
// L is the demo fixture: a struct that embeds a standard sync.Mutex by value
// next to a string, an int and an io.Writer field.
type L struct {
mu sync.Mutex // locked and unlocked around the writes in main
s string // text written to w
i int // only printed
w io.Writer // destination for s
}
// main prints the sizes of L, sync.Mutex and llsync.Mutex, then dumps the
// fields of an L value before and after taking its mutex, and writes the
// string field plus a newline to the struct's writer while the lock is held.
func main() {
	l := &L{s: "hello", i: 123, w: os.Stdout}
	show := func() {
		println("l:", l, "l.s:", l.s, "l.i:", l.i, "l.w:", l.w)
	}

	println("sizeof(L):", unsafe.Sizeof(L{}))
	println("sizeof(sync.Mutex):", unsafe.Sizeof(sync.Mutex{}))
	println("sizeof(llsync.Mutex):", unsafe.Sizeof(llsync.Mutex{}))
	show()

	l.mu.Lock()
	println("locked")
	show()
	for _, chunk := range []string{l.s, "\n"} {
		l.w.Write([]byte(chunk))
	}
	l.mu.Unlock()
}

25
_demo/c/thread/thd.go Normal file
View File

@@ -0,0 +1,25 @@
package main
import (
"github.com/goplus/lib/c"
"github.com/goplus/lib/c/pthread"
)
var key pthread.Key
// main demonstrates the pthread key binding across two threads: the main
// thread and a spawned thread each store their own string under the same
// key, and each prints what key.Get returns for it.
func main() {
key.Create(nil)
key.Set(c.Pointer(c.Str("main value\n")))
var thd pthread.Thread
pthread.Create(&thd, nil, func(arg c.Pointer) c.Pointer {
// runs in the new thread: store and print this thread's own value
key.Set(c.Pointer(c.Str("thread value\n")))
c.Printf(c.Str("Hello, thread\nTLS: %s"), key.Get())
// the returned pointer is what Join hands back through retval
return c.Pointer(c.Str("Back to main\n"))
}, nil)
var retval c.Pointer
pthread.Join(thd, &retval)
// back in main: print the thread's return value and main's own key value
c.Printf(c.Str("%sTLS: %s"), retval, key.Get())
}