Commit 9414153

Deployed 77c7989 with MkDocs version: 1.6.1

File tree

87 files changed: +66552 −0 lines changed

.nojekyll

Whitespace-only changes.

404.html

Lines changed: 1175 additions & 0 deletions
Large diffs are not rendered by default.

CNAME

Lines changed: 1 addition & 0 deletions
docs.tinygrad.org

abstractions2.py

Lines changed: 135 additions & 0 deletions
# tinygrad is a tensor library, and as a tensor library it has multiple parts
# 1. a "runtime". this allows buffer management, compilation, and running programs
# 2. a "Device" that uses the runtime but specifies compute in an abstract way for all devices
# 3. a "UOp" that fuses the compute into kernels, using memory only when needed
# 4. a "Tensor" that provides an easy to use frontend with autograd ".backward()"


print("******** first, the runtime ***********")

from tinygrad.runtime.ops_cpu import ClangJITCompiler, MallocAllocator, CPUProgram

# allocate some buffers
out = MallocAllocator.alloc(4)
a = MallocAllocator.alloc(4)
b = MallocAllocator.alloc(4)

# load in some values (little endian)
MallocAllocator._copyin(a, memoryview(bytearray([2,0,0,0])))
MallocAllocator._copyin(b, memoryview(bytearray([3,0,0,0])))

# compile a program to a binary
lib = ClangJITCompiler().compile("void add(int *out, int *a, int *b) { out[0] = a[0] + b[0]; }")

# create a runtime for the program
fxn = CPUProgram("add", lib)

# run the program
fxn(out, a, b)

# check the data out
print(val := MallocAllocator._as_buffer(out).cast("I").tolist()[0])
assert val == 5
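
# a minimal extra sketch: assuming the same three pieces (Compiler, Allocator,
# Program) work for any C function, a multiply kernel runs the same way
mul_lib = ClangJITCompiler().compile("void mul(int *out, int *a, int *b) { out[0] = a[0] * b[0]; }")
mul = CPUProgram("mul", mul_lib)
mul(out, a, b)  # reuses the buffers that still hold 2 and 3
print(mul_val := MallocAllocator._as_buffer(out).cast("I").tolist()[0])
assert mul_val == 6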

print("******** second, the Device ***********")

DEVICE = "CPU" # NOTE: you can change this!

import struct
from tinygrad.dtype import dtypes
from tinygrad.device import Buffer, Device
from tinygrad.uop.ops import UOp, Ops
from tinygrad.shape.shapetracker import ShapeTracker

# allocate some buffers + load in values
out = Buffer(DEVICE, 1, dtypes.int32).allocate()
a = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))
b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))
# NOTE: a._buf is the same as the return from MallocAllocator.alloc

# describe the computation
buf_1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 1)
buf_2 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 2)
ld_1 = UOp(Ops.LOAD, dtypes.int32, (buf_1.view(ShapeTracker.from_shape((1,))),))
ld_2 = UOp(Ops.LOAD, dtypes.int32, (buf_2.view(ShapeTracker.from_shape((1,))),))
alu = ld_1 + ld_2
output_buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 0)
st_0 = UOp(Ops.STORE, dtypes.void, (output_buf.view(ShapeTracker.from_shape((1,))), alu))
s = UOp(Ops.SINK, dtypes.void, (st_0,))
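# (the integer arg of DEFINE_GLOBAL is the kernel argument index: 0 is the output,
#  1 and 2 are the inputs, matching the [out, a, b] order passed to exec below)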

# convert the computation to a "linearized" format (print the format)
from tinygrad.engine.realize import get_kernel, CompiledRunner
kernel = get_kernel(Device[DEVICE].renderer, s).linearize()

# compile a program (and print the source)
fxn = CompiledRunner(kernel.to_program())
print(fxn.p.src)
# NOTE: fxn.clprg is the CPUProgram

# run the program
fxn.exec([out, a, b])

# check the data out
assert out.as_buffer().cast('I')[0] == 5
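
# a small sketch: assuming Buffer.copyin can reload an already-allocated Buffer,
# the CompiledRunner is reusable with new input values
a.copyin(memoryview(bytearray(struct.pack("I", 10))))
b.copyin(memoryview(bytearray(struct.pack("I", 20))))
fxn.exec([out, a, b])
assert out.as_buffer().cast('I')[0] == 30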

print("******** third, the UOp ***********")

from tinygrad.engine.realize import run_schedule
from tinygrad.engine.schedule import create_schedule_with_vars
from tinygrad.engine.grouper import get_kernelize_map

# allocate some values + load in values
a = UOp.new_buffer(DEVICE, 1, dtypes.int32)
b = UOp.new_buffer(DEVICE, 1, dtypes.int32)
a.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))
b.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))

# describe the computation
out = a + b
s = UOp(Ops.SINK, dtypes.void, (out,))

# group the computation into kernels
becomes_map = get_kernelize_map(s)

# the compute maps to an assign
assign = becomes_map[a+b]

# the first source is the output buffer (data)
assert assign.src[0].op is Ops.BUFFER
# the second source is the kernel (compute)
assert assign.src[1].op is Ops.KERNEL

# schedule the kernel graph in a linear list
s = UOp(Ops.SINK, dtypes.void, (assign,))
sched, _ = create_schedule_with_vars(s)
assert len(sched) == 1

# DEBUGGING: print the compute ast
print(sched[-1].ast)
# NOTE: sched[-1].ast is the same as st_0 above

# the output will be stored in a new buffer
out = assign.buf_uop
assert out.op is Ops.BUFFER and not out.buffer.is_allocated()
print(out)

# run that schedule
run_schedule(sched)

# check the data out
assert out.is_realized and out.buffer.as_buffer().cast('I')[0] == 5
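
# a sketch mirroring the flow above (assuming realized BUFFER UOps can feed a
# new sink): the same buffer UOps a and b can sink a second op, a multiply
prod = a * b
assign2 = get_kernelize_map(UOp(Ops.SINK, dtypes.void, (prod,)))[prod]
sched2, _ = create_schedule_with_vars(UOp(Ops.SINK, dtypes.void, (assign2,)))
run_schedule(sched2)
assert assign2.buf_uop.buffer.as_buffer().cast('I')[0] == 6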

print("******** fourth, the Tensor ***********")

from tinygrad import Tensor

a = Tensor([2], dtype=dtypes.int32, device=DEVICE)
b = Tensor([3], dtype=dtypes.int32, device=DEVICE)
out = a + b

# check the data out
print(val:=out.item())
assert val == 5
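
# a closing sketch: Tensor ops stay lazy until data is requested; item() (or
# realize()) triggers the kernelize/schedule/run pipeline shown above
c = a*b + a
print(val2 := c.item())  # 2*3 + 2
assert val2 == 8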

abstractions3.py

Lines changed: 62 additions & 0 deletions
# abstractions2 goes from back to front; here we go from front to back
from typing import List
from tinygrad.helpers import tqdm

# *****
# 0. Load mnist on the device

from tinygrad.nn.datasets import mnist
X_train, Y_train, _, _ = mnist()
X_train = X_train.float()
X_train -= X_train.mean()

# *****
# 1. Define an MNIST model.

from tinygrad import Tensor

l1 = Tensor.kaiming_uniform(128, 784)
l2 = Tensor.kaiming_uniform(10, 128)
def model(x): return x.flatten(1).dot(l1.T).relu().dot(l2.T)
l1n, l2n = l1.numpy(), l2.numpy()

# *****
# 2. Choose a batch for training and do the backward pass.

from tinygrad.nn.optim import SGD
optim = SGD([l1, l2])

Tensor.training = True
X, Y = X_train[(samples:=Tensor.randint(128, high=X_train.shape[0]))], Y_train[samples]
optim.zero_grad()
model(X).sparse_categorical_crossentropy(Y).backward()
optim.schedule_step() # this will step the optimizer without running realize
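# (for contrast: optim.step() schedules and realizes immediately; deferring with
#  schedule_step() keeps everything lazy so the schedule can be inspected below)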

# *****
# 3. Create a schedule.

# The weight Tensors have been assigned to, but not yet realized. Everything is still lazy at this point
# l1.lazydata and l2.lazydata define a computation graph

from tinygrad.engine.schedule import ScheduleItem
schedule: List[ScheduleItem] = Tensor.schedule(l1, l2)

print(f"The schedule contains {len(schedule)} items.")
for si in schedule: print(str(si)[:80])
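
# a quick sketch: each ScheduleItem pairs an AST (the compute) with the Buffers
# it reads and writes (.ast appears in abstractions2; .bufs is assumed here)
si = schedule[0]
print(si.ast.op, len(si.bufs))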

# *****
# 4. Lower a schedule.

from tinygrad.engine.realize import lower_schedule_item, ExecItem
lowered: List[ExecItem] = [lower_schedule_item(si) for si in tqdm(schedule)]
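
# a quick sketch: lowering turns each ScheduleItem into an ExecItem, a compiled
# runner plus the concrete Buffers to pass it (.prg/.bufs are assumed field names)
print(type(lowered[0].prg).__name__, len(lowered[0].bufs))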

# *****
# 5. Run the schedule

for ei in tqdm(lowered): ei.run()

# *****
# 6. Print the weight change

print("first weight change\n", l1.numpy()-l1n)
print("second weight change\n", l2.numpy()-l2n)
