Block-sparse GPU kernels
We’re releasing highly-optimized GPU kernels for an underexplored class of neural network architectures: networks with block-sparse weights. Depending on the chosen sparsity, these kernels can run ord
# Minimal example: multiply a dense minibatch by a block-sparse weight matrix
# using the BlocksparseMatMul op inside a TensorFlow 1.x graph.
from blocksparse.matmul import BlocksparseMatMul
import tensorflow as tf
import numpy as np

hidden_size = 4096
block_size = 32
minibatch_size = 64

# Create a (random) sparsity pattern: a square grid with
# hidden_size // block_size entries per side, each drawn from {0, 1}.
# NOTE(review): presumably 1 keeps a block and 0 drops it -- confirm against
# the blocksparse documentation.
blocks_per_side = hidden_size // block_size
sparsity = np.random.randint(2, size=(blocks_per_side, blocks_per_side))

# Initialize the sparse matrix multiplication object
bsmm = BlocksparseMatMul(sparsity, block_size=block_size)

# Input to graph; the leading dimension is left as None so any batch size
# can be fed.
x = tf.placeholder(tf.float32, shape=[None, hidden_size])

# Initialize block-sparse weights; the variable's shape is taken from the
# matmul object (bsmm.w_shape) rather than computed here.
w = tf.get_variable("w", bsmm.w_shape, dtype=tf.float32)

# Block-sparse matrix multiplication
y = bsmm(x, w)

# Run
# The session is intentionally left open, as in the original listing; call
# sess.close() once you are finished with it to release its resources.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Feed a minibatch of ones. Fetching [y] (a list) returns a one-element list.
ones_batch = np.ones((minibatch_size, hidden_size), dtype='float32')
result = sess.run([y], feed_dict={x: ones_batch})
print(result)