Block-sparse GPU kernels

We’re releasing highly-optimized GPU kernels for an underexplored class of neural network architectures: networks with block-sparse weights. Depending on the chosen sparsity, these kernels can run ord

# Example: multiply a dense minibatch by a block-sparse weight matrix.
from blocksparse.matmul import BlocksparseMatMul
import tensorflow as tf
import numpy as np

# Problem dimensions. hidden_size is divided by block_size below to get the
# number of blocks along each side of the sparsity layout.
hidden_size = 4096
block_size = 32
minibatch_size = 64

# Random block layout: a square integer matrix of 0/1 entries with
# hidden_size // block_size entries per side.
# NOTE(review): presumably a 1 marks a block that is kept -- confirm against
# the blocksparse documentation.
sparsity = np.random.randint(2, size=(hidden_size // block_size,) * 2)

# Matmul object built from the layout and the block size.
bsmm = BlocksparseMatMul(sparsity, block_size=block_size)

# Graph input: float32 placeholder; the leading (batch) dimension is left
# unspecified.
x = tf.placeholder(tf.float32, shape=[None, hidden_size])

# Weight variable; its shape is dictated by the matmul object (bsmm.w_shape).
w = tf.get_variable("w", bsmm.w_shape, dtype=tf.float32)

# Block-sparse matrix multiplication of the input by the weights.
y = bsmm(x, w)

# Run the graph once on an all-ones minibatch and print the fetched result.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
result = sess.run(
    [y],
    feed_dict={
        x: np.ones((minibatch_size, hidden_size), dtype='float32'),
    },
)
print(result)