baba
Jen pro inspiraci, když už to mám puštěné… je to jen sčítání, takže je to rychlejší – milion prvků pod desetinu vteřiny:
ipython qtconsole
In [1]: import pyopencl as cl
...: import numpy
...: import numpy.linalg as la
...: import time
...:
# Timestamp of the most recent checkpoint, shared by TimingInit() and Timing().
_tlast = 0

# Wall-clock timer.  time.clock() measured CPU time on Unix (so time spent
# waiting on the OpenCL device was not counted) and was removed in Python 3.8.
# Prefer the high-resolution perf_counter and fall back to time.time() on
# interpreters that predate it.
_now = getattr(time, "perf_counter", time.time)


def TimingInit():
    """Start (or restart) the stopwatch read by Timing()."""
    global _tlast
    _tlast = _now()


def Timing():
    """Print the seconds elapsed since the previous checkpoint, then reset it.

    The previous checkpoint is the last call to TimingInit() or Timing().
    """
    global _tlast
    t0 = _now()
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(t0 - _tlast)
    _tlast = t0
...:
...: ### ----- main ---
...:
# Benchmark: add two float32 vectors element-wise on an OpenCL device and
# print the time spent in each stage via Timing().

# Vector length.  Must be an int: numpy rejects a float dimension such as 1e6.
n = 1000000

TimingInit()
a = numpy.random.rand(n).astype(numpy.float32)
Timing()  # generate a
b = numpy.random.rand(n).astype(numpy.float32)
Timing()  # generate b
ctx = cl.create_some_context()
Timing()  # create the OpenCL context
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
# COPY_HOST_PTR initialises the device buffers from the host arrays.
a_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
b_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=b)
dest_buf = cl.Buffer(ctx, mf.WRITE_ONLY, b.nbytes)
Timing()  # create the queue and the buffers
prg = cl.Program(ctx, """
__kernel void sum(__global const float *a,
__global const float *b, __global float *c)
{
int gid = get_global_id(0);
c[gid] = a[gid] + b[gid];
}
""").build()
Timing()  # compile the kernel
# A kernel launch only enqueues work and returns an event immediately; wait on
# it so this stage measures the kernel's execution rather than the enqueue.
prg.sum(queue, a.shape, None, a_buf, b_buf, dest_buf).wait()
Timing()  # run the kernel
a_plus_b = numpy.empty_like(a)
Timing()  # allocate the host result array
cl.enqueue_copy(queue, a_plus_b, dest_buf)
Timing()  # copy the result back to the host
# Sanity check against the numpy sum: error norm, result norm, element count.
print(la.norm(a_plus_b - (a+b)), la.norm(a_plus_b), a_plus_b.size)
...:
0.012044
0.011675
0.061999
0.00342
0.003968
0.001592
8.8e-05
0.000712
(0.0, 1080.2306, 1000000)