
    wg                         d Z ddlZddlmZmZ ddlmZ  ed       G d de             Zedk(  r ej                          yy)	a  
Matrix multiplication example via `cuda.jit`.

Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella

Contents in this file are referenced from the sphinx-generated docs.
"magictoken" is used for markers as beginning and ending of example text.
    N)CUDATestCaseskip_on_cudasim)captured_stdoutz4cudasim doesn't support cuda import at non-top-levelc                   2     e Zd ZdZ fdZ fdZd Z xZS )
TestMatMulzo
    Text matrix multiplication using simple, shared memory/square, and shared
    memory/nonsquare cases.
    c                 t    t               | _        | j                  j                          t        |           y N)r   _captured_stdout	__enter__supersetUpself	__class__s    n/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/numba/cuda/tests/doc_examples/test_matmul.pyr   zTestMatMul.setUp   s)     / 1'')    c                 \    | j                   j                  d d d        t        |           y r	   )r
   __exit__r   tearDownr   s    r   r   zTestMatMul.tearDown   s&    &&tT48r   c                    ddl mm ddl}ddl}j
                  fd       }|j                  d      j                  ddg      }|j                  ddg      }|j                  ddg      }j                  |      }j                  |      }j                  |      }	d}
|j                  |j                  d   |
d   z        }|j                  |j                  d   |
d   z        }||f} |||
f   |||	       |	j                         }t        |       t        ||z         dj
                  fd	       }|j                  d      j                  ddg      }|j                  ddg      }|j                  ddg      }j                  |      }j                  |      }j                  |      }	f}
|j                  |j                  d   |
d   z        }|j                  |j                  d   |
d   z        }||f} |||
f   |||	       |	j                         }t        |       t        ||z         d
}| j                  |j!                  |||z  k(        |       |j                  d      j                  ddg      }|j                  ddg      }|j                  ddg      }j                  |      }j                  |      }j                  |      }	f}
t#        |j                  d   |j                  d         }t#        |j                  d   |j                  d         }|j                  ||
d   z        }|j                  ||
d   z        }||f} |||
f   |||	       |	j                         }t        |       t        ||z         d}| j                  |j!                  |||z  k(        |       y)z/Test of matrix multiplication on various cases.r   )cudafloat32Nc                     j                  d      \  }}||j                  d   k  rL||j                  d   k  r9d}t        | j                  d         D ]  }|| ||f   |||f   z  z  } ||||f<   yyy)z2Perform square matrix multiplication of C = A * B.   r              N)gridshaperange)ABCijtmpkr   s          r   matmulz)TestMatMul.test_ex_matmul.<locals>.matmul)   s     99Q<DAq1771:~!aggaj.qwwqz* -A1QT7Qq!tW,,C-!Q$	 #1~r         )r(   r(   r   c                 H   j                   j                  f      }j                   j                  f      }j                  d      \  }}j                  j                  }j                  j
                  }j                  j                  }	 d      }
t        |	      D ]  }d|||f<   d|||f<   || j                  d   k  r*||z  z   | j                  d   k  r| |||z  z   f   |||f<   ||j                  d   k  r*||z  z   |j                  d   k  r|||z  z   |f   |||f<   j                          t              D ]  }|
|||f   |||f   z  z  }
 j                           ||j                  d   k  r||j                  d   k  r|
|||f<   yyy)z
            Perform matrix multiplication of C = A * B using CUDA shared memory.

            Reference: https://stackoverflow.com/a/64198479/13697228 by @RobertCrovella
            )r   dtyper   r   r   r   N)
sharedarrayr   	threadIdxxygridDimr   r   syncthreads)r    r!   r"   sAsBr/   r0   txtybpgr%   r#   r$   TPBr   r   s                r   fast_matmulz.TestMatMul.test_ex_matmul.<locals>.fast_matmulN   s    ""#s7"CB""#s7"CB99Q<DAq!!B!!B,,..C "+C3Z #2r6
2r6
qwwqz>rAG|qwwqz&A!"1b1s7l?!3Br2vJqwwqz>rAG|qwwqz&A!"2C<?!3Br2vJ   " s 1A2b!e9r!R%y00C1   "##$ 1771:~!aggaj.!Q$ #1~r   z5fast_matmul incorrect for shared memory, square case.)msgs            z9fast_matmul incorrect for shared memory, non-square case.)numbar   r   numpymathjitarangereshapeoneszeros	to_deviceceilr   copy_to_hostprint
assertTrueallmax)r   nprA   r'   x_hy_hz_hx_dy_dz_dthreadsperblockblockspergrid_xblockspergrid_yblockspergridr9   r:   
grid_y_max
grid_x_maxr8   r   r   s                     @@@r   test_ex_matmulzTestMatMul.test_ex_matmul    s    	( 
	 
	 iim##QF+ggq!fohh1vnnS!nnS!nnS!"))CIIaL?13E$EF))CIIaL?13E$EF(/:.}o-.sC= c
cCi 	'	 
'	V iim##QF+ggq!fohh1vnnS!nnS!nnS!*))CIIaL?13E$EF))CIIaL?13E$EF(/:3M?23CcB c
cCi FscCi/0c: iin$$aW-ggr1ghh1vnnS!nnS!nnS!*1syy|4
1syy|4
))J1C$CD))J1C$CD(/:3M?23CcB c
cCi JscCi/0c:r   )__name__
__module____qualname____doc__r   r   r[   __classcell__)r   s   @r   r   r      s    

I;r   r   __main__)
r_   unittestnumba.cuda.testingr   r   numba.tests.supportr   r   r\   main r   r   <module>rg      sU     < / GHZ; Z; IZ;z zHMMO r   