
    wgR#                         d dl Zd dlmZmZmZmZmZ d dlm	Z	m
Z
mZ d dlmZ d Zd Zd Zd Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Z ed       G d de
             Zedk(  r e	j<                          yy)    N)cudaint32int64float32float64)unittestCUDATestCaseskip_on_cudasim)configc                 |    t        j                  d      }|dk(  rd| d<   t        j                  d       | d   | |<   y )N   r   *       )r   gridsyncwarp)aryis     j/home/mcse/projects/flask/flask-venv/lib/python3.12/site-packages/numba/cuda/tests/cudapy/test_warp_ops.pyuseful_syncwarpr      s8    		!AAvAMM*VCF    c                 f    t        j                  d      }t        j                  d||      }|| |<   y Nr   r   r   r   	shfl_sync)r   idxr   vals       r   use_shfl_sync_idxr      s*    		!A
..Q
,CCFr   c                 f    t        j                  d      }t        j                  d||      }|| |<   y r   )r   r   shfl_up_syncr   deltar   r   s       r   use_shfl_sync_upr"      s,    		!A


J5
1CCFr   c                 f    t        j                  d      }t        j                  d||      }|| |<   y r   )r   r   shfl_down_syncr    s       r   use_shfl_sync_downr%      s,    		!A


j!U
3CCFr   c                 f    t        j                  d      }t        j                  d||      }|| |<   y r   )r   r   shfl_xor_sync)r   xorr   r   s       r   use_shfl_sync_xorr)   !   s,    		!A


ZC
0CCFr   c                 f    t        j                  d      }t        j                  d|d      }|| |<   y Nr   r   r   r   )r   intor   r   s       r   use_shfl_sync_with_valr-   '   s*    		!A
..T1
-CCFr   c                 j    t        j                  d      }t        j                  d| |         }|||<   y r   )r   r   all_syncary_inary_outr   preds       r   use_vote_sync_allr4   -   ,    		!A==VAY/DGAJr   c                 j    t        j                  d      }t        j                  d| |         }|||<   y r   )r   r   any_syncr0   s       r   use_vote_sync_anyr8   3   r5   r   c                 j    t        j                  d      }t        j                  d| |         }|||<   y r   )r   r   eq_syncr0   s       r   use_vote_sync_eqr;   9   s,    		!A<<
F1I.DGAJr   c                 n    t         j                  j                  }t        j                  dd      }|| |<   y )Nr   Tr   	threadIdxxballot_sync)r   r   ballots      r   use_vote_sync_ballotrB   ?   s,    Aj$/FCFr   c                 j    t        j                  d      }t        j                  d| |         }|||<   y r   )r   r   match_any_sync)r1   r2   r   rA   s       r   use_match_any_syncrE   E   s.    		!A  VAY7FGAJr   c                     t        j                  d      }t        j                  d| |         \  }}|r|||<   y d||<   y r+   )r   r   match_all_sync)r1   r2   r   rA   r3   s        r   use_match_all_syncrH   K   s<    		!A&&z6!9=LFDGAJQGAJr   c                 8   t         j                  j                  }|dz  dk(  rt        j                  dd      }n\|dz  dk(  rt        j                  dd      }n=|dz  dk(  rt        j                  dd      }n|dz  d	k(  rt        j                  d
d      }| |<   y )N   r   Tr   """"   DDDD       r=   )arrr   rA   s      r   use_independent_schedulingrR   Q   s    A1uz!!*d3	
Q!!!*d3	
Q!!!*d3	
Q!!!*d3CFr   c                 f    t         j                  ryt        j                         j                  | k\  S )NT)r   ENABLE_CUDASIMr   get_current_devicecompute_capability)ccs    r   _safe_cc_checkrX   ^   s(    &&(;;rAAr   z2Warp Operations are not yet implemented on cudasimc                      e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Z ej                   ed      d      d        Z ej                   ed      d      d        Z ej                   ed      d      d        Zd Zd Zy)TestCudaWarpOperationsc                      t        j                  d      t              }d}t        j                  |t        j
                        } |d|f   |       | j                  t        j                  |dk(               y )Nzvoid(int32[:])    dtyper   r   )r   jitr   npemptyr   
assertTrueallselfcompilednelemr   s       r   test_useful_syncwarpz+TestCudaWarpOperations.test_useful_syncwarpg   s]    -488,-o>hhuBHH-E3sby)*r   c                      t        j                  d      t              }d}d}t        j                  |t        j
                        } |d|f   ||       | j                  t        j                  ||k(               y Nvoid(int32[:], int32)r\   rJ   r]   r   )r   r_   r   r`   ra   r   rb   rc   )re   rf   rg   r   r   s        r   test_shfl_sync_idxz)TestCudaWarpOperations.test_shfl_sync_idxn   se    4488345FGhhuBHH-E3$scz*+r   c                 d    t        j                  d      t              }d}d}t        j                  |t        j
                        }t        j                  |t        j
                        }||d xxx |z  ccc  |d|f   ||       | j                  t        j                  ||k(               y rj   )	r   r_   r"   r`   ra   r   arangerb   rc   re   rf   rg   r!   r   exps         r   test_shfl_sync_upz(TestCudaWarpOperations.test_shfl_sync_upv   s    4488345EFhhuBHH-iiRXX.EFuE3&scz*+r   c                 f    t        j                  d      t              }d}d}t        j                  |t        j
                        }t        j                  |t        j
                        }|d | xxx |z  ccc  |d|f   ||       | j                  t        j                  ||k(               y rj   )	r   r_   r%   r`   ra   r   rn   rb   rc   ro   s         r   test_shfl_sync_downz*TestCudaWarpOperations.test_shfl_sync_down   s    4488345GHhhuBHH-iiRXX.GeVE3&scz*+r   c                 N    t        j                  d      t              }d}d}t        j                  |t        j
                        }t        j                  |t        j
                        |z  } |d|f   ||       | j                  t        j                  ||k(               y )Nrk   r\      r]   r   )	r   r_   r)   r`   ra   r   rn   rb   rc   )re   rf   rg   r(   r   rp   s         r   test_shfl_sync_xorz)TestCudaWarpOperations.test_shfl_sync_xor   s~    4488345FGhhuBHH-iiRXX.4E3$scz*+r   c                 ,   t         t        t        t        f}t	        j                   d      t	        j                  d      t	        j                  t        j
                        t	        j                  t        j
                        f}t        ||      D ]  \  }} t        j                  |d d  |f      t              }d}t	        j                  ||j                        } |d|f   ||       | j                  t	        j                  ||k(                y )Nl        r\   r]   r   )r   r   r   r   r`   pizipr   r_   r-   ra   r^   rb   rc   )re   typesvaluestypr   rf   rg   r   s           r   test_shfl_sync_typesz+TestCudaWarpOperations.test_shfl_sync_types   s    ugw.((2, 1**RUU#RZZ%68E6* 	0HC.txxQ./EFHE((5		2CHQXsC(OOBFF3#:./	0r   c                     t        j                  d      t              }d}t        j                  |t        j
                        }t        j                  |t        j
                        } |d|f   ||       | j                  t        j                  |dk(               d|d<    |d|f   ||       | j                  t        j                  |dk(               y )Nvoid(int32[:], int32[:])r\   r]   r   r   rx   )	r   r_   r4   r`   onesr   ra   rb   rc   re   rf   rg   r1   r2   s        r   test_vote_sync_allz)TestCudaWarpOperations.test_vote_sync_all   s    7488678IJbhh/((51E67+w!|,-r
E67+w!|,-r   c                     t        j                  d      t              }d}t        j                  |t        j
                        }t        j                  |t        j
                        } |d|f   ||       | j                  t        j                  |dk(               d|d<   d|d<    |d|f   ||       | j                  t        j                  |dk(               y )Nr   r\   r]   r   r   rM      )	r   r_   r8   r`   zerosr   ra   rb   rc   r   s        r   test_vote_sync_anyz)TestCudaWarpOperations.test_vote_sync_any   s    7488678IJ%rxx0((51E67+w!|,-q	q	E67+w!|,-r   c                 ,    t        j                  d      t              }d}t        j                  |t        j
                        }t        j                  |t        j
                        } |d|f   ||       | j                  t        j                  |dk(               d|d<    |d|f   ||       | j                  t        j                  |dk(               d|d d   |d|f   ||       | j                  t        j                  |dk(               y )Nr   r\   r]   r   r   )	r   r_   r;   r`   r   r   ra   rb   rc   r   s        r   test_vote_sync_eqz(TestCudaWarpOperations.test_vote_sync_eq   s    7488678HI%rxx0((51E67+w!|,-q	E67+w!|,-q	E67+w!|,-r   c                     t        j                  d      t              }d}t        j                  |t        j
                        } |d|f   |       | j                  t        j                  |t        j
                  d      k(               y )Nvoid(uint32[:])r\   r]   r   r   )r   r_   rB   r`   ra   uint32rb   rc   rd   s       r   test_vote_sync_ballotz,TestCudaWarpOperations.test_vote_sync_ballot   sh    .488-./CDhhuBII.E3sbii
&;;<=r   )   r   z-Matching requires at least Volta Architecturec                 v    t        j                  d      t              }d}t        j                  |t        j
                        dz  }t        j                  |t        j
                        }t        j                  dd      } |d|f   ||       | j                  t        j                  ||k(               y )Nr   
   r]   rM   )iU  i  r   r   )
r   r_   rE   r`   rn   r   ra   tilerb   rc   )re   rf   rg   r1   r2   rp   s         r   test_match_any_syncz*TestCudaWarpOperations.test_match_any_sync   s     8488678JK51A5((51gg2A6E67+w#~./r   c                     t        j                  d      t              }d}t        j                  |t        j
                        }t        j                  |t        j
                        } |d|f   ||       | j                  t        j                  |dk(               d|d<    |d|f   ||       | j                  t        j                  |dk(               y )Nr   r   r]   r   i  rJ   r   )	r   r_   rH   r`   r   r   ra   rb   rc   r   s        r   test_match_all_syncz*TestCudaWarpOperations.test_match_all_sync   s     8488678JK%rxx0((51E67+w,678q	E67+w!|,-r   z;Independent scheduling requires at least Volta Architecturec                     t        j                  d      t              }t        j                  dt        j
                        }t        j                  dd      } |d   |       | j                  t        j                  ||k(               y )Nr   r\   r]   )rK   rL   rN   rP      r   r\   )	r   r_   rR   r`   ra   r   r   rb   rc   )re   rf   rQ   rp   s       r   test_independent_schedulingz2TestCudaWarpOperations.test_independent_scheduling   sg     /488-./IJhhr+ggFJscz*+r   c                     t         j                  d        }t        j                  dt        j                        } |d   |       t        j
                  dd      }t        j                  j                  ||       y )Nc                     t        j                  d      }|dz  dk(  rt        j                         | |<   y t        j                         | |<   y )Nr   rM   r   )r   r   
activemaskr?   r   s     r   use_activemaskz>TestCudaWarpOperations.test_activemask.<locals>.use_activemask   s>    		!AA!| (! (!r   r\   r]   r   )iUUUUl   *UU ru   )r   r_   r`   r   r   r   testingassert_equal)re   r   outexpecteds       r   test_activemaskz&TestCudaWarpOperations.test_activemask   sd    			) 
		) hhr+uc" 773R8


#.r   c                 ^   t         j                  d        }t        j                  dt        j                        } |d   |       t        j
                  t        d      D cg c]
  }d|z  dz
   c}t        j                        }t        j                  j                  ||       y c c}w )Nc                 \    t        j                  d      }t        j                         | |<   y )Nr   )r   r   lanemask_ltr   s     r   use_lanemask_ltz@TestCudaWarpOperations.test_lanemask_lt.<locals>.use_lanemask_lt  s!    		!A##%AaDr   r\   r]   r   rM   r   )	r   r_   r`   r   r   asarrayranger   r   )re   r   r   r   r   s        r   test_lanemask_ltz'TestCudaWarpOperations.test_lanemask_lt  s    		& 
	& hhr+s# ::U2Y?Q!|?$&II/


#. @s   #B*N)__name__
__module____qualname__rh   rl   rq   rs   rv   r~   r   r   r   r   r   
skipUnlessrX   r   r   r   r   r    r   r   rZ   rZ   e   s    +,,,,	0	.
..> X/HJ0J0 X/HJ	.J	. X/(),),/*/r   rZ   __main__)numpyr`   numbar   r   r   r   r   numba.cuda.testingr   r	   r
   
numba.corer   r   r   r"   r%   r)   r-   r4   r8   r;   rB   rE   rH   rR   rX   rZ   r   mainr   r   r   <module>r      s     6 6 F F '
B EFj/\ j/ Gj/Z zHMMO r   