#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
file : tile_list.py
created time : 2022/05/04
author : Zhenyu Wei
copyright : (C)Copyright 2021-present, mdpy organization
"""
import os
import math
import numpy as np
import cupy as cp
import numba.cuda as cuda
from mdpy import SPATIAL_DIM
from mdpy.core import (
MAX_NUM_EXCLUDED_PARTICLES,
NUM_NEIGHBOR_CELLS,
NUM_PARTICLES_PER_TILE,
)
from mdpy.environment import *
from mdpy.utils import *
from mdpy.unit import *
from mdpy.error import *
SKIN_WIDTH = Quantity(1.5, angstrom)
THREAD_PER_BLOCK = 32
NUM_TILES_PER_THREAD = 16
cur_dir = os.path.dirname(os.path.abspath(__file__))
lookup_table_dir = os.path.join(cur_dir, "../../data/space_filling_curve_lookup_table/")
[docs]class TileList:
[docs] def __init__(
self, pbc_matrix: np.ndarray, skin_width=SKIN_WIDTH, num_bits=7
) -> None:
self.set_pbc_matrix(pbc_matrix)
self._skin_width = check_quantity_value(skin_width, default_length_unit)
self._num_bits = num_bits
# Attribute
self.set_cutoff_radius(Quantity(1, angstrom))
self._num_cells_vec = np.ceil(self._pbc_diag / self._cell_width).astype(
NUMPY_INT
)
self._tile_list = None
self._tile_cell_index = None
self._particle_tile_information = None
self._cell_tile_information = None
self._num_tiles = 0
# lookup table
lookup_file = os.path.join(lookup_table_dir, "hilbert_%d_bits.npy" % num_bits)
self._code_range = 2**self._num_bits
self._code_normalize_factor = self._code_range**3
self._lookup_table = np.load(lookup_file).astype(NUMPY_INT)
self._device_lookup_table = cp.array(self._lookup_table, CUPY_INT)
# Device attribute
self._device_pbc_matrix = cp.array(self._pbc_matrix, CUPY_FLOAT)
self._device_pbc_diag = cp.array(self._pbc_diag, CUPY_FLOAT)
self._device_half_pbc_diag = cp.array(self._half_pbc_diag, CUPY_FLOAT)
# Kernel
self._construct_tile_list = cuda.jit(
nb.void(
NUMBA_FLOAT[:, ::1], # positions
NUMBA_INT[::1], # sorted_particle_index
NUMBA_INT[:, ::1], # cell_particle_information
NUMBA_INT[:, ::1], # cell_tile_information
NUMBA_INT[::1], # num_cells_vec
NUMBA_INT[::1], # sorted_matrix_index
NUMBA_FLOAT[:, ::1], # tile_box
NUMBA_INT[:, ::1], # tile_list
NUMBA_INT[:, ::1], # tile_cell_index
)
)(self._construct_tile_list_kernel)
self._find_tile_neighbors = cuda.jit(
nb.void(
NUMBA_INT[::1], # num_cells_vec
NUMBA_FLOAT[::1], # cell_width
NUMBA_FLOAT[:, ::1], # pbc_matrix
NUMBA_INT[:, ::1], # tile_cell_index
NUMBA_INT[:, ::1], # cell_tile_information
NUMBA_FLOAT[:, ::1], # tile_box
NUMBA_INT[::1], # tile_num_neighbors
NUMBA_INT[:, ::1], # tile_neighbors
)
)(self._find_tile_neighbors_kernel)
# sort
self._sort_int_matrix = cuda.jit(
nb.void(
NUMBA_INT[:, ::1], # unsorted_matrix
NUMBA_INT[::1], # matrix_id_mapping_index
NUMBA_INT[:, ::1], # sorted_matrix
)
)(self._sort_matrix_kernel)
self._sort_float_matrix = cuda.jit(
nb.void(
NUMBA_FLOAT[:, ::1], # unsorted_matrix
NUMBA_INT[::1], # matrix_id_mapping_index
NUMBA_FLOAT[:, ::1], # sorted_matrix
)
)(self._sort_matrix_kernel)
# unsort
self._unsort_int_matrix = cuda.jit(
nb.void(
NUMBA_INT[:, ::1], # sorted_matrix
NUMBA_INT[::1], # matrix_id_mapping_index
NUMBA_INT[:, ::1], # unsorted_matrix
)
)(self._unsort_matrix_kernel)
self._unsort_float_matrix = cuda.jit(
nb.void(
NUMBA_FLOAT[:, ::1], # sorted_matrix
NUMBA_INT[::1], # matrix_id_mapping_index
NUMBA_FLOAT[:, ::1], # unsorted_matrix
)
)(self._unsort_matrix_kernel)
# exclusion mask
self._generate_exclusion_mask_map = cuda.jit(
nb.void(
NUMBA_FLOAT[::1], # cutoff_radius
NUMBA_FLOAT[:, ::1], # pbc_matrix
NUMBA_FLOAT[:, ::1], # sorted_positions
NUMBA_INT[:, ::1], # excluded_particles
NUMBA_INT[:, ::1], # tile_neighbors
NUMBA_INT[::1], # sorted_particle_index
NUMBA_BIT[:, ::1], # mask_map
)
)(self._generate_exclusion_mask_map_kernel)
def set_pbc_matrix(self, pbc_matrix: np.ndarray) -> None:
pbc_matrix = check_quantity_value(pbc_matrix, default_length_unit)
self._pbc_matrix = check_pbc_matrix(pbc_matrix)
self._pbc_diag = self._pbc_matrix.diagonal()
self._half_pbc_diag = self._pbc_diag / 2
self._device_pbc_matrix = cp.array(self._pbc_matrix, CUPY_FLOAT)
self._device_pbc_diag = cp.array(self._pbc_diag, CUPY_FLOAT)
self._device_half_pbc_diag = cp.array(self._half_pbc_diag, CUPY_FLOAT)
def set_cutoff_radius(self, cutoff_radius) -> None:
cutoff_radius = check_quantity_value(cutoff_radius, default_length_unit)
if cutoff_radius == 0:
raise TileListPoorDefinedError(
"Cutoff radius is poor defined, current value %.3f" % (cutoff_radius)
)
cell_matrix = np.ones(SPATIAL_DIM) * cutoff_radius
num_cells_vec = np.floor(self._pbc_diag / cell_matrix).astype(NUMPY_INT)
for i in num_cells_vec:
if i < 2:
raise TileListPoorDefinedError(
"The cutoff_radius is too large to create cell list"
)
# Attributes
self._cutoff_radius = cutoff_radius
self._cell_width = self._cutoff_radius + self._skin_width
self._num_cells_vec = np.floor(self._pbc_diag / self._cell_width).astype(
NUMPY_INT
)
self._num_cells_vec[self._num_cells_vec < 3] = 3
# Device attributes
self._device_cutoff_radius = cp.array([self._cutoff_radius], CUPY_FLOAT)
self._device_cell_width = cp.array([self._cell_width], CUPY_FLOAT)
self._device_num_cells_vec = cp.array(self._num_cells_vec, CUPY_INT)
self._device_cell_scale_factor = (
self._device_num_cells_vec / self._device_pbc_diag
).astype(CUPY_FLOAT)
def _encode_particles(self, positive_positions: cp.ndarray):
scaled_positions = positive_positions * self._device_cell_scale_factor
scaled_int_part = cp.floor(scaled_positions)
scaled_fraction_part = scaled_positions - scaled_int_part
scaled_int_part = scaled_int_part.astype(CUPY_INT)
# cell index
cell_index = (
scaled_int_part[:, 2]
+ scaled_int_part[:, 1] * self._num_cells_vec[2]
+ scaled_int_part[:, 0] * self._num_cells_vec[2] * self._num_cells_vec[1]
)
# cell particle information
num_particles_each_cell = cp.bincount(
cell_index, minlength=np.prod(self._num_cells_vec)
)
particle_start_index = cp.zeros_like(num_particles_each_cell, CUPY_INT)
particle_start_index[1:] = cp.cumsum(num_particles_each_cell)[:-1]
self._cell_particle_information = cp.stack(
[particle_start_index, num_particles_each_cell], axis=1
).astype(CUPY_INT)
# cell tile information
num_tiles_each_ceil = cp.ceil(
num_particles_each_cell / NUM_PARTICLES_PER_TILE
).astype(CUPY_INT)
self._num_tiles = int(num_tiles_each_ceil.sum())
self._max_num_tiles_per_cell = int(num_tiles_each_ceil.max())
tile_start_index = cp.zeros_like(num_tiles_each_ceil, CUPY_INT)
tile_start_index[1:] = cp.cumsum(num_tiles_each_ceil)[:-1]
self._cell_tile_information = cp.stack(
[tile_start_index, num_tiles_each_ceil], axis=1
).astype(CUPY_INT)
# Subcell space filling index
subcell_index = cp.round(scaled_fraction_part * self._code_range).astype(
CUPY_INT
)
subcell_index = (
self._device_lookup_table[
subcell_index[:, 0],
subcell_index[:, 1],
subcell_index[:, 2],
]
/ self._code_normalize_factor
)
# particle index
particle_code = cell_index + subcell_index
self._sorted_particle_index = cp.argsort(particle_code).astype(CUPY_INT)
@staticmethod
def _construct_tile_list_kernel(
positions,
sorted_particle_index,
cell_particle_information,
cell_tile_information,
num_cells_vec,
matrix_id_mapping_index,
tile_box,
tile_list,
tile_cell_index,
):
tile_index = cuda.grid(1)
if tile_index >= tile_list.shape[0]:
return
# Binary search of tile's cell index
low, high = 0, cell_tile_information.shape[0] - 1
is_found = False
while low <= high:
mid = (low + high) // 2
tile_start_index = cell_tile_information[mid, 0]
if tile_index < tile_start_index:
high = mid - 1
elif tile_index > tile_start_index:
low = mid + 1
else:
cell_index = mid
is_found = True
break
if not is_found:
cell_index = low - 1
# Skip cell with 0 tile. The start index will be the same for those cells
# E.g cell 10: [305, 0], cell 11: [305, 0], cell 12: [305, 2]
while cell_particle_information[cell_index, 1] == 0:
cell_index += 1
# Get cell information
cell_tile_start_index = cell_tile_information[cell_index, 0]
cell_particle_start_index = cell_particle_information[cell_index, 0]
cell_particle_end_index = (
cell_particle_start_index + cell_particle_information[cell_index, 1]
)
# Get tile information
tile_index_cur_cell = tile_index - cell_tile_start_index
tile_particle_start_index = (
cell_particle_start_index + tile_index_cur_cell * NUM_PARTICLES_PER_TILE
)
tile_particle_end_index = tile_particle_start_index + NUM_PARTICLES_PER_TILE
if tile_particle_end_index >= cell_particle_end_index:
tile_particle_end_index = cell_particle_end_index
# Assign particles to tile
local_box = cuda.local.array((SPATIAL_DIM), NUMBA_FLOAT)
box_max = cuda.local.array((SPATIAL_DIM), NUMBA_FLOAT)
box_min = cuda.local.array((SPATIAL_DIM), NUMBA_FLOAT)
tile_particle_index = 0
sorted_matrix_index = (
tile_index * NUM_PARTICLES_PER_TILE
) # The index of sorted_particle
# first particle
particle_index = sorted_particle_index[tile_particle_start_index]
tile_list[tile_index, tile_particle_index] = particle_index
matrix_id_mapping_index[sorted_matrix_index] = particle_index
for i in range(3):
data = positions[particle_index, i]
local_box[i] = data
box_max[i] = data
box_min[i] = data
tile_particle_index += 1
sorted_matrix_index += 1
for index in range(tile_particle_start_index + 1, tile_particle_end_index):
particle_index = sorted_particle_index[index]
tile_list[tile_index, tile_particle_index] = particle_index
matrix_id_mapping_index[sorted_matrix_index] = particle_index
for i in range(SPATIAL_DIM):
data = positions[particle_index, i]
local_box[i] += data
if data > box_max[i]:
box_max[i] = data
if data < box_min[i]:
box_min[i] = data
tile_particle_index += 1
sorted_matrix_index += 1
diag = NUMBA_FLOAT(0)
for i in range(SPATIAL_DIM):
tile_box[tile_index, i] = local_box[i] / tile_particle_index
tile_box[tile_index, i + SPATIAL_DIM] = (
box_max[i] - box_min[i]
) * NUMBA_FLOAT(0.5)
diag += tile_box[tile_index, i + SPATIAL_DIM] ** 2
tile_box[tile_index, 6] = math.sqrt(diag)
# Get 3d cell index
decomposition = cell_index
num_cells_z = num_cells_vec[2]
num_cells_yz = num_cells_vec[1] * num_cells_z
cell_x = decomposition // num_cells_yz
decomposition -= cell_x * num_cells_yz
cell_y = decomposition // num_cells_z
cell_z = decomposition - cell_y * num_cells_z
tile_cell_index[tile_index, 0] = cell_x
tile_cell_index[tile_index, 1] = cell_y
tile_cell_index[tile_index, 2] = cell_z
@staticmethod
def _find_tile_neighbors_kernel(
num_cells_vec,
cell_width,
pbc_matrix,
tile_cell_index,
cell_tile_information,
tile_box,
tile_num_neighbors,
tile_neighbors,
):
tile_id = cuda.grid(1)
if tile_id >= tile_neighbors.shape[0]:
return
central_cell_index = cuda.local.array((SPATIAL_DIM), NUMBA_INT)
num_cells_x = num_cells_vec[0]
num_cells_y = num_cells_vec[1]
num_cells_z = num_cells_vec[2]
num_cells_yz = NUMBA_INT(num_cells_y * num_cells_z)
central_cell_index[0] = tile_cell_index[tile_id, 0]
central_cell_index[1] = tile_cell_index[tile_id, 1]
central_cell_index[2] = tile_cell_index[tile_id, 2]
# shared data
local_thread_x = cuda.threadIdx.x
shared_pbc_matrix = cuda.shared.array((SPATIAL_DIM), NUMBA_FLOAT)
shared_half_pbc_matrix = cuda.shared.array((SPATIAL_DIM), NUMBA_FLOAT)
if local_thread_x <= 2:
shared_pbc_matrix[local_thread_x] = pbc_matrix[
local_thread_x, local_thread_x
]
shared_half_pbc_matrix[local_thread_x] = shared_pbc_matrix[
local_thread_x
] * NUMBA_FLOAT(0.5)
neighbor_cell_index = cuda.local.array((SPATIAL_DIM), NUMBA_INT)
local_box = cuda.local.array((7), NUMBA_FLOAT)
vec = cuda.local.array((SPATIAL_DIM), NUMBA_FLOAT)
cutoff_radius = cell_width[0]
for i in range(7):
local_box[i] = tile_box[tile_id, i]
cur_neighbor_tile_index = 0
for i in range(-1, 2):
neighbor_cell_index[0] = central_cell_index[0] + i
if neighbor_cell_index[0] >= num_cells_x:
neighbor_cell_index[0] -= num_cells_x
elif neighbor_cell_index[0] < 0:
neighbor_cell_index[0] += num_cells_x
for j in range(-1, 2):
neighbor_cell_index[1] = central_cell_index[1] + j
if neighbor_cell_index[1] >= num_cells_y:
neighbor_cell_index[1] -= num_cells_y
elif neighbor_cell_index[1] < 0:
neighbor_cell_index[1] += num_cells_y
for k in range(-1, 2):
neighbor_cell_index[2] = central_cell_index[2] + k
if neighbor_cell_index[2] >= num_cells_z:
neighbor_cell_index[2] -= num_cells_z
elif neighbor_cell_index[2] < 0:
neighbor_cell_index[2] += num_cells_z
cell_index = (
neighbor_cell_index[2]
+ neighbor_cell_index[1] * num_cells_z
+ neighbor_cell_index[0] * num_cells_yz
)
cur_tile_index = cell_tile_information[cell_index, 0]
for _ in range(cell_tile_information[cell_index, 1]):
neighbor_tile_id = cur_tile_index
is_neighbor = True
r = NUMBA_FLOAT(0)
for i in range(SPATIAL_DIM):
vec[i] = abs(tile_box[neighbor_tile_id, i] - local_box[i])
if vec[i] > shared_half_pbc_matrix[i]:
vec[i] = shared_pbc_matrix[i] - vec[i]
if (
vec[i]
- local_box[i + 3]
- tile_box[neighbor_tile_id, i + 3]
>= cutoff_radius
):
is_neighbor = False
break
r += vec[i] ** 2
# is_neighbor = True
if is_neighbor:
if (
math.sqrt(r)
- local_box[6]
- tile_box[neighbor_tile_id, 6]
< cutoff_radius
):
tile_neighbors[
tile_id, cur_neighbor_tile_index
] = neighbor_tile_id
cur_neighbor_tile_index += 1
cur_tile_index += 1
tile_num_neighbors[tile_id] = cur_neighbor_tile_index
def update(self, positions: cp.ndarray) -> None:
self._num_particles = positions.shape[0]
self._positions = positions
# Encode particles
positive_positions = self._positions + self._device_half_pbc_diag
self._encode_particles(positive_positions)
# Create tile list
block_per_grid = int(np.ceil(self._num_tiles / THREAD_PER_BLOCK))
self._tile_box = cp.zeros((self._num_tiles, 7), CUPY_FLOAT)
self._tile_list = (
cp.zeros((self._num_tiles, NUM_PARTICLES_PER_TILE), CUPY_INT) - 1
)
self._tile_cell_index = cp.zeros((self._num_tiles, SPATIAL_DIM), CUPY_INT)
self._matrix_id_mapping_index = (
cp.zeros(self._num_tiles * NUM_PARTICLES_PER_TILE, CUPY_INT) - 1
)
self._construct_tile_list[block_per_grid, THREAD_PER_BLOCK](
positions,
self._sorted_particle_index,
self._cell_particle_information,
self._cell_tile_information,
self._device_num_cells_vec,
self._matrix_id_mapping_index,
self._tile_box,
self._tile_list,
self._tile_cell_index,
)
# Find tile neighbors
thread_per_block = 32
block_per_grid = int(np.ceil(self._num_tiles / thread_per_block))
self._tile_neighbors = (
cp.zeros(
(
self._num_tiles,
int(self._max_num_tiles_per_cell) * NUM_NEIGHBOR_CELLS,
),
CUPY_INT,
)
- 1
)
self._tile_num_neighbors = cp.zeros((self._num_tiles), CUPY_INT)
self._find_tile_neighbors[block_per_grid, thread_per_block](
self._device_num_cells_vec,
self._device_cell_width,
self._device_pbc_matrix,
self._tile_cell_index,
self._cell_tile_information,
self._tile_box,
self._tile_num_neighbors,
self._tile_neighbors,
)
self._tile_neighbors = cp.array(
self._tile_neighbors[:, : int(cp.max(self._tile_num_neighbors))], CUPY_INT
)
def sort_matrix(self, unsorted_matrix: cp.ndarray) -> cp.ndarray:
matrix_type = unsorted_matrix.dtype
sorted_matrix = cp.zeros(
(unsorted_matrix.shape[1], self._num_tiles * NUM_PARTICLES_PER_TILE),
matrix_type,
)
thread_per_block = NUM_PARTICLES_PER_TILE
block_per_grid = self._num_tiles
if matrix_type == CUPY_INT:
self._sort_int_matrix[block_per_grid, thread_per_block](
unsorted_matrix, self._matrix_id_mapping_index, sorted_matrix
)
elif matrix_type == CUPY_FLOAT:
self._sort_float_matrix[block_per_grid, thread_per_block](
unsorted_matrix, self._matrix_id_mapping_index, sorted_matrix
)
return sorted_matrix
@staticmethod
def _sort_matrix_kernel(unsorted_matrix, matrix_id_mapping_index, sorted_matrix):
idx = cuda.grid(1)
unsorted_index = matrix_id_mapping_index[idx]
if unsorted_index == -1:
return
for i in range(unsorted_matrix.shape[1]):
sorted_matrix[i, idx] = unsorted_matrix[unsorted_index, i]
def unsort_matrix(self, sorted_matrix: cp.ndarray) -> cp.ndarray:
matrix_type = sorted_matrix.dtype
unsorted_matrix = cp.zeros(
(self._num_particles, sorted_matrix.shape[0]), matrix_type
)
thread_per_block = NUM_PARTICLES_PER_TILE
block_per_grid = self._num_tiles
if matrix_type == CUPY_INT:
self._unsort_int_matrix[block_per_grid, thread_per_block](
sorted_matrix, self._matrix_id_mapping_index, unsorted_matrix
)
elif matrix_type == CUPY_FLOAT:
self._unsort_float_matrix[block_per_grid, thread_per_block](
sorted_matrix, self._matrix_id_mapping_index, unsorted_matrix
)
return unsorted_matrix
@staticmethod
def _unsort_matrix_kernel(sorted_matrix, matrix_id_mapping_index, unsorted_matrix):
idx = cuda.grid(1)
unsorted_index = matrix_id_mapping_index[idx]
if unsorted_index == -1:
return
for i in range(unsorted_matrix.shape[1]):
unsorted_matrix[unsorted_index, i] = sorted_matrix[i, idx]
def generate_exclusion_mask_map(
self, device_excluded_particles: cp.ndarray
) -> cp.ndarray:
sorted_positions = self.sort_matrix(self._positions)
mask_map = cp.zeros(
(self._tile_neighbors.shape[1], self._num_tiles * NUM_PARTICLES_PER_TILE),
CUPY_BIT,
)
thread_per_block = (NUM_PARTICLES_PER_TILE, 1)
block_per_grid_x = self._num_tiles
block_per_grid_y = int(
np.ceil(self._tile_neighbors.shape[1] / NUM_TILES_PER_THREAD)
)
block_per_grid = (block_per_grid_x, block_per_grid_y)
self._generate_exclusion_mask_map[block_per_grid, thread_per_block](
self._device_cutoff_radius,
self._device_pbc_matrix,
sorted_positions,
device_excluded_particles,
self._tile_neighbors,
self._matrix_id_mapping_index,
mask_map,
)
return mask_map
@staticmethod
def _generate_exclusion_mask_map_kernel(
cutoff_radius,
pbc_matrix,
sorted_positions,
excluded_particles,
tile_neighbors,
sorted_particle_index,
mask_map,
):
tile_id1 = cuda.blockIdx.x
tile_id2_start = cuda.blockIdx.y * NUM_TILES_PER_THREAD
tile_id2_end = tile_id2_start + NUM_TILES_PER_THREAD
if tile_id2_end >= tile_neighbors.shape[1]:
tile_id2_end = tile_neighbors.shape[1]
# Particle index information
local_thread_x = cuda.threadIdx.x
global_thread_x = local_thread_x + cuda.blockIdx.x * cuda.blockDim.x
# shared data
shared_pbc_matrix = cuda.shared.array((SPATIAL_DIM), NUMBA_FLOAT)
shared_half_pbc_matrix = cuda.shared.array((SPATIAL_DIM), NUMBA_FLOAT)
if local_thread_x <= 2:
shared_pbc_matrix[local_thread_x] = pbc_matrix[
local_thread_x, local_thread_x
]
shared_half_pbc_matrix[local_thread_x] = shared_pbc_matrix[
local_thread_x
] * NUMBA_FLOAT(0.5)
tile1_positions = cuda.shared.array(
(SPATIAL_DIM, NUM_PARTICLES_PER_TILE), NUMBA_FLOAT
)
tile2_positions = cuda.shared.array(
(SPATIAL_DIM, NUM_PARTICLES_PER_TILE), NUMBA_FLOAT
)
tile2_particle_index = cuda.shared.array((NUM_PARTICLES_PER_TILE), NUMBA_INT)
tile1_index = tile_id1 * NUM_PARTICLES_PER_TILE + local_thread_x
cuda.syncthreads()
# Read data
for i in range(SPATIAL_DIM):
tile1_positions[i, local_thread_x] = sorted_positions[i, tile1_index]
# Local data
local_forces = cuda.local.array((SPATIAL_DIM), NUMBA_FLOAT)
vec = cuda.local.array((SPATIAL_DIM), NUMBA_FLOAT)
cutoff_radius = cutoff_radius[0]
for i in range(SPATIAL_DIM):
local_forces[i] = 0
for tile_index in range(tile_id2_start, tile_id2_end):
tile_id2 = tile_neighbors[tile_id1, tile_index]
if tile_id2 == -1:
mask_map[tile_index, global_thread_x] = NUMBA_BIT(4294967295)
continue
tile2_index = tile_id2 * NUM_PARTICLES_PER_TILE + local_thread_x
for i in range(SPATIAL_DIM):
tile2_positions[i, local_thread_x] = sorted_positions[i, tile2_index]
tile2_particle_index[local_thread_x] = sorted_particle_index[tile2_index]
cuda.syncthreads()
exclusion_flag = NUMBA_BIT(0)
particle1 = sorted_particle_index[global_thread_x]
if particle1 == -1:
mask_map[tile_index, global_thread_x] = 4294967295 # 2**32 - 1 all 1
continue
for particle_index in range(NUM_PARTICLES_PER_TILE):
particle2 = tile2_particle_index[particle_index]
if particle2 == -1 or particle1 == particle2:
exclusion_flag = exclusion_flag ^ (1 << particle_index)
else:
for information_index in range(MAX_NUM_EXCLUDED_PARTICLES):
particle2 = excluded_particles[particle1, information_index]
if particle2 == -1:
break
elif particle2 == tile2_particle_index[particle_index]:
exclusion_flag = exclusion_flag ^ (1 << particle_index)
break
r = NUMBA_FLOAT(0)
for i in range(SPATIAL_DIM):
vec[i] = (
tile2_positions[i, particle_index]
- tile1_positions[i, local_thread_x]
)
if vec[i] < -shared_half_pbc_matrix[i]:
vec[i] += shared_pbc_matrix[i]
elif vec[i] > shared_half_pbc_matrix[i]:
vec[i] -= shared_pbc_matrix[i]
r += vec[i] ** 2
r = math.sqrt(r)
if r > cutoff_radius:
exclusion_flag = exclusion_flag ^ (1 << particle_index)
mask_map[tile_index, global_thread_x] = exclusion_flag
@property
def cell_width(self):
return self._cell_width
@property
def cutoff_radius(self):
return self._cutoff_radius
@property
def pbc_matrix(self):
return self._pbc_matrix
@property
def num_tiles(self) -> int:
return self._num_tiles
@property
def tile_list(self) -> cp.ndarray:
return self._tile_list
@property
def tile_neighbors(self) -> cp.ndarray:
return self._tile_neighbors
@property
def sorted_particle_index(self) -> cp.ndarray:
return self._sorted_particle_index
@property
def num_cells_vec(self) -> np.ndarray:
return self._num_cells_vec
@property
def device_num_cells_vec(self) -> cp.ndarray:
return self._device_num_cells_vec
if __name__ == "__main__":
import time
import mdpy as md
from cupy.cuda.nvtx import RangePush, RangePop
pdb = md.io.PDBParser(
"/home/zhenyuwei/nutstore/ZhenyuWei/Note_Research/mdpy/mdpy/benchmark/str/medium.pdb"
)
psf = md.io.PSFParser(
"/home/zhenyuwei/nutstore/ZhenyuWei/Note_Research/mdpy/mdpy/benchmark/str/medium.psf"
)
positions = cp.array(pdb.positions, CUPY_FLOAT)
positive_positions = positions + cp.array(np.diagonal(pdb.pbc_matrix)) / 2
tile_list = TileList(pdb.pbc_matrix)
tile_list.set_cutoff_radius(8)
tile_list.update(positions)