Viewing File: /home/ubuntu/.local/lib/python3.10/site-packages/sklearn/neighbors/_ball_tree.pyx.tp

{{py:

# Generated file: _ball_tree.pyx

implementation_specific_values = [
    # The values are arranged as follows:
    #
    #       name_suffix, INPUT_DTYPE_t, INPUT_DTYPE
    #
    ('64', 'float64_t', 'np.float64'),
    ('32', 'float32_t', 'np.float32')
]

# Author: Jake Vanderplas <vanderplas@astro.washington.edu>
# License: BSD 3 clause

}}


__all__ = ['BallTree', 'BallTree64', 'BallTree32']

{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}

DOC_DICT{{name_suffix}} = {
    'BinaryTree': 'BallTree{{name_suffix}}',
    'binary_tree': 'ball_tree{{name_suffix}}',
}

VALID_METRICS{{name_suffix}} = [
    'BrayCurtisDistance{{name_suffix}}',
    'CanberraDistance{{name_suffix}}',
    'ChebyshevDistance{{name_suffix}}',
    'DiceDistance{{name_suffix}}',
    'EuclideanDistance{{name_suffix}}',
    'HammingDistance{{name_suffix}}',
    'HaversineDistance{{name_suffix}}',
    'JaccardDistance{{name_suffix}}',
    'MahalanobisDistance{{name_suffix}}',
    'ManhattanDistance{{name_suffix}}',
    'MinkowskiDistance{{name_suffix}}',
    'PyFuncDistance{{name_suffix}}',
    'RogersTanimotoDistance{{name_suffix}}',
    'RussellRaoDistance{{name_suffix}}',
    'SEuclideanDistance{{name_suffix}}',
    'SokalMichenerDistance{{name_suffix}}',
    'SokalSneathDistance{{name_suffix}}',
    'WMinkowskiDistance{{name_suffix}}',
]

{{endfor}}

include "_binary_tree.pxi"

{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}

# Inherit BallTree{{name_suffix}} from BinaryTree{{name_suffix}}
cdef class BallTree{{name_suffix}}(BinaryTree{{name_suffix}}):
    __doc__ = CLASS_DOC.format(**DOC_DICT{{name_suffix}})
    pass

{{endfor}}


#----------------------------------------------------------------------
# The functions below specialized the Binary Tree as a Ball Tree
#
#   Note that these functions use the concept of "reduced distance".
#   The reduced distance, defined for some metrics, is a quantity which
#   is more efficient to compute than the distance, but preserves the
#   relative rankings of the true distance.  For example, the reduced
#   distance for the Euclidean metric is the squared-euclidean distance.
#   For some metrics, the reduced distance is simply the distance.

{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}

cdef int allocate_data{{name_suffix}}(
    BinaryTree{{name_suffix}} tree,
    intp_t n_nodes,
    intp_t n_features,
) except -1:
    """Allocate arrays needed for the KD Tree"""
    tree.node_bounds = np.zeros((1, n_nodes, n_features), dtype={{INPUT_DTYPE}})
    return 0


cdef int init_node{{name_suffix}}(
    BinaryTree{{name_suffix}} tree,
    NodeData_t[::1] node_data,
    intp_t i_node,
    intp_t idx_start,
    intp_t idx_end,
) except -1:
    """Initialize the node for the dataset stored in tree.data"""
    cdef intp_t n_features = tree.data.shape[1]
    cdef intp_t n_points = idx_end - idx_start

    cdef intp_t i, j
    cdef float64_t radius
    cdef const {{INPUT_DTYPE_t}} *this_pt

    cdef intp_t* idx_array = &tree.idx_array[0]
    cdef const {{INPUT_DTYPE_t}}* data = &tree.data[0, 0]
    cdef {{INPUT_DTYPE_t}}* centroid = &tree.node_bounds[0, i_node, 0]

    cdef bint with_sample_weight = tree.sample_weight is not None
    cdef const {{INPUT_DTYPE_t}}* sample_weight
    cdef float64_t sum_weight_node
    if with_sample_weight:
        sample_weight = &tree.sample_weight[0]

    # determine Node centroid
    for j in range(n_features):
        centroid[j] = 0

    if with_sample_weight:
        sum_weight_node = 0
        for i in range(idx_start, idx_end):
            sum_weight_node += sample_weight[idx_array[i]]
            this_pt = data + n_features * idx_array[i]
            for j from 0 <= j < n_features:
                centroid[j] += this_pt[j] * sample_weight[idx_array[i]]

        for j in range(n_features):
            centroid[j] /= sum_weight_node
    else:
        for i in range(idx_start, idx_end):
            this_pt = data + n_features * idx_array[i]
            for j from 0 <= j < n_features:
                centroid[j] += this_pt[j]

        for j in range(n_features):
            centroid[j] /= n_points

    # determine Node radius
    radius = 0
    for i in range(idx_start, idx_end):
        radius = fmax(radius,
                      tree.rdist(centroid,
                                 data + n_features * idx_array[i],
                                 n_features))

    node_data[i_node].radius = tree.dist_metric._rdist_to_dist(radius)
    node_data[i_node].idx_start = idx_start
    node_data[i_node].idx_end = idx_end
    return 0


cdef inline float64_t min_dist{{name_suffix}}(
    BinaryTree{{name_suffix}} tree,
    intp_t i_node,
    const {{INPUT_DTYPE_t}}* pt,
) except -1 nogil:
    """Compute the minimum distance between a point and a node"""
    cdef float64_t dist_pt = tree.dist(pt, &tree.node_bounds[0, i_node, 0],
                                     tree.data.shape[1])
    return fmax(0, dist_pt - tree.node_data[i_node].radius)


cdef inline float64_t max_dist{{name_suffix}}(
    BinaryTree{{name_suffix}} tree,
    intp_t i_node,
    const {{INPUT_DTYPE_t}}* pt,
) except -1:
    """Compute the maximum distance between a point and a node"""
    cdef float64_t dist_pt = tree.dist(pt, &tree.node_bounds[0, i_node, 0],
                                     tree.data.shape[1])
    return dist_pt + tree.node_data[i_node].radius


cdef inline int min_max_dist{{name_suffix}}(
    BinaryTree{{name_suffix}} tree,
    intp_t i_node,
    const {{INPUT_DTYPE_t}}* pt,
    float64_t* min_dist,
    float64_t* max_dist,
) except -1 nogil:
    """Compute the minimum and maximum distance between a point and a node"""
    cdef float64_t dist_pt = tree.dist(pt, &tree.node_bounds[0, i_node, 0],
                                     tree.data.shape[1])
    cdef float64_t rad = tree.node_data[i_node].radius
    min_dist[0] = fmax(0, dist_pt - rad)
    max_dist[0] = dist_pt + rad
    return 0


cdef inline float64_t min_rdist{{name_suffix}}(
    BinaryTree{{name_suffix}} tree,
    intp_t i_node,
    const {{INPUT_DTYPE_t}}* pt,
) except -1 nogil:
    """Compute the minimum reduced-distance between a point and a node"""
    if tree.euclidean:
        return euclidean_dist_to_rdist{{name_suffix}}(
            min_dist{{name_suffix}}(tree, i_node, pt)
        )
    else:
        return tree.dist_metric._dist_to_rdist(
            min_dist{{name_suffix}}(tree, i_node, pt)
        )


cdef inline float64_t max_rdist{{name_suffix}}(
    BinaryTree{{name_suffix}} tree,
    intp_t i_node,
    const {{INPUT_DTYPE_t}}* pt,
) except -1:
    """Compute the maximum reduced-distance between a point and a node"""
    if tree.euclidean:
        return euclidean_dist_to_rdist{{name_suffix}}(
            max_dist{{name_suffix}}(tree, i_node, pt)
        )
    else:
        return tree.dist_metric._dist_to_rdist(
            max_dist{{name_suffix}}(tree, i_node, pt)
        )


cdef inline float64_t min_dist_dual{{name_suffix}}(
    BinaryTree{{name_suffix}} tree1,
    intp_t i_node1,
    BinaryTree{{name_suffix}} tree2,
    intp_t i_node2,
) except -1:
    """compute the minimum distance between two nodes"""
    cdef float64_t dist_pt = tree1.dist(&tree2.node_bounds[0, i_node2, 0],
                                      &tree1.node_bounds[0, i_node1, 0],
                                      tree1.data.shape[1])
    return fmax(0, (dist_pt - tree1.node_data[i_node1].radius
                    - tree2.node_data[i_node2].radius))


cdef inline float64_t max_dist_dual{{name_suffix}}(
    BinaryTree{{name_suffix}} tree1,
    intp_t i_node1,
    BinaryTree{{name_suffix}} tree2,
    intp_t i_node2,
) except -1:
    """compute the maximum distance between two nodes"""
    cdef float64_t dist_pt = tree1.dist(&tree2.node_bounds[0, i_node2, 0],
                                      &tree1.node_bounds[0, i_node1, 0],
                                      tree1.data.shape[1])
    return (dist_pt + tree1.node_data[i_node1].radius
            + tree2.node_data[i_node2].radius)


cdef inline float64_t min_rdist_dual{{name_suffix}}(
    BinaryTree{{name_suffix}} tree1,
    intp_t i_node1,
    BinaryTree{{name_suffix}} tree2,
    intp_t i_node2,
) except -1:
    """compute the minimum reduced distance between two nodes"""
    if tree1.euclidean:
        return euclidean_dist_to_rdist{{name_suffix}}(
            min_dist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2)
        )
    else:
        return tree1.dist_metric._dist_to_rdist(
            min_dist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2)
        )


cdef inline float64_t max_rdist_dual{{name_suffix}}(
    BinaryTree{{name_suffix}} tree1,
    intp_t i_node1,
    BinaryTree{{name_suffix}} tree2,
    intp_t i_node2,
) except -1:
    """compute the maximum reduced distance between two nodes"""
    if tree1.euclidean:
        return euclidean_dist_to_rdist{{name_suffix}}(
            max_dist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2)
        )
    else:
        return tree1.dist_metric._dist_to_rdist(
            max_dist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2)
        )

{{endfor}}


class BallTree(BallTree64):
    __doc__ = CLASS_DOC.format(BinaryTree="BallTree")
    pass
Back to Directory File Manager