Implementation
import numpy as np
from rich.pretty import pprint
from rich import print
def euclidean_distance(
    x_1: np.ndarray, x_2: np.ndarray, squared: bool = False
) -> float:
    """Return the Euclidean distance between two vectors.

    Args:
        x_1: First vector.
        x_2: Second vector; must be broadcast-compatible with ``x_1``
            because the difference ``x_1 - x_2`` is taken element-wise.
        squared: If True, return the squared distance (the sum of squared
            differences) and skip the square root. Defaults to False,
            which returns the actual distance.

    Returns:
        The Euclidean distance, or its square when ``squared`` is True.
    """
    squared_distance = np.sum(np.square(x_1 - x_2))
    if squared:
        return squared_distance
    return np.sqrt(squared_distance)


def cosine_similarity(x_1: np.ndarray, x_2: np.ndarray) -> float:
    """Return the cosine similarity between two 1-D vectors.

    Computed as ``dot(x_1, x_2) / (||x_1|| * ||x_2||)``.

    Args:
        x_1: First vector.
        x_2: Second vector, same length as ``x_1``.

    Returns:
        The cosine of the angle between ``x_1`` and ``x_2``.

    Raises:
        AssertionError: If the norm of ``x_1`` disagrees with its Euclidean
            distance from the origin (internal sanity check).

    Note:
        There is no guard against zero-norm inputs: the denominator is
        then zero.
    """
    numerator = np.dot(x_1, x_2)
    origin = np.zeros(shape=x_1.shape)  # origin is a vector of zeros
    norm_x1 = np.linalg.norm(x_1)
    norm_x2 = np.linalg.norm(x_2)
    # Sanity check: the L2 norm of a vector equals its (non-squared)
    # Euclidean distance from the origin.
    np.testing.assert_allclose(norm_x1, euclidean_distance(x_1, origin))
    denominator = norm_x1 * norm_x2
    return numerator / denominator
# Two example 3-element vectors; they are reused by the scikit-learn
# comparison further down.
x1 = np.asarray([1, 2, 3])
x2 = np.asarray([3, 4, 5])
# Cosine similarity from the hand-written cosine_similarity defined above.
my_cosine_sim = cosine_similarity(x1, x2)
print(f"My cosine similarity: {my_cosine_sim}")
My cosine similarity: 0.9827076298239908
# NOTE: this import rebinds the name `cosine_similarity`, shadowing the
# hand-written function defined earlier in this file.
from sklearn.metrics.pairwise import cosine_similarity
# Each vector is reshaped to a single-row 2-D array before the call.
sklearn_cosine_sim = cosine_similarity(x1.reshape(1, -1), x2.reshape(1, -1))
print(f"Sklearn cosine similarity: {sklearn_cosine_sim}")
Sklearn cosine similarity: [[0.98270763]]
# Cross-check the hand-written result against the scikit-learn result;
# np.allclose compares within a floating-point tolerance.
assert np.allclose(
my_cosine_sim, sklearn_cosine_sim
), "The two cosine similarity values are not equal."
Scikit-learn’s implementation returns the kernel matrix: given two input matrices, entry (i, j) of the result is the cosine similarity between row i of the first matrix and row j of the second.
# Two matrices with two 3-element rows each. `cosine_similarity` here is
# the scikit-learn function imported above, not the hand-written one.
X1 = np.asarray([[1, 2, 3], [5, 6, 12]])
X2 = np.asarray([[3, 4, 5], [111, 2222, 333]])
sklearn_cosine_sim = cosine_similarity(X1, X2)
pprint(sklearn_cosine_sim)
array([[0.98270763, 0.65985027],
       [0.9778523 , 0.55522241]])