• Manhattan Distance:
def manhattan(user1, user2): # in python
    distance = 0
    for key in user1:
        if key in user2:
            distance += abs(user[key] - user2[key])
    return distance
  • Euclidian Distance:
def euclidian(user1, user2): # in python
    distance = 0
    for key in user1:
        if key in user2:
            distance += pow(pow(abs(user1[key] - user2[key]), 2), 1/2)
    return distance
  • Find Nearest Neighbor:
def findNearestNeighbor(username, users):
    distances = []
    for user in users:
        if user != username:
            distance = manhattan(users[user], user[username])
    distances.sort()
    return distances
  • Minkowski Distance:
def minkowski(user1, user2, r):
    distance = 0
    for key in user1:
        if key in user2:
            distance += pow(pow(abs(user1[key] - user2[key]), r), 1/r)
    return distance
  • Pearson Correlation Coefficient:
def pearson_correlation_coefficient(result1, result2):
    sum_x = 0
    sum_y = 0
    sum_xy = 0
    total = 0
    sum_sq_x = 0
    sum_sq_y = 0
    for key in result1:
        if key in result2:
            sum_x += result1[key]
            sum_y += result2[key]
            sum_xy += result1[key] * result2[key]
            total += 1
            sum_sq_x += result1[key] ** result1[key]
            sum_sq_y += result2[key] ** result2[key]
    numerator = sum_xy - (sum_x * sum_y / total)
    denominator = pow((sum_sq_x - (pow(sum_x, 2) / total), 1/2) * pow((sum_sq_y - (pow(sum_y, 2) / total), 1/2)
    if denominator == 0:
        return 0
    else:
        return numerator / denominator
  • Cosine Similarity:
    This works great on zero/null values contained in a set
def cosine_similarity(result1, result2):
    sum_sq_x = 0
    sum_sq_y = 0
    numerator = 0
    for key in result1:
        x = result1[key]
        y = result2[key]
        if x == Null:
            x = 0
        if y == Null:
            y = 0
        sum_sq_x += x ** x
        sum_sq_y += y ** y
        numerator += x * y
    if sum_sq_x == 0 or sum_sq_y == 0:
        return 0
    else:
        return numerator / (sum_sq_x * sum_sq_y)
  • K-Nearest Neighbors: