""" TRACLUS: A Trajectory Clustering Algorithm (A Partition and Group Framework) Implemented for Python 3 This is an implementation of the TRACLUS algorithm as described in the paper: "Trajectory Clustering: A Partition-and-Group Framework" by Lee, Han, & Whang (2007) [http://hanj.cs.illinois.edu/pdf/sigmod07_jglee.pdf] Implementation Author: Adriel Isaiah V. Amoguis (De La Salle University) Implementation Date: 2023-03-19 This implementation was done as part of the algorithms required for the implementation author's undergraduate thesis. The implementation is not guaranteed to be bug-free and may not be optimized for certain use-cases. The implementation author is not responsible for any damages caused by the use of this implementation. Use at your own risk. End-users are encouraged to examine the code in the case of any issues. If you find any bugs, please report them to the implementation author via the repository's issues page on GitHub. """ import argparse import numpy as np from sklearn.cluster import OPTICS from sklearn.metrics.pairwise import cosine_similarity from scipy.spatial.distance import euclidean as d_euclidean import pickle import os import warnings # UTILITY FUNCTIONS def load_trajectories(filepath): """ Load the trajectories from a pickle file. """ if not os.path.exists(filepath): raise FileNotFoundError("File not found at {}".format(filepath)) with open(filepath, 'rb') as f: trajectories = pickle.load(f) return trajectories def get_point_projection_on_line(point, line): """ Get the projection of a point on a line. """ # Get the slope of the line using the start and end points line_slope = (line[-1, 1] - line[0, 1]) / (line[-1, 0] - line[0, 0]) if line[-1, 0] != line[0, 0] else np.inf # In case the slope is infinite, we can directly get the projection if np.isinf(line_slope): return np.array([line[0,0], point[1]]) # Convert the slope to a rotation matrix R = slope_to_rotation_matrix(line_slope) # Rotate the line and point rot_line = np.matmul(line, R.T) rot_point = np.matmul(point, R.T) # Get the projection proj = np.array([rot_point[0], rot_line[0,1]]) # Undo the rotation for the projection R_inverse = np.linalg.inv(R) proj = np.matmul(proj, R_inverse.T) return proj ################# EQUATIONS ################# # Euclidean Distance : Accepts two points of type np.ndarray([x,y]) # DEPRECATED IN FAVOR OF THE SCIPY IMPLEMENTATION OF THE EUCLIDEAN DISTANCE # d_euclidean = lambda p1, p2: np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2) # Perpendicular Distance def d_perpendicular(l1, l2): """ Calculate the perpendicular distance between two lines. """ # Find the shorter line and assign that as l_shorter l_shorter = l_longer = None l1_len, l2_len = d_euclidean(l1[0], l1[-1]), d_euclidean(l2[0], l2[-1]) if l1_len < l2_len: l_shorter = l1 l_longer = l2 else: l_shorter = l2 l_longer = l1 ps = get_point_projection_on_line(l_shorter[0], l_longer) pe = get_point_projection_on_line(l_shorter[-1], l_longer) lehmer_1 = d_euclidean(l_shorter[0], ps) lehmer_2 = d_euclidean(l_shorter[-1], pe) if lehmer_1 == 0 and lehmer_2 == 0: return 0 return (lehmer_1**2 + lehmer_2**2) / (lehmer_1 + lehmer_2)#, ps, pe, l_shorter[0], l_shorter[-1] # Parallel Distance def d_parallel(l1, l2): """ Calculate the parallel distance between two lines. """ # Find the shorter line and assign that as l_shorter l_shorter = l_longer = None l1_len, l2_len = d_euclidean(l1[0], l1[-1]), d_euclidean(l2[0], l2[-1]) if l1_len < l2_len: l_shorter = l1 l_longer = l2 else: l_shorter = l2 l_longer = l1 ps = get_point_projection_on_line(l_shorter[0], l_longer) pe = get_point_projection_on_line(l_shorter[-1], l_longer) parallel_1 = min(d_euclidean(l_longer[0], ps), d_euclidean(l_longer[-1], ps)) parallel_2 = min(d_euclidean(l_longer[0], pe), d_euclidean(l_longer[-1], pe)) return min(parallel_1, parallel_2) # Angular Distance def d_angular(l1, l2, directional=True): """ Calculate the angular distance between two lines. """ # Find the shorter line and assign that as l_shorter l_shorter = l_longer = None l1_len, l2_len = d_euclidean(l1[0], l1[-1]), d_euclidean(l2[0], l2[-1]) if l1_len < l2_len: l_shorter = l1 l_longer = l2 else: l_shorter = l2 l_longer = l1 # Get the minimum intersecting angle between both lines shorter_slope = (l_shorter[-1,1] - l_shorter[0,1]) / (l_shorter[-1,0] - l_shorter[0,0]) if l_shorter[-1,0] - l_shorter[0,0] != 0 else np.inf longer_slope = (l_longer[-1,1] - l_longer[0,1]) / (l_longer[-1,0] - l_longer[0,0]) if l_longer[-1,0] - l_longer[0,0] != 0 else np.inf # The case of a vertical line theta = None if np.isinf(shorter_slope): # Get the angle of the longer line with the x-axis and subtract it from 90 degrees tan_theta0 = longer_slope tan_theta1 = tan_theta0 * -1 theta0 = np.abs(np.arctan(tan_theta0)) theta1 = np.abs(np.arctan(tan_theta1)) theta = min(theta0, theta1) elif np.isinf(longer_slope): # Get the angle of the shorter line with the x-axis and subtract it from 90 degrees tan_theta0 = shorter_slope tan_theta1 = tan_theta0 * -1 theta0 = np.abs(np.arctan(tan_theta0)) theta1 = np.abs(np.arctan(tan_theta1)) theta = min(theta0, theta1) else: tan_theta0 = (shorter_slope - longer_slope) / (1 + shorter_slope * longer_slope) tan_theta1 = tan_theta0 * -1 theta0 = np.abs(np.arctan(tan_theta0)) theta1 = np.abs(np.arctan(tan_theta1)) theta = min(theta0, theta1) if directional: return np.sin(theta) * d_euclidean(l_longer[0], l_longer[-1]) if 0 <= theta < (90 * np.pi / 180): return np.sin(theta) * d_euclidean(l_longer[0], l_longer[-1]) elif (90 * np.pi / 180) <= theta <= np.pi: return np.sin(theta) else: raise ValueError("Theta is not in the range of 0 to 180 degrees.") # Total Trajectory Distance def distance(l1, l2, directional=True, w_perpendicular=1, w_parallel=1, w_angular=1): """ Get the total trajectory distance using all three distance formulas. """ perpendicular_distance = d_perpendicular(l1, l2) parallel_distance = d_parallel(l1, l2) angular_distance = d_angular(l1, l2, directional=directional) return (w_perpendicular * perpendicular_distance) + (w_parallel * parallel_distance) + (w_angular * angular_distance) # Minimum Description Length def minimum_desription_length(start_idx, curr_idx, trajectory, w_angular=1, w_perpendicular=1, par=True, directional=True): """ Calculate the minimum description length. """ LH = LDH = 0 for i in range(start_idx, curr_idx-1): ed = d_euclidean(trajectory[i], trajectory[i+1]) # print("ed:", ed) LH += max(0, np.log2(ed, where=ed>0)) if par: for j in range(start_idx, i-1): # print() # print(np.array([trajectory[start_idx], trajectory[i]])) # print(np.array([trajectory[j], trajectory[j+1]])) LDH += w_perpendicular * d_perpendicular(np.array([trajectory[start_idx], trajectory[i]]), np.array([trajectory[j], trajectory[j+1]])) LDH += w_angular * d_angular(np.array([trajectory[start_idx], trajectory[i]]), np.array([trajectory[j], trajectory[j+1]]), directional=directional) # print("LDH:", LDH) if par: return LDH + LH return LH # Slope to rotation matrix def slope_to_rotation_matrix(slope): """ Convert slope to rotation matrix. """ return np.array([[1, slope], [-slope, 1]]) ############################################# def partition(trajectory, directional=True, progress_bar=False, edis=30, w_perpendicular=1, w_angular=1): """ Partition a trajectory into segments. """ # Ensure that the trajectory is a numpy array of shape (n, 2) if not isinstance(trajectory, np.ndarray): raise TypeError("Trajectory must be a numpy array") elif trajectory.shape[1] != 2: raise ValueError("Trajectory must be a numpy array of shape (n, 2)") # Initialize the characteristic points, add the first point as a characteristic point cp_indices = [] cp_indices.append(0) traj_len = trajectory.shape[0] start_idx = 0 length = 1 while start_idx + length < traj_len: if progress_bar: print(f'\r{round(((start_idx + length) / traj_len) * 100, 2)}%', end='') # print(f'Current Index: {start_idx + length}, Trajectory Length: {traj_len}') curr_idx = start_idx + length # print(start_idx, curr_idx) # print(f"Current Index: {curr_idx}, Current point: {trajectory[curr_idx]}") cost_par = minimum_desription_length(start_idx, curr_idx, trajectory, w_angular=w_angular, w_perpendicular=w_perpendicular, directional=directional) cost_nopar = minimum_desription_length(start_idx, curr_idx, trajectory, par=False, directional=directional) # cost_par += 0 if 1 - cos == 0 else w_feats / (1 - cos) # print(f'Cost with partition: {cost_par}, Cost without partition: {cost_nopar}') if cost_par > cost_nopar and d_euclidean(trajectory[start_idx], trajectory[curr_idx]) > edis: # print('edp:', d_euclidean(trajectory[start_idx], trajectory[curr_idx])) # print(f"Added characteristic point: {trajectory[curr_idx-1]} with index {curr_idx-1}") cp_indices.append(curr_idx-1) start_idx = curr_idx-1 length = 1 else: length += 1 # Add last point to characteristic points cp_indices.append(len(trajectory) - 1) # print(cp_indices) return np.array([trajectory[i] for i in cp_indices]), cp_indices # Create the script version that takes in a file path for inputs if __name__ == "__main__": # Parse the arguments parser = argparse.ArgumentParser(description="Trajectory Clustering Algorithm") parser.add_argument("input_file", help="The input file path (pickle format)") parser.add_argument("-p", "--progress_bar", help="Show the progress bar", action="store_true") args = parser.parse_args() # Load the trajectories trajectories = load_trajectories(args.input_file) # Run the partition algorithm partitions, indices = tr.partition(points, progress_bar=args.progress_bar, w_perpendicular=100, w_angular=10)