1 year ago
#370880
Lamett
Closest pair algorithm - Optimizing my code for huge sets of points
I'm trying to create an algorithm that finds the closest pair of points (2D) within a set of points. I'm using a divide and conquer method which is explained here Closest Pair of Points Algorithm.
However, my algorithm is really the best when it comes to huge sets of points, it takes a lot more time than it should.
Any idea on how to optimize my code?
#!/usr/bin/env python3
from timeit import timeit
from sys import argv
from geo.point import Point
from geo.tycat import tycat
from geo.segment import Segment
def load_instance(filename):
"""
loads .mnt file.
returns list of points.
"""
with open(filename, "r") as instance_file:
points = [Point((float(p[0]), float(p[1]))) for p in (l.split(',') for l in instance_file)]
return points
def brute(points):
d_min = float('inf')
n = len(points)
for i in range(n):
for j in range(i+1, n):
d = points[i].distance_to(points[j])
if d < d_min:
d_min = d
p1 = points[i]
p2 = points[j]
return p1, p2, d_min
def closest_pair(points_x, points_y):
n = len(points_x)
if n <= 3:
if n == 2: return points_x[0], points_x[1], points_x[0].distance_to(points_x[1])
return brute(points_x)
mid = n // 2
points_x_left = points_x[:mid]
set_x_left = set(points_x_left)
points_x_right = points_x[mid:]
midpoint = points_x[mid].coordinates[0]
points_y_left = []
points_y_right = []
for x in points_y:
if x in set_x_left:
points_y_left.append(x)
else:
points_y_right.append(x)
(p1, q1, d1) = closest_pair(points_x_left, points_y_left)
(p2, q2, d2) = closest_pair(points_x_right, points_y_right)
if d1 <= d2:
d = d1
pair_min = (p1, q1)
else:
d = d2
pair_min = (p2, q2)
(p3, q3, d3) = closest_pair_boundary(points_x, points_y, d, pair_min)
if d <= d3:
return pair_min[0], pair_min[1], d
else:
return p3, q3, d3
def closest_pair_boundary(points_x, points_y, d, pair_min):
n = len(points_x)
midx = points_x[n//2].coordinates[0]
s_y = [x for x in points_y if midx - d <= x.coordinates[0] <= midx + d]
d_min = d
m = len(s_y)
for i in range (m-1):
for j in range(i+1, min(i+7, m)):
p, q = s_y[i], s_y[j]
dist = p.distance_to(q)
if dist < d_min:
pair_min = p, q
d_min = dist
return pair_min[0], pair_min[1], d_min
def print_solution(points):
points_x = sorted(points, key=lambda p: p.coordinates[0])
points_y = sorted(points, key=lambda p: p.coordinates[1])
p1, p2, d_min = closest_pair(points_x, points_y)
print(f'{p1}; {p2}')
def main():
for instance in argv[1:]:
points = load_instance(instance)
print_solution(points)
main()
I'm using search in a set instead of a comparison because it proves more effective. Still, my code could do better. Any help? Maybe there are algorithms that are better suited for bigger sets of data? Or maybe some functions I've used don't work well with big sets or big lists. I'm just looking for any insights really.
(I know my algorithm doesn't do well enough because I have an optimal proved time I'm supposed to be as close to as possible and, while for sets of ~10000 points I am, for ~1000000 points I'm completely behind.)
Thanks for reading!
python
optimization
divide-and-conquer
0 Answers
Your Answer