-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
147 lines (123 loc) · 5.11 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matchers import match_blobs_sumassign, match_blobs_ransac
from blobwatch import BlobWatch
from helperutils import load_intrinsics, load_extrinsics
from constutils import triangulate_dlt
# --- Stereo frame geometry: the camera emits both eyes side-by-side ---
frame_width = 2560
frame_height = 720
split_width = 1280

# --- Interactive 3D scatter view for the triangulated blob positions ---
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
sc = ax.scatter([], [], [], c='red', s=20)
plt.ion()  # non-blocking drawing so the capture loop keeps running
for set_lim, lo, hi in ((ax.set_xlim, -1, 1),
                        (ax.set_ylim, 0, 1),
                        (ax.set_zlim, -1, 1)):
    set_lim(lo, hi)
def _draw_blobs(canvas, blobs, x_offset, color, prefix):
    """Draw a bounding box and an id label for each blob onto *canvas*.

    x_offset shifts the drawing horizontally (used to place right-eye blobs
    on the right half of the combined side-by-side image).
    """
    for blob in blobs:
        p1 = (int(blob.left + x_offset), int(blob.top))
        p2 = (int(blob.left + blob.width + x_offset), int(blob.top + blob.height))
        cv2.rectangle(canvas, p1, p2, color, 2)
        cv2.putText(
            canvas,
            f"{prefix}{blob.blob_id}",
            (int(blob.x + x_offset), int(blob.y)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5, color, 1
        )


def main_demo():
    """Live stereo blob-tracking demo.

    Captures side-by-side stereo frames, detects and matches bright blobs
    between the two halves, triangulates each matched pair to a 3D point
    (in meters, camera-0 frame) and plots the points interactively.
    Runs until the camera stops delivering frames or ESC is pressed in the
    OpenCV window.
    """
    # --- Camera setup ---
    cap = cv2.VideoCapture(2, cv2.CAP_DSHOW)
    cap.set(cv2.CAP_PROP_AUTO_EXPOSURE, 0)   # manual exposure mode
    cap.set(cv2.CAP_PROP_EXPOSURE, -6)       # short exposure: only bright blobs survive
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)

    # --- Blob detection/tracking, one tracker per eye ---
    bwL = BlobWatch(pixel_threshold=240, min_area=50, max_area=3000, pyramid_scales=[1.5, 1.0, 0.5])
    bwR = BlobWatch(pixel_threshold=240, min_area=50, max_area=3000, pyramid_scales=[1.5, 1.0, 0.5])

    # --- Camera parameters from calibration ---
    K0, dist0 = load_intrinsics("camera_parameters/camera0_intrinsics.dat")
    K1, dist1 = load_intrinsics("camera_parameters/camera1_intrinsics.dat")
    R0, T0 = load_extrinsics("camera_parameters/camera0_rot_trans.dat")
    R1, T1 = load_extrinsics("camera_parameters/camera1_rot_trans.dat")

    # --- Projection matrices ---
    # Camera 0 is the world origin: identity rotation, zero translation.
    Rt0 = np.concatenate([np.eye(3), [[0], [0], [0]]], axis=-1)
    P0 = K0 @ Rt0
    # The calibration translation appears to be in millimeters; scale it to
    # meters ONCE here so triangulated points come out in meters.  (This was
    # previously recomputed for every match on every frame inside the loop,
    # while an unused mm-scaled P1 was built at setup — a unit inconsistency.)
    Rt1 = np.hstack([R1, T1 / 1000.0])
    P1 = K1 @ Rt1

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # The stereo camera sends both eyes concatenated into one frame;
        # split it back into left/right halves.
        left_img = frame[:, :split_width]
        right_img = frame[:, split_width:]
        grayL = cv2.cvtColor(left_img, cv2.COLOR_BGR2GRAY)
        grayR = cv2.cvtColor(right_img, cv2.COLOR_BGR2GRAY)

        observationL = bwL.process_frame(grayL)
        observationR = bwR.process_frame(grayR)

        matches, unmatched_left, unmatched_right = match_blobs_ransac(
            observationL.blobs,
            observationR.blobs,
            ransac_threshold=0.1,
            max_match_distance=500,
            size_threshold=0.5,
        )

        # --- Visualization only: boxes, ids, and match lines ---
        combined = np.hstack((left_img.copy(), right_img.copy()))
        _draw_blobs(combined, observationL.blobs, 0, (0, 255, 0), "LID:")
        _draw_blobs(combined, observationR.blobs, split_width, (255, 0, 0), "RID:")
        for (i, j) in matches:
            left_blob = observationL.blobs[i]
            right_blob = observationR.blobs[j]
            pt_left = (int(left_blob.x), int(left_blob.y))
            pt_right = (int(right_blob.x + split_width), int(right_blob.y))
            cv2.line(combined, pt_left, pt_right, (0, 0, 255), 2)

        # --- Triangulate each matched pair to a 3D point (meters) ---
        dim3_points = np.array([
            triangulate_dlt(P0, P1,
                            (observationL.blobs[i].x, observationL.blobs[i].y),
                            (observationR.blobs[j].x, observationR.blobs[j].y))
            for (i, j) in matches
        ])

        # Push the points into the 3D scatter.  Axes are remapped so the
        # camera's depth (Z) is plotted horizontally and image-Y points up.
        if len(dim3_points) > 0:
            sc._offsets3d = (dim3_points[:, 0],
                             dim3_points[:, 2],
                             -dim3_points[:, 1])
        if len(dim3_points) > 1:
            # Debug readout: depth separation between the first two points.
            print(abs(dim3_points[0][2] - dim3_points[1][2]))

        # Shrink the preview so the side-by-side frame fits on screen.
        view_resize = 2
        combined = cv2.resize(combined, (int(combined.shape[1] / view_resize),
                                         int(combined.shape[0] / view_resize)))
        cv2.imshow("Blob Matches", combined)
        plt.draw()
        plt.pause(0.01)
        if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
            break

    cap.release()
    cv2.destroyAllWindows()
# Run the live demo only when executed as a script, not on import.
if __name__ == "__main__":
    main_demo()