import os
import json
import random

import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset


class CARLA_Data(Dataset):

    def __init__(self, root, config):
        self.seq_len = config.seq_len
        self.pred_len = config.pred_len
        self.ignore_sides = config.ignore_sides
        self.ignore_rear = config.ignore_rear
        self.input_resolution = config.input_resolution
        self.scale = config.scale

        self.lidar = []
        self.front = []
        self.left = []
        self.right = []
        self.rear = []
        self.x = []
        self.y = []
        self.x_command = []
        self.y_command = []
        self.theta = []
        self.steer = []
        self.throttle = []
        self.brake = []
        self.command = []
        self.velocity = []

        for sub_root in root:
            preload_file = os.path.join(sub_root, 'rg_lidar_diag_pl_'+str(self.seq_len)+'_'+str(self.pred_len)+'.npy')

            # dump to npy if no preload
            if not os.path.exists(preload_file):
                preload_front = []
                preload_left = []
                preload_right = []
                preload_rear = []
                preload_lidar = []
                preload_x = []
                preload_y = []
                preload_x_command = []
                preload_y_command = []
                preload_theta = []
                preload_steer = []
                preload_throttle = []
                preload_brake = []
                preload_command = []
                preload_velocity = []

                # list sub-directories in root
                root_files = os.listdir(sub_root)
                routes = [folder for folder in root_files if not os.path.isfile(os.path.join(sub_root, folder))]
                for route in routes:
                    route_dir = os.path.join(sub_root, route)
                    print(route_dir)
                    # subtract final frames (pred_len) since there are no future waypoints
                    # first frame of sequence not used
                    num_seq = (len(os.listdir(route_dir+"/rgb_front/"))-self.pred_len-2)//self.seq_len
                    for seq in range(num_seq):
                        fronts = []
                        lefts = []
                        rights = []
                        rears = []
                        lidars = []
                        xs = []
                        ys = []
                        thetas = []

                        # read files sequentially (past and current frames)
                        for i in range(self.seq_len):
                            # images
                            filename = f"{str(seq*self.seq_len+1+i).zfill(4)}.png"
                            fronts.append(route_dir+"/rgb_front/"+filename)
                            lefts.append(route_dir+"/rgb_left/"+filename)
                            rights.append(route_dir+"/rgb_right/"+filename)
                            rears.append(route_dir+"/rgb_rear/"+filename)

                            # point cloud
                            lidars.append(route_dir + f"/lidar/{str(seq*self.seq_len+1+i).zfill(4)}.npy")

                            # position
                            with open(route_dir + f"/measurements/{str(seq*self.seq_len+1+i).zfill(4)}.json", "r") as read_file:
                                data = json.load(read_file)
                            xs.append(data['x'])
                            ys.append(data['y'])
                            thetas.append(data['theta'])

                        # get control value of final frame in sequence
                        preload_x_command.append(data['x_command'])
                        preload_y_command.append(data['y_command'])
                        preload_steer.append(data['steer'])
                        preload_throttle.append(data['throttle'])
                        preload_brake.append(data['brake'])
                        preload_command.append(data['command'])
                        preload_velocity.append(data['speed'])

                        # read files sequentially (future frames)
                        for i in range(self.seq_len, self.seq_len + self.pred_len):
                            # point cloud
                            lidars.append(route_dir + f"/lidar/{str(seq*self.seq_len+1+i).zfill(4)}.npy")

                            # position
                            with open(route_dir + f"/measurements/{str(seq*self.seq_len+1+i).zfill(4)}.json", "r") as read_file:
                                data = json.load(read_file)
                            xs.append(data['x'])
                            ys.append(data['y'])

                            # fix for theta=nan in some measurements
                            if np.isnan(data['theta']):
                                thetas.append(0)
                            else:
                                thetas.append(data['theta'])

                        preload_front.append(fronts)
                        preload_left.append(lefts)
                        preload_right.append(rights)
                        preload_rear.append(rears)
                        preload_lidar.append(lidars)
                        preload_x.append(xs)
                        preload_y.append(ys)
                        preload_theta.append(thetas)

                # dump to npy
                preload_dict = {}
                preload_dict['front'] = preload_front
                preload_dict['left'] = preload_left
                preload_dict['right'] = preload_right
                preload_dict['rear'] = preload_rear
                preload_dict['lidar'] = preload_lidar
                preload_dict['x'] = preload_x
                preload_dict['y'] = preload_y
                preload_dict['x_command'] = preload_x_command
                preload_dict['y_command'] = preload_y_command
                preload_dict['theta'] = preload_theta
                preload_dict['steer'] = preload_steer
                preload_dict['throttle'] = preload_throttle
                preload_dict['brake'] = preload_brake
                preload_dict['command'] = preload_command
                preload_dict['velocity'] = preload_velocity
                np.save(preload_file, preload_dict)

            # load from npy if available
            preload_dict = np.load(preload_file, allow_pickle=True)
            self.front += preload_dict.item()['front']
            self.left += preload_dict.item()['left']
            self.right += preload_dict.item()['right']
            self.rear += preload_dict.item()['rear']
            self.lidar += preload_dict.item()['lidar']
            self.x += preload_dict.item()['x']
            self.y += preload_dict.item()['y']
            self.x_command += preload_dict.item()['x_command']
            self.y_command += preload_dict.item()['y_command']
            self.theta += preload_dict.item()['theta']
            self.steer += preload_dict.item()['steer']
            self.throttle += preload_dict.item()['throttle']
            self.brake += preload_dict.item()['brake']
            self.command += preload_dict.item()['command']
            self.velocity += preload_dict.item()['velocity']
            print("Preloading " + str(len(preload_dict.item()['front'])) + " sequences from " + preload_file)

    def __len__(self):
        """Returns the length of the dataset."""
        return len(self.lidar)

    def __getitem__(self, index):
        """Returns the item at index `index`."""
        data = dict()
        data['fronts'] = []
        data['lefts'] = []
        data['rights'] = []
        data['rears'] = []
        data['lidars'] = []
        data['bev_points'] = []
        data['cam_points'] = []

        seq_fronts = self.front[index]
        seq_lefts = self.left[index]
        seq_rights = self.right[index]
        seq_rears = self.rear[index]
        seq_lidars = self.lidar[index]
        seq_x = self.x[index]
        seq_y = self.y[index]
        seq_theta = self.theta[index]

        full_lidar = []
        for i in range(self.seq_len):
            data['fronts'].append(torch.from_numpy(np.array(
                scale_and_crop_image(Image.open(seq_fronts[i]), scale=self.scale, crop=self.input_resolution))))
            if not self.ignore_sides:
                data['lefts'].append(torch.from_numpy(np.array(
                    scale_and_crop_image(Image.open(seq_lefts[i]), scale=self.scale, crop=self.input_resolution))))
                data['rights'].append(torch.from_numpy(np.array(
                    scale_and_crop_image(Image.open(seq_rights[i]), scale=self.scale, crop=self.input_resolution))))
            if not self.ignore_rear:
                data['rears'].append(torch.from_numpy(np.array(
                    scale_and_crop_image(Image.open(seq_rears[i]), scale=self.scale, crop=self.input_resolution))))

            lidar_unprocessed = np.load(seq_lidars[i])[..., :3]  # raw lidar is XYZI; keep only XYZ
            full_lidar.append(lidar_unprocessed)

            # fix for theta=nan in some measurements
            if np.isnan(seq_theta[i]):
                seq_theta[i] = 0.
            ego_x = seq_x[i]
            ego_y = seq_y[i]
            ego_theta = seq_theta[i]

        # future frames
        for i in range(self.seq_len, self.seq_len + self.pred_len):
            lidar_unprocessed = np.load(seq_lidars[i])
            full_lidar.append(lidar_unprocessed)

        # lidar and waypoint processing to local coordinates
        waypoints = []
        for i in range(self.seq_len + self.pred_len):
            # waypoint is the transformed version of the origin in local coordinates
            # we use 90-theta instead of theta
            # LBC code uses 90+theta, but x is to the right and y is downwards here
            local_waypoint = transform_2d_points(np.zeros((1, 3)),
                np.pi/2-seq_theta[i], -seq_x[i], -seq_y[i],
                np.pi/2-ego_theta, -ego_x, -ego_y)
            waypoints.append(tuple(local_waypoint[0, :2]))

            # process only past lidar point clouds
            if i < self.seq_len:
                # convert coordinate frame of point cloud
                full_lidar[i][:, 1] *= -1  # flip y axis
                full_lidar[i] = transform_2d_points(full_lidar[i],
                    np.pi/2-seq_theta[i], -seq_x[i], -seq_y[i],
                    np.pi/2-ego_theta, -ego_x, -ego_y)
                lidar_processed = lidar_to_histogram_features(full_lidar[i], crop=self.input_resolution)
                data['lidars'].append(lidar_processed)
                bev_points, cam_points = lidar_bev_cam_correspondences(full_lidar[i], crop=self.input_resolution)
                data['bev_points'].append(bev_points)
                data['cam_points'].append(cam_points)

        data['waypoints'] = waypoints

        # convert x_command, y_command to local coordinates
        # taken from LBC code (uses 90+theta instead of theta)
        R = np.array([
            [np.cos(np.pi/2+ego_theta), -np.sin(np.pi/2+ego_theta)],
            [np.sin(np.pi/2+ego_theta),  np.cos(np.pi/2+ego_theta)]
            ])
        local_command_point = np.array([self.x_command[index]-ego_x, self.y_command[index]-ego_y])
        local_command_point = R.T.dot(local_command_point)
        data['target_point'] = tuple(local_command_point)

        data['steer'] = self.steer[index]
        data['throttle'] = self.throttle[index]
        data['brake'] = self.brake[index]
        data['command'] = self.command[index]
        data['velocity'] = self.velocity[index]

        return data
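
# Usage sketch (an assumption, not part of the original code): the config
# attributes below mirror exactly what __init__ reads; the root path and the
# values are hypothetical placeholders.
#
#   from types import SimpleNamespace
#   config = SimpleNamespace(seq_len=1, pred_len=4, ignore_sides=False,
#                            ignore_rear=False, input_resolution=256, scale=1)
#   dataset = CARLA_Data(root=['/path/to/carla_routes'], config=config)
#   loader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=True)
#   batch = next(iter(loader))  # dict with 'fronts', 'lidars', 'waypoints', ...
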
def correspondences_at_one_scale(valid_bev_points, valid_cam_points, crop, scale):
    """Compute correspondences between LiDAR BEV and image space at one feature scale."""
    cam_to_bev_proj_locs = np.zeros((crop, crop, 5, 2))
    bev_to_cam_proj_locs = np.zeros((crop, crop, 5, 2))

    tmp_bev = np.empty((crop, crop,), dtype=object)
    tmp_cam = np.empty((crop, crop,), dtype=object)
    for i in range(crop):
        for j in range(crop):
            tmp_bev[i, j] = []
            tmp_cam[i, j] = []

    for i in range(valid_bev_points.shape[0]):
        tmp_bev[valid_bev_points[i][0]//scale, valid_bev_points[i][1]//scale].append(valid_cam_points[i]//scale)
        tmp_cam[valid_cam_points[i][0]//scale, valid_cam_points[i][1]//scale].append(valid_bev_points[i]//scale)

    for i in range(crop):
        for j in range(crop):
            cam_to_bev_points = tmp_bev[i, j]
            bev_to_cam_points = tmp_cam[i, j]
            # keep at most 5 correspondences per cell, sampled at random
            if len(cam_to_bev_points) > 5:
                cam_to_bev_proj_locs[i, j] = np.array(random.sample(cam_to_bev_points, 5))
            elif len(cam_to_bev_points) > 0:
                num_points = len(cam_to_bev_points)
                cam_to_bev_proj_locs[i, j, :num_points] = np.array(cam_to_bev_points)
            if len(bev_to_cam_points) > 5:
                bev_to_cam_proj_locs[i, j] = np.array(random.sample(bev_to_cam_points, 5))
            elif len(bev_to_cam_points) > 0:
                num_points = len(bev_to_cam_points)
                bev_to_cam_proj_locs[i, j, :num_points] = np.array(bev_to_cam_points)

    return cam_to_bev_proj_locs, bev_to_cam_proj_locs


def lidar_bev_cam_correspondences(world, crop=256):
    """Project the LiDAR point cloud into image space and compute BEV<->camera correspondences."""
    pixels_per_world = 8
    w = 400
    h = 300
    fov = 100
    F = w / (2 * np.tan(fov * np.pi / 360))
    fy = F
    fx = 1.1 * F
    cam_height = 2.3

    # keep points inside the 32m x 32m BEV area
    world[:, 1] *= -1
    lidar = world[abs(world[:, 0]) < 16]  # 16m to the sides
    lidar = lidar[lidar[:, 1] < 32]  # 32m to the front
    lidar = lidar[lidar[:, 1] > 0]  # 0m to the back

    # convert to cam space
    z = lidar[..., 1]
    x = (fx * lidar[..., 0]) / z + w / 2
    y = (fy * cam_height) / z + h / 2
    result = np.stack([x, y], 1)
    result[:, 0] = np.clip(result[..., 0], 0, w-1)
    result[:, 1] = np.clip(result[..., 1], 0, h-1)

    start_x = w // 2 - crop // 2
    start_y = h // 2 - crop // 2
    end_x = start_x + crop
    end_y = start_y + crop

    valid_lidar_points = []
    valid_bev_points = []
    valid_cam_points = []
    for i in range(lidar.shape[0]):
        if result[i][0] >= start_x and result[i][0] < end_x and result[i][1] >= start_y and result[i][1] < end_y:
            result[i][0] -= start_x
            result[i][1] -= start_y
            valid_lidar_points.append(lidar[i])
            valid_cam_points.append([int(result[i][0]), int(result[i][1])])
            bev_x = min(int((lidar[i][0] + 16) * pixels_per_world), crop-1)
            bev_y = min(int(lidar[i][1] * pixels_per_world), crop-1)
            valid_bev_points.append([bev_x, bev_y])

    valid_lidar_points = np.array(valid_lidar_points)
    valid_bev_points = np.array(valid_bev_points)
    valid_cam_points = np.array(valid_cam_points)

    bev_points, cam_points = correspondences_at_one_scale(valid_bev_points, valid_cam_points, 8, 32)
    return bev_points, cam_points
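
# Minimal sanity-check sketch (hypothetical values, not from the original code):
# a single point 10m ahead of the ego vehicle should yield exactly one
# BEV/camera correspondence in the returned 8x8x5x2 grids. Note that the
# function flips the y axis internally, so "ahead" is negative y on input.
#
#   pts = np.array([[0.5, -10.0, 1.0]])
#   bev_points, cam_points = lidar_bev_cam_correspondences(pts.copy(), crop=256)
#   print(bev_points.shape, cam_points.shape)  # (8, 8, 5, 2) (8, 8, 5, 2)
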
def lidar_to_histogram_features(lidar, crop=256):
    """Convert LiDAR point cloud into a two-channel histogram over a 256x256 grid."""
    def splat_points(point_cloud):
        # 256 x 256 grid
        pixels_per_meter = 8
        hist_max_per_pixel = 5
        x_meters_max = 16
        y_meters_max = 32
        xbins = np.linspace(-2*x_meters_max, 2*x_meters_max+1, 2*x_meters_max*pixels_per_meter+1)
        ybins = np.linspace(-y_meters_max, 0, y_meters_max*pixels_per_meter+1)
        hist = np.histogramdd(point_cloud[..., :2], bins=(xbins, ybins))[0]
        hist[hist > hist_max_per_pixel] = hist_max_per_pixel
        overhead_splat = hist / hist_max_per_pixel
        return overhead_splat

    below = lidar[lidar[..., 2] <= 2]  # points at or below 2m height
    above = lidar[lidar[..., 2] > 2]   # points above 2m height
    below_features = splat_points(below)
    above_features = splat_points(above)
    features = np.stack([below_features, above_features], axis=-1)
    features = np.transpose(features, (2, 0, 1)).astype(np.float32)
    return features


def scale_and_crop_image(image, scale=1, crop=256):
    """Scale and center-crop a PIL image, returning a channels-first numpy array."""
    (width, height) = (int(image.width // scale), int(image.height // scale))
    im_resized = image.resize((width, height))
    image = np.asarray(im_resized)
    start_x = height//2 - crop//2
    start_y = width//2 - crop//2
    cropped_image = image[start_x:start_x+crop, start_y:start_y+crop]
    cropped_image = np.transpose(cropped_image, (2, 0, 1))
    return cropped_image


def transform_2d_points(xyz, r1, t1_x, t1_y, r2, t2_x, t2_y):
    """Transform 2D points from a frame with pose (r1, t1) to a frame with pose (r2, t2)."""
    # set z value to 1 for the homogeneous rotation
    xy1 = xyz.copy()
    xy1[:, 2] = 1

    c, s = np.cos(r1), np.sin(r1)
    r1_to_world = np.matrix([[c, s, t1_x], [-s, c, t1_y], [0, 0, 1]])
    # np.dot converts to a matrix, so we explicitly change it back to an array
    world = np.asarray(r1_to_world @ xy1.T)

    c, s = np.cos(r2), np.sin(r2)
    r2_to_world = np.matrix([[c, s, t2_x], [-s, c, t2_y], [0, 0, 1]])
    world_to_r2 = np.linalg.inv(r2_to_world)

    out = np.asarray(world_to_r2 @ world).T
    # reset z-coordinate
    out[:, 2] = xyz[:, 2]
    return out
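

# A minimal self-test sketch (not part of the original file): with identical
# source and target poses, transform_2d_points reduces to the identity, so
# points should map back to themselves up to floating point error.
if __name__ == "__main__":
    p = np.array([[1.0, 2.0, 0.5]])
    out = transform_2d_points(p, np.pi/4, 3.0, -2.0, np.pi/4, 3.0, -2.0)
    assert np.allclose(out, p), "identity transform failed"
    print("transform_2d_points identity check passed")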