"""
Main Script
Author: Yunhan Yang (yhyang.myron@gmail.com)
Updated by Abdurrahman Yilmaz (ayilmaz@lincoln.ac.uk), v08 for the Matterport dataset
12 Jan 2025
"""
import os
import cv2
import numpy as np
import open3d as o3d
import torch
import copy
import multiprocessing as mp
import pointops
import random
import argparse
import zipfile
import tempfile
import csv
import colorsys
import itertools
import pickle
import json  # used below to parse semseg/fseg annotation files
from segment_anything import build_sam, SamAutomaticMaskGenerator
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
from PIL import Image
from os.path import join
from util import *
import networkx as nx
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import squareform
import gc
import math
verbose = False # To print out intermediate data
verbose_graph = False # To plot correspondence graphs before and after optimization
verbose_comparisons = False # To plot comparison output
maxVal = 0
maxVal_stab = 0
maxVal_pred = 0
maxVal_iou = 0
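# Matterport's undistorted depth PNGs are 16-bit with 0.25 mm units,
# hence 4000 depth units per metre.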
MATTERPORT_DEPTH_SCALE = 4000.0
def get_sam(image, mask_generator):
    """Run SAM on one image and return per-pixel group IDs, scores, and features."""
    masks = mask_generator.generate(image)
    group_ids = np.full((image.shape[0], image.shape[1]), -1, dtype=int)
    stability_scores = np.full((image.shape[0], image.shape[1]), 0.0, dtype=float)
    predicted_ious = np.full((image.shape[0], image.shape[1]), 0.0, dtype=float)
    if len(masks) == 0:
        # No masks found: return the default maps so the caller can skip this view
        return group_ids, stability_scores, predicted_ious, np.zeros((image.shape[0], image.shape[1], 0), dtype=float)
    # "features" is assumed to be attached per mask by a modified mask generator;
    # it is not a standard SamAutomaticMaskGenerator output key.
    feature_shape = len(masks[0]["features"])
    features = np.zeros((image.shape[0], image.shape[1], feature_shape), dtype=float)
    num_masks = len(masks)
    group_counter = 0
    # Paint in reverse order so masks earlier in SAM's list overwrite later
    # ones wherever they overlap.
    for i in reversed(range(num_masks)):
        group_ids[masks[i]["segmentation"]] = group_counter
        stability_scores[masks[i]["segmentation"]] = masks[i]["stability_score"]
        predicted_ious[masks[i]["segmentation"]] = masks[i]["predicted_iou"]
        features[masks[i]["segmentation"]] = masks[i]["features"]
        group_counter += 1
    return group_ids, stability_scores, predicted_ious, features
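# Debugging sketch (hypothetical helper, not part of the pipeline): render the
# per-pixel group IDs from get_sam as a color image so SAM's masks can be
# eyeballed. Assumes `image` is a cv2 BGR array and `mask_generator` is an
# initialized SamAutomaticMaskGenerator.
def debug_save_group_overlay(image, mask_generator, out_path="groups_debug.png"):
    group_ids, _, _, _ = get_sam(image, mask_generator)
    rng = np.random.default_rng(0)
    # One random color per group; row 0 is reserved for the -1 background label
    palette = rng.integers(0, 256, size=(group_ids.max() + 2, 3), dtype=np.uint8)
    palette[0] = 0  # paint unlabeled pixels black
    cv2.imwrite(out_path, palette[group_ids + 1])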
def get_camera_parameters(camera_param_file, panorama_uuid, camera_index, yaw_index):
"""
Parses camera parameters from the undistorted_camera_parameters file.
Args:
camera_param_file (str): Path to the camera parameter file.
panorama_uuid (str): Panorama UUID.
camera_index (int): Camera index.
yaw_index (int): Yaw index.
    Returns:
        tuple: (dict of intrinsic parameters, 4x4 extrinsic matrix as a numpy array).
    """
intrinsics = {}
    extrinsics = None
with open(camera_param_file, 'r') as f:
lines = f.readlines()
for line in lines:
if line.startswith(f"scan {panorama_uuid}_d{camera_index}_{yaw_index}"):
data = line.strip().split()
# Extract intrinsics
fx, fy, cx, cy = map(float, data[3:7])
intrinsics.update({'fx': fx, 'fy': fy, 'cx': cx, 'cy': cy, 'width': int(data[-6]), 'height': int(data[-5])})
# Extract extrinsics (16 values)
extrinsics = np.array(data[7:23], dtype=float).reshape((4, 4))
break
    if extrinsics is None:
        raise ValueError(f"Camera parameters for {panorama_uuid}_d{camera_index}_{yaw_index} not found.")
return intrinsics, extrinsics
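# Usage sketch (paths and IDs below are hypothetical):
#   intrinsics, extrinsics = get_camera_parameters(
#       "/data/matterport/<house_id>/undistorted_camera_parameters.txt",
#       "<panorama_uuid>", camera_index=0, yaw_index=0)
#   K = np.array([[intrinsics['fx'], 0.0, intrinsics['cx']],
#                 [0.0, intrinsics['fy'], intrinsics['cy']],
#                 [0.0, 0.0, 1.0]])  # 3x3 pinhole intrinsic matrix
#   # `extrinsics` is the parsed 4x4 camera-to-world transform.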
def get_pcd_matterport(house_id, panorama_uuid, camera_index, yaw_index, base_path):
"""
Reads the depth and RGB image for the given Matterport dataset identifiers,
and returns a point cloud in global coordinates.
Args:
house_id (str): House identifier.
panorama_uuid (str): Unique panorama identifier.
camera_index (int): Camera index (0-2).
yaw_index (int): Yaw index (0-5).
base_path (str): Root path to the Matterport dataset.
Returns:
open3d.geometry.PointCloud: The generated point cloud.
"""
depth_filename = os.path.join(
base_path,
house_id,
"undistorted_depth_images",
f"{panorama_uuid}_d{camera_index}_{yaw_index}.png"
)
color_filename = os.path.join(
base_path,
house_id,
"undistorted_color_images",
f"{panorama_uuid}_i{camera_index}_{yaw_index}.jpg"
)
camera_param_file = os.path.join(
base_path, house_id, "undistorted_camera_parameters.txt"
)
if not os.path.exists(depth_filename) or not os.path.exists(color_filename):
raise FileNotFoundError(f"Missing depth or color image for {house_id}, {panorama_uuid}")
depth_image = o3d.io.read_image(depth_filename)
color_image = o3d.io.read_image(color_filename)
# Load camera parameters for the specific image
intrinsics, extrinsics = get_camera_parameters(camera_param_file, panorama_uuid, camera_index, yaw_index)
# Convert images to point cloud
rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
color_image, depth_image, depth_scale=MATTERPORT_DEPTH_SCALE, convert_rgb_to_intensity=False
)
camera_intrinsics = o3d.camera.PinholeCameraIntrinsic(
intrinsics['width'],
intrinsics['height'],
intrinsics['fx'],
intrinsics['fy'],
intrinsics['cx'],
intrinsics['cy'],
)
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
rgbd_image, camera_intrinsics
)
pcd.transform(extrinsics) # Apply transformation to global coordinates
return pcd
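# Sketch (assumed helper, not called by the pipeline): merge every view of one
# panorama into a single down-sampled cloud. Matterport panoramas have
# 3 cameras x 6 yaw positions; missing views are skipped.
def fuse_panorama_pcd(house_id, panorama_uuid, base_path, voxel=0.02):
    merged = o3d.geometry.PointCloud()
    for camera_index in range(3):
        for yaw_index in range(6):
            try:
                merged += get_pcd_matterport(house_id, panorama_uuid, camera_index, yaw_index, base_path)
            except FileNotFoundError:
                continue  # not every camera/yaw combination exists on disk
    return merged.voxel_down_sample(voxel_size=voxel)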
def get_pcd(scene_name, color_name, rgb_path, mask_generator, save_2dmask_path):
# Define possible paths for the intrinsic file
intrinsic_path1 = os.path.join(rgb_path, scene_name, 'intrinsics_depth.txt')
intrinsic_path2 = os.path.join(rgb_path, scene_name, 'intrinsics', 'intrinsic_depth.txt')
# Check which path exists
if os.path.exists(intrinsic_path1):
intrinsic_path = intrinsic_path1
elif os.path.exists(intrinsic_path2):
intrinsic_path = intrinsic_path2
    else:
        raise FileNotFoundError(f"Intrinsic file not found in either {intrinsic_path1} or {intrinsic_path2}")
# Load the intrinsic file
depth_intrinsic = np.loadtxt(intrinsic_path)
pose = join(rgb_path, scene_name, 'pose', color_name[0:-4] + '.txt')
depth = join(rgb_path, scene_name, 'depth', color_name[0:-4] + '.png')
color = join(rgb_path, scene_name, 'color', color_name)
depth_img = cv2.imread(depth, -1) # read 16bit grayscale image
mask = (depth_img != 0)
color_image = cv2.imread(color)
color_image = cv2.resize(color_image, (640, 480))
save_2dmask_path = join(save_2dmask_path, scene_name)
if mask_generator is not None:
group_ids, stability_scores, predicted_ious, features = get_sam(color_image, mask_generator)
if not os.path.exists(save_2dmask_path):
os.makedirs(save_2dmask_path)
img = Image.fromarray(num_to_natural(group_ids).astype(np.int16), mode='I;16')
img.save(join(save_2dmask_path, color_name[0:-4] + '.png'))
    else:
        group_path = join(save_2dmask_path, color_name[0:-4] + '.png')
        img = Image.open(group_path)
        group_ids = np.array(img, dtype=np.int16)
        # Scores and features are not cached alongside the 2D masks; use zero
        # placeholders of matching shape so the masking below still applies.
        stability_scores = np.zeros(group_ids.shape, dtype=float)
        predicted_ious = np.zeros(group_ids.shape, dtype=float)
        features = np.zeros(group_ids.shape + (1,), dtype=float)
color_image = np.reshape(color_image[mask], [-1,3])
group_ids = group_ids[mask]
stability_scores = stability_scores[mask]
predicted_ious = predicted_ious[mask]
features = features[mask]
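    # cv2 loads images as BGR; reorder the channels to RGB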
colors = np.zeros_like(color_image)
colors[:,0] = color_image[:,2]
colors[:,1] = color_image[:,1]
colors[:,2] = color_image[:,0]
pose = np.loadtxt(pose)
depth_shift = 1000.0
x,y = np.meshgrid(np.linspace(0,depth_img.shape[1]-1,depth_img.shape[1]), np.linspace(0,depth_img.shape[0]-1,depth_img.shape[0]))
uv_depth = np.zeros((depth_img.shape[0], depth_img.shape[1], 3))
uv_depth[:,:,0] = x
uv_depth[:,:,1] = y
uv_depth[:,:,2] = depth_img/depth_shift
uv_depth = np.reshape(uv_depth, [-1,3])
uv_depth = uv_depth[np.where(uv_depth[:,2]!=0),:].squeeze()
fx = depth_intrinsic[0,0]
fy = depth_intrinsic[1,1]
cx = depth_intrinsic[0,2]
cy = depth_intrinsic[1,2]
bx = depth_intrinsic[0,3]
by = depth_intrinsic[1,3]
n = uv_depth.shape[0]
points = np.ones((n,4))
X = (uv_depth[:,0]-cx)*uv_depth[:,2]/fx + bx
Y = (uv_depth[:,1]-cy)*uv_depth[:,2]/fy + by
points[:,0] = X
points[:,1] = Y
points[:,2] = uv_depth[:,2]
points_world = np.dot(points, np.transpose(pose))
group_ids = num_to_natural(group_ids)
    save_dict = dict(coord=points_world[:,:3], color=colors, group=group_ids, stability_score=stability_scores, predicted_iou=predicted_ious, feature=features)
return save_dict
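# Debugging sketch (hypothetical helper): dump the dictionary returned by
# get_pcd to a .ply so the back-projected cloud can be checked in a viewer.
def save_pcd_dict_as_ply(pcd_dict, out_path="scene_debug.ply"):
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(pcd_dict["coord"])
    pcd.colors = o3d.utility.Vector3dVector(pcd_dict["color"] / 255.0)  # 0-255 RGB -> [0, 1]
    return o3d.io.write_point_cloud(out_path, pcd)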
def make_open3d_point_cloud(input_dict, voxelize, th):
input_dict["group"] = remove_small_group(input_dict["group"], th)
# input_dict = voxelize(input_dict)
xyz = input_dict["coord"]
if np.isnan(xyz).any():
return None
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(xyz)
return pcd
# Focus on this function for global mask_ID solution
def seg_pcd(scene_name, rgb_path, data_path, save_path, mask_generator, voxel_size, voxelize, th, train_scenes, val_scenes, save_2dmask_path, gt_data_path):
print(scene_name, flush=True)
if scene_name in train_scenes:
scene_path = join(data_path, "train", scene_name + ".pth")
elif scene_name in val_scenes:
scene_path = join(data_path, "val", scene_name + ".pth")
else:
scene_path = join(data_path, "test", scene_name + ".pth")
#data_dict = torch.load(scene_path)
#print("Available keys:", data_dict.keys())
if os.path.exists(join(save_path, scene_name + "_comparisons_output_part1.pth")):
return
# Step 1 in pipeline: SAM Generate Masks
step1_output_path = os.path.join(save_2dmask_path, scene_name + "_step1.pth")
# Returns the names of the multi-images in the scene
color_names = sorted(os.listdir(os.path.join(rgb_path, scene_name, 'color')), key=lambda a: int(os.path.basename(a).split('.')[0]))
    voxelize_new = Voxelize(voxel_size=voxel_size, mode="train", keys=("coord", "color", "group", "feature", "predicted_iou", "stability_score"))
# If the output of Step 1 and 2 already exists, load it
if os.path.exists(step1_output_path):
data = torch.load(step1_output_path)
pcd_list, pcd_list_ = data['pcd_list'], data['pcd_list_']
print("Loaded Step 1 and 2 output from file.")
else:
pcd_list = []
pcd_list_ = []
for color_id, color_name in enumerate(color_names):
print(color_name, flush=True) # print name of the image used for mask generation
pcd_dict = get_pcd(scene_name, color_name, rgb_path, mask_generator, save_2dmask_path)
if len(pcd_dict["coord"]) == 0:
continue
viewpoint_ids = [color_id + 1] * len(pcd_dict["coord"])
viewpoint_names = [color_name] * len(pcd_dict["coord"])
pcd_dict.update(viewpoint_id=viewpoint_ids, viewpoint_name=viewpoint_names)
            if verbose:
                # Extract data from the dictionary (before voxelization)
                coords = pcd_dict['coord']
                colors = pcd_dict['color']
                group_ids = pcd_dict['group']
                viewpoint_ids = pcd_dict['viewpoint_id']
                stability_scores = pcd_dict['stability_score']
                predicted_ious = pcd_dict['predicted_iou']
                features = pcd_dict['feature']
# Print the header
print(f"{'Viewpoint ID':<10} {'Coord':<65} {'Color':<20} {'Group ID':<10} {'Stability Score':<20} {'Prediction IOU':<20} {'Feature':<40}")
for viewpoint_id, coord, color, group_id, stability_score, predicted_iou, feature in zip(viewpoint_ids, coords, colors, group_ids, stability_scores, predicted_ious, features):
coord_str = ', '.join(map(str, coord))
color_str = ', '.join(map(str, color))
feature_str = ', '.join(map(str, feature))
print(f"{viewpoint_id:<10} {coord_str:<65} {color_str:<20} {group_id:<10} {stability_score:<20} {predicted_iou:<20} {feature_str:<40}")
pcd_dict_ = voxelize_new(pcd_dict)
pcd_dict = voxelize(pcd_dict)
if verbose:
# Extract data from the dictionary
coords = pcd_dict_['coord']
colors = pcd_dict_['color']
group_ids = pcd_dict_['group']
viewpoint_ids = pcd_dict_['viewpoint_id']
stability_scores = pcd_dict_['stability_score']
predicted_ious = pcd_dict_['predicted_iou']
features = pcd_dict_['feature']
# Print the header
print(f"{'Viewpoint ID':<10} {'Coord':<65} {'Color':<20} {'Group ID':<10} {'Stability Score':<20} {'Prediction IOU':<20} {'Feature':<40}")
for viewpoint_id, coord, color, group_id, stability_score, predicted_iou, feature in zip(viewpoint_ids, coords, colors, group_ids, stability_scores, predicted_ious, features):
coord_str = ', '.join(map(str, coord))
color_str = ', '.join(map(str, color))
feature_str = ', '.join(map(str, feature))
print(f"{viewpoint_id:<10} {coord_str:<65} {color_str:<20} {group_id:<10} {stability_score:<20} {predicted_iou:<20} {feature_str:<40}")
pcd_list.append(pcd_dict)
pcd_list_.append(pcd_dict_)
def seg_pcd_matterport(scene_name, rgb_zip, depth_zip, save_path, mask_generator, voxel_size, voxelize, th, save_2dmask_path):
print(f"Starting segmentation for scene: {scene_name}", flush=True)
# Check if RGB data is available
if not rgb_zip.namelist():
raise FileNotFoundError(f"RGB data not found for scene {scene_name} at {rgb_zip}")
print(f"RGB data found for scene: {scene_name}", flush=True)
# Check if depth data is available
if not depth_zip.namelist():
raise FileNotFoundError(f"Depth data not found for scene {scene_name} at {depth_zip}")
print(f"Depth data found for scene: {scene_name}", flush=True)
step1_output_path = os.path.join(save_2dmask_path, scene_name + "_step1.pth")
print(f"Step 1 output path: {step1_output_path}", flush=True)
# Extract color image names from the RGB zip
color_names = sorted(
[name for name in rgb_zip.namelist() if name.endswith('.jpg')],
key=lambda a: int(os.path.splitext(os.path.basename(a))[0])
)
print(f"Found {len(color_names)} RGB images: {color_names}", flush=True)
voxelize_new = Voxelize(voxel_size=voxel_size, mode="train", keys=("coord", "color", "group", "feature", "predicted_iou", "stability_score"))
print(f"Voxelizer initialized with voxel size: {voxel_size}", flush=True)
if os.path.exists(step1_output_path):
data = torch.load(step1_output_path)
pcd_list = data['pcd_list']
print("Loaded Step 1 and 2 output from file.")
else:
pcd_list = []
print("Step 1 and 2 data not found, processing color images...", flush=True)
for color_id, color_name in enumerate(color_names):
print(f"Processing color image {color_id + 1}/{len(color_names)}: {color_name}", flush=True)
# Extract color image file from zip
with rgb_zip.open(color_name) as color_file:
print(f"Extracting point cloud data from {color_name}", flush=True)
pcd_dict = get_pcd(scene_name, color_name, color_file, mask_generator, save_2dmask_path)
if len(pcd_dict["coord"]) == 0:
print(f"No coordinates found for {color_name}, skipping...", flush=True)
continue
viewpoint_ids = [color_id + 1] * len(pcd_dict["coord"])
viewpoint_names = [color_name] * len(pcd_dict["coord"])
pcd_dict.update(viewpoint_id=viewpoint_ids, viewpoint_name=viewpoint_names)
# Append the point cloud data
pcd_list.append(pcd_dict)
print(f"Processed {len(pcd_dict['coord'])} points for {color_name}", flush=True)
print(f"Processed a total of {len(pcd_list)} point clouds.", flush=True)
# Now that we have the point clouds, perform further processing or saving if needed.
print(f"Segmentation for scene {scene_name} completed.", flush=True)
def generate_colors(num_colors):
"""
Generate a list of visually distinct RGB colors by varying all HSV components.
Args:
num_colors (int): Number of colors to generate.
Returns:
list of list: List of RGB colors with values in range [0, 1].
"""
random.seed(42) # Fix seed for reproducibility
colors = []
# Use golden ratio conjugate for better color distribution
golden_ratio_conjugate = 0.618033988749895
hue = random.random() # Start with a random hue
for _ in range(num_colors):
hue = (hue + golden_ratio_conjugate) % 1.0 # Distribute hue across the spectrum
saturation = 0.5 + random.random() * 0.5 # Vary saturation between 0.5 and 1.0
value = 0.5 + random.random() * 0.5 # Vary brightness between 0.5 and 1.0
r, g, b = colorsys.hsv_to_rgb(hue, saturation, value)
colors.append([r, g, b])
return colors
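# Example: generate_colors(3) returns a deterministic list of three [r, g, b]
# triplets with channels in [0, 1], ready for o3d.utility.Vector3dVector.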
def generate_random_color():
"""
Generate a random RGB color.
Returns:
A numpy array representing an RGB color.
"""
return np.random.rand(3)
def load_point_cloud_with_faces(ply_path):
mesh = o3d.io.read_triangle_mesh(ply_path) # Read as a mesh to access faces
if mesh.is_empty():
raise ValueError("Empty point cloud or mesh.")
vertices = np.asarray(mesh.vertices) # Use vertices directly
faces = np.asarray(mesh.triangles) # Face indices for vertex references
point_cloud = o3d.geometry.PointCloud()
point_cloud.points = o3d.utility.Vector3dVector(vertices)
if mesh.has_vertex_colors():
point_cloud.colors = o3d.utility.Vector3dVector(np.asarray(mesh.vertex_colors))
return point_cloud, faces
def assign_labels_from_faces(point_cloud, semseg_data, fseg_data, faces, category_mapping):
"""
Assign semantic and instance IDs to points based on face indices and segmentations.
Args:
point_cloud: Open3D PointCloud object.
semseg_data: Data containing segmentation groups and their corresponding labels.
fseg_data: Data containing the face indices of each segment.
faces: List of faces where each face is represented by indices of points.
category_mapping: Dictionary that maps raw categories to semantic IDs.
    Returns:
        tuple: (semantic point cloud, instance point cloud), each colored by its labels.
    """
# Initialize arrays to store the IDs
num_points = len(point_cloud.points)
semantic_ids = np.full(num_points, -1, dtype=int) # Default to -1
instance_ids = np.full(num_points, -1, dtype=int) # Default to -1
assigned_points = set() # To avoid re-assigning points
# Retrieve segIndices (face indices) from fseg data
seg_indices = fseg_data.get("segIndices", [])
print(f"length of seg_indices: {len(seg_indices)}")
print(f"length of faces_region: {len(faces)}")
semantic_colors = np.zeros((num_points, 3), dtype=np.float64)
instance_colors = np.zeros((num_points, 3), dtype=np.float64)
instance_color_map = {}
    # Precompute segment index -> face indices once, instead of rescanning
    # seg_indices for every segment of every group (seg_indices can be large).
    segment_to_faces = {}
    for face_idx, seg_idx in enumerate(seg_indices):
        segment_to_faces.setdefault(seg_idx, []).append(face_idx)
    # For each segment in semseg_data, assign the corresponding instance and semantic IDs
    for seg_group in semseg_data.get("segGroups", []):
        instance_id = seg_group["id"]
        raw_category = seg_group["label"]  # The label is the raw category
        print(f"raw_category: {raw_category}")
        # Map the raw category to its corresponding semantic ID using category_mapping
        category_info = category_mapping.get(raw_category, {})
        semantic_id = category_info.get("semantic_id", -1)  # Default to -1 if not found
        semantic_rgb = category_info.get("rgb", (0.0, 0.0, 0.0))
        print(f"semantic_id in category_info: {semantic_id}")
        for segment_idx in seg_group["segments"]:  # Segment indices
            # Look up every face belonging to this segment
            face_indices = segment_to_faces.get(segment_idx, [])
#print(f"length of face_indices: {len(face_indices)}")
for face_idx in face_indices: # Iterate over all matching face indices
if face_idx < len(faces): # Check if the face exists
# Get the point indices corresponding to the face
point_indices = faces[face_idx]
# Assign semantic and instance IDs to the points belonging to the face
for point_idx in point_indices:
if point_idx not in assigned_points: # Avoid redundant assignments
semantic_ids[point_idx] = semantic_id
instance_ids[point_idx] = instance_id
assigned_points.add(point_idx)
semantic_colors[point_idx] = semantic_rgb
# Check if color for this instance_id is already generated
if instance_id not in instance_color_map:
instance_color_map[instance_id] = np.random.rand(3) # Assign a random color
# Assign the color to the point
instance_colors[point_idx] = instance_color_map[instance_id]
# Add scalar fields for semantic and instance IDs
point_cloud_gt_semantic = o3d.geometry.PointCloud()
point_cloud_gt_semantic.points = o3d.utility.Vector3dVector(np.asarray(point_cloud.points))
point_cloud_gt_semantic.colors = o3d.utility.Vector3dVector(semantic_colors)
point_cloud_gt_instance = o3d.geometry.PointCloud()
point_cloud_gt_instance.points = o3d.utility.Vector3dVector(np.asarray(point_cloud.points))
point_cloud_gt_instance.colors = o3d.utility.Vector3dVector(instance_colors)
return point_cloud_gt_semantic, point_cloud_gt_instance
def load_category_mapping(category_mapping_file):
category_mapping = {}
# Open and read the category_mapping file
with open(category_mapping_file, mode='r') as file:
reader = csv.DictReader(file, delimiter='\t')
# Read each row and extract the mapping information along with RGB values
for row in reader:
raw_category = row['raw_category']
semantic_id = int(row['index'])
# Extract the RGB string and convert it to a tuple (R, G, B)
rgb = tuple(map(float, row['rgb'].split(',')))
            # Store in the dictionary: raw_category -> {'semantic_id': ..., 'rgb': ...}
category_mapping[raw_category] = {
'semantic_id': semantic_id,
'rgb': rgb
}
return category_mapping
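# The mapping file is assumed to be tab-separated with at least 'index',
# 'raw_category' and 'rgb' columns, e.g. (values hypothetical):
#   index   raw_category   rgb
#   1       wall           0.120000,0.560000,0.890000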
def write_category_colors_mapping(category_mapping_file):
category_mapping = {}
# Open and read the category_mapping file
with open(category_mapping_file, mode='r') as file:
reader = list(csv.DictReader(file, delimiter='\t'))
# Generate RGB colors based on number of rows (categories)
semantic_colors = generate_colors(len(reader))
# Prepare the updated rows with colors
updated_rows = []
for i, row in enumerate(reader):
raw_category = row['raw_category']
semantic_id = int(row['index'])
color = semantic_colors[i]
category_mapping[raw_category] = {
'semantic_id': semantic_id,
'rgb': color
}
# Add the color as columns to the row
updated_row = row.copy() # Copy the original row
updated_row['rgb'] = f"{color[0]:.6f},{color[1]:.6f},{color[2]:.6f}" # Add the RGB as a string
updated_rows.append(updated_row)
# Write the updated CSV file with RGB colors
with open(category_mapping_file, mode='w', newline='') as file:
        fieldnames = list(reader[0].keys())
        if 'rgb' not in fieldnames:  # avoid duplicating the column when rerun
            fieldnames.append('rgb')
writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter='\t')
writer.writeheader() # Write the header
writer.writerows(updated_rows) # Write the updated rows
return category_mapping
def acquire_matterport_gt(scene_name, gt_scene_zip, gt_region_zip, save_path, category_mapping_file):
category_mapping = load_category_mapping(category_mapping_file)
    if not gt_scene_zip.namelist():
        raise FileNotFoundError(f"Ground truth data not found for scene {scene_name} at {gt_scene_zip.filename}")
    if not gt_region_zip.namelist():
        raise FileNotFoundError(f"Ground truth data not found for regions of {scene_name} at {gt_region_zip.filename}")
region_pc_files = [f for f in gt_region_zip.namelist() if "region" in f and f.endswith(".ply")]
region_semseg_files = [f for f in gt_region_zip.namelist() if "semseg.json" in f]
region_fseg_files = [f for f in gt_region_zip.namelist() if "fsegs.json" in f]
print(f"region_pc_files: {region_pc_files}, count: {len(region_pc_files)}")
print(f"region_semseg_files: {region_semseg_files}, count: {len(region_semseg_files)}")
print(f"region_fseg_files: {region_fseg_files}, count: {len(region_fseg_files)}")
for region_pc_file, region_semseg_file, region_fseg_file in zip(region_pc_files, region_semseg_files, region_fseg_files):
region_segmentations_path = os.path.join(save_path, scene_name, "region_segmentations")
os.makedirs(region_segmentations_path, exist_ok=True)
base_name, _ = os.path.splitext(region_pc_file)
save_semantic_ply_path = os.path.join(save_path, f"{base_name}_semantic_gt.ply")
save_instance_ply_path = os.path.join(save_path, f"{base_name}_instance_gt.ply")
if os.path.exists(save_semantic_ply_path) and os.path.exists(save_instance_ply_path):
print(f"Semantic and instance GT files already exist for the region {region_pc_file} at {save_semantic_ply_path} and {save_instance_ply_path}")
else:
with tempfile.NamedTemporaryFile(suffix=".ply", delete=False) as temp_ply_file:
temp_ply_file.write(gt_region_zip.read(region_pc_file))
temp_ply_file.close()
point_cloud_region, faces_region = load_point_cloud_with_faces(temp_ply_file.name)
os.remove(temp_ply_file.name)
with gt_region_zip.open(region_semseg_file) as semseg_file_region:
semseg_data_region = json.load(semseg_file_region)
with gt_region_zip.open(region_fseg_file) as fseg_file_region:
fseg_data_region = json.load(fseg_file_region)
pc_region_semantic_gt, pc_region_instance_gt = assign_labels_from_faces(point_cloud_region, semseg_data_region, fseg_data_region, faces_region, category_mapping)
if o3d.io.write_point_cloud(save_semantic_ply_path, pc_region_semantic_gt):
print(f"Saved region point cloud with instance and semantic IDs to {save_semantic_ply_path}")
else:
print(f"Failed to save point cloud to {save_semantic_ply_path}")
if o3d.io.write_point_cloud(save_instance_ply_path, pc_region_instance_gt):
print(f"Saved region point cloud with instance and semantic IDs to {save_instance_ply_path}")
else:
print(f"Failed to save point cloud to {save_instance_ply_path}")
'''scene_pc_file = [f for f in gt_scene_zip.namelist() if scene_name in f and f.endswith(".ply")]
scene_semseg_file = [f for f in gt_scene_zip.namelist() if "semseg.json" in f]
if scene_pc_file:
with tempfile.NamedTemporaryFile(suffix=".ply", delete=False) as temp_ply_file:
temp_ply_file.write(gt_scene_zip.read(scene_pc_file[0]))
temp_ply_file.close()
point_cloud_scene, faces_scene = load_point_cloud_with_faces(temp_ply_file.name)
os.remove(temp_ply_file.name)
else:
raise FileNotFoundError(f"No .ply file found in {gt_scene_zip.filename} for {scene_name}")
if scene_semseg_file:
with gt_scene_zip.open(scene_semseg_file[0]) as semseg_file:
semseg_data_scene= json.load(semseg_file)
print(f"Top-level keys in semseg_data: {semseg_data_scene.keys()}")
# Print the number of segment groups
seg_groups = semseg_data_scene.get("segGroups", [])
print(f"Number of segment groups: {len(seg_groups)}")
# Inspect the first segment group
if seg_groups:
first_group = seg_groups[0]
print("\nFirst Segment Group Info:")
for key, value in first_group.items():
print(f" {key}: {value}")
else:
print("No segment groups found.")
else:
raise FileNotFoundError(f"No semseg.json file found in {gt_scene_zip.filename} for {scene_name}")
point_cloud_gt = assign_labels_from_faces(point_cloud_scene, semseg_data_scene, faces_scene)
scene_gt_ply_path = os.path.join(save_path, scene_name)
o3d.io.write_point_cloud(scene_gt_ply_path, point_cloud_gt)
print(f"Saved ground truth point cloud with semantic and instance labels to {scene_gt_ply_path}")
region_files = [f for f in gt_region_zip.namelist() if "region" in f and f.endswith(".ply")]
region_semseg_files = [f for f in gt_region_zip.namelist() if "semseg.json" in f]
for region_ply_file, semseg_file in zip(region_files, region_semseg_files):
# Extracting region point cloud
gt_region_zip.extract(region_ply_file, path=save_path)
region_ply_path = os.path.join(save_path, region_ply_file)
point_cloud = o3d.io.read_point_cloud(region_ply_path)
points = np.asarray(point_cloud.points)
# Load semantic data
semseg_path = os.path.join(save_path, semseg_file)
semseg_data = load_json(semseg_path)
segment_to_label = {seg['segments'][0]: seg['label'] for seg in semseg_data['segGroups']}
# Add custom fields to point cloud (Placeholder logic)
# Assuming a fixed mapping from `segment_id` to each point
semantic_classes = np.zeros(len(points), dtype=np.int32) # Dummy initialization
instance_ids = np.zeros(len(points), dtype=np.int32) # Dummy initialization
# Add semantic and instance fields to the point cloud
point_cloud.colors = o3d.utility.Vector3dVector(points[:, :3]) # Use original RGB
point_cloud.points = o3d.utility.Vector3dVector(points[:, :3]) # XYZ coordinates
# Save augmented point cloud with new attributes
output_path = region_ply_path.replace('.ply', '_gt.ply')
o3d.io.write_point_cloud(output_path, point_cloud)
print(f"Saved ground truth point cloud with semantic and instance labels to {output_path}")'''
def get_args():
'''Command line arguments.'''
parser = argparse.ArgumentParser(description='Segment Anything on Matterport dataset.')
parser.add_argument('--scans_file_path', type=str, required=True, help='Path to scans.txt file containing scene names')
parser.add_argument('--scans_root', type=str, required=True, help='Root path to the Matterport dataset scans folder')
parser.add_argument('--save_path', type=str, required=True, help='Directory where to save the PCD results')
parser.add_argument('--save_2dmask_path', type=str, default='', help='Directory where to save 2D segmentation results')
parser.add_argument('--sam_checkpoint_path', type=str, required=True, help='Path to the SAM checkpoint')
parser.add_argument('--voxel_size', type=float, default=0.05, help='Voxel size for downsampling')
parser.add_argument('--th', type=int, default=50, help='Threshold for ignoring small groups to reduce noise')
parser.add_argument('--category_mapping_path', type=str, default='', help='Path to Matterport dataset semantic class id mapping')
return parser.parse_args()
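# Example invocation (all paths are hypothetical):
#   python gia3d_AY_matterport.py \
#       --scans_file_path scans.txt \
#       --scans_root /data/matterport/v1/scans \
#       --save_path ./output \
#       --save_2dmask_path ./output_2dmask \
#       --sam_checkpoint_path sam_vit_h_4b8939.pth \
#       --category_mapping_path category_mapping.tsv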
if __name__ == '__main__':
args = get_args()
# Load scene names from scans.txt
with open(args.scans_file_path, 'r') as scans_file:
scene_names = scans_file.read().splitlines()
mask_generator = SamAutomaticMaskGenerator(build_sam(checkpoint=args.sam_checkpoint_path).to(device="cuda"))
voxelize = Voxelize(voxel_size=args.voxel_size, mode="train", keys=("coord", "color", "group"))
os.makedirs(args.save_path, exist_ok=True)
#write_category_colors_mapping(args.category_mapping_path) # Run once to save RGB colormap for semantic classes
for scene_name in scene_names:
scene_path = os.path.join(args.scans_root, scene_name)
if not os.path.isdir(scene_path):
print(f"Warning: Scene folder not found for {scene_name}")
continue
# Extract relevant data from zipped files within each scene
with zipfile.ZipFile(os.path.join(scene_path, 'undistorted_color_images.zip'), 'r') as rgb_zip, \
zipfile.ZipFile(os.path.join(scene_path, 'undistorted_depth_images.zip'), 'r') as depth_zip, \
zipfile.ZipFile(os.path.join(scene_path, 'region_segmentations.zip'), 'r') as gt_region_zip, \
zipfile.ZipFile(os.path.join(scene_path, 'house_segmentations.zip'), 'r') as gt_scene_zip :
print(f"Processing scene: {scene_name}")
acquire_matterport_gt(scene_name, gt_scene_zip, gt_region_zip, args.save_path, args.category_mapping_path)
#seg_pcd_matterport(
# scene_name, rgb_zip, depth_zip, args.save_path,
# mask_generator, args.voxel_size, voxelize, args.th,
# args.save_2dmask_path)