The code:#
Below is a brief description of the scripts in this project.
__main__.py#
This is the main file that will be run. It calls the other files and runs the program. It re-uses some components of the dynamic-rim module to read the video pts and ts, as well as to get the correct video frame for a specific timestamp. Here you can also see how the audio and video ts are merged into a single Pandas DataFrame. The DataFrame is also cropped using the start and end event timestamps.
pose.py#
This file contains the main functions to run DensePose. The setup_config function loads the config file for the model, as well as the weights. It also defines the visualizers, the extractor and, more importantly, the predictor.
These are passed back to main.
1 args.confidence, args.device
2 )
3 merged_video = merged_video.reset_index(drop=True, inplace=False)
4
Finally, get_densepose is the main call that runs DensePose on the video, one frame at a time.
Runs the predictor on the frame, which gives the outputs.
The results are a DensePoseChart and PredictionBoxes.
def get_densepose(
    frame,
    predictor,
    visualizer,
    extractor,
    cfg,
    xy,
    starter=None,
    ender=None,
    timings=0,
    circle_size=50,
    frameid=0,
    progress_bar=None,
    poses_task=None,
    labels_onimg=True,
):
    """Run DensePose on one frame and report which body parts the gaze hits.

    NOTE(review): the block nesting below was reconstructed from a listing
    whose indentation was lost; confirm the extent of ``torch.no_grad()`` and
    the position of ``progress_bar.advance`` against the real ``pose.py``.

    Args:
        frame: BGR image passed to ``predictor`` and to ``cv2.cvtColor``.
        predictor: Callable returning a mapping with an ``"instances"`` entry.
        visualizer: Unused here; kept for interface compatibility.
        extractor: Callable applied to the predictor outputs.
        cfg: Unused here; kept for interface compatibility.
        xy: Gaze point ``(x, y)`` in frame pixels. ``None`` or any NaN means
            "no gaze for this frame".
        starter, ender: Objects exposing ``record()`` / ``elapsed_time()``
            (presumably ``torch.cuda.Event`` -- TODO confirm). Timing is only
            measured when ``starter`` is not ``None``.
        timings: Indexable buffer receiving inference times; must support
            ``len()`` and item assignment whenever ``starter`` is given.
        circle_size: Forwarded to ``getpointsCircle`` together with ``xy``.
        frameid: Index of the frame, used for warm-up and log messages.
        progress_bar, poses_task: Optional progress reporter and task handle;
            both must be set for progress to be reported.
        labels_onimg: When true, write the part names on the returned image.

    Returns:
        Tuple ``(frame_vis, result, text_id_name, starter, ender, timings,
        poses_task)`` where ``frame_vis`` is the grey 3-channel ``uint8``
        frame (with overlays when gaze and detections are available),
        ``result`` holds the extracted ``scores`` / ``pred_boxes_XYXY`` /
        ``pred_densepose`` entries that the outputs provided, and
        ``text_id_name`` is the comma-separated list of gazed part names.
    """
    with torch.no_grad():
        # Let the GPU WARM UP and measure inference time after 60 frames
        if starter is not None and 60 < frameid < (len(timings) + 60):
            starter.record()
            outputs = predictor(frame)["instances"]
            ender.record()
            torch.cuda.synchronize()
            timings[frameid - 60] = starter.elapsed_time(ender)
        else:
            outputs = predictor(frame)["instances"]

        result = {}
        extractor_r = extractor
        if outputs.has("scores"):
            result["scores"] = outputs.get("scores").cpu()
        if outputs.has("pred_boxes"):
            result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu()
        if outputs.has("pred_densepose"):
            if isinstance(outputs.pred_densepose, DensePoseChartPredictorOutput):
                extractor_r = DensePoseResultExtractor()
            elif isinstance(
                outputs.pred_densepose, DensePoseEmbeddingPredictorOutput
            ):
                extractor_r = DensePoseOutputsExtractor()
            result["pred_densepose"] = extractor_r(outputs)[0]
        logging.debug(f"DensePose result: {result}")

        # Grey, 3-channel copy of the frame scaled to [0, 1]
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame = np.tile(frame[:, :, np.newaxis], (1, 1, 3)) / 255
        # NOTE(review): the value returned here is never read.
        data = extractor(outputs)

    # Test for None *before* calling np.isnan, which raises on None, and
    # tolerate outputs that carried no boxes at all.
    boxes = result.get("pred_boxes_XYXY", [])
    gaze_on_detections = (
        xy is not None and not np.isnan(xy).any() and len(boxes) > 0
    )

    id_part = []
    # As of now, it checks the gaze point for labels from densepose.
    if gaze_on_detections:
        report_progress = progress_bar is not None and poses_task is not None
        if report_progress:
            progress_bar.reset(
                poses_task,
                total=len(boxes),
                description=f"🤸♀️ Estimating poses at frame:{frameid}",
            )
        for point in getpointsCircle(xy, circle_size):
            for i, box in enumerate(boxes):
                inside = (
                    box[0] < point[0] < box[2] and box[1] < point[1] < box[3]
                )
                if inside:
                    # Labels on a person found bounding box
                    labels_bb = result["pred_densepose"][i].labels.cpu().numpy()
                    # Gaze point relative to the bounding box
                    x = int(np.floor(point[0] - box[0]))
                    y = int(np.floor(point[1] - box[1]))
                    x = x - 1 if x != 0 else x
                    y = y - 1 if y != 0 else y
                    id_part.append(labels_bb[y, x])
                else:
                    id_part.append(0)
                if report_progress:
                    progress_bar.advance(poses_task)
    else:
        id_part.append(0)

    # Unique ids of the body parts gazed at, then their names.
    id_part = list(set(id_part))
    # NOTE(review): id 0 is not filtered out. The previous `id_part != 0`
    # check compared the list itself to 0, so it never excluded anything.
    # Output is kept unchanged because report() strips "BACKGROUND"
    # (presumably the name of id 0) downstream -- confirm before changing.
    id_name = [PartsDefinition(part_id).name for part_id in id_part]
    text_id_name = ", ".join(id_name)
    logging.debug(f"DensePose frame {frameid} - looking at part {text_id_name}")

    # Draw segmentation
    frame = (frame * 255).astype(np.uint8)
    if gaze_on_detections:
        frame_vis = pl_dp_vis.vis_pose(frame, result, id_part, xy)
    else:
        frame_vis = frame

    # write body part in the bottom left corner of the image
    if labels_onimg:
        # Keep the historical y=1000 baseline on tall frames, but stay inside
        # the image when the frame is shorter than that.
        text_y = min(1000, frame_vis.shape[0] - 10)
        cv2.putText(
            frame_vis,
            text_id_name,
            (10, text_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (255, 255, 255),
            lineType=1,
        )
    return frame_vis, result, text_id_name, starter, ender, timings, poses_task
This is called at __main__ here:
1 (
2 frame,
3 _,
4 id_name,
5 starter,
6 ender,
7 timings,
8 poses_task,
9 ) = pose.get_densepose(
10 frame,
11 predictor,
12 visualizer,
13 extractor,
14 cfg,
15 xy,
16 starter,
17 ender,
18 timings,
19 args.circle_size,
20 frameid=num_processed_frames,
21 progress_bar=progress_bar,
22 poses_task=poses_task,
23 ) # frame must be BGR
and the predictor, visualizer, and config are passed along with the frame, circle_size, and gaze coordinates.
Inference#
The call to the predictor, where inference is run, is on L138.
vis.py#
vis_pose#
A function to visualize the DensePose parts on a video frame.
def vis_pose(frame, result, id_part, xy, bbox=True, scores=True, parts=True):
    """Visualize DensePose data on a frame.

    The frame is modified in place and also returned.

    Args:
        frame: 3-channel ``uint8`` image to draw on.
        result: Mapping with ``"pred_boxes_XYXY"`` (iterable of box tensors),
            and, when the matching flag is set, ``"scores"`` and
            ``"pred_densepose"`` (items exposing a ``labels`` tensor).
        id_part: Ids of the gazed body parts, or ``None`` to disable the
            gaze highlight. Id 0 (background) is ignored.
        xy: Gaze point ``(x, y)`` in frame pixels.
        bbox: Draw the bounding box of each detection.
        scores: Write the detection score above each box.
        parts: Overlay the colour-mapped body-part segmentation.

    Returns:
        The same ``frame`` object, with the overlays drawn on it.
    """
    # Ids to highlight, computed once. Filtering every 0 (instead of only a
    # leading one) does not rely on the ordering of the caller's list and
    # also copes with an empty list.
    gazed_ids = None if id_part is None else [p for p in id_part if p != 0]
    frame_h, frame_w = frame.shape[:2]

    for i, box in enumerate(result["pred_boxes_XYXY"]):
        # Plain Python ints: some OpenCV builds reject NumPy scalars in points.
        x0, y0, x1, y1 = (int(v) for v in np.floor(box.cpu().numpy()))

        if bbox:
            cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 180, 0), 2)
        if scores:
            # Put the score on the frame
            cv2.putText(
                frame,
                f"{result['scores'][i]:.2f}",
                (x0, y0 - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (255, 255, 255),
                2,
            )
        if not parts:
            continue

        # Part of the box that actually lies inside the frame.
        cx0, cy0 = max(x0, 0), max(y0, 0)
        cx1, cy1 = min(x1, frame_w), min(y1, frame_h)
        if x1 <= x0 or y1 <= y0 or cx1 <= cx0 or cy1 <= cy0:
            # Zero-area or fully off-frame box: nothing to overlay.
            continue
        roi = frame[cy0:cy1, cx0:cx1]

        labels = result["pred_densepose"][i].labels.cpu().numpy().astype(np.uint8)
        # Resize to bounding box, then crop to the visible part of it
        labels = cv2.resize(
            labels, (x1 - x0, y1 - y0), interpolation=cv2.INTER_NEAREST
        )
        labels = labels[cy0 - y0 : cy1 - y0, cx0 - x0 : cx1 - x0]

        # NOTE(review): bitwise_not of a label id is non-zero for every id
        # below 255, so this mask keeps the whole ROI and the overlay ends up
        # being *added* to the frame. Kept as is to preserve the current look.
        bkg = cv2.bitwise_and(roi, roi, mask=cv2.bitwise_not(labels))

        # get the mask of the body part and apply a color map to the parts
        fg = cv2.applyColorMap(labels * 8, cv2.COLORMAP_OCEAN)
        fg = cv2.bitwise_and(fg, fg, mask=labels)

        # plot gazed part in a different color
        gaze_in_box = (
            gazed_ids is not None and x0 < xy[0] < x1 and y0 < xy[1] < y1
        )
        if gaze_in_box:
            if not gazed_ids:
                # Gaze is inside the box but only on background: the
                # segmentation of this box is not drawn.
                continue
            # Fresh mask: 255 on gazed parts, 0 elsewhere (label map untouched)
            gazed_mask = np.where(np.isin(labels, gazed_ids), 255, 0).astype(
                np.uint8
            )
            highlight = np.stack(
                [np.zeros_like(gazed_mask), gazed_mask, gazed_mask],
                axis=2,
            )
            # Cut the gazed parts out of the colour map, then add the highlight
            fg = cv2.bitwise_and(fg, fg, mask=cv2.bitwise_not(gazed_mask))
            fg = cv2.add(fg, highlight)

        # merge the foreground and background
        frame[cy0:cy1, cx0:cx1] = cv2.add(bkg, fg)
    return frame
report#
Generates a plot and a CSV file with the gaze count for each body part.
def report(pandas_df, out_dir):
    """Write a per-part gaze count CSV and a body heat-map image.

    Reads the ``"densepose"`` column of ``pandas_df`` (comma-separated part
    names per row), counts how many times each part name occurs, and writes
    ``parts_count.csv`` and ``gazemap.png`` into ``out_dir``.

    Args:
        pandas_df: DataFrame with a ``"densepose"`` column of strings.
            Missing values are ignored.
        out_dir: Existing directory receiving the two output files.

    Raises:
        FileNotFoundError: If one of the body-shape assets cannot be read.
    """
    import logging
    from collections import Counter

    # Tokenise every cell: drop the background marker, then split on commas
    # and whitespace (str.split() already discards empty tokens).
    names = []
    for cell in pandas_df["densepose"].dropna():
        cleaned = str(cell).replace("BACKGROUND", "").replace(",", " ")
        names.extend(cleaned.split())

    # Count the number of times each part is gazed at, most frequent first
    # (ties keep first-seen order).
    ordered = Counter(names).most_common()
    parts_count = pd.DataFrame(ordered, columns=["part", "count"])
    max_count = int(parts_count["count"].max()) if ordered else 0

    # Save it as a csv
    parts_count.to_csv(os.path.join(out_dir, "parts_count.csv"), index=False)

    # Load the graphs from the assets folder
    here = os.path.dirname(__file__)
    base_path = os.path.join(here, "assets/body_shape.png")
    col_path = os.path.join(here, "assets/body_shape_coloured.png")
    base_body = cv2.imread(base_path)
    col_body = cv2.imread(col_path)
    # cv2.imread signals failure by returning None rather than raising.
    if base_body is None:
        raise FileNotFoundError(base_path)
    if col_body is None:
        raise FileNotFoundError(col_path)

    # Pixels of each part, located by its colour in the coloured template
    part_pixels = {
        part.name: np.where(np.all(col_body == part.value, axis=-1))
        for part in PartsColour
    }

    # Shade each part: the more it was gazed at, the darker the grey value
    # handed to the colour map.
    shade_step = 255 / max_count if max_count else 0
    for part, count in zip(parts_count["part"], parts_count["count"]):
        pixels = part_pixels.get(part)
        if pixels is None:
            logging.warning("report: no template region for part %r", part)
            continue
        shade = 255 - (count * shade_step)
        base_body[pixels] = (shade, shade, shade)

    # The top 200 rows hold the logos and must not be colour-mapped
    logos = base_body[:200, :, :]
    body = cv2.applyColorMap(base_body[200:, :, :], cv2.COLORMAP_HOT)
    gazemap = np.concatenate((logos, body), axis=0)
    height = gazemap.shape[0]

    # Add a colorbar: grey ramp 1..255 from top to bottom, colour-mapped
    ramp = np.arange(1, 256, dtype=np.uint8)
    colorbar = np.broadcast_to(ramp[:, None, None], (255, 50, 3)).copy()
    colorbar = cv2.applyColorMap(colorbar, cv2.COLORMAP_HOT)
    colorbar = cv2.resize(colorbar, (20, height))

    # add values
    margin = np.full((height, 100, 3), 255, dtype=np.uint8)
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    font_color = (0, 0, 0)
    # Seventh positional argument of cv2.putText is the stroke thickness.
    thickness = 2

    if max_count:
        px_per_count = height / max_count
        for i in range(0, max_count, 25):
            label = "{}".format(max_count - i)
            (_, text_h), _ = cv2.getTextSize(label, font, font_scale, thickness)
            # The text origin is its bottom-left corner: keep the first
            # label (y == 0) inside the image instead of clipping it.
            y = max(int(np.round(i * px_per_count)), text_h + 2)
            cv2.putText(
                margin, label, (50, y), font, font_scale, font_color, thickness
            )

    gazemap = np.concatenate((gazemap, margin, colorbar), axis=1)

    # cv2.imwrite expects BGR, which is what gazemap already is; the former
    # BGR->RGB conversion discarded its result and has been dropped.
    cv2.imwrite(os.path.join(out_dir, "gazemap.png"), gazemap)
    return