models.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. # Copyright 2025 Yakhyokhuja Valikhujaev
  2. # Author: Yakhyokhuja Valikhujaev
  3. # GitHub: https://github.com/yakhyo
  4. from typing import Tuple
  5. import cv2
  6. import numpy as np
  7. from uniface.constants import LandmarkWeights
  8. from uniface.face_utils import bbox_center_alignment, transform_points_2d
  9. from uniface.log import Logger
  10. from uniface.model_store import verify_model_weights
  11. from uniface.onnx_utils import create_onnx_session
  12. from .base import BaseLandmarker
  13. __all__ = ['Landmark106']
  14. class Landmark106(BaseLandmarker):
  15. """Facial landmark model for predicting 106 facial keypoints.
  16. This class implements the BaseLandmarker and provides an end-to-end
  17. pipeline for 106-point facial landmark detection. It handles model
  18. loading, preprocessing of a face crop based on a bounding box,
  19. inference, and post-processing to map landmarks back to the
  20. original image coordinates.
  21. Args:
  22. model_name (LandmarkWeights): The enum specifying the landmark model to load.
  23. Defaults to `LandmarkWeights.DEFAULT`.
  24. input_size (Tuple[int, int]): The resolution (width, height) for the model's
  25. input. Defaults to (192, 192).
  26. Example:
  27. >>> # Assume 'image' is a loaded image and 'bbox' is a face bounding box
  28. >>> # bbox = [x1, y1, x2, y2]
  29. >>>
  30. >>> landmarker = Landmark106()
  31. >>> landmarks = landmarker.get_landmarks(image, bbox)
  32. >>> print(landmarks.shape)
  33. (106, 2)
  34. """
  35. def __init__(
  36. self,
  37. model_name: LandmarkWeights = LandmarkWeights.DEFAULT,
  38. input_size: Tuple[int, int] = (192, 192),
  39. ) -> None:
  40. Logger.info(f'Initializing Facial Landmark with model={model_name}, input_size={input_size}')
  41. self.input_size = input_size
  42. self.input_std = 1.0
  43. self.input_mean = 0.0
  44. self.model_path = verify_model_weights(model_name)
  45. self._initialize_model()
  46. def _initialize_model(self):
  47. """
  48. Initialize the ONNX model from the stored model path.
  49. Raises:
  50. RuntimeError: If the model fails to load or initialize.
  51. """
  52. try:
  53. self.session = create_onnx_session(self.model_path)
  54. # Get input configuration
  55. input_metadata = self.session.get_inputs()[0]
  56. input_shape = input_metadata.shape
  57. self.input_size = tuple(input_shape[2:4][::-1]) # Update input size from model
  58. # Get input/output names
  59. self.input_names = [input.name for input in self.session.get_inputs()]
  60. self.output_names = [output.name for output in self.session.get_outputs()]
  61. # Determine landmark dimensions from output shape
  62. output_shape = self.session.get_outputs()[0].shape
  63. self.lmk_dim = 2 # x,y coordinates
  64. self.lmk_num = output_shape[1] // self.lmk_dim # Number of landmarks
  65. Logger.info(f'Model initialized with {self.lmk_num} landmarks')
  66. except Exception as e:
  67. Logger.error(f"Failed to load landmark model from '{self.model_path}'", exc_info=True)
  68. raise RuntimeError(f'Failed to initialize landmark model: {e}') from e
  69. def preprocess(self, image: np.ndarray, bbox: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
  70. """Prepares a face crop for inference.
  71. This method takes a face bounding box, performs a center alignment to
  72. warp the face into the model's required input size, and then creates
  73. a normalized blob ready for the ONNX session.
  74. Args:
  75. image (np.ndarray): The full source image in BGR format.
  76. bbox (np.ndarray): The bounding box of the face [x1, y1, x2, y2].
  77. Returns:
  78. Tuple[np.ndarray, np.ndarray]: A tuple containing:
  79. - The preprocessed image blob ready for inference.
  80. - The affine transformation matrix used for alignment.
  81. """
  82. width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
  83. center = ((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2)
  84. scale = self.input_size[0] / (max(width, height) * 1.5)
  85. aligned_face, transform_matrix = bbox_center_alignment(image, center, self.input_size[0], scale, 0.0)
  86. face_blob = cv2.dnn.blobFromImage(
  87. aligned_face,
  88. 1.0 / self.input_std,
  89. self.input_size,
  90. (self.input_mean, self.input_mean, self.input_mean),
  91. swapRB=True,
  92. )
  93. return face_blob, transform_matrix
  94. def postprocess(self, predictions: np.ndarray, transform_matrix: np.ndarray) -> np.ndarray:
  95. """Converts raw model predictions back to original image coordinates.
  96. This method reshapes the model's flat output array into landmark points,
  97. denormalizes them to the model's input space, and then applies an
  98. inverse affine transformation to map them back to the original image space.
  99. Args:
  100. predictions (np.ndarray): Raw landmark coordinates from the model output.
  101. transform_matrix (np.ndarray): The affine transformation matrix from preprocessing.
  102. Returns:
  103. np.ndarray: An array of landmark points in the original image's coordinates.
  104. """
  105. landmarks = predictions.reshape((-1, 2))
  106. landmarks[:, 0:2] += 1
  107. landmarks[:, 0:2] *= self.input_size[0] // 2
  108. inverse_matrix = cv2.invertAffineTransform(transform_matrix)
  109. landmarks = transform_points_2d(landmarks, inverse_matrix)
  110. return landmarks
  111. def get_landmarks(self, image: np.ndarray, bbox: np.ndarray) -> np.ndarray:
  112. """Predicts facial landmarks for the given image and face bounding box.
  113. This is the main public method that orchestrates the full pipeline of
  114. preprocessing, inference, and post-processing.
  115. Args:
  116. image (np.ndarray): The full source image in BGR format.
  117. bbox (np.ndarray): A bounding box of a face [x1, y1, x2, y2].
  118. Returns:
  119. np.ndarray: An array of predicted landmark points with shape (106, 2).
  120. """
  121. face_blob, transform_matrix = self.preprocess(image, bbox)
  122. raw_predictions = self.session.run(self.output_names, {self.input_names[0]: face_blob})[0][0]
  123. landmarks = self.postprocess(raw_predictions, transform_matrix)
  124. return landmarks
  125. # Testing code
  126. if __name__ == '__main__':
  127. from uniface.detection import RetinaFace
  128. from uniface.landmark import Landmark106
  129. face_detector = RetinaFace()
  130. landmarker = Landmark106()
  131. cap = cv2.VideoCapture(0)
  132. if not cap.isOpened():
  133. print('Webcam not available.')
  134. exit()
  135. print("Press 'q' to quit.")
  136. while True:
  137. ret, frame = cap.read()
  138. if not ret:
  139. print('Frame capture failed.')
  140. break
  141. # 2. The detect method returns a list of dictionaries
  142. faces = face_detector.detect(frame)
  143. if not faces:
  144. cv2.imshow('Facial Landmark Detection', frame)
  145. if cv2.waitKey(1) & 0xFF == ord('q'):
  146. break
  147. continue
  148. # 3. Loop through the list of face dictionaries
  149. for face in faces:
  150. # Extract the bounding box
  151. bbox = face['bbox']
  152. # 4. Get landmarks for the current face using its bounding box
  153. landmarks = landmarker.get_landmarks(frame, bbox)
  154. # --- Drawing Logic ---
  155. # Draw the landmarks
  156. for x, y in landmarks.astype(int):
  157. cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
  158. # Draw the bounding box
  159. x1, y1, x2, y2 = map(int, bbox)
  160. cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
  161. cv2.imshow('Facial Landmark Detection', frame)
  162. if cv2.waitKey(1) & 0xFF == ord('q'):
  163. break
  164. cap.release()
  165. cv2.destroyAllWindows()