# face_utils.py
  1. # Copyright 2025 Yakhyokhuja Valikhujaev
  2. # Author: Yakhyokhuja Valikhujaev
  3. # GitHub: https://github.com/yakhyo
  4. from typing import Tuple, Union
  5. import cv2
  6. import numpy as np
  7. from skimage.transform import SimilarityTransform
# Public API of this module.
# NOTE(review): `estimate_norm` is defined below but not exported here —
# presumably intentional (internal helper); confirm before adding it.
__all__ = [
    'face_alignment',
    'compute_similarity',
    'bbox_center_alignment',
    'transform_points_2d',
]
# Reference alignment for facial landmarks (ArcFace), defined on a 112x112
# canvas; `estimate_norm` rescales/shifts it for other output sizes.
# Presumably ordered (left eye, right eye, nose tip, left mouth corner,
# right mouth corner) per the ArcFace convention — TODO confirm with callers.
reference_alignment: np.ndarray = np.array(
    [
        [38.2946, 51.6963],
        [73.5318, 51.5014],
        [56.0252, 71.7366],
        [41.5493, 92.3655],
        [70.7299, 92.2041],
    ],
    dtype=np.float32,
)
  25. def estimate_norm(landmark: np.ndarray, image_size: Union[int, Tuple[int, int]] = 112) -> Tuple[np.ndarray, np.ndarray]:
  26. """
  27. Estimate the normalization transformation matrix for facial landmarks.
  28. Args:
  29. landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
  30. image_size (Union[int, Tuple[int, int]], optional): The size of the output image.
  31. Can be an integer (for square images) or a tuple (width, height). Default is 112.
  32. Returns:
  33. np.ndarray: The 2x3 transformation matrix for aligning the landmarks.
  34. np.ndarray: The 2x3 inverse transformation matrix for aligning the landmarks.
  35. Raises:
  36. AssertionError: If the input landmark array does not have the shape (5, 2)
  37. or if image_size is not a multiple of 112 or 128.
  38. """
  39. assert landmark.shape == (5, 2), 'Landmark array must have shape (5, 2).'
  40. # Handle both int and tuple inputs
  41. if isinstance(image_size, tuple):
  42. size = image_size[0] # Use width for ratio calculation
  43. else:
  44. size = image_size
  45. assert size % 112 == 0 or size % 128 == 0, 'Image size must be a multiple of 112 or 128.'
  46. if size % 112 == 0:
  47. ratio = float(size) / 112.0
  48. diff_x = 0.0
  49. else:
  50. ratio = float(size) / 128.0
  51. diff_x = 8.0 * ratio
  52. # Adjust reference alignment based on ratio and diff_x
  53. alignment = reference_alignment * ratio
  54. alignment[:, 0] += diff_x
  55. # Compute the transformation matrix
  56. transform = SimilarityTransform()
  57. transform.estimate(landmark, alignment)
  58. matrix = transform.params[0:2, :]
  59. inverse_matrix = np.linalg.inv(transform.params)[0:2, :]
  60. return matrix, inverse_matrix
  61. def face_alignment(
  62. image: np.ndarray,
  63. landmark: np.ndarray,
  64. image_size: Union[int, Tuple[int, int]] = 112,
  65. ) -> Tuple[np.ndarray, np.ndarray]:
  66. """
  67. Align the face in the input image based on the given facial landmarks.
  68. Args:
  69. image (np.ndarray): Input image as a NumPy array.
  70. landmark (np.ndarray): Array of shape (5, 2) representing the coordinates of the facial landmarks.
  71. image_size (Union[int, Tuple[int, int]], optional): The size of the aligned output image.
  72. Can be an integer (for square images) or a tuple (width, height). Default is 112.
  73. Returns:
  74. np.ndarray: The aligned face as a NumPy array.
  75. np.ndarray: The 2x3 transformation matrix used for alignment.
  76. """
  77. # Get the transformation matrix
  78. M, M_inv = estimate_norm(landmark, image_size)
  79. # Handle both int and tuple for warpAffine output size
  80. if isinstance(image_size, int):
  81. output_size = (image_size, image_size)
  82. else:
  83. output_size = image_size
  84. # Warp the input image to align the face
  85. warped = cv2.warpAffine(image, M, output_size, borderValue=0.0)
  86. return warped, M_inv
  87. def compute_similarity(feat1: np.ndarray, feat2: np.ndarray, normalized: bool = False) -> np.float32:
  88. """Computing Similarity between two faces.
  89. Args:
  90. feat1 (np.ndarray): First embedding.
  91. feat2 (np.ndarray): Second embedding.
  92. normalized (bool): Set True if the embeddings are already L2 normalized.
  93. Returns:
  94. np.float32: Cosine similarity.
  95. """
  96. feat1 = feat1.ravel()
  97. feat2 = feat2.ravel()
  98. if normalized:
  99. return np.dot(feat1, feat2)
  100. else:
  101. return np.dot(feat1, feat2) / (np.linalg.norm(feat1) * np.linalg.norm(feat2) + 1e-5)
  102. def bbox_center_alignment(image, center, output_size, scale, rotation):
  103. """
  104. Apply center-based alignment, scaling, and rotation to an image.
  105. Args:
  106. image (np.ndarray): Input image.
  107. center (Tuple[float, float]): Center point (e.g., face center from bbox).
  108. output_size (int): Desired output image size (square).
  109. scale (float): Scaling factor to zoom in/out.
  110. rotation (float): Rotation angle in degrees (clockwise).
  111. Returns:
  112. cropped (np.ndarray): Aligned and cropped image.
  113. M (np.ndarray): 2x3 affine transform matrix used.
  114. """
  115. # Convert rotation from degrees to radians
  116. rot = float(rotation) * np.pi / 180.0
  117. # Scale the image
  118. t1 = SimilarityTransform(scale=scale)
  119. # Translate the center point to the origin (after scaling)
  120. cx = center[0] * scale
  121. cy = center[1] * scale
  122. t2 = SimilarityTransform(translation=(-1 * cx, -1 * cy))
  123. # Apply rotation around origin (center of face)
  124. t3 = SimilarityTransform(rotation=rot)
  125. # Translate origin to center of output image
  126. t4 = SimilarityTransform(translation=(output_size / 2, output_size / 2))
  127. # Combine all transformations in order: scale → center shift → rotate → recentralize
  128. t = t1 + t2 + t3 + t4
  129. # Extract 2x3 affine matrix
  130. M = t.params[0:2]
  131. # Warp the image using OpenCV
  132. cropped = cv2.warpAffine(image, M, (output_size, output_size), borderValue=0.0)
  133. return cropped, M
  134. def transform_points_2d(points: np.ndarray, transform: np.ndarray) -> np.ndarray:
  135. """
  136. Apply a 2D affine transformation to an array of 2D points.
  137. Args:
  138. points (np.ndarray): An (N, 2) array of 2D points.
  139. transform (np.ndarray): A (2, 3) affine transformation matrix.
  140. Returns:
  141. np.ndarray: Transformed (N, 2) array of points.
  142. """
  143. transformed = np.zeros_like(points, dtype=np.float32)
  144. for i in range(points.shape[0]):
  145. point = np.array([points[i, 0], points[i, 1], 1.0], dtype=np.float32)
  146. result = np.dot(transform, point)
  147. transformed[i] = result[:2]
  148. return transformed