base.py

# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np

from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.onnx_utils import create_onnx_session


@dataclass
class PreprocessConfig:
    """
    Configuration for preprocessing images before feeding them into the model.
    """

    input_mean: Union[float, List[float]] = 127.5
    input_std: Union[float, List[float]] = 127.5
    input_size: Tuple[int, int] = (112, 112)
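

# Illustrative sketch (not from the original source): `input_mean` and
# `input_std` accept either a single float, applied to every channel, or
# per-channel lists. The list variant below uses the standard ImageNet
# statistics purely as an example of the per-channel form:
#
#     cfg = PreprocessConfig(
#         input_mean=[123.675, 116.28, 103.53],  # per-channel RGB mean
#         input_std=[58.395, 57.12, 57.375],     # per-channel RGB std
#         input_size=(112, 112),                 # (width, height)
#     )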


class BaseRecognizer(ABC):
    """
    Abstract base class for all face recognition models. It provides the core
    functionality for preprocessing, inference, and embedding extraction.
    """

    @abstractmethod
    def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
        """
        Initializes the model. Subclasses must call this.

        Args:
            model_path (str): The direct path to the verified ONNX model.
            preprocessing (PreprocessConfig): The configuration for preprocessing.
        """
        self.input_mean = preprocessing.input_mean
        self.input_std = preprocessing.input_std
        self.input_size = preprocessing.input_size
        self.model_path = model_path
        self._initialize_model()

    def _initialize_model(self) -> None:
        """
        Loads the ONNX model and prepares it for inference.

        Raises:
            RuntimeError: If the model fails to load or initialize.
        """
        try:
            # Initialize the model session with the available providers
            self.session = create_onnx_session(self.model_path)

            # Extract input configuration
            input_cfg = self.session.get_inputs()[0]
            self.input_name = input_cfg.name

            # Verify that the input dimensions match our configuration
            input_shape = input_cfg.shape
            model_input_size = tuple(input_shape[2:4][::-1])  # (width, height)
            if model_input_size != self.input_size:
                Logger.warning(f'Model input size {model_input_size} differs from configured size {self.input_size}')

            # Extract output configuration
            self.output_names = [output.name for output in self.session.get_outputs()]
            self.output_shape = self.session.get_outputs()[0].shape
            assert len(self.output_names) == 1, 'Expected only one output node.'

            Logger.info(f'Successfully initialized face encoder from {self.model_path}')
        except Exception as e:
            Logger.error(
                f"Failed to load face encoder model from '{self.model_path}'",
                exc_info=True,
            )
            raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e

    def preprocess(self, face_img: np.ndarray) -> np.ndarray:
        """
        Preprocess the image: resize, normalize, and convert it to a blob.

        Args:
            face_img: Input image in BGR format.

        Returns:
            Preprocessed image as a NumPy array ready for inference.
        """
        resized_img = cv2.resize(face_img, self.input_size)

        if isinstance(self.input_std, (list, tuple)):
            # Per-channel normalization
            rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
            normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
                self.input_std, dtype=np.float32
            )
            # Change to NCHW (batch, channels, height, width)
            blob = np.transpose(normalized_img, (2, 0, 1))  # CHW
            blob = np.expand_dims(blob, axis=0)  # NCHW
        else:
            # Single-value normalization
            blob = cv2.dnn.blobFromImage(
                resized_img,
                scalefactor=1.0 / self.input_std,
                size=self.input_size,
                mean=(self.input_mean, self.input_mean, self.input_mean),
                swapRB=True,  # Convert BGR to RGB
            )
        return blob
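
    # Illustrative note (not from the original source): given equivalent
    # mean/std values, both branches produce the same NCHW float32 blob, e.g.:
    #
    #     face = np.zeros((160, 160, 3), dtype=np.uint8)  # any BGR face crop
    #     blob = recognizer.preprocess(face)              # via a concrete subclass
    #     blob.shape  # -> (1, 3, 112, 112): batch, channels, height, width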

    def get_embedding(self, image: np.ndarray, landmarks: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Extracts a face embedding from an image.

        Args:
            image: Input face image (BGR format). If already aligned (112x112), landmarks can be None.
            landmarks: Facial landmarks (5 points for alignment). Optional if the image is already aligned.

        Returns:
            Face embedding vector (typically 512-dimensional).
        """
        # If landmarks are provided, align the face first
        if landmarks is not None:
            aligned_face, _ = face_alignment(image, landmarks, image_size=self.input_size)
        else:
            # Assume the image is already aligned
            aligned_face = image

        # Generate the embedding from the aligned face
        face_blob = self.preprocess(aligned_face)
        embedding = self.session.run(self.output_names, {self.input_name: face_blob})[0]
        return embedding
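
    # Illustrative sketch (not from the original source): landmarks are a
    # (5, 2) array of eye, nose, and mouth-corner points, normally produced by
    # a face detector; supplying them lets get_embedding align the crop before
    # inference. The values below are the ArcFace 112x112 reference points,
    # shown only as an example of the expected layout:
    #
    #     landmarks = np.array([[38.2946, 51.6963], [73.5318, 51.5014],
    #                           [56.0252, 71.7366], [41.5493, 92.3655],
    #                           [70.7299, 92.2041]], dtype=np.float32)
    #     emb = recognizer.get_embedding(image, landmarks)  # typically shape (1, 512)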

    def get_normalized_embedding(self, image: np.ndarray, landmarks: np.ndarray) -> np.ndarray:
        """
        Extracts an L2-normalized face embedding vector from an image.

        Args:
            image: Input face image (BGR format).
            landmarks: Facial landmarks (5 points for alignment).

        Returns:
            Normalized face embedding vector (typically 512-dimensional).
        """
        embedding = self.get_embedding(image, landmarks)
        norm = np.linalg.norm(embedding)
        return embedding / norm if norm > 0 else embedding
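

# Illustrative sketch (not part of the library): a minimal concrete subclass
# and a cosine comparison between two faces. The class name and model path
# below are hypothetical.
#
#     class MyRecognizer(BaseRecognizer):
#         def __init__(self, model_path: str) -> None:
#             super().__init__(model_path, PreprocessConfig())
#
#     recognizer = MyRecognizer("models/recognizer.onnx")
#     emb_a = recognizer.get_normalized_embedding(img_a, lmk_a)
#     emb_b = recognizer.get_normalized_embedding(img_b, lmk_b)
#
#     # Both vectors are L2-normalized, so their dot product is the cosine
#     # similarity between the two faces.
#     similarity = float(np.dot(emb_a.ravel(), emb_b.ravel()))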