# Copyright 2025 Yakhyokhuja Valikhujaev
# Author: Yakhyokhuja Valikhujaev
# GitHub: https://github.com/yakhyo

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np

from uniface.face_utils import face_alignment
from uniface.log import Logger
from uniface.onnx_utils import create_onnx_session


@dataclass
class PreprocessConfig:
    """
    Configuration for preprocessing images before feeding them into the model.
    """

    input_mean: Union[float, List[float]] = 127.5
    input_std: Union[float, List[float]] = 127.5
    input_size: Tuple[int, int] = (112, 112)
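
# Illustrative only (the values below are placeholders, not verified statistics
# for any specific model): passing per-channel lists routes preprocess() through
# its per-channel branch, while the scalar defaults use cv2.dnn.blobFromImage.
#
#   per_channel_cfg = PreprocessConfig(
#       input_mean=[123.675, 116.28, 103.53],  # assumed RGB means (ImageNet-style)
#       input_std=[58.395, 57.12, 57.375],     # assumed RGB stds (ImageNet-style)
#       input_size=(112, 112),
#   )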


class BaseRecognizer(ABC):
    """
    Abstract base class for all face recognition models.

    It provides the core functionality for preprocessing, inference, and embedding extraction.
    """

    @abstractmethod
    def __init__(self, model_path: str, preprocessing: PreprocessConfig) -> None:
        """
        Initializes the model. Subclasses must call this.

        Args:
            model_path (str): The direct path to the verified ONNX model.
            preprocessing (PreprocessConfig): The configuration for preprocessing.
        """
        self.input_mean = preprocessing.input_mean
        self.input_std = preprocessing.input_std
        self.input_size = preprocessing.input_size
        self.model_path = model_path

        self._initialize_model()

    def _initialize_model(self) -> None:
        """
        Loads the ONNX model and prepares it for inference.

        Raises:
            RuntimeError: If the model fails to load or initialize.
        """
        try:
            # Initialize the model session with the available providers
            self.session = create_onnx_session(self.model_path)

            # Extract input configuration
            input_cfg = self.session.get_inputs()[0]
            self.input_name = input_cfg.name

            # Verify that the model's input dimensions match our configuration.
            # ONNX inputs are NCHW, so shape[2:4] is (height, width); reverse it
            # to get (width, height) for comparison with input_size.
            input_shape = input_cfg.shape
            model_input_size = tuple(input_shape[2:4][::-1])
            if model_input_size != self.input_size:
                Logger.warning(f'Model input size {model_input_size} differs from configured size {self.input_size}')

            # Extract output configuration
            self.output_names = [output.name for output in self.session.get_outputs()]
            self.output_shape = self.session.get_outputs()[0].shape
            assert len(self.output_names) == 1, 'Expected only one output node.'

            Logger.info(f'Successfully initialized face encoder from {self.model_path}')
        except Exception as e:
            Logger.error(
                f"Failed to load face encoder model from '{self.model_path}'",
                exc_info=True,
            )
            raise RuntimeError(f"Failed to initialize model session for '{self.model_path}'") from e

    def preprocess(self, face_img: np.ndarray) -> np.ndarray:
        """
        Preprocess the image: resize, normalize, and convert it to a blob.

        Args:
            face_img: Input image in BGR format.

        Returns:
            Preprocessed image as a NumPy array ready for inference.
        """
        resized_img = cv2.resize(face_img, self.input_size)

        if isinstance(self.input_std, (list, tuple)):
            # Per-channel normalization: (pixel - mean) / std on the RGB image
            rgb_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
            normalized_img = (rgb_img - np.array(self.input_mean, dtype=np.float32)) / np.array(
                self.input_std, dtype=np.float32
            )
            # Change to NCHW (batch, channels, height, width)
            blob = np.transpose(normalized_img, (2, 0, 1))  # CHW
            blob = np.expand_dims(blob, axis=0)  # NCHW
        else:
            # Single-value normalization: blobFromImage subtracts the mean first,
            # then multiplies by scalefactor, i.e. (pixel - mean) / std
            blob = cv2.dnn.blobFromImage(
                resized_img,
                scalefactor=1.0 / self.input_std,
                size=self.input_size,
                mean=(self.input_mean, self.input_mean, self.input_mean),
                swapRB=True,  # Convert BGR to RGB
            )

        return blob

    def get_embedding(self, image: np.ndarray, landmarks: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Extracts a face embedding from an image.

        Args:
            image: Input face image (BGR format). If already aligned (112x112), landmarks can be None.
            landmarks: Facial landmarks (5 points for alignment). Optional if the image is already aligned.

        Returns:
            Face embedding vector (typically 512-dimensional).
        """
        # If landmarks are provided, align the face first
        if landmarks is not None:
            aligned_face, _ = face_alignment(image, landmarks, image_size=self.input_size)
        else:
            # Assume the image is already aligned
            aligned_face = image

        # Generate the embedding from the aligned face
        face_blob = self.preprocess(aligned_face)
        embedding = self.session.run(self.output_names, {self.input_name: face_blob})[0]
        return embedding

    def get_normalized_embedding(self, image: np.ndarray, landmarks: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Extracts an L2-normalized face embedding vector from an image.

        Args:
            image: Input face image (BGR format).
            landmarks: Facial landmarks (5 points for alignment). Optional if the image is already aligned.

        Returns:
            Normalized face embedding vector (typically 512-dimensional).
        """
        embedding = self.get_embedding(image, landmarks)
        norm = np.linalg.norm(embedding)
        return embedding / norm if norm > 0 else embedding
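
# Minimal usage sketch (illustrative): `ArcFaceRecognizer` is a hypothetical
# concrete subclass and 'model.onnx' a placeholder path; neither is defined in
# this module. With L2-normalized embeddings, cosine similarity reduces to a
# plain dot product.
#
#   class ArcFaceRecognizer(BaseRecognizer):
#       def __init__(self, model_path: str) -> None:
#           super().__init__(model_path, PreprocessConfig())
#
#   recognizer = ArcFaceRecognizer('model.onnx')
#   emb_a = recognizer.get_normalized_embedding(image_a, landmarks_a)
#   emb_b = recognizer.get_normalized_embedding(image_b, landmarks_b)
#   similarity = float(np.dot(emb_a.ravel(), emb_b.ravel()))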