synthcity.metrics._utils module

class GeneratorInterface

Bases: object

abstract fit(data: pandas.core.frame.DataFrame) -> synthcity.metrics._utils.GeneratorInterface
abstract generate(count: int) -> pandas.core.frame.DataFrame
compute_log_p_x(model: torch.nn.modules.module.Module, x_mb: torch.Tensor) -> torch.Tensor
compute_metrics_baseline(y_scores: numpy.ndarray, y_true: numpy.ndarray, sample_weight: Optional[numpy.ndarray] = None) -> Tuple[float, float]
compute_wd(X_syn: numpy.ndarray, X: numpy.ndarray) -> float
create_model(n_dims: int, n_flows: int = 5, n_layers: int = 3, hidden_dim: int = 32, residual: Optional[str] = 'gated', verbose: bool = False, device: Any = device(type='cpu'), batch_dim: int = 50) -> torch.nn.modules.module.Module
density_estimator_trainer(data_train: numpy.ndarray, data_val: Optional[numpy.ndarray] = None, data_test: Optional[numpy.ndarray] = None, batch_dim: int = 50, flows: int = 5, layers: int = 3, hidden_dim: int = 32, residual: Optional[str] = 'gated', workspace: pathlib.Path = PosixPath('workspace'), decay: float = 0.5, patience: int = 20, cooldown: int = 10, min_lr: float = 0.0005, early_stopping: int = 100, device: Any = device(type='cpu'), epochs: int = 50, learning_rate: float = 0.01, clip_norm: float = 0.1, polyak: float = 0.998, save: bool = True, load: bool = True) -> Tuple[Callable, torch.nn.modules.module.Module]
evaluate_auc(y_test: numpy.ndarray, y_pred_proba: numpy.ndarray, classes: Optional[numpy.ndarray] = None) -> Tuple[float, float]
class gaussian(X: numpy.ndarray)

Bases: object

pdf(Z: numpy.ndarray) -> numpy.ndarray
get_features(X: pandas.core.frame.DataFrame, sensitive_features: List[str] = []) -> List

Return the non-sensitive features from dataset X.

get_frequency(X_gt: pandas.core.frame.DataFrame, X_synth: pandas.core.frame.DataFrame, n_histogram_bins: int = 10) -> dict

Get percentage frequencies for each possible real categorical value.

Returns

The observed and expected frequencies (as a percent).

get_y_pred_proba_hlpr(y_pred_proba: numpy.ndarray, nclasses: int) -> numpy.ndarray
load_dataset(data_train: Optional[numpy.ndarray] = None, data_valid: Optional[numpy.ndarray] = None, data_test: Optional[numpy.ndarray] = None, device: Any = device(type='cpu'), batch_dim: int = 50) -> Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader]
load_model(model: torch.nn.modules.module.Module, optimizer: Any, workspace: pathlib.Path = PosixPath('workspace')) -> Callable
class normal_func(X: numpy.ndarray)

Bases: object

pdf(Z: numpy.ndarray) -> numpy.ndarray
class normal_func_feat(X: numpy.ndarray, continuous: list)

Bases: object

pdf(Z: numpy.ndarray) -> numpy.ndarray
save_model(model: torch.nn.modules.module.Module, optimizer: Any, epoch: int, save: bool = False, workspace: pathlib.Path = PosixPath('workspace')) -> Callable
train(model: torch.nn.modules.module.Module, optimizer: Any, scheduler: Any, data_loader_train: torch.utils.data.dataloader.DataLoader, data_loader_valid: torch.utils.data.dataloader.DataLoader, data_loader_test: torch.utils.data.dataloader.DataLoader, workspace: pathlib.Path = PosixPath('workspace'), start_epoch: int = 0, device: Any = device(type='cpu'), epochs: int = 50, save: bool = False, clip_norm: float = 0.1) -> Callable