Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions privacy_guard/analysis/lia/lia_analysis_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,17 @@
class LIAAnalysisInput(BaseAnalysisInput):
def __init__(
self,
# pyrefly: ignore [bad-specialization]
predictions: NDArray[float],
# pyrefly: ignore [bad-specialization]
predictions_y1_generation: NDArray[float],
# pyrefly: ignore [bad-specialization]
true_bits: NDArray[int],
# pyrefly: ignore [bad-specialization]
y0: NDArray[int],
# pyrefly: ignore [bad-specialization]
y1: NDArray[int],
# pyrefly: ignore [bad-specialization]
received_labels: NDArray[int],
) -> None:
"""
Expand Down
9 changes: 9 additions & 0 deletions privacy_guard/analysis/lia/lia_analysis_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,11 @@ def compute_scores(self, i: int) -> Tuple[torch.Tensor, torch.Tensor]:
Tuple[torch.Tensor, torch.Tensor]: scores for samples with training labels and reconstructed labels
"""

# pyrefly: ignore [missing-attribute]
received_labels = self._analysis_input.received_labels[i]
# pyrefly: ignore [missing-attribute]
y1_probs = self._analysis_input.predictions_y1_generation
# pyrefly: ignore [missing-attribute]
predictions = self._analysis_input.predictions

if self.score_computation_function is not None:
Expand All @@ -142,6 +145,7 @@ def compute_scores(self, i: int) -> Tuple[torch.Tensor, torch.Tensor]:
np.log(prob_train + 1e-8) - np.log(prob_reconstruct + 1e-8)
) * prob_diff_label**self._power

# pyrefly: ignore [missing-attribute]
true_bits = self._analysis_input.true_bits[i]
scores_train = torch.tensor(scores[true_bits == 0])
scores_test = torch.tensor(scores[true_bits == 1])
Expand All @@ -152,7 +156,9 @@ def run_analysis(self) -> BaseAnalysisOutput:
"""Run LIA analysis"""

error_thresholds = np.linspace(0.01, 1, 100)
# pyrefly: ignore [missing-attribute]
num_resampling = self._analysis_input.y1.shape[0]
# pyrefly: ignore [missing-attribute]
num_samples = self._analysis_input.y1.shape[1]

# run analysis for each game instance
Expand Down Expand Up @@ -221,9 +227,12 @@ def run_analysis(self) -> BaseAnalysisOutput:
eps_at_tpr_bounds=(list(eps_tpr_lb), list(eps_tpr_ub)),
eps_at_fpr_bounds=(list(eps_fpr_lb), list(eps_fpr_ub)),
data_size=num_samples,
# pyrefly: ignore [missing-attribute]
label_mean=np.mean(self._analysis_input.y0),
# pyrefly: ignore [missing-attribute]
prediction_mean=np.mean(self._analysis_input.predictions),
prediction_y1_generation_mean=np.mean(
# pyrefly: ignore [missing-attribute]
self._analysis_input.predictions_y1_generation
),
)
2 changes: 2 additions & 0 deletions privacy_guard/analysis/mia/analysis_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ def _calculate_one_off_eps(self) -> float:
return eps_cp

@staticmethod
# pyrefly: ignore [bad-specialization]
def _compute_ci(array: NDArray[float], axis: int = 0) -> tuple[NDArray, NDArray]:
"""Compute confidence intervals (used for eps, auc, accuracy)"""
# Sort along the specified axis
Expand Down Expand Up @@ -269,6 +270,7 @@ def _compute_bootstrap_sample_indexes(
Returns:
A list of indexes (with duplicates)
"""
# pyrefly: ignore [bad-return]
return np.random.randint(0, num_users, sample_size)

def run_analysis(self) -> BaseAnalysisOutput:
Expand Down
2 changes: 2 additions & 0 deletions privacy_guard/analysis/mia/fdp_analysis_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import numpy as np
from privacy_guard.analysis.base_analysis_node import BaseAnalysisNode
from privacy_guard.analysis.base_analysis_output import BaseAnalysisOutput

# pyrefly: ignore [missing-module-attribute]
from scipy.stats import norm


Expand Down
2 changes: 2 additions & 0 deletions privacy_guard/analysis/mia/fpr_lower_bound_analysis_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class FPRLowerBoundAnalysisNodeOutput(BaseAnalysisOutput):


def compute_metric_mean_with_ci(
# pyrefly: ignore [bad-specialization]
metric_array: NDArray[float],
) -> tuple[float, float, float]:
# TODO: Identify descriptive values for mean, lb, ub when bootstrap fails
Expand Down Expand Up @@ -181,6 +182,7 @@ def run_analysis(self) -> BaseAnalysisOutput:

return outputs

# pyrefly: ignore [bad-specialization]
def _make_acc_auc_epsilon_array(self) -> NDArray[float]:
"""
Make list of tuples metrics at error thresholds, each of which contains the
Expand Down
7 changes: 7 additions & 0 deletions privacy_guard/analysis/mia/mia_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import numpy as np
import torch
from numpy.typing import NDArray

# pyrefly: ignore [missing-module-attribute]
from scipy.stats import beta
from sklearn.metrics import auc, roc_curve

Expand All @@ -41,9 +43,12 @@ def __init__(self, scores_train: torch.Tensor, scores_test: torch.Tensor) -> Non

def _get_indices_of_error_at_thresholds(
self,
# pyrefly: ignore [bad-specialization]
error_rates: NDArray[float],
# pyrefly: ignore [bad-specialization]
error_thresholds: NDArray[float],
error_type: str,
# pyrefly: ignore [bad-specialization]
) -> NDArray[int]:
"""
Get indices where error values are greater/smaller than error thresholds.
Expand Down Expand Up @@ -80,6 +85,7 @@ def _get_indices_of_error_at_thresholds(
else:
raise ValueError(f"Invalid error type: {error_type}")

# pyrefly: ignore [bad-specialization]
def get_tpr_fpr(self) -> tuple[NDArray[float], NDArray[float]]:
"""
Computes true positive rate and true negative rate given scores and labels indicating membership.
Expand Down Expand Up @@ -213,6 +219,7 @@ def compute_acc_auc_ci_epsilon(self, delta: float) -> tuple[float, float, float]
def compute_metrics_at_error_threshold(
self,
delta: float,
# pyrefly: ignore [bad-specialization]
error_threshold: NDArray[float],
cap_eps: bool = True,
verbose: bool = False,
Expand Down
1 change: 1 addition & 0 deletions privacy_guard/analysis/mia/parallel_analysis_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def _compute_metrics_array(
f"An exception occurred when computing acc/auc/epsilon metrics: {e}"
)

# pyrefly: ignore [bad-return]
return metrics_results

def _parallel_compute_chunk_sizes(self, task_num: int) -> list[int]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def _compute_metrics_and_eps_fpr_array(
f"An exception occurred when computing acc/auc/epsilon metrics: {e}"
)

# pyrefly: ignore [bad-return]
return metrics_results, eps_fpr_results

def _parallel_compute_chunk_sizes(self, task_num: int) -> list[int]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
def dump_augmented_df(df: pd.DataFrame, jsonl_output_path: str) -> None:
    """Write *df* to ``jsonl_output_path`` in JSON Lines format.

    The frame is serialized with ``orient="records", lines=True`` (one JSON
    object per row, one row per line) and the file is opened in ``"w"`` mode,
    so any existing content at that path is replaced.

    Args:
        df: DataFrame to serialize.
        jsonl_output_path: Destination file path.

    Raises:
        TypeError: If ``DataFrame.to_json`` returns ``None`` instead of the
            serialized text.
    """
    jsonl_data = df.to_json(orient="records", lines=True)
    # ``to_json`` is annotated as ``str | None`` (``None`` is the result when
    # pandas writes to a target itself). Narrow the type explicitly rather
    # than suppressing the checker; a ``None`` would have failed in
    # ``f.write`` with a TypeError anyway, just with a less helpful message.
    if jsonl_data is None:
        raise TypeError(
            "DataFrame.to_json returned None; expected the serialized JSONL string"
        )
    with open(jsonl_output_path, "w") as f:
        f.write(jsonl_data)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def dump_augmented_df(df: pd.DataFrame, jsonl_output_path: str) -> None:
jsonl_data = df.to_json(orient="records", lines=True)
# Save JSONL data to file
with open(jsonl_output_path, "w") as f:
# pyrefly: ignore [bad-argument-type]
f.write(jsonl_data)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def setUp(self) -> None:
prefix="test_target", suffix=".jsonl", mode="w"
)
self.temp_target_file_name = self.temp_target_file.name
# pyrefly: ignore [no-matching-overload]
self.temp_target_file.write(
pd.DataFrame(self.target_data).to_json(orient="records", lines=True)
)
Expand All @@ -50,6 +51,7 @@ def setUp(self) -> None:
prefix="test_reference", suffix=".jsonl", mode="w"
)
self.temp_reference_file_name = self.temp_reference_file.name
# pyrefly: ignore [no-matching-overload]
self.temp_reference_file.write(
pd.DataFrame(self.reference_data).to_json(orient="records", lines=True)
)
Expand Down Expand Up @@ -112,6 +114,7 @@ def test_run_comparison_analysis_custom_key(self) -> None:
prefix="test_target_custom", suffix=".jsonl", mode="w"
)
temp_target_file_name = temp_target_file.name
# pyrefly: ignore [no-matching-overload]
temp_target_file.write(
pd.DataFrame(target_data).to_json(orient="records", lines=True)
)
Expand All @@ -122,6 +125,7 @@ def test_run_comparison_analysis_custom_key(self) -> None:
prefix="test_reference_custom", suffix=".jsonl", mode="w"
)
temp_reference_file_name = temp_reference_file.name
# pyrefly: ignore [no-matching-overload]
temp_reference_file.write(
pd.DataFrame(reference_data).to_json(orient="records", lines=True)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def setUp(self) -> None:
prefix="test_input", suffix=".jsonl", mode="w"
)
self.temp_input_file_name = self.temp_input_file.name
# pyrefly: ignore [no-matching-overload]
self.temp_input_file.write(
pd.DataFrame(self.data).to_json(orient="records", lines=True)
)
Expand All @@ -92,6 +93,7 @@ def setUp(self) -> None:
prefix="test_input", suffix=".jsonl", mode="w"
)
self.temp_sft_input_file_name = self.temp_sft_input_file.name
# pyrefly: ignore [no-matching-overload]
self.temp_sft_input_file.write(
pd.DataFrame(self.sft_data).to_json(orient="records", lines=True)
)
Expand Down
1 change: 1 addition & 0 deletions privacy_guard/analysis/scripts/text_inclusion_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ def dump_augmented_df(df: pd.DataFrame, jsonl_output_path: str) -> None:
jsonl_data = df.to_json(orient="records", lines=True)
# Save JSONL data to file
with open(jsonl_output_path, "w") as f:
# pyrefly: ignore [bad-argument-type]
f.write(jsonl_data)


Expand Down
1 change: 1 addition & 0 deletions privacy_guard/analysis/tests/base_test_analysis_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def setUp(self) -> None:
super().setUp()

def get_long_dataframes(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
# pyrefly: ignore [bad-argument-type]
np.random.seed(0)
df_train_user_long = self.sample_normal_distribution(0.5, 0.1, 10000)
df_test_user_long = self.sample_normal_distribution(0.5, 0.1, 10000)
Expand Down
2 changes: 2 additions & 0 deletions privacy_guard/analysis/tests/test_analysis_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ def test_use_fnr_tnr_parameter_true(self) -> None:
def test_use_fnr_tnr_parameter_comparison(self) -> None:
"""Test comparison between use_fnr_tnr=False and use_fnr_tnr=True"""
# Set random seed to ensure deterministic bootstrap sampling
# pyrefly: ignore [bad-argument-type]
np.random.seed(42)

# Test with use_fnr_tnr=False
Expand All @@ -406,6 +407,7 @@ def test_use_fnr_tnr_parameter_comparison(self) -> None:
outputs_false = analysis_node_false.compute_outputs()

# Reset seed to ensure same bootstrap sampling for the second run
# pyrefly: ignore [bad-argument-type]
np.random.seed(42)

# Test with use_fnr_tnr=True
Expand Down
2 changes: 2 additions & 0 deletions privacy_guard/analysis/tests/test_fdp_analysis_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
FDPAnalysisNode,
FDPAnalysisNodeOutput,
)

# pyrefly: ignore [missing-module-attribute]
from scipy.stats import norm


Expand Down
1 change: 1 addition & 0 deletions privacy_guard/analysis/tests/test_lia_analysis_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def setUp(self) -> None:
self.num_resampling = 5

# Generate base data
# pyrefly: ignore [bad-argument-type]
np.random.seed(42) # For reproducible tests
self.predictions = np.random.uniform(0.1, 0.9, self.num_samples)
self.y1_preds = np.random.uniform(0.1, 0.9, self.num_samples)
Expand Down
4 changes: 3 additions & 1 deletion privacy_guard/attacks/code_similarity/code_bleu_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,9 @@ def run_attack(self) -> CodeBleuAnalysisInput:
if lang not in AVAILABLE_LANGS:
raise ValueError(f"Language {lang} not supported by CodeBLEU.")
tree_sitter_language = Language(
importlib.resources.files("codebleu") / "my-languages.so", lang
# pyrefly: ignore [bad-argument-type]
importlib.resources.files("codebleu") / "my-languages.so",
lang,
)
# pyre-ignore[16]: Module `tree_sitter` has no attribute `Parser`.
parser = Parser()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,9 @@ def preprocess_batch_messages(self, batch: List[str]) -> List[Dict[str, str]]:
raise Warning(f"Found non-string item in batch: {type(item)}")
clean_batch.append(str(item) if item is not None else "")
else:
# pyrefly: ignore [bad-argument-type]
clean_batch.append({"role": "user", "content": item})
# pyrefly: ignore [bad-return]
return clean_batch

# Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def setUp(self) -> None:
Callable[[pd.DataFrame, str], None],
Callable[[str], pd.DataFrame],
]
# pyrefly: ignore [bad-assignment]
] = [
(
"jsonl",
Expand Down
2 changes: 2 additions & 0 deletions privacy_guard/attacks/lira_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
AggregationType,
)
from privacy_guard.attacks.base_attack import BaseAttack

# pyrefly: ignore [missing-module-attribute]
from scipy.stats import norm

logger: logging.Logger = logging.getLogger(__name__)
Expand Down
4 changes: 3 additions & 1 deletion privacy_guard/attacks/tests/test_code_bleu_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
# pyre-ignore[11]: Annotation `Parser` is not defined as a type
def _make_parser(language: str) -> Parser:
tree_sitter_language = Language(
importlib.resources.files("codebleu") / "my-languages.so", language
# pyrefly: ignore [bad-argument-type]
importlib.resources.files("codebleu") / "my-languages.so",
language,
)
# pyre-ignore[16]: Module `tree_sitter` has no attribute `Parser`
parser = Parser()
Expand Down
1 change: 1 addition & 0 deletions privacy_guard/attacks/tests/test_lia_attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def test_get_y1_predictions_reference(self) -> None:

predictions_y1 = lia_attack.get_y1_predictions(df_with_reference)

# pyrefly: ignore [missing-attribute]
expected_predictions = df_with_reference["predictions_reference"].values
assert_array_equal(predictions_y1, expected_predictions)

Expand Down
Loading
Loading