Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
__init__.py
CHANGED
|
@@ -1,9 +1 @@
|
|
| 1 |
-
#
|
| 2 |
-
from .masking import (
|
| 3 |
-
mask_email, mask_name, mask_date,
|
| 4 |
-
mask_ssn, mask_itin, mask_phone,
|
| 5 |
-
mask_email_udf, mask_name_udf, mask_date_udf,
|
| 6 |
-
mask_ssn_udf, mask_itin_udf, mask_phone_udf
|
| 7 |
-
)
|
| 8 |
-
|
| 9 |
-
from .utils import apply_masking
|
|
|
|
| 1 |
+
# Init for tests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (135 Bytes). View file
|
|
|
__pycache__/test_masking.cpython-311-pytest-8.4.1.pyc
ADDED
|
Binary file (13.8 kB). View file
|
|
|
__pycache__/test_schema_masking.cpython-311-pytest-8.4.1.pyc
ADDED
|
Binary file (4.46 kB). View file
|
|
|
test_masking.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from anonyspark.masking import (
|
| 2 |
+
mask_email, mask_name, mask_date,
|
| 3 |
+
mask_ssn, mask_itin, mask_phone
|
| 4 |
+
)
|
| 5 |
+
|
| 6 |
+
def test_mask_email():
    """Check that mask_email hides the local part and returns None for missing input.

    The expected mask keeps the domain and replaces everything before the
    "@" with "***". Empty and None inputs are expected to yield None.
    """
    # A well-formed address is required: the placeholder text
    # "[email protected]" (an email-obfuscation artifact) contains no "@",
    # so it could never produce the expected "***@example.com".
    assert mask_email("john@example.com") == "***@example.com"
    assert mask_email("") is None
    assert mask_email(None) is None
|
| 10 |
+
|
| 11 |
+
def test_mask_name():
    """Verify mask_name keeps the first letter and yields None for blank input."""
    masked = mask_name("John")
    assert masked == "J***"

    # Both the empty string and None are expected to come back as None.
    for blank in ("", None):
        assert mask_name(blank) is None
|
| 15 |
+
|
| 16 |
+
def test_mask_date():
    """Verify mask_date keeps only the day and yields None for unusable input."""
    masked = mask_date("1991-08-14")
    assert masked == "***-**-14"

    # A non-date string and None are both expected to come back as None.
    for rejected in ("invalid", None):
        assert mask_date(rejected) is None
|
| 20 |
+
|
| 21 |
+
def test_mask_ssn():
    """Verify mask_ssn keeps the last four digits and rejects malformed input."""
    well_formed = "123-45-6789"
    malformed = "invalid"

    masked = mask_ssn(well_formed)
    assert masked == "***-**-6789"

    rejected = mask_ssn(malformed)
    assert rejected is None
|
| 24 |
+
|
| 25 |
+
def test_mask_itin():
    """Verify mask_itin keeps the last four digits of an accepted ITIN.

    The second value has the same NNN-NN-NNNN shape but is expected to be
    rejected (None) rather than masked.
    """
    accepted = "912-73-1234"
    not_an_itin = "123-45-6789"

    masked = mask_itin(accepted)
    assert masked == "***-**-1234"

    rejected = mask_itin(not_an_itin)
    assert rejected is None
|
| 28 |
+
|
| 29 |
+
def test_mask_phone():
    """Verify mask_phone normalises both input formats to the same mask."""
    expected_mask = "***-***-7890"

    # Dashed and parenthesised forms of the same number must mask identically.
    for formatted in ("123-456-7890", "(123) 456-7890"):
        assert mask_phone(formatted) == expected_mask

    unparseable = mask_phone("invalid")
    assert unparseable is None
|
test_schema_masking.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tests/test_schema_masking.py
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
# Make the `anonyspark` package importable by adding its checkout to sys.path.
# NOTE(review): this is a hard-coded absolute path (presumably a notebook /
# Colab checkout location); it will not exist on other machines. Confirm the
# intended layout, or rely on an installed package instead.
sys.path.append("/content/anonyspark")
|
| 7 |
+
|
| 8 |
+
from pyspark.sql import SparkSession
|
| 9 |
+
from anonyspark.utils import apply_masking
|
| 10 |
+
|
| 11 |
+
def test_schema_masking():
    """End-to-end check of apply_masking on a one-row Spark DataFrame.

    Builds a local single-threaded Spark session, creates a DataFrame with one
    row holding an email, name, date of birth, SSN, ITIN and phone number,
    runs apply_masking with a schema naming each column, and asserts that the
    collected row exposes the expected value under ``masked_<column>``.
    """
    spark = SparkSession.builder.master("local[1]").appName("Test").getOrCreate()

    df = spark.createDataFrame([{
        # A well-formed address is required: the placeholder text
        # "[email protected]" (an email-obfuscation artifact) contains no "@",
        # so the "***@example.com" assertion below could never hold.
        "email": "john@example.com",
        "name": "John",
        "dob": "1991-08-14",
        "ssn": "123-45-6789",
        "itin": "912-73-1234",
        "phone": "123-456-7890",
    }])

    # Presumably maps column name -> masking type; here each column uses the
    # type of the same name. Verify against apply_masking's contract.
    schema = {
        "email": "email",
        "name": "name",
        "dob": "dob",
        "ssn": "ssn",
        "itin": "itin",
        "phone": "phone",
    }

    masked_df = apply_masking(df, schema)
    # Only one row was created, so the first collected row is the whole result.
    result = masked_df.collect()[0].asDict()

    assert result["masked_email"] == "***@example.com"
    assert result["masked_name"] == "J***"
    assert result["masked_dob"] == "***-**-14"
    assert result["masked_ssn"] == "***-**-6789"
    assert result["masked_itin"] == "***-**-1234"
    assert result["masked_phone"] == "***-***-7890"
|