Spaces:
Running
Running
| from pyspark.sql.functions import col | |
def apply_masking(df, schema):
    """
    Add a masked companion column for each schema entry with a known mask type.

    Args:
        df: DataFrame to extend; it is only touched through ``withColumn``.
        schema: Mapping of ``{"original_col": "mask_type"}``. Recognised
            mask types are ``"email"``, ``"name"``, ``"dob"``, ``"ssn"``,
            ``"itin"`` and ``"phone"``.

    Returns:
        The DataFrame produced by chaining one ``withColumn`` call per
        recognised entry. Each call writes to ``masked_<original_col>`` and
        feeds the UDF with ``col(<original_col>)``. When no entry is
        recognised, ``df`` is returned as-is.

    Entries whose mask type is not in the table below are skipped silently.
    """
    # Imported at call time, exactly as the original did, so the masking
    # module is only loaded when masking is actually requested.
    from .masking import (
        mask_email_udf,
        mask_name_udf,
        mask_date_udf,
        mask_ssn_udf,
        mask_itin_udf,
        mask_phone_udf,
    )

    # Dispatch table: mask type -> UDF applied to the source column.
    udf_for_type = {
        "email": mask_email_udf,
        "name": mask_name_udf,
        "dob": mask_date_udf,
        "ssn": mask_ssn_udf,
        "itin": mask_itin_udf,
        "phone": mask_phone_udf,
    }

    result = df
    for source_column, kind in schema.items():
        if kind not in udf_for_type:
            # Unknown mask type: leave this column without a masked copy.
            continue
        mask_udf = udf_for_type[kind]
        result = result.withColumn(
            f"masked_{source_column}",
            mask_udf(col(source_column)),
        )
    return result