# Set up a callback to acknowledge a message. This closes around an event
# so that it can signal that it is done and the main thread can continue.
job_done = threading.Event()
def callback(message):
    """Handle one message from the subscription and report the job results.

    If the message's "DlpJobName" attribute equals ``operation.name``, the
    message is acknowledged, the job is fetched with ``dlp.get_dlp_job`` and
    its categorical-stats value-frequency histogram buckets are printed.
    ``job_done`` is then set so the waiting main thread can continue.
    Any other message is dropped.

    Args:
        message: The received message. It must expose an ``attributes``
            mapping plus ``ack()`` and ``drop()`` methods.

    Raises:
        Exception: Any error raised while handling the message is printed
            and then re-raised unchanged.
    """
    try:
        # Use .get() so a message published without the attribute is treated
        # as "not ours" and dropped, instead of raising KeyError.
        if message.attributes.get("DlpJobName") != operation.name:
            # This is not the message we're looking for.
            message.drop()
            return

        # This is the message we're looking for, so acknowledge it.
        message.ack()

        # Now that the job is done, fetch the results and print them.
        job = dlp.get_dlp_job(operation.name)
        histogram_buckets = (
            job.risk_details
            .categorical_stats_result
            .value_frequency_histogram_buckets)

        # Print bucket stats
        for i, bucket in enumerate(histogram_buckets):
            print("Bucket {}:".format(i))
            print(" Most common value occurs {} time(s)".format(
                bucket.value_frequency_upper_bound))
            print(" Least common value occurs {} time(s)".format(
                bucket.value_frequency_lower_bound))
            print(" {} unique values total.".format(
                bucket.bucket_size))
            for value in bucket.bucket_values:
                print(" Value {} occurs {} time(s)".format(
                    value.value.integer_value, value.count))

        # Signal to the main thread that we can exit.
        job_done.set()
    except Exception as e:
        # Because this is executing in a thread, an exception won't be
        # noted unless we print it manually.
        print(e)
        raise
# Register the callback and wait on the event.
subscription.open(callback)

# Event.wait() returns False only when the timeout elapses before the
# callback has set the event.
finished = job_done.wait(timeout=timeout)
if not finished:
    print("No event received before the timeout. Please verify that the "
          "subscription provided is subscribed to the topic provided.")
# [END dlp_categorical_stats]
# [START dlp_k_anonymity]
def k_anonymity_analysis(project, table_project_id, dataset_id, table_id,
topic_id, subscription_id, quasi_ids, timeout=300):
Uses the Data Loss Prevention API to compute the k-anonymity of a
column set in a Google BigQuery table.