Skip to content

Include partial anonymization and stats #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"symfony/config": "^4.2|^5.0",
"symfony/yaml": "^4.2|^5.0"
},
"minimum-stability": "dev",
"suggest": {
"symfony/config": "To configure anonymizer.",
"symfony/yaml": "To configure anonymizer using yaml files."
Expand Down
124 changes: 110 additions & 14 deletions src/Anonymizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

namespace WebnetFr\DatabaseAnonymizer;

use WebnetFr\DatabaseAnonymizer\Event\AnonymizerEvent;
use Doctrine\DBAL\Connection;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Contracts\EventDispatcher\EventDispatcherInterface;
use WebnetFr\DatabaseAnonymizer\Event\AnonymizerEvent;
use WebnetFr\DatabaseAnonymizer\Exception\InvalidAnonymousValueException;
use WebnetFr\DatabaseAnonymizer\Helper\AnonymizerCacheHelper;

/**
* Database anonymizer.
Expand All @@ -13,8 +15,12 @@
*/
class Anonymizer
{
public function __construct(?EventDispatcherInterface $dispatcher = null)
private $cacheHelper;
private $dispatcher;

public function __construct(AnonymizerCacheHelper $cacheHelper, ?EventDispatcherInterface $dispatcher = null)
{
$this->cacheHelper = $cacheHelper;
$this->dispatcher = $dispatcher;
}

Expand All @@ -25,8 +31,9 @@ public function __construct(?EventDispatcherInterface $dispatcher = null)
* @param TargetTable[] $targets
*
* @throws \Doctrine\DBAL\DBALException
* @throws \Exception
*/
public function anonymize(Connection $connection, array $targets)
public function anonymize(Connection $connection, array $targets, array $config = [])
{
foreach ($targets as $targetTable) {
if ($targetTable->isTruncate()) {
Expand All @@ -36,23 +43,112 @@ public function anonymize(Connection $connection, array $targets)
$connection->executeUpdate($truncateQuery);
$connection->query('SET FOREIGN_KEY_CHECKS=1');
} else {
// reset or not the anonymization
$reset = $config['reset'] ?? true;
$numberOfParts = $config['number_of_parts'] ?? 1;
$currentPart = $config['current_part'] ?? 1;
$maximumId = $this->getMaximumId($connection, $targetTable->getName(), $targetTable->getPrimaryKey());
$calculatedPart = $this->splitTable($maximumId, $numberOfParts)[$currentPart - 1];
// $cachedNumberOfParts = $this->cacheHelper->getNumberOfParts($targetTable->getName());

// We reset the lastId on cache
// if ($reset) {
// $this->cacheHelper->reset($targetTable->getName(), $currentPart);
// }

// if (null !== $cachedNumberOfParts && $cachedNumberOfParts !== $numberOfParts) {
// throw new \Exception(
// 'Last ids from cache could not be used as the number of parts are changed.' .
// ' try running with --reset option'
// );
// }

$minimumId = $calculatedPart[0];
$maximumId = $calculatedPart[1];

// if (null !== $lastId = $this->cacheHelper->getLastId($targetTable->getName(), $currentPart)) {
// $minimumId = $lastId;
// }

echo sprintf(">> Anonymizing table %s from id = %d to id = %d%s", $targetTable->getName(),
$minimumId,
$maximumId,
PHP_EOL
);

$allFieldNames = $targetTable->getAllFieldNames();
$pk = $targetTable->getPrimaryKey();

// Select the rows of the current table whose primary key lies in the current part:
// SELECT <all target fields> FROM <target table> WHERE <pk> >= :from_id AND <pk> <= :to_id
$fetchRowsSQL = $connection->createQueryBuilder()
->select(implode(',', $allFieldNames))
->from($targetTable->getName())
->where(sprintf('%s >= :from_id', $pk[0]))
->andWhere(sprintf('%s <= :to_id', $pk[0]))
->getSQL()
;
$fetchRowsStmt = $connection->prepare($fetchRowsSQL);
$fetchRowsStmt->execute([
'from_id' => $minimumId,
'to_id' => $maximumId,
]);

// Anonymize all rows in current target table.
$values = [];
foreach ($targetTable->getTargetFields() as $targetField) {
if (!isset($this->fakerCache[$targetField->getName()])) {
$this->fakerCache[$targetField->getName()] = $targetField->generate();
while ($row = $fetchRowsStmt->fetch()) {
$values = [];
// Anonymize all target fields in current row.
foreach ($targetTable->getTargetFields() as $targetField) {
$anonValue = $targetField->isTruncate()
? null
: $targetField->generate();

// Set anonymized value.
$values[$targetField->getName()] = $anonValue;
}

// Set anonymized value.
$values[$targetField->getName()] = $this->fakerCache[$targetField->getName()];
}
$pkValues = [];
foreach ($pk as $pkField) {
$pkValues[$pkField] = $row[$pkField];
}

$connection->update($targetTable->getName(), $values, [true => true]);
$connection->update($targetTable->getName(), $values, $pkValues);

if (null !== $this->dispatcher) {
$this->dispatcher->dispatch(new AnonymizerEvent($targetTable->getName(), $values));
if ($this->dispatcher) {
$this->dispatcher->dispatch(new AnonymizerEvent($row[$pk[0]], $targetTable->getName(), $values));
}
// $this->cacheHelper->saveLastId($targetTable->getName(), $currentPart, $row['id']);
}
}
}
}

/**
 * Read the highest value stored in the first primary-key column of a table.
 *
 * Only the first entry of $pKey is used. The fetched "max_id" value is cast
 * to int, so a NULL aggregate result yields 0.
 *
 * @param Connection $connection Connection used to build and run the query.
 * @param string     $tableName  Table to inspect.
 * @param array      $pKey       Primary-key column names; index 0 is queried.
 *
 * @return int
 */
private function getMaximumId(Connection $connection, string $tableName, array $pKey): int
{
    $maxExpression = sprintf('max(%s) as max_id', $pKey[0]);

    $queryBuilder = $connection->createQueryBuilder()
        ->select($maxExpression)
        ->from($tableName);

    $statement = $connection->prepare($queryBuilder->getSQL());
    $statement->execute();

    $result = $statement->fetch();

    return (int) $result['max_id'];
}

/**
 * Split the id range starting at 1 and ending at $maximumId into
 * $totalParts consecutive ranges.
 *
 * Each returned element is a two-item list [firstId, lastId]. The first
 * range holds floor($maximumId / $totalParts) ids; every following range
 * starts right after the previous one, spans one id more, and is clamped so
 * that it never goes past $maximumId. A range whose firstId is greater than
 * its lastId is empty (this happens when there are fewer ids than parts).
 *
 * @param int $maximumId  Highest id to cover.
 * @param int $totalParts Number of ranges to produce; must be at least 1.
 *
 * @return array List of [firstId, lastId] pairs, indexed from 0.
 *
 * @throws \InvalidArgumentException When $totalParts is lower than 1.
 */
private function splitTable(int $maximumId, int $totalParts): array
{
    // Guard against a zero/negative part count, which would otherwise divide
    // by zero or silently yield a single meaningless range.
    if ($totalParts < 1) {
        throw new \InvalidArgumentException(
            sprintf('The number of parts must be at least 1, %d given.', $totalParts)
        );
    }

    $itemsPerPart = (int) floor($maximumId / $totalParts);

    $parts = [[1, $itemsPerPart]];

    for ($i = 1; $i < $totalParts; $i++) {
        $firstId = $parts[$i - 1][1] + 1;

        $parts[] = [
            $firstId,
            // Never run past the highest existing id.
            min($firstId + $itemsPerPart, $maximumId),
        ];
    }

    return $parts;
}
}
2 changes: 1 addition & 1 deletion src/Config/TargetFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public function createTargets(array $config): array

foreach ($tableConfig['fields'] as $fieldName => $fieldConfig) {
$generator = $this->generatorFactory->getGenerator($fieldConfig);
$targetFields[] = new TargetField($fieldName, $generator);
$targetFields[] = new TargetField($fieldName, $generator, $fieldConfig['truncate'] ?? false);
}

$primaryKey = $tableConfig['primary_key'] ?? null;
Expand Down
4 changes: 3 additions & 1 deletion src/Event/AnonymizerEvent.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

class AnonymizerEvent extends Event
{
public $id;
public $tableName;
public $values;

public function __construct(string $tableName, array $values = [])
public function __construct(int $id, string $tableName, array $values = [])
{
$this->id = $id;
$this->tableName = $tableName;
$this->values = $values;
}
Expand Down
54 changes: 54 additions & 0 deletions src/Helper/AnonymizerCacheHelper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<?php

namespace WebnetFr\DatabaseAnonymizer\Helper;

use Psr\Cache\CacheItemPoolInterface;

/**
 * Stores per-table anonymization bookkeeping in a PSR-6 cache pool:
 * the last id saved for a given table part, and the number of parts
 * recorded for a table.
 *
 * Every cache key is built by a single private method per kind of value so
 * that readers and writers always agree on the key.
 */
class AnonymizerCacheHelper
{
    private const CACHE_PREFIX = 'anonymization_';
    private const CACHE_NUMBER_OF_PARTS_SUFFIX = '_number_of_parts';

    /**
     * @var CacheItemPoolInterface
     */
    private $cache;

    public function __construct(CacheItemPoolInterface $cache)
    {
        $this->cache = $cache;
    }

    /**
     * Delete the last id stored for the given part of a table.
     *
     * The stored number of parts of the table is left untouched.
     */
    public function reset(string $table, int $part): void
    {
        $this->cache->deleteItem($this->lastIdKey($table, $part));
    }

    /**
     * Get the number of parts stored for a table.
     *
     * @return int|null The stored value, or null when nothing (or a non-integer) is stored.
     */
    public function getNumberOfParts(string $table): ?int
    {
        return $this->fetchInt($this->numberOfPartsKey($table));
    }

    /**
     * Get the last id stored for the given part of a table.
     *
     * @return int|null The stored value, or null when nothing (or a non-integer) is stored.
     */
    public function getLastId(string $table, int $part): ?int
    {
        return $this->fetchInt($this->lastIdKey($table, $part));
    }

    /**
     * Store the number of parts for a table.
     */
    public function saveNumberOfParts(string $table, int $parts): void
    {
        $this->save($this->numberOfPartsKey($table), $parts);
    }

    /**
     * Store the last id for the given part of a table.
     */
    public function saveLastId(string $table, int $part, int $lastId): void
    {
        $this->save($this->lastIdKey($table, $part), $lastId);
    }

    /**
     * Build the cache key holding the last id of a table part.
     */
    private function lastIdKey(string $table, int $part): string
    {
        return self::CACHE_PREFIX . $table . '_' . $part;
    }

    /**
     * Build the cache key holding the number of parts of a table.
     *
     * The suffix constant already starts with an underscore, so no extra
     * separator is inserted; this is the key form the save path has always
     * written.
     */
    private function numberOfPartsKey(string $table): string
    {
        return self::CACHE_PREFIX . $table . self::CACHE_NUMBER_OF_PARTS_SUFFIX;
    }

    /**
     * Read a cache item and return its value only when it is an integer.
     */
    private function fetchInt(string $key): ?int
    {
        $value = $this->cache->getItem($key)->get();

        return is_int($value) ? $value : null;
    }

    /**
     * Write a value into the cache pool under the given key.
     *
     * @param mixed $value
     */
    private function save(string $key, $value): void
    {
        $item = $this->cache->getItem($key);
        $item->set($value);
        $this->cache->save($item);
    }
}
20 changes: 19 additions & 1 deletion src/TargetField.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,22 @@ class TargetField
*/
private $generator;

/**
* Truncate the field.
*
* @var bool
*/
private $truncate;

/**
* @param string $name
* @param GeneratorInterface $generator
*/
public function __construct(string $name, GeneratorInterface $generator)
public function __construct(string $name, GeneratorInterface $generator, bool $truncate = false)
{
$this->name = $name;
$this->generator = $generator;
$this->truncate = $truncate;
}

/**
Expand All @@ -55,4 +63,14 @@ public function generate()
{
return $this->generator->generate();
}

/**
 * Tell whether this field is flagged to be truncated.
 *
 * Returns the flag exactly as it was passed to the constructor.
 *
 * @return bool
 */
public function isTruncate(): bool
{
    return $this->truncate;
}
}