diff --git a/composer.json b/composer.json index 6a39d8f..edfa04d 100644 --- a/composer.json +++ b/composer.json @@ -23,6 +23,7 @@ "symfony/config": "^4.2|^5.0", "symfony/yaml": "^4.2|^5.0" }, + "minimum-stability": "dev", "suggest": { "symfony/config": "To configure anonymizer.", "symfony/yaml": "To configure anonymizer using yaml files." diff --git a/src/Anonymizer.php b/src/Anonymizer.php index d4f507b..9b2e416 100644 --- a/src/Anonymizer.php +++ b/src/Anonymizer.php @@ -2,9 +2,11 @@ namespace WebnetFr\DatabaseAnonymizer; -use WebnetFr\DatabaseAnonymizer\Event\AnonymizerEvent; use Doctrine\DBAL\Connection; -use Symfony\Component\EventDispatcher\EventDispatcherInterface; +use Symfony\Contracts\EventDispatcher\EventDispatcherInterface; +use WebnetFr\DatabaseAnonymizer\Event\AnonymizerEvent; +use WebnetFr\DatabaseAnonymizer\Exception\InvalidAnonymousValueException; +use WebnetFr\DatabaseAnonymizer\Helper\AnonymizerCacheHelper; /** * Database anonymizer. @@ -13,8 +15,12 @@ */ class Anonymizer { - public function __construct(?EventDispatcherInterface $dispatcher = null) + private $cacheHelper; + private $dispatcher; + + public function __construct(AnonymizerCacheHelper $cacheHelper, ?EventDispatcherInterface $dispatcher = null) { + $this->cacheHelper = $cacheHelper; $this->dispatcher = $dispatcher; } @@ -25,8 +31,9 @@ public function __construct(?EventDispatcherInterface $dispatcher = null) * @param TargetTable[] $targets * * @throws \Doctrine\DBAL\DBALException + * @throws \Exception */ - public function anonymize(Connection $connection, array $targets) + public function anonymize(Connection $connection, array $targets, array $config = []) { foreach ($targets as $targetTable) { if ($targetTable->isTruncate()) { @@ -36,23 +43,112 @@ public function anonymize(Connection $connection, array $targets) $connection->executeUpdate($truncateQuery); $connection->query('SET FOREIGN_KEY_CHECKS=1'); } else { + // Reset flag, defaults to true; only the (currently commented-out) cache logic reads it + $reset = $config['reset'] ?? 
true; + $numberOfParts = $config['number_of_parts'] ?? 1; + $currentPart = $config['current_part'] ?? 1; + $maximumId = $this->getMaximumId($connection, $targetTable->getName(), $targetTable->getPrimaryKey()); + $calculatedPart = $this->splitTable($maximumId, $numberOfParts)[$currentPart - 1]; // NOTE(review): no bounds check; a current_part outside 1..number_of_parts reads an undefined offset +// $cachedNumberOfParts = $this->cacheHelper->getNumberOfParts($targetTable->getName()); + + // Reset the last id stored in the cache (disabled for now) +// if ($reset) { +// $this->cacheHelper->reset($targetTable->getName(), $currentPart); +// } + +// if (null !== $cachedNumberOfParts && $cachedNumberOfParts !== $numberOfParts) { +// throw new \Exception( +// 'Last ids from cache could not be used as the number of parts are changed.' . +// ' try running with --reset option' +// ); +// } + + $minimumId = $calculatedPart[0]; + $maximumId = $calculatedPart[1]; + +// if (null !== $lastId = $this->cacheHelper->getLastId($targetTable->getName(), $currentPart)) { +// $minimumId = $lastId; +// } + + echo sprintf(">> Anonymizing table %s from id = %d to id = %d%s", $targetTable->getName(), + $minimumId, + $maximumId, + PHP_EOL + ); + + $allFieldNames = $targetTable->getAllFieldNames(); + $pk = $targetTable->getPrimaryKey(); + + // Select the rows of the current table whose first primary-key column is within [from_id, to_id]: + // SELECT fields FROM table WHERE pk >= :from_id AND pk <= :to_id + $fetchRowsSQL = $connection->createQueryBuilder() + ->select(implode(',', $allFieldNames)) + ->from($targetTable->getName()) + ->where(sprintf('%s >= :from_id', $pk[0])) + ->andWhere(sprintf('%s <= :to_id', $pk[0])) + ->getSQL() + ; + $fetchRowsStmt = $connection->prepare($fetchRowsSQL); + $fetchRowsStmt->execute([ + 'from_id' => $minimumId, + 'to_id' => $maximumId, + ]); + // Anonymize all rows in current target table. 
- $values = []; - foreach ($targetTable->getTargetFields() as $targetField) { - if (!isset($this->fakerCache[$targetField->getName()])) { - $this->fakerCache[$targetField->getName()] = $targetField->generate(); + while ($row = $fetchRowsStmt->fetch()) { + $values = []; + // Anonymize all target fields in current row; truncated fields are set to null. + foreach ($targetTable->getTargetFields() as $targetField) { + $anonValue = $targetField->isTruncate() + ? null + : $targetField->generate(); + + // Set anonymized value. + $values[$targetField->getName()] = $anonValue; } - // Set anonymized value. - $values[$targetField->getName()] = $this->fakerCache[$targetField->getName()]; - } + $pkValues = []; + foreach ($pk as $pkField) { + $pkValues[$pkField] = $row[$pkField]; + } - $connection->update($targetTable->getName(), $values, [true => true]); + $connection->update($targetTable->getName(), $values, $pkValues); - if (null !== $this->dispatcher) { - $this->dispatcher->dispatch(new AnonymizerEvent($targetTable->getName(), $values)); + if ($this->dispatcher) { + $this->dispatcher->dispatch(new AnonymizerEvent($row[$pk[0]], $targetTable->getName(), $values)); + } +// $this->cacheHelper->saveLastId($targetTable->getName(), $currentPart, $row['id']); } } } } + + private function getMaximumId(Connection $connection, string $tableName, array $pKey): int // highest value of the first primary-key column + { + $sql = $connection->createQueryBuilder() + ->select(sprintf('max(%s) as max_id', $pKey[0])) + ->from($tableName) + ->getSQL(); + + $fetchCountStmt = $connection->prepare($sql); + $fetchCountStmt->execute(); + + return (int) $fetchCountStmt->fetch()['max_id']; // MAX() over an empty table is NULL, which casts to 0 + } + + private function splitTable(int $maximumId, int $totalParts): array // returns consecutive [fromId, toId] ranges, one per part, starting at id 1 + { + $itemsPerPart = (int) floor($maximumId / max(1, $totalParts)); // clamp the divisor: a part count below 1 would divide by zero or produce a negative range + + $parts = [[1, $itemsPerPart]]; + + for ($i=1; $i<$totalParts; $i++) { + $parts[] = [ + $parts[$i-1][1] + 1, + min($parts[$i-1][1] + 1 + $itemsPerPart, $maximumId), // never run past the highest id + ]; + } + + return $parts; + } } diff --git a/src/Config/TargetFactory.php 
b/src/Config/TargetFactory.php index 55165f5..8d38d7c 100644 --- a/src/Config/TargetFactory.php +++ b/src/Config/TargetFactory.php @@ -75,7 +75,7 @@ public function createTargets(array $config): array foreach ($tableConfig['fields'] as $fieldName => $fieldConfig) { $generator = $this->generatorFactory->getGenerator($fieldConfig); - $targetFields[] = new TargetField($fieldName, $generator); + $targetFields[] = new TargetField($fieldName, $generator, $fieldConfig['truncate'] ?? false); // truncate flag defaults to false when not configured } $primaryKey = $tableConfig['primary_key'] ?? null; diff --git a/src/Event/AnonymizerEvent.php b/src/Event/AnonymizerEvent.php index e353e9a..c1d4b38 100644 --- a/src/Event/AnonymizerEvent.php +++ b/src/Event/AnonymizerEvent.php @@ -6,11 +6,13 @@ class AnonymizerEvent extends Event { + public $id; public $tableName; public $values; - public function __construct(string $tableName, array $values = []) + public function __construct(int $id, string $tableName, array $values = []) { + $this->id = $id; $this->tableName = $tableName; $this->values = $values; } diff --git a/src/Helper/AnonymizerCacheHelper.php b/src/Helper/AnonymizerCacheHelper.php new file mode 100644 index 0000000..77e2559 --- /dev/null +++ b/src/Helper/AnonymizerCacheHelper.php @@ -0,0 +1,54 @@ +cache = $cache; + } + + public function reset(string $table, int $part): void + { + $this->cache->deleteItem(self::CACHE_PREFIX . $table . '_' . $part); // same key layout as getLastId(): prefix, table, '_', part + } + + public function getNumberOfParts(string $table): ?int + { + $item = $this->cache->getItem(self::CACHE_PREFIX . $table. '_' . self::CACHE_NUMBER_OF_PARTS_SUFFIX); + + return is_int($value = $item->get()) ? $value : null; // null unless an integer is cached under this key + } + + public function getLastId(string $table, int $part): ?int + { + $item = $this->cache->getItem(self::CACHE_PREFIX . $table. '_' . $part); + + return is_int($value = $item->get()) ? $value : null; // null unless an integer is cached under this key + } + + public function saveNumberOfParts(string $table, int $parts): void + { + $this->save(self::CACHE_PREFIX . $table. 
'_' . self::CACHE_NUMBER_OF_PARTS_SUFFIX, $parts); // key must match the one getNumberOfParts() reads: prefix, table, '_', suffix + } + + public function saveLastId(string $table, int $part, int $lastId): void + { + $this->save(self::CACHE_PREFIX . $table. '_' . $part, $lastId); + } + + private function save(string $key, $value): void + { + $item = $this->cache->getItem($key); + $item->set($value); + $this->cache->save($item); + } +} diff --git a/src/TargetField.php b/src/TargetField.php index 996f78a..f7c5afd 100644 --- a/src/TargetField.php +++ b/src/TargetField.php @@ -26,14 +26,23 @@ class TargetField */ private $generator; + /** + * Truncate the field. + * + * @var bool + */ + private $truncate; + /** * @param string $name * @param GeneratorInterface $generator + * @param bool $truncate Whether to truncate the field */ - public function __construct(string $name, GeneratorInterface $generator) + public function __construct(string $name, GeneratorInterface $generator, bool $truncate = false) { $this->name = $name; $this->generator = $generator; + $this->truncate = $truncate; } /** @@ -55,4 +64,14 @@ public function generate() { return $this->generator->generate(); } + + /** + * Tell whether this field is flagged for truncation. + * + * @return bool + */ + public function isTruncate() + { + return $this->truncate; + } }