forked from rebillar/site-accueil-insa
		
	
		
			
				
	
	
		
			395 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			395 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| /**
 | |
|  * Matomo - free/libre analytics platform
 | |
|  *
 | |
|  * @link https://matomo.org
 | |
|  * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
 | |
|  *
 | |
|  */
 | |
| namespace Piwik\Plugins\PrivacyManager;
 | |
| 
 | |
| use Piwik\Common;
 | |
| use Piwik\DataAccess\ArchiveTableCreator;
 | |
| use Piwik\Date;
 | |
| use Piwik\Db;
 | |
| use Piwik\DbHelper;
 | |
| use Piwik\Piwik;
 | |
| 
 | |
/**
 * Purges archived reports and metrics that are considered old.
 *
 * An archive table is "old" when the year/month encoded in its name is at or before the
 * cut-off month computed in getArchiveTablesToPurge(). Use getPurgeEstimate() to preview
 * what would be removed and purgeData() to actually delete rows.
 */
class ReportsPurger
{
    // constant used in database purging estimate to signify a table should be dropped
    const DROP_TABLE = -1;

    /**
     * The max set of rows each table scan select should query at one time.
     */
    public static $selectSegmentSize = 100000;

    /**
     * The number of months after which report/metric data is considered old.
     */
    private $deleteReportsOlderThan;

    /**
     * Whether to keep basic metrics or not.
     */
    private $keepBasicMetrics;

    /**
     * Array of period types. Reports for these periods will not be purged.
     */
    private $reportPeriodsToKeep;

    /**
     * Whether to keep reports for segments or not.
     */
    private $keepSegmentReports;

    /**
     * The maximum number of rows to delete per DELETE query.
     */
    private $maxRowsToDeletePerQuery;

    /**
     * List of metrics that should be kept when purging. If $keepBasicMetrics is true,
     * these metrics will be saved.
     */
    private $metricsToKeep;

    /**
     * Array that maps a year and month ('2012_01') with lists of archive IDs for segmented
     * archives. Null until findSegmentArchives() has populated it.
     */
    private $segmentArchiveIds = null;

    /**
     * Constructor.
     *
     * @param int $deleteReportsOlderThan The number of months after which report/metric data
     *                                    is considered old.
     * @param bool $keepBasicMetrics Whether to keep basic metrics or not.
     * @param array $reportPeriodsToKeep Array of period types. Reports for these periods will not
     *                                   be purged.
     * @param bool $keepSegmentReports Whether to keep reports for segments or not.
     * @param array $metricsToKeep List of metrics that should be kept. if $keepBasicMetrics
     *                             is true, these metrics will be saved.
     * @param int $maxRowsToDeletePerQuery The maximum number of rows to delete per DELETE query.
     */
    public function __construct($deleteReportsOlderThan, $keepBasicMetrics, $reportPeriodsToKeep,
                                $keepSegmentReports, $metricsToKeep, $maxRowsToDeletePerQuery)
    {
        $this->deleteReportsOlderThan = (int) $deleteReportsOlderThan;
        $this->keepBasicMetrics = (bool) $keepBasicMetrics;
        $this->reportPeriodsToKeep = $reportPeriodsToKeep;
        $this->keepSegmentReports = (bool) $keepSegmentReports;
        $this->metricsToKeep = $metricsToKeep;
        $this->maxRowsToDeletePerQuery = (int) $maxRowsToDeletePerQuery;
    }

    /**
     * Purges old report/metric data.
     *
     * Rows are removed from every old archive table through Db::deleteAllRows(); this method
     * does not drop tables itself.
     *
     * Blob tables: rows matching getBlobTableWhereExpr() are deleted. When that expression is
     * empty (no report periods are kept) the delete is issued without a WHERE clause.
     *
     * Numeric tables: rows whose name starts with 'done' are always kept. If $keepBasicMetrics
     * is true and $metricsToKeep is not empty, rows for those metrics are kept as well. The
     * expression from getBlobTableWhereExpr() is applied here too.
     *
     * @param bool $optimize If tables should be optimized after rows are deleted. Normally,
     *                       this is handled by a scheduled task.
     */
    public function purgeData($optimize = false)
    {
        list($oldNumericTables, $oldBlobTables) = $this->getArchiveTablesToPurge();

        // process blob tables first, since archive status is stored in the numeric archives
        if (!empty($oldBlobTables)) {
            foreach ($oldBlobTables as $table) {
                $where = $this->getBlobTableWhereExpr($oldNumericTables, $table);
                if (!empty($where)) {
                    $where = "WHERE $where";
                }

                Db::deleteAllRows($table, $where, "idarchive ASC", $this->maxRowsToDeletePerQuery);
            }

            if ($optimize) {
                Db::optimizeTables($oldBlobTables);
            }
        }

        // forget the cached segment archive IDs so they are looked up again for the numeric pass
        $this->segmentArchiveIds = null;

        if (!empty($oldNumericTables)) {
            foreach ($oldNumericTables as $table) {
                $conditions = array("name NOT LIKE 'done%'");
                $bind       = array();

                if ($this->keepBasicMetrics && !empty($this->metricsToKeep)) {
                    $metricFields = Common::getSqlStringFieldsArray($this->metricsToKeep);
                    $bind         = $this->metricsToKeep;
                    $conditions[] = sprintf("name NOT IN (%s)", $metricFields);
                }

                $keepWhere = $this->getBlobTableWhereExpr($oldNumericTables, $table);

                if (!empty($keepWhere)) {
                    $conditions[] = $keepWhere;
                }

                $where  = 'WHERE ' . implode(' AND ', $conditions);

                Db::deleteAllRows($table, $where, "idarchive ASC", $this->maxRowsToDeletePerQuery, $bind);
            }

            if ($optimize) {
                Db::optimizeTables($oldNumericTables);
            }
        }
    }

    /**
     * Returns an array describing what data would be purged if purging were invoked.
     *
     * This function returns an array that maps table names with the number of rows
     * that will be deleted. If a table name is mapped with self::DROP_TABLE, the table
     * is reported as one to be dropped. Tables with nothing to delete are left out.
     *
     * @return array
     */
    public function getPurgeEstimate()
    {
        $result = array();

        // get archive tables that will be purged
        list($oldNumericTables, $oldBlobTables) = $this->getArchiveTablesToPurge();

        // process blob tables first, since archive status is stored in the numeric archives
        if (empty($this->reportPeriodsToKeep) && !$this->keepSegmentReports) {
            // not keeping any reports, so drop all tables
            foreach ($oldBlobTables as $table) {
                $result[$table] = self::DROP_TABLE;
            }
        } else {
            // figure out which rows will be deleted
            foreach ($oldBlobTables as $table) {
                $rowCount = $this->getBlobTableDeleteCount($oldNumericTables, $table);
                if ($rowCount > 0) {
                    $result[$table] = $rowCount;
                }
            }
        }

        // deal w/ numeric tables
        if ($this->keepBasicMetrics) {
            // figure out which rows will be deleted
            foreach ($oldNumericTables as $table) {
                $rowCount = $this->getNumericTableDeleteCount($table);
                if ($rowCount > 0) {
                    $result[$table] = $rowCount;
                }
            }
        } else {
            // not keeping any metrics, so drop the entire table
            foreach ($oldNumericTables as $table) {
                $result[$table] = self::DROP_TABLE;
            }
        }

        return $result;
    }

    /**
     * Utility function that finds every archive table whose reports are considered
     * old.
     *
     * @return array An array of two arrays. The first holds the numeric archive table
     *               names, and the second holds the blob archive table names.
     */
    private function getArchiveTablesToPurge()
    {
        // get month for which reports as old or older than, should be deleted
        // reports whose creation date <= this month will be deleted
        // (NOTE: we ignore how far we are in the current month)
        $toRemoveDate = Date::factory('today')->subMonth(1 + $this->deleteReportsOlderThan);

        // find all archive tables that are older than N months
        $oldNumericTables = array();
        $oldBlobTables = array();
        foreach (DbHelper::getTablesInstalled() as $table) {
            $type = ArchiveTableCreator::getTypeFromTableName($table);
            if ($type === false) {
                // not an archive table
                continue;
            }

            $date = ArchiveTableCreator::getDateFromTableName($table);
            list($year, $month) = explode('_', $date);

            if (!self::shouldReportBePurged($year, $month, $toRemoveDate)) {
                continue;
            }

            if ($type == ArchiveTableCreator::NUMERIC_TABLE) {
                $oldNumericTables[] = $table;
            } else {
                $oldBlobTables[] = $table;
            }
        }

        return array($oldNumericTables, $oldBlobTables);
    }

    /**
     * Returns true if a report with the given year & month should be purged or not.
     *
     * A report is purged when its year/month is at or before the year/month of $toRemoveDate.
     *
     * @param int|string $reportDateYear The year of the report in question.
     * @param int|string $reportDateMonth The month of the report in question.
     * @param Date $toRemoveDate The date a report must be older than in order to be purged.
     * @return bool
     */
    public static function shouldReportBePurged($reportDateYear, $reportDateMonth, $toRemoveDate)
    {
        // callers pass the string pieces of a table name ('2012', '01'), so compare as ints
        $reportYear  = (int) $reportDateYear;
        $reportMonth = (int) $reportDateMonth;

        $toRemoveYear  = (int) $toRemoveDate->toString('Y');
        $toRemoveMonth = (int) $toRemoveDate->toString('m');

        if ($reportYear < $toRemoveYear) {
            return true;
        }

        return $reportYear == $toRemoveYear && $reportMonth <= $toRemoveMonth;
    }

    /**
     * Counts the rows of a numeric archive table that the estimate reports as deletable:
     * rows whose name is neither a 'done%' flag nor one of $metricsToKeep.
     *
     * @param string $table The numeric archive table name.
     * @return int|float Sum of the per-segment COUNT(*) results.
     */
    private function getNumericTableDeleteCount($table)
    {
        $maxIdArchive = Db::fetchOne("SELECT MAX(idarchive) FROM $table");

        $conditions = array();
        $bind       = array();

        // bind the metric names instead of splicing them into the SQL, as purgeData() does
        if (!empty($this->metricsToKeep)) {
            $bind         = array_values($this->metricsToKeep);
            $conditions[] = sprintf("name NOT IN (%s)", Common::getSqlStringFieldsArray($bind));
        }

        $conditions[] = "name NOT LIKE 'done%'";

        // the idarchive range placeholders must stay last: segmentedFetchOne() is expected to
        // append the range bounds after $bind -- TODO confirm against Db::segmentedFetchOne
        $sql = "SELECT COUNT(*) FROM $table
                 WHERE " . implode(' AND ', $conditions) . "
                   AND idarchive >= ?
                   AND idarchive < ?";

        $segments = Db::segmentedFetchOne($sql, 0, $maxIdArchive, self::$selectSegmentSize, $bind);
        return array_sum($segments);
    }

    /**
     * Counts the rows of a blob archive table that match getBlobTableWhereExpr().
     *
     * @param array $oldNumericTables Numeric archive tables being purged.
     * @param string $table The blob archive table name.
     * @return int|float 0 when there is no WHERE expression, otherwise the sum of the
     *                   per-segment COUNT(*) results.
     */
    private function getBlobTableDeleteCount($oldNumericTables, $table)
    {
        $blobTableWhere = $this->getBlobTableWhereExpr($oldNumericTables, $table);
        if (empty($blobTableWhere)) {
            // nothing to count, so do not bother querying the table at all
            return 0;
        }

        $maxIdArchive = Db::fetchOne("SELECT MAX(idarchive) FROM $table");

        $sql = "SELECT COUNT(*) FROM $table
                 WHERE " . $blobTableWhere . "
                   AND idarchive >= ?
                   AND idarchive < ?";

        $segments = Db::segmentedFetchOne($sql, 0, $maxIdArchive, self::$selectSegmentSize);
        return array_sum($segments);
    }

    /**
     * Returns SQL WHERE expression used to find reports that should be purged.
     *
     * @param array $oldNumericTables Numeric archive tables being purged; scanned for segment
     *                                archive IDs when segment reports are not kept.
     * @param string $table The archive table the expression is built for.
     * @return string A parenthesised expression, or an empty string when no report periods
     *                are kept.
     */
    private function getBlobTableWhereExpr($oldNumericTables, $table)
    {
        if (empty($this->reportPeriodsToKeep)) {
            return "";
        }

        // period values are spliced into the SQL, so force them to integers
        $periods = array_map('intval', $this->reportPeriodsToKeep);
        $where   = "period NOT IN (" . implode(',', $periods) . ")";

        // if not keeping segments make sure segments w/ kept periods are also deleted
        if (!$this->keepSegmentReports) {
            $this->findSegmentArchives($oldNumericTables);

            $dateFromTable = ArchiveTableCreator::getDateFromTableName($table);

            if (!empty($this->segmentArchiveIds[$dateFromTable])) {
                $archiveIds = array_map('intval', $this->segmentArchiveIds[$dateFromTable]);
                $where     .= " OR idarchive IN (" . implode(',', $archiveIds) . ")";
            }
        }

        return "($where)";
    }

    /**
     * Collects, per table date, the archive IDs found in the given numeric tables whose name
     * matches LIKE 'done_%.%' (and is not exactly 'done') -- presumably the status rows of
     * segment archives; confirm against the archiving code.
     *
     * Called when segment reports are NOT kept, so those archives can be deleted even for
     * periods that are otherwise kept. The result is cached in $segmentArchiveIds; nothing
     * is done if the cache is already filled or no tables are given.
     *
     * @param array $numericTables Numeric archive table names to scan.
     */
    private function findSegmentArchives($numericTables)
    {
        if (!is_null($this->segmentArchiveIds) || empty($numericTables)) {
            return;
        }

        $this->segmentArchiveIds = array();

        foreach ($numericTables as $table) {
            $tableDate = ArchiveTableCreator::getDateFromTableName($table);

            $maxIdArchive = Db::fetchOne("SELECT MAX(idarchive) FROM $table");

            $sql = "SELECT idarchive FROM $table
                     WHERE name != 'done'
                       AND name LIKE 'done_%.%'
                       AND idarchive >= ?
                       AND idarchive < ?";

            $idArchives = array();
            foreach (Db::segmentedFetchAll($sql, 0, $maxIdArchive, self::$selectSegmentSize) as $row) {
                // these IDs end up inside an IN (...) list, so keep them as integers
                $idArchives[] = (int) $row['idarchive'];
            }

            $this->segmentArchiveIds[$tableDate] = $idArchives;
        }
    }

    /**
     * Utility function. Creates a new instance of ReportsPurger with the supplied array
     * of settings.
     *
     * $settings must contain the following keys:
     * -'delete_reports_older_than': The number of months after which reports/metrics are
     *                               considered old.
     * -'delete_reports_keep_basic_metrics': 1 if basic metrics should be kept, 0 if otherwise.
     * -'delete_reports_keep_day_reports': 1 if daily reports should be kept, 0 if otherwise.
     * -'delete_reports_keep_week_reports': 1 if weekly reports should be kept, 0 if otherwise.
     * -'delete_reports_keep_month_reports': 1 if monthly reports should be kept, 0 if otherwise.
     * -'delete_reports_keep_year_reports': 1 if yearly reports should be kept, 0 if otherwise.
     * -'delete_reports_keep_range_reports': 1 if range reports should be kept, 0 if otherwise.
     * -'delete_reports_keep_segment_reports': 1 if reports for segments should be kept, 0 if otherwise.
     * -'delete_logs_max_rows_per_query': Maximum number of rows to delete in one DELETE query.
     *
     * @param array $settings The settings described above.
     * @param array $metricsToKeep List of metrics that should be kept when purging.
     * @return ReportsPurger
     */
    public static function make($settings, $metricsToKeep)
    {
        return new self(
            $settings['delete_reports_older_than'],
            $settings['delete_reports_keep_basic_metrics'] == 1,
            self::getReportPeriodsToKeep($settings),
            $settings['delete_reports_keep_segment_reports'] == 1,
            $metricsToKeep,
            $settings['delete_logs_max_rows_per_query']
        );
    }

    /**
     * Utility function that returns an array period values based on the 'delete_reports_keep_*'
     * settings. The period values returned are the integer values stored in the DB.
     *
     * A period whose setting is missing from $settings is treated as not kept.
     *
     * @param array $settings The settings to use.
     * @return array An array of period values that should be kept when purging old data.
     */
    private static function getReportPeriodsToKeep($settings)
    {
        $keepReportPeriods = array();
        foreach (Piwik::$idPeriods as $strPeriod => $intPeriod) {
            $optionName = "delete_reports_keep_{$strPeriod}_reports";
            if (isset($settings[$optionName]) && $settings[$optionName] == 1) {
                $keepReportPeriods[] = $intPeriod;
            }
        }
        return $keepReportPeriods;
    }
}
 | |
| 
 |