site-accueil-insa/matomo/core/RankingQuery.php

387 lines
13 KiB
PHP

<?php
/**
* Matomo - free/libre analytics platform
*
* @link https://matomo.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
namespace Piwik;
use Exception;
/**
* The ranking query class wraps an arbitrary SQL query with more SQL that limits
* the number of results while aggregating the rest in an a new "Others" row. It also
* allows for some more fancy things that can be configured via method calls of this
* class. The advanced use cases are explained in the doc comments of the methods.
*
* The general use case looks like this:
*
* // limit to 500 rows + "Others"
* $rankingQuery = new RankingQuery();
* $rankingQuery->setLimit(500);
*
* // idaction_url will be "Others" in the row that contains the aggregated rest
* $rankingQuery->addLabelColumn('idaction_url');
*
* // the actual query. it's important to sort it before the limit is applied
* $sql = 'SELECT idaction_url, COUNT(*) AS nb_hits
* FROM log_link_visit_action
* GROUP BY idaction_url
* ORDER BY nb_hits DESC';
*
* // execute the query
* $rankingQuery->execute($sql);
*
* For more examples, see RankingQueryTest.php
*
* @api
*/
class RankingQuery
{
// a special label used to mark the 'Others' row in a ranking query result set. this is mapped to the
// datatable summary row during archiving.
const LABEL_SUMMARY_ROW = '__mtm_ranking_query_others__';
/**
* Contains the labels of the inner query.
* Format: "label" => true (to make sure labels don't appear twice)
* @var array
*/
private $labelColumns = array();
/**
* The columns of the inner query that are not labels
* Format: "label" => "aggregation function" or false for no aggregation
* @var array
*/
private $additionalColumns = array();
/**
* The limit for each group
* @var int
*/
private $limit = 5;
/**
* The name of the columns that marks rows to be excluded from the limit
* @var string
*/
private $columnToMarkExcludedRows = false;
/**
* The column that is used to partition the result
* @var bool|string
*/
private $partitionColumn = false;
/**
* The possible values for the column $this->partitionColumn
* @var array
*/
private $partitionColumnValues = array();
/**
* The value to use in the label of the 'Others' row.
* @var string
*/
private $othersLabelValue = self::LABEL_SUMMARY_ROW;
/**
* Constructor.
*
* @param int|false $limit The result row limit. See {@link setLimit()}.
*/
public function __construct($limit = false)
{
if ($limit !== false) {
$this->setLimit($limit);
}
}
/**
* Set the limit after which everything is grouped to "Others".
*
* @param int $limit
*/
public function setLimit($limit)
{
$this->limit = $limit;
}
/**
* Set the value to use for the label in the 'Others' row.
*
* @param string $value
*/
public function setOthersLabel($value)
{
$this->othersLabelValue = $value;
}
/**
* Add a label column.
* Labels are the columns that are replaced with "Others" after the limit.
*
* @param string|array $labelColumn
*/
public function addLabelColumn($labelColumn)
{
if (is_array($labelColumn)) {
foreach ($labelColumn as $label) {
$this->addLabelColumn($label);
}
return;
}
$this->labelColumns[$labelColumn] = true;
}
/**
* @return array
*/
public function getLabelColumns()
{
return $this->labelColumns;
}
/**
* Add a column that has be added to the outer queries.
*
* @param $column
* @param string|bool $aggregationFunction If set, this function is used to aggregate the values of "Others",
* eg, `'min'`, `'max'` or `'sum'`.
*/
public function addColumn($column, $aggregationFunction = false)
{
if (is_array($column)) {
foreach ($column as $c) {
$this->addColumn($c, $aggregationFunction);
}
return;
}
$this->additionalColumns[$column] = $aggregationFunction;
}
/**
* Sets a column that will be used to filter the result into two categories.
* Rows where this column has a value > 0 will be removed from the result and put
* into another array. Both the result and the array of excluded rows are returned
* by {@link execute()}.
*
* @param $column string Name of the column.
* @throws Exception if method is used more than once.
*/
public function setColumnToMarkExcludedRows($column)
{
if ($this->columnToMarkExcludedRows !== false) {
throw new Exception("setColumnToMarkExcludedRows can only be used once");
}
$this->columnToMarkExcludedRows = $column;
$this->addColumn($this->columnToMarkExcludedRows);
}
/**
* This method can be used to partition the result based on the possible values of one
* table column. This means the query will split the result set into other sets of rows
* for each possible value you provide (where the rows of each set have a column value
* that equals a possible value). Each of these new sets of rows will be individually
* limited resulting in several limited result sets.
*
* For example, you can run a query aggregating some data on the log_action table and
* partition by log_action.type with the possible values of {@link Piwik\Tracker\Action::TYPE_PAGE_URL},
* {@link Piwik\Tracker\Action::TYPE_OUTLINK}, {@link Piwik\Tracker\Action::TYPE_DOWNLOAD}.
* The result will be three separate result sets that are aggregated the same ways, but for rows
* where `log_action.type = TYPE_OUTLINK`, for rows where `log_action.type = TYPE_ACTION_URL` and for
* rows `log_action.type = TYPE_DOWNLOAD`.
*
* @param $partitionColumn string The column name to partition by.
* @param $possibleValues Array of possible column values.
* @throws Exception if method is used more than once.
*/
public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues)
{
if ($this->partitionColumn !== false) {
throw new Exception("partitionResultIntoMultipleGroups can only be used once");
}
$this->partitionColumn = $partitionColumn;
$this->partitionColumnValues = $possibleValues;
$this->addColumn($partitionColumn);
}
/**
* Executes the query.
* The object has to be configured first using the other methods.
*
* @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering
* has to be specified in this query. {@link RankingQuery} cannot apply ordering
* itself.
* @param $bind array Bindings for the inner query.
* @param int $timeLimitInMs Adds a MAX_EXECUTION_TIME query hint to the query if $timeLimitInMs > 0
* @return array The format depends on which methods have been used
* to configure the ranking query.
*/
public function execute($innerQuery, $bind = array(), $timeLimitInMs = 0)
{
$query = $this->generateRankingQuery($innerQuery);
$query = DbHelper::addMaxExecutionTimeHintToQuery($query, $timeLimitInMs);
$data = Db::getReader()->fetchAll($query, $bind);
if ($this->columnToMarkExcludedRows !== false) {
// split the result into the regular result and the rows with special treatment
$excludedFromLimit = array();
$result = array();
foreach ($data as &$row) {
if ($row[$this->columnToMarkExcludedRows] != 0) {
$excludedFromLimit[] = $row;
} else {
$result[] = $row;
}
}
$data = array(
'result' => &$result,
'excludedFromLimit' => &$excludedFromLimit
);
}
if ($this->partitionColumn !== false) {
if ($this->columnToMarkExcludedRows !== false) {
$data['result'] = $this->splitPartitions($data['result']);
} else {
$data = $this->splitPartitions($data);
}
}
return $data;
}
private function splitPartitions(&$data)
{
$result = array();
foreach ($data as &$row) {
$partition = $row[$this->partitionColumn];
if (!isset($result[$partition])) {
$result[$partition] = array();
}
$result[$partition][] = & $row;
}
return $result;
}
/**
* Generate the SQL code that does the magic.
* If you want to get the result, use execute() instead. If you want to run the query
* yourself, use this method.
*
* @param $innerQuery string The "payload" query that does the actual data aggregation. The ordering
* has to be specified in this query. {@link RankingQuery} cannot apply ordering
* itself.
* @return string The entire ranking query SQL.
*/
public function generateRankingQuery($innerQuery)
{
// +1 to include "Others"
$limit = $this->limit + 1;
$counterExpression = $this->getCounterExpression($limit);
// generate select clauses for label columns
$labelColumnsString = '`' . implode('`, `', array_keys($this->labelColumns)) . '`';
$labelColumnsOthersSwitch = array();
foreach ($this->labelColumns as $column => $true) {
$labelColumnsOthersSwitch[] = "
CASE
WHEN counter = $limit THEN '" . $this->othersLabelValue . "'
ELSE `$column`
END AS `$column`
";
}
$labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch);
// generate select clauses for additional columns
$additionalColumnsString = '';
$additionalColumnsAggregatedString = '';
foreach ($this->additionalColumns as $additionalColumn => $aggregation) {
$additionalColumnsString .= ', `' . $additionalColumn . '`';
if ($aggregation !== false) {
$additionalColumnsAggregatedString .= ', ' . $aggregation . '(`' . $additionalColumn . '`) AS `' . $additionalColumn . '`';
} else {
$additionalColumnsAggregatedString .= ', `' . $additionalColumn . '`';
}
}
// initialize the counters
if ($this->partitionColumn !== false) {
$initCounter = '';
foreach ($this->partitionColumnValues as $value) {
$initCounter .= '( SELECT @counter' . intval($value) . ':=0 ) initCounter' . intval($value) . ', ';
}
} else {
$initCounter = '( SELECT @counter:=0 ) initCounter,';
}
// add a counter to the query
// we rely on the sorting of the inner query
$withCounter = "
SELECT
$labelColumnsString,
$counterExpression AS counter
$additionalColumnsString
FROM
$initCounter
( $innerQuery ) actualQuery
";
// group by the counter - this groups "Others" because the counter stops at $limit
$groupBy = 'counter';
if ($this->partitionColumn !== false) {
$groupBy .= ', `' . $this->partitionColumn . '`';
}
$groupOthers = "
SELECT
$labelColumnsOthersSwitch
$additionalColumnsAggregatedString
FROM ( $withCounter ) AS withCounter
GROUP BY $groupBy
";
return $groupOthers;
}
private function getCounterExpression($limit)
{
$whens = array();
if ($this->columnToMarkExcludedRows !== false) {
// when a row has been specified that marks which records should be excluded
// from limiting, we don't give those rows the normal counter but -1 times the
// value they had before. this way, they have a separate number space (i.e. negative
// integers).
$whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}";
}
if ($this->partitionColumn !== false) {
// partition: one counter per possible value
foreach ($this->partitionColumnValues as $value) {
$isValue = '`' . $this->partitionColumn . '` = ' . intval($value);
$counter = '@counter' . intval($value);
$whens[] = "WHEN $isValue AND $counter = $limit THEN $limit";
$whens[] = "WHEN $isValue THEN $counter:=$counter+1";
}
$whens[] = "ELSE 0";
} else {
// no partitioning: add a single counter
$whens[] = "WHEN @counter = $limit THEN $limit";
$whens[] = "ELSE @counter:=@counter+1";
}
return "
CASE
" . implode("
", $whens) . "
END
";
}
}