forked from vergnet/site-accueil-insa
333 lines
13 KiB
PHP
333 lines
13 KiB
PHP
<?php
|
|
/**
|
|
* Matomo - free/libre analytics platform
|
|
*
|
|
* @link https://matomo.org
|
|
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
|
*
|
|
*/
|
|
|
|
namespace Piwik\DataAccess\LogQueryBuilder;
|
|
|
|
use Exception;
|
|
use Piwik\Common;
|
|
use Piwik\DataAccess\LogAggregator;
|
|
use Piwik\Tracker\LogTable;
|
|
|
|
class JoinGenerator
|
|
{
|
|
/**
|
|
* @var JoinTables
|
|
*/
|
|
protected $tables;
|
|
|
|
/**
|
|
* @var bool
|
|
*/
|
|
private $joinWithSubSelect = false;
|
|
|
|
/**
|
|
* @var string
|
|
*/
|
|
private $joinString = '';
|
|
|
|
/**
|
|
* @var array
|
|
*/
|
|
private $nonVisitJoins = array();
|
|
|
|
public function __construct(JoinTables $tables)
|
|
{
|
|
$this->tables = $tables;
|
|
$this->addMissingTablesNeededForJoins();
|
|
}
|
|
|
|
private function addMissingTablesNeededForJoins()
|
|
{
|
|
foreach ($this->tables as $index => $table) {
|
|
if (is_array($table)) {
|
|
continue;
|
|
}
|
|
|
|
$logTable = $this->tables->getLogTable($table);
|
|
|
|
if (!$logTable->getColumnToJoinOnIdVisit()) {
|
|
$tableNameToJoin = $logTable->getLinkTableToBeAbleToJoinOnVisit();
|
|
|
|
if (empty($tableNameToJoin) && $logTable->getWaysToJoinToOtherLogTables()) {
|
|
foreach ($logTable->getWaysToJoinToOtherLogTables() as $otherLogTable => $column) {
|
|
if ($this->tables->hasJoinedTable($otherLogTable)) {
|
|
$this->tables->addTableDependency($table, $otherLogTable);
|
|
continue;
|
|
}
|
|
if ($this->tables->isTableJoinableOnVisit($otherLogTable) || $this->tables->isTableJoinableOnAction($otherLogTable)) {
|
|
$this->addMissingTablesForOtherTableJoin($otherLogTable, $table);
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
|
|
if ($index > 0 && !$this->tables->hasJoinedTable($tableNameToJoin)) {
|
|
$this->tables->addTableToJoin($tableNameToJoin);
|
|
}
|
|
|
|
if ($this->tables->hasJoinedTable($tableNameToJoin)) {
|
|
$this->generateNonVisitJoins($table, $tableNameToJoin, $index);
|
|
}
|
|
}
|
|
}
|
|
|
|
foreach ($this->tables as $index => $table) {
|
|
if (is_array($table)) {
|
|
if (!isset($table['tableAlias'])) {
|
|
$tableName = $table['table'];
|
|
$numTables = count($this->tables);
|
|
for ($j = $index + 1; $j < $numTables; $j++) {
|
|
if (!isset($this->tables[$j])) {
|
|
continue;
|
|
}
|
|
|
|
$tableOther = $this->tables[$j];
|
|
if (is_string($tableOther) && $tableOther === $tableName) {
|
|
unset($this->tables[$j]);
|
|
}
|
|
}
|
|
}
|
|
} elseif (is_string($table)) {
|
|
$numTables = count($this->tables);
|
|
|
|
for ($j = $index + 1; $j < $numTables; $j++) {
|
|
if (isset($this->tables[$j]) && is_array($this->tables[$j]) && !isset($this->tables[$j]['tableAlias'])) {
|
|
$tableOther = $this->tables[$j];
|
|
if ($table === $tableOther['table']) {
|
|
$message = sprintf('Please reorganize the joined tables as the table %s in %s cannot be joined correctly. We recommend to join tables with arrays first. %s', $table, json_encode($this->tables), json_encode(debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 10)));
|
|
throw new Exception($message);
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
private function addMissingTablesForOtherTableJoin($tableName, $dependentTable)
|
|
{
|
|
$this->tables->addTableDependency($dependentTable, $tableName);
|
|
|
|
if ($this->tables->hasJoinedTable($tableName)) {
|
|
return;
|
|
}
|
|
|
|
$table = $this->tables->getLogTable($tableName);
|
|
|
|
if ($table->getColumnToJoinOnIdAction() || $table->getColumnToJoinOnIdVisit() || $table->getLinkTableToBeAbleToJoinOnVisit()) {
|
|
$this->tables->addTableToJoin($tableName);
|
|
return;
|
|
}
|
|
|
|
$otherTableJoins = $table->getWaysToJoinToOtherLogTables();
|
|
|
|
foreach ($otherTableJoins as $logTable => $column) {
|
|
$this->addMissingTablesForOtherTableJoin($logTable, $tableName);
|
|
}
|
|
|
|
$this->tables->addTableToJoin($tableName);
|
|
}
|
|
|
|
/**
|
|
* Generate the join sql based on the needed tables
|
|
* @throws Exception if tables can't be joined
|
|
* @return array
|
|
*/
|
|
public function generate()
|
|
{
|
|
/** @var LogTable[] $availableLogTables */
|
|
$availableLogTables = array();
|
|
|
|
$this->tables->sort();
|
|
|
|
foreach ($this->tables as $i => $table) {
|
|
if (is_array($table)) {
|
|
|
|
// join condition provided
|
|
$alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table'];
|
|
|
|
if (isset($table['join'])) {
|
|
$this->joinString .= ' ' . $table['join'];
|
|
} else {
|
|
$this->joinString .= ' LEFT JOIN';
|
|
}
|
|
|
|
if (!isset($table['joinOn']) && $this->tables->getLogTable($table['table'])) {
|
|
$logTable = $this->tables->getLogTable($table['table']);
|
|
if (!empty($availableLogTables)) {
|
|
$table['joinOn'] = $this->findJoinCriteriasForTables($logTable, $availableLogTables);
|
|
}
|
|
if (!isset($table['tableAlias'])) {
|
|
// eg array('table' => 'log_link_visit_action', 'join' => 'RIGHT JOIN')
|
|
// we treat this like a regular string table which we can join automatically
|
|
$availableLogTables[$table['table']] = $logTable;
|
|
}
|
|
}
|
|
|
|
$this->joinString .= ' ' . Common::prefixTable($table['table']) . " AS " . $alias
|
|
. " ON " . $table['joinOn'];
|
|
continue;
|
|
}
|
|
|
|
$tableSql = Common::prefixTable($table) . " AS $table";
|
|
|
|
$logTable = $this->tables->getLogTable($table);
|
|
|
|
if ($i == 0) {
|
|
// first table
|
|
$this->joinString .= $tableSql;
|
|
} else {
|
|
|
|
$join = $this->findJoinCriteriasForTables($logTable, $availableLogTables);
|
|
|
|
if ($join === null) {
|
|
$availableLogTables[$table] = $logTable;
|
|
continue;
|
|
}
|
|
|
|
$joinName = 'LEFT JOIN';
|
|
if ($i > 0
|
|
&& $this->tables[$i - 1]
|
|
&& is_string($this->tables[$i - 1])
|
|
&& strpos($this->tables[$i - 1], LogAggregator::LOG_TABLE_SEGMENT_TEMPORARY_PREFIX) === 0) {
|
|
$joinName = 'INNER JOIN';
|
|
// when we archive a segment there will be eg `logtmpsegment$HASH` as first table.
|
|
// then we join log_conversion for example... if we didn't use INNER JOIN we would as a result
|
|
// get rows for visits even when they didn't have a conversion. Instead we only want to find rows
|
|
// that have an entry in both tables when doing eg
|
|
// logtmpsegment57cd546b7203d68a41027547c4abe1a2.idvisit = log_conversion.idvisit
|
|
}
|
|
// the join sql the default way
|
|
$this->joinString .= " $joinName $tableSql ON " . $join;
|
|
}
|
|
|
|
$availableLogTables[$table] = $logTable;
|
|
}
|
|
}
|
|
|
|
public function getJoinString()
|
|
{
|
|
return $this->joinString;
|
|
}
|
|
|
|
public function shouldJoinWithSelect()
|
|
{
|
|
return $this->joinWithSubSelect;
|
|
}
|
|
|
|
/**
|
|
* @param LogTable $logTable
|
|
* @param LogTable[] $availableLogTables
|
|
* @return string|null returns null in case the table is already joined, or the join string if the table needs
|
|
* to be joined
|
|
* @throws Exception if table cannot be joined for segmentation
|
|
*/
|
|
public function findJoinCriteriasForTables(LogTable $logTable, $availableLogTables)
|
|
{
|
|
$join = null;
|
|
$alternativeJoin = null;
|
|
$table = $logTable->getName();
|
|
|
|
foreach ($availableLogTables as $availableLogTable) {
|
|
if ($logTable->getColumnToJoinOnIdVisit() && $availableLogTable->getColumnToJoinOnIdVisit()) {
|
|
|
|
$join = sprintf("%s.%s = %s.%s", $table, $logTable->getColumnToJoinOnIdVisit(),
|
|
$availableLogTable->getName(), $availableLogTable->getColumnToJoinOnIdVisit());
|
|
$alternativeJoin = sprintf("%s.%s = %s.%s", $availableLogTable->getName(), $availableLogTable->getColumnToJoinOnIdVisit(),
|
|
$table, $logTable->getColumnToJoinOnIdVisit());
|
|
|
|
if ($availableLogTable->shouldJoinWithSubSelect()) {
|
|
$this->joinWithSubSelect = true;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
if ($logTable->getColumnToJoinOnIdAction() && $availableLogTable->getColumnToJoinOnIdAction()) {
|
|
if (isset($this->nonVisitJoins[$logTable->getName()][$availableLogTable->getName()])) {
|
|
$join = $this->nonVisitJoins[$logTable->getName()][$availableLogTable->getName()];
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
$otherJoins = $logTable->getWaysToJoinToOtherLogTables();
|
|
foreach ($otherJoins as $joinTable => $column) {
|
|
if($availableLogTable->getName() == $joinTable) {
|
|
$join = sprintf("`%s`.`%s` = `%s`.`%s`", $table, $column, $availableLogTable->getName(), $column);
|
|
break;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
if (!isset($join)) {
|
|
throw new Exception("Table '$table' can't be joined for segmentation");
|
|
}
|
|
|
|
if ($this->tables->hasJoinedTableManually($table, $join)
|
|
|| $this->tables->hasJoinedTableManually($table, $alternativeJoin)) {
|
|
// already joined, no need to join it again
|
|
return null;
|
|
}
|
|
|
|
if ($table == 'log_conversion_item') { // by default we don't want to consider deleted columns
|
|
$join .= sprintf(' AND `%s`.deleted = 0', $table);
|
|
}
|
|
|
|
return $join;
|
|
}
|
|
|
|
/**
|
|
* This code is a bit tricky. We have to execute this right at the beginning before actually iterating over all the
|
|
* tables and generating the join string as we may have to delete a table from the tables. If we did not delete
|
|
* this table upfront, we would have maybe already added a joinString for that table, even though it will be later
|
|
* removed by another table. This means if we wouldn't delete/unset that table upfront, we would need to alter
|
|
* an already generated join string which would not be really nice code as well.
|
|
*
|
|
* Next problem is, because we are deleting a table, we have to remember the "joinOn" string for that table in a
|
|
* property "nonVisitJoins". Otherwise we would not be able to generate the correct "joinOn" string when actually
|
|
* iterating over all the tables to generate that string.
|
|
*
|
|
* @param $tableName
|
|
* @param $tableNameToJoin
|
|
* @param $index
|
|
*/
|
|
protected function generateNonVisitJoins($tableName, $tableNameToJoin, $index)
|
|
{
|
|
$logTable = $this->tables->getLogTable($tableName);
|
|
$logTableToJoin = $this->tables->getLogTable($tableNameToJoin);
|
|
|
|
$nonVisitJoin = sprintf("%s.%s = %s.%s", $logTableToJoin->getName(), $logTableToJoin->getColumnToJoinOnIdAction(),
|
|
$tableName, $logTable->getColumnToJoinOnIdAction());
|
|
|
|
$altNonVisitJoin = sprintf("%s.%s = %s.%s", $tableName, $logTable->getColumnToJoinOnIdAction(),
|
|
$logTableToJoin->getName(), $logTableToJoin->getColumnToJoinOnIdAction());
|
|
|
|
if ($index > 0
|
|
&& $this->tables->hasAddedTableManually($tableName)
|
|
&& !$this->tables->hasJoinedTableManually($tableName, $nonVisitJoin)
|
|
&& !$this->tables->hasJoinedTableManually($tableName, $altNonVisitJoin)) {
|
|
$tableIndex = $this->tables->findIndexOfManuallyAddedTable($tableName);
|
|
$nonVisitJoin = '(' . $this->tables[$tableIndex]['joinOn'] . ' AND ' . $nonVisitJoin . ')';
|
|
unset($this->tables[$tableIndex]);
|
|
}
|
|
|
|
if (!isset($this->nonVisitJoins[$tableName])) {
|
|
$this->nonVisitJoins[$tableName] = array();
|
|
}
|
|
|
|
if (!isset($this->nonVisitJoins[$tableNameToJoin])) {
|
|
$this->nonVisitJoins[$tableNameToJoin] = array();
|
|
}
|
|
|
|
$this->nonVisitJoins[$tableName][$tableNameToJoin] = $nonVisitJoin;
|
|
$this->nonVisitJoins[$tableNameToJoin][$tableName] = $nonVisitJoin;
|
|
}
|
|
}
|