forked from vergnet/site-accueil-insa
586 lines
21 KiB
PHP
586 lines
21 KiB
PHP
<?php
|
|
/**
|
|
* Matomo - free/libre analytics platform
|
|
*
|
|
* @link https://matomo.org
|
|
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
|
|
*
|
|
*/
|
|
namespace Piwik\ArchiveProcessor;
|
|
|
|
use Piwik\Archive\ArchiveInvalidator;
|
|
use Piwik\Cache;
|
|
use Piwik\Common;
|
|
use Piwik\Config;
|
|
use Piwik\Container\StaticContainer;
|
|
use Piwik\Context;
|
|
use Piwik\DataAccess\ArchiveSelector;
|
|
use Piwik\DataAccess\ArchiveWriter;
|
|
use Piwik\DataAccess\Model;
|
|
use Piwik\DataAccess\RawLogDao;
|
|
use Piwik\Date;
|
|
use Piwik\Period;
|
|
use Piwik\Piwik;
|
|
use Piwik\SettingsServer;
|
|
use Piwik\Site;
|
|
use Psr\Log\LoggerInterface;
|
|
use Piwik\CronArchive\SegmentArchiving;
|
|
|
|
/**
|
|
* This class uses PluginsArchiver class to trigger data aggregation and create archives.
|
|
*/
|
|
class Loader
|
|
{
|
|
private static $archivingDepth = 0;
|
|
|
|
/**
|
|
* @var Parameters
|
|
*/
|
|
protected $params;
|
|
|
|
/**
|
|
* @var ArchiveInvalidator
|
|
*/
|
|
private $invalidator;
|
|
|
|
/**
|
|
* @var \Matomo\Cache\Cache
|
|
*/
|
|
private $cache;
|
|
|
|
/**
|
|
* @var LoggerInterface
|
|
*/
|
|
private $logger;
|
|
|
|
/**
|
|
* @var RawLogDao
|
|
*/
|
|
private $rawLogDao;
|
|
|
|
/**
|
|
* @var Model
|
|
*/
|
|
private $dataAccessModel;
|
|
|
|
|
|
public function __construct(Parameters $params, $invalidateBeforeArchiving = false)
|
|
{
|
|
$this->params = $params;
|
|
$this->invalidateBeforeArchiving = $invalidateBeforeArchiving;
|
|
$this->invalidator = StaticContainer::get(ArchiveInvalidator::class);
|
|
$this->cache = Cache::getTransientCache();
|
|
$this->logger = StaticContainer::get(LoggerInterface::class);
|
|
$this->rawLogDao = new RawLogDao();
|
|
$this->dataAccessModel = new Model();
|
|
}
|
|
|
|
/**
|
|
* @return bool
|
|
*/
|
|
protected function isThereSomeVisits($visits)
|
|
{
|
|
return $visits > 0;
|
|
}
|
|
|
|
/**
|
|
* @return bool
|
|
*/
|
|
protected function mustProcessVisitCount($visits)
|
|
{
|
|
return $visits === false;
|
|
}
|
|
|
|
public function prepareArchive($pluginName)
|
|
{
|
|
return Context::changeIdSite($this->params->getSite()->getId(), function () use ($pluginName) {
|
|
try {
|
|
++self::$archivingDepth;
|
|
return $this->prepareArchiveImpl($pluginName);
|
|
} finally {
|
|
--self::$archivingDepth;
|
|
}
|
|
});
|
|
}
|
|
|
|
/**
|
|
* @throws \Exception
|
|
*/
|
|
private function prepareArchiveImpl($pluginName)
|
|
{
|
|
$this->params->setRequestedPlugin($pluginName);
|
|
|
|
if (SettingsServer::isArchivePhpTriggered()) {
|
|
$requestedReport = Common::getRequestVar('requestedReport', '', 'string');
|
|
if (!empty($requestedReport)) {
|
|
$this->params->setArchiveOnlyReport($requestedReport);
|
|
}
|
|
}
|
|
|
|
// invalidate existing archives before we start archiving in case data was tracked in the past. if the archive is
|
|
// made invalid, we will correctly re-archive below.
|
|
if ($this->invalidateBeforeArchiving
|
|
&& Rules::isBrowserTriggerEnabled()
|
|
) {
|
|
$this->invalidatedReportsIfNeeded();
|
|
}
|
|
// load existing data from archive
|
|
$data = $this->loadArchiveData();
|
|
if (sizeof($data) == 2) {
|
|
return $data;
|
|
}
|
|
list($idArchives, $visits, $visitsConverted) = $data;
|
|
|
|
// only lock meet those conditions
|
|
if ($this->params->isRootArchiveRequest() && !SettingsServer::isArchivePhpTriggered()) {
|
|
$lockId = $this->makeArchivingLockId();
|
|
|
|
//ini lock
|
|
$lock = new LoaderLock($lockId);
|
|
|
|
//set mysql lock the entire process if another process is running
|
|
$lock->setLock();
|
|
|
|
try {
|
|
$data = $this->loadArchiveData();
|
|
|
|
if (sizeof($data) == 2) {
|
|
return $data;
|
|
}
|
|
|
|
list($idArchives, $visits, $visitsConverted) = $data;
|
|
|
|
return $this->insertArchiveData($visits, $visitsConverted);
|
|
} finally {
|
|
$lock->unlock();
|
|
}
|
|
} else {
|
|
|
|
return $this->insertArchiveData($visits, $visitsConverted);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* @param $visits
|
|
* @param $visitsConverted
|
|
* @return array|false[]
|
|
*/
|
|
protected function insertArchiveData($visits, $visitsConverted)
|
|
{
|
|
if (SettingsServer::isArchivePhpTriggered()) {
|
|
$this->logger->info("initiating archiving via core:archive for " . $this->params);
|
|
}
|
|
|
|
list($visits, $visitsConverted) = $this->prepareCoreMetricsArchive($visits, $visitsConverted);
|
|
list($idArchive, $visits) = $this->prepareAllPluginsArchive($visits, $visitsConverted);
|
|
|
|
if ($this->isThereSomeVisits($visits) || PluginsArchiver::doesAnyPluginArchiveWithoutVisits()) {
|
|
return [[$idArchive], $visits];
|
|
}
|
|
|
|
return [false, false];
|
|
}
|
|
|
|
/**
|
|
* @return string
|
|
* @throws \Exception
|
|
*/
|
|
private function makeArchivingLockId()
|
|
{
|
|
$doneFlag = Rules::getDoneStringFlagFor([$this->params->getSite()->getId()], $this->params->getSegment(),
|
|
$this->params->getPeriod()->getLabel(), $this->params->getRequestedPlugin());
|
|
|
|
return $this->params->getPeriod()->getDateStart()->toString() . $this->params->getPeriod()->getDateEnd()->toString() .'.'. $doneFlag;
|
|
|
|
}
|
|
|
|
/**
|
|
* @return array|false[]
|
|
*/
|
|
protected function loadArchiveData()
|
|
{
|
|
// this hack was used to check the main function goes to return or continue
|
|
// NOTE: $idArchives will contain the latest DONE_OK/DONE_INVALIDATED archive as well as any partial archives
|
|
// with a ts_archived >= the DONE_OK/DONE_INVALIDATED date.
|
|
list($idArchives, $visits, $visitsConverted, $isAnyArchiveExists, $tsArchived, $value) = $this->loadExistingArchiveIdFromDb();
|
|
|
|
if (!empty($idArchives)
|
|
&& !Rules::isActuallyForceArchivingSinglePlugin()
|
|
&& !$this->shouldForceInvalidatedArchive($value, $tsArchived)) {
|
|
// we have a usable idarchive (it's not invalidated and it's new enough), and we are not archiving
|
|
// a single report
|
|
return [$idArchives, $visits];
|
|
}
|
|
|
|
// NOTE: this optimization helps when archiving large periods. eg, if archiving a year w/ a segment where
|
|
// there are not visits in the entire year, we don't have to go through and do anything. but, w/o this
|
|
// code, we will end up launching archiving for each month, week and day, even though we don't have to.
|
|
//
|
|
// we don't create an archive in this case, because the archive may be in progress in some way, so a 0
|
|
// visits archive can be inaccurate in the long run.
|
|
if ($this->canSkipThisArchive()) {
|
|
if (!empty($idArchives)) {
|
|
return [$idArchives, $visits];
|
|
} else {
|
|
return [false, 0];
|
|
}
|
|
}
|
|
|
|
return [$idArchives, $visits, $visitsConverted];
|
|
}
|
|
|
|
/**
|
|
* Prepares the core metrics if needed.
|
|
*
|
|
* @param $visits
|
|
* @return array
|
|
*/
|
|
protected function prepareCoreMetricsArchive($visits, $visitsConverted)
|
|
{
|
|
$createSeparateArchiveForCoreMetrics = $this->mustProcessVisitCount($visits)
|
|
&& !$this->doesRequestedPluginIncludeVisitsSummary();
|
|
|
|
if ($createSeparateArchiveForCoreMetrics) {
|
|
$requestedPlugin = $this->params->getRequestedPlugin();
|
|
$requestedReport = $this->params->getArchiveOnlyReport();
|
|
$isPartialArchive = $this->params->isPartialArchive();
|
|
|
|
$this->params->setRequestedPlugin('VisitsSummary');
|
|
$this->params->setArchiveOnlyReport(null);
|
|
$this->params->setIsPartialArchive(false);
|
|
|
|
$metrics = Context::executeWithQueryParameters(['requestedReport' => ''], function () {
|
|
$pluginsArchiver = new PluginsArchiver($this->params);
|
|
$metrics = $pluginsArchiver->callAggregateCoreMetrics();
|
|
$pluginsArchiver->finalizeArchive();
|
|
return $metrics;
|
|
});
|
|
|
|
$this->params->setRequestedPlugin($requestedPlugin);
|
|
$this->params->setArchiveOnlyReport($requestedReport);
|
|
$this->params->setIsPartialArchive($isPartialArchive);
|
|
|
|
$visits = $metrics['nb_visits'];
|
|
$visitsConverted = $metrics['nb_visits_converted'];
|
|
}
|
|
|
|
return array($visits, $visitsConverted);
|
|
}
|
|
|
|
protected function prepareAllPluginsArchive($visits, $visitsConverted)
|
|
{
|
|
$pluginsArchiver = new PluginsArchiver($this->params);
|
|
|
|
if ($this->mustProcessVisitCount($visits)
|
|
|| $this->doesRequestedPluginIncludeVisitsSummary()
|
|
) {
|
|
$metrics = $pluginsArchiver->callAggregateCoreMetrics();
|
|
$visits = $metrics['nb_visits'];
|
|
$visitsConverted = $metrics['nb_visits_converted'];
|
|
}
|
|
|
|
$forceArchivingWithoutVisits = !$this->isThereSomeVisits($visits) && $this->shouldArchiveForSiteEvenWhenNoVisits();
|
|
$pluginsArchiver->callAggregateAllPlugins($visits, $visitsConverted, $forceArchivingWithoutVisits);
|
|
|
|
$idArchive = $pluginsArchiver->finalizeArchive();
|
|
|
|
return array($idArchive, $visits);
|
|
}
|
|
|
|
protected function doesRequestedPluginIncludeVisitsSummary()
|
|
{
|
|
$processAllReportsIncludingVisitsSummary =
|
|
Rules::shouldProcessReportsAllPlugins(array($this->params->getSite()->getId()), $this->params->getSegment(), $this->params->getPeriod()->getLabel());
|
|
$doesRequestedPluginIncludeVisitsSummary = $processAllReportsIncludingVisitsSummary
|
|
|| $this->params->getRequestedPlugin() == 'VisitsSummary';
|
|
return $doesRequestedPluginIncludeVisitsSummary;
|
|
}
|
|
|
|
protected function isArchivingForcedToTrigger()
|
|
{
|
|
$period = $this->params->getPeriod()->getLabel();
|
|
$debugSetting = 'always_archive_data_period'; // default
|
|
|
|
if ($period == 'day') {
|
|
$debugSetting = 'always_archive_data_day';
|
|
} elseif ($period == 'range') {
|
|
$debugSetting = 'always_archive_data_range';
|
|
}
|
|
|
|
return (bool) Config::getInstance()->Debug[$debugSetting];
|
|
}
|
|
|
|
/**
|
|
* Returns the idArchive if the archive is available in the database for the requested plugin.
|
|
* Returns false if the archive needs to be processed.
|
|
*
|
|
* (public for tests)
|
|
*
|
|
* @return array
|
|
*/
|
|
public function loadExistingArchiveIdFromDb()
|
|
{
|
|
if ($this->isArchivingForcedToTrigger()) {
|
|
$this->logger->debug("Archiving forced to trigger for {$this->params}.");
|
|
|
|
// return no usable archive found, and no existing archive. this will skip invalidation, which should
|
|
// be fine since we just force archiving.
|
|
return [false, false, false, false, false, false];
|
|
}
|
|
|
|
$minDatetimeArchiveProcessedUTC = $this->getMinTimeArchiveProcessed();
|
|
$result = ArchiveSelector::getArchiveIdAndVisits($this->params, $minDatetimeArchiveProcessedUTC);
|
|
return $result;
|
|
}
|
|
|
|
/**
|
|
* Returns the minimum archive processed datetime to look at. Only public for tests.
|
|
*
|
|
* @return int|bool Datetime timestamp, or false if must look at any archive available
|
|
*/
|
|
protected function getMinTimeArchiveProcessed()
|
|
{
|
|
// for range periods we can archive in a browser request request, make sure to check for the ttl no matter what
|
|
$isRangeArchiveAndArchivingEnabled = $this->params->getPeriod()->getLabel() == 'range'
|
|
&& Rules::isArchivingEnabledFor([$this->params->getSite()->getId()], $this->params->getSegment(), $this->params->getPeriod()->getLabel());
|
|
|
|
if (!$isRangeArchiveAndArchivingEnabled) {
|
|
$endDateTimestamp = self::determineIfArchivePermanent($this->params->getDateEnd());
|
|
if ($endDateTimestamp) {
|
|
// past archive
|
|
return $endDateTimestamp;
|
|
}
|
|
}
|
|
|
|
$dateStart = $this->params->getDateStart();
|
|
$period = $this->params->getPeriod();
|
|
$segment = $this->params->getSegment();
|
|
$site = $this->params->getSite();
|
|
// in-progress archive
|
|
return Rules::getMinTimeProcessedForInProgressArchive($dateStart, $period, $segment, $site);
|
|
}
|
|
|
|
protected static function determineIfArchivePermanent(Date $dateEnd)
|
|
{
|
|
$now = time();
|
|
$endTimestampUTC = strtotime($dateEnd->getDateEndUTC());
|
|
|
|
if ($endTimestampUTC <= $now) {
|
|
// - if the period we are looking for is finished, we look for a ts_archived that
|
|
// is greater than the last day of the archive
|
|
return $endTimestampUTC;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
private function shouldArchiveForSiteEvenWhenNoVisits()
|
|
{
|
|
$idSitesToArchive = $this->getIdSitesToArchiveWhenNoVisits();
|
|
return in_array($this->params->getSite()->getId(), $idSitesToArchive);
|
|
}
|
|
|
|
private function getIdSitesToArchiveWhenNoVisits()
|
|
{
|
|
$cache = Cache::getTransientCache();
|
|
$cacheKey = 'Archiving.getIdSitesToArchiveWhenNoVisits';
|
|
|
|
if (!$cache->contains($cacheKey)) {
|
|
$idSites = array();
|
|
|
|
// leaving undocumented unless decided otherwise
|
|
Piwik::postEvent('Archiving.getIdSitesToArchiveWhenNoVisits', array(&$idSites));
|
|
|
|
$cache->save($cacheKey, $idSites);
|
|
}
|
|
|
|
return $cache->fetch($cacheKey);
|
|
}
|
|
|
|
// public for tests
|
|
public function getReportsToInvalidate()
|
|
{
|
|
$sitesPerDays = $this->invalidator->getRememberedArchivedReportsThatShouldBeInvalidated();
|
|
|
|
foreach ($sitesPerDays as $dateStr => $siteIds) {
|
|
if (empty($siteIds)
|
|
|| !in_array($this->params->getSite()->getId(), $siteIds)
|
|
) {
|
|
unset($sitesPerDays[$dateStr]);
|
|
}
|
|
|
|
$date = Date::factory($dateStr);
|
|
if ($date->isEarlier($this->params->getPeriod()->getDateStart())
|
|
|| $date->isLater($this->params->getPeriod()->getDateEnd())
|
|
) { // date in list is not the current date, so ignore it
|
|
unset($sitesPerDays[$dateStr]);
|
|
}
|
|
}
|
|
|
|
return $sitesPerDays;
|
|
}
|
|
|
|
private function invalidatedReportsIfNeeded()
|
|
{
|
|
$sitesPerDays = $this->getReportsToInvalidate();
|
|
if (empty($sitesPerDays)) {
|
|
return;
|
|
}
|
|
|
|
foreach ($sitesPerDays as $date => $siteIds) {
|
|
try {
|
|
$this->invalidator->markArchivesAsInvalidated([$this->params->getSite()->getId()], array(Date::factory($date)), false, $this->params->getSegment());
|
|
} catch (\Exception $e) {
|
|
Site::clearCache();
|
|
throw $e;
|
|
}
|
|
}
|
|
|
|
Site::clearCache();
|
|
}
|
|
|
|
public function canSkipThisArchive()
|
|
{
|
|
$params = $this->params;
|
|
$idSite = $params->getSite()->getId();
|
|
|
|
$isWebsiteUsingTracker = $this->isWebsiteUsingTheTracker($idSite);
|
|
$isArchivingForcedWhenNoVisits = $this->shouldArchiveForSiteEvenWhenNoVisits();
|
|
$hasSiteVisitsBetweenTimeframe = $this->hasSiteVisitsBetweenTimeframe($idSite, $params->getPeriod());
|
|
$hasChildArchivesInPeriod = $this->dataAccessModel->hasChildArchivesInPeriod($idSite, $params->getPeriod());
|
|
|
|
if ($this->canSkipArchiveForSegment()) {
|
|
return true;
|
|
}
|
|
|
|
return $isWebsiteUsingTracker
|
|
&& !$isArchivingForcedWhenNoVisits
|
|
&& !$hasSiteVisitsBetweenTimeframe
|
|
&& !$hasChildArchivesInPeriod;
|
|
}
|
|
|
|
public function canSkipArchiveForSegment()
|
|
{
|
|
$params = $this->params;
|
|
|
|
if ($params->getSegment()->isEmpty()) {
|
|
return false;
|
|
}
|
|
|
|
if (!empty($params->getRequestedPlugin()) && Rules::isSegmentPluginArchivingDisabled($params->getRequestedPlugin(), $params->getSite()->getId())) {
|
|
return true;
|
|
}
|
|
|
|
/** @var SegmentArchiving */
|
|
$segmentArchiving = StaticContainer::get(SegmentArchiving::class);
|
|
$segmentInfo = $segmentArchiving->findSegmentForHash($params->getSegment()->getHash(), $params->getSite()->getId());
|
|
|
|
if (!$segmentInfo) {
|
|
return false;
|
|
}
|
|
|
|
$segmentArchiveStartDate = $segmentArchiving->getReArchiveSegmentStartDate($segmentInfo);
|
|
|
|
if ($segmentArchiveStartDate !==null && $segmentArchiveStartDate->isLater($params->getPeriod()->getDateEnd()->getEndOfDay())) {
|
|
$doneFlag = Rules::getDoneStringFlagFor(
|
|
[$params->getSite()->getId()],
|
|
$params->getSegment(),
|
|
$params->getPeriod()->getLabel(),
|
|
$params->getRequestedPlugin()
|
|
);
|
|
|
|
// if there is no invalidation where the report is null, we can skip
|
|
// if we have invalidations for the period and name, but only for a specific reports, we can skip
|
|
// if the report is not null we only want to rearchive if we have invalidation for that report
|
|
// if we don't find invalidation for that report, we can skip
|
|
return !$this->dataAccessModel->hasInvalidationForPeriodAndName($params->getSite()->getId(), $params->getPeriod(), $doneFlag, $params->getArchiveOnlyReport());
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
private function isWebsiteUsingTheTracker($idSite)
|
|
{
|
|
$idSitesNotUsingTracker = self::getSitesNotUsingTracker();
|
|
|
|
$isUsingTracker = !in_array($idSite, $idSitesNotUsingTracker);
|
|
|
|
return $isUsingTracker;
|
|
}
|
|
|
|
public static function getSitesNotUsingTracker()
|
|
{
|
|
$cache = Cache::getTransientCache();
|
|
|
|
$cacheKey = 'Archiving.isWebsiteUsingTheTracker';
|
|
$idSitesNotUsingTracker = $cache->fetch($cacheKey);
|
|
if ($idSitesNotUsingTracker === false || !isset($idSitesNotUsingTracker)) {
|
|
// we want to trigger event only once
|
|
$idSitesNotUsingTracker = array();
|
|
|
|
/**
|
|
* This event is triggered when detecting whether there are sites that do not use the tracker.
|
|
*
|
|
* By default we only archive a site when there was actually any visit since the last archiving.
|
|
* However, some plugins do import data from another source instead of using the tracker and therefore
|
|
* will never have any visits for this site. To make sure we still archive data for such a site when
|
|
* archiving for this site is requested, you can listen to this event and add the idSite to the list of
|
|
* sites that do not use the tracker.
|
|
*
|
|
* @param bool $idSitesNotUsingTracker The list of idSites that rather import data instead of using the tracker
|
|
*/
|
|
Piwik::postEvent('CronArchive.getIdSitesNotUsingTracker', array(&$idSitesNotUsingTracker));
|
|
|
|
$cache->save($cacheKey, $idSitesNotUsingTracker);
|
|
}
|
|
return $idSitesNotUsingTracker;
|
|
}
|
|
|
|
private function hasSiteVisitsBetweenTimeframe($idSite, Period $period)
|
|
{
|
|
$timezone = Site::getTimezoneFor($idSite);
|
|
list($date1, $date2) = $period->getBoundsInTimezone($timezone);
|
|
|
|
return $this->rawLogDao->hasSiteVisitsBetweenTimeframe($date1->getDatetime(), $date2->getDatetime(), $idSite);
|
|
}
|
|
|
|
public static function getArchivingDepth()
|
|
{
|
|
return self::$archivingDepth;
|
|
}
|
|
|
|
private function shouldForceInvalidatedArchive($value, $tsArchived)
|
|
{
|
|
$params = $this->params;
|
|
|
|
// the archive is invalidated and we are in a browser request that is allowed archive it
|
|
if ($value == ArchiveWriter::DONE_INVALIDATED
|
|
&& Rules::isArchivingEnabledFor([$params->getSite()->getId()], $params->getSegment(), $params->getPeriod()->getLabel())
|
|
) {
|
|
// if coming from core:archive, force rearchiving, since if we don't the entry will be removed from archive_invalidations
|
|
// w/o being rearchived
|
|
if (SettingsServer::isArchivePhpTriggered()) {
|
|
return true;
|
|
}
|
|
|
|
// if coming from a browser request, and period does not contain today, force rearchiving
|
|
$timezone = $params->getSite()->getTimezone();
|
|
if (!$params->getPeriod()->isDateInPeriod(Date::factoryInTimezone('today', $timezone))) {
|
|
return true;
|
|
}
|
|
|
|
// if coming from a browser request, and period does contain today, check the ttl for the period (done just below this)
|
|
$minDatetimeArchiveProcessedUTC = Rules::getMinTimeProcessedForInProgressArchive(
|
|
$params->getDateStart(), $params->getPeriod(), $params->getSegment(), $params->getSite());
|
|
$minDatetimeArchiveProcessedUTC = Date::factory($minDatetimeArchiveProcessedUTC);
|
|
if ($minDatetimeArchiveProcessedUTC
|
|
&& Date::factory($tsArchived)->isEarlier($minDatetimeArchiveProcessedUTC)
|
|
) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
}
|