// Specifically, the partial price indexer is taking huge amounts of time because something is putting in // a lot of duplicate entries, e.g. 24 rows for the same product. This can add up to tens of thousands or millions // of rows in the change log table. The indexer status shows the "distinct" count, but the partial indexer, based on // the way it batches, does not. Pretty sure it is the same as these tickets: // https://github.com/magento/magento2/issues/30012 // https://github.com/magento/magento2/issues/35072 // (not a good description, because the full indexer gets ids a different way and does not have this issue) // Not ideal for us to be trying to mess with core indexer functionality, and likely not the cleanest solution, but // it works to not have the partial price indexer effectively stuck and blocking all other indexing. // Even without duplicates, on cloud, since increment ids go up by 3, the current batching would not seem to work // ideally because a batch of 1000 based on version_id auto-increments would only do about 333. // The delete query is also very slow when there are a lot of rows. // https://github.com/magento/magento2/commit/04f6ac08242db072a832350faf17cc2f90ad7aee --- a/vendor/magento/framework/Mview/View.php +++ b/vendor/magento/framework/Mview/View.php @@ -304,15 +304,45 @@ $vsFrom = $lastVersionId; while ($vsFrom < $currentVersionId) { - $walker = $this->getWalker(); - $ids = $walker->walk($this->getChangelog(), $vsFrom, $currentVersionId, $batchSize); + $ids = $this->getBatchOfIds($vsFrom, $currentVersionId); + // We run the actual indexer in batches. + // Chunked AFTER loading to avoid duplicates in separate chunks.
+ $chunks = array_chunk($ids, $batchSize); + foreach ($chunks as $ids) { + $action->execute($ids); + } + } + } - if (empty($ids)) { + /** + * Max versions to load from database at a time + */ + private static $maxVersionQueryBatch = 100000; + + /** + * Get a de-duplicated batch of entity ids from the changelog + * + * @param int $lastVersionId Passed by reference; set to the upper version bound of each getList() call made + * @param int $currentVersionId Upper bound: no version range beyond this value is requested + * @return array Keys of the merged, flipped getList() results, so each returned id appears once + */ + private function getBatchOfIds(int &$lastVersionId, int $currentVersionId): array + { + $ids = []; + $versionBatchSize = self::$maxVersionQueryBatch; + $idsBatchSize = self::$maxVersionQueryBatch; + for ($vsFrom = $lastVersionId; $vsFrom < $currentVersionId; $vsFrom += $versionBatchSize) { + // Don't go past the current version for atomicity. + $versionTo = min($currentVersionId, $vsFrom + $versionBatchSize); + /** To avoid duplicate ids need to flip and merge the array */ + $ids += array_flip($this->getChangelog()->getList($vsFrom, $versionTo)); + $lastVersionId = $versionTo; + if (count($ids) >= $idsBatchSize) { break; } - $vsFrom += $batchSize; - $action->execute($ids); } + + return array_keys($ids); } /**