bookmarks/lib/Service/FaviconPreviewer.php

149 lines
3.4 KiB
PHP
Raw Normal View History

2018-04-25 22:39:20 +00:00
<?php
2020-09-21 12:25:50 +00:00
/*
* Copyright (c) 2020-2024. The Nextcloud Bookmarks contributors.
2018-04-25 22:39:20 +00:00
*
2020-09-21 12:25:50 +00:00
* This file is licensed under the Affero General Public License version 3 or later. See the COPYING file.
2018-04-25 22:39:20 +00:00
*/
2019-10-26 12:10:49 +00:00
namespace OCA\Bookmarks\Service;
2018-04-25 22:39:20 +00:00
2020-09-21 12:17:46 +00:00
use Exception;
2020-08-02 08:21:53 +00:00
use OCA\Bookmarks\Contract\IBookmarkPreviewer;
use OCA\Bookmarks\Contract\IImage;
2019-10-26 12:10:49 +00:00
use OCA\Bookmarks\Db\Bookmark;
2020-07-26 16:59:56 +00:00
use OCA\Bookmarks\Image;
use OCP\Files\NotFoundException;
use OCP\Files\NotPermittedException;
2020-08-02 08:21:53 +00:00
use OCP\Http\Client\IClient;
use OCP\Http\Client\IClientService;
use Psr\Log\LoggerInterface;
2020-08-02 08:21:53 +00:00
/**
 * Bookmark previewer that uses the favicon of the bookmarked site as preview image.
 *
 * The favicon advertised by the scraped page is tried first, then the
 * conventional "/favicon.ico" at the root of the site. Results, including
 * misses, are stored in the file cache.
 */
class FaviconPreviewer implements IBookmarkPreviewer {
	/**
	 * Lifetime of cache entries in seconds: 16 weeks (roughly four months).
	 */
	public const CACHE_TTL = 4 * 4 * 7 * 24 * 60 * 60;
	/**
	 * Maximum duration of a favicon download, in milliseconds.
	 * The HTTP client takes its timeout in seconds, so this is converted before use.
	 */
	public const HTTP_TIMEOUT = 10 * 1000;
	public const CACHE_PREFIX = 'bookmarks.FaviconPreviewer';

	/**
	 * @var FileCache
	 */
	private $cache;
	/**
	 * @var LinkExplorer
	 */
	private $linkExplorer;
	/**
	 * @var LoggerInterface
	 */
	private $logger;
	/**
	 * @var IClient
	 */
	private $client;
	/**
	 * @var \OCP\IConfig
	 */
	private $config;
	/**
	 * Raw value of the "privacy.enableScraping" app setting ('false' disables this previewer)
	 *
	 * @var string
	 */
	private $enabled;

	public function __construct(FileCache $cache, LinkExplorer $linkExplorer, LoggerInterface $logger, IClientService $clientService, \OCP\IConfig $config) {
		$this->cache = $cache;
		$this->linkExplorer = $linkExplorer;
		$this->logger = $logger;
		$this->client = $clientService->newClient();
		$this->config = $config;
		$this->enabled = $config->getAppValue('bookmarks', 'privacy.enableScraping', 'false');
	}

	/**
	 * Returns the favicon of the bookmarked site, or null if none could be obtained.
	 *
	 * Null is returned right away when scraping is disabled, when no bookmark
	 * is given or when the bookmark is not a web link.
	 *
	 * @param Bookmark $bookmark
	 *
	 * @return IImage|null
	 */
	public function getImage($bookmark): ?IImage {
		if ($this->enabled === 'false') {
			return null;
		}
		if (!isset($bookmark) || !$bookmark->isWebLink()) {
			return null;
		}

		$url = $bookmark->getUrl();
		$key = self::CACHE_PREFIX . '-' . md5($url);

		// Try cache first
		try {
			$cached = $this->cache->get($key);
			if ($cached) {
				// The literal string 'null' marks a lookup that found nothing
				return $cached === 'null' ? null : Image::deserialize($cached);
			}
		} catch (NotFoundException|NotPermittedException $e) {
			// Treat cache errors as a cache miss and look the favicon up again
		}

		// Candidates in order of preference: the icon the page advertises,
		// then the conventional location at the root of the site
		$candidates = [];
		$site = $this->scrapeUrl($url);
		if (isset($site['favicon'])) {
			$candidates[] = $site['favicon'];
		}
		$fallback = $this->getDefaultFaviconUrl($url);
		if ($fallback !== null) {
			$candidates[] = $fallback;
		}

		foreach ($candidates as $candidate) {
			$image = $this->fetchImage($candidate);
			if ($image !== null) {
				$this->cache->set($key, $image->serialize(), self::CACHE_TTL);
				return $image;
			}
		}

		// Remember the miss so the lookup is not repeated on every request
		$this->cache->set($key, 'null', self::CACHE_TTL);
		return null;
	}

	/**
	 * Retrieves the metadata of the given URL from the link explorer.
	 *
	 * @param string $url
	 * @return array getImage() reads the 'favicon' key of the result
	 */
	public function scrapeUrl($url): array {
		return $this->linkExplorer->get($url);
	}

	/**
	 * Builds the URL of the conventional "/favicon.ico" for the site of the given URL.
	 *
	 * A port given in the URL is kept, so sites served on a non-default port
	 * are probed on that port.
	 *
	 * @param string $url
	 * @return string|null null if the URL has no scheme or no host
	 */
	private function getDefaultFaviconUrl($url): ?string {
		$parts = parse_url($url);
		if (!isset($parts['scheme'], $parts['host'])) {
			return null;
		}
		$origin = $parts['scheme'] . '://' . $parts['host'];
		if (isset($parts['port'])) {
			$origin .= ':' . $parts['port'];
		}
		return $origin . '/favicon.ico';
	}

	/**
	 * Downloads the given URL and wraps the response in an Image.
	 *
	 * @param string $url
	 * @return Image|null null if the request fails, the status code is not 200
	 *                    or the content type does not start with "image"
	 */
	protected function fetchImage(string $url): ?Image {
		try {
			// HTTP_TIMEOUT is in milliseconds, the 'timeout' request option is in seconds
			$response = $this->client->get($url, ['timeout' => self::HTTP_TIMEOUT / 1000]);
		} catch (Exception $e) {
			$this->logger->debug($e->getMessage(), ['app' => 'bookmarks']);
			return null;
		}

		// Some HTTP error occurred :/
		if ($response->getStatusCode() !== 200) {
			return null;
		}

		// It's not actually an image, doh.
		$contentType = $response->getHeader('Content-Type');
		if (!isset($contentType) || stripos($contentType, 'image') !== 0) {
			return null;
		}

		return new Image($contentType, $response->getBody());
	}
}