Open Data Catalog v
Referencer.php
Go to the documentation of this file.
1 <?php
2 
4 
5 use Drupal\Core\Config\ConfigFactoryInterface;
6 
13 
14 use Contracts\FactoryInterface;
15 use GuzzleHttp\Client as GuzzleClient;
16 
20 class Referencer {
21  use HelperTrait;
22  use LoggerTrait;
23 
29  protected const DEFAULT_MIME_TYPE = 'text/plain';
30 
36  private $storageFactory;
37 
/**
 * Builds the referencer.
 *
 * @param \Drupal\Core\Config\ConfigFactoryInterface $configService
 *   Config factory handed to the helper trait's config setter.
 * @param \Contracts\FactoryInterface $storageFactory
 *   Factory used to obtain a storage instance per property id.
 */
public function __construct(ConfigFactoryInterface $configService, FactoryInterface $storageFactory) {
  $this->setConfigService($configService);
  $this->storageFactory = $storageFactory;

  // The logger factory is not injected; it is looked up from the container.
  $loggerFactory = \Drupal::service('logger.factory');
  $this->setLoggerFactory($loggerFactory);
}
46 
/**
 * Replaces every referenceable property of a dataset with its reference(s).
 *
 * @param object $data
 *   Dataset metadata. The object is modified in place.
 *
 * @return object
 *   The same object, with each set property from the property list replaced
 *   by the value returned from referenceProperty().
 *
 * @throws \Exception
 *   When $data is not an object.
 */
public function reference($data) {
  if (is_object($data) === FALSE) {
    throw new \Exception("data must be an object.");
  }

  // Walk the list of properties we know how to reference, skipping the ones
  // this dataset does not define.
  foreach ($this->getPropertyList() as $referenceable) {
    if (!isset($data->{$referenceable})) {
      continue;
    }
    $data->{$referenceable} = $this->referenceProperty($referenceable, $data->{$referenceable});
  }

  return $data;
}
68 
/**
 * Dispatches a property value to the multi- or single-value referencer.
 *
 * @param string $property_id
 *   Id of the property being referenced.
 * @param mixed $data
 *   The property value: an array of values, or a single object or string.
 *
 * @return mixed
 *   An array of references for array input, otherwise whatever
 *   referenceSingle() returns for the single value.
 */
private function referenceProperty(string $property_id, $data) {
  // Arrays are referenced element by element; anything else (an object or a
  // string) is treated as one value.
  return is_array($data)
    ? $this->referenceMultiple($property_id, $data)
    : $this->referenceSingle($property_id, $data);
}
89 
/**
 * References each value of a multi-valued property.
 *
 * @param string $property_id
 *   Id of the property being referenced.
 * @param array $values
 *   The property's values.
 *
 * @return array
 *   Sequentially indexed list of the references that could be resolved;
 *   values for which referenceSingle() returned NULL are dropped.
 */
private function referenceMultiple(string $property_id, array $values) : array {
  // Resolve every value first, in the original order.
  $references = array_map(
    function ($value) use ($property_id) {
      return $this->referenceSingle($property_id, $value);
    },
    $values
  );

  // Keep only the resolved references (strictly non-NULL) and reindex.
  $resolved = array_filter(
    $references,
    static function ($reference) {
      return NULL !== $reference;
    }
  );

  return array_values($resolved);
}
111 
/**
 * Resolves a single property value to a reference identifier.
 *
 * An existing reference is reused when one is found; otherwise a new one is
 * created. Distribution values are pre-processed by distributionHandling().
 *
 * @param string $property_id
 *   Id of the property being referenced.
 * @param mixed $value
 *   The value to reference.
 *
 * @return mixed
 *   The reference identifier, or NULL (after logging) when a reference could
 *   neither be found nor created.
 */
private function referenceSingle(string $property_id, $value) {
  if ('distribution' == $property_id) {
    $value = $this->distributionHandling($value);
  }

  // Prefer an already-stored reference; fall back to creating a new one.
  $reference = $this->checkExistingReference($property_id, $value)
    ?: $this->createPropertyReference($property_id, $value);

  if ($reference) {
    return $reference;
  }

  $this->log(
    'value_referencer',
    'Neither found an existing nor could create a new reference for property_id: @property_id with value: @value',
    [
      '@property_id' => $property_id,
      '@value' => var_export($value, TRUE),
    ]
  );
  return NULL;
}
148 
/**
 * Registers a distribution's resource and swaps in its resource identifier.
 *
 * The method deliberately declares no return type: values that are not
 * objects (or have no downloadURL) are passed through untouched, and a
 * strict `object` return type would turn that pass-through into a TypeError.
 *
 * @param mixed $distribution
 *   The distribution value. When it is an object with a downloadURL, that
 *   property is overwritten in place.
 *
 * @return mixed
 *   The supplied distribution. If it had a downloadURL, the property now
 *   holds whatever registerWithResourceMapper() returned for the hostified
 *   URL and the distribution's mime type.
 */
private function distributionHandling($distribution) {
  // Only distributions that actually carry a resource can be registered with
  // the resource mapper; everything else is returned as supplied.
  if (!is_object($distribution) || !isset($distribution->downloadURL)) {
    return $distribution;
  }

  // Both arguments are computed from the original download URL before it is
  // replaced below.
  $hostifiedUrl = $this->hostify($distribution->downloadURL);
  $mimeType = $this->getMimeType($distribution);

  // Replace the download URL with the identifier registered in the resource
  // mapper.
  $distribution->downloadURL = $this->registerWithResourceMapper($hostifiedUrl, $mimeType);

  return $distribution;
}
174 
/**
 * Registers a resource with the resource mapper.
 *
 * @param string $downloadUrl
 *   The (hostified) download URL of the resource.
 * @param string $mimeType
 *   The resource's mime type.
 *
 * @return string
 *   The resource's unique identifier when registration succeeds or the
 *   resource was already registered; otherwise the unchanged download URL.
 */
private function registerWithResourceMapper(string $downloadUrl, string $mimeType): string {
  try {
    // Build a resource from the supplied details and try to register it.
    $resource = new Resource($downloadUrl, $mimeType);
    $registered = $this->getFileMapper()->register($resource);

    // On success the download URL is replaced by the identifier generated
    // for the resource; otherwise the URL is kept as is.
    return $registered ? $resource->getUniqueIdentifier() : $downloadUrl;
  }
  catch (AlreadyRegistered $alreadyRegistered) {
    // The exception message is decoded as JSON describing the resource that
    // is already registered.
    $info = json_decode($alreadyRegistered->getMessage());

    if (!isset($info[0]->identifier)) {
      return $downloadUrl;
    }

    // Load the stored source resource and decide whether a new version is
    // needed or the stored one can be reused.
    $stored = $this->getFileMapper()->get($info[0]->identifier, Resource::DEFAULT_SOURCE_PERSPECTIVE);
    return $this->handleExistingResource($info, $stored, $mimeType);
  }
}
213 
/**
 * Picks the identifier to use for an already-registered resource.
 *
 * @param mixed $info
 *   Decoded registration info; the first element's perspective is inspected.
 * @param mixed $stored
 *   The stored resource returned by the resource mapper.
 * @param mixed $mimeType
 *   The mime type currently requested for the resource.
 *
 * @return mixed
 *   The identifier of a newly registered version when the info refers to the
 *   source perspective and either ResourceMapper::newRevision() equals 1 or
 *   the mime type changed; otherwise the stored resource's identifier.
 */
private function handleExistingResource($info, $stored, $mimeType) {
  $isSourcePerspective = $info[0]->perspective == Resource::DEFAULT_SOURCE_PERSPECTIVE;

  // Reuse the stored resource unless it is the source perspective and a new
  // revision was requested or the mime type differs.
  if (!$isSourcePerspective ||
    (ResourceMapper::newRevision() != 1 && $stored->getMimeType() == $mimeType)) {
    return $stored->getUniqueIdentifier();
  }

  $revision = $stored->createNewVersion();
  // Carry over the requested mime type, since the user may have changed it.
  $revision->changeMimeType($mimeType);
  $this->getFileMapper()->registerNewVersion($revision);

  return $revision->getUniqueIdentifier();
}
232 
/**
 * Fetches the resource mapper service from the Drupal container.
 *
 * @return \Drupal\metastore\ResourceMapper
 *   The 'dkan.metastore.resource_mapper' service.
 */
private function getFileMapper(): ResourceMapper {
  $resourceMapper = \Drupal::service('dkan.metastore.resource_mapper');
  return $resourceMapper;
}
239 
/**
 * Replaces this server's host in a resource URL with the host token.
 *
 * @param string $resourceUrl
 *   The URL of the resource.
 *
 * @return string
 *   The URL with its host replaced by UrlHostTokenResolver::TOKEN when the
 *   host equals this server's host; otherwise the URL unchanged.
 */
public static function hostify(string $resourceUrl): string {
  // Work out this server's host: taken from the public files URL when it
  // yields one, otherwise from the current request.
  $publicFilesUrl = UrlHostTokenResolver::getServerPublicFilesUrl();
  $publicFilesParts = isset($publicFilesUrl) ? parse_url($publicFilesUrl) : NULL;
  $serverHost = $publicFilesParts['host'] ?? \Drupal::request()->getHost();

  // A URL without a host, or pointing at a different host, is left alone.
  $resourceParts = parse_url($resourceUrl);
  if (!isset($resourceParts['host']) || $resourceParts['host'] != $serverHost) {
    return $resourceUrl;
  }

  // Same host: substitute the token and rebuild the URL.
  $resourceParts['host'] = UrlHostTokenResolver::TOKEN;
  return self::unparseUrl($resourceParts);
}
263 
/**
 * Rebuilds a URL string from the components produced by parse_url().
 *
 * Components are emitted in URL order,
 * scheme://user:pass@host:port/path?query#fragment, so credentials are
 * placed before the host with their ":" and "@" separators. Components that
 * are not set are skipped.
 *
 * @param array $parsedUrl
 *   URL components keyed as in the array returned by parse_url().
 *
 * @return string
 *   The reassembled URL; an empty string when no component is set.
 */
private static function unparseUrl($parsedUrl) {
  $url = '';

  if (isset($parsedUrl['scheme'])) {
    $url .= $parsedUrl['scheme'] . '://';
  }

  // User info must precede the host and be terminated by "@".
  if (isset($parsedUrl['user']) || isset($parsedUrl['pass'])) {
    $url .= $parsedUrl['user'] ?? '';
    if (isset($parsedUrl['pass'])) {
      $url .= ':' . $parsedUrl['pass'];
    }
    $url .= '@';
  }

  if (isset($parsedUrl['host'])) {
    $url .= $parsedUrl['host'];
  }
  if (isset($parsedUrl['port'])) {
    $url .= ':' . $parsedUrl['port'];
  }
  if (isset($parsedUrl['path'])) {
    $url .= $parsedUrl['path'];
  }
  if (isset($parsedUrl['query'])) {
    $url .= '?' . $parsedUrl['query'];
  }
  if (isset($parsedUrl['fragment'])) {
    $url .= '#' . $parsedUrl['fragment'];
  }

  return $url;
}
293 
/**
 * Determines the mime type of a resource stored as a local file entity.
 *
 * @param string $downloadUrl
 *   The resource's download URL; only its base name is used for the lookup.
 *
 * @return string|null
 *   The mime type of the first file entity whose filename matches, or NULL
 *   (after logging) when no such file entity exists.
 */
private function getLocalMimeType(string $downloadUrl): ?string {
  // Take the last path component of the URL and decode it into a file name.
  $filename = urldecode(\Drupal::service('file_system')->basename($downloadUrl));

  // Look up file entities carrying that file name and take the first match.
  $candidates = \Drupal::entityTypeManager()
    ->getStorage('file')
    ->loadByProperties(['filename' => $filename]);
  $file = reset($candidates);

  // No match: report it and give up.
  if ($file === FALSE) {
    $this->log('value_referencer', 'Unable to determine mime type of file with name "@name", because no file was found with that name.', [
      '@name' => $filename,
    ]);
    return NULL;
  }

  return $file->getMimeType();
}
330 
/**
 * Determines the mime type of a remote resource via an HTTP HEAD request.
 *
 * @param string $downloadUrl
 *   The URL of the remote resource.
 *
 * @return string|null
 *   The media type from the response's first Content-Type header value, with
 *   any parameters (e.g. "; charset=utf-8") removed. NULL when the request
 *   fails, the header is absent, or the media type is empty.
 */
private function getRemoteMimeType(string $downloadUrl): ?string {
  // Perform an HTTP HEAD request against the supplied URL in order to
  // determine the content type of the remote resource.
  $client = new GuzzleClient();
  try {
    $response = $client->head($downloadUrl);
  }
  catch (\GuzzleHttp\Exception\GuzzleException $exception) {
    // An unreachable or failing remote must not abort referencing; the
    // caller falls back to a default mime type when NULL is returned.
    $this->log('value_referencer', 'Unable to determine mime type of remote resource "@url": @message', [
      '@url' => $downloadUrl,
      '@message' => $exception->getMessage(),
    ]);
    return NULL;
  }

  // Extract the full value of the content type header.
  $content_type = $response->getHeader('Content-Type');
  if (!isset($content_type[0])) {
    return NULL;
  }

  // Keep only the media type itself: everything from the first ";" on is
  // header parameters such as charset or boundary.
  $mime_type = trim(explode(';', $content_type[0], 2)[0]);

  return $mime_type !== '' ? $mime_type : NULL;
}
356 
/**
 * Determines the mime type of a distribution's resource.
 *
 * Sources are tried in this order: an explicit mediaType, the "csv"/"tsv"
 * format, then detection from the download URL. self::DEFAULT_MIME_TYPE is
 * the single fallback when none of these yields a value.
 *
 * @param object $distribution
 *   The distribution; its mediaType, format and downloadURL properties are
 *   consulted when set.
 *
 * @return string
 *   The resolved mime type.
 */
private function getMimeType($distribution): string {
  // If we have a mediaType set, use that.
  if (isset($distribution->mediaType)) {
    return $distribution->mediaType;
  }

  // Fall back to the format when it is one of the importable formats.
  if (isset($distribution->format) && $distribution->format == 'csv') {
    return 'text/csv';
  }
  if (isset($distribution->format) && $distribution->format == 'tsv') {
    return 'text/tab-separated-values';
  }

  // Nothing left to inspect without a download URL.
  if (!isset($distribution->downloadURL)) {
    return self::DEFAULT_MIME_TYPE;
  }

  // A URL that hostify() rewrites points at this server, so the resource is
  // treated as local; any other URL is treated as remote.
  $is_local = $distribution->downloadURL !== $this->hostify($distribution->downloadURL);
  $mimeType = $is_local ?
    $this->getLocalMimeType($distribution->downloadURL) :
    $this->getRemoteMimeType($distribution->downloadURL);

  // Detection may come back empty-handed (NULL).
  return $mimeType ?? self::DEFAULT_MIME_TYPE;
}
395 
/**
 * Looks up an existing reference for a property value.
 *
 * @param string $property_id
 *   Id of the property being referenced.
 * @param mixed $data
 *   The value whose metadata hash is matched against stored titles.
 *
 * @return mixed
 *   The uuid of the first matching entity, which is set to the published
 *   moderation state and saved as a side effect; NULL when nothing matches.
 */
private function checkExistingReference(string $property_id, $data) {
  $entityStorage = $this->storageFactory->getInstance($property_id)->getEntityStorage();
  $matches = $entityStorage->loadByProperties([
    'field_data_type' => $property_id,
    'title' => Service::metadataHash($data),
  ]);

  $node = reset($matches);
  if (!$node) {
    return NULL;
  }

  // A matching data node may exist but be orphaned, so its state is set back
  // to published before it is reused.
  // @ToDo: if the referencing node is in a draft state, do not publish the referenced node.
  $node->set('moderation_state', 'published');
  $node->save();

  return $node->uuid();
}
427 
/**
 * Stores a new reference for a property value.
 *
 * @param string $property_id
 *   Id of the property being referenced.
 * @param mixed $value
 *   The value to store.
 *
 * @return mixed
 *   Whatever the property's storage returns from store(); the caller treats
 *   it as the new reference's uuid.
 */
private function createPropertyReference(string $property_id, $value) {
  // Wrap the value, together with a generated identifier, as JSON metadata.
  $identifier = $this->getUuidService()->generate($property_id, $value);
  $json = json_encode((object) [
    'identifier' => $identifier,
    'data' => $value,
  ]);

  // Hand the JSON to the storage for this property.
  return $this->storageFactory
    ->getInstance($property_id)
    ->store($json, $identifier);
}
455 
456 }
Drupal\common\LoggerTrait
trait LoggerTrait
Definition: LoggerTrait.php:11
Drupal\metastore\Exception\AlreadyRegistered
Definition: AlreadyRegistered.php:10
Drupal\common\Resource
Definition: Resource.php:29
Drupal\metastore\Reference\Referencer\DEFAULT_MIME_TYPE
const DEFAULT_MIME_TYPE
Definition: Referencer.php:29
Drupal\metastore\Reference\HelperTrait
trait HelperTrait
Definition: HelperTrait.php:11
Drupal\metastore\Service
Definition: Uuid5.php:5
Drupal\metastore\Reference\Referencer\__construct
__construct(ConfigFactoryInterface $configService, FactoryInterface $storageFactory)
Definition: Referencer.php:41
Drupal\metastore\Reference
Definition: Dereferencer.php:3
Drupal\metastore\Reference\Referencer\reference
reference($data)
Definition: Referencer.php:56
Drupal\metastore\Reference\Referencer\hostify
static hostify(string $resourceUrl)
Definition: Referencer.php:249
Drupal\metastore\ResourceMapper
Definition: ResourceMapper.php:15
Drupal\common\setLoggerFactory
setLoggerFactory(LoggerChannelFactory $loggerService)
Definition: LoggerTrait.php:48
Drupal\metastore\Reference\Referencer
Definition: Referencer.php:20
Drupal\common\UrlHostTokenResolver
Definition: UrlHostTokenResolver.php:10