Project

General

Profile

« Previous | Next » 

Revision 2978b0c8

Added by Andreas Kohlbecker over 5 years ago

ref #7658 moving all tagged text functions to separate file

View differences:

modules/cdm_dataportal/cdm_api/cdm_api.module
27 27
  module_load_include('php', 'cdm_api', 'enums');
28 28
  module_load_include('php', 'cdm_api', 'webservice_uris');
29 29
  module_load_include('php', 'cdm_api', 'cdm_node');
30
  module_load_include('inc', 'cdm_api', 'tagged_text');
30 31

  
31 32
  /**
32 33
   * Timeout used to override the default of 30 seconds
......
190 191
  cache_clear_all(NULL, 'cache_cdm_ws');
191 192
}
192 193

  
193
// ===================== Tagged Text functions ================== //
194

  
195
/**
 * Creates a new TaggedText item.
 *
 * @param string $tag_type
 *   The value for the type property of the new item.
 * @param string|null $text
 *   The value for the text property of the new item, NULL by default.
 *
 * @return object
 *   A stdClass instance having the two properties type and text.
 */
function tagged_text_new($tag_type, $text = null){
  return (object) array(
    'type' => $tag_type,
    'text' => $text,
  );
}
201

  
202
/**
 * Expands the entity references contained in a TaggedText array.
 *
 * Walks the passed TaggedText array to find all elements which have a
 * TaggedText->entityReference. For each of these the tagged text of the
 * referenced entity is loaded from the web service and the original entry is
 * replaced by the elements of the newly loaded array.
 *
 * An element is kept unchanged when its type is listed in $skiptags, when no
 * web service base uri is known for the type of the referenced entity, or when
 * the web service does not return a non-empty array.
 *
 * @param array $taggedtxt
 *   The original TaggedText array.
 * @param array $skiptags
 *   Optional list of tag names to skip.
 *
 * @return array
 *   The new tagged text with all TaggedText->entityReference objects expanded.
 */
function cdm_tagged_text_expand_entity_references(array $taggedtxt, $skiptags = array()) {
  $tagged_text_expanded = array();
  foreach ($taggedtxt as $tt) {
    if (isset($tt->entityReference) && !in_array($tt->type, $skiptags)) {
      $base_uri = cdm_ws_base_uri($tt->entityReference->type);
      if ($base_uri) {
        // The base uri CDM_WS_NAME uses the method "taggedName", all other
        // base uris use "taggedText".
        $tagged_text_method = $base_uri == CDM_WS_NAME ? "/taggedName" : "/taggedText";
        $referenced_tt = cdm_ws_get($base_uri . "/" . $tt->entityReference->uuid . $tagged_text_method);
        // Only a non-empty array can be spliced in. Any other truthy response
        // must not reach array_merge() since this would discard the elements
        // collected so far or raise a TypeError, depending on the PHP version.
        if (is_array($referenced_tt) && $referenced_tt) {
          $tagged_text_expanded = array_merge($tagged_text_expanded, $referenced_tt);
          continue;
        }
      }
    }
    // Default case: keep the original element.
    $tagged_text_expanded[] = $tt;
  }
  return $tagged_text_expanded;
}
237

  
238
/**
 * Converts an array of TaggedText items into corresponding html tags.
 *
 * Each item is wrapped into a span element whose class attribute is set to
 * the type of the TaggedText item, followed by the class names returned by
 * html_class_attribute_ref() when the item has an entityReference.
 *
 * Items without text (NULL or empty string) and items whose type is listed in
 * $skiptags are omitted. The text '0' is regular text and is rendered.
 *
 * A blank is appended after an item unless the item is a separator, the
 * previously rendered item was a separator or the item is at the last position
 * of $taggedtxt. The position also counts omitted items.
 *
 * Tagged text where the type starts with 'PLACEHOLDER_' is added to the markup
 * as plain text without a span and without a trailing blank: the
 * taggedText->type wrapped in curly brackets, '{' . $tt->type . '}'.
 * See tagged_text_extract_secref().
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip.
 *
 * @return string
 *   The markup.
 */
function cdm_tagged_text_to_markup(array $taggedtxt, $skiptags = array()) {

  $tag = 'span';
  $out = '';
  $was_separator = false;
  $count = count($taggedtxt);
  $i = 0;
  foreach ($taggedtxt as $tt) {
    // Compare against the empty string instead of testing for truthiness,
    // otherwise the text '0' would be dropped.
    $has_text = isset($tt->text) && (string) $tt->text !== '';
    if ($has_text && !in_array($tt->type, $skiptags)) {
      $class_attr = $tt->type;
      if (isset($tt->entityReference)) {
        $class_attr .= " " . html_class_attribute_ref($tt->entityReference);
      }
      $is_last = $i + 1 == $count;
      $is_separator = is_tagged_text_sepatator_type($tt->type);
      $glue = !$is_separator && !$was_separator && !$is_last ? ' ' : '';
      if (str_beginsWith($tt->type, 'PLACEHOLDER_')) {
        $out .= '{' . $tt->type . '}';
      }
      else {
        $out .= '<' . $tag . ' class="' . $class_attr . '">'
          . t('@text', array('@text' => $tt->text))
          . '</' . $tag . '>'
          . $glue;
      }
      $was_separator = $is_separator;
    }
    $i++;
  }
  return $out;
}
286

  
287
/**
 * Converts an array of TaggedText items into a single text string.
 *
 * Items without text (NULL or empty string) and items whose type is listed in
 * $skiptags are omitted. The text '0' is regular text and is included.
 *
 * A blank is appended after an item unless the item is a separator, the
 * previously added item was a separator or the item is at the last position
 * of $taggedtxt. The position also counts omitted items.
 *
 * NOTE(review): each text is passed through t() as '@text' placeholder, which
 * presumably HTML-escapes it - confirm that callers expect escaped text.
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip.
 *
 * @return string
 *   The concatenated text.
 */
function cdm_tagged_text_to_string(array $taggedtxt, $skiptags = array()) {

  $out = '';
  $was_separator = false;
  $count = count($taggedtxt);
  $i = 0;
  foreach ($taggedtxt as $tt) {
    // Compare against the empty string instead of testing for truthiness,
    // otherwise the text '0' would be dropped.
    $has_text = isset($tt->text) && (string) $tt->text !== '';
    if ($has_text && !in_array($tt->type, $skiptags)) {
      $is_last = $i + 1 == $count;
      $is_separator = is_tagged_text_sepatator_type($tt->type);
      $glue = !$is_separator && !$was_separator && !$is_last ? ' ' : '';
      $out .= t('@text', array('@text' => $tt->text)) . $glue;
      $was_separator = $is_separator;
    }
    $i++;
  }
  return $out;
}
318

  
319
/**
 * Tests if the given tagged text type is one of the separator types.
 *
 * The separator types are 'separator' and 'postSeparator'. The type is
 * compared strictly, so only these exact strings are accepted.
 *
 * See cdmlib: boolean eu.etaxonomy.cdm.strategy.cache.TagEnum.isSeparator();
 *
 * @param string $tagged_text_type
 *   The type of a TaggedText item.
 *
 * @return bool
 *   TRUE if the type is a separator type, FALSE otherwise.
 */
function is_tagged_text_sepatator_type($tagged_text_type){
  static $separator_names = array('separator', 'postSeparator');
  // Strict comparison: a loose one would accept values like TRUE as well.
  return in_array($tagged_text_type, $separator_names, true);
}
329

  
330

  
331
/**
 * Collects the texts of the taggedText instances having specific tag types.
 *
 * @param array $taggedtxt
 *   Array with text items.
 * @param array $include_tag_types
 *   Array of the tag types for which to find text items in the $taggedtxt
 *   array, or NULL to return all texts. Types are compared strictly.
 *
 * @return array
 *   A numerically indexed array with the texts of the matching items, in the
 *   order in which the items occur in $taggedtxt.
 */
function cdm_tagged_text_values(array $taggedtxt, $include_tag_types = NULL) {
  $tokens = array();
  foreach ($taggedtxt as $tagtxt) {
    // Strict comparison: a loose one would let a type like TRUE match any
    // tag type name.
    if ($include_tag_types === NULL || in_array($tagtxt->type, $include_tag_types, true)) {
      $tokens[] = $tagtxt->text;
    }
  }
  return $tokens;
}
355

  
356
/**
 * Preprocess the taggedTitle arrays.
 *
 * Step 1: Turns 'newly' introduced tag types ("hybridSign")
 * into tag type "name". This changes the passed TaggedText objects.
 *
 * Step 2: Two taggedTexts which have the same type and which have
 * a separator between them are merged together. The merged element is a clone
 * of the first one with the three texts concatenated. Merged elements are not
 * merged again with following elements.
 *
 * The array is re-indexed before it is processed, so arrays with gaps in
 * their keys are handled as well. Values which are not arrays are left
 * untouched.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects, passed by reference and replaced by the
 *   normalized array.
 */
function normalize_tagged_text(&$taggedTextList) {

  if (!is_array($taggedTextList)) {
    return;
  }

  // The loops below rely on consecutive numeric keys.
  $items = array_values($taggedTextList);
  $count = count($items);

  // First pass: rename.
  foreach ($items as $item) {
    if ($item->type == "hybridSign") {
      $item->type = "name";
    }
  }

  // Second pass: resolve separators.
  $normalized = array();
  for ($i = 0; $i < $count; $i++) {
    // Elements of the same type concatenated by a separator should be merged
    // together.
    if (isset($items[$i + 2]) && $items[$i + 1]->type == "separator" && $items[$i]->type == $items[$i + 2]->type) {
      $merged = clone $items[$i];
      $merged->text = $merged->text . $items[$i + 1]->text . $items[$i + 2]->text;
      $normalized[] = $merged;
      // Skip the separator and the second element, both are consumed.
      $i += 2;
      continue;
    }
    // No special handling.
    $normalized[] = $items[$i];
  }
  $taggedTextList = $normalized;
}
400

  
401
/**
 * Extracts the tagged text for sec references with separator and citation
 * detail from a tagged text array.
 *
 * The first element of type $ref_tag_type which is directly preceded by a
 * "separator" element is extracted together with this separator. When these
 * two are directly followed by another separator and another element of type
 * $ref_tag_type (the micro reference), these two are extracted as well.
 * The extracted elements are removed from $tagged_text and the array is
 * re-indexed. Values which are not arrays are left untouched.
 *
 * @param array $tagged_text
 *   The tagged text to operate on, passed by reference.
 * @param string $ref_tag_type
 *   The tag type for a sec reference is "secReference", but "relSecReference"
 *   is also used in case of relationships.
 * @param bool $replace_with_placeholder
 *   If TRUE, a placeholder tagged text element of type
 *   "PLACEHOLDER_" . $ref_tag_type is put at the position of the extracted
 *   leading separator as replacement for the extracted tagged text elements.
 *
 * @return array
 *   The extracted tagged text elements in their original order, or an empty
 *   array if nothing was extracted.
 */
function tagged_text_extract_secref(&$tagged_text, $ref_tag_type = "secReference", $replace_with_placeholder = false) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    return $extracted_tt;
  }

  // The index arithmetic below relies on consecutive numeric keys.
  $tagged_text = array_values($tagged_text);
  $last_index = count($tagged_text) - 1;
  for ($i = 0; $i < $last_index; $i++) {
    if ($tagged_text[$i]->type != "separator" || $tagged_text[$i + 1]->type != $ref_tag_type) {
      continue;
    }
    $extracted_tt[0] = $tagged_text[$i];
    $extracted_tt[1] = $tagged_text[$i + 1];

    if ($replace_with_placeholder) {
      // Text must not be null, see cdm_tagged_text_to_markup().
      $tagged_text[$i] = tagged_text_new("PLACEHOLDER_" . $ref_tag_type, "PLACEHOLDER_" . $ref_tag_type);
    }
    else {
      unset($tagged_text[$i]);
    }
    unset($tagged_text[$i + 1]);
    // Also get the micro reference which could be in $tagged_text[$i + 3].
    if (isset($tagged_text[$i + 3]) && $tagged_text[$i + 2]->type == "separator" && $tagged_text[$i + 3]->type == $ref_tag_type) {
      $extracted_tt[2] = $tagged_text[$i + 2];
      $extracted_tt[3] = $tagged_text[$i + 3];
      unset($tagged_text[$i + 2]);
      unset($tagged_text[$i + 3]);
    }
    // Only the first occurrence is extracted.
    break;
  }
  // Re-index the array to make it continuous again.
  $tagged_text = array_values($tagged_text);
  return $extracted_tt;
}
443

  
444
/**
 * Extracts the tagged text for the nomenclatural status from a tagged text
 * array.
 *
 * The first element of type "nomStatus" is extracted, together with a
 * directly preceding "separator" element and a directly following
 * "postSeparator" element if these exist. All elements of the array are
 * inspected, including the last one. The extracted elements are removed from
 * $tagged_text and the array is re-indexed. Values which are not arrays are
 * left untouched.
 *
 * @param array $tagged_text
 *   The tagged text to operate on, passed by reference.
 *
 * @return array
 *   The extracted tagged text elements in their original order, or an empty
 *   array if nothing was extracted.
 */
function tagged_text_extract_nomstatus(&$tagged_text) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    return $extracted_tt;
  }

  // The index arithmetic below relies on consecutive numeric keys.
  $tagged_text = array_values($tagged_text);
  $count = count($tagged_text);
  for ($i = 0; $i < $count; $i++) {
    if ($tagged_text[$i]->type != "nomStatus") {
      continue;
    }
    $extracted_tt[] = $tagged_text[$i];
    if (isset($tagged_text[$i + 1]) && $tagged_text[$i + 1]->type == "postSeparator") {
      $extracted_tt[] = $tagged_text[$i + 1];
      unset($tagged_text[$i + 1]);
    }
    // There is no preceding element when the status is the first element.
    if ($i > 0 && $tagged_text[$i - 1]->type == "separator") {
      array_unshift($extracted_tt, $tagged_text[$i - 1]);
      unset($tagged_text[$i - 1]);
    }
    unset($tagged_text[$i]);
    // Only the first occurrence is extracted.
    break;
  }
  // Re-index the array to make it continuous again.
  $tagged_text = array_values($tagged_text);
  return $extracted_tt;
}
466

  
467
/**
 * Finds all elements of a specific type in a tagged text array.
 *
 * All elements of the array are inspected, including the last one.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects. For any other value an empty array is
 *   returned.
 * @param string $type
 *   The tag type to search for.
 *
 * @return array
 *   A numerically indexed array of the matching TaggedText objects in their
 *   original order.
 */
function find_tagged_text_elements($taggedTextList, $type){
  $matching_elements = array();
  if (is_array($taggedTextList)) {
    foreach ($taggedTextList as $tagged_text) {
      if ($tagged_text->type == $type) {
        $matching_elements[] = $tagged_text;
      }
    }
  }
  return $matching_elements;
}
478

  
479
// ===================== END of Tagged Text functions ================== //
480

  
481 194
/**
482 195
 * Lists the classifications a taxon belongs to
483 196
 *
modules/cdm_dataportal/cdm_api/tagged_text.inc
1
<?php
2

  
3
/**
4
 * @file
5
 * Functions for handling CDM TaggedText arrays
6
 *
7
 *
8
 * @copyright
9
 *   (C) 2007-2018 EDIT
10
 *   European Distributed Institute of Taxonomy
11
 *   http://www.e-taxonomy.eu
12
 *
13
 *   The contents of this module are subject to the Mozilla
14
 *   Public License Version 1.1.
15
 * @see http://www.mozilla.org/MPL/MPL-1.1.html
16
 *
17
 * @author
18
 *   - Andreas Kohlbecker <a.kohlbecker@BGBM.org>
19
 */
20

  
21
/**
 * Creates a new TaggedText item.
 *
 * @param string $tag_type
 *   The value for the type property of the new item.
 * @param string|null $text
 *   The value for the text property of the new item, NULL by default.
 *
 * @return object
 *   A stdClass instance having the two properties type and text.
 */
function tagged_text_new($tag_type, $text = null){
  return (object) array(
    'type' => $tag_type,
    'text' => $text,
  );
}
27

  
28
/**
 * Expands the entity references contained in a TaggedText array.
 *
 * Walks the passed TaggedText array to find all elements which have a
 * TaggedText->entityReference. For each of these the tagged text of the
 * referenced entity is loaded from the web service and the original entry is
 * replaced by the elements of the newly loaded array.
 *
 * An element is kept unchanged when its type is listed in $skiptags, when no
 * web service base uri is known for the type of the referenced entity, or when
 * the web service does not return a non-empty array.
 *
 * @param array $taggedtxt
 *   The original TaggedText array.
 * @param array $skiptags
 *   Optional list of tag names to skip.
 *
 * @return array
 *   The new tagged text with all TaggedText->entityReference objects expanded.
 */
function cdm_tagged_text_expand_entity_references(array $taggedtxt, $skiptags = array()) {
  $tagged_text_expanded = array();
  foreach ($taggedtxt as $tt) {
    if (isset($tt->entityReference) && !in_array($tt->type, $skiptags)) {
      $base_uri = cdm_ws_base_uri($tt->entityReference->type);
      if ($base_uri) {
        // The base uri CDM_WS_NAME uses the method "taggedName", all other
        // base uris use "taggedText".
        $tagged_text_method = $base_uri == CDM_WS_NAME ? "/taggedName" : "/taggedText";
        $referenced_tt = cdm_ws_get($base_uri . "/" . $tt->entityReference->uuid . $tagged_text_method);
        // Only a non-empty array can be spliced in. Any other truthy response
        // must not reach array_merge() since this would discard the elements
        // collected so far or raise a TypeError, depending on the PHP version.
        if (is_array($referenced_tt) && $referenced_tt) {
          $tagged_text_expanded = array_merge($tagged_text_expanded, $referenced_tt);
          continue;
        }
      }
    }
    // Default case: keep the original element.
    $tagged_text_expanded[] = $tt;
  }
  return $tagged_text_expanded;
}
63

  
64
/**
 * Converts an array of TaggedText items into corresponding html tags.
 *
 * Each item is wrapped into a span element whose class attribute is set to
 * the type of the TaggedText item, followed by the class names returned by
 * html_class_attribute_ref() when the item has an entityReference.
 *
 * Items without text (NULL or empty string) and items whose type is listed in
 * $skiptags are omitted. The text '0' is regular text and is rendered.
 *
 * A blank is appended after an item unless the item is a separator, the
 * previously rendered item was a separator or the item is at the last position
 * of $taggedtxt. The position also counts omitted items.
 *
 * Tagged text where the type starts with 'PLACEHOLDER_' is added to the markup
 * as plain text without a span and without a trailing blank: the
 * taggedText->type wrapped in curly brackets, '{' . $tt->type . '}'.
 * See tagged_text_extract_secref().
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip.
 *
 * @return string
 *   The markup.
 */
function cdm_tagged_text_to_markup(array $taggedtxt, $skiptags = array()) {

  $tag = 'span';
  $out = '';
  $was_separator = false;
  $count = count($taggedtxt);
  $i = 0;
  foreach ($taggedtxt as $tt) {
    // Compare against the empty string instead of testing for truthiness,
    // otherwise the text '0' would be dropped.
    $has_text = isset($tt->text) && (string) $tt->text !== '';
    if ($has_text && !in_array($tt->type, $skiptags)) {
      $class_attr = $tt->type;
      if (isset($tt->entityReference)) {
        $class_attr .= " " . html_class_attribute_ref($tt->entityReference);
      }
      $is_last = $i + 1 == $count;
      $is_separator = is_tagged_text_sepatator_type($tt->type);
      $glue = !$is_separator && !$was_separator && !$is_last ? ' ' : '';
      if (str_beginsWith($tt->type, 'PLACEHOLDER_')) {
        $out .= '{' . $tt->type . '}';
      }
      else {
        $out .= '<' . $tag . ' class="' . $class_attr . '">'
          . t('@text', array('@text' => $tt->text))
          . '</' . $tag . '>'
          . $glue;
      }
      $was_separator = $is_separator;
    }
    $i++;
  }
  return $out;
}
112

  
113
/**
 * Converts an array of TaggedText items into a single text string.
 *
 * Items without text (NULL or empty string) and items whose type is listed in
 * $skiptags are omitted. The text '0' is regular text and is included.
 *
 * A blank is appended after an item unless the item is a separator, the
 * previously added item was a separator or the item is at the last position
 * of $taggedtxt. The position also counts omitted items.
 *
 * NOTE(review): each text is passed through t() as '@text' placeholder, which
 * presumably HTML-escapes it - confirm that callers expect escaped text.
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip.
 *
 * @return string
 *   The concatenated text.
 */
function cdm_tagged_text_to_string(array $taggedtxt, $skiptags = array()) {

  $out = '';
  $was_separator = false;
  $count = count($taggedtxt);
  $i = 0;
  foreach ($taggedtxt as $tt) {
    // Compare against the empty string instead of testing for truthiness,
    // otherwise the text '0' would be dropped.
    $has_text = isset($tt->text) && (string) $tt->text !== '';
    if ($has_text && !in_array($tt->type, $skiptags)) {
      $is_last = $i + 1 == $count;
      $is_separator = is_tagged_text_sepatator_type($tt->type);
      $glue = !$is_separator && !$was_separator && !$is_last ? ' ' : '';
      $out .= t('@text', array('@text' => $tt->text)) . $glue;
      $was_separator = $is_separator;
    }
    $i++;
  }
  return $out;
}
144

  
145
/**
 * Tests if the given tagged text type is one of the separator types.
 *
 * The separator types are 'separator' and 'postSeparator'. The type is
 * compared strictly, so only these exact strings are accepted.
 *
 * See cdmlib: boolean eu.etaxonomy.cdm.strategy.cache.TagEnum.isSeparator();
 *
 * @param string $tagged_text_type
 *   The type of a TaggedText item.
 *
 * @return bool
 *   TRUE if the type is a separator type, FALSE otherwise.
 */
function is_tagged_text_sepatator_type($tagged_text_type){
  static $separator_names = array('separator', 'postSeparator');
  // Strict comparison: a loose one would accept values like TRUE as well.
  return in_array($tagged_text_type, $separator_names, true);
}
155

  
156

  
157
/**
 * Collects the texts of the taggedText instances having specific tag types.
 *
 * @param array $taggedtxt
 *   Array with text items.
 * @param array $include_tag_types
 *   Array of the tag types for which to find text items in the $taggedtxt
 *   array, or NULL to return all texts. Types are compared strictly.
 *
 * @return array
 *   A numerically indexed array with the texts of the matching items, in the
 *   order in which the items occur in $taggedtxt.
 */
function cdm_tagged_text_values(array $taggedtxt, $include_tag_types = NULL) {
  $tokens = array();
  foreach ($taggedtxt as $tagtxt) {
    // Strict comparison: a loose one would let a type like TRUE match any
    // tag type name.
    if ($include_tag_types === NULL || in_array($tagtxt->type, $include_tag_types, true)) {
      $tokens[] = $tagtxt->text;
    }
  }
  return $tokens;
}
181

  
182
/**
 * Preprocess the taggedTitle arrays.
 *
 * Step 1: Turns 'newly' introduced tag types ("hybridSign")
 * into tag type "name". This changes the passed TaggedText objects.
 *
 * Step 2: Two taggedTexts which have the same type and which have
 * a separator between them are merged together. The merged element is a clone
 * of the first one with the three texts concatenated. Merged elements are not
 * merged again with following elements.
 *
 * The array is re-indexed before it is processed, so arrays with gaps in
 * their keys are handled as well. Values which are not arrays are left
 * untouched.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects, passed by reference and replaced by the
 *   normalized array.
 */
function normalize_tagged_text(&$taggedTextList) {

  if (!is_array($taggedTextList)) {
    return;
  }

  // The loops below rely on consecutive numeric keys.
  $items = array_values($taggedTextList);
  $count = count($items);

  // First pass: rename.
  foreach ($items as $item) {
    if ($item->type == "hybridSign") {
      $item->type = "name";
    }
  }

  // Second pass: resolve separators.
  $normalized = array();
  for ($i = 0; $i < $count; $i++) {
    // Elements of the same type concatenated by a separator should be merged
    // together.
    if (isset($items[$i + 2]) && $items[$i + 1]->type == "separator" && $items[$i]->type == $items[$i + 2]->type) {
      $merged = clone $items[$i];
      $merged->text = $merged->text . $items[$i + 1]->text . $items[$i + 2]->text;
      $normalized[] = $merged;
      // Skip the separator and the second element, both are consumed.
      $i += 2;
      continue;
    }
    // No special handling.
    $normalized[] = $items[$i];
  }
  $taggedTextList = $normalized;
}
226

  
227
/**
 * Extracts the tagged text for sec references with separator and citation
 * detail from a tagged text array.
 *
 * The first element of type $ref_tag_type which is directly preceded by a
 * "separator" element is extracted together with this separator. When these
 * two are directly followed by another separator and another element of type
 * $ref_tag_type (the micro reference), these two are extracted as well.
 * The extracted elements are removed from $tagged_text and the array is
 * re-indexed. Values which are not arrays are left untouched.
 *
 * @param array $tagged_text
 *   The tagged text to operate on, passed by reference.
 * @param string $ref_tag_type
 *   The tag type for a sec reference is "secReference", but "relSecReference"
 *   is also used in case of relationships.
 * @param bool $replace_with_placeholder
 *   If TRUE, a placeholder tagged text element of type
 *   "PLACEHOLDER_" . $ref_tag_type is put at the position of the extracted
 *   leading separator as replacement for the extracted tagged text elements.
 *
 * @return array
 *   The extracted tagged text elements in their original order, or an empty
 *   array if nothing was extracted.
 */
function tagged_text_extract_secref(&$tagged_text, $ref_tag_type = "secReference", $replace_with_placeholder = false) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    return $extracted_tt;
  }

  // The index arithmetic below relies on consecutive numeric keys.
  $tagged_text = array_values($tagged_text);
  $last_index = count($tagged_text) - 1;
  for ($i = 0; $i < $last_index; $i++) {
    if ($tagged_text[$i]->type != "separator" || $tagged_text[$i + 1]->type != $ref_tag_type) {
      continue;
    }
    $extracted_tt[0] = $tagged_text[$i];
    $extracted_tt[1] = $tagged_text[$i + 1];

    if ($replace_with_placeholder) {
      // Text must not be null, see cdm_tagged_text_to_markup().
      $tagged_text[$i] = tagged_text_new("PLACEHOLDER_" . $ref_tag_type, "PLACEHOLDER_" . $ref_tag_type);
    }
    else {
      unset($tagged_text[$i]);
    }
    unset($tagged_text[$i + 1]);
    // Also get the micro reference which could be in $tagged_text[$i + 3].
    if (isset($tagged_text[$i + 3]) && $tagged_text[$i + 2]->type == "separator" && $tagged_text[$i + 3]->type == $ref_tag_type) {
      $extracted_tt[2] = $tagged_text[$i + 2];
      $extracted_tt[3] = $tagged_text[$i + 3];
      unset($tagged_text[$i + 2]);
      unset($tagged_text[$i + 3]);
    }
    // Only the first occurrence is extracted.
    break;
  }
  // Re-index the array to make it continuous again.
  $tagged_text = array_values($tagged_text);
  return $extracted_tt;
}
269

  
270
/**
 * Extracts the tagged text for the nomenclatural status from a tagged text
 * array.
 *
 * The first element of type "nomStatus" is extracted, together with a
 * directly preceding "separator" element and a directly following
 * "postSeparator" element if these exist. All elements of the array are
 * inspected, including the last one. The extracted elements are removed from
 * $tagged_text and the array is re-indexed. Values which are not arrays are
 * left untouched.
 *
 * @param array $tagged_text
 *   The tagged text to operate on, passed by reference.
 *
 * @return array
 *   The extracted tagged text elements in their original order, or an empty
 *   array if nothing was extracted.
 */
function tagged_text_extract_nomstatus(&$tagged_text) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    return $extracted_tt;
  }

  // The index arithmetic below relies on consecutive numeric keys.
  $tagged_text = array_values($tagged_text);
  $count = count($tagged_text);
  for ($i = 0; $i < $count; $i++) {
    if ($tagged_text[$i]->type != "nomStatus") {
      continue;
    }
    $extracted_tt[] = $tagged_text[$i];
    if (isset($tagged_text[$i + 1]) && $tagged_text[$i + 1]->type == "postSeparator") {
      $extracted_tt[] = $tagged_text[$i + 1];
      unset($tagged_text[$i + 1]);
    }
    // There is no preceding element when the status is the first element.
    if ($i > 0 && $tagged_text[$i - 1]->type == "separator") {
      array_unshift($extracted_tt, $tagged_text[$i - 1]);
      unset($tagged_text[$i - 1]);
    }
    unset($tagged_text[$i]);
    // Only the first occurrence is extracted.
    break;
  }
  // Re-index the array to make it continuous again.
  $tagged_text = array_values($tagged_text);
  return $extracted_tt;
}
292

  
293
/**
 * Finds all elements of a specific type in a tagged text array.
 *
 * All elements of the array are inspected, including the last one.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects. For any other value an empty array is
 *   returned.
 * @param string $type
 *   The tag type to search for.
 *
 * @return array
 *   A numerically indexed array of the matching TaggedText objects in their
 *   original order.
 */
function find_tagged_text_elements($taggedTextList, $type){
  $matching_elements = array();
  if (is_array($taggedTextList)) {
    foreach ($taggedTextList as $tagged_text) {
      if ($tagged_text->type == $type) {
        $matching_elements[] = $tagged_text;
      }
    }
  }
  return $matching_elements;
}

Also available in: Unified diff