1
|
<?php
|
2
|
|
3
|
/**
|
4
|
* @file
|
5
|
* Functions for handling CDM TaggedText arrays
|
6
|
*
|
7
|
*
|
8
|
* @copyright
|
9
|
* (C) 2007-2018 EDIT
|
10
|
* European Distributed Institute of Taxonomy
|
11
|
* http://www.e-taxonomy.eu
|
12
|
*
|
13
|
* The contents of this module are subject to the Mozilla
|
14
|
* Public License Version 1.1.
|
15
|
* @see http://www.mozilla.org/MPL/MPL-1.1.html
|
16
|
*
|
17
|
* @author
|
18
|
* - Andreas Kohlbecker <a.kohlbecker@BGBM.org>
|
19
|
*/
|
20
|
|
21
|
/**
 * Creates a new TaggedText item.
 *
 * @param string $tag_type
 *   The value to store in the 'type' field of the new item.
 * @param string|null $text
 *   The value to store in the 'text' field of the new item, NULL by default.
 *
 * @return object
 *   A stdClass object having the two fields 'type' and 'text'.
 */
function tagged_text_new($tag_type, $text = null){
  // Casting an associative array yields a stdClass with one field per key.
  return (object) array(
    'type' => $tag_type,
    'text' => $text,
  );
}
|
27
|
|
28
|
/**
 * Adds render options to specific tagged text elements.
 *
 * The options are stored as fields of the matching tagged text objects
 * ($tt->attributes, $tt->prefix, $tt->suffix). Existing 'attributes' are
 * preserved by merging the new ones into the existing array; when both arrays
 * contain the same key the new value wins. 'prefix' and 'suffix' replace any
 * previously set value.
 *
 * @param array $taggedtxt
 *   The array of TaggedText objects to operate on. The objects are modified
 *   in place.
 * @param array $attributes_map
 *   An array of arrays with following elements in each element array:
 *   - 'filter-type': the tagged text type to which the options are applicable
 *   - 'filter-uuid': (optional) filter to only match items having exactly this
 *      value in their 'uuid' field. Items without 'uuid' never match.
 *   - 'attributes': array of attributes as accepted by drupal_attributes()
 *   - 'prefix': like the drupal_render() '#prefix' option
 *   - 'suffix': like the drupal_render() '#suffix' option
 *   Entries without 'filter-type' are ignored.
 */
function cdm_tagged_text_add_options(array &$taggedtxt, array $attributes_map){

  foreach ($attributes_map as $attribute_data) {
    if (!isset($attribute_data['filter-type'])) {
      // no type to match against, nothing can be applied
      continue;
    }
    // The items are objects, so no by-reference iteration is needed in order
    // to modify them.
    foreach ($taggedtxt as $tt) {
      if ($tt->type != $attribute_data['filter-type']) {
        continue;
      }
      if (isset($attribute_data['filter-uuid'])
        && (!isset($tt->uuid) || $tt->uuid !== $attribute_data['filter-uuid'])) {
        // uuid filter is set but does not match, ignore
        continue;
      }
      // $tt matched type and filter-uuid if set, apply options
      if (isset($attribute_data['attributes'])) {
        $existing_attributes = array();
        if (isset($tt->attributes) && is_array($tt->attributes)) {
          $existing_attributes = $tt->attributes;
        }
        // merge instead of overwrite, so that attributes added earlier survive
        $tt->attributes = array_merge($existing_attributes, (array) $attribute_data['attributes']);
      }
      foreach (array('prefix', 'suffix') as $key) {
        if (isset($attribute_data[$key])) {
          $tt->$key = $attribute_data[$key];
        }
      }
    }
  }
}
|
64
|
|
65
|
/**
 * Expands all TaggedText items which carry an entityReference.
 *
 * Walks the passed TaggedText array and, for each item having a
 * TaggedText->entityReference whose type is not listed in $skiptags, requests
 * the tagged text of the referenced entity from the web service. When the
 * request yields a result, the original item is replaced by the loaded items,
 * otherwise the original item is kept.
 *
 * Existing 'attributes' (@see cdm_tagged_text_add_options()) of the original
 * item are copied to each of the replacement items. 'prefix' is set on the
 * first replacement item and 'suffix' on the last one.
 *
 * @param array $taggedtxt
 *   The original TaggedText array
 * @param array $skiptags
 *   Optional list of tag names to skip
 *
 * @return array
 *   The new tagged text with all TaggedText->entityReference objects expanded
 */
function cdm_tagged_text_expand_entity_references(array $taggedtxt, $skiptags = array()) {
  $expanded = array();
  foreach ($taggedtxt as $item) {
    $replacement = NULL;

    $is_expandable = isset($item->entityReference) && !in_array($item->type, $skiptags);
    if ($is_expandable) {
      $ws_uri = cdm_ws_base_uri($item->entityReference->type);
      if ($ws_uri) {
        if (str_endsWith($ws_uri, '$0')) {
          // strip the trailing placeholder
          $ws_uri = substr($ws_uri, 0, strlen($ws_uri) - 2);
        }
        // names are served by another method than all other entity types
        $ws_method = ($ws_uri == CDM_WS_NAME) ? "/taggedName" : "/taggedText";
        if (!str_endsWith($ws_uri, '/')) {
          $ws_uri .= '/';
        }
        $loaded_items = cdm_ws_get($ws_uri . $item->entityReference->uuid . $ws_method);
        if ($loaded_items) {
          // carry the render options of the replaced item over to the new ones
          if (isset($item->attributes)) {
            foreach ($loaded_items as $loaded_item) {
              $loaded_item->attributes = $item->attributes;
            }
          }
          if (isset($item->prefix)) {
            $loaded_items[0]->prefix = $item->prefix;
          }
          if (isset($item->suffix)) {
            $last_index = count($loaded_items) - 1;
            $loaded_items[$last_index]->suffix = $item->suffix;
          }
          $replacement = $loaded_items;
        }
      }
    }

    if ($replacement !== NULL) {
      $expanded = array_merge($expanded, $replacement);
    }
    else {
      // default case: keep the item as it is
      $expanded[] = $item;
    }
  }
  return $expanded;
}
|
123
|
|
124
|
/**
 * Converts an array of TaggedText items into corresponding html tags.
 *
 * Consecutive items of the same type share one element, a new element is
 * started whenever the type changes or a prefix or suffix has to be emitted.
 * Each element is provided with a class attribute which contains the type of
 * the TaggedText item and, if the item has an entityReference, the class
 * returned by html_class_attribute_ref().
 *
 * Items whose type is contained in $skiptags or whose text is empty are
 * omitted.
 *
 * Tagged text where the type starts with 'PLACEHOLDER_' will be added to the
 * markup as plain text whereas the taggedText->type wrapped in curly brackets:
 * '{'. $tt->type . '}' is used as text.
 * see tagged_text_extract_reference_and_detail()
 *
 * In addition to the tagged text element fields as defined in the cdm this
 * method also recognizes:
 *  - 'attributes': array of attributes as accepted by drupal_attributes()
 *  - 'prefix': like the drupal_render() '#prefix' option
 *  - 'suffix': like the drupal_render() '#suffix' option
 * See also cdm_tagged_text_add_options()
 * 'prefix' and 'suffix' are only rendered when $tag is not empty.
 *
 * The algorithm of this functions is basically the same as for
 * eu.etaxonomy.cdm.strategy.cache.TaggedCacheHelper.createString(List<TaggedText> tags, HTMLTagRules htmlRules)
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip
 * @param $tag
 *   The name of the html element to wrap the texts in. No elements, prefixes
 *   or suffixes are created when this is empty.
 * @param $options
 *   - 'html' (default FALSE): Whether $text is HTML or just plain-text. NOTE! in this case the text is not
 *     translated by t()
 *
 * @return string
 *   The markup. An empty string if no item has been rendered.
 */
function cdm_tagged_text_to_markup(array $taggedtxt, $skiptags = array(), $tag = 'span', $options = array()) {

  $out = '';
  $text_is_html = !empty($options['html']);
  $was_separator = false;
  $last_type = null;
  $last_suffix = '';
  // TRUE as long as an element has been opened which is not yet closed
  $tag_open = false;
  $i = 0;
  foreach ($taggedtxt as $tt) {
    if (!in_array($tt->type, $skiptags) && $tt->text) {

      $is_first = $i == 0;
      $is_separator = is_tagged_text_sepatator_type($tt->type);
      if (str_beginsWith($tt->type, 'PLACEHOLDER_')) {
        $out .= '{' . $tt->type . '}';
      }
      else {

        // attributes
        $attributes = isset($tt->attributes) ? $tt->attributes : array();
        // drupal_attributes() style arrays may hold the class as plain string,
        // normalize to array so that further classes can be appended
        $attributes['class'] = isset($attributes['class']) ? (array) $attributes['class'] : array();
        $attributes['class'][] = $tt->type;
        if (isset($tt->entityReference)) {
          $attributes['class'][] = html_class_attribute_ref($tt->entityReference);
        }

        // prefix and suffix
        $prefix = isset($tt->prefix) ? $tt->prefix : '';
        $suffix = isset($tt->suffix) ? $tt->suffix : '';

        if ($tag) {
          // A new element is needed for the very first text, on type change
          // and whenever a suffix or prefix must be placed between the elements.
          $start_new_element = !$tag_open || $prefix || $last_suffix || $last_type != $tt->type;
          if ($start_new_element) {
            if ($tag_open) {
              // always close before opening, so that the elements stay balanced
              $out .= '</' . $tag . '>' . $last_suffix;
            }
            $out .= $prefix . '<' . $tag . drupal_attributes($attributes) . '>';
            $tag_open = true;
          }
        }
        if (!$is_separator && !$was_separator && !$is_first) {
          $out .= " ";
        }
        if ($text_is_html) {
          $out .= $tt->text;
        }
        else {
          $out .= t('@text', array('@text' => $tt->text));
        }
        $was_separator = $is_separator;
        $last_type = $tt->type;
        $last_suffix = $suffix;
      }
    }
    $i++;
  }
  if ($tag_open) {
    // close the last element and emit the pending suffix of the last item
    $out .= '</' . $tag . '>' . $last_suffix;
  }
  return $out;
}
|
220
|
|
221
|
/**
 * Converts an array of TaggedText items into corresponding plain text string
 *
 * This is a shortcut for cdm_tagged_text_to_markup() with NULL passed as
 * $tag parameter.
 *
 * The algorithm of this functions is basically the same as for
 * eu.etaxonomy.cdm.strategy.cache.TaggedCacheHelper.createString(List<TaggedText> tags)
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip
 *
 * @return string
 *   The plain text
 */
function cdm_tagged_text_to_string(array $taggedtxt, $skiptags = array()) {

  // no element name, the texts are not wrapped into elements
  $no_tag = null;
  $plain_text = cdm_tagged_text_to_markup($taggedtxt, $skiptags, $no_tag);
  return $plain_text;
}
|
242
|
|
243
|
/**
 * Tests if the passed tagged text type is one of the separator types
 * 'separator' or 'postSeparator'.
 *
 * See cdmlib: boolean eu.etaxonomy.cdm.strategy.cache.TagEnum.isSeparator();
 *
 * @param string $tagged_text_type
 *   The tagged text type to test. The comparison is strict, therefore
 *   non string values are never regarded as separator type.
 *
 * @return bool
 *   TRUE if the type is a separator type.
 */
function is_tagged_text_sepatator_type($tagged_text_type){
  static $separator_names = array('separator', 'postSeparator');
  // strict comparison: loosely compared TRUE would equal any non empty string
  return in_array($tagged_text_type, $separator_names, true);
}
|
253
|
|
254
|
|
255
|
/**
 * Collects the texts of the taggedText instances having one of the given types.
 *
 * @param array $taggedtxt
 *   Array with text items.
 * @param array $include_tag_types
 *   Array of the tag types for which to collect the texts from the $taggedtxt
 *   array, or NULL to return all texts.
 *
 * @return array
 *   A list of the texts of all matching items, in the order of the items in
 *   $taggedtxt.
 */
function cdm_tagged_text_values(array $taggedtxt, $include_tag_types = NULL) {
  $texts = array();
  foreach ($taggedtxt as $item) {
    $is_excluded = $include_tag_types !== NULL && !in_array($item->type, $include_tag_types);
    if ($is_excluded) {
      continue;
    }
    $texts[] = $item->text;
  }
  return $texts;
}
|
279
|
|
280
|
/**
 * Preprocess the taggedTitle arrays.
 *
 * Step 1: The tag type "hybridSign" is renamed to "name". This modifies the
 * passed TaggedText objects.
 *
 * Step 2: A sequence of three items in which the first and the third have the
 * same type and the second is of the type "separator" is merged into one
 * single item of this type. The text of the merged item is the concatenation
 * of the three texts.
 *
 * Nothing is done when the passed value is not an array.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects, it is replaced by the normalized array.
 */
function normalize_tagged_text(&$taggedTextList) {

  if (!is_array($taggedTextList)) {
    return;
  }
  $length = count($taggedTextList);

  // First pass: rename.
  for ($pos = 0; $pos < $length; $pos++) {
    $item = $taggedTextList[$pos];
    if ($item->type == "hybridSign") {
      $item->type = "name";
    }
  }

  // Second pass: resolve separators.
  $normalized = array();
  $pos = 0;
  while ($pos < $length) {
    $current = $taggedTextList[$pos];
    // elements of the same type concatenated by a separator should be merged together
    $is_mergeable = isset($taggedTextList[$pos + 2])
      && $taggedTextList[$pos + 1]->type == "separator"
      && $current->type == $taggedTextList[$pos + 2]->type;
    if ($is_mergeable) {
      // work on a copy, the original item must stay untouched
      $merged = clone $current;
      $merged->text = $current->text . $taggedTextList[$pos + 1]->text . $taggedTextList[$pos + 2]->text;
      $normalized[] = $merged;
      // all three items are consumed
      $pos += 3;
    }
    else {
      // no special handling
      $normalized[] = $current;
      $pos++;
    }
  }
  $taggedTextList = $normalized;
}
|
324
|
|
325
|
/**
 * Extracts all tagged text items of the reference type from a tagged text
 * array, each together with its separator.
 *
 * If a reference item is directly preceded by an item of the type 'separator',
 * this separator is extracted and removed as well. Otherwise a new separator
 * item with a single blank as text is put in front of the extracted reference.
 *
 * @param $tagged_text
 *   The tagged text to operate on. The extracted items are removed from this
 *   array and the array is re-indexed. Values other than arrays are left
 *   untouched.
 * @param string $ref_tag_type
 *   The tag type for a reference is "reference", this is used as default.
 * @param bool $replace_with_placeholder
 *   If TRUE each extracted reference item is replaced by a placeholder tagged
 *   text element of the type "PLACEHOLDER_" . $ref_tag_type instead of being
 *   removed. Extracted separators are always removed.
 *
 * @return array
 *   The extracted tagged text items, an empty array if nothing was extracted.
 */
function tagged_text_extract_reference(&$tagged_text, $ref_tag_type = "reference", $replace_with_placeholder = false) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    // nothing to extract from, array_values() below must not see non arrays
    return $extracted_tt;
  }

  $tt_length = count($tagged_text); // preserve the count since the array shrinks while processing
  for ($i = 0; $i < $tt_length; $i++) {
    if (!isset($tagged_text[$i]) || $tagged_text[$i]->type != $ref_tag_type) {
      continue;
    }
    // isset() also covers $i == 0 and predecessors which have been removed before
    if (isset($tagged_text[$i - 1]) && $tagged_text[$i - 1]->type == 'separator') {
      // the reference may be preceded by a separator in case it is not an in-reference
      $extracted_tt[] = $tagged_text[$i - 1];
      unset($tagged_text[$i - 1]);
    }
    else {
      // need to add a separator since the reference tagged text will become the first element
      // and thus will not be preceded by a separator
      $extracted_tt[] = tagged_text_new('separator', ' ');
    }
    $extracted_tt[] = $tagged_text[$i];
    if ($replace_with_placeholder) {
      // text must not be null, see cdm_tagged_text_to_markup()
      $tagged_text[$i] = tagged_text_new("PLACEHOLDER_" . $ref_tag_type, "PLACEHOLDER_" . $ref_tag_type);
    }
    else {
      unset($tagged_text[$i]);
    }
  }
  $tagged_text = array_values($tagged_text); // re-index array to make it continuous again
  return $extracted_tt;
}
|
368
|
|
369
|
/**
 * Extracts the tagged text for sec references with separator and citation detail from a tagged text array.
 *
 * Only the first item of the reference type which is directly preceded by an
 * item of the type "separator" is extracted. If the extracted reference is
 * directly followed by a "separator" item and a "secMicroReference" item,
 * these two are extracted as well.
 *
 * @param $tagged_text
 *   The tagged text to operate on. The extracted items are removed from this
 *   array and the array is re-indexed. Values other than arrays are left
 *   untouched.
 * @param string $ref_tag_type
 *   The tagtype for a secreference is "secReference", but "relSecReference" is also used in case of relationships.
 * @param bool $replace_with_placeholder
 *   If TRUE a placeholder tagged text element of the type
 *   "PLACEHOLDER_" . $ref_tag_type is put at the position of the extracted
 *   separator as replacement for the extracted tagged text elements.
 *
 * @return array
 *   The extracted items: separator (0), reference (1) and optionally the
 *   separator (2) and micro reference (3) of the citation detail. An empty
 *   array if nothing was extracted.
 */
function tagged_text_extract_reference_and_detail(&$tagged_text, $ref_tag_type = "secReference", $replace_with_placeholder = false) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    // nothing to extract from, array_values() below must not see non arrays
    return $extracted_tt;
  }

  $tt_length = count($tagged_text);
  // stop one before the end, since the item at $i + 1 is inspected
  for ($i = 0; $i < $tt_length - 1; $i++) {
    if ($tagged_text[$i + 1]->type != $ref_tag_type || $tagged_text[$i]->type != "separator") {
      continue;
    }
    $extracted_tt[0] = $tagged_text[$i];
    $extracted_tt[1] = $tagged_text[$i + 1];

    if ($replace_with_placeholder) {
      // text must not be null, see cdm_tagged_text_to_markup()
      $tagged_text[$i] = tagged_text_new("PLACEHOLDER_" . $ref_tag_type, "PLACEHOLDER_" . $ref_tag_type);
    }
    else {
      unset($tagged_text[$i]);
    }
    unset($tagged_text[$i + 1]);
    // also get the microreference which could be in $tagged_text[$i + 3]
    if (isset($tagged_text[$i + 2], $tagged_text[$i + 3])
      && $tagged_text[$i + 2]->type == "separator"
      && $tagged_text[$i + 3]->type == "secMicroReference") {
      $extracted_tt[2] = $tagged_text[$i + 2];
      $extracted_tt[3] = $tagged_text[$i + 3];
      unset($tagged_text[$i + 2]);
      unset($tagged_text[$i + 3]);
    }
    // only the first match is extracted
    break;
  }
  $tagged_text = array_values($tagged_text); // re-index array to make it continuous again
  return $extracted_tt;
}
|
413
|
|
414
|
/**
 * Extracts all nomenclatural status items from a tagged text array.
 *
 * Each item of the type "nomStatus" is extracted together with a directly
 * preceding item of the type "separator" and a directly following item of the
 * type "postSeparator", if these exist.
 *
 * @param $tagged_text
 *   The tagged text to operate on. The extracted items are removed from this
 *   array and the array is re-indexed. Values other than arrays are left
 *   untouched.
 *
 * @return array
 *   The extracted items in the order: separator, nomStatus, postSeparator for
 *   each status found. An empty array if nothing was extracted.
 */
function tagged_text_extract_nomstatus(&$tagged_text) {

  $extracted_tt = [];
  if (!is_array($tagged_text)) {
    // nothing to extract from, array_values() below must not see non arrays
    return $extracted_tt;
  }

  $itemcnt = count($tagged_text); // preserve the count since the array may shrink while processing
  for ($i = 0; $i < $itemcnt; $i++) {
    // isset() prevents from accessing removed indices
    if (!isset($tagged_text[$i]) || $tagged_text[$i]->type != "nomStatus") {
      continue;
    }
    $extracted_status_items = [$tagged_text[$i]];
    if (isset($tagged_text[$i + 1]) && $tagged_text[$i + 1]->type == "postSeparator") {
      $extracted_status_items[] = $tagged_text[$i + 1];
      unset($tagged_text[$i + 1]);
    }
    // the predecessor does not exist for $i == 0 or may have been removed before
    if (isset($tagged_text[$i - 1]) && $tagged_text[$i - 1]->type == "separator") {
      array_unshift($extracted_status_items, $tagged_text[$i - 1]);
      unset($tagged_text[$i - 1]);
    }
    unset($tagged_text[$i]);
    $extracted_tt = array_merge($extracted_tt, $extracted_status_items);
  }
  $tagged_text = array_values($tagged_text); // re-index array to make it continuous again
  return $extracted_tt;
}
|
441
|
|
442
|
/**
 * Extracts all tagged text items of a specific type from a tagged text array.
 *
 * @param $tagged_text
 *   The tagged text to operate on. The extracted items are removed from this
 *   array and the array is re-indexed. Values other than arrays are left
 *   untouched.
 * @param string $type
 *   The tagged text type of the items to extract.
 * @param bool $replace_with_placeholder
 *   If TRUE each extracted item is replaced by a placeholder tagged text
 *   element of the type "PLACEHOLDER_" . $type instead of being removed.
 *
 * @return array
 *   The extracted items, an empty array if nothing was extracted.
 */
function tagged_text_extract(&$tagged_text, $type, $replace_with_placeholder = false) {

  $matching_elements = array();
  if (!is_array($tagged_text)) {
    // nothing to extract from, array_values() below must not see non arrays
    return $matching_elements;
  }

  // Preserve the count since the array shrinks while processing. Evaluating
  // count() in the loop condition would stop the loop before the end.
  $itemcnt = count($tagged_text);
  for ($i = 0; $i < $itemcnt; $i++) {
    if (!isset($tagged_text[$i]) || $tagged_text[$i]->type != $type) {
      continue;
    }
    $matching_elements[] = $tagged_text[$i];
    if ($replace_with_placeholder) {
      // text must not be null, see cdm_tagged_text_to_markup()
      $tagged_text[$i] = tagged_text_new("PLACEHOLDER_" . $type, "PLACEHOLDER_" . $type);
    }
    else {
      unset($tagged_text[$i]);
    }
  }
  $tagged_text = array_values($tagged_text); // re-index array to make it continuous again
  return $matching_elements;
}
|
460
|
|
461
|
/**
 * Finds all tagged text items of a specific type in a tagged text array.
 *
 * In contrast to tagged_text_extract() the passed array is not modified.
 *
 * @param $tagged_text
 *   The tagged text array to search in.
 * @param string $type
 *   The tagged text type of the items to find.
 *
 * @return array
 *   The matching items in their original order, an empty array if there is
 *   no match or if $tagged_text is not an array.
 */
function find_tagged_text_elements(&$tagged_text, $type){
  $matching_elements = array();
  if (is_array($tagged_text)) {
    // iterate over all items, the last one included
    foreach ($tagged_text as $tt) {
      if ($tt->type == $type) {
        $matching_elements[] = $tt;
      }
    }
  }
  return $matching_elements;
}
|