1
|
<?php
|
2
|
|
3
|
/**
 * @file
 * Functions for handling CDM TaggedText arrays
 *
 *
 * @copyright
 *   (C) 2007-2018 EDIT
 *   European Distributed Institute of Taxonomy
 *   http://www.e-taxonomy.eu
 *
 *   The contents of this module are subject to the Mozilla
 *   Public License Version 1.1.
 * @see http://www.mozilla.org/MPL/MPL-1.1.html
 *
 * @author
 *   - Andreas Kohlbecker <a.kohlbecker@BGBM.org>
 */
|
20
|
|
21
|
/**
 * Creates a new TaggedText object.
 *
 * @param string $tag_type
 *   The value for the type property of the new object.
 * @param string $text
 *   Optional value for the text property, NULL when omitted.
 *
 * @return object
 *   A stdClass instance having the properties type and text (in this order).
 */
function tagged_text_new($tag_type, $text = null){
  // Casting an array yields a stdClass whose properties follow the key order.
  return (object) array(
    'type' => $tag_type,
    'text' => $text,
  );
}
|
27
|
|
28
|
/**
 * Walks the passed TaggedText array to find all elements which have a
 * TaggedText->entityReference. For each of these the taggedTexts are loaded
 * from the webservice and the original entry in the TaggedText array will be
 * replaced by the newly loaded array.
 *
 * An element is kept unchanged when its type is listed in $skiptags, when no
 * webservice base uri is known for the type of the referenced entity or when
 * the webservice does not respond with a non empty array.
 *
 * @param array $taggedtxt
 *   The original TaggedText array
 * @param array $skiptags
 *   Optional list of tag names to skip
 * @return array
 *   The new tagged text with all TaggedText->entityReference objects expanded
 */
function cdm_tagged_text_expand_entity_references(array $taggedtxt, $skiptags = array()) {
  $tagged_text_expanded = array();
  foreach ($taggedtxt as $tt) {
    $referenced_tt = NULL;
    if (isset($tt->entityReference) && !in_array($tt->type, $skiptags)) {
      $base_uri = cdm_ws_base_uri($tt->entityReference->type);
      if ($base_uri) {
        // Names are requested through "/taggedName", everything else through "/taggedText".
        $tagged_text_method = $base_uri == CDM_WS_NAME ? "/taggedName" : "/taggedText";
        $referenced_tt = cdm_ws_get($base_uri . "/" . $tt->entityReference->uuid . $tagged_text_method);
      }
    }
    // Only a non empty array can be merged. Any other response (for example an
    // object) would make array_merge() fail and lose the items collected so far,
    // so the original element is kept in that case.
    if (is_array($referenced_tt) && !empty($referenced_tt)) {
      $tagged_text_expanded = array_merge($tagged_text_expanded, $referenced_tt);
    }
    else {
      // default case
      $tagged_text_expanded[] = $tt;
    }
  }
  return $tagged_text_expanded;
}
|
63
|
|
64
|
/**
 * Converts an array of TaggedText items into corresponding html tags.
 *
 * Each item is wrapped in a span element with a class attribute which is set
 * to the type of the TaggedText item. For items having an entityReference the
 * result of html_class_attribute_ref() is added to the class attribute.
 *
 * Items whose type is listed in $skiptags and items without text (NULL or
 * empty string) are omitted. Consecutive items are glued with a single blank,
 * but no blank is inserted directly before or after a separator item and the
 * markup never starts or ends with glue.
 *
 * Tagged text where the type starts with 'PLACEHOLDER_' will be added to the
 * markup as plain text whereas the taggedText->type wrapped in curly
 * brackets: '{'. $tt->type . '}' is used as text.
 * see tagged_text_extract_secref()
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip
 *
 * @return string
 *   The markup.
 */
function cdm_tagged_text_to_markup(array $taggedtxt, $skiptags = array()) {

  $tag = 'span';
  $out = '';
  $was_separator = false;
  foreach ($taggedtxt as $tt) {
    // Compare against '' instead of testing truthiness so that the text "0" is not dropped.
    if (in_array($tt->type, $skiptags) || !isset($tt->text) || (string) $tt->text === '') {
      continue;
    }
    $is_separator = is_tagged_text_sepatator_type($tt->type);
    // The glue belongs between the previously rendered item and this one, so
    // it is prepended. Appending it after the current item would put a blank
    // in front of a following separator and leave a trailing blank when the
    // last items of the list are skipped.
    $glue = $out !== '' && !$is_separator && !$was_separator ? ' ' : '';
    if (str_beginsWith($tt->type, 'PLACEHOLDER_')) {
      $out .= $glue . '{' . $tt->type . '}';
    }
    else {
      $class_attr = $tt->type;
      if (isset($tt->entityReference)) {
        $class_attr .= " " . html_class_attribute_ref($tt->entityReference);
      }
      $out .= $glue
        . '<' . $tag . ' class="' . $class_attr . '">'
        . t('@text', array('@text' => $tt->text))
        . '</' . $tag . '>';
    }
    $was_separator = $is_separator;
  }
  return $out;
}
|
112
|
|
113
|
/**
 * Converts an array of TaggedText items into corresponding plain text string
 *
 * Items whose type is listed in $skiptags and items without text (NULL or
 * empty string) are omitted. Consecutive items are glued with a single blank,
 * but no blank is inserted directly before or after a separator item and the
 * string never starts or ends with glue.
 *
 * Each text is passed through t() as '@text' placeholder value.
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip
 *
 * @return string
 *   The plain text
 */
function cdm_tagged_text_to_string(array $taggedtxt, $skiptags = array()) {

  $out = '';
  $was_separator = false;
  foreach ($taggedtxt as $tt) {
    // Compare against '' instead of testing truthiness so that the text "0" is not dropped.
    if (in_array($tt->type, $skiptags) || !isset($tt->text) || (string) $tt->text === '') {
      continue;
    }
    $is_separator = is_tagged_text_sepatator_type($tt->type);
    // The glue belongs between the previously rendered item and this one, so
    // it is prepended. Appending it after the current item would put a blank
    // in front of a following separator and leave a trailing blank when the
    // last items of the list are skipped.
    $glue = $out !== '' && !$is_separator && !$was_separator ? ' ' : '';
    $out .= $glue . t('@text', array('@text' => $tt->text));
    $was_separator = $is_separator;
  }
  return $out;
}
|
144
|
|
145
|
/**
 * Tells whether the given tagged text type denotes a separator.
 *
 * See cdmlib: boolean eu.etaxonomy.cdm.strategy.cache.TagEnum.isSeparator();
 *
 * @param string $tagged_text_type
 *   The type of a TaggedText item
 *
 * @return bool
 *   TRUE for the types 'separator' and 'postSeparator', FALSE otherwise.
 */
function is_tagged_text_sepatator_type($tagged_text_type){
  static $separator_names = array('separator', 'postSeparator');
  // Strict comparison: with loose comparison TRUE (and the integer 0 prior to
  // PHP 8) would be reported as separator type.
  return in_array($tagged_text_type, $separator_names, true);
}
|
155
|
|
156
|
|
157
|
/**
 * Collects the texts of the taggedText instances having one of the given types.
 *
 * @param array $taggedtxt
 *   Array with text items.
 * @param array $include_tag_types
 *   Array of the tag types for which to collect the texts from the $taggedtxt
 *   array, or NULL to return all texts.
 *
 * @return array
 *   A sequentially indexed array with the texts of the matching items, in the
 *   order in which they occur in $taggedtxt.
 */
function cdm_tagged_text_values(array $taggedtxt, $include_tag_types = NULL) {
  $texts = array();
  foreach ($taggedtxt as $item) {
    $is_wanted = $include_tag_types === NULL || in_array($item->type, $include_tag_types);
    if ($is_wanted) {
      $texts[] = $item->text;
    }
  }
  return $texts;
}
|
181
|
|
182
|
/**
 * Preprocess the taggedTitle arrays.
 *
 * Step 1: Turns 'newly' introduced tag types ("hybridSign")
 * into tag type "name". The objects in the list are modified in place.
 *
 * Step 2: Two taggedTexts which have the same type and which have
 * a separator between them are merged together into a clone of the first
 * one, the texts of the three items are concatenated.
 *
 * The passed variable is replaced by the new, sequentially indexed list.
 * Nothing happens when it is not an array.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects
 */
function normalize_tagged_text(&$taggedTextList) {

  if (!is_array($taggedTextList)) {
    return;
  }

  // The second pass addresses neighbours by position, so the list must have
  // continuous keys starting at 0. This is not the case for arrays from which
  // items have been removed with unset().
  $taggedTextList = array_values($taggedTextList);
  $count = count($taggedTextList);

  // First pass: rename.
  foreach ($taggedTextList as $taggedText) {
    if ($taggedText->type == "hybridSign") {
      $taggedText->type = "name";
    }
  }

  // Second pass: resolve separators.
  $taggedNameListNew = array();
  for ($i = 0; $i < $count; $i++) {

    // elements of the same type concatenated by a separator should be merged together
    if ($i + 2 < $count
      && $taggedTextList[$i + 1]->type == "separator"
      && $taggedTextList[$i]->type == $taggedTextList[$i + 2]->type) {
      $taggedName = clone $taggedTextList[$i];
      $taggedName->text = $taggedName->text . $taggedTextList[$i + 1]->text . $taggedTextList[$i + 2]->text;
      $taggedNameListNew[] = $taggedName;
      // the separator and the second element are consumed by the merge
      $i += 2;
      continue;
    }
    // no special handling
    $taggedNameListNew[] = $taggedTextList[$i];
  }
  $taggedTextList = $taggedNameListNew;
}
|
226
|
|
227
|
/**
 * Extracts the tagged text for sec references with separator and citation detail from a tagged text array.
 *
 * Only the first occurrence of a "separator" element which is directly
 * followed by an element of the type $ref_tag_type is extracted. When this
 * pair is directly followed by another "separator" and another element of the
 * type $ref_tag_type (the microreference) these two are extracted as well.
 * The extracted elements are removed from $tagged_text which is re-indexed
 * afterwards.
 *
 * @param $tagged_text
 *   The tagged text to operate on. Nothing is extracted and the variable is
 *   left untouched when it is not an array.
 * @param string $ref_tag_type
 *   The tagtype for a secreference is "secReference", but "relSecReference" is also used in case of relationships.
 * @param bool $replace_with_placeholder
 *   Whether to add a placeholder tagged text element as replacement for the
 *   extracted tagged text elements. Type and text of the placeholder are
 *   "PLACEHOLDER_" . $ref_tag_type.
 * @return array
 *   The extracted elements: empty when nothing has been found, otherwise the
 *   separator and the reference at the indexes 0 and 1 and optionally the
 *   second separator and the microreference at the indexes 2 and 3.
 */
function tagged_text_extract_secref(&$tagged_text, $ref_tag_type = "secReference", $replace_with_placeholder = false) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    // array_values() must not be applied to something which is not an array
    return $extracted_tt;
  }

  // The elements are addressed by position, so make sure the keys are continuous.
  $tagged_text = array_values($tagged_text);
  $last_index = count($tagged_text) - 1;
  for ($i = 0; $i < $last_index; $i++) {
    if ($tagged_text[$i]->type != "separator" || $tagged_text[$i + 1]->type != $ref_tag_type) {
      continue;
    }
    $extracted_tt[0] = $tagged_text[$i];
    $extracted_tt[1] = $tagged_text[$i + 1];

    if ($replace_with_placeholder) {
      // text must not be null, see cdm_tagged_text_to_markup()
      $tagged_text[$i] = tagged_text_new("PLACEHOLDER_" . $ref_tag_type, "PLACEHOLDER_" . $ref_tag_type);
    }
    else {
      unset($tagged_text[$i]);
    }
    unset($tagged_text[$i + 1]);
    // also get the microreference which could be in $tagged_text[$i + 3]
    if (isset($tagged_text[$i + 3]) && $tagged_text[$i + 2]->type == "separator" && $tagged_text[$i + 3]->type == $ref_tag_type) {
      $extracted_tt[2] = $tagged_text[$i + 2];
      $extracted_tt[3] = $tagged_text[$i + 3];
      unset($tagged_text[$i + 2]);
      unset($tagged_text[$i + 3]);
    }
    break;
  }
  $tagged_text = array_values($tagged_text); // re-index array to make it continuous again
  return $extracted_tt;
}
|
269
|
|
270
|
/**
 * Extracts the first tagged text of the type "nomStatus" from a tagged text array.
 *
 * A "separator" element directly before and a "postSeparator" element
 * directly after the nomStatus element are extracted together with it. The
 * extracted elements are removed from $tagged_text which is re-indexed
 * afterwards.
 *
 * @param $tagged_text
 *   The tagged text to operate on. Nothing is extracted and the variable is
 *   left untouched when it is not an array.
 *
 * @return array
 *   The extracted elements in their original order: the optional separator,
 *   the nomStatus element and the optional postSeparator. Empty when no
 *   nomStatus element has been found.
 */
function tagged_text_extract_nomstatus(&$tagged_text) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    return $extracted_tt;
  }

  // The elements are addressed by position, so make sure the keys are continuous.
  $tagged_text = array_values($tagged_text);
  $count = count($tagged_text);
  // Every element including the last one must be inspected, the nomStatus
  // may well be the final element of the list.
  for ($i = 0; $i < $count; $i++) {
    if ($tagged_text[$i]->type != "nomStatus") {
      continue;
    }
    $extracted_tt[] = $tagged_text[$i];
    if (isset($tagged_text[$i + 1]) && $tagged_text[$i + 1]->type == "postSeparator") {
      $extracted_tt[] = $tagged_text[$i + 1];
      unset($tagged_text[$i + 1]);
    }
    // there is no preceding element when the nomStatus is the first one
    if ($i > 0 && $tagged_text[$i - 1]->type == "separator") {
      array_unshift($extracted_tt, $tagged_text[$i - 1]);
      unset($tagged_text[$i - 1]);
    }
    unset($tagged_text[$i]);
    break;
  }
  $tagged_text = array_values($tagged_text); // re-index array to make it continuous again
  return $extracted_tt;
}
|
292
|
|
293
|
/**
 * Finds all elements of the given type in an array of TaggedText objects.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects, any other value yields an empty result.
 * @param string $type
 *   The type of the TaggedText objects to find
 *
 * @return array
 *   The matching elements in the order in which they occur in $taggedTextList.
 */
function find_tagged_text_elements($taggedTextList, $type){
  $matching_elements = array();
  if (is_array($taggedTextList)) {
    // Iterating with foreach inspects every element including the last one
    // and does not depend on continuous keys.
    foreach ($taggedTextList as $taggedText) {
      if ($taggedText->type == $type) {
        $matching_elements[] = $taggedText;
      }
    }
  }
  return $matching_elements;
}
|