1 |
2978b0c8
|
Andreas Kohlbecker
|
<?php
|
2 |
|
|
|
3 |
|
|
/**
|
4 |
|
|
* @file
|
5 |
|
|
* Functions for handling CDM TaggedText arrays
|
6 |
|
|
*
|
7 |
|
|
*
|
8 |
|
|
* @copyright
|
9 |
|
|
* (C) 2007-2018 EDIT
|
10 |
|
|
* European Distributed Institute of Taxonomy
|
11 |
|
|
* http://www.e-taxonomy.eu
|
12 |
|
|
*
|
13 |
|
|
* The contents of this module are subject to the Mozilla
|
14 |
|
|
* Public License Version 1.1.
|
15 |
|
|
* @see http://www.mozilla.org/MPL/MPL-1.1.html
|
16 |
|
|
*
|
17 |
|
|
* @author
|
18 |
|
|
* - Andreas Kohlbecker <a.kohlbecker@BGBM.org>
|
19 |
|
|
*/
|
20 |
|
|
|
21 |
|
|
/**
 * Creates a new TaggedText item.
 *
 * @param string $tag_type
 *   The value to store in the 'type' property of the new item.
 * @param string $text
 *   Optional value to store in the 'text' property, defaults to null.
 *
 * @return object
 *   A stdClass instance having the properties 'type' and 'text'.
 */
function tagged_text_new($tag_type, $text = null){
  // Casting an associative array yields a stdClass with one property per key.
  return (object) array(
    'type' => $tag_type,
    'text' => $text,
  );
}
|
27 |
|
|
|
28 |
|
|
/**
 * Expands the entity references contained in a TaggedText array.
 *
 * Every item which has a TaggedText->entityReference and whose type is not
 * listed in $skiptags is looked up via the web service. When the lookup
 * returns a non empty result, the item is replaced by the elements of that
 * result. All other items are copied to the result unchanged.
 *
 * @param array $taggedtxt
 *   The original TaggedText array
 * @param array $skiptags
 *   Optional list of tag types which must not be expanded
 *
 * @return array
 *   The new TaggedText array in which the expandable items are replaced by
 *   the tagged text loaded for the referenced entity
 */
function cdm_tagged_text_expand_entity_references(array $taggedtxt, $skiptags = array()) {
  $expanded = array();
  foreach ($taggedtxt as $item) {
    // Stays NULL unless the web service delivers tagged text for this item.
    $replacement = NULL;

    if (isset($item->entityReference) && !in_array($item->type, $skiptags)) {
      $ws_base = cdm_ws_base_uri($item->entityReference->type);
      if ($ws_base) {
        // Names are requested with another method name than all other types.
        $method_suffix = ($ws_base == CDM_WS_NAME) ? "/taggedName" : "/taggedText";
        $replacement = cdm_ws_get($ws_base . "/" . $item->entityReference->uuid . $method_suffix);
      }
    }

    if ($replacement) {
      $expanded = array_merge($expanded, $replacement);
    }
    else {
      // Nothing to expand or nothing loaded: keep the original item.
      $expanded[] = $item;
    }
  }
  return $expanded;
}
|
63 |
|
|
|
64 |
|
|
/**
 * Converts an array of TaggedText items into corresponding html tags.
 *
 * Consecutive items of the same type are wrapped into one common element.
 * Each element is provided with a class attribute which is set to the type of
 * the TaggedText item, followed by the value of html_class_attribute_ref()
 * when the item has an entityReference.
 *
 * Tagged text where the type starts with 'PLACEHOLDER_' will be added to the
 * markup as plain text whereas the taggedText->type wrapped in curly brackets:
 * '{'. $tt->type . '}' is used as text.
 * see tagged_text_extract_secref()
 *
 * Items whose type is listed in $skiptags and items without text (null or
 * empty string) are omitted. A wrapping element is only closed when one has
 * been opened, so an empty or completely skipped input results in an empty
 * string.
 *
 * The algorithm of this functions is basically the same as for
 * eu.etaxonomy.cdm.strategy.cache.TaggedCacheHelper.createString(List<TaggedText> tags, HTMLTagRules htmlRules)
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip
 * @param string $tag
 *   Name of the html element used to wrap the texts, defaults to 'span'.
 *   Pass null to get the texts without any wrapping element.
 *
 * @return string
 *   The markup.
 */
function cdm_tagged_text_to_markup(array $taggedtxt, $skiptags = array(), $tag = 'span') {

  $out = '';
  $was_separator = false;
  $last_type = null;
  // TRUE as soon as an opening tag has been written which still needs its
  // closing counterpart.
  $tag_is_open = false;
  $i = 0;
  foreach ($taggedtxt as $tt) {
    // Only null and the empty string mean "no text", a literal "0" is valid
    // text and must not be dropped.
    $has_text = isset($tt->text) && (string) $tt->text !== '';
    if (!in_array($tt->type, $skiptags) && $has_text) {
      if (isset($tt->entityReference)) {
        $class_attr = $tt->type . " " . html_class_attribute_ref($tt->entityReference);
      } else {
        $class_attr = $tt->type;
      }
      // $i counts all items, including skipped ones.
      $is_first = $i == 0;
      $is_separator = is_tagged_text_sepatator_type($tt->type);
      if (str_beginsWith($tt->type, 'PLACEHOLDER_')) {
        // Placeholders are emitted verbatim and do not affect the state of
        // the wrapping element or of the separator handling.
        $out .= '{'. $tt->type . '}';
      } else {
        if ($tag && (!$tag_is_open || $last_type != $tt->type)) {
          // The type changed (or this is the first text): finish the previous
          // element, if any, and start a new one.
          if ($tag_is_open) {
            $out .= '</' . $tag . '>';
          }
          $out .= '<' . $tag . ' class="' . $class_attr . '">';
          $tag_is_open = true;
        }
        // Texts are joined by a blank unless a separator item is involved.
        if (!$is_separator && !$was_separator && !$is_first) {
          $out .= " ";
        }
        $out .= t('@text', array('@text' => $tt->text));
        $was_separator = $is_separator;
        $last_type = $tt->type;
      }
    }
    $i++;
  }
  // Close the last element, but only if one has actually been opened.
  if ($tag && $tag_is_open) {
    $out .= '</' . $tag . '>';
  }
  return $out;
}
|
124 |
|
|
|
125 |
|
|
/**
 * Converts an array of TaggedText items into the corresponding plain text.
 *
 * This is cdm_tagged_text_to_markup() called without a wrapping tag.
 *
 * The algorithm of this functions is basically the same as for
 * eu.etaxonomy.cdm.strategy.cache.TaggedCacheHelper.createString(List<TaggedText> tags)
 *
 * @param array $taggedtxt
 *   Array with text items to convert.
 * @param array $skiptags
 *   Array of tag names to skip
 *
 * @return string
 *   The plain text
 */
function cdm_tagged_text_to_string(array $taggedtxt, $skiptags = array()) {
  // Passing null as tag name suppresses the wrapping elements.
  $no_wrapping_tag = null;
  $plain_text = cdm_tagged_text_to_markup($taggedtxt, $skiptags, $no_wrapping_tag);
  return $plain_text;
}
|
146 |
|
|
|
147 |
|
|
/**
 * Tells whether a tagged text type denotes a separator.
 *
 * The types 'separator' and 'postSeparator' are treated as separators.
 *
 * See cdmlib: boolean eu.etaxonomy.cdm.strategy.cache.TagEnum.isSeparator();
 *
 * @param string $tagged_text_type
 *   The TaggedText type to check
 *
 * @return bool
 *   TRUE when the type is one of the separator types
 */
function is_tagged_text_sepatator_type($tagged_text_type){
  return in_array($tagged_text_type, array('separator', 'postSeparator'));
}
|
157 |
|
|
|
158 |
|
|
|
159 |
|
|
/**
 * Collects the texts of the taggedText instances having one of the given types.
 *
 * @param array $taggedtxt
 *   Array with text items.
 * @param array $include_tag_types
 *   Array of the tag types for which to collect the texts from the $taggedtxt
 *   array, or NULL to return all texts.
 *
 * @return array
 *   A list of the matching texts in the order of the $taggedtxt array.
 */
function cdm_tagged_text_values(array $taggedtxt, $include_tag_types = NULL) {
  $take_all = $include_tag_types === NULL;
  $texts = array();
  foreach ($taggedtxt as $entry) {
    if ($take_all || in_array($entry->type, $include_tag_types)) {
      $texts[] = $entry->text;
    }
  }
  return $texts;
}
|
183 |
|
|
|
184 |
|
|
/**
 * Preprocess the taggedTitle arrays.
 *
 * Step 1: All items of the tag type "hybridSign" are turned into items of
 *         the tag type "name". This changes the passed objects themselves.
 *
 * Step 2: An item which is followed by a "separator" item and a further item
 *         of its own type is merged with these two into one single item. The
 *         merged item is a clone of the first one with the three texts
 *         concatenated.
 *
 * Nothing happens when the passed value is not an array.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects, passed by reference and replaced by the
 *   normalized array
 */
function normalize_tagged_text(&$taggedTextList) {

  if (!is_array($taggedTextList)) {
    return;
  }

  $total = count($taggedTextList);

  // Step 1: rename.
  for ($k = 0; $k < $total; $k++) {
    if ($taggedTextList[$k]->type == "hybridSign") {
      $taggedTextList[$k]->type = "name";
    }
  }

  // Step 2: resolve separators.
  $normalized = array();
  $pos = 0;
  while ($pos < $total) {
    $current = $taggedTextList[$pos];
    $is_mergeable = isset($taggedTextList[$pos + 2])
      && $taggedTextList[$pos + 1]->type == "separator"
      && $current->type == $taggedTextList[$pos + 2]->type;

    if ($is_mergeable) {
      // The clone leaves the original first item untouched.
      $merged = clone $current;
      $merged->text = $merged->text . $taggedTextList[$pos + 1]->text . $taggedTextList[$pos + 2]->text;
      $normalized[] = $merged;
      // Continue behind the three items just consumed.
      $pos += 3;
    }
    else {
      $normalized[] = $current;
      $pos++;
    }
  }

  $taggedTextList = $normalized;
}
|
228 |
|
|
|
229 |
|
|
/**
 * Extracts the tagged text for sec references with separator and citation detail from a tagged text array.
 *
 * The first "separator" element which is directly followed by an element of
 * the type $ref_tag_type is extracted together with that element. When these
 * two are directly followed by a "separator" and a "secMicroReference"
 * element, those are extracted as well. The extracted elements are removed
 * from $tagged_text, which is re-indexed afterwards.
 *
 * @param $tagged_text
 *   The tagged text to operate on, passed by reference. Values which are not
 *   an array are left untouched.
 * @param string $ref_tag_type
 *   The tagtype for a secreference is "secReference", but "relSecReference" is also used in case of relationships.
 * @param bool $replace_with_placeholder
 *   When TRUE a placeholder tagged text element of the type
 *   "PLACEHOLDER_" . $ref_tag_type is put into $tagged_text as replacement for
 *   the extracted tagged text elements.
 *
 * @return array
 *   The extracted elements: separator and reference at the indexes 0 and 1,
 *   the separator and the micro reference, if any, at the indexes 2 and 3.
 *   Empty when nothing was found.
 */
function tagged_text_extract_secref(&$tagged_text, $ref_tag_type = "secReference", $replace_with_placeholder = false) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    // Nothing to search in and nothing to re-index: array_values() must not
    // be called with a non-array value.
    return $extracted_tt;
  }

  // Each step looks at the pair ($i, $i + 1), so the last index is excluded.
  $last_index = count($tagged_text) - 1;
  for ($i = 0; $i < $last_index; $i++) {
    if ($tagged_text[$i + 1]->type == $ref_tag_type && $tagged_text[$i]->type == "separator") {
      $extracted_tt[0] = $tagged_text[$i];
      $extracted_tt[1] = $tagged_text[$i + 1];

      if ($replace_with_placeholder) {
        // text must not be null, see cdm_tagged_text_to_markup()
        $tagged_text[$i] = tagged_text_new("PLACEHOLDER_" . $ref_tag_type, "PLACEHOLDER_" . $ref_tag_type);
      } else {
        unset($tagged_text[$i]);
      }
      unset($tagged_text[$i + 1]);
      // also get the microreference which could be in $tagged_text[$i + 3]
      if (isset($tagged_text[$i + 3]) && $tagged_text[$i + 2]->type == "separator" && $tagged_text[$i + 3]->type == "secMicroReference") {
        $extracted_tt[2] = $tagged_text[$i + 2];
        $extracted_tt[3] = $tagged_text[$i + 3];
        unset($tagged_text[$i + 2]);
        unset($tagged_text[$i + 3]);
      }
      // Only the first occurrence is extracted.
      break;
    }
  }

  $tagged_text = array_values($tagged_text); // re-index array to make it continuous again
  return $extracted_tt;
}
|
271 |
|
|
|
272 |
|
|
/**
 * Extracts the first "nomStatus" element from a tagged text array.
 *
 * The "nomStatus" element is extracted together with a directly preceding
 * "separator" element and a directly following "postSeparator" element, as
 * far as these exist. The extracted elements are removed from $tagged_text,
 * which is re-indexed afterwards.
 *
 * @param $tagged_text
 *   The tagged text to operate on, passed by reference. Values which are not
 *   an array are left untouched.
 *
 * @return array
 *   The extracted elements in their original order (separator, nomStatus,
 *   postSeparator), or an empty array when there is no "nomStatus" element.
 */
function tagged_text_extract_nomstatus(&$tagged_text) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    // array_values() must not be called with a non-array value.
    return $extracted_tt;
  }

  // All elements are inspected, the "nomStatus" may well be the last one.
  $element_count = count($tagged_text);
  for ($i = 0; $i < $element_count; $i++) {
    if ($tagged_text[$i]->type == "nomStatus") {
      $extracted_tt[] = $tagged_text[$i];
      if (isset($tagged_text[$i + 1]) && $tagged_text[$i + 1]->type == "postSeparator") {
        $extracted_tt[] = $tagged_text[$i + 1];
        unset($tagged_text[$i + 1]);
      }
      // There is no predecessor when the "nomStatus" is the first element.
      if (isset($tagged_text[$i - 1]) && $tagged_text[$i - 1]->type == "separator") {
        array_unshift($extracted_tt, $tagged_text[$i - 1]);
        unset($tagged_text[$i - 1]);
      }
      unset($tagged_text[$i]);
      // Only the first occurrence is extracted.
      break;
    }
  }

  $tagged_text = array_values($tagged_text); // re-index array to make it continuous again
  return $extracted_tt;
}
|
295 |
|
|
|
296 |
23ebb91f
|
Andreas Kohlbecker
|
/**
 * Extracts all elements of the given type from a tagged text array.
 *
 * The matching elements are removed from $tagged_text, the remaining elements
 * keep their order and are re-indexed continuously.
 *
 * @param $tagged_text
 *   The tagged text to operate on, passed by reference. Values which are not
 *   an array are left untouched.
 * @param string $type
 *   The tagged text type of the elements to extract
 *
 * @return array
 *   The extracted elements in their original order, or an empty array
 */
function tagged_text_extract(&$tagged_text, $type) {
  $matching_elements = array();
  if (!is_array($tagged_text)) {
    return $matching_elements;
  }

  // Every element is sorted into one of the two lists. Iterating with foreach
  // visits all elements, including the last one, regardless of removals.
  $remaining_elements = array();
  foreach ($tagged_text as $element) {
    if ($element->type == $type) {
      $matching_elements[] = $element;
    }
    else {
      $remaining_elements[] = $element;
    }
  }

  $tagged_text = $remaining_elements;
  return $matching_elements;
}
|
309 |
|
|
|
310 |
|
|
/**
 * Finds all elements of the given type in a tagged text array.
 *
 * In contrast to tagged_text_extract() the passed array is not modified.
 *
 * @param $tagged_text
 *   The tagged text to search in
 * @param string $type
 *   The tagged text type of the elements to find
 *
 * @return array
 *   The matching elements in their original order, or an empty array when
 *   nothing matches or when $tagged_text is not an array
 */
function find_tagged_text_elements(&$tagged_text, $type){
  $matching_elements = array();
  if (is_array($tagged_text)) {
    // foreach visits every element, including the last one.
    foreach ($tagged_text as $element) {
      if ($element->type == $type) {
        $matching_elements[] = $element;
      }
    }
  }
  return $matching_elements;
}
|