Revision 2978b0c8
Added by Andreas Kohlbecker over 5 years ago
modules/cdm_dataportal/cdm_api/cdm_api.module | ||
---|---|---|
27 | 27 |
module_load_include('php', 'cdm_api', 'enums'); |
28 | 28 |
module_load_include('php', 'cdm_api', 'webservice_uris'); |
29 | 29 |
module_load_include('php', 'cdm_api', 'cdm_node'); |
30 |
module_load_include('inc', 'cdm_api', 'tagged_text'); |
|
30 | 31 |
|
31 | 32 |
/** |
32 | 33 |
* Timeout used to override the default of 30 seconds |
... | ... | |
190 | 191 |
cache_clear_all(NULL, 'cache_cdm_ws'); |
191 | 192 |
} |
192 | 193 |
|
193 |
// ===================== Tagged Text functions ================== // |
|
194 |
|
|
195 |
/**
 * Creates a minimal TaggedText object.
 *
 * @param string $tag_type
 *   Value stored in the 'type' property of the new object.
 * @param string $text
 *   Value stored in the 'text' property, NULL when omitted.
 *
 * @return object
 *   A stdClass instance with the properties 'type' and 'text' (in that order).
 */
function tagged_text_new($tag_type, $text = null){
  // Casting an array to object yields a stdClass with the keys as properties.
  return (object) array(
    'type' => $tag_type,
    'text' => $text,
  );
}
|
201 |
|
|
202 |
/**
 * Expands entity references contained in a TaggedText array.
 *
 * Every element which carries an entityReference (and whose type is not
 * listed in $skiptags) is looked up via the web service. When the lookup
 * yields a result, that result is merged into the output in place of the
 * original element; in every other case the original element is kept.
 *
 * @param array $taggedtxt
 *   The original TaggedText array.
 * @param array $skiptags
 *   Optional list of tag types which must not be expanded.
 *
 * @return array
 *   The new TaggedText array with the resolvable entity references expanded.
 */
function cdm_tagged_text_expand_entity_references(array $taggedtxt, $skiptags = array()) {
  $expanded = array();
  foreach ($taggedtxt as $item) {
    $replacement = NULL;
    if (isset($item->entityReference) && !in_array($item->type, $skiptags)) {
      $base_uri = cdm_ws_base_uri($item->entityReference->type);
      if ($base_uri) {
        // Names are served by a different sub-resource than all other types.
        $method = ($base_uri == CDM_WS_NAME) ? "/taggedName" : "/taggedText";
        $replacement = cdm_ws_get($base_uri . "/" . $item->entityReference->uuid . $method);
      }
    }
    if ($replacement) {
      $expanded = array_merge($expanded, $replacement);
    }
    else {
      // Nothing to expand or lookup without result: keep the element as is.
      $expanded[] = $item;
    }
  }
  return $expanded;
}
|
237 |
|
|
238 |
/**
 * Renders an array of TaggedText items as a sequence of html span elements.
 *
 * The class attribute of each span is the type of the TaggedText item,
 * followed by the result of html_class_attribute_ref() when the item has an
 * entityReference. Items with an empty text or with a type listed in
 * $skiptags are omitted.
 *
 * Items whose type starts with 'PLACEHOLDER_' are not wrapped in a span;
 * instead the type wrapped in curly brackets ('{' . $tt->type . '}') is added
 * to the markup. See tagged_text_extract_secref().
 *
 * @param array $taggedtxt
 *   Array with TaggedText items to convert.
 * @param array $skiptags
 *   Array of tag types to skip.
 *
 * @return string
 *   The markup.
 */
function cdm_tagged_text_to_markup(array $taggedtxt, $skiptags = array()) {
  $element = 'span';
  $markup = '';
  $previous_was_separator = false;
  $total = count($taggedtxt);
  // 1-based position of the current item, skipped items are counted as well.
  $position = 0;
  foreach ($taggedtxt as $item) {
    $position++;
    if (in_array($item->type, $skiptags) || !$item->text) {
      continue;
    }

    $classes = $item->type;
    if (isset($item->entityReference)) {
      $classes .= " " . html_class_attribute_ref($item->entityReference);
    }
    $is_separator = is_tagged_text_sepatator_type($item->type);

    if (str_beginsWith($item->type, 'PLACEHOLDER_')) {
      $markup .= '{'. $item->type . '}';
    }
    else {
      // A blank is only appended between two non separator items.
      $glue = ($is_separator || $previous_was_separator || $position == $total) ? '' : ' ';
      $markup .= '<' . $element . ' class="' . $classes . '">'
        . t('@text', array('@text' => $item->text))
        . '</' . $element . '>'
        . $glue;
    }
    $previous_was_separator = $is_separator;
  }
  return $markup;
}
|
286 |
|
|
287 |
/**
 * Concatenates the texts of an array of TaggedText items into one string.
 *
 * Items with an empty text or with a type listed in $skiptags are omitted.
 * Each text is passed through t() using the '@text' placeholder. A blank is
 * appended after an item unless the item is a separator, follows a separator
 * or is the last element of $taggedtxt.
 *
 * @param array $taggedtxt
 *   Array with TaggedText items to convert.
 * @param array $skiptags
 *   Array of tag types to skip.
 *
 * @return string
 *   The concatenated text.
 */
function cdm_tagged_text_to_string(array $taggedtxt, $skiptags = array()) {
  $plain = '';
  $previous_was_separator = false;
  $total = count($taggedtxt);
  // 1-based position of the current item, skipped items are counted as well.
  $position = 0;
  foreach ($taggedtxt as $item) {
    $position++;
    if (in_array($item->type, $skiptags) || !$item->text) {
      continue;
    }
    $is_separator = is_tagged_text_sepatator_type($item->type);
    $needs_blank = !($is_separator || $previous_was_separator || $position == $total);
    $plain .= t('@text', array('@text' => $item->text)) . ($needs_blank ? ' ' : '');
    $previous_was_separator = $is_separator;
  }
  return $plain;
}
|
318 |
|
|
319 |
/**
 * Tells whether a TaggedText type denotes a separator.
 *
 * See cdmlib: boolean eu.etaxonomy.cdm.strategy.cache.TagEnum.isSeparator();
 *
 * @param string $tagged_text_type
 *   The TaggedText type to test.
 *
 * @return bool
 *   TRUE for the types 'separator' and 'postSeparator', FALSE otherwise.
 */
function is_tagged_text_sepatator_type($tagged_text_type){
  return in_array($tagged_text_type, array('separator', 'postSeparator'));
}
|
329 |
|
|
330 |
|
|
331 |
/**
 * Collects the texts of the TaggedText items having one of the given types.
 *
 * @param array $taggedtxt
 *   Array with TaggedText items.
 * @param array $include_tag_types
 *   Array of the tag types for which to collect the texts, or NULL to
 *   collect the texts of all items.
 *
 * @return array
 *   A numerically indexed array of the matching texts in their original
 *   order.
 */
function cdm_tagged_text_values(array $taggedtxt, $include_tag_types = NULL) {
  $texts = array();
  $take_all = $include_tag_types === NULL;
  foreach ($taggedtxt as $item) {
    if ($take_all || in_array($item->type, $include_tag_types)) {
      $texts[] = $item->text;
    }
  }
  return $texts;
}
|
355 |
|
|
356 |
/**
 * Normalizes a list of TaggedText objects in place.
 *
 * Step 1: Items of the type "hybridSign" get the type "name".
 *
 * Step 2: Two items of the same type which are connected by an item of the
 * type "separator" are merged into a single item. The merged item is a clone
 * of the first one, its text is the concatenation of all three texts.
 *
 * Nothing happens when the passed value is not an array.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects, passed by reference and replaced by the
 *   normalized list.
 */
function normalize_tagged_text(&$taggedTextList) {

  if (!is_array($taggedTextList)) {
    return;
  }

  $length = count($taggedTextList);

  // Pass one: rename the tag types.
  for ($pos = 0; $pos < $length; $pos++) {
    if ($taggedTextList[$pos]->type == "hybridSign") {
      $taggedTextList[$pos]->type = "name";
    }
  }

  // Pass two: resolve the separators.
  $merged = array();
  $pos = 0;
  while ($pos < $length) {
    $current = $taggedTextList[$pos];
    $joinable = isset($taggedTextList[$pos + 2])
      && $taggedTextList[$pos + 1]->type == "separator"
      && $current->type == $taggedTextList[$pos + 2]->type;

    if ($joinable) {
      $combined = clone $current;
      $combined->text = $combined->text . $taggedTextList[$pos + 1]->text . $taggedTextList[$pos + 2]->text;
      $merged[] = $combined;
      // The separator and the following item are consumed by the merge.
      $pos += 3;
    }
    else {
      $merged[] = $current;
      $pos++;
    }
  }
  $taggedTextList = $merged;
}
|
400 |
|
|
401 |
/**
 * Extracts the tagged text for sec references with separator and citation
 * detail from a tagged text array.
 *
 * Only the first occurrence of a "separator" item directly followed by an
 * item of the type $ref_tag_type is extracted. When this pair is directly
 * followed by another "separator" plus $ref_tag_type pair (the micro
 * reference) the second pair is extracted as well. The extracted items are
 * removed from $tagged_text and the array is re-indexed afterwards.
 *
 * @param array $tagged_text
 *   The tagged text to operate on, passed by reference. Values which are not
 *   arrays are left untouched.
 * @param string $ref_tag_type
 *   The tag type for a sec reference is "secReference", but "relSecReference"
 *   is also used in case of relationships.
 * @param bool $replace_with_placeholder
 *   When TRUE a placeholder tagged text element of the type
 *   "PLACEHOLDER_" . $ref_tag_type is put into the position of the extracted
 *   separator as replacement for the extracted tagged text elements.
 *
 * @return array
 *   The extracted tagged text elements: separator and reference at the
 *   indexes 0 and 1, the optional micro reference pair at the indexes 2 and
 *   3. An empty array when nothing was extracted.
 */
function tagged_text_extract_secref(&$tagged_text, $ref_tag_type = "secReference", $replace_with_placeholder = false) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    // Re-indexing below must never see a non array value: array_values()
    // would raise a warning or a TypeError and clobber the passed variable.
    return $extracted_tt;
  }

  // The array is only modified right before leaving the loop, so the count
  // can be taken once.
  $last_index = count($tagged_text) - 1;
  for ($i = 0; $i < $last_index; $i++) {
    if ($tagged_text[$i + 1]->type == $ref_tag_type && $tagged_text[$i]->type == "separator"){
      $extracted_tt[0] = $tagged_text[$i];
      $extracted_tt[1] = $tagged_text[$i + 1];

      if($replace_with_placeholder){
        // text must not be null, see cdm_tagged_text_to_markup()
        $tagged_text[$i] = tagged_text_new("PLACEHOLDER_" . $ref_tag_type, "PLACEHOLDER_" . $ref_tag_type);
      } else {
        unset($tagged_text[$i]);
      }
      unset($tagged_text[$i + 1]);
      // also get the microreference which could be in $tagged_text[$i + 3]
      if(isset($tagged_text[$i + 3]) && $tagged_text[$i + 2]->type == "separator" && $tagged_text[$i + 3]->type == $ref_tag_type){
        $extracted_tt[2] = $tagged_text[$i + 2];
        $extracted_tt[3] = $tagged_text[$i + 3];
        unset($tagged_text[$i + 2]);
        unset($tagged_text[$i + 3]);
      }
      break;
    }
  }

  // re-index array to make it continuous again
  $tagged_text = array_values($tagged_text);
  return $extracted_tt;
}
|
443 |
|
|
444 |
/**
 * Extracts the first nomenclatural status element from a tagged text array.
 *
 * The first item of the type "nomStatus" is removed from $tagged_text
 * together with a directly preceding "separator" item and a directly
 * following "postSeparator" item, when present. The array is re-indexed
 * afterwards so that it can be processed by index based loops again.
 *
 * @param array $tagged_text
 *   The tagged text to operate on, passed by reference. Values which are not
 *   arrays are left untouched.
 *
 * @return array
 *   The extracted elements in their original order (optional separator,
 *   nomStatus, optional postSeparator). An empty array when $tagged_text
 *   contains no "nomStatus" item.
 */
function tagged_text_extract_nomstatus(&$tagged_text) {

  $extracted_tt = array();
  if (!is_array($tagged_text)) {
    return $extracted_tt;
  }

  // All elements must be inspected, the status is often the last one.
  $count = count($tagged_text);
  for ($i = 0; $i < $count; $i++) {
    if (!isset($tagged_text[$i]) || $tagged_text[$i]->type != "nomStatus") {
      continue;
    }
    $extracted_tt[] = $tagged_text[$i];
    if (isset($tagged_text[$i + 1]) && $tagged_text[$i + 1]->type == "postSeparator") {
      $extracted_tt[] = $tagged_text[$i + 1];
      unset($tagged_text[$i + 1]);
    }
    // There is no preceding element when the status comes first.
    if (isset($tagged_text[$i - 1]) && $tagged_text[$i - 1]->type == "separator") {
      array_unshift($extracted_tt, $tagged_text[$i - 1]);
      unset($tagged_text[$i - 1]);
    }
    unset($tagged_text[$i]);
    break;
  }

  // re-index array to make it continuous again
  $tagged_text = array_values($tagged_text);
  return $extracted_tt;
}
|
466 |
|
|
467 |
/**
 * Finds all elements of a given type in a list of TaggedText objects.
 *
 * @param array $taggedTextList
 *   An array of TaggedText objects. Any other value yields an empty result.
 * @param string $type
 *   The TaggedText type to search for.
 *
 * @return array
 *   The matching TaggedText objects in their original order.
 */
function find_tagged_text_elements($taggedTextList, $type){
  $matching_elements = array();
  if (is_array($taggedTextList)) {
    // Iterate over every element, including the last one.
    foreach ($taggedTextList as $tagged_text) {
      if ($tagged_text->type == $type) {
        $matching_elements[] = $tagged_text;
      }
    }
  }
  return $matching_elements;
}
|
478 |
|
|
479 |
// ===================== END of Tagged Text functions ================== // |
|
480 |
|
|
481 | 194 |
/** |
482 | 195 |
* Lists the classifications a taxon belongs to |
483 | 196 |
* |
Also available in: Unified diff
ref #7658 moving all tagged text functions to separate file