2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.io
.csv
.redlist
.demo
;
12 import java
.io
.FileNotFoundException
;
13 import java
.io
.PrintWriter
;
14 import java
.util
.ArrayList
;
15 import java
.util
.Collections
;
16 import java
.util
.Comparator
;
17 import java
.util
.HashSet
;
18 import java
.util
.List
;
20 import java
.util
.UUID
;
22 import org
.apache
.commons
.lang
.StringUtils
;
23 import org
.apache
.log4j
.Logger
;
24 import org
.springframework
.stereotype
.Component
;
25 import org
.springframework
.transaction
.TransactionStatus
;
27 import eu
.etaxonomy
.cdm
.common
.monitor
.IProgressMonitor
;
28 import eu
.etaxonomy
.cdm
.model
.common
.CdmBase
;
29 import eu
.etaxonomy
.cdm
.model
.common
.IdentifiableSource
;
30 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
31 import eu
.etaxonomy
.cdm
.model
.common
.RelationshipTermBase
;
32 import eu
.etaxonomy
.cdm
.model
.description
.CategoricalData
;
33 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
34 import eu
.etaxonomy
.cdm
.model
.description
.Distribution
;
35 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
36 import eu
.etaxonomy
.cdm
.model
.description
.State
;
37 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
38 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
39 import eu
.etaxonomy
.cdm
.model
.location
.NamedArea
;
40 import eu
.etaxonomy
.cdm
.model
.name
.NonViralName
;
41 import eu
.etaxonomy
.cdm
.model
.reference
.Reference
;
42 import eu
.etaxonomy
.cdm
.model
.taxon
.Classification
;
43 import eu
.etaxonomy
.cdm
.model
.taxon
.Synonym
;
44 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationship
;
45 import eu
.etaxonomy
.cdm
.model
.taxon
.SynonymRelationshipType
;
46 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
47 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonBase
;
48 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonNode
;
49 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonRelationship
;
50 import eu
.etaxonomy
.cdm
.model
.taxon
.TaxonRelationshipType
;
59 public class CsvDemoExport
extends CsvDemoBase
{
// Log4j logger of this export class.
60 private static final Logger logger
= Logger
.getLogger(CsvDemoExport
.class);
// Row type URI that assembleRecord passes to every CsvDemoMetaDataRecord it creates.
62 private static final String ROW_TYPE
= "http://rs.tdwg.org/dwc/terms/Taxon";
// File name that assembleRecord passes to every CsvDemoMetaDataRecord it creates.
63 private static final String fileName
= "RedlistCoreTax.csv";
/**
 * Creates the export and uses the simple name of the runtime class as its I/O name.
 */
public CsvDemoExport() {
    ioName = getClass().getSimpleName();
}
71 /** Retrieves data from a CDM DB and serializes it to CSV.
72 * Starts with root taxa and traverses the classification to retrieve
73 * children taxa, synonyms, relationships, descriptive data, red list
75 * Taxa that are not part of the classification are not found.
// Entry point of the export. Reads the configurator from the state, starts a
// transaction, resolves the classifications to export and then calls
// performJsonXMLPagination when the configurator supplies a record list and
// performCSVExport when the configured destination is not a directory.
// NOTE(review): the try/else/finally lines that connect these steps are not
// part of this excerpt, so the exact branching cannot be stated from here.
82 protected void doInvoke(CsvDemoExportState state
){
83 CsvDemoExportConfigurator config
= state
.getConfig();
84 TransactionStatus txStatus
= startTransaction(true);
// NOTE(review): selectedAreas is not referenced again in the visible part of this method.
86 List
<NamedArea
> selectedAreas
= config
.getNamedAreas();
87 Set
<Classification
> classificationSet
= assembleClassificationSet(config
);
// The progress monitor is optional; it stays null when the configurator has none.
89 IProgressMonitor progressMonitor
= null;
90 if(config
.getProgressMonitor() != null) {
91 progressMonitor
= config
.getProgressMonitor();
93 PrintWriter writer
= null;
95 //json/xml result list
96 List
<CsvDemoRecord
> recordList
= null;
// A record list on the configurator selects the paginated in-memory export.
97 if(config
.getRecordList() != null){
98 recordList
= config
.getRecordList();
99 performJsonXMLPagination(state
, config
, txStatus
, classificationSet
, recordList
);
// The CSV export writes to the destination only if it is not a directory.
103 if(!config
.getDestination().isDirectory()){
// Creating the PrintWriter is the call that can raise the FileNotFoundException caught below.
// NOTE(review): no writer.close() is visible in this excerpt; confirm the writer is closed.
105 writer
= new PrintWriter(config
.getDestination());
106 performCSVExport(state
, config
, txStatus
, classificationSet
, progressMonitor
, writer
);
// NOTE(review): the bodies of both catch clauses are not shown in this excerpt.
107 } catch (FileNotFoundException e
) {
111 } catch (ClassCastException e
) {
// Forgets the ids of the records exported during this run.
118 this.clearExistingRecordIds();
120 // commitTransaction(txStatus);
129 * @param classificationSet
132 private void performJsonXMLPagination(CsvDemoExportState state
, CsvDemoExportConfigurator config
,
133 TransactionStatus txStatus
, Set
<Classification
> classificationSet
, List
<CsvDemoRecord
> recordList
) {
134 // TODO Auto-generated method stub
135 Classification classification
= null;
136 for(Classification c
: classificationSet
){
138 //this sets the total amount of records for pagination
139 config
.setTaxonNodeListSize(getTaxonNodeService().countAllNodesForClassification(c
));
141 //calculate pagination
142 int start
= config
.getPageSize() * config
.getPageNumber();
143 List
<TaxonNode
> result
= getTaxonNodeService().listAllNodesForClassification(classification
, start
, config
.getPageSize());
145 for (TaxonNode node
: result
){
146 Taxon taxon
= CdmBase
.deproxy(node
.getTaxon(), Taxon
.class);
147 CsvDemoRecord record
= assembleRecord(state
);
148 NonViralName
<?
> name
= CdmBase
.deproxy(taxon
.getName(), NonViralName
.class);
149 config
.setClassificationTitleCache(classification
.getTitleCache());
150 if (! this.recordExists(taxon
)){
151 handleTaxonBase(record
, taxon
, name
, classification
, null, false, false, config
, node
);
152 recordList
.add(record
);
153 this.addExistingRecord(taxon
);
156 commitTransaction(txStatus
);
164 * @param classificationSet
165 * @param progressMonitor
// Writes the CSV export: loads the taxon nodes of a classification in chunks,
// turns every not yet exported taxon into a record, writes it with the given
// writer and reports progress to the optional monitor. After each chunk the
// transaction is committed and a new one is started.
// NOTE(review): totalWork, work, limit and start are used below but declared on
// lines that are not part of this excerpt; their initial values are unknown here.
169 private void performCSVExport(CsvDemoExportState state
, CsvDemoExportConfigurator config
,
170 TransactionStatus txStatus
, Set
<Classification
> classificationSet
, IProgressMonitor progressMonitor
,
171 PrintWriter writer
) {
172 //obtain chunks of taxonNodes
179 //TODO: Questionable if this information is really necessary, with respect to memory usage
// NOTE(review): no assignment to classification is visible in this excerpt;
// presumably it is set from c inside the loop below - confirm.
180 Classification classification
= null;
181 for(Classification c
: classificationSet
){
// totalWork is overwritten per classification, so it ends up holding the count of the last one.
183 totalWork
= getTaxonNodeService().countAllNodesForClassification(c
);
186 if(progressMonitor
!= null) {
187 progressMonitor
.beginTask("", totalWork
);
189 List
<TaxonNode
> result
= new ArrayList
<TaxonNode
>();
// Upper bound of the chunk loop: the count of all TaxonNode entities, not only
// those of the selected classification.
190 int totalNodes
= getTaxonNodeService().count(TaxonNode
.class);
192 for(int i
= 0 ; i
< totalNodes
; i
++){
194 //geographical Filter
195 // List<TaxonNode> taxonNodes = handleGeographicalFilter(state, classificationSet, config, limit, start);
// Loads the next chunk of at most limit nodes beginning at offset start.
197 result
= getTaxonNodeService().listAllNodesForClassification(classification
, start
, limit
);
199 logger
.info(result
.size());
202 for (TaxonNode node
: result
){
203 Taxon taxon
= CdmBase
.deproxy(node
.getTaxon(), Taxon
.class);
// The record is assembled before the duplicate check; assembleRecord also adds a meta record to the state.
204 CsvDemoRecord record
= assembleRecord(state
);
205 NonViralName
<?
> name
= CdmBase
.deproxy(taxon
.getName(), NonViralName
.class);
206 // Classification classification = node.getClassification();
207 config
.setClassificationTitleCache(classification
.getTitleCache());
// Each taxon is exported at most once per run.
208 if (! this.recordExists(taxon
)){
210 handleTaxonBase(record
, taxon
, name
, classification
, null, false, false, config
, node
);
211 if(config
.getDestination() != null){
212 record
.write(writer
);
214 this.addExistingRecord(taxon
);
217 //handleMisapplication(taxon, writer, classification, record, config, node);
219 if(progressMonitor
!=null) {
220 if(work
< totalWork
-1) {
221 progressMonitor
.worked(1);
// Commits the finished chunk and opens a fresh transaction for the next one.
228 commitTransaction(txStatus
);
229 txStatus
= startTransaction(true);
231 //get next 1000 results
// NOTE(review): size % limit == 0 also holds for an empty result (0 % limit == 0),
// so an empty chunk is treated like a full one by this condition.
232 if(result
.size()%limit
== 0){
233 //increase only once to avoid same row
237 start
= start
+ limit
;
248 //TODO: Exception handling
254 protected Set
<Classification
> assembleClassificationSet(CsvDemoExportConfigurator config
){
256 Set
<UUID
> classificationUuidSet
= config
.getClassificationUuids();
257 List
<Classification
> classificationList
= getClassificationService().find(classificationUuidSet
);
258 Set
<Classification
> classificationSet
= new HashSet
<Classification
>();
259 classificationSet
.addAll(classificationList
);
260 return classificationSet
;
265 //TODO: Exception handling
271 private CsvDemoRecord
assembleRecord(CsvDemoExportState state
) {
273 CsvDemoExportConfigurator config
= state
.getConfig();
274 CsvDemoMetaDataRecord metaRecord
= new CsvDemoMetaDataRecord(true, fileName
, ROW_TYPE
);
275 state
.addMetaRecord(metaRecord
);
276 CsvDemoRecord record
= new CsvDemoRecord(metaRecord
, config
);
283 * Takes positive List of areas and iterates over a given classification
284 * and their {@link Taxon} to return all {@link Taxon} with the desired
285 * geographical attribute.
289 * If selectedAreas is null all {@link TaxonNode}s of the given {@link Classification} will be returned.
291 * @param selectedAreas
292 * @param classificationSet
297 // protected List<TaxonNode> handleGeographicalFilter(CsvDemoExportState state,
298 // Set<Classification> classificationSet, CsvDemoExportConfigurator config, int limit, int start) {
299 // List<TaxonNode> filteredNodes = new ArrayList<TaxonNode>();
300 // List<TaxonNode> allNodes = new ArrayList<TaxonNode>();
301 // //Check if json/XML export
302 // if(config.getRecordList() != null){
303 // if(config.getProgressMonitor() != null) {
304 // config.getProgressMonitor().subTask("Calculate size of export...");
306 // //FIXME does not filter for classifications
307 // allNodes = getTaxonNodeService().list(TaxonNode.class, config.getPageSize(), config.getPageNumber(), null, null);
308 // config.setTaxonNodeListSize(getAllNodes(classificationSet).size());
309 // //getTaxonNodeService().page(TaxonNode.class, config.getPageSize(), config.getPageNumber(), null, null).getRecords();
312 // //do your own pagination
313 // allNodes = getAllNodes(classificationSet);
316 // //Geographical filter
317 // if(state.getConfig().isDoGeographicalFilter()){
318 // List<NamedArea> selectedAreas = state.getConfig().getNamedAreas();
319 // logger.info(selectedAreas.size());
320 // if(selectedAreas != null && !selectedAreas.isEmpty() && selectedAreas.size() < 16){
321 // // if(selectedAreas.size() == 16){
322 // // //Germany TDWG Level 3
323 // // String germany="uu7b7c2db5-aa44-4302-bdec-6556fd74b0b9id";
324 // // selectedAreas.add((NamedArea) getTermService().find(UUID.fromString(germany)));
326 // for (TaxonNode node : allNodes){
327 // Taxon taxon = CdmBase.deproxy(node.getTaxon(), Taxon.class);
328 // Set<TaxonDescription> descriptions = taxon.getDescriptions();
329 // for (TaxonDescription description : descriptions){
330 // for (DescriptionElementBase el : description.getElements()){
331 // if (el.isInstanceOf(Distribution.class) ){
332 // Distribution distribution = CdmBase.deproxy(el, Distribution.class);
333 // NamedArea area = distribution.getArea();
334 // for(NamedArea selectedArea:selectedAreas){
335 // if(selectedArea.getUuid().equals(area.getUuid())){
336 // filteredNodes.add(node);
344 // filteredNodes = allNodes;
347 // return filteredNodes;
351 * handles misapplied {@link Taxon}
354 * @param classification
359 // private void handleMisapplication(Taxon taxon, PrintWriter writer, Classification classification, CsvDemoRecord record, CsvDemoExportConfigurator config, TaxonNode node) {
360 // Set<Taxon> misappliedNames = taxon.getMisappliedNames();
361 // for (Taxon misappliedName : misappliedNames ){
362 //// CsvTaxRecordRedlist record = new CsvTaxRecordRedlist(metaRecord, config);
363 // TaxonRelationshipType relType = TaxonRelationshipType.MISAPPLIED_NAME_FOR();
364 // NonViralName<?> name = CdmBase.deproxy(misappliedName.getName(), NonViralName.class);
366 // if (! this.recordExists(misappliedName)){
367 // handleTaxonBase(record, misappliedName, name, taxon, classification, relType, false, false, config, node);
368 // if(writer != null){
369 // record.write(writer);
371 // this.addExistingRecord(misappliedName);
377 * handles the information record for the actual {@link Taxon} including {@link Classification classification}, Taxon Name, Taxon ID,
378 * Taxon Status, Synonyms, {@link Feature features} data
379 * @param record the concrete information record
380 * @param taxonBase {@link Taxon}
382 * @param acceptedTaxon
// Copies the configured pieces of information about the given taxon into the
// record: classification title, scientific name and name id, author, rank,
// taxonomic status, concept id, parent id, last change, synonyms,
// distributions, red list feature cells and external id. Where the guarding
// condition is visible in this excerpt, each part is switched on by a flag of
// the configurator.
392 private void handleTaxonBase(CsvDemoRecord record
,TaxonBase
<?
> taxonBase
,
393 NonViralName
<?
> name
, Classification classification
,
394 RelationshipTermBase
<?
> relType
, boolean isProParte
, boolean isPartial
,
395 CsvDemoExportConfigurator config
, TaxonNode node
) {
// Unchecked downcast: a TaxonBase that is not a Taxon raises a ClassCastException here.
397 Taxon taxon
= (Taxon
) taxonBase
;
398 List
<Feature
> features
= config
.getFeatures();
// Header and feature columns are only configured on the record when a destination is set.
399 if(config
.getDestination() != null){
400 record
.setHeadLinePrinted(config
.isHasHeaderLines());
401 if(config
.isRedlistFeatures()){
402 if(features
!= null){
403 record
.setPrintFeatures(features
);
// Clears the header flag on the configurator after its previous value was copied to the record above.
406 config
.setHasHeaderLines(false);
408 if(config
.isClassification()){
409 record
.setDatasetName(classification
.getTitleCache());
411 if(config
.isTaxonName()){
412 record
.setScientificName(name
.getNameCache());
414 if(config
.isTaxonNameID()){
415 record
.setScientificNameId(name
.getUuid().toString());
// A missing authorship cache is exported as an empty string.
417 if(config
.isAuthor()){
418 String authorshipCache
= name
.getAuthorshipCache();
419 if(authorshipCache
== null){
420 authorshipCache
= "";
422 record
.setAuthorName(authorshipCache
);
// NOTE(review): the declaration of rank, the condition guarding this section and
// the value used when the rank is null are on lines not shown in this excerpt.
426 if(taxon
.getName().getRank() == null){
429 rank
= taxon
.getName().getRank().toString();
431 record
.setRank(rank
);
433 if(config
.isTaxonStatus()){
434 handleTaxonomicStatus(record
, name
, relType
, isProParte
, isPartial
);
436 if(config
.isAcceptedName()){
437 //TODO write routine for accepted Name
439 if(config
.isTaxonConceptID()){
440 UUID taxonUuid
= taxonBase
.getUuid();
// NOTE(review): UUID.fromString("") throws an IllegalArgumentException, so this
// null fallback fails instead of producing an empty id.
441 if(taxonUuid
== null){
442 taxonUuid
= UUID
.fromString("");
444 record
.setTaxonConceptID(taxonUuid
.toString());
446 if(config
.isParentID()){
// NOTE(review): node.getParent() is dereferenced without a null check; confirm
// that every exported node has a parent node. The declaration of parentUUID and
// its value for a parent without taxon are on lines not shown in this excerpt.
448 if(node
.getParent().getTaxon() == null){
451 parentUUID
= node
.getParent().getTaxon().getUuid().toString();
453 record
.setParentUUID(parentUUID
);
455 if(config
.isLastChange()){
// NOTE(review): the declaration of lastChange and its value for a never updated
// taxon are on lines not shown in this excerpt.
457 if(taxon
.getUpdated() == null){
460 lastChange
= taxon
.getUpdated().toString();
462 record
.setLastChange(lastChange
);
464 if(config
.isSynonyms()){
465 handleSynonyms(record
,taxon
);
467 if(config
.isDistributions()){
468 handleDiscriptionData(record
, taxon
);
470 if(config
.isRedlistFeatures()){
471 if(features
!= null) {
// One result cell (a list of strings) per configured feature, in feature order.
473 List
<List
<String
>> featureCells
= new ArrayList
<List
<String
>>(features
.size());
474 for(int i
= 0; i
< features
.size(); i
++) {
475 featureCells
.add(new ArrayList
<String
>());
// Collects feature data for both relation directions: relationFrom = false, then true.
477 handleRelatedRedlistStatus(record
, taxon
, false, featureCells
, features
);
478 handleRelatedRedlistStatus(record
, taxon
, true, featureCells
, features
);
483 if(config
.isExternalID()){
484 Set
<IdentifiableSource
> sources
= taxonBase
.getSources();
485 for(IdentifiableSource source
:sources
){
486 Reference
<?
> citation
= source
.getCitation();
488 * TODO: handle this more generic.
// NOTE(review): the citation is matched by the hard-coded database id 22 and is
// not null-checked before getId() is called.
492 if(citation
.getId() == 22){
493 String idInSource
= source
.getIdInSource();
494 if(idInSource
== null){
497 record
.setExternalID(idInSource
);
// Sets the taxonomic status of the record. Without a relationship type, and if
// the nomenclatural code of the name supplies an accepted-taxon label, that
// label is used. Otherwise, if the code supplies a synonym label, the status is
// derived from the relationship type and the pro parte / partial flags.
// NOTE(review): the end of the parameter list is on lines not shown in this
// excerpt; isProParte and isPartial are passed by the caller and isPartial is
// read below.
511 private void handleTaxonomicStatus(
512 CsvDemoRecord record
,
513 NonViralName
<?
> name
,
514 RelationshipTermBase
<?
> type
,
517 if (type
== null && name
.getNomenclaturalCode()!= null && name
.getNomenclaturalCode().acceptedTaxonStatusLabel() != null){
518 String acceptedTaxonStatusLabel
= name
.getNomenclaturalCode().acceptedTaxonStatusLabel();
// The label was already checked for null above, so this only maps an empty string to itself.
519 if(StringUtils
.isEmpty(acceptedTaxonStatusLabel
)){
520 acceptedTaxonStatusLabel
="";
522 record
.setTaxonomicStatus(acceptedTaxonStatusLabel
);
// NOTE(review): this branch is also entered when type is null but
// acceptedTaxonStatusLabel() returned null; type.equals(...) below then throws a
// NullPointerException.
523 }else if(name
.getNomenclaturalCode() != null && name
.getNomenclaturalCode().synonymStatusLabel() != null){
// Default status is the synonym label of the code; known relationship types override it.
524 String status
= name
.getNomenclaturalCode().synonymStatusLabel();
525 if (type
.equals(SynonymRelationshipType
.HETEROTYPIC_SYNONYM_OF())){
526 status
= "heterotypicSynonym";
527 }else if(type
.equals(SynonymRelationshipType
.HOMOTYPIC_SYNONYM_OF())){
528 status
= "homotypicSynonym";
529 }else if(type
.equals(TaxonRelationshipType
.MISAPPLIED_NAME_FOR())){
530 status
= "misapplied";
// NOTE(review): the condition guarding the next assignment is on a line not shown
// in this excerpt; presumably it tests isProParte - confirm.
533 status
= "proParteSynonym";
534 }else if (isPartial
){
535 String message
= "Partial synonym is not part of the gbif toxonomic status vocabulary";
536 logger
.warn(message
);
537 status
= "partialSynonym";
540 record
.setTaxonomicStatus(status
);
546 * This method concatenates several synonyms in a list.
551 private void handleSynonyms(CsvDemoRecord record
, Taxon taxon
) {
553 Set
<SynonymRelationship
> synRels
= taxon
.getSynonymRelations();
554 ArrayList
<String
> synonyms
= new ArrayList
<String
>();
555 for (SynonymRelationship synRel
:synRels
){
556 Synonym synonym
= synRel
.getSynonym();
557 SynonymRelationshipType type
= synRel
.getType();
558 if (type
== null){ // should not happen
559 type
= SynonymRelationshipType
.SYNONYM_OF();
561 NonViralName
<?
> name
= CdmBase
.deproxy(synonym
.getName(), NonViralName
.class);
562 synonyms
.add(name
.getTitleCache());
564 record
.setSynonyms(synonyms
);
572 private void handleDiscriptionData(CsvDemoRecord record
, Taxon taxon
) {
574 Set
<TaxonDescription
> descriptions
= taxon
.getDescriptions();
575 ArrayList
<String
> distributions
= new ArrayList
<String
>();
576 for (TaxonDescription description
: descriptions
){
577 for (DescriptionElementBase el
: description
.getElements()){
578 if (el
.isInstanceOf(Distribution
.class) ){
579 Distribution distribution
= CdmBase
.deproxy(el
, Distribution
.class);
580 NamedArea area
= distribution
.getArea();
581 distributions
.add(area
.getTitleCache());
586 record
.setCountryCode(distributions
);
592 * @param featureCells
595 private void handleRedlistStatus(CsvDemoRecord record
, Taxon taxon
, List
<List
<String
>> featureCells
, List
<Feature
> features
){
596 Set
<TaxonDescription
> descriptions
= taxon
.getDescriptions();
598 for (TaxonDescription description
: descriptions
){
599 for (DescriptionElementBase el
: description
.getElements()){
600 if(el
.isInstanceOf(CategoricalData
.class)){
601 CategoricalData categoricalData
= CdmBase
.deproxy(el
, CategoricalData
.class);
602 for(State state
:categoricalData
.getStatesOnly()){
603 Feature stateFeature
= categoricalData
.getFeature();
604 // find matching feature and put data into according cell
605 for(int i
= 0; i
< features
.size(); i
++) {
606 if(features
.get(i
).equals(stateFeature
)){
607 List
<String
> cell
= featureCells
.get(i
);
608 cell
.add(state
.toString());
612 }else if(el
.isInstanceOf(TextData
.class)){
613 TextData textData
= CdmBase
.deproxy(el
, TextData
.class);
614 Feature textFeature
= textData
.getFeature();
615 // find matching feature and put data into according cell
616 for(int i
= 0; i
< features
.size(); i
++) {
617 if(features
.get(i
).equals(textFeature
)){
618 List
<String
> cell
= featureCells
.get(i
);
619 String text
= textData
.getText(Language
.GERMAN());
620 text
= text
.replaceAll(System
.getProperty("line.separator"), "");
621 text
= text
.replaceAll(" ", " ");
629 record
.setFeatures(featureCells
);
636 * @param relationFrom
637 * @param featureCells
// Collects red list feature data for the given taxon and for the taxa related
// to it by a CONGRUENT_TO relationship, delegating to handleRedlistStatus.
// NOTE(review): the conditions that select between the alternatives below are on
// lines not shown in this excerpt; presumably they test relationFrom - confirm.
640 private void handleRelatedRedlistStatus(CsvDemoRecord record
, Taxon taxon
, boolean relationFrom
, List
<List
<String
>> featureCells
, List
<Feature
> features
) {
// Feature data of the taxon itself.
643 handleRedlistStatus(record
, taxon
, featureCells
, features
);
647 Set
<TaxonRelationship
> taxRels
;
// Either the relations starting at this taxon or the relations pointing to it are used.
649 taxRels
= taxon
.getRelationsFromThisTaxon();
651 taxRels
= taxon
.getRelationsToThisTaxon();
653 for (TaxonRelationship taxRel
:taxRels
){
// Only congruent taxa contribute additional feature data.
654 if(taxRel
.getType().equals(TaxonRelationshipType
.CONGRUENT_TO())){
// relatedTaxon is declared on a line not shown in this excerpt; it is taken from
// one end of the relationship.
657 relatedTaxon
= taxRel
.getToTaxon();
659 relatedTaxon
= taxRel
.getFromTaxon();
661 handleRedlistStatus(record
, relatedTaxon
, featureCells
, features
);
// Sorts the given list of taxon nodes in place, comparing the title caches of
// the taxa of two nodes when both nodes have a taxon.
// NOTE(review): the value returned when one of the taxa is null is on lines not
// shown in this excerpt.
672 private void sortTaxonNodes(List
<TaxonNode
> taxonNodes
) {
673 Collections
.sort(taxonNodes
, new Comparator
<TaxonNode
>() {
676 public int compare(TaxonNode tn1
, TaxonNode tn2
) {
677 Taxon taxon1
= tn1
.getTaxon();
678 Taxon taxon2
= tn2
.getTaxon();
// String comparison of the two title caches (String.compareTo ordering).
679 if(taxon1
!= null && taxon2
!= null){
680 return taxon1
.getTitleCache().compareTo(taxon2
.getTitleCache());
690 protected boolean doCheck(CsvDemoExportState state
) {
691 boolean result
= true;
692 logger
.warn("No check implemented for " + this.ioName
);
697 protected boolean isIgnore(CsvDemoExportState state
) {
698 return ! state
.getConfig().isDoTaxa();