2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
10 package eu
.etaxonomy
.cdm
.app
.wp6
.diptera
;
12 import java
.util
.HashMap
;
13 import java
.util
.HashSet
;
14 import java
.util
.List
;
18 import org
.apache
.log4j
.Logger
;
19 import org
.springframework
.transaction
.TransactionStatus
;
21 import eu
.etaxonomy
.cdm
.api
.application
.CdmApplicationController
;
22 import eu
.etaxonomy
.cdm
.api
.service
.INameService
;
23 import eu
.etaxonomy
.cdm
.api
.service
.pager
.Pager
;
24 import eu
.etaxonomy
.cdm
.app
.common
.CdmDestinations
;
25 import eu
.etaxonomy
.cdm
.database
.DbSchemaValidation
;
26 import eu
.etaxonomy
.cdm
.database
.ICdmDataSource
;
27 import eu
.etaxonomy
.cdm
.io
.common
.ImportResult
;
28 import eu
.etaxonomy
.cdm
.model
.common
.Language
;
29 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionBase
;
30 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementBase
;
31 import eu
.etaxonomy
.cdm
.model
.description
.DescriptionElementSource
;
32 import eu
.etaxonomy
.cdm
.model
.description
.Feature
;
33 import eu
.etaxonomy
.cdm
.model
.description
.TaxonDescription
;
34 import eu
.etaxonomy
.cdm
.model
.description
.TextData
;
35 import eu
.etaxonomy
.cdm
.model
.name
.TaxonName
;
36 import eu
.etaxonomy
.cdm
.model
.reference
.OriginalSourceType
;
37 import eu
.etaxonomy
.cdm
.model
.taxon
.Taxon
;
// Post-import updater for the Diptera import. The methods visible in this file
// rewrite citation text data (updateCitations) and delegate a collection
// update to DipteraCollectionImport (updateCollections).
44 public class DipteraPostImportUpdater
{
// Class-wide log4j logger.
45 private static final Logger logger
= Logger
.getLogger(DipteraPostImportUpdater
.class);
// Data source that main() passes to updateCitations().
// NOTE(review): this resolves to CdmDestinations.localH2Palmae() although the
// class lives in the diptera package - confirm this is the intended target.
47 static final ICdmDataSource cdmDestination
= CdmDestinations
.localH2Palmae();
50 * This method updates the citation text by deleting <code>OriginalName</code> tags and
51 * adding the original name to the source either as a link to an existing taxon name
52 * or as a string. The latter applies if there is not exactly one matching name
// Rewrites the text of every citation TextData found on the listed taxa and
// records the original name parsed out of that text on the citation's sources.
//
// @param dataSource data source the CdmApplicationController is started on
// @return the ImportResult created at the top of the method; the catch branch
//         adds an error message and the caught exception to it
//
// NOTE(review): `page`, `i` and `modCount` are used below but their
// declarations are not visible in this listing, and neither are the `try`
// that belongs to the `catch` at the end, the closing braces, or the return
// statement(s). Confirm against the complete file before changing the logic.
56 public ImportResult
updateCitations(ICdmDataSource dataSource
) {
58 logger
.warn("start updating citations");
59 ImportResult result
= new ImportResult();
// Start the application controller on the given data source with
// DbSchemaValidation.VALIDATE.
61 CdmApplicationController cdmApp
= CdmApplicationController
.NewInstance(dataSource
, DbSchemaValidation
.VALIDATE
);
// Collects every citation touched in the loop so they can be saved in one call.
62 Set
<DescriptionElementBase
> citationsToSave
= new HashSet
<DescriptionElementBase
>();
// All reads and writes below happen inside this transaction.
63 TransactionStatus tx
= cdmApp
.startTransaction();
65 logger
.warn("start updating citations ... application context started");
// Load the taxa and names to work on.
// NOTE(review): presumably 100000 is a page size and `page` the page index;
// verify against the service API. Records beyond that limit would not be handled.
68 List
<Taxon
> taxonList
= cdmApp
.getTaxonService().list(Taxon
.class, 100000, page
, null, null);
69 List
<TaxonName
> nameList
= cdmApp
.getNameService().list(null, 100000, page
, null, null);
// Index the loaded names by name cache; names whose cache is already present
// are tracked in the second map (see fillNameMaps).
70 Map
<String
, TaxonName
> nameMap
= new HashMap
<>();
71 Map
<String
, TaxonName
> nameDuplicateMap
= new HashMap
<>();
72 fillNameMaps(nameList
, nameMap
, nameDuplicateMap
);
76 for (Taxon taxon
: taxonList
){
// Progress message every `modCount` taxa.
77 if ((i
++ % modCount
) == 0){ logger
.warn("taxa handled: " + (i
-1));}
79 Set
<TextData
> citations
= getCitations(taxon
);
80 for (TextData citation
: citations
){
// Only the text stored for the default language is read and rewritten.
81 Language language
= Language
.DEFAULT();
82 String text
= citation
.getText(language
);
// Split the stored text into the tagged original name and the remaining text.
83 String originalNameString
= parseOriginalNameString(text
);
84 String newText
= parseNewText(text
);
// Replace the stored text with the remainder that follows the tag.
85 citation
.removeText(language
);
86 citation
.putText(language
, newText
);
// Resolve the original name against the in-memory name maps; may be null.
87 TaxonName scientificName
= getScientificName(originalNameString
, nameMap
, nameDuplicateMap
);
89 Set
<DescriptionElementSource
> sources
= citation
.getSources();
// More than one source: only a warning is logged here.
90 if (sources
.size() > 1){
91 logger
.warn("There are more then 1 sources for a description");
// No source yet: create a PrimaryTaxonomicSource and re-read the source set
// so the loop below sees it.
92 }else if (sources
.size() == 0){
93 DescriptionElementSource source
= DescriptionElementSource
.NewInstance(OriginalSourceType
.PrimaryTaxonomicSource
);
94 citation
.addSource(source
);
95 sources
= citation
.getSources();
// Attach the name information to each source.
97 for (DescriptionElementSource source
: sources
){
98 if (scientificName
!= null){
99 source
.setNameUsedInSource(scientificName
);
// NOTE(review): the line between these two calls is not visible, so it is
// unclear whether setOriginalNameString runs only as the else-branch (no
// matching name) or unconditionally - confirm.
101 source
.setOriginalNameString(originalNameString
);
105 citationsToSave
.add(citation
);
// Persist all modified citations, then commit the transaction.
109 cdmApp
.getDescriptionService().saveDescriptionElement(citationsToSave
);
111 cdmApp
.commitTransaction(tx
);
112 logger
.warn("Citations updated!");
// Failure path: the error is recorded on the result object.
// NOTE(review): no rollback of `tx` is visible in this branch - confirm
// whether the transaction is cleaned up on a line not shown here.
114 } catch (Exception e
) {
116 result
.addError("ERROR in citation update");
117 result
.addException(e
);
123 public ImportResult
updateCollections(ICdmDataSource dataSource
){
124 DipteraCollectionImport collectionImport
= new DipteraCollectionImport();
125 return collectionImport
.invoke(dataSource
);
// Fills the two lookup maps from the given names, keyed by TaxonName.getNameCache().
//
// @param nameList     names to index
// @param nameMap      receives name cache -> name
// @param duplicateMap receives an entry for every name whose name cache is
//                     already a key of nameMap when that name is reached
//
// NOTE(review): the name cache is used as key without a null check in the
// visible lines - a HashMap accepts a null key, so names without a name cache
// would all share one entry; confirm whether that is intended.
129 private void fillNameMaps(List
<TaxonName
> nameList
, Map
<String
, TaxonName
> nameMap
, Map
<String
, TaxonName
> duplicateMap
) {
130 for (TaxonName name
: nameList
){
131 String nameCache
= name
.getNameCache();
// Key seen before: remember it as ambiguous.
132 if (nameMap
.containsKey(nameCache
)){
133 duplicateMap
.put(nameCache
, name
);
// NOTE(review): the line between the two put calls is not visible, so it is
// unclear whether this put is the else-branch (first occurrence only) or also
// runs for duplicates and overwrites the earlier entry - confirm.
135 nameMap
.put(nameCache
, name
);
// Looks up a taxon name for the given original name string in the in-memory maps.
//
// @param originalNameString name string parsed from the citation text; it is
//                           trimmed before the lookup and must not be null,
//                           since trim() is called on it directly
// @param nameMap            name cache -> name, as filled by fillNameMaps
// @param nameDuplicateMap   name caches that occurred more than once
// @return NOTE(review): the return statement is not visible in this listing;
//         presumably the entry found in nameMap, or null when there is none
141 private TaxonName
getScientificName(String originalNameString
, Map
<String
, TaxonName
> nameMap
, Map
<String
, TaxonName
> nameDuplicateMap
) {
142 originalNameString
= originalNameString
.trim();
143 TaxonName result
= nameMap
.get(originalNameString
);
// NOTE(review): the body of this branch is not visible; presumably an
// ambiguous (duplicated) name is not returned as a match - confirm.
144 if (nameDuplicateMap
.containsKey(originalNameString
)){
// Service-based variant of the lookup: asks the name service for names
// matching the given string.
//
// @param originalNameString name string passed to the service's findByName
// @param nameService        service used for the lookup
// @return the first record of the result page on the path visible here
//
// NOTE(review): no call to this overload is visible in this file; the
// map-based overload is the one used by updateCitations.
150 private TaxonName
getScientificName(String originalNameString
, INameService nameService
) {
// All optional findByName arguments are passed as null.
151 Pager
<TaxonName
> names
= nameService
.findByName(null, originalNameString
, null, null, null, null, null, null);
// NOTE(review): the body of this branch is not visible; presumably anything
// other than exactly one hit is treated as "no match" - confirm.
152 if (names
.getCount() != 1){
155 return names
.getRecords().get(0);
// Extracts the text between "<OriginalName>" and "</OriginalName>" from a
// citation text.
//
// @param text citation text expected to contain the OriginalName element
// @return NOTE(review): the return statement is not visible in this listing;
//         presumably the extracted substring
159 private String
parseOriginalNameString(String text
) {
160 String originalName
= "<OriginalName>";
// indexOf yields -1 when the respective tag is absent.
161 int start
= text
.indexOf(originalName
);
162 int end
= text
.indexOf("</OriginalName>");
// Take everything after the opening tag up to (not including) the closing tag.
// NOTE(review): the line before this one is not visible; if it is not a guard,
// a text without the tags (start or end == -1) would make substring throw
// StringIndexOutOfBoundsException - confirm.
164 text
= text
.substring(start
+ originalName
.length(), end
);
// Computes the citation text that remains after the "</OriginalName>" closing
// tag, dropping a leading ':' if present.
//
// @param text citation text expected to contain the closing tag
// @return NOTE(review): the return statement is not visible in this listing;
//         presumably the remaining text
170 private String
parseNewText(String text
) {
171 int start
= text
.indexOf("</OriginalName>");
// Keep only what follows the closing tag.
// NOTE(review): when the tag is absent indexOf returns -1, so this cuts at
// index (tag length - 1) instead - it either drops the first characters of the
// text or throws for shorter texts; confirm the tag is always present.
172 text
= text
.substring(start
+ "</OriginalName>".length());
// Strip the ':' separator when the remaining text starts with one.
// NOTE(review): the line before this check is not visible (possibly trimming) - confirm.
174 if (text
.startsWith(":")){
175 text
= text
.substring(1);
// Collects the description elements of a taxon whose feature equals
// Feature.CITATION(), as TextData.
//
// @param taxon taxon whose descriptions are scanned
// @return NOTE(review): the return statement is not visible in this listing;
//         presumably the `result` set built below
181 private Set
<TextData
> getCitations(Taxon taxon
) {
182 Set
<TextData
> result
= new HashSet
<TextData
>();
183 Set
<TaxonDescription
> descriptions
= taxon
.getDescriptions();
// Walk every element of every description of the taxon.
184 for (DescriptionBase description
: descriptions
){
185 Set
<DescriptionElementBase
> elements
= description
.getElements();
186 for (DescriptionElementBase element
: elements
){
187 Feature feature
= element
.getFeature();
// NOTE(review): equals is invoked on `feature` directly, so an element
// without a feature would raise a NullPointerException here - confirm that
// getFeature() cannot return null.
188 if (feature
.equals(Feature
.CITATION())){
// A citation element that is not a TextData is reported with a warning.
189 if (! element
.isInstanceOf(TextData
.class)){
190 logger
.warn("Citation is not of class TextData but " + element
.getClass().getSimpleName());
// NOTE(review): the line between the warning and the deproxy call is not
// visible; presumably the deproxy/add below is the else-branch - confirm.
192 TextData textData
= element
.deproxy(element
, TextData
.class);
193 result
.add(textData
);
207 public static void main(String
[] args
) {
208 DipteraPostImportUpdater updater
= new DipteraPostImportUpdater();
210 updater
.updateCitations(cdmDestination
);
211 } catch (Exception e
) {
213 logger
.error("ERROR in feature tree update");