/**
 * Copyright (C) 2007 EDIT
 * European Distributed Institute of Taxonomy
 * http://www.e-taxonomy.eu
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * See LICENSE.TXT at the top of this package for the full license terms.
 */
9 package eu
.etaxonomy
.cdm
.app
.wp6
.diptera
;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;
import org.springframework.transaction.TransactionStatus;

import eu.etaxonomy.cdm.api.application.CdmApplicationController;
import eu.etaxonomy.cdm.api.service.INameService;
import eu.etaxonomy.cdm.api.service.pager.Pager;
import eu.etaxonomy.cdm.app.common.CdmDestinations;
import eu.etaxonomy.cdm.database.DbSchemaValidation;
import eu.etaxonomy.cdm.database.ICdmDataSource;
import eu.etaxonomy.cdm.io.common.ImportResult;
import eu.etaxonomy.cdm.model.common.Language;
import eu.etaxonomy.cdm.model.description.DescriptionBase;
import eu.etaxonomy.cdm.model.description.DescriptionElementBase;
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
import eu.etaxonomy.cdm.model.description.Feature;
import eu.etaxonomy.cdm.model.description.TaxonDescription;
import eu.etaxonomy.cdm.model.description.TextData;
import eu.etaxonomy.cdm.model.name.TaxonName;
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
import eu.etaxonomy.cdm.model.taxon.Taxon;
42 public class DipteraPostImportUpdater
{
43 private static final Logger logger
= Logger
.getLogger(DipteraPostImportUpdater
.class);
45 static final ICdmDataSource cdmDestination
= CdmDestinations
.localH2Palmae();
48 * This method updateds the citation text by deleting <code>OriginalName</code> tags and
49 * adding the original name to the source either as a link to an existing taxon name
50 * or as a string. The later becomes true if there is not exactly one matching name
54 public ImportResult
updateCitations(ICdmDataSource dataSource
) {
56 logger
.warn("start updating citations");
57 ImportResult result
= new ImportResult();
59 CdmApplicationController cdmApp
= CdmApplicationController
.NewInstance(dataSource
, DbSchemaValidation
.VALIDATE
);
60 Set
<DescriptionElementBase
> citationsToSave
= new HashSet
<>();
61 TransactionStatus tx
= cdmApp
.startTransaction();
63 logger
.warn("start updating citations ... application context started");
66 List
<Taxon
> taxonList
= cdmApp
.getTaxonService().list(Taxon
.class, 100000, page
, null, null);
67 List
<TaxonName
> nameList
= cdmApp
.getNameService().list(null, 100000, page
, null, null);
68 Map
<String
, TaxonName
> nameMap
= new HashMap
<>();
69 Map
<String
, TaxonName
> nameDuplicateMap
= new HashMap
<>();
70 fillNameMaps(nameList
, nameMap
, nameDuplicateMap
);
74 for (Taxon taxon
: taxonList
){
75 if ((i
++ % modCount
) == 0){ logger
.warn("taxa handled: " + (i
-1));}
77 Set
<TextData
> citations
= getCitations(taxon
);
78 for (TextData citation
: citations
){
79 Language language
= Language
.DEFAULT();
80 String text
= citation
.getText(language
);
81 String originalNameString
= parseOriginalNameString(text
);
82 String newText
= parseNewText(text
);
83 citation
.removeText(language
);
84 citation
.putText(language
, newText
);
85 TaxonName scientificName
= getScientificName(originalNameString
, nameMap
, nameDuplicateMap
);
87 Set
<DescriptionElementSource
> sources
= citation
.getSources();
88 if (sources
.size() > 1){
89 logger
.warn("There are more then 1 sources for a description");
90 }else if (sources
.size() == 0){
91 DescriptionElementSource source
= DescriptionElementSource
.NewInstance(OriginalSourceType
.PrimaryTaxonomicSource
);
92 citation
.addSource(source
);
93 sources
= citation
.getSources();
95 for (DescriptionElementSource source
: sources
){
96 if (scientificName
!= null){
97 source
.setNameUsedInSource(scientificName
);
99 source
.setOriginalNameString(originalNameString
);
103 citationsToSave
.add(citation
);
107 cdmApp
.getDescriptionElementService().save(citationsToSave
);
109 cdmApp
.commitTransaction(tx
);
110 logger
.warn("Citations updated!");
112 } catch (Exception e
) {
114 result
.addError("ERROR in citation update");
115 result
.addException(e
);
121 public ImportResult
updateCollections(ICdmDataSource dataSource
){
122 DipteraCollectionImport collectionImport
= new DipteraCollectionImport();
123 return collectionImport
.invoke(dataSource
);
127 private void fillNameMaps(List
<TaxonName
> nameList
, Map
<String
, TaxonName
> nameMap
, Map
<String
, TaxonName
> duplicateMap
) {
128 for (TaxonName name
: nameList
){
129 String nameCache
= name
.getNameCache();
130 if (nameMap
.containsKey(nameCache
)){
131 duplicateMap
.put(nameCache
, name
);
133 nameMap
.put(nameCache
, name
);
139 private TaxonName
getScientificName(String originalNameString
, Map
<String
, TaxonName
> nameMap
, Map
<String
, TaxonName
> nameDuplicateMap
) {
140 originalNameString
= originalNameString
.trim();
141 TaxonName result
= nameMap
.get(originalNameString
);
142 if (nameDuplicateMap
.containsKey(originalNameString
)){
148 private TaxonName
getScientificName(String originalNameString
, INameService nameService
) {
149 Pager
<TaxonName
> names
= nameService
.findByName(null, originalNameString
, null, null, null, null, null, null);
150 if (names
.getCount() != 1){
153 return names
.getRecords().get(0);
157 private String
parseOriginalNameString(String text
) {
158 String originalName
= "<OriginalName>";
159 int start
= text
.indexOf(originalName
);
160 int end
= text
.indexOf("</OriginalName>");
162 text
= text
.substring(start
+ originalName
.length(), end
);
168 private String
parseNewText(String text
) {
169 int start
= text
.indexOf("</OriginalName>");
170 text
= text
.substring(start
+ "</OriginalName>".length());
172 if (text
.startsWith(":")){
173 text
= text
.substring(1);
179 private Set
<TextData
> getCitations(Taxon taxon
) {
180 Set
<TextData
> result
= new HashSet
<TextData
>();
181 Set
<TaxonDescription
> descriptions
= taxon
.getDescriptions();
182 for (DescriptionBase description
: descriptions
){
183 Set
<DescriptionElementBase
> elements
= description
.getElements();
184 for (DescriptionElementBase element
: elements
){
185 Feature feature
= element
.getFeature();
186 if (feature
.equals(Feature
.CITATION())){
187 if (! element
.isInstanceOf(TextData
.class)){
188 logger
.warn("Citation is not of class TextData but " + element
.getClass().getSimpleName());
190 TextData textData
= element
.deproxy(element
, TextData
.class);
191 result
.add(textData
);
205 public static void main(String
[] args
) {
206 DipteraPostImportUpdater updater
= new DipteraPostImportUpdater();
208 updater
.updateCitations(cdmDestination
);
209 } catch (Exception e
) {
211 logger
.error("ERROR in feature tree update");