1 |
066bfef4
|
Andreas Müller
|
/**
|
2 |
|
|
* Copyright (C) 2016 EDIT
|
3 |
|
|
* European Distributed Institute of Taxonomy
|
4 |
|
|
* http://www.e-taxonomy.eu
|
5 |
|
|
*
|
6 |
|
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7 |
|
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8 |
|
|
*/
|
9 |
|
|
package eu.etaxonomy.cdm.io.greece;
|
10 |
|
|
|
11 |
|
|
import java.util.Arrays;
|
12 |
|
|
import java.util.List;
|
13 |
539b7820
|
Andreas Müller
|
import java.util.Map;
|
14 |
066bfef4
|
Andreas Müller
|
import java.util.Set;
|
15 |
|
|
import java.util.regex.Matcher;
|
16 |
|
|
import java.util.regex.Pattern;
|
17 |
|
|
|
18 |
|
|
import org.apache.log4j.Logger;
|
19 |
|
|
import org.springframework.stereotype.Component;
|
20 |
|
|
|
21 |
|
|
import eu.etaxonomy.cdm.api.service.config.MatchingTaxonConfigurator;
|
22 |
|
|
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
23 |
|
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
24 |
|
|
import eu.etaxonomy.cdm.model.common.Language;
|
25 |
|
|
import eu.etaxonomy.cdm.model.description.Feature;
|
26 |
|
|
import eu.etaxonomy.cdm.model.description.TaxonDescription;
|
27 |
|
|
import eu.etaxonomy.cdm.model.description.TextData;
|
28 |
de81ab2a
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.reference.OriginalSourceType;
|
29 |
066bfef4
|
Andreas Müller
|
import eu.etaxonomy.cdm.model.reference.Reference;
|
30 |
|
|
import eu.etaxonomy.cdm.model.taxon.Synonym;
|
31 |
|
|
import eu.etaxonomy.cdm.model.taxon.Taxon;
|
32 |
|
|
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
|
33 |
|
|
|
34 |
|
|
/**
|
35 |
|
|
* @author a.mueller
|
36 |
53d7c437
|
Andreas Müller
|
* @since 14.12.2016
|
37 |
066bfef4
|
Andreas Müller
|
*
|
38 |
|
|
*/
|
39 |
|
|
|
40 |
|
|
@Component
|
41 |
|
|
public class FloraHellenicaImageCaptionImport<CONFIG extends FloraHellenicaImportConfigurator>
|
42 |
|
|
extends FloraHellenicaImportBase<CONFIG>{
|
43 |
|
|
|
44 |
|
|
private static final long serialVersionUID = 2629253144140992196L;
|
45 |
|
|
private static final Logger logger = Logger.getLogger(FloraHellenicaImageCaptionImport.class);
|
46 |
|
|
|
47 |
|
|
private static final String TEXT = "Text";
|
48 |
|
|
protected static Integer startPage = 316;
|
49 |
|
|
|
50 |
|
|
|
51 |
|
|
private static List<String> expectedKeys= Arrays.asList(new String[]{
|
52 |
|
|
TEXT
|
53 |
|
|
});
|
54 |
|
|
|
55 |
|
|
|
56 |
|
|
@Override
|
57 |
9eca8b4a
|
Andreas Müller
|
protected String getWorksheetName(CONFIG config) {
|
58 |
066bfef4
|
Andreas Müller
|
return "Captions";
|
59 |
|
|
}
|
60 |
|
|
|
61 |
|
|
/**
|
62 |
|
|
* {@inheritDoc}
|
63 |
|
|
*/
|
64 |
|
|
@Override
|
65 |
|
|
protected void firstPass(SimpleExcelTaxonImportState<CONFIG> state) {
|
66 |
|
|
|
67 |
|
|
String line = state.getCurrentLine() + ": ";
|
68 |
539b7820
|
Andreas Müller
|
Map<String, String> record = state.getOriginalRecord();
|
69 |
066bfef4
|
Andreas Müller
|
|
70 |
|
|
Set<String> keys = record.keySet();
|
71 |
|
|
for (String key: keys) {
|
72 |
|
|
if (! expectedKeys.contains(key)){
|
73 |
|
|
logger.warn(line + "Unexpected key: " + key);
|
74 |
|
|
}
|
75 |
|
|
}
|
76 |
|
|
|
77 |
|
|
makeCaption(state, line, record);
|
78 |
|
|
|
79 |
|
|
}
|
80 |
|
|
|
81 |
|
|
|
82 |
|
|
|
83 |
|
|
Integer plateNo = 0;
|
84 |
|
|
private Taxon makeCaption(SimpleExcelTaxonImportState<CONFIG> state,
|
85 |
|
|
String line,
|
86 |
539b7820
|
Andreas Müller
|
Map<String, String> record) {
|
87 |
066bfef4
|
Andreas Müller
|
|
88 |
|
|
String text = getValue(record, TEXT);
|
89 |
|
|
if (text.matches("Plate \\d\\d?")){
|
90 |
|
|
plateNo = Integer.valueOf(text.substring(6));
|
91 |
|
|
return null;
|
92 |
|
|
}
|
93 |
|
|
Pattern pattern = Pattern.compile(
|
94 |
|
|
"(\\d)\\. ([A-Z][a-z\\-]+ [a-z\\-]+(?: subsp. [a-z\\-]+)?) (\\([A-Z][a-z\\-]+\\))(.*)");
|
95 |
|
|
Matcher matcher = pattern.matcher(text);
|
96 |
|
|
if (!matcher.matches()){
|
97 |
|
|
logger.warn(line + "String caption not recognized: " + text);
|
98 |
|
|
return null;
|
99 |
|
|
}else{
|
100 |
|
|
String subNo = matcher.group(1);
|
101 |
|
|
String name = matcher.group(2);
|
102 |
|
|
name = adaptName(name);
|
103 |
|
|
MatchingTaxonConfigurator matchConfig = new MatchingTaxonConfigurator();
|
104 |
|
|
matchConfig.setTaxonNameTitle(name);
|
105 |
|
|
matchConfig.setIncludeSynonyms(false);
|
106 |
|
|
List<TaxonBase> taxa = getTaxonService().findTaxaByName(matchConfig);
|
107 |
|
|
TaxonBase<?> taxonBase;
|
108 |
|
|
if (taxa.isEmpty()){
|
109 |
|
|
logger.warn(line + "Taxon not found for name: " + name);
|
110 |
|
|
return null;
|
111 |
|
|
}else if (taxa.size() > 1){
|
112 |
|
|
logger.warn(line + "Found more then 1 taxon for name: " + name);
|
113 |
|
|
}
|
114 |
|
|
taxonBase = taxa.get(0);
|
115 |
|
|
Taxon taxon;
|
116 |
|
|
if (taxonBase.isInstanceOf(Synonym.class)){
|
117 |
|
|
taxon = CdmBase.deproxy(taxonBase, Synonym.class).getAcceptedTaxon();
|
118 |
|
|
logger.warn(line + "Taxon name is synonym: " + name);
|
119 |
|
|
}else{
|
120 |
|
|
taxon = CdmBase.deproxy(taxonBase, Taxon.class);
|
121 |
|
|
}
|
122 |
|
|
TaxonDescription td = getTaxonDescription(taxon, false, true);
|
123 |
|
|
|
124 |
|
|
String laterText = matcher.group(4);
|
125 |
|
|
if (laterText.startsWith(". This")){
|
126 |
|
|
laterText = laterText.substring(6);
|
127 |
|
|
}
|
128 |
|
|
text = matcher.group(2) + laterText;
|
129 |
|
|
|
130 |
d45450b6
|
Andreas Müller
|
Feature feature = getFeature(state, FloraHellenicaTransformer.uuidFloraHellenicaTaxonInfoFeature);
|
131 |
|
|
TextData textData = TextData.NewInstance(feature, text, Language.ENGLISH(), null);
|
132 |
066bfef4
|
Andreas Müller
|
td.addElement(textData);
|
133 |
|
|
Reference citation = this.getSecReference(state);
|
134 |
|
|
Integer myPage = startPage + plateNo * 2;
|
135 |
|
|
|
136 |
|
|
textData.addSource(OriginalSourceType.PrimaryTaxonomicSource, plateNo + "." +subNo
|
137 |
|
|
, "Plate", citation, "p. " + myPage);
|
138 |
|
|
|
139 |
|
|
return taxon;
|
140 |
|
|
|
141 |
|
|
}
|
142 |
|
|
}
|
143 |
|
|
|
144 |
|
|
/**
|
145 |
|
|
* @param name
|
146 |
|
|
* @return
|
147 |
|
|
*/
|
148 |
|
|
private String adaptName(String name) {
|
149 |
|
|
if (name.equals("Lathraea rhodopaea")){
|
150 |
|
|
name = "Lathraea rhodopea";
|
151 |
|
|
}else if (name.equals("Soldanella chrysosticha subsp. chrysosticha")){
|
152 |
|
|
name = "Soldanella chrysosticta subsp. chrysosticta";
|
153 |
|
|
}
|
154 |
|
|
return name;
|
155 |
|
|
}
|
156 |
|
|
|
157 |
|
|
}
|