latest changes for Cyprus Altitude Import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / app / cyprus / CyprusAltitudeActivator.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.app.cyprus;
11
12 import java.io.FileNotFoundException;
13 import java.net.URI;
14 import java.net.URISyntaxException;
15 import java.util.ArrayList;
16 import java.util.HashMap;
17 import java.util.HashSet;
18 import java.util.Set;
19 import java.util.UUID;
20 import java.util.regex.Matcher;
21 import java.util.regex.Pattern;
22
23 import org.apache.commons.lang.StringUtils;
24 import org.apache.log4j.Logger;
25 import org.springframework.transaction.TransactionStatus;
26
27 import eu.etaxonomy.cdm.api.application.CdmApplicationController;
28 import eu.etaxonomy.cdm.app.common.CdmDestinations;
29 import eu.etaxonomy.cdm.common.ExcelUtils;
30 import eu.etaxonomy.cdm.database.DbSchemaValidation;
31 import eu.etaxonomy.cdm.database.ICdmDataSource;
32 import eu.etaxonomy.cdm.io.api.application.CdmIoApplicationController;
33 import eu.etaxonomy.cdm.io.common.IImportConfigurator.CHECK;
34 import eu.etaxonomy.cdm.model.common.CdmBase;
35 import eu.etaxonomy.cdm.model.common.OriginalSourceType;
36 import eu.etaxonomy.cdm.model.common.TermVocabulary;
37 import eu.etaxonomy.cdm.model.description.Feature;
38 import eu.etaxonomy.cdm.model.description.MeasurementUnit;
39 import eu.etaxonomy.cdm.model.description.QuantitativeData;
40 import eu.etaxonomy.cdm.model.description.StatisticalMeasure;
41 import eu.etaxonomy.cdm.model.description.StatisticalMeasurementValue;
42 import eu.etaxonomy.cdm.model.description.TaxonDescription;
43 import eu.etaxonomy.cdm.model.name.TaxonNameBase;
44 import eu.etaxonomy.cdm.model.reference.Reference;
45 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
46 import eu.etaxonomy.cdm.model.taxon.Synonym;
47 import eu.etaxonomy.cdm.model.taxon.SynonymRelationship;
48 import eu.etaxonomy.cdm.model.taxon.Taxon;
49 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
50
51 /**
52 * @author a.mueller
53 * @created 16.12.2010
54 * @version 1.0
55 */
56 public class CyprusAltitudeActivator {
57 private static final Logger logger = Logger.getLogger(CyprusAltitudeActivator.class);
58
59 //database validation status (create, update, validate ...)
60 static DbSchemaValidation hbm2dll = DbSchemaValidation.VALIDATE;
61 // static final URI source = cyprus_distribution();
62 static final URI source = cyprus_altitude();
63
64
65 // static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
66 // static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql();
67 static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_dev();
68 // static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cyprus_production();
69
70
71 //feature tree uuid
72 public static final UUID featureTreeUuid = UUID.fromString("14d1e912-5ec2-4d10-878b-828788b70a87");
73
74 //classification
75 static final UUID classificationUuid = UUID.fromString("0c2b5d25-7b15-4401-8b51-dd4be0ee5cab");
76
77 private static final String sourceReferenceTitle = "Cyprus Excel Altitude Import";
78
79
80 //TODO move to Feature vocabulary
81 private static final UUID uuidAltitudeFeature = UUID.fromString("1a28ed59-e15f-4001-b5c2-ea89f0012671");
82
83 //check - import
84 static final CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
85
86 private void doImport(ICdmDataSource cdmDestination){
87
88
89 ArrayList<HashMap<String, String>> excel;
90 try {
91 excel = ExcelUtils.parseXLS(source, "coreTax");
92 } catch (FileNotFoundException e) {
93 e.printStackTrace();
94 return;
95 }
96
97 CdmApplicationController app = CdmIoApplicationController.NewInstance(cdmDestination, hbm2dll);
98
99 Set<TaxonBase> taxaToSave = new HashSet<TaxonBase>();
100
101 TransactionStatus tx = app.startTransaction();
102
103 UUID uuidMikle77 = UUID.fromString("9f5fa7ee-538b-4ae5-bd82-2a9503fea1d6");
104 UUID uuidMikle85 = UUID.fromString("994403c4-c400-413d-9a1a-8531a40bfd8c");
105
106 Reference<?> mikle77 = app.getReferenceService().find(uuidMikle77);
107 Reference<?> mikle85 = app.getReferenceService().find(uuidMikle85);
108
109
110 Feature altitudeFeature = (Feature) app.getTermService().find(uuidAltitudeFeature);
111 if (altitudeFeature == null){
112 // altitudeFeature = Feature.NewInstance("Altitude", "Altitude", "alt.");
113 // altitudeFeature.setUuid(uuidAltitudeFeature);
114 // featureVoc = app.getVocabularyService().find(UUID.fromString("b187d555-f06f-4d65-9e53-da7c93f8eaa8"));
115 // featureVoc.addTerm(altitudeFeature);
116 throw new RuntimeException("Could not find altitudinal range feature");
117 }
118
119 MeasurementUnit meter = (MeasurementUnit)app.getTermService().find(UUID.fromString("8bef5055-789c-41e5-bea2-8dc2ea8ecdf6"));
120 // NamedArea cyprus = (NamedArea)app.getTermService().find(UUID.fromString("da4cce9a-439b-4cc4-8073-85dc75bae169"));
121
122 int count =1;
123 for (HashMap<String, String> row : excel){
124 count++;
125 UUID baseUuid = makeUuid(row, "uuid");
126 UUID acceptedUuid = makeUuid(row, "acceptedNameUuid");
127 UUID parentUuid = makeUuid(row, "parentUuid");
128
129 String altitude = row.get("Altitude-kumuliert");
130
131 String altitudeMin = row.get("Min");
132 String altitudeMax = row.get("Max");
133 String acceptedName = row.get("AcceptedName");
134
135
136
137 String source = row.get("Source");
138
139 if (StringUtils.isBlank(altitudeMin)){
140 continue;
141 }
142
143 boolean hasAltitude = false;
144 Reference<?> sourceRef = getSource(source, mikle77, mikle85);
145 Taxon taxon = getTaxon(app, baseUuid, acceptedUuid, parentUuid, acceptedName, count);
146 if (taxon != null){
147 TaxonDescription desc = getDescription(taxon, sourceRef);
148
149 hasAltitude = makeAltitude(altitudeMin, altitudeMax, altitudeFeature, sourceRef, desc, meter, count);
150 // hasAltitude = makeAltitudeOld(altitude, altitudeFeature, sourceRef, desc, meter, count);
151 if (hasAltitude){
152 if(desc.getTaxon() == null){
153 taxon.addDescription(desc);
154 }
155 taxaToSave.add(taxon);
156 }else{
157 logger.warn("HasALtitude is false in " + count);
158 }
159 }else{
160 logger.warn("Taxon not recognized in line " + count);
161 }
162 }
163
164 app.getTaxonService().saveOrUpdate(taxaToSave);
165
166 // tx.setRollbackOnly();
167 app.commitTransaction(tx);
168 }
169
170
171 private Taxon getTaxon(CdmApplicationController app, UUID baseUuid, UUID acceptedUuid, UUID parentUuid, String acceptedName, int row) {
172 TaxonBase<?> base = app.getTaxonService().find(baseUuid);
173 // TaxonBase<?> parent = app.getTaxonService().find(parentUuid);
174
175 //TODO
176 Taxon result = null;
177 if (base.isInstanceOf(Taxon.class)){
178 Taxon t = CdmBase.deproxy(base, Taxon.class);
179 if (t.getTaxonNodes().size() == 1 && t.getTaxonNodes().iterator().next().getClassification().getUuid().equals(classificationUuid)){
180 result = t;
181 }else{
182 logger.warn("Base taxon (uuid) not in classification. Row: " + row + ", Taxon: " + base.getTitleCache());
183 }
184 }
185 if (result == null){
186 TaxonBase<?> accepted = app.getTaxonService().find(acceptedUuid);
187 Taxon t = CdmBase.deproxy(accepted, Taxon.class);;
188 if (t.getTaxonNodes().size() == 1 && t.getTaxonNodes().iterator().next().getClassification().getUuid().equals(classificationUuid)){
189 if (hasSynonym(t, base)){
190 result = t;
191 }else{
192 logger.warn("Synonym relation has changed somehow. Row: " + row + ", Taxon: " + base.getTitleCache());
193 }
194
195 }else{
196 logger.warn("Accepted taxon not in classification. Row: " + row + ", Taxon: " + base.getTitleCache());
197 }
198 }
199
200 if (result != null){
201 if (! result.getName().getTitleCache().equals(acceptedName)){
202 logger.warn("AcceptedName and taxon name is not equal in " + row + ".\n" +
203 " Accepted Name: " + acceptedName + ";\n" +
204 " Taxon Name: " + result.getName().getTitleCache());
205 }
206 }
207
208 return result;
209 }
210
211 private boolean hasSynonym(Taxon t, TaxonBase<?> base) {
212 if (base.isInstanceOf(Synonym.class)){
213 for (SynonymRelationship rel : t.getSynonymRelations()){
214 if (rel.getSynonym().equals(base)){
215 return true;
216 }
217 }
218 }
219 return false;
220 }
221
222 private static final Pattern altitudePattern = Pattern.compile("\\d{1,4}(-\\d{1,4})?");
223
224
225 private boolean makeAltitude(String altitudeMin, String altitudeMax, Feature altitudeFeature,
226 Reference<?> sourceRef, TaxonDescription desc, MeasurementUnit meter, int row) {
227
228 QuantitativeData data = QuantitativeData.NewInstance(altitudeFeature);
229
230 //Meikle
231 if (source != null){
232 TaxonNameBase<?,?> nameUsedInSource = null; //TODO
233 data.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, sourceRef, null, nameUsedInSource, null);
234 }
235 // //Excel //excel source not wanted by Ralf
236 // TaxonNameBase<?,?> nameUsedInSource = null; //TODO probably we don't want this
237 // data.addSource(OriginalSourceType.Import, String.valueOf(row), "row", getSourceReference(), null, nameUsedInSource, null);
238
239 data.setUnit(meter);
240
241 Integer min = Integer.valueOf(altitudeMin);
242 StatisticalMeasurementValue minValue = StatisticalMeasurementValue.NewInstance(StatisticalMeasure.MIN(), min);
243 data.addStatisticalValue(minValue);
244
245 Integer max = Integer.valueOf(altitudeMax);
246 StatisticalMeasurementValue maxValue = StatisticalMeasurementValue.NewInstance(StatisticalMeasure.MAX(), max);
247 data.addStatisticalValue(maxValue);
248
249 desc.addElement(data);
250 return true;
251 }
252
253 private boolean makeAltitudeOld(String altitudeOrig, Feature feature, Reference<?> source, TaxonDescription desc, MeasurementUnit meter, int row) {
254 String altitude = altitudeOrig.trim().replace(" ", "");
255 Matcher matcher = altitudePattern.matcher(altitude);
256
257 if (matcher.matches()){
258 QuantitativeData data = QuantitativeData.NewInstance(feature);
259
260 //Meikle
261 if (source != null){
262 TaxonNameBase<?,?> nameUsedInSource = null; //TODO
263 data.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, source, null, nameUsedInSource, null);
264 }
265 //Excel
266 TaxonNameBase<?,?> nameUsedInSource = null; //TODO probably we don't want this
267 data.addSource(OriginalSourceType.Import, String.valueOf(row), "row", getSourceReference(), null, nameUsedInSource, null);
268 data.setUnit(meter);
269
270 String[] split = altitude.split("-");
271
272 Integer min = Integer.valueOf(split[0]);
273 StatisticalMeasurementValue minValue = StatisticalMeasurementValue.NewInstance(StatisticalMeasure.MIN(), min);
274 data.addStatisticalValue(minValue);
275
276 if (split.length > 1){
277 Integer max = Integer.valueOf(split[1]);
278 StatisticalMeasurementValue maxValue = StatisticalMeasurementValue.NewInstance(StatisticalMeasure.MAX(), max);
279 data.addStatisticalValue(maxValue);
280 }
281 desc.addElement(data);
282 return true;
283 }else{
284 logger.warn("Altitude does not match in row " + row + ": " + altitudeOrig);
285 return false;
286 }
287 }
288
289 private TaxonDescription getDescription(Taxon taxon, Reference<?> sourceRef) {
290 if (taxon != null){
291 //TODO Mikle existiert derzeit nicht also Source
292
293 TaxonDescription desc = TaxonDescription.NewInstance();
294 desc.setTitleCache("Import from " + getSourceReference().getTitleCache(), true);
295 desc.addSource(OriginalSourceType.PrimaryTaxonomicSource, null, null, sourceRef,null);
296 desc.addSource(OriginalSourceType.Import, null, null, getSourceReference(), null);
297
298 return desc;
299 }
300 return null;
301 }
302
303 private Reference<?> getSource(String source, Reference<?> m77, Reference<?> m85) {
304 if(StringUtils.isNotBlank(source)){
305 if (source.equals("Meikle 1977")){
306 return m77;
307 }else if (source.equals("Meikle 1985")){
308 return m85;
309 }else{
310 logger.warn("Source not recognized: " + source);
311 }
312 }
313 return null;
314 }
315
316 /**
317 * @param row
318 * @return
319 */
320 private UUID makeUuid(HashMap<String, String> row, String colName) {
321 if (StringUtils.isBlank(row.get(colName))){
322 return null;
323 }else{
324 return UUID.fromString(row.get(colName));
325 }
326 }
327
328 // private void getRowValues(HashMap<String, String> row) {
329 // // TODO Auto-generated method stub
330 // HashMap<String, Object> = new HashM
331 // row
332 //
333 //
334 // }
335
336
337 Reference<?> sourceReference;
338 private Reference<?> getSourceReference() {
339 if (sourceReference == null){
340 sourceReference = ReferenceFactory.newGeneric();
341 sourceReference.setTitleCache(sourceReferenceTitle, true);
342
343 }
344 return sourceReference;
345
346 }
347
348
349 //Cyprus
350 public static URI cyprus_altitude() {
351 URI sourceUrl;
352 try {
353 sourceUrl = new URI("file:/F:/data/cyprus/Cyprus-altitude-import-neu.xls");
354 // sourceUrl = new URI("file:/F:/data/cyprus/Zypern-Altitude.xls");
355 return sourceUrl;
356 } catch (URISyntaxException e) {
357 e.printStackTrace();
358 return null;
359 }
360 }
361
362
363 /**
364 * @param args
365 */
366 public static void main(String[] args) {
367 CyprusAltitudeActivator me = new CyprusAltitudeActivator();
368 me.doImport(cdmDestination);
369 me.testMatcher();
370 }
371
372 private void testMatcher() {
373 // makeAltitude("0-4400", null, null);
374
375 }
376
377 }