update Globis import
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / globis / GlobisSpecTaxImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.globis;
11
12 import java.sql.ResultSet;
13 import java.sql.SQLException;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.Map;
17 import java.util.Set;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20
21 import org.apache.commons.lang.StringUtils;
22 import org.apache.log4j.Logger;
23 import org.springframework.stereotype.Component;
24
25 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade;
26 import eu.etaxonomy.cdm.api.facade.DerivedUnitFacade.DerivedUnitType;
27 import eu.etaxonomy.cdm.common.CdmUtils;
28 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
29 import eu.etaxonomy.cdm.io.common.IOValidator;
30 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
31 import eu.etaxonomy.cdm.io.common.mapping.IMappingImport;
32 import eu.etaxonomy.cdm.io.globis.validation.GlobisReferenceImportValidator;
33 import eu.etaxonomy.cdm.io.globis.validation.GlobisSpecTaxaImportValidator;
34 import eu.etaxonomy.cdm.model.common.CdmBase;
35 import eu.etaxonomy.cdm.model.common.Extension;
36 import eu.etaxonomy.cdm.model.common.Marker;
37 import eu.etaxonomy.cdm.model.common.MarkerType;
38 import eu.etaxonomy.cdm.model.location.WaterbodyOrCountry;
39 import eu.etaxonomy.cdm.model.name.NomenclaturalCode;
40 import eu.etaxonomy.cdm.model.name.Rank;
41 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignation;
42 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationStatus;
43 import eu.etaxonomy.cdm.model.name.SpecimenTypeDesignationTest;
44 import eu.etaxonomy.cdm.model.name.ZoologicalName;
45 import eu.etaxonomy.cdm.model.occurrence.Collection;
46 import eu.etaxonomy.cdm.model.occurrence.DerivationEvent;
47 import eu.etaxonomy.cdm.model.occurrence.DerivationEventType;
48 import eu.etaxonomy.cdm.model.occurrence.DerivedUnitBase;
49 import eu.etaxonomy.cdm.model.occurrence.FieldObservation;
50 import eu.etaxonomy.cdm.model.occurrence.Specimen;
51 import eu.etaxonomy.cdm.model.reference.Reference;
52 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
53 import eu.etaxonomy.cdm.model.reference.ReferenceType;
54 import eu.etaxonomy.cdm.model.taxon.Synonym;
55 import eu.etaxonomy.cdm.model.taxon.SynonymRelationshipType;
56 import eu.etaxonomy.cdm.model.taxon.Taxon;
57 import eu.etaxonomy.cdm.model.taxon.TaxonBase;
58 import eu.etaxonomy.cdm.strategy.exceptions.UnknownCdmTypeException;
59
60
61 /**
62 * @author a.mueller
63 * @created 20.02.2010
64 * @version 1.0
65 */
66 @Component
67 public class GlobisSpecTaxImport extends GlobisImportBase<Reference> implements IMappingImport<Reference, GlobisImportState>{
/** Log4j logger used by all methods of this import step. */
private static final Logger logger = Logger.getLogger(GlobisSpecTaxImport.class);

/** Plural label passed to the base class; also used as prefix in progress log messages. */
private static final String pluralString = "taxa";

/** Name of the source table; passed to the base class and used to build the id query. */
private static final String dbTableName = "specTax";

/** Target class passed to the base class constructor. */
private static final Class cdmTargetClass = Reference.class;

/** Number of handled records after which a progress message is written to the log. */
private int modCount = 10000;

/**
 * Creates the import step and registers its plural label, source table name
 * and target class with the base class.
 */
public GlobisSpecTaxImport(){
    super(pluralString, dbTableName, cdmTargetClass);
}
78
79
80
81
82 /* (non-Javadoc)
83 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#getIdQuery()
84 */
85 @Override
86 protected String getIdQuery() {
87 String strRecordQuery =
88 " SELECT specTaxId " +
89 " FROM " + dbTableName;
90 return strRecordQuery;
91 }
92
93
94
95
96 /* (non-Javadoc)
97 * @see eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportBase#getRecordQuery(eu.etaxonomy.cdm.io.berlinModel.in.BerlinModelImportConfigurator)
98 */
99 @Override
100 protected String getRecordQuery(GlobisImportConfigurator config) {
101 String strRecordQuery =
102 " SELECT t.*, t.DateCreated as Created_When, t.CreatedBy as Created_Who," +
103 " t.ModifiedBy as Updated_who, t.DateModified as Updated_When, t.SpecRemarks as Notes " +
104 " FROM " + getTableName() + " t " +
105 " WHERE ( t.specTaxId IN (" + ID_LIST_TOKEN + ") )";
106 return strRecordQuery;
107 }
108
109
110
111 /* (non-Javadoc)
112 * @see eu.etaxonomy.cdm.io.globis.GlobisImportBase#doPartition(eu.etaxonomy.cdm.io.common.ResultSetPartitioner, eu.etaxonomy.cdm.io.globis.GlobisImportState)
113 */
114 @Override
115 public boolean doPartition(ResultSetPartitioner partitioner, GlobisImportState state) {
116 boolean success = true;
117
118 Set<TaxonBase> objectsToSave = new HashSet<TaxonBase>();
119
120 Map<String, Taxon> taxonMap = (Map<String, Taxon>) partitioner.getObjectMap(TAXON_NAMESPACE);
121 Map<String, Reference> referenceMap = (Map<String, Reference>) partitioner.getObjectMap(REFERENCE_NAMESPACE);
122
123 ResultSet rs = partitioner.getResultSet();
124
125 try {
126
127 int i = 0;
128
129 //for each reference
130 while (rs.next()){
131
132 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info(pluralString + " handled: " + (i-1));}
133
134 Integer specTaxId = rs.getInt("SpecTaxId");
135 Integer acceptedTaxonId = nullSafeInt(rs, "SpecCurrspecID");
136 String specSystaxRank = rs.getString("SpecSystaxRank");
137
138 try {
139
140 //source ref
141 Reference<?> sourceRef = state.getTransactionalSourceReference();
142
143 Taxon acceptedTaxon = taxonMap.get(String.valueOf(acceptedTaxonId));
144 TaxonBase<?> thisTaxon = null;
145
146 if (isBlank(specSystaxRank) ){
147 //TODO
148 }else if (specSystaxRank.equals("synonym")){
149 Synonym synonym = getSynonym(state, rs);
150 if (acceptedTaxon == null){
151 //TODO
152 logger.warn("Accepted taxon (" + acceptedTaxonId + ") not found for synonym "+ specTaxId);
153 }else{
154 acceptedTaxon.addSynonym(synonym, SynonymRelationshipType.SYNONYM_OF());
155 thisTaxon = synonym;
156 }
157 }else if (specSystaxRank.equals("species")){
158 validateAcceptedTaxon(acceptedTaxon, rs, specTaxId, acceptedTaxonId);
159 thisTaxon = acceptedTaxon;
160 }else{
161 logger.warn(String.format("Unhandled specSystaxRank %s in specTaxId %d", specSystaxRank, specTaxId));
162 }
163
164 if (thisTaxon != null){
165 ZoologicalName name = CdmBase.deproxy(thisTaxon.getName(), ZoologicalName.class);
166
167 handleNomRef(state, referenceMap, rs, name);
168
169 handleTypeInformation(state,rs, name);
170
171
172 // this.doIdCreatedUpdatedNotes(state, ref, rs, refId, REFERENCE_NAMESPACE);
173
174 objectsToSave.add(acceptedTaxon);
175 }
176
177
178 } catch (Exception e) {
179 logger.warn("Exception in specTax: SpecTaxId " + specTaxId + ". " + e.getMessage());
180 e.printStackTrace();
181 }
182
183 }
184
185 // logger.warn("Specimen: " + countSpecimen + ", Descriptions: " + countDescriptions );
186
187 logger.warn(pluralString + " to save: " + objectsToSave.size());
188 getTaxonService().save(objectsToSave);
189
190 return success;
191 } catch (Exception e) {
192 logger.error("Exception: " + e);
193 return false;
194 }
195 }
196
197
198 private Pattern patternAll = Pattern.compile("(.+,\\s.+)(\\(.+\\))");
199
200
201 private void handleTypeInformation(GlobisImportState state, ResultSet rs, ZoologicalName name) throws SQLException {
202
203 String specTypeDepositoriesStr = rs.getString("SpecTypeDepository");
204 String countryString = rs.getString("SpecTypeCountry");
205
206 if (! hasTypeInformation(specTypeDepositoriesStr, countryString)){
207 return;
208 }
209
210 FieldObservation fieldObservation = makeTypeFieldObservation(state, countryString);
211
212 String[] specTypeDepositories = specTypeDepositoriesStr.split(";");
213 //TODO different issues
214 if (specTypeDepositories.length == 0){
215 //TODO
216 }
217 for (String specTypeDepositoryStr : specTypeDepositories){
218 specTypeDepositoryStr = specTypeDepositoryStr.trim();
219
220 //Specimen
221 Specimen specimen = makeSingleTypeSpecimen(fieldObservation);
222
223 if (specTypeDepositoryStr.equals("??")){
224 //unknown
225 //TODO
226 specimen.setTitleCache("??", true);
227 }else{
228 specTypeDepositoryStr = makeAdditionalSpecimenInformation(
229 specTypeDepositoryStr, specimen);
230
231 makeCollection(specTypeDepositoryStr, specimen);
232 }
233
234 //type Designation
235 makeTypeDesignation(name, rs, specimen);
236 }
237
238 }
239
240
241
242
243 private boolean hasTypeInformation(String specTypeDepositoriesStr, String countryString) {
244 boolean result = false;
245 result |= isNotBlank(specTypeDepositoriesStr) || isNotBlank(countryString);
246 return result;
247 }
248
249
250
251 /**
252 * @param specTypeDepositoryStr
253 * @param specimen
254 */
255 protected void makeCollection(String specTypeDepositoryStr, Specimen specimen) {
256 //TODO deduplicate
257 Map<String, Collection> collectionMap = new HashMap<String, Collection>();
258
259
260 //Collection
261 String[] split = specTypeDepositoryStr.split(",");
262 if (split.length != 2){
263 if (split.length == 1 && split[0].startsWith("coll.")){
264 Collection collection = Collection.NewInstance();
265 collection.setName(split[0]);
266 }else{
267 logger.warn("Split size is not 2: " + specTypeDepositoryStr);
268 }
269
270 }else{
271 String collectionStr = split[0];
272 String location = split[1];
273
274
275 Collection collection = collectionMap.get(collectionStr);
276 if (collection == null){
277 collection = Collection.NewInstance();
278 collection.setCode(collectionStr);
279 collection.setTownOrLocation(split[1]);
280 }else if (CdmUtils.nullSafeEqual(location, collection.getTownOrLocation())){
281 String message = "Location (%s) is not equal to location (%s) of existing collection";
282 logger.warn(String.format(message, location, collection.getTownOrLocation(), collection.getCode()));
283 }
284
285 specimen.setCollection(collection);
286
287 }
288 }
289
290
291
292
293 /**
294 * @param specTypeDepositoriesStr
295 * @param specTypeDepositoryStr
296 * @param specimen
297 * @return
298 */
299 protected String makeAdditionalSpecimenInformation( String specTypeDepositoryStr,
300 Specimen specimen) {
301 //doubful
302 if (specTypeDepositoryStr.endsWith("?")){
303 Marker.NewInstance(specimen, true, MarkerType.IS_DOUBTFUL());
304 specTypeDepositoryStr = specTypeDepositoryStr.substring(0, specTypeDepositoryStr.length() -1).trim();
305 }
306
307 //brackets
308 Matcher matcher = patternAll.matcher(specTypeDepositoryStr);
309 if (matcher.find()){
310 //has brackets
311 String brackets = matcher.group(2);
312 brackets = brackets.substring(1, brackets.length()-1);
313
314 brackets = brackets.replace("[mm]", "\u2642\u2642");
315 brackets = brackets.replace("[m]", "\u2642");
316 brackets = brackets.replace("[ff]", "\u2640\u2640");
317 brackets = brackets.replace("[f]", "\u2640");
318
319 if (brackets.contains("[") || brackets.contains("]")){
320 logger.warn ("There are still '[', ']' in the bracket part: " + brackets);
321 }
322
323 //TODO replace mm/ff by Unicode male
324 specimen.setTitleCache(brackets, true);
325 specTypeDepositoryStr = matcher.group(1).trim();
326 }
327 return specTypeDepositoryStr;
328 }
329
330
331
332
333 /**
334 * @param fieldObservation
335 * @return
336 */
337 protected Specimen makeSingleTypeSpecimen(FieldObservation fieldObservation) {
338 DerivationEvent derivEvent = DerivationEvent.NewInstance();
339 // derivEvent.setType(DerivationEventType.ACCESSIONING());
340 fieldObservation.addDerivationEvent(derivEvent);
341 Specimen specimen = Specimen.NewInstance();
342 specimen.setDerivedFrom(derivEvent);
343 return specimen;
344 }
345
346
347
348
349 /**
350 * @param state
351 * @return
352 * @throws SQLException
353 */
354 protected FieldObservation makeTypeFieldObservation(GlobisImportState state,
355 String countryString) throws SQLException {
356
357 DerivedUnitType unitType = DerivedUnitType.Specimen;
358 DerivedUnitFacade facade = DerivedUnitFacade.NewInstance(unitType);
359
360 WaterbodyOrCountry typeCountry = getCountry(state, countryString);
361 facade.setCountry(typeCountry);
362 FieldObservation fieldObservation = facade.innerFieldObservation();
363 return fieldObservation;
364 }
365
366
367
368
369 /**
370 * @param name
371 * @param rs
372 * @param status
373 * @param specimen
374 * @throws SQLException
375 */
376 protected void makeTypeDesignation(ZoologicalName name, ResultSet rs, Specimen specimen) throws SQLException {
377 //type
378 String specType = rs.getString("SpecType");
379 SpecimenTypeDesignationStatus status = getTypeDesigType(specType);
380
381 SpecimenTypeDesignation typeDesignation = SpecimenTypeDesignation.NewInstance();
382 typeDesignation.setTypeStatus(status);
383 typeDesignation.setTypeSpecimen(specimen);
384
385 name.addTypeDesignation(typeDesignation, true);
386 }
387
388
389
390
391 private SpecimenTypeDesignationStatus getTypeDesigType(String specType) {
392 if (isBlank(specType) ){
393 return null;
394 }else if (specType.matches("Holotype(Holotypus)?")){
395 return SpecimenTypeDesignationStatus.HOLOTYPE();
396 }else if (specType.matches("Neotype")){
397 return SpecimenTypeDesignationStatus.NEOTYPE();
398 }else if (specType.matches("Syntype(\\(s\\))?")){
399 return SpecimenTypeDesignationStatus.SYNTYPE();
400 }else if (specType.matches("Lectotype")){
401 return SpecimenTypeDesignationStatus.LECTOTYPE();
402 }else{
403 logger.warn("SpecimenTypeDesignationStatus does not match: " + specType);
404 return null;
405 }
406 }
407
408
409
410
411 /**
412 * @param state
413 * @param referenceMap
414 * @param rs
415 * @param name
416 * @return
417 * @throws SQLException
418 */
419 private Reference<?> handleNomRef(GlobisImportState state, Map<String, Reference> referenceMap, ResultSet rs,
420 ZoologicalName name) throws SQLException {
421 //ref
422 Integer refId = nullSafeInt(rs, "fiSpecRefID");
423 Reference<?> nomRef = null;
424 if (refId != null){
425 nomRef = referenceMap.get(String.valueOf(refId));
426 if (nomRef == null && state.getConfig().getDoReferences().equals(state.getConfig().getDoReferences().ALL)){
427 logger.warn("Reference " + refId + " could not be found.");
428 }else if (nomRef != null){
429 name.setNomenclaturalReference(nomRef);
430 }
431 }
432
433 //refDetail
434 String refDetail = rs.getString("SpecPage");
435 if (isNotBlank(refDetail)){
436 name.setNomenclaturalMicroReference(refDetail);
437 }
438 return nomRef;
439 }
440
441
442
443
444 private void validateAcceptedTaxon(Taxon acceptedTaxon, ResultSet rs, Integer specTaxId, Integer acceptedTaxonId) throws SQLException {
445 if (acceptedTaxon == null){
446 logger.warn("Accepted taxon is null for taxon taxon to validate: ");
447 return;
448 }
449
450 //TODO
451 ZoologicalName name = CdmBase.deproxy(acceptedTaxon.getName(), ZoologicalName.class);
452
453 String specName = rs.getString("SpecName");
454 if (! name.getSpecificEpithet().equals(specName)){
455 logger.warn(String.format("Species epithet is not equal for accepted taxon: %s - %s", name.getSpecificEpithet(), specName));
456 }
457 //TODO
458 }
459
460
461
462
463 private Synonym getSynonym(GlobisImportState state, ResultSet rs) throws SQLException {
464 //rank
465 String rankStr = rs.getString("SpecRank");
466 Rank rank = null;
467 if (isNotBlank(rankStr)){
468 try {
469 rank = Rank.getRankByNameOrAbbreviation(rankStr, NomenclaturalCode.ICZN, true);
470 } catch (UnknownCdmTypeException e) {
471 e.printStackTrace();
472 }
473 }
474
475 //name
476 ZoologicalName name = ZoologicalName.NewInstance(rank);
477 makeNamePartsAndCache(state, rs, rankStr, name);
478
479
480 // name.setGenusOrUninomial(genusOrUninomial);
481 String authorStr = rs.getString("SpecAuthor");
482 String yearStr = rs.getString("SpecYear");
483 String authorAndYearStr = CdmUtils.concat(", ", authorStr, yearStr);
484 handleAuthorAndYear(authorAndYearStr, name);
485
486 Synonym synonym = Synonym.NewInstance(name, state.getTransactionalSourceReference());
487
488 return synonym;
489 }
490
491
492
493
494 private void makeNamePartsAndCache(GlobisImportState state, ResultSet rs, String rank, ZoologicalName name) throws SQLException {
495 String citedFamily = rs.getString("SpecCitedFamily");
496 String citedGenus = rs.getString("SpecCitedGenus");
497 String citedSpecies = rs.getString("SpecCitedSpecies");
498 String citedSubspecies = rs.getString("SpecCitedSubspecies");
499 String lastEpithet = rs.getString("SpecName");
500
501
502 String cache = CdmUtils.concat(" ", new String[]{citedFamily, citedGenus, citedSpecies, citedSubspecies, rank, lastEpithet});
503 name.setGenusOrUninomial(citedGenus);
504 //TODO sperate authors
505 if (isBlank(citedSpecies)){
506 name.setSpecificEpithet(lastEpithet);
507 }else{
508 name.setSpecificEpithet(citedSpecies);
509 if (isBlank(citedSubspecies)){
510 name.setInfraSpecificEpithet(lastEpithet);
511 }
512 }
513
514 //TODO check if cache needs protection
515 name.setNameCache(cache, true);
516 }
517
518
519
520
521 private boolean isInfraSpecies(GlobisImportState state, ResultSet rs, Rank rank) {
522 // TODO Auto-generated method stub
523 return false;
524 }
525
526
527
528
529 private Reference<?> getJournal(GlobisImportState state, ResultSet rs, String refJournal) throws SQLException {
530
531
532 Reference<?> journal = ReferenceFactory.newJournal();
533 String issn = rs.getString("RefISSN");
534 if (StringUtils.isNotBlank(issn)){
535 issn.replaceAll("ISSN", "").trim();
536 journal.setIssn(issn);
537 }
538
539
540
541 //TODO deduplicate
542 journal.setTitle(refJournal);
543 return journal;
544 }
545
546
547
548
549 /* (non-Javadoc)
550 * @see eu.etaxonomy.cdm.io.common.mapping.IMappingImport#createObject(java.sql.ResultSet, eu.etaxonomy.cdm.io.common.ImportStateBase)
551 */
552 public Reference<?> createObject(ResultSet rs, GlobisImportState state)
553 throws SQLException {
554 Reference<?> ref;
555 String refType = rs.getString("RefType");
556 if (refType == null){
557 ref = ReferenceFactory.newGeneric();
558 }else if (refType == "book"){
559 ref = ReferenceFactory.newBook();
560 }else if (refType == "paper in journal"){
561 ref = ReferenceFactory.newArticle();
562 }else if (refType.startsWith("unpublished") ){
563 ref = ReferenceFactory.newGeneric();
564 }else if (refType.endsWith("paper in journal")){
565 ref = ReferenceFactory.newArticle();
566 }else if (refType == "paper in book"){
567 ref = ReferenceFactory.newBookSection();
568 }else if (refType == "paper in journalwebsite"){
569 ref = ReferenceFactory.newArticle();
570 }else{
571 logger.warn("Unknown reference type: " + refType);
572 ref = ReferenceFactory.newGeneric();
573 }
574 return ref;
575 }
576
577 /* (non-Javadoc)
578 * @see eu.etaxonomy.cdm.io.berlinModel.in.IPartitionedIO#getRelatedObjectsForPartition(java.sql.ResultSet)
579 */
580 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs) {
581 String nameSpace;
582 Class cdmClass;
583 Set<String> idSet;
584 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<Object, Map<String, ? extends CdmBase>>();
585 try{
586 Set<String> taxonIdSet = new HashSet<String>();
587 Set<String> referenceIdSet = new HashSet<String>();
588
589 while (rs.next()){
590 handleForeignKey(rs, taxonIdSet, "SpecCurrspecID");
591 handleForeignKey(rs, referenceIdSet, "fiSpecRefID");
592 }
593
594 //taxon map
595 nameSpace = TAXON_NAMESPACE;
596 cdmClass = Taxon.class;
597 idSet = taxonIdSet;
598 Map<String, Taxon> objectMap = (Map<String, Taxon>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
599 result.put(nameSpace, objectMap);
600
601 //reference map
602 nameSpace = REFERENCE_NAMESPACE;
603 cdmClass = Reference.class;
604 idSet = referenceIdSet;
605 Map<String, Reference> referenceMap = (Map<String, Reference>)getCommonService().getSourcedObjectsByIdInSource(cdmClass, idSet, nameSpace);
606 result.put(nameSpace, referenceMap);
607
608
609 } catch (SQLException e) {
610 throw new RuntimeException(e);
611 }
612 return result;
613 }
614
615 /* (non-Javadoc)
616 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
617 */
618 @Override
619 protected boolean doCheck(GlobisImportState state){
620 IOValidator<GlobisImportState> validator = new GlobisSpecTaxaImportValidator();
621 return validator.validate(state);
622 }
623
624
625 /* (non-Javadoc)
626 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
627 */
628 protected boolean isIgnore(GlobisImportState state){
629 return ! state.getConfig().isDoSpecTaxa();
630 }
631
632
633
634
635
636 }