Revision 8ef74e91
Added by Andreas Müller over 5 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelCommonNamesImport.java | ||
---|---|---|
24 | 24 |
import org.apache.commons.lang.StringUtils; |
25 | 25 |
import org.apache.log4j.Logger; |
26 | 26 |
import org.springframework.stereotype.Component; |
27 |
import org.springframework.transaction.TransactionStatus; |
|
27 | 28 |
|
28 | 29 |
import eu.etaxonomy.cdm.common.CdmUtils; |
29 | 30 |
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer; |
... | ... | |
31 | 32 |
import eu.etaxonomy.cdm.io.common.IOValidator; |
32 | 33 |
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner; |
33 | 34 |
import eu.etaxonomy.cdm.io.common.Source; |
34 |
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider; |
|
35 | 35 |
import eu.etaxonomy.cdm.model.common.CdmBase; |
36 | 36 |
import eu.etaxonomy.cdm.model.common.Extension; |
37 | 37 |
import eu.etaxonomy.cdm.model.common.ExtensionType; |
38 |
import eu.etaxonomy.cdm.model.common.IdentifiableSource; |
|
38 | 39 |
import eu.etaxonomy.cdm.model.common.Language; |
39 | 40 |
import eu.etaxonomy.cdm.model.common.Marker; |
40 | 41 |
import eu.etaxonomy.cdm.model.common.MarkerType; |
42 |
import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary; |
|
41 | 43 |
import eu.etaxonomy.cdm.model.common.Representation; |
42 | 44 |
import eu.etaxonomy.cdm.model.common.TermVocabulary; |
43 | 45 |
import eu.etaxonomy.cdm.model.description.CommonTaxonName; |
44 | 46 |
import eu.etaxonomy.cdm.model.description.DescriptionElementSource; |
45 | 47 |
import eu.etaxonomy.cdm.model.description.TaxonDescription; |
46 |
import eu.etaxonomy.cdm.model.location.Country; |
|
47 | 48 |
import eu.etaxonomy.cdm.model.location.NamedArea; |
48 | 49 |
import eu.etaxonomy.cdm.model.name.TaxonName; |
49 | 50 |
import eu.etaxonomy.cdm.model.reference.Reference; |
... | ... | |
74 | 75 |
|
75 | 76 |
|
76 | 77 |
//map that stores the regions (named areas) and makes them accessible via the regionFk |
77 |
private Map<String, NamedArea> regionMap = new HashMap<>(); |
|
78 |
private Map<String, NamedArea> regionFkToAreaMap = new HashMap<>();
|
|
78 | 79 |
|
79 | 80 |
public BerlinModelCommonNamesImport(){ |
80 | 81 |
super(dbTableName, pluralString); |
... | ... | |
141 | 142 |
*/ |
142 | 143 |
private void makeRegions(BerlinModelImportState state) { |
143 | 144 |
try { |
144 |
SortedSet<Integer> regionFks = new TreeSet<>(); |
|
145 |
TransactionStatus tx = startTransaction(); |
|
146 |
SortedSet<Integer> regionFks = new TreeSet<>(); |
|
145 | 147 |
Source source = state.getConfig().getSource(); |
146 | 148 |
|
147 | 149 |
//fill set with all regionFk from emCommonName.regionFks |
148 |
getRegionFks(state, regionFks, source);
|
|
150 |
fillRegionFks(state, regionFks, source);
|
|
149 | 151 |
//concat filter string |
150 | 152 |
String sqlWhere = getSqlWhere(regionFks); |
151 | 153 |
|
152 | 154 |
//get E+M - TDWG Mapping |
153 |
Map<String, String> emTdwgMap = getEmTdwgMap(source); |
|
155 |
// Map<String, String> emTdwgMap = getEmTdwgMap(source); |
|
156 |
Map<String, NamedArea> emCodeToAreaMap = getEmCodeToAreaMap(source); |
|
154 | 157 |
//fill regionMap |
155 |
fillRegionMap(state, sqlWhere, emTdwgMap); |
|
158 |
fillRegionMap(state, sqlWhere, emCodeToAreaMap); |
|
159 |
|
|
160 |
commitTransaction(tx); |
|
156 | 161 |
|
157 | 162 |
return; |
158 | 163 |
} catch (NumberFormatException e) { |
... | ... | |
167 | 172 |
} |
168 | 173 |
|
169 | 174 |
|
170 |
@Override
|
|
175 |
@Override
|
|
171 | 176 |
public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state) { |
172 | 177 |
boolean success = true ; |
173 | 178 |
|
... | ... | |
269 | 274 |
} |
270 | 275 |
commonTaxonNames.add(commonTaxonName); |
271 | 276 |
regionFk = regionFk.trim(); |
272 |
NamedArea area = regionMap.get(regionFk); |
|
277 |
NamedArea area = regionFkToAreaMap.get(regionFk);
|
|
273 | 278 |
if (area == null){ |
274 | 279 |
if (isNotBlank(regionFk) && regionFk != NO_REGION){ |
275 | 280 |
logger.warn("Area for " + regionFk + " not defined in regionMap."); |
... | ... | |
311 | 316 |
commonTaxonName.addSource(source); |
312 | 317 |
} |
313 | 318 |
|
314 |
|
|
315 | 319 |
//MisNameRef |
316 | 320 |
if (misNameRefFk != null){ |
317 | 321 |
//Taxon misappliedName = getMisappliedName(biblioRefMap, nomRefMap, misNameRefFk, taxon); |
... | ... | |
515 | 519 |
* @throws SQLException |
516 | 520 |
* |
517 | 521 |
*/ |
518 |
private void getRegionFks(BerlinModelImportState state, SortedSet<Integer> regionFks, Source source) throws SQLException { |
|
519 |
String sql = " SELECT DISTINCT RegionFks FROM emCommonName"; |
|
522 |
private void fillRegionFks(BerlinModelImportState state, SortedSet<Integer> regionFks, |
|
523 |
Source source) throws SQLException { |
|
524 |
String sql = |
|
525 |
" SELECT DISTINCT RegionFks " |
|
526 |
+ " FROM emCommonName"; |
|
520 | 527 |
if (state.getConfig().getCommonNameFilter() != null){ |
521 | 528 |
sql += " WHERE " + state.getConfig().getCommonNameFilter(); |
522 | 529 |
} |
... | ... | |
553 | 560 |
* @throws SQLException |
554 | 561 |
*/ |
555 | 562 |
private void fillRegionMap(BerlinModelImportState state, String sqlWhere, |
556 |
Map<String, String> emTdwgMap) throws SQLException { |
|
557 |
Source source = state.getConfig().getSource(); |
|
558 |
String sql; |
|
559 |
ResultSet rs; |
|
560 |
sql = " SELECT RegionId, Region FROM emLanguageRegion WHERE RegionId IN ("+ sqlWhere+ ") "; |
|
561 |
rs = source.getResultSet(sql); |
|
563 |
Map<String, NamedArea> emCodeToAreaMap) throws SQLException { |
|
564 |
|
|
565 |
Source source = state.getConfig().getSource(); |
|
566 |
String sql = |
|
567 |
" SELECT RegionId, Region " |
|
568 |
+ " FROM emLanguageRegion " |
|
569 |
+ " WHERE RegionId IN ("+ sqlWhere+ ") "; |
|
570 |
ResultSet rs = source.getResultSet(sql); |
|
562 | 571 |
while (rs.next()){ |
563 | 572 |
Object regionId = rs.getObject("RegionId"); |
564 | 573 |
String region = rs.getString("Region"); |
... | ... | |
566 | 575 |
if (splitRegion.length <= 1){ |
567 | 576 |
NamedArea newArea = getNamedArea(state, null, region, "Language region '" + region + "'", null, null, null); |
568 | 577 |
// getTermService().save(newArea); |
569 |
regionMap.put(String.valueOf(regionId), newArea); |
|
570 |
logger.info("Found new area: " + region);
|
|
578 |
regionFkToAreaMap.put(String.valueOf(regionId), newArea);
|
|
579 |
logger.warn("Found new area: " + region);
|
|
571 | 580 |
}else if (splitRegion.length == 2){ |
572 |
String emCode = splitRegion[1].trim(); |
|
573 |
String tdwgCode = emTdwgMap.get(emCode); |
|
574 |
if (isNotBlank(tdwgCode) ){ |
|
575 |
NamedArea tdwgArea = getNamedArea(state, tdwgCode); |
|
576 |
regionMap.put(String.valueOf(regionId), tdwgArea); |
|
577 |
}else { |
|
578 |
NamedArea area = getOtherAreas(state, emCode, tdwgCode); |
|
579 |
if (area != null){ |
|
580 |
regionMap.put(String.valueOf(regionId), area); |
|
581 |
}else{ |
|
582 |
logger.warn("emCode did not map to valid tdwgCode: " + CdmUtils.Nz(emCode) + "->" + CdmUtils.Nz(tdwgCode)); |
|
583 |
} |
|
581 |
String emCode = splitRegion[1].trim().replace(" ", ""); |
|
582 |
|
|
583 |
NamedArea area = emCodeToAreaMap.get(emCode); |
|
584 |
if (area == null){ |
|
585 |
String[] splits = emCode.split("/"); |
|
586 |
if (splits.length == 2){ |
|
587 |
area = emCodeToAreaMap.get(splits[0]); |
|
588 |
} |
|
589 |
if (area != null){ |
|
590 |
logger.warn("emCode ambigous. Use larger area: " + CdmUtils.Nz(emCode) + "->" + regionId); |
|
591 |
}else{ |
|
592 |
logger.warn("emCode not recognized. Region not defined: " + CdmUtils.Nz(emCode) + "->" + regionId); |
|
593 |
} |
|
594 |
} |
|
595 |
if (area != null){ |
|
596 |
regionFkToAreaMap.put(String.valueOf(regionId), area); |
|
584 | 597 |
} |
585 | 598 |
} |
586 | 599 |
} |
587 | 600 |
} |
588 | 601 |
|
589 |
|
|
590 |
/** |
|
591 |
* Returns the area for a given TDWG code. See {@link #getEmTdwgMap(Source)} for exceptions from |
|
592 |
* the TDWG code |
|
593 |
* @param state |
|
594 |
* @param tdwgCode |
|
595 |
*/ |
|
596 |
private NamedArea getNamedArea(BerlinModelImportState state, String tdwgCode) { |
|
597 |
NamedArea area; |
|
598 |
if (tdwgCode.equalsIgnoreCase("Ab")){ |
|
599 |
area = getNamedArea(state, BerlinModelTransformer.uuidAb, "Azerbaijan & Nakhichevan", "Azerbaijan (including Nakhichevan)", "Ab", null, null); |
|
600 |
getTermService().saveOrUpdate(area); |
|
601 |
}else if (tdwgCode.equalsIgnoreCase("Uk")){ |
|
602 |
area = getNamedArea(state, BerlinModelTransformer.uuidUk , "Ukraine & Crimea", "Ukraine (including Crimea)", "Uk", null, null); |
|
603 |
getTermService().saveOrUpdate(area); |
|
604 |
}else if (tdwgCode.equalsIgnoreCase("Rf")){ |
|
605 |
// area = getNamedArea(state, BerlinModelTransformer.uuidRf , "Ukraine & Crimea", "Ukraine (including Crimea)", "Uk", null, null); |
|
606 |
// getTermService().saveOrUpdate(area); |
|
607 |
area = Country.RUSSIANFEDERATION(); |
|
608 |
}else if (tdwgCode.equalsIgnoreCase("Gg")){ |
|
609 |
area = Country.GEORGIA(); |
|
610 |
}else if (tdwgCode.equalsIgnoreCase("SM")){ |
|
611 |
area = getNamedArea(state, BerlinModelTransformer.uuidSM , "Serbia & Montenegro", "Serbia & Montenegro", "SM", null, null); |
|
612 |
getTermService().saveOrUpdate(area); |
|
613 |
}else if (tdwgCode.equalsIgnoreCase("Tu")){ |
|
614 |
area = Country.TURKEYREPUBLICOF(); |
|
615 |
}else{ |
|
616 |
area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode); |
|
617 |
} |
|
618 |
if (area == null){ |
|
619 |
logger.warn("Area is null for " + tdwgCode); |
|
620 |
} |
|
621 |
return area; |
|
622 |
} |
|
623 |
|
|
624 | 602 |
/** |
625 | 603 |
* @param regionFks |
626 | 604 |
* @return |
... | ... | |
634 | 612 |
return sqlWhere; |
635 | 613 |
} |
636 | 614 |
|
637 |
/** |
|
638 |
* Returns a map which is filled by the emCode->TdwgCode mapping defined in emArea. |
|
639 |
* Some exceptions are defined for emCode 'Ab','Rf','Uk','Gg','SM','Tu' (mapped to themselves) and some additional mapping is added |
|
640 |
* for 'Ab / Ab(A)', 'Ga / Ga(F)', 'It / It(I)', 'Uk / Uk(U)', 'Ar / Ar(A)', 'Hs / Hs(S)', 'Hb / Hb(E)' |
|
641 |
* @param source |
|
642 |
* @throws SQLException |
|
643 |
*/ |
|
644 |
private Map<String, String> getEmTdwgMap(Source source) throws SQLException { |
|
645 |
String sql; |
|
646 |
ResultSet rs; |
|
647 |
Map<String, String> emTdwgMap = new HashMap<>(); |
|
648 |
sql = " SELECT EmCode, TDWGCode FROM emArea "; |
|
649 |
rs = source.getResultSet(sql); |
|
650 |
while (rs.next()){ |
|
651 |
String emCode = rs.getString("EMCode"); |
|
652 |
String TDWGCode = rs.getString("TDWGCode"); |
|
653 |
if (isNotBlank(emCode) ){ |
|
654 |
emCode = emCode.trim(); |
|
655 |
if (emCode.equalsIgnoreCase("Ab") || emCode.equalsIgnoreCase("Rf")|| |
|
656 |
emCode.equalsIgnoreCase("Uk") || emCode.equalsIgnoreCase("Gg") |
|
657 |
|| emCode.equalsIgnoreCase("SM") || emCode.equalsIgnoreCase("Tu")){ |
|
658 |
emTdwgMap.put(emCode, emCode); |
|
659 |
}else if (isNotBlank(TDWGCode)){ |
|
660 |
emTdwgMap.put(emCode, TDWGCode.trim()); |
|
661 |
} |
|
662 |
} |
|
663 |
} |
|
664 |
emTdwgMap.put("Ab / Ab(A)", "Ab"); |
|
665 |
emTdwgMap.put("Ga / Ga(F)", "FRA-FR"); |
|
666 |
emTdwgMap.put("It / It(I)", "ITA"); |
|
667 |
emTdwgMap.put("Uk / Uk(U)", "Uk"); |
|
668 |
emTdwgMap.put("Ar / Ar(A)", "TCS-AR"); |
|
669 |
emTdwgMap.put("Hs / Hs(S)", "SPA-SP"); |
|
670 |
emTdwgMap.put("Hb / Hb(E)", "IRE-IR"); |
|
671 |
|
|
672 |
return emTdwgMap; |
|
673 |
} |
|
615 |
// /** |
|
616 |
// * Returns a map which is filled by the emCode->TdwgCode mapping defined in emArea. |
|
617 |
// * Some exceptions are defined for emCode 'Ab','Rf','Uk','Gg','SM','Tu' (mapped to themselves) and some additional mapping is added |
|
618 |
// * for 'Ab / Ab(A)', 'Ga / Ga(F)', 'It / It(I)', 'Uk / Uk(U)', 'Ar / Ar(A)', 'Hs / Hs(S)', 'Hb / Hb(E)' |
|
619 |
// * @param source |
|
620 |
// * @throws SQLException |
|
621 |
// */ |
|
622 |
// private Map<String, String> getEmTdwgMap(Source source) throws SQLException { |
|
623 |
// |
|
624 |
// Map<String, String> emTdwgMap = new HashMap<>(); |
|
625 |
// String sql = " SELECT EmCode, TDWGCode " |
|
626 |
// + " FROM emArea "; |
|
627 |
// ResultSet rs = source.getResultSet(sql); |
|
628 |
// while (rs.next()){ |
|
629 |
// String emCode = rs.getString("EMCode"); |
|
630 |
// String TDWGCode = rs.getString("TDWGCode"); |
|
631 |
// if (isNotBlank(emCode) ){ |
|
632 |
// emCode = emCode.trim(); |
|
633 |
// if (emCode.equalsIgnoreCase("Ab") || emCode.equalsIgnoreCase("Rf")|| |
|
634 |
// emCode.equalsIgnoreCase("Uk") || emCode.equalsIgnoreCase("Gg") |
|
635 |
// || emCode.equalsIgnoreCase("SM") || emCode.equalsIgnoreCase("Tu")){ |
|
636 |
// emTdwgMap.put(emCode, emCode); |
|
637 |
// }else if (isNotBlank(TDWGCode)){ |
|
638 |
// emTdwgMap.put(emCode, TDWGCode.trim()); |
|
639 |
// } |
|
640 |
// } |
|
641 |
// } |
|
642 |
// emTdwgMap.put("Ab / Ab(A)", "Ab"); |
|
643 |
// emTdwgMap.put("Ga / Ga(F)", "FRA-FR"); |
|
644 |
// emTdwgMap.put("It / It(I)", "ITA"); |
|
645 |
// emTdwgMap.put("Uk / Uk(U)", "Uk"); |
|
646 |
// emTdwgMap.put("Ar / Ar(A)", "TCS-AR"); |
|
647 |
// emTdwgMap.put("Hs / Hs(S)", "SPA-SP"); |
|
648 |
// emTdwgMap.put("Hb / Hb(E)", "IRE-IR"); |
|
649 |
// |
|
650 |
// return emTdwgMap; |
|
651 |
// } |
|
652 |
|
|
653 |
|
|
654 |
|
|
655 |
/** |
|
656 |
* @param source |
|
657 |
* @return |
|
658 |
* @throws SQLException |
|
659 |
*/ |
|
660 |
private Map<String, NamedArea> getEmCodeToAreaMap(Source source) throws SQLException { |
|
661 |
Map<String, NamedArea> emCodeToAreaMap = new HashMap<>(); |
|
662 |
String sql = |
|
663 |
" SELECT EmCode, AreaId " |
|
664 |
+ " FROM emArea "; |
|
665 |
ResultSet rs = source.getResultSet(sql); |
|
666 |
while (rs.next()){ |
|
667 |
|
|
668 |
String emCode = rs.getString("EMCode"); |
|
669 |
if (isNotBlank(emCode)){ |
|
670 |
Integer areaId = rs.getInt("AreaId"); |
|
671 |
NamedArea area = getAreaByAreaId(areaId); |
|
672 |
if (area != null){ |
|
673 |
emCodeToAreaMap.put(emCode.trim(), area); |
|
674 |
}else{ |
|
675 |
logger.warn("Area not found for areaId " + areaId); |
|
676 |
} |
|
677 |
} |
|
678 |
|
|
679 |
} |
|
680 |
|
|
681 |
// emTdwgMap.put("Ab / Ab(A)", "Ab"); |
|
682 |
|
|
683 |
return emCodeToAreaMap; |
|
684 |
} |
|
685 |
|
|
686 |
/** |
|
687 |
* @param areaId |
|
688 |
* @return |
|
689 |
*/ |
|
690 |
private NamedArea getAreaByAreaId(int areaId) { |
|
691 |
NamedArea result = null; |
|
692 |
String areaIdStr = String.valueOf(areaId); |
|
693 |
OrderedTermVocabulary<NamedArea> voc = getAreaVoc(); |
|
694 |
getVocabularyService().update(voc); |
|
695 |
for (NamedArea area : voc.getTerms()){ |
|
696 |
for (IdentifiableSource source : area.getSources()){ |
|
697 |
if (areaIdStr.equals(source.getIdInSource()) && BerlinModelAreaImport.NAMESPACE.equals(source.getIdNamespace())){ |
|
698 |
if (result != null){ |
|
699 |
logger.warn("Result for areaId already exists. areaId: " + areaId); |
|
700 |
} |
|
701 |
result = area; |
|
702 |
} |
|
703 |
} |
|
704 |
} |
|
705 |
return result; |
|
706 |
} |
|
707 |
|
|
708 |
private OrderedTermVocabulary<NamedArea> areaVoc; |
|
709 |
@SuppressWarnings("unchecked") |
|
710 |
private OrderedTermVocabulary<NamedArea> getAreaVoc(){ |
|
711 |
if (areaVoc == null){ |
|
712 |
areaVoc = (OrderedTermVocabulary<NamedArea>)getVocabularyService().find(BerlinModelTransformer.uuidVocEuroMedAreas); |
|
713 |
} |
|
714 |
return areaVoc; |
|
715 |
} |
|
674 | 716 |
|
675 | 717 |
|
676 | 718 |
/** |
... | ... | |
739 | 781 |
result.put(nameSpace, referenceMap); |
740 | 782 |
// TODO remove if problem with duplicate DescElement_Annot id is solved |
741 | 783 |
} catch (SQLException e) { |
742 |
throw new RuntimeException("pos: " +pos, e); |
|
784 |
throw new RuntimeException("pos: " + pos, e);
|
|
743 | 785 |
} catch (NullPointerException nep){ |
744 | 786 |
logger.error("NullPointerException in getRelatedObjectsForPartition()"); |
745 | 787 |
} |
Also available in: Unified diff
ref #7346 use E+M area vocabulary for common names