Project

General

Profile

« Previous | Next » 

Revision 8ef74e91

Added by Andreas Müller over 5 years ago

ref #7346 use E+M area vocabulary for common names

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/berlinModel/in/BerlinModelCommonNamesImport.java
24 24
import org.apache.commons.lang.StringUtils;
25 25
import org.apache.log4j.Logger;
26 26
import org.springframework.stereotype.Component;
27
import org.springframework.transaction.TransactionStatus;
27 28

  
28 29
import eu.etaxonomy.cdm.common.CdmUtils;
29 30
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
......
31 32
import eu.etaxonomy.cdm.io.common.IOValidator;
32 33
import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
33 34
import eu.etaxonomy.cdm.io.common.Source;
34
import eu.etaxonomy.cdm.io.common.TdwgAreaProvider;
35 35
import eu.etaxonomy.cdm.model.common.CdmBase;
36 36
import eu.etaxonomy.cdm.model.common.Extension;
37 37
import eu.etaxonomy.cdm.model.common.ExtensionType;
38
import eu.etaxonomy.cdm.model.common.IdentifiableSource;
38 39
import eu.etaxonomy.cdm.model.common.Language;
39 40
import eu.etaxonomy.cdm.model.common.Marker;
40 41
import eu.etaxonomy.cdm.model.common.MarkerType;
42
import eu.etaxonomy.cdm.model.common.OrderedTermVocabulary;
41 43
import eu.etaxonomy.cdm.model.common.Representation;
42 44
import eu.etaxonomy.cdm.model.common.TermVocabulary;
43 45
import eu.etaxonomy.cdm.model.description.CommonTaxonName;
44 46
import eu.etaxonomy.cdm.model.description.DescriptionElementSource;
45 47
import eu.etaxonomy.cdm.model.description.TaxonDescription;
46
import eu.etaxonomy.cdm.model.location.Country;
47 48
import eu.etaxonomy.cdm.model.location.NamedArea;
48 49
import eu.etaxonomy.cdm.model.name.TaxonName;
49 50
import eu.etaxonomy.cdm.model.reference.Reference;
......
74 75

  
75 76

  
76 77
	//map that stores the regions (named areas) and makes them accessible via the regionFk
77
	private Map<String, NamedArea> regionMap = new HashMap<>();
78
	private Map<String, NamedArea> regionFkToAreaMap = new HashMap<>();
78 79

  
79 80
	public BerlinModelCommonNamesImport(){
80 81
		super(dbTableName, pluralString);
......
141 142
	 */
142 143
	private void makeRegions(BerlinModelImportState state) {
143 144
		try {
144
			SortedSet<Integer> regionFks = new TreeSet<>();
145
			TransactionStatus tx = startTransaction();
146
		    SortedSet<Integer> regionFks = new TreeSet<>();
145 147
			Source source = state.getConfig().getSource();
146 148

  
147 149
			//fill set with all regionFk from emCommonName.regionFks
148
			getRegionFks(state, regionFks, source);
150
			fillRegionFks(state, regionFks, source);
149 151
			//concat filter string
150 152
			String sqlWhere = getSqlWhere(regionFks);
151 153

  
152 154
			//get E+M - TDWG Mapping
153
			Map<String, String> emTdwgMap = getEmTdwgMap(source);
155
//			Map<String, String> emTdwgMap = getEmTdwgMap(source);
156
			Map<String, NamedArea> emCodeToAreaMap = getEmCodeToAreaMap(source);
154 157
			//fill regionMap
155
			fillRegionMap(state, sqlWhere, emTdwgMap);
158
			fillRegionMap(state, sqlWhere, emCodeToAreaMap);
159

  
160
			commitTransaction(tx);
156 161

  
157 162
			return;
158 163
		} catch (NumberFormatException e) {
......
167 172
	}
168 173

  
169 174

  
170
	@Override
175
    @Override
171 176
	public boolean doPartition(ResultSetPartitioner partitioner, BerlinModelImportState state)  {
172 177
		boolean success = true ;
173 178

  
......
269 274
					}
270 275
					commonTaxonNames.add(commonTaxonName);
271 276
					regionFk = regionFk.trim();
272
					NamedArea area = regionMap.get(regionFk);
277
					NamedArea area = regionFkToAreaMap.get(regionFk);
273 278
					if (area == null){
274 279
						if (isNotBlank(regionFk) && regionFk != NO_REGION){
275 280
							logger.warn("Area for " + regionFk + " not defined in regionMap.");
......
311 316
	                commonTaxonName.addSource(source);
312 317
				}
313 318

  
314

  
315 319
				//MisNameRef
316 320
				if (misNameRefFk != null){
317 321
					//Taxon misappliedName = getMisappliedName(biblioRefMap, nomRefMap, misNameRefFk, taxon);
......
515 519
	 * @throws SQLException
516 520
	 *
517 521
	 */
518
	private void getRegionFks(BerlinModelImportState state, SortedSet<Integer> regionFks, Source source) throws SQLException {
519
		String sql = " SELECT DISTINCT RegionFks FROM emCommonName";
522
	private void fillRegionFks(BerlinModelImportState state, SortedSet<Integer> regionFks,
523
	        Source source) throws SQLException {
524
		String sql =
525
		          " SELECT DISTINCT RegionFks "
526
		        + " FROM emCommonName";
520 527
		if (state.getConfig().getCommonNameFilter() != null){
521 528
			sql += " WHERE " + state.getConfig().getCommonNameFilter();
522 529
		}
......
553 560
	 * @throws SQLException
554 561
	 */
555 562
	private void fillRegionMap(BerlinModelImportState state, String sqlWhere,
556
			Map<String, String> emTdwgMap) throws SQLException {
557
		Source source = state.getConfig().getSource();
558
		String sql;
559
		ResultSet rs;
560
		sql = " SELECT RegionId, Region FROM emLanguageRegion WHERE RegionId IN ("+ sqlWhere+ ") ";
561
		rs = source.getResultSet(sql);
563
			Map<String, NamedArea> emCodeToAreaMap) throws SQLException {
564

  
565
	    Source source = state.getConfig().getSource();
566
		String sql =
567
		      " SELECT RegionId, Region "
568
		    + " FROM  emLanguageRegion "
569
		    + " WHERE RegionId IN ("+ sqlWhere+ ") ";
570
		ResultSet rs = source.getResultSet(sql);
562 571
		while (rs.next()){
563 572
			Object regionId = rs.getObject("RegionId");
564 573
			String region = rs.getString("Region");
......
566 575
			if (splitRegion.length <= 1){
567 576
				NamedArea newArea = getNamedArea(state, null, region, "Language region '" + region + "'", null, null, null);
568 577
//				getTermService().save(newArea);
569
				regionMap.put(String.valueOf(regionId), newArea);
570
				logger.info("Found new area: " +  region);
578
				regionFkToAreaMap.put(String.valueOf(regionId), newArea);
579
				logger.warn("Found new area: " +  region);
571 580
			}else if (splitRegion.length == 2){
572
				String emCode = splitRegion[1].trim();
573
				String tdwgCode = emTdwgMap.get(emCode);
574
				if (isNotBlank(tdwgCode) ){
575
					NamedArea tdwgArea = getNamedArea(state, tdwgCode);
576
					regionMap.put(String.valueOf(regionId), tdwgArea);
577
				}else {
578
					NamedArea area = getOtherAreas(state, emCode, tdwgCode);
579
					if (area != null){
580
						regionMap.put(String.valueOf(regionId), area);
581
					}else{
582
						logger.warn("emCode did not map to valid tdwgCode: " +  CdmUtils.Nz(emCode) + "->" + CdmUtils.Nz(tdwgCode));
583
					}
581
				String emCode = splitRegion[1].trim().replace(" ", "");
582

  
583
				NamedArea area = emCodeToAreaMap.get(emCode);
584
				if (area == null){
585
				    String[] splits = emCode.split("/");
586
				    if (splits.length == 2){
587
				        area = emCodeToAreaMap.get(splits[0]);
588
		            }
589
				    if (area != null){
590
				        logger.warn("emCode ambigous. Use larger area: " +  CdmUtils.Nz(emCode) + "->" + regionId);
591
				    }else{
592
				        logger.warn("emCode not recognized. Region not defined: " +  CdmUtils.Nz(emCode) + "->" + regionId);
593
				    }
594
				}
595
				if (area != null){
596
				    regionFkToAreaMap.put(String.valueOf(regionId), area);
584 597
				}
585 598
			}
586 599
		}
587 600
	}
588 601

  
589

  
590
	/**
591
	 * Returns the area for a given TDWG code. See {@link #getEmTdwgMap(Source)} for exceptions from
592
	 * the TDWG code
593
	 * @param state
594
	 * @param tdwgCode
595
	 */
596
	private NamedArea getNamedArea(BerlinModelImportState state, String tdwgCode) {
597
		NamedArea area;
598
		if (tdwgCode.equalsIgnoreCase("Ab")){
599
			area = getNamedArea(state, BerlinModelTransformer.uuidAb, "Azerbaijan & Nakhichevan", "Azerbaijan (including Nakhichevan)",  "Ab", null, null);
600
			getTermService().saveOrUpdate(area);
601
		}else if (tdwgCode.equalsIgnoreCase("Uk")){
602
			area = getNamedArea(state, BerlinModelTransformer.uuidUk , "Ukraine & Crimea", "Ukraine (including Crimea)", "Uk", null, null);
603
			getTermService().saveOrUpdate(area);
604
		}else if (tdwgCode.equalsIgnoreCase("Rf")){
605
//			area = getNamedArea(state, BerlinModelTransformer.uuidRf , "Ukraine & Crimea", "Ukraine (including Crimea)", "Uk", null, null);
606
//			getTermService().saveOrUpdate(area);
607
			area = Country.RUSSIANFEDERATION();
608
		}else if (tdwgCode.equalsIgnoreCase("Gg")){
609
			area = Country.GEORGIA();
610
		}else if (tdwgCode.equalsIgnoreCase("SM")){
611
            area = getNamedArea(state, BerlinModelTransformer.uuidSM , "Serbia & Montenegro", "Serbia & Montenegro", "SM", null, null);
612
            getTermService().saveOrUpdate(area);
613
        }else if (tdwgCode.equalsIgnoreCase("Tu")){
614
            area = Country.TURKEYREPUBLICOF();
615
        }else{
616
			area = TdwgAreaProvider.getAreaByTdwgAbbreviation(tdwgCode);
617
		}
618
		if (area == null){
619
			logger.warn("Area is null for " + tdwgCode);
620
		}
621
		return area;
622
	}
623

  
624 602
	/**
625 603
	 * @param regionFks
626 604
	 * @return
......
634 612
		return sqlWhere;
635 613
	}
636 614

  
637
	/**
638
	 * Returns a map which is filled by the emCode->TdwgCode mapping defined in emArea.
639
	 * Some exceptions are defined for emCode 'Ab','Rf','Uk','Gg','SM','Tu' and some additional mapping is added
640
	 * for 'Ab / Ab(A)', 'Ga / Ga(F)', 'It / It(I)', 'Uk / Uk(U)', 'Ar / Ar(A)', 'Hs / Hs(S)', 'Hb / Hb(E)'
641
	 * @param source
642
	 * @throws SQLException
643
	 */
644
	private Map<String, String> getEmTdwgMap(Source source) throws SQLException {
645
		String sql;
646
		ResultSet rs;
647
		Map<String, String> emTdwgMap = new HashMap<>();
648
		sql = " SELECT EmCode, TDWGCode FROM emArea ";
649
		rs = source.getResultSet(sql);
650
		while (rs.next()){
651
			String emCode = rs.getString("EMCode");
652
			String TDWGCode = rs.getString("TDWGCode");
653
			if (isNotBlank(emCode) ){
654
				emCode = emCode.trim();
655
				if (emCode.equalsIgnoreCase("Ab") || emCode.equalsIgnoreCase("Rf")||
656
						emCode.equalsIgnoreCase("Uk") || emCode.equalsIgnoreCase("Gg")
657
						|| emCode.equalsIgnoreCase("SM") || emCode.equalsIgnoreCase("Tu")){
658
					emTdwgMap.put(emCode, emCode);
659
				}else if (isNotBlank(TDWGCode)){
660
					emTdwgMap.put(emCode, TDWGCode.trim());
661
				}
662
			}
663
		}
664
		emTdwgMap.put("Ab / Ab(A)", "Ab");
665
		emTdwgMap.put("Ga / Ga(F)", "FRA-FR");
666
		emTdwgMap.put("It / It(I)", "ITA");
667
		emTdwgMap.put("Uk / Uk(U)", "Uk");
668
		emTdwgMap.put("Ar / Ar(A)", "TCS-AR");
669
		emTdwgMap.put("Hs / Hs(S)", "SPA-SP");
670
		emTdwgMap.put("Hb / Hb(E)", "IRE-IR");
671

  
672
		return emTdwgMap;
673
	}
615
//	/**
616
//	 * Returns a map which is filled by the emCode->TdwgCode mapping defined in emArea.
617
//	 * Some exceptions are defined for emCode 'Ab','Rf','Uk','Gg','SM','Tu' and some additional mapping is added
618
//	 * for 'Ab / Ab(A)', 'Ga / Ga(F)', 'It / It(I)', 'Uk / Uk(U)', 'Ar / Ar(A)', 'Hs / Hs(S)', 'Hb / Hb(E)'
619
//	 * @param source
620
//	 * @throws SQLException
621
//	 */
622
//	private Map<String, String> getEmTdwgMap(Source source) throws SQLException {
623
//
624
//		Map<String, String> emTdwgMap = new HashMap<>();
625
//		String sql = " SELECT EmCode, TDWGCode "
626
//		    + " FROM emArea ";
627
//		ResultSet rs = source.getResultSet(sql);
628
//		while (rs.next()){
629
//			String emCode = rs.getString("EMCode");
630
//			String TDWGCode = rs.getString("TDWGCode");
631
//			if (isNotBlank(emCode) ){
632
//				emCode = emCode.trim();
633
//				if (emCode.equalsIgnoreCase("Ab") || emCode.equalsIgnoreCase("Rf")||
634
//						emCode.equalsIgnoreCase("Uk") || emCode.equalsIgnoreCase("Gg")
635
//						|| emCode.equalsIgnoreCase("SM") || emCode.equalsIgnoreCase("Tu")){
636
//					emTdwgMap.put(emCode, emCode);
637
//				}else if (isNotBlank(TDWGCode)){
638
//					emTdwgMap.put(emCode, TDWGCode.trim());
639
//				}
640
//			}
641
//		}
642
//		emTdwgMap.put("Ab / Ab(A)", "Ab");
643
//		emTdwgMap.put("Ga / Ga(F)", "FRA-FR");
644
//		emTdwgMap.put("It / It(I)", "ITA");
645
//		emTdwgMap.put("Uk / Uk(U)", "Uk");
646
//		emTdwgMap.put("Ar / Ar(A)", "TCS-AR");
647
//		emTdwgMap.put("Hs / Hs(S)", "SPA-SP");
648
//		emTdwgMap.put("Hb / Hb(E)", "IRE-IR");
649
//
650
//		return emTdwgMap;
651
//	}
652

  
653

  
654

  
655
    /**
656
     * @param source
657
     * @return
658
     * @throws SQLException
659
     */
660
    private Map<String, NamedArea> getEmCodeToAreaMap(Source source) throws SQLException {
661
        Map<String, NamedArea> emCodeToAreaMap = new HashMap<>();
662
        String sql =
663
              " SELECT EmCode, AreaId "
664
            + " FROM emArea ";
665
        ResultSet rs = source.getResultSet(sql);
666
        while (rs.next()){
667

  
668
            String emCode = rs.getString("EMCode");
669
            if (isNotBlank(emCode)){
670
                Integer areaId = rs.getInt("AreaId");
671
                NamedArea area = getAreaByAreaId(areaId);
672
                if (area != null){
673
                    emCodeToAreaMap.put(emCode.trim(), area);
674
                }else{
675
                    logger.warn("Area not found for areaId " + areaId);
676
                }
677
            }
678

  
679
        }
680

  
681
//        emTdwgMap.put("Ab / Ab(A)", "Ab");
682

  
683
        return emCodeToAreaMap;
684
    }
685

  
686
    /**
687
     * @param areaId
688
     * @return
689
     */
690
    private NamedArea getAreaByAreaId(int areaId) {
691
        NamedArea result = null;
692
        String areaIdStr = String.valueOf(areaId);
693
        OrderedTermVocabulary<NamedArea> voc = getAreaVoc();
694
        getVocabularyService().update(voc);
695
        for (NamedArea area : voc.getTerms()){
696
            for (IdentifiableSource source : area.getSources()){
697
                if (areaIdStr.equals(source.getIdInSource()) && BerlinModelAreaImport.NAMESPACE.equals(source.getIdNamespace())){
698
                    if (result != null){
699
                        logger.warn("Result for areaId already exists. areaId: " + areaId);
700
                    }
701
                    result = area;
702
                }
703
            }
704
        }
705
        return result;
706
    }
707

  
708
    private OrderedTermVocabulary<NamedArea> areaVoc;
709
    @SuppressWarnings("unchecked")
710
    private OrderedTermVocabulary<NamedArea> getAreaVoc(){
711
        if (areaVoc == null){
712
            areaVoc = (OrderedTermVocabulary<NamedArea>)getVocabularyService().find(BerlinModelTransformer.uuidVocEuroMedAreas);
713
        }
714
        return areaVoc;
715
    }
674 716

  
675 717

  
676 718
	/**
......
739 781
			result.put(nameSpace, referenceMap);
740 782
			// TODO remove if problem with duplicate DescElement_Annot id is solved
741 783
		} catch (SQLException e) {
742
			throw new RuntimeException("pos: " +pos, e);
784
			throw new RuntimeException("pos: " + pos, e);
743 785
		} catch (NullPointerException nep){
744 786
			logger.error("NullPointerException in getRelatedObjectsForPartition()");
745 787
		}

Also available in: Unified diff