Project

General

Profile

« Previous | Next » 

Revision 2dc91504

Added by Andreas Müller over 2 years ago

ref #9932 improve parsing of subgeneric names and some refactoring

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/mexico/MexicoEfloraTaxonImport.java
90 90
	public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, MexicoEfloraImportState state) {
91 91
	    sourceReference = this.getSourceReference(state.getConfig().getSourceReference());
92 92

  
93
	    state.getDeduplicationHelper().reset();
93 94
	    boolean success = true ;
94 95
	    @SuppressWarnings("rawtypes")
95 96
        Set<TaxonBase> taxaToSave = new HashSet<>();
......
102 103
		try{
103 104
//		    System.out.println();
104 105
			while (rs.next()){
105

  
106
			//	if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("PTaxa handled: " + (i-1));}
107
//			    System.out.println("i++");
108
				//create Taxon element
109
				String taxonId = rs.getString("IdCAT");
110
				String status = rs.getString("EstatusNombre");
111
				String rankStr = rs.getString("CategoriaTaxonomica");
112
				String nameStr = rs.getString("Nombre");
113
				String autorStr = rs.getString("AutorSinAnio");
114
				String fullNameStr = nameStr + " " + autorStr;
115
				String citaNomenclaturalStr = rs.getString("CitaNomenclatural");
116
			    String annotationStr = rs.getString("AnotacionTaxon");
117
			    String type = rs.getString("NomPublicationType");
118
			    String year = rs.getString("Anio");
119
			    String uuidStr = rs.getString("uuid");
120
			    UUID uuid = UUID.fromString(uuidStr);
121
				Integer secFk = nullSafeInt(rs, "IdBibliografiaSec");
122

  
123
				//name OLD handling
124
				Rank rank = getRank(rankStr);
125
				NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
126
				TaxonName taxonName = (TaxonName)parser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
127
                //.. identifier
128
				DefinedTerm conabioIdentifier = getIdentiferType(state, MexicoConabioTransformer.uuidConabioTaxonIdIdentifierType,
129
                        "CONABIO Taxon Identifier", "CONABIO Taxon Identifier", "CONABIO", null);
130
                taxonName.addIdentifier(taxonId, conabioIdentifier);
131
//                .. nom Ref
132
                  Reference nomRef = ReferenceFactory.newGeneric();
133
                  nomRef.setAbbrevTitleCache(citaNomenclaturalStr, true);
134
                  nomRef.setDatePublished(TimePeriodParser.parseStringVerbatim(year));
135
                  taxonName.setNomenclaturalReference(nomRef);
136

  
137
//                TaxonName taxonName= makeName(taxonId, state, autorStr,
138
//                        nameStr, citaNomenclaturalStr, type, rankStr, annotationStr, year);
139

  
140
				//sec
141
				Reference sec = null;
142
				if (secFk != null) {
143
				    String refFkStr = String.valueOf(secFk);
144
				    sec = refMap.get(refFkStr);
145
				    if (sec == null && firstMissingSec) {
146
				        logger.warn("There are missing sec refs but they are not logged anymore.");
147
				        logger.debug("Sec not found for taxonId " +  taxonId +" and secId " + refFkStr);
148
				        firstMissingSec = false;
149
				    }
150
				}
151

  
152
				//taxon
153
				TaxonBase<?> taxonBase;
154
				Synonym synonym;
155
				Taxon taxon;
156
				try {
157
					if ("aceptado".equals(status)){
158
						taxon = Taxon.NewInstance(taxonName, sec);
159
						taxonBase = taxon;
160
					}else if ("sinónimo".equals(status)){
161
						synonym = Synonym.NewInstance(taxonName, sec);
162
						taxonBase = synonym;
163
					}else {
164
					    taxonBase = null;
165
					    logger.error("Status not yet implemented: " + status);
166
					    return false;
167
					}
168
					taxonBase.setUuid(uuid);
169

  
170
					partitioner.startDoSave();
171
					taxaToSave.add(taxonBase);
172
				} catch (Exception e) {
173
					logger.warn("An exception (" +e.getMessage()+") occurred when creating taxon with id " + taxonId + ". Taxon could not be saved.");
174
					success = false;
175
				}
106
			    success = handleSingleRecord(partitioner, state, success, taxaToSave, refMap, rs, i++);
176 107
			}
177 108
		} catch (Exception e) {
178
			logger.error("SQLException:" +  e);
109
		    e.printStackTrace();
110
			logger.error("Exception:" +  e);
179 111
			return false;
180 112
		}
181 113

  
......
183 115
		return success;
184 116
	}
185 117

  
118
    private boolean handleSingleRecord(ResultSetPartitioner partitioner, MexicoEfloraImportState state, boolean success,
119
            Set<TaxonBase> taxaToSave, Map<String, Reference> refMap, ResultSet rs, int i) throws SQLException {
120
		if ((i % 1000) == 0 && i!= 1 ){ logger.info("Taxa handled: " + (i-1));}
121
//			    System.out.println("i++");
122
		//create Taxon element
123
		String taxonId = rs.getString("IdCAT");
124
		String status = rs.getString("EstatusNombre");
125
		String rankStr = rs.getString("CategoriaTaxonomica");
126
		String nameStr = rs.getString("Nombre");
127
		String autorStr = rs.getString("AutorSinAnio");
128
		String fullNameStr = nameStr + " " + autorStr;
129
		String citaNomenclaturalStr = rs.getString("CitaNomenclatural");
130
	    String annotationStr = rs.getString("AnotacionTaxon");
131
	    String type = rs.getString("NomPublicationType");
132
	    String year = rs.getString("Anio");
133
	    String uuidStr = rs.getString("uuid");
134
	    UUID uuid = UUID.fromString(uuidStr);
135
		Integer secFk = nullSafeInt(rs, "IdBibliografiaSec");
136

  
137
		//name OLD handling
138
//		Rank rank = getRank(rankStr);
139
//		NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
140
//		TaxonName taxonName = (TaxonName)parser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
141
//        //.. identifier
142
//		DefinedTerm conabioIdentifier = getIdentiferType(state, MexicoConabioTransformer.uuidConabioTaxonIdIdentifierType,
143
//                "CONABIO Taxon Identifier", "CONABIO Taxon Identifier", "CONABIO", null);
144
//        taxonName.addIdentifier(taxonId, conabioIdentifier);
145
//        //.. nom Ref
146
//        Reference nomRef = ReferenceFactory.newGeneric();
147
//        nomRef.setAbbrevTitleCache(citaNomenclaturalStr, true);
148
//        nomRef.setDatePublished(TimePeriodParser.parseStringVerbatim(year));
149
//        taxonName.setNomenclaturalReference(nomRef);
150

  
151
        TaxonName taxonName= makeName(taxonId, state, autorStr,
152
           nameStr, citaNomenclaturalStr, type, rankStr, annotationStr, year);
153

  
154
		//sec
155
		Reference sec = null;
156
		if (secFk != null) {
157
		    String refFkStr = String.valueOf(secFk);
158
		    sec = refMap.get(refFkStr);
159
		    if (sec == null && firstMissingSec) {
160
		        logger.warn("There are missing sec refs but they are not logged anymore.");
161
		        logger.debug("Sec not found for taxonId " +  taxonId +" and secId " + refFkStr);
162
		        firstMissingSec = false;
163
		    }
164
		}
165

  
166
		//taxon
167
		TaxonBase<?> taxonBase;
168
		Synonym synonym;
169
		Taxon taxon;
170
		try {
171
			if ("aceptado".equals(status)){
172
				taxon = Taxon.NewInstance(taxonName, sec);
173
				taxonBase = taxon;
174
			}else if ("sinónimo".equals(status)){
175
				synonym = Synonym.NewInstance(taxonName, sec);
176
				taxonBase = synonym;
177
			}else {
178
			    taxonBase = null;
179
			    logger.error("Status not yet implemented: " + status);
180
			    return false;
181
			}
182
			taxonBase.setUuid(uuid);
183

  
184
			partitioner.startDoSave();
185
			taxaToSave.add(taxonBase);
186
		} catch (Exception e) {
187
			logger.warn("An exception (" +e.getMessage()+") occurred when creating taxon with id " + taxonId + ". Taxon could not be saved.");
188
			success = false;
189
		}
190
        return success;
191
    }
192

  
186 193
	boolean isFirstDedup = true;
187 194
    private TaxonName makeName(String taxonId, MexicoEfloraImportState state,
188 195
            String authorStr, String nameStr, String nomRefStr, String refType, String rankStr,
......
190 197

  
191 198
        //rank
192 199
        Rank rank = getRank(rankStr);
200
        //TODO hybrido and race
201
        boolean isHybrid = rank == null && "híbrido".equals(rankStr);
202
        boolean isRace = Rank.RACE().equals(rank);
193 203
//        rank = state.getTransformer().getRankByKey(rankStr);
194 204

  
205
        nameStr = removeSubgenusBracket(nameStr, rank);
206

  
195 207
        //name + author
196 208
        String fullNameStr = nameStr + (authorStr != null ? " " + authorStr : "");
197 209

  
198 210
        TaxonName fullName = nameParser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank);
199 211
        if (fullName.isProtectedTitleCache()){
200
            logger.warn(taxonId + ": Name could not be parsed: " + fullNameStr );
212
            logger.info(taxonId + ": Name could not be parsed: " + fullNameStr );
201 213
        }else{
202 214
            if (isFirstDedup) {
203 215
                logger.warn("Deduplication is still switcht off!");
......
256 268
        return result;
257 269
    }
258 270

  
271
    private String removeSubgenusBracket(String nameStr, Rank rank) {
272
        if (nameStr.matches("[A-Z][a-z]+\\s+\\([A-Za-z]+\\)\\s+[a-z]+.*")) {
273
            //species and below: remove bracket completely
274
            nameStr = nameStr.substring(0, nameStr.indexOf("(")) + nameStr.substring(nameStr.indexOf(")")+1);
275
        }else if (nameStr.matches("[A-Z][a-z]+\\s+\\([A-Za-z]+\\)")) {
276
            //subgenus: replace (...) bei subg. ...
277
            nameStr = nameStr.substring(0, nameStr.indexOf("(")) + "subg. " + nameStr.substring(nameStr.indexOf("(")+1, nameStr.length()-1);
278
        }
279
        return nameStr;
280
    }
281

  
259 282
    private void adaptRefTypeForGeneric(IBotanicalName referencedName, String refTypeStr) {
260 283
        INomenclaturalReference ref = referencedName.getNomenclaturalReference();
261 284
        if (ref == null){
......
327 350
        else if ("subsección".equals(rank)){ return Rank.SUBSECTION_BOTANY();}
328 351
        else if ("serie".equals(rank)){ return Rank.SERIES();}
329 352
        else if ("grupo".equals(rank)){ return Rank.SPECIESGROUP();}
330
        //TODO rank hibrido
331
//        else if ("híbrido".equals(rank)){ return Rank.GENUS;}
353
        else if ("híbrido".equals(rank)){ return null;}  //will be handled later
332 354
        else if ("especie".equals(rank)){ return Rank.SPECIES();}
333 355
        else if ("subespecie".equals(rank)){ return Rank.SUBSPECIES();}
334
        //TODO rank raza
335 356
        else if ("raza".equals(rank)){ return Rank.RACE();}
336 357
        else if ("variedad".equals(rank)){ return Rank.VARIETY();}
337 358
        else if ("subvariedad".equals(rank)){ return Rank.SUBVARIETY();}
338 359
        else if ("forma".equals(rank)){ return Rank.FORM();}
339 360
        else if ("subforma".equals(rank)){ return Rank.SUBFORM();}
340
        //TODO rank raza
341 361
        else if ("raza".equals(rank)){ return Rank.RACE();}
342 362
        else {
343 363
            logger.warn("Rank not recognized: "+ rank);

Also available in: Unified diff