Revision 2dc91504
Added by Andreas Müller over 2 years ago
app-import/src/main/java/eu/etaxonomy/cdm/io/mexico/MexicoEfloraTaxonImport.java | ||
---|---|---|
90 | 90 |
public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, MexicoEfloraImportState state) { |
91 | 91 |
sourceReference = this.getSourceReference(state.getConfig().getSourceReference()); |
92 | 92 |
|
93 |
state.getDeduplicationHelper().reset(); |
|
93 | 94 |
boolean success = true ; |
94 | 95 |
@SuppressWarnings("rawtypes") |
95 | 96 |
Set<TaxonBase> taxaToSave = new HashSet<>(); |
... | ... | |
102 | 103 |
try{ |
103 | 104 |
// System.out.println(); |
104 | 105 |
while (rs.next()){ |
105 |
|
|
106 |
// if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("PTaxa handled: " + (i-1));} |
|
107 |
// System.out.println("i++"); |
|
108 |
//create Taxon element |
|
109 |
String taxonId = rs.getString("IdCAT"); |
|
110 |
String status = rs.getString("EstatusNombre"); |
|
111 |
String rankStr = rs.getString("CategoriaTaxonomica"); |
|
112 |
String nameStr = rs.getString("Nombre"); |
|
113 |
String autorStr = rs.getString("AutorSinAnio"); |
|
114 |
String fullNameStr = nameStr + " " + autorStr; |
|
115 |
String citaNomenclaturalStr = rs.getString("CitaNomenclatural"); |
|
116 |
String annotationStr = rs.getString("AnotacionTaxon"); |
|
117 |
String type = rs.getString("NomPublicationType"); |
|
118 |
String year = rs.getString("Anio"); |
|
119 |
String uuidStr = rs.getString("uuid"); |
|
120 |
UUID uuid = UUID.fromString(uuidStr); |
|
121 |
Integer secFk = nullSafeInt(rs, "IdBibliografiaSec"); |
|
122 |
|
|
123 |
//name OLD handling |
|
124 |
Rank rank = getRank(rankStr); |
|
125 |
NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance(); |
|
126 |
TaxonName taxonName = (TaxonName)parser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank); |
|
127 |
//.. identifier |
|
128 |
DefinedTerm conabioIdentifier = getIdentiferType(state, MexicoConabioTransformer.uuidConabioTaxonIdIdentifierType, |
|
129 |
"CONABIO Taxon Identifier", "CONABIO Taxon Identifier", "CONABIO", null); |
|
130 |
taxonName.addIdentifier(taxonId, conabioIdentifier); |
|
131 |
// .. nom Ref |
|
132 |
Reference nomRef = ReferenceFactory.newGeneric(); |
|
133 |
nomRef.setAbbrevTitleCache(citaNomenclaturalStr, true); |
|
134 |
nomRef.setDatePublished(TimePeriodParser.parseStringVerbatim(year)); |
|
135 |
taxonName.setNomenclaturalReference(nomRef); |
|
136 |
|
|
137 |
// TaxonName taxonName= makeName(taxonId, state, autorStr, |
|
138 |
// nameStr, citaNomenclaturalStr, type, rankStr, annotationStr, year); |
|
139 |
|
|
140 |
//sec |
|
141 |
Reference sec = null; |
|
142 |
if (secFk != null) { |
|
143 |
String refFkStr = String.valueOf(secFk); |
|
144 |
sec = refMap.get(refFkStr); |
|
145 |
if (sec == null && firstMissingSec) { |
|
146 |
logger.warn("There are missing sec refs but they are not logged anymore."); |
|
147 |
logger.debug("Sec not found for taxonId " + taxonId +" and secId " + refFkStr); |
|
148 |
firstMissingSec = false; |
|
149 |
} |
|
150 |
} |
|
151 |
|
|
152 |
//taxon |
|
153 |
TaxonBase<?> taxonBase; |
|
154 |
Synonym synonym; |
|
155 |
Taxon taxon; |
|
156 |
try { |
|
157 |
if ("aceptado".equals(status)){ |
|
158 |
taxon = Taxon.NewInstance(taxonName, sec); |
|
159 |
taxonBase = taxon; |
|
160 |
}else if ("sinónimo".equals(status)){ |
|
161 |
synonym = Synonym.NewInstance(taxonName, sec); |
|
162 |
taxonBase = synonym; |
|
163 |
}else { |
|
164 |
taxonBase = null; |
|
165 |
logger.error("Status not yet implemented: " + status); |
|
166 |
return false; |
|
167 |
} |
|
168 |
taxonBase.setUuid(uuid); |
|
169 |
|
|
170 |
partitioner.startDoSave(); |
|
171 |
taxaToSave.add(taxonBase); |
|
172 |
} catch (Exception e) { |
|
173 |
logger.warn("An exception (" +e.getMessage()+") occurred when creating taxon with id " + taxonId + ". Taxon could not be saved."); |
|
174 |
success = false; |
|
175 |
} |
|
106 |
success = handleSingleRecord(partitioner, state, success, taxaToSave, refMap, rs, i++); |
|
176 | 107 |
} |
177 | 108 |
} catch (Exception e) { |
178 |
logger.error("SQLException:" + e); |
|
109 |
e.printStackTrace(); |
|
110 |
logger.error("Exception:" + e); |
|
179 | 111 |
return false; |
180 | 112 |
} |
181 | 113 |
|
... | ... | |
183 | 115 |
return success; |
184 | 116 |
} |
185 | 117 |
|
118 |
private boolean handleSingleRecord(ResultSetPartitioner partitioner, MexicoEfloraImportState state, boolean success, |
|
119 |
Set<TaxonBase> taxaToSave, Map<String, Reference> refMap, ResultSet rs, int i) throws SQLException { |
|
120 |
if ((i % 1000) == 0 && i!= 1 ){ logger.info("Taxa handled: " + (i-1));} |
|
121 |
// System.out.println("i++"); |
|
122 |
//create Taxon element |
|
123 |
String taxonId = rs.getString("IdCAT"); |
|
124 |
String status = rs.getString("EstatusNombre"); |
|
125 |
String rankStr = rs.getString("CategoriaTaxonomica"); |
|
126 |
String nameStr = rs.getString("Nombre"); |
|
127 |
String autorStr = rs.getString("AutorSinAnio"); |
|
128 |
String fullNameStr = nameStr + " " + autorStr; |
|
129 |
String citaNomenclaturalStr = rs.getString("CitaNomenclatural"); |
|
130 |
String annotationStr = rs.getString("AnotacionTaxon"); |
|
131 |
String type = rs.getString("NomPublicationType"); |
|
132 |
String year = rs.getString("Anio"); |
|
133 |
String uuidStr = rs.getString("uuid"); |
|
134 |
UUID uuid = UUID.fromString(uuidStr); |
|
135 |
Integer secFk = nullSafeInt(rs, "IdBibliografiaSec"); |
|
136 |
|
|
137 |
//name OLD handling |
|
138 |
// Rank rank = getRank(rankStr); |
|
139 |
// NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance(); |
|
140 |
// TaxonName taxonName = (TaxonName)parser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank); |
|
141 |
// //.. identifier |
|
142 |
// DefinedTerm conabioIdentifier = getIdentiferType(state, MexicoConabioTransformer.uuidConabioTaxonIdIdentifierType, |
|
143 |
// "CONABIO Taxon Identifier", "CONABIO Taxon Identifier", "CONABIO", null); |
|
144 |
// taxonName.addIdentifier(taxonId, conabioIdentifier); |
|
145 |
// //.. nom Ref |
|
146 |
// Reference nomRef = ReferenceFactory.newGeneric(); |
|
147 |
// nomRef.setAbbrevTitleCache(citaNomenclaturalStr, true); |
|
148 |
// nomRef.setDatePublished(TimePeriodParser.parseStringVerbatim(year)); |
|
149 |
// taxonName.setNomenclaturalReference(nomRef); |
|
150 |
|
|
151 |
TaxonName taxonName= makeName(taxonId, state, autorStr, |
|
152 |
nameStr, citaNomenclaturalStr, type, rankStr, annotationStr, year); |
|
153 |
|
|
154 |
//sec |
|
155 |
Reference sec = null; |
|
156 |
if (secFk != null) { |
|
157 |
String refFkStr = String.valueOf(secFk); |
|
158 |
sec = refMap.get(refFkStr); |
|
159 |
if (sec == null && firstMissingSec) { |
|
160 |
logger.warn("There are missing sec refs but they are not logged anymore."); |
|
161 |
logger.debug("Sec not found for taxonId " + taxonId +" and secId " + refFkStr); |
|
162 |
firstMissingSec = false; |
|
163 |
} |
|
164 |
} |
|
165 |
|
|
166 |
//taxon |
|
167 |
TaxonBase<?> taxonBase; |
|
168 |
Synonym synonym; |
|
169 |
Taxon taxon; |
|
170 |
try { |
|
171 |
if ("aceptado".equals(status)){ |
|
172 |
taxon = Taxon.NewInstance(taxonName, sec); |
|
173 |
taxonBase = taxon; |
|
174 |
}else if ("sinónimo".equals(status)){ |
|
175 |
synonym = Synonym.NewInstance(taxonName, sec); |
|
176 |
taxonBase = synonym; |
|
177 |
}else { |
|
178 |
taxonBase = null; |
|
179 |
logger.error("Status not yet implemented: " + status); |
|
180 |
return false; |
|
181 |
} |
|
182 |
taxonBase.setUuid(uuid); |
|
183 |
|
|
184 |
partitioner.startDoSave(); |
|
185 |
taxaToSave.add(taxonBase); |
|
186 |
} catch (Exception e) { |
|
187 |
logger.warn("An exception (" +e.getMessage()+") occurred when creating taxon with id " + taxonId + ". Taxon could not be saved."); |
|
188 |
success = false; |
|
189 |
} |
|
190 |
return success; |
|
191 |
} |
|
192 |
|
|
186 | 193 |
boolean isFirstDedup = true; |
187 | 194 |
private TaxonName makeName(String taxonId, MexicoEfloraImportState state, |
188 | 195 |
String authorStr, String nameStr, String nomRefStr, String refType, String rankStr, |
... | ... | |
190 | 197 |
|
191 | 198 |
//rank |
192 | 199 |
Rank rank = getRank(rankStr); |
200 |
//TODO hybrido and race |
|
201 |
boolean isHybrid = rank == null && "híbrido".equals(rankStr); |
|
202 |
boolean isRace = Rank.RACE().equals(rank); |
|
193 | 203 |
// rank = state.getTransformer().getRankByKey(rankStr); |
194 | 204 |
|
205 |
nameStr = removeSubgenusBracket(nameStr, rank); |
|
206 |
|
|
195 | 207 |
//name + author |
196 | 208 |
String fullNameStr = nameStr + (authorStr != null ? " " + authorStr : ""); |
197 | 209 |
|
198 | 210 |
TaxonName fullName = nameParser.parseFullName(fullNameStr, NomenclaturalCode.ICNAFP, rank); |
199 | 211 |
if (fullName.isProtectedTitleCache()){ |
200 |
logger.warn(taxonId + ": Name could not be parsed: " + fullNameStr );
|
|
212 |
logger.info(taxonId + ": Name could not be parsed: " + fullNameStr );
|
|
201 | 213 |
}else{ |
202 | 214 |
if (isFirstDedup) { |
203 | 215 |
logger.warn("Deduplication is still switcht off!"); |
... | ... | |
256 | 268 |
return result; |
257 | 269 |
} |
258 | 270 |
|
271 |
private String removeSubgenusBracket(String nameStr, Rank rank) { |
|
272 |
if (nameStr.matches("[A-Z][a-z]+\\s+\\([A-Za-z]+\\)\\s+[a-z]+.*")) { |
|
273 |
//species and below: remove bracket completely |
|
274 |
nameStr = nameStr.substring(0, nameStr.indexOf("(")) + nameStr.substring(nameStr.indexOf(")")+1); |
|
275 |
}else if (nameStr.matches("[A-Z][a-z]+\\s+\\([A-Za-z]+\\)")) { |
|
276 |
//subgenus: replace (...) bei subg. ... |
|
277 |
nameStr = nameStr.substring(0, nameStr.indexOf("(")) + "subg. " + nameStr.substring(nameStr.indexOf("(")+1, nameStr.length()-1); |
|
278 |
} |
|
279 |
return nameStr; |
|
280 |
} |
|
281 |
|
|
259 | 282 |
private void adaptRefTypeForGeneric(IBotanicalName referencedName, String refTypeStr) { |
260 | 283 |
INomenclaturalReference ref = referencedName.getNomenclaturalReference(); |
261 | 284 |
if (ref == null){ |
... | ... | |
327 | 350 |
else if ("subsección".equals(rank)){ return Rank.SUBSECTION_BOTANY();} |
328 | 351 |
else if ("serie".equals(rank)){ return Rank.SERIES();} |
329 | 352 |
else if ("grupo".equals(rank)){ return Rank.SPECIESGROUP();} |
330 |
//TODO rank hibrido |
|
331 |
// else if ("híbrido".equals(rank)){ return Rank.GENUS;} |
|
353 |
else if ("híbrido".equals(rank)){ return null;} //will be handled later |
|
332 | 354 |
else if ("especie".equals(rank)){ return Rank.SPECIES();} |
333 | 355 |
else if ("subespecie".equals(rank)){ return Rank.SUBSPECIES();} |
334 |
//TODO rank raza |
|
335 | 356 |
else if ("raza".equals(rank)){ return Rank.RACE();} |
336 | 357 |
else if ("variedad".equals(rank)){ return Rank.VARIETY();} |
337 | 358 |
else if ("subvariedad".equals(rank)){ return Rank.SUBVARIETY();} |
338 | 359 |
else if ("forma".equals(rank)){ return Rank.FORM();} |
339 | 360 |
else if ("subforma".equals(rank)){ return Rank.SUBFORM();} |
340 |
//TODO rank raza |
|
341 | 361 |
else if ("raza".equals(rank)){ return Rank.RACE();} |
342 | 362 |
else { |
343 | 363 |
logger.warn("Rank not recognized: "+ rank); |
Also available in: Unified diff
ref #9932 improve parsing of subgeneric names and some refactoring