22 |
22 |
import org.springframework.stereotype.Component;
|
23 |
23 |
import org.springframework.transaction.TransactionStatus;
|
24 |
24 |
|
|
25 |
import eu.etaxonomy.cdm.api.service.pager.Pager;
|
25 |
26 |
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
|
26 |
|
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
|
27 |
27 |
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
|
28 |
28 |
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
|
29 |
29 |
import eu.etaxonomy.cdm.model.agent.Person;
|
... | ... | |
86 |
86 |
|
87 |
87 |
private Map<String,NamedArea> areaMap;
|
88 |
88 |
|
89 |
|
private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
|
90 |
89 |
private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
91 |
90 |
|
92 |
91 |
|
... | ... | |
123 |
122 |
|
124 |
123 |
Rank rank = getRank(state);
|
125 |
124 |
TaxonName taxonName = makeName(state, line, rank);
|
126 |
|
if (0 < getNameService().countByTitle(TaxonName.class, taxonName.getTitleCache(), MatchMode.EXACT, null)){
|
127 |
|
logger.warn(line + "Possbile name duplicate: " + taxonName.getTitleCache());
|
|
125 |
getNameService().saveOrUpdate(taxonName);
|
|
126 |
if (1 < getNameService().countByTitle(TaxonName.class, taxonName.getTitleCache(), MatchMode.EXACT, null)){
|
|
127 |
Pager<TaxonName> candidates = getNameService().findByTitle(TaxonName.class, taxonName.getTitleCache(), MatchMode.EXACT, null, null, null, null, null);
|
|
128 |
boolean fullMatchExists = false;
|
|
129 |
for (TaxonName candidate : candidates.getRecords()){
|
|
130 |
if (candidate.getId() != taxonName.getId() && candidate.getFullTitleCache().equals(taxonName.getFullTitleCache())){
|
|
131 |
logger.warn(line + "Possbile referenced name duplicate: " + taxonName.getFullTitleCache());
|
|
132 |
fullMatchExists = true;
|
|
133 |
}
|
|
134 |
}
|
|
135 |
if (!fullMatchExists){
|
|
136 |
logger.warn(line + "Possbile name duplicate: " + taxonName.getTitleCache());
|
|
137 |
}
|
128 |
138 |
}
|
129 |
|
// getNameService().findByTitle(TaxonName.class, taxonName.getTitleCache(), MatchMode.EXACT, null, null, null, null, null);
|
130 |
139 |
TaxonNode parent = getParent(state, line, genusNode, taxonName, rank);
|
131 |
140 |
Reference sec = getSec(parent);
|
132 |
141 |
Taxon taxon = Taxon.NewInstance(taxonName, sec);
|
... | ... | |
175 |
184 |
Map<String, String> record = state.getOriginalRecord();
|
176 |
185 |
String allAreaStr = getValue(record, EM_GEO);
|
177 |
186 |
if(isBlank(allAreaStr)){
|
178 |
|
logger.warn(line+"No distribution data exists.");
|
|
187 |
logger.warn(line+"No distribution data exists: " + taxon.getName().getTitleCache());
|
179 |
188 |
}else{
|
180 |
189 |
String[] areaSplit = allAreaStr.split(",");
|
181 |
190 |
for (String areaStr: areaSplit){
|
... | ... | |
255 |
264 |
if (!nameCache.equals(name.getNameCache())){
|
256 |
265 |
logger.warn(line + "Namecache not equal: " + nameCache +" <-> " + name.getNameCache());
|
257 |
266 |
}
|
258 |
|
TeamOrPersonBase<?> authors = getAuthors(state, line);
|
|
267 |
TeamOrPersonBase<?>[] authors = getAuthors(state, line);
|
259 |
268 |
//all authors are combination authors, no basionym authors exist, according to ERS 2019-10-24
|
260 |
|
name.setCombinationAuthorship(authors);
|
261 |
|
Reference ref = getReference(state, line, authors);
|
|
269 |
name.setCombinationAuthorship(authors[0]);
|
|
270 |
name.setExCombinationAuthorship(authors[1]);
|
|
271 |
|
|
272 |
Reference ref = getReference(state, line, authors[0]);
|
262 |
273 |
name.setNomenclaturalReference(ref);
|
263 |
274 |
String[] collSplit = getCollationSplit(state, line);
|
264 |
275 |
name.setNomenclaturalMicroReference(collSplit[1]);
|
265 |
|
makeNameRemarks(state, line, name);
|
|
276 |
makeNameRemarks(state, name);
|
266 |
277 |
|
267 |
278 |
addImportSource(state, name);
|
268 |
279 |
return name;
|
269 |
280 |
}
|
270 |
281 |
|
271 |
282 |
@SuppressWarnings("deprecation")
|
272 |
|
private void makeNameRemarks(SimpleExcelTaxonImportState<CONFIG> state, String line, TaxonName name) {
|
|
283 |
private void makeNameRemarks(SimpleExcelTaxonImportState<CONFIG> state, TaxonName name) {
|
273 |
284 |
Map<String, String> record = state.getOriginalRecord();
|
274 |
285 |
String remarksStr = getValue(record, REFERENCE_REMARKS);
|
275 |
286 |
if (isBlank(remarksStr) || remarksStr.equals("[epublished]")||remarksStr.equals("(epublished)")){
|
... | ... | |
318 |
329 |
example.setAuthorship(authors);
|
319 |
330 |
String[] collSplit = getCollationSplit(state, line);
|
320 |
331 |
example.setVolume(collSplit[0]);
|
321 |
|
example.setDatePublished(getYear(state, line));
|
|
332 |
example.setDatePublished(getYear(state));
|
322 |
333 |
Reference journal = getExistingJournal(state, line);
|
323 |
334 |
example.setInJournal(journal);
|
324 |
335 |
result = getExistingArticle(state, line, example);
|
325 |
336 |
if(result != example){
|
326 |
337 |
logger.debug(line+ "article existed");
|
327 |
338 |
}else{
|
328 |
|
makeReferenceRemarks(state, line, example);
|
|
339 |
makeReferenceRemarks(state, example);
|
329 |
340 |
}
|
330 |
341 |
}else if ("BS".equals(pTypeStr)){
|
331 |
342 |
IBookSection example = ReferenceFactory.newBookSection();
|
332 |
343 |
String publicationStr = getValue(record, PUBLICATION);
|
333 |
344 |
String authorsForFlIber = getValue(record, AUTHORS);
|
334 |
|
TeamOrPersonBase<?> bookAuthor = getBookSectionBookAuthors(state, line, publicationStr, authorsForFlIber);
|
|
345 |
TeamOrPersonBase<?> bookAuthor = getBookSectionBookAuthors(line, publicationStr, authorsForFlIber);
|
335 |
346 |
if (bookAuthor == null){
|
336 |
347 |
logger.warn(line + "No author found for booksection of " + publicationStr);
|
337 |
348 |
}
|
... | ... | |
342 |
353 |
if (result != example){
|
343 |
354 |
logger.debug(line+ "book section existed");
|
344 |
355 |
}else{
|
345 |
|
makeReferenceRemarks(state, line, example);
|
|
356 |
makeReferenceRemarks(state, example);
|
346 |
357 |
}
|
347 |
|
//TODO after import BookSection authors need to be checked for correct in-authors
|
348 |
358 |
}else if ("BO".equals(pTypeStr)){
|
349 |
359 |
result = getExistingBook(state, line, authors);
|
350 |
360 |
}else{
|
... | ... | |
354 |
364 |
return result;
|
355 |
365 |
}
|
356 |
366 |
|
357 |
|
private TeamOrPersonBase<?> getBookSectionBookAuthors(SimpleExcelTaxonImportState<CONFIG> state, String line,
|
|
367 |
private TeamOrPersonBase<?> getBookSectionBookAuthors(String line,
|
358 |
368 |
String publicationStr, String authorsForFlIber) {
|
359 |
369 |
if ("Fl. Gr. Brit. Ireland".equals(publicationStr)){
|
360 |
370 |
return CdmBase.deproxy(getAgentService().find(UUID.fromString("009cda5a-f6a7-41bf-a323-dc72f83e6066")),Team.class);
|
... | ... | |
398 |
408 |
return null;
|
399 |
409 |
}
|
400 |
410 |
|
401 |
|
private void makeReferenceRemarks(SimpleExcelTaxonImportState<CONFIG> state, String line, IReference ref) {
|
|
411 |
private void makeReferenceRemarks(SimpleExcelTaxonImportState<CONFIG> state, IReference ref) {
|
402 |
412 |
Map<String, String> record = state.getOriginalRecord();
|
403 |
413 |
String remarksStr = getValue(record, REFERENCE_REMARKS);
|
404 |
414 |
if (isBlank(remarksStr)){
|
... | ... | |
470 |
480 |
example.setAbbrevTitle(publicationStr);
|
471 |
481 |
String[] collSplit = getCollationSplit(state, line);
|
472 |
482 |
example.setVolume(collSplit[0]);
|
473 |
|
example.setDatePublished(getYear(state, line));
|
|
483 |
example.setDatePublished(getYear(state));
|
474 |
484 |
example.setAuthorship(author);
|
475 |
485 |
|
476 |
486 |
Set<String> includeProperties = new HashSet<>();
|
... | ... | |
483 |
493 |
if (result != example){
|
484 |
494 |
logger.debug("book existed");
|
485 |
495 |
}else{
|
486 |
|
makeReferenceRemarks(state, line, example);
|
|
496 |
makeReferenceRemarks(state, example);
|
487 |
497 |
}
|
488 |
498 |
return result;
|
489 |
499 |
}
|
... | ... | |
499 |
509 |
addImportSource(state, example);
|
500 |
510 |
return example;
|
501 |
511 |
}else{
|
502 |
|
existingRefs = findBestMatchingRef(state, line, existingRefs, publicationStr);
|
|
512 |
existingRefs = findBestMatchingRef(existingRefs, publicationStr);
|
503 |
513 |
if(existingRefs.size()>1){
|
504 |
514 |
logger.warn(line+"More than 1 reference found for " + publicationStr + ". Use arbitrary one.");
|
505 |
515 |
}
|
... | ... | |
510 |
520 |
}
|
511 |
521 |
}
|
512 |
522 |
|
513 |
|
private List<Reference> findBestMatchingRef(SimpleExcelTaxonImportState<CONFIG> state, String line,
|
514 |
|
List<Reference> existingRefs, String publicationStr) {
|
|
523 |
private List<Reference> findBestMatchingRef(List<Reference> existingRefs, String publicationStr) {
|
515 |
524 |
Set<Reference> noTitleCandidates = new HashSet<>();
|
516 |
525 |
Set<Reference> sameTitleCandidates = new HashSet<>();
|
517 |
526 |
for(Reference ref : existingRefs){
|
... | ... | |
532 |
541 |
}
|
533 |
542 |
}
|
534 |
543 |
|
535 |
|
private VerbatimTimePeriod getYear(SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
|
544 |
private VerbatimTimePeriod getYear(SimpleExcelTaxonImportState<CONFIG> state) {
|
536 |
545 |
Map<String, String> record = state.getOriginalRecord();
|
537 |
546 |
String yearStr = getValue(record, YEAR);
|
538 |
547 |
VerbatimTimePeriod result = TimePeriodParser.parseStringVerbatim(yearStr);
|
539 |
548 |
return result;
|
540 |
549 |
}
|
541 |
550 |
|
542 |
|
private Map<String,TeamOrPersonBase> authorMap = new HashMap<>();
|
543 |
|
private TeamOrPersonBase<?> getAuthors(SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
|
551 |
private TeamOrPersonBase<?>[] getAuthors(SimpleExcelTaxonImportState<CONFIG> state, String line) {
|
544 |
552 |
Map<String, String> record = state.getOriginalRecord();
|
545 |
553 |
String authorsStr = getValue(record, AUTHORS);
|
|
554 |
String[] split = authorsStr.split(" ex ");
|
|
555 |
TeamOrPersonBase<?>[] result = new TeamOrPersonBase<?>[2];
|
|
556 |
if (split.length == 1){
|
|
557 |
result[0] = getAuthor(state, line, split[0]);
|
|
558 |
}else{
|
|
559 |
result[0] = getAuthor(state, line, split[1]);
|
|
560 |
result[1] = getAuthor(state, line, split[0]);
|
|
561 |
}
|
|
562 |
return result;
|
|
563 |
}
|
|
564 |
|
|
565 |
private Map<String,TeamOrPersonBase<?>> authorMap = new HashMap<>();
|
|
566 |
private TeamOrPersonBase<?> getAuthor(SimpleExcelTaxonImportState<CONFIG> state, String line, String authorsStr) {
|
546 |
567 |
if (authorMap.get(authorsStr)!= null){
|
547 |
568 |
return authorMap.get(authorsStr);
|
548 |
569 |
}else{
|
... | ... | |
572 |
593 |
return example;
|
573 |
594 |
}else{
|
574 |
595 |
if(existingAuthors.size()>1){
|
575 |
|
existingAuthors = findBestMatchingAuthor(state, line, existingAuthors, authorsStr);
|
|
596 |
existingAuthors = findBestMatchingAuthor(existingAuthors, authorsStr);
|
576 |
597 |
if(existingAuthors.size()>1){
|
577 |
598 |
logger.warn(line+"More than 1 author with same matching found for '" + authorsStr + "'. Use arbitrary one.");
|
578 |
599 |
}else{
|
... | ... | |
603 |
624 |
return newPerson;
|
604 |
625 |
}else{
|
605 |
626 |
if(existingPersons.size()>1){
|
606 |
|
existingPersons = findBestMatchingPerson(state, line, existingPersons, authorsStr);
|
|
627 |
existingPersons = findBestMatchingPerson(existingPersons, authorsStr);
|
607 |
628 |
if(existingPersons.size()>1){
|
608 |
|
existingPersons = findBestMatchingPerson(state, line, existingPersons, authorsStr);
|
|
629 |
existingPersons = findBestMatchingPerson(existingPersons, authorsStr);
|
609 |
630 |
logger.warn(line+"More than 1 person with same matching found for '" + authorsStr + "'. Use arbitrary one.");
|
610 |
631 |
}else{
|
611 |
632 |
logger.debug(line+"Found exactly 1 person with same matching for " +authorsStr);
|
... | ... | |
620 |
641 |
}
|
621 |
642 |
}
|
622 |
643 |
|
623 |
|
private List<TeamOrPersonBase<?>> findBestMatchingAuthor(SimpleExcelTaxonImportState<CONFIG> state, String line,
|
624 |
|
List<TeamOrPersonBase<?>> existingAuthors, String authorsStr) {
|
|
644 |
private List<TeamOrPersonBase<?>> findBestMatchingAuthor(List<TeamOrPersonBase<?>> existingAuthors,
|
|
645 |
String authorsStr) {
|
625 |
646 |
Set<TeamOrPersonBase<?>> noTitleCandidates = new HashSet<>();
|
626 |
647 |
Set<TeamOrPersonBase<?>> sameTitleCandidates = new HashSet<>();
|
627 |
648 |
for(TeamOrPersonBase<?> author : existingAuthors){
|
... | ... | |
638 |
659 |
}
|
639 |
660 |
}
|
640 |
661 |
|
641 |
|
private List<Person> findBestMatchingPerson(SimpleExcelTaxonImportState<CONFIG> state, String line,
|
|
662 |
private List<Person> findBestMatchingPerson(
|
642 |
663 |
List<Person> existingPersons, String authorsStr) {
|
|
664 |
|
643 |
665 |
Set<Person> noTitleCandidates = new HashSet<>();
|
644 |
666 |
Set<Person> sameTitleCandidates = new HashSet<>();
|
645 |
667 |
for(Person person : existingPersons){
|
fix #8612 final changes to E+MIpniImport