Project

General

Profile

« Previous | Next » 

Revision 910fd9d6

Added by Andreas Müller over 4 years ago

fix #8612 final changes to E+M IpniImport

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/app/common/CdmDestinations.java
154 154
        return CdmDestinations.makeDestination(dbType, cdmServer, cdmDB, port, cdmUserName, null);
155 155
    }
156 156

  
157
    public static ICdmDataSource cdm_local_mysql_tmpTest(){
158
        DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
159
        String cdmServer = "127.0.0.1";
160
        String cdmDB = "tmpTest";
161
        String cdmUserName = "edit";
162
        int port = 3306;
163
        return CdmDestinations.makeDestination(dbType, cdmServer, cdmDB, port, cdmUserName, null);
164
    }
165

  
157 166
	public static ICdmDataSource cdm_local_euromed(){
158 167
		DatabaseTypeEnum dbType = DatabaseTypeEnum.MySQL;
159 168
		String cdmServer = "127.0.0.1";
app-import/src/main/java/eu/etaxonomy/cdm/app/euromed/EuroMedIpniImportActivator.java
37 37
    //database validation status (create, update, validate ...)
38 38
    static DbSchemaValidation dbSchemaValidation = DbSchemaValidation.VALIDATE;
39 39

  
40
//   static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
41 40
    static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_mysql_pesi_euromed();
42
//   static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_euromed2();
41
//   static final ICdmDataSource cdmDestination = CdmDestinations.cdm_local_mysql_tmpTest();
43 42
//   static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_euroMed();
44 43
//   static final ICdmDataSource cdmDestination = CdmDestinations.cdm_production_euromed();
45 44

  
46
    private static boolean addAuthorsToReferences = true;
47

  
48 45
    //check - import
49 46
    static CHECK check = CHECK.IMPORT_WITHOUT_CHECK;
50 47

  
app-import/src/main/java/eu/etaxonomy/cdm/io/euromed/IpniImport.java
22 22
import org.springframework.stereotype.Component;
23 23
import org.springframework.transaction.TransactionStatus;
24 24

  
25
import eu.etaxonomy.cdm.api.service.pager.Pager;
25 26
import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
26
import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
27 27
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImport;
28 28
import eu.etaxonomy.cdm.io.mexico.SimpleExcelTaxonImportState;
29 29
import eu.etaxonomy.cdm.model.agent.Person;
......
86 86

  
87 87
    private Map<String,NamedArea> areaMap;
88 88

  
89
    private ImportDeduplicationHelper<SimpleExcelTaxonImportState<?>> deduplicationHelper;
90 89
    private NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
91 90

  
92 91

  
......
123 122

  
124 123
        Rank rank = getRank(state);
125 124
        TaxonName taxonName = makeName(state, line, rank);
126
        if (0 < getNameService().countByTitle(TaxonName.class, taxonName.getTitleCache(), MatchMode.EXACT, null)){
127
            logger.warn(line + "Possbile name duplicate: " + taxonName.getTitleCache());
125
        getNameService().saveOrUpdate(taxonName);
126
        if (1 < getNameService().countByTitle(TaxonName.class, taxonName.getTitleCache(), MatchMode.EXACT, null)){
127
            Pager<TaxonName> candidates = getNameService().findByTitle(TaxonName.class, taxonName.getTitleCache(), MatchMode.EXACT, null, null, null, null, null);
128
            boolean fullMatchExists = false;
129
            for (TaxonName candidate : candidates.getRecords()){
130
                if (candidate.getId() != taxonName.getId() && candidate.getFullTitleCache().equals(taxonName.getFullTitleCache())){
131
                    logger.warn(line + "Possbile referenced name duplicate: " + taxonName.getFullTitleCache());
132
                    fullMatchExists = true;
133
                }
134
            }
135
            if (!fullMatchExists){
136
                logger.warn(line + "Possbile name duplicate: " + taxonName.getTitleCache());
137
            }
128 138
        }
129
//        getNameService().findByTitle(TaxonName.class, taxonName.getTitleCache(), MatchMode.EXACT, null, null, null, null, null);
130 139
        TaxonNode parent = getParent(state, line, genusNode, taxonName, rank);
131 140
        Reference sec = getSec(parent);
132 141
        Taxon taxon = Taxon.NewInstance(taxonName, sec);
......
175 184
        Map<String, String> record = state.getOriginalRecord();
176 185
        String allAreaStr = getValue(record, EM_GEO);
177 186
        if(isBlank(allAreaStr)){
178
            logger.warn(line+"No distribution data exists.");
187
            logger.warn(line+"No distribution data exists: " + taxon.getName().getTitleCache());
179 188
        }else{
180 189
            String[] areaSplit = allAreaStr.split(",");
181 190
            for (String areaStr: areaSplit){
......
255 264
        if (!nameCache.equals(name.getNameCache())){
256 265
            logger.warn(line + "Namecache not equal: " + nameCache +" <-> " + name.getNameCache());
257 266
        }
258
        TeamOrPersonBase<?> authors = getAuthors(state, line);
267
        TeamOrPersonBase<?>[] authors = getAuthors(state, line);
259 268
        //all authors are combination authors, no basionym authors exist, according to ERS 2019-10-24
260
        name.setCombinationAuthorship(authors);
261
        Reference ref = getReference(state, line, authors);
269
        name.setCombinationAuthorship(authors[0]);
270
        name.setExCombinationAuthorship(authors[1]);
271

  
272
        Reference ref = getReference(state, line, authors[0]);
262 273
        name.setNomenclaturalReference(ref);
263 274
        String[] collSplit = getCollationSplit(state, line);
264 275
        name.setNomenclaturalMicroReference(collSplit[1]);
265
        makeNameRemarks(state, line, name);
276
        makeNameRemarks(state, name);
266 277

  
267 278
        addImportSource(state, name);
268 279
        return name;
269 280
    }
270 281

  
271 282
    @SuppressWarnings("deprecation")
272
    private void makeNameRemarks(SimpleExcelTaxonImportState<CONFIG> state, String line, TaxonName name) {
283
    private void makeNameRemarks(SimpleExcelTaxonImportState<CONFIG> state, TaxonName name) {
273 284
        Map<String, String> record = state.getOriginalRecord();
274 285
        String remarksStr = getValue(record, REFERENCE_REMARKS);
275 286
        if (isBlank(remarksStr) || remarksStr.equals("[epublished]")||remarksStr.equals("(epublished)")){
......
318 329
            example.setAuthorship(authors);
319 330
            String[] collSplit = getCollationSplit(state, line);
320 331
            example.setVolume(collSplit[0]);
321
            example.setDatePublished(getYear(state, line));
332
            example.setDatePublished(getYear(state));
322 333
            Reference journal = getExistingJournal(state, line);
323 334
            example.setInJournal(journal);
324 335
            result = getExistingArticle(state, line, example);
325 336
            if(result != example){
326 337
                logger.debug(line+ "article existed");
327 338
            }else{
328
                makeReferenceRemarks(state, line, example);
339
                makeReferenceRemarks(state, example);
329 340
            }
330 341
        }else if ("BS".equals(pTypeStr)){
331 342
            IBookSection example = ReferenceFactory.newBookSection();
332 343
            String publicationStr = getValue(record, PUBLICATION);
333 344
            String authorsForFlIber = getValue(record, AUTHORS);
334
            TeamOrPersonBase<?> bookAuthor = getBookSectionBookAuthors(state, line, publicationStr, authorsForFlIber);
345
            TeamOrPersonBase<?> bookAuthor = getBookSectionBookAuthors(line, publicationStr, authorsForFlIber);
335 346
            if (bookAuthor == null){
336 347
                logger.warn(line + "No author found for booksection of " + publicationStr);
337 348
            }
......
342 353
            if (result != example){
343 354
                logger.debug(line+ "book section existed");
344 355
            }else{
345
                makeReferenceRemarks(state, line, example);
356
                makeReferenceRemarks(state, example);
346 357
            }
347
            //TODO after import BookSection authors need to be checked for correct in-authors
348 358
        }else if ("BO".equals(pTypeStr)){
349 359
            result = getExistingBook(state, line, authors);
350 360
        }else{
......
354 364
        return result;
355 365
    }
356 366

  
357
    private TeamOrPersonBase<?> getBookSectionBookAuthors(SimpleExcelTaxonImportState<CONFIG> state, String line,
367
    private TeamOrPersonBase<?> getBookSectionBookAuthors(String line,
358 368
            String publicationStr, String authorsForFlIber) {
359 369
        if ("Fl. Gr. Brit. Ireland".equals(publicationStr)){
360 370
            return CdmBase.deproxy(getAgentService().find(UUID.fromString("009cda5a-f6a7-41bf-a323-dc72f83e6066")),Team.class);
......
398 408
        return null;
399 409
    }
400 410

  
401
    private void makeReferenceRemarks(SimpleExcelTaxonImportState<CONFIG> state, String line, IReference ref) {
411
    private void makeReferenceRemarks(SimpleExcelTaxonImportState<CONFIG> state, IReference ref) {
402 412
        Map<String, String> record = state.getOriginalRecord();
403 413
        String remarksStr = getValue(record, REFERENCE_REMARKS);
404 414
        if (isBlank(remarksStr)){
......
470 480
        example.setAbbrevTitle(publicationStr);
471 481
        String[] collSplit = getCollationSplit(state, line);
472 482
        example.setVolume(collSplit[0]);
473
        example.setDatePublished(getYear(state, line));
483
        example.setDatePublished(getYear(state));
474 484
        example.setAuthorship(author);
475 485

  
476 486
        Set<String> includeProperties = new HashSet<>();
......
483 493
        if (result != example){
484 494
            logger.debug("book existed");
485 495
        }else{
486
            makeReferenceRemarks(state, line, example);
496
            makeReferenceRemarks(state, example);
487 497
        }
488 498
        return result;
489 499
    }
......
499 509
                addImportSource(state, example);
500 510
                return example;
501 511
            }else{
502
                existingRefs = findBestMatchingRef(state, line, existingRefs, publicationStr);
512
                existingRefs = findBestMatchingRef(existingRefs, publicationStr);
503 513
                if(existingRefs.size()>1){
504 514
                    logger.warn(line+"More than 1 reference found for " + publicationStr + ". Use arbitrary one.");
505 515
                }
......
510 520
        }
511 521
    }
512 522

  
513
    private List<Reference> findBestMatchingRef(SimpleExcelTaxonImportState<CONFIG> state, String line,
514
            List<Reference> existingRefs, String publicationStr) {
523
    private List<Reference> findBestMatchingRef(List<Reference> existingRefs, String publicationStr) {
515 524
        Set<Reference> noTitleCandidates = new HashSet<>();
516 525
        Set<Reference> sameTitleCandidates = new HashSet<>();
517 526
        for(Reference ref : existingRefs){
......
532 541
        }
533 542
    }
534 543

  
535
    private VerbatimTimePeriod getYear(SimpleExcelTaxonImportState<CONFIG> state, String line) {
544
    private VerbatimTimePeriod getYear(SimpleExcelTaxonImportState<CONFIG> state) {
536 545
        Map<String, String> record = state.getOriginalRecord();
537 546
        String yearStr = getValue(record, YEAR);
538 547
        VerbatimTimePeriod result = TimePeriodParser.parseStringVerbatim(yearStr);
539 548
        return result;
540 549
    }
541 550

  
542
    private Map<String,TeamOrPersonBase> authorMap = new HashMap<>();
543
    private TeamOrPersonBase<?> getAuthors(SimpleExcelTaxonImportState<CONFIG> state, String line) {
551
    private TeamOrPersonBase<?>[] getAuthors(SimpleExcelTaxonImportState<CONFIG> state, String line) {
544 552
        Map<String, String> record = state.getOriginalRecord();
545 553
        String authorsStr = getValue(record, AUTHORS);
554
        String[] split = authorsStr.split(" ex ");
555
        TeamOrPersonBase<?>[] result = new TeamOrPersonBase<?>[2];
556
        if (split.length == 1){
557
            result[0] = getAuthor(state, line, split[0]);
558
        }else{
559
            result[0] = getAuthor(state, line, split[1]);
560
            result[1] = getAuthor(state, line, split[0]);
561
        }
562
        return result;
563
    }
564

  
565
    private Map<String,TeamOrPersonBase<?>> authorMap = new HashMap<>();
566
    private TeamOrPersonBase<?> getAuthor(SimpleExcelTaxonImportState<CONFIG> state, String line, String authorsStr) {
546 567
        if (authorMap.get(authorsStr)!= null){
547 568
            return authorMap.get(authorsStr);
548 569
        }else{
......
572 593
                return example;
573 594
            }else{
574 595
                if(existingAuthors.size()>1){
575
                    existingAuthors = findBestMatchingAuthor(state, line, existingAuthors, authorsStr);
596
                    existingAuthors = findBestMatchingAuthor(existingAuthors, authorsStr);
576 597
                    if(existingAuthors.size()>1){
577 598
                        logger.warn(line+"More than 1 author with same matching found for '" + authorsStr + "'. Use arbitrary one.");
578 599
                    }else{
......
603 624
                return newPerson;
604 625
            }else{
605 626
                if(existingPersons.size()>1){
606
                    existingPersons = findBestMatchingPerson(state, line, existingPersons, authorsStr);
627
                    existingPersons = findBestMatchingPerson(existingPersons, authorsStr);
607 628
                    if(existingPersons.size()>1){
608
                        existingPersons = findBestMatchingPerson(state, line, existingPersons, authorsStr);
629
                        existingPersons = findBestMatchingPerson(existingPersons, authorsStr);
609 630
                        logger.warn(line+"More than 1 person with same matching found for '" + authorsStr + "'. Use arbitrary one.");
610 631
                    }else{
611 632
                        logger.debug(line+"Found exactly 1 person with same matching for " +authorsStr);
......
620 641
        }
621 642
    }
622 643

  
623
    private List<TeamOrPersonBase<?>> findBestMatchingAuthor(SimpleExcelTaxonImportState<CONFIG> state, String line,
624
            List<TeamOrPersonBase<?>> existingAuthors, String authorsStr) {
644
    private List<TeamOrPersonBase<?>> findBestMatchingAuthor(List<TeamOrPersonBase<?>> existingAuthors,
645
            String authorsStr) {
625 646
        Set<TeamOrPersonBase<?>> noTitleCandidates = new HashSet<>();
626 647
        Set<TeamOrPersonBase<?>> sameTitleCandidates = new HashSet<>();
627 648
        for(TeamOrPersonBase<?> author : existingAuthors){
......
638 659
        }
639 660
    }
640 661

  
641
    private List<Person> findBestMatchingPerson(SimpleExcelTaxonImportState<CONFIG> state, String line,
662
    private List<Person> findBestMatchingPerson(
642 663
            List<Person> existingPersons, String authorsStr) {
664

  
643 665
        Set<Person> noTitleCandidates = new HashSet<>();
644 666
        Set<Person> sameTitleCandidates = new HashSet<>();
645 667
        for(Person person : existingPersons){

Also available in: Unified diff