Project

General

Profile

« Previous | Next » 

Revision 5cdaf78e

Added by Andreas Müller about 8 years ago

Latest Cuba import changes

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/app/cuba/CubaActivator.java
39 39
	//database validation status (create, update, validate ...)
40 40
	static DbSchemaValidation hbm2dll = DbSchemaValidation.CREATE;
41 41

  
42
//    static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
42
    static final ICdmDataSource cdmDestination = CdmDestinations.localH2();
43 43
//  static final ICdmDataSource cdmDestination = CdmDestinations.cdm_test_local_mysql_test();
44
    static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cuba_production();
45

  
46
	boolean invers = false;
47

  
48
	boolean include = !invers;
49
	boolean exists = true;
50

  
51
    boolean doAsteraceae = include && false;
52
    boolean doConvolvulaceae = include && exists;   //x
53
    boolean doCyperaceae = include && exists;   //x
54
    boolean doDicotA_C = include && false;
55
    boolean doDicotD_M = include && false;
56
    boolean doDicotN_Z = include && false;
57
    boolean doEuphorbiaceae = include && false;
58
    boolean doFabaceae = include && exists;
59
    boolean doGymnospermae = include && false;
60
    boolean doLamVerbenaceae = include && false;
61
    boolean doMalpighiaceae = include && exists;
62
    boolean doMelastomataceae = include && exists;
63
    boolean doMonocots = include && exists;
64
    boolean doMyrtaceae = include && exists;
65
    boolean doOrchidaceae = include && false;
66
    boolean doRubiaceae = include && exists;
67
    boolean doUrticaceae = include && exists;
44
//    static final ICdmDataSource cdmDestination = CdmDestinations.cdm_cuba_production();
45

  
46
	static boolean invers = false;
47

  
48
    boolean doAsteraceae = include;
49
    boolean doConvolvulaceae = include;
50
    boolean doCyperaceae = include;
51
    boolean doDicotA_C = include;
52
    boolean doDicotD_M = include;
53
    boolean doDicotN_Z = include;
54
    boolean doEuphorbiaceae = include;
55
    boolean doFabaceae = include;
56
    boolean doGymnospermae = include;
57
    boolean doLamVerbenaceae = include;
58
    boolean doMalpighiaceae = include;
59
    boolean doMelastomataceae = include;
60
    boolean doMonocots = include;
61
    boolean doMyrtaceae = include;
62
    boolean doOrchidaceae = include;
63
    boolean doRubiaceae = include;
64
    boolean doUrticaceae = include;
65

  
66
    static boolean include = !invers;
68 67

  
69 68

  
70 69

  
app-import/src/main/java/eu/etaxonomy/cdm/io/cuba/CubaExcelImport.java
61 61
    private static final long serialVersionUID = -747486709409732371L;
62 62
    private static final Logger logger = Logger.getLogger(CubaExcelImport.class);
63 63

  
64
    private static final String HOMONYM_MARKER = ".*\\s+homon.?$";
64
    private static final String HOMONYM_MARKER = "\\s+homon.?$";
65 65
    private static final String DOUBTFUL_MARKER = "^\\?\\s?";
66 66

  
67 67

  
......
302 302
    private static final String heterotypicRegExStr_TEST = "([^\\(]{5,}" +"(\\(.+\\))?" + "[^\\)\\(]{2,})"
303 303
            +"(\\((.{6,})\\))?";
304 304
    private static final String auctRegExStr = "auct\\."
305
            +"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S))?(\\s+p\\.\\s*p\\.)?";
306
    private static final String missapliedRegExStr = "“(.*{5,})”\\s+(" + auctRegExStr + "|sensu\\s+.{2,})";
305
            +"((\\sFC(\\-S)?(\\s&\\sA&S)?)|(\\sA&S)|\\sSagra|\\sBritton|\\sGriseb\\.|\\sWright|\\sFRC|\\sCoL|\\sUrb\\.)?(\\s+p\\.\\s*p\\.)?";
306
    private static final String missapliedRegExStr = "(\\?\\s)?“(.*{5,})”\\s+(" + auctRegExStr + "|sensu\\s+.{2,})";
307 307
    private static final String nomInvalRegExStr = "“(.*{5,})”\\s+nom\\.\\s+inval\\.";
308 308
    private static final String homonymRegExStr = "\\s*(\\[.*\\])*\\s*";
309 309

  
......
340 340

  
341 341
        List<BotanicalName> homonyms = new ArrayList<>();
342 342
        if (missapliedMatcher.matches()){
343
            String firstPart = missapliedMatcher.group(1);
343
            boolean doubtful = missapliedMatcher.group(1) != null;
344
            String firstPart = missapliedMatcher.group(2);
344 345
            BotanicalName name = (BotanicalName)nameParser.parseSimpleName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
345 346

  
346
            String secondPart = missapliedMatcher.group(2);
347
            String secondPart = missapliedMatcher.group(3);
347 348
            Taxon misappliedNameTaxon = Taxon.NewInstance(name, null);
349
            misappliedNameTaxon.setDoubtful(doubtful);
348 350
            if (secondPart.startsWith("sensu")){
349 351
                secondPart = secondPart.substring(5).trim();
350 352
                if (secondPart.contains(" ")){
351
                    logger.warn(line + "Second part contains more than 1 word. Check if this is correct: " + secondPart);
353
                    logger.warn(line + "CHECK: Second part contains more than 1 word. Check if this is correct: " + secondPart);
352 354
                }
353 355
                Reference<?> sensu = ReferenceFactory.newGeneric();
354 356
                Team team = Team.NewTitledInstance(secondPart, null);
......
378 380
            String secondPart = heterotypicMatcher.groupCount() < 3 ? null : heterotypicMatcher.group(3);
379 381
            String homonymPart = heterotypicMatcher.groupCount() < 4 ? null : heterotypicMatcher.group(4);
380 382
            boolean isDoubtful = firstPart.matches("^\\?\\s*.*");
381
            boolean isHomonym = firstPart.trim().matches(HOMONYM_MARKER);
383
            boolean isHomonym = firstPart.trim().matches(".*" + HOMONYM_MARKER);
382 384
            firstPart = normalizeStatus(firstPart);
383 385
            BotanicalName synName = (BotanicalName)nameParser.parseReferencedName(firstPart, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
384 386
            if (synName.isProtectedTitleCache()){
......
423 425
        BotanicalName currentBasionym = homotypicName;
424 426
        String[] splits = homotypicStr.split("\\s*,\\s*");
425 427
        for (String split : splits){
426
            boolean isHomonym = split.trim().matches(HOMONYM_MARKER);
428
            boolean isHomonym = split.trim().matches(".*" + HOMONYM_MARKER);
427 429
            String singleName = normalizeStatus(split);
428 430
            BotanicalName newName = (BotanicalName)nameParser.parseReferencedName(singleName, state.getConfig().getNomenclaturalCode(), Rank.SPECIES());
429 431
            if (newName.isProtectedTitleCache()){
......
492 494
            basionymName = name2;
493 495
            newCombination = currentBasionym;
494 496
        }
497
//        newCombination.getHomotypicalGroup().removeGroupBasionym(xxx);
495 498
        if (matchAuthor(basionymName.getCombinationAuthorship(), newCombination.getBasionymAuthorship())){
496 499
            newCombination.getHomotypicalGroup().setGroupBasionym(basionymName);
497 500
        }
......
591 594

  
592 595

  
593 596
    private static final String[] nomStatusStrings = new String[]{"nom. cons.", "ined.", "nom. illeg.",
594
            "nom. rej.","nom. cons. prop.","nom. altern."};
597
            "nom. rej.","nom. cons. prop.","nom. altern.","nom. confus.","nom. dub."};
595 598
    /**
596 599
     * @param taxonStr
597 600
     * @return

Also available in: Unified diff