Project

General

Profile

« Previous | Next » 

Revision 13ed824e

Added by Andreas Kohlbecker over 7 years ago

ref #6026 more parser improvements and tests

View differences:

app-import/src/main/java/eu/etaxonomy/cdm/io/iapt/IAPTExcelImport.java
89 89
            Pattern.compile("^(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // April 99 or April, 1999 or Apr. 12
90 90
            Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(\\s?)(?<month>[0-1]?[0-9])\\2\\3(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12.04.1969 or 12. 04. 1969 or 12/04/1969 or 12-04-1969
91 91
            Pattern.compile("^(?<day>[0-9]{1,2})([\\.\\-/])(?<monthName>[IVX]{1,2})\\2(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12-VI-1969
92
            Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)\\s)?(?<monthName>\\p{L}+)\\sde\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
92
            Pattern.compile("^(?:(?<day>[0-9]{1,2})(?:\\sde)?\\s)?(?<monthName>\\p{L}+)(?:\\sde)?\\s(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full and partial date like 12 de Enero de 1999 or Enero de 1999
93 93
            Pattern.compile("^(?<month>[0-1]?[0-9])([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like 04.1969 or 04/1969 or 04-1969
94 94
            Pattern.compile("^(?<year>(?:1[7,8,9])?[0-9]{2})([\\.\\-/])(?<month>[0-1]?[0-9])$"),//  partial date like 1999-04
95 95
            Pattern.compile("^(?<monthName>[IVX]{1,2})([\\.\\-/])(?<year>(?:1[7,8,9])?[0-9]{2})$"), // partial date like VI-1969
96 96
            Pattern.compile("^(?<day>[0-9]{1,2})(?:[\\./]|th|rd|st)?\\s(?<monthName>\\p{L}+\\.?),?\\s?(?<year>(?:1[7,8,9])?[0-9]{2})$"), // full date like 12. April 1969 or april 1999 or 22 Dec.1999
97 97
        };
98
    private static final Pattern typeSpecimenSplitPattern =  Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype.*?[:\\(](?<isotype>.*)\\.?)?\\.?$");
98
    /**
     * Splits a type specimen string into its parts.
     * <p>
     * The input must start with optional double quotes followed by "Type: " or "type: ".
     * Named groups:
     * <ul>
     * <li>{@code fieldUnit} - the text following "Type: ", captured lazily</li>
     * <li>{@code holotype} - optional, the text following "Holotype:" or "holotype:"</li>
     * <li>{@code isotype} - optional, the text following "Isotype" or "isotype", any run of
     * characters other than ':' and then a ':'</li>
     * </ul>
     * NOTE(review): the {@code isotype} group is greedy, so a trailing period of the input
     * ends up inside the captured isotype text instead of being dropped by the optional
     * trailing {@code \\.?} parts.
     */
    protected static final Pattern typeSpecimenSplitPattern =  Pattern.compile("^(?:\"*[Tt]ype: (?<fieldUnit>.*?))(?:[Hh]olotype:(?<holotype>.*?)\\.?)?(?:[Ii]sotype[^:]*:(?<isotype>.*)\\.?)?\\.?$");
99 99

  
100 100
    /**
     * Finds a basionym statement of the form "(Basionym: ..." or "(basionym: ...", with an
     * optional single whitespace on either side of the colon.
     * The named group {@code basionymName} captures everything up to, but not including,
     * the next ')'; the remainder of the line is consumed by the trailing {@code .*$}.
     */
    private static final Pattern typeNameBasionymPattern =  Pattern.compile("\\([Bb]asionym\\s?\\:\\s?(?<basionymName>[^\\)]*).*$");
101 101
    /**
     * Finds a note introduced by '['. Group 1 captures the text following the opening
     * bracket up to the next '[' or the end of the input.
     * NOTE(review): the character class excludes '[' rather than ']', so a closing ']' and
     * any text after it are part of group 1 - confirm against the code using this pattern
     * whether {@code [^\\]]*} was intended.
     */
    private static final Pattern typeNameNotePattern =  Pattern.compile("\\[([^\\[]*)"); // matches the inner of '[...]'
102 102
    /**
     * Splits a string of the form {@code <note>:<agent>;<name>} in which the note part itself
     * contains at least one ';'.
     * Named groups:
     * <ul>
     * <li>{@code note} - text up to the ':' separator, containing at least one ';'</li>
     * <li>{@code agent} - text between the ':' and the following ';' (captured lazily)</li>
     * <li>{@code name} - the remaining text after that ';'</li>
     * </ul>
     * Fixed: the last group was written as {@code (<name>.*)}, which is an unnamed group that
     * requires the literal text "&lt;name&gt;" in the input, and the {@code agent} group had an
     * empty body, so it could only ever capture the empty string.
     */
    private static final Pattern typeNameSpecialSplitPattern =  Pattern.compile("(?<note>.*\\;.*?)\\:(?<agent>.*?)\\;(?<name>.*)");
103 103

  
104
    private static final Pattern collectorPattern =  Pattern.compile(".*?(?<fullStr1>\\(leg\\.\\s+(?<data1>[^\\)]*)\\))|.*?(?<fullStr2>\\sleg\\.\\s+(?<data2>.*?)\\.?)$");
104
    /**
     * Locates the collector ("leg.") statement inside a field unit string. "Leg." and "leg."
     * are both accepted. The pattern consists of three alternatives:
     * <ol>
     * <li>a parenthesised statement anywhere in the string, like {@code ... (leg. Metzeltin, 30. 9. 1996) ...}:
     * {@code fullStr1} (group 1) is the whole parenthesised part, {@code data1} (group 2) the
     * text after "leg." up to the closing ')'</li>
     * <li>a trailing statement preceded by whitespace, like {@code ... leg. Metzeltin, 30. 9. 1996}:
     * {@code fullStr2} (group 3) is the statement including the leading whitespace,
     * {@code data2} (group 4) the text after "leg." or "leg.:" without an optional final period</li>
     * <li>a string that starts with the statement, like {@code leg. J. J. Halda 18.3.1997}:
     * {@code fullStr3} (group 5) is the statement, {@code data3} (group 6) the text after
     * "leg." or "leg.:"</li>
     * </ol>
     * Groups belonging to alternatives which did not take part in the match are {@code null}.
     */
    protected static final Pattern collectorPattern =  Pattern.compile(".*?(?<fullStr1>\\([Ll]eg\\.\\s+(?<data1>[^\\)]*)\\)).*$|.*?(?<fullStr2>\\s[Ll]eg\\.\\:?\\s+(?<data2>.*?)\\.?)$|^(?<fullStr3>[Ll]eg\\.\\:?\\s+(?<data3>.*?)\\.?)");
105 105
    /**
     * Splits collection data at the first comma.
     * The named group {@code collector} captures the text before the first comma, the named
     * group {@code detail} the text after it, without one optional whitespace directly
     * following the comma and without one optional period at the very end.
     */
    private static final Pattern collectionDataPattern =  Pattern.compile("^(?<collector>[^,]*),\\s?(?<detail>.*?)\\.?$");
106 106
    /**
     * Matches a string which starts with "No." or "no." followed by a whitespace character.
     * Group 1 captures the complete string.
     */
    private static final Pattern collectorsNumber =  Pattern.compile("^([nN]o\\.\\s.*)$");
107 107

  
......
479 479
     * @param state
480 480
     * @return null if the fieldUnitStr could not be parsed
481 481
     */
482
    private FieldUnit parseFieldUnit(String fieldUnitStr, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
482
    protected FieldUnit parseFieldUnit(String fieldUnitStr, String regNumber, SimpleExcelTaxonImportState<CONFIG> state) {
483 483

  
484 484
        FieldUnit fieldUnit = null;
485 485

  
486 486
        Matcher m1 = collectorPattern.matcher(fieldUnitStr);
487 487
        if(m1.matches()){
488 488

  
489
            String collectorData = m1.group(2); // like (leg. Metzeltin, 30. 9. 1996)
489
            String collectorData = m1.group(2); // like ... (leg. Metzeltin, 30. 9. 1996)
490 490
            String removal = m1.group(1);
491 491
            if(collectorData == null){
492
                collectorData = m1.group(4); // like leg. Metzeltin, 30. 9. 1996
492
                collectorData = m1.group(4); // like ... leg. Metzeltin, 30. 9. 1996
493 493
                removal = m1.group(3);
494 494
            }
495
            if(collectorData == null){
496
                collectorData = m1.group(6); // like ^leg. J. J. Halda 18.3.1997$
497
                removal = null;
498
            }
495 499
            if(collectorData == null){
496 500
                return null;
497 501
            }
498 502

  
499 503
            // the fieldUnitStr is parsable
500 504
            // remove all collectorData from the fieldUnitStr and use the rest as locality
501
            String locality = fieldUnitStr.replace(removal, "");
505
            String locality = null;
506
            if(removal != null){
507
                locality = fieldUnitStr.replace(removal, "");
508
            }
502 509

  
503 510
            String collectorStr = null;
504 511
            String detailStr = null;
......
534 541

  
535 542
            fieldUnit = FieldUnit.NewInstance();
536 543
            GatheringEvent ge = GatheringEvent.NewInstance();
537
            ge.setLocality(LanguageString.NewInstance(locality, Language.UNKNOWN_LANGUAGE()));
544
            if(locality != null){
545
                ge.setLocality(LanguageString.NewInstance(locality, Language.UNKNOWN_LANGUAGE()));
546
            }
538 547

  
539 548
            TeamOrPersonBase agent =  state.getAgentBase(collectorStr);
540 549
            if(agent == null) {

Also available in: Unified diff