Project

General

Profile

« Previous | Next » 

Revision a96a9d32

Added by Andreas Müller over 7 years ago

ref #5983 and ref #6410 parsing of hybrid formulas with missing name parts

View differences:

cdmlib-commons/src/main/java/eu/etaxonomy/cdm/common/CdmUtils.java
240 240
     * @param separator
241 241
     * @return String
242 242
     */
243
    static public String concat(CharSequence separator, String[] strings){
243
    static public String concat(CharSequence separator, String... strings){
244 244
        String result = "";
245 245
        boolean allNull = true;
246 246
        for (String string : strings){
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImpl.java
909 909
			}
910 910
		     //hybrid formula
911 911
			 else if (hybridFormulaPattern.matcher(fullNameString).matches()){
912
				 Set<HybridRelationship> existingRelations = new HashSet<HybridRelationship>();
913
				 Set<HybridRelationship> notToBeDeleted = new HashSet<HybridRelationship>();
912
				 Set<HybridRelationship> existingRelations = new HashSet<>();
913
				 Set<HybridRelationship> notToBeDeleted = new HashSet<>();
914 914

  
915 915
				 for ( HybridRelationship rel : nameToBeFilled.getHybridChildRelations()){
916 916
				     existingRelations.add(rel);
......
928 928
						 secondNameString += " " + str;
929 929
					 }
930 930
				 }
931
				 firstNameString = firstNameString.trim();
932
				 secondNameString = secondNameString.trim();
931 933
				 nameToBeFilled.setHybridFormula(true);
932 934
				 NomenclaturalCode code = nameToBeFilled.getNomenclaturalCode();
933
				 INonViralName firstName = this.parseFullName(firstNameString.trim(), code, rank);
934
				 INonViralName secondName = this.parseFullName(secondNameString.trim(), code, rank);
935
				 INonViralName firstName = this.parseFullName(firstNameString, code, rank);
936
				 if (secondNameString.matches(abbrevHybridSecondPart)){
937
				     secondNameString = extendSecondHybridPart(firstName, secondNameString);
938
				 }
939
				 INonViralName secondName = this.parseFullName(secondNameString, code, rank);
935 940
				 HybridRelationship firstRel = nameToBeFilled.addHybridParent(firstName, HybridRelationshipType.FIRST_PARENT(), null);
936 941
				 HybridRelationship second = nameToBeFilled.addHybridParent(secondName, HybridRelationshipType.SECOND_PARENT(), null);
937 942
				 checkRelationExist(firstRel, existingRelations, notToBeDeleted);
......
941 946
				 Rank firstRank = firstName.getRank();
942 947
				 Rank secondRank = secondName.getRank();
943 948

  
944
				 if (firstRank == null || firstRank.isHigher(secondRank)){
949
				 if (firstRank == null || (secondRank != null && firstRank.isHigher(secondRank))){
945 950
					 newRank = secondRank;
946 951
				 }else{
947 952
					 newRank = firstRank;
......
996 1001
	}
997 1002

  
998 1003
	/**
     * Expands an abbreviated second part of a hybrid formula to a complete name string
     * by borrowing the missing leading name parts from the already parsed first hybrid parent.
     * If an abbreviated genus is given but the first parent has no genus, or the genus
     * starts with a different letter, the string is returned unchanged.
1004
     * @param firstName the first hybrid parent; its genus and, where needed, its specific epithet are read
1005
     * @param secondNameString the abbreviated name string of the second hybrid parent (the caller only passes strings matching abbrevHybridSecondPart)
1006
     * @return the second name string with the missing leading name parts filled in
1007
     */
1008
    private String extendSecondHybridPart(INonViralName firstName, String secondNameString) {
1009
        //abbreviated genus given (single capital letter, e.g. "N. alata")
1010
        if (secondNameString.matches("^" + abbrevHybridGenus + ".*")){
1011
            if (StringUtils.isNotBlank(firstName.getGenusOrUninomial())){
1012
                if (secondNameString.substring(0,1).equals(firstName.getGenusOrUninomial().substring(0, 1))){  //abbreviation must fit the genus of the first parent
1013
                    secondNameString = secondNameString.replaceAll("^" + abbrevHybridGenus, firstName.getGenusOrUninomial() + " ");  //replace the abbreviation by the full genus
1014
                }
1015
            }
1016
        }else if (secondNameString.matches(abbrevHybridSecondPartOnlyInfraSpecies)){  //only rank marker and infraspecific epithet given
1017
            secondNameString = CdmUtils.concat(" " , firstName.getGenusOrUninomial(), firstName.getSpecificEpithet(), secondNameString);  //prepend genus and specific epithet of the first parent
1018
        }else if (true){  //fallback: no genus given at all; there will be further alternatives in future maybe
1019
            secondNameString = CdmUtils.concat(" " , firstName.getGenusOrUninomial(), secondNameString);  //prepend genus of the first parent
1020
        }
1021
        return secondNameString;
1022
    }
1023

  
1024
    /**
999 1025
     * Checks if a hybrid relation exists in the Set of existing relations
1000 1026
     * and <BR>
1001 1027
     *  if it does not adds it to relations not to be deleted <BR>
cdmlib-model/src/main/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplRegExBase.java
87 87
    //marker
88 88
    protected static String InfraGenusMarker = "(n|notho)?(subgen\\.|subg\\.|sect\\.|subsect\\.|ser\\.|subser\\.|t\\.infgen\\.|\\[unranked\\])";
89 89
    protected static String aggrOrGroupMarker = "(aggr\\.|agg\\.|group)";
90
    protected static String infraSpeciesMarker = "(n|notho)?(subsp\\.|convar\\.|var\\.|subvar\\.|f\\.|subf\\.|f\\.\\ssp\\.|f\\.spec\\.|f\\.sp\\.|\\[unranked\\]|tax\\." + fWs + "infrasp\\.)";
90
    protected static String infraSpeciesMarkerNoNotho = "(subsp\\.|convar\\.|var\\.|subvar\\.|f\\.|subf\\.|f\\.\\ssp\\.|f\\.spec\\.|f\\.sp\\.|\\[unranked\\]|tax\\." + fWs + "infrasp\\.)";
91
    protected static String infraSpeciesMarker = "(n|notho)?" + infraSpeciesMarkerNoNotho;
91 92
    protected static String oldInfraSpeciesMarker = "(prol\\.|proles|race|taxon|sublusus)";
92 93

  
93 94

  
......
287 288
    protected static String anyBotanicFullName = "(" + autonym2 + "|" + anyBotanicName + oWs + fullBotanicAuthorString + ")"  ;
288 289
    protected static String anyZooFullName = anyZooName + oWs + fullZooAuthorString ;
289 290
    protected static String anyFullName = "(" + anyBotanicFullName + "|" + anyZooFullName + ")";
290
    protected static String hybridFullName = "(" + anyFullName  + "|" +  anyBotanicName + "|" + anyZooName + ")" + hybridFormularSeparator + "(" + anyFullName  + "|" +  anyBotanicName + "|" + anyZooName + ")";
291
    protected static String abbrevHybridGenus = "([A-Z](\\.\\s*|\\s+))";  //single capital letter followed by a dot and optional whitespace, or by whitespace only
292
    protected static String abbrevHybridSecondPartWithSpecies = abbrevHybridGenus + "?" + nonCapitalEpiWord + "(" + oWs + infraSpeciesMarkerNoNotho + oWs + nonCapitalEpiWord + ")?";  //optional abbreviated genus, then an epithet, then an optional rank marker with epithet; #5983 first step but still too strict
293
    protected static String abbrevHybridSecondPartOnlyInfraSpecies = infraSpeciesMarkerNoNotho + oWs + nonCapitalEpiWord;  //rank marker (without notho prefix) followed by nonCapitalEpiWord, e.g. "subsp. schirvanica"
294
    protected static String abbrevHybridSecondPart = "(" + abbrevHybridSecondPartWithSpecies + "|" + abbrevHybridSecondPartOnlyInfraSpecies + ")";  //either of the two abbreviated forms
295

  
296
    protected static String hybridSecondPart = "(" + anyFullName  + "|" +  anyBotanicName + "|" + anyZooName + "|" + abbrevHybridSecondPart + ")";
297
    protected static String hybridFullName = "(" + anyFullName  + "|" +  anyBotanicName + "|" + anyZooName + ")" + hybridFormularSeparator + hybridSecondPart ;
291 298

  
292 299
    //Pattern
293 300
    protected static Pattern oWsPattern = Pattern.compile(oWs);
cdmlib-model/src/test/java/eu/etaxonomy/cdm/strategy/parser/NonViralNameParserImplTest.java
673 673
        assertEquals("Title cache must be correct", "Abies alba \u00D7 Pinus bus var. beta", name1.getTitleCache());
674 674
        assertEquals("Hybrid name must have the lower rank ('variety') as rank", Rank.VARIETY(), name1.getRank());
675 675

  
676
        //hybrids with authors
676
        //hybrids with authors  //happens but questionable
677 677
        name1 = parser.parseFullName("Abies alba L. \u00D7 Pinus bus Mill.", botanicCode, null);
678 678
        assertTrue("Name must have hybrid formula bit set", name1.isHybridFormula());
679 679
        assertEquals("Name must have 2 hybrid parents", 2, name1.getHybridChildRelations().size());
......
686 686
        assertEquals("Name must have Pinus bus Mill. as second hybrid parent", "Pinus bus Mill.", secondParent.getTitleCache());
687 687
        assertEquals("Hybrid name must have the lower rank ('species') as rank", Rank.SPECIES(), name1.getRank());
688 688

  
689
        //abbreviated genus hybrid formula #6410 / #5983
690
        String nameStr = "Nepenthes mirabilis \u00D7 N. alata";
691
        name1 = parser.parseFullName(nameStr, botanicCode, null);
692
        assertTrue("Name must have hybrid formula bit set", name1.isHybridFormula());
693
        assertEquals("Name must have 2 hybrid parents", 2, name1.getHybridChildRelations().size());
694
        //could also be N. or no genus at all, depends on formatter
695
        assertEquals("Title cache must be correct", "Nepenthes mirabilis \u00D7 Nepenthes alata", name1.getTitleCache());
696
        orderedRels = name1.getOrderedChildRelationships();
697
        assertEquals("Name must have 2 hybrid parents in ordered list", 2, orderedRels.size());
698
        firstParent = orderedRels.get(0).getParentName();
699
        //to be discussed as usually they should be ordered alphabetically
700
        assertEquals("Name must have Nepenthes mirabilis as first hybrid parent", "Nepenthes mirabilis", firstParent.getTitleCache());
701
        secondParent = orderedRels.get(1).getParentName();
702
        assertEquals("Name must have Nepenthes alata as second hybrid parent", "Nepenthes alata", secondParent.getTitleCache());
703
        assertEquals("Hybrid name must have the lower rank ('species') as rank", Rank.SPECIES(), name1.getRank());
704

  
705
        //missing genus hybrid formula #5983
706
        nameStr = "Nepenthes mirabilis \u00D7 alata";
707
        name1 = parser.parseFullName(nameStr, botanicCode, null);
708
        assertTrue("Name must have hybrid formula bit set", name1.isHybridFormula());
709
        assertEquals("Name must have 2 hybrid parents", 2, name1.getHybridChildRelations().size());
710
        //could also be N. or no genus at all, depends on formatter
711
        assertEquals("Title cache must be correct", "Nepenthes mirabilis \u00D7 Nepenthes alata", name1.getTitleCache());
712
        orderedRels = name1.getOrderedChildRelationships();
713
        assertEquals("Name must have 2 hybrid parents in ordered list", 2, orderedRels.size());
714
        firstParent = orderedRels.get(0).getParentName();
715
        //to be discussed as usually they should be ordered alphabetically
716
        assertEquals("Name must have Nepenthes mirabilis as first hybrid parent", "Nepenthes mirabilis", firstParent.getTitleCache());
717
        secondParent = orderedRels.get(1).getParentName();
718
        assertEquals("Name must have Nepenthes alata as second hybrid parent", "Nepenthes alata", secondParent.getTitleCache());
719
        assertEquals("Hybrid name must have the lower rank ('species') as rank", Rank.SPECIES(), name1.getRank());
720

  
721
        //#5983 subsp. with species and missing genus
722
        nameStr = "Orchis coriophora subsp. fragrans \u00D7 sancta";
723
        name1 = parser.parseFullName(nameStr, botanicCode, null);
724
        assertTrue("Name must have hybrid formula bit set", name1.isHybridFormula());
725
        assertEquals("Name must have 2 hybrid parents", 2, name1.getHybridChildRelations().size());
726
        //could also be N. or no genus at all, depends on formatter
727
        assertEquals("Title cache must be correct", "Orchis coriophora subsp. fragrans \u00D7 Orchis sancta", name1.getTitleCache());
728
        orderedRels = name1.getOrderedChildRelationships();
729
        assertEquals("Name must have 2 hybrid parents in ordered list", 2, orderedRels.size());
730
        firstParent = orderedRels.get(0).getParentName();
731
        assertEquals("Name must have Orchis coriophora subsp. fragrans as first hybrid parent", "Orchis coriophora subsp. fragrans", firstParent.getTitleCache());
732
        secondParent = orderedRels.get(1).getParentName();
733
        assertEquals("Name must have Orchis sancta as second hybrid parent", "Orchis sancta", secondParent.getTitleCache());
734
        assertEquals("Hybrid name must have the lower rank ('subspecies') as rank", Rank.SUBSPECIES(), name1.getRank());
735

  
736
        //2 subspecies with missing genus part #5983
737
        nameStr = "Orchis morio subsp. syriaca \u00D7 papilionacea subsp. schirvanica";
738
        name1 = parser.parseFullName(nameStr, botanicCode, null);
739
        assertTrue("Name must have hybrid formula bit set", name1.isHybridFormula());
740
        assertEquals("Name must have 2 hybrid parents", 2, name1.getHybridChildRelations().size());
741
        //could also be N. or no genus at all, depends on formatter
742
        assertEquals("Title cache must be correct", "Orchis morio subsp. syriaca \u00D7 Orchis papilionacea subsp. schirvanica", name1.getTitleCache());
743
        orderedRels = name1.getOrderedChildRelationships();
744
        assertEquals("Name must have 2 hybrid parents in ordered list", 2, orderedRels.size());
745
        firstParent = orderedRels.get(0).getParentName();
746
        assertEquals("Name must have Orchis morio subsp. syriaca as first hybrid parent", "Orchis morio subsp. syriaca", firstParent.getTitleCache());
747
        secondParent = orderedRels.get(1).getParentName();
748
        assertEquals("Name must have Orchis papilionacea subsp. schirvanica as second hybrid parent", "Orchis papilionacea subsp. schirvanica", secondParent.getTitleCache());
749
        assertEquals("Hybrid name must have the lower rank ('subspecies') as rank", Rank.SUBSPECIES(), name1.getRank());
750

  
751
        //subspecies and variety with missing genus part
752
        nameStr = "Orchis morio subsp. syriaca \u00D7 papilionacea var. schirvanica";
753
        name1 = parser.parseFullName(nameStr, botanicCode, null);
754
        assertTrue("Name must have hybrid formula bit set", name1.isHybridFormula());
755
        assertEquals("Name must have 2 hybrid parents", 2, name1.getHybridChildRelations().size());
756
        //could also be N. or no genus at all, depends on formatter
757
        assertEquals("Title cache must be correct", "Orchis morio subsp. syriaca \u00D7 Orchis papilionacea var. schirvanica", name1.getTitleCache());
758
        orderedRels = name1.getOrderedChildRelationships();
759
        assertEquals("Name must have 2 hybrid parents in ordered list", 2, orderedRels.size());
760
        firstParent = orderedRels.get(0).getParentName();
761
        assertEquals("Name must have Orchis morio subsp. syriaca as first hybrid parent", "Orchis morio subsp. syriaca", firstParent.getTitleCache());
762
        secondParent = orderedRels.get(1).getParentName();
763
        assertEquals("Name must have Orchis papilionacea var. schirvanica as second hybrid parent", "Orchis papilionacea var. schirvanica", secondParent.getTitleCache());
764
        assertEquals("Hybrid name must have the lower rank ('variety') as rank", Rank.VARIETY(), name1.getRank());
765

  
766
        //2 subspecies with missing genus and species part #5983
767
        nameStr = "Orchis morio subsp. syriaca \u00D7 subsp. schirvanica";
768
        name1 = parser.parseFullName(nameStr, botanicCode, null);
769
        assertTrue("Name must have hybrid formula bit set", name1.isHybridFormula());
770
        assertEquals("Name must have 2 hybrid parents", 2, name1.getHybridChildRelations().size());
771
        //could also be N. or no genus at all, depends on formatter
772
        assertEquals("Title cache must be correct", "Orchis morio subsp. syriaca \u00D7 Orchis morio subsp. schirvanica", name1.getTitleCache());
773
        orderedRels = name1.getOrderedChildRelationships();
774
        assertEquals("Name must have 2 hybrid parents in ordered list", 2, orderedRels.size());
775
        firstParent = orderedRels.get(0).getParentName();
776
        assertEquals("Name must have Orchis morio subsp. syriaca as first hybrid parent", "Orchis morio subsp. syriaca", firstParent.getTitleCache());
777
        secondParent = orderedRels.get(1).getParentName();
778
        assertEquals("Name must have Orchis morio subsp. schirvanica as second hybrid parent", "Orchis morio subsp. schirvanica", secondParent.getTitleCache());
779
        assertEquals("Hybrid name must have the lower rank ('subspecies') as rank", Rank.SUBSPECIES(), name1.getRank());
780

  
781
        //subspecies and variety with missing genus and species part #5983
782
        nameStr = "Orchis morio subsp. syriaca \u00D7 var. schirvanica";
783
        name1 = parser.parseFullName(nameStr, botanicCode, null);
784
        assertTrue("Name must have hybrid formula bit set", name1.isHybridFormula());
785
        assertEquals("Name must have 2 hybrid parents", 2, name1.getHybridChildRelations().size());
786
        //could also be N. or no genus at all, depends on formatter
787
        assertEquals("Title cache must be correct", "Orchis morio subsp. syriaca \u00D7 Orchis morio var. schirvanica", name1.getTitleCache());
788
        orderedRels = name1.getOrderedChildRelationships();
789
        assertEquals("Name must have 2 hybrid parents in ordered list", 2, orderedRels.size());
790
        firstParent = orderedRels.get(0).getParentName();
791
        assertEquals("Name must have Orchis morio subsp. syriaca as first hybrid parent", "Orchis morio subsp. syriaca", firstParent.getTitleCache());
792
        secondParent = orderedRels.get(1).getParentName();
793
        assertEquals("Name must have Orchis morio subsp. schirvanica as second hybrid parent", "Orchis morio var. schirvanica", secondParent.getTitleCache());
794
        assertEquals("Hybrid name must have the lower rank ('variety') as rank", Rank.VARIETY(), name1.getRank());
795

  
796

  
689 797
    }
690 798

  
799
//    @Test
800
//    public final void testTemp(){
801
////        String nalata = "N. alata";
802
////        if (! nalata.matches(NonViralNameParserImplRegExBase.abbrevHybridSecondPart)){
803
////            throw new RuntimeException();
804
////        }
805
//
806
//        //abbreviated hybrid formula #6410
807
//        String nameStr = "Orchis morio subsp. syriaca \u00D7 papilionacea subsp. schirvanica";
808
//        INonViralName name1 = parser.parseFullName(nameStr, botanicCode, null);
809
//        assertTrue("Name must have hybrid formula bit set", name1.isHybridFormula());
810
//        assertEquals("Name must have 2 hybrid parents", 2, name1.getHybridChildRelations().size());
811
//        //could also be N. or no genus at all, depends on formatter
812
//        assertEquals("Title cache must be correct", "Orchis morio subsp. syriaca \u00D7 Orchis papilionacea subsp. schirvanica", name1.getTitleCache());
813
//    }
814

  
815

  
691 816
    @Test
692 817
    public final void testHybridsRemoval(){
693 818
        //if the parser input already has hybridrelationships they need to be removed

Also available in: Unified diff