From 551b85002670edc5d4712e752f223126665724bc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Andreas=20M=C3=BCller?= Date: Tue, 10 Jun 2014 09:52:56 +0000 Subject: [PATCH] latest changes to taxonX import --- .../cdm/io/taxonx2013/TaxonXExtractor.java | 4 +- .../taxonx2013/TaxonXTreatmentExtractor.java | 60 +++++++++++++++---- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXExtractor.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXExtractor.java index 6e0ff2780d..8612f6a3a3 100644 --- a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXExtractor.java +++ b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXExtractor.java @@ -1207,7 +1207,9 @@ public class TaxonXExtractor { "|(sp\\.\\s*n\\.)" + ")"; if (status.trim().matches(pattern)){ - return status; + //FIXME + return null; +// return status; }else{ return null; } diff --git a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXTreatmentExtractor.java b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXTreatmentExtractor.java index 0c83fb6874..7275187c35 100644 --- a/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXTreatmentExtractor.java +++ b/cdmlib-io/src/main/java/eu/etaxonomy/cdm/io/taxonx2013/TaxonXTreatmentExtractor.java @@ -145,8 +145,8 @@ public class TaxonXTreatmentExtractor extends TaxonXExtractor{ * @param sourceName: the URI of the document */ @SuppressWarnings({ "rawtypes", "unused" }) - protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) { - logger.info("extractTreatment"); + + protected void extractTreatment(Node treatmentnode, Reference refMods, URI sourceName) { logger.info("extractTreatment"); List namesToSave = new ArrayList(); NodeList children = treatmentnode.getChildNodes(); Taxon acceptedTaxon =null; @@ -1163,7 +1163,7 @@ public class TaxonXTreatmentExtractor extends TaxonXExtractor{ * @return */ private boolean stringIsEmpty(String blaStr) { - if (blaStr.matches("(\\.|,|;)?")){ + if (blaStr.matches("(\\.|,|;|\\.-)?")){ return true; }else{ return false; @@ -1552,6 +1552,8 @@ public class TaxonXTreatmentExtractor extends TaxonXExtractor{ for(MyName name:names){ TaxonNameBase nameToBeFilled = name.getTaxonNameBase(); Synonym synonym = name.getSyno(); + addFollowingTextToName(nameToBeFilled, followingText); + /* INonViralNameParser parser = NonViralNameParserImpl.NewInstance(); nameToBeFilled = parser.parseFullName(name.getName(), nomenclaturalCode, name.getRank()); if (nameToBeFilled.hasProblem() && @@ -1585,7 +1587,22 @@ public class TaxonXTreatmentExtractor extends TaxonXExtractor{ } - /** + private boolean addFollowingTextToName(TaxonNameBase nameToBeFilled, String followingText) { + if (nameToBeFilled != null && StringUtils.isNotBlank(followingText)){ + if (! followingText.matches("\\d\\.?")){ + + if (followingText.startsWith(",")){ + followingText = followingText.substring(1).trim(); + } + nameToBeFilled.setFullTitleCache(nameToBeFilled.getFullTitleCache()+ "," +followingText , true); + } + return true; + } + return false; + + } + + /** * @param refgroup: the XML nodes * @param nametosave: the list of objects to save into the CDM * @param acceptedTaxon: the current acceptedTaxon @@ -2048,6 +2065,7 @@ public class TaxonXTreatmentExtractor extends TaxonXExtractor{ } boolean containsSynonyms=false; + boolean wasSynonym = false; usedFollowingTextPrefix = null; //reset for (int i=0; i nameToBeFilled; //System.out.println("HANDLE FIRST NAME OF THE LIST"); if(!containsSynonyms){ - //System.out.println("I : "+i); + wasSynonym = false; + + //System.out.println("I : "+i); currentMyName = new MyName(false); try { currentMyName = extractScientificName(childNode, refMods, followingText); @@ -2186,6 +2206,8 @@ public class TaxonXTreatmentExtractor extends TaxonXExtractor{ }else{ try{ extractSynonyms(childNode, acceptedTaxon, refMods, followingText); + wasSynonym = true; + }catch(NullPointerException e){ logger.warn("null pointer exception, the accepted taxon might be null"); } @@ -2195,9 +2217,30 @@ public class TaxonXTreatmentExtractor extends TaxonXExtractor{ reloadClassification(); //extract the References within the document extractReferences(childNode,nametosave,acceptedTaxon,refMods); + }else if (childName.equalsIgnoreCase("tax:bibref")){ + logger.warn(childName + " still preliminary"); + + NonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase(); + boolean handled = addFollowingTextToName (currentName, childNode.getTextContent() ); + if (! handled){ + setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject()); + } + }else{ + logger.warn(childName + " not yet handled"); } if(!stringIsEmpty(freetext.trim())) {; - setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject()); + if (! freetext.matches("\\d\\.?")){ + NonViralName currentName = currentMyName == null ? null : currentMyName.getTaxonNameBase(); + boolean handled = false; + if (currentName != null && !wasSynonym){ + handled = addFollowingTextToName (currentName, childNode.getTextContent() ); + } + if (! handled){ + setParticularDescription(freetext.trim(), acceptedTaxon,acceptedTaxon, refMods, getNotMarkedUpFeatureObject()); + } + } + + freetext = ""; } } @@ -3933,15 +3976,12 @@ public class TaxonXTreatmentExtractor extends TaxonXExtractor{ boolean foundIdentic=false; Taxon tmp=null; - // Taxon tmpPartial=null; for (TaxonBase tmpb:tmpListFiltered){ if(tmpb !=null){ TaxonNameBase tnb = tmpb.getName(); Rank crank=null; if (tnb != null){ - // //System.out.println(tnb.getTitleCache()); - // if (tnb.getTitleCache().split("sec.")[0].equals(partialname) ||tnb.getTitleCache().split("sec.")[0].equals(fullname) ){ - if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){ + if(globalrank.equals(rank) || (globalrank.isLower(Rank.SPECIES()) && rank.equals(Rank.SPECIES()))){ if (tnb.getTitleCache().split("sec.")[0].trim().equalsIgnoreCase(fullname) ){ crank =tnb.getRank(); if (crank !=null && rank !=null){ -- 2.34.1