Project

General

Profile

« Previous | Next » 

Revision 71614d18

Added by Katja Luther over 7 years ago

fix #5893: fauna europaea reference import with more atomized data

View differences:

cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/faunaEuropaea/FaunaEuropaeaAuthorImport.java
39 39
	private static int modCount = 1000;
40 40
	private final static String authorSeparator = ", ";
41 41
	private final static String lastAuthorSeparator = " & ";
42
	 protected static String fWs = "\\s*";
43
	 protected static String oWs = "\\s+";
44
	 protected static String finalTeamSplitter = "(" + fWs + "(&)" + fWs + "|" + oWs + "et" + oWs + ")";
45
	protected static String notFinalTeamSplitter = "((?:" + fWs + "," + fWs + ")(?!([A-Z][\\.]))"+"|" + finalTeamSplitter + ")";
46
	// Splits an author string into its team members. A split happens either at ", " or at
	// the final team splitter ("&" or " et "). A ", " is NOT a split point when it is followed
	// by a single upper-case initial, i.e. a capital letter followed by '.', whitespace or the
	// end of the input (as in "Lastname, A.").
	// The three alternatives after the capital must be a group, not a character class: inside
	// [...] the characters '|' and '$' are plain literals, so the end of the input was never
	// recognised and a trailing "Lastname, A" was wrongly split into "Lastname" and "A".
	protected static String test = "(,\\s(?![A-Z](?:[.\\s]|$)))|" + finalTeamSplitter;
42 47

  
43 48

  
44 49
	/* (non-Javadoc)
......
199 204
        return author;
200 205
	}
201 206

  
207
    /**
208
     * @param refAuthor
209
     * @return
210
     */
211
    public static TeamOrPersonBase<?> parseNomAuthorString(String refAuthor) {
212
        TeamOrPersonBase<?> author = null;
213
        //possible strings: Lastname, A., Lastname B. & Lastname C.
214
        //Lastname A, Lastname B & Lastname
215
        //Lastname A Lastname B & Lastname C
216
        String[] teamMembers = refAuthor.split(test);
217

  
218
        String lastMember;
219

  
220
        Person teamMember;
221
        author = Team.NewInstance();
222
        if (teamMembers.length>1){
223
            for(String member:teamMembers){
224
                if (!member.trim().equals("")){
225
                    teamMember = Person.NewInstance();
226
                    teamMember.setTitleCache(member, true);
227
                   ((Team)author).addTeamMember(teamMember);
228
                }
229
            }
230
        }else{
231
            author = Person.NewInstance();
232
            author.setTitleCache(refAuthor, true);
233

  
234
        }
235
        author.getTitleCache();
236
        return author;
237
    }
238

  
202 239
}
cdm-pesi/src/main/java/eu/etaxonomy/cdm/io/pesi/faunaEuropaea/FaunaEuropaeaRefImport.java
21 21
import java.util.Set;
22 22
import java.util.UUID;
23 23

  
24
import org.apache.commons.lang.StringUtils;
24 25
import org.apache.log4j.Logger;
25 26
import org.springframework.stereotype.Component;
26 27
import org.springframework.transaction.TransactionStatus;
......
29 30
import eu.etaxonomy.cdm.io.common.IImportConfigurator.DO_REFERENCES;
30 31
import eu.etaxonomy.cdm.io.common.ImportHelper;
31 32
import eu.etaxonomy.cdm.io.common.Source;
32
import eu.etaxonomy.cdm.model.agent.Team;
33 33
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
34 34
import eu.etaxonomy.cdm.model.common.CdmBase;
35 35
import eu.etaxonomy.cdm.model.common.OriginalSourceBase;
......
43 43
import eu.etaxonomy.cdm.model.taxon.Synonym;
44 44
import eu.etaxonomy.cdm.model.taxon.Taxon;
45 45
import eu.etaxonomy.cdm.model.taxon.TaxonBase;
46
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
46 47

  
47 48

  
48 49
/**
......
161 162
			String selectQueryRefs) {
162 163
		TransactionStatus txStatus = null;
163 164
		int count;
165
		Map<String, Reference> inReferences = new HashMap<String, Reference>();
164 166
		try {
165 167
			ResultSet rsRefs = source.getResultSet(countQueryRefs);
166 168
			rsRefs.next();
......
199 201
					txStatus = startTransaction();
200 202
					references = new HashMap<Integer,Reference>(limit);
201 203
					authors = new HashMap<String,TeamOrPersonBase<?>>(limit);
202

  
204
					//inReferences = new HashMap<String, Reference>(limit);
203 205
					if(logger.isInfoEnabled()) {
204 206
						logger.info("i = " + i + " - Reference import transaction started");
205 207
					}
......
213 215
//				reference.setTitleCache(title);
214 216
				reference.setTitle(title);
215 217
				reference.setDatePublished(ImportHelper.getDatePublished(year));
216
				reference.setTitleCache(title + " " +refSource , true);
218
				Reference inReference;
219
				Reference tempInReference;
220
				if (!StringUtils.isBlank(refSource)) {
221
				    tempInReference = (Reference)NonViralNameParserImpl.NewInstance().parseReferenceTitle(refSource, null, false);
222
				    if (inReferences.containsKey(tempInReference.getTitleCache())){
223
				        inReference = inReferences.get(tempInReference.getTitleCache());
224

  
225
				    }else{
226
				        inReference = (Reference) tempInReference.clone();
227
				        inReference.setPages(null);
228
				        inReference.setEdition(null);
229
				        inReferences.put(inReference.getTitleCache(), inReference);
230

  
231
				    }
232
				    reference.setPages(tempInReference.getPages());
233
                    reference.setEdition(tempInReference.getEdition());
234
                    tempInReference = null;
235
                    reference.setInReference(inReference);
236
				}
217 237

  
218 238
				if (!authors.containsKey(refAuthor)) {
219 239
					if (refAuthor == null) {
220 240
						logger.warn("Reference author is null");
221 241
					}
222
					author = Team.NewInstance();
223
					author.setTitleCache(refAuthor, true);
242
					author = FaunaEuropaeaAuthorImport.parseNomAuthorString(refAuthor);
243

  
224 244
					authors.put(refAuthor,author);
225 245
					if (logger.isTraceEnabled()) {
226 246
						logger.trace("Stored author (" + refAuthor + ")");
......
279 299
			logger.error("SQLException:" +  e);
280 300
			state.setUnsuccessfull();
281 301
		}
302
		inReferences = null;
282 303

  
283 304
	}
284 305

  

Also available in: Unified diff