Project

General

Profile

Download (18 KB) Statistics
| Branch: | Tag: | Revision:
1
package eu.etaxonomy.cdm.io.taxonx2013;
2

    
3
import java.net.URI;
4
import java.net.URISyntaxException;
5
import java.util.ArrayList;
6
import java.util.HashMap;
7
import java.util.List;
8
import java.util.Map;
9
import java.util.UUID;
10

    
11
import org.apache.commons.lang.StringUtils;
12
import org.apache.log4j.Logger;
13
import org.w3c.dom.NamedNodeMap;
14
import org.w3c.dom.Node;
15
import org.w3c.dom.NodeList;
16

    
17
import eu.etaxonomy.cdm.model.agent.Person;
18
import eu.etaxonomy.cdm.model.agent.Team;
19
import eu.etaxonomy.cdm.model.common.TimePeriod;
20
import eu.etaxonomy.cdm.model.reference.IBook;
21
import eu.etaxonomy.cdm.model.reference.Reference;
22
import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
23
import eu.etaxonomy.cdm.strategy.parser.TimePeriodParser;
24

    
25
public class TaxonXModsExtractor extends TaxonXExtractor{
26

    
27
    private final Map<String,UUID> personMap = new HashMap<String, UUID>();
28

    
29
    Logger logger = Logger.getLogger(getClass());
30

    
31
    /**
32
     * @param agentService
33
     */
34
    public TaxonXModsExtractor(TaxonXImport importer) {
35
        this.importer = importer;
36
    }
37

    
38
    public Reference<?> extractMods(Node node){
39
        //        System.out.println("extractMods");
40
        Map<String, String> modsMap = new HashMap<String, String>();
41
        NodeList children = node.getChildNodes();
42
        List<String> roleList = new ArrayList<String>();
43
        String content="";
44

    
45
        //        int reftype = askQuestion("What kind of reference is it?\n 1: Generic\n 2: Book\n 3: Article\n" +
46
        //                " 4 : BookSection\n 5 : Journal\n 6 : Printseries\n 7: Thesis ");
47
        int reftype=4;
48
        Reference<?> ref= getReferenceType(reftype);
49
        for (int i=0; i<children.getLength();i++){
50

    
51
            if (children.item(i).getNodeName().equalsIgnoreCase("mods:titleinfo")){
52
                NodeList tmp = children.item(i).getChildNodes();
53
                for (int j=0;j<tmp.getLength();j++){
54
                    if (tmp.item(j).getNodeName().equalsIgnoreCase("mods:title")) {
55
                        content=tmp.item(j).getTextContent().trim();
56
                        if (!content.isEmpty()) {
57
                            modsMap.put("mainTitle",content);
58
                            //                            ref.setTitleCache(content,true);
59
                            ref.setTitle(content);
60
                            //                            ref.generateTitle();
61
                        }
62
                    }
63
                }
64
            }
65

    
66
            if (children.item(i).getNodeName().equalsIgnoreCase("mods:name")){
67
                Map<String,String> mapmap = getModsNames(children.item(i), ref);
68
                if (!mapmap.isEmpty()) {
69
                    roleList.add(mapmap.toString());
70
                }
71
            }
72

    
73
            if (children.item(i).getNodeName().equalsIgnoreCase("mods:typeofresource")){
74
                content = children.item(i).getTextContent().trim();
75
                if (!content.isEmpty()) {
76
                    modsMap.put("typeofresource",content);
77
                }
78
            }
79
            if (children.item(i).getNodeName().equalsIgnoreCase("mods:identifier")){
80
                content = children.item(i).getTextContent().trim();
81
                if (!content.isEmpty()) {
82
                    modsMap.put(children.item(i).getAttributes().getNamedItem("type").getNodeValue(),content);
83
                    if (children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("isbn")) {
84
                        ref.setIsbn(content);
85
                    }
86
                    if (children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("issn")) {
87
                        ref.setIssn(content);
88
                    }
89
                    if (children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("GenericHash")) {
90
                        ref.setIssn("GenericHash: "+content);
91
                        try {
92
                            ref.setUri(new URI("http://plazi.cs.umb.edu/GgServer/search?MODS.ModsDocID="+content));
93
                        } catch (URISyntaxException e) {
94
                            // TODO Auto-generated catch block
95
                            e.printStackTrace();
96
                        }
97
                    }
98
                }
99
            }
100
            if (children.item(i).getNodeName().equalsIgnoreCase("mods:location")){
101
                NodeList tmp = children.item(i).getChildNodes();
102
                for (int j=0;j<tmp.getLength();j++){
103
                    //                    System.out.println("Child of mods:location: "+tmp.item(j).getNodeName());
104
                    if (tmp.item(j).getNodeName().equalsIgnoreCase("mods:url")) {
105
                        content = tmp.item(j).getTextContent().trim();
106
                        if (!content.isEmpty() && (content != "http://un.availab.le")) {
107
                            modsMap.put("url",content);
108
                            ref.setUri(URI.create(content));
109
                        }
110
                    }
111
                }
112
            }
113

    
114

    
115
            if (children.item(i).getNodeName().equalsIgnoreCase("mods:relatedItem")){
116
                addRelatedMods(children.item(i), modsMap, ref);
117
            }
118

    
119

    
120
        }
121
        modsMap.put("people",StringUtils.join(roleList.toArray(),SPLITTER));
122

    
123
        List<Reference> references = importer.getReferenceService().list(Reference.class, 0, 0, null, null);
124
        for(Reference<?> refe:references){
125
            if (refe.getCitation().equalsIgnoreCase(ref.getCitation())) {
126
                ref=refe;
127
            }
128
        }
129
        //        System.out.println(modsMap);
130
        //
131
        //        System.out.println("REFERENCE "+ref.getCitation());
132
        //        System.out.println("REFERENCE "+ref.getTitle());
133
        //        System.out.println("REFERENCE "+ref.getTitleCache());
134
        return ref;
135
    }
136

    
137
    private final String AUTHOR = "author";
138
    private final String EDITOR = "editor";
139
    /**
140
     * @param item
141
     * @return
142
     */
143
    private Map<String, String> getModsNames(Node node, Reference<?> ref) {
144
        NamedNodeMap attributeMap = node.getAttributes();
145
        Map<String,String> mapmap = new HashMap<String, String>();
146
        List<String> roleList = new ArrayList<String>();
147
        boolean newRole=false;
148
        String content="";
149
        String role =null;
150
        List<Person> persons = new ArrayList<Person>();
151
        List<String> editors= new ArrayList<String>();
152

    
153
        if ((attributeMap.getNamedItem("type") != null) && attributeMap.getNamedItem("type").getNodeValue().equalsIgnoreCase("personal")) {
154

    
155
            NodeList tmp = node.getChildNodes();
156
            for (int j=0;j<tmp.getLength();j++){
157

    
158
                //                System.out.println("Child of modsnametype: "+tmp.item(j).getNodeName());
159
                Person p=null;
160
                if (tmp.item(j).getNodeName().equalsIgnoreCase("mods:namePart")) {
161
                    content=tmp.item(j).getTextContent().trim();
162
                    if (!content.isEmpty()) {
163
                        mapmap.put("namePart",content);
164
                        p = Person.NewInstance();
165
                        p.setTitleCache(content, true);
166
                    }
167
                }
168
                if (tmp.item(j).getNodeName().equalsIgnoreCase("mods:role")) {
169
                    NodeList tmp2 = tmp.item(j).getChildNodes();
170
                    for (int k=0; k< tmp2.getLength();k++){
171
                        if (tmp2.item(k).getNodeName().equalsIgnoreCase("mods:roleTerm")){
172
                            content = tmp2.item(k).getTextContent().trim();
173
                            //                            System.out.println("ROLETERM!" +content);
174
                            if (!content.isEmpty()) {
175
                                roleList.add(content);
176
                                //                                p.setNomenclaturalTitle(content);
177
                                if (content.equalsIgnoreCase(EDITOR)) {
178
                                    role=EDITOR;
179
                                }
180
                                if (content.equalsIgnoreCase(AUTHOR)) {
181
                                    role=AUTHOR;
182
                                }
183
                                newRole=true;
184
                            }
185
                        }
186
                    }
187
                }
188
                if (newRole){
189
                    if ((p!=null) && role.equals(AUTHOR)) {
190
                        UUID uuid = null;
191
                        if (!personMap.containsKey(p.getTitleCache())){
192
                            uuid = importer.getAgentService().saveOrUpdate(p);
193
                            p = (Person) importer.getAgentService().find(uuid);
194
                            personMap.put(p.getTitleCache(),uuid);
195
                        }else{
196
                            uuid = personMap.get(p.getTitleCache());
197
                            p = (Person) importer.getAgentService().find(uuid);
198
                        }
199
                        //                        logger.info("ADD PERSON "+p);
200
                        persons.add(p);
201
                    }
202
                    if ((p!=null) && role.equals(EDITOR)) {
203
                        editors.add(p.getTitleCache());
204
                    }
205
                }
206
            }
207
        }
208
        if (persons.size()>0){
209
            if (persons.size()==1){
210
                ref.setAuthorTeam(persons.get(0));
211
            }
212
            else{
213
                Team authorTeam = Team.NewInstance();
214
                for (Person pers:persons){
215
                    authorTeam.addTeamMember(pers);
216
                }
217

    
218
                if (!personMap.containsKey(authorTeam.getTitleCache()) && (authorTeam.getTeamMembers().size()>0)){
219
                    UUID uuid = importer.getAgentService().saveOrUpdate(authorTeam);
220
                    personMap.put(authorTeam.getTitleCache(),uuid);
221
                }else{
222
                    if(authorTeam.getTeamMembers().size()>1) {
223
                        authorTeam =  (Team) importer.getAgentService().find(personMap.get(authorTeam.getTitleCache()));
224
                    }
225
                }
226

    
227
                ref.setAuthorTeam(authorTeam);
228
            }
229
        }
230
        if (editors.size()>0) {
231
            ref.setEditor(StringUtils.join(editors,", "));
232
        }
233
        mapmap.put("role",StringUtils.join(roleList.toArray(),SPLITTER));
234
        return mapmap;
235
    }
236

    
237

    
238
    /**
239
     * @param item
240
     * @param modsMap
241
     */
242
    private void addRelatedMods(Node node, Map<String, String> modsMap, Reference<?> ref) {
243
        NodeList tmp =node.getChildNodes();
244
        NodeList partNodes = null;
245
        NodeList children = null;
246

    
247
        List<String> originInfo = null;
248
        List<String> partList = null;
249

    
250
        TimePeriod date;
251

    
252
        String publisher="";
253
        String publishplace="";
254
        String pstart="";
255
        String pend="";
256

    
257
        Map<String,String> mapmap=null;
258

    
259
        Map<String, String> relatedInfoMap = new HashMap<String, String>();
260
        List<String> roleList = new ArrayList<String>();
261
        String content="";
262

    
263
        relatedInfoMap.put("type",node.getAttributes().getNamedItem("type").getNodeValue());
264

    
265
        for (int j=0;j<tmp.getLength();j++){
266
            if (tmp.item(j).getNodeName().equalsIgnoreCase("mods:titleInfo")) {
267
                content=tmp.item(j).getTextContent().trim();
268
                if (!content.isEmpty()) {
269
                    relatedInfoMap.put("titleInfo",content);
270
                    if (node.getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("host")){
271
                        List<Reference> references = importer.getReferenceService().list(Reference.class, 0, 0, null, null);
272
                        boolean refFound = false;
273
                        IBook book = null;
274
                        for (Reference<?> refe:references){
275
                            if(refe.getTitleCache().equalsIgnoreCase(content)){
276
                                refFound=true;
277
                                book= refe;
278
                            }
279
                        }
280
                        if (!refFound){
281
                            book = ReferenceFactory.newBook();
282
                            //                            book.setTitleCache(content,true);
283
                            book.setTitle(content);
284
                            //                            book.generateTitle();
285
                        }
286
                        if ((ref.getInBook() == null) || !ref.getInBook().equals(book)) {
287
                            ref.setInBook(book);
288
                            //                            ref.generateTitle();
289
                        }
290
                    }
291
                }
292
            }
293

    
294
            if (tmp.item(j).getNodeName().equalsIgnoreCase("mods:originInfo")) {
295
                children = tmp.item(j).getChildNodes();
296
                originInfo = new ArrayList<String>();
297
                for (int i=0;i<children.getLength();i++){
298
                    content=children.item(i).getTextContent().trim();
299
                    if (!content.isEmpty()) {
300
                        originInfo.add(children.item(i).getNodeName()+":"+content);
301
                        if (children.item(i).getNodeName().contains("dateIssued")) {
302
                            ref.setDatePublished(TimePeriodParser.parseString(content));
303
                        }
304
                    }
305
                    publisher="";
306
                    publishplace="";
307
                    if (children.item(i).getNodeName().contains("publisher")) {
308
                        try{
309
                            publisher=children.item(i).getChildNodes().item(0).getTextContent().trim();
310
                            //                            System.out.println("PUBLISHER "+publisher);
311
                        }catch(Exception e){System.out.println("oups "+e);}
312
                    }
313
                    if (children.item(i).getNodeName().contains("place")) {
314
                        try{
315
                            publishplace=children.item(i).getTextContent().trim();
316
                            //                            System.out.println("PUBLISHED "+publishplace);
317
                        }catch(Exception e){System.out.println("oups "+e);}
318
                    }
319
                    if (publishplace.isEmpty() && !publisher.isEmpty()) {
320
                        ref.setPublisher(publisher);
321
                    }
322
                    if (!publishplace.isEmpty() && !publisher.isEmpty()) {
323
                        ref.setPublisher(publisher, publishplace);
324
                    }
325
                }
326
                relatedInfoMap.put("originInfo", StringUtils.join(originInfo.toArray(),SPLITTER));
327
            }
328

    
329

    
330
            if (tmp.item(j).getNodeName().equalsIgnoreCase("mods:name")){
331
                mapmap = getModsNames(tmp.item(j),ref);
332
                if (!mapmap.isEmpty()) {
333
                    roleList.add(mapmap.toString());
334
                }
335
            }
336
            if (tmp.item(j).getNodeName().equalsIgnoreCase("mods:part")){
337
                children = tmp.item(j).getChildNodes();
338
                partList = new ArrayList<String>();
339
                for (int i=0;i<children.getLength();i++){
340
                    mapmap = new HashMap<String, String>();
341
                    //                    System.out.println(children.item(i).getNodeName());
342

    
343
                    if (children.item(i).getNodeName().equalsIgnoreCase("mods:date")){
344
                        content = children.item(i).getTextContent().trim();
345
                        if (!content.isEmpty()){
346
                            date = TimePeriodParser.parseString(content);
347
                            ref.setDatePublished(date);
348
                        }
349
                    }
350
                    if (children.item(i).getNodeName().equalsIgnoreCase("mods:detail") &&
351
                            children.item(i).getAttributes().getNamedItem("type").getNodeValue().equalsIgnoreCase("volume")){
352
                        partNodes = children.item(i).getChildNodes();
353
                        for (int k=0; k<partNodes.getLength();k++){
354
                            if (partNodes.item(k).getNodeName().equalsIgnoreCase("mods:number")) {
355
                                content = partNodes.item(k).getTextContent().trim();
356
                                if (!content.isEmpty()) {
357
                                    ref.setVolume(content);
358
                                }
359
                            }
360
                        }
361
                    }
362
                    if (children.item(i).getNodeName().equalsIgnoreCase("mods:extent")) {
363
                        mapmap.put("unit", children.item(i).getAttributes().getNamedItem("unit").getNodeValue());
364
                        partNodes = children.item(i).getChildNodes();
365
                        pstart="";
366
                        pend="";
367
                        for (int k=0; k<partNodes.getLength();k++){
368
                            if (partNodes.item(k).getNodeName().equalsIgnoreCase("mods:start")) {
369
                                content = partNodes.item(k).getTextContent().trim();
370
                                if (!content.isEmpty()) {
371
                                    mapmap.put("start",content);
372
                                    pstart=content;
373
                                }
374
                            }
375
                            if (partNodes.item(k).getNodeName().equalsIgnoreCase("mods:end")) {
376
                                content = partNodes.item(k).getTextContent().trim();
377
                                if (!content.isEmpty()) {
378
                                    mapmap.put("end",content);
379
                                    pend=content;
380
                                }
381
                            }
382
                        }
383
                        //                        System.out.println("SET PAGES "+pstart+"-"+pend);
384
                        ref.setPages(pstart+"-"+pend);
385
                    }
386
                    partList.add(mapmap.toString());
387
                }
388
                modsMap.put("part",StringUtils.join(partList.toArray(),SPLITTER));
389
            }
390
        }
391
        relatedInfoMap.put("relatedRoles", StringUtils.join(roleList.toArray(),SPLITTER));
392
        modsMap.put("relatedInfo",relatedInfoMap.toString());
393
    }
394

    
395
}
(7-7/9)