为了综合当当和豆瓣的图书详情数据,在从当当成功抓取图书信息之后,再根据 ISBN 从豆瓣获取一次图书详情数据。
相关豆瓣图书的接口地址为:
https://developers.douban.com/wiki/?title=book_v2#get_isbn_book
然后将豆瓣与当当的图书详情逐字段进行比对,豆瓣如果存在对应的字段则以豆瓣的为准(封面图片和价格例外:只有在当当的封面为空或为默认封面 book-default、当当的价格为空时,才采用豆瓣的值):
1 private boolean compareBookInfo(BookInfo srcInfo, BookInfo dstInfo){ 2 boolean update = false; 3 4 //title 5 if(!StringUtils.isEmpty(srcInfo.getTitle()) 6 && !srcInfo.getTitle().equalsIgnoreCase(dstInfo.getTitle())){ 7 dstInfo.setTitle(srcInfo.getTitle()); 8 update = true; 9 } 10 11 //subtitle 12 if(!StringUtils.isEmpty(srcInfo.getSubtitle()) 13 && !srcInfo.getSubtitle().equalsIgnoreCase(dstInfo.getSubtitle())){ 14 dstInfo.setSubtitle(srcInfo.getSubtitle()); 15 update = true; 16 } 17 18 //image 19 if(!StringUtils.isEmpty(srcInfo.getImage()) 20 && !srcInfo.getImage().equalsIgnoreCase(dstInfo.getImage()) 21 && ( StringUtils.isEmpty(dstInfo.getImage()) || dstInfo.getImage().contains("book-default")) 22 ){ 23 dstInfo.setImage(srcInfo.getImage()); 24 update = true; 25 } 26 27 //author 28 if(!StringUtils.isEmpty(srcInfo.getAuthor()) 29 && !srcInfo.getAuthor().equalsIgnoreCase(dstInfo.getAuthor())){ 30 dstInfo.setAuthor(srcInfo.getAuthor()); 31 update = true; 32 } 33 34 //author_info 35 if(!StringUtils.isEmpty(srcInfo.getAuthor_intro()) 36 && !srcInfo.getAuthor_intro().equalsIgnoreCase(dstInfo.getAuthor_intro())){ 37 dstInfo.setAuthor_intro(srcInfo.getAuthor_intro()); 38 update = true; 39 } 40 41 //rating 42 if(!StringUtils.isEmpty(srcInfo.getRating()) 43 && !srcInfo.getRating().equalsIgnoreCase(dstInfo.getRating())){ 44 dstInfo.setRating(srcInfo.getRating()); 45 update = true; 46 } 47 48 //price 49 if(!StringUtils.isEmpty(srcInfo.getPrice()) 50 && StringUtils.isEmpty(dstInfo.getPrice())){ 51 dstInfo.setPrice(srcInfo.getPrice()); 52 update = true; 53 } 54 55 //tags 56 if(!StringUtils.isEmpty(srcInfo.getTags()) 57 && !srcInfo.getTags().equalsIgnoreCase(dstInfo.getTags())){ 58 dstInfo.setTags(srcInfo.getTags()); 59 update = true; 60 } 61 62 //summary 63 if(!StringUtils.isEmpty(srcInfo.getSummary()) 64 && !srcInfo.getSummary().equalsIgnoreCase(dstInfo.getSummary())){ 65 dstInfo.setSummary(srcInfo.getSummary()); 66 update = true; 67 } 68 69 //catalog 70 
if(!StringUtils.isEmpty(srcInfo.getCatalog()) 71 && !srcInfo.getCatalog().equalsIgnoreCase(dstInfo.getCatalog())){ 72 dstInfo.setCatalog(srcInfo.getCatalog()); 73 update = true; 74 } 75 76 return update; 77 }
可以在这个基础上继续完善对豆瓣和当当的数据合并处理。