package com.sdyc.jise.fetch.crawler.requester; import cn.edu.hfut.dmic.webcollector.model.CrawlDatum; import cn.edu.hfut.dmic.webcollector.net.HttpRequest; import cn.edu.hfut.dmic.webcollector.net.HttpResponse; import cn.edu.hfut.dmic.webcollector.net.Requester; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.net.URL; import java.util.HashMap; import java.util.Map; /** *
 *
 * Created by zhenqin.
 * User: zhenqin
 * Date: 17/4/25
 * Time: 16:26
 * Vendor: NowledgeData
 * To change this template use File | Settings | File Templates.
 *
 * 
* * @author zhenqin */ public class JavaNativeRequester implements Requester { /** * Http Cookie */ protected String cookie; /** * UserAgent */ protected String userAgent; /** * 访问超时时间 */ protected int connectTimeout = -1; /** * Http Header */ protected final Map header = new HashMap(5); /** * 日志系统 */ protected static Logger LOG = LoggerFactory.getLogger(JavaNativeRequester.class); public JavaNativeRequester() { } @Override public HttpResponse getResponse(CrawlDatum crawlDatum) throws Exception { HttpRequest request = new HttpRequest(crawlDatum); if(StringUtils.isNotBlank(cookie)) { request.setCookie(cookie); } if(StringUtils.isNotBlank(userAgent)) { request.setUserAgent(userAgent); } if(connectTimeout > 0) { request.setTimeoutForConnect(connectTimeout); } if(!header.isEmpty()) { for (Map.Entry entry : header.entrySet()) { request.addHeader(entry.getKey(), entry.getValue()); } } LOG.info("fetch url: {}", crawlDatum.url()); HttpResponse response = null; int retry = 0; do { try { response = request.response(); break; } catch (Exception e) { retry++; LOG.info("不知道是否IP发生切换,发送抓取异常, 稍等 " + (retry * 2) + "s ,进行重试。"); Thread.sleep(retry * 2 * 1000); LOG.info("等待后,重试开始, 当前重试第 " + retry + " 次。"); } } while(retry < 5); if(retry >= 5){ response = new HttpResponse(new URL(crawlDatum.url())); response.setNotFound(true); response.setRedirect(false); response.code(404); response.setHtml(""); } return response; } public String getCookie() { return cookie; } public void setCookie(String cookie) { this.cookie = cookie; } public String getUserAgent() { return userAgent; } public void setUserAgent(String userAgent) { this.userAgent = userAgent; } public int getConnectTimeout() { return connectTimeout; } public void setConnectTimeout(int connectTimeout) { this.connectTimeout = connectTimeout; } public Map getHeader() { return header; } public String addHeader(String key, String value) { return header.put(key, value); } }