package com.sdyc.jise.fetch.crawler.requester;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.net.HttpRequest;
import cn.edu.hfut.dmic.webcollector.net.HttpResponse;
import cn.edu.hfut.dmic.webcollector.net.Requester;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
/**
 * {@link Requester} implementation that fetches a page through WebCollector's
 * {@link HttpRequest}, applying the optional cookie, user agent, connect timeout and
 * extra headers configured on this instance.
 *
 * <p>A failed request is retried with a linearly growing pause between attempts. When
 * every attempt fails, a synthetic 404 response with an empty body is returned instead
 * of propagating the failure.
 *
 * <p>Created by zhenqin on 17/4/25 16:26. Vendor: NowledgeData.
 *
 * @author zhenqin
 */
public class JavaNativeRequester implements Requester {

    /**
     * Total number of attempts (the first try included) made before giving up.
     */
    private static final int MAX_ATTEMPTS = 5;

    /**
     * The pause before the next attempt is {@code failedAttempts * RETRY_STEP_SECONDS} seconds.
     */
    private static final int RETRY_STEP_SECONDS = 2;

    /**
     * HTTP cookie sent with every request; skipped when blank.
     */
    protected String cookie;

    /**
     * User-Agent sent with every request; skipped when blank.
     */
    protected String userAgent;

    /**
     * Connect timeout handed to the request; values {@code <= 0} leave the request untouched.
     * NOTE(review): unit is presumably milliseconds - confirm against
     * {@code HttpRequest#setTimeoutForConnect}.
     */
    protected int connectTimeout = -1;

    /**
     * Extra HTTP headers (name to value) added to every request.
     */
    protected final Map<String, String> header = new HashMap<String, String>(5);

    /**
     * Logger shared by this class and its subclasses.
     */
    protected static final Logger LOG = LoggerFactory.getLogger(JavaNativeRequester.class);

    public JavaNativeRequester() {
    }

    /**
     * Fetches the URL described by {@code crawlDatum}.
     *
     * <p>Up to {@value #MAX_ATTEMPTS} attempts are made. After the n-th failure the thread
     * pauses {@code n * 2} seconds before trying again; no pause happens after the last
     * failure because nothing would follow it.
     *
     * @param crawlDatum the crawl task whose URL is requested
     * @return the response of the first successful attempt, or a synthetic 404 response
     *         with an empty body when all attempts failed
     * @throws InterruptedException if the thread is interrupted while pausing between
     *         attempts (the interrupt flag is restored before rethrowing)
     * @throws Exception if building the request or the fallback response fails
     */
    @Override
    public HttpResponse getResponse(CrawlDatum crawlDatum) throws Exception {
        HttpRequest request = new HttpRequest(crawlDatum);
        if (StringUtils.isNotBlank(cookie)) {
            request.setCookie(cookie);
        }
        if (StringUtils.isNotBlank(userAgent)) {
            request.setUserAgent(userAgent);
        }
        if (connectTimeout > 0) {
            request.setTimeoutForConnect(connectTimeout);
        }
        for (Map.Entry<String, String> entry : header.entrySet()) {
            request.addHeader(entry.getKey(), entry.getValue());
        }

        LOG.info("fetch url: {}", crawlDatum.url());
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                return request.response();
            } catch (Exception e) {
                if (attempt == MAX_ATTEMPTS) {
                    // Nothing follows the last attempt, so do not sleep or announce a retry.
                    LOG.warn("fetch url failed after " + MAX_ATTEMPTS + " attempts, giving up: "
                            + crawlDatum.url(), e);
                    break;
                }
                int waitSeconds = attempt * RETRY_STEP_SECONDS;
                // Pass the exception along so the cause of each failed attempt is not lost.
                LOG.info("不知道是否IP发生切换,发送抓取异常, 稍等 " + waitSeconds + "s ,进行重试。", e);
                pause(waitSeconds * 1000L);
                LOG.info("等待后,重试开始, 当前重试第 " + attempt + " 次。");
            }
        }
        return notFoundResponse(crawlDatum);
    }

    /**
     * Sleeps for {@code millis}, restoring the interrupt flag if the sleep is interrupted.
     */
    private static void pause(long millis) throws InterruptedException {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw e;
        }
    }

    /**
     * Builds the fallback response returned when all attempts failed: 404, not a redirect,
     * empty HTML.
     */
    private static HttpResponse notFoundResponse(CrawlDatum crawlDatum) throws Exception {
        HttpResponse response = new HttpResponse(new URL(crawlDatum.url()));
        response.setNotFound(true);
        response.setRedirect(false);
        response.code(404);
        response.setHtml("");
        return response;
    }

    /**
     * @return the cookie sent with every request, may be {@code null}
     */
    public String getCookie() {
        return cookie;
    }

    /**
     * @param cookie the cookie to send with every request; blank values are ignored
     */
    public void setCookie(String cookie) {
        this.cookie = cookie;
    }

    /**
     * @return the User-Agent sent with every request, may be {@code null}
     */
    public String getUserAgent() {
        return userAgent;
    }

    /**
     * @param userAgent the User-Agent to send with every request; blank values are ignored
     */
    public void setUserAgent(String userAgent) {
        this.userAgent = userAgent;
    }

    /**
     * @return the configured connect timeout; {@code <= 0} means it is not applied
     */
    public int getConnectTimeout() {
        return connectTimeout;
    }

    /**
     * @param connectTimeout the connect timeout to apply; {@code <= 0} leaves the request untouched
     */
    public void setConnectTimeout(int connectTimeout) {
        this.connectTimeout = connectTimeout;
    }

    /**
     * @return the live (not copied) map of extra headers; changes to it affect later requests
     */
    public Map<String, String> getHeader() {
        return header;
    }

    /**
     * Registers an extra header that is added to every request.
     *
     * @param key   the header name
     * @param value the header value
     * @return the value previously stored under {@code key}, or {@code null} if there was none
     */
    public String addHeader(String key, String value) {
        return header.put(key, value);
    }
}