Get the HTML source of a page using Apache Commons HttpClient
HttpClient version: 3.1
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;
/**
 * Example of fetching a page's HTML source with Apache Commons HttpClient 3.1.
 */
public class Test {

    /**
     * Issues an HTTP GET for {@code url} and returns the response body as text.
     *
     * <p>The request carries browser-like headers. A gzip-compressed response
     * (as indicated by the {@code Content-Encoding} response header) is
     * decompressed transparently. Line terminators in the body are replaced by
     * a single space, so the result is one long line ending with a space after
     * the last line read.
     *
     * <p>The HTTP status code is not inspected; the body of an error response
     * is returned like any other body.
     *
     * @param url      address of the document to download
     * @param encoding name of the charset used to decode the response bytes
     * @return the decoded body with each line followed by a space, or an empty
     *         string when the response has no body
     * @throws IOException if the request fails, the body cannot be read, or
     *                     {@code encoding} is not a supported charset
     */
    private String getHtmlSource(String url, String encoding) throws IOException {
        HttpClient client = new HttpClient();
        // The target URL must be handed to the method; a bare GetMethod() has none.
        HttpMethod method = new GetMethod(url);
        method.setRequestHeader("user-agent", "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-TW; rv:1.9.0.1) Gecko/2008070208 Firefox/3.0.1");
        method.setRequestHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        method.setRequestHeader("accept-language", "en-us;q=0.7,en;q=0.3");
        // Advertise only gzip: it is the only content coding decoded below.
        method.setRequestHeader("accept-encoding", "gzip");
        method.setRequestHeader("accept-charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7");
        method.setRequestHeader("keep-alive", "300");
        method.setRequestHeader("connection", "keep-alive");

        InputStream in = null;
        BufferedReader reader = null;
        try {
            client.executeMethod(method);

            in = method.getResponseBodyAsStream();
            if (in == null) {
                // No response body available.
                return "";
            }

            // Unwrap the body if the server compressed it.
            if (method.getResponseHeader("Content-Encoding") != null
                    && method.getResponseHeader("Content-Encoding").getValue().equalsIgnoreCase("gzip")) {
                in = new GZIPInputStream(in);
            }

            reader = new BufferedReader(new InputStreamReader(in, encoding));

            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line).append(' ');
            }
            return sb.toString();
        } finally {
            try {
                // Closing the outermost wrapper closes the whole stream chain;
                // fall back to the raw stream if the reader was never built.
                if (reader != null) {
                    reader.close();
                } else if (in != null) {
                    in.close();
                }
            } finally {
                // Always hand the connection back, even when reading failed.
                method.releaseConnection();
            }
        }
    }
}