1. HttpClient的使用
1.1 依赖
<dependency>
<groupId>org.apache.httpcomponents
</groupId>
<artifactId>httpclient
</artifactId>
<version>4.5.6
</version>
</dependency>
1.2 JavaApi
package com
.sun
.httpclient
;
import com
.sun
.javafx
.fxml
.builder
.URLBuilder
;
import org
.apache
.http
.NameValuePair
;
import org
.apache
.http
.client
.HttpClient
;
import org
.apache
.http
.client
.config
.RequestConfig
;
import org
.apache
.http
.client
.entity
.UrlEncodedFormEntity
;
import org
.apache
.http
.client
.methods
.CloseableHttpResponse
;
import org
.apache
.http
.client
.methods
.HttpGet
;
import org
.apache
.http
.client
.methods
.HttpPost
;
import org
.apache
.http
.client
.utils
.URIBuilder
;
import org
.apache
.http
.impl
.client
.CloseableHttpClient
;
import org
.apache
.http
.impl
.client
.HttpClientBuilder
;
import org
.apache
.http
.impl
.client
.HttpClients
;
import org
.apache
.http
.impl
.conn
.PoolingHttpClientConnectionManager
;
import org
.apache
.http
.message
.BasicNameValuePair
;
import org
.apache
.http
.util
.EntityUtils
;
import java
.io
.IOException
;
import java
.io
.UnsupportedEncodingException
;
import java
.net
.URI
;
import java
.util
.ArrayList
;
import java
.util
.List
;
/**
 * Small demonstrations of Apache HttpClient 4.x usage: a plain GET, a GET with
 * query parameters, a form-encoded POST, a pooled connection manager and
 * per-request timeout configuration.
 *
 * <p>Every client and response opened here is released with try-with-resources
 * so that connections are not leaked.
 */
public class HttpTest {

    /** Target of the parameterless GET demos. */
    private static final String BAIDU_URL = "http://www.baidu.com";

    /** Target of the search demos (GET with parameters and form POST). */
    private static final String SEARCH_URL = "http://yun.itheima.com/search";

    /** Charset name used to encode form data and decode response bodies. */
    private static final String CHARSET = "UTF-8";

    /**
     * Entry point; runs the form POST demo.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the demo request fails
     */
    public static void main(String[] args) throws Exception {
        httpPostDataTest();
    }

    /**
     * Issues a GET request and prints the body when the status code is 200.
     *
     * <p>I/O failures are reported on {@code System.err} instead of being
     * silently discarded; the method itself still does not throw.
     */
    public static void httpGetTest() {
        HttpGet httpGet = new HttpGet(BAIDU_URL);
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            printBodyIfOk(response);
        } catch (IOException e) {
            // Previously an empty catch block: keep the best-effort behavior
            // but make the failure visible.
            System.err.println("GET " + BAIDU_URL + " failed: " + e);
        }
    }

    /**
     * Issues a GET request whose query string is built with {@code URIBuilder}
     * ({@code keys=Java}) and prints the body when the status code is 200.
     *
     * @throws Exception if the URI cannot be built or the request fails
     */
    public static void httpGetParamTest() throws Exception {
        HttpGet httpGet = new HttpGet(new URIBuilder(SEARCH_URL)
                .setParameter("keys", "Java")
                .build());
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {
            printBodyIfOk(response);
        }
    }

    /**
     * Issues a POST request carrying the form field {@code keys=Java} and
     * prints the body when the status code is 200.
     *
     * @throws IOException if the form entity cannot be encoded or the request fails
     */
    public static void httpPostDataTest() throws IOException {
        List<NameValuePair> data = new ArrayList<>();
        data.add(new BasicNameValuePair("keys", "Java"));

        HttpPost httpPost = new HttpPost(SEARCH_URL);
        httpPost.setEntity(new UrlEncodedFormEntity(data, CHARSET));

        // The response was never closed before, which leaked the connection.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpPost)) {
            printBodyIfOk(response);
        }
    }

    /**
     * Shows how to build a client backed by a {@code PoolingHttpClientConnectionManager}.
     *
     * <p>The client is closed before returning because nothing else holds a
     * reference to it. NOTE(review): a real application would presumably keep
     * one pooled client alive and share it rather than build one per call.
     */
    public static void useHttpClientPool() {
        PoolingHttpClientConnectionManager httpPool = new PoolingHttpClientConnectionManager();
        try (CloseableHttpClient httpClient =
                     HttpClients.custom().setConnectionManager(httpPool).build()) {
            // Requests would be executed with httpClient here.
        } catch (IOException e) {
            System.err.println("Closing pooled HttpClient failed: " + e);
        }
    }

    /**
     * Shows how to attach a {@code RequestConfig} to a request. Both the
     * connection-request timeout and the connect timeout are set to 4000
     * (milliseconds, per the HttpClient {@code RequestConfig} documentation).
     * The request is only configured, never executed.
     */
    public static void httpConfigTest() {
        RequestConfig config = RequestConfig.custom()
                .setConnectionRequestTimeout(4000)
                .setConnectTimeout(4000)
                .build();
        HttpGet httpGet = new HttpGet(BAIDU_URL);
        httpGet.setConfig(config);
    }

    /**
     * Prints the response body to standard output when the status code is 200;
     * does nothing for any other status.
     *
     * @param response the response to inspect
     * @throws IOException if the entity cannot be read
     */
    private static void printBodyIfOk(CloseableHttpResponse response) throws IOException {
        if (response.getStatusLine().getStatusCode() == 200) {
            String result = EntityUtils.toString(response.getEntity(), CHARSET);
            System.out.println(result);
        }
    }
}
2.Jsoup
Java HTML解析器
2.1 依赖
<dependency>
<groupId>org.jsoup
</groupId>
<artifactId>jsoup
</artifactId>
<version>1.11.3
</version>
</dependency>
/**
 * Jsoup demo: loads http://www.itcast.cn/ with a timeout argument of 4000,
 * reads the text of the first {@code title} element and runs the CSS
 * selector {@code #id} against the parsed document.
 *
 * @throws IOException if the page cannot be fetched or parsed
 */
public static void testJsoup() throws IOException {
    URL pageUrl = new URL("http://www.itcast.cn/");
    Document page = Jsoup.parse(pageUrl, 4000);

    // Lookup by tag name: take the first <title> element and read its text.
    Elements titleTags = page.getElementsByTag("title");
    String titleText = titleTags.first().text();

    // Lookup by CSS selector.
    Elements matches = page.select("#id");
}
3. HttpClient 整合 Jsoup
代码我只写了大概思路 , 大家可以自行细化 , 分而治之
package com
.sun
.httpclient
;
import org
.apache
.http
.client
.methods
.CloseableHttpResponse
;
import org
.apache
.http
.client
.methods
.HttpGet
;
import org
.apache
.http
.impl
.client
.CloseableHttpClient
;
import org
.apache
.http
.impl
.client
.HttpClients
;
import org
.apache
.http
.impl
.conn
.PoolingHttpClientConnectionManager
;
import org
.apache
.http
.util
.EntityUtils
;
import org
.jsoup
.Jsoup
;
import org
.jsoup
.nodes
.Document
;
import java
.io
.IOException
;
/**
 * Sketch of a crawler that combines Apache HttpClient (download) with Jsoup
 * (parsing): it fetches one JD product page and extracts the title, price and
 * image address.
 *
 * <p>The extracted values are intentionally left as locals; this class only
 * outlines the approach, hence the narrow {@code "unused"} suppression.
 */
@SuppressWarnings("unused")
public class JDSpider {

    /**
     * Downloads the product page and, when the status code is 200, parses the
     * HTML and pulls out the product fields.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws IOException {
        String url = "https://item.jd.com/100005853638.html";

        PoolingHttpClientConnectionManager httpPool = new PoolingHttpClientConnectionManager();
        HttpGet httpGet = new HttpGet(url);

        // Both the client and the response hold network resources; neither was
        // closed before. try-with-resources releases them on every path.
        try (CloseableHttpClient httpClient =
                     HttpClients.custom().setConnectionManager(httpPool).build();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {

            if (response.getStatusLine().getStatusCode() != 200) {
                return;
            }

            String html = EntityUtils.toString(response.getEntity(), "UTF-8");
            Document document = Jsoup.parse(html);

            String title = document.getElementsByClass("sku-name").text();
            // NOTE(review): first() presumably returns null when nothing follows
            // a "p-price" element, which would make text() throw - confirm
            // against the real page markup before relying on this.
            String price = document.getElementsByClass("p-price").next().first().text();
            String img = document.select(".img-hover img").attr("src");
        }
    }
}
4.辅助工具
IP 代理:https://proxy.mimvp.com/freesecret.php ;去重算法:https://github.com/yanyiwu/simhash
5.学习心得
以上的知识是我利用晚上一到两个小时学到的,我敢说自己完全掌握了以上记的笔记,而不只是复制粘贴。对于 API、方法,我希望大家不要去死记硬背。之前我刚开始学 Java 的时候就是背代码,因为当时我的脑海中还没有一个代码执行的流程,不懂这些套路,所以需要背代码,在当时这样做是正确的,不然总是写不出功能。现在,当我学得越来越多,心中有了这样一个总体的流程后,我只需要记得这个类、记得这个功能,具体细节不需要记那么清楚。我做了笔记,一是加深了印象,二是忘记时能快速找到并解决问题。
还有,没事的时候多翻源码,这样你就知道这个类还有哪些其他的方法。多看源码,多看注释,被虐几遍后,你就会发现如此简单。我分享下我是如何看源码的:今天我看了 RestTemplate 类的源码。
首先,我去看这个类(RestTemplate)上的注释,了解这个类的大体作用;其次,我会看这个类的父类的源码并做笔记;然后看它有哪些方法,并做笔记。我遵从先广度优先的原则,先在脑海里大体构建这个类的结构,然后再深度优先,学习细节如何实现、有什么作用。
看源码的过程我觉得并不难 , 只要多被打击几次 , 再来的时候就会游刃有余, 独领风骚 特别强调的是 多看注释
还有一点是,大家学习的时候一定要挑重点看,老师开始讲废话的时候就快进。要高效地学习,不要浪费时间,直击重点,弄清问题的本质。因为你看的教程可能是面向零基础等人群的,那么遇到你已经会的地方就跳过。对于高效学习这点,我做得总是不好,要么是碰到的教程无法适应我的高度,要么不符合我现在的时间要求,需要太多时间成本学习。所以大家如果遇到好的学习教程的话,可以给我分享一下吗 ~