From da2fd99814d8b3f2b91bf6cef91ba9d62d218bab Mon Sep 17 00:00:00 2001 From: witt Date: Fri, 13 Mar 2020 16:14:37 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E5=AE=8C=E5=96=84=E6=8E=88=E6=9D=83?= =?UTF-8?q?=E9=AA=8C=E8=AF=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/action/MoveBlogAction.java | 53 ++++++++++---------------- src/action/Oauth2Action.java | 39 +++++-------------- src/action/SpiderAction.java | 60 +++++++++++++----------------- src/action/SysCatalogAction.java | 51 ++++++++++++------------- src/action/UserAction.java | 3 +- src/common/AppConfig.xml | 42 ++++++++++----------- src/common/AuthenticationTool.java | 58 +++++++++++++++++++++++++++++ src/oschina/BlogApi.java | 3 +- 8 files changed, 161 insertions(+), 148 deletions(-) create mode 100644 src/common/AuthenticationTool.java diff --git a/src/action/MoveBlogAction.java b/src/action/MoveBlogAction.java index edf1d11..81fc38a 100644 --- a/src/action/MoveBlogAction.java +++ b/src/action/MoveBlogAction.java @@ -1,25 +1,24 @@ package action; -import java.io.IOException; - -import javax.servlet.ServletException; -import javax.servlet.annotation.WebServlet; -import javax.servlet.http.Cookie; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.apache.commons.lang3.StringUtils; - +import beans.Blog; +import common.AuthenticationTool; import common.JsonMsg; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import oschina.BlogApi; import spider.BlogList; -import spider.BlogPipeline; import spider.BlogPageProcessor; +import spider.BlogPipeline; import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.processor.PageProcessor; -import beans.Blog; + +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.IOException; /** * 博客搬家action @@ -37,26 +36,15 @@ public class MoveBlogAction extends HttpServlet { @Override protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - String user=""; - Cookie[] cookie = request.getCookies(); - - if(cookie == null){ - JsonMsg.json_out(JsonMsg.jsonError("请先授权!"),response); - return; - } - - for (int i = 0; i < cookie.length; i++) { - Cookie cook = cookie[i]; - if(cook.getName().equalsIgnoreCase("user")){ //获取键 - user = cook.getValue().toString(); - break; - } - } - - if(StringUtils.isBlank(user)){//授权码获取失败 - JsonMsg.json_out(JsonMsg.jsonError("请先授权!"),response); + // 查询授权信息 + Pair userToken = AuthenticationTool.ME.getTokenFromCookie(request.getCookies()); + if (userToken == null) { + JsonMsg.json_out(JsonMsg.jsonError("请先授权!"), response); return; } + + String user = userToken.getLeft(); + String token = userToken.getRight(); String link = request.getParameter("link"); String user_catalog = request.getParameter("user_catalog"); @@ -130,8 +118,7 @@ public class MoveBlogAction extends HttpServlet { blog.setOrigin_url(link); } - long key = Long.valueOf(user); - String token = Oauth2Action.Users().get(key); + String reString = BlogApi.pubBlog(blog,token); //根据access_token 导入blog if(StringUtils.isBlank(reString) || reString.contains("error=500")){ diff --git a/src/action/Oauth2Action.java b/src/action/Oauth2Action.java index 58f1da7..f79e9ab 100644 --- a/src/action/Oauth2Action.java +++ b/src/action/Oauth2Action.java @@ -1,25 +1,21 @@ package action; -import java.io.IOException; -import java.net.URLEncoder; -import java.util.concurrent.ConcurrentHashMap; +import beans.User; +import common.AuthenticationTool; +import common.JsonMsg; +import org.apache.commons.lang3.StringUtils; +import oschina.Oauth2Api; +import oschina.UserApi; -import javax.servlet.ServletConfig; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.Cookie; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; - -import org.apache.commons.lang3.StringUtils; - -import common.JsonMsg; - -import oschina.Oauth2Api; -import oschina.UserApi; -import beans.User; +import java.io.IOException; +import java.net.URLEncoder; /** * 获取认证action @@ -29,8 +25,6 @@ import beans.User; @WebServlet("/Oauth2Action") public class Oauth2Action extends HttpServlet { - public static ConcurrentHashMap Users; - @Override protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { this.doPost(request, response); @@ -58,8 +52,8 @@ public class Oauth2Action extends HttpServlet { JsonMsg.json_out(JsonMsg.jsonError("user获取失败"),response); return; } - - Users().put(Long.valueOf(user.getId()), access_token); + + AuthenticationTool.ME.putUser(Long.parseLong(user.getId()), access_token); Cookie u = new Cookie("user",user.getId()) ; int maxAge = 60*10*6;//设置最长的Cookie时间为60分钟(1个小时) @@ -76,17 +70,4 @@ public class Oauth2Action extends HttpServlet { response.sendRedirect("/index.html"); } - - @Override - public void init(ServletConfig config) throws ServletException { - Users = new ConcurrentHashMap(); - super.init(config); - } - - public static ConcurrentHashMap Users(){ - if(Users==null){ - Users = new ConcurrentHashMap(); - } - return Users; - } } diff --git a/src/action/SpiderAction.java b/src/action/SpiderAction.java index 5d84021..87d0dc7 100644 --- a/src/action/SpiderAction.java +++ b/src/action/SpiderAction.java @@ -1,24 +1,26 @@ package action; -import java.io.IOException; -import java.util.List; -import javax.servlet.ServletException; -import javax.servlet.annotation.WebServlet; -import javax.servlet.http.Cookie; -import javax.servlet.http.HttpServlet; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import org.apache.commons.lang3.StringUtils; +import beans.BlogLink; import com.google.gson.Gson; +import common.AuthenticationTool; import common.JsonMsg; -import beans.BlogLink; -import spider.BlogPipeline; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import spider.BlogPageProcessor; +import spider.BlogPipeline; import spider.LinksList; import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.processor.PageProcessor; +import javax.servlet.ServletException; +import javax.servlet.annotation.WebServlet; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.IOException; +import java.util.List; + /** * 爬虫调用action * @author oscfox @@ -33,41 +35,29 @@ public class SpiderAction extends HttpServlet { } protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - + String result=""; String url = request.getParameter("url"); - + if(StringUtils.isBlank(url)){ JsonMsg.json_out(JsonMsg.jsonError("请输入url!"),response); return; } - + if(!url.contains("http://") && !url.contains("https://")){ url="http://"+url; } - - String user=""; - Cookie[] cookie = request.getCookies(); - if(cookie == null){ - JsonMsg.json_out(JsonMsg.jsonError("请先授权!"),response); - return; - } - - for (int i = 0; i < cookie.length; i++) { - Cookie cook = cookie[i]; - if(cook.getName().equalsIgnoreCase("user")){ //获取键 - user = cook.getValue().toString(); - break; - } - } - - if(StringUtils.isBlank(user)){//授权码获取失败 - JsonMsg.json_out(JsonMsg.jsonError("请先授权!"),response); + // 查询授权信息 + Pair userToken = AuthenticationTool.ME.getTokenFromCookie(request.getCookies()); + if (userToken == null) { + JsonMsg.json_out(JsonMsg.jsonError("请先授权!"), response); return; } - - PageProcessor pageProcessor=null; + + String user = userToken.getLeft(); + + PageProcessor pageProcessor; try { pageProcessor = new BlogPageProcessor(url); } catch (Exception e) { @@ -80,7 +70,7 @@ public class SpiderAction extends HttpServlet { Spider.create(pageProcessor) .addUrl(url) .addPipeline(new BlogPipeline(user)).run(); - + List linkList=LinksList.getLinkList(user); if(null == linkList){ JsonMsg.json_out(JsonMsg.jsonError("链接有误或抓取超时!"), response); diff --git a/src/action/SysCatalogAction.java b/src/action/SysCatalogAction.java index d65da35..57875c3 100644 --- a/src/action/SysCatalogAction.java +++ b/src/action/SysCatalogAction.java @@ -1,41 +1,36 @@ package action; -import java.io.IOException; +import common.AuthenticationTool; +import common.JsonMsg; +import org.apache.commons.lang3.tuple.Pair; +import oschina.BlogApi; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; -import javax.servlet.http.Cookie; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import common.JsonMsg; -import oschina.BlogApi; +import java.io.IOException; @WebServlet("/action/syscatalog") public class SysCatalogAction extends HttpServlet { - protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - doPost(request, response); - } - - protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { - - String authoruid = null; - Cookie[] cookie = request.getCookies(); - - if(cookie == null){ - JsonMsg.json_out(JsonMsg.jsonError("请先授权!"),response); - return; - } - for (int i = 0; i < cookie.length; i++) { - Cookie cook = cookie[i]; - if(cook.getName().equalsIgnoreCase("user")){ //获取键 - authoruid = cook.getValue().toString(); - break; - } - } - - String reString = BlogApi.getBlogSysCatalog(authoruid); - JsonMsg.json_out(reString, response); - } + protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + doPost(request, response); + } + + protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { + + // 查询授权信息 + Pair userToken = AuthenticationTool.ME.getTokenFromCookie(request.getCookies()); + if (userToken == null) { + JsonMsg.json_out(JsonMsg.jsonError("请先授权!"), response); + return; + } + + String user = userToken.getLeft(); + + String reString = BlogApi.getBlogSysCatalog(user); + JsonMsg.json_out(reString, response); + } } diff --git a/src/action/UserAction.java b/src/action/UserAction.java index e2730a2..2f75476 100644 --- a/src/action/UserAction.java +++ b/src/action/UserAction.java @@ -8,6 +8,7 @@ import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import common.AuthenticationTool; import org.apache.commons.lang3.StringUtils; import com.alibaba.fastjson.JSON; @@ -28,7 +29,7 @@ public class UserAction extends HttpServlet { if(!StringUtils.isNumeric(user_id)) return; - String access_token = Oauth2Action.Users().get(Long.valueOf(user_id)); + String access_token = AuthenticationTool.ME.getToken(Long.parseLong(user_id)); if(null == access_token){ JsonMsg.json_out(JsonMsg.jsonError("请重新认证!",JsonMsg.ERROR_CODE_AUTH), response); diff --git a/src/common/AppConfig.xml b/src/common/AppConfig.xml index 3ffe2d0..daca12d 100644 --- a/src/common/AppConfig.xml +++ b/src/common/AppConfig.xml @@ -1,4 +1,4 @@ - + -   - + + - http://www.oschina.com:8090 - - - - 1P7IsIMrGXqs6AZrcZJtjZRMjain5r5J - 2rbeJBgeNRvg0gqZJIci - http://www.moveblog.com:8081/Oauth2Action - - - /action/openapi/token - /action/openapi/blog_pub - /action/openapi/user - - /action/openapi/blog_catalog_list - - - \ No newline at end of file + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/common/AuthenticationTool.java b/src/common/AuthenticationTool.java new file mode 100644 index 0000000..a45fb08 --- /dev/null +++ b/src/common/AuthenticationTool.java @@ -0,0 +1,58 @@ +package common; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; + +import javax.servlet.http.Cookie; +import java.util.concurrent.ConcurrentHashMap; + +public class AuthenticationTool { + + /** + * key osc user id + * value token + */ + private static ConcurrentHashMap USERS = new ConcurrentHashMap<>(); + + public static final AuthenticationTool ME = new AuthenticationTool(); + + private AuthenticationTool() { + // test + // USERS.put(3270170L, "test"); + } + + public void putUser(long id, String token) { + USERS.put(id, token); + } + + public String getToken(long id) { + return USERS.get(id); + } + + public String getToken(String user) { + if (!StringUtils.isBlank(user) && StringUtils.isNumeric(user)) { + long userId = Long.parseLong(user); + return USERS.get(userId); + } + return null; + } + + /** + * @param cookies cookies + * @return pair + * + */ + public Pair getTokenFromCookie(Cookie[] cookies) { + if (cookies != null) { + for (Cookie cookie : cookies) { + if (cookie.getName().equalsIgnoreCase("user")) { //获取键 + String user = cookie.getValue(); + String token = getToken(user); + return StringUtils.isBlank(token) ? null : Pair.of(user, token); + } + } + } + return null; + } + +} diff --git a/src/oschina/BlogApi.java b/src/oschina/BlogApi.java index 4ad1225..a44a192 100644 --- a/src/oschina/BlogApi.java +++ b/src/oschina/BlogApi.java @@ -2,6 +2,7 @@ package oschina; import java.io.IOException; +import common.AuthenticationTool; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.NameValuePair; @@ -137,7 +138,7 @@ public static String getBlogSysCatalog(String authoruid) { "Mozilla/5.0 (X11; U; Linux i686; zh-CN; rv:1.9.1.2) Gecko/20090803"); AppConfigTool configTool = new AppConfigTool(); - String access_token = Oauth2Action.Users().get(Long.valueOf(authoruid)); + String access_token = AuthenticationTool.ME.getToken(Long.parseLong(authoruid)); PostMethod method = new PostMethod(configTool.getConfig("osc_host") + configTool.getConfig("blog_sys_catalog")); method.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET,"utf-8"); -- Gitee From 3fcce588b0a63e71f26ecfadeaa9119dd428ac25 Mon Sep 17 00:00:00 2001 From: witt Date: Fri, 13 Mar 2020 18:24:24 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E8=AF=B7=E6=B1=82=20CSDN=20=E5=88=97?= =?UTF-8?q?=E8=A1=A8=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/AuthenticationTool.java | 2 +- src/common/Spider.xml | 2 +- src/spider/BlogPageProcessor.java | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/common/AuthenticationTool.java b/src/common/AuthenticationTool.java index a45fb08..3089da4 100644 --- a/src/common/AuthenticationTool.java +++ b/src/common/AuthenticationTool.java @@ -18,7 +18,7 @@ public class AuthenticationTool { private AuthenticationTool() { // test - // USERS.put(3270170L, "test"); + //USERS.put(3270170L, "test"); } public void putUser(long id, String token) { diff --git a/src/common/Spider.xml b/src/common/Spider.xml index 7365b86..e63c5ea 100644 --- a/src/common/Spider.xml +++ b/src/common/Spider.xml @@ -24,7 +24,7 @@ ]]> - + diff --git a/src/spider/BlogPageProcessor.java b/src/spider/BlogPageProcessor.java index 1db07c6..f81351e 100644 --- a/src/spider/BlogPageProcessor.java +++ b/src/spider/BlogPageProcessor.java @@ -257,6 +257,17 @@ public class BlogPageProcessor implements PageProcessor{ page.addTargetRequests(Pagelinks); } + } else if (this.domain.equals("blog.csdn.net")) { + String blogTotalInfo = page.getHtml().xpath("//*[@id=\"asideProfile\"]/div[2]/dl[1]/dd/a/span/text()").toString(); + if (StringUtils.isNumeric(blogTotalInfo)) { + int blogTotal = Integer.parseInt(blogTotalInfo); + int pageSize = 40; + int pageTotal = blogTotal % pageSize == 0 ? blogTotal / pageSize : blogTotal / pageSize + 1; + String url = PagelinksRex.get(0).replace("\\", ""); + for (int i = 2; i <= pageTotal; i++) { + page.addTargetRequest(url + i); + } + } } else { page.addTargetRequests(Pagelinks); } -- Gitee