From 309ad5845d9136b7f8b0e080172d8e85aa253226 Mon Sep 17 00:00:00 2001
From: Bright <540467981@qq.com>
Date: Wed, 14 Dec 2022 12:18:36 +0800
Subject: [PATCH 1/3] Fix the interval scheduling in task management
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .flaskenv                                          |  8 ++++----
 applications/common/tasks/tasks.py                 | 16 +++++++---------
 applications/view/__init__.py                      |  2 ++
 applications/view/admin/task.py                    |  8 ++++++++
 .../admin/component/pear/css/module/layout.css     |  1 +
 5 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/.flaskenv b/.flaskenv
index 721d9f0..80038d5 100644
--- a/.flaskenv
+++ b/.flaskenv
@@ -9,12 +9,12 @@ FLASK_RUN_PORT = 5000
 SYSTEM_NAME = Pear Admin
 
 # MySQL configuration
-MYSQL_HOST=127.0.0.1
+MYSQL_HOST=202.193.53.151
 # MYSQL_HOST=dbserver
 MYSQL_PORT=3306
 MYSQL_DATABASE=PearAdminFlask
 MYSQL_USERNAME=root
-MYSQL_PASSWORD=123456
+MYSQL_PASSWORD=root
 
 # Redis configuration
 # REDIS_HOST=127.0.0.1
@@ -25,5 +25,5 @@ SECRET_KEY='pear-admin-flask'
 
 # Mail configuration
 MAIL_SERVER='smtp.qq.com'
-MAIL_USERNAME='123@qq.com'
-MAIL_PASSWORD='XXXXX' # generated authorization code
\ No newline at end of file
+MAIL_USERNAME='540467981@qq.com'
+MAIL_PASSWORD='Libingcai@123' # generated authorization code
\ No newline at end of file
diff --git a/applications/common/tasks/tasks.py b/applications/common/tasks/tasks.py
index 85636e7..23c4e81 100644
--- a/applications/common/tasks/tasks.py
+++ b/applications/common/tasks/tasks.py
@@ -1,15 +1,13 @@
 import datetime
-task_list = ['task2', 'task3', 'task4']
+task_list = ['景区评论标题', '线路评论标题', '景区攻略']
 
+def 景区评论标题(id, name):
+    print(id, name)
 
-def task2(a, b):
-    print(f'定时任务_1_{a},{b},{datetime.datetime.now()}')
 
+def 线路评论标题(id, name):
+    print(id, name)
 
-def task3(a, b):
-    print(f'定时任务_2_{a}{b}{datetime.datetime.now()}')
-
-
-def task4(a, b):
-    print(f'定时任务_4_{a}{b}{datetime.datetime.now()}')
+def 景区攻略(id, name):
+    print(id, name)
diff --git a/applications/view/__init__.py b/applications/view/__init__.py
index bec7989..a168078 100644
--- a/applications/view/__init__.py
+++ b/applications/view/__init__.py
@@ -3,6 +3,7 @@ from applications.view.index import register_index_views
 from applications.view.passport import register_passport_views
 from applications.view.rights import register_rights_view
 from applications.view.department import register_dept_views
+from applications.view.test import register_test_views
 
 
 def init_view(app):
@@ -10,4 +11,5 @@
     register_index_views(app)
     register_rights_view(app)
     register_passport_views(app)
+    register_test_views(app)
     register_dept_views(app)
diff --git a/applications/view/admin/task.py b/applications/view/admin/task.py
index 5c825a6..92c180d 100644
--- a/applications/view/admin/task.py
+++ b/applications/view/admin/task.py
@@ -5,6 +5,7 @@ from applications.common.tasks import tasks
 from applications.common.tasks.tasks import task_list
 from applications.common.utils.http import table_api, fail_api, success_api
 from applications.extensions.init_apscheduler import scheduler
+import time as t
 
 admin_task = Blueprint('adminTask', __name__, url_prefix='/admin/task')
 
@@ -44,6 +45,10 @@ def save():
     functions = request.json.get("functions")
     datetime = request.json.get("datetime")
     time = request.json.get("time")
+    list = str(time).split(':')
+    hour = int(list[0])
+    min = int(list[1])
+    sec = int(list[2])
     if not hasattr(tasks, functions):
         return fail_api()
     if type == 'date':
@@ -62,6 +67,9 @@
             name=name,
             args=(1, 1),
             trigger=type,
+            hours=hour,
+            minutes=min,
+            seconds=sec,
             replace_existing=True)
     elif type == 'cron':
         scheduler.add_job(
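Note: the hunk above is the fix named in the subject. The "HH:MM:SS" string posted by the form is split into hour/minute/second parts and forwarded to APScheduler's interval trigger, which was previously created with no interval arguments at all. Two nits in the added lines: `list` and `min` shadow Python built-ins, and the new `import time as t` alias is not used by any added line. A minimal, self-contained sketch of the same mechanism against plain APScheduler (the job function, job id, and the "01:30:00" value are illustrative, not taken from this patch):

    from apscheduler.schedulers.background import BackgroundScheduler
    import time

    def demo_job(task_id, name):  # stand-in for a function in tasks.py
        print(task_id, name)

    scheduler = BackgroundScheduler()
    # Same parsing as the hunk, with non-shadowing names
    hour, minute, second = (int(p) for p in "01:30:00".split(":"))
    scheduler.add_job(demo_job, trigger="interval",
                      hours=hour, minutes=minute, seconds=second,
                      args=(1, "demo"), id="interval_demo", replace_existing=True)
    scheduler.start()
    time.sleep(3)  # let the scheduler tick briefly
    scheduler.shutdown()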
diff --git a/static/admin/component/pear/css/module/layout.css b/static/admin/component/pear/css/module/layout.css
index beceeea..f4e1b91 100644
--- a/static/admin/component/pear/css/module/layout.css
+++ b/static/admin/component/pear/css/module/layout.css
@@ -61,6 +61,7 @@ body::-webkit-scrollbar-corner {
 
 .mainBox {
 	width: 100%;
+	/*height: 500;*/
 	position: absolute;
 	top: 0px;
 	left: 0px;
-- 
Gitee

From 1ada28c4e159a67d95401f6a1c6c2812db0fa10b Mon Sep 17 00:00:00 2001
From: Bright <540467981@qq.com>
Date: Wed, 14 Dec 2022 16:35:43 +0800
Subject: [PATCH 2/3] My own crawlers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 HBaseConnect.py                        |  92 +++++++++++
 MysqlConnect.py                        |  58 +++++++
 applications/common/tasks/tasks.py     |  22 ++-
 .../search_360.py"                     | 100 ++++++++++++
 .../sentiment_start.py"                |  38 +++++
 .../test.js"                           |   1 +
 .../weibo_scenic_fans.py"              |  90 +++++++++++
 .../weibo_scenic_trend.py"             |  76 +++++++++
 .../weibo_scenic_wordbygeo.py"         |  81 ++++++++++
 .../guide_start.py"                    |  34 ++++
 .../mafengwo_scenic.py"                | 103 +++++++++++++
 .../qunaer_scenic.py"                  |  89 +++++++++++
 .../xiecheng_scenic.py"                |  89 +++++++++++
 .../mafengwo_scenic_comment_title.py"  | 127 +++++++++++++++
 .../qunaer_scenic_comment_title.py"    |  87 +++++++++++
 .../scenic_start.py"                   |  38 +++++
 .../tongcheng_scenic_comment_title.py" |  80 ++++++++++
 .../xiecheng_scenic_comment_title.py"  | 145 ++++++++++++++++++
 .../qunaer_route_comment_title.py"     |  89 +++++++++++
 .../route_start.py"                    |  30 ++++
 .../xiecheng_route_comment_title.py"   | 106 +++++++++++++
 .../hotel_title_start.py"              |  34 ++++
 .../qunaer_hotel_comment_title.py"     |  76 +++++++++
 .../tongcheng_hotel_comment_title.py"  | 102 ++++++++++++
 .../xiecheng_hotel_comment_title.py"   | 121 +++++++++++++++
 25 files changed, 1902 insertions(+), 6 deletions(-)
 create mode 100644 HBaseConnect.py
 create mode 100644 MysqlConnect.py
 create mode 100644 "applications/common/tasks/微博景区/search_360.py"
 create mode 100644 "applications/common/tasks/微博景区/sentiment_start.py"
 create mode 100644 "applications/common/tasks/微博景区/test.js"
 create mode 100644 "applications/common/tasks/微博景区/weibo_scenic_fans.py"
 create mode 100644 "applications/common/tasks/微博景区/weibo_scenic_trend.py"
 create mode 100644 "applications/common/tasks/微博景区/weibo_scenic_wordbygeo.py"
 create mode 100644 "applications/common/tasks/景区攻略/guide_start.py"
 create mode 100644 "applications/common/tasks/景区攻略/mafengwo_scenic.py"
 create mode 100644 "applications/common/tasks/景区攻略/qunaer_scenic.py"
 create mode 100644 "applications/common/tasks/景区攻略/xiecheng_scenic.py"
 create mode 100644 "applications/common/tasks/景区评论标题/mafengwo_scenic_comment_title.py"
 create mode 100644 "applications/common/tasks/景区评论标题/qunaer_scenic_comment_title.py"
 create mode 100644 "applications/common/tasks/景区评论标题/scenic_start.py"
 create mode 100644 "applications/common/tasks/景区评论标题/tongcheng_scenic_comment_title.py"
 create mode 100644 "applications/common/tasks/景区评论标题/xiecheng_scenic_comment_title.py"
 create mode 100644 "applications/common/tasks/线路评论标题/qunaer_route_comment_title.py"
 create mode 100644 "applications/common/tasks/线路评论标题/route_start.py"
 create mode 100644 "applications/common/tasks/线路评论标题/xiecheng_route_comment_title.py"
 create mode 100644 "applications/common/tasks/酒店评论标题/hotel_title_start.py"
 create mode 100644 "applications/common/tasks/酒店评论标题/qunaer_hotel_comment_title.py"
 create mode 100644 "applications/common/tasks/酒店评论标题/tongcheng_hotel_comment_title.py"
 create mode 100644 "applications/common/tasks/酒店评论标题/xiecheng_hotel_comment_title.py"
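Note: this second commit adds two storage helpers (HBaseConnect for HBase over Thrift, MysqlConnect for MySQL over pymysql) plus one crawler package per data category (微博景区, 景区攻略, 景区评论标题, 线路评论标题, 酒店评论标题). A hedged usage sketch for the HBaseConnect wrapper defined in the next diff, using only methods the class itself exposes; the table name, row key, and cell value are placeholders:

    hbase = HBaseConnect()
    hbase.start()
    hbase.createTable("demo_table", {"info": dict()})  # single 'info' column family, as the crawlers use
    hbase.putTable("demo_table", "row-1", {"info:name": "value"})
    print(hbase.printTables())  # list all tables on the cluster
    hbase.stop()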
"applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py" create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py" create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_scenic_comment_title.py" create mode 100644 "applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_route_comment_title.py" create mode 100644 "applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/route_start.py" create mode 100644 "applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_route_comment_title.py" create mode 100644 "applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py" create mode 100644 "applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_hotel_comment_title.py" create mode 100644 "applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_hotel_comment_title.py" create mode 100644 "applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_hotel_comment_title.py" diff --git a/HBaseConnect.py b/HBaseConnect.py new file mode 100644 index 0000000..3d4097d --- /dev/null +++ b/HBaseConnect.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- +import happybase +import json +import re + + +# 实现连接类方法和通用表操作方法 +class HBaseConnect: + def __init__(self): + """ + 建立与thrift server端的连接 + """ + self.connection = happybase.Connection(host="202.193.53.106", port=9090, timeout=None, autoconnect=True, + table_prefix=None, table_prefix_separator=b'_', compat='0.98', + transport='buffered', protocol='binary') + + def getTable(self, table_name: str): # Return Happybase Table + return self.connection.table(table_name) + + def start(self): # Start To Connect + self.connection.open() + + def stop(self): # Stop To Connect + self.connection.close() + + # 删除表 + def deleteTable(self, tableName): + self.connection.disable_table(tableName) + self.connection.delete_table(tableName) + + def printTables(self): + return self.connection.tables() + + def createTable(self, tableName, families): + self.connection.create_table(tableName, families) + + def putTable(self, tableName, rowKey, data): + table = self.connection.table(tableName) + table.put(rowKey, data) + + +if __name__ == '__main__': + hbase = HBaseConnect() + hbase.start() + # hbase.deleteTable("qunaerscenic") + # list = ["xiechenghotel","xiechengscenic","qunaerhotel","qunaerscenic","tongchenghotel","tongchengscenic",] + # for item in list: + # # + # # hbase.deleteTable(item) + # hbase.createTable(item, {"info": dict()}) + # + # hbase.putTable("xiechenghotel","test",{"info:name":"23"}) + # hbase.deleteTable("tongchengscenic") + # hbase.createTable('route_comment', {"info": dict()}) + table = hbase.getTable("route_comment") + i=0 + # id = 3909 + for key, value in table.scan(): + data = str(value).encode().decode('unicode-escape').encode('raw_unicode_escape').decode() + # print(len(data)) + # i=i+1 + # res = re.compile(rf"b'info:datafrom': b'去哪儿', b'info:hid': b'1853'").search(data) + # if res != None: + # i=i+1 + # # table.delete(key) + # print(data) + res = re.compile(r"info:content': b'用户未及时评价, 
diff --git a/MysqlConnect.py b/MysqlConnect.py
new file mode 100644
index 0000000..25dd989
--- /dev/null
+++ b/MysqlConnect.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File : Crawler -> mysqlConn
+@IDE    : PyCharm
+@Author : sandmswift
+@Date   : 2022-11-21 10:54
+@Desc
+=================================================='''
+import pymysql
+
+
+# Connection wrapper plus generic table operations
+class MysqlConnect:
+    # Initialize the database connection
+    def __init__(self):
+        print()
+        self.connection = pymysql.connect(host='202.193.53.151', port=3306, user='root', passwd='root', db='travel', charset='utf8mb4')
+        self.cur = self.connection.cursor()
+
+    def query(self, sql, args):
+        self.cur.execute(sql, args)
+        results = self.cur.fetchall()
+        # print(type(results))  # returns a tuple
+        # self.connection.commit()
+        return results
+
+    def queryHotel(self, sql, args):
+        self.cur.execute(sql, args)
+        results = self.cur.fetchall()
+        # print(type(results))  # returns a tuple
+        self.connection.commit()
+        return results
+
+    def update(self, sql, args):
+        # UPDATE statement used by the Ctrip crawler
+        self.cur.execute(sql, args)
+        self.connection.commit()
+
+    # Generic insert helper
+    def insert(self, sql, args):
+        # Ctrip and Qunar scenic spots
+        # sql = f'INSERT INTO {tableName}(scenicId,scenicName,score,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+        # Tongcheng scenic spots
+        # sql = 'INSERT INTO scenic_comment(scenicId,scenicName,satisfy_present,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+        # Tongcheng hotel details
+        # sql = f'INSERT INTO hotels(name,level,address,tc_url,tc_data,crawlTime) VALUES(%s,%s,%s,%s,%s,%s);'
+        # Tongcheng and Qunar hotels
+        # sql = 'INSERT INTO hotel_comment(hotelId,hotelName,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+        result = self.cur.execute(sql, args)
+        # print(result)
+        self.connection.commit()
+
+# if __name__ == '__main__':
+#     mysql = MysqlConnect()
+#     mysql.update((222,"诗与远方·漓江院子酒店(两江四湖东西巷店)"))
\ No newline at end of file
diff --git a/applications/common/tasks/tasks.py b/applications/common/tasks/tasks.py
index 23c4e81..fca30e2 100644
--- a/applications/common/tasks/tasks.py
+++ b/applications/common/tasks/tasks.py
@@ -1,13 +1,23 @@
-import datetime
-task_list = ['景区评论标题', '线路评论标题', '景区攻略']
+from applications.common.tasks.景区评论标题.scenic_start import Scenic
+from applications.common.tasks.线路评论标题.route_start import Route
+from applications.common.tasks.酒店评论标题.hotel_title_start import Hotel
+from applications.common.tasks.景区攻略.guide_start import Guide
 
-def 景区评论标题(id, name):
-    print(id, name)
+task_list = ['景区评论标题', '线路评论标题', '景区攻略','酒店评论标题']
 
+def 景区评论标题(id, name):
+    scenic_start = Scenic()
+    scenic_start.run()
 
 def 线路评论标题(id, name):
-    print(id, name)
+    scenic_start = Route()
+    scenic_start.run()
 
 def 景区攻略(id, name):
-    print(id, name)
+    scenic_start = Guide()
+    scenic_start.run()
+
+def 酒店评论标题(id, name):
+    scenic_start = Hotel()
+    scenic_start.run()
diff --git
"a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/search_360.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/search_360.py" new file mode 100644 index 0000000..46ca727 --- /dev/null +++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/search_360.py" @@ -0,0 +1,100 @@ +import requests +import re +import aiohttp +import asyncio +import os +import xlwt +import xlrd +import time +import openpyxl +import json +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d",time.localtime()) +tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d") +headers = { + 'Host': 'trends.so.com', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0', + 'Accept': 'application/json, text/plain, */*', + 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', + 'Accept-Encoding': 'gzip, deflate, br', + 'X-Requested-With': 'XMLHttpRequest', + 'Connection': 'keep-alive', + 'Referer': 'https://trends.so.com/result?query=%E6%BC%93%E6%B1%9F,%E8%B1%A1%E5%B1%B1&period=30', + 'Cookie': '__guid=239254294.3977692736380259300.1670144255177.8064; __gid=239254294.318323770.1670144255178.1670144261083.38; __bn=OBOS%7BOxOnSwO%24O%2FBVKQFw%3CS%3EStwRt3BqwL%2C%2B%2FUpx.pdL%28UoxX%294STo1Dzg%2F%7DVYG%40dJp%3F1M%40f%5EJ0%7Cs%3ClLe%5E%23OAT8gZKW%232LE%7C9ue%25YHrkL_c8y%2AnNf5v%26LmJ7%5Eh%21_6; QiHooGUID=4B479C909060D45303A26176A83571EE.1670921647212; count=2; test_cookie_enable=null; Q=u%3D360H3408265314%26n%3D%25Q3%25Q0%25Q0%25P4%25P8%25PO_308%26le%3D%26m%3DZGH1WGWOWGWOWGWOWGWOWGWOBQt0%26qid%3D3408265314%26im%3D1_t015d6b97def2a4a918%26src%3Dpcw_360index%26t%3D1; T=s%3D6b400eed78c7fa7b27a496343be40f86%26t%3D1670921768%26lm%3D0-1%26lf%3D2%26sk%3Dc5c99806e7fcea7539ea20e35943912a%26mt%3D1670921768%26rc%3D%26v%3D2.0%26a%3D1; so_huid=11k1XvfU5zrG3%2BT9U%2FzH9XHKHrh6tHE07PUntGARvW36A%3D; __huid=11k1XvfU5zrG3%2BT9U%2FzH9XHKHrh6tHE07PUntGARvW36A%3D; _S=fd41ea22d44de0791f9996eaa25d084e', + 'Sec-Fetch-Dest': 'empty', + 'Sec-Fetch-Mode': 'cors', + 'Sec-Fetch-Site': 'same-origin', + 'TE': 'trailers', +} + +from MysqlConnect import * +mysql = MysqlConnect() + +class Search_360: + async def getFans(self,item, session): + try: + maxPage = int(item['maxPage']) + wordList = [] + for index in range(1,maxPage+1): + url = f"https://s.weibo.com/weibo?q={item['short_name']}&page={index}" + async with session.get(url) as res: + res = await res.text() + tempList = re.compile(r'>#(.*?)#<').findall(res) + wordList.extend(tempList) + if index % 8 == 0: + print(f"<----------------{item['short_name']}爬到{index}页------------------->") + time.sleep(3) + # 微博fans的sql + args = (item["id"], item["short_name"], str(wordList), today) + print(args) + sql = 'INSERT INTO weibo_word(scenicId,name,wordList,crawlTime) VALUES(%s,%s,%s,%s);' + mysql.insert(sql, args) + except Exception as e: + print("fans报错",e) + + # 从数据库获取景区信息 + async def getScenic(self): + async with aiohttp.ClientSession(headers=headers) as session: + # 从数据库拿url + results = mysql.queryHotel("select id,name ,short_name from scenics where id > 0", None) + tasks = [] + url_list = [] + for row in results: + id = row[0] + name = row[1] + short_name = row[2] + url_list.append({ + "id": id, + "name": name, + "short_name": short_name, + }) + print("微博所有景区长度", len(url_list)) + for item in url_list: + url = f"https://s.weibo.com/weibo?q={item['short_name']}&page=1" + res = requests.get(url, headers=headers) + maxPages = 
re.compile(r'第(.*)页').findall(res.text) + if maxPages == []: + item['maxPage'] = 1 + else: + item['maxPage'] = maxPages[len(maxPages) - 1] + print(item['short_name'] + f'长度为:{item["maxPage"]}') + task = asyncio.create_task(self.getFans(item.copy(), session)) + tasks.append(task) + await asyncio.wait(tasks) + print(f"{item['short_name']}爬完了") + # 关闭mysql + mysql.cur.close() + mysql.connection.close() + def test(): + url = "https://trends.so.com/index/soMediaJson?q=漓江,象山&from=20130111&to=20221212&s=0" + res = requests.post(url,headers=headers) + # print(res.json()) + resp = res.json() + data = resp['data']['media']['漓江'] + # resp = res.content.decode('unicode-escape', 'ignore').encode('utf-8', 'ignore').decode('utf-8') # 爬取页面并且解码 + print(data) + +if __name__ == '__main__': + test() + # asyncio.run(getScenic()) diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/sentiment_start.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/sentiment_start.py" new file mode 100644 index 0000000..6c58652 --- /dev/null +++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/sentiment_start.py" @@ -0,0 +1,38 @@ +# coding:utf-8 +# version:python3.7 +# author:Ivy + +from applications.common.tasks.微博景区.weibo_scenic_fans import Weibo_Fans +from applications.common.tasks.微博景区.weibo_scenic_trend import Weibo_Trend +from applications.common.tasks.微博景区.weibo_scenic_wordbygeo import Weibo_Wordbygeo +from applications.common.tasks.微博景区.search_360 import Search_360 +import asyncio +import time + +mafengwo = Weibo_Fans() +qunaer = Weibo_Trend() +tongcheng = Weibo_Wordbygeo() +xiecheng = Search_360() + +class Scenic: + def run(self): + print("开始爬取各个网站的评论标题!") + time_start=time.time() + + asyncio.run(xiecheng.getScenic()) + print("携程爬取结束") + asyncio.run(tongcheng.getScenic()) + print("同程爬取结束") + asyncio.run(qunaer.getScenic()) + print("去哪儿爬取结束") + asyncio.run(mafengwo.getScenic()) + print("马蜂窝爬取结束") + + time_end=time.time() + print(' time cost ',time_end-time_start,'s') + + + + + + diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/test.js" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/test.js" new file mode 100644 index 0000000..e98763e --- /dev/null +++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/test.js" @@ -0,0 +1 @@ +console.log(123) \ No newline at end of file diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_fans.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_fans.py" new file mode 100644 index 0000000..911672f --- /dev/null +++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_fans.py" @@ -0,0 +1,90 @@ +import requests +import re +import aiohttp +import asyncio +import os +import xlwt +import xlrd +import time +import openpyxl +import json +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d",time.localtime()) +tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d") +headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0', + 'Cookie':"SINAGLOBAL=6487030870592.412.1670217755062; ULV=1670913619949:2:2:1:448084127526.90094.1670913619909:1670217755065; SUB=_2A25OnG8QDeRhGeFG61oQ9CfOyzWIHXVt6MfYrDV8PUNbmtANLXbRkW9NfnN7XS0bIXvPWvBx4AplvHeMTR0yYZWh; 
SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhuyxuyZV28mO7UcSyqZia-5JpX5KzhUgL.FoMRehnpSh.Eeh.2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM71K201h27eKBc; XSRF-TOKEN=Nb98b84zpKrLkQFK2G4QVm_B; WBPSESS=GXgdHCdzDjVRXBTFCQtCjwvN0D3EIjJu6yKjC9Ly2vpYlmMNPvd-am2fVfhb0LZzGlpu1z5hvjfehJnVFqrpOT7jc98bCwb2dNLjM6gCoYpJDGf8FiZZUWuoMIVf8Swi9hEuLXICEsBqKZoSkXjGwg==; _s_tentry=-; Apache=448084127526.90094.1670913619909; PC_TOKEN=45843b523d; crossidccode=CODE-yf-1P4YZf-3iYEFJ-6bhPhbY6POtswkxda2c7a; appkey=; geetest_token=667c6133aa018c4142666d597550c90c; ALF=1702449855; SSOLoginState=1670913856", + # 'referer': 'https://s.weibo.com/' +} + +from MysqlConnect import * +mysql = MysqlConnect() + +class Weibo_Fans: + async def getFans(self,item, session): + try: + maxPage = int(item['maxPage']) + sum = 0 + for index in range(1,maxPage+1): + url = f"https://s.weibo.com/user?q={item['short_name']}&Refer=weibo_user&page={index}" + async with session.get(url) as res: + res = await res.text() + resp = re.compile(r'粉丝:(.*)').findall(res) + for i in range(0,len(resp)): + obj = re.compile(r'万').search(resp[i]) + fans = resp[i] + if obj != None: + fans = resp[i].replace('万','') + fans = float(fans) * 10000 + else: + fans = fans + fans = int(fans) + sum = sum + fans + print(f"<-----------------{item['short_name']}爬到{index}页,目前fans为{sum}---------------->") + # 微博fans的sql + if index % 5 == 0: + time.sleep(5) + args = (item["id"], item["short_name"], sum, today) + print(args) + sql = 'INSERT INTO weibo_fans(scenicId,name,wb_fans,crawlTime) VALUES(%s,%s,%s,%s);' + mysql.insert(sql, args) + except Exception as e: + print("fans报错",e) + + # 从数据库获取景区信息 + async def getScenic(self): + async with aiohttp.ClientSession(headers=headers) as session: + # 从数据库拿url + results = mysql.queryHotel("select id,name ,short_name from scenics where id > 0", None) + tasks = [] + url_list = [] + for row in results: + id = row[0] + name = row[1] + short_name = row[2] + url_list.append({ + "id": id, + "name": name, + "short_name": short_name, + }) + print("微博所有景区长度", len(url_list)) + for item in url_list: + url = f"https://s.weibo.com/user?q={item['short_name']}&Refer=weibo_user&page=1" + res = requests.get(url, headers=headers) + maxPages = re.compile(r'第(.*)页').findall(res.text) + if maxPages == []: + item['maxPage'] = 1 + else: + item['maxPage'] = maxPages[len(maxPages) - 1] + print(item['short_name'] + f'长度为:{item["maxPage"]}') + task = asyncio.create_task(self.getFans(item.copy(), session)) + tasks.append(task) + await asyncio.wait(tasks) + print(f"{item['short_name']}爬完了") + # 关闭mysql + mysql.cur.close() + mysql.connection.close() + +if __name__ == '__main__': + asyncio.run(getScenic()) diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_trend.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_trend.py" new file mode 100644 index 0000000..df82661 --- /dev/null +++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_trend.py" @@ -0,0 +1,76 @@ +import requests +import re +import aiohttp +import asyncio +import os +import xlwt +import xlrd +import time +import openpyxl +import json +from datetime import date, timedelta + +# year = time.strftime("%Y-", time.localtime()) +today = time.strftime("%Y-%m-%d",time.localtime()) +tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d") +headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0', + # 
'Cookie':"SINAGLOBAL=3380569287618.6953.1670308366047; _s_tentry=s.weibo.com; Apache=5236619054300.727.1670324406000; ULV=1670324406003:2:2:2:5236619054300.727.1670324406000:1670308366049; XSRF-TOKEN=13toiK7TaB8Axa4Vx7DncNNO; login_sid_t=ec7d5e0d423e9ec19b8acb14ea31e88f; cross_origin_proto=SSL; wb_view_log=2560*14401.5; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFCcqTynPQA63zJA5y17Xs15JpX5o275NHD95QcSoe0eo5XSo-fWs4Dqc_xi--fiK.0i-8Wi--ciKLhiKn4i--4iKnEi-20i--Xi-z4iKnRi--fi-2XiKLWi--ci-zpiKnEi--RiKn7iKyhi--Xi-zRiKy2i--fi-i8iK.N; SSOLoginState=1670812852; SUB=_2A25OkuTlDeRhGeBI6FET8CrKzjmIHXVt5lEtrDV8PUNbmtANLWiskW9NRptelGTTlu3DzUc2k9u71P0K706eTg71; ALF=1702348852; WBPSESS=QRnN_8uUPIRKDidMZ7ysnFsKmswTd-coyxvC3kx2wmVsnZYfCgM3CVbyYUESYHrYB0_OPXwWvhlacPaYtSVNXY0EckXCBF-9xxe7fsm2CcjhwFzQ2yBIcsDsTtMkf5Epp7PzpdyQGn9mf7C9CvIb3w==", + # 'referer': 'https://s.weibo.com/' +} + +from MysqlConnect import * +mysql = MysqlConnect() + +class Weibo_Trend: + async def getTalk(self,item, session): + try: + url = f"https://m.s.weibo.com/ajax_topic/detail?q={item['short_name']}" + async with session.get(url) as res: + resp = await res.json() + read = 0 + ori_uv = 0 + mention = 0 + star = 0 + if 'count' in resp['data']['baseInfo']: + read = resp['data']['baseInfo']['count']['read'] + ori_uv = resp['data']['baseInfo']['count']['ori_uv'] + mention = resp['data']['baseInfo']['count']['mention'] + star = resp['data']['baseInfo']['count']['star'] + args = (item["id"], item["short_name"], read,ori_uv,mention,star, today) + print(args) + sql = 'INSERT INTO weibo_trend(scenicId,`name`,`read`,ori_uv,mention,star,crawlTime) VALUES(%s,%s,%s,%s,%s,%s,%s);' + mysql.insert(sql, args) + except Exception as e: + print("fans报错",e) + + # 从数据库获取景区信息 + async def getScenic(self): + async with aiohttp.ClientSession(headers=headers) as session: + # 从数据库拿url + results = mysql.queryHotel("select id,name ,short_name from scenics where id > 0 ", None) + tasks = [] + url_list = [] + for row in results: + id = row[0] + name = row[1] + short_name = row[2] + url_list.append({ + "id": id, + "name": name, + "short_name": short_name, + }) + print("微博所有景区长度", len(url_list)) + i = 0 + for item in url_list: + task = asyncio.create_task(self.getTalk(item.copy(), session)) + tasks.append(task) + i = i + 1 + if i % 8 == 0: + time.sleep(3) + await asyncio.wait(tasks) + # 关闭mysql + mysql.cur.close() + mysql.connection.close() +if __name__ == '__main__': + asyncio.run(getScenic()) \ No newline at end of file diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_wordbygeo.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_wordbygeo.py" new file mode 100644 index 0000000..a155499 --- /dev/null +++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_wordbygeo.py" @@ -0,0 +1,81 @@ +import requests +import re +import aiohttp +import asyncio +import os +import xlwt +import xlrd +import time +import openpyxl +import json +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d",time.localtime()) +tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d") +headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0', + 'Cookie':"SINAGLOBAL=6487030870592.412.1670217755062; ULV=1670913619949:2:2:1:448084127526.90094.1670913619909:1670217755065; SUB=_2A25OnG8QDeRhGeFG61oQ9CfOyzWIHXVt6MfYrDV8PUNbmtANLXbRkW9NfnN7XS0bIXvPWvBx4AplvHeMTR0yYZWh; 
SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhuyxuyZV28mO7UcSyqZia-5JpX5KzhUgL.FoMRehnpSh.Eeh.2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM71K201h27eKBc; XSRF-TOKEN=Nb98b84zpKrLkQFK2G4QVm_B; WBPSESS=GXgdHCdzDjVRXBTFCQtCjwvN0D3EIjJu6yKjC9Ly2vpYlmMNPvd-am2fVfhb0LZzGlpu1z5hvjfehJnVFqrpORnpezcjqLRXjHTwRYkqud2f-lo5ogx3FJhjiiEoA2AHWwC4_I4Ebc8XETWMRRXqRQ==; _s_tentry=-; Apache=448084127526.90094.1670913619909; appkey=; ALF=1702449855; SSOLoginState=1670913856", + # 'referer': 'https://s.weibo.com/' +} + +from MysqlConnect import * +mysql = MysqlConnect() + +class Weibo_Wordbygeo: + async def getWord(self,item, session): + try: + maxPage = int(item['maxPage']) + wordList = [] + for index in range(1,maxPage+1): + url = f"https://s.weibo.com/weibo?q={item['short_name']}&page={index}" + async with session.get(url) as res: + res = await res.text() + tempList = re.compile(r'>#(.*?)#<').findall(res) + wordList.extend(tempList) + if index % 8 == 0: + print(f"<----------------{item['short_name']}爬到{index}页------------------->") + time.sleep(3) + # 微博fans的sql + args = (item["id"], item["short_name"], str(wordList), today) + print(args) + sql = 'INSERT INTO weibo_word(scenicId,name,wordList,crawlTime) VALUES(%s,%s,%s,%s);' + mysql.insert(sql, args) + except Exception as e: + print("fans报错",e) + + # 从数据库获取景区信息 + async def getScenic(self): + async with aiohttp.ClientSession(headers=headers) as session: + # 从数据库拿url + results = mysql.queryHotel("select id,name ,short_name from scenics where id > 0", None) + tasks = [] + url_list = [] + for row in results: + id = row[0] + name = row[1] + short_name = row[2] + url_list.append({ + "id": id, + "name": name, + "short_name": short_name, + }) + print("微博所有景区长度", len(url_list)) + for item in url_list: + url = f"https://s.weibo.com/weibo?q={item['short_name']}&page=1" + res = requests.get(url, headers=headers) + maxPages = re.compile(r'第(.*)页').findall(res.text) + if maxPages == []: + item['maxPage'] = 1 + else: + item['maxPage'] = maxPages[len(maxPages) - 1] + print(item['short_name'] + f'长度为:{item["maxPage"]}') + task = asyncio.create_task(self.getWord(item.copy(), session)) + tasks.append(task) + await asyncio.wait(tasks) + print(f"{item['short_name']}爬完了") + # 关闭mysql + mysql.cur.close() + mysql.connection.close() + +if __name__ == '__main__': + asyncio.run(getScenic()) diff --git "a/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/guide_start.py" "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/guide_start.py" new file mode 100644 index 0000000..8fbaabd --- /dev/null +++ "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/guide_start.py" @@ -0,0 +1,34 @@ +# coding:utf-8 +# version:python3.7 +# author:Ivy + +from applications.common.tasks.景区攻略.mafengwo_scenic import Mafengwo_Scenic +from applications.common.tasks.景区攻略.qunaer_scenic import Qunaer_Scenic +from applications.common.tasks.景区攻略.xiecheng_scenic import Xiecheng_Scenic +import asyncio +import time + +mafengwo = Mafengwo_Scenic() +qunaer = Qunaer_Scenic() +xiecheng = Xiecheng_Scenic() + +class Guide: + def run(self): + print("开始爬取各个网站的评论标题!") + time_start=time.time() + + asyncio.run(xiecheng.getScenic()) + print("携程爬取结束") + # asyncio.run(qunaer.getScenic()) + # print("去哪儿爬取结束") + asyncio.run(mafengwo.getScenic()) + print("马蜂窝爬取结束") + + time_end=time.time() + print(' time cost ',time_end-time_start,'s') + + + + + + diff --git "a/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/mafengwo_scenic.py" 
"b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/mafengwo_scenic.py" new file mode 100644 index 0000000..5f0acc0 --- /dev/null +++ "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/mafengwo_scenic.py" @@ -0,0 +1,103 @@ +import time + +import pymysql +import requests +from lxml import etree +import datetime +xiechengUrl = 'https://www.mafengwo.cn/search/q.php?q={}&t=notes&seid=8ADBD862-D2E8-4B0D-ADE1-0C98ED641130&mxid=&mid=&mname=&kt=1' + + +class Mafengwo_Scenic: + + def getSource(self,url): + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36' + } + + response = requests.get(url,headers=headers) + response.encoding = 'utf-8' + return response.text + + def getEveryItem(self,id,name,source): + try: + html_element = etree.HTML(source) + href_list = html_element.xpath('//*[@id="_j_mfw_search_main"]/div[1]/div/div/a/@href') + + guilde_url = href_list[2] + note_url = href_list[3] + answer_url = href_list[4] + + guilde_html = self.getSource(guilde_url) + guilde_html = etree.HTML(guilde_html) + note_html = self.getSource(note_url) + note_html = etree.HTML(note_html) + answer_html = self.getSource(answer_url) + answer_html = etree.HTML(answer_html) + + guide_list = guilde_html.xpath('//*[@id="_j_search_result_left"]/div/div/ul/li') + note_list = note_html.xpath('//*[@id="_j_search_result_left"]/div/div/ul/li') + answer_list = answer_html.xpath('//*[@id="_j_search_result_left"]/div/div/div[@class="ct-text closeto"]') + + len_guide_list = len(guide_list) + len_note_list = len(note_list) + len_answer_list = len(answer_list) + + now_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + infoDict = {} + infoDict['scenicId'] = id + infoDict['guide_num'] = len_note_list + infoDict['note_num'] = len_guide_list + infoDict['answer_num'] = len_answer_list + infoDict['crawlTime'] = now_time + infoDict['scenic_name'] = name + infoDict['sitefrom'] = "马蜂窝" + return infoDict + except Exception as e: + print(e) + + # def writeData(traininfoList): + # + # with open('xiecheng.csv','w',newline='') as fs: + # + # writer = csv.DictWriter(fs,fieldnames=['出发时间','出发车站','需要耗时','车次信息','到达时间','到达车站','车票价格','剩余车票']) + # + # writer.writeheader() + # writer.writerows(traininfoList) + + def getScenic(self): + sql1 = "select id,name from scenics where id > 53" + sql2 = 'INSERT INTO scenic_index(scenicId,hot_guide_num,elite_guide_num,guide_num,note_num,answer_num,crawlTime,scenic_name,sitefrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);' + # insert(sql, (2, 'wang', 13)) + conn = pymysql.connect(host='202.193.53.151', port=3306, user='root', passwd='root', db='travel') + cur = conn.cursor() + cur.execute(sql1, None) + results = cur.fetchall() + i=0 + for row in results: + i=i+1 + pageLink = xiechengUrl.format(str(row[1]).replace(" ", "")) + source = self.getSource(pageLink) + dict = self.getEveryItem(row[0], row[1], source) + # print(dict['scenicId']) + # sql2.format(dict['scenicId'],dict['guide_num'],dict['note_num'],dict['answer_num'],dict['crawlTime'],dict['sitefrom']) + args = [dict['scenicId'], 0, 0, dict['guide_num'], dict['note_num'], dict['answer_num'], dict['crawlTime'], + dict['scenic_name'], dict['sitefrom']] + print(args) + if i % 5 == 0 : + time.sleep(5) + cur.execute(sql2, + args) + conn.commit() + cur.close() + conn.close() + +if __name__ == '__main__': + getScenic() + + + + + + + diff --git 
"a/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/qunaer_scenic.py" "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/qunaer_scenic.py" new file mode 100644 index 0000000..bf54583 --- /dev/null +++ "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/qunaer_scenic.py" @@ -0,0 +1,89 @@ +import requests +import re +import aiohttp +import asyncio +import os +import xlwt +import xlrd +import time +import random +import json +import openpyxl + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0", + # "Cookie": 'QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN58=1669821195055%7C1669822256665%7C5; QN57=16678736048620.2656904960064569; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wWsaOWwPwawPwa%3DkhWDfTaD3NXsERXKj%3DX2EGEKaAWs28aSERW2a%3DWsX%2BiK3siK3saKjOVK2%2BWK2AWRamVhPwaUvt; cs_june=1e980219e0683d534a30d19cbf460690504831204710eca7ff8957e47452ae78150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c1179774d0c3f26f472d208f55073055c8e3b5a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; Hm_lvt_c56a2b5278263aa647778d304009eafc=1667874629,1668075379,1669972940; viewpoi=5942247|716544|706160|722948; uld=1-300113-1-1669974575|1-299801-3-1669974326|2-5942247-1-1667874649; SECKEY_ABVK=oBn0fel6+CD+aAN/hYsF0tz2y0FKgx63zX5Zn2S9lEM%3D; BMAP_SECKEY=6322QfSPZ1N2m2UuiZlS0H6FoMDxhQ-GnPPIgN-EndoROx7_vGs84WwiwKWL44NBDiCLOGD2d-Y7KyqD2s8PM2ytpXq2q1eZ0TzXIPrmUoDe2ij4Z5mR9gOY1KAWi2msFlzCCbX6sugCEQBjlDn83Ly8gGRLDqMpqMWaTSICD2NztE1Tawzv3BAgu-x7EUlO; QN233=FreetripTouchin; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; quinn=e5ba94e400db7ae611b28097b8ad7ddc9fea18aa074280921e89258cf82e7cb417cc1fc89ba3f04bfda0535faf80ae42; QN621=1490067914133%2Ctestssong%3DDEFAULT%26fr%3Dtejia_inton_search%261490067914133%252Ctestssong%3DDEFAULT; QN668=51%2C56%2C56%2C58%2C56%2C55%2C54%2C56%2C58%2C57%2C57%2C51%2C56; QN243=679; ariaDefaultTheme=null; QN100=WyLotaDnq7nnrZLppa3ppa7nlKjmsLTmoYLmnpfpvpnohIrmoq%2FnlLDph5HlnZHlpKflr6jpu4TmtJvnkbblr6jlpKflt7Tovabnuq%2Fnjqnlj6%2FliqDnvIbovabkuIDml6XmuLh85qGC5p6XIiwi5Yid6YGH5ryT5rGf55WF5ri457K%2B576O5ryT5rGf57K%2B5Y2O5ri46Ii56KeC6LWP5LqM5Y2B5YWD5Lq65rCR5biB6IOM5pmv5Lmd6ams55S75bGx5b6S5q2l5YW05Z2q5Y%2Bk6ZWH6Ziz5pyU5LiW55WM5rq25rSe5aWH6KeC6ZO25a2Q5bKp57qv546p5LiA5pel5ri4fOahguaelyIsIuahguael%2BmYs%2BaclOe6r%2BeOqeS4gOaXpea4uOmTtuWtkOWyqeaXoOi0reeJqXzmoYLmnpciLCLng63ojZAxMuS6uueyvuWTgeWwj%2BWboiDmvJPmsZ%2FmuLjoiLkyMOWFg%2BiDjOaZr%2BmBh%2Bm%2Bmeays%2BmTtuWtkOWyqeWNgXzmoYLmnpciLCLmoYLmnpd85qGC5p6XIl0%3D; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN267=0531040385eb6753d; QN271=aac3c78a-6161-4135-8024-4d417d4798fd; 
JSESSIONID=540F1DB1B565507C76E711DE50DEEE27; Hm_lpvt_c56a2b5278263aa647778d304009eafc=1669976262; viewdist=299801-1; viewbook=7673685|5804838|7405861; _vi=oVDC9e1VW3oiCf8HuMZBgBCq212ulsphL4ZvksnfyM24u9ptCRpd6nwZ_dl356Rh70BPTkTu65nuFpEFZTuI0pekzVy6x6EWIVwDrft6xlPPMZ0c2DO6nWnwUxB0zc_J36j7pNWamepyavD-W6SanJmZzLr59gUrgIrbH3thSQUe' +} +# 获取当前时间 +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d", time.localtime()) +tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d") + +from MysqlConnect import * + +mysql = MysqlConnect() + +class Qunaer_Scenic: + async def getGuild(self,item, session): + try: + async with session.get(item["start_heat"]) as res: + resp = await res.text() + start_heat = 0 + result = re.compile(r'data-beacon="tab_gonglue">攻略 \((.*?)\)').findall(resp) + if result != []: + start_heat = result[0] + async with session.get(item["elite_heat"]) as res: + resp = await res.text() + elite_heat = 0 + result = re.compile(r'data-beacon="tab_gonglue">攻略 \((.*?)\)').findall(resp) + if result != []: + elite_heat = result[0] + async with session.get(item["hot_heat"]) as res: + resp = await res.text() + hot_heat = 0 + result = re.compile(r'data-beacon="tab_gonglue">攻略 \((.*?)\)').findall(resp) + if result != []: + hot_heat = result[0] + args = (item["id"], item["name"], start_heat, elite_heat, hot_heat, today, "去哪儿") + print(args) + # sql = f'INSERT INTO scenic_index(scenicId,scenic_name,guide_num,elite_guide_num_num,hot_guide_num,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s);' + # mysql.insert(sql, args) + + except Exception as e: + print("comment报错", e) + # print(item) + # print("报错页数",index,sightId) + + + async def getScenic(self): + async with aiohttp.ClientSession(headers=headers) as session: + results = mysql.query("select id,name,gw_url from scenics where gw_url !='' ", None) + tasks = [] + url_list = [] + for row in results: + id = row[0] + name = row[1] + url = row[2] + url_list.append({ + "id": id, + "name": name, + "url": url, + }) + print("去哪儿网站的所有景区长度", len(url_list)) + for item in url_list: + item['start_heat'] = f"https://travel.qunar.com/search/gonglue/{item['name']}/start_heat/1.htm" + item['elite_heat'] = f"https://travel.qunar.com/search/gonglue/{item['name']}/elite_heat/1.htm" + item['hot_heat'] = f"https://travel.qunar.com/search/gonglue/{item['name']}/hot_heat/1.htm" + task = asyncio.create_task(self.getGuild(item, session)) + tasks.append(task) + await asyncio.wait(tasks) + # time.sleep(5) + # 关闭mysql + mysql.cur.close() + mysql.connection.close() + + + +if __name__ == "__main__": + asyncio.run(getScenic()) diff --git "a/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/xiecheng_scenic.py" "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/xiecheng_scenic.py" new file mode 100644 index 0000000..d67f8dc --- /dev/null +++ "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/xiecheng_scenic.py" @@ -0,0 +1,89 @@ +import requests +import re +import aiohttp +import asyncio +import time +import random +import json + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0", + # "Cookie": 'QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; 
fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN58=1669821195055%7C1669822256665%7C5; QN57=16678736048620.2656904960064569; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wWsaOWwPwawPwa%3DkhWDfTaD3NXsERXKj%3DX2EGEKaAWs28aSERW2a%3DWsX%2BiK3siK3saKjOVK2%2BWK2AWRamVhPwaUvt; cs_june=1e980219e0683d534a30d19cbf460690504831204710eca7ff8957e47452ae78150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c1179774d0c3f26f472d208f55073055c8e3b5a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; Hm_lvt_c56a2b5278263aa647778d304009eafc=1667874629,1668075379,1669972940; viewpoi=5942247|716544|706160|722948; uld=1-300113-1-1669974575|1-299801-3-1669974326|2-5942247-1-1667874649; SECKEY_ABVK=oBn0fel6+CD+aAN/hYsF0tz2y0FKgx63zX5Zn2S9lEM%3D; BMAP_SECKEY=6322QfSPZ1N2m2UuiZlS0H6FoMDxhQ-GnPPIgN-EndoROx7_vGs84WwiwKWL44NBDiCLOGD2d-Y7KyqD2s8PM2ytpXq2q1eZ0TzXIPrmUoDe2ij4Z5mR9gOY1KAWi2msFlzCCbX6sugCEQBjlDn83Ly8gGRLDqMpqMWaTSICD2NztE1Tawzv3BAgu-x7EUlO; QN233=FreetripTouchin; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; quinn=e5ba94e400db7ae611b28097b8ad7ddc9fea18aa074280921e89258cf82e7cb417cc1fc89ba3f04bfda0535faf80ae42; QN621=1490067914133%2Ctestssong%3DDEFAULT%26fr%3Dtejia_inton_search%261490067914133%252Ctestssong%3DDEFAULT; QN668=51%2C56%2C56%2C58%2C56%2C55%2C54%2C56%2C58%2C57%2C57%2C51%2C56; QN243=679; ariaDefaultTheme=null; QN100=WyLotaDnq7nnrZLppa3ppa7nlKjmsLTmoYLmnpfpvpnohIrmoq%2FnlLDph5HlnZHlpKflr6jpu4TmtJvnkbblr6jlpKflt7Tovabnuq%2Fnjqnlj6%2FliqDnvIbovabkuIDml6XmuLh85qGC5p6XIiwi5Yid6YGH5ryT5rGf55WF5ri457K%2B576O5ryT5rGf57K%2B5Y2O5ri46Ii56KeC6LWP5LqM5Y2B5YWD5Lq65rCR5biB6IOM5pmv5Lmd6ams55S75bGx5b6S5q2l5YW05Z2q5Y%2Bk6ZWH6Ziz5pyU5LiW55WM5rq25rSe5aWH6KeC6ZO25a2Q5bKp57qv546p5LiA5pel5ri4fOahguaelyIsIuahguael%2BmYs%2BaclOe6r%2BeOqeS4gOaXpea4uOmTtuWtkOWyqeaXoOi0reeJqXzmoYLmnpciLCLng63ojZAxMuS6uueyvuWTgeWwj%2BWboiDmvJPmsZ%2FmuLjoiLkyMOWFg%2BiDjOaZr%2BmBh%2Bm%2Bmeays%2BmTtuWtkOWyqeWNgXzmoYLmnpciLCLmoYLmnpd85qGC5p6XIl0%3D; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN267=0531040385eb6753d; QN271=aac3c78a-6161-4135-8024-4d417d4798fd; JSESSIONID=540F1DB1B565507C76E711DE50DEEE27; Hm_lpvt_c56a2b5278263aa647778d304009eafc=1669976262; viewdist=299801-1; viewbook=7673685|5804838|7405861; _vi=oVDC9e1VW3oiCf8HuMZBgBCq212ulsphL4ZvksnfyM24u9ptCRpd6nwZ_dl356Rh70BPTkTu65nuFpEFZTuI0pekzVy6x6EWIVwDrft6xlPPMZ0c2DO6nWnwUxB0zc_J36j7pNWamepyavD-W6SanJmZzLr59gUrgIrbH3thSQUe' +} +# 获取当前时间 +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d", time.localtime()) +tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d") + +from MysqlConnect import * + +mysql = MysqlConnect() + + +class Xiecheng_Scenic: + async def getGuild(self,item, session): + try: + async with session.post(item["note_url"], json=item['note_data']) as res: + resp = await res.json() + note_num = 0 + if 'total' in resp: + note_num = resp['total'] + async with session.post(item["answer_url"], json=item['answer_data']) as res: + resp = await res.json() + answer_num = 0 + if 'total' in resp: + answer_num = resp['total'] + args = (item["id"], 
item["name"], answer_num, note_num, today, "携程") + print(args) + sql = f'INSERT INTO scenic_index(scenicId,scenic_name,answer_num,note_num,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s);' + mysql.insert(sql, args) + + except Exception as e: + print("comment报错", e) + + + async def getScenic(self): + async with aiohttp.ClientSession(headers=headers) as session: + results = mysql.query("select id,name,gw_url from scenics", None) + url_list = [] + for row in results: + id = row[0] + name = row[1] + url_list.append({ + "id": id, + "name": name, + }) + tasks = [] + print("携程网站的所有景区长度", len(url_list)) + i = 0 + for item in url_list: + item[ + 'answer_url'] = "https://m.ctrip.com/restapi/soa2/20591/getGsOnlineResult?_fxpcqlniredt=09031172114453342165&x-traceID=09031172114453342165-1670145454163-2467091" + item['answer_data'] = {"keyword": f"{item['name']}", "pageIndex": 1, "pageSize": 12, "tab": "gsask", + "sourceFrom": "", + "profile": False, + "head": {"cid": "09031172114453342165", "ctok": "", "cver": "1.0", "lang": "01", + "sid": "8888", + "syscode": "09", "auth": "", "xsid": "", "extension": []}} + item['note_url'] = "https://m.ctrip.com/restapi/soa2/20591/getGsOnlineResult?_fxpcqlniredt=09031172114453342165&x-traceID=09031172114453342165-1670145707306-8611319" + item['note_data'] = {"keyword": f"{item['name']}", "pageIndex": 1, "pageSize": 12, "tab": "travelnotes", + "sourceFrom": "", + "profile": False, + "head": {"cid": "09031172114453342165", "ctok": "", "cver": "1.0", "lang": "01", + "sid": "8888", + "syscode": "09", "auth": "", "xsid": "", "extension": []}} + # print(item) + i = i+1 + if i > 5 : + i=0 + time.sleep(5) + task = asyncio.create_task(self.getGuild(item, session)) + tasks.append(task) + await asyncio.wait(tasks) + # 关闭mysql + mysql.cur.close() + mysql.connection.close() + +if __name__ == "__main__": + asyncio.run(getScenic()) diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/mafengwo_scenic_comment_title.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/mafengwo_scenic_comment_title.py" new file mode 100644 index 0000000..5addc58 --- /dev/null +++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/mafengwo_scenic_comment_title.py" @@ -0,0 +1,127 @@ +import re +import time +import requests +import aiohttp +import asyncio +import json +#评论内容所在的url,?后面是get请求需要的参数内容 +comment_url='http://pagelet.mafengwo.cn/poi/pagelet/poiCommentListApi?' 
+ +headers={ + 'Referer': 'https://www.mafengwo.cn/jd/10095/gonglve.html', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36' +}#请求头 + +from datetime import date, timedelta +today = time.strftime("%Y-%m-%d", time.localtime()) +tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d") + +from MysqlConnect import * +mysql = MysqlConnect() +mysqlTableName = "hotels" + +class Mafengwo_Scenic: + async def getComment(self,item,session): + try: + # async with session.post(item["url"]) as res: + # resp = await res.content() + resp = requests.post(item['url'],headers=eval(item['headers'])) + page = resp.content.decode('unicode-escape', 'ignore').encode('utf-8', 'ignore').decode('utf-8')#爬取页面并且解码 + page = page.replace('\/', '/')#将\/转换成/ + # print(page) + # 评论数量 + commentCountRes = re.compile(r'共有(?P.*?)').search(page) + commentCount = commentCountRes.group('commentCount') + # 评论标题 + nameobj = re.compile(r'(?P.*?)',re.S) + tagList = nameobj.findall(page) + # 评论数量 + numobj = re.compile(r'(?P.*?)',re.S) + numList = numobj.findall(page) + othersComment = [] + dic = {"好评":0,"中评":0,"差评":0} + for i in range(0,len(numList)): + # 处理标题 + tag = str(tagList[i+1]) + tag = tag.replace('\n','').replace('','').strip().replace('人提及)','').replace('(','') + tag = re.sub(r'[0-9]+', '', tag) + # 处理数量 + num = str(numList[i]) + num = num.replace('(','').replace(')','').replace('条','').replace('人提及','').replace(' (','').replace(')','') + + if tag != "好评" and tag != "中评"'' and tag != "差评": + othersComment.append({f"{tag}": num}) + else: + dic[f"{tag}"] = num + othersComment = str(othersComment) + args = ( + item["id"], item["name"], commentCount, dic["好评"], dic["中评"], dic["差评"], othersComment, + today, "马蜂窝") + print(args) + sql = f'INSERT INTO scenic_comment(scenicId,scenicName,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);' + mysql.insert(sql, args) + except Exception as e: + print("comment报错",e) + + async def saveScenic(self): + for i in range(1,2): + url = "https://www.mafengwo.cn/ajax/router.php" + data = { + 'sAct':"KMdd_StructWebAjax|GetPoisByTag", + 'iMddid':"10095", + 'iTagId':"0", + 'iPage':20, + '_ts':"1669286358348", + '_sn':"69d4a7c89e" + } + try: + res = requests.post(url, headers=headers, data=data) + # print(res.json()) + List = re.compile(r'/poi/(.*?).html.*?target="_blank" title="(.*?)">').findall(str(res.json())) + # print(List) + for item in List: + mfw_url = 'https://pagelet.mafengwo.cn/poi/pagelet/poiCommentListApi?callback=jQuery1810866662618942958_1669200603971¶ms={"poi_id":"%s","page":1}&_ts=1669200604147&_sn=8e0384d86d&_=1669200604147' % ( + item[0]) + mfw_headers = { + 'Referer': f'http://www.mafengwo.cn/poi/{item[0]}.html', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36' + } + args = (mfw_url, json.dumps(mfw_headers), today, item[1]) + print(args) + if i % 5 == 0: + time.sleep(5) + except Exception as e: + print(e) + + async def getScenic(self): + results = mysql.query("select id,name,mfw_url,mfw_header from scenics where mfw_url !='' ", None) + tasks = [] + url_list = [] + for row in results: + id = row[0] + name = row[1] + url = row[2] + headers = row[3] + url_list.append({ + "id": id, + "name": name, + "url": url, + "headers": headers, + }) + print("马蜂窝网站的所有景区长度",len(url_list)) + i = 0 + for item in url_list: + async with aiohttp.ClientSession(headers=eval(item['headers'])) as 
session: + task1 = asyncio.create_task(self.getComment(item, session)) + i = i + 1 + tasks.append(task1) + if i % 5 == 0 : + time.sleep(5) + await asyncio.wait(tasks) + # 关闭mysql + mysql.cur.close() + mysql.connection.close() + +if __name__ == '__main__': + asyncio.run(getScenic()) + # asyncio.run(saveScenic()) diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_scenic_comment_title.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_scenic_comment_title.py" new file mode 100644 index 0000000..dccca1f --- /dev/null +++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_scenic_comment_title.py" @@ -0,0 +1,87 @@ +import requests +import re +import aiohttp +import asyncio +import os +import xlwt +import xlrd +import time +import random +import json +import openpyxl + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0", + "Cookie": 'SECKEY_ABVK=6HIqXjD0ds/vRQnCLt0mHL+BlKCMeW1C20QspZholGg%3D; BMAP_SECKEY=OwZJJ1XtuED6yxVKpYWTZlTrTfswjB7iLK1DDxx5laDPIMT285FwiY4kkMQRbmC-Qa_BYY2jMWeW6YXOu2TQBUSKqwawAnndWCRQARf00Hr7hN3NWtO8bePrI84I1wO0XUK8Bk2BjsTKbFG-9C7uvg-ijL9UVeXTY9x8XC1q_JuGPhwYESA4uJipuwkX1kLC; QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN67=4815%2C8402%2C461%2C6287%2C31980%2C6675%2C39170%2C514011%2C513120%2C512329; QN58=1668915779477%7C1668915787448%7C4; QN57=16678736048620.2656904960064569; Hm_lvt_15577700f8ecddb1a927813c81166ade=1668508423,1668670982,1668768828,1668852847; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wWKXmWUPwawPwasXwaRj8VKiGVRiTWKg%3DERjNES2OXsDwaRPsEKasWPPsiK3siK3saKjOVRtOVRjmWsamWhPwaUvt; cs_june=84ca3a9b5a98782f34be6296a1606f06eb95a586e457de9dc68fac6e9a429296150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c1179abfc7c950e9b30934146fcf8bd089a765a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; _q=U.cbkbblv3519; _s=s_3IDSC2V3W3PGZ5F7A2NNNVAAOE; _t=27907349; _v=SsLO8uhOBBxdqVHEaJ4HRRRm-S5OQ4tF_8od6DDnWkVT_ugYFgt4T06vA1JNPsidy87-YU6-Em7O13wYNxUWwYMcqZtXVYqS6D-UDVREDpp4GBSmQBKSBqR41pOUqtVzJOa7ynWOtM4YS0MiDWncGOrqjfjDGrH8PuPitoHSVLH6; QN43=2; QN42=%E5%8E%BB%E5%93%AA%E5%84%BF%E7%94%A8%E6%88%B7; _vi=vngppYwRPwCDqhIqFPtxLm89wykxl2K7lGZEPOnwB341RCRAj3afnxLN-pQ2n-drX7GENqb0dVOcAFO7QBxpE7uqmso_3vMGM223wBq9FSP8OX21p_a6qwYhay-zJs3uYLRjvOLn0RpM-D8_YQGmyG5ba2uC3XqyN76edIKIa709; QN233=FreetripTouchin; 
__qt=v1%7CVTJGc2RHVmtYMStSdWh2QnF2amE0bVovaEEzaFVaK0pTaTcrdkIzRWd6VGx1V2xGUWtLMFZwdEFqRzBVMncvd0dqbEJhMXFTZk1FUFN6M3drN2tSR0MzMzUveURkN09yZUJ6dWVkQ2VvanJDRWZYTTh5L0NYUFN1YlMydVJJMVRDVEtNZmxyQWRiTEdiVUU1ZVp1UW5xQWtHbmhTbk1NZGR3Z2tKK0lrbWFudDkvVmVxZzFsL3NLVUt1Y1dsU0N5bERITnVRM2hlN1NDbSt0TGphTnZROUVJOWdBbmNXSDNRYUpzZHBhYURLdz0%3D%7C1668915783978%7CVTJGc2RHVmtYMS9Va284c1RyZGtEOFlGUXZlUWdveFFmdGdJNWwyTHlaSVlNYWorNlF5dmJ5eTAxaUhGZ1BId3VVZFJ2RnJQRk9oUW9jVnFhenVoYXc9PQ%3D%3D%7CVTJGc2RHVmtYMThzTUJOUVkzeUErMkpQMi9vT1dSN2Zib20zaUdOeWUwQ3BHWHBldWw1dld3YWhmdCtiQ3NZbFBiTERyYkdiMnhVYlhKdmQ1Wk5MNHFyQUEzSThWKzZTMHAyQndlYWtWQVplM2hlSkY5WXVxR2Iva1VwRWsrakEvdUttcXpnYWgrVys2REVvVVVMdm9tcDJ4OGpWdDlMUHZCT2pHMndub1VVM2doRHdTdnNudjBjdS9peXNiUkFxMVc1czlmTU56b2NNR3pqZEl5Ulc4RE94VmtLMFlDWlNnWEdGVVNTaVB3YVpYZnF6ZzIwdGxmVG5xUEhzZXJVZXk1UjdFbzFnRXhCa0I4MENLOEhBdE82azlpWCs2dlkwa092Q3dsMjExYTVDSlptY3BPdm1raVpMVytoT3dEMm52Y1ZyMEY2cVBER240c2Z4MDZrcFFOL3NTVFhMcm1IZlorT2U3RFJ1d1ltTmsvem91Kyt5TDdQOEVLZGQwOVVTVnl4QkcwbmEwcGo2T1lXc0U4c3l0UzlMY1k0bGpNc1lMSkVKODNkMVdPdzdJUklZYnE0eDdyYVVETU9nV2NTNTlHenRJS1ROZ3VvYXBocGszS0FkZWlZMGNtdWlBVFBzRTFKVTRwVjdCU3EwS2hMeGhqTUtGa1NEOGtkdlEzMS92VHB2MmxENUZ1aEhodEU1K1d5Q2RVL3FzaDhPa2xuUGpsQ0tJc1ZaeGNGL3hRU0NMclVEeXMzbjhvTEZ2RGlZMUE1WVhhUnN5NHkrelp5Sy84S2FYTUtZM0ZNR3A5ZWdqTkdobWlZOWt3T1FodHhuVE1HZ0xuQWI4alhuNkg0WkhLQXM4ZUcwMzlpQ2JFZFRKdFZUajMxMnFqNkpoU1VXZUNaa1dYdDJFWkFwZ1VscFQ0emFvM3d3dFFqQjlGdWlJMDU3aXR2L3BPSU1VRWFGRVQxSi9kV0xzVy9EdzRXVHVCU0NBMFJGaGlNVm5qN1JUMHhIV0VpS1QyWkVIeWF2c3dWTU5iNU1QQytWREh0OG5SRWJwOGE5N1g3OHVYT29ldENmb250OHZMNDVnOUlxdHM0N0IzUmFVMEJ4eitGNUJ1L3pVVXM5WDRZTURlS09SSUNkL3c9PQ%3D%3D; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; quinn=e5ba94e400db7ae611b28097b8ad7ddc9fea18aa074280921e89258cf82e7cb417cc1fc89ba3f04bfda0535faf80ae42; QN621=1490067914133%2Ctestssong%3DDEFAULT%26fr%3Dtejia_inton_search%261490067914133%252Ctestssong%3DDEFAULT; QN668=51%2C56%2C56%2C58%2C56%2C55%2C54%2C56%2C58%2C57%2C57%2C51%2C56; ariaDefaultTheme=null; QN63=%E6%A1%82%E6%9E%97%7C%E9%87%8D%E5%BA%86%7Cgl%20%7C%E9%98%B3%E6%9C%94%E5%8A%A8%E6%84%9F6D%E7%94%B5%E5%BD%B1%7C%E4%B8%80%E9%94%8B%E8%B6%8A%E9%87%8E%E5%B1%B1%E5%9C%B0%E8%BD%A6%E4%BF%B1%E4%B9%90%E9%83%A8%7C%E6%A1%82%E6%9E%97%E5%86%9B%E5%8D%9A%E5%9B%AD; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN44=cbkbblv3519; QN267=05310403864bcfa85; QN163=0; JSESSIONID=1AD098F421DEEF0CDC162A9D3277ECCE; QN271=a9ec74f7-b22b-4855-9aed-c212fe90a582; QN71="MTgwLjEzNi43MC41MzrmoYLmnpc6MQ=="; Hm_lpvt_15577700f8ecddb1a927813c81166ade=1668915784; QN1231=0; activityClose=1; QN243=15; QN310=hrec_zaj86' +} +# 获取当前时间 +from datetime import date, timedelta +today = time.strftime("%Y-%m-%d",time.localtime()) +tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d") + +from MysqlConnect import * +mysql = MysqlConnect() + +class Qunaer_Scenic: + async def getComment(self,item, index, session,dic,comments): + try: + async with session.get(item["url"]) as res: + resp = await res.json() + # print(resp) + dic["score"] = str(resp["data"]["score"]) + dic["commentCount"] = str(resp["data"]["commentCount"]) + dic["好评"] = 0 + dic["中评"] = 0 + dic["差评"] = 0 + # commentList = resp["data"]["commentList"] + if index == 1: + othersComment = [] + tagList = resp["data"]["tagList"] + # print(tagList) + for tag in tagList: + tagName = 
tag["tagName"] + tagNum = tag["tagNum"] + if tagName != "好评" and tagName != "中评"'' and tagName != "差评": + othersComment.append({f"{tagName}":tagNum}) + dic[f"{tagName}"] = tagNum + othersComment = str(othersComment) + args = (item["id"],item["name"],dic["score"],dic["commentCount"],dic["好评"],dic["中评"],dic["差评"],othersComment,today,"去哪儿") + print(args) + sql = f'INSERT INTO scenic_comment(scenicId,scenicName,score,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);' + mysql.insert(sql,args) + except Exception as e: + print("comment报错",e) + print(item) + # print("报错页数",index,sightId) + + async def getScenic(self): + async with aiohttp.ClientSession(headers=headers) as session: + results = mysql.query("select id,name,gw_url from scenics where gw_url !=''", None) + tasks = [] + url_list = [] + for row in results: + id = row[0] + name = row[1] + url = row[2] + url_list.append({ + "id": id, + "name": name, + "url": url, + }) + print("去哪儿网站的所有景区长度",len(url_list)) + i = 0 + for item in url_list: + dic = {} + comments = {} + task1 = asyncio.create_task(self.getComment(item, 1, session, dic, comments)) + i = i + 1 + tasks.append(task1) + if i % 5 == 0 : + time.sleep(5) + await asyncio.wait(tasks) + # 关闭mysql + mysql.cur.close() + mysql.connection.close() + +if __name__ == "__main__": + asyncio.run(getScenic()) diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py" new file mode 100644 index 0000000..82ce79c --- /dev/null +++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py" @@ -0,0 +1,38 @@ +# coding:utf-8 +# version:python3.7 +# author:Ivy + +from applications.common.tasks.景区评论标题.mafengwo_scenic_comment_title import Mafengwo_Scenic +from applications.common.tasks.景区评论标题.qunaer_scenic_comment_title import Qunaer_Scenic +from applications.common.tasks.景区评论标题.tongcheng_scenic_comment_title import Tongcheng_Scenic +from applications.common.tasks.景区评论标题.xiecheng_scenic_comment_title import Xiecheng_Scenic +import asyncio +import time + +mafengwo = Mafengwo_Scenic() +qunaer = Qunaer_Scenic() +tongcheng = Tongcheng_Scenic() +xiecheng = Xiecheng_Scenic() + +class Scenic: + def run(self): + print("开始爬取各个网站的评论标题!") + time_start=time.time() + + asyncio.run(xiecheng.getScenic()) + print("携程爬取结束") + asyncio.run(tongcheng.getScenic()) + print("同程爬取结束") + asyncio.run(qunaer.getScenic()) + print("去哪儿爬取结束") + asyncio.run(mafengwo.getScenic()) + print("马蜂窝爬取结束") + + time_end=time.time() + print(' time cost ',time_end-time_start,'s') + + + + + + diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py" new file mode 100644 index 0000000..4c69e1d --- /dev/null +++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py" @@ -0,0 +1,80 @@ +import requests +import re +import aiohttp +import asyncio +import csv +import json +import os +import time + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0", +} +from datetime import date, timedelta +today = 
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py"
new file mode 100644
index 0000000..4c69e1d
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py"
@@ -0,0 +1,80 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import csv
+import json
+import os
+import time
+
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+}
+from datetime import date, timedelta
+today = time.strftime("%Y-%m-%d", time.localtime())
+tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+# scrape the comments of one scenic spot
+class Tongcheng_Scenic:
+    async def getComment(self, item, pageIndex, session, dic):
+        try:
+            async with session.post(item['url']) as res:
+                # read the body as text and parse it as JSON
+                res1 = await res.text()
+                res1 = json.loads(res1)
+                if pageIndex == 1:
+                    dic['degreeLevel'] = res1['degreeLevel']
+                    dic['totalNum'] = res1['totalNum']
+                    dic['goodNum'] = res1['goodNum']
+                    dic['midNum'] = res1['midNum']
+                    dic['badNum'] = res1['badNum']
+                    dic['hasImgNum'] = res1['hasImgNum']
+                    dpTagList = res1['dpTagList']
+                    othersComment = []
+                    i = 0
+                    if dpTagList is not None:
+                        for dpTag in dpTagList:
+                            # only tags after the leading aggregate buckets go into othersComment
+                            if i > 5:
+                                othersComment.append({f"{dpTag['tagName']}": dpTag['tagNum']})
+                            i = i + 1
+                    othersComment = str(othersComment)
+                    args = (item["id"], item["name"], dic["degreeLevel"], dic["totalNum"], dic['goodNum'], dic['midNum'], dic['badNum'], othersComment, today, "同程")
+                    print(args)
+                    sql = 'INSERT INTO scenic_comment(scenicId,scenicName,satisfy_present,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+                    mysql.insert(sql, args)
+        except Exception as e:
+            print(e)
+
+    async def getScenic(self):
+        async with aiohttp.ClientSession(headers=headers) as session:
+            results = mysql.query("select id,name,tc_url from scenics where tc_url !='' ", None)
+            tasks = []
+            url_list = []
+            for row in results:
+                id = row[0]
+                name = row[1]
+                url = row[2]
+                url_list.append({
+                    "id": id,
+                    "name": name,
+                    "url": url,
+                })
+            print("number of Tongcheng scenic spots:", len(url_list))
+            i = 0
+            for item in url_list:
+                dic = {}
+                task = asyncio.create_task(self.getComment(item, 1, session, dic))
+                i = i + 1
+                tasks.append(task)
+                if i % 5 == 0:
+                    time.sleep(5)
+            await asyncio.wait(tasks)
+            # close the MySQL connection
+            mysql.cur.close()
+            mysql.connection.close()
+
+if __name__ == '__main__':
+    asyncio.run(Tongcheng_Scenic().getScenic())
+
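# Tongcheng_Scenic reads the response with res.text() and json.loads() because
# the endpoint does not always declare an application/json content type. A
# minimal sketch of the same idea using aiohttp's own parser (fetch_json is an
# illustrative name, not part of the patch):
import json
import aiohttp

async def fetch_json(session: aiohttp.ClientSession, url: str):
    async with session.post(url) as res:
        try:
            # content_type=None tells aiohttp to skip the mimetype check
            return await res.json(content_type=None)
        except (aiohttp.ContentTypeError, json.JSONDecodeError) as e:
            print("response was not JSON:", url, e)
            return None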

diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_scenic_comment_title.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_scenic_comment_title.py"
new file mode 100644
index 0000000..33b9bde
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_scenic_comment_title.py"
@@ -0,0 +1,145 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import csv
+import json
+import time
+
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+}
+
+from datetime import date, timedelta
+today = time.strftime("%Y-%m-%d", time.localtime())
+tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+
+class Xiecheng_Scenic:
+    # scrape the basic information of a scenic spot
+    def getBaseInfo(self, html):
+        # initialise the result dict
+        dic = {}
+        # named groups: name, level, score, commentNum, openTime, poiId;
+        # the literal page fragments between the groups anchor the match
+        obj = re.compile(r'(?P<name>.*?)'
+                         r'.*?titleTips">(?P<level>.*?)'
+                         r'.*?commentScoreNum">(?P<score>.*?)'
+                         r'.*?hover-underline">(?P<commentNum>.*?)'
+                         # r'.*?地址(?P<address>.*?)'
+                         r'.*?开放时间(?P<openTime>.*?)'
+                         r',"poiId":(?P<poiId>.*?),"poiType"', re.S)
+        # some spots have no level rating
+        tempobj1 = re.compile(r'titleTips">(?P<level>.*?).*?', re.S)
+        tempres1 = tempobj1.search(html)
+        if tempres1 is None:
+            dic['level'] = 0
+            obj = re.compile(r'(?P<name>.*?)'
+                             r'.*?commentScoreNum">(?P<score>.*?)'
+                             r'.*?hover-underline">(?P<commentNum>.*?)'
+                             # r'.*?地址(?P<address>.*?)'
+                             r'.*?开放时间(?P<openTime>.*?)'
+                             r',"poiId":(?P<poiId>.*?),"poiType"', re.S)
+        else:
+            dic['level'] = tempres1.group('level')
+        # some spots have no score
+        tempobj2 = re.compile(r'commentScoreNum">(?P<score>.*?)'
+                              r'.*?hover-underline">(?P<commentNum>.*?).*?'
+                              , re.S)
+        tempres2 = tempobj2.search(html)
+        if tempres2 is None:
+            dic['score'] = 0
+            obj = re.compile(r'(?P<name>.*?)'
+                             # r'.*?地址(?P<address>.*?)'
+                             r'.*?开放时间(?P<openTime>.*?)'
+                             r',"poiId":(?P<poiId>.*?),"poiType"', re.S)
+        else:
+            dic['score'] = tempres2.group('score')
+        # some spots have no comment count
+        tempobj3 = re.compile(r'hover-underline">(?P<commentNum>.*?).*?'
+                              , re.S)
+        tempobj3 = tempobj3.search(html)
+        if tempobj3 is None:
+            dic['commentNum'] = 0
+            return dic
+        else:
+            dic['commentNum'] = tempobj3.group('commentNum')
+        # finally run the combined pattern and collect all fields
+        resp1 = obj.search(html)
+        # print(resp1.group('name') + ' scraped')
+        if resp1 is not None:
+            dic = resp1.groupdict()
+            if tempres1 is not None:
+                dic['level'] = dic['level'].replace('</span>', '')
+        return dic
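# getBaseInfo above swaps in a smaller combined pattern every time a field is
# missing from the page. A minimal sketch of an alternative (illustrative
# names; the anchor literals are assumptions, not Ctrip's real markup): match
# each field on its own, so a missing field only affects itself.
import re

FIELD_PATTERNS = {
    "level": re.compile(r'titleTips">(?P<v>.*?)</span>', re.S),
    "score": re.compile(r'commentScoreNum">(?P<v>.*?)<', re.S),
    "commentNum": re.compile(r'hover-underline">(?P<v>.*?)<', re.S),
    "poiId": re.compile(r',"poiId":(?P<v>.*?),"poiType"', re.S),
}

def extract_fields(html, default=0):
    # each field independently falls back to `default` when its pattern
    # finds nothing, instead of rebuilding one big regex per missing field
    out = {}
    for name, pattern in FIELD_PATTERNS.items():
        m = pattern.search(html)
        out[name] = m.group("v") if m else default
    return out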
.*?' + r',"poiId":(?P.*?),"poiType"', re.S) + # 有些景点没有评分 + else: + dic['score'] = tempres2.group('score') + tempobj3 = re.compile(r'hover-underline">(?P.*?).*?' + , re.S) + tempobj3 = tempobj3.search(html) + if tempobj3 == None: + dic['commentNum'] = 0 + return dic + else: + dic['commentNum'] = tempobj3.group('commentNum') + # 最终爬取景点基本信息并存入 + resp1 = obj.search(html) + # print(resp1.group('name') + '爬取成功') + if resp1 != None : + dic = resp1.groupdict() + if tempres1 != None: + dic['level'] = dic['level'].replace('', '') + return dic + + # 爬取评论标题 + def getCommentTitle(self,html, dic,othersComment): + obj = re.compile(r'"hotTag">(?P.*?)</span>.*?', re.S) + titles = obj.finditer(html) + i = 0 + for item in titles: + good = item.group('title').split('<!-- -->') + good[1] = good[1].replace('(', '').replace(')', '') + # print(good) + if good[0] == '好评' or good[0] == '差评': + dic[f'{good[0]}'] = good[1] + else: + othersComment.append({f"{good[0]}": good[1]}) + i = i + 1 + + # 爬取网页具体信息ok + async def getDetail(self,item, session): + tasks = [] + try: + async with session.get(item['url']) as res: + html = await res.text() + dic = self.getBaseInfo(html) + othersComment = [] + dic["好评"] = 0 + dic["中评"] = 0 + dic["差评"] = 0 + self.getCommentTitle(html, dic, othersComment) + othersComment = str(othersComment) + args = (item["id"], item["name"], dic["score"], dic["commentNum"], dic["好评"], dic["中评"], dic["差评"], othersComment, + today,"携程") + print(args) + sql = f'INSERT INTO scenic_comment(scenicId,scenicName,score,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);' + mysql.insert(sql, args) + except Exception as e: + print("comment报错",e) + time.sleep(5) + await getDetail(item,session) + + + async def getScenic(self): + async with aiohttp.ClientSession(headers=headers) as session: + results = mysql.query("select id,name,xc_url from scenics where xc_url !=''", None) + url_list = [] + for row in results: + id = row[0] + name = row[1] + url = row[2] + url_list.append({ + "id": id, + "name": name, + "url": url, + }) + tasks = [] + print("携程网站的所有景区长度", len(url_list)) + i = 0 + for item in url_list: + task = asyncio.create_task(self.getDetail(item, session)) + i = i + 1 + tasks.append(task) + if i % 5 == 0 : + time.sleep(5) + await asyncio.wait(tasks) + # 关闭mysql + mysql.cur.close() + mysql.connection.close() + +if __name__ == '__main__': + asyncio.run(getScenic()) \ No newline at end of file diff --git "a/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_route_comment_title.py" "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_route_comment_title.py" new file mode 100644 index 0000000..a00a19c --- /dev/null +++ "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_route_comment_title.py" @@ -0,0 +1,89 @@ +import requests +import re +import aiohttp +import asyncio +import csv +import json +import time + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0", + # 'Referer': "https://glsh7.package.qunar.com/user/detail.jsp?id=3078148931&osrc=tts_tuan&rttp=%E6%9C%AC%E5%9C%B0%E6%B8%B8&dep=5qGC5p6X&arr=5qGC5p6X&ftdt=2023-03-01%2C2023-03-01&qssrc=eyJ0cyI6IjE2Njk4NzM1MTgyMzUiLCJzcmMiOiJ1bmRlZmluZWQuZW52YW5vIiwiYWN0Ijoic2Nyb2xsIiwicmFuZG9tIjoiOTU1MjAwIn0=", + # 'Cookie': "QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; 
QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN58=1669821195055%7C1669822256665%7C5; QN57=16678736048620.2656904960064569; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wVK3OaUPwawPwas2sXsjNXs2mWSXnXPEIaRHIaPX8X%3DDOaRanXPEhEPjAiK3siK3saKjOVKjsaSaAas38ahPwaUvt; cs_june=173605640a003a620f2f106b211063ea2287c94e80d66389452136307aa6d7d9150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c117951bd5a81c5254ab3bab7748a9aa6d8185a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; _q=U.cbkbblv3519; _s=s_3IDSC2V3W3PGZ5F7A2NNNVAAOE; _t=27907349; _v=SsLO8uhOBBxdqVHEaJ4HRRRm-S5OQ4tF_8od6DDnWkVT_ugYFgt4T06vA1JNPsidy87-YU6-Em7O13wYNxUWwYMcqZtXVYqS6D-UDVREDpp4GBSmQBKSBqR41pOUqtVzJOa7ynWOtM4YS0MiDWncGOrqjfjDGrH8PuPitoHSVLH6; QN43=2; QN42=%E5%8E%BB%E5%93%AA%E5%84%BF%E7%94%A8%E6%88%B7; _vi=CxIUDXSKKXrdfKW8a_JOt7FdAzF3YVARuSGejExpLtNTYJb0IsR-5f82yRcybhrWWwl3aU7KqT10nKk_ydXwYxMzUiLL1hgdynGc4YfMr2UYeME-S_UnXUKnHzth2xeCRbsBgBPNuA-aM44OzN_1OoHFpGGhOCYcEmmLrjtCInJi; QN233=FreetripTouchin; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; quinn=e5ba94e400db7ae611b28097b8ad7ddc9fea18aa074280921e89258cf82e7cb417cc1fc89ba3f04bfda0535faf80ae42; QN621=1490067914133%2Ctestssong%3DDEFAULT%26fr%3Dtejia_inton_search%261490067914133%252Ctestssong%3DDEFAULT; QN668=51%2C56%2C56%2C58%2C56%2C55%2C54%2C56%2C58%2C57%2C57%2C51%2C56; QN243=572; _jzqa=1.2488123552548573700.1668855923.1669870580.1669874997.8; _jzqx=1.1669087498.1669874997.1.jzqsr=dujia%2Equnar%2Ecom|jzqct=/.-; ariaDefaultTheme=null; QN100=WyLmoYLmnpfpmLPmnJTnuq%2FnjqnkuIDml6XmuLjpk7blrZDlsqnml6DotK3nial85qGC5p6XIiwi54Ot6I2QMTLkurrnsr7lk4HlsI%2Flm6Ig5ryT5rGf5ri46Ii5MjDlhYPog4zmma%2FpgYfpvpnmsrPpk7blrZDlsqnljYF85qGC5p6XIiwi5qGC5p6XfOahguaelyJd; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN44=cbkbblv3519; QN267=053104038b8bcf1fd; QN163=0; QN271=638b35df-ba09-4ab7-a0ef-e80528529e59; _jzqc=1; _jzqckmp=1; QN61=%5B%22%E6%A1%82%E6%9E%97%E9%98%B3%E6%9C%94%E7%BA%AF%E7%8E%A9%E4%B8%80%E6%97%A5%E6%B8%B8%E9%93%B6%E5%AD%90%E5%B2%A9%E6%97%A0%E8%B4%AD%E7%89%A9%22%2C%22%E7%83%AD%E8%8D%9012%E4%BA%BA%E7%B2%BE%E5%93%81%E5%B0%8F%E5%9B%A2%20%E6%BC%93%E6%B1%9F%E6%B8%B8%E8%88%B920%E5%85%83%E8%83%8C%E6%99%AF%E9%81%87%E9%BE%99%E6%B2%B3%E9%93%B6%E5%AD%90%E5%B2%A9%E5%8D%81%22%2C%22%E6%A1%82%E6%9E%97%22%5D; _qzja=1.621578797.1669870580293.1669870580293.1669875028328.1669876047449.1669877523855..0.0.7.2; _qzjc=1; _qzjto=7.2.0; Hm_lvt_a8a41d37454fd880cdb23d6ef05d917b=1669870580; Hm_lpvt_a8a41d37454fd880cdb23d6ef05d917b=1669877524; JSESSIONID=A0CB91CD362911D601F4C9CF6971DF8D; activityClose=1; _jzqb=1.24.10.1669874997.1; _qzjb=1.1669875028328.3.0.0.0" +} +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d", time.localtime()) +tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d") +from MysqlConnect import * + 
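# The MysqlConnect helper imported above is defined elsewhere in this patch
# set; only its call sites are visible in this hunk. A minimal sketch of the
# interface these crawlers rely on -- query()/queryHotel() returning rows,
# insert() committing one statement, and .cur/.connection exposed for cleanup.
# pymysql and the connection parameters are assumptions, not the real file.
import pymysql

class MysqlConnectSketch:
    def __init__(self):
        # hypothetical credentials for illustration only
        self.connection = pymysql.connect(host="127.0.0.1", port=3306,
                                          user="root", password="secret",
                                          database="tourism", charset="utf8mb4")
        self.cur = self.connection.cursor()

    def query(self, sql, args):
        # run a SELECT and hand back all rows
        self.cur.execute(sql, args)
        return self.cur.fetchall()

    def insert(self, sql, args):
        # commit per statement so a crash loses at most one row
        self.cur.execute(sql, args)
        self.connection.commit()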
+mysql = MysqlConnect()
+
+class Qunaer_Route:
+    # scrape the comment titles of one route
+    async def getCommentTitle(self, item, session):
+        data = json.loads(item['data'])
+        resp = None
+        try:
+            async with session.post(item['url'], data=data) as res:
+                otherComment = []
+                resp = await res.json()
+                # print(resp)
+                resp = resp['data']
+                totalComment = resp['totalComment']
+                ratingExcellent = resp['ratingExcellent']
+                ratingAverage = resp['ratingAverage']
+                ratingAwful = resp['ratingAwful']
+                numWithImages = resp['numWithImages']
+                goodRate = 0
+                if totalComment != 0:
+                    goodRate = ratingExcellent / totalComment
+                otherComment.append({'有图': numWithImages})
+                # mainCommentList = resp['mainCommentList']
+                args = (
+                    item['id'], item['name'], totalComment, 0, goodRate, ratingExcellent, ratingAverage, ratingAwful,
+                    str(otherComment), today, "去哪儿")
+                print(args)
+                sql = 'INSERT INTO route_comment(route_id,route_name,total,score,goodRate,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+                mysql.insert(sql, args)
+        except Exception as e:
+            print("comment error", e)
+            print(resp)
+            # time.sleep(3)
+            # async with aiohttp.ClientSession(headers=headers) as session1:
+            #     await getCommentTitle(item,session1)
+
+    async def getRoute(self):
+        # one shared session; tasks are still pending when a per-item session would already be closed
+        async with aiohttp.ClientSession(headers=headers) as session:
+            # results = mysql.query("select id,route_name,xc_url,xc_data from route where xc_data !='' and id = 106", None)
+            results = mysql.query("select id,route_name,gw_url,gw_data from route where gw_data !=''", None)
+            url_list = []
+            for row in results:
+                id = row[0]
+                name = row[1]
+                url = row[2]
+                data = row[3]
+                url_list.append({
+                    "id": id,
+                    "name": name,
+                    "url": url,
+                    "data": data,
+                })
+            tasks = []
+            print("number of Qunar routes:", len(url_list))
+            # print(url_list)
+            i = 0
+            for item in url_list:
+                task = asyncio.create_task(self.getCommentTitle(item, session))
+                i = i + 1
+                tasks.append(task)
+                if i % 2 == 0:
+                    time.sleep(5)
+            await asyncio.wait(tasks)
+            # close the MySQL connection
+            mysql.cur.close()
+            mysql.connection.close()
+
+if __name__ == '__main__':
+    # test()
+    # asyncio.run(getSearch())
+    asyncio.run(Qunaer_Route().getRoute())
diff --git "a/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/route_start.py" "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/route_start.py"
new file mode 100644
index 0000000..f58ddf2
--- /dev/null
+++ "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/route_start.py"
@@ -0,0 +1,30 @@
+# coding:utf-8
+# version:python3.7
+# author:Ivy
+
+from applications.common.tasks.线路评论标题.xiecheng_route_comment_title import Xiecheng_Route
+from applications.common.tasks.线路评论标题.qunaer_route_comment_title import Qunaer_Route
+import asyncio
+import time
+
+qunaer = Qunaer_Route()
+xiecheng = Xiecheng_Route()
+
+class Route:
+    def run(self):
+        print("start crawling comment titles from every site!")
+        time_start = time.time()
+
+        asyncio.run(xiecheng.getRoute())
+        print("Ctrip done")
+        asyncio.run(qunaer.getRoute())
+        print("Qunar done")
+
+        time_end = time.time()
+        print(' time cost ', time_end - time_start, 's')
+
+
+
+
+
diff --git "a/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_route_comment_title.py" "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_route_comment_title.py"
new file mode 100644
index 0000000..3a67ea4
--- /dev/null
+++ 
"b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_route_comment_title.py" @@ -0,0 +1,106 @@ +import requests +import re +import aiohttp +import asyncio +import csv +import json +import time + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0", + 'Cookie':"_bfa=1.1667787832924.3h7rvk.1.1669895769722.1669970415585.59.1057.0; _ubtstatus=%7B%22vid%22%3A%221667787832924.3h7rvk%22%2C%22sid%22%3A59%2C%22pvid%22%3A1057%2C%22pid%22%3A0%7D; MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index&CT=1669895769884&CURL=https%3A%2F%2Fwww.ctrip.com%2F%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex&VAL={}; __zpspc=9.66.1669970417.1669972201.47%232%7Cwww.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8%258B%25E6%2594%25BB%25E7%2595%25A5%7C%23; _jzqco=%7C%7C%7C%7C1669970417342%7C1.386356559.1667787833232.1669971673086.1669972201783.1669971673086.1669972201783.undefined.0.0.519.519; MKT_CKID=1667787833303.6cihj.yc0k; _RF1=180.136.89.152; _RSG=PI4tVah22dC4DYKmrdfaUA; _RDG=28a682bf6ceb192ebc37d846ca69b5ed63; _RGUID=c9c20ab9-1fdc-4499-a7a4-a67d70522344; MKT_Pagesource=PC; _bfaStatusPVSend=1; _bfaStatus=success; nfes_isSupportWebP=1; _ga=GA1.2.728287556.1667875987; Session=SmartLinkCode=ctrip&SmartLinkKeyWord=&SmartLinkQuary=_UTF.&SmartLinkHost=ctrip.com&SmartLinkLanguage=zh; UUID=20EDDDB8AE46403495EFEE36FAC417C1; IsPersonalizedLogin=F; ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; ibu_h5_lang=en; ibu_h5_local=en-us; Hm_lvt_37b54c42b9dde393e60c88c1a84657cb=1668156071,1668390905,1668766327,1669630249; _lizard_LZ=ghjTSPinRlQVIkJmqrUWXFopstucvwx210a3ydz754YfM6ZE89+b-eKCHOLNBGAD; intl_ht1=h4=33_75424975,33_782288,33_6550062,2_441618; _abtest_userid=294c1513-b267-4324-8a01-750ac3d84f81; _gcl_au=1.1.920354234.1668409170; U_TICKET_SELECTED_DISTRICT_CITY=%7B%22value%22%3A%7B%22districtid%22%3A%222%22%2C%22districtname%22%3A%22%E4%B8%8A%E6%B5%B7%22%2C%22isOversea%22%3Anull%7D%2C%22createTime%22%3A1668416843913%2C%22updateDate%22%3A1668416843913%7D; FlightIntl=Search=[%22KWL|%E6%A1%82%E6%9E%97(KWL)|33|KWL|480%22%2C%22BJS|%E5%8C%97%E4%BA%AC(BJS)|1|BJS|480%22%2C%222022-11-17%22]; Hm_lvt_576acc2e13e286aa1847d8280cd967a5=1668916753; Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1669895770&Expires=1670500569884; login_uid=C0AB45AFF50D550863B877680E735ABE; login_type=0; cticket=337E22BDC21DD4985842195D8CEDEC0CE79886C069743562391907D8E4575607; AHeadUserInfo=VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0; DUID=u=C0AB45AFF50D550863B877680E735ABE&v=0; IsNonUser=F; appFloatCnt=1; StartCity_Pkg=PkgStartCity=33; GUID=09031172114453342165; _bfs=1.61; MKT_CKID_LMT=1669970417053; _bfi=p1%3D290570%26p2%3D290601%26v1%3D1056%26v2%3D1055" +} +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d", time.localtime()) +tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d") +from MysqlConnect import * + +mysql = MysqlConnect() + + +# 爬取评论标题 +class Xiecheng_Route: + async def getCommentTitle(self,item, session): + data = json.loads(item['data']) + try: + async with session.post(item['url'], json=data) as res: + dic = {} + otherComment = [] + resp = await res.json() + commentAggregation = resp['commentAggregation'] + totalCount = resp['totalCount'] + scoreAvg = commentAggregation['scoreAvg'] + goodRate = commentAggregation['goodRate'] + commonTags = commentAggregation['commonTags'] + dic['score'] = scoreAvg + dic['goodRate'] = 
goodRate
+                dic['total'] = totalCount
+                # 好评/中差评 may be absent from commonTags, so default them first
+                dic['好评'] = 0
+                dic['中差评'] = 0
+                for comment in commonTags:
+                    totalCount = comment['totalCount']
+                    displayName = comment['displayName']
+                    if displayName == '好评' or displayName == '中差评':
+                        dic[f'{displayName}'] = totalCount
+                    else:
+                        otherComment.append({f'{displayName}': totalCount})
+                if 'tourTypeTags' in commentAggregation:
+                    tourTypeTags = commentAggregation['tourTypeTags']
+                    for comment in tourTypeTags:
+                        totalCount = comment['totalCount']
+                        displayName = comment['displayName']
+                        otherComment.append({f'{displayName}': totalCount})
+                if 'aiTags' in commentAggregation:
+                    aiTags = commentAggregation['aiTags']
+                    for comment in aiTags:
+                        totalCount = comment['totalCount']
+                        displayName = comment['displayName']
+                        otherComment.append({f'{displayName}': totalCount})
+                if 'subItemTags' in commentAggregation:
+                    subItemTags = commentAggregation['subItemTags']
+                    for comment in subItemTags:
+                        totalCount = comment['totalCount']
+                        displayName = comment['displayName']
+                        otherComment.append({f'{displayName}': totalCount})
+                # the combined 中差评 count is stored for both middle and bad
+                args = (
+                    item['id'], item['name'], dic['total'], dic['score'], dic['goodRate'], dic['好评'], dic['中差评'], dic['中差评'],
+                    str(otherComment), today, "携程")
+                print(args)
+                sql = 'INSERT INTO route_comment(route_id,route_name,total,score,goodRate,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+                mysql.insert(sql, args)
+        except Exception as e:
+            print("comment error", e)
+            print(item)
+
+    async def getRoute(self):
+        async with aiohttp.ClientSession(headers=headers) as session:
+            # results = mysql.query("select id,route_name,xc_url,xc_data from route where xc_data !='' and id = 106", None)
+            results = mysql.query("select id,route_name,xc_url,xc_data from route where xc_data !=''", None)
+            url_list = []
+            for row in results:
+                id = row[0]
+                name = row[1]
+                url = row[2]
+                data = row[3]
+                url_list.append({
+                    "id": id,
+                    "name": name,
+                    "url": url,
+                    "data": data,
+                })
+            tasks = []
+            print("number of Ctrip routes:", len(url_list))
+            # print(url_list)
+            i = 0
+            for item in url_list:
+                task = asyncio.create_task(self.getCommentTitle(item, session))
+                i = i + 1
+                tasks.append(task)
+                if i % 10 == 0:
+                    time.sleep(5)
+            await asyncio.wait(tasks)
+            # close the MySQL connection
+            mysql.cur.close()
+            mysql.connection.close()
+
+if __name__ == '__main__':
+    asyncio.run(Xiecheng_Route().getRoute())
diff --git "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py" "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
new file mode 100644
index 0000000..39a67b6
--- /dev/null
+++ "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
@@ -0,0 +1,34 @@
+# coding:utf-8
+# version:python3.7
+# author:Ivy
+
+from applications.common.tasks.酒店评论标题.xiecheng_hotel_comment_title import Xiecheng_Hotel
+from applications.common.tasks.酒店评论标题.qunaer_hotel_comment_title import Qunaer_Hotel
+from applications.common.tasks.酒店评论标题.tongcheng_hotel_comment_title import Tongcheng_Hotel
+import asyncio
+import time
+
+qunaer = Qunaer_Hotel()
+tongcheng = Tongcheng_Hotel()
+xiecheng = Xiecheng_Hotel()
+
+class Hotel:
+    def run(self):
+        print("start crawling comment titles from every site!")
+        time_start = time.time()
+
+        asyncio.run(xiecheng.getHotel())
+        print("Ctrip done")
+        asyncio.run(tongcheng.getHotel())
+        print("Tongcheng done")
+        asyncio.run(qunaer.getHotel())
+        print("Qunar done")
+
+        time_end = time.time()
+        print(' time cost ', time_end - time_start, 's')
+
+
+
+
+
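# Every crawler in this patch paces itself with time.sleep(5) after each
# batch of tasks. Inside a coroutine that blocks the whole event loop, so
# tasks that are already scheduled stall as well. A minimal non-blocking
# throttling sketch (run_throttled is an illustrative name, not the patch's
# own API):
import asyncio

async def run_throttled(coros, max_concurrent=5, pause=5):
    # the semaphore caps how many requests are in flight at once, and
    # asyncio.sleep yields to the loop instead of freezing it
    sem = asyncio.Semaphore(max_concurrent)

    async def guarded(coro):
        async with sem:
            result = await coro
            await asyncio.sleep(pause)  # polite per-request delay
            return result

    return await asyncio.gather(*(guarded(c) for c in coros),
                                return_exceptions=True)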
diff --git "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_hotel_comment_title.py" "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_hotel_comment_title.py" new file mode 100644 index 0000000..19f04d7 --- /dev/null +++ "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_hotel_comment_title.py" @@ -0,0 +1,76 @@ +import requests +import re +import aiohttp +import asyncio +import os +import xlwt +import xlrd +import time +import openpyxl +import json +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d",time.localtime()) +tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d") +headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0', + # 'Content-Type': 'application/json;charset=utf-8', + # 'Content-Length': '1418', + # 'Origin': 'https://hotel.qunar.com', + # 'Connection': 'keep-alive', + # 'Referer': 'https://hotel.qunar.com/cn/guilin/?fromDate=2022-11-16&toDate=2022-11-17&cityName=%E6%A1%82%E6%9E%97', + # 'Cookie': 'QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN58=1668514571680%7C1668514571680%7C1; QN57=16678736048620.2656904960064569; ariaDefaultTheme=null; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wWKDOWuPwawPwasiGXKtmaRamVKGDEDkIVKD%3Da%3DjNX2ERaK3sWDfGX2EDiK3siK3saKjOVRPmVK3wVKaAWwPwaUvt; cs_june=9272ae1939d58083d4743676507ffd29fbabe527912f2f1973ed8370a47566c8150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c117928b56a71cbfdbdae6d0a8e0c2d26aa9e5a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; _q=U.cbkbblv3519; _s=s_3IDSC2V3W3PGZ5F7A2NNNVAAOE; _t=27907349; _v=SsLO8uhOBBxdqVHEaJ4HRRRm-S5OQ4tF_8od6DDnWkVT_ugYFgt4T06vA1JNPsidy87-YU6-Em7O13wYNxUWwYMcqZtXVYqS6D-UDVREDpp4GBSmQBKSBqR41pOUqtVzJOa7ynWOtM4YS0MiDWncGOrqjfjDGrH8PuPitoHSVLH6; QN43=2; QN42=%E5%8E%BB%E5%93%AA%E5%84%BF%E7%94%A8%E6%88%B7; _vi=CGXWRmr0v6gQkJRlZ_6pw-bLocEdRkSo8Xwow4GZOzimn0tLx6x5Le1BMu6f87LYgsSfYHgjOFhvVsDnmRqzU0mo-HkSSge-5UpMggzcw6CbOTTb41NX1K04bOsVxvYErsEQ-dxBNHzmnLsMbpTpDhjmYVi-cwQPLk3yyborrCAc; QN233=FreetripTouchin; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; 
__qt=v1%7CVTJGc2RHVmtYMS9JYkhPaVB5VnNneGMxSHB0MjgxTW0vQk00NXIrc1JyWUdyaFhEeWtUbFBDVUk4ZEE5SUN3MFJ0emtjeUVRdkxQUStzRDdoV0NQT1VuOFlyRFZhOGhaZk9TaVJZS1hDZWtYSjdzbHhUR3dSRDRzOXhKRFlLT01BcFBnR2NBY2VaTnh3N0R6bDF3WE9tK2FjcDVPNG1nMk1DeGRqdFAwdkNuK2FaaEdubkdLSzVVUXptcnZqUmVHQXgxWFZrd0d3S08zQThySnVSYkQ0UT09%7C1668589224247%7CVTJGc2RHVmtYMTljdWtuS3FaQklDS2RDeVJDNUVIT2dHVEt3WURCWXAzZ3pNVGZSWlR5TVltSzNDWlh1aFhNK1NKamk3UG5vU21oN2ZTWnN6RHdGWkE9PQ%3D%3D%7CVTJGc2RHVmtYMTlMTFFGQnNtQlVzWUhYeUg1Wnl1WVpMalNYYTlTd3pLZHVmZ0Q2eDNXY1N3VWwwbkZTZEpndnlqeENQcDVSZFlvaFpWVHVXYld4UFh2TVo1TDFPaFJCQndSbDVZWEZUZ3U2SVJwam80cUNYZ0s4VFl3WUpXbzczQjg3TDJmL0x6NnFraDlOdTFoWG1YdVExRjh6ZlBHWGU3WWNjUFNacmJBaURKeGNjdzVVZ3plTndOL0JXekt6T0h5TXQzbGxhVGZjbFRnNFlQMUpLd3ppZHMzQlpBVzlpRmo1WkVHWDVrcDJRZHNYQmRYVTk2eFJiMVZ6cmtpMGFOTHBBVzNBbGhBWEEwZFBjOUNyekl1VGxtUlNyZ09Gc2tZa2F5dnZtS1B2emlLS2VKaFdIVXp6V21vVFhoMmRPby9sa2JVUGZ0enFZREl5bFVrME5RMVM2U1U5eFJIQjdFaCtsZ2dXT3NCWnltelpCYVZxeVVScHhWOXVNakwvd0xqVmZqT1dZZzRrUi9XanUrcTZqYlBrQWRkbHppYVUzejE1Vzg4UTBXS1oyL0lpVmtZQmNEVGVHdjBWdXNSZmw2VUtTeWRKS3VnSXBFZ3A5YmRDRWNvSWpBR3NpM2hwN2JITks0c051S2FuVC9xdGhmdjN0V1JUWUZoRWNkL092aXVqdjFuTktlZUNXa3BvSEdUSklFT0RrVVFsRldPSjdJemlDbm1RUkFkVWdEQk5ib2svZmNzQnpDUjJjOTJoMVdUanE2KytZS24vOVU0dlhGOXMvdDc1NnlVYUhub3pDZnhMQ0k1QTRUbStTMjlpNG9lSm9hd3FGTW1aeWdFRUs0OWhKZVE1c2tiZVJRL2IydkgvVmNlbmRRNTNVeVpKdkxyaFZqbjIxQVZ3Q0FKWFdmTU5XYVp4c0QrQUtmM2M%3D; SECKEY_ABVK=n0yGYaC0Uv/VO8QaVoB7LWOTFI1iG4L4ZC9VsTH65IM%3D; BMAP_SECKEY=yzvwj4ltTTURWnc-Y3gwGTG9ua0QoKNAekMiwcrQ6JAAewWk9khA6I8hsY9M6VR656LUBVS30ubB-smmXJ5vg3QAHYFamo-SGOzGHPzX10oqjUcmL7xZKw-IyJc7cEhRaug23EssK3-RhsYVSss7Ui3jjCW6AxSlVUe3Dz4v2hKyrifgZOqQQOOZ_uacKXiG; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN44=cbkbblv3519; QN267=0531040383a985e0a; QN163=0; QN271=2339fa3a-72a3-4cfd-8f88-55e9645bcb62; tabIndex=0; cityUrl=guilin; cityName=%25E6%25A1%2582%25E6%259E%2597; checkInDate=2022-11-16; checkOutDate=2022-11-17', +} + +from MysqlConnect import * +mysql = MysqlConnect() + +class Qunaer_Hotel: + async def getComment(self,item, session): + try: + async with session.get(item['url']) as res: + resp = await res.json() + goodcomment = resp['data']['ratingStat']['positiveCount'] + midcomment = resp['data']['ratingStat']['neutralCount'] + badcomment = resp['data']['ratingStat']['negativeCount'] + count = resp['data']['count'] + othersComment = "" + args = (item["id"], item["name"], count, goodcomment, midcomment, badcomment, othersComment, today, "去哪儿") + print(args) + sql = 'INSERT INTO hotel_comment(hotelId,hotelName,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);' + mysql.insert(sql, args) + except Exception as e: + print("comment报错",e) + + # 从数据库获取酒店信息 + async def getHotel(self): + async with aiohttp.ClientSession(headers=headers) as session: + # 从数据库拿url + results = mysql.queryHotel("select id,name,gw_url from hotels where gw_url!='' and 1000<id ", None) + tasks = [] + url_list = [] + for row in results: + id = row[0] + name = row[1] + url = row[2] + url_list.append({ + "id": id, + "name": name, + "url": url, + }) + print("去哪儿网站的所有酒店长度", len(url_list)) + i = 0 + for item in url_list: + task2 = asyncio.create_task(self.getComment(item, session)) + i = i + 1 + tasks.append(task2) + if i % 5 == 0 : + time.sleep(5) + await asyncio.wait(tasks) + # 关闭mysql + 
mysql.cur.close() + mysql.connection.close() + +if __name__ == '__main__': + # saveHotel() + asyncio.run(getHotel()) diff --git "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_hotel_comment_title.py" "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_hotel_comment_title.py" new file mode 100644 index 0000000..f7abe15 --- /dev/null +++ "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_hotel_comment_title.py" @@ -0,0 +1,102 @@ +import requests +import re +import aiohttp +import asyncio +import csv +import json +import os +import time +import datetime +import pytz + +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d", time.localtime()) +tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d") +headers = { + 'Host': 'www.ly.com', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0', + 'Accept': 'application/json, text/plain, */*', + 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', + 'Accept-Encoding': 'gzip, deflate, br', + 'tmapi-client': 'tpc', + 'appfrom': '16', + 'cluster': 'idc', + 'deviceid': 'd0e094a7-a80c-453b-82c5-f593d99e4c35', + 'traceid': '1695c089-9f04-44d6-beb7-41a3689edfd5', + 'Connection': 'keep-alive', + 'Referer': 'https://www.ly.com/hotel/hotellist?pageSize=20&t=1668496948801&city=102&inDate=2022-11-15&outDate=2022-11-16&filterList=8888_1', + 'Cookie': f'SECKEY_ABVK=HszLUR4bbMgPWbGpeaYwbnMzNSB8SbXGhmS9odMd70Q%3D; BMAP_SECKEY=MORm19l7pkFSd3jICPEEe16DM0R-sY9MVVQUmWSB28tD0qRlMBoQwhGvbBs2-1_b7Fyy9msydOvLpZET2XNwl6sdMkCbo0lwHjgDBs4YP2lE5h9fanCHdFch01LvClsY9xYHrOlIMobFBS1Pnzi4cLujgsusthtxVK4YGjGqsQuhPFKwgAf4kuwYIWPxa95u; _dx_uzZo5y=5edb851076e73262f075be8f0ffaaf6e3c2297a4b63d42c5d1d231595a42a46c8cb790a8; Hm_lvt_64941895c0a12a3bdeb5b07863a52466=1668669174,1669038391,1669042695,1669088760; __tctma=144323752.16677878877661.1667787887750.1669092022899.1669095674727.15; H5CookieId=d0e094a7-a80c-453b-82c5-f593d99e4c35; Hm_lvt_c6a93e2a75a5b1ef9fb5d4553a2226e5=1668669182,1669038398,1669042705,1669088762; _dx_captcha_cid=06284819; _dx_app_bc4b3ca6ae27747981b43e9f4a6aa769=637b814eXFfcQWGQWFSCEnNPnInDSNFQ3UOJCbJ1; businessLine=hotel; firsttime=1668495014628; lasttime=1669096008415; cityid=2101; nus=userid=819559597&nickName=%e5%90%8c%e7%a8%8b%e4%bc%9a%e5%91%98_59287A06B3A&level=1; __ftoken=sn5o6j2U%2Fr0AktDkUXYquYrYm%2FYg8YzOVtJ%2FplIFl6gQxfOSc%2B8QwfR%2BAan9v4glQv3LdEaLVvDb5I2ukTmy%2Bw%3D%3D; __ftrace=ada97b48-152a-4527-981a-194bd2c90cd3; _tcudid_v2=T1a4qnnExKXGp3YgAYIp90heDDbPN0mtmTQ66BfSVPk; indexTopSearchHistory=%5B%22%E8%B5%84%E6%BA%90%E5%8E%BF%E5%A1%98%E6%B4%9E%E6%99%AF%E5%8C%BA%EF%BC%88%E7%BA%A2%E8%89%B2%E6%99%AF%E5%8C%BA%EF%BC%89%22%2C%22%E8%B5%84%E6%BA%90%E5%8E%BF%E5%AE%9D%E9%BC%8E%E6%99%AF%E5%8C%BA%22%2C%22%E6%A1%82%E6%9E%97%E8%B5%84%E6%B1%9F%E7%81%AF%E8%B0%B7%E6%99%AF%E5%8C%BA%22%2C%22v%22%2C%22%E6%A1%82%E6%9E%97%E8%B5%84%E6%B1%9F%C2%B7%E5%A4%A9%E9%97%A8%E5%B1%B1%E6%99%AF%E5%8C%BA%22%2C%22%E7%BD%97%E6%B1%89%E6%9E%9C%E5%B0%8F%E9%95%87%22%2C%22%E6%B0%B8%E7%A6%8F%E5%8E%BF%E5%87%A4%E5%B1%B1%E6%99%AF%E5%8C%BA%22%2C%22%E6%B0%B8%E7%A6%8F%E9%87%91%E9%92%9F%E5%B1%B1%E6%97%85%E6%B8%B8%E5%BA%A6%E5%81%87%E5%8C%BA%22%2C%22%E7%82%8E%E4%BA%95%22%2C%22%E6%A1%82%E6%9E%97%E6%B9%98%E5%B1%B1%E9%85%BF%E9%85%92%E7%94%9F%E6%80%81%E5%9B%AD%E6%99%AF%E5%8C%BA%22%5D; H5Channel=mnoreferseo%2CSEO; indate=2022-11-22; 
outdate=2022-11-23; _ga=GA1.2.85011599.1669042102; NewProvinceId=7; NCid=102; NewProvinceName=%E5%B9%BF%E8%A5%BF; NCName=%E6%A1%82%E6%9E%97; 17uCNRefId=RefId=6928722&SEFrom=baidu&SEKeyWords=; TicketSEInfo=RefId=6928722&SEFrom=baidu&SEKeyWords=; CNSEInfo=RefId=6928722&tcbdkeyid=&SEFrom=baidu&SEKeyWords=&RefUrl=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DqMP277Q9_03GFp9XDaOnD1yESxu4dQFrYoLTVLEI2Fa%26wd%3D%26eqid%3Da43766230008598a00000006637c52b4; qdid=39264|1|6928722|0a6c16,39264|1|6928722|0a6c16; route=a14e2b278f3edf5ed22249307678b7ac; __tctmc=144323752.108798340; __tctmd=144323752.737325; __tctmu=144323752.0.0; __tctmz=144323752.1669092022899.14.2.utmccn=(organic)|utmcmd=organic|utmEsl=gb2312|utmcsr=baidu|utmctr=; longKey=16677878877661; __tctrack=0; Hm_lpvt_64941895c0a12a3bdeb5b07863a52466=1669092025; Hm_lpvt_c6a93e2a75a5b1ef9fb5d4553a2226e5=1669097452; JSESSIONID=61FD1846F1708452C94F9453B2635172; __sd_captcha_id=b21a6833-60c4-46a2-8d97-1513ef3640ea; searchEntranceId=h5_home; DetailNewOrOld=newDetail; passport_login_state=pageurl=http%3a%2f%2fmember.ly.com%2forder; __tctmb=144323752.3725345079911344.1669097434738.1669097450268.10; tracerid=nologin-1669096963503; ASP.NET_SessionId=4y5ue4pydjihkcjgludpsddw',
+}
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+# scrape the comments of one hotel
+class Tongcheng_Hotel:
+    async def getComment(self, item, pageIndex, session):
+        data = json.loads(item['data'])
+        try:
+            async with session.post(item['url'], json=data) as res:
+                res1 = await res.json()
+                # print(res1)
+                # comments = res1['data']['comments']
+                filterList = res1['data']['filterList']
+                subTags = filterList[0]['subTag']
+                dic = {}
+                dic["中评"] = 0
+                othersComment = []
+                for flt in filterList:
+                    filterName = flt['filterName']
+                    filterCount = flt['filterCount']
+                    if filterName == '好评' or filterName == '待改善' or filterName == '全部':
+                        dic[f'{filterName}'] = filterCount
+                    else:
+                        othersComment.append({f"{filterName}": filterCount})
+                for subTag in subTags:
+                    subTagName = subTag['filterName']
+                    subTagCount = subTag['filterCount']
+                    othersComment.append({f"{subTagName}": subTagCount})
+                othersComment = str(othersComment)
+                args = (item["id"], item["name"], dic["全部"], dic["好评"], dic["中评"], dic["待改善"], othersComment, today, "同程")
+                print(args)
+                sql = 'INSERT INTO hotel_comment(hotelId,hotelName,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+                mysql.insert(sql, args)
+        except Exception as e:
+            print('comment', e)
+
+    # fetch the hotel list from the database
+    async def getHotel(self):
+        async with aiohttp.ClientSession(headers=headers) as session:
+            results = mysql.queryHotel("select id,name,tc_url,tc_data from hotels ", None)
+            tasks = []
+            url_list = []
+            for row in results:
+                id = row[0]
+                name = row[1]
+                url = row[2]
+                data = row[3]
+                url_list.append({
+                    "id": id,
+                    "name": name,
+                    "url": url,
+                    "data": data,
+                })
+            print("number of Tongcheng hotels:", len(url_list))
+            i = 0
+            for item in url_list:
+                print(item['name'])
+                task2 = asyncio.create_task(self.getComment(item, 1, session))
+                i = i + 1
+                tasks.append(task2)
+                if i % 5 == 0:
+                    time.sleep(5)
+            await asyncio.wait(tasks)
+            # close the MySQL connection
+            mysql.cur.close()
+            mysql.connection.close()
+
+if __name__ == '__main__':
+    # saveHotel()
+    asyncio.run(Tongcheng_Hotel().getHotel())
diff --git "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_hotel_comment_title.py" "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_hotel_comment_title.py"
new file mode 100644
index 0000000..652ab93
--- 
/dev/null +++ "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_hotel_comment_title.py" @@ -0,0 +1,121 @@ +import requests +import re +import aiohttp +import asyncio +import csv +import json +import time +from 旅游项目.HBaseConnect import * + +headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0', + # 'Accept': 'application/json', + # 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', + # 'Accept-Encoding': 'gzip, deflate, br', + # 'P': '41687220417', + # 'Content-Type': 'application/json;charset=UTF-8', + # 'Content-Length': '3653', + # 'Origin': 'https://hotels.ctrip.com', + # 'Connection': 'keep-alive', + # 'Referer': 'https://hotels.ctrip.com/', + 'Cookie': '_bfa=1.1667787832924.3h7rvk.1.1670568674517.1670572311780.70.1211.212092; _ubtstatus=%7B%22vid%22%3A%221667787832924.3h7rvk%22%2C%22sid%22%3A70%2C%22pvid%22%3A1211%2C%22pid%22%3A0%7D; MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index&CT=1670568669940&CURL=https%3A%2F%2Fwww.ctrip.com%2F%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex&VAL={}; __zpspc=9.80.1670572312.1670572325.3%232%7Cwww.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8%258B%7C%23; _jzqco=%7C%7C%7C%7C1670554470477%7C1.386356559.1667787833232.1670572320920.1670572325075.1670572320920.1670572325075.undefined.0.0.575.575; MKT_CKID=1667787833303.6cihj.yc0k; _RF1=171.109.34.146; _RSG=PI4tVah22dC4DYKmrdfaUA; _RDG=28a682bf6ceb192ebc37d846ca69b5ed63; _RGUID=c9c20ab9-1fdc-4499-a7a4-a67d70522344; MKT_Pagesource=PC; _bfaStatusPVSend=1; _bfaStatus=success; nfes_isSupportWebP=1; _ga=GA1.2.728287556.1667875987; Session=smartlinkcode=U130026&smartlinklanguage=zh&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=; UUID=20EDDDB8AE46403495EFEE36FAC417C1; IsPersonalizedLogin=F; ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; ibu_h5_lang=en; ibu_h5_local=en-us; Hm_lvt_37b54c42b9dde393e60c88c1a84657cb=1668156071,1668390905,1668766327,1669630249; _lizard_LZ=ghjTSPinRlQVIkJmqrUWXFopstucvwx210a3ydz754YfM6ZE89+b-eKCHOLNBGAD; intl_ht1=h4=33_51616458,33_6550062,33_75424975,33_782288,2_441618; _abtest_userid=294c1513-b267-4324-8a01-750ac3d84f81; _gcl_au=1.1.920354234.1668409170; U_TICKET_SELECTED_DISTRICT_CITY=%7B%22value%22%3A%7B%22districtid%22%3A%222%22%2C%22districtname%22%3A%22%E4%B8%8A%E6%B5%B7%22%2C%22isOversea%22%3Anull%7D%2C%22createTime%22%3A1668416843913%2C%22updateDate%22%3A1668416843913%7D; FlightIntl=Search=[%22KWL|%E6%A1%82%E6%9E%97(KWL)|33|KWL|480%22%2C%22BJS|%E5%8C%97%E4%BA%AC(BJS)|1|BJS|480%22%2C%222022-11-17%22]; Hm_lvt_576acc2e13e286aa1847d8280cd967a5=1668916753; login_uid=C0AB45AFF50D550863B877680E735ABE; login_type=0; appFloatCnt=1; StartCity_Pkg=PkgStartCity=33; GUID=09031172114453342165; cticket=337E22BDC21DD4985842195D8CEDEC0C8B461417CA4C7D03098988464C0EFC5E; AHeadUserInfo=VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0; DUID=u=C0AB45AFF50D550863B877680E735ABE&v=0; IsNonUser=F; Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1670568670&Expires=1671173469939; MKT_CKID_LMT=1670554468414; _bfi=p1%3D212094%26p2%3D212093%26v1%3D1210%26v2%3D1208; librauuid=; htltmp=; htlstmp=; _gid=GA1.2.1489218311.1670568674; _pd=%7B%22_o%22%3A3%2C%22s%22%3A272%2C%22_s%22%3A1%7D; _bfs=1.32; hotelhst=1164390341', + +} + +from datetime import date, timedelta +today = time.strftime("%Y-%m-%d", time.localtime()) +tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d") + 
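# getComment below retries a failed request by calling itself again after a
# blocking time.sleep(5), with no cap on the number of attempts. A minimal
# sketch of a bounded, non-blocking retry wrapper (with_retries is an
# illustrative name, not part of the patch):
import asyncio

async def with_retries(fn, *args, attempts=3, base_delay=5):
    # try fn up to `attempts` times, backing off between tries without
    # blocking the event loop; re-raise the last error when out of tries
    for n in range(attempts):
        try:
            return await fn(*args)
        except Exception:
            if n == attempts - 1:
                raise
            await asyncio.sleep(base_delay * (n + 1))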
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+# scrape the comments of one hotel
+class Xiecheng_Hotel:
+    async def getComment(self, item, session, dic):
+        try:
+            data = json.loads(item['data'])
+            async with session.post(item['url'], json=data) as res:
+                resp = await res.json()
+                # print(resp)
+                totalCommentCount = 0
+                dic['差评'] = 0
+                dic['值得推荐'] = 0
+                if resp['totalCount'] is not None:
+                    totalCommentCount = resp['totalCount']
+                statisticList = resp['statisticList']
+                travelTypeList = resp['travelTypeList']
+                commentTagList = resp['commentTagList']
+                othersComment = []
+                # travel-type tags
+                for tag in travelTypeList:
+                    tagName = tag['name']
+                    tagcommentCount = tag['commentCount']
+                    othersComment.append({f"{tagName}": tagcommentCount})
+                # comment tags
+                for tag in commentTagList:
+                    tagName = tag['name']
+                    tagcommentCount = tag['commentCount']
+                    othersComment.append({f"{tagName}": tagcommentCount})
+                # statistic tabs
+                for tab in statisticList:
+                    tabName = tab['name']
+                    tabcommentCount = tab['commentCount']
+                    if tabName == '值得推荐' or tabName == '差评' or tabName == '所有点评':
+                        dic[f'{tabName}'] = tabcommentCount
+                    else:
+                        othersComment.append({f"{tabName}": tabcommentCount})
+                dic['好评'] = dic['值得推荐'] - dic['差评']
+                dic['中评'] = 0
+                othersComment = str(othersComment)
+                args = (
+                    item["id"], item["name"], totalCommentCount, dic['好评'], dic['中评'], dic['差评'], othersComment, today,
+                    "携程")
+                print(args)
+                sql = 'INSERT INTO hotel_comment(hotelId,hotelName,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+                mysql.insert(sql, args)
+        except Exception as e:
+            print('comment error', e)
+            time.sleep(5)
+            await self.getComment(item, session, dic)
+
+    # fetch the hotel list from the database
+    async def getHotel(self):
+        async with aiohttp.ClientSession(headers=headers) as session:
+            results = mysql.queryHotel("select id,name,xc_url,xc_data from hotels where xc_url!='' and id > 1662 ", None)
+            tasks = []
+            url_list = []
+            for row in results:
+                id = row[0]
+                name = row[1]
+                url = row[2]
+                data = row[3]
+                url_list.append({
+                    "id": id,
+                    "name": name,
+                    "url": url,
+                    "data": data
+                })
+            # print(list)
+            print("number of Ctrip hotels:", len(url_list))
+            index = 0
+            for item in url_list:
+                index = index + 1
+                dic = {"id": item['id'], "name": item['name'], 'flag': 0}
+                item['url'] = "https://m.ctrip.com/restapi/soa2/24626/commentlist?_fxpcqlniredt=09031172114453342165&x-traceID=09031172114453342165-1670568709094-2810570"
+                # bump pageIndex inside the raw JSON payload string
+                item['data'] = str(item['data']).replace(f'"pageIndex": 1,', f'"pageIndex": {index},')
+                item['data'] = str(item['data']).replace(f'"pageIndex": {index - 1}', f'"pageIndex": {index}')
+                task = asyncio.create_task(self.getComment(item.copy(), session, dic))
+                if index % 8 == 0:
+                    time.sleep(5)
+                tasks.append(task)
+            await asyncio.wait(tasks)
+            print("all done!!!!")
+            # close the MySQL connection
+            mysql.cur.close()
+            mysql.connection.close()
+
+
+if __name__ == '__main__':
+    # saveHotel()
+    asyncio.run(Xiecheng_Hotel().getHotel())
-- 
Gitee

From 2c08a369d3f4638c07fb4c9eeb222d9c712cfc2d Mon Sep 17 00:00:00 2001
From: WANY <oncwnuORhI2wd6rlQNKOwi3sWN28@git.weixin.qq.com>
Date: Wed, 14 Dec 2022 18:26:10 +0800
Subject: [PATCH 3/3] =?UTF-8?q?=E7=99=BE=E5=BA=A6=E5=BE=AE=E5=8D=9A?=
 =?UTF-8?q?=E7=AD=BE=E5=88=B0=E7=A5=A8=E6=95=B4=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../common/scrapySpiders/wangModel/scrapy.cfg |  11 +
 .../wangModel/wangModel/__init__.py           |   0
 .../wangModel/common_spiders/__init__.py      |   9 +
 .../wangModel/common_spiders/baiduacc.py      | 100 +
 .../wangModel/common_spiders/baidusearch.py   |  71 +
.../wangModel/common_spiders/baiduwords.py | 76 + .../wangModel/common_spiders/tongtencities.py | 33 + .../wangModel/common_spiders/tuniu_route.py | 108 + .../wangModel/common_spiders/weather.py | 95 + .../wangModel/common_spiders/weibosign.py | 238 ++ .../wangModel/wangModel/files/city.txt | 2901 +++++++++++++++++ .../wangModel/wangModel/files/city_cap.txt | 546 ++++ .../wangModel/wangModel/files/scenic | 95 + .../wangModel/wangModel/items.py | 72 + .../wangModel/wangModel/middlewares.py | 56 + .../wangModel/wangModel/pipelines.py | 211 ++ .../wangModel/wangModel/readme.md | 11 + .../wangModel/wangModel/settings.py | 135 + .../wangModel/wangModel/spiders/__init__.py | 4 + .../wangModel/wangModel/spiders/a.html | 163 + .../wangModel/wangModel/spiders/gw.py | 58 + .../wangModel/wangModel/spiders/main.py | 52 + .../wangModel/wangModel/spiders/tongchen.py | 137 + .../wangModel/spiders/tuniu_hotel.py | 187 ++ .../wangModel/spiders/tuniu_scenic.py | 97 + .../wangModel/wangModel/spiders/weibo.py | 87 + .../wangModel/wangModel/spiders/weixin.py | 46 + .../wangModel/wangModel/test.csv | 271 ++ .../wangModel/wangModel/tuniu.csv | 0 .../wangModel/wangModel/utils/HbaseConn.py | 54 + .../wangModel/wangModel/utils/citydeal.py | 27 + .../wangModel/wangModel/utils/createTables.py | 42 + .../wangModel/utils/hostory_weather.py | 135 + .../wangModel/wangModel/utils/mysqlConn.py | 76 + .../wangModel/wangModel/utils/proxys.py | 83 + .../wangModel/wangModel/utils/weather_deal.py | 12 + applications/common/tasks/tasks.py | 29 +- .../weibosign.py" | 25 + .../scenic_start.py" | 19 +- .../\347\231\276\345\272\246/baidu_start.py" | 34 + .../hotel_title_start.py" | 8 + applications/view/__init__.py | 4 +- 42 files changed, 6406 insertions(+), 12 deletions(-) create mode 100644 applications/common/scrapySpiders/wangModel/scrapy.cfg create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/__init__.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/__init__.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduacc.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baidusearch.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduwords.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tongtencities.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tuniu_route.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weather.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weibosign.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/files/city.txt create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/files/city_cap.txt create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/files/scenic create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/items.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/middlewares.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/pipelines.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/readme.md create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/settings.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/__init__.py create mode 100644 
applications/common/scrapySpiders/wangModel/wangModel/spiders/a.html create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/gw.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/main.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/tongchen.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_hotel.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_scenic.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/weibo.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/weixin.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/test.csv create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/tuniu.csv create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/HbaseConn.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/citydeal.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/createTables.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/hostory_weather.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/mysqlConn.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/proxys.py create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/weather_deal.py create mode 100644 "applications/common/tasks/\345\276\256\345\215\232\347\255\276\345\210\260/weibosign.py" create mode 100644 "applications/common/tasks/\347\231\276\345\272\246/baidu_start.py" diff --git a/applications/common/scrapySpiders/wangModel/scrapy.cfg b/applications/common/scrapySpiders/wangModel/scrapy.cfg new file mode 100644 index 0000000..7ff7b66 --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/scrapy.cfg @@ -0,0 +1,11 @@ +# Automatically created by: scrapy startproject +# +# For more information about the [deploy] section see: +# https://scrapyd.readthedocs.io/en/latest/deploy.html + +[settings] +default = wangModel.settings + +[deploy] +#url = http://localhost:6800/ +project = wangModel diff --git a/applications/common/scrapySpiders/wangModel/wangModel/__init__.py b/applications/common/scrapySpiders/wangModel/wangModel/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/__init__.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/__init__.py new file mode 100644 index 0000000..24bbbb6 --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/__init__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +'''================================================= +@Project -> File :爬虫 -> __init__.py +@IDE :PyCharm +@Author :sandmswift +@Date :2022-11-22 15:51 +@Desc +==================================================''' diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduacc.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduacc.py new file mode 100644 index 0000000..ca7b9c5 --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduacc.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +'''================================================= +@Project -> File :爬虫 -> baiduacc +@IDE :PyCharm +@Author 
:sandmswift +@Date :2022-11-15 14:25 +@Desc +==================================================''' +import json + +import requests +from urllib import parse +import time +import random +from wangModel.utils.proxys import PROXY +from wangModel.utils.HbaseConn import HbaseUtil +import asyncio +import datetime +from wangModel.utils.mysqlConn import insert,query +import aiohttp +import time +class baiduacc(): + header = { + "Cipher-Text":"1669014148691_1669025682395_gmyeUYFkqtWGz/Aodu6MCBx/jA/TcFYa3elCcC4PVE1i1F2XCekER0aqy9Mx1dO6Qu0Y3W2+6/ojulveu+uCC/Q1oRpRM2Iy/3YW0Dt7KogYgCtBAZulpY0RDu+dn5RiBs75lW9Ot/YIIeM4Pw5Bvtj6gwMLHLTS60hqu+o9xQdbJOQa8Dj3F2+Zyz+MXvMx1o4wulS5d/W8pIdT9n+Ud1J8ULkr3zIW2/dNMcX/53VET1S9IiG2uaG+3XDvf8rQLT8wIXKI9LwrwFI4+gZZhd/YnOMSb7reDLOo5bcfNyYRGzqpNb2Dozufe4HjuPzbvccAPU9XNigUDNyR/y5aqVUILehLWBs/bNg9OpuhvCsVumPQl/dIIDa57SKBBOHqSAx31TxH1po65FrdwblPhZF4qB9jXX/IzU1inyHNeKI=", + "Accept":"application/json,text/plain,*/*", + "Accept-Encoding":"gzip,deflate,br", + "Accept-Language":"keep-alive", + "Content-Length":"0", + "Host":"index.baidu.com", + "Origin":"https://index.baidu.com", + "sec-ch-ua":"'Microsoft Edge';v='107', 'Chromium';v='107', 'Not=A?Brand';v='24'", + "Referer":"https://index.baidu.com/v2/main/index.html", + "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.52", + "Cookie":"BIDUPSID=B772A7AE03D22C237EA5162D657EFEA8; PSTM=1646828464; ab_jid=1b5f1f7bd0d2ad6c8322197428813831b876; ab_jid_BFESS=1b5f1f7bd0d2ad6c8322197428813831b876; BAIDUID=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; H_WISE_SIDS=110085_131862_188746_194529_204904_211986_212295_213039_214795_215730_216853_216941_219623_219943_219946_222624_223064_223337_224045_224047_224436_226628_226815_227932_228650_228870_229154_229907_229967_230077_230241_230244_230248_230287_230930_231433_231628_231761_231904_231979_232055_232244_232357_232616_232755_232834_232908_233041_233368_233401_233464_233465_233518_233598_233604_233719_233924_234044_234085_234208_234225_234296_234317_234349_234382_234515_234521_234559_234670_234690_234722_234799_234924_234980_235091_235131_235174_235201_235228_235258_235398_235421_235453_235461_235511_235534_235581_235634_235770_235808_235829_235870_235969_235980_236022_236050_236052_236084_236101_236129_236239_236243_236341_236512_236515_236524_236527_236538_236611_236811_236838; MCITY=-142%3A; delPer=0; PSINO=6; BAIDUID_BFESS=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; BA_HECTOR=aha5208la10kagah8lak0qlp1ho93q81f; ZFY=xtXF:ABfiWEAgoaeInpi6iku9vkiVh7JUT1fVvaM9stc:C; bdindexid=p0k1jlaqpura3afsp3oajf1j73; BCLID=6870042325976552650; BCLID_BFESS=6870042325976552650; BDSFRCVID=J70OJexroG0leprj73-kMHDF9QpWxY5TDYrELPfiaimDVu-VJeC6EG0Pts1-dEu-EHtdogKKBgOTH4FF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; BDSFRCVID_BFESS=J70OJexroG0leprj73-kMHDF9QpWxY5TDYrELPfiaimDVu-VJeC6EG0Pts1-dEu-EHtdogKKBgOTH4FF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tbP8oK-MJKD3fP36q47HMtu_hlrt2D62aKDs3qTYBhcqEIL4Mj5E-P_wMGJ3Jp5uWgnlVlvVfx_bMUbSjln_0J_JhHon2nQwanrU_DD5yq5nhMJpXj7JDMP0XJbK35Oy523i5J3vQpPMslQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xb6_0DjPthxO-hI6aKC5bL6rJabC3OC0xXU6q2bDeQN3QKROH2JkesRvzWpbPbfjx3n7Zjq0vWq54WpOh2C60WlbCb664OR5JjxonDh83KNLLKUQtHmT7LnbO5hvvER3O3MAMQxKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQXH_E5bj2qRCf_IKM3e; 
H_BDCLCKID_SF_BFESS=tbP8oK-MJKD3fP36q47HMtu_hlrt2D62aKDs3qTYBhcqEIL4Mj5E-P_wMGJ3Jp5uWgnlVlvVfx_bMUbSjln_0J_JhHon2nQwanrU_DD5yq5nhMJpXj7JDMP0XJbK35Oy523i5J3vQpPMslQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xb6_0DjPthxO-hI6aKC5bL6rJabC3OC0xXU6q2bDeQN3QKROH2JkesRvzWpbPbfjx3n7Zjq0vWq54WpOh2C60WlbCb664OR5JjxonDh83KNLLKUQtHmT7LnbO5hvvER3O3MAMQxKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQXH_E5bj2qRCf_IKM3e; BDRCVFR[SquYicL8Vkb]=I67x6TjHwwYf0; H_PS_PSSID=26350; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; ZD_ENTRY=baidu; H_WISE_SIDS_BFESS=110085_131862_188746_194529_204904_211986_212295_213039_214795_215730_216853_216941_219623_219943_219946_222624_223064_223337_224045_224047_224436_226628_226815_227932_228650_228870_229154_229907_229967_230077_230241_230244_230248_230287_230930_231433_231628_231761_231904_231979_232055_232244_232357_232616_232755_232834_232908_233041_233368_233401_233464_233465_233518_233598_233604_233719_233924_234044_234085_234208_234225_234296_234317_234349_234382_234515_234521_234559_234670_234690_234722_234799_234924_234980_235091_235131_235174_235201_235228_235258_235398_235421_235453_235461_235511_235534_235581_235634_235770_235808_235829_235870_235969_235980_236022_236050_236052_236084_236101_236129_236239_236243_236341_236512_236515_236524_236527_236538_236611_236811_236838; __bid_n=1841d0d39462b7eb984207; BDUSS=VRCZDNtbXNuNW41UGlFbjZKUmI3WEc2aUxYYVFsejg0SVVROEVJNmtxd35SNnhqSUFBQUFBJCQAAAAAAAAAAAEAAAC67J41AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD-6hGM~uoRjY; SIGNIN_UC=70a2711cf1d3d9b1a82d2f87d633bd8a04196552899imlBp2YPGD4aewP8FEYzgkEScKOqh7wb51tJTuP1B6MtmB3vSB6esoqA1w0BCKHaSr3H%2BaAs%2B95UgUw5JUbwAjHo4ooV%2BHBPtHJbiiJxU3CbQrkyxr2V65CIOTGVbPwt0Kij485ztLZlqLDr%2FeP4j8mK%2F1BnLMUjD0IZINAdz9OcGDB5KlDUGhEMUmHW5FkAhu26dh63%2FP000Cmpeyz06Ww6TciYJ3j7g9b1pdBcgCvDfrwAp4NUZ7z4PY8wFikBxuF2%2B0HT3niFFIDJz6HNM1GEJXoPVe0hWKEwKigxDYQ%3D80937910607250753648874982969453; ab_bid=4039c07e99dd9dfa4f512beb01a097f2a6ff; ab_sr=1.0.1_ZGMwYzg4NTQxY2U2MWM0OGY2ZmMzMzc3YmI2NzJlZDQ5NGQyNDc5NGI5NWJmYjMwZWUyMjBiYmU0MGFlOTc5YmM0MzEyNGI0ZDQxNjQ1YjNmY2M2YTEyYTliMWVjZGFjMjZhZDdkMmQ0YWM2NTM4Zjc4YTIwODNkNjY5YmQ4MzMwNjI5MDI1Yzc1OTZmMzUyYzFkNzEwYTcxYzQzZDAyYw==; RT='z=1&dm=baidu.com&si=630f2482-3780-4da8-9234-e06ac91171fd&ss=lb0u05kx&sl=m&tt=kt4&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf'; BDUSS_BFESS=VRCZDNtbXNuNW41UGlFbjZKUmI3WEc2aUxYYVFsejg0SVVROEVJNmtxd35SNnhqSUFBQUFBJCQAAAAAAAAAAAEAAAC67J41AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD-6hGM~uoRjY" + } + + item_list=[] + + def __init__(self): + host = '202.193.53.106' + table_name = 'bauduacc' + hbase = HbaseUtil(host) + self.hbase = hbase + self.tablename= table_name + + + + def parse1(self): + time.sleep(3) + try: + url_list = query("select id,name,bdacc_url from scenics where bdacc_url !='' ", None) + for redatas in url_list: + id=redatas['id'] + scenicName=redatas['name'] + url=redatas['url'] + # url = f"https://index.baidu.com/api/SearchApi/index?area=0&word=[[%7B%22name%22:%22{parse.quote(keyword)}%22,%22wordType%22:1%7D]]&days=30" + + response = requests.get(url, headers=self.header, proxies=PROXY, timeout=5) + time.sleep(3) + data= response.json() + if data['data'] =='': + # print(data) + print("被检测了,请更新验证数据") + else: + print(data['data']) + start_time=str(data['data']['userIndexes'][0]['all']['startDate']) + end_time=str(data['data']['userIndexes'][0]['all']['endDate']) + all_avg=str(data['data']['generalRatio'][0]['all']['avg']) #整体日均值 + 
all_yoy=str(data['data']['generalRatio'][0]['all']['yoy'])+"%"    # overall year-over-year %
+                    all_qoq=str(data['data']['generalRatio'][0]['all']['qoq'])+"%"    # overall period-over-period %
+                    wise_avg=str(data['data']['generalRatio'][0]['wise']['avg'])+"%"  # mobile daily average
+                    wise_yoy=str(data['data']['generalRatio'][0]['wise']['yoy'])+"%"  # mobile year-over-year %
+                    wise_qoq=str(data['data']['generalRatio'][0]['wise']['qoq'])+"%"  # mobile period-over-period %
+
+                    sql="INSERT INTO baiduacc(scenicId,all_avg,all_yoy,all_qoq,wise_avg,wise_yoy,wise_qoq,crawlTime) VALUES (%s,%s,%s,%s,%s,%s,%s,%s);"
+                    insert(sql,(id,all_avg,all_yoy,all_qoq,wise_avg,wise_yoy,wise_qoq,datetime.date.today()))
+        except Exception:
+            print("invalid url")
+    #
+    # def inputHbase(self,list):
+    #     for i in range(len(list)):
+    #         # write into HBase
+    #         print(list[i])
+    #         self.hbase.putTable(self.tablename, str(datetime.date.today())+"_"+str(i), {
+    #             'info:name': list[i]['name'],
+    #             'all:all_avg': list[i]['all_avg'],
+    #             'all:all_yoy': list[i]['all_yoy'],
+    #             'all:all_qoq': list[i]['all_qoq'],
+    #             'wise:wise_avg': list[i]['wise_avg'],
+    #             'wise:wise_yoy': list[i]['wise_yoy'],
+    #             'wise:wise_qoq': list[i]['wise_qoq'],
+    #         })
+
+# if __name__=="__main__":
+#
+#     spider=baiduacc()
+#     spider.parse1()
+
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baidusearch.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baidusearch.py
new file mode 100644
index 0000000..40ab617
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baidusearch.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> baidusearch
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-15 18:11
+@Desc
+=================================================='''
+from urllib import parse
+import time
+import random
+import datetime
+import re
+import requests
+from wangModel.utils.proxys import PROXY,ips
+from lxml import etree
+from wangModel.utils.HbaseConn import HbaseUtil
+from wangModel.utils.mysqlConn import query,insert,getRows,update
+"""
+Crawls the number of Baidu search results for each scenic spot.
+"""
+class BaiduSpider():
+
+    def parse(self):
+        kw=""
+        url_list = getRows("select id,name from scenics ", None)
+        for content in url_list:
+            item={}
+            print(content)
+            time.sleep(random.randint(1,5))
+            id=content[0]
+            item['id']=id
+            name=content[1]
+            item['name']=name.strip()
+            kw=parse.quote(name)
+            url= f"https://www.baidu.com/s?wd={kw}&rsv_spt=1&rsv_iqid=0xd0a36e920005e207&issp=1&f=8&rsv_bp=0&rsv_idx=2&ie=utf-8&rqlang=&tn=baiduhome_pg&ch="
+            item['url']=url
+            self.parse_item(url,item)
+
+    def parse_item(self, url,item):
+        print("-------------- sending request --------------------")
+        header = {
+            "Cipher-Text": "1668409355251_1668493173103_aLgQH4YFqAwPcYSE7v52xdJaHSAeId9tI+WY1JMiHu8HwngWY2DifDL8GwYz2O+DvIVgj+9ldrUsKJ3ADGdnEUHL1GARwcCChi73BbkUFeNFtACrNrwhmPStsz0iWKZEK1aqGImhb+zMQg9/qJkxFRR+4AuJz5zbU+IkH793cccuV18DONXlam0zLfF07BZFrBRtTFCC7P7YOpfz9du1sz0OHMxRr7Iwdq1hrNzZ0yW4pzm8Hw2C7gvEfXs81XQSHDeGOtaoZ/IQyn5QqCYSsGC47kiKIeEy2hOaGITVWj4wBHvNe//u+dxPX/cDPjIM7QWoQnmSAg2qOAUtzTMnBE0Eal21o3C03eGBGNJHXYM9xVQz2OEs+NeMG2HXjKi5boG4R8ypMvU8D5JsL9lU7G2WStNDiX7sEjaskomtx2g=",
+            "Referer": "https://index.baidu.com/v2/main/index.html",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.42",
+            "Cookie": "BIDUPSID=B772A7AE03D22C237EA5162D657EFEA8; PSTM=1646828464; BAIDUID=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BCLID_BFESS=8610272839171174388; 
BDSFRCVID_BFESS=LqkOJeCmHx1yiQOjtw6fuaBrwgKK0gOTHbucfJLsovUqE2IVJeC6EG0Ptf8g0Kubdu1yogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF_BFESS=tJFfoK0afIK3f-opMtT5q4t3KxrjetJyaR3rQRvvWJ5TDqn9DJrE0f4q5htqKJQK0jrf0hvctn3cShnP5tbtyxAyhpjPbxLHWGcZ0l8K3l02V-bIe-t2ynLVbh_Dh4RMW20j0h7mWIQvsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJEjj6jK4JKjaK8t65P; delPer=0; PSINO=6; BA_HECTOR=200h85al2l2001agah0gat3a1hn62841f; BAIDUID_BFESS=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; ZFY=PEntF3sipSTjFmSpBgjsg2if1PiObhH0XFP3GeQX4wg:C; H_PS_PSSID=37784_36554_37552_37519_37689_37772_37628_34813_37778_37727_37538_37712_37742_26350_37789; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1668491560; BDUSS=FZTkFXWlFOLVVLdjExR293ZXZQWXkyeXFzRzVUZll1OXo5azR0SjlueGd2SnBqSVFBQUFBJCQAAAAAAAAAAAEAAABbpYyTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAvc2NgL3NjOG; SIGNIN_UC=70a2711cf1d3d9b1a82d2f87d633bd8a04185056022rfZ4Fr8b%2FZjzCjWhkEXzVhsU%2BRJmUfM92Rxqhej6x6RSy2D67EegK8bVq7Xw8G45FA9lGDxsArHhqi6FUXK4q4RUQKI%2FOaidfKoId7N9w5%2BvNtec2wUhywSQZq0jcgF6x9ekV4CZhLqqdSZJW8MmPYtfaFxQO1F04SU%2Bg1VM6k80VfstLewTJ%2FyvBssATejPpii0mplIhwrdv4izW0XcCSgczOv1KoEYf3DDBB%2BAkLlXIVuMXT08UND685c51gs1LPln6JVHlEmqjH2syDrFSw%3D%3D93823493866336522493836350719633; __cas__rn__=418505602; __cas__st__212=c3dabde61364b016ccd784a68f72be264e46b9a5983c1a499a1f2f58aed9b391b09413376c3a0f261c176a62; __cas__id__212=41971051; CPID_212=41971051; CPTK_212=693995330; Hm_up_d101ea4d2a5c67dab98251f0b5de24dc=%7B%22uid_%22%3A%7B%22value%22%3A%222475468123%22%2C%22scope%22%3A1%7D%7D; bdindexid=t2n1cvnhg9lue59q0t78cq8jb2; RT='z=1&dm=baidu.com&si=524e125a-f181-40a3-b23b-0f6278b2185e&ss=lahssm1u&sl=f&tt=kfp&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf'; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1668493173; BDUSS_BFESS=FZTkFXWlFOLVVLdjExR293ZXZQWXkyeXFzRzVUZll1OXo5azR0SjlueGd2SnBqSVFBQUFBJCQAAAAAAAAAAAEAAABbpYyTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAvc2NgL3NjOG; ab_sr=1.0.1_ZTJlZDhiZWMyN2IxYWY2MjQ5NzZiNDVkODUyMTIwNzJlNWRmMjAyZWY2ZDFiYWVmNDE2ODE4ZmIyOTQ5MGZmNDdiNTQ5ODJjNWY1MGViN2MwOWI0YzEyYzBlZWY5NzU4MjM0ODk0NzFlMzUxNDJiZjI2ZDZiYWZlYzljMDAyMmZlNTM2MWUzMjdmYjY4MzA1YTAzMWE5MTdhODY1ZGZlYg==" + } + ip="http://"+random.choice(ips) + response=requests.get(url,headers=header,proxies={"http":ip},timeout=5) + selector = etree.HTML(response.text) + try: + data=selector.xpath("//*[@id='tsn_inner']/div[2]/span/text()")[0] + num=re.findall(r"\d+",data) + result="" + if num is not None: + for content in num: + result=result+content + item['num']=result + print(item) + + #插入数据库 + update_sql = "UPDATE scenics SET bdsearch_url = %s where id = %s " + insert_sql = "insert into bd_search(scenicId,scenicName,num,crawlTime) values (%s,%s,%s,%s)" + update(update_sql, (item['url'], item['id'])) + insert(insert_sql, (item['id'], item['name'], item['num'], datetime.date.today())) + except: + print("定位失败,检查Cokie是否失效或网页结构是否更改") +# if __name__ =="__main__": +# run=BaiduSpider() +# run.parse() \ No newline at end of file diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduwords.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduwords.py new file mode 100644 index 0000000..725bb3d --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduwords.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +'''================================================= +@Project -> File :爬虫 -> baiduwords +@IDE :PyCharm 
+@Author :sandmswift +@Date :2022-11-16 10:21 +@Desc:百度百科搜索词条 +==================================================''' +from urllib import parse +import time +import random +import re +import requests +from wangModel.utils.proxys import PROXY,ips +from lxml import etree +import datetime +from wangModel.utils.mysqlConn import query,insert,getRows,update +from selenium import webdriver +class BaiDuWords(): + + def parse(self,id,kw): + url=f"https://baike.baidu.com/item/{parse.quote(kw)}?fromtitle={parse.quote(kw)}" + print(url) + header = { + "X-Requested-With":"XMLHttpRequest", + "Cipher-Text": "1668409355251_1668493173103_aLgQH4YFqAwPcYSE7v52xdJaHSAeId9tI+WY1JMiHu8HwngWY2DifDL8GwYz2O+DvIVgj+9ldrUsKJ3ADGdnEUHL1GARwcCChi73BbkUFeNFtACrNrwhmPStsz0iWKZEK1aqGImhb+zMQg9/qJkxFRR+4AuJz5zbU+IkH793cccuV18DONXlam0zLfF07BZFrBRtTFCC7P7YOpfz9du1sz0OHMxRr7Iwdq1hrNzZ0yW4pzm8Hw2C7gvEfXs81XQSHDeGOtaoZ/IQyn5QqCYSsGC47kiKIeEy2hOaGITVWj4wBHvNe//u+dxPX/cDPjIM7QWoQnmSAg2qOAUtzTMnBE0Eal21o3C03eGBGNJHXYM9xVQz2OEs+NeMG2HXjKi5boG4R8ypMvU8D5JsL9lU7G2WStNDiX7sEjaskomtx2g=", + "Referer": "https://baike.baidu.com/item/%E7%99%BE%E5%BA%A6%E7%99%BE%E7%A7%91?fromModule=lemma_search-box", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.52", + "Cookie": "BIDUPSID=B772A7AE03D22C237EA5162D657EFEA8; PSTM=1646828464; BAIDUID=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; FPTOKEN=30$MrCecZxUi8VS8olQk6GALxBMkPjNDvPqJaUEnW0U/il23iuvfUkXY4mgGNIFtYpKGevoBroMxF6rVAASyZuGaOxurO6Vofyd98uaWKxm9i3oqBQmI361ZlV81CXwf/HgVmK8C/nBkRrvPbXoNG88dFO6bXZHRhqqmusaAiWRqo/INvI0Ykfrx9zGtWoWDmG8LmigrS9r31q9r1YENQshlw1vLnBlsRHoK4S3fj+AnIqz5W/H4RBf92ik6VgmTwmERIDXUryJO6uZKLaMnXm9yYYgkSE3CJd91tmiIeR92jBb3b8hF5Pm1kyTK6qW7GsdA2ybnC4ueez9qmxosW5kRh6I+PEw8HCxiBno6qeXb4e6p0pgYL38oz+yhfmoRWlW|sqY0adRCWGB/CE2viadPBGBBbaHCAe/V4KFwr8eW/08=|10|51987c75976c595af8f6e5b793a7c623; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; ZFY=C:BtWlDYn2vvbhq54rjO:BevzXkLZNv:AbRA3Fah5VvMrA:C; BAIDUID_BFESS=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; BAIDU_WISE_UID=wapp_1668931780429_949; MCITY=-%3A; __bid_n=18495ad43f144d83c64207; BK_SEARCHLOG=%7B%22key%22%3A%5B%22%E4%BC%9A%E4%BB%99%E5%96%80%E6%96%AF%E7%89%B9%E5%9B%BD%E5%AE%B6%E6%B9%BF%E5%9C%B0%E5%85%AC%E5%9B%AD%E6%99%AF%E5%8C%BA%22%2C%22%E6%A1%82%E6%9E%97%E6%BC%93%E6%B1%9F%E6%99%AF%E5%8C%BA%22%2C%22%E6%A1%82%E6%9E%97%22%2C%22%E6%A1%82%E6%9E%97%E4%B8%83%E6%98%9F%E5%85%AC%E5%9B%AD%22%2C%22%E8%B1%A1%E9%BC%BB%E5%B1%B1%22%2C%22%E7%94%A8%E6%B0%9F%E5%88%B7%E7%89%99%22%2C%22%E7%94%A8%E7%9A%84%E6%95%B0%E5%AD%97%E5%9B%BE%E5%83%8F%E7%9A%84%E8%B7%9D%E7%A6%BB%E5%BA%A6%E9%87%8F%E6%9C%89%E5%87%A0%E7%A7%8D%3F%22%5D%7D; BA_HECTOR=a42k85010084202104802vdd1hnpagk1e; delPer=0; PSINO=6; H_PS_PSSID=37784_36554_37552_37519_37772_37628_34813_37778_37819_37727_37793_37712_37742_26350_37789; zhishiTopicRequestTime=1669174887100; BDUSS=mt5NmljaFBnVUE5RGdxQVRvMEZCfklSS09NTkFVZ0FqTnNEa1hSflF3Si1JNlZqSUFBQUFBJCQAAAAAAAAAAAEAAABbpYyTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAH6WfWN-ln1ja; BDUSS_BFESS=mt5NmljaFBnVUE5RGdxQVRvMEZCfklSS09NTkFVZ0FqTnNEa1hSflF3Si1JNlZqSUFBQUFBJCQAAAAAAAAAAAEAAABbpYyTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAH6WfWN-ln1ja; channel=passport.baidu.com; baikeVisitId=5f33655f-41f8-474d-8e54-9589a8fa8510; RT='sl=8&ss=lat3mooz&tt=b4g&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&z=1&dm=baidu.com&si=1e981726-04a9-4146-be75-d83748eee7ca'; 
Hm_lvt_55b574651fcae74b0a9f1cf9c8d7c93a=1668564739,1668958469,1669132483,1669174993; Hm_lpvt_55b574651fcae74b0a9f1cf9c8d7c93a=1669174993; ab_sr=1.0.1_ZDE3M2EwZTU0MjIyMmU5ZjFlYjM0ODBjYTY4NzZkOGNmZTU5MDc0YWU4N2Y2ODhkNWNiMDRhNzE2YjQ5Mjg2ZmMyMGVjYTc4OTJmZjFjNWM3NzNiZTJlZTkyYjk3OTQ4NGE0YTNkZGExNzdkMjYzODAyMzMwNzBkYTgwYjJlYjE1NDdkOTM1OWYxYzg2ZWUzNDU0ZDVkNDA3MWJjYjI2ZmJmMWU2MWEwNjMzYTk5YTg2MDhmYTYxMzkwNjAwMDQx"
+        }
+        ip = "http://" + random.choice(ips)
+        # response = requests.get(url, headers=header,proxies={"http":ip}, timeout=5)
+        driver = webdriver.Chrome()
+        driver.set_window_size(1280, 720)  # custom window size
+        driver.implicitly_wait(3)  # implicit wait
+        driver.get(url)
+        driver.implicitly_wait(3)  # implicit wait
+        data = driver.page_source
+        try:
+            like_count=driver.find_element(by='xpath',value="//*[@id='j-top-vote']/span[1]").text  # likes
+            print(like_count)
+            share_count=driver.find_element(by='xpath',value="//*[@id='j-topShareCount']").text  # shares
+            print(share_count)
+            see_count=driver.find_element(by='xpath',value="//*[@id='j-lemmaStatistics-pv']").text  # page views
+            print(see_count)
+            edit_count_text=driver.find_element(by='xpath',value="/html/body/div[3]/div[2]/div/div[2]/dl/dd[1]/ul/li[2]").text  # edit count
+            print(edit_count_text)
+            num = re.findall(r"\d+", edit_count_text)
+            print(num)
+            edit_count = ""
+            if num:
+                for content in num:
+                    edit_count = edit_count + content
+            print("edit count digits", num)
+            # write to database
+            update_sql = "UPDATE scenics SET bdword_url = %s where id = %s "
+            insert_sql = "insert into bd_words(scenicId,scenicName,like_count,share_count,see_count,edit_count,crawlTime) values (%s,%s,%s,%s,%s,%s,%s)"
+            update(update_sql, (url, id))
+            insert(insert_sql, (id, kw, like_count,share_count,see_count,edit_count, datetime.date.today()))
+            driver.close()
+        except Exception:
+            print("This entry is not in Baidu Baike yet")
+            driver.close()
+
+
+    def run(self):
+        url_list = getRows("select id,name from scenics ", None)
+        for content in url_list:
+            print("crawling scenic spot", content)
+            id = content[0]
+            kw = content[1]
+            self.parse(id,kw)
+# if __name__ =="__main__":
+#     baiduWord=BaiDuWords()
+#     baiduWord.run()
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tongtencities.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tongtencities.py
new file mode 100644
index 0000000..429da00
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tongtencities.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> tongtencities
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-17 11:25
+@Desc This spider crawls city id information from Tongcheng Travel.
+=================================================='''
+import requests
+import json
+url='https://bus.ly.com/busresapi/destination/getDesByLetter'
+
+data={
+    'letter': 'ALL',
+    'depCName': '深圳',
+    'depCId': 1090
+}
+params={
+    "plateId":3
+}
+response=requests.post(url,json=data,params=params)
+result=response.json()
+city_list=result['body']
+with open("../files/city.txt", "a+", encoding='utf-8') as f:
+    for item in city_list:
+        id=item['id']
+        cityName=item['name']
+        f.write(str(id))
+        f.write(",")
+        f.write(cityName)
+        f.write("\n")
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tuniu_route.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tuniu_route.py
new file mode 100644
index 0000000..c4bfab1
--- /dev/null
+++ 
b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tuniu_route.py @@ -0,0 +1,108 @@ +import requests +import re +import aiohttp +import asyncio +import csv +import json +import os +import time +import datetime +import pytz +from lxml import etree +import time +from wangModel.utils.proxys import PROXY +from parsel import Selector +from wangModel.utils.mysqlConn import insert + + +from datetime import date, timedelta + +today = time.strftime("%Y-%m-%d", time.localtime()) +tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d") +headers = { + 'Referer':"https://www.tuniu.com/", + 'User-Agent':'Mozilla/5.0(Windows NT 10.0; Win64; x64)AppleWebKit/537.36(KHTML, like Gecko)Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.46', + 'Cookie': "_uab_collina=166113651603660325791145; udid=tn-100-1661136526886-f196ed1d-21c4-11ed-921a-0ba79eabb6b9; _tact=ODE2YWU2MjgtMDU4MC1hYzJjLTQwOWMtMDMyZGEyNzIzMTMz; _ga=GA1.2.2105264209.1661136529; p_phone_400=4007-999-999; p_phone_level=0; p_global_phone=%2B0086-25-8685-9999; fp_ver=4.7.3; BSFIT_OkLJUJ=FHMgfFXHnQXEiVkz8qQ3cNf9ukZQWypQ; cto_bundle=DDPyZ19ENHlVa1poTVJiZ0twWTExWTB1WXF3RUZzQm5wQjB4c1d0cUsycCUyQkpQMkdFdkVnNnhtcXEzbkZmTW1zYnJCcFBHa3FSWlNKOFVyJTJCN2NJSVkxdWxLTDU0MENscU5QRnhNZHVPZFZiU0h1dHpVcXdlWkJNaE9mcGhOUnQ1STBNTlBnQ1FLREtZN09OMXV1YmJQanZUeHF3JTNEJTNE; __utmz=1.1668932705.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); Hm_lvt_44f54d76a67ba9230a7bb92d5ed5e4ba=1667808931,1668142577,1668940349; __utma=1.2105264209.1661136529.1668958304.1668962727.6; tuniuuser_force_logout=1669109601000; tuniuuser_vip=MA%3D%3D; tuniuuser_level=MA%3D%3D; tuniuuser_id=97164384; tuniuuser_name=MTUyMjkyMzI1Mw%3D%3D; tuniuuser_image=Ly9pbWczLnR1bml1Y2RuLmNvbS9pbWcvMjAxNDA0MDkwMS91c2VyX2NlbnRlci9nX3RvdXhpYW5nLnBuZw%3D%3D; _tacz2=taccsr%3Dcn.bing.com%7Ctacccn%3D%28referral%29%7Ctaccmd%3D%28none%29%7Ctaccct%3D%28none%29%7Ctaccrt%3D%28none%29; tuniu_partner=MTAxLDAsLDlmZDgyZThjYTZkNGMwMTlmZTUyNzdlYjJmNTcxYzQ1; isHaveShowPriceTips=1; _tacau=MCw4NjMxMDNiZi1kOGIwLTViMmYtMWZlMS1mNTFjYjYzYjgyNDIs; PageSwitch=1%2C213612736; _gid=GA1.2.387299839.1670741383; clickCache=%5B%7B%22key%22%3A1670741382627%2C%22url%22%3A%22https%3A%2F%2Fwww.tuniu.com%2F%22%2C%22pageName%22%3A%22%E5%BA%A6%E5%81%87%3A%E5%8D%97%E5%AE%81%3A%E9%A6%96%E9%A1%B5%3Ann%22%2C%22referer%22%3A%22%22%2C%22events%22%3A%5B%7B%22text%22%3A%22%E7%82%B9%E5%87%BB_%E9%A1%B6%E9%83%A8%E5%AF%BC%E8%88%AA_%E4%B8%80%E7%BA%A7%E5%AF%BC%E8%88%AA_6_%E9%85%92%E5%BA%97%22%2C%22x%22%3A394%2C%22y%22%3A149%2C%22lg%22%3A1670741384382%7D%5D%7D%5D; rg_entrance=010000%2F003001%2F000013%2F000000; tuniu-assist={%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22bgcolor%22:false}; Hm_lvt_fe3fbe4228e14b1544525f058df92f91=1669634420,1670140045,1670741377,1670757948; _pzfxuvpc=1661136528944%7C9947719618884716956%7C135%7C1670757948543%7C48%7C1145071184993068063%7C2602933339806650760; OLBSESSID=m10d18uckfquipe2llgnec7ds4; acw_sc__v2=6395be40a848e7c226787a31326638850a340bc0; 
tuniu_zeus=M18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIwIDIyOjIzOjQy%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vdHJpcHMudHVuaXUuY29tLzo6MjAyMi0xMS0yMCAyMjozMDo0Mw%3D%3D%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIwIDIyOjMwOjQ5%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vd3d3LnR1bml1LmNvbS86OjIwMjItMTEtMjAgMjM6MTA6MDQ%3D%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIwIDIzOjI3OjM2%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIxIDEyOjQ2OjUz%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIxIDEyOjQ2OjU0%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vd3d3LnR1bml1LmNvbS86OjIwMjItMTEtMjEgMTY6MjI6MjU%3D%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIxIDE2OjIyOjQz%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vd3d3LnR1bml1LmNvbS90b3Vycy86OjIwMjItMTItMTEgMTk6MjU6NTI%3D%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vd3d3LnR1bml1LmNvbS90b3Vycy86OjIwMjItMTItMTEgMTk6MjU6NTU%3D; tuniu_searched=a%3A5%3A%7Bi%3A0%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A6%3A%22%E6%A1%82%E6%9E%97%22%3Bs%3A4%3A%22link%22%3Bs%3A47%3A%22%2F%2Fs.tuniu.com%2Fsearch_complex%2Ftours-nn-0-%E6%A1%82%E6%9E%97%2F%22%3B%7Di%3A1%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A21%3A%22%E6%A1%82%E6%9E%97%E7%9A%84%E6%97%85%E6%B8%B8%E7%BA%BF%E8%B7%AF%22%3Bs%3A4%3A%22link%22%3Bs%3A50%3A%22http%3A%2F%2Fwww.tuniu.com%2Fg705%2Fwhole-gl-0%2Flist-h0-j0_0%2F%22%3B%7Di%3A2%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A62%3A%22%E8%AF%97%E4%B8%8E%E8%BF%9C%E6%96%B9%C2%B7%E6%BC%93%E6%B1%9F%E9%99%A2%E5%AD%90%E9%85%92%E5%BA%97%EF%BC%88%E4%B8%A4%E6%B1%9F%E5%9B%9B%E6%B9%96%E4%B8%9C%E8%A5%BF%E5%B7%B7%E5%BA%97%EF%BC%89%22%3Bs%3A4%3A%22link%22%3Bs%3A40%3A%22http%3A%2F%2Fhotel.tuniu.com%2Fdetail%2F2073760650%22%3B%7Di%3A3%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A58%3A%22%E8%AF%97%E4%B8%8E%E8%BF%9C%E6%96%B9%C2%B7%E6%BC%93%E6%B1%9F%E9%99%A2%E5%AD%90%E9%85%92%E5%BA%97%28%E4%B8%A4%E6%B1%9F%E5%9B%9B%E6%B9%96%E4%B8%9C%E8%A5%BF%E5%B7%B7%E5%BA%97%29%22%3Bs%3A4%3A%22link%22%3Bs%3A106%3A%22http%3A%2F%2Fs.tuniu.com%2Fsearch_complex%2Fhotel-gl-0-%E8%AF%97%E4%B8%8E%E8%BF%9C%E6%96%B9+%E6%BC%93%E6%B1%9F%E9%99%A2%E5%AD%90+%E4%B8%A4%E6%B1%9F%E5%9B%9B%E6%B9%96%E4%B8%9C%E8%A5%BF%E5%B7%B7%E5%BA%97%2F%3Fjump%3Dauto%22%3B%7Di%3A4%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A30%3A%22%E6%A1%82%E6%9E%97%E9%AB%98%E9%93%81%E5%8C%97%E7%AB%99%E4%BA%9A%E6%9C%B5%E9%85%92%E5%BA%97%22%3Bs%3A4%3A%22link%22%3Bs%3A71%3A%22%2F%2Fs.tuniu.com%2Fsearch_complex%2Fwhole-gl-0-%E6%A1%82%E6%9E%97%E9%AB%98%E9%93%81%E5%8C%97%E7%AB%99%E4%BA%9A%E6%9C%B5%E9%85%92%E5%BA%97%2F%22%3B%7D%7D; _taca=1661136526075.1670748191106.1670757961271.60; _tacb=NjcxMTdhMzMtNDYwNS0zMjE1LWI1MjUtMTc0NzEzMDI1YjM2; _tacc=1; Hm_lvt_51d49a7cda10d5dd86537755f081cc02=1669088113,1669636043,1670741377,1670757963; PcHomeVisit=1; BSFIT_EXPIRATION=1671968376330; BSFIT_DEVICEID=EJuL0zqYa9MeH-6Ld1C5FRNL8B1kMZYIegSialKv0NO8fko-n1BgNo4ADDsljSX9RDzQzUzxkgAzC-3ZltyyhA9ScDjMQi3oC6mV9IwloOamu0jnNiSVQErdw8ZDYB-HYu3jG6FqV1R30gENEORkYMaNBVfSo39z; tuniuuser_citycode=NzA1; tuniuuser_ip_citycode=NzA1; acw_tc=76b20f6216707597669104541e718761bbdb8491ea15506eaeb386802d1205; acw_sc__v3=6395c55b87a173d5c8ab0530eda85c313100aa8d; connect.sid=s%3ADVpFth7p1RAHGpm5xwz_qhKk2aNxKIEW.61uxobNGuz3FuYwMsUM4VC6yWaHU8%2FsMsdvUKjMO%2Bto; __xsptplusUT_352=1; Hm_lpvt_51d49a7cda10d5dd86537755f081cc02=1670759773; Hm_lpvt_fe3fbe4228e14b1544525f058df92f91=1670759773; __xsptplus352=352.28.1670758079.1670759773.5%234%7C%7C%7C%7C%7C%23%23x3KmvfCykhxF9YMlGHYyP86YcrS0s2BZ%23; 
ssxmod_itna2=GqAh7KY50KAIxYq0d4YKO1xUxDwUpAExacC1QDnFSiaPDs=55DLQury4qnbPWt=4=za2K63qzhLALHV7IBPx8MfgS0L1Vnbq4FCZS4qC2qiWbpOR6QHBmfa=Xf2=uCfNpN6UC95HwyrUaW7aeczFhcFFg9FNx71=I9rRr7kX6mCmrornmOvr9gvP6mkkFrpT+38p4STIYf=im4Dw22dDjKD+1d5i0r47g+KoYD==; ssxmod_itna=eqUxnDcD900Qit3GHIhCAYD7YA5xCDD5+LdD/KimDnqD=GFDK40oo7qr=oDOnBrAhnEG=UGAh+T+W7BWpxex1bqaTDU4i8DCLxNo+mDYY8Dt4DTD34DYDixibkxi5GRD0KDFF5XUZ9Dm4GWFqGfDDoDY86RDitD4qDBCodDKqGgFTxsFq2j7mt3pLxe57GcD0tdxBdeWawcGCcFciNe56nDNEQDzkHDtutS9kd3x0PyBMUDM2ozQi+1BoQebEhrz0D=bSOaiieN9/4rKODq8BCwj75cPD===Host: www.tuniu.comIf-None-Match: W/'3c-NfUtq77l6/q+4MT+i1B8akkKCJY'Referer: https://www.tuniu.com/tour/210484383?u_atoken=2a0c1736-73f1-4b7c-ad98-d6d94e03c5ab&u_asession=01903W2-YdH07896fdt57A6KHEQxoe3p_nM9u4UCHyFrbiLBWPB-i4K8FGDfX1dPGpX0KNBwm7Lovlpxjd_P_q4JsKWYrT3W_NKPr8w6oU7K9FEef24uNZxWMQdDwnHXnnD9UaPztW_A5jsQn1Dkg23WBkFo3NEHBv0PZUm6pbxQU&u_asig=0544d1nAGeRC20Zu086zLa4yldBmLJqPWv-B9mz-H9Xrtah9jkPouz-FPnXWOOi4UZ2dTfIGDG7nuzXDqJvJoeKGx9aDWVBLA6ECRzZSExcTnAgKFWx6KrKB261iRpf4DSkxMbgQTdrHv4qiNIfGkvC9dPO90Bbvr-EUj-WfwYHev9JS7q8ZD7Xtz2Ly-b0kmuyAKRFSVJkkdwVUnyHAIJzW5j2yf_d5xdXB-OIEbSd9cmnegWFHsFYePqrtdDZqCL5w-GOMIgInRzpzRYK0ZViu3h9VXwMyh6PgyDIVSG1W8a4h9Ftm1jXrxOtcF1nJf7yVBaI0FPIhiohXoTwrQ-uFvYeu4qtMWWYz3pQwg1DG_l-4EGHxB6fgv3zddmpOI2mWspDxyAEEo4kbsryBKb9Q&u_aref=zug78BonXAlUtdj%2FsdJ3FWivLgg%3D".encode('utf-8'),
+}
+
+route_list_pages=0
+
+def temp():
+    currentPage=1
+    url = f"https://www.tuniu.com/g705/whole-gl-0/list-z9004399/{currentPage}"
+
+    res = requests.get(url,headers=headers,proxies=PROXY)
+    # print(res.text)
+    selector = etree.HTML(res.text)
+
+    # get the total page count
+    allPage = selector.xpath("//*[@id='contentcontainer']/div[2]/div[1]/div[1]/div[2]/div/a[last()-1]/text()")
+    print(allPage)
+    route_list_pages = 0
+    if len(allPage)>0:
+        print("pages", allPage)
+        route_list_pages = int(allPage[0])
+        time.sleep(2)
+
+    for m in range(1,route_list_pages+1):
+        currentPage=m
+        # rebuild the list url for the current page (the original fetched page 1 every time)
+        url = f"https://www.tuniu.com/g705/whole-gl-0/list-z9004399/{currentPage}"
+        res = requests.get(url, headers=headers, proxies=PROXY)
+        selector = etree.HTML(res.text)
+        items=selector.xpath("//*[@id='contentcontainer']/div[2]/div[1]/div[1]/div[1]/ul/li")
+
+        print(len(items))
+        for child in items:
+            child_url=child.xpath("./div/a/@href")[0]  # route detail link
+            title=child.xpath("./div/a/@aria-label")[0]
+            scenics=child.xpath("./div/a/dl/dd[1]/@title")[0]
+            print("route", title)
+            print("route", type(title))
+            print("scenic spots", scenics)
+            if child_url is not None:
+                time.sleep(3)
+                dedati_url="https:"+child_url
+
+                # parse the route detail page
+                child_request=requests.get(dedati_url,headers=headers,proxies=PROXY)
+                childSelector=Selector(text=child_request.text)
+                routedesc=childSelector.css("#J_Detail > div > div.J_DetailRoute.section-box.detail-route.detail-route4 > div.section-box-body > div.J_DetailRouteDetail.section-box-content.detail-journey-4-detail.active > div.section-box-content.detail-route4-brief-box.detail-route4-brief-nomap > div > div > div")
+                print("crawled content; if empty, the spider was detected", routedesc)
+                if routedesc:
+                    routedesc=routedesc[0]
+                    arranges_list=routedesc.xpath("./p")
+                    desc=""
+                    # XPath positions are 1-based; the original range(len(...)) missed the last <p>
+                    for i in range(1, len(arranges_list)+1):
+                        content=routedesc.xpath(f"string(./p[{i}][@aria-label])").extract_first()
+                        data=str(content).strip().replace(" ","").replace("\n","")
+                        desc=desc+data+";"
+                    print("description", desc)
+                    print(url)
+                    sql = "INSERT INTO route(route_name,sceniclist,route_desc,tn_url) values (%s,%s,%s,%s);"
+                    insert(sql,(str(title),scenics,desc,str(dedati_url)))
+
+                    # # review summary data
+                    # try:
+                    #     print(childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[1]/div[2]/strong/text()"))
+                    #     
satistion=childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[1]/div[2]/strong/text()").extract_first() + # good=childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[2]/div[1]/div[1]/text()").extract_first() + # good=re.search("\d+",good).group() + # middle=childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[2]/div[2]/div[1]/text()").extract_first() + # middle=re.search("\d+").group() + # bad=childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[2]/div[3]/div[1]/text()").extract_first() + # bad=re.search("\d+",bad).group() + # otherslist=childSelector.xpath("//*[@class='fraction']/div") + # otherdata=[] + # for otherService in otherslist: + # service=otherService.xpath("./@aria-label") + # print(service) + # otherdata.append(service) + # except: + # print("为获取渲染数据") + + + else: + print("网页爬虫被检测到了,请在网页手动验证") + # # print(res.text) +if __name__ == '__main__': + temp() diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weather.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weather.py new file mode 100644 index 0000000..e657f78 --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weather.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +'''================================================= +@Project -> File :爬虫 -> weather +@IDE :PyCharm +@Author :sandmswift +@Date :2022-12-11 20:37 +@Desc +==================================================''' +import requests +from lxml import etree +import requests +import re +import time +from bs4 import BeautifulSoup +import pandas as pd +from wangModel.utils.mysqlConn import insert,query + +#url = 'http://lishi.tianqi.com/mianyang/201905.html' +headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36', + 'Cookie':'lianjia_uuid=9d3277d3-58e4-440e-bade-5069cb5203a4; UM_distinctid=16ba37f7160390-05f17711c11c3e-454c0b2b-100200-16ba37f716618b; _smt_uid=5d176c66.5119839a; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2216ba37f7a942a6-0671dfdde0398a-454c0b2b-1049088-16ba37f7a95409%22%2C%22%24device_id%22%3A%2216ba37f7a942a6-0671dfdde0398a-454c0b2b-1049088-16ba37f7a95409%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; _ga=GA1.2.1772719071.1561816174; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1561822858; _jzqa=1.2532744094467475000.1561816167.1561822858.1561870561.3; CNZZDATA1253477573=987273979-1561811144-%7C1561865554; CNZZDATA1254525948=879163647-1561815364-%7C1561869382; CNZZDATA1255633284=1986996647-1561812900-%7C1561866923; CNZZDATA1255604082=891570058-1561813905-%7C1561866148; _qzja=1.1577983579.1561816168942.1561822857520.1561870561449.1561870561449.1561870847908.0.0.0.7.3; select_city=110000; lianjia_ssid=4e1fa281-1ebf-e1c1-ac56-32b3ec83f7ca; 
srcid=eyJ0Ijoie1wiZGF0YVwiOlwiMzQ2MDU5ZTQ0OWY4N2RiOTE4NjQ5YmQ0ZGRlMDAyZmFhODZmNjI1ZDQyNWU0OGQ3MjE3Yzk5NzFiYTY4ODM4ZThiZDNhZjliNGU4ODM4M2M3ODZhNDNiNjM1NzMzNjQ4ODY3MWVhMWFmNzFjMDVmMDY4NWMyMTM3MjIxYjBmYzhkYWE1MzIyNzFlOGMyOWFiYmQwZjBjYjcyNmIwOWEwYTNlMTY2MDI1NjkyOTBkNjQ1ZDkwNGM5ZDhkYTIyODU0ZmQzZjhjODhlNGQ1NGRkZTA0ZTBlZDFiNmIxOTE2YmU1NTIxNzhhMGQ3Yzk0ZjQ4NDBlZWI0YjlhYzFiYmJlZjJlNDQ5MDdlNzcxMzAwMmM1ODBlZDJkNmIwZmY0NDAwYmQxNjNjZDlhNmJkNDk3NGMzOTQxNTdkYjZlMjJkYjAxYjIzNjdmYzhiNzMxZDA1MGJlNjBmNzQxMTZjNDIzNFwiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCIzMGJlNDJiN1wifSIsInIiOiJodHRwczovL2JqLmxpYW5qaWEuY29tL3p1ZmFuZy9yY28zMS8iLCJvcyI6IndlYiIsInYiOiIwLjEifQ=='
+           }
+
+def set_link(year):
+    # year: the year to crawl
+    link = []
+    for i in range(1,13):
+        # twelve months per year
+        if i < 10:
+            url='http://lishi.tianqi.com/guilin/{}0{}.html'.format(year,i)
+            print(url)
+        else:
+            url='http://lishi.tianqi.com/guilin/{}{}.html'.format(year,i)
+            print(url)
+        link.append(url)
+    return link
+
+def get_page(url,headers):
+    html = requests.get(url,headers=headers)
+    if html.status_code == 200:
+        html.encoding = html.apparent_encoding
+        print(html.text)
+        return html.text
+    else:
+        return None
+
+date_box = []
+max_temp = []
+min_temp = []
+weh = []
+wind = []
+week_box = []
+
+def get_data():
+    link = set_link(2022)
+    for url in link:
+
+        html = get_page(url,headers)
+        if html is None:
+            # skip months whose page could not be fetched
+            continue
+        bs = BeautifulSoup(html,'html.parser')
+
+        data = bs.find_all(class_="thrui")
+        date = re.compile('class="th200">(.*?)</')
+        print(data)
+        tem = re.compile('class="th140">(.*?)</')
+
+        time = re.findall(date,str(data))
+        for item in time:
+            week = item[10:]
+            week_box.append(week)
+            date_box.append(item[:10])
+        temp = re.findall(tem, str(data))
+        for i in range(len(time)):
+            # iterate per parsed day so months of different lengths are handled correctly
+            max_temp.append(temp[i*4+0])
+            min_temp.append(temp[i*4+1])
+            weh.append(temp[i*4+2])
+            wind.append(temp[i*4+3])
+get_data()
+datas = pd.DataFrame({'日期':date_box,'星期':week_box,'最高温度':max_temp,'最低温度':min_temp,'天气':weh,'风向':wind})
+for i in range(0,len(datas)):
+    w_time=datas.loc[i]['日期']
+    w_week=datas.loc[i]['星期']
+    max_tem=datas.loc[i]['最高温度']
+    max_tem=re.search(r"\d+",max_tem).group()
+    min_tem=datas.loc[i]['最低温度']
+    min_tem=re.search(r"\d+",min_tem).group()
+    statu=datas.loc[i]['天气']
+    wind=datas.loc[i]['风向']
+    insert("insert into weather(w_time,w_week,max_tem,min_tem,statu,wind) select %s,%s,%s,%s,%s,%s from dual where not exists(select w_time from weather where w_time=%s) ",(w_time,w_week,max_tem,min_tem,statu,wind,w_time))
+
+print(datas)
+
+
+
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weibosign.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weibosign.py
new file mode 100644
index 0000000..3d095cd
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weibosign.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> weibosign
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-23 17:56
+@Desc
+=================================================='''
+import random
+import time
+from time import mktime
+import datetime
+import requests
+from wangModel.utils.proxys import ips
+from bs4 import BeautifulSoup
+import json
+import re
+import sqlite3, pandas
+import traceback
+import threading
+from snownlp import SnowNLP
+from wangModel.utils.mysqlConn import insert, query, getRows
+
+
+class 
WeiboSignSpider(): + header = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.56", + "MWeibo-Pwa": "1", + "X-XSRF-TOKEN": "02df4d", + "X-Requested-With": "XMLHttpRequest", + "Cookie": "SUB=_2A25OhDMBDeRhGeBP7lMU-SbKyT6IHXVth11JrDV6PUJbktANLRWkkW1NRVCnaEGA4AX519XF_MMcAtaGMSsUMm8O; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9W5zvTRN3-zLYaqhYgDN-85m5NHD95QceK-pSK.RSozEWs4Dqc_zi--Xi-zRiKy2i--NiKnRi-zpi--Ri-8si-zXi--Ri-8siKL2i--NiKLWiKnXi--4iK.Ri-z0i--fiK.0i-2fi--fiK.0i-2f; SSOLoginState=1669350225; _T_WM=83963246267; WEIBOCN_FROM=1110006030; MLOGIN=1; __bid_n=184ad2413b313dd7364207; FPTOKEN=30$YVRFaVAuo0Hb+ZCeGddk5p5th37hiAH3OD/a7GIZ/EifG6bPi/j090zR3KK9++fg6peU2CIuZsSJWb/gQj1NoDsjbDRvlOefETnNuv4Zx11df54uM5cp7GO2lRldfLaA/H0Y1zFlg/Et1NrarB+/IC8nPG9aAU2D70bJMXbH2aik3ZAMz6ybL1NhYR6i9lr5t0C1gGbRj585QemHLaRPPW+34QZApuuOhdJhI5rUu0OeCbHkoapziul6hHk+JUco1CFHGxiBnJPluvUa+VmnTGOUBxaur7ndbiECeY9AZOyh/cY2gfnBjO37BqXekHdwimFEqIpYaTUFDpMmOCS/DnRhY6nfcZ4xLtQclnzUHMZiywGLlV0rmzQujNPb6EgK|5ByMysyxg5uNsuQAFYoC08fks57jzZCDUASGWGvQH9U=|10|60e86591f8b07231e25ee3e8ee7a1014; XSRF-TOKEN=02df4d; mweibo_short_token=cf30a892e2; BAIDU_SSP_lcr=https://cn.bing.com/; M_WEIBOCN_PARAMS=oid%3D4839655495435847%26luicode%3D20000061%26lfid%3D4839655495435847%26uicode%3D20000061%26fid%3D4839655495435847" + } + + lasterTime = "" + flag=0 + + # 爬虫基本功能部分,返回网页的一个json + def get_tweets(self, URL, page, ippool): + url = URL.format(str(page)) + while True: + try: + proxy_ip = "http://" + random.choice(ips) + time.sleep(3) + res = requests.get(url, headers=self.header) + res.encoding = 'utf-8' + soup = BeautifulSoup(res.text, 'html.parser') + jd = json.loads(res.text) + # print(jd) + + except: + print('代理有问题呀,换个ip试试') + continue + + if (jd['ok'] == 0) and ("这里还没有内容" in str(jd)): + print(jd) + return 0 + + if jd['ok'] == 0: + print('获取地点的页面失败啊,换个ip试试') + else: + break + + # 第一页的结果会有点不一样 + if page == 1: + if 'card_id' in jd['data']['cards'][0]: + if jd['data']['cards'][0]['card_id'] == 'card_hq_poiweibo': + tweets = jd['data']['cards'][0]['card_group'] + return tweets + else: + tweets = jd['data']['cards'][1]['card_group'] + return tweets + else: + card_id=jd['data']['cards'][0]['card_id'] + if(card_id!="hot_search"): + tweets = jd['data']['cards'][1]['card_group'] + else: + tweets = jd['data']['cards'][0]['card_group'] + + # print(tweets) + return tweets + + def writedb(self, items, page): + # 遍历每条微博 + if items: + print("评论长度",len(items)) + for i in range(len(items)): + print("内容",items[i]) + # 整理微博表的数据 + temp = [0 for i in range(13)] # 初始化一行,一共有11列 + # print(temp) + if 'mblog' in items[i]: + temp[0] = items[i]['mblog']['id'] + if "id" in items[i]['mblog'] and temp[0] is not None: + temp[1] = current_time + temp[2] = items[i]['mblog']['created_at'] + temp[3] = items[i]['mblog']['user']['id'] + temp[4] = items[i]['mblog']['source'] + temp[5] = re.sub("[A-Za-z0-9\!\%\[\]\,\。\<\-\=\"\:\/\.\?\&\_\>\'\;\ ]", "", items[i]['mblog']['text']) + s2 = SnowNLP(temp[5]) + # print(temp[5], s2.sentiments) + temp[6] = items[i]['mblog']['reposts_count'] + temp[7] = items[i]['mblog']['comments_count'] + temp[8] = items[i]['mblog']['attitudes_count'] + temp[9] = items[i]['mblog']['pending_approval_count'] + temp[10] = place + temp[11] = scenicId + + # 删掉来源里面那些乱七八糟的字符 + temp[4] = temp[4].replace("'", "") + temp[4] = temp[4].replace('"', '') + temp[5] = str(temp[5]).replace("#", "").replace("🌸", "") + # print("品论内容",type(temp[5])) + + s = time.strptime(temp[2], '%a %b %d 
%H:%M:%S +0800 %Y')
+                        remarkTime = str(s.tm_year) + "-" + str(s.tm_mon) + "-" + str(s.tm_mday) + " " + str(s.tm_hour) + ":" + str(s.tm_min) + ":" + str(s.tm_sec)
+                        remarkTime = time.strptime(remarkTime, '%Y-%m-%d %H:%M:%S')
+                        remarkTime=datetime.datetime(*remarkTime[:6])
+                        # print("latest comment time for this scenic spot", remarkTime)
+
+                        flag = 0
+                        args = (temp[11], temp[10], temp[3], temp[4], str(temp[5]), temp[6], temp[7], temp[8], remarkTime,
+                                datetime.date.today())
+                        if self.lasterTime is None:
+                            # write to database
+                            sql = "insert into weibosign(scenicId,scenicName,user_id,sourcefrom,content,reports_count,comments_count,attitudes_count,sign_time,crawlTime) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
+                            insert(sql, args)
+                            print("inserted row", temp)
+                            print('Page', page, ' weibo %s written into the weibosign table' % temp[0])
+                            flag = 1
+                            self.flag=flag
+                            # return flag
+                        elif self.lasterTime < remarkTime:
+                            # write to database
+                            sql = "insert into weibosign(scenicId,scenicName,user_id,sourcefrom,content,reports_count,comments_count,attitudes_count,sign_time,crawlTime) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
+                            insert(sql, args)
+                            print('Page', page, ' weibo %s written into the weibosign table' % temp[0])
+                            flag = 1
+                            self.flag = flag
+                        else:
+                            flag=0
+                            self.flag = flag
+
+
+        else:
+            pass
+
+    # crawl the weibo posts of one scenic spot
+    def main(self, row, ippool):
+
+        global place, pid, scenicId
+        scenic = getRows("select name,wb_scenicId,id from scenics where wb_scenicId!=''", None)
+        # read the record for this row (the earlier scenic[row+1] offset skipped the
+        # first scenic spot and overran the end of the list on the last iteration)
+        place = scenic[row][0]
+        pid = scenic[row][1]
+        scenicId = scenic[row][2]
+        print("scenic name: %s, scenic id: %s, site id: %s" % (place, scenicId, pid))
+
+        # check whether the newest weibo has already been crawled
+        selectHasTimeSql = "select sign_time from weibosign where scenicId=%s order by sign_time desc"
+        databaseComment = getRows(selectHasTimeSql, scenicId)
+        print("comment timestamps already in the database", databaseComment)
+        # latest comment time from the previous crawl
+        lasterDate = None
+        if databaseComment:
+            lasterDate = databaseComment[0][0]
+            print("latest time is", lasterDate)
+
+        self.lasterTime = lasterDate
+
+        print('****************** start crawling weibo for %s *******************************' % place)
+        try:
+            time_start = time.time()
+
+            # crawl up to 150 pages of weibo
+            for page in range(1, 150):
+                # weibo place (POI) URL
+                URL = 'https://m.weibo.cn/api/container/getIndex?containerid=' + pid + f'&luicode=10000011&lfid=100103type%3D1%26q%3D%E6%AD%A6%E6%B1%89%E5%A4%A7%E5%AD%A6&page={page}'
+                print('crawling', place, 'page', page)
+
+                # current time
+                global current_time
+                current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
+                # fetch the whole page as json
+                tweets = self.get_tweets(URL, page, ippool)
+
+                # the "周边值得去" block marks the end of the feed
+                if "周边值得去" in str(tweets):
+                    print('reached the end!')
+                    break
+
+                if tweets == 0:
+                    print('page', page, 'reached, no more content')
+                    break
+
+                self.writedb(tweets,page)
+                flag = self.flag
+                print("crawl result flag", flag)
+                if flag==0:
+                    print("all new posts for this scenic spot are already stored")
+                    break
+                else:
+                    print(place, 'page', page, 'done!')
+                    continue
+
+            time_end = time.time()
+            print(place, ' time cost ', time_end - time_start, 's')
+
+            print('****************** finished crawling weibo for %s *******************************' % place)
+
+
+        except Exception:
+            e = traceback.format_exc()
+            # on error the intent was to send a mail and exit; for now just log the traceback
+            print(e)
+
+        print(place, 'finished! waiting for the next run')
+
+    def run(self):
+        rows = getRows("select count(*) from scenics where wb_scenicId!=''", None)
+        n = rows[0][0]
+        for i in range(n):
+            self.main(i, ips)
+# if __name__ == '__main__':
+#     web = WeiboSignSpider()
+#     web.run()
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/files/city.txt b/applications/common/scrapySpiders/wangModel/wangModel/files/city.txt
new file mode 
100644 index 0000000..ce5f43c --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/wangModel/files/city.txt @@ -0,0 +1,2901 @@ +1096,珠海 +1095,中山 +1824,廉江 +1110,玉林 +1126,海口 +1078,佛山 +1106,柳州 +1082,江门 +1807,顺德区 +1928,罗定 +1871,兴宁 +2174,兴义 +2099,北流 +1859,化州 +2095,岑溪 +1077,东莞 +1080,河源 +3201,遂川县 +2100,博白县 +1977,斗门区 +1084,茂名 +1107,南宁 +2102,容县 +1087,汕头 +3224,修水县 +1092,云浮 +1830,紫金县 +1097,百色 +1202,郴州 +1099,崇左 +1237,抚州 +2101,陆川县 +1085,梅州 +4415,蒙自 +1955,三乡镇 +1870,五华县 +1119,遵义 +2934,道县 +1857,电白区 +1113,贵阳 +1103,河池 +1081,惠州 +2141,剑河县 +1876,连州 +1130,琼海 +1132,三亚 +1089,韶关 +3270,万载县 +1937,徐闻县 +5244,大沥镇 +1864,大埔县 +4030,大竹县 +2001,扶绥县 +1101,桂林 +1105,来宾 +1334,乐山 +4482,马关县 +1338,南充 +1900,南雄 +2939,宁远县 +2076,上林县 +1845,台山 +1192,天门 +1207,湘潭 +1954,小榄镇 +2942,新田县 +4126,营山县 +2863,永兴县 +1093,湛江 +1165,驻马店 +1111,安顺 +2834,安乡县 +1150,安阳 +3177,安远县 +2052,八步区 +2726,巴东县 +1098,北海 +2071,宾阳县 +2152,岑巩县 +5449,春湾镇 +2547,郸城县 +2139,丹寨县 +4544,东阳 +2157,独山县 +1100,防城港 +3264,丰城 +38433,拱北 +1332,广安 +1102,贵港 +2549,淮阳县 +1185,黄石 +71510,华阳镇 +2921,花垣县 +2160,惠水县 +2883,会同县 +1239,吉安 +2922,吉首 +4031,开江县 +2143,凯里 +2161,荔波县 +1128,临高县 +2086,灵山县 +1129,陵水 +2015,荔浦 +1114,六盘水 +2841,澧县 +1986,隆林县 +1895,陆丰 +2063,鹿寨县 +6996,麻陂镇 +1242,南昌 +2044,南丹县 +3173,南丰县 +1156,南阳 +2804,南漳县 +1339,内江 +3186,宁都县 +1582,平潭县 +1869,平远县 +2170,普安县 +2087,浦北县 +1167,濮阳 +4655,黔江区 +2742,蕲春县 +1108,钦州 +32211,琼中县 +4032,渠县 +22290,容桂 +2465,社旗县 +2763,松滋 +1935,遂溪县 +1191,随州 +3202,泰和县 +2553,太康县 +55954,太平镇 +1965,坦洲镇 +1989,田东县 +1991,田阳县 +56404,亭角村 +2172,望谟县 +4065,武胜县 +4634,巫溪县 +2056,武宣县 +1196,襄阳 +2057,象州县 +1861,信宜 +1245,新余 +2743,浠水县 +2468,西峡县 +2733,宣恩县 +1342,雅安 +1922,阳春 +1925,阳西县 +2184,沿河县 +1246,宜春 +3272,宜丰县 +8005,英桥镇 +2185,印江县 +4638,酉阳县 +2890,沅陵县 +3194,于都县 +3254,余干县 +41195,张黄镇 +2175,贞丰县 +1400,重庆 +5663,阿猛镇 +70677,安场镇 +3196,安福县 +62684,安福乡 +5326,安海镇 +2926,安化县 +5812,安江镇 +1315,安康 +5947,安流镇 +2168,安龙县 +2811,安陆 +119929,安平村 +2300,安平县 +5880,安平镇 +53959,安平镇 +83359,安铺村 +5966,安铺镇 +1035,安庆 +80025,安仁村 +2855,安仁县 +81277,安山村 +10351,安山镇 +80003,安乡 +3229,安义县 +6007,安远镇 +4166,安岳县 +1402,中国澳门 +10377,敖溪镇 +21426,八宝镇 +6109,八尺镇 +55677,坝固镇 +6232,把荷乡 +46889,柏布 +6258,白仓镇 +22513,白地市镇 +152616,白濠村 +39726,白河村 +3851,白河县 +113527,百候镇 +5281,白花镇 +6312,白蕉镇 +6347,白螺镇 +89292,白马村 +6361,白芒营镇 +84398,白马镇 +127679,白马镇 +88217,白庙 +101941,白庙村 +1120,白沙 +113723,白沙村 +95583,白沙村 +86354,白沙井村 +39829,白沙镇 +91702,白沙镇 +38940,白沙镇 +32970,白沙镇 +5471,白沙镇 +113785,白沙镇 +6394,白砂镇 +51180,白沙镇 +57718,白沙镇 +71882,白沙镇 +73301,白沙镇 +80920,白沙镇 +74187,白石铺镇 +46904,白石镇 +3901,白水县 +56726,白水镇 +92490,白塔镇 +6445,白圩镇 +1817,白云区 +6596,百丈乡 +6162,八甲镇 +32195,巴铃镇 +2036,巴马县 +72365,巴马镇 +90362,板坝村 +1036,蚌埠 +6788,邦溪镇 +88938,板桥镇 +88396,板桥镇 +5294,板桥镇 +56410,板头村 +1915,宝安区 +2474,宝丰县 +36800,宝丰镇 +1316,宝鸡 +23724,保家村 +76976,保家镇 +2918,保靖县 +2801,保康县 +1373,保山 +6912,鲍峡镇 +6822,包信镇 +6859,宝圩乡 +6860,宝圩镇 +56006,宝圩镇 +3149,宝应县 +5425,八所镇 +6214,八塘镇 +165092,八一村 +94650,八一 +97340,八一农场 +158253,八一乡 +1327,巴中 +84836,北岸村 +4650,北碚区 +50984,北大 +5475,北惯镇 +5243,北滘镇 +11264,北界镇 +11936,北界镇 +11958,北景乡 +35043,北栅 +5435,北盛镇 +43715,北市镇 +18513,贝水村 +7135,北陀镇 +62199,扁牙村 +1112,毕节 +5490,丙村镇 +3137,滨海县 +60270,滨海镇 +3926,彬县 +4643,璧山区 +7396,伯劳镇 +1834,博罗县 +7419,博美镇 +6630,柏埔镇 +6633,柏塘镇 +72956,播植镇 +1051,亳州 +95285,播州区 +5413,步云桥镇 +2788,蔡甸区 +4593,苍南县 +122791,苍山村 +97020,苍山村 +118150,苍山镇 +2091,苍梧县 +4068,苍溪县 +100952,草海村 +124001,草海镇 +100430,草塘镇 +3599,曹县 +21003,草寨村 +2169,册亨县 +7789,茶庵铺镇 +7782,茶安铺镇 +87857,汊涧镇 +2961,茶陵县 +7824,茶林乡 +1778,长安镇 +8688,长布镇 +1200,常德 +2527,长葛 +3213,昌江区 +158241,长坎村 +1575,长乐 +87202,长乐镇 +101301,长隆 +29095,长隆村 +7931,昌明镇 +2867,常宁 +4155,长宁县 +29115,长宁镇 +1780,常平镇 +89181,长坡镇 +92549,长坡镇 +73029,菖蒲乡 +91805,长铺子乡 +89263,长庆桥 +81761,长庆桥镇 +1201,长沙 +4611,常山县 
+4656,长寿区 +46990,长寿镇 +62993,长寿镇 +3086,常熟 +2155,长顺县 +98631,长塘村 +163180,长塘村 +91318,长塘镇 +12524,长塘镇 +1590,长汀县 +3927,长武县 +4529,长兴县 +71666,长阳坡村 +8735,长阳铺镇 +2819,长阳县 +2502,长垣县 +1304,长治 +1224,常州 +1768,潮安区 +1037,巢湖 +1892,潮南区 +1884,潮阳区 +1076,潮州 +75124,茶山村 +118128,茶山镇 +1793,茶山镇 +7833,茶阳镇 +14525,车河镇 +8079,陈场镇 +5253,陈村镇 +5315,陈店镇 +157627,城北村 +2899,城步县 +92375,城东 +162867,城东镇 +84829,城东镇 +1328,成都 +3875,城固县 +1885,澄海区 +48068,城皇村 +71088,城隍镇 +8264,城月镇 +48033,陈家 +37699,陈家村 +153961,陈江村 +8171,陈江镇 +2880,辰溪县 +8067,车田镇 +2794,赤壁 +73040,赤光镇 +1939,赤坎区 +48785,赤坎镇 +56747,赤眉镇 +46535,赤水 +2189,赤水 +5402,赤水镇 +72605,赤水镇 +15238,池尾镇 +1038,池州 +87855,冲花村 +8347,冲蒌镇 +3165,崇仁县 +67366,崇仁镇 +2795,崇阳县 +3178,崇义县 +4008,崇州 +9156,船步镇 +9162,船塘镇 +4514,淳安县 +97498,楚雄 +1374,楚雄州 +1039,滁州 +79294,慈化村 +87578,慈化镇 +2956,慈利县 +4564,慈溪 +1823,从化区 +113528,从化庄村 +2138,从江县 +98351,翠亨村 +2972,大安市 +6597,大安镇 +151979,大坳村 +16110,大坳村 +16118,大坝镇 +5328,达埔镇 +64244,达川区 +89331,大垌镇 +2114,大方县 +16437,大付 +115554,大付村 +74283,大福镇 +16050,达濠 +2037,大化县 +71173,大湖镇 +9736,待补镇 +89665,大井镇 +88151,大岚村 +1781,大朗镇 +35101,大理 +1251,大连 +128076,大良口村 +44573,大良镇 +1375,大理州 +98317,大沥村 +1779,大岭山镇 +3903,大荔县 +70660,大龙村 +91669,大路村 +68800,大伦镇 +33621,大庙村 +99058,大庙镇 +10362,旦场镇 +3887,丹凤县 +95355,丹凤镇 +1525,砀山县 +2820,当阳 +58664,大宁 +2766,丹江口 +4104,丹棱县 +39858,淡水 +4721,淡水区 +3157,丹阳 +5246,丹灶镇 +1137,儋州 +5477,道滘镇 +81736,道口镇 +2190,道真县 +70592,大坪 +101229,大坪乡 +80864,大坪乡 +91666,大坪镇 +64055,大坡外镇 +16550,大坡镇 +112419,大桥村 +75577,大桥村 +88613,大桥镇 +11327,大桥镇 +88577,大桥镇 +55386,大桥镇 +56959,大仁庙村 +86869,大沙镇 +88614,大沙镇 +5323,大沙镇 +46940,大盛镇 +3384,大石桥 +32873,大石镇 +16399,大寺镇 +60576,大塘村 +86418,大塘镇 +1638,大田县 +1305,大同 +35128,大通湖区 +82909,大旺镇 +153072,大旺镇 +95027,大湾镇 +98594,大围 +2812,大悟县 +2000,大新县 +57398,大新镇 +5187,大溪镇 +9672,大瑶镇 +164610,大亚湾村 +2745,大冶 +4140,大英县 +112888,大涌村 +5234,大涌镇 +3179,大余县 +46769,大镇 +73122,大镇镇 +9712,大直镇 +1329,达州 +154825,大竹村 +89516,大竹镇 +4620,大足区 +3215,德安县 +1982,德保县 +4087,德昌县 +114691,德城 +1626,德化县 +2177,德江县 +2534,登封 +10550,邓家铺镇 +3350,灯塔 +94541,灯塔村 +87239,灯塔镇 +169775,登云 +150557,登云镇 +2460,邓州 +1943,德庆县 +10482,德胜镇 +3247,德兴 +1330,德阳 +83487,电城镇 +4621,垫江县 +9911,底庙镇 +1124,定安县 +82281,丁河镇 +1951,鼎湖区 +3180,定南县 +74988,底蓬镇 +2935,东安县 +56177,东陂镇 +87196,东城 +116819,东城 +4437,东川区 +1125,东方 +95933,东方 +1971,东凤镇 +3312,东港 +88184,东港镇 +3057,东海县 +1798,东坑镇 +2900,洞口县 +92456,洞口乡 +2038,东兰县 +69724,洞利 +46822,东平村 +3674,东平县 +56001,东平镇 +5308,东平镇 +55643,东平镇 +10085,东圃镇 +113085,东区 +1660,东山县 +70132,东胜村 +3433,东胜区 +5478,东升镇 +88105,东水镇 +3139,东台 +3166,东乡区 +10928,东乡镇 +16866,东溪村 +2007,东兴 +82927,东兴镇 +84803,东溪镇 +1289,东营 +112885,东涌村 +158252,东涌镇 +1825,东源县 +1453,东至县 +9897,斗山镇 +45616,都安乡 +2039,都安县 +3216,都昌县 +48208,都川镇 +5005,都斛镇 +9947,对江镇 +9877,都结乡 +162994,渡口村 +60838,独山镇 +9974,都阳镇 +2156,都匀 +4076,峨眉山 +1841,恩平市 +2727,恩施 +1183,恩施州 +74529,二郎镇 +88435,二塘镇 +5436,二塘镇 +1182,鄂州 +10119,发轮镇 +158254,法那村 +2011,防城区 +2461,方城县 +21087,芳村 +1567,房山区 +2767,房县 +1819,番禺区 +9828,飞仙镇 +92211,凤村镇 +4622,丰都县 +2191,凤冈县 +1792,凤岗镇 +126736,凤岗镇 +87907,凤凰 +2919,凤凰县 +84438,凤凰乡 +4565,奉化区 +4623,奉节县 +1944,封开县 +61058,峰口镇 +4446,凤庆县 +2503,封丘县 +56562,凤山村 +2040,凤山县 +100443,凤山镇 +56989,凤山镇 +1865,丰顺县 +5460,枫亭镇 +3864,凤翔区 +3265,奉新县 +71201,丰阳镇 +11434,丰阳镇 +72931,分界镇 +121658,分水坳镇 +5575,汾水道 +86548,分水镇 +39995,分水镇 +39830,分水镇 +3260,分宜县 +58704,佛冈村 +1873,佛冈县 +1609,福安 +2048,富川县 +9978,福德镇 +1610,福鼎 +45079,福鼎村 +3865,扶风县 +88567,佛岗村 +2548,扶沟县 +31471,扶合镇 +46641,伏虎镇 +30899,富家村 +78117,富家镇 +4657,涪陵区 +10078,富罗镇 +1469,阜南县 +3140,阜宁县 +4479,富宁县 +3904,富平县 +1576,福清 +2158,福泉 +84885,芙蓉镇 +5197,芙蓉镇 +167272,福山村 +3710,福山区 +170590,福山镇 +11694,福山镇 +1974,阜沙镇 +4171,富顺县 +38419,富顺镇 +17282,富田镇 +41501,扶新镇 +1040,阜阳 +5782,富驿镇 +4458,富源县 +1053,福州 +88369,岗背镇 +31096,港口村 +92538,港口镇 
+46869,港口镇 +5934,岗美镇 +5874,赶水镇 +3181,赣县区 +3058,赣榆区 +1238,赣州 +3266,高安 +79020,高安村 +52807,高陂镇 +69601,高陂镇 +1788,高埗镇 +23655,高峰镇 +6028,高家堰镇 +6057,高良镇 +1809,高明区 +74340,高明乡 +89238,高坪 +4129,高坪区 +84873,高坪乡 +95601,高坪镇 +11779,高平镇 +154180,高坪镇 +89195,高坡 +100109,高坡村 +6083,高坡镇 +35326,高埔镇 +46631,高桥 +102718,高桥镇 +100003,高桥镇 +56439,高桥镇 +86519,高沙 +162990,高沙村 +5438,高沙镇 +1761,高台县 +4156,高县 +1945,高要区 +3150,高邮 +1858,高州 +4408,个旧 +56256,公安村 +74631,公安县 +79668,公安镇 +162103,蚣坝镇 +2012,恭城县 +5506,恭城镇 +86846,公馆村 +85470,公馆村 +5265,公馆镇 +12053,公会镇 +43701,巩桥 +70392,巩桥村 +6253,共青城 +4163,珙县 +88179,公庄镇 +6274,构林镇 +16787,关埠镇 +1773,莞城 +45604,官渡镇 +42595,官渡镇 +4067,广安区 +3168,广昌县 +3248,广丰区 +6583,广福乡 +70692,广福镇 +91134,广福镇 +56659,广福镇 +5004,广海镇 +12853,广华 +71131,广华岭 +47757,厂窖镇 +4480,广南县 +1946,广宁县 +2516,光山县 +2776,广水 +1333,广元 +44558,广园新村 +1598,光泽县 +1079,广州 +38172,观澜镇 +2107,关岭县 +91681,官桥镇 +13032,官桥镇 +42617,官桥镇 +5442,官桥镇 +5420,冠市镇 +98999,关王镇 +6518,官圩镇 +2013,灌阳县 +6566,灌涨镇 +6525,官庄乡 +103872,官庄乡 +69472,官庄乡 +7683,官庄镇 +27265,官庄子村 +6474,观珠镇 +2802,谷城县 +99927,古城镇 +81408,归朝村 +6620,归朝镇 +31406,桂城 +2159,贵定县 +2857,桂东县 +2030,桂平 +6639,贵台镇 +6647,桂头镇 +3275,贵溪 +2858,桂阳县 +6642,贵子镇 +4178,古蔺县 +60496,谷陇镇 +81823,郭集镇 +6742,果遂乡 +5313,谷饶镇 +2515,固始县 +5397,古水镇 +12391,古宋镇 +6330,古潭乡 +1611,古田县 +1274,固原 +2920,古丈县 +1953,古镇镇 +6346,古竹镇 +3078,海安 +54008,海安镇 +1894,海丰县 +3079,海门 +70590,海门镇 +118157,海田村 +3538,海晏县 +1815,海珠区 +2813,汉川 +1142,邯郸 +6846,寒冻镇 +6820,浛洸镇 +1389,杭州 +2792,汉口 +1447,含山县 +2836,汉寿县 +95478,汉塘村 +3852,汉阴县 +4147,汉源县 +1317,汉中 +91837,蒿板镇 +6915,好义镇 +6925,郝寨镇 +64326,黄坭乡 +1151,鹤壁 +4624,合川区 +1041,合肥 +2728,鹤峰县 +46957,荷花镇 +98491,合江村 +56719,合江村 +4179,合江县 +99849,合江镇 +105657,合江镇 +55887,合江镇 +6932,禾加镇 +7111,贺街镇 +4409,河口县 +62197,河口镇 +1010,河南 +95168,河南岸镇 +7202,横板桥镇 +80946,横陂镇 +2868,衡东县 +1968,横栏镇 +1802,横沥镇 +2869,衡南县 +7228,横琴镇 +2870,衡山县 +2072,横县 +1203,衡阳 +112095,和平村 +112145,和平村 +162996,和平村 +1826,和平县 +44115,和平乡 +5316,和平镇 +48621,河婆镇 +91610,河浦 +1995,合浦县 +1842,鹤山 +2053,合山 +48054,何市镇 +61652,何市镇 +154411,合水村 +1739,合水县 +53440,合水镇 +45754,合水镇 +5321,荷塘镇 +119734,荷塘镇 +1448,和县 +7102,荷香桥镇 +56623,河西 +1290,菏泽 +2115,赫章县 +1104,贺州 +2734,红安县 +63462,洪安镇 +7281,红果镇 +1378,红河州 +4410,红河县 +1199,洪湖 +2881,洪江 +7399,洪濑镇 +10203,宏路镇 +102939,洪桥村 +118713,洪桥村 +4193,红桥区 +35513,洪桥社区 +39327,红桥镇 +5107,洪桥镇 +42610,洪阳镇 +87516,红岩寺镇 +86885,红岩寺镇 +4105,洪雅县 +3048,洪泽区 +35596,猴场村 +57803,猴场镇 +117437,猴场镇 +80783,后湖村 +1777,厚街镇 +82737,鮜门镇 +68017,鲘门镇 +7544,厚坡镇 +5463,华城镇 +60759,华东村 +1820,花都区 +1225,淮安 +1042,淮北 +152609,怀德 +1204,怀化 +1947,怀集县 +7869,淮口镇 +1043,淮南 +1425,怀宁县 +75784,怀乡 +1439,怀远县 +38971,怀远镇 +71004,花桥镇 +2790,黄陂区 +48702,黄陂镇 +56392,黄陂镇 +5278,黄埠镇 +7952,黄布镇 +5430,黄材镇 +2523,潢川县 +87190,黄村 +1184,黄冈 +121069,黄阁镇 +7984,黄果树镇 +7995,黄槐镇 +1789,黄江镇 +47039,黄金埠镇 +8022,黄练镇 +85249,黄龙村 +42712,黄毛元镇 +2736,黄梅县 +2140,黄平县 +5469,黄坡镇 +1818,黄埔区 +1969,黄圃镇 +169664,黄桥 +170511,黄桥村 +74917,黄桥镇 +14072,黄桥镇 +156147,黄桥镇 +59915,黄塘镇 +87179,黄田镇 +55297,黄田镇 +4591,黄岩区 +2744,黄州区 +4494,华宁县 +2042,环江县 +91696,花桥村 +13919,花桥村 +158459,花桥村 +150417,花桥村 +45134,花桥乡 +84365,花桥镇 +52979,花桥镇 +102795,花桥镇 +100760,花桥镇 +5058,花桥镇 +63260,花桥镇 +2725,华容区 +2946,华容县 +91312,花坦乡 +2410,滑县 +72597,华阳 +4063,华蓥 +1627,惠安县 +3183,会昌县 +1838,惠城区 +1835,惠东县 +1850,惠来县 +163937,回龙村 +8268,回龙寺镇 +60703,回龙乡 +7687,回龙镇 +95900,回龙镇 +87680,回龙镇 +60651,回马镇 +1839,惠阳区 +4459,会泽县 +105908,胡集 +5017,胡集镇 +3217,湖口县 +1776,虎门镇 +8310,火厂坪镇 +8315,火连坡镇 +3775,霍州 +1390,湖州 +165462,加禾 +165040,加禾村 +2859,嘉禾县 +121859,加禾乡 +4077,夹江县 +4517,建德 +4157,江安县 +3151,江都区 +4069,剑阁县 +2936,江华县 +152362,蒋家坪村 +5415,蒋家桥镇 +4625,江津区 +150783,江口 +146055,江口村 +2178,江口县 +162995,江口乡 +111830,江口镇 +120990,江口镇 +46072,江口镇 +51168,江口镇 +45152,江口镇 +78575,江口镇 +1640,将乐县 +2761,江陵县 +84314,江门镇 +2081,江南区 
+4612,江山 +53360,江信村 +112085,姜圩乡 +3109,姜堰区 +3116,江阴 +2937,江永县 +3141,建湖县 +2760,监利县 +1639,建宁县 +1600,建瓯 +93346,尖沙咀 +2729,建始县 +4411,建水县 +4082,犍为县 +155085,建兴乡 +74319,建兴镇 +32532,简阳 +123632,建阳村 +1599,建阳区 +145006,滘口村 +1153,焦作 +2480,郏县 +1391,嘉兴 +2796,嘉鱼县 +53284,甲子镇 +1851,揭东区 +10118,碣石镇 +1471,界首 +1852,揭西县 +1083,揭阳 +99001,鸡街镇 +99534,鸡街镇 +5280,吉隆镇 +89343,集美区 +1291,济南 +8775,金钗镇 +1306,晋城 +2047,金城江区 +10060,金鼎镇 +8813,金渡镇 +3267,靖安县 +1737,泾川县 +4988,泾川镇 +1240,景德镇 +4469,景东县 +3200,井冈山 +4488,景洪 +3110,靖江 +1186,荆门 +2754,京山市 +1983,靖西 +4078,井研县 +1187,荆州 +2884,靖州县 +9029,敬梓镇 +48057,靖海镇 +10006,金和镇 +1392,金华 +3050,金湖县 +1292,济宁 +1629,晋江 +40858,金江镇 +79215,金井镇 +5330,金井镇 +8865,金孔镇 +46002,金兰镇 +10004,金利镇 +164069,金牌村 +4412,金平县 +1888,金平区 +2142,锦屏县 +93532,金沙 +165284,金沙 +2116,金沙县 +49579,金沙镇 +2837,津市 +10003,金石桥镇 +4011,金堂县 +68855,金淘镇 +58560,金屋村 +83364,进贤村 +3230,进贤县 +91189,金溪村 +88338,金溪村 +2054,金秀县 +3169,金溪县 +73098,金溪镇 +4562,缙云县 +1510,金寨县 +112886,金洲村 +154822,金洲村 +31506,金洲镇 +8942,筋竹镇 +3199,吉水县 +56545,久长镇 +10108,九重镇 +157727,九丰村 +38329,九峰镇 +10095,九公桥镇 +9055,九和镇 +1241,九江 +5251,九江镇 +75482,九隆 +4662,九龙城区 +29675,九龙塘 +45954,九龙镇 +9075,九市镇 +40859,九所镇 +57001,九王庙村 +39439,九支镇 +100004,旧州村 +71372,旧州镇 +5551,旧州镇 +79556,旧州镇 +1152,济源市 +3208,吉州区 +4164,筠连县 +2418,浚县 +1154,开封 +1844,开平 +2124,开阳县 +4413,开远 +4626,开州 +48071,克度镇 +9354,葵潭镇 +1379,昆明 +3087,昆山 +1462,来安县 +2730,来凤县 +9380,拉烈镇 +38660,郎岱镇 +56944,郞岱镇 +41696,琅东村 +9476,榔坪镇 +48812,朗塘镇 +4127,阆中 +2436,兰考县 +9454,蓝口镇 +3644,兰陵县 +9432,兰陵镇 +91800,兰山镇 +9458,蓝塘镇 +1068,兰州 +51436,老城 +15740,老城村 +105809,老城乡 +88999,老城镇 +2803,老河口 +15802,老隆镇 +2938,蓝山县 +3915,蓝田县 +3170,乐安县 +1899,乐昌 +5252,乐从镇 +103866,乐东村 +4092,雷波县 +2144,雷山县 +2874,耒阳 +1934,雷州 +85934,勒流镇 +42040,乐民镇 +41206,乐民镇 +81306,水口村 +2893,冷水江 +2944,冷水滩区 +85274,冷水镇 +3212,乐平 +27434,乐平村 +69611,乐平乡 +4595,乐清 +1984,乐业县 +79191,乐业镇 +36802,乐至县 +95202,连城村 +1591,连城县 +48627,良垌镇 +22907,良坊镇 +4627,梁平区 +71089,良田镇 +87924,两英镇 +74814,莲花村 +3240,莲花县 +162818,莲花镇 +98436,莲花镇 +82891,连江村 +1578,连江县 +103699,连江镇 +154823,连界村 +48547,连界镇 +1874,连南县 +1828,连平县 +1875,连山县 +54013,莲塘镇 +13730,连滩镇 +2894,涟源 +1226,连云港 +116647,连州镇 +1797,寮步镇 +1294,聊城 +2731,利川 +3171,黎川县 +86842,烈面镇 +10058,黎埠镇 +78470,立化镇 +46943,里湖镇 +1380,丽江 +2966,醴陵 +43712,栗木镇 +1381,临沧 +55863,林尘镇 +1308,临汾 +2484,灵宝 +14079,岭背镇 +1521,灵璧县 +2017,灵川县 +54981,灵峰镇 +57940,酃湖乡 +2945,零陵区 +87204,岭门镇 +70428,灵山镇 +2016,临桂区 +1985,凌云县 +82583,凌云乡 +4583,临海 +2981,临江 +112087,临江镇 +88223,临江镇 +45467,临江镇 +2838,临澧县 +81259,伶俐镇 +1472,临泉县 +4064,邻水县 +13950,林头镇 +99923,林头镇 +2860,临武县 +2947,临湘 +1295,临沂 +2569,临颍县 +64192,林寨镇 +2411,林州市 +2145,黎平县 +3929,礼泉县 +122190,厉山镇 +76813,黎少镇 +56320,李市镇 +82965,犁市镇 +13608,李市镇 +1393,丽水 +152443,黎塘村 +5850,黎塘镇 +87188,里田乡 +48505,里田乡 +1045,六安 +2060,柳城县 +6400,六都寨镇 +45938,六合村 +3068,六合区 +75523,刘家场村 +7634,刘家场镇 +15390,六靖镇 +6426,六景镇 +35707,六麻镇 +51454,柳泉铺镇 +91242,流沙 +91243,流沙村 +98998,流沙镇 +55484,流沙镇 +5194,柳市镇 +2848,浏阳 +71247,六枝村 +2134,六枝特区 +1814,荔湾区 +151280,立新村 +115887,立新村 +165273,立新村 +112350,立新村 +122727,立新村 +69542,立新村 +1550,利辛县 +3042,溧阳 +46917,李寨 +60618,李寨镇 +2073,隆安县 +6487,龙布镇 +4131,隆昌 +1829,龙川县 +6496,龙村镇 +75277,龙港村 +1916,龙岗区 +146450,龙港市 +5191,龙港镇 +67011,龙港镇 +121181,龙光乡 +1662,龙海 +2901,隆回县 +1890,龙湖区 +35712,龙虎山镇 +88391,龙虎乡 +127995,龙湖镇 +110469,龙江 +51037,龙江村 +97285,龙江镇 +5248,龙江镇 +10243,隆江镇 +71363,龙见田村 +10244,隆街镇 +75507,龙结镇 +3703,龙口 +87384,龙口镇 +98340,龙口镇 +2162,龙里县 +43965,龙马村 +80393,龙门 +95132,龙门村 +121758,龙门村 +1837,龙门县 +60387,龙门镇 +169776,龙门镇 +68518,龙门镇 +74721,龙母镇 +89567,龙南 +85284,龙南村 +3184,龙南县 +62081,龙泉镇 +61002,龙泉镇 +155977,龙山村 +48189,龙山 +2923,龙山县 +54090,龙山镇 +2018,龙胜县 +10250,隆盛镇 +60104,隆盛镇 +5241,龙市镇 +164825,龙潭村 +165121,龙潭村 +86358,龙潭镇 +146143,龙潭镇 +95156,龙潭镇 +90519,龙潭镇 +5392,龙潭镇 
+80266,龙潭镇 +39860,龙潭镇 +112082,龙头镇 +87906,龙头镇 +77166,龙湾镇 +64505,龙窝镇 +46852,龙溪 +145197,龙溪铺村 +74922,龙溪铺镇 +37014,龙溪乡 +52030,龙圩区 +1054,龙岩 +122261,龙眼村 +10239,龙镇 +89394,龙镇村 +2002,龙州县 +1205,娄底 +10434,禄步镇 +4414,绿春县 +35736,鲁沟村 +72832,鲁沟村 +5221,泸沽湖镇 +1896,陆河县 +6663,芦洪市镇 +1449,庐江县 +10683,锣场镇 +2043,罗城县 +90598,罗冲围村 +2163,罗甸县 +84588,罗店镇 +56017,罗浮镇 +98341,萝岗镇 +10593,罗岗镇 +1166,漯河 +71170,骆湖镇 +32770,罗家桥 +91833,罗家镇 +5446,罗镜镇 +6853,罗坎镇 +3888,洛南县 +4461,罗平县 +77143,罗平镇 +156542,罗平镇 +10693,骡坪镇 +98490,罗山村 +2518,罗山县 +2737,罗田县 +49711,洛香镇 +57236,罗秀镇 +98494,罗秀镇 +1155,洛阳 +55453,罗阳村 +10719,洛阳镇 +46949,洛阳镇 +80744,洛阳镇 +1579,罗源县 +56429,罗源镇 +4592,路桥区 +2475,鲁山县 +2485,卢氏县 +89217,陆屋村 +10394,陆屋镇 +4181,泸县 +4420,泸西县 +2925,泸溪县 +3241,芦溪县 +2550,鹿邑县 +1346,泸州 +1046,马鞍山 +5165,马鞍镇 +75589,马安镇 +70637,马坝镇 +6998,麻布岗镇 +165292,马场坪 +58913,马场坪村 +144693,马场平村 +46887,马场坪村 +165869,马场坪村 +60078,马场坪村 +2738,麻城 +168717,马达 +47582,马房镇 +7011,麻岗镇 +11001,马贵镇 +64782,卖酒镇 +2146,麻江县 +55697,麻江乡 +158172,马家湾 +96103,马家湾村 +67502,马街 +105366,马街村 +11051,马街乡 +35767,马街镇 +11008,马迹塘镇 +4481,麻栗坡县 +11424,芒部镇 +7291,茅草街镇 +7294,茅店镇 +163335,毛咀村 +48229,茂兰镇 +1862,茂南区 +89291,茅湾村 +84983,茂芝村 +59627,毛嘴镇 +156236,马平村 +11144,马坪镇 +75455,马坪镇 +5347,马坡镇 +143737,马山村 +35741,马山 +2074,马山县 +114527,马山乡 +57014,马山乡 +11187,马水镇 +39330,马踏镇 +56049,马踏镇 +7144,马田镇 +11224,马头镇 +41218,马头镇 +79948,码头镇 +10897,麻尾镇 +11237,马圩镇 +10902,麻溪铺镇 +2885,麻阳县 +56234,马牙 +1942,麻章区 +5440,梅城镇 +69432,梅城镇 +83485,梅川镇 +4093,美姑县 +95903,梅花村 +87580,梅花镇 +1872,梅江区 +22078,梅林村 +88408,梅林镇 +87851,梅陇镇 +1336,眉山 +82207,梅山镇 +5513,梅山镇 +105332,美台村 +159804,美台乡 +46951,梅塘镇 +2201,湄潭县 +1867,梅县区 +79206,梅仙镇 +68180,梅州村 +1551,蒙城县 +39501,濛江镇 +2092,蒙山县 +94900,蒙圩镇 +7481,棉湖镇 +3880,勉县 +1337,绵阳 +7485,棉洋镇 +7710,庙岔镇 +7457,米场镇 +4388,弥渡县 +4416,弥勒 +2952,汨罗 +37202,明港 +71262,明港镇 +5147,明港镇 +86857,茗山村 +4149,名山区 +78451,明溪村 +1641,明溪县 +1763,民乐县 +155605,民乐镇 +1581,闽清县 +2492,民权县 +5235,民众镇 +2558,泌阳县 +4473,墨江县 +91187,谟岭村 +57352,木格镇 +5424,那大镇 +7899,那霍镇 +5445,那良镇 +7904,那林镇 +46893,那龙镇 +7982,那马镇 +35832,那蒙镇 +1630,南安 +4649,南岸区 +91255,南安乡 +59467,南白镇 +4121,南部县 +1774,南城 +3172,南城县 +4628,南川区 +102452,南大村 +92397,南渡镇 +13183,南渡镇 +5373,南丰镇 +72405,南丰镇 +61877,南岗村 +164826,南岗村 +164705,南岗村 +1806,南海区 +4376,南华县 +8144,南江口镇 +4003,南江县 +47329,南江乡 +4389,南涧县 +1227,南京 +56490,南径镇 +62067,南经镇 +3185,南康区 +71945,南岭镇 +83420,南龙村 +13366,南龙村 +46942,南门镇 +1055,南平 +86944,南平镇 +87184,南桥镇 +1821,南沙区 +48363,南沙镇 +113847,南沙镇 +8353,楠市镇 +13065,南宝镇 +52006,南塘镇 +1228,南通 +2927,南县 +61885,南晓镇 +53436,南溪村 +39982,南溪 +89307,南兴镇 +4158,南溪区 +4533,南浔区 +48675,南阳油田 +35813,南油社区 +2463,南召县 +102653,南镇 +94639,南镇村 +5249,南庄镇 +1987,那坡县 +55503,那坡镇 +8004,那洒镇 +8006,那桐镇 +12984,那务镇 +4184,纳溪区 +2117,纳雍县 +2412,内黄县 +48037,内湖镇 +2464,内乡县 +1394,宁波 +1056,宁德 +4567,宁海县 +1642,宁化县 +2493,宁陵县 +2003,宁明县 +117587,宁潭镇 +1742,宁县 +2846,宁乡 +156239,牛山村 +165106,牛头村 +49784,牛头村 +146128,排坊 +1498,潘集区 +163336,潘龙村 +96679,蟠龙乡 +100262,蟠龙镇 +166544,盘县 +165646,盘县村 +1340,攀枝花 +2135,盘州 +98804,炮台 +125691,炮台镇 +4123,蓬安县 +5305,蓬壶镇 +5382,朋口镇 +4107,彭山区 +4629,彭水县 +4141,蓬溪县 +3220,彭泽县 +8602,彭寨镇 +4012,彭州 +56688,坪坝镇 +2108,平坝区 +4417,屏边县 +32437,屏边乡 +82188,平昌村 +4004,平昌县 +78749,平定村 +1157,平顶山 +3833,平定县 +42619,平定镇 +42788,平伐村 +1988,平果县 +1664,平和县 +4538,平湖 +2948,平江县 +82092,平江镇 +68613,平吉镇 +2019,平乐县 +1071,平凉 +8635,平陵镇 +3854,平利县 +3838,平陆县 +125633,平南 +1613,屏南县 +2032,平南县 +83119,屏南镇 +157728,平桥村 +92203,平桥村 +2337,平山县 +4159,屏山县 +92330,屏山乡 +45686,屏山乡 +78759,平山镇 +92250,平山镇 +60567,平山镇 +62845,平山镇 +91944,坪石村 +64255,坪石村 +10229,坪石乡 +2164,平塘县 +73738,平塘乡 +72169,平塘镇 +10216,平望镇 +1243,萍乡 +2004,凭祥 +69021,平阳村 +4596,平阳县 +6462,平阳镇 +32187,平远镇 +2559,平舆县 +10220,平政镇 +48161,平海镇 +1941,坡头区 +10510,坡头镇 +3246,鄱阳县 +1602,浦城县 +2109,普定县 +3979,浦东新区 +1384,普洱 
+4013,蒲江县 +1854,普宁 +37051,普坪镇 +146039,埔前 +8706,埔前镇 +105674,蒲圻 +38411,蒲塘镇 +1057,莆田 +8734,普宜镇 +10624,黔城镇 +1115,黔东南州 +8851,千官镇 +1188,潜江 +8869,迁江镇 +1116,黔南州 +10612,钱排镇 +10519,千山红镇 +1426,潜山县 +10630,浅水镇 +10618,钱塘镇 +3930,乾县 +1117,黔西南州 +2118,黔西县 +10590,前詹镇 +1866,蕉岭县 +113447,蕉门村 +1791,桥头镇 +10710,桥圩镇 +8996,乔贤镇 +3081,启东 +2873,祁东县 +89290,祈福新村 +8742,七拱镇 +10384,漆河镇 +4644,綦江区 +49507,七江镇 +122198,七里湖村 +10349,七里湖乡 +84823,歧岭村 +91613,歧岭村 +149234,岐岭村 +84833,歧岭乡 +8814,岐岭镇 +91706,歧岭镇 +46939,麒麟镇 +1881,清城区 +4070,青川县 +1296,青岛 +46886,清华园 +5344,清湖镇 +1643,清流县 +2171,晴隆县 +91821,清平村 +10840,青平镇 +4108,青神县 +9592,青树坪镇 +57029,青塘 +70656,青塘 +95899,清塘镇 +40508,青塘镇 +4557,青田县 +10962,清湾镇 +48045,青溪村 +39844,清溪村 +1877,清新区 +2079,青秀区 +1783,清溪镇 +1072,庆阳 +1086,清远 +4558,庆元县 +47366,青云镇 +2125,清镇 +3691,青州 +2035,覃塘区 +2426,沁阳 +4016,邛崃 +144507,岐山村 +3870,岐山县 +10480,歧山乡 +4483,丘北县 +11071,球溪镇 +2439,杞县 +2419,淇县 +2940,祁阳县 +1464,全椒县 +3187,全南县 +48499,荃湾村 +4675,荃湾区 +1058,泉州 +2020,全州县 +2560,确山县 +3624,曲阜 +1910,曲江区 +48076,瞿家湾镇 +1383,曲靖 +9662,渠旧镇 +9663,渠黎镇 +1399,衢州 +1770,饶平县 +39943,仁和 +103875,仁和村 +93846,仁和乡 +2192,仁怀 +1902,仁化县 +9776,任市镇 +4109,仁寿县 +9764,仁寿镇 +1297,日照 +2064,融安县 +4630,荣昌区 +1855,榕城区 +48271,容城镇 +36940,冗渡镇 +2153,榕江县 +9788,溶江镇 +47594,容奇镇 +26449,榕山镇 +2065,融水县 +4172,荣县 +2861,汝城县 +3083,如皋 +4597,瑞安 +3846,芮城县 +3188,瑞金 +4400,瑞丽 +2561,汝南县 +2447,汝阳县 +1903,乳源县 +2477,汝州 +38561,沙梨乡 +91727,三都村 +2165,三都县 +103868,三都镇 +11457,三阁司乡 +2957,桑植县 +5977,散花镇 +111928,三惠县 +87545,三江村 +98303,三江村 +96919,三江村 +11493,三江县 +155976,三江镇 +5492,三江镇 +54138,三江镇 +46610,三江镇 +153058,三角 +103865,三角村 +11505,三角塘村 +43981,三角塘村 +1956,三角镇 +55244,三甲镇 +70083,三里镇 +11539,三里镇 +1158,三门峡 +4584,三门县 +1059,三明 +61441,三水村 +1808,三水区 +111925,三水乡 +2147,三穗县 +4116,三台县 +94875,三塘镇 +92257,三乡村 +3931,三原县 +79622,沙扒镇 +91265,沙步村 +163533,沙洞村 +57013,沙垌镇 +81893,沙河店镇 +73921,沙河口村 +5348,沙河镇 +5462,沙井 +55640,沙口镇 +22516,上壁村 +69345,上壁村 +2562,上蔡县 +2519,商城县 +96597,尚店村 +6281,上奉镇 +5495,上冈镇 +3268,上高县 +1325,上海 +1024,上海 +1593,上杭县 +35914,上磺镇 +3243,上栗县 +1318,商洛 +3891,商南县 +88284,上坪 +12386,上坪村 +70647,上坪村 +103867,上坪镇 +93426,上坪镇 +1159,商丘 +1244,上饶 +98525,上水 +91682,上水村 +2551,商水县 +2009,上思县 +6434,上映乡 +3189,上犹县 +4576,上虞区 +3894,商州区 +48052,珊瑚镇 +154361,山口村 +91678,山口镇 +5264,山口镇 +47830,山门镇 +1088,汕尾 +5346,山心镇 +2431,山阳区 +3889,山阳县 +2487,陕州区 +2902,邵东县 +6464,筲箕湾镇 +2912,韶山 +1603,邵武 +1395,绍兴 +1206,邵阳 +2904,邵阳县 +41188,沙坪镇 +57368,沙坪镇 +48752,沙坡镇 +48279,沙陂镇 +45637,沙市 +164968,沙市区 +5502,沙市镇 +11972,沙塘乡 +55638,沙塘镇 +4672,沙田区 +99658,沙田镇 +44673,沙田镇 +1801,沙田镇 +5259,沙头镇 +42564,沙头镇 +1645,沙县 +2755,沙洋县 +89209,沙子坪 +87990,沙子坪 +102723,沙子坪村 +87991,沙子镇 +5431,社港镇 +4142,射洪 +6502,畲江镇 +5357,盛泽镇 +1122,昌江县 +1127,乐东县 +16203,升钟镇 +4581,嵊州 +5324,深井镇 +2552,沈丘县 +6584,神泉镇 +1090,深圳 +6512,佘田桥镇 +3142,射阳县 +33609,石板村 +46945,石板镇 +96696,石宝镇 +48084,石坝镇 +2148,施秉县 +103870,石槽村 +96113,石槽村 +26727,石槽村 +117530,石曹镇 +3190,石城县 +36032,施洞镇 +88439,十二岭村 +5458,十方镇 +6662,师岗镇 +2877,石鼓区 +61876,石鼓镇 +5021,石花镇 +12930,石江镇 +90494,石角村 +73226,石角村 +71891,石角塘村 +61588,石角镇 +42193,石角镇 +1146,石家庄 +1785,石碣镇 +6821,石康镇 +48697,石坎镇 +6833,石陵镇 +118417,石岭镇 +4429,石林县 +81934,石林镇 +70602,十里铺村 +6726,十里铺乡 +7641,十里铺镇 +48199,石龙村 +1786,石龙镇 +56314,石龙镇 +63119,石龙镇 +95159,石门村 +4965,石门区 +2839,石门县 +4150,石棉县 +1796,石排镇 +39232,石盘镇 +2179,石阡县 +105651,市桥 +6604,石桥镇 +1958,石岐区 +85616,石泉村 +3855,石泉县 +6670,诗山镇 +1632,石狮 +2762,石首 +13135,石塘镇 +83260,石塘村 +35391,石塘镇 +71448,石塘镇 +68122,石塘镇 +42213,石潭镇 +55715,石潭镇 +91667,石窝 +91684,石窝镇 +1905,始兴县 +1190,十堰 +6907,石正镇 +4631,石柱县 +5136,石柱镇 +31648,狮子林 +154824,石子岭 +165122,十字路乡 +103055,十字乡 +38963,石子镇 +4464,师宗县 +44726,寿宁南阳 +112076,双东圩镇 +168438,双东镇 +2896,双峰县 +81483,双河村 +87197,双河镇 +47973,双河镇 +91688,双滘镇 +4014,双流区 +155673,双排村 +2941,双牌县 +147976,双牌镇 
+10072,双旺镇 +47047,水边镇 +48715,水边镇 +2136,水城县 +10088,水东江镇 +40623,水东镇 +71163,水墩镇 +4505,水富县 +89207,水浸坪乡 +42738,水口镇 +5322,水口镇 +70337,水口镇 +7107,水鸣镇 +72165,水清镇 +89050,水头 +43595,水头镇 +5205,水头镇 +7131,水汶镇 +11279,水寨镇 +1604,顺昌县 +83056,顺天镇 +152613,树田 +3104,沭阳县 +68639,寺村镇 +38742,四公里村 +159212,思贺镇 +3105,泗洪县 +1948,四会 +87198,四角楼村 +7186,思林镇 +7341,泗纶镇 +5317,司马浦镇 +10181,四马桥镇 +56993,司门前镇 +2180,思南县 +7176,司前乡 +37130,司前镇 +54202,司前镇 +7305,寺山乡 +7282,四通镇 +1524,泗县 +3106,泗阳县 +168073,思州 +62966,宋河镇 +5434,松口镇 +62734,松涛村 +2181,松桃县 +44706,松旺镇 +7388,松烟镇 +88214,松源镇 +4506,绥江县 +1341,遂宁 +3132,睢宁县 +2905,绥宁县 +2563,遂平县 +2499,睢县 +2501,睢阳区 +2193,绥阳县 +87954,素龙 +164239,孙家镇 +64299,锁龙村 +1230,宿迁 +57062,苏区镇 +1427,宿松县 +7462,苏圩镇 +1229,苏州 +1047,宿州 +1298,泰安 +3089,太仓 +97122,台城村 +5002,台城镇 +3716,台儿庄区 +169200,太和村 +7599,太和堂镇 +1473,太和县 +94699,太和镇 +46323,太和镇 +1428,太湖县 +79079,太湖镇 +1585,台江区 +2149,台江县 +7652,泰美镇 +1646,泰宁县 +45949,太平 +48529,太平村 +112098,太平村 +95131,太平村 +36092,太平古镇 +95274,太平镇 +5302,太平镇 +55455,太平镇 +3679,泰山 +3111,泰兴 +1311,太原 +1231,泰州 +1396,台州 +57394,太子 +7644,太子庙镇 +7860,塘坝镇 +15832,塘缀镇 +7868,塘渡口镇 +42732,塘掇镇 +2466,唐河县 +7871,塘红乡 +98911,唐家 +105323,唐家村 +74098,塘家洞村 +5268,唐江镇 +112887,塘坑村 +106953,塘坑村 +48224,塘口镇 +158225,塘莲村 +159711,塘蓬镇 +7778,汤塘镇 +98965,塘田 +7883,塘田市镇 +146161,塘头村 +165293,塘湾 +101966,塘湾镇 +61880,棠下村 +1784,塘厦镇 +7855,棠下镇 +2413,汤阴县 +157263,谭连村 +46432,潭连村 +122183,潭莲村 +94854,潭莲村 +7661,坛洛镇 +7715,潭水镇 +48717,潭头镇 +7696,谭庄镇 +7895,桃川镇 +7929,陶邓乡 +15846,桃花源镇 +2928,桃江县 +78523,桃江乡 +100219,桃源 +2840,桃源县 +7976,藤田镇 +2093,藤县 +3713,滕州 +1465,天长 +2005,天等县 +2045,天峨县 +5426,田畈街镇 +48577,天河 +1816,天河区 +10276,天河镇 +1347,天津 +1990,田林县 +111806,天门村 +1073,天水 +4586,天台县 +46900,天台镇 +52113,天堂村 +52990,天堂镇 +10291,天堂镇 +24643,田头村 +87209,田头镇 +103871,天西村 +74925,田心村 +170649,田心围村 +88216,田心镇 +98623,田心镇 +10389,田心镇 +2150,天柱县 +1998,铁山港区 +45917,亭亮镇 +1656,同安区 +45021,同安镇 +51178,同安镇 +2467,桐柏县 +1429,桐城 +2797,通城县 +1319,铜川 +2886,通道县 +55672,铜鼓村 +3269,铜鼓县 +99662,铜鼓镇 +78680,铜鼓镇 +4496,通海县 +4005,通江县 +4632,铜梁区 +1048,铜陵 +4519,桐庐县 +85933,桐木村 +74325,桐木镇 +85056,桐木镇 +47888,桐木镇 +4642,潼南区 +1118,铜仁 +2798,通山县 +4539,桐乡 +2437,通许县 +2194,桐梓县 +161294,桐梓镇 +8273,头排镇 +2739,团风县 +8308,土关垭镇 +1133,屯昌县 +37913,屯脚镇 +4673,屯门区 +19150,托洞镇 +8405,驮卢镇 +23194,土坪镇 +72540,瓦店乡 +125812,瓦店镇 +31581,外海 +64256,外沙 +91241,外沙村 +60493,外砂镇 +3203,万安县 +8756,望埠镇 +4071,旺苍县 +57010,旺甫镇 +8762,望高镇 +1430,望江县 +46794,旺角 +38562,王灵镇 +8749,旺茂镇 +1803,望牛墩镇 +79174,汪桥镇 +10738,汪桥镇 +10170,万合镇 +1775,万江 +158250,挽澜镇 +3253,万年县 +1134,万宁 +4651,万盛区 +21513,宛田乡 +8481,湾头桥镇 +4033,万源 +4654,万州区 +4665,湾仔区 +8446,瓦溪镇 +1300,潍坊 +1299,威海 +2506,卫辉 +157726,魏家村 +1320,渭南 +2119,威宁县 +8806,维桥乡 +2438,尉氏县 +96668,维新村 +4507,威信县 +165868,维新镇 +53960,威远村 +4133,威远县 +1135,文昌 +83430,文昌镇 +8937,汶村镇 +5345,文地镇 +2166,瓮安县 +144669,翁安寨村 +5504,翁城镇 +16773,瓮城镇 +1906,翁源县 +4587,温岭 +36178,文渠 +48178,温泉镇 +8911,文渠乡 +1385,文山 +4484,文山市 +2427,温县 +1397,温州 +1552,涡阳县 +2783,武昌区 +1936,吴川 +2195,务川县 +51959,武当山镇 +9241,雾渡河镇 +158962,五丰铺镇 +9071,五峰铺镇 +2906,武冈 +2478,舞钢 +3932,武功县 +9198,武功镇 +1972,五桂山 +1270,乌海 +1193,武汉 +1049,芜湖 +3101,吴江区 +20311,乌江镇 +9097,五经富镇 +112884,五块石 +84430,五里牌 +9112,五里铺镇 +9211,武利镇 +4635,武隆区 +2077,武鸣区 +3222,武宁县 +1594,武平县 +9119,五强溪镇 +23104,五桥 +11313,五山 +4633,巫山县 +79302,五山镇 +52757,乌石村 +8998,乌石乡 +73521,乌石镇 +95124,乌石镇 +9177,伍市镇 +9224,武潭镇 +4084,五通桥区 +86844,五通镇 +11319,五通镇 +1450,无为县 +1232,无锡 +2740,武穴 +11251,吴圩镇 +2570,舞阳县 +3257,婺源县 +91810,五云镇 +1276,吴忠 +1109,梧州 +152340,厦边村 +58106,下车村 +152546,厦岗村 +98309,霞湖村 +3204,峡江县 +93756,下坑村 +23548,夏涝池 +11730,下老乡 +1060,厦门 +1321,西安 +5965,仙城镇 +2732,咸丰县 +2555,项城 +2528,襄城县 +48696,香花镇 +4568,象山县 +46317,响水 +1208,湘西 +2915,湘乡 +2949,湘阴县 +4391,祥云县 +1976,香洲区 +5428,巷子口镇 +1195,咸宁 +11848,羡塘乡 +1194,仙桃 
+23701,仙下村 +1322,咸阳 +1620,仙游县 +9245,西岸镇 +2814,孝昌县 +6353,小董镇 +1197,孝感 +165574,小鸡街 +86845,小金村 +58394,小金口镇 +4000,小金县 +38422,小平山镇 +95582,小平阳镇 +4526,萧山区 +89208,小塘镇 +5256,小塘镇 +1523,萧县 +5676,细坳镇 +1615,霞浦县 +5858,下桥镇 +88421,峡山村 +1940,霞山区 +5312,峡山镇 +55612,夏石镇 +59564,下洋镇 +118453,下洋镇 +5306,下洋镇 +2495,夏邑县 +98291,霞涌镇 +4098,西昌 +46875,西冲 +4124,西充县 +4485,西畴县 +2471,淅川县 +37500,西渡镇 +30644,协和乡 +6721,谢鸡镇 +1747,西峰区 +2126,息烽县 +99359,西峰乡 +91951,西华村 +2554,西华县 +79910,西湖村 +23909,西湖村 +69016,西湖区 +3235,西湖区 +102254,西湖镇 +98568,锡坑镇 +95555,西林村 +1992,西林县 +95554,西林乡 +9369,西胪镇 +5654,洗马乡 +84008,洗马镇 +9379,西庙岗乡 +51163,西南镇 +52844,新宝镇 +77495,新蔡村 +2565,新蔡县 +4579,新昌县 +2058,忻城县 +96025,新店村 +100746,新店乡 +6796,新地镇 +154418,新渡口 +7084,信都镇 +52172,新风 +91697,新风村 +1907,新丰县 +3191,信丰县 +98577,新丰镇 +57006,新丰镇 +6813,新福乡 +48232,新福镇 +1273,兴安盟 +3205,新干县 +2021,兴安县 +3192,兴国县 +3113,兴化 +156339,星火村 +124241,星火村 +33989,星火镇 +38232,杏林镇 +52657,兴隆村 +91695,兴隆镇 +100132,兴隆镇 +2080,兴宁区 +105919,新沟村 +103869,新沟镇 +64865,新沟镇 +3934,兴平 +2173,兴仁市 +39849,兴仁镇 +78786,兴仁镇 +2623,兴山区 +2822,兴山县 +7093,星沙镇 +4160,兴文县 +42536,兴文镇 +2103,兴业县 +61888,杏子村 +7095,星子镇 +4287,新和县 +57198,新和镇 +2887,新晃县 +2897,新化县 +1848,新会区 +29804,新江村 +4015,新津县 +2342,新乐 +2536,新密 +2907,新宁县 +6839,新亨镇 +6873,新平安镇 +160094,新泉镇 +2908,新邵县 +6883,新哨镇 +6885,新盛店镇 +89259,新盛镇 +46455,新盛镇 +34021,新盛镇 +7689,新市镇 +39329,新市镇 +38934,新市镇 +75866,新市镇 +164827,新塘村 +168309,新塘村 +70685,新塘村 +82792,新塘村 +43033,新塘镇 +84405,新田村 +29907,新天村 +114021,新田村 +6901,新田铺镇 +61882,新天寨 +98616,新田镇 +152952,新田镇 +68993,新田镇 +2521,新县 +1160,新乡 +150724,新锡边村 +114019,新兴村 +1929,新兴县 +112265,新兴乡 +87201,新溪乡 +6910,新溪镇 +30004,新墟村 +51124,新圩乡 +99935,新圩镇 +82786,新墟镇 +29967,新圩镇 +55413,新圩镇 +1161,信阳 +2469,新野县 +3129,新沂 +2537,新郑 +2791,新洲区 +2564,西平县 +81806,西坪镇 +5461,西坪镇 +19843,西樵村 +5245,西樵镇 +2196,习水县 +152619,溪头村 +71175,绣缎镇 +106380,秀山 +4636,秀山县 +2127,修文县 +2208,秀英区 +2520,息县 +2082,西乡塘区 +3883,西乡县 +123180,溪西镇 +82175,西阳镇 +89090,西阳镇 +11465,西阳镇 +9439,西燕镇 +9436,西岩镇 +103078,西中村 +1050,宣城 +4034,宣汉县 +1162,许昌 +75510,巡场镇 +74875,巽寮镇 +3193,寻乌县 +3856,旬阳县 +3935,旬邑县 +2891,溆浦县 +3052,盱眙县 +4180,叙永县 +1233,徐州 +4059,雅江县 +91606,雅江镇 +46892,雅居乐 +1323,延安 +1234,盐城 +7454,沿渡河镇 +7531,羊册镇 +86694,杨村 +91246,杨村镇 +56029,杨村镇 +1923,阳东区 +1091,阳江 +30782,羊角村 +89275,羊角镇 +5289,羊街镇 +156586,羊街村 +87920,洋里村 +37785,杨林村 +94367,杨林村 +162991,杨林村 +97479,杨林 +112889,杨林镇 +4830,杨梅 +85890,杨梅村 +2225,洋浦经济开发区 +118036,杨桥镇 +7691,洋桥镇 +64239,杨桥镇 +1313,阳泉 +1879,阳山县 +87148,杨市 +2022,阳朔县 +60057,杨田 +103864,杨田村 +60510,杨田镇 +146040,杨屋村 +3884,洋县 +61472,洋溪乡 +2747,阳新县 +91269,洋溪镇 +90226,洋溪镇 +5594,洋溪镇 +3159,扬中 +1235,扬州 +123470,沿河乡 +4508,盐津县 +2509,延津县 +2532,鄢陵县 +2962,炎陵县 +4486,砚山县 +78302,砚山镇 +1301,烟台 +4177,沿滩区 +7439,岩滩镇 +38291,严田镇 +4117,盐亭县 +7722,腰古镇 +7771,姚集乡 +94463,姚集镇 +112178,姚集镇 +47517,姚圩镇 +3897,耀州区 +60896,亚山镇 +7348,鸭溪镇 +7844,野三关镇 +91692,野山关镇 +7858,叶塘镇 +46950,叶潭镇 +2479,叶县 +1343,宜宾 +1198,宜昌 +2806,宜城 +1180,伊春 +2824,宜都 +7861,一渡水镇 +3174,宜黄县 +38170,一六圩村 +98838,一六镇 +48221,一六镇 +4125,仪陇县 +2488,义马 +2816,应城 +1880,英德 +8001,英都镇 +4401,盈江县 +5325,英林镇 +8003,英利镇 +99943,英山村 +1474,颍上县 +61157,应山 +2741,英山县 +32905,营山乡 +1247,鹰潭 +7971,银坑镇 +7933,义容镇 +4550,义乌 +3118,宜兴 +1209,益阳 +3256,弋阳县 +2862,宜章县 +3153,仪征 +2046,宜州区 +1647,永安 +46845,永安街道 +84406,永安新村 +70133,永安镇 +2496,永城 +4637,永川区 +1633,永春县 +4450,永德县 +1595,永定区 +3206,永丰 +166482,永丰村 +71108,永丰村 +2023,永福县 +4601,永嘉县 +46644,永嘉镇 +4551,永康 +52882,永康镇 +2078,邕宁县 +4380,永仁县 +4510,永善县 +3936,永寿县 +95032,永顺村 +2924,永顺县 +1583,永泰县 +155918,永头村 +81901,永兴镇 +3207,永新县 +3225,永修县 +1210,永州 +27725,油墩街镇 +64199,油坑村 +47562,油田镇 +161468,邮亭村 +8106,邮亭镇 +2965,攸县 +1648,尤溪县 +2825,远安县 +153011,院垌村 +2930,沅江 +5452,源头镇 +2510,原阳县 +4419,元阳县 +5484,园洲镇 +2497,虞城县 +87370,悦城镇 +4066,岳池县 +8325,岳口镇 
+91623,月岭村 +70224,月岭村 +1431,岳西县 +8336,岳溪镇 +1211,岳阳 +2953,岳阳楼区 +61962,月屿村 +2068,鱼峰区 +57058,余关乡 +158324,雨河镇 +4589,玉环 +100437,鱼化乡 +3277,余江区 +1324,榆林 +1931,云安区 +1930,郁南县 +8358,云表镇 +1314,运城 +1933,云城区 +2817,云梦县 +44273,云门镇 +8369,云潭镇 +4451,云县 +1665,云霄县 +2769,郧西县 +2770,郧阳区 +4639,云阳县 +61174,云阳镇 +2186,玉屏县 +2197,余庆县 +8204,郁山镇 +3255,玉山县 +125278,余田村 +8165,余田乡 +169843,鱼窝头 +1387,玉溪 +4569,余姚 +2531,禹州 +84583,皂市镇 +40752,皂市镇 +2807,枣阳 +1302,枣庄 +5186,泽国镇 +1812,增城区 +8516,寨沙镇 +8520,寨圩镇 +8468,渣江镇 +28245,闸口乡 +94873,闸口镇 +8593,张公庙镇 +8596,张沟镇 +3091,张家港 +1212,张家界 +57252,樟木村 +1782,樟木头镇 +103862,樟木乡 +5343,樟木镇 +1666,漳浦县 +3273,樟树 +1061,漳州 +36506,湛江镇 +8555,占陇镇 +1668,诏安县 +8804,赵店子镇 +35073,朝东镇 +84830,赵家镇 +46958,赵家镇 +2050,昭平县 +1094,肇庆 +1388,昭通 +8860,肇兴乡 +5515,闸坡镇 +2498,柘城县 +89423,浙川县 +47029,柘港乡 +60495,折弓乡 +3892,镇安县 +42224,镇安镇 +3885,镇巴县 +8918,郑店镇 +123702,震东乡 +2198,正安县 +88416,郑场镇 +91326,郑店 +1607,政和县 +1746,正宁县 +2566,正阳县 +1163,郑州 +1236,镇江 +8898,镇江镇 +57640,镇隆镇 +5279,镇隆镇 +44569,镇隆镇 +8904,镇宁堡乡 +2110,镇宁县 +76931,镇平村 +2470,镇平县 +3857,镇坪县 +4512,镇雄县 +2151,镇远县 +1617,柘荣县 +158226,者塘村 +5026,枝城镇 +2826,枝江 +2889,芷江县 +78235,芷江镇 +2120,织金县 +5597,织里镇 +155978,芝山镇 +28805,值夏镇 +58964,中坝镇 +2888,中方县 +1401,中国香港 +32797,中和镇 +4041,中江县 +28844,中平村 +99946,中平镇 +78230,钟山村 +2137,钟山区 +2051,钟山县 +1787,中堂镇 +8986,中垌镇 +4640,忠县 +2756,钟祥 +42237,忠信镇 +4661,中西区 +165117,中寨村 +158251,中寨镇 +83441,中寨镇 +43404,中寨子村 +55702,周安村 +42241,周安村 +9083,周党镇 +9098,周江镇 +1164,周口 +105924,周老镇 +9100,周老嘴镇 +9104,周鹿镇 +1616,周宁县 +40750,周旺铺村 +78354,周旺镇 +81895,砖店镇 +9322,转水镇 +9295,竹篙镇 +9296,竹沟镇 +9144,朱河镇 +4580,诸暨 +39807,珠街乡 +56715,朱街镇 +28192,珠街镇 +71867,朱里镇 +106166,竹山村 +155975,竹山村 +3214,珠山区 +2771,竹山县 +74619,珠山镇 +158249,竹山镇 +87979,竹市镇 +91621,竹市镇 +9290,株潭镇 +81814,竹溪村 +2772,竹溪县 +9301,竹溪乡 +116191,竹溪镇 +33477,竹园坝 +88170,竹园镇 +117300,竹园镇 +39408,竹园镇 +1213,株洲 +1303,淄博 +1345,自贡 +2827,秭归县 +82278,紫金镇 +52732,紫市镇 +4118,梓潼县 +103863,梓潼镇 +2864,资兴 +1344,资阳 +2932,资阳区 +2111,紫云县 +4134,资中县 +9417,陬市镇 diff --git a/applications/common/scrapySpiders/wangModel/wangModel/files/city_cap.txt b/applications/common/scrapySpiders/wangModel/wangModel/files/city_cap.txt new file mode 100644 index 0000000..ff90a66 --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/wangModel/files/city_cap.txt @@ -0,0 +1,546 @@ +1096,珠海 +1095,中山 +1824,廉江 +1110,玉林 +1126,海口 +1078,佛山 +1106,柳州 +1082,江门 +1928,罗定 +1871,兴宁 +2174,兴义 +2099,北流 +1859,化州 +2095,岑溪 +1077,东莞 +1080,河源 +1084,茂名 +1107,南宁 +1087,汕头 +1092,云浮 +1097,百色 +1202,郴州 +1099,崇左 +1237,抚州 +1085,梅州 +4415,蒙自 +1119,遵义 +1113,贵阳 +1103,河池 +1081,惠州 +1876,连州 +1130,琼海 +1132,三亚 +1089,韶关 +1101,桂林 +1105,来宾 +1334,乐山 +1338,南充 +1900,南雄 +1845,台山 +1192,天门 +1207,湘潭 +1093,湛江 +1165,驻马店 +1111,安顺 +1150,安阳 +1098,北海 +4544,东阳 +1100,防城港 +3264,丰城 +38433,拱北 +1332,广安 +1102,贵港 +1185,黄石 +1239,吉安 +2922,吉首 +2143,凯里 +1129,陵水 +2015,荔浦 +1114,六盘水 +1895,陆丰 +1242,南昌 +1156,南阳 +1339,内江 +1167,濮阳 +1108,钦州 +22290,容桂 +2763,松滋 +1191,随州 +1196,襄阳 +1861,信宜 +1245,新余 +1342,雅安 +1922,阳春 +1246,宜春 +1400,重庆 +1315,安康 +2811,安陆 +1035,安庆 +1402,中国澳门 +46889,柏布 +88217,白庙 +1120,白沙 +1036,蚌埠 +1316,宝鸡 +1373,保山 +94650,八一 +97340,八一农场 +1327,巴中 +50984,北大 +35043,北栅 +1112,毕节 +1051,亳州 +1200,常德 +2527,长葛 +1575,长乐 +101301,长隆 +2867,常宁 +89263,长庆桥 +1201,长沙 +3086,常熟 +1304,长治 +1224,常州 +1037,巢湖 +1076,潮州 +92375,城东 +1328,成都 +48033,陈家 +2794,赤壁 +46535,赤水 +2189,赤水 +1038,池州 +4008,崇州 +97498,楚雄 +1374,楚雄州 +1039,滁州 +4564,慈溪 +2972,大安市 +16437,大付 +16050,达濠 +35101,大理 +1251,大连 +1375,大理州 +2820,当阳 +58664,大宁 +2766,丹江口 +39858,淡水 +3157,丹阳 +1137,儋州 +70592,大坪 +3384,大石桥 +1305,大同 +98594,大围 +2745,大冶 +1329,达州 +114691,德城 +2534,登封 +3350,灯塔 +169775,登云 +2460,邓州 +3247,德兴 +1330,德阳 +87196,东城 +116819,东城 +1125,东方 
+95933,东方 +3312,东港 +69724,洞利 +3139,东台 +2007,东兴 +1289,东营 +2156,都匀 +4076,峨眉山 +1841,恩平市 +2727,恩施 +1183,恩施州 +1182,鄂州 +87907,凤凰 +5575,汾水道 +1609,福安 +1610,福鼎 +1576,福清 +2158,福泉 +1040,阜阳 +1053,福州 +1238,赣州 +3266,高安 +89238,高坪 +89195,高坡 +46631,高桥 +86519,高沙 +3150,高邮 +1858,高州 +4408,个旧 +43701,巩桥 +6253,共青城 +1773,莞城 +12853,广华 +71131,广华岭 +2776,广水 +1333,广元 +1079,广州 +31406,桂城 +2030,桂平 +3275,贵溪 +1274,固原 +3078,海安 +3079,海门 +2813,汉川 +1142,邯郸 +1389,杭州 +2792,汉口 +1317,汉中 +1151,鹤壁 +1041,合肥 +1010,河南 +1203,衡阳 +91610,河浦 +1842,鹤山 +2053,合山 +56623,河西 +1290,菏泽 +1104,贺州 +1378,红河州 +1199,洪湖 +2881,洪江 +1225,淮安 +1042,淮北 +152609,怀德 +1204,怀化 +1043,淮南 +1184,黄冈 +169664,黄桥 +72597,华阳 +4063,华蓥 +105908,胡集 +3775,霍州 +1390,湖州 +165462,加禾 +4517,建德 +150783,江口 +4612,江山 +3116,江阴 +1600,建瓯 +93346,尖沙咀 +32532,简阳 +1153,焦作 +1391,嘉兴 +1471,界首 +1083,揭阳 +1291,济南 +1306,晋城 +3200,井冈山 +4488,景洪 +3110,靖江 +1186,荆门 +2754,京山市 +1983,靖西 +1187,荆州 +1392,金华 +1292,济宁 +1629,晋江 +93532,金沙 +165284,金沙 +2837,津市 +1241,九江 +75482,九隆 +29675,九龙塘 +1152,济源市 +1154,开封 +1844,开平 +4413,开远 +4626,开州 +1379,昆明 +3087,昆山 +4127,阆中 +1068,兰州 +51436,老城 +2803,老河口 +1899,乐昌 +2874,耒阳 +1934,雷州 +2893,冷水江 +3212,乐平 +4595,乐清 +2894,涟源 +1226,连云港 +1294,聊城 +2731,利川 +1380,丽江 +2966,醴陵 +1381,临沧 +1308,临汾 +2484,灵宝 +4583,临海 +2981,临江 +2947,临湘 +1295,临沂 +2411,林州市 +1393,丽水 +1045,六安 +91242,流沙 +2848,浏阳 +3042,溧阳 +46917,李寨 +4131,隆昌 +146450,龙港市 +1662,龙海 +110469,龙江 +3703,龙口 +80393,龙门 +89567,龙南 +48189,龙山 +46852,龙溪 +1054,龙岩 +1205,娄底 +1166,漯河 +32770,罗家桥 +1155,洛阳 +1346,泸州 +1046,马鞍山 +165292,马场坪 +2738,麻城 +168717,马达 +158172,马家湾 +67502,马街 +35741,马山 +56234,马牙 +1336,眉山 +1337,绵阳 +4416,弥勒 +2952,汨罗 +37202,明港 +1630,南安 +1774,南城 +1227,南京 +1055,南平 +1228,南通 +39982,南溪 +48675,南阳油田 +1394,宁波 +1056,宁德 +146128,排坊 +1340,攀枝花 +2135,盘州 +98804,炮台 +4012,彭州 +1157,平顶山 +4538,平湖 +1071,平凉 +125633,平南 +2004,凭祥 +1384,普洱 +1854,普宁 +146039,埔前 +105674,蒲圻 +1057,莆田 +1115,黔东南州 +1188,潜江 +1116,黔南州 +1117,黔西南州 +3081,启东 +1296,青岛 +46886,清华园 +57029,青塘 +70656,青塘 +1072,庆阳 +1086,清远 +3691,青州 +2426,沁阳 +4016,邛崃 +1058,泉州 +3624,曲阜 +1383,曲靖 +1399,衢州 +39943,仁和 +2192,仁怀 +1297,日照 +3083,如皋 +4597,瑞安 +3188,瑞金 +4400,瑞丽 +2477,汝州 +153058,三角 +1158,三门峡 +1059,三明 +5462,沙井 +1325,上海 +1318,商洛 +88284,上坪 +1159,商丘 +1244,上饶 +98525,上水 +1088,汕尾 +2912,韶山 +1603,邵武 +1395,绍兴 +1206,邵阳 +45637,沙市 +89209,沙子坪 +87990,沙子坪 +4142,射洪 +4581,嵊州 +1090,深圳 +1146,石家庄 +105651,市桥 +1632,石狮 +2762,石首 +91667,石窝 +1190,十堰 +31648,狮子林 +154824,石子岭 +44726,寿宁南阳 +89050,水头 +152613,树田 +1948,四会 +168073,思州 +1341,遂宁 +87954,素龙 +1230,宿迁 +1229,苏州 +1047,宿州 +1298,泰安 +3089,太仓 +45949,太平 +3679,泰山 +3111,泰兴 +1311,太原 +1231,泰州 +1396,台州 +57394,太子 +98911,唐家 +98965,塘田 +165293,塘湾 +100219,桃源 +3713,滕州 +1465,天长 +48577,天河 +1347,天津 +1073,天水 +1429,桐城 +1319,铜川 +1048,铜陵 +1118,铜仁 +31581,外海 +64256,外沙 +46794,旺角 +1775,万江 +1134,万宁 +4033,万源 +1300,潍坊 +1299,威海 +2506,卫辉 +1320,渭南 +1135,文昌 +4587,温岭 +36178,文渠 +1385,文山 +4484,文山市 +1397,温州 +1936,吴川 +2906,武冈 +2478,舞钢 +1972,五桂山 +1270,乌海 +1193,武汉 +1049,芜湖 +112884,五块石 +84430,五里牌 +23104,五桥 +11313,五山 +1232,无锡 +2740,武穴 +1276,吴忠 +1109,梧州 +23548,夏涝池 +1060,厦门 +1321,西安 +2555,项城 +46317,响水 +1208,湘西 +1195,咸宁 +1194,仙桃 +1322,咸阳 +1197,孝感 +165574,小鸡街 +4098,西昌 +46875,西冲 +154418,新渡口 +52172,新风 +1273,兴安盟 +3113,兴化 +3934,兴平 +2173,兴仁市 +2342,新乐 +2536,新密 +61882,新天寨 +1161,信阳 +3129,新沂 +2537,新郑 +106380,秀山 +1050,宣城 +1162,许昌 +1233,徐州 +46892,雅居乐 +1323,延安 +1234,盐城 +1091,阳江 +97479,杨林 +4830,杨梅 +1313,阳泉 +87148,杨市 +60057,杨田 +3159,扬中 +1235,扬州 +1301,烟台 +1343,宜宾 +1198,宜昌 +2806,宜城 +1180,伊春 +2824,宜都 +2488,义马 +2816,应城 +1880,英德 +61157,应山 +1247,鹰潭 +4550,义乌 +3118,宜兴 +1209,益阳 +3153,仪征 +1647,永安 +46845,永安街道 +2496,永城 +3206,永丰 +4551,永康 +1210,永州 +2930,沅江 +1211,岳阳 +4589,玉环 +1324,榆林 
+1314,运城
+169843,鱼窝头
+1387,玉溪
+4569,余姚
+2531,禹州
+2807,枣阳
+1302,枣庄
+3091,张家港
+1212,张家界
+3273,樟树
+1061,漳州
+1094,肇庆
+1388,昭通
+91326,郑店
+1163,郑州
+2826,枝江
+1401,中国香港
+2756,钟祥
+1164,周口
+4580,诸暨
+33477,竹园坝
+1213,株洲
+1303,淄博
+1345,自贡
+2864,资兴
+1344,资阳
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/files/scenic b/applications/common/scrapySpiders/wangModel/wangModel/files/scenic
new file mode 100644
index 0000000..61eaad8
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/files/scenic
@@ -0,0 +1,95 @@
+1,桂林漓江景区
+2,桂林两江四湖象山景区
+3,七星景区
+4,桂林穿山景区
+5,桂林尧山景区
+6,桂林市南溪山景区
+7,桂林万福广场休闲旅游城
+8,桂林市瓦窑小镇景区
+9,象山区侗情水庄景区
+10,海之鑫洞藏酒文化馆
+11,桂林独秀峰王城景区
+12,芦笛景区
+13,桂林经典刘三姐大观园景区
+14,桂林西山景区
+15,桂林桂花公社景区
+16,桂林芦笛岩鸡血玉文化艺术中心景区
+17,桂林冠岩景区
+18,桂林愚自乐园艺术园
+19,桂林旅苑景区
+20,桂林市神龙水世界景区
+21,桂林多耶古寨蛇王李景区
+22,桂林在水一汸景区
+23,桂林新区环城水系景区
+24,桂林罗山湖玛雅水上乐园景区
+25,桂林红溪景区
+26,桂林黄沙秘境大峡谷景区
+27,美国飞虎队桂林遗址公园
+28,李宗仁故居
+29,会仙喀斯特国家湿地公园景区
+30,临桂十二滩漂流景区
+31,抱璞文化展示中心
+32,桂林崇华中医街
+33,一院两馆景区
+34,佑子湾民俗风情园景区
+35,桂林世外桃源旅游区
+36,阳朔图腾古道聚龙潭景区
+37,桂林阳朔县蝴蝶泉旅游景区
+38,阳朔西街景区
+39,阳朔三千漓中国山水人文度假区
+40,桂林乐满地休闲世界
+41,红军长征突破湘江烈士纪念碑园景区(红色景区)
+42,兴安灵渠景区
+43,桂林市猫儿山景区
+44,桂林龙胜温泉旅游度假区
+45,龙胜龙脊梯田景区
+46,龙胜县白面瑶寨景区(红色景区)
+47,龙胜艺江南中国红玉文化园景区
+48,桂林银子岩旅游度假区
+49,桂林丰鱼岩旅游度假区
+50,荔浦荔江湾景区
+51,荔浦县马岭鼓寨民族风情园
+52,荔浦天河瀑布景区
+53,荔浦县柘村景区
+54,恭城三庙两馆景区
+55,恭城红岩村景区
+56,黄岭景区
+57,杨溪景区
+58,瑶族文化村景区
+59,北洞源景区
+60,恭城龙虎关景区
+61,恭城矮寨景区
+62,恭城社山景区
+63,红军长征湘江战役新圩狙击战纪念园(红色景区)
+64,灌阳千家洞文旅度假区
+65,灌阳唐景崧故里景区
+66,灌阳茶博园
+67,灌阳神农稻博园
+68,灌阳洞井古民居景区
+69,灌阳都庞岭大峡谷景区
+70,灌阳文市石林景区
+71,灵川县大圩古镇景区
+72,灵川县漓水人家景区
+73,桂林古东瀑布景区
+74,桂林市逍遥湖景区
+75,桂林希宇欢乐城景区
+76,八路军桂林办事处路莫村物资转运站景区(红色景区)
+77,灵川龙门瀑布景区
+78,灵川县江头景区
+79,平乐仙家温泉景区
+80,大碧头国际旅游度假区
+81,红军长征湘江战役纪念园(红色景区)
+82,桂林全州县湘山·湘源历史文化旅游区
+83,桂林国际茶花谷旅游休闲度假区
+84,桂林湘山酿酒生态园景区
+85,全州县炎井温泉
+86,永福金钟山旅游度假区
+87,永福县凤山景区
+88,罗汉果小镇
+89,桂林资江·天门山景区
+90,桂林八角寨景区
+91,桂林资江灯谷景区
+92,资源县宝鼎景区
+93,资源县塘洞景区(红色景区)
+
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/items.py b/applications/common/scrapySpiders/wangModel/wangModel/items.py
new file mode 100644
index 0000000..7e0ca1e
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/items.py
@@ -0,0 +1,72 @@
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+# Tuniu scenic-spot info and reviews
+class WangmodelItem(scrapy.Item):
+    # define the fields for your item here like:
+    id = scrapy.Field()                # scenic spot id
+    name = scrapy.Field()              # scenic spot name
+    begin_price = scrapy.Field()       # starting ticket price
+    satisfy_present = scrapy.Field()   # satisfaction rate
+    remarkAmount = scrapy.Field()      # total number of reviews
+    compGrade3Amount = scrapy.Field()  # "satisfied" review count
+    compGrade2Amount = scrapy.Field()  # "average" review count
+    compGrade1Amount = scrapy.Field()  # "unsatisfied" review count
+    img = scrapy.Field()               # cover image
+    address = scrapy.Field()           # address
+    time_arrange = scrapy.Field()      # opening hours etc.
+    commentlist = scrapy.Field()       # reviews
+
+# Tuniu hotel info and hotel reviews
+class TuniuhotelItem(scrapy.Item):
+    id = scrapy.Field()
+    hname = scrapy.Field()
+    starname = scrapy.Field()
+    hpic = scrapy.Field()
+    haddress = scrapy.Field()
+    business = scrapy.Field()
+    distance = scrapy.Field()
+    hlowstprice = scrapy.Field()
+    hcomments = scrapy.Field()
+    others = scrapy.Field()
+
+class HornetNestNoteItem(scrapy.Item):
+    id = scrapy.Field()
+    url = scrapy.Field()
+    title = scrapy.Field()
+    total = scrapy.Field()
+    see = scrapy.Field()
+    collect = scrapy.Field()
+    commentNum = scrapy.Field()
+
+class WeiboItem(scrapy.Item):
+    id = scrapy.Field()
+    userid = scrapy.Field()
+    screen_name = scrapy.Field()
+    fins = scrapy.Field()
    artilelist = scrapy.Field()
+    total_artiles = scrapy.Field()
+
+
+class TongchenTrainItem(scrapy.Item):
+    id = scrapy.Field()            # row id
+    site = scrapy.Field()          # source site
+    place_from = scrapy.Field()
+    place_to = scrapy.Field()
+    date = scrapy.Field()
+    total_count = scrapy.Field()
+    from_station = scrapy.Field()
+    to_station = scrapy.Field()
+    from_time = scrapy.Field()
+    to_time = scrapy.Field()
+    seat_name = scrapy.Field()
+    seat_price = scrapy.Field()
+    seats_left = scrapy.Field()
+    type = scrapy.Field()
+    status = scrapy.Field()
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/middlewares.py b/applications/common/scrapySpiders/wangModel/wangModel/middlewares.py
new file mode 100644
index 0000000..1c72e5c
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/middlewares.py
@@ -0,0 +1,56 @@
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+
+from scrapy import signals
+import time
+import logging
+# useful for handling different item types with a single interface
+from scrapy.utils.project import get_project_settings
+import random
+
+
+# Attach a random User-Agent to every request
+class RandowSpiderMiddleware(object):
+    def process_request(self, request, spider):
+        settings = get_project_settings()
+        user_agent = settings["USER_AGENT_LIST"]
+        ua = random.choice(user_agent)
+        request.headers['USER-AGENT'] = ua
+
+
+# Route every request through a random proxy
+class RandowProxy(object):
+    def process_request(self, request, spider):
+        settings = get_project_settings()
+        proxy_list = settings["PROXY_LIST"]
+        proxy = random.choice(proxy_list)
+        # the proxy must be set on request.meta for Scrapy to actually use it
+        request.meta['proxy'] = 'http://' + proxy['ip_port']
+
+
+# Sleep a random number of seconds before each request
+class RandomDelayMiddleware(object):
+    def __init__(self, delay):
+        self.delay = delay
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        # upper bound for the random sleep, taken from DOWNLOAD_DELAY (default 3 s)
+        delay = crawler.settings.getint("DOWNLOAD_DELAY", 3)
+        return cls(delay)
+
+    def process_request(self, request, spider):
+        delay = random.randint(0, self.delay)
+        logging.debug("### random delay: %s s ###" % delay)
+        time.sleep(delay)
+
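pipelines.py below writes rows through a small `HbaseUtil` wrapper imported from `wangModel.utils.HbaseConn`, which is not included in this patch. Judging only from its call sites (a constructor taking a host, plus `putTable`, `batchTable` and `closeCon`), it is presumably a thin facade over the happybase client; a minimal sketch under those assumptions:

import happybase

class HbaseUtil:
    """Assumed shape of wangModel/utils/HbaseConn.py, inferred from call
    sites in pipelines.py; not part of this patch."""

    def __init__(self, host, port=9090):              # 9090 = default HBase Thrift gateway port
        self.conn = happybase.Connection(host, port=port)

    def putTable(self, table, row_key, mapping):
        # mapping keys are 'family:qualifier' strings, as used below
        self.conn.table(table).put(row_key, mapping)

    def batchTable(self, table, row_key, mapping):
        # batched put; the batch is flushed when the context exits
        with self.conn.table(table).batch() as b:
            b.put(row_key, mapping)

    def closeCon(self):
        self.conn.close()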
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/pipelines.py b/applications/common/scrapySpiders/wangModel/wangModel/pipelines.py
new file mode 100644
index 0000000..9bb8f15
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/pipelines.py
@@ -0,0 +1,211 @@
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+
+
+# useful for handling different item types with a single interface
+import datetime
+import uuid
+
+from itemadapter import ItemAdapter
+import csv
+from scrapy.exceptions import DropItem
+import happybase
+import json
+from wangModel.items import WangmodelItem
+from wangModel.items import TuniuhotelItem
+from wangModel.items import WeiboItem
+from wangModel.items import TongchenTrainItem
+from wangModel.utils.mysqlConn import insert, update
+
+from wangModel.utils.HbaseConn import HbaseUtil
+
+
+class DuplicatesPipeline(object):
+    """
+    Drop items whose id has already been seen in this run
+    """
+    def __init__(self):
+        self.book_set = set()
+
+    def process_item(self, item, spider):
+        name = item['id']
+        if name in self.book_set:
+            raise DropItem("Duplicate item found: %s" % item)
+
+        self.book_set.add(name)
+        return item
+
+
+class WangmodelPipeline(object):
+    def process_item(self, item, spider):
+        # append one CSV row per scenic-spot item
+        with open('test.csv', 'a+', encoding='utf-8') as f:
+            csv_writer = csv.writer(f)
+            csv_writer.writerow([item['name'], item['begin_price'], item['satisfy_present'],
+                                 item['img'], item['address'], item['time_arrange']])
+        return item
+
+class tuniuHBasePipeline(object):
+    def __init__(self):
+        host = '202.193.53.106'
+        table_name1 = 'tuniu_scenic'
+        table_name2 = 'scenic_hotel'
+        self.hbase = HbaseUtil(host)
+        self.tablename1 = table_name1
+        self.tablename2 = table_name2
+
+    def process_item(self, item, spider):
+        """
+        Store scraped items into HBase (plus daily summary rows in MySQL)
+        """
+        # Tuniu scenic-spot data
+        if isinstance(item, WangmodelItem):
+            host = '202.193.53.106'
+            sql = "INSERT INTO scenic_comment(scenicId,scenicName,satisfy_present,num,good,middle,bad,crawlTime,siteFrom) select %s,%s,%s,%s,%s,%s,%s,%s,%s from dual where not exists (select scenicName,crawlTime,siteFrom from scenic_comment where scenicName=%s and crawlTime=%s and siteFrom='途牛');"
+            insert(sql, (item['id'], item['name'], item['satisfy_present'], item['remarkAmount'], item['compGrade3Amount'], item['compGrade2Amount'], item['compGrade1Amount'], datetime.date.today(), "途牛", item['name'], datetime.date.today()))
+            commentlist = item['commentlist']
+            if len(commentlist) > 0:
+                for data in commentlist:
+                    userId = str(data['userId'])
+                    userName = str(data['userName'])
+                    content = str(data['content'])
+                    others = {}   # per-aspect sub-scores; may be absent
+                    if data['subCompGrade'] is not None:
+                        for k, v in data['subCompGrade'].items():
+                            others[k] = str(v)
+                    remarkSatisfaction = str(data['remarkSatisfaction'])
+                    compGrade = str(data['compGrade'])
+                    key = uuid.uuid1().hex
+                    putInfo = {
+                        "info:userid": userId,
+                        "info:username": userName,
+                        "info:scenicid": str(item['id']),
+                        "info:scenicname": item['name'],
+                        "info:content": content,
+                        "info:others": str(others),
+                        "info:satisfaction": remarkSatisfaction,
+                        "info:compgrade": compGrade,
+                        "info:datafrom": "途牛",
+                        "info:postDate": data['remarkTime']
+                    }
+                    try:
+                        self.hbase.batchTable("scenics_comment", str(key), putInfo)
+                    except Exception:
+                        # reconnect once and retry
+                        self.hbase.closeCon()
+                        self.hbase = HbaseUtil(host)
+                        self.hbase.batchTable("scenics_comment", str(key), putInfo)
+
+        # Tuniu hotel data
+        elif isinstance(item, TuniuhotelItem):
+            for child in item['hcomments']:
+                userId = child['reviewerId']
+                userName = child['reviewerName']
+                content = child['content']
+                score = str(child['score'])
+                remarkTime = child['remarkTime']
+                rowData = {
+                    'info:hid': str(item['id']),
+                    'info:hname': item['hname'],
+                    'info:userid': userId,
+                    'info:username': userName,
+                    'info:content': content,
+                    'info:score': score,
+                    'info:postDate': remarkTime,
+                }
+                try:
+                    self.hbase.batchTable("hotel_comments", str(uuid.uuid1().hex), rowData)
+                except Exception:
+                    # reconnect once and retry
+                    self.hbase.closeCon()
+                    self.hbase = HbaseUtil('202.193.53.106')
+                    self.hbase.batchTable("hotel_comments", str(uuid.uuid1().hex), rowData)
+
+        # Weibo data: every post from the Guilin official tourism account
+        elif isinstance(item, WeiboItem):
+            for artile_content in item['artilelist']:
+                self.hbase.putTable("weibo", artile_content['artile_id'], {
+                    'info:userid': str(item['userid']),
+                    'info:screen_name': item['screen_name'],
+                    'info:fins': item['fins'],
                    'info:total_artiles': str(item['total_artiles']),
+                    'info:artile_id': artile_content['artile_id'],
+                    'info:attitudes_count': artile_content['attitudes_count'],
+                    'info:comments_count': artile_content['comments_count'],
+                    'info:reposts_count': artile_content['reposts_count'],
+                    'info:postDate': artile_content['postDate'],
+                    'info:text': artile_content['text']
+                })
+
+        # Tongcheng travel tickets
+        elif isinstance(item, TongchenTrainItem):
+            host = '202.193.53.106'
+            hbase = HbaseUtil(host)
+            rowData = {
+                'info:place_from': item['place_from'],      # departure city
+                'info:place_to': item['place_to'],          # arrival city: Guilin
+                'info:date': item['date'],                  # travel date
+                'info:total_count': item['total_count'],    # number of services from this city to Guilin
+                'info:from_station': item['from_station'],  # departure station
+                'info:to_station': item['to_station'],      # arrival station
+                'info:type': item['type'],                  # transport type: train / coach
+                'info:from_time': item['from_time'],        # departure time
+                'info:to_time': item['to_time'],            # arrival time
+                'info:seat_name': item['seat_name'],        # seat class (trains only; coaches have none)
+                'info:seat_price': item['seat_price'],      # seat price
+                'info:seats_left': item['seats_left'],      # remaining tickets
+                'info:status': item['status']               # seat status
+            }
+            try:
+                hbase.batchTable("leftticket", item['id'], rowData)
+            except Exception:
+                # reconnect once and retry
+                hbase.closeCon()
+                hbase = HbaseUtil(host)
+                hbase.batchTable("leftticket", item['id'], rowData)
+        return item
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/readme.md b/applications/common/scrapySpiders/wangModel/wangModel/readme.md
new file mode 100644
index 0000000..e1266a1
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/readme.md
@@ -0,0 +1,11 @@
+### Create the scrapy project
+scrapy startproject <project-name>
+
+### cd into the spiders folder and create a spider
+scrapy genspider <spider-name> "<domain>"
+
+### Run a spider
+scrapy crawl <spider-name>
+
+# Entry point that runs all spiders
+wangModel/spiders/main.py
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/settings.py b/applications/common/scrapySpiders/wangModel/wangModel/settings.py
new file mode 100644
index 0000000..7580dde
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/settings.py
@@ -0,0 +1,135 @@
+# Scrapy settings for wangModel project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used.
You can find more settings consulting the documentation:
+#
+#     https://docs.scrapy.org/en/latest/topics/settings.html
+#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'wangModel'
+
+SPIDER_MODULES = ['wangModel.spiders']
+NEWSPIDER_MODULE = 'wangModel.spiders'
+DOWNLOAD_DELAY = 3      # download delay: 3 s (also the upper bound used by RandomDelayMiddleware)
+DOWNLOAD_TIMEOUT = 60   # download timeout
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
+# User-Agent pool for the random-UA middleware
+USER_AGENT_LIST = [
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
+    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36 QIHU 360EE/13.0.2256.0',
+    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0',
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19041',
+    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
+    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
+    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
+
+    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 10.0; Trident/6.0)",
+    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2752.40 Safari/537.36",
+    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 5.1; WOW64; Trident/5.0)",
+    "Mozilla/5.0 (Windows NT 5.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0"
+]
+# Obey robots.txt rules
+# ROBOTSTXT_OBEY = False
+# Proxy pool for the random-proxy middleware
+PROXY_LIST = [
+    {"ip_port": "49.87.250.13:4325"},
+    {"ip_port": "114.106.173.42:4313"},
+    {"ip_port": "115.239.16.241:4314"},
+    {"ip_port": "183.165.249.249:4310"},
+    {"ip_port": "182.128.45.57:4315"},
+    {"ip_port": "183.154.221.57:4356"},
+    {"ip_port": "114.233.169.249:4313"},
+    {"ip_port": "124.161.212.165:4358"},
+    {"ip_port": "114.239.29.114:4345"},
+    {"ip_port": "220.201.85.63:4331"},
+    {"ip_port": "113.243.33.56:4343"},
+    {"ip_port": "113.65.125.60:4386"},
+    {"ip_port": "114.103.89.96:4354"},
+    {"ip_port": "115.209.123.141:4326"},
+    {"ip_port": "42.56.3.70:4361"},
+]
+
+URLLENGTH_LIMIT = 5000  # maximum allowed request URL length
+HTTPERROR_ALLOWED_CODES = [521]
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+#CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+# DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
+#CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+#   'Accept-Language': 'en',
+#}
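+# Note (sketch, not from the original patch): per-crawl overrides such as a
+# spider-specific delay belong in a `custom_settings` CLASS attribute on the
+# spider itself -- Scrapy never reads a module-level `custom_settings` dict
+# from settings.py. For example (spider name reused for illustration):
+#
+#     class TongchenSpider(scrapy.Spider):
+#         name = 'tongchen'
+#         custom_settings = {
+#             "DOWNLOAD_DELAY": 3,   # upper bound used by RandomDelayMiddleware
+#             "DOWNLOADER_MIDDLEWARES": {
+#                 'wangModel.middlewares.RandomDelayMiddleware': 150,
+#             },
+#         }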
+
+# Enable or disable spider middlewares
+# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+#    'wangModel.middlewares.WangmodelSpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+DOWNLOADER_MIDDLEWARES = {
+    'wangModel.middlewares.RandowProxy': 543,             # random proxy
+    'wangModel.middlewares.RandowSpiderMiddleware': 544,  # random User-Agent
+    'wangModel.middlewares.RandomDelayMiddleware': 150,   # random delay
+}
+
+# Enable or disable extensions
+# See https://docs.scrapy.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+#    'scrapy.extensions.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+ITEM_PIPELINES = {
+    'wangModel.pipelines.tuniuHBasePipeline': 300,
+    'wangModel.pipelines.DuplicatesPipeline': 280,
+}
+FEED_EXPORT_ENCODING = 'utf-8'
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
+#AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED = True
+#HTTPCACHE_EXPIRATION_SECS = 0
+#HTTPCACHE_DIR = 'httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES = []
+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/__init__.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/__init__.py
new file mode 100644
index 0000000..ebd689a
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/a.html b/applications/common/scrapySpiders/wangModel/wangModel/spiders/a.html
new file mode 100644
index 0000000..135d0bc
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/a.html
@@ -0,0 +1,163 @@
+<!DOCTYPE html><html class="pc "><head><meta charSet="utf-8" class="next-head"/><title class="next-head">上海到北京火车票预订与代购-高铁票价,动车票价-高铁订票,动车订票网-携程火车票订购中心

[... ~160 lines of saved Ctrip (携程) search-results markup kept as a spider test fixture: 上海 → 北京 one-way tickets for 2022-11-30 — 16 direct trains (G102, G110, G10, 1462, …) with stations, departure/arrival times, seat classes (二等座/一等座/商务座/硬卧 …) and prices, five suggested transfer itineraries, plus the page's date tabs and time/train-type filter widgets ...]
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/gw.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/gw.py
new file mode 100644
index 0000000..d0ac153
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/gw.py
@@ -0,0 +1,58 @@
+import scrapy
+import json
+import datetime
+import uuid
+import time
+from urllib import parse
+from wangModel.items import TongchenTrainItem
+
+# Qunar: today's remaining train tickets from every capital city to Guilin
+class GwSpider(scrapy.Spider):
+    name = 'gw'
+    allowed_domains = ['qunar.com']
+    start_urls = ['http://qunar.com/']
+    today = str(datetime.date.today())
+
+    def parse(self, response):
+        with open("../files/city_cap.txt", encoding="utf-8") as f:
+            for cityInfo in f:
+                city_name = cityInfo.split(",")[1]
+                kw = parse.quote(city_name.strip())
+                currentTime = str(round(time.time() * 1000))
+                time.sleep(2)
+                url = "https://train.qunar.com/dict/open/s2s.do?callback=jQuery172018610690190401646_1669725079415&dptStation=" + kw + "&arrStation=%E6%A1%82%E6%9E%97&date=" + str(datetime.date.today()) + "&type=normal&user=neibu&source=site&start=1&num=500&sort=3&_=" + currentTime
+                yield scrapy.Request(
+                    url=url,
+                    callback=self.parseItem
+                )
+
+    def parseItem(self, response):
+        item = TongchenTrainItem()
+        data = response.text
+        index = data.index("{")  # first brace: start of the JSON body inside the JSONP wrapper
+        result = json.loads(response.text[index:-2])
+        if result['ret']:
+            info = result['data']
+            item['site'] = "去哪儿旅行"                   # source site
+            item['place_from'] = info['dptStation']       # departure city
+            item['place_to'] = info['arrStation']         # destination city
+            item['date'] = str(self.today)                # travel date
+            item['id'] = str(uuid.uuid1().hex)
+
+            item['total_count'] = str(len(info['s2sBeanList']))  # number of services
+            train_list = info['s2sBeanList']
+            if len(info['s2sBeanList']) > 0:
+                for train in train_list:                          # each train service
+                    item['from_station'] = train['dptStationName']  # departure station
+                    item['to_station'] = train['arrStationName']    # arrival station
+                    item['from_time'] = train['dptTime']            # departure time
+                    item['to_time'] = train['arrTime']              # arrival time
+                    item['status'] = train['note']                  # status
+                    ticketState = train['seats']
+                    for seatType, content in ticketState.items():   # each seat class on this service
+                        item['seat_name'] = content['seatName']     # seat class
+                        item['seat_price'] = str(content['price'])  # seat price
+                        item['seats_left'] = str(content['count'])  # remaining seats
+                        item['type'] = "火车"
+                        yield item
\ No newline at end of file
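Both gw.py above and tongchen.py below peel the JSONP wrapper off the ticket APIs by slicing from the first `{` to a hardcoded tail offset (`[index:-2]` here, `[index:-1]` in tongchen.py), which silently breaks if the API drops the trailing `;` or the callback wrapper. A small helper that tolerates either form — a sketch; the function name is mine, not part of the patch:

import json

def strip_jsonp(text: str) -> dict:
    """Parse a JSONP payload like jQuery123_456({...}); into a dict."""
    start = text.index('{')   # first brace: start of the JSON body
    end = text.rindex('}')    # last brace: end of the JSON body
    return json.loads(text[start:end + 1])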
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/main.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/main.py
new file mode 100644
index 0000000..54c4779
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/main.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File : crawlers -> run-all-spiders entry point
+@IDE    : PyCharm
+@Author : sandmswift
+@Date   : 2022-12-11 14:20
+@Desc
+==================================================='''
+from wangModel.common_spiders.baidusearch import BaiduSpider
+from wangModel.common_spiders.baiduacc import baiduacc
+from wangModel.common_spiders.baiduwords import BaiDuWords
+from wangModel.common_spiders.weibosign import WeiboSignSpider
+
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+
+settings = get_project_settings()
+crawler = CrawlerProcess(settings)
+
+# Scrapy spiders: queue them all, then call start() exactly once --
+# it runs every queued crawler and blocks until they all finish
+crawler.crawl('tongchen')
+crawler.crawl('tuniu_scenic')
+crawler.crawl('tuniu_hotel')
+crawler.crawl('weibo')
+crawler.start()
+
+
+# Plain (non-Scrapy) crawlers
+""" 1. Baidu Index """
+acc = baiduacc()
+acc.parse1()
+
+""" 2. Baidu search """
+run = BaiduSpider()
+run.parse()
+
+""" 3. Baidu encyclopedia entries """
+baiduWord = BaiDuWords()
+baiduWord.run()
+
+""" 4. Weibo check-ins """
+web = WeiboSignSpider()
+web.run()
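main.py starts all four Scrapy spiders in parallel inside one CrawlerProcess. If they ever need to run one after another (for example, to stay under the target sites' rate limits), the pattern documented by Scrapy is a CrawlerRunner chained through Twisted deferreds — a sketch:

from twisted.internet import defer, reactor
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from scrapy.utils.project import get_project_settings

configure_logging()
runner = CrawlerRunner(get_project_settings())

@defer.inlineCallbacks
def crawl_sequentially():
    # each yield waits for the previous spider to finish
    yield runner.crawl('tongchen')
    yield runner.crawl('tuniu_scenic')
    yield runner.crawl('tuniu_hotel')
    yield runner.crawl('weibo')
    reactor.stop()

crawl_sequentially()
reactor.run()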
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/tongchen.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tongchen.py
new file mode 100644
index 0000000..3ba64b6
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tongchen.py
@@ -0,0 +1,137 @@
+import time
+import uuid
+
+import scrapy
+from urllib import parse
+from wangModel.items import TongchenTrainItem
+import json
+import datetime
+from scrapy.http import JsonRequest
+
+from wangModel.utils.HbaseConn import HbaseUtil
+"""
+Tongcheng Travel: today's remaining train and coach tickets to Guilin
+"""
+class TongchenSpider(scrapy.Spider):
+    name = 'tongchen'
+    allowed_domains = ['ly.com']
+    start_urls = ['http://ly.com/']
+    today = str(datetime.date.today())
+
+    def start_requests(self):
+        with open("../files/city_cap.txt", encoding="utf-8") as f:
+            car_url = "https://bus.ly.com/busresapi/schedule/getScheduleList?plateId=3"
+            for cityInfo in f:
+                item = TongchenTrainItem()
+                city_id = cityInfo.split(",")[0]
+                city_name = cityInfo.split(",")[1]
+                kw = parse.quote(city_name.strip())
+                currentTime = str(round(time.time() * 1000))
+                train_url = "https://www.ly.com/uniontrain/trainapi/TrainPCCommon/SearchTrainRemainderTickets?callback=jQuery18305629279457315504_1668857363483&para={%22To%22:%22%E6%A1%82%E6%9E%97%22,%22From%22:%22" + kw + "%22,%22TrainDate%22:%22" + self.today + "%22,%22PassType%22:%22%22,%22TrainClass%22:%22%22,%22FromTimeSlot%22:%22%22,%22ToTimeSlot%22:%22%22,%22FromStation%22:%22%22,%22ToStation%22:%22%22,%22SortBy%22:%22fromTime%22,%22callback%22:%22%22,%22tag%22:%22%22,%22memberId%22:%22%22,%22constId%22:%22TzXdqT-dUJYltDmsdvGtjh4huQTPXw1489UB3g7-exI%22,%22headct%22:%220%22,%22platId%22:1,%22headver%22:%221.0.0%22,%22headtime%22:1668590089068}&_=" + currentTime
+
+                # 1. remaining train tickets
+                yield scrapy.Request(
+                    url=train_url,
+                    callback=self.parse_item,
+                    dont_filter=False,
+                    meta={"item": item}
+                )
+
+                json_request = {
+                    "departure": city_name.strip(),
+                    "destination": "桂林",
+                    "departureDate": self.today,
+                    "depId": city_id,   # city id
+                    "desId": 1101,
+                    "page": 1,
+                    "pageSize": 25,
+                    "orderTime": 0,
+                    "orderPrice": 0,
+                    "dptTimeSlot": "",
+                    "departureStation": "",
+                    "arrivalStation": "",
+                    "hasCategory": True
+                }
+                # 2. remaining coach tickets
+                yield JsonRequest(
+                    url=car_url,
+                    callback=self.parse_car,
+                    data=json_request,
+                    dont_filter=False,
+                    meta={"item": item, "departure": city_name.strip()}
+                )
+
+    # parse train tickets
+    def parse_item(self, response):
+        item = response.meta.get('item')
+        data = response.text
+        index = data.index("{")  # first brace: start of the JSON body inside the JSONP wrapper
+        result = json.loads(response.text[index:-1])
+        info = result['data']
+        if info is not None:
+            flag = result['data']['trains']
+            if len(flag) > 0:
+                item['site'] = "同城旅行"                      # source site
+                item['place_from'] = info['from']              # departure city
+                item['place_to'] = info['to']                  # destination city
+                item['date'] = str(self.today)                 # travel date
+                item['total_count'] = str(info['totalCount'])  # number of services
+                train_list = result['data']['trains']
+                for train in train_list:                       # each train service
+                    item['from_station'] = train['fromCity']   # departure station
+                    item['to_station'] = train['toCity']       # arrival station
+                    item['from_time'] = train['fromTime']      # departure time
+                    item['to_time'] = train['toTime']          # arrival time
+                    ticketState = train['ticketState']
+
+                    for seatType, content in ticketState.items():  # each seat class on this service
+                        item['id'] = str(uuid.uuid1().hex)
+                        item['seat_name'] = content['cn']           # seat class
+                        item['seat_price'] = content['price']       # seat price
+                        item['seats_left'] = content['seats']       # remaining seats
+                        item['type'] = "火车"
+                        item['status'] = content['state']           # 1 = tickets available, 0 = sold out
+                        yield item
+
+    # parse coach tickets
+    def parse_car(self, response):
+        item = response.meta.get('item')
+        departure = response.meta.get('departure')
+        responseData = response.json()
+        status = responseData['header']['isSuccess']
+        if status == True:
+            cars_list = responseData['body']['schedule']
+            item['site'] = "同城旅行"                  # source site
+            item['place_from'] = departure             # departure city
+            item['place_to'] = "桂林"                  # destination
+            item['date'] = str(self.today)             # travel date
+            item['type'] = "客车"
+            item['total_count'] = str(len(cars_list))  # number of services
+            for car in cars_list:
+                item['id'] = str(uuid.uuid1().hex)
+                item['seat_name'] = ""                 # coaches have no seat classes
+                item['seat_price'] = str(car['ticketPrice'])
+                item['from_station'] = car['dptStation']
+                item['to_station'] = car['arrStation']
+                item['from_time'] = car['dptTime']
+                item['to_time'] = ""
+                item['seats_left'] = car['ticketLeft']
+                item['status'] = car['bookingDesc']    # booking status
+                yield item
\ No newline at end of file
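Both gw.py and tongchen.py mutate a single shared TongchenTrainItem across every yielded row, so any consumer that defers processing sees whichever values were written last. A more defensive shape builds a fresh item per seat class — a sketch against parse_item above (`base` is an assumed dict holding the per-city fields, not a name from the patch):

base = {'site': "同城旅行", 'place_from': info['from'], 'place_to': info['to'],
        'date': str(self.today), 'total_count': str(info['totalCount'])}
for train in result['data']['trains']:
    for seatType, content in train['ticketState'].items():
        row = TongchenTrainItem(base)   # scrapy.Item copies the mapping on construction
        row['id'] = str(uuid.uuid1().hex)
        row['from_station'] = train['fromCity']
        row['to_station'] = train['toCity']
        row['from_time'] = train['fromTime']
        row['to_time'] = train['toTime']
        row['type'] = "火车"
        row['seat_name'] = content['cn']
        row['seat_price'] = content['price']
        row['seats_left'] = content['seats']
        row['status'] = content['state']
        yield row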
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_hotel.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_hotel.py
new file mode 100644
index 0000000..2eac80f
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_hotel.py
@@ -0,0 +1,187 @@
+import time
+import scrapy
+import json
+from scrapy.http import JsonRequest
+from wangModel.items import TuniuhotelItem
+from math import ceil
+import datetime
+from wangModel.utils.mysqlConn import getRows, insert
+
+# Tuniu hotels in Guilin: hotel list, per-aspect review summary, full reviews
+class TuniuHotelSpider(scrapy.Spider):
+    name = 'tuniu_hotel'
+    allowed_domains = ['tuniu.com']
+    start_urls = ['https://s.tuniu.com/search_complex/hotel-nn-0-%E6%A1%82%E6%9E%97/']
+    page = 1
+    count = 0
+    pageNum = 0
+    today = datetime.date.today()
+    tomorrow = today + datetime.timedelta(days=1)
+
+    data = {"primary": {"cityCode": "705",
+                        "cityType": 0,
+                        "checkIn": str(today),
+                        "checkOut": str(tomorrow),
+                        "roomNum": 1,
+                        "adultNum": 2,
+                        "childNum": 0,
+                        "childAges": [],
+                        "keyword": ""},
+            "secondary": {
+                "poi": {
+                    "locationType": 2,
+                    "pois": []},
+                "prices": [],
+                "stars": [],
+                "brands": [],
+                "features": [],
+                "facilities": [],
+                "commentScore": "",
+                "bedTypes": []},
+            "threeStages": [],
+            "suggest": {},
+            "pageNo": 1,
+            "pageSize": 10,
+            "sort": 0,
+            "customerClient": 2,
+            "returnDistance": True,
+            "secondaryDist": {"pValue": "", "userType": 0}}
+
+    def start_requests(self):
+        url = "https://hotel.tuniu.com/hotel-api/hotel/list?c=%7B%22ct%22%3A20000%7D"
+        yield JsonRequest(
+            url=url,
+            callback=self.parse,
+            data=self.data,
+        )
+
+    # hotel list page
+    def parse(self, response):
+        data = response.json()
+        item = TuniuhotelItem()
+        self.count = data['data']['count']
+        hotellist = data['data']['hotels']
+        for i in range(len(hotellist)):
+            hotel = hotellist[i]['hotel']
+            refer = hotellist[i]['reference']
+            item['id'] = hotel['hotelId']
+            item['hname'] = hotel['chineseName']
+            item['starname'] = hotel['starName']
+            item['hpic'] = hotel['firstPic']
+            item['haddress'] = hotel['address']
+            item['business'] = hotel['business']
+            item['distance'] = refer['distanceText']
+            item['hlowstprice'] = hotellist[i]['lowestPrice']
+            comment = hotellist[i]['comment']
+
+            # only crawl hotels that actually have review content
+            if 'score' in comment:
+                # normalize half-width parentheses so the name matches the hotels table
+                hotel_name = str(item['hname']).replace("(", "(").replace(")", ")")
+                sql = "select id,name from hotels where name = %s"
+                dataRows = getRows(sql, hotel_name)
+                if dataRows:
+                    id = dataRows[0][0]
+                    baseName = dataRows[0][1]
+                    yield scrapy.Request(
+                        url=f"https://hotel.tuniu.com/hotel-api/comment/summary?c=%7B%22ct%22:20000%7D&d=%7B%22hotelId%22:%22{item['id']}%22%7D",
+                        callback=self.parse_summary_comments,
+                        dont_filter=False,
+                        meta={"hotelId": id, "hotelName": baseName, "item": item}
+                    )
+
+        self.pageNum = ceil(self.count / 10)
+
+        # hotel-list pagination
+        if self.pageNum > 1:
+            self.page = self.page + 1
+            self.data['pageNo'] = self.page
+            if self.page <= self.pageNum:
+                yield JsonRequest(
+                    url="https://hotel.tuniu.com/hotel-api/hotel/list?c=%7B%22ct%22%3A20000%7D",
+                    callback=self.parse,
+                    data=self.data,
+                )
+                time.sleep(2)
+
+    # review summary: per-aspect sub-scores, then fan out over the review pages
+    def parse_summary_comments(self, response):
+        item = response.meta.get('item')
+        id = response.meta.get('hotelId')
+        hotelName = response.meta.get('hotelName')
+        summary = response.json()
+        otherComment = summary['data']['aspects']
+        commentSum = summary['data']['commentCount']
+        item['others'] = otherComment
+        item['hcomments'] = []
+        requestbody = {
+            "hotelId": str(item['id']),
+            "grade": "ALL",
+            "pageNo": 1,
+            "pageSize": 8
+        }
+        pages = ceil(commentSum / 8)
+        for i in range(1, pages + 1):
+            requestbody['pageNo'] = i   # page numbers run 1..pages
+            time.sleep(3)
+            yield JsonRequest(
+                url="https://hotel.tuniu.com/hotel-api/comment/list?c=%7B%22ct%22%3A20000%7D",
+                callback=self.parse_comments,
+                dont_filter=False,
+                data=requestbody,
+                meta={"hotelId": id, "hotelName": hotelName, "item": item, "body": requestbody, "pages": pages, "currentPage": i}
+            )
+
+    # review page details
+    def parse_comments(self, response):
+        item = response.meta.get('item')
+        pages = response.meta.get('pages')
+        currentPage = response.meta.get('currentPage')
+        data = response.json()
+        id = response.meta.get('hotelId')
+        others = item['others']
+
+        otherslist = []
+        for contentInfo in others:
+            categroy = {}
+            categroy["cnName"] = contentInfo['cnName']
+            categroy["enName"] = contentInfo['enName']
+            categroy["score"] = str(contentInfo['aspectScore'])
+            otherslist.append(categroy)
+        hotelName = response.meta.get('hotelName')
+        comment_sum = data['data']['groupCount']['ALL']   # total reviews
+        good = data['data']['groupCount']['GOOD']
+        middle = data['data']['groupCount']['COMMON']
+        bad = data['data']['groupCount']['BAD']
+        sql = "insert into hotel_comment(hotelId,hotelName,num,good,middle,bad,othersComment,siteFrom,crawlTime) select %s,%s,%s,%s,%s,%s,%s,%s,%s from dual where not exists (select hotelName,siteFrom,crawlTime from hotel_comment where hotelName= %s and siteFrom='途牛' and crawlTime=%s);"
+        insert(sql, (id, hotelName, comment_sum, good, middle, bad, str(otherslist), "途牛", datetime.date.today(), hotelName, datetime.date.today()))
+        if 'comments' in data['data']:
+            item['hcomments'] = item['hcomments'] + data['data']['comments']  # reviews on this page
+        if currentPage == pages:
+            yield item
+
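tuniu_hotel.py (like the pipeline earlier) guards against duplicate daily snapshots with `INSERT ... SELECT ... FROM dual WHERE NOT EXISTS (...)`. The same guarantee is usually expressed with a unique key plus `INSERT IGNORE`, which also closes the race between the existence check and the insert — a sketch, assuming the project's `insert()` helper passes parameterized SQL straight to MySQL:

# one-time schema change (sketch):
#   ALTER TABLE hotel_comment
#     ADD UNIQUE KEY uq_snapshot (hotelName, siteFrom, crawlTime);

sql = ("INSERT IGNORE INTO hotel_comment"
       "(hotelId, hotelName, num, good, middle, bad, othersComment, siteFrom, crawlTime)"
       " VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)")
insert(sql, (id, hotelName, comment_sum, good, middle, bad,
             str(otherslist), "途牛", datetime.date.today()))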
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_scenic.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_scenic.py
new file mode 100644
index 0000000..b466026
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_scenic.py
@@ -0,0 +1,97 @@
+# -*- coding:utf-8 -*-
+import time
+import re
+from math import ceil
+
+import scrapy
+from wangModel.items import WangmodelItem
+from wangModel.utils.mysqlConn import query
+
+# Tuniu scenic-spot review summaries and paged review details
+class ItcastSpider(scrapy.Spider):
+    name = 'tuniu_scenic'
+    allowed_domains = ['tuniu.com']
+    start_urls = ['https://www.tuniu.com/menpiao/787427#/index']
+    flag = 1
+
+    def start_requests(self):
+        url_list = query("select id,name,tn_url as url from scenics where tn_url !='' ", None)
+        for redatas in url_list:
+            time.sleep(2)
+            yield scrapy.Request(
+                url=redatas['url'],
+                dont_filter=False,
+                callback=self.parse,
+                meta={"scenic": redatas}
+            )
+
+    # review summary (JSON response)
+    def parse(self, response):
+        time.sleep(3)
+        item = WangmodelItem()
+        data = response.json()
+        scenic = response.meta.get('scenic')
+        item['id'] = scenic['id']
+        item['name'] = scenic['name']
+        item['satisfy_present'] = data['data']['summary']['satisfaction']
+        item['remarkAmount'] = data['data']['summary']['remarkAmount']
+        item['compGrade3Amount'] = data['data']['summary']['compGrade3Amount']
+        item['compGrade2Amount'] = data['data']['summary']['compGrade2Amount']
+        item['compGrade1Amount'] = data['data']['summary']['compGrade1Amount']
+        commentlist = data['data']['remarkList']
+        item['commentlist'] = []
+
+        # review details, 10 per page
+        if commentlist is not None:
+            item['commentlist'] = commentlist
+            comment_page = ceil(item['remarkAmount'] / 10)   # number of review pages
+            if comment_page > 1:
+                flag = 1
+                while flag < comment_page:                   # request pages 2..comment_page
+                    currentPage = flag + 1
+                    flag += 1
+                    productId = re.search("specId=.*&", scenic['url']).group().replace("specId=", "").replace("&", "")
+                    detail_page_url = f"https://www.tuniu.com/resp-detail/api/menpiao/getMenpiaoComment?currentPage={currentPage}&specId={productId}&stamp=078776045436755181667991933212"
+                    yield scrapy.Request(
+                        url=detail_page_url,
+                        callback=self.parse_detail_nextPage,
+                        meta={"item": item, "url": detail_page_url, "currentPage": currentPage, "comment_page": comment_page},
+                        dont_filter=True
+                    )
+            else:
+                yield item
+
+    # paged review details (JSON response)
+    def parse_detail_nextPage(self, response):
+        time.sleep(2)
+        item = response.meta.get('item')
+        currentPage = response.meta.get('currentPage')
+        comment_page = response.meta.get('comment_page')
+        data = response.json()
+        commentlist = data['data']['remarkList']
+        if commentlist is not None:
+            item['commentlist'] = item['commentlist'] + commentlist
+        if currentPage == comment_page:
+            yield item
\ No newline at end of file
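tuniu_scenic.py recovers the `specId` query parameter with a greedy regex over the stored URL (`specId=.*&` can swallow neighbouring parameters). urllib's parser does the same job without edge cases — a sketch, assuming `specId` sits in the query string as the regex implies:

from urllib.parse import urlparse, parse_qs

def spec_id(url: str) -> str:
    # parse_qs returns {'specId': ['1167'], ...} for ...?specId=1167&stamp=...
    return parse_qs(urlparse(url).query)['specId'][0]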
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/weibo.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/weibo.py
new file mode 100644
index 0000000..42651f8
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/weibo.py
@@ -0,0 +1,87 @@
+import time
+
+import scrapy
+from wangModel.items import WeiboItem
+from math import ceil
+import re
+import uuid
+
+class WeiboSpider(scrapy.Spider):
+    name = 'weibo'
+    allowed_domains = ['weibo.com']
+    start_urls = ['http://weibo.com/']
+    articles_url = "https://m.weibo.cn/api/container/getIndex?uid=1989772524&luicode=10000011&lfid=100103type=3&q=桂林旅游&t=&type=uid&value=1989772524&containerid=1076031989772524"
+
+    total_pages = 0
+    current_page = 1
+    # profile:   https://weibo.com/ajax/profile/info?screen_name=桂林市文化广电和旅游局
+    # post list: https://weibo.com/ajax/statuses/mymblog?uid=1989772524&page=1&feature=0
+    def start_requests(self):
+        yield scrapy.Request(
+            url="https://m.weibo.cn/api/container/getIndex?uid=1989772524&luicode=10000011&lfid=100103type%3D3%26q%3D%E6%A1%82%E6%9E%97%E6%97%85%E6%B8%B8%26t%3D&type=uid&value=1989772524&containerid=1005051989772524",
+            callback=self.parse,
+        )
+
+    # profile info: follower count etc.
+    def parse(self, response):
+        data = response.json()
+        item = WeiboItem()
+        item['userid'] = data['data']['userInfo']['id']
+        item['screen_name'] = data['data']['userInfo']['screen_name']
+        item['fins'] = data['data']['userInfo']['followers_count_str']
+        item['artilelist'] = []
+
+        yield scrapy.Request(
+            url=self.articles_url,
+            callback=self.parse_articles,
+            dont_filter=True,
+            meta={"item": item}
+        )
+
+    # walk the paged post list
+    def parse_articles(self, response):
+        item = response.meta.get('item')
+        item['id'] = str(uuid.uuid1().hex)
+        artilePage = response.json()
+        if 'cardlistInfo' in artilePage['data']:
+            nextPage_id = artilePage['data']['cardlistInfo']['since_id']   # cursor for the next page
+            item['total_artiles'] = artilePage['data']['cardlistInfo']['total']
+            self.total_pages = ceil(artilePage['data']['cardlistInfo']['total'] / 12)
+            content_list = artilePage['data']['cards']   # posts on this page
+            card = []
+            item['artilelist'] = []
+            for i in range(len(content_list)):
+                card_type = content_list[i]['card_type']
+                if card_type == 9:
+                    artile_id = content_list[i]['mblog']['id']                    # post id
+                    reposts_count = content_list[i]['mblog']['reposts_count']     # reposts
+                    comments_count = content_list[i]['mblog']['comments_count']   # comments
+                    attitudes_count = content_list[i]['mblog']['attitudes_count'] # likes
+                    content_text = re.sub(r"[A-Za-z0-9\!\%\[\]\,\。\<\-\=\"\:\/\.\?\&\_\>\'\;\ ]", "",
+                                          content_list[i]['mblog']['text'])
+                    postDate = content_list[i]['mblog']['created_at']
+                    card.append({
+                        "artile_id": str(artile_id),
+                        "reposts_count": str(reposts_count),
+                        "comments_count": str(comments_count),
+                        "attitudes_count": str(attitudes_count),
+                        "text": str(content_text),
+                        'postDate': postDate
+                    })
+            item['artilelist'] = card
+            # queue exactly one next-page request per response
+            if self.current_page <= self.total_pages:
+                self.current_page = self.current_page + 1
+                time.sleep(2)
+                yield scrapy.Request(
+                    url=self.articles_url + "&since_id=" + str(nextPage_id),
+                    callback=self.parse_articles,
+                    dont_filter=True,
+                    meta={"item": item}
+                )
+            yield item
+
print(f"-----------------第{self.current_page}页爬取完毕----------------") + self.current_page = self.current_page + 1 + yield item + diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/weixin.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/weixin.py new file mode 100644 index 0000000..012044f --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/weixin.py @@ -0,0 +1,46 @@ +import scrapy + +""" +获取内荣没有点赞量收藏量 +""" +class WeixinSpider(scrapy.Spider): + name = 'weixin' + allowed_domains = ['weixin.qq.com'] + start_urls = ['http://weixin.qq.com/'] + + def parse(self, response): + url="https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid=MjM5MTU4MDA3NA==&type=9&query=&token=1865697574&lang=zh_CN&f=json&ajax=1" + cookie={"ua_id":"M0pQpE2KNnw1HvOXAAAAAE1fDecySy9uYPcTbbxXQRU=", + "wxuin":"46810516647178", + "mm_lang":"zh_CN", + "RK":"79EdPle1Va", + "ptcz":"f456c97e2c8f1090c61d121feb1eeef1419024051b0ed67a796600b76d0188ce", + "tvfe_boss_uuid":"6b9a9980f35eae48", + "pgv_pvid":"5260654758", "o_cookie":"1732095688", + "sd_userid":"30241653648365907", + "sd_cookie_crttime":"1653648365907", + "pgv_pvi":"3187809280", + "_hjSessionUser_3021617":"eyJpZCI6IjlhOTNkZWFiLTMzMDgtNTE5Yi05NWFlLTY4NGRlNGRjM2RhNSIsImNyZWF0ZWQiOjE2NTgwMjcyNTc1MTIsImV4aXN0aW5nIjpmYWxzZX0=", + "fqm_pvqid":"6afd8062-36ba-409d-b8b7-5b81ed4b79a6", + "eas_sid":"t1s6C6I0l577k3v243y953C9j9", + "Qs_lvt_323937":"1660573260", + "Qs_pv_323937":"715607012924411500", + "pgv_info":"ssid=s8054505600", + "uuid":"290f168055a3887964b014be8c572aeb", + "rand_info":"CAESIMvL6//JBy3GkYFANvsjpopfu+U1CadTWcrGvE5/iUkg", + "slave_bizuin":"3865832081", "data_bizuin":"3865832081", "bizuin":"3865832081", + "data_ticket":"3PNmNqEn/TJReP5OnXXQDWRy8NSPvxdRXgAP1zpmBJEEXd373AHCceq4yOquFumT", + "slave_sid":"RHZjMFdod1pQdzdHVjEzTUgyZkREeUZYVDR0YUFZUlpreXJYTWZLUDB3TTJIUklGdkhlX213UVVQeFg2cVdtX1FSRDhkcWVTcE5tRm1BZm52R2E2RkpaQkRrbzdoWFpCRWtvVmtZajYydmNMajdwTmpFOUhHWjRYbHlGcGppQ2tBYTc5cmNSVm02RFE1VTNk", + "slave_user":"gh_0a3b16a337ce", + "xid":"a1ceb4b8eea06c75fdfd31d65f3767f5", + "_clck":"3865832081|1|f6k|0"} + yield scrapy.Request( + url=url, + callback=self.parse_item, + cookies=cookie + ) + + def parse_item(self, response): + print(response.json()) + data=response.json() + artilelist=data['app_msg_list'] \ No newline at end of file diff --git a/applications/common/scrapySpiders/wangModel/wangModel/test.csv b/applications/common/scrapySpiders/wangModel/wangModel/test.csv new file mode 100644 index 0000000..4135feb --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/wangModel/test.csv @@ -0,0 +1,271 @@ +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  
址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 
,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 
,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +虞山公园 ,220,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,87,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,39,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,96,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,82,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,79,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 
,220,75,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,98,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,74,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +桂林相公山 ,80,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,94,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,76,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,95,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,81,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,69,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,70,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,87,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  
址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,39,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,96,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,82,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,79,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,75,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,98,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,74,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +桂林相公山 ,80,76,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,94,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 
,80,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,95,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,81,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,69,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,70,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +桂林相公山 ,80,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西桂林阳朔兴坪镇境内的漓江西岸。, +虞山公园 ,220,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,87,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,74,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,98,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,79,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,75,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 
,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,39,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,96,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,82,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。 +靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:桂林市尧山路,开放时间:09:00-17:00 +山水园 ,11,76,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30 +山水园 ,11,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  
址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,94,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,69,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地  址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/tuniu.csv b/applications/common/scrapySpiders/wangModel/wangModel/tuniu.csv
new file mode 100644
index 0000000..e69de29
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/HbaseConn.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/HbaseConn.py
new file mode 100644
index 0000000..44b63d9
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/HbaseConn.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> connTest
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-11 11:31
+@Desc
+=================================================='''
+import happybase
+# con = happybase.Connection(host='localhost', port=9090, timeout=None, autoconnect=True, table_prefix=None, table_prefix_separator=b'_', compat='0.98', transport='buffered', protocol='binary')
+# With no extra arguments this is a plain connection; Thrift's default port is 9090
+
+class HbaseUtil:
+    def __init__(self, con):
+        self.con = happybase.Connection(con)
+        self.con.open()
+
+    """
+    Insert data.
+    Arguments: table name, row key, and a dict of key-value pairs where the key
+    is "column_family:column" and the value is the data to store, e.g.
+    data = { "info:name": "lisa",
+             "info:address": "Beijing"
+    }
+    """
+    def putTable(self, tablename, rowkey, data):
+        table = self.con.table(tablename)
+        table.put(rowkey, data)
+        # self.con.close()
+
+    def batchTable(self, tablename, rowkey, data):
+        table = self.con.table(tablename)
+        bat = table.batch()
+        bat.put(rowkey, data)
+        bat.send()
+        # self.con.close()
+
+    """
+    List all table names.
+    """
+    def getTables(self):
+        print(self.con.tables())
+
+    def closeCon(self):
+        self.con.close()
+
+
+# obj = HbaseUtil('202.193.53.106')  # connect
+# obj.getTables()  # list tables
+
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/citydeal.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/citydeal.py
new file mode 100644
index 0000000..5ad314a
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/citydeal.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> citydeal
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-30 20:08
+@Desc
+=================================================='''
+import re
+with open('../files/city.txt', encoding="utf-8") as f:
+    for cityInfo in f:
+        city_id = cityInfo.split(",")[0]
+        city_name = cityInfo.split(",")[1]
+        kw = city_name.strip()
+        # keep only names that contain none of 镇/县/村/区/乡
+        if not re.search("[镇县村区乡]", kw):
+            print(kw)
+            with open("../files/city_cap.txt", "a+", encoding='utf-8') as city:
+                city.write(city_id + "," + kw + "\n")
+
+# data="灌阳县"
+# data1="灌阳镇"
+# data2="灌阳村"
+#
+# print(re.search("[镇县村]", data) is not None)
+# print(re.search("[镇县村]", data1))
+# print(re.search("[镇县村]", data2))
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/createTables.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/createTables.py
new file mode 100644
index 0000000..37b5c38
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/createTables.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> test
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-17 16:21
+@Desc
+=================================================='''
+import happybase
+
+con = happybase.Connection("202.193.53.106")
+con.open()
+
+con.create_table("tuniu_scenic", {
+    'info': dict(),
+    'comments': dict(),
+})
+con.create_table("scenic_hotel", {
+    'info': dict()
+})
+con.create_table("weibo", {
+    'info': dict()
+})
+con.create_table("tongchen", {
+    'info': dict()
+})
+con.create_table("bauduacc", {
+    'info': dict(),
+    'all': dict(),
+    'wise': dict()
+})
+con.create_table("baiduwords", {
+    'info': dict()
+})
+con.create_table("baudusearch", {
+    'info': dict()
+})
+
+con.close()
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/hostory_weather.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/hostory_weather.py
new file mode 100644
index 0000000..5f892e1
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/hostory_weather.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> hostory_weather
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-12-13 12:24
+@Desc
+=================================================='''
+"""
+Created on Mon Apr 13 11:48:58 2020
+
+@author: ZAN
+"""
+
+import requests
+import pandas as pd
+from bs4 import BeautifulSoup
+from collections import defaultdict
+from dateutil.relativedelta import relativedelta
+from datetime import datetime
+import numpy as np
+
+
+class weather_data:
+    def __init__(self, city, start_year, end_year, start_month=1, end_month=12):
+        """
+        :param city: full pinyin of the city to crawl
+        :param start_year: first year to crawl
+        :param end_year: last year to crawl
+        :param start_month: first month to crawl
+        :param end_month: last month to crawl
+        """
+        self.city = city
+        self.start_time = datetime.strptime(f"{start_year}-{start_month}", '%Y-%m')
+        self.end_time = datetime.strptime(f"{end_year}-{end_month}", '%Y-%m')
+
+    def _get_original_html(self):
+        """
+        Fetch one month's page.
+        """
+        url = f"https://tianqi.911cha.com/{self.city}/{self.start_time.year}-{self.start_time.month}.html"
+        print(url)
+        header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"} # 填写自己浏览器内容 + response = requests.get(url, headers=header) + return response.content.decode("utf-8") + + def _parse_data(self): + # 一次解析一个月 + soup = BeautifulSoup(self.html, "html.parser") + data = defaultdict(dict) + for n, tr in enumerate(soup.find_all("tr")): + if n == 0: + continue + + if n % 2 != 0: + date = tr.find("a").get_text() + # 创建日期字典 + # [时间,图片,天气,温度,湿度,风力,风级,降水量,体感温度,云量] + data[date]["Day"] = {str(self.start_time.year) + '-' + key: con.get_text() for key, con in + zip(['time', 'image', 'weather', 'temperature', 'humidity', 'wind_force', + 'wind_scale', + 'precipitation', 'sendible_temperature', 'cloud_amount'], tr.find_all("td"))} + + else: + data[date]["Night"] = {key: con.get_text() for key, con in zip( + ['time', 'image', 'weather', 'temperature', 'humidity', 'wind_force', 'wind_scale', + 'precipitation', 'sendible_temperature', 'cloud_amount'], tr.find_all("td"))} + return data + + def main(self): + + data = [] + while self.start_time <= self.end_time: + self.html = self._get_original_html() + data.append(self._parse_data()) + self.start_time += relativedelta(months=1) + + return data + + +result = [] +if __name__ == "__main__": + T = weather_data(city="guilin", start_year=2018, end_year=2019, start_month=1, end_month=12) + with open('weather_dict.txt', 'w', encoding='UTF-8') as f: + for line in T.main(): + result.append(line) + f.writelines(str(line)) +key_list = [] +key2_list = [] +val_list = [] +val3_list = [] +val5_list = [] +for data in result: + key_value = list(data.keys()) + key_list.append(key_value) + val_value = list(data.values()) + val_list.append(val_value) + +for i in key_list: + key2_list = key2_list + i; + +# 下面全是对val值进行操作 +for val2 in val_list: + for val3 in val2: + val3_value = list(val3.values()) + val3_list.append(val3_value) + +for nu in range(len(val3_list)): + for val4 in val3_list[nu]: + val5 = list(val4.values()) + val6 = ['0' if i == '-' else i for i in val5] # 把降雨的-改成0,工作需要 + val5_list.append(val6) + +data_key = pd.DataFrame(key2_list) # 日期 +data_val = pd.DataFrame(val5_list) # 气象信息,可以根据自己需要对这个变量进行修改 + +# 去除符号 +temp = data_val[3].str.strip('℃') +humd = data_val[4].str.strip('%') +rain = data_val[7].str.strip('mm') + +weather = pd.DataFrame([temp, humd, rain]).T + +# 保留奇数行,删除偶数行 +day = weather[weather.index % 2 == 0].reset_index(drop=True) # 白天数据 +# 保留偶数行,删除奇数行 +night = weather[weather.index % 2 == 1].reset_index(drop=True) # 晚上数据 + +fin = pd.concat([data_key, night, day], axis=1) +fin.to_csv('恩施气象.csv', encoding="utf_8_sig") diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/mysqlConn.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/mysqlConn.py new file mode 100644 index 0000000..ad7a6a6 --- /dev/null +++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/mysqlConn.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- +'''================================================= +@Project -> File :爬虫 -> mysqlConn +@IDE :PyCharm +@Author :sandmswift +@Date :2022-11-21 10:54 +@Desc +==================================================''' +import pymysql + +def get_conn(): + conn = pymysql.connect(host='202.193.53.151', port=3306, user='root', passwd='root', db='travel') + return conn + +""" +查询数据库 +""" +def getRows(sql,args): + conn = get_conn() + cur = conn.cursor() + cur.execute(sql, args) + results = cur.fetchall() + return results + +def query(sql,args): + 
+    conn = get_conn()
+    cur = conn.cursor()
+    cur.execute(sql, args)
+    results = cur.fetchall()
+    # fetchall() returns a tuple of tuples
+    rows = []
+    for row in results:
+        id = row[0]
+        name = row[1]
+        url = row[2]
+        rows.append({
+            "id": id,
+            "name": name,
+            "url": url
+        })
+    print(rows)
+    conn.commit()
+    cur.close()
+    conn.close()
+    return rows
+
+"""
+Insert into the database.
+"""
+def insert(sql, args):
+    conn = get_conn()
+    cur = conn.cursor()
+    result = cur.execute(sql, args)
+    # print(result)
+    conn.commit()
+    cur.close()
+    conn.close()
+
+"""Update."""
+def update(sql, args):
+    conn = get_conn()
+    cur = conn.cursor()
+    result = cur.execute(sql, args)
+    print(result)
+    conn.commit()
+    cur.close()
+    conn.close()
+
+
+# if __name__ == '__main__':
+#     sql="select id,name,tn_url from scenics where tn_url !='' "
+#
+#     query(sql,None)
+    # sql = 'INSERT INTO scenic_comment(scenicId,scenicName,satisfy_present,num,good,middle,bad) VALUES(%s,%s,%s,%s,%s,%s,%s);'
+    # insert(sql, (2, 'wang', 13))
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/proxys.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/proxys.py
new file mode 100644
index 0000000..e4d7121
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/proxys.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> proxys
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-15 18:17
+@Desc
+=================================================='''
+# proxy pool
+# NOTE: a dict literal keeps only the last duplicate "http" key, so PROXY
+# effectively holds a single proxy; ips below is the full rotation list
+PROXY = {
+    "http": "http://1.83.249.30:4329",
+    "http": "http://182.45.41.139:4314",
+    "http": "http://175.146.208.97:4356",
+    "http": "http://49.85.179.62:4331",
+    "http": "http://110.86.177.255:4385",
+    "http": "http://218.72.80.179:4343"
+
+}
+
+asyncProxy=[
+"http://175.6.60.172:6666",
+"http://61.171.99.128:6666",
+
+]
+
+ips=[
+"106.110.86.175:4331",
+"49.82.49.157:4315",
+"117.32.77.213:4315",
+"111.76.67.53:4315",
+"115.234.245.144:4375",
+"218.85.249.120:4331",
+"113.138.147.101:4314",
+"140.224.61.27:4324",
+"27.158.34.243:4335",
+"49.85.49.161:4313",
+"49.85.188.172:4331",
+"113.138.144.253:4328",
+"183.92.199.101:4324",
+"27.29.150.18:4367",
+"182.128.44.94:4331",
+"123.115.202.150:4325",
+"1.83.249.21:4326",
+"106.110.86.229:4331",
+"59.59.158.244:4331",
+"117.34.231.111:4315",
+"124.72.100.151:4352",
+"117.32.78.202:4315",
+"117.26.231.36:4345",
+"117.26.131.113:4324",
+"27.156.194.18:4368",
+"183.165.247.91:4345",
+"115.204.59.33:4343",
+"42.7.4.243:4331",
+"125.105.110.229:4345",
+"27.190.72.105:4341",
+"114.237.193.227:4348",
+"125.79.192.81:4313",
+"220.189.78.156:4314",
+"124.116.116.162:4328",
+"42.7.30.64:4313",
+"27.156.196.94:4332",
+"59.59.215.82:4313",
+"175.146.210.246:4356",
+"60.169.115.208:4323",
+"14.157.103.161:4313",
+"114.216.46.137:4357",
+"120.42.191.38:4313",
+"42.57.148.171:4356",
+"114.106.170.131:4345",
+"114.106.170.146:4354",
+"114.103.88.180:4345",
+"115.208.46.183:4331",
+"117.34.230.147:4315",
+"114.106.156.218:4354",
+"122.246.91.178:4305",
+"115.229.247.100:4331",
+"114.99.2.196:4378",
+"175.146.68.134:4385",
+"49.87.250.61:4315",
+"183.143.135.162:4326"
+]
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/weather_deal.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/weather_deal.py
new file mode 100644
index 0000000..b50451f
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/weather_deal.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> weather_deal
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-12-13 12:13
+@Desc
+=================================================='''
+from mysqlConn import getRows, update
+
+select_weather = "select id,max_tem,min_tem,"
diff --git a/applications/common/tasks/tasks.py b/applications/common/tasks/tasks.py
index fca30e2..20a61a6 100644
--- a/applications/common/tasks/tasks.py
+++ b/applications/common/tasks/tasks.py
@@ -1,23 +1,48 @@
-
+from applications.common.tasks.微博签到.weibosign import WeiBoSign
 from applications.common.tasks.景区评论标题.scenic_start import Scenic
+from applications.common.tasks.百度.baidu_start import BaiduCrawl
 from applications.common.tasks.线路评论标题.route_start import Route
 from applications.common.tasks.酒店评论标题.hotel_title_start import Hotel
 from applications.common.tasks.景区攻略.guide_start import Guide
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+
+settings = get_project_settings()
+crawler = CrawlerProcess(settings)
+
+task_list = ['景区评论标题', '线路评论标题', '景区攻略', '酒店评论标题']
 
-task_list = ['景区评论标题', '线路评论标题', '景区攻略','酒店评论标题']
 
 def 景区评论标题(id, name):
     scenic_start = Scenic()
     scenic_start.run()
 
+
 def 线路评论标题(id, name):
     scenic_start = Route()
     scenic_start.run()
 
+
 def 景区攻略(id, name):
     scenic_start = Guide()
     scenic_start.run()
 
+
 def 酒店评论标题(id, name):
     scenic_start = Hotel()
     scenic_start.run()
+
+
+def 交通拥堵爬取():
+    crawler.crawl('tongchen')  # only crawls Tongcheng train and bus tickets
+    crawler.start()
+
+
+def 微博签到爬取():
+    weibosign = WeiBoSign()
+    weibosign.run()
+
+
+def 百度相关指数爬取():
+    baidu_start = BaiduCrawl()
+    baidu_start.run()
diff --git "a/applications/common/tasks/\345\276\256\345\215\232\347\255\276\345\210\260/weibosign.py" "b/applications/common/tasks/\345\276\256\345\215\232\347\255\276\345\210\260/weibosign.py"
new file mode 100644
index 0000000..d889821
--- /dev/null
+++ "b/applications/common/tasks/\345\276\256\345\215\232\347\255\276\345\210\260/weibosign.py"
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :pear-admin-flask -> weibosign
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-12-14 18:14
+@Desc
+=================================================='''
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+
+from wangModel.common_spiders.weibosign import WeiboSignSpider
+
+settings = get_project_settings()
+crawler = CrawlerProcess(settings)
+
+
+class WeiBoSign:
+
+    def run(self):
+        print("开始爬取微博签到")
+        web = WeiboSignSpider()
+        web.run()
+        print("爬取微博签到结束")
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py"
index 82ce79c..d7d511b 100644
--- "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py"
+++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py"
@@ -8,16 +8,21 @@ from applications.common.tasks.景区评论标题.tongcheng_scenic_comment_title
 from applications.common.tasks.景区评论标题.xiecheng_scenic_comment_title import Xiecheng_Scenic
 import asyncio
 import time
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+settings = get_project_settings()
+crawler = CrawlerProcess(settings)
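+# NOTE: CrawlerProcess.start() blocks and can only be started once per process
+# (the Twisted reactor is not restartable), so the scrapy crawl is kept as the
+# final step of run() below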
 mafengwo = Mafengwo_Scenic()
 qunaer = Qunaer_Scenic()
 tongcheng = Tongcheng_Scenic()
 xiecheng = Xiecheng_Scenic()
 
+
 class Scenic:
     def run(self):
         print("开始爬取各个网站的评论标题!")
-        time_start=time.time()
+        time_start = time.time()
 
         asyncio.run(xiecheng.getScenic())
         print("携程爬取结束")
@@ -28,11 +33,9 @@ class Scenic:
         asyncio.run(mafengwo.getScenic())
         print("马蜂窝爬取结束")
 
-        time_end=time.time()
-        print(' time cost ',time_end-time_start,'s')
-
-
-
-
-
+        crawler.crawl('tuniu_scenic')
+        crawler.start()
+        print("途牛景区爬取结束")
 
+        time_end = time.time()
+        print(' time cost ', time_end - time_start, 's')
diff --git "a/applications/common/tasks/\347\231\276\345\272\246/baidu_start.py" "b/applications/common/tasks/\347\231\276\345\272\246/baidu_start.py"
new file mode 100644
index 0000000..9e27728
--- /dev/null
+++ "b/applications/common/tasks/\347\231\276\345\272\246/baidu_start.py"
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :pear-admin-flask -> baidu_start
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-12-14 18:20
+@Desc
+=================================================='''
+from wangModel.common_spiders.baiduacc import baiduacc
+from wangModel.common_spiders.baidusearch import BaiduSpider
+from wangModel.common_spiders.baiduwords import BaiDuWords
+
+
+class BaiduCrawl:
+    def run(self):
+        # runs the plain requests-based crawlers (no scrapy)
+        """ 1. Baidu index """
+        print("开始爬取百度指数")
+        acc = baiduacc()
+        acc.parse1()
+        print("百度指数爬取完毕")
+
+        """ 2. Baidu search """
+        print("开始爬取百度搜索")
+        run = BaiduSpider()
+        run.parse()
+        print("百度搜索爬取完毕")
+
+        """ 3. Baidu terms """
+        print("开始爬取百度词条")
+        baiduWord = BaiDuWords()
+        baiduWord.run()
+        print("百度词条爬取完毕")
\ No newline at end of file
diff --git "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py" "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
index 39a67b6..897bf3b 100644
--- "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
+++ "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
@@ -7,6 +7,10 @@ from applications.common.tasks.酒店评论标题.qunaer_hotel_comment_title imp
 from applications.common.tasks.酒店评论标题.tongcheng_hotel_comment_title import Tongcheng_Hotel
 import asyncio
 import time
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+settings = get_project_settings()
+crawler = CrawlerProcess(settings)
 
 qunaer = Qunaer_Hotel()
 tongcheng = Tongcheng_Hotel()
@@ -24,6 +28,10 @@ class Hotel:
         asyncio.run(qunaer.getHotel())
         print("去哪儿爬取结束")
 
+        crawler.crawl('tuniu_hotel')
+        crawler.start()
+        print("途牛酒店爬取结束")
+
         time_end=time.time()
         print(' time cost ',time_end-time_start,'s')
diff --git a/applications/view/__init__.py b/applications/view/__init__.py
index a168078..9cecc83 100644
--- a/applications/view/__init__.py
+++ b/applications/view/__init__.py
@@ -3,7 +3,7 @@ from applications.view.index import register_index_views
 from applications.view.passport import register_passport_views
 from applications.view.rights import register_rights_view
 from applications.view.department import register_dept_views
-from applications.view.test import register_test_views
+# from applications.view.test import register_test_views
 
 
 def init_view(app):
@@ -11,5 +11,5 @@ def init_view(app):
     register_index_views(app)
     register_rights_view(app)
register_passport_views(app) - register_test_views(app) + # register_test_views(app) register_dept_views(app) -- Gitee