From 309ad5845d9136b7f8b0e080172d8e85aa253226 Mon Sep 17 00:00:00 2001
From: Bright <540467981@qq.com>
Date: Wed, 14 Dec 2022 12:18:36 +0800
Subject: [PATCH 1/3] Fix the interval-trigger scheduling in task management
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
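
The task form submits the period as 'HH:MM:SS', but the interval branch of
save() passed no period kwargs to scheduler.add_job, so interval jobs were
created without a schedule. The handler now splits the submitted string and
forwards the parts to the trigger. A minimal sketch of the resulting call
(names as in this patch):

    hour, minute, second = (int(x) for x in str(time).split(':'))
    scheduler.add_job(..., trigger='interval',
                      hours=hour, minutes=minute, seconds=second,
                      replace_existing=True)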
---
.flaskenv | 8 ++++----
applications/common/tasks/tasks.py | 16 +++++++---------
applications/view/__init__.py | 2 ++
applications/view/admin/task.py | 8 ++++++++
.../admin/component/pear/css/module/layout.css | 1 +
5 files changed, 22 insertions(+), 13 deletions(-)
diff --git a/.flaskenv b/.flaskenv
index 721d9f0..80038d5 100644
--- a/.flaskenv
+++ b/.flaskenv
@@ -9,12 +9,12 @@ FLASK_RUN_PORT = 5000
SYSTEM_NAME = Pear Admin
# MySQL configuration
-MYSQL_HOST=127.0.0.1
+MYSQL_HOST=202.193.53.151
# MYSQL_HOST=dbserver
MYSQL_PORT=3306
MYSQL_DATABASE=PearAdminFlask
MYSQL_USERNAME=root
-MYSQL_PASSWORD=123456
+MYSQL_PASSWORD=root
# Redis configuration
# REDIS_HOST=127.0.0.1
@@ -25,5 +25,5 @@ SECRET_KEY='pear-admin-flask'
# Mail configuration
MAIL_SERVER='smtp.qq.com'
-MAIL_USERNAME='123@qq.com'
-MAIL_PASSWORD='XXXXX' # 生成的授权码
\ No newline at end of file
+MAIL_USERNAME='540467981@qq.com'
+MAIL_PASSWORD='Libingcai@123' # generated SMTP authorization code
\ No newline at end of file
diff --git a/applications/common/tasks/tasks.py b/applications/common/tasks/tasks.py
index 85636e7..23c4e81 100644
--- a/applications/common/tasks/tasks.py
+++ b/applications/common/tasks/tasks.py
@@ -1,15 +1,13 @@
import datetime
-task_list = ['task2', 'task3', 'task4']
+task_list = ['景区评论标题', '线路评论标题', '景区攻略']
+def 景区评论标题(id, name):
+ print(id, name)
-def task2(a, b):
- print(f'定时任务_1_{a},{b},{datetime.datetime.now()}')
+def 线路评论标题(id, name):
+ print(id, name)
-def task3(a, b):
- print(f'定时任务_2_{a}{b}{datetime.datetime.now()}')
-
-
-def task4(a, b):
- print(f'定时任务_4_{a}{b}{datetime.datetime.now()}')
+def 景区攻略(id, name):
+ print(id, name)
diff --git a/applications/view/__init__.py b/applications/view/__init__.py
index bec7989..a168078 100644
--- a/applications/view/__init__.py
+++ b/applications/view/__init__.py
@@ -3,6 +3,7 @@ from applications.view.index import register_index_views
from applications.view.passport import register_passport_views
from applications.view.rights import register_rights_view
from applications.view.department import register_dept_views
+from applications.view.test import register_test_views
def init_view(app):
@@ -10,4 +11,5 @@ def init_view(app):
register_index_views(app)
register_rights_view(app)
register_passport_views(app)
+ register_test_views(app)
register_dept_views(app)
diff --git a/applications/view/admin/task.py b/applications/view/admin/task.py
index 5c825a6..92c180d 100644
--- a/applications/view/admin/task.py
+++ b/applications/view/admin/task.py
@@ -5,6 +5,7 @@ from applications.common.tasks import tasks
from applications.common.tasks.tasks import task_list
from applications.common.utils.http import table_api, fail_api, success_api
from applications.extensions.init_apscheduler import scheduler
+import time as t
admin_task = Blueprint('adminTask', __name__, url_prefix='/admin/task')
@@ -44,6 +45,10 @@ def save():
functions = request.json.get("functions")
datetime = request.json.get("datetime")
time = request.json.get("time")
+    hms = str(time).split(':')  # the period arrives as 'HH:MM:SS'
+    hour = int(hms[0])
+    minute = int(hms[1])
+    second = int(hms[2])
if not hasattr(tasks, functions):
return fail_api()
if type == 'date':
@@ -62,6 +67,9 @@ def save():
name=name,
args=(1, 1),
trigger=type,
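+            # hours/minutes/seconds form the period of APScheduler's interval trigger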
+ hours=hour,
+            minutes=minute,
+            seconds=second,
replace_existing=True)
elif type == 'cron':
scheduler.add_job(
diff --git a/static/admin/component/pear/css/module/layout.css b/static/admin/component/pear/css/module/layout.css
index beceeea..f4e1b91 100644
--- a/static/admin/component/pear/css/module/layout.css
+++ b/static/admin/component/pear/css/module/layout.css
@@ -61,6 +61,7 @@ body::-webkit-scrollbar-corner {
.mainBox {
width: 100%;
+ /*height: 500;*/
position: absolute;
top: 0px;
left: 0px;
--
Gitee
From 1ada28c4e159a67d95401f6a1c6c2812db0fa10b Mon Sep 17 00:00:00 2001
From: Bright <540467981@qq.com>
Date: Wed, 14 Dec 2022 16:35:43 +0800
Subject: [PATCH 2/3] Add my own crawlers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
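
Adds the standalone crawlers under applications/common/tasks, grouped by
category (微博景区, 景区攻略, 景区评论标题, 线路评论标题, 酒店评论标题),
plus the shared HBaseConnect/MysqlConnect helpers. tasks.py now maps each
schedulable task name onto its category's start class, whose run() drives
the per-site crawlers in turn. A sketch of the dispatch (the (id, name)
arguments come from the scheduler and are currently unused):

    def 景区评论标题(id, name):
        scenic_start = Scenic()
        scenic_start.run()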
---
HBaseConnect.py | 92 +++++++++++
MysqlConnect.py | 58 +++++++
applications/common/tasks/tasks.py | 22 ++-
.../search_360.py" | 100 ++++++++++++
.../sentiment_start.py" | 38 +++++
.../test.js" | 1 +
.../weibo_scenic_fans.py" | 90 +++++++++++
.../weibo_scenic_trend.py" | 76 +++++++++
.../weibo_scenic_wordbygeo.py" | 81 ++++++++++
.../guide_start.py" | 34 ++++
.../mafengwo_scenic.py" | 103 +++++++++++++
.../qunaer_scenic.py" | 89 +++++++++++
.../xiecheng_scenic.py" | 89 +++++++++++
.../mafengwo_scenic_comment_title.py" | 127 +++++++++++++++
.../qunaer_scenic_comment_title.py" | 87 +++++++++++
.../scenic_start.py" | 38 +++++
.../tongcheng_scenic_comment_title.py" | 80 ++++++++++
.../xiecheng_scenic_comment_title.py" | 145 ++++++++++++++++++
.../qunaer_route_comment_title.py" | 89 +++++++++++
.../route_start.py" | 30 ++++
.../xiecheng_route_comment_title.py" | 106 +++++++++++++
.../hotel_title_start.py" | 34 ++++
.../qunaer_hotel_comment_title.py" | 76 +++++++++
.../tongcheng_hotel_comment_title.py" | 102 ++++++++++++
.../xiecheng_hotel_comment_title.py" | 121 +++++++++++++++
25 files changed, 1902 insertions(+), 6 deletions(-)
create mode 100644 HBaseConnect.py
create mode 100644 MysqlConnect.py
create mode 100644 "applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/search_360.py"
create mode 100644 "applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/sentiment_start.py"
create mode 100644 "applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/test.js"
create mode 100644 "applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_fans.py"
create mode 100644 "applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_trend.py"
create mode 100644 "applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_wordbygeo.py"
create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/guide_start.py"
create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/mafengwo_scenic.py"
create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/qunaer_scenic.py"
create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/xiecheng_scenic.py"
create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/mafengwo_scenic_comment_title.py"
create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_scenic_comment_title.py"
create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py"
create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py"
create mode 100644 "applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_scenic_comment_title.py"
create mode 100644 "applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_route_comment_title.py"
create mode 100644 "applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/route_start.py"
create mode 100644 "applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_route_comment_title.py"
create mode 100644 "applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
create mode 100644 "applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_hotel_comment_title.py"
create mode 100644 "applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_hotel_comment_title.py"
create mode 100644 "applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_hotel_comment_title.py"
diff --git a/HBaseConnect.py b/HBaseConnect.py
new file mode 100644
index 0000000..3d4097d
--- /dev/null
+++ b/HBaseConnect.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+import happybase
+import json
+import re
+
+
+# Connection wrapper exposing generic table operations
+class HBaseConnect:
+ def __init__(self):
+ """
+        Establish the connection to the HBase Thrift server.
+ """
+ self.connection = happybase.Connection(host="202.193.53.106", port=9090, timeout=None, autoconnect=True,
+ table_prefix=None, table_prefix_separator=b'_', compat='0.98',
+ transport='buffered', protocol='binary')
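+        # NOTE: compat/transport/protocol must match how the HBase Thrift server was launched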
+
+    def getTable(self, table_name: str):  # return a happybase Table handle
+        return self.connection.table(table_name)
+
+    def start(self):  # open the connection
+        self.connection.open()
+
+    def stop(self):  # close the connection
+        self.connection.close()
+
+    # Disable and drop a table
+    def deleteTable(self, tableName):
+        self.connection.disable_table(tableName)
+        self.connection.delete_table(tableName)
+
+ def printTables(self):
+ return self.connection.tables()
+
+ def createTable(self, tableName, families):
+ self.connection.create_table(tableName, families)
+
+ def putTable(self, tableName, rowKey, data):
+ table = self.connection.table(tableName)
+ table.put(rowKey, data)
+
+
+if __name__ == '__main__':
+ hbase = HBaseConnect()
+ hbase.start()
+ # hbase.deleteTable("qunaerscenic")
+ # list = ["xiechenghotel","xiechengscenic","qunaerhotel","qunaerscenic","tongchenghotel","tongchengscenic",]
+ # for item in list:
+ # #
+ # # hbase.deleteTable(item)
+ # hbase.createTable(item, {"info": dict()})
+ #
+ # hbase.putTable("xiechenghotel","test",{"info:name":"23"})
+ # hbase.deleteTable("tongchengscenic")
+ # hbase.createTable('route_comment', {"info": dict()})
+ table = hbase.getTable("route_comment")
+ i=0
+ # id = 3909
+ for key, value in table.scan():
+ data = str(value).encode().decode('unicode-escape').encode('raw_unicode_escape').decode()
+ # print(len(data))
+ # i=i+1
+ # res = re.compile(rf"b'info:datafrom': b'去哪儿', b'info:hid': b'1853'").search(data)
+ # if res != None:
+ # i=i+1
+ # # table.delete(key)
+ # print(data)
+ res = re.compile(r"info:content': b'用户未及时评价, 该评价为系统默认好评!'").search(data)
+ if res != None:
+ i=i+1
+ print(data)
+ table.delete(key)
+ res = re.compile(r"info:content': b'用户未填写文字评价。").search(data)
+ if res != None:
+ i=i+1
+ print(data)
+ table.delete(key)
+ # print (str(data))
+ print(i)
+ # print(hbase.printTables())
+ hbase.stop()
+
+"""
+data={
+ 'info:place_from': item['place_from'],
+ 'info:place_to': item['place_to'],
+ 'info:Date': item['Date'],
+ 'info:totalCount': item['totalCount'],
+ 'info:type': item['type'],
+ 'info:ticketStatus': item['ticketStatus']
+ }
+ obj.putTable("表名","行键",data) #插入语句
+"""
diff --git a/MysqlConnect.py b/MysqlConnect.py
new file mode 100644
index 0000000..25dd989
--- /dev/null
+++ b/MysqlConnect.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File : crawler -> mysqlConn
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-21 10:54
+@Desc
+=================================================='''
+import pymysql
+
+
+# Connection wrapper exposing generic query/insert helpers
+class MysqlConnect:
+    # Initialize the database connection
+    def __init__(self):
+        self.connection = pymysql.connect(host='202.193.53.151', port=3306, user='root', passwd='root', db='travel', charset='utf8mb4')
+        self.cur = self.connection.cursor()
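+        # single shared connection/cursor; pymysql is not thread-safe, so keep access sequential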
+
+ def query(self,sql, args):
+ self.cur.execute(sql, args)
+ results = self.cur.fetchall()
+        # print(type(results))  # results is a tuple of row tuples
+ # self.connection.commit()
+ return results
+
+ def queryHotel(self,sql, args):
+ self.cur.execute(sql, args)
+ results = self.cur.fetchall()
+        # print(type(results))  # results is a tuple of row tuples
+ self.connection.commit()
+ return results
+
+ def update(self,sql,args):
+        # UPDATE statement used by the Ctrip crawler
+ self.cur.execute(sql,args)
+ self.connection.commit()
+
+    # Generic insert helper
+ def insert(self,sql, args):
+        # Ctrip and Qunar scenic spots
+        # sql = f'INSERT INTO {tableName}(scenicId,scenicName,score,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+        # Tongcheng scenic spots
+        # sql = 'INSERT INTO scenic_comment(scenicId,scenicName,satisfy_present,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+
+        # Tongcheng hotel details
+        # sql = f'INSERT INTO hotels(name,level,address,tc_url,tc_data,crawlTime) VALUES(%s,%s,%s,%s,%s,%s);'
+        # Tongcheng and Qunar hotels
+        # sql = 'INSERT INTO hotel_comment(hotelId,hotelName,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+
+ result = self.cur.execute(sql, args)
+ # print(result)
+ self.connection.commit()
+
+# if __name__ == '__main__':
+# mysql = MysqlConnect()
+# mysql.update((222,"诗与远方·漓江院子酒店(两江四湖东西巷店)"))
\ No newline at end of file
diff --git a/applications/common/tasks/tasks.py b/applications/common/tasks/tasks.py
index 23c4e81..fca30e2 100644
--- a/applications/common/tasks/tasks.py
+++ b/applications/common/tasks/tasks.py
@@ -1,13 +1,23 @@
-import datetime
-task_list = ['景区评论标题', '线路评论标题', '景区攻略']
+from applications.common.tasks.景区评论标题.scenic_start import Scenic
+from applications.common.tasks.线路评论标题.route_start import Route
+from applications.common.tasks.酒店评论标题.hotel_title_start import Hotel
+from applications.common.tasks.景区攻略.guide_start import Guide
-def 景区评论标题(id, name):
- print(id, name)
+task_list = ['景区评论标题', '线路评论标题', '景区攻略','酒店评论标题']
+def 景区评论标题(id, name):
+ scenic_start = Scenic()
+ scenic_start.run()
def 线路评论标题(id, name):
- print(id, name)
+    route_start = Route()
+    route_start.run()
def 景区攻略(id, name):
- print(id, name)
+    guide_start = Guide()
+    guide_start.run()
+
+def 酒店评论标题(id, name):
+    hotel_start = Hotel()
+    hotel_start.run()
diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/search_360.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/search_360.py"
new file mode 100644
index 0000000..46ca727
--- /dev/null
+++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/search_360.py"
@@ -0,0 +1,100 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import os
+import xlwt
+import xlrd
+import time
+import openpyxl
+import json
+from datetime import date, timedelta
+
+today = time.strftime("%Y-%m-%d",time.localtime())
+tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d")
+headers = {
+ 'Host': 'trends.so.com',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0',
+ 'Accept': 'application/json, text/plain, */*',
+ 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Connection': 'keep-alive',
+ 'Referer': 'https://trends.so.com/result?query=%E6%BC%93%E6%B1%9F,%E8%B1%A1%E5%B1%B1&period=30',
+ 'Cookie': '__guid=239254294.3977692736380259300.1670144255177.8064; __gid=239254294.318323770.1670144255178.1670144261083.38; __bn=OBOS%7BOxOnSwO%24O%2FBVKQFw%3CS%3EStwRt3BqwL%2C%2B%2FUpx.pdL%28UoxX%294STo1Dzg%2F%7DVYG%40dJp%3F1M%40f%5EJ0%7Cs%3ClLe%5E%23OAT8gZKW%232LE%7C9ue%25YHrkL_c8y%2AnNf5v%26LmJ7%5Eh%21_6; QiHooGUID=4B479C909060D45303A26176A83571EE.1670921647212; count=2; test_cookie_enable=null; Q=u%3D360H3408265314%26n%3D%25Q3%25Q0%25Q0%25P4%25P8%25PO_308%26le%3D%26m%3DZGH1WGWOWGWOWGWOWGWOWGWOBQt0%26qid%3D3408265314%26im%3D1_t015d6b97def2a4a918%26src%3Dpcw_360index%26t%3D1; T=s%3D6b400eed78c7fa7b27a496343be40f86%26t%3D1670921768%26lm%3D0-1%26lf%3D2%26sk%3Dc5c99806e7fcea7539ea20e35943912a%26mt%3D1670921768%26rc%3D%26v%3D2.0%26a%3D1; so_huid=11k1XvfU5zrG3%2BT9U%2FzH9XHKHrh6tHE07PUntGARvW36A%3D; __huid=11k1XvfU5zrG3%2BT9U%2FzH9XHKHrh6tHE07PUntGARvW36A%3D; _S=fd41ea22d44de0791f9996eaa25d084e',
+ 'Sec-Fetch-Dest': 'empty',
+ 'Sec-Fetch-Mode': 'cors',
+ 'Sec-Fetch-Site': 'same-origin',
+ 'TE': 'trailers',
+}
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+class Search_360:
+ async def getFans(self,item, session):
+ try:
+ maxPage = int(item['maxPage'])
+ wordList = []
+ for index in range(1,maxPage+1):
+ url = f"https://s.weibo.com/weibo?q={item['short_name']}&page={index}"
+ async with session.get(url) as res:
+ res = await res.text()
+ tempList = re.compile(r'>#(.*?)#<').findall(res)
+ wordList.extend(tempList)
+ if index % 8 == 0:
+ print(f"<----------------{item['short_name']}爬到{index}页------------------->")
+ time.sleep(3)
+            # SQL for the collected topic words
+ args = (item["id"], item["short_name"], str(wordList), today)
+ print(args)
+ sql = 'INSERT INTO weibo_word(scenicId,name,wordList,crawlTime) VALUES(%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+ print("fans报错",e)
+
+    # Fetch the scenic-spot list from the database
+ async def getScenic(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+            # pull the spot list from the database
+ results = mysql.queryHotel("select id,name ,short_name from scenics where id > 0", None)
+ tasks = []
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ short_name = row[2]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "short_name": short_name,
+ })
+ print("微博所有景区长度", len(url_list))
+ for item in url_list:
+ url = f"https://s.weibo.com/weibo?q={item['short_name']}&page=1"
+ res = requests.get(url, headers=headers)
+ maxPages = re.compile(r'第(.*)页').findall(res.text)
+ if maxPages == []:
+ item['maxPage'] = 1
+ else:
+ item['maxPage'] = maxPages[len(maxPages) - 1]
+ print(item['short_name'] + f'长度为:{item["maxPage"]}')
+ task = asyncio.create_task(self.getFans(item.copy(), session))
+ tasks.append(task)
+ await asyncio.wait(tasks)
+ print(f"{item['short_name']}爬完了")
+            # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+    @staticmethod
+    def test():
+        url = "https://trends.so.com/index/soMediaJson?q=漓江,象山&from=20130111&to=20221212&s=0"
+        res = requests.post(url, headers=headers)
+        # print(res.json())
+        resp = res.json()
+        data = resp['data']['media']['漓江']
+        # resp = res.content.decode('unicode-escape', 'ignore').encode('utf-8', 'ignore').decode('utf-8')  # fetch the page and decode it
+        print(data)
+
+if __name__ == '__main__':
+    Search_360.test()
+    # asyncio.run(Search_360().getScenic())
diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/sentiment_start.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/sentiment_start.py"
new file mode 100644
index 0000000..6c58652
--- /dev/null
+++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/sentiment_start.py"
@@ -0,0 +1,38 @@
+# coding:utf-8
+# version:python3.7
+# author:Ivy
+
+from applications.common.tasks.微博景区.weibo_scenic_fans import Weibo_Fans
+from applications.common.tasks.微博景区.weibo_scenic_trend import Weibo_Trend
+from applications.common.tasks.微博景区.weibo_scenic_wordbygeo import Weibo_Wordbygeo
+from applications.common.tasks.微博景区.search_360 import Search_360
+import asyncio
+import time
+
+fans = Weibo_Fans()
+trend = Weibo_Trend()
+wordbygeo = Weibo_Wordbygeo()
+search360 = Search_360()
+
+class Scenic:
+    def run(self):
+        print("Start crawling the Weibo metrics for each scenic spot!")
+        time_start = time.time()
+
+        asyncio.run(search360.getScenic())
+        print("360/Weibo topic search done")
+        asyncio.run(wordbygeo.getScenic())
+        print("Weibo topic words done")
+        asyncio.run(trend.getScenic())
+        print("Weibo trend done")
+        asyncio.run(fans.getScenic())
+        print("Weibo fans done")
+
+        time_end = time.time()
+        print(' time cost ', time_end - time_start, 's')
+
+
+
+
+
+
diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/test.js" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/test.js"
new file mode 100644
index 0000000..e98763e
--- /dev/null
+++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/test.js"
@@ -0,0 +1 @@
+console.log(123)
\ No newline at end of file
diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_fans.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_fans.py"
new file mode 100644
index 0000000..911672f
--- /dev/null
+++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_fans.py"
@@ -0,0 +1,90 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import os
+import xlwt
+import xlrd
+import time
+import openpyxl
+import json
+from datetime import date, timedelta
+
+today = time.strftime("%Y-%m-%d",time.localtime())
+tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d")
+headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0',
+ 'Cookie':"SINAGLOBAL=6487030870592.412.1670217755062; ULV=1670913619949:2:2:1:448084127526.90094.1670913619909:1670217755065; SUB=_2A25OnG8QDeRhGeFG61oQ9CfOyzWIHXVt6MfYrDV8PUNbmtANLXbRkW9NfnN7XS0bIXvPWvBx4AplvHeMTR0yYZWh; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhuyxuyZV28mO7UcSyqZia-5JpX5KzhUgL.FoMRehnpSh.Eeh.2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM71K201h27eKBc; XSRF-TOKEN=Nb98b84zpKrLkQFK2G4QVm_B; WBPSESS=GXgdHCdzDjVRXBTFCQtCjwvN0D3EIjJu6yKjC9Ly2vpYlmMNPvd-am2fVfhb0LZzGlpu1z5hvjfehJnVFqrpOT7jc98bCwb2dNLjM6gCoYpJDGf8FiZZUWuoMIVf8Swi9hEuLXICEsBqKZoSkXjGwg==; _s_tentry=-; Apache=448084127526.90094.1670913619909; PC_TOKEN=45843b523d; crossidccode=CODE-yf-1P4YZf-3iYEFJ-6bhPhbY6POtswkxda2c7a; appkey=; geetest_token=667c6133aa018c4142666d597550c90c; ALF=1702449855; SSOLoginState=1670913856",
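+    # NOTE: this session Cookie expires; refresh it before a real run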
+ # 'referer': 'https://s.weibo.com/'
+}
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+class Weibo_Fans:
+ async def getFans(self,item, session):
+ try:
+            maxPage = int(item['maxPage'])
+            total = 0
+            for index in range(1, maxPage + 1):
+                url = f"https://s.weibo.com/user?q={item['short_name']}&Refer=weibo_user&page={index}"
+                async with session.get(url) as res:
+                    res = await res.text()
+                    resp = re.compile(r'粉丝:(.*)').findall(res)
+                    for i in range(len(resp)):
+                        fans = resp[i]
+                        if '万' in fans:  # counts such as '3.2万' are in units of 10,000
+                            fans = float(fans.replace('万', '')) * 10000
+                        total += int(fans)
+                print(f"<-----------------{item['short_name']}爬到{index}页,目前fans为{total}---------------->")
+                if index % 5 == 0:
+                    time.sleep(5)
+            # SQL for Weibo fans
+            args = (item["id"], item["short_name"], total, today)
+ print(args)
+ sql = 'INSERT INTO weibo_fans(scenicId,name,wb_fans,crawlTime) VALUES(%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+ print("fans报错",e)
+
+    # Fetch the scenic-spot list from the database
+ async def getScenic(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+            # pull the spot list from the database
+ results = mysql.queryHotel("select id,name ,short_name from scenics where id > 0", None)
+ tasks = []
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ short_name = row[2]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "short_name": short_name,
+ })
+ print("微博所有景区长度", len(url_list))
+ for item in url_list:
+ url = f"https://s.weibo.com/user?q={item['short_name']}&Refer=weibo_user&page=1"
+ res = requests.get(url, headers=headers)
+ maxPages = re.compile(r'第(.*)页').findall(res.text)
+ if maxPages == []:
+ item['maxPage'] = 1
+ else:
+ item['maxPage'] = maxPages[len(maxPages) - 1]
+ print(item['short_name'] + f'长度为:{item["maxPage"]}')
+ task = asyncio.create_task(self.getFans(item.copy(), session))
+ tasks.append(task)
+ await asyncio.wait(tasks)
+ print(f"{item['short_name']}爬完了")
+            # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+if __name__ == '__main__':
+    asyncio.run(Weibo_Fans().getScenic())
diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_trend.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_trend.py"
new file mode 100644
index 0000000..df82661
--- /dev/null
+++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_trend.py"
@@ -0,0 +1,76 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import os
+import xlwt
+import xlrd
+import time
+import openpyxl
+import json
+from datetime import date, timedelta
+
+# year = time.strftime("%Y-", time.localtime())
+today = time.strftime("%Y-%m-%d",time.localtime())
+tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d")
+headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0',
+ # 'Cookie':"SINAGLOBAL=3380569287618.6953.1670308366047; _s_tentry=s.weibo.com; Apache=5236619054300.727.1670324406000; ULV=1670324406003:2:2:2:5236619054300.727.1670324406000:1670308366049; XSRF-TOKEN=13toiK7TaB8Axa4Vx7DncNNO; login_sid_t=ec7d5e0d423e9ec19b8acb14ea31e88f; cross_origin_proto=SSL; wb_view_log=2560*14401.5; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WFCcqTynPQA63zJA5y17Xs15JpX5o275NHD95QcSoe0eo5XSo-fWs4Dqc_xi--fiK.0i-8Wi--ciKLhiKn4i--4iKnEi-20i--Xi-z4iKnRi--fi-2XiKLWi--ci-zpiKnEi--RiKn7iKyhi--Xi-zRiKy2i--fi-i8iK.N; SSOLoginState=1670812852; SUB=_2A25OkuTlDeRhGeBI6FET8CrKzjmIHXVt5lEtrDV8PUNbmtANLWiskW9NRptelGTTlu3DzUc2k9u71P0K706eTg71; ALF=1702348852; WBPSESS=QRnN_8uUPIRKDidMZ7ysnFsKmswTd-coyxvC3kx2wmVsnZYfCgM3CVbyYUESYHrYB0_OPXwWvhlacPaYtSVNXY0EckXCBF-9xxe7fsm2CcjhwFzQ2yBIcsDsTtMkf5Epp7PzpdyQGn9mf7C9CvIb3w==",
+ # 'referer': 'https://s.weibo.com/'
+}
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+class Weibo_Trend:
+ async def getTalk(self,item, session):
+ try:
+ url = f"https://m.s.weibo.com/ajax_topic/detail?q={item['short_name']}"
+ async with session.get(url) as res:
+ resp = await res.json()
+ read = 0
+ ori_uv = 0
+ mention = 0
+ star = 0
+ if 'count' in resp['data']['baseInfo']:
+ read = resp['data']['baseInfo']['count']['read']
+ ori_uv = resp['data']['baseInfo']['count']['ori_uv']
+ mention = resp['data']['baseInfo']['count']['mention']
+ star = resp['data']['baseInfo']['count']['star']
+ args = (item["id"], item["short_name"], read,ori_uv,mention,star, today)
+ print(args)
+ sql = 'INSERT INTO weibo_trend(scenicId,`name`,`read`,ori_uv,mention,star,crawlTime) VALUES(%s,%s,%s,%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+            print("getTalk error:", e)
+
+    # Fetch the scenic-spot list from the database
+ async def getScenic(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+            # pull the spot list from the database
+ results = mysql.queryHotel("select id,name ,short_name from scenics where id > 0 ", None)
+ tasks = []
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ short_name = row[2]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "short_name": short_name,
+ })
+ print("微博所有景区长度", len(url_list))
+ i = 0
+ for item in url_list:
+ task = asyncio.create_task(self.getTalk(item.copy(), session))
+ tasks.append(task)
+ i = i + 1
+ if i % 8 == 0:
+ time.sleep(3)
+ await asyncio.wait(tasks)
+            # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+if __name__ == '__main__':
+    asyncio.run(Weibo_Trend().getScenic())
\ No newline at end of file
diff --git "a/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_wordbygeo.py" "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_wordbygeo.py"
new file mode 100644
index 0000000..a155499
--- /dev/null
+++ "b/applications/common/tasks/\345\276\256\345\215\232\346\231\257\345\214\272/weibo_scenic_wordbygeo.py"
@@ -0,0 +1,81 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import os
+import xlwt
+import xlrd
+import time
+import openpyxl
+import json
+from datetime import date, timedelta
+
+today = time.strftime("%Y-%m-%d",time.localtime())
+tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d")
+headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0',
+ 'Cookie':"SINAGLOBAL=6487030870592.412.1670217755062; ULV=1670913619949:2:2:1:448084127526.90094.1670913619909:1670217755065; SUB=_2A25OnG8QDeRhGeFG61oQ9CfOyzWIHXVt6MfYrDV8PUNbmtANLXbRkW9NfnN7XS0bIXvPWvBx4AplvHeMTR0yYZWh; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9WhuyxuyZV28mO7UcSyqZia-5JpX5KzhUgL.FoMRehnpSh.Eeh.2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM71K201h27eKBc; XSRF-TOKEN=Nb98b84zpKrLkQFK2G4QVm_B; WBPSESS=GXgdHCdzDjVRXBTFCQtCjwvN0D3EIjJu6yKjC9Ly2vpYlmMNPvd-am2fVfhb0LZzGlpu1z5hvjfehJnVFqrpORnpezcjqLRXjHTwRYkqud2f-lo5ogx3FJhjiiEoA2AHWwC4_I4Ebc8XETWMRRXqRQ==; _s_tentry=-; Apache=448084127526.90094.1670913619909; appkey=; ALF=1702449855; SSOLoginState=1670913856",
+ # 'referer': 'https://s.weibo.com/'
+}
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+class Weibo_Wordbygeo:
+ async def getWord(self,item, session):
+ try:
+ maxPage = int(item['maxPage'])
+ wordList = []
+ for index in range(1,maxPage+1):
+ url = f"https://s.weibo.com/weibo?q={item['short_name']}&page={index}"
+ async with session.get(url) as res:
+ res = await res.text()
+ tempList = re.compile(r'>#(.*?)#<').findall(res)
+ wordList.extend(tempList)
+ if index % 8 == 0:
+ print(f"<----------------{item['short_name']}爬到{index}页------------------->")
+ time.sleep(3)
+            # SQL for the collected topic words
+ args = (item["id"], item["short_name"], str(wordList), today)
+ print(args)
+ sql = 'INSERT INTO weibo_word(scenicId,name,wordList,crawlTime) VALUES(%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+            print("getWord error:", e)
+
+    # Fetch the scenic-spot list from the database
+ async def getScenic(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+            # pull the spot list from the database
+ results = mysql.queryHotel("select id,name ,short_name from scenics where id > 0", None)
+ tasks = []
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ short_name = row[2]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "short_name": short_name,
+ })
+ print("微博所有景区长度", len(url_list))
+ for item in url_list:
+ url = f"https://s.weibo.com/weibo?q={item['short_name']}&page=1"
+ res = requests.get(url, headers=headers)
+ maxPages = re.compile(r'第(.*)页').findall(res.text)
+ if maxPages == []:
+ item['maxPage'] = 1
+ else:
+ item['maxPage'] = maxPages[len(maxPages) - 1]
+ print(item['short_name'] + f'长度为:{item["maxPage"]}')
+ task = asyncio.create_task(self.getWord(item.copy(), session))
+ tasks.append(task)
+ await asyncio.wait(tasks)
+ print(f"{item['short_name']}爬完了")
+            # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+if __name__ == '__main__':
+    asyncio.run(Weibo_Wordbygeo().getScenic())
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/guide_start.py" "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/guide_start.py"
new file mode 100644
index 0000000..8fbaabd
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/guide_start.py"
@@ -0,0 +1,34 @@
+# coding:utf-8
+# version:python3.7
+# author:Ivy
+
+from applications.common.tasks.景区攻略.mafengwo_scenic import Mafengwo_Scenic
+from applications.common.tasks.景区攻略.qunaer_scenic import Qunaer_Scenic
+from applications.common.tasks.景区攻略.xiecheng_scenic import Xiecheng_Scenic
+import asyncio
+import time
+
+mafengwo = Mafengwo_Scenic()
+qunaer = Qunaer_Scenic()
+xiecheng = Xiecheng_Scenic()
+
+class Guide:
+ def run(self):
+ print("开始爬取各个网站的评论标题!")
+ time_start=time.time()
+
+ asyncio.run(xiecheng.getScenic())
+ print("携程爬取结束")
+ # asyncio.run(qunaer.getScenic())
+ # print("去哪儿爬取结束")
+ asyncio.run(mafengwo.getScenic())
+ print("马蜂窝爬取结束")
+
+ time_end=time.time()
+ print(' time cost ',time_end-time_start,'s')
+
+
+
+
+
+
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/mafengwo_scenic.py" "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/mafengwo_scenic.py"
new file mode 100644
index 0000000..5f0acc0
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/mafengwo_scenic.py"
@@ -0,0 +1,103 @@
+import time
+
+import pymysql
+import requests
+from lxml import etree
+import datetime
+mafengwoUrl = 'https://www.mafengwo.cn/search/q.php?q={}&t=notes&seid=8ADBD862-D2E8-4B0D-ADE1-0C98ED641130&mxid=&mid=&mname=&kt=1'
+
+
+class Mafengwo_Scenic:
+
+ def getSource(self,url):
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
+ }
+
+ response = requests.get(url,headers=headers)
+ response.encoding = 'utf-8'
+ return response.text
+
+ def getEveryItem(self,id,name,source):
+ try:
+ html_element = etree.HTML(source)
+ href_list = html_element.xpath('//*[@id="_j_mfw_search_main"]/div[1]/div/div/a/@href')
+
+            guide_url = href_list[2]
+            note_url = href_list[3]
+            answer_url = href_list[4]
+
+            guide_html = self.getSource(guide_url)
+            guide_html = etree.HTML(guide_html)
+            note_html = self.getSource(note_url)
+            note_html = etree.HTML(note_html)
+            answer_html = self.getSource(answer_url)
+            answer_html = etree.HTML(answer_html)
+
+            guide_list = guide_html.xpath('//*[@id="_j_search_result_left"]/div/div/ul/li')
+            note_list = note_html.xpath('//*[@id="_j_search_result_left"]/div/div/ul/li')
+            answer_list = answer_html.xpath('//*[@id="_j_search_result_left"]/div/div/div[@class="ct-text closeto"]')
+
+            len_guide_list = len(guide_list)
+            len_note_list = len(note_list)
+            len_answer_list = len(answer_list)
+
+            now_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+            infoDict = {}
+            infoDict['scenicId'] = id
+            infoDict['guide_num'] = len_guide_list
+            infoDict['note_num'] = len_note_list
+            infoDict['answer_num'] = len_answer_list
+            infoDict['crawlTime'] = now_time
+            infoDict['scenic_name'] = name
+            infoDict['sitefrom'] = "马蜂窝"
+ return infoDict
+ except Exception as e:
+ print(e)
+
+ # def writeData(traininfoList):
+ #
+ # with open('xiecheng.csv','w',newline='') as fs:
+ #
+ # writer = csv.DictWriter(fs,fieldnames=['出发时间','出发车站','需要耗时','车次信息','到达时间','到达车站','车票价格','剩余车票'])
+ #
+ # writer.writeheader()
+ # writer.writerows(traininfoList)
+
+ def getScenic(self):
+ sql1 = "select id,name from scenics where id > 53"
+ sql2 = 'INSERT INTO scenic_index(scenicId,hot_guide_num,elite_guide_num,guide_num,note_num,answer_num,crawlTime,scenic_name,sitefrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+ # insert(sql, (2, 'wang', 13))
+ conn = pymysql.connect(host='202.193.53.151', port=3306, user='root', passwd='root', db='travel')
+ cur = conn.cursor()
+ cur.execute(sql1, None)
+ results = cur.fetchall()
+ i=0
+ for row in results:
+ i=i+1
+            pageLink = mafengwoUrl.format(str(row[1]).replace(" ", ""))
+ source = self.getSource(pageLink)
+            info = self.getEveryItem(row[0], row[1], source)
+            # print(info['scenicId'])
+            # sql2.format(info['scenicId'], info['guide_num'], info['note_num'], info['answer_num'], info['crawlTime'], info['sitefrom'])
+            args = [info['scenicId'], 0, 0, info['guide_num'], info['note_num'], info['answer_num'], info['crawlTime'],
+                    info['scenic_name'], info['sitefrom']]
+ print(args)
+ if i % 5 == 0 :
+ time.sleep(5)
+ cur.execute(sql2,
+ args)
+ conn.commit()
+ cur.close()
+ conn.close()
+
+if __name__ == '__main__':
+    Mafengwo_Scenic().getScenic()
+
+
+
+
+
+
+
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/qunaer_scenic.py" "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/qunaer_scenic.py"
new file mode 100644
index 0000000..bf54583
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/qunaer_scenic.py"
@@ -0,0 +1,89 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import os
+import xlwt
+import xlrd
+import time
+import random
+import json
+import openpyxl
+
+headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+ # "Cookie": 'QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN58=1669821195055%7C1669822256665%7C5; QN57=16678736048620.2656904960064569; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wWsaOWwPwawPwa%3DkhWDfTaD3NXsERXKj%3DX2EGEKaAWs28aSERW2a%3DWsX%2BiK3siK3saKjOVK2%2BWK2AWRamVhPwaUvt; cs_june=1e980219e0683d534a30d19cbf460690504831204710eca7ff8957e47452ae78150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c1179774d0c3f26f472d208f55073055c8e3b5a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; Hm_lvt_c56a2b5278263aa647778d304009eafc=1667874629,1668075379,1669972940; viewpoi=5942247|716544|706160|722948; uld=1-300113-1-1669974575|1-299801-3-1669974326|2-5942247-1-1667874649; SECKEY_ABVK=oBn0fel6+CD+aAN/hYsF0tz2y0FKgx63zX5Zn2S9lEM%3D; BMAP_SECKEY=6322QfSPZ1N2m2UuiZlS0H6FoMDxhQ-GnPPIgN-EndoROx7_vGs84WwiwKWL44NBDiCLOGD2d-Y7KyqD2s8PM2ytpXq2q1eZ0TzXIPrmUoDe2ij4Z5mR9gOY1KAWi2msFlzCCbX6sugCEQBjlDn83Ly8gGRLDqMpqMWaTSICD2NztE1Tawzv3BAgu-x7EUlO; QN233=FreetripTouchin; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; quinn=e5ba94e400db7ae611b28097b8ad7ddc9fea18aa074280921e89258cf82e7cb417cc1fc89ba3f04bfda0535faf80ae42; QN621=1490067914133%2Ctestssong%3DDEFAULT%26fr%3Dtejia_inton_search%261490067914133%252Ctestssong%3DDEFAULT; QN668=51%2C56%2C56%2C58%2C56%2C55%2C54%2C56%2C58%2C57%2C57%2C51%2C56; QN243=679; ariaDefaultTheme=null; QN100=WyLotaDnq7nnrZLppa3ppa7nlKjmsLTmoYLmnpfpvpnohIrmoq%2FnlLDph5HlnZHlpKflr6jpu4TmtJvnkbblr6jlpKflt7Tovabnuq%2Fnjqnlj6%2FliqDnvIbovabkuIDml6XmuLh85qGC5p6XIiwi5Yid6YGH5ryT5rGf55WF5ri457K%2B576O5ryT5rGf57K%2B5Y2O5ri46Ii56KeC6LWP5LqM5Y2B5YWD5Lq65rCR5biB6IOM5pmv5Lmd6ams55S75bGx5b6S5q2l5YW05Z2q5Y%2Bk6ZWH6Ziz5pyU5LiW55WM5rq25rSe5aWH6KeC6ZO25a2Q5bKp57qv546p5LiA5pel5ri4fOahguaelyIsIuahguael%2BmYs%2BaclOe6r%2BeOqeS4gOaXpea4uOmTtuWtkOWyqeaXoOi0reeJqXzmoYLmnpciLCLng63ojZAxMuS6uueyvuWTgeWwj%2BWboiDmvJPmsZ%2FmuLjoiLkyMOWFg%2BiDjOaZr%2BmBh%2Bm%2Bmeays%2BmTtuWtkOWyqeWNgXzmoYLmnpciLCLmoYLmnpd85qGC5p6XIl0%3D; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN267=0531040385eb6753d; QN271=aac3c78a-6161-4135-8024-4d417d4798fd; JSESSIONID=540F1DB1B565507C76E711DE50DEEE27; Hm_lpvt_c56a2b5278263aa647778d304009eafc=1669976262; viewdist=299801-1; viewbook=7673685|5804838|7405861; _vi=oVDC9e1VW3oiCf8HuMZBgBCq212ulsphL4ZvksnfyM24u9ptCRpd6nwZ_dl356Rh70BPTkTu65nuFpEFZTuI0pekzVy6x6EWIVwDrft6xlPPMZ0c2DO6nWnwUxB0zc_J36j7pNWamepyavD-W6SanJmZzLr59gUrgIrbH3thSQUe'
+}
+# Current date strings
+from datetime import date, timedelta
+
+today = time.strftime("%Y-%m-%d", time.localtime())
+tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+
+from MysqlConnect import *
+
+mysql = MysqlConnect()
+
+class Qunaer_Scenic:
+ async def getGuild(self,item, session):
+ try:
+ async with session.get(item["start_heat"]) as res:
+ resp = await res.text()
+ start_heat = 0
+ result = re.compile(r'data-beacon="tab_gonglue">攻略 \((.*?)\)').findall(resp)
+ if result != []:
+ start_heat = result[0]
+ async with session.get(item["elite_heat"]) as res:
+ resp = await res.text()
+ elite_heat = 0
+ result = re.compile(r'data-beacon="tab_gonglue">攻略 \((.*?)\)').findall(resp)
+ if result != []:
+ elite_heat = result[0]
+ async with session.get(item["hot_heat"]) as res:
+ resp = await res.text()
+ hot_heat = 0
+ result = re.compile(r'data-beacon="tab_gonglue">攻略 \((.*?)\)').findall(resp)
+ if result != []:
+ hot_heat = result[0]
+ args = (item["id"], item["name"], start_heat, elite_heat, hot_heat, today, "去哪儿")
+ print(args)
+ # sql = f'INSERT INTO scenic_index(scenicId,scenic_name,guide_num,elite_guide_num_num,hot_guide_num,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s);'
+ # mysql.insert(sql, args)
+
+ except Exception as e:
+ print("comment报错", e)
+ # print(item)
+ # print("报错页数",index,sightId)
+
+
+ async def getScenic(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+ results = mysql.query("select id,name,gw_url from scenics where gw_url !='' ", None)
+ tasks = []
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ url = row[2]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "url": url,
+ })
+ print("去哪儿网站的所有景区长度", len(url_list))
+ for item in url_list:
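+                # build the three Qunar guide-search URLs (start_heat/elite_heat/hot_heat sort orders)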
+ item['start_heat'] = f"https://travel.qunar.com/search/gonglue/{item['name']}/start_heat/1.htm"
+ item['elite_heat'] = f"https://travel.qunar.com/search/gonglue/{item['name']}/elite_heat/1.htm"
+ item['hot_heat'] = f"https://travel.qunar.com/search/gonglue/{item['name']}/hot_heat/1.htm"
+ task = asyncio.create_task(self.getGuild(item, session))
+ tasks.append(task)
+ await asyncio.wait(tasks)
+ # time.sleep(5)
+            # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+
+
+if __name__ == "__main__":
+    asyncio.run(Qunaer_Scenic().getScenic())
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/xiecheng_scenic.py" "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/xiecheng_scenic.py"
new file mode 100644
index 0000000..d67f8dc
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\346\224\273\347\225\245/xiecheng_scenic.py"
@@ -0,0 +1,89 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import time
+import random
+import json
+
+headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+ # "Cookie": 'QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN58=1669821195055%7C1669822256665%7C5; QN57=16678736048620.2656904960064569; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wWsaOWwPwawPwa%3DkhWDfTaD3NXsERXKj%3DX2EGEKaAWs28aSERW2a%3DWsX%2BiK3siK3saKjOVK2%2BWK2AWRamVhPwaUvt; cs_june=1e980219e0683d534a30d19cbf460690504831204710eca7ff8957e47452ae78150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c1179774d0c3f26f472d208f55073055c8e3b5a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; Hm_lvt_c56a2b5278263aa647778d304009eafc=1667874629,1668075379,1669972940; viewpoi=5942247|716544|706160|722948; uld=1-300113-1-1669974575|1-299801-3-1669974326|2-5942247-1-1667874649; SECKEY_ABVK=oBn0fel6+CD+aAN/hYsF0tz2y0FKgx63zX5Zn2S9lEM%3D; BMAP_SECKEY=6322QfSPZ1N2m2UuiZlS0H6FoMDxhQ-GnPPIgN-EndoROx7_vGs84WwiwKWL44NBDiCLOGD2d-Y7KyqD2s8PM2ytpXq2q1eZ0TzXIPrmUoDe2ij4Z5mR9gOY1KAWi2msFlzCCbX6sugCEQBjlDn83Ly8gGRLDqMpqMWaTSICD2NztE1Tawzv3BAgu-x7EUlO; QN233=FreetripTouchin; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; quinn=e5ba94e400db7ae611b28097b8ad7ddc9fea18aa074280921e89258cf82e7cb417cc1fc89ba3f04bfda0535faf80ae42; QN621=1490067914133%2Ctestssong%3DDEFAULT%26fr%3Dtejia_inton_search%261490067914133%252Ctestssong%3DDEFAULT; QN668=51%2C56%2C56%2C58%2C56%2C55%2C54%2C56%2C58%2C57%2C57%2C51%2C56; QN243=679; ariaDefaultTheme=null; QN100=WyLotaDnq7nnrZLppa3ppa7nlKjmsLTmoYLmnpfpvpnohIrmoq%2FnlLDph5HlnZHlpKflr6jpu4TmtJvnkbblr6jlpKflt7Tovabnuq%2Fnjqnlj6%2FliqDnvIbovabkuIDml6XmuLh85qGC5p6XIiwi5Yid6YGH5ryT5rGf55WF5ri457K%2B576O5ryT5rGf57K%2B5Y2O5ri46Ii56KeC6LWP5LqM5Y2B5YWD5Lq65rCR5biB6IOM5pmv5Lmd6ams55S75bGx5b6S5q2l5YW05Z2q5Y%2Bk6ZWH6Ziz5pyU5LiW55WM5rq25rSe5aWH6KeC6ZO25a2Q5bKp57qv546p5LiA5pel5ri4fOahguaelyIsIuahguael%2BmYs%2BaclOe6r%2BeOqeS4gOaXpea4uOmTtuWtkOWyqeaXoOi0reeJqXzmoYLmnpciLCLng63ojZAxMuS6uueyvuWTgeWwj%2BWboiDmvJPmsZ%2FmuLjoiLkyMOWFg%2BiDjOaZr%2BmBh%2Bm%2Bmeays%2BmTtuWtkOWyqeWNgXzmoYLmnpciLCLmoYLmnpd85qGC5p6XIl0%3D; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN267=0531040385eb6753d; QN271=aac3c78a-6161-4135-8024-4d417d4798fd; JSESSIONID=540F1DB1B565507C76E711DE50DEEE27; Hm_lpvt_c56a2b5278263aa647778d304009eafc=1669976262; viewdist=299801-1; viewbook=7673685|5804838|7405861; _vi=oVDC9e1VW3oiCf8HuMZBgBCq212ulsphL4ZvksnfyM24u9ptCRpd6nwZ_dl356Rh70BPTkTu65nuFpEFZTuI0pekzVy6x6EWIVwDrft6xlPPMZ0c2DO6nWnwUxB0zc_J36j7pNWamepyavD-W6SanJmZzLr59gUrgIrbH3thSQUe'
+}
+# Current date strings
+from datetime import date, timedelta
+
+today = time.strftime("%Y-%m-%d", time.localtime())
+tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+
+from MysqlConnect import *
+
+mysql = MysqlConnect()
+
+
+class Xiecheng_Scenic:
+ async def getGuild(self,item, session):
+ try:
+ async with session.post(item["note_url"], json=item['note_data']) as res:
+ resp = await res.json()
+ note_num = 0
+ if 'total' in resp:
+ note_num = resp['total']
+ async with session.post(item["answer_url"], json=item['answer_data']) as res:
+ resp = await res.json()
+ answer_num = 0
+ if 'total' in resp:
+ answer_num = resp['total']
+ args = (item["id"], item["name"], answer_num, note_num, today, "携程")
+ print(args)
+ sql = f'INSERT INTO scenic_index(scenicId,scenic_name,answer_num,note_num,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+
+ except Exception as e:
+ print("comment报错", e)
+
+
+ async def getScenic(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+ results = mysql.query("select id,name,gw_url from scenics", None)
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ url_list.append({
+ "id": id,
+ "name": name,
+ })
+ tasks = []
+ print("携程网站的所有景区长度", len(url_list))
+ i = 0
+ for item in url_list:
+            item['answer_url'] = "https://m.ctrip.com/restapi/soa2/20591/getGsOnlineResult?_fxpcqlniredt=09031172114453342165&x-traceID=09031172114453342165-1670145454163-2467091"
+ item['answer_data'] = {"keyword": f"{item['name']}", "pageIndex": 1, "pageSize": 12, "tab": "gsask",
+ "sourceFrom": "",
+ "profile": False,
+ "head": {"cid": "09031172114453342165", "ctok": "", "cver": "1.0", "lang": "01",
+ "sid": "8888",
+ "syscode": "09", "auth": "", "xsid": "", "extension": []}}
+ item['note_url'] = "https://m.ctrip.com/restapi/soa2/20591/getGsOnlineResult?_fxpcqlniredt=09031172114453342165&x-traceID=09031172114453342165-1670145707306-8611319"
+ item['note_data'] = {"keyword": f"{item['name']}", "pageIndex": 1, "pageSize": 12, "tab": "travelnotes",
+ "sourceFrom": "",
+ "profile": False,
+ "head": {"cid": "09031172114453342165", "ctok": "", "cver": "1.0", "lang": "01",
+ "sid": "8888",
+ "syscode": "09", "auth": "", "xsid": "", "extension": []}}
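+            # both payloads hit the same m.ctrip.com SOA endpoint; only the tab field ("gsask" vs "travelnotes") differs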
+ # print(item)
+            i = i + 1
+            if i > 5:
+                i = 0
+                time.sleep(5)
+ task = asyncio.create_task(self.getGuild(item, session))
+ tasks.append(task)
+ await asyncio.wait(tasks)
+            # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+if __name__ == "__main__":
+    asyncio.run(Xiecheng_Scenic().getScenic())
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/mafengwo_scenic_comment_title.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/mafengwo_scenic_comment_title.py"
new file mode 100644
index 0000000..5addc58
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/mafengwo_scenic_comment_title.py"
@@ -0,0 +1,127 @@
+import re
+import time
+import requests
+import aiohttp
+import asyncio
+import json
+# URL of the comment-list endpoint; everything after ? is the GET parameters
+comment_url='http://pagelet.mafengwo.cn/poi/pagelet/poiCommentListApi?'
+
+headers={
+ 'Referer': 'https://www.mafengwo.cn/jd/10095/gonglve.html',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'
+}  # request headers
+
+from datetime import date, timedelta
+today = time.strftime("%Y-%m-%d", time.localtime())
+tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+mysqlTableName = "hotels"
+
+class Mafengwo_Scenic:
+ async def getComment(self,item,session):
+ try:
+ # async with session.post(item["url"]) as res:
+ # resp = await res.content()
+            resp = requests.post(item['url'], headers=eval(item['headers']))
+            page = resp.content.decode('unicode-escape', 'ignore').encode('utf-8', 'ignore').decode('utf-8')  # fetch the page and decode it
+            page = page.replace('\/', '/')  # turn the escaped \/ into /
+ # print(page)
+            # total comment count
+            # NOTE: the HTML tags inside the three regex patterns below were lost in extraction;
+            # the named groups are reconstructed, their surrounding tag delimiters are not.
+            commentCountRes = re.compile(r'共有(?P<commentCount>\d+)').search(page)
+            commentCount = commentCountRes.group('commentCount')
+            # comment tag titles
+            nameobj = re.compile(r'(?P<tag>.*?)', re.S)
+            tagList = nameobj.findall(page)
+            # per-tag comment counts
+            numobj = re.compile(r'(?P<num>.*?)', re.S)
+            numList = numobj.findall(page)
+            othersComment = []
+            dic = {"好评":0,"中评":0,"差评":0}
+            for i in range(len(numList)):
+                # clean the tag title
+                tag = str(tagList[i + 1])
+                tag = tag.replace('\n','').strip().replace('人提及)','').replace('(','')
+                tag = re.sub(r'[0-9]+', '', tag)
+                # clean the count
+                num = str(numList[i])
+                num = num.replace('(','').replace(')','').replace('条','').replace('人提及','').replace(' (','').replace(')','')
+
+                if tag != "好评" and tag != "中评" and tag != "差评":
+                    othersComment.append({f"{tag}": num})
+                else:
+                    dic[f"{tag}"] = num
+ othersComment = str(othersComment)
+ args = (
+ item["id"], item["name"], commentCount, dic["好评"], dic["中评"], dic["差评"], othersComment,
+ today, "马蜂窝")
+ print(args)
+ sql = f'INSERT INTO scenic_comment(scenicId,scenicName,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+ print("comment报错",e)
+
+ async def saveScenic(self):
+ for i in range(1,2):
+ url = "https://www.mafengwo.cn/ajax/router.php"
+ data = {
+ 'sAct':"KMdd_StructWebAjax|GetPoisByTag",
+ 'iMddid':"10095",
+ 'iTagId':"0",
+ 'iPage':20,
+ '_ts':"1669286358348",
+ '_sn':"69d4a7c89e"
+ }
+ try:
+ res = requests.post(url, headers=headers, data=data)
+ # print(res.json())
+ List = re.compile(r'/poi/(.*?).html.*?target="_blank" title="(.*?)">').findall(str(res.json()))
+ # print(List)
+ for item in List:
+                    mfw_url = 'https://pagelet.mafengwo.cn/poi/pagelet/poiCommentListApi?callback=jQuery1810866662618942958_1669200603971&params={"poi_id":"%s","page":1}&_ts=1669200604147&_sn=8e0384d86d&_=1669200604147' % (
+                        item[0])
+ mfw_headers = {
+ 'Referer': f'http://www.mafengwo.cn/poi/{item[0]}.html',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'
+ }
+ args = (mfw_url, json.dumps(mfw_headers), today, item[1])
+ print(args)
+ if i % 5 == 0:
+ time.sleep(5)
+ except Exception as e:
+ print(e)
+
+ async def getScenic(self):
+ results = mysql.query("select id,name,mfw_url,mfw_header from scenics where mfw_url !='' ", None)
+ tasks = []
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ url = row[2]
+ headers = row[3]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "url": url,
+ "headers": headers,
+ })
+ print("马蜂窝网站的所有景区长度",len(url_list))
+ i = 0
+ for item in url_list:
+ async with aiohttp.ClientSession(headers=eval(item['headers'])) as session:
+ task1 = asyncio.create_task(self.getComment(item, session))
+ i = i + 1
+ tasks.append(task1)
+ if i % 5 == 0 :
+ time.sleep(5)
+ await asyncio.wait(tasks)
+    # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+if __name__ == '__main__':
+    asyncio.run(Mafengwo_Scenic().getScenic())
+    # asyncio.run(Mafengwo_Scenic().saveScenic())
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_scenic_comment_title.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_scenic_comment_title.py"
new file mode 100644
index 0000000..dccca1f
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_scenic_comment_title.py"
@@ -0,0 +1,87 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import os
+import xlwt
+import xlrd
+import time
+import random
+import json
+import openpyxl
+
+headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+ "Cookie": 'SECKEY_ABVK=6HIqXjD0ds/vRQnCLt0mHL+BlKCMeW1C20QspZholGg%3D; BMAP_SECKEY=OwZJJ1XtuED6yxVKpYWTZlTrTfswjB7iLK1DDxx5laDPIMT285FwiY4kkMQRbmC-Qa_BYY2jMWeW6YXOu2TQBUSKqwawAnndWCRQARf00Hr7hN3NWtO8bePrI84I1wO0XUK8Bk2BjsTKbFG-9C7uvg-ijL9UVeXTY9x8XC1q_JuGPhwYESA4uJipuwkX1kLC; QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN67=4815%2C8402%2C461%2C6287%2C31980%2C6675%2C39170%2C514011%2C513120%2C512329; QN58=1668915779477%7C1668915787448%7C4; QN57=16678736048620.2656904960064569; Hm_lvt_15577700f8ecddb1a927813c81166ade=1668508423,1668670982,1668768828,1668852847; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wWKXmWUPwawPwasXwaRj8VKiGVRiTWKg%3DERjNES2OXsDwaRPsEKasWPPsiK3siK3saKjOVRtOVRjmWsamWhPwaUvt; cs_june=84ca3a9b5a98782f34be6296a1606f06eb95a586e457de9dc68fac6e9a429296150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c1179abfc7c950e9b30934146fcf8bd089a765a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; _q=U.cbkbblv3519; _s=s_3IDSC2V3W3PGZ5F7A2NNNVAAOE; _t=27907349; _v=SsLO8uhOBBxdqVHEaJ4HRRRm-S5OQ4tF_8od6DDnWkVT_ugYFgt4T06vA1JNPsidy87-YU6-Em7O13wYNxUWwYMcqZtXVYqS6D-UDVREDpp4GBSmQBKSBqR41pOUqtVzJOa7ynWOtM4YS0MiDWncGOrqjfjDGrH8PuPitoHSVLH6; QN43=2; QN42=%E5%8E%BB%E5%93%AA%E5%84%BF%E7%94%A8%E6%88%B7; _vi=vngppYwRPwCDqhIqFPtxLm89wykxl2K7lGZEPOnwB341RCRAj3afnxLN-pQ2n-drX7GENqb0dVOcAFO7QBxpE7uqmso_3vMGM223wBq9FSP8OX21p_a6qwYhay-zJs3uYLRjvOLn0RpM-D8_YQGmyG5ba2uC3XqyN76edIKIa709; QN233=FreetripTouchin; 
__qt=v1%7CVTJGc2RHVmtYMStSdWh2QnF2amE0bVovaEEzaFVaK0pTaTcrdkIzRWd6VGx1V2xGUWtLMFZwdEFqRzBVMncvd0dqbEJhMXFTZk1FUFN6M3drN2tSR0MzMzUveURkN09yZUJ6dWVkQ2VvanJDRWZYTTh5L0NYUFN1YlMydVJJMVRDVEtNZmxyQWRiTEdiVUU1ZVp1UW5xQWtHbmhTbk1NZGR3Z2tKK0lrbWFudDkvVmVxZzFsL3NLVUt1Y1dsU0N5bERITnVRM2hlN1NDbSt0TGphTnZROUVJOWdBbmNXSDNRYUpzZHBhYURLdz0%3D%7C1668915783978%7CVTJGc2RHVmtYMS9Va284c1RyZGtEOFlGUXZlUWdveFFmdGdJNWwyTHlaSVlNYWorNlF5dmJ5eTAxaUhGZ1BId3VVZFJ2RnJQRk9oUW9jVnFhenVoYXc9PQ%3D%3D%7CVTJGc2RHVmtYMThzTUJOUVkzeUErMkpQMi9vT1dSN2Zib20zaUdOeWUwQ3BHWHBldWw1dld3YWhmdCtiQ3NZbFBiTERyYkdiMnhVYlhKdmQ1Wk5MNHFyQUEzSThWKzZTMHAyQndlYWtWQVplM2hlSkY5WXVxR2Iva1VwRWsrakEvdUttcXpnYWgrVys2REVvVVVMdm9tcDJ4OGpWdDlMUHZCT2pHMndub1VVM2doRHdTdnNudjBjdS9peXNiUkFxMVc1czlmTU56b2NNR3pqZEl5Ulc4RE94VmtLMFlDWlNnWEdGVVNTaVB3YVpYZnF6ZzIwdGxmVG5xUEhzZXJVZXk1UjdFbzFnRXhCa0I4MENLOEhBdE82azlpWCs2dlkwa092Q3dsMjExYTVDSlptY3BPdm1raVpMVytoT3dEMm52Y1ZyMEY2cVBER240c2Z4MDZrcFFOL3NTVFhMcm1IZlorT2U3RFJ1d1ltTmsvem91Kyt5TDdQOEVLZGQwOVVTVnl4QkcwbmEwcGo2T1lXc0U4c3l0UzlMY1k0bGpNc1lMSkVKODNkMVdPdzdJUklZYnE0eDdyYVVETU9nV2NTNTlHenRJS1ROZ3VvYXBocGszS0FkZWlZMGNtdWlBVFBzRTFKVTRwVjdCU3EwS2hMeGhqTUtGa1NEOGtkdlEzMS92VHB2MmxENUZ1aEhodEU1K1d5Q2RVL3FzaDhPa2xuUGpsQ0tJc1ZaeGNGL3hRU0NMclVEeXMzbjhvTEZ2RGlZMUE1WVhhUnN5NHkrelp5Sy84S2FYTUtZM0ZNR3A5ZWdqTkdobWlZOWt3T1FodHhuVE1HZ0xuQWI4alhuNkg0WkhLQXM4ZUcwMzlpQ2JFZFRKdFZUajMxMnFqNkpoU1VXZUNaa1dYdDJFWkFwZ1VscFQ0emFvM3d3dFFqQjlGdWlJMDU3aXR2L3BPSU1VRWFGRVQxSi9kV0xzVy9EdzRXVHVCU0NBMFJGaGlNVm5qN1JUMHhIV0VpS1QyWkVIeWF2c3dWTU5iNU1QQytWREh0OG5SRWJwOGE5N1g3OHVYT29ldENmb250OHZMNDVnOUlxdHM0N0IzUmFVMEJ4eitGNUJ1L3pVVXM5WDRZTURlS09SSUNkL3c9PQ%3D%3D; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; quinn=e5ba94e400db7ae611b28097b8ad7ddc9fea18aa074280921e89258cf82e7cb417cc1fc89ba3f04bfda0535faf80ae42; QN621=1490067914133%2Ctestssong%3DDEFAULT%26fr%3Dtejia_inton_search%261490067914133%252Ctestssong%3DDEFAULT; QN668=51%2C56%2C56%2C58%2C56%2C55%2C54%2C56%2C58%2C57%2C57%2C51%2C56; ariaDefaultTheme=null; QN63=%E6%A1%82%E6%9E%97%7C%E9%87%8D%E5%BA%86%7Cgl%20%7C%E9%98%B3%E6%9C%94%E5%8A%A8%E6%84%9F6D%E7%94%B5%E5%BD%B1%7C%E4%B8%80%E9%94%8B%E8%B6%8A%E9%87%8E%E5%B1%B1%E5%9C%B0%E8%BD%A6%E4%BF%B1%E4%B9%90%E9%83%A8%7C%E6%A1%82%E6%9E%97%E5%86%9B%E5%8D%9A%E5%9B%AD; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN44=cbkbblv3519; QN267=05310403864bcfa85; QN163=0; JSESSIONID=1AD098F421DEEF0CDC162A9D3277ECCE; QN271=a9ec74f7-b22b-4855-9aed-c212fe90a582; QN71="MTgwLjEzNi43MC41MzrmoYLmnpc6MQ=="; Hm_lpvt_15577700f8ecddb1a927813c81166ade=1668915784; QN1231=0; activityClose=1; QN243=15; QN310=hrec_zaj86'
+}
+# Get today's and tomorrow's date strings
+from datetime import date, timedelta
+today = time.strftime("%Y-%m-%d",time.localtime())
+tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d")
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+class Qunaer_Scenic:
+ async def getComment(self,item, index, session,dic,comments):
+ try:
+ async with session.get(item["url"]) as res:
+ resp = await res.json()
+ # print(resp)
+ dic["score"] = str(resp["data"]["score"])
+ dic["commentCount"] = str(resp["data"]["commentCount"])
+ dic["好评"] = 0
+ dic["中评"] = 0
+ dic["差评"] = 0
+ # commentList = resp["data"]["commentList"]
+ if index == 1:
+ othersComment = []
+ tagList = resp["data"]["tagList"]
+ # print(tagList)
+ for tag in tagList:
+ tagName = tag["tagName"]
+ tagNum = tag["tagNum"]
+ if tagName != "好评" and tagName != "中评"'' and tagName != "差评":
+ othersComment.append({f"{tagName}":tagNum})
+ dic[f"{tagName}"] = tagNum
+ othersComment = str(othersComment)
+ args = (item["id"],item["name"],dic["score"],dic["commentCount"],dic["好评"],dic["中评"],dic["差评"],othersComment,today,"去哪儿")
+ print(args)
+ sql = f'INSERT INTO scenic_comment(scenicId,scenicName,score,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+ mysql.insert(sql,args)
+ except Exception as e:
+ print("comment报错",e)
+ print(item)
+ # print("报错页数",index,sightId)
+
+ async def getScenic(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+ results = mysql.query("select id,name,gw_url from scenics where gw_url !=''", None)
+ tasks = []
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ url = row[2]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "url": url,
+ })
+ print("去哪儿网站的所有景区长度",len(url_list))
+ i = 0
+ for item in url_list:
+ dic = {}
+ comments = {}
+ task1 = asyncio.create_task(self.getComment(item, 1, session, dic, comments))
+ i = i + 1
+ tasks.append(task1)
+ if i % 5 == 0 :
+ await asyncio.sleep(5)  # pause between batches without blocking the event loop
+ await asyncio.wait(tasks)
+ # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+if __name__ == "__main__":
+ asyncio.run(Qunaer_Scenic().getScenic())
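+
+# Throttle sketch (illustrative only; nothing above calls it): instead of
+# pausing after every fifth task, a semaphore caps how many requests are in
+# flight at once without ever blocking the event loop.
+async def _gather_limited(coros, limit=5):
+    sem = asyncio.Semaphore(limit)
+    async def _run(coro):
+        async with sem:
+            return await coro
+    return await asyncio.gather(*(_run(c) for c in coros))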
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py"
new file mode 100644
index 0000000..82ce79c
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py"
@@ -0,0 +1,38 @@
+# coding:utf-8
+# version:python3.7
+# author:Ivy
+
+from applications.common.tasks.景区评论标题.mafengwo_scenic_comment_title import Mafengwo_Scenic
+from applications.common.tasks.景区评论标题.qunaer_scenic_comment_title import Qunaer_Scenic
+from applications.common.tasks.景区评论标题.tongcheng_scenic_comment_title import Tongcheng_Scenic
+from applications.common.tasks.景区评论标题.xiecheng_scenic_comment_title import Xiecheng_Scenic
+import asyncio
+import time
+
+mafengwo = Mafengwo_Scenic()
+qunaer = Qunaer_Scenic()
+tongcheng = Tongcheng_Scenic()
+xiecheng = Xiecheng_Scenic()
+
+class Scenic:
+ def run(self):
+ print("开始爬取各个网站的评论标题!")
+ time_start=time.time()
+
+ asyncio.run(xiecheng.getScenic())
+ print("携程爬取结束")
+ asyncio.run(tongcheng.getScenic())
+ print("同程爬取结束")
+ asyncio.run(qunaer.getScenic())
+ print("去哪儿爬取结束")
+ asyncio.run(mafengwo.getScenic())
+ print("马蜂窝爬取结束")
+
+ time_end=time.time()
+ print(' time cost ',time_end-time_start,'s')
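+
+    # A concurrent variant (sketch only; the task runner still calls run):
+    # the four sites are independent, so a single event loop can fetch them
+    # together instead of four back-to-back asyncio.run() calls.
+    def run_concurrent(self):
+        async def _all():
+            await asyncio.gather(
+                xiecheng.getScenic(),
+                tongcheng.getScenic(),
+                qunaer.getScenic(),
+                mafengwo.getScenic(),
+            )
+        asyncio.run(_all())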
+
+
+
+
+
+
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py"
new file mode 100644
index 0000000..4c69e1d
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/tongcheng_scenic_comment_title.py"
@@ -0,0 +1,80 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import csv
+import json
+import os
+import time
+
+headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+}
+from datetime import date, timedelta
+today = time.strftime("%Y-%m-%d",time.localtime())
+tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d")
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+# Crawl the comment summary of a single scenic spot
+class Tongcheng_Scenic:
+ async def getComment(self,item, pageIndex, session, dic):
+ try:
+ async with session.post(item['url']) as res:
+ res1 = await res.text()
+ res1 = json.loads(res1)
+ if pageIndex == 1:
+ dic['degreeLevel'] = res1['degreeLevel']
+ dic['totalNum'] = res1['totalNum']
+ dic['goodNum'] = res1['goodNum']
+ dic['midNum'] = res1['midNum']
+ dic['badNum'] = res1['badNum']
+ dic['hasImgNum'] = res1['hasImgNum']
+ dpTagList = res1['dpTagList']
+ othersComment = []
+ i = 0
+ if dpTagList is not None:
+ for dpTag in dpTagList:
+ if i > 5 :
+ othersComment.append({f"{dpTag['tagName']}":dpTag['tagNum']})
+ i = i + 1
+ othersComment = str(othersComment)
+ args = (item["id"], item["name"], dic["degreeLevel"], dic["totalNum"], dic['goodNum'], dic['midNum'], dic['badNum'], othersComment,today, "同程")
+ print(args)
+ sql = 'INSERT INTO scenic_comment(scenicId,scenicName,satisfy_present,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+ print(e)
+
+ async def getScenic(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+ results = mysql.query("select id,name,tc_url from scenics where tc_url !='' ", None)
+ tasks = []
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ url = row[2]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "url": url,
+ })
+ print("同程网站的所有景区长度", len(url_list))
+ i = 0
+ for item in url_list:
+ dic = {}
+ task = asyncio.create_task(self.getComment(item, 1, session, dic))
+ i = i + 1
+ tasks.append(task)
+ if i % 5 == 0 :
+ await asyncio.sleep(5)  # pause between batches without blocking the event loop
+ await asyncio.wait(tasks)
+ # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+if __name__ == '__main__':
+ asyncio.run(Tongcheng_Scenic().getScenic())
+
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_scenic_comment_title.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_scenic_comment_title.py"
new file mode 100644
index 0000000..33b9bde
--- /dev/null
+++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_scenic_comment_title.py"
@@ -0,0 +1,145 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import csv
+import json
+import time
+
+headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+}
+
+from datetime import date, timedelta
+today = time.strftime("%Y-%m-%d",time.localtime())
+tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d")
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+
+class Xiecheng_Scenic:
+ # Crawl the basic information of a scenic spot
+ def getBaseInfo(self,html):
+ # Initialize the search pattern. The literal HTML between the groups was
+ # lost from this patch; group names are recovered from how `dic` is used
+ # below, and '<' stands in for the missing tag delimiters.
+ dic = {}
+ obj = re.compile(r'(?P<name>.*?)<.*?'
+ r'titleTips">(?P<level>.*?)<.*?'
+ r'commentScoreNum">(?P<score>.*?)<.*?'
+ r'hover-underline">(?P<commentNum>.*?)<.*?'
+ # r'地址(?P<address>.*?)<.*?'
+ r'开放时间(?P<openTime>.*?)<.*?'
+ r',"poiId":(?P<poiId>.*?),"poiType"', re.S)
+ # Some spots have no star level; fall back to 0 and drop the group
+ tempobj1 = re.compile(r'titleTips">(?P<level>.*?)<.*?', re.S)
+ tempres1 = tempobj1.search(html)
+ if tempres1 is None:
+ dic['level'] = 0
+ obj = re.compile(r'(?P<name>.*?)<.*?'
+ r'commentScoreNum">(?P<score>.*?)<.*?'
+ r'hover-underline">(?P<commentNum>.*?)<.*?'
+ # r'地址(?P<address>.*?)<.*?'
+ r'开放时间(?P<openTime>.*?)<.*?'
+ r',"poiId":(?P<poiId>.*?),"poiType"', re.S)
+ else:
+ dic['level'] = tempres1.group('level')
+ # Some spots have no score; fall back to 0 and drop the group
+ tempobj2 = re.compile(r'commentScoreNum">(?P<score>.*?)<.*?'
+ r'hover-underline">(?P<commentNum>.*?)<.*?', re.S)
+ tempres2 = tempobj2.search(html)
+ if tempres2 is None:
+ dic['score'] = 0
+ obj = re.compile(r'(?P<name>.*?)<.*?'
+ # r'地址(?P<address>.*?)<.*?'
+ r'开放时间(?P<openTime>.*?)<.*?'
+ r',"poiId":(?P<poiId>.*?),"poiType"', re.S)
+ else:
+ dic['score'] = tempres2.group('score')
+ # Some spots have no comment count
+ tempobj3 = re.compile(r'hover-underline">(?P<commentNum>.*?)<.*?', re.S)
+ tempres3 = tempobj3.search(html)
+ if tempres3 is None:
+ dic['commentNum'] = 0
+ return dic
+ else:
+ dic['commentNum'] = tempres3.group('commentNum')
+ # Finally run the full pattern and keep whatever it captured
+ resp1 = obj.search(html)
+ # print(resp1.group('name') + '爬取成功')
+ if resp1 is not None:
+ dic.update(resp1.groupdict())  # update() keeps the fallback values set above
+ if tempres1 is not None:
+ dic['level'] = dic['level'].replace('', '')
+ return dic
+
+ # Crawl the comment tag titles
+ def getCommentTitle(self,html, dic,othersComment):
+ obj = re.compile(r'"hotTag">(?P.*?).*?', re.S)
+ titles = obj.finditer(html)
+ i = 0
+ for item in titles:
+ good = item.group('title').split('(')  # the split delimiter was lost from the patch; assuming "label(count)" titles
+ good[1] = good[1].replace('(', '').replace(')', '')
+ # print(good)
+ if good[0] == '好评' or good[0] == '差评':
+ dic[f'{good[0]}'] = good[1]
+ else:
+ othersComment.append({f"{good[0]}": good[1]})
+ i = i + 1
+
+ # Crawl the detail page of a single spot
+ async def getDetail(self,item, session):
+ tasks = []
+ try:
+ async with session.get(item['url']) as res:
+ html = await res.text()
+ dic = self.getBaseInfo(html)
+ othersComment = []
+ dic["好评"] = 0
+ dic["中评"] = 0
+ dic["差评"] = 0
+ self.getCommentTitle(html, dic, othersComment)
+ othersComment = str(othersComment)
+ args = (item["id"], item["name"], dic["score"], dic["commentNum"], dic["好评"], dic["中评"], dic["差评"], othersComment,
+ today,"携程")
+ print(args)
+ sql = f'INSERT INTO scenic_comment(scenicId,scenicName,score,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+ print("comment报错",e)
+ await asyncio.sleep(5)
+ await self.getDetail(item, session)  # retry after the pause (unbounded; see the sketch after this file)
+
+
+ async def getScenic(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+ results = mysql.query("select id,name,xc_url from scenics where xc_url !=''", None)
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ url = row[2]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "url": url,
+ })
+ tasks = []
+ print("携程网站的所有景区长度", len(url_list))
+ i = 0
+ for item in url_list:
+ task = asyncio.create_task(self.getDetail(item, session))
+ i = i + 1
+ tasks.append(task)
+ if i % 5 == 0 :
+ await asyncio.sleep(5)  # pause between batches without blocking the event loop
+ await asyncio.wait(tasks)
+ # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+if __name__ == '__main__':
+ asyncio.run(Xiecheng_Scenic().getScenic())
\ No newline at end of file
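+
+# Bounded-retry sketch (illustrative, not wired in): getDetail above retries
+# itself with no cap, so a permanently failing URL never terminates. A capped
+# helper, usable as `await _with_retries(lambda: self.getDetail(item, session))`:
+async def _with_retries(factory, attempts=3, delay=5):
+    for attempt in range(attempts):
+        try:
+            return await factory()
+        except Exception as e:
+            print("attempt", attempt + 1, "failed:", e)
+            await asyncio.sleep(delay)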
diff --git "a/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_route_comment_title.py" "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_route_comment_title.py"
new file mode 100644
index 0000000..a00a19c
--- /dev/null
+++ "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_route_comment_title.py"
@@ -0,0 +1,89 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import csv
+import json
+import time
+
+headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+ # 'Referer': "https://glsh7.package.qunar.com/user/detail.jsp?id=3078148931&osrc=tts_tuan&rttp=%E6%9C%AC%E5%9C%B0%E6%B8%B8&dep=5qGC5p6X&arr=5qGC5p6X&ftdt=2023-03-01%2C2023-03-01&qssrc=eyJ0cyI6IjE2Njk4NzM1MTgyMzUiLCJzcmMiOiJ1bmRlZmluZWQuZW52YW5vIiwiYWN0Ijoic2Nyb2xsIiwicmFuZG9tIjoiOTU1MjAwIn0=",
+ # 'Cookie': "QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN58=1669821195055%7C1669822256665%7C5; QN57=16678736048620.2656904960064569; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wVK3OaUPwawPwas2sXsjNXs2mWSXnXPEIaRHIaPX8X%3DDOaRanXPEhEPjAiK3siK3saKjOVKjsaSaAas38ahPwaUvt; cs_june=173605640a003a620f2f106b211063ea2287c94e80d66389452136307aa6d7d9150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c117951bd5a81c5254ab3bab7748a9aa6d8185a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; _q=U.cbkbblv3519; _s=s_3IDSC2V3W3PGZ5F7A2NNNVAAOE; _t=27907349; _v=SsLO8uhOBBxdqVHEaJ4HRRRm-S5OQ4tF_8od6DDnWkVT_ugYFgt4T06vA1JNPsidy87-YU6-Em7O13wYNxUWwYMcqZtXVYqS6D-UDVREDpp4GBSmQBKSBqR41pOUqtVzJOa7ynWOtM4YS0MiDWncGOrqjfjDGrH8PuPitoHSVLH6; QN43=2; QN42=%E5%8E%BB%E5%93%AA%E5%84%BF%E7%94%A8%E6%88%B7; _vi=CxIUDXSKKXrdfKW8a_JOt7FdAzF3YVARuSGejExpLtNTYJb0IsR-5f82yRcybhrWWwl3aU7KqT10nKk_ydXwYxMzUiLL1hgdynGc4YfMr2UYeME-S_UnXUKnHzth2xeCRbsBgBPNuA-aM44OzN_1OoHFpGGhOCYcEmmLrjtCInJi; QN233=FreetripTouchin; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; quinn=e5ba94e400db7ae611b28097b8ad7ddc9fea18aa074280921e89258cf82e7cb417cc1fc89ba3f04bfda0535faf80ae42; QN621=1490067914133%2Ctestssong%3DDEFAULT%26fr%3Dtejia_inton_search%261490067914133%252Ctestssong%3DDEFAULT; QN668=51%2C56%2C56%2C58%2C56%2C55%2C54%2C56%2C58%2C57%2C57%2C51%2C56; QN243=572; _jzqa=1.2488123552548573700.1668855923.1669870580.1669874997.8; _jzqx=1.1669087498.1669874997.1.jzqsr=dujia%2Equnar%2Ecom|jzqct=/.-; ariaDefaultTheme=null; QN100=WyLmoYLmnpfpmLPmnJTnuq%2FnjqnkuIDml6XmuLjpk7blrZDlsqnml6DotK3nial85qGC5p6XIiwi54Ot6I2QMTLkurrnsr7lk4HlsI%2Flm6Ig5ryT5rGf5ri46Ii5MjDlhYPog4zmma%2FpgYfpvpnmsrPpk7blrZDlsqnljYF85qGC5p6XIiwi5qGC5p6XfOahguaelyJd; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN44=cbkbblv3519; QN267=053104038b8bcf1fd; QN163=0; QN271=638b35df-ba09-4ab7-a0ef-e80528529e59; _jzqc=1; _jzqckmp=1; QN61=%5B%22%E6%A1%82%E6%9E%97%E9%98%B3%E6%9C%94%E7%BA%AF%E7%8E%A9%E4%B8%80%E6%97%A5%E6%B8%B8%E9%93%B6%E5%AD%90%E5%B2%A9%E6%97%A0%E8%B4%AD%E7%89%A9%22%2C%22%E7%83%AD%E8%8D%9012%E4%BA%BA%E7%B2%BE%E5%93%81%E5%B0%8F%E5%9B%A2%20%E6%BC%93%E6%B1%9F%E6%B8%B8%E8%88%B920%E5%85%83%E8%83%8C%E6%99%AF%E9%81%87%E9%BE%99%E6%B2%B3%E9%93%B6%E5%AD%90%E5%B2%A9%E5%8D%81%22%2C%22%E6%A1%82%E6%9E%97%22%5D; _qzja=1.621578797.1669870580293.1669870580293.1669875028328.1669876047449.1669877523855..0.0.7.2; _qzjc=1; _qzjto=7.2.0; Hm_lvt_a8a41d37454fd880cdb23d6ef05d917b=1669870580; Hm_lpvt_a8a41d37454fd880cdb23d6ef05d917b=1669877524; JSESSIONID=A0CB91CD362911D601F4C9CF6971DF8D; activityClose=1; _jzqb=1.24.10.1669874997.1; _qzjb=1.1669875028328.3.0.0.0"
+}
+from datetime import date, timedelta
+
+today = time.strftime("%Y-%m-%d", time.localtime())
+tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+from MysqlConnect import *
+
+mysql = MysqlConnect()
+
+class Qunaer_Route:
+ # Crawl the comment summary
+ async def getCommentTitle(self,item, session):
+ data = json.loads(item['data'])
+ resp = None  # defined up front so the except block can print it even if the request itself fails
+ try:
+ async with session.post(item['url'],data=data) as res:
+ otherComment = []
+ resp = await res.json()
+ # print(resp)
+ resp = resp['data']
+ totalComment = resp['totalComment']
+ ratingExcellent = resp['ratingExcellent']
+ ratingAverage = resp['ratingAverage']
+ ratingAwful = resp['ratingAwful']
+ numWithImages = resp['numWithImages']
+ goodRate = 0
+ if totalComment != 0:
+ goodRate = ratingExcellent/totalComment
+ otherComment.append({'有图':numWithImages})
+ # mainCommentList = resp['mainCommentList']
+ args = (
+ item['id'], item['name'], totalComment, 0, goodRate, ratingExcellent, ratingAverage, ratingAwful,
+ str(otherComment), today, "去哪儿")
+ print(args)
+ sql = 'INSERT INTO route_comment(route_id,route_name,total,score,goodRate,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+ print("comment报错", e)
+ print(resp)
+ # time.sleep(3)
+ # async with aiohttp.ClientSession(headers=headers) as session1:
+ # await getCommentTitle(item,session1)
+
+ async def getRoute(self):
+ # results = mysql.query("select id,route_name,xc_url,xc_data from route where xc_data !='' and id = 106", None)
+ results = mysql.query("select id,route_name,gw_url,gw_data from route where gw_data !=''", None)
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ url = row[2]
+ data = row[3]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "url": url,
+ "data": data,
+ })
+ tasks = []
+ print("去哪儿网站的所有线路长度", len(url_list))
+ # print(url_list)
+ i = 0
+ # share one session across all requests; a per-item session would be
+ # closed by its `async with` before the task it spawned ever ran
+ async with aiohttp.ClientSession(headers=headers) as session:
+ for item in url_list:
+ task = asyncio.create_task(self.getCommentTitle(item, session))
+ i = i + 1
+ tasks.append(task)
+ if i % 2 == 0:
+ await asyncio.sleep(5)
+ await asyncio.wait(tasks)
+ # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+if __name__ == '__main__':
+ # test()
+ # asyncio.run(getSearch())
+ asyncio.run(Qunaer_Route().getRoute())
diff --git "a/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/route_start.py" "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/route_start.py"
new file mode 100644
index 0000000..f58ddf2
--- /dev/null
+++ "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/route_start.py"
@@ -0,0 +1,30 @@
+# coding:utf-8
+# version:python3.7
+# author:Ivy
+
+from applications.common.tasks.线路评论标题.xiecheng_route_comment_title import Xiecheng_Route
+from applications.common.tasks.线路评论标题.qunaer_route_comment_title import Qunaer_Route
+import asyncio
+import time
+
+qunaer = Qunaer_Route()
+xiecheng = Xiecheng_Route()
+
+class Route:
+ def run(self):
+ print("开始爬取各个网站的评论标题!")
+ time_start=time.time()
+
+ asyncio.run(xiecheng.getRoute())
+ print("携程爬取结束")
+ asyncio.run(qunaer.getRoute())
+ print("去哪儿爬取结束")
+
+ time_end=time.time()
+ print(' time cost ',time_end-time_start,'s')
+
+
+
+
+
+
diff --git "a/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_route_comment_title.py" "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_route_comment_title.py"
new file mode 100644
index 0000000..3a67ea4
--- /dev/null
+++ "b/applications/common/tasks/\347\272\277\350\267\257\350\257\204\350\256\272\346\240\207\351\242\230/xiecheng_route_comment_title.py"
@@ -0,0 +1,106 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import csv
+import json
+import time
+
+headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0",
+ 'Cookie':"_bfa=1.1667787832924.3h7rvk.1.1669895769722.1669970415585.59.1057.0; _ubtstatus=%7B%22vid%22%3A%221667787832924.3h7rvk%22%2C%22sid%22%3A59%2C%22pvid%22%3A1057%2C%22pid%22%3A0%7D; MKT_OrderClick=ASID=4897155952&AID=4897&CSID=155952&OUID=index&CT=1669895769884&CURL=https%3A%2F%2Fwww.ctrip.com%2F%3Fsid%3D155952%26allianceid%3D4897%26ouid%3Dindex&VAL={}; __zpspc=9.66.1669970417.1669972201.47%232%7Cwww.baidu.com%7C%7C%7C%25E6%2590%25BA%25E7%25A8%258B%25E6%2594%25BB%25E7%2595%25A5%7C%23; _jzqco=%7C%7C%7C%7C1669970417342%7C1.386356559.1667787833232.1669971673086.1669972201783.1669971673086.1669972201783.undefined.0.0.519.519; MKT_CKID=1667787833303.6cihj.yc0k; _RF1=180.136.89.152; _RSG=PI4tVah22dC4DYKmrdfaUA; _RDG=28a682bf6ceb192ebc37d846ca69b5ed63; _RGUID=c9c20ab9-1fdc-4499-a7a4-a67d70522344; MKT_Pagesource=PC; _bfaStatusPVSend=1; _bfaStatus=success; nfes_isSupportWebP=1; _ga=GA1.2.728287556.1667875987; Session=SmartLinkCode=ctrip&SmartLinkKeyWord=&SmartLinkQuary=_UTF.&SmartLinkHost=ctrip.com&SmartLinkLanguage=zh; UUID=20EDDDB8AE46403495EFEE36FAC417C1; IsPersonalizedLogin=F; ibulanguage=CN; ibulocale=zh_cn; cookiePricesDisplayed=CNY; ibu_h5_lang=en; ibu_h5_local=en-us; Hm_lvt_37b54c42b9dde393e60c88c1a84657cb=1668156071,1668390905,1668766327,1669630249; _lizard_LZ=ghjTSPinRlQVIkJmqrUWXFopstucvwx210a3ydz754YfM6ZE89+b-eKCHOLNBGAD; intl_ht1=h4=33_75424975,33_782288,33_6550062,2_441618; _abtest_userid=294c1513-b267-4324-8a01-750ac3d84f81; _gcl_au=1.1.920354234.1668409170; U_TICKET_SELECTED_DISTRICT_CITY=%7B%22value%22%3A%7B%22districtid%22%3A%222%22%2C%22districtname%22%3A%22%E4%B8%8A%E6%B5%B7%22%2C%22isOversea%22%3Anull%7D%2C%22createTime%22%3A1668416843913%2C%22updateDate%22%3A1668416843913%7D; FlightIntl=Search=[%22KWL|%E6%A1%82%E6%9E%97(KWL)|33|KWL|480%22%2C%22BJS|%E5%8C%97%E4%BA%AC(BJS)|1|BJS|480%22%2C%222022-11-17%22]; Hm_lvt_576acc2e13e286aa1847d8280cd967a5=1668916753; Union=OUID=index&AllianceID=4897&SID=155952&SourceID=&createtime=1669895770&Expires=1670500569884; login_uid=C0AB45AFF50D550863B877680E735ABE; login_type=0; cticket=337E22BDC21DD4985842195D8CEDEC0CE79886C069743562391907D8E4575607; AHeadUserInfo=VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0; DUID=u=C0AB45AFF50D550863B877680E735ABE&v=0; IsNonUser=F; appFloatCnt=1; StartCity_Pkg=PkgStartCity=33; GUID=09031172114453342165; _bfs=1.61; MKT_CKID_LMT=1669970417053; _bfi=p1%3D290570%26p2%3D290601%26v1%3D1056%26v2%3D1055"
+}
+from datetime import date, timedelta
+
+today = time.strftime("%Y-%m-%d", time.localtime())
+tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+from MysqlConnect import *
+
+mysql = MysqlConnect()
+
+
+# Crawl the comment tag summary
+class Xiecheng_Route:
+ async def getCommentTitle(self,item, session):
+ data = json.loads(item['data'])
+ resp = None  # defined up front so the except block can print it even if the request itself fails
+ try:
+ async with session.post(item['url'], json=data) as res:
+ dic = {}
+ otherComment = []
+ resp = await res.json()
+ commentAggregation = resp['commentAggregation']
+ totalCount = resp['totalCount']
+ scoreAvg = commentAggregation['scoreAvg']
+ goodRate = commentAggregation['goodRate']
+ commonTags = commentAggregation['commonTags']
+ dic['score'] = scoreAvg
+ dic['goodRate'] = goodRate
+ dic['total'] = totalCount
+ for comment in commonTags:
+ totalCount = comment['totalCount']
+ displayName = comment['displayName']
+ if displayName == '好评' or displayName == '中差评':
+ dic[f'{displayName}'] = totalCount
+ else:
+ otherComment.append({f'{displayName}': totalCount})
+ if 'tourTypeTags' in commentAggregation:
+ tourTypeTags = commentAggregation['tourTypeTags']
+ for comment in tourTypeTags:
+ totalCount = comment['totalCount']
+ displayName = comment['displayName']
+ otherComment.append({f'{displayName}': totalCount})
+ if 'aiTags' in commentAggregation:
+ aiTags = commentAggregation['aiTags']
+ for comment in aiTags:
+ totalCount = comment['totalCount']
+ displayName = comment['displayName']
+ otherComment.append({f'{displayName}': totalCount})
+ if 'subItemTags' in commentAggregation:
+ subItemTags = commentAggregation['subItemTags']
+ for comment in subItemTags:
+ totalCount = comment['totalCount']
+ displayName = comment['displayName']
+ otherComment.append({f'{displayName}': totalCount})
+ args = (
+ item['id'], item['name'], dic['total'], dic['score'], dic['goodRate'], dic['好评'], dic['中差评'], dic['中差评'],
+ str(otherComment), today, "携程")
+ print(args)
+ sql = 'INSERT INTO route_comment(route_id,route_name,total,score,goodRate,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+ print("comment报错", e)
+ print(resp)
+
+ async def getRoute(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+ # results = mysql.query("select id,route_name,xc_url,xc_data from route where xc_data !='' and id = 106", None)
+ results = mysql.query("select id,route_name,xc_url,xc_data from route where xc_data !=''", None)
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ url = row[2]
+ data = row[3]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "url": url,
+ "data": data,
+ })
+ tasks = []
+ print("携程网站的所有线路长度", len(url_list))
+ # print(url_list)
+ i = 0
+ for item in url_list:
+ task = asyncio.create_task(self.getCommentTitle(item, session))
+ i = i + 1
+ tasks.append(task)
+ if i % 10 == 0:
+ await asyncio.sleep(5)  # pause between batches without blocking the event loop
+ await asyncio.wait(tasks)
+ # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+if __name__ == '__main__':
+ asyncio.run(Xiecheng_Route().getRoute())
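+
+# The four tag loops in getCommentTitle share one shape; a small helper
+# (sketch only, not wired in) would collapse the repetition:
+def _collect_tags(tags, otherComment):
+    for comment in tags or []:
+        otherComment.append({f"{comment['displayName']}": comment['totalCount']})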
diff --git "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py" "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
new file mode 100644
index 0000000..39a67b6
--- /dev/null
+++ "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
@@ -0,0 +1,34 @@
+# coding:utf-8
+# version:python3.7
+# author:Ivy
+
+from applications.common.tasks.酒店评论标题.xiecheng_hotel_comment_title import Xiecheng_Hotel
+from applications.common.tasks.酒店评论标题.qunaer_hotel_comment_title import Qunaer_Hotel
+from applications.common.tasks.酒店评论标题.tongcheng_hotel_comment_title import Tongcheng_Hotel
+import asyncio
+import time
+
+qunaer = Qunaer_Hotel()
+tongcheng = Tongcheng_Hotel()
+xiecheng = Xiecheng_Hotel()
+
+class Hotel:
+ def run(self):
+ print("开始爬取各个网站的评论标题!")
+ time_start=time.time()
+
+ asyncio.run(xiecheng.getHotel())
+ print("携程爬取结束")
+ asyncio.run(tongcheng.getHotel())
+ print("同程爬取结束")
+ asyncio.run(qunaer.getHotel())
+ print("去哪儿爬取结束")
+
+ time_end=time.time()
+ print(' time cost ',time_end-time_start,'s')
+
+
+
+
+
+
diff --git "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_hotel_comment_title.py" "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_hotel_comment_title.py"
new file mode 100644
index 0000000..19f04d7
--- /dev/null
+++ "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/qunaer_hotel_comment_title.py"
@@ -0,0 +1,76 @@
+import requests
+import re
+import aiohttp
+import asyncio
+import os
+import xlwt
+import xlrd
+import time
+import openpyxl
+import json
+from datetime import date, timedelta
+
+today = time.strftime("%Y-%m-%d",time.localtime())
+tomorrow = (date.today() + timedelta(days= 1)).strftime("%Y-%m-%d")
+headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0',
+ # 'Content-Type': 'application/json;charset=utf-8',
+ # 'Content-Length': '1418',
+ # 'Origin': 'https://hotel.qunar.com',
+ # 'Connection': 'keep-alive',
+ # 'Referer': 'https://hotel.qunar.com/cn/guilin/?fromDate=2022-11-16&toDate=2022-11-17&cityName=%E6%A1%82%E6%9E%97',
+ # 'Cookie': 'QN1=00009180306c48f75230434f; QN300=s%3Dbaidu; QN99=7930; QunarGlobal=10.67.197.57_-315863c_1844fb4402c_-4886|1667785799221; QN205=s%3Dbaidu; QN277=s%3Dbaidu; _i=ueHd8Zy9S8X7Cs5y-nPVKDLNsGkX; QN601=fc3340e635beebd8fed01d244dfa103f; QN269=7D87F9C05E3E11ED8278FA163EAD537B; QN48=tc_b56d9b243d79884d_1844fc7bd55_d0d9; fid=ab2cfc1a-4442-4996-8058-95e369865862; csrfToken=vwiqxqLdRWrdSUvmIn84yliNXbhGLphE; QN58=1668514571680%7C1668514571680%7C1; QN57=16678736048620.2656904960064569; ariaDefaultTheme=null; ctt_june=1654604625968##iK3wVRvNVhPwawPwa%3DjnWRa%2BES2Aa2PwW2aOaS0RE2DsEDGDE2DsERfIXSX8iK3siK3saKjOWst%2BWR3sWRX8VuPwaUvt; ctf_june=1654604625968##iK3wWKDOWuPwawPwasiGXKtmaRamVKGDEDkIVKD%3Da%3DjNX2ERaK3sWDfGX2EDiK3siK3saKjOVRPmVK3wVKaAWwPwaUvt; cs_june=9272ae1939d58083d4743676507ffd29fbabe527912f2f1973ed8370a47566c8150e2f38a8a12ca96514b111ebdac1878f7fa30cb8f280132faaa5b783ecd9d7b17c80df7eee7c02a9c1a6a5b97c117928b56a71cbfdbdae6d0a8e0c2d26aa9e5a737ae180251ef5be23400b098dd8ca; QN271AC=register_pc; QN271SL=791c41e753d68b5ac9365b726bb2960d; QN271RC=791c41e753d68b5ac9365b726bb2960d; _q=U.cbkbblv3519; _s=s_3IDSC2V3W3PGZ5F7A2NNNVAAOE; _t=27907349; _v=SsLO8uhOBBxdqVHEaJ4HRRRm-S5OQ4tF_8od6DDnWkVT_ugYFgt4T06vA1JNPsidy87-YU6-Em7O13wYNxUWwYMcqZtXVYqS6D-UDVREDpp4GBSmQBKSBqR41pOUqtVzJOa7ynWOtM4YS0MiDWncGOrqjfjDGrH8PuPitoHSVLH6; QN43=2; QN42=%E5%8E%BB%E5%93%AA%E5%84%BF%E7%94%A8%E6%88%B7; _vi=CGXWRmr0v6gQkJRlZ_6pw-bLocEdRkSo8Xwow4GZOzimn0tLx6x5Le1BMu6f87LYgsSfYHgjOFhvVsDnmRqzU0mo-HkSSge-5UpMggzcw6CbOTTb41NX1K04bOsVxvYErsEQ-dxBNHzmnLsMbpTpDhjmYVi-cwQPLk3yyborrCAc; QN233=FreetripTouchin; HN1=v1ecbd83e6109eb406ad7ee9754047124a; HN2=qunuqnuggzkcg; __qt=v1%7CVTJGc2RHVmtYMS9JYkhPaVB5VnNneGMxSHB0MjgxTW0vQk00NXIrc1JyWUdyaFhEeWtUbFBDVUk4ZEE5SUN3MFJ0emtjeUVRdkxQUStzRDdoV0NQT1VuOFlyRFZhOGhaZk9TaVJZS1hDZWtYSjdzbHhUR3dSRDRzOXhKRFlLT01BcFBnR2NBY2VaTnh3N0R6bDF3WE9tK2FjcDVPNG1nMk1DeGRqdFAwdkNuK2FaaEdubkdLSzVVUXptcnZqUmVHQXgxWFZrd0d3S08zQThySnVSYkQ0UT09%7C1668589224247%7CVTJGc2RHVmtYMTljdWtuS3FaQklDS2RDeVJDNUVIT2dHVEt3WURCWXAzZ3pNVGZSWlR5TVltSzNDWlh1aFhNK1NKamk3UG5vU21oN2ZTWnN6RHdGWkE9PQ%3D%3D%7CVTJGc2RHVmtYMTlMTFFGQnNtQlVzWUhYeUg1Wnl1WVpMalNYYTlTd3pLZHVmZ0Q2eDNXY1N3VWwwbkZTZEpndnlqeENQcDVSZFlvaFpWVHVXYld4UFh2TVo1TDFPaFJCQndSbDVZWEZUZ3U2SVJwam80cUNYZ0s4VFl3WUpXbzczQjg3TDJmL0x6NnFraDlOdTFoWG1YdVExRjh6ZlBHWGU3WWNjUFNacmJBaURKeGNjdzVVZ3plTndOL0JXekt6T0h5TXQzbGxhVGZjbFRnNFlQMUpLd3ppZHMzQlpBVzlpRmo1WkVHWDVrcDJRZHNYQmRYVTk2eFJiMVZ6cmtpMGFOTHBBVzNBbGhBWEEwZFBjOUNyekl1VGxtUlNyZ09Gc2tZa2F5dnZtS1B2emlLS2VKaFdIVXp6V21vVFhoMmRPby9sa2JVUGZ0enFZREl5bFVrME5RMVM2U1U5eFJIQjdFaCtsZ2dXT3NCWnltelpCYVZxeVVScHhWOXVNakwvd0xqVmZqT1dZZzRrUi9XanUrcTZqYlBrQWRkbHppYVUzejE1Vzg4UTBXS1oyL0lpVmtZQmNEVGVHdjBWdXNSZmw2VUtTeWRKS3VnSXBFZ3A5YmRDRWNvSWpBR3NpM2hwN2JITks0c051S2FuVC9xdGhmdjN0V1JUWUZoRWNkL092aXVqdjFuTktlZUNXa3BvSEdUSklFT0RrVVFsRldPSjdJemlDbm1RUkFkVWdEQk5ib2svZmNzQnpDUjJjOTJoMVdUanE2KytZS24vOVU0dlhGOXMvdDc1NnlVYUhub3pDZnhMQ0k1QTRUbStTMjlpNG9lSm9hd3FGTW1aeWdFRUs0OWhKZVE1c2tiZVJRL2IydkgvVmNlbmRRNTNVeVpKdkxyaFZqbjIxQVZ3Q0FKWFdmTU5XYVp4c0QrQUtmM2M%3D; SECKEY_ABVK=n0yGYaC0Uv/VO8QaVoB7LWOTFI1iG4L4ZC9VsTH65IM%3D; BMAP_SECKEY=yzvwj4ltTTURWnc-Y3gwGTG9ua0QoKNAekMiwcrQ6JAAewWk9khA6I8hsY9M6VR656LUBVS30ubB-smmXJ5vg3QAHYFamo-SGOzGHPzX10oqjUcmL7xZKw-IyJc7cEhRaug23EssK3-RhsYVSss7Ui3jjCW6AxSlVUe3Dz4v2hKyrifgZOqQQOOZ_uacKXiG; qunar-assist={%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}; QN44=cbkbblv3519; QN267=0531040383a985e0a; QN163=0; QN271=2339fa3a-72a3-4cfd-8f88-55e9645bcb62; tabIndex=0; cityUrl=guilin; cityName=%25E6%25A1%2582%25E6%259E%2597; checkInDate=2022-11-16; checkOutDate=2022-11-17',
+}
+
+from MysqlConnect import *
+mysql = MysqlConnect()
+
+class Qunaer_Hotel:
+ async def getComment(self,item, session):
+ try:
+ async with session.get(item['url']) as res:
+ resp = await res.json()
+ goodcomment = resp['data']['ratingStat']['positiveCount']
+ midcomment = resp['data']['ratingStat']['neutralCount']
+ badcomment = resp['data']['ratingStat']['negativeCount']
+ count = resp['data']['count']
+ othersComment = ""
+ args = (item["id"], item["name"], count, goodcomment, midcomment, badcomment, othersComment, today, "去哪儿")
+ print(args)
+ sql = 'INSERT INTO hotel_comment(hotelId,hotelName,num,good,middle,bad,othersComment,crawlTime,siteFrom) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s);'
+ mysql.insert(sql, args)
+ except Exception as e:
+ print("comment报错",e)
+
+ # Fetch the hotel info from the database
+ async def getHotel(self):
+ async with aiohttp.ClientSession(headers=headers) as session:
+ # fetch the urls from the database
+ results = mysql.queryHotel("select id,name,gw_url from hotels where gw_url!='' and 1000 < id and id < 1662 ", None)  # range operators restored; the comparison signs were stripped from the patch
+ tasks = []
+ url_list = []
+ for row in results:
+ id = row[0]
+ name = row[1]
+ url = row[2]
+ url_list.append({
+ "id": id,
+ "name": name,
+ "url": url,
+ })
+ # print(list)
+ print("携程网站的所有酒店长度", len(url_list))
+ index = 0
+ for item in url_list:
+ index = index + 1
+ # getComment takes (item, session) and reads item["url"] straight from the database row
+ task = asyncio.create_task(self.getComment(item.copy(), session))
+ if index % 8 == 0:
+ await asyncio.sleep(5)
+ tasks.append(task)
+ await asyncio.wait(tasks)
+ print("爬完了!!!!")
+ # close the MySQL connection
+ mysql.cur.close()
+ mysql.connection.close()
+
+
+if __name__ == '__main__':
+ # saveHotel()
+ asyncio.run(Qunaer_Hotel().getHotel())
--
Gitee
From 2c08a369d3f4638c07fb4c9eeb222d9c712cfc2d Mon Sep 17 00:00:00 2001
From: WANY
Date: Wed, 14 Dec 2022 18:26:10 +0800
Subject: [PATCH 3/3] =?UTF-8?q?=E7=99=BE=E5=BA=A6=E5=BE=AE=E5=8D=9A?=
=?UTF-8?q?=E7=AD=BE=E5=88=B0=E7=A5=A8=E6=95=B4=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../common/scrapySpiders/wangModel/scrapy.cfg | 11 +
.../wangModel/wangModel/__init__.py | 0
.../wangModel/common_spiders/__init__.py | 9 +
.../wangModel/common_spiders/baiduacc.py | 100 +
.../wangModel/common_spiders/baidusearch.py | 71 +
.../wangModel/common_spiders/baiduwords.py | 76 +
.../wangModel/common_spiders/tongtencities.py | 33 +
.../wangModel/common_spiders/tuniu_route.py | 108 +
.../wangModel/common_spiders/weather.py | 95 +
.../wangModel/common_spiders/weibosign.py | 238 ++
.../wangModel/wangModel/files/city.txt | 2901 +++++++++++++++++
.../wangModel/wangModel/files/city_cap.txt | 546 ++++
.../wangModel/wangModel/files/scenic | 95 +
.../wangModel/wangModel/items.py | 72 +
.../wangModel/wangModel/middlewares.py | 56 +
.../wangModel/wangModel/pipelines.py | 211 ++
.../wangModel/wangModel/readme.md | 11 +
.../wangModel/wangModel/settings.py | 135 +
.../wangModel/wangModel/spiders/__init__.py | 4 +
.../wangModel/wangModel/spiders/a.html | 163 +
.../wangModel/wangModel/spiders/gw.py | 58 +
.../wangModel/wangModel/spiders/main.py | 52 +
.../wangModel/wangModel/spiders/tongchen.py | 137 +
.../wangModel/spiders/tuniu_hotel.py | 187 ++
.../wangModel/spiders/tuniu_scenic.py | 97 +
.../wangModel/wangModel/spiders/weibo.py | 87 +
.../wangModel/wangModel/spiders/weixin.py | 46 +
.../wangModel/wangModel/test.csv | 271 ++
.../wangModel/wangModel/tuniu.csv | 0
.../wangModel/wangModel/utils/HbaseConn.py | 54 +
.../wangModel/wangModel/utils/citydeal.py | 27 +
.../wangModel/wangModel/utils/createTables.py | 42 +
.../wangModel/utils/hostory_weather.py | 135 +
.../wangModel/wangModel/utils/mysqlConn.py | 76 +
.../wangModel/wangModel/utils/proxys.py | 83 +
.../wangModel/wangModel/utils/weather_deal.py | 12 +
applications/common/tasks/tasks.py | 29 +-
.../weibosign.py" | 25 +
.../scenic_start.py" | 19 +-
.../\347\231\276\345\272\246/baidu_start.py" | 34 +
.../hotel_title_start.py" | 8 +
applications/view/__init__.py | 4 +-
42 files changed, 6406 insertions(+), 12 deletions(-)
create mode 100644 applications/common/scrapySpiders/wangModel/scrapy.cfg
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/__init__.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/__init__.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduacc.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baidusearch.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduwords.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tongtencities.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tuniu_route.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weather.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weibosign.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/files/city.txt
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/files/city_cap.txt
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/files/scenic
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/items.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/middlewares.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/pipelines.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/readme.md
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/settings.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/__init__.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/a.html
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/gw.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/main.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/tongchen.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_hotel.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_scenic.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/weibo.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/spiders/weixin.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/test.csv
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/tuniu.csv
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/HbaseConn.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/citydeal.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/createTables.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/hostory_weather.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/mysqlConn.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/proxys.py
create mode 100644 applications/common/scrapySpiders/wangModel/wangModel/utils/weather_deal.py
create mode 100644 "applications/common/tasks/\345\276\256\345\215\232\347\255\276\345\210\260/weibosign.py"
create mode 100644 "applications/common/tasks/\347\231\276\345\272\246/baidu_start.py"
diff --git a/applications/common/scrapySpiders/wangModel/scrapy.cfg b/applications/common/scrapySpiders/wangModel/scrapy.cfg
new file mode 100644
index 0000000..7ff7b66
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/scrapy.cfg
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.io/en/latest/deploy.html
+
+[settings]
+default = wangModel.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = wangModel
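+
+# Usage sketch (assumes a running scrapyd and the url above uncommented):
+#   scrapyd-deploy default -p wangModel
+# Local runs need no deploy:
+#   scrapy crawl <spider_name>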
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/__init__.py b/applications/common/scrapySpiders/wangModel/wangModel/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/__init__.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/__init__.py
new file mode 100644
index 0000000..24bbbb6
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/__init__.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> __init__.py
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-22 15:51
+@Desc
+=================================================='''
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduacc.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduacc.py
new file mode 100644
index 0000000..ca7b9c5
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduacc.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> baiduacc
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-15 14:25
+@Desc
+=================================================='''
+import json
+
+import requests
+from urllib import parse
+import time
+import random
+from wangModel.utils.proxys import PROXY
+from wangModel.utils.HbaseConn import HbaseUtil
+import asyncio
+import datetime
+from wangModel.utils.mysqlConn import insert,query
+import aiohttp
+import time
+class baiduacc():
+ header = {
+ "Cipher-Text":"1669014148691_1669025682395_gmyeUYFkqtWGz/Aodu6MCBx/jA/TcFYa3elCcC4PVE1i1F2XCekER0aqy9Mx1dO6Qu0Y3W2+6/ojulveu+uCC/Q1oRpRM2Iy/3YW0Dt7KogYgCtBAZulpY0RDu+dn5RiBs75lW9Ot/YIIeM4Pw5Bvtj6gwMLHLTS60hqu+o9xQdbJOQa8Dj3F2+Zyz+MXvMx1o4wulS5d/W8pIdT9n+Ud1J8ULkr3zIW2/dNMcX/53VET1S9IiG2uaG+3XDvf8rQLT8wIXKI9LwrwFI4+gZZhd/YnOMSb7reDLOo5bcfNyYRGzqpNb2Dozufe4HjuPzbvccAPU9XNigUDNyR/y5aqVUILehLWBs/bNg9OpuhvCsVumPQl/dIIDa57SKBBOHqSAx31TxH1po65FrdwblPhZF4qB9jXX/IzU1inyHNeKI=",
+ "Accept":"application/json,text/plain,*/*",
+ "Accept-Encoding":"gzip,deflate,br",
+ "Accept-Language":"keep-alive",
+ "Content-Length":"0",
+ "Host":"index.baidu.com",
+ "Origin":"https://index.baidu.com",
+ "sec-ch-ua":"'Microsoft Edge';v='107', 'Chromium';v='107', 'Not=A?Brand';v='24'",
+ "Referer":"https://index.baidu.com/v2/main/index.html",
+ "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.52",
+ "Cookie":"BIDUPSID=B772A7AE03D22C237EA5162D657EFEA8; PSTM=1646828464; ab_jid=1b5f1f7bd0d2ad6c8322197428813831b876; ab_jid_BFESS=1b5f1f7bd0d2ad6c8322197428813831b876; BAIDUID=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; H_WISE_SIDS=110085_131862_188746_194529_204904_211986_212295_213039_214795_215730_216853_216941_219623_219943_219946_222624_223064_223337_224045_224047_224436_226628_226815_227932_228650_228870_229154_229907_229967_230077_230241_230244_230248_230287_230930_231433_231628_231761_231904_231979_232055_232244_232357_232616_232755_232834_232908_233041_233368_233401_233464_233465_233518_233598_233604_233719_233924_234044_234085_234208_234225_234296_234317_234349_234382_234515_234521_234559_234670_234690_234722_234799_234924_234980_235091_235131_235174_235201_235228_235258_235398_235421_235453_235461_235511_235534_235581_235634_235770_235808_235829_235870_235969_235980_236022_236050_236052_236084_236101_236129_236239_236243_236341_236512_236515_236524_236527_236538_236611_236811_236838; MCITY=-142%3A; delPer=0; PSINO=6; BAIDUID_BFESS=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; BA_HECTOR=aha5208la10kagah8lak0qlp1ho93q81f; ZFY=xtXF:ABfiWEAgoaeInpi6iku9vkiVh7JUT1fVvaM9stc:C; bdindexid=p0k1jlaqpura3afsp3oajf1j73; BCLID=6870042325976552650; BCLID_BFESS=6870042325976552650; BDSFRCVID=J70OJexroG0leprj73-kMHDF9QpWxY5TDYrELPfiaimDVu-VJeC6EG0Pts1-dEu-EHtdogKKBgOTH4FF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; BDSFRCVID_BFESS=J70OJexroG0leprj73-kMHDF9QpWxY5TDYrELPfiaimDVu-VJeC6EG0Pts1-dEu-EHtdogKKBgOTH4FF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF=tbP8oK-MJKD3fP36q47HMtu_hlrt2D62aKDs3qTYBhcqEIL4Mj5E-P_wMGJ3Jp5uWgnlVlvVfx_bMUbSjln_0J_JhHon2nQwanrU_DD5yq5nhMJpXj7JDMP0XJbK35Oy523i5J3vQpPMslQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xb6_0DjPthxO-hI6aKC5bL6rJabC3OC0xXU6q2bDeQN3QKROH2JkesRvzWpbPbfjx3n7Zjq0vWq54WpOh2C60WlbCb664OR5JjxonDh83KNLLKUQtHmT7LnbO5hvvER3O3MAMQxKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQXH_E5bj2qRCf_IKM3e; H_BDCLCKID_SF_BFESS=tbP8oK-MJKD3fP36q47HMtu_hlrt2D62aKDs3qTYBhcqEIL4Mj5E-P_wMGJ3Jp5uWgnlVlvVfx_bMUbSjln_0J_JhHon2nQwanrU_DD5yq5nhMJpXj7JDMP0XJbK35Oy523i5J3vQpPMslQ3DRoWXPIqbN7P-p5Z5mAqKl0MLPbtbb0xb6_0DjPthxO-hI6aKC5bL6rJabC3OC0xXU6q2bDeQN3QKROH2JkesRvzWpbPbfjx3n7Zjq0vWq54WpOh2C60WlbCb664OR5JjxonDh83KNLLKUQtHmT7LnbO5hvvER3O3MAMQxKmDloOW-TB5bbPLUQF5l8-sq0x0bOte-bQXH_E5bj2qRCf_IKM3e; BDRCVFR[SquYicL8Vkb]=I67x6TjHwwYf0; H_PS_PSSID=26350; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; ZD_ENTRY=baidu; H_WISE_SIDS_BFESS=110085_131862_188746_194529_204904_211986_212295_213039_214795_215730_216853_216941_219623_219943_219946_222624_223064_223337_224045_224047_224436_226628_226815_227932_228650_228870_229154_229907_229967_230077_230241_230244_230248_230287_230930_231433_231628_231761_231904_231979_232055_232244_232357_232616_232755_232834_232908_233041_233368_233401_233464_233465_233518_233598_233604_233719_233924_234044_234085_234208_234225_234296_234317_234349_234382_234515_234521_234559_234670_234690_234722_234799_234924_234980_235091_235131_235174_235201_235228_235258_235398_235421_235453_235461_235511_235534_235581_235634_235770_235808_235829_235870_235969_235980_236022_236050_236052_236084_236101_236129_236239_236243_236341_236512_236515_236524_236527_236538_236611_236811_236838; __bid_n=1841d0d39462b7eb984207; BDUSS=VRCZDNtbXNuNW41UGlFbjZKUmI3WEc2aUxYYVFsejg0SVVROEVJNmtxd35SNnhqSUFBQUFBJCQAAAAAAAAAAAEAAAC67J41AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD-6hGM~uoRjY; 
SIGNIN_UC=70a2711cf1d3d9b1a82d2f87d633bd8a04196552899imlBp2YPGD4aewP8FEYzgkEScKOqh7wb51tJTuP1B6MtmB3vSB6esoqA1w0BCKHaSr3H%2BaAs%2B95UgUw5JUbwAjHo4ooV%2BHBPtHJbiiJxU3CbQrkyxr2V65CIOTGVbPwt0Kij485ztLZlqLDr%2FeP4j8mK%2F1BnLMUjD0IZINAdz9OcGDB5KlDUGhEMUmHW5FkAhu26dh63%2FP000Cmpeyz06Ww6TciYJ3j7g9b1pdBcgCvDfrwAp4NUZ7z4PY8wFikBxuF2%2B0HT3niFFIDJz6HNM1GEJXoPVe0hWKEwKigxDYQ%3D80937910607250753648874982969453; ab_bid=4039c07e99dd9dfa4f512beb01a097f2a6ff; ab_sr=1.0.1_ZGMwYzg4NTQxY2U2MWM0OGY2ZmMzMzc3YmI2NzJlZDQ5NGQyNDc5NGI5NWJmYjMwZWUyMjBiYmU0MGFlOTc5YmM0MzEyNGI0ZDQxNjQ1YjNmY2M2YTEyYTliMWVjZGFjMjZhZDdkMmQ0YWM2NTM4Zjc4YTIwODNkNjY5YmQ4MzMwNjI5MDI1Yzc1OTZmMzUyYzFkNzEwYTcxYzQzZDAyYw==; RT='z=1&dm=baidu.com&si=630f2482-3780-4da8-9234-e06ac91171fd&ss=lb0u05kx&sl=m&tt=kt4&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf'; BDUSS_BFESS=VRCZDNtbXNuNW41UGlFbjZKUmI3WEc2aUxYYVFsejg0SVVROEVJNmtxd35SNnhqSUFBQUFBJCQAAAAAAAAAAAEAAAC67J41AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD-6hGM~uoRjY"
+ }
+
+ item_list=[]
+
+ def __init__(self):
+ host = '202.193.53.106'
+ table_name = 'bauduacc'
+ hbase = HbaseUtil(host)
+ self.hbase = hbase
+ self.tablename= table_name
+
+
+
+ def parse1(self):
+ time.sleep(3)
+ try:
+ url_list = query("select id,name,bdacc_url from scenics where bdacc_url !='' ", None)
+ for redatas in url_list:
+ id=redatas['id']
+ scenicName=redatas['name']
+ url=redatas['bdacc_url']  # key must match the selected column name
+ # url = f"https://index.baidu.com/api/SearchApi/index?area=0&word=[[%7B%22name%22:%22{parse.quote(keyword)}%22,%22wordType%22:1%7D]]&days=30"
+
+ response = requests.get(url, headers=self.header, proxies=PROXY, timeout=5)
+ time.sleep(3)
+ data= response.json()
+ if data['data'] =='':
+ # print(data)
+ print("被检测了,请更新验证数据")
+ else:
+ print(data['data'])
+ start_time=str(data['data']['userIndexes'][0]['all']['startDate'])
+ end_time=str(data['data']['userIndexes'][0]['all']['endDate'])
+ all_avg=str(data['data']['generalRatio'][0]['all']['avg'])  # overall daily average
+ all_yoy=str(data['data']['generalRatio'][0]['all']['yoy'])+"%"  # overall year-over-year %
+ all_qoq=str(data['data']['generalRatio'][0]['all']['qoq'])+"%"  # overall period-over-period %
+ wise_avg=str(data['data']['generalRatio'][0]['wise']['avg'])  # mobile daily average
+ wise_yoy=str(data['data']['generalRatio'][0]['wise']['yoy'])+"%"  # mobile year-over-year %
+ wise_qoq=str(data['data']['generalRatio'][0]['wise']['qoq'])+"%"  # mobile period-over-period %
+
+ sql="INSERT INTO baiduacc(scenicId,all_avg,all_yoy,all_qoq,wise_avg,wise_yoy,wise_qoq,crawlTime) VALUES (%s,%s,%s,%s,%s,%s,%s,%s);"
+ insert(sql,(id,all_avg,all_yoy,all_qoq,wise_avg,wise_yoy,wise_qoq,datetime.date.today()))
+ except Exception as e:
+ print("url无效", e)
+ #
+ # def inputHbase(self,list):
+ # for i in range(len(list)):
+ # # 插入数据库
+ # print(list[i])
+ # self.hbase.putTable(self.tablename, str(datetime.date.today())+"_"+str(i), {
+ # 'info:name': list[i]['name'],
+ # 'all:all_avg': list[i]['all_avg'],
+ # 'all:all_yoy': list[i]['all_yoy'],
+ # 'all:all_qoq': list[i]['all_qoq'],
+ # 'wise:wise_avg': list[i]['wise_avg'],
+ # 'wise:wise_yoy': list[i]['wise_yoy'],
+ # 'wise:wise_qoq': list[i]['wise_qoq'],
+ # })
+
+# if __name__=="__main__":
+#
+# object=baiduacc()
+# object.parse1()
+
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baidusearch.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baidusearch.py
new file mode 100644
index 0000000..40ab617
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baidusearch.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> baidusearch
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-15 18:11
+@Desc
+=================================================='''
+from urllib import parse
+import time
+import random
+import datetime
+import re
+import requests
+from wangModel.utils.proxys import PROXY,ips
+from lxml import etree
+from wangModel.utils.HbaseConn import HbaseUtil
+from wangModel.utils.mysqlConn import query,insert,getRows,update
+"""
+Crawl the number of Baidu search results for each scenic spot
+"""
+class BaiduSpider():
+
+ def parse(self):
+ kw=""
+ url_list = getRows("select id,name from scenics ", None)
+ for content in url_list:
+ item={}
+ print(content)
+ time.sleep(random.randint(1,5))
+ id=content[0]
+ item['id']=id
+ name=content[1]
+ item['name']=name.strip()
+ kw=parse.quote(name)
+ url= f"https://www.baidu.com/s?wd={kw}&rsv_spt=1&rsv_iqid=0xd0a36e920005e207&issp=1&f=8&rsv_bp=0&rsv_idx=2&ie=utf-8&rqlang=&tn=baiduhome_pg&ch="
+ item['url']=url
+ self.parse_item(url,item)
+
+ def parse_item(self, url,item):
+ print("--------------发起请求--------------------")
+ header = {
+ "Cipher-Text": "1668409355251_1668493173103_aLgQH4YFqAwPcYSE7v52xdJaHSAeId9tI+WY1JMiHu8HwngWY2DifDL8GwYz2O+DvIVgj+9ldrUsKJ3ADGdnEUHL1GARwcCChi73BbkUFeNFtACrNrwhmPStsz0iWKZEK1aqGImhb+zMQg9/qJkxFRR+4AuJz5zbU+IkH793cccuV18DONXlam0zLfF07BZFrBRtTFCC7P7YOpfz9du1sz0OHMxRr7Iwdq1hrNzZ0yW4pzm8Hw2C7gvEfXs81XQSHDeGOtaoZ/IQyn5QqCYSsGC47kiKIeEy2hOaGITVWj4wBHvNe//u+dxPX/cDPjIM7QWoQnmSAg2qOAUtzTMnBE0Eal21o3C03eGBGNJHXYM9xVQz2OEs+NeMG2HXjKi5boG4R8ypMvU8D5JsL9lU7G2WStNDiX7sEjaskomtx2g=",
+ "Referer": "https://index.baidu.com/v2/main/index.html",
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.42",
+ "Cookie": "BIDUPSID=B772A7AE03D22C237EA5162D657EFEA8; PSTM=1646828464; BAIDUID=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; BCLID_BFESS=8610272839171174388; BDSFRCVID_BFESS=LqkOJeCmHx1yiQOjtw6fuaBrwgKK0gOTHbucfJLsovUqE2IVJeC6EG0Ptf8g0Kubdu1yogKK0gOTH6KF_2uxOjjg8UtVJeC6EG0Ptf8g0M5; H_BDCLCKID_SF_BFESS=tJFfoK0afIK3f-opMtT5q4t3KxrjetJyaR3rQRvvWJ5TDqn9DJrE0f4q5htqKJQK0jrf0hvctn3cShnP5tbtyxAyhpjPbxLHWGcZ0l8K3l02V-bIe-t2ynLVbh_Dh4RMW20j0h7mWIQvsxA45J7cM4IseboJLfT-0bc4KKJxbnLWeIJEjj6jK4JKjaK8t65P; delPer=0; PSINO=6; BA_HECTOR=200h85al2l2001agah0gat3a1hn62841f; BAIDUID_BFESS=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; ZFY=PEntF3sipSTjFmSpBgjsg2if1PiObhH0XFP3GeQX4wg:C; H_PS_PSSID=37784_36554_37552_37519_37689_37772_37628_34813_37778_37727_37538_37712_37742_26350_37789; Hm_lvt_d101ea4d2a5c67dab98251f0b5de24dc=1668491560; BDUSS=FZTkFXWlFOLVVLdjExR293ZXZQWXkyeXFzRzVUZll1OXo5azR0SjlueGd2SnBqSVFBQUFBJCQAAAAAAAAAAAEAAABbpYyTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAvc2NgL3NjOG; SIGNIN_UC=70a2711cf1d3d9b1a82d2f87d633bd8a04185056022rfZ4Fr8b%2FZjzCjWhkEXzVhsU%2BRJmUfM92Rxqhej6x6RSy2D67EegK8bVq7Xw8G45FA9lGDxsArHhqi6FUXK4q4RUQKI%2FOaidfKoId7N9w5%2BvNtec2wUhywSQZq0jcgF6x9ekV4CZhLqqdSZJW8MmPYtfaFxQO1F04SU%2Bg1VM6k80VfstLewTJ%2FyvBssATejPpii0mplIhwrdv4izW0XcCSgczOv1KoEYf3DDBB%2BAkLlXIVuMXT08UND685c51gs1LPln6JVHlEmqjH2syDrFSw%3D%3D93823493866336522493836350719633; __cas__rn__=418505602; __cas__st__212=c3dabde61364b016ccd784a68f72be264e46b9a5983c1a499a1f2f58aed9b391b09413376c3a0f261c176a62; __cas__id__212=41971051; CPID_212=41971051; CPTK_212=693995330; Hm_up_d101ea4d2a5c67dab98251f0b5de24dc=%7B%22uid_%22%3A%7B%22value%22%3A%222475468123%22%2C%22scope%22%3A1%7D%7D; bdindexid=t2n1cvnhg9lue59q0t78cq8jb2; RT='z=1&dm=baidu.com&si=524e125a-f181-40a3-b23b-0f6278b2185e&ss=lahssm1u&sl=f&tt=kfp&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf'; Hm_lpvt_d101ea4d2a5c67dab98251f0b5de24dc=1668493173; BDUSS_BFESS=FZTkFXWlFOLVVLdjExR293ZXZQWXkyeXFzRzVUZll1OXo5azR0SjlueGd2SnBqSVFBQUFBJCQAAAAAAAAAAAEAAABbpYyTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAvc2NgL3NjOG; ab_sr=1.0.1_ZTJlZDhiZWMyN2IxYWY2MjQ5NzZiNDVkODUyMTIwNzJlNWRmMjAyZWY2ZDFiYWVmNDE2ODE4ZmIyOTQ5MGZmNDdiNTQ5ODJjNWY1MGViN2MwOWI0YzEyYzBlZWY5NzU4MjM0ODk0NzFlMzUxNDJiZjI2ZDZiYWZlYzljMDAyMmZlNTM2MWUzMjdmYjY4MzA1YTAzMWE5MTdhODY1ZGZlYg=="
+ }
+ ip="http://"+random.choice(ips)
+ response=requests.get(url,headers=header,proxies={"http":ip},timeout=5)
+ selector = etree.HTML(response.text)
+ try:
+ data=selector.xpath("//*[@id='tsn_inner']/div[2]/span/text()")[0]
+ num=re.findall(r"\d+",data)
+ result=""
+ if num is not None:
+ for content in num:
+ result=result+content
+ item['num']=result
+ print(item)
+
+ # insert into the database
+ update_sql = "UPDATE scenics SET bdsearch_url = %s where id = %s "
+ insert_sql = "insert into bd_search(scenicId,scenicName,num,crawlTime) values (%s,%s,%s,%s)"
+ update(update_sql, (item['url'], item['id']))
+ insert(insert_sql, (item['id'], item['name'], item['num'], datetime.date.today()))
+ except Exception as e:
+ print("定位失败,检查Cookie是否失效或网页结构是否更改", e)
+# if __name__ =="__main__":
+# run=BaiduSpider()
+# run.parse()
\ No newline at end of file
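+
+# Note: the digit-joining loop in parse_item is equivalent to the one-liner
+#   result = "".join(re.findall(r"\d+", data))
+# and re.findall never returns None, so `if num:` (an emptiness check) is the
+# guard that actually distinguishes "no digits found".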
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduwords.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduwords.py
new file mode 100644
index 0000000..725bb3d
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/baiduwords.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> baiduwords
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-16 10:21
+@Desc: Search Baidu Baike entries for each scenic spot
+=================================================='''
+from urllib import parse
+import time
+import random
+import re
+import requests
+from wangModel.utils.proxys import PROXY,ips
+from lxml import etree
+import datetime
+from wangModel.utils.mysqlConn import query,insert,getRows,update
+from selenium import webdriver
+class BaiDuWords():
+
+ def parse(self,id,kw):
+ url=f"https://baike.baidu.com/item/{parse.quote(kw)}?fromtitle={parse.quote(kw)}"
+ print(url)
+ header = {
+ "X-Requested-With":"XMLHttpRequest",
+ "Cipher-Text": "1668409355251_1668493173103_aLgQH4YFqAwPcYSE7v52xdJaHSAeId9tI+WY1JMiHu8HwngWY2DifDL8GwYz2O+DvIVgj+9ldrUsKJ3ADGdnEUHL1GARwcCChi73BbkUFeNFtACrNrwhmPStsz0iWKZEK1aqGImhb+zMQg9/qJkxFRR+4AuJz5zbU+IkH793cccuV18DONXlam0zLfF07BZFrBRtTFCC7P7YOpfz9du1sz0OHMxRr7Iwdq1hrNzZ0yW4pzm8Hw2C7gvEfXs81XQSHDeGOtaoZ/IQyn5QqCYSsGC47kiKIeEy2hOaGITVWj4wBHvNe//u+dxPX/cDPjIM7QWoQnmSAg2qOAUtzTMnBE0Eal21o3C03eGBGNJHXYM9xVQz2OEs+NeMG2HXjKi5boG4R8ypMvU8D5JsL9lU7G2WStNDiX7sEjaskomtx2g=",
+ "Referer": "https://baike.baidu.com/item/%E7%99%BE%E5%BA%A6%E7%99%BE%E7%A7%91?fromModule=lemma_search-box",
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.52",
+ "Cookie": "BIDUPSID=B772A7AE03D22C237EA5162D657EFEA8; PSTM=1646828464; BAIDUID=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; FPTOKEN=30$MrCecZxUi8VS8olQk6GALxBMkPjNDvPqJaUEnW0U/il23iuvfUkXY4mgGNIFtYpKGevoBroMxF6rVAASyZuGaOxurO6Vofyd98uaWKxm9i3oqBQmI361ZlV81CXwf/HgVmK8C/nBkRrvPbXoNG88dFO6bXZHRhqqmusaAiWRqo/INvI0Ykfrx9zGtWoWDmG8LmigrS9r31q9r1YENQshlw1vLnBlsRHoK4S3fj+AnIqz5W/H4RBf92ik6VgmTwmERIDXUryJO6uZKLaMnXm9yYYgkSE3CJd91tmiIeR92jBb3b8hF5Pm1kyTK6qW7GsdA2ybnC4ueez9qmxosW5kRh6I+PEw8HCxiBno6qeXb4e6p0pgYL38oz+yhfmoRWlW|sqY0adRCWGB/CE2viadPBGBBbaHCAe/V4KFwr8eW/08=|10|51987c75976c595af8f6e5b793a7c623; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; ZFY=C:BtWlDYn2vvbhq54rjO:BevzXkLZNv:AbRA3Fah5VvMrA:C; BAIDUID_BFESS=EF08EE41A5B9911A97D741FCA1E975AB:FG=1; BAIDU_WISE_UID=wapp_1668931780429_949; MCITY=-%3A; __bid_n=18495ad43f144d83c64207; BK_SEARCHLOG=%7B%22key%22%3A%5B%22%E4%BC%9A%E4%BB%99%E5%96%80%E6%96%AF%E7%89%B9%E5%9B%BD%E5%AE%B6%E6%B9%BF%E5%9C%B0%E5%85%AC%E5%9B%AD%E6%99%AF%E5%8C%BA%22%2C%22%E6%A1%82%E6%9E%97%E6%BC%93%E6%B1%9F%E6%99%AF%E5%8C%BA%22%2C%22%E6%A1%82%E6%9E%97%22%2C%22%E6%A1%82%E6%9E%97%E4%B8%83%E6%98%9F%E5%85%AC%E5%9B%AD%22%2C%22%E8%B1%A1%E9%BC%BB%E5%B1%B1%22%2C%22%E7%94%A8%E6%B0%9F%E5%88%B7%E7%89%99%22%2C%22%E7%94%A8%E7%9A%84%E6%95%B0%E5%AD%97%E5%9B%BE%E5%83%8F%E7%9A%84%E8%B7%9D%E7%A6%BB%E5%BA%A6%E9%87%8F%E6%9C%89%E5%87%A0%E7%A7%8D%3F%22%5D%7D; BA_HECTOR=a42k85010084202104802vdd1hnpagk1e; delPer=0; PSINO=6; H_PS_PSSID=37784_36554_37552_37519_37772_37628_34813_37778_37819_37727_37793_37712_37742_26350_37789; zhishiTopicRequestTime=1669174887100; BDUSS=mt5NmljaFBnVUE5RGdxQVRvMEZCfklSS09NTkFVZ0FqTnNEa1hSflF3Si1JNlZqSUFBQUFBJCQAAAAAAAAAAAEAAABbpYyTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAH6WfWN-ln1ja; BDUSS_BFESS=mt5NmljaFBnVUE5RGdxQVRvMEZCfklSS09NTkFVZ0FqTnNEa1hSflF3Si1JNlZqSUFBQUFBJCQAAAAAAAAAAAEAAABbpYyTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAH6WfWN-ln1ja; channel=passport.baidu.com; baikeVisitId=5f33655f-41f8-474d-8e54-9589a8fa8510; RT='sl=8&ss=lat3mooz&tt=b4g&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&z=1&dm=baidu.com&si=1e981726-04a9-4146-be75-d83748eee7ca'; Hm_lvt_55b574651fcae74b0a9f1cf9c8d7c93a=1668564739,1668958469,1669132483,1669174993; Hm_lpvt_55b574651fcae74b0a9f1cf9c8d7c93a=1669174993; ab_sr=1.0.1_ZDE3M2EwZTU0MjIyMmU5ZjFlYjM0ODBjYTY4NzZkOGNmZTU5MDc0YWU4N2Y2ODhkNWNiMDRhNzE2YjQ5Mjg2ZmMyMGVjYTc4OTJmZjFjNWM3NzNiZTJlZTkyYjk3OTQ4NGE0YTNkZGExNzdkMjYzODAyMzMwNzBkYTgwYjJlYjE1NDdkOTM1OWYxYzg2ZWUzNDU0ZDVkNDA3MWJjYjI2ZmJmMWU2MWEwNjMzYTk5YTg2MDhmYTYxMzkwNjAwMDQx"
+ }
+ ip = "http://" + random.choice(ips)
+ # response = requests.get(url, headers=header,proxies={"http":ip}, timeout=5)
+ driver = webdriver.Chrome()
+ driver.set_window_size(1280, 720) # 自定义窗口大小:
+ driver.implicitly_wait(3) # 设置隐式时间等待
+ driver.get(url)
+ driver.implicitly_wait(3) # 设置隐式时间等待
+ data = driver.page_source
+        try:
+            like_count = driver.find_element(by='xpath', value="//*[@id='j-top-vote']/span[1]").text  # like count
+            print(like_count)
+            share_count = driver.find_element(by='xpath', value="//*[@id='j-topShareCount']").text  # share count
+            print(share_count)
+            see_count = driver.find_element(by='xpath', value="//*[@id='j-lemmaStatistics-pv']").text  # view count
+            print(see_count)
+            edit_count_text = driver.find_element(by='xpath', value="/html/body/div[3]/div[2]/div/div[2]/dl/dd[1]/ul/li[2]").text  # edit-count text
+            print(edit_count_text)
+            # re.findall always returns a list, so join the digit runs directly
+            num = re.findall(r"\d+", edit_count_text)
+            edit_count = "".join(num)
+            print("edit count digits", num)
+            # persist to the database
+            update_sql = "UPDATE scenics SET bdword_url = %s where id = %s "
+            insert_sql = "insert into bd_words(scenicId,scenicName,like_count,share_count,see_count,edit_count,crawlTime) values (%s,%s,%s,%s,%s,%s,%s)"
+            update(update_sql, (url, id))
+            insert(insert_sql, (id, kw, like_count, share_count, see_count, edit_count, datetime.date.today()))
+            driver.close()
+        except Exception:
+            print("this keyword has no Baike entry yet")
+            driver.close()
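+        # A try/finally would guarantee the browser closes even on unexpected errors;
+        # a minimal sketch of that shape, not part of the original flow:
+        #     try:
+        #         ...  # scrape the entry
+        #     finally:
+        #         driver.quit()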
+
+
+ def run(self):
+ url_list = getRows("select id,name from scenics ", None)
+        for content in url_list:
+            print("crawling scenic spot", content)
+            id = content[0]
+            kw = content[1]
+            self.parse(id, kw)
+# if __name__ =="__main__":
+# baiduWord=BaiDuWords()
+# baiduWord.run()
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tongtencities.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tongtencities.py
new file mode 100644
index 0000000..429da00
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tongtencities.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> tongtencities
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-17 11:25
+@Desc This spider collects city-id information from Tongcheng Travel (bus.ly.com)
+=================================================='''
+import requests
+url='https://bus.ly.com/busresapi/destination/getDesByLetter'
+
+data={
+ 'letter': 'ALL',
+ 'depCName': '深圳',
+ 'depCId': 1090
+}
+params={
+ "plateId":3
+}
+response=requests.post(url,json=data,params=params)
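+# requests.post sends the filter dict as a JSON body (json=data) while plateId rides
+# on the query string (params) - two separate channels on the same call.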
+result=response.json()
+city_list=result['body']
+with open("../files/city.txt", "a+", encoding='utf-8') as f:
+ for item in city_list:
+ id=item['id']
+ cityName=item['name']
+ f.write(str(id))
+ f.write(",")
+ f.write(cityName)
+ f.write("\n")
+f.close()
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tuniu_route.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tuniu_route.py
new file mode 100644
index 0000000..c4bfab1
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/tuniu_route.py
@@ -0,0 +1,108 @@
+import re
+import time
+from datetime import date, timedelta
+
+import requests
+from lxml import etree
+from parsel import Selector
+
+from wangModel.utils.proxys import PROXY
+from wangModel.utils.mysqlConn import insert
+
+today = time.strftime("%Y-%m-%d", time.localtime())
+tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+headers = {
+ 'Referer':"https://www.tuniu.com/",
+ 'User-Agent':'Mozilla/5.0(Windows NT 10.0; Win64; x64)AppleWebKit/537.36(KHTML, like Gecko)Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.46',
+ 'Cookie': "_uab_collina=166113651603660325791145; udid=tn-100-1661136526886-f196ed1d-21c4-11ed-921a-0ba79eabb6b9; _tact=ODE2YWU2MjgtMDU4MC1hYzJjLTQwOWMtMDMyZGEyNzIzMTMz; _ga=GA1.2.2105264209.1661136529; p_phone_400=4007-999-999; p_phone_level=0; p_global_phone=%2B0086-25-8685-9999; fp_ver=4.7.3; BSFIT_OkLJUJ=FHMgfFXHnQXEiVkz8qQ3cNf9ukZQWypQ; cto_bundle=DDPyZ19ENHlVa1poTVJiZ0twWTExWTB1WXF3RUZzQm5wQjB4c1d0cUsycCUyQkpQMkdFdkVnNnhtcXEzbkZmTW1zYnJCcFBHa3FSWlNKOFVyJTJCN2NJSVkxdWxLTDU0MENscU5QRnhNZHVPZFZiU0h1dHpVcXdlWkJNaE9mcGhOUnQ1STBNTlBnQ1FLREtZN09OMXV1YmJQanZUeHF3JTNEJTNE; __utmz=1.1668932705.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); Hm_lvt_44f54d76a67ba9230a7bb92d5ed5e4ba=1667808931,1668142577,1668940349; __utma=1.2105264209.1661136529.1668958304.1668962727.6; tuniuuser_force_logout=1669109601000; tuniuuser_vip=MA%3D%3D; tuniuuser_level=MA%3D%3D; tuniuuser_id=97164384; tuniuuser_name=MTUyMjkyMzI1Mw%3D%3D; tuniuuser_image=Ly9pbWczLnR1bml1Y2RuLmNvbS9pbWcvMjAxNDA0MDkwMS91c2VyX2NlbnRlci9nX3RvdXhpYW5nLnBuZw%3D%3D; _tacz2=taccsr%3Dcn.bing.com%7Ctacccn%3D%28referral%29%7Ctaccmd%3D%28none%29%7Ctaccct%3D%28none%29%7Ctaccrt%3D%28none%29; tuniu_partner=MTAxLDAsLDlmZDgyZThjYTZkNGMwMTlmZTUyNzdlYjJmNTcxYzQ1; isHaveShowPriceTips=1; _tacau=MCw4NjMxMDNiZi1kOGIwLTViMmYtMWZlMS1mNTFjYjYzYjgyNDIs; PageSwitch=1%2C213612736; _gid=GA1.2.387299839.1670741383; clickCache=%5B%7B%22key%22%3A1670741382627%2C%22url%22%3A%22https%3A%2F%2Fwww.tuniu.com%2F%22%2C%22pageName%22%3A%22%E5%BA%A6%E5%81%87%3A%E5%8D%97%E5%AE%81%3A%E9%A6%96%E9%A1%B5%3Ann%22%2C%22referer%22%3A%22%22%2C%22events%22%3A%5B%7B%22text%22%3A%22%E7%82%B9%E5%87%BB_%E9%A1%B6%E9%83%A8%E5%AF%BC%E8%88%AA_%E4%B8%80%E7%BA%A7%E5%AF%BC%E8%88%AA_6_%E9%85%92%E5%BA%97%22%2C%22x%22%3A394%2C%22y%22%3A149%2C%22lg%22%3A1670741384382%7D%5D%7D%5D; rg_entrance=010000%2F003001%2F000013%2F000000; tuniu-assist={%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22bgcolor%22:false}; Hm_lvt_fe3fbe4228e14b1544525f058df92f91=1669634420,1670140045,1670741377,1670757948; _pzfxuvpc=1661136528944%7C9947719618884716956%7C135%7C1670757948543%7C48%7C1145071184993068063%7C2602933339806650760; OLBSESSID=m10d18uckfquipe2llgnec7ds4; acw_sc__v2=6395be40a848e7c226787a31326638850a340bc0; tuniu_zeus=M18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIwIDIyOjIzOjQy%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vdHJpcHMudHVuaXUuY29tLzo6MjAyMi0xMS0yMCAyMjozMDo0Mw%3D%3D%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIwIDIyOjMwOjQ5%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vd3d3LnR1bml1LmNvbS86OjIwMjItMTEtMjAgMjM6MTA6MDQ%3D%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIwIDIzOjI3OjM2%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIxIDEyOjQ2OjUz%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIxIDEyOjQ2OjU0%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vd3d3LnR1bml1LmNvbS86OjIwMjItMTEtMjEgMTY6MjI6MjU%3D%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vbWVucGlhby50dW5pdS5jb20vOjoyMDIyLTExLTIxIDE2OjIyOjQz%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vd3d3LnR1bml1LmNvbS90b3Vycy86OjIwMjItMTItMTEgMTk6MjU6NTI%3D%2CM18zXzFfMV8xXzI6Omh0dHBzOi8vd3d3LnR1bml1LmNvbS90b3Vycy86OjIwMjItMTItMTEgMTk6MjU6NTU%3D; 
tuniu_searched=a%3A5%3A%7Bi%3A0%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A6%3A%22%E6%A1%82%E6%9E%97%22%3Bs%3A4%3A%22link%22%3Bs%3A47%3A%22%2F%2Fs.tuniu.com%2Fsearch_complex%2Ftours-nn-0-%E6%A1%82%E6%9E%97%2F%22%3B%7Di%3A1%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A21%3A%22%E6%A1%82%E6%9E%97%E7%9A%84%E6%97%85%E6%B8%B8%E7%BA%BF%E8%B7%AF%22%3Bs%3A4%3A%22link%22%3Bs%3A50%3A%22http%3A%2F%2Fwww.tuniu.com%2Fg705%2Fwhole-gl-0%2Flist-h0-j0_0%2F%22%3B%7Di%3A2%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A62%3A%22%E8%AF%97%E4%B8%8E%E8%BF%9C%E6%96%B9%C2%B7%E6%BC%93%E6%B1%9F%E9%99%A2%E5%AD%90%E9%85%92%E5%BA%97%EF%BC%88%E4%B8%A4%E6%B1%9F%E5%9B%9B%E6%B9%96%E4%B8%9C%E8%A5%BF%E5%B7%B7%E5%BA%97%EF%BC%89%22%3Bs%3A4%3A%22link%22%3Bs%3A40%3A%22http%3A%2F%2Fhotel.tuniu.com%2Fdetail%2F2073760650%22%3B%7Di%3A3%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A58%3A%22%E8%AF%97%E4%B8%8E%E8%BF%9C%E6%96%B9%C2%B7%E6%BC%93%E6%B1%9F%E9%99%A2%E5%AD%90%E9%85%92%E5%BA%97%28%E4%B8%A4%E6%B1%9F%E5%9B%9B%E6%B9%96%E4%B8%9C%E8%A5%BF%E5%B7%B7%E5%BA%97%29%22%3Bs%3A4%3A%22link%22%3Bs%3A106%3A%22http%3A%2F%2Fs.tuniu.com%2Fsearch_complex%2Fhotel-gl-0-%E8%AF%97%E4%B8%8E%E8%BF%9C%E6%96%B9+%E6%BC%93%E6%B1%9F%E9%99%A2%E5%AD%90+%E4%B8%A4%E6%B1%9F%E5%9B%9B%E6%B9%96%E4%B8%9C%E8%A5%BF%E5%B7%B7%E5%BA%97%2F%3Fjump%3Dauto%22%3B%7Di%3A4%3Ba%3A2%3A%7Bs%3A7%3A%22keyword%22%3Bs%3A30%3A%22%E6%A1%82%E6%9E%97%E9%AB%98%E9%93%81%E5%8C%97%E7%AB%99%E4%BA%9A%E6%9C%B5%E9%85%92%E5%BA%97%22%3Bs%3A4%3A%22link%22%3Bs%3A71%3A%22%2F%2Fs.tuniu.com%2Fsearch_complex%2Fwhole-gl-0-%E6%A1%82%E6%9E%97%E9%AB%98%E9%93%81%E5%8C%97%E7%AB%99%E4%BA%9A%E6%9C%B5%E9%85%92%E5%BA%97%2F%22%3B%7D%7D; _taca=1661136526075.1670748191106.1670757961271.60; _tacb=NjcxMTdhMzMtNDYwNS0zMjE1LWI1MjUtMTc0NzEzMDI1YjM2; _tacc=1; Hm_lvt_51d49a7cda10d5dd86537755f081cc02=1669088113,1669636043,1670741377,1670757963; PcHomeVisit=1; BSFIT_EXPIRATION=1671968376330; BSFIT_DEVICEID=EJuL0zqYa9MeH-6Ld1C5FRNL8B1kMZYIegSialKv0NO8fko-n1BgNo4ADDsljSX9RDzQzUzxkgAzC-3ZltyyhA9ScDjMQi3oC6mV9IwloOamu0jnNiSVQErdw8ZDYB-HYu3jG6FqV1R30gENEORkYMaNBVfSo39z; tuniuuser_citycode=NzA1; tuniuuser_ip_citycode=NzA1; acw_tc=76b20f6216707597669104541e718761bbdb8491ea15506eaeb386802d1205; acw_sc__v3=6395c55b87a173d5c8ab0530eda85c313100aa8d; connect.sid=s%3ADVpFth7p1RAHGpm5xwz_qhKk2aNxKIEW.61uxobNGuz3FuYwMsUM4VC6yWaHU8%2FsMsdvUKjMO%2Bto; __xsptplusUT_352=1; Hm_lpvt_51d49a7cda10d5dd86537755f081cc02=1670759773; Hm_lpvt_fe3fbe4228e14b1544525f058df92f91=1670759773; __xsptplus352=352.28.1670758079.1670759773.5%234%7C%7C%7C%7C%7C%23%23x3KmvfCykhxF9YMlGHYyP86YcrS0s2BZ%23; ssxmod_itna2=GqAh7KY50KAIxYq0d4YKO1xUxDwUpAExacC1QDnFSiaPDs=55DLQury4qnbPWt=4=za2K63qzhLALHV7IBPx8MfgS0L1Vnbq4FCZS4qC2qiWbpOR6QHBmfa=Xf2=uCfNpN6UC95HwyrUaW7aeczFhcFFg9FNx71=I9rRr7kX6mCmrornmOvr9gvP6mkkFrpT+38p4STIYf=im4Dw22dDjKD+1d5i0r47g+KoYD==; ssxmod_itna=eqUxnDcD900Qit3GHIhCAYD7YA5xCDD5+LdD/KimDnqD=GFDK40oo7qr=oDOnBrAhnEG=UGAh+T+W7BWpxex1bqaTDU4i8DCLxNo+mDYY8Dt4DTD34DYDixibkxi5GRD0KDFF5XUZ9Dm4GWFqGfDDoDY86RDitD4qDBCodDKqGgFTxsFq2j7mt3pLxe57GcD0tdxBdeWawcGCcFciNe56nDNEQDzkHDtutS9kd3x0PyBMUDM2ozQi+1BoQebEhrz0D=bSOaiieN9/4rKODq8BCwj75cPD===Host: www.tuniu.comIf-None-Match: W/'3c-NfUtq77l6/q+4MT+i1B8akkKCJY'Referer: 
https://www.tuniu.com/tour/210484383?u_atoken=2a0c1736-73f1-4b7c-ad98-d6d94e03c5ab&u_asession=01903W2-YdH07896fdt57A6KHEQxoe3p_nM9u4UCHyFrbiLBWPB-i4K8FGDfX1dPGpX0KNBwm7Lovlpxjd_P_q4JsKWYrT3W_NKPr8w6oU7K9FEef24uNZxWMQdDwnHXnnD9UaPztW_A5jsQn1Dkg23WBkFo3NEHBv0PZUm6pbxQU&u_asig=0544d1nAGeRC20Zu086zLa4yldBmLJqPWv-B9mz-H9Xrtah9jkPouz-FPnXWOOi4UZ2dTfIGDG7nuzXDqJvJoeKGx9aDWVBLA6ECRzZSExcTnAgKFWx6KrKB261iRpf4DSkxMbgQTdrHv4qiNIfGkvC9dPO90Bbvr-EUj-WfwYHev9JS7q8ZD7Xtz2Ly-b0kmuyAKRFSVJkkdwVUnyHAIJzW5j2yf_d5xdXB-OIEbSd9cmnegWFHsFYePqrtdDZqCL5w-GOMIgInRzpzRYK0ZViu3h9VXwMyh6PgyDIVSG1W8a4h9Ftm1jXrxOtcF1nJf7yVBaI0FPIhiohXoTwrQ-uFvYeu4qtMWWYz3pQwg1DG_l-4EGHxB6fgv3zddmpOI2mWspDxyAEEo4kbsryBKb9Q&u_aref=zug78BonXAlUtdj%2FsdJ3FWivLgg%3D".encode('utf-8'),
+}
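+# NOTE: the Cookie blob above was captured from a logged-in browser session (it even
+# carries stray "Host:/If-None-Match:/Referer:" fragments from the copy-paste) and
+# will stop working once that session expires.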
+
+route_list_pages=0
+
+def temp():
+    currentPage = 1
+    url = f"https://www.tuniu.com/g705/whole-gl-0/list-z9004399/{currentPage}"
+
+    res = requests.get(url, headers=headers, proxies=PROXY)
+    # print(res.text)
+    selector = etree.HTML(res.text)
+
+    # total number of list pages; default to 0 so the variable is always bound
+    route_list_pages = 0
+    allPage = selector.xpath("//*[@id='contentcontainer']/div[2]/div[1]/div[1]/div[2]/div/a[last()-1]/text()")
+    print(allPage)
+    if len(allPage) > 0:
+        print("page count", allPage)
+        route_list_pages = int(allPage[0])
+    time.sleep(2)
+
+    for m in range(1, route_list_pages + 1):
+        # rebuild the URL for each page: the f-string above only baked in page 1
+        url = f"https://www.tuniu.com/g705/whole-gl-0/list-z9004399/{m}"
+        res = requests.get(url, headers=headers, proxies=PROXY)
+        selector = etree.HTML(res.text)
+        route_items = selector.xpath("//*[@id='contentcontainer']/div[2]/div[1]/div[1]/div[1]/ul/li")  # avoid shadowing built-in list
+
+        print(len(route_items))
+        for child in route_items:
+            child_url = child.xpath("./div/a/@href")[0]  # route detail link
+            title = child.xpath("./div/a/@aria-label")[0]
+            scenics = child.xpath("./div/a/dl/dd[1]/@title")[0]
+            print("route", title)
+            print("route title type", type(title))
+            print("scenic spots", scenics)
+            if child_url is not None:
+                time.sleep(3)
+                detail_url = "https:" + child_url
+
+                # parse the route detail page
+                child_request = requests.get(detail_url, headers=headers, proxies=PROXY)
+                childSelector = Selector(text=child_request.text)
+                routedesc = childSelector.css("#J_Detail > div > div.J_DetailRoute.section-box.detail-route.detail-route4 > div.section-box-body > div.J_DetailRouteDetail.section-box-content.detail-journey-4-detail.active > div.section-box-content.detail-route4-brief-box.detail-route4-brief-nomap > div > div > div")
+                print("scraped content (empty means the anti-bot check caught us)", routedesc)
+                if routedesc:
+                    routedesc = routedesc[0]
+                    arranges_list = routedesc.xpath("./p")
+                    desc = ""
+                    # XPath positions are 1-based, so start at 1 and include the last <p>
+                    for i in range(1, len(arranges_list) + 1):
+                        content = routedesc.xpath(f"string(./p[{i}][@aria-label])").extract_first()
+                        data = str(content).strip().replace(" ", "").replace("\n", "")
+                        desc = desc + data + ";"
+                    print("description", desc)
+                    print(url)
+                    sql = "INSERT INTO route(route_name,sceniclist,route_desc,tn_url) values (%s,%s,%s,%s);"
+                    insert(sql, (str(title), scenics, desc, str(detail_url)))
+
+                    # # aggregate review statistics
+ # try:
+ # print(childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[1]/div[2]/strong/text()"))
+ # satistion=childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[1]/div[2]/strong/text()").extract_first()
+ # good=childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[2]/div[1]/div[1]/text()").extract_first()
+ # good=re.search("\d+",good).group()
+ # middle=childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[2]/div[2]/div[1]/text()").extract_first()
+                    # middle=re.search("\d+",middle).group()
+ # bad=childSelector.xpath("//*[@id='J_Comment']/div/div[2]/div[2]/div[2]/div[3]/div[1]/text()").extract_first()
+ # bad=re.search("\d+",bad).group()
+ # otherslist=childSelector.xpath("//*[@class='fraction']/div")
+ # otherdata=[]
+ # for otherService in otherslist:
+ # service=otherService.xpath("./@aria-label")
+ # print(service)
+ # otherdata.append(service)
+ # except:
+ # print("为获取渲染数据")
+
+
+                else:
+                    print("the crawler was detected; please complete the verification manually in a browser")
+                    # # print(res.text)
+if __name__ == '__main__':
+ temp()
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weather.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weather.py
new file mode 100644
index 0000000..e657f78
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weather.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> weather
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-12-11 20:37
+@Desc Scrapes Guilin historical weather from lishi.tianqi.com into the weather table
+=================================================='''
+import re
+
+import requests
+import pandas as pd
+from bs4 import BeautifulSoup
+
+from wangModel.utils.mysqlConn import insert, query
+
+#url = 'http://lishi.tianqi.com/mianyang/201905.html'
+headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
+ 'Cookie':'lianjia_uuid=9d3277d3-58e4-440e-bade-5069cb5203a4; UM_distinctid=16ba37f7160390-05f17711c11c3e-454c0b2b-100200-16ba37f716618b; _smt_uid=5d176c66.5119839a; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2216ba37f7a942a6-0671dfdde0398a-454c0b2b-1049088-16ba37f7a95409%22%2C%22%24device_id%22%3A%2216ba37f7a942a6-0671dfdde0398a-454c0b2b-1049088-16ba37f7a95409%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; _ga=GA1.2.1772719071.1561816174; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1561822858; _jzqa=1.2532744094467475000.1561816167.1561822858.1561870561.3; CNZZDATA1253477573=987273979-1561811144-%7C1561865554; CNZZDATA1254525948=879163647-1561815364-%7C1561869382; CNZZDATA1255633284=1986996647-1561812900-%7C1561866923; CNZZDATA1255604082=891570058-1561813905-%7C1561866148; _qzja=1.1577983579.1561816168942.1561822857520.1561870561449.1561870561449.1561870847908.0.0.0.7.3; select_city=110000; lianjia_ssid=4e1fa281-1ebf-e1c1-ac56-32b3ec83f7ca; srcid=eyJ0Ijoie1wiZGF0YVwiOlwiMzQ2MDU5ZTQ0OWY4N2RiOTE4NjQ5YmQ0ZGRlMDAyZmFhODZmNjI1ZDQyNWU0OGQ3MjE3Yzk5NzFiYTY4ODM4ZThiZDNhZjliNGU4ODM4M2M3ODZhNDNiNjM1NzMzNjQ4ODY3MWVhMWFmNzFjMDVmMDY4NWMyMTM3MjIxYjBmYzhkYWE1MzIyNzFlOGMyOWFiYmQwZjBjYjcyNmIwOWEwYTNlMTY2MDI1NjkyOTBkNjQ1ZDkwNGM5ZDhkYTIyODU0ZmQzZjhjODhlNGQ1NGRkZTA0ZTBlZDFiNmIxOTE2YmU1NTIxNzhhMGQ3Yzk0ZjQ4NDBlZWI0YjlhYzFiYmJlZjJlNDQ5MDdlNzcxMzAwMmM1ODBlZDJkNmIwZmY0NDAwYmQxNjNjZDlhNmJkNDk3NGMzOTQxNTdkYjZlMjJkYjAxYjIzNjdmYzhiNzMxZDA1MGJlNjBmNzQxMTZjNDIzNFwiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCIzMGJlNDJiN1wifSIsInIiOiJodHRwczovL2JqLmxpYW5qaWEuY29tL3p1ZmFuZy9yY28zMS8iLCJvcyI6IndlYiIsInYiOiIwLjEifQ=='
+ }
+
+def set_link(year):
+    # build the 12 monthly archive URLs for the given year; {:02d} zero-pads the month
+    link = []
+    for i in range(1, 13):
+        url = 'http://lishi.tianqi.com/guilin/{}{:02d}.html'.format(year, i)
+        print(url)
+        link.append(url)
+    return link
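+# e.g. set_link(2022) -> ['http://lishi.tianqi.com/guilin/202201.html', ...,
+#                         'http://lishi.tianqi.com/guilin/202212.html']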
+
+def get_page(url,headers):
+ html = requests.get(url,headers=headers)
+ if html.status_code == 200:
+ html.encoding = html.apparent_encoding
+ print(html.text)
+ return html.text
+ else:
+ return None
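+# get_page returns the decoded HTML on HTTP 200 and None otherwise, e.g.:
+#     html = get_page('http://lishi.tianqi.com/guilin/202201.html', headers)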
+
+date_box = []
+max_temp = []
+min_temp = []
+weh = []
+wind = []
+week_box = []
+
+def get_data():
+ link = set_link(2022)
+ for url in link:
+
+ html = get_page(url,headers)
+ bs = BeautifulSoup(html,'html.parser')
+
+ data = bs.find_all(class_="thrui")
+ date = re.compile('class="th200">(.*?)')
+ print(data)
+ tem = re.compile('class="th140">(.*?)')
+
+        days = re.findall(date, str(data))  # each entry looks like "YYYY-MM-DD 星期X"
+        for item in days:
+            week = item[10:]
+            week_box.append(week)
+            date_box.append(item[:10])
+        temp = re.findall(tem, str(data))
+        for i in range(len(days)):
+            # every day contributes four th140 cells: max temp, min temp, weather, wind
+            max_temp.append(temp[i * 4 + 0])
+            min_temp.append(temp[i * 4 + 1])
+            weh.append(temp[i * 4 + 2])
+            wind.append(temp[i * 4 + 3])
+get_data()
+datas = pd.DataFrame({'日期':date_box,'星期':week_box,'最高温度':max_temp,'最低温度':min_temp,'天气':weh,'风向':wind})
+for i in range(0, len(datas)):
+    w_time = datas.loc[i]['日期']
+    w_week = datas.loc[i]['星期']
+    max_tem = re.search(r"\d+", datas.loc[i]['最高温度']).group()
+    min_tem = re.search(r"\d+", datas.loc[i]['最低温度']).group()
+    statu = datas.loc[i]['天气']
+    wind = datas.loc[i]['风向']
+    insert("insert into weather(w_time,w_week,max_tem,min_tem,statu,wind) select %s,%s,%s,%s,%s,%s from dual where not exists(select w_time from weather where w_time=%s) ", (w_time, w_week, max_tem, min_tem, statu, wind, w_time))
+
+print(datas)
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weibosign.py b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weibosign.py
new file mode 100644
index 0000000..3d095cd
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/common_spiders/weibosign.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> weibosign
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-23 17:56
+@Desc Crawls Weibo check-in (POI) posts for each scenic spot into the weibosign table
+=================================================='''
+import datetime
+import json
+import random
+import re
+import time
+import traceback
+
+import requests
+from snownlp import SnowNLP
+
+from wangModel.utils.proxys import ips
+from wangModel.utils.mysqlConn import insert, query, getRows
+
+
+class WeiboSignSpider:
+ header = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.56",
+ "MWeibo-Pwa": "1",
+ "X-XSRF-TOKEN": "02df4d",
+ "X-Requested-With": "XMLHttpRequest",
+ "Cookie": "SUB=_2A25OhDMBDeRhGeBP7lMU-SbKyT6IHXVth11JrDV6PUJbktANLRWkkW1NRVCnaEGA4AX519XF_MMcAtaGMSsUMm8O; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9W5zvTRN3-zLYaqhYgDN-85m5NHD95QceK-pSK.RSozEWs4Dqc_zi--Xi-zRiKy2i--NiKnRi-zpi--Ri-8si-zXi--Ri-8siKL2i--NiKLWiKnXi--4iK.Ri-z0i--fiK.0i-2fi--fiK.0i-2f; SSOLoginState=1669350225; _T_WM=83963246267; WEIBOCN_FROM=1110006030; MLOGIN=1; __bid_n=184ad2413b313dd7364207; FPTOKEN=30$YVRFaVAuo0Hb+ZCeGddk5p5th37hiAH3OD/a7GIZ/EifG6bPi/j090zR3KK9++fg6peU2CIuZsSJWb/gQj1NoDsjbDRvlOefETnNuv4Zx11df54uM5cp7GO2lRldfLaA/H0Y1zFlg/Et1NrarB+/IC8nPG9aAU2D70bJMXbH2aik3ZAMz6ybL1NhYR6i9lr5t0C1gGbRj585QemHLaRPPW+34QZApuuOhdJhI5rUu0OeCbHkoapziul6hHk+JUco1CFHGxiBnJPluvUa+VmnTGOUBxaur7ndbiECeY9AZOyh/cY2gfnBjO37BqXekHdwimFEqIpYaTUFDpMmOCS/DnRhY6nfcZ4xLtQclnzUHMZiywGLlV0rmzQujNPb6EgK|5ByMysyxg5uNsuQAFYoC08fks57jzZCDUASGWGvQH9U=|10|60e86591f8b07231e25ee3e8ee7a1014; XSRF-TOKEN=02df4d; mweibo_short_token=cf30a892e2; BAIDU_SSP_lcr=https://cn.bing.com/; M_WEIBOCN_PARAMS=oid%3D4839655495435847%26luicode%3D20000061%26lfid%3D4839655495435847%26uicode%3D20000061%26fid%3D4839655495435847"
+ }
+
+ lasterTime = ""
+ flag=0
+
+ # 爬虫基本功能部分,返回网页的一个json
+    def get_tweets(self, URL, page, ippool):
+        url = URL.format(str(page))
+        while True:
+            try:
+                # rotate to a random proxy on every attempt
+                proxy_ip = "http://" + random.choice(ips)
+                time.sleep(3)
+                res = requests.get(url, headers=self.header, proxies={"http": proxy_ip})
+                res.encoding = 'utf-8'
+                jd = json.loads(res.text)
+                # print(jd)
+
+            except Exception:
+                print('proxy problem, retrying with another ip')
+                continue
+
+            if (jd['ok'] == 0) and ("这里还没有内容" in str(jd)):
+                print(jd)
+                return 0
+
+            if jd['ok'] == 0:
+                print('failed to fetch this page, retrying with another ip')
+            else:
+                break
+
+        # page 1 of the feed is laid out slightly differently
+ if page == 1:
+ if 'card_id' in jd['data']['cards'][0]:
+ if jd['data']['cards'][0]['card_id'] == 'card_hq_poiweibo':
+ tweets = jd['data']['cards'][0]['card_group']
+ return tweets
+ else:
+ tweets = jd['data']['cards'][1]['card_group']
+ return tweets
+ else:
+ card_id=jd['data']['cards'][0]['card_id']
+ if(card_id!="hot_search"):
+ tweets = jd['data']['cards'][1]['card_group']
+ else:
+ tweets = jd['data']['cards'][0]['card_group']
+
+ # print(tweets)
+ return tweets
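+    # Expected payload shape, inferred from the access patterns above (a sketch):
+    # {"ok": 1, "data": {"cards": [{"card_id": "...", "card_group": [{"mblog": {...}}, ...]}]}}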
+
+    def writedb(self, items, page):
+        # walk every post on this page
+        if items:
+            print("number of posts", len(items))
+            for i in range(len(items)):
+                print("post", items[i])
+                # assemble one row of post data
+                temp = [0 for i in range(13)]  # 13 slots, one per column below
+                # print(temp)
+ # print(temp)
+ if 'mblog' in items[i]:
+ temp[0] = items[i]['mblog']['id']
+ if "id" in items[i]['mblog'] and temp[0] is not None:
+ temp[1] = current_time
+ temp[2] = items[i]['mblog']['created_at']
+ temp[3] = items[i]['mblog']['user']['id']
+ temp[4] = items[i]['mblog']['source']
+ temp[5] = re.sub("[A-Za-z0-9\!\%\[\]\,\。\<\-\=\"\:\/\.\?\&\_\>\'\;\ ]", "", items[i]['mblog']['text'])
+                    s2 = SnowNLP(temp[5])  # sentiment score available as s2.sentiments
+                    # print(temp[5], s2.sentiments)
+ temp[6] = items[i]['mblog']['reposts_count']
+ temp[7] = items[i]['mblog']['comments_count']
+ temp[8] = items[i]['mblog']['attitudes_count']
+ temp[9] = items[i]['mblog']['pending_approval_count']
+ temp[10] = place
+ temp[11] = scenicId
+
+                    # strip stray quote characters from the source field
+                    temp[4] = temp[4].replace("'", "").replace('"', '')
+                    temp[5] = str(temp[5]).replace("#", "").replace("🌸", "")
+                    # print("comment text type", type(temp[5]))
+
+                    # created_at looks like "Sun Dec 11 12:34:56 +0800 2022"
+                    remarkTime = datetime.datetime(*time.strptime(temp[2], '%a %b %d %H:%M:%S +0800 %Y')[:6])
+                    # print("newest comment time for this spot", remarkTime)
+
+                    flag = 0
+                    args = (temp[11], temp[10], temp[3], temp[4], str(temp[5]), temp[6], temp[7], temp[8], remarkTime,
+                            datetime.date.today())
+                    sql = "insert into weibosign(scenicId,scenicName,user_id,sourcefrom,content,reports_count,comments_count,attitudes_count,sign_time,crawlTime) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
+                    if self.lasterTime is None:
+                        # nothing stored for this spot yet: write every post
+                        insert(sql, args)
+                        print("inserted row", temp)
+                        print('Page', page, ' post %s written to the weibosign table' % temp[0])
+                        flag = 1
+                        self.flag = flag
+                        # return flag
+                    elif self.lasterTime < remarkTime:
+                        # newer than the last stored comment: write it
+                        insert(sql, args)
+                        print('Page', page, ' post %s written to the weibosign table' % temp[0])
+                        flag = 1
+                        self.flag = flag
+                    else:
+                        flag = 0
+                        self.flag = flag
+
+        else:
+            pass
+
+    # crawl the check-in posts for one scenic spot (row indexes the scenics query)
+    def main(self, row, ippool):
+
+        global conn, cur, place, pid, scenicId
+        scenic = getRows("select name,wb_scenicId,id from scenics where wb_scenicId!=''", None)
+        # look up the name, Weibo POI id and primary key for this row
+        place = scenic[row][0]
+        pid = scenic[row][1]
+        scenicId = scenic[row][2]
+        print("spot name: %s, spot id: %s, poi id: %s" % (place, scenicId, pid))
+
+        # check whether this spot's newest post has already been crawled
+        selectHasTimeSql = "select sign_time from weibosign where scenicId=%s order by sign_time desc"
+        databaseComment = getRows(selectHasTimeSql, scenicId)
+        print("stored comment times for this spot", databaseComment)
+        # newest comment time seen on the previous crawl
+        lasterDate = None
+        if databaseComment:
+            lasterDate = databaseComment[0][0]
+            print("newest stored time is", lasterDate)
+
+        self.lasterTime = lasterDate
+
+        print('****************** start crawling posts for %s *******************************' % place)
+        try:
+            time_start = time.time()
+
+            # crawl at most 150 pages
+            for page in range(1, 150):
+                # POI feed URL for this spot
+                URL = 'https://m.weibo.cn/api/container/getIndex?containerid=' + pid + f'&luicode=10000011&lfid=100103type%3D1%26q%3D%E6%AD%A6%E6%B1%89%E5%A4%A7%E5%AD%A6&page={page}'
+                print('crawling', place, 'page', page)
+
+                # timestamp of this crawl pass
+                global current_time
+                current_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
+                # fetch one page of posts as JSON
+                tweets = self.get_tweets(URL, page, ippool)
+
+                # "周边值得去" marks the end of the feed
+                if "周边值得去" in str(tweets):
+                    print('reached the end of the feed!')
+                    break
+
+                if tweets == 0:
+                    print('page', page, 'is empty, stopping')
+                    break
+
+                self.writedb(tweets, page)
+                flag = self.flag
+                print("crawl result flag", flag)
+                if flag == 0:
+                    print("every new post for this spot is already stored")
+                    break
+                else:
+                    print(place, ' page', page, 'done!')
+                    continue
+
+            time_end = time.time()
+            print(place, ' time cost ', time_end - time_start, 's')
+
+            print('****************** finished crawling %s *******************************' % place)
+
+        except Exception:
+            # log the traceback and fall through to the next spot
+            e = traceback.format_exc()
+            print(e)
+
+        print(place, 'done! waiting for the next run')
+
+    def run(self):
+        # one crawl pass per scenic spot that has a Weibo POI id
+        rows = getRows("select count(*) from scenics where wb_scenicId!=''", None)
+        n = rows[0][0]
+        for i in range(n):
+            self.main(i, ips)
+# if __name__ == '__main__':
+# web = WeiboSignSpider()
+# web.run()
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/files/city.txt b/applications/common/scrapySpiders/wangModel/wangModel/files/city.txt
new file mode 100644
index 0000000..ce5f43c
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/files/city.txt
@@ -0,0 +1,2901 @@
+1096,珠海
+1095,中山
+1824,廉江
+1110,玉林
+1126,海口
+1078,佛山
+1106,柳州
+1082,江门
+1807,顺德区
+1928,罗定
+1871,兴宁
+2174,兴义
+2099,北流
+1859,化州
+2095,岑溪
+1077,东莞
+1080,河源
+3201,遂川县
+2100,博白县
+1977,斗门区
+1084,茂名
+1107,南宁
+2102,容县
+1087,汕头
+3224,修水县
+1092,云浮
+1830,紫金县
+1097,百色
+1202,郴州
+1099,崇左
+1237,抚州
+2101,陆川县
+1085,梅州
+4415,蒙自
+1955,三乡镇
+1870,五华县
+1119,遵义
+2934,道县
+1857,电白区
+1113,贵阳
+1103,河池
+1081,惠州
+2141,剑河县
+1876,连州
+1130,琼海
+1132,三亚
+1089,韶关
+3270,万载县
+1937,徐闻县
+5244,大沥镇
+1864,大埔县
+4030,大竹县
+2001,扶绥县
+1101,桂林
+1105,来宾
+1334,乐山
+4482,马关县
+1338,南充
+1900,南雄
+2939,宁远县
+2076,上林县
+1845,台山
+1192,天门
+1207,湘潭
+1954,小榄镇
+2942,新田县
+4126,营山县
+2863,永兴县
+1093,湛江
+1165,驻马店
+1111,安顺
+2834,安乡县
+1150,安阳
+3177,安远县
+2052,八步区
+2726,巴东县
+1098,北海
+2071,宾阳县
+2152,岑巩县
+5449,春湾镇
+2547,郸城县
+2139,丹寨县
+4544,东阳
+2157,独山县
+1100,防城港
+3264,丰城
+38433,拱北
+1332,广安
+1102,贵港
+2549,淮阳县
+1185,黄石
+71510,华阳镇
+2921,花垣县
+2160,惠水县
+2883,会同县
+1239,吉安
+2922,吉首
+4031,开江县
+2143,凯里
+2161,荔波县
+1128,临高县
+2086,灵山县
+1129,陵水
+2015,荔浦
+1114,六盘水
+2841,澧县
+1986,隆林县
+1895,陆丰
+2063,鹿寨县
+6996,麻陂镇
+1242,南昌
+2044,南丹县
+3173,南丰县
+1156,南阳
+2804,南漳县
+1339,内江
+3186,宁都县
+1582,平潭县
+1869,平远县
+2170,普安县
+2087,浦北县
+1167,濮阳
+4655,黔江区
+2742,蕲春县
+1108,钦州
+32211,琼中县
+4032,渠县
+22290,容桂
+2465,社旗县
+2763,松滋
+1935,遂溪县
+1191,随州
+3202,泰和县
+2553,太康县
+55954,太平镇
+1965,坦洲镇
+1989,田东县
+1991,田阳县
+56404,亭角村
+2172,望谟县
+4065,武胜县
+4634,巫溪县
+2056,武宣县
+1196,襄阳
+2057,象州县
+1861,信宜
+1245,新余
+2743,浠水县
+2468,西峡县
+2733,宣恩县
+1342,雅安
+1922,阳春
+1925,阳西县
+2184,沿河县
+1246,宜春
+3272,宜丰县
+8005,英桥镇
+2185,印江县
+4638,酉阳县
+2890,沅陵县
+3194,于都县
+3254,余干县
+41195,张黄镇
+2175,贞丰县
+1400,重庆
+5663,阿猛镇
+70677,安场镇
+3196,安福县
+62684,安福乡
+5326,安海镇
+2926,安化县
+5812,安江镇
+1315,安康
+5947,安流镇
+2168,安龙县
+2811,安陆
+119929,安平村
+2300,安平县
+5880,安平镇
+53959,安平镇
+83359,安铺村
+5966,安铺镇
+1035,安庆
+80025,安仁村
+2855,安仁县
+81277,安山村
+10351,安山镇
+80003,安乡
+3229,安义县
+6007,安远镇
+4166,安岳县
+1402,中国澳门
+10377,敖溪镇
+21426,八宝镇
+6109,八尺镇
+55677,坝固镇
+6232,把荷乡
+46889,柏布
+6258,白仓镇
+22513,白地市镇
+152616,白濠村
+39726,白河村
+3851,白河县
+113527,百候镇
+5281,白花镇
+6312,白蕉镇
+6347,白螺镇
+89292,白马村
+6361,白芒营镇
+84398,白马镇
+127679,白马镇
+88217,白庙
+101941,白庙村
+1120,白沙
+113723,白沙村
+95583,白沙村
+86354,白沙井村
+39829,白沙镇
+91702,白沙镇
+38940,白沙镇
+32970,白沙镇
+5471,白沙镇
+113785,白沙镇
+6394,白砂镇
+51180,白沙镇
+57718,白沙镇
+71882,白沙镇
+73301,白沙镇
+80920,白沙镇
+74187,白石铺镇
+46904,白石镇
+3901,白水县
+56726,白水镇
+92490,白塔镇
+6445,白圩镇
+1817,白云区
+6596,百丈乡
+6162,八甲镇
+32195,巴铃镇
+2036,巴马县
+72365,巴马镇
+90362,板坝村
+1036,蚌埠
+6788,邦溪镇
+88938,板桥镇
+88396,板桥镇
+5294,板桥镇
+56410,板头村
+1915,宝安区
+2474,宝丰县
+36800,宝丰镇
+1316,宝鸡
+23724,保家村
+76976,保家镇
+2918,保靖县
+2801,保康县
+1373,保山
+6912,鲍峡镇
+6822,包信镇
+6859,宝圩乡
+6860,宝圩镇
+56006,宝圩镇
+3149,宝应县
+5425,八所镇
+6214,八塘镇
+165092,八一村
+94650,八一
+97340,八一农场
+158253,八一乡
+1327,巴中
+84836,北岸村
+4650,北碚区
+50984,北大
+5475,北惯镇
+5243,北滘镇
+11264,北界镇
+11936,北界镇
+11958,北景乡
+35043,北栅
+5435,北盛镇
+43715,北市镇
+18513,贝水村
+7135,北陀镇
+62199,扁牙村
+1112,毕节
+5490,丙村镇
+3137,滨海县
+60270,滨海镇
+3926,彬县
+4643,璧山区
+7396,伯劳镇
+1834,博罗县
+7419,博美镇
+6630,柏埔镇
+6633,柏塘镇
+72956,播植镇
+1051,亳州
+95285,播州区
+5413,步云桥镇
+2788,蔡甸区
+4593,苍南县
+122791,苍山村
+97020,苍山村
+118150,苍山镇
+2091,苍梧县
+4068,苍溪县
+100952,草海村
+124001,草海镇
+100430,草塘镇
+3599,曹县
+21003,草寨村
+2169,册亨县
+7789,茶庵铺镇
+7782,茶安铺镇
+87857,汊涧镇
+2961,茶陵县
+7824,茶林乡
+1778,长安镇
+8688,长布镇
+1200,常德
+2527,长葛
+3213,昌江区
+158241,长坎村
+1575,长乐
+87202,长乐镇
+101301,长隆
+29095,长隆村
+7931,昌明镇
+2867,常宁
+4155,长宁县
+29115,长宁镇
+1780,常平镇
+89181,长坡镇
+92549,长坡镇
+73029,菖蒲乡
+91805,长铺子乡
+89263,长庆桥
+81761,长庆桥镇
+1201,长沙
+4611,常山县
+4656,长寿区
+46990,长寿镇
+62993,长寿镇
+3086,常熟
+2155,长顺县
+98631,长塘村
+163180,长塘村
+91318,长塘镇
+12524,长塘镇
+1590,长汀县
+3927,长武县
+4529,长兴县
+71666,长阳坡村
+8735,长阳铺镇
+2819,长阳县
+2502,长垣县
+1304,长治
+1224,常州
+1768,潮安区
+1037,巢湖
+1892,潮南区
+1884,潮阳区
+1076,潮州
+75124,茶山村
+118128,茶山镇
+1793,茶山镇
+7833,茶阳镇
+14525,车河镇
+8079,陈场镇
+5253,陈村镇
+5315,陈店镇
+157627,城北村
+2899,城步县
+92375,城东
+162867,城东镇
+84829,城东镇
+1328,成都
+3875,城固县
+1885,澄海区
+48068,城皇村
+71088,城隍镇
+8264,城月镇
+48033,陈家
+37699,陈家村
+153961,陈江村
+8171,陈江镇
+2880,辰溪县
+8067,车田镇
+2794,赤壁
+73040,赤光镇
+1939,赤坎区
+48785,赤坎镇
+56747,赤眉镇
+46535,赤水
+2189,赤水
+5402,赤水镇
+72605,赤水镇
+15238,池尾镇
+1038,池州
+87855,冲花村
+8347,冲蒌镇
+3165,崇仁县
+67366,崇仁镇
+2795,崇阳县
+3178,崇义县
+4008,崇州
+9156,船步镇
+9162,船塘镇
+4514,淳安县
+97498,楚雄
+1374,楚雄州
+1039,滁州
+79294,慈化村
+87578,慈化镇
+2956,慈利县
+4564,慈溪
+1823,从化区
+113528,从化庄村
+2138,从江县
+98351,翠亨村
+2972,大安市
+6597,大安镇
+151979,大坳村
+16110,大坳村
+16118,大坝镇
+5328,达埔镇
+64244,达川区
+89331,大垌镇
+2114,大方县
+16437,大付
+115554,大付村
+74283,大福镇
+16050,达濠
+2037,大化县
+71173,大湖镇
+9736,待补镇
+89665,大井镇
+88151,大岚村
+1781,大朗镇
+35101,大理
+1251,大连
+128076,大良口村
+44573,大良镇
+1375,大理州
+98317,大沥村
+1779,大岭山镇
+3903,大荔县
+70660,大龙村
+91669,大路村
+68800,大伦镇
+33621,大庙村
+99058,大庙镇
+10362,旦场镇
+3887,丹凤县
+95355,丹凤镇
+1525,砀山县
+2820,当阳
+58664,大宁
+2766,丹江口
+4104,丹棱县
+39858,淡水
+4721,淡水区
+3157,丹阳
+5246,丹灶镇
+1137,儋州
+5477,道滘镇
+81736,道口镇
+2190,道真县
+70592,大坪
+101229,大坪乡
+80864,大坪乡
+91666,大坪镇
+64055,大坡外镇
+16550,大坡镇
+112419,大桥村
+75577,大桥村
+88613,大桥镇
+11327,大桥镇
+88577,大桥镇
+55386,大桥镇
+56959,大仁庙村
+86869,大沙镇
+88614,大沙镇
+5323,大沙镇
+46940,大盛镇
+3384,大石桥
+32873,大石镇
+16399,大寺镇
+60576,大塘村
+86418,大塘镇
+1638,大田县
+1305,大同
+35128,大通湖区
+82909,大旺镇
+153072,大旺镇
+95027,大湾镇
+98594,大围
+2812,大悟县
+2000,大新县
+57398,大新镇
+5187,大溪镇
+9672,大瑶镇
+164610,大亚湾村
+2745,大冶
+4140,大英县
+112888,大涌村
+5234,大涌镇
+3179,大余县
+46769,大镇
+73122,大镇镇
+9712,大直镇
+1329,达州
+154825,大竹村
+89516,大竹镇
+4620,大足区
+3215,德安县
+1982,德保县
+4087,德昌县
+114691,德城
+1626,德化县
+2177,德江县
+2534,登封
+10550,邓家铺镇
+3350,灯塔
+94541,灯塔村
+87239,灯塔镇
+169775,登云
+150557,登云镇
+2460,邓州
+1943,德庆县
+10482,德胜镇
+3247,德兴
+1330,德阳
+83487,电城镇
+4621,垫江县
+9911,底庙镇
+1124,定安县
+82281,丁河镇
+1951,鼎湖区
+3180,定南县
+74988,底蓬镇
+2935,东安县
+56177,东陂镇
+87196,东城
+116819,东城
+4437,东川区
+1125,东方
+95933,东方
+1971,东凤镇
+3312,东港
+88184,东港镇
+3057,东海县
+1798,东坑镇
+2900,洞口县
+92456,洞口乡
+2038,东兰县
+69724,洞利
+46822,东平村
+3674,东平县
+56001,东平镇
+5308,东平镇
+55643,东平镇
+10085,东圃镇
+113085,东区
+1660,东山县
+70132,东胜村
+3433,东胜区
+5478,东升镇
+88105,东水镇
+3139,东台
+3166,东乡区
+10928,东乡镇
+16866,东溪村
+2007,东兴
+82927,东兴镇
+84803,东溪镇
+1289,东营
+112885,东涌村
+158252,东涌镇
+1825,东源县
+1453,东至县
+9897,斗山镇
+45616,都安乡
+2039,都安县
+3216,都昌县
+48208,都川镇
+5005,都斛镇
+9947,对江镇
+9877,都结乡
+162994,渡口村
+60838,独山镇
+9974,都阳镇
+2156,都匀
+4076,峨眉山
+1841,恩平市
+2727,恩施
+1183,恩施州
+74529,二郎镇
+88435,二塘镇
+5436,二塘镇
+1182,鄂州
+10119,发轮镇
+158254,法那村
+2011,防城区
+2461,方城县
+21087,芳村
+1567,房山区
+2767,房县
+1819,番禺区
+9828,飞仙镇
+92211,凤村镇
+4622,丰都县
+2191,凤冈县
+1792,凤岗镇
+126736,凤岗镇
+87907,凤凰
+2919,凤凰县
+84438,凤凰乡
+4565,奉化区
+4623,奉节县
+1944,封开县
+61058,峰口镇
+4446,凤庆县
+2503,封丘县
+56562,凤山村
+2040,凤山县
+100443,凤山镇
+56989,凤山镇
+1865,丰顺县
+5460,枫亭镇
+3864,凤翔区
+3265,奉新县
+71201,丰阳镇
+11434,丰阳镇
+72931,分界镇
+121658,分水坳镇
+5575,汾水道
+86548,分水镇
+39995,分水镇
+39830,分水镇
+3260,分宜县
+58704,佛冈村
+1873,佛冈县
+1609,福安
+2048,富川县
+9978,福德镇
+1610,福鼎
+45079,福鼎村
+3865,扶风县
+88567,佛岗村
+2548,扶沟县
+31471,扶合镇
+46641,伏虎镇
+30899,富家村
+78117,富家镇
+4657,涪陵区
+10078,富罗镇
+1469,阜南县
+3140,阜宁县
+4479,富宁县
+3904,富平县
+1576,福清
+2158,福泉
+84885,芙蓉镇
+5197,芙蓉镇
+167272,福山村
+3710,福山区
+170590,福山镇
+11694,福山镇
+1974,阜沙镇
+4171,富顺县
+38419,富顺镇
+17282,富田镇
+41501,扶新镇
+1040,阜阳
+5782,富驿镇
+4458,富源县
+1053,福州
+88369,岗背镇
+31096,港口村
+92538,港口镇
+46869,港口镇
+5934,岗美镇
+5874,赶水镇
+3181,赣县区
+3058,赣榆区
+1238,赣州
+3266,高安
+79020,高安村
+52807,高陂镇
+69601,高陂镇
+1788,高埗镇
+23655,高峰镇
+6028,高家堰镇
+6057,高良镇
+1809,高明区
+74340,高明乡
+89238,高坪
+4129,高坪区
+84873,高坪乡
+95601,高坪镇
+11779,高平镇
+154180,高坪镇
+89195,高坡
+100109,高坡村
+6083,高坡镇
+35326,高埔镇
+46631,高桥
+102718,高桥镇
+100003,高桥镇
+56439,高桥镇
+86519,高沙
+162990,高沙村
+5438,高沙镇
+1761,高台县
+4156,高县
+1945,高要区
+3150,高邮
+1858,高州
+4408,个旧
+56256,公安村
+74631,公安县
+79668,公安镇
+162103,蚣坝镇
+2012,恭城县
+5506,恭城镇
+86846,公馆村
+85470,公馆村
+5265,公馆镇
+12053,公会镇
+43701,巩桥
+70392,巩桥村
+6253,共青城
+4163,珙县
+88179,公庄镇
+6274,构林镇
+16787,关埠镇
+1773,莞城
+45604,官渡镇
+42595,官渡镇
+4067,广安区
+3168,广昌县
+3248,广丰区
+6583,广福乡
+70692,广福镇
+91134,广福镇
+56659,广福镇
+5004,广海镇
+12853,广华
+71131,广华岭
+47757,厂窖镇
+4480,广南县
+1946,广宁县
+2516,光山县
+2776,广水
+1333,广元
+44558,广园新村
+1598,光泽县
+1079,广州
+38172,观澜镇
+2107,关岭县
+91681,官桥镇
+13032,官桥镇
+42617,官桥镇
+5442,官桥镇
+5420,冠市镇
+98999,关王镇
+6518,官圩镇
+2013,灌阳县
+6566,灌涨镇
+6525,官庄乡
+103872,官庄乡
+69472,官庄乡
+7683,官庄镇
+27265,官庄子村
+6474,观珠镇
+2802,谷城县
+99927,古城镇
+81408,归朝村
+6620,归朝镇
+31406,桂城
+2159,贵定县
+2857,桂东县
+2030,桂平
+6639,贵台镇
+6647,桂头镇
+3275,贵溪
+2858,桂阳县
+6642,贵子镇
+4178,古蔺县
+60496,谷陇镇
+81823,郭集镇
+6742,果遂乡
+5313,谷饶镇
+2515,固始县
+5397,古水镇
+12391,古宋镇
+6330,古潭乡
+1611,古田县
+1274,固原
+2920,古丈县
+1953,古镇镇
+6346,古竹镇
+3078,海安
+54008,海安镇
+1894,海丰县
+3079,海门
+70590,海门镇
+118157,海田村
+3538,海晏县
+1815,海珠区
+2813,汉川
+1142,邯郸
+6846,寒冻镇
+6820,浛洸镇
+1389,杭州
+2792,汉口
+1447,含山县
+2836,汉寿县
+95478,汉塘村
+3852,汉阴县
+4147,汉源县
+1317,汉中
+91837,蒿板镇
+6915,好义镇
+6925,郝寨镇
+64326,黄坭乡
+1151,鹤壁
+4624,合川区
+1041,合肥
+2728,鹤峰县
+46957,荷花镇
+98491,合江村
+56719,合江村
+4179,合江县
+99849,合江镇
+105657,合江镇
+55887,合江镇
+6932,禾加镇
+7111,贺街镇
+4409,河口县
+62197,河口镇
+1010,河南
+95168,河南岸镇
+7202,横板桥镇
+80946,横陂镇
+2868,衡东县
+1968,横栏镇
+1802,横沥镇
+2869,衡南县
+7228,横琴镇
+2870,衡山县
+2072,横县
+1203,衡阳
+112095,和平村
+112145,和平村
+162996,和平村
+1826,和平县
+44115,和平乡
+5316,和平镇
+48621,河婆镇
+91610,河浦
+1995,合浦县
+1842,鹤山
+2053,合山
+48054,何市镇
+61652,何市镇
+154411,合水村
+1739,合水县
+53440,合水镇
+45754,合水镇
+5321,荷塘镇
+119734,荷塘镇
+1448,和县
+7102,荷香桥镇
+56623,河西
+1290,菏泽
+2115,赫章县
+1104,贺州
+2734,红安县
+63462,洪安镇
+7281,红果镇
+1378,红河州
+4410,红河县
+1199,洪湖
+2881,洪江
+7399,洪濑镇
+10203,宏路镇
+102939,洪桥村
+118713,洪桥村
+4193,红桥区
+35513,洪桥社区
+39327,红桥镇
+5107,洪桥镇
+42610,洪阳镇
+87516,红岩寺镇
+86885,红岩寺镇
+4105,洪雅县
+3048,洪泽区
+35596,猴场村
+57803,猴场镇
+117437,猴场镇
+80783,后湖村
+1777,厚街镇
+82737,鮜门镇
+68017,鲘门镇
+7544,厚坡镇
+5463,华城镇
+60759,华东村
+1820,花都区
+1225,淮安
+1042,淮北
+152609,怀德
+1204,怀化
+1947,怀集县
+7869,淮口镇
+1043,淮南
+1425,怀宁县
+75784,怀乡
+1439,怀远县
+38971,怀远镇
+71004,花桥镇
+2790,黄陂区
+48702,黄陂镇
+56392,黄陂镇
+5278,黄埠镇
+7952,黄布镇
+5430,黄材镇
+2523,潢川县
+87190,黄村
+1184,黄冈
+121069,黄阁镇
+7984,黄果树镇
+7995,黄槐镇
+1789,黄江镇
+47039,黄金埠镇
+8022,黄练镇
+85249,黄龙村
+42712,黄毛元镇
+2736,黄梅县
+2140,黄平县
+5469,黄坡镇
+1818,黄埔区
+1969,黄圃镇
+169664,黄桥
+170511,黄桥村
+74917,黄桥镇
+14072,黄桥镇
+156147,黄桥镇
+59915,黄塘镇
+87179,黄田镇
+55297,黄田镇
+4591,黄岩区
+2744,黄州区
+4494,华宁县
+2042,环江县
+91696,花桥村
+13919,花桥村
+158459,花桥村
+150417,花桥村
+45134,花桥乡
+84365,花桥镇
+52979,花桥镇
+102795,花桥镇
+100760,花桥镇
+5058,花桥镇
+63260,花桥镇
+2725,华容区
+2946,华容县
+91312,花坦乡
+2410,滑县
+72597,华阳
+4063,华蓥
+1627,惠安县
+3183,会昌县
+1838,惠城区
+1835,惠东县
+1850,惠来县
+163937,回龙村
+8268,回龙寺镇
+60703,回龙乡
+7687,回龙镇
+95900,回龙镇
+87680,回龙镇
+60651,回马镇
+1839,惠阳区
+4459,会泽县
+105908,胡集
+5017,胡集镇
+3217,湖口县
+1776,虎门镇
+8310,火厂坪镇
+8315,火连坡镇
+3775,霍州
+1390,湖州
+165462,加禾
+165040,加禾村
+2859,嘉禾县
+121859,加禾乡
+4077,夹江县
+4517,建德
+4157,江安县
+3151,江都区
+4069,剑阁县
+2936,江华县
+152362,蒋家坪村
+5415,蒋家桥镇
+4625,江津区
+150783,江口
+146055,江口村
+2178,江口县
+162995,江口乡
+111830,江口镇
+120990,江口镇
+46072,江口镇
+51168,江口镇
+45152,江口镇
+78575,江口镇
+1640,将乐县
+2761,江陵县
+84314,江门镇
+2081,江南区
+4612,江山
+53360,江信村
+112085,姜圩乡
+3109,姜堰区
+3116,江阴
+2937,江永县
+3141,建湖县
+2760,监利县
+1639,建宁县
+1600,建瓯
+93346,尖沙咀
+2729,建始县
+4411,建水县
+4082,犍为县
+155085,建兴乡
+74319,建兴镇
+32532,简阳
+123632,建阳村
+1599,建阳区
+145006,滘口村
+1153,焦作
+2480,郏县
+1391,嘉兴
+2796,嘉鱼县
+53284,甲子镇
+1851,揭东区
+10118,碣石镇
+1471,界首
+1852,揭西县
+1083,揭阳
+99001,鸡街镇
+99534,鸡街镇
+5280,吉隆镇
+89343,集美区
+1291,济南
+8775,金钗镇
+1306,晋城
+2047,金城江区
+10060,金鼎镇
+8813,金渡镇
+3267,靖安县
+1737,泾川县
+4988,泾川镇
+1240,景德镇
+4469,景东县
+3200,井冈山
+4488,景洪
+3110,靖江
+1186,荆门
+2754,京山市
+1983,靖西
+4078,井研县
+1187,荆州
+2884,靖州县
+9029,敬梓镇
+48057,靖海镇
+10006,金和镇
+1392,金华
+3050,金湖县
+1292,济宁
+1629,晋江
+40858,金江镇
+79215,金井镇
+5330,金井镇
+8865,金孔镇
+46002,金兰镇
+10004,金利镇
+164069,金牌村
+4412,金平县
+1888,金平区
+2142,锦屏县
+93532,金沙
+165284,金沙
+2116,金沙县
+49579,金沙镇
+2837,津市
+10003,金石桥镇
+4011,金堂县
+68855,金淘镇
+58560,金屋村
+83364,进贤村
+3230,进贤县
+91189,金溪村
+88338,金溪村
+2054,金秀县
+3169,金溪县
+73098,金溪镇
+4562,缙云县
+1510,金寨县
+112886,金洲村
+154822,金洲村
+31506,金洲镇
+8942,筋竹镇
+3199,吉水县
+56545,久长镇
+10108,九重镇
+157727,九丰村
+38329,九峰镇
+10095,九公桥镇
+9055,九和镇
+1241,九江
+5251,九江镇
+75482,九隆
+4662,九龙城区
+29675,九龙塘
+45954,九龙镇
+9075,九市镇
+40859,九所镇
+57001,九王庙村
+39439,九支镇
+100004,旧州村
+71372,旧州镇
+5551,旧州镇
+79556,旧州镇
+1152,济源市
+3208,吉州区
+4164,筠连县
+2418,浚县
+1154,开封
+1844,开平
+2124,开阳县
+4413,开远
+4626,开州
+48071,克度镇
+9354,葵潭镇
+1379,昆明
+3087,昆山
+1462,来安县
+2730,来凤县
+9380,拉烈镇
+38660,郎岱镇
+56944,郞岱镇
+41696,琅东村
+9476,榔坪镇
+48812,朗塘镇
+4127,阆中
+2436,兰考县
+9454,蓝口镇
+3644,兰陵县
+9432,兰陵镇
+91800,兰山镇
+9458,蓝塘镇
+1068,兰州
+51436,老城
+15740,老城村
+105809,老城乡
+88999,老城镇
+2803,老河口
+15802,老隆镇
+2938,蓝山县
+3915,蓝田县
+3170,乐安县
+1899,乐昌
+5252,乐从镇
+103866,乐东村
+4092,雷波县
+2144,雷山县
+2874,耒阳
+1934,雷州
+85934,勒流镇
+42040,乐民镇
+41206,乐民镇
+81306,水口村
+2893,冷水江
+2944,冷水滩区
+85274,冷水镇
+3212,乐平
+27434,乐平村
+69611,乐平乡
+4595,乐清
+1984,乐业县
+79191,乐业镇
+36802,乐至县
+95202,连城村
+1591,连城县
+48627,良垌镇
+22907,良坊镇
+4627,梁平区
+71089,良田镇
+87924,两英镇
+74814,莲花村
+3240,莲花县
+162818,莲花镇
+98436,莲花镇
+82891,连江村
+1578,连江县
+103699,连江镇
+154823,连界村
+48547,连界镇
+1874,连南县
+1828,连平县
+1875,连山县
+54013,莲塘镇
+13730,连滩镇
+2894,涟源
+1226,连云港
+116647,连州镇
+1797,寮步镇
+1294,聊城
+2731,利川
+3171,黎川县
+86842,烈面镇
+10058,黎埠镇
+78470,立化镇
+46943,里湖镇
+1380,丽江
+2966,醴陵
+43712,栗木镇
+1381,临沧
+55863,林尘镇
+1308,临汾
+2484,灵宝
+14079,岭背镇
+1521,灵璧县
+2017,灵川县
+54981,灵峰镇
+57940,酃湖乡
+2945,零陵区
+87204,岭门镇
+70428,灵山镇
+2016,临桂区
+1985,凌云县
+82583,凌云乡
+4583,临海
+2981,临江
+112087,临江镇
+88223,临江镇
+45467,临江镇
+2838,临澧县
+81259,伶俐镇
+1472,临泉县
+4064,邻水县
+13950,林头镇
+99923,林头镇
+2860,临武县
+2947,临湘
+1295,临沂
+2569,临颍县
+64192,林寨镇
+2411,林州市
+2145,黎平县
+3929,礼泉县
+122190,厉山镇
+76813,黎少镇
+56320,李市镇
+82965,犁市镇
+13608,李市镇
+1393,丽水
+152443,黎塘村
+5850,黎塘镇
+87188,里田乡
+48505,里田乡
+1045,六安
+2060,柳城县
+6400,六都寨镇
+45938,六合村
+3068,六合区
+75523,刘家场村
+7634,刘家场镇
+15390,六靖镇
+6426,六景镇
+35707,六麻镇
+51454,柳泉铺镇
+91242,流沙
+91243,流沙村
+98998,流沙镇
+55484,流沙镇
+5194,柳市镇
+2848,浏阳
+71247,六枝村
+2134,六枝特区
+1814,荔湾区
+151280,立新村
+115887,立新村
+165273,立新村
+112350,立新村
+122727,立新村
+69542,立新村
+1550,利辛县
+3042,溧阳
+46917,李寨
+60618,李寨镇
+2073,隆安县
+6487,龙布镇
+4131,隆昌
+1829,龙川县
+6496,龙村镇
+75277,龙港村
+1916,龙岗区
+146450,龙港市
+5191,龙港镇
+67011,龙港镇
+121181,龙光乡
+1662,龙海
+2901,隆回县
+1890,龙湖区
+35712,龙虎山镇
+88391,龙虎乡
+127995,龙湖镇
+110469,龙江
+51037,龙江村
+97285,龙江镇
+5248,龙江镇
+10243,隆江镇
+71363,龙见田村
+10244,隆街镇
+75507,龙结镇
+3703,龙口
+87384,龙口镇
+98340,龙口镇
+2162,龙里县
+43965,龙马村
+80393,龙门
+95132,龙门村
+121758,龙门村
+1837,龙门县
+60387,龙门镇
+169776,龙门镇
+68518,龙门镇
+74721,龙母镇
+89567,龙南
+85284,龙南村
+3184,龙南县
+62081,龙泉镇
+61002,龙泉镇
+155977,龙山村
+48189,龙山
+2923,龙山县
+54090,龙山镇
+2018,龙胜县
+10250,隆盛镇
+60104,隆盛镇
+5241,龙市镇
+164825,龙潭村
+165121,龙潭村
+86358,龙潭镇
+146143,龙潭镇
+95156,龙潭镇
+90519,龙潭镇
+5392,龙潭镇
+80266,龙潭镇
+39860,龙潭镇
+112082,龙头镇
+87906,龙头镇
+77166,龙湾镇
+64505,龙窝镇
+46852,龙溪
+145197,龙溪铺村
+74922,龙溪铺镇
+37014,龙溪乡
+52030,龙圩区
+1054,龙岩
+122261,龙眼村
+10239,龙镇
+89394,龙镇村
+2002,龙州县
+1205,娄底
+10434,禄步镇
+4414,绿春县
+35736,鲁沟村
+72832,鲁沟村
+5221,泸沽湖镇
+1896,陆河县
+6663,芦洪市镇
+1449,庐江县
+10683,锣场镇
+2043,罗城县
+90598,罗冲围村
+2163,罗甸县
+84588,罗店镇
+56017,罗浮镇
+98341,萝岗镇
+10593,罗岗镇
+1166,漯河
+71170,骆湖镇
+32770,罗家桥
+91833,罗家镇
+5446,罗镜镇
+6853,罗坎镇
+3888,洛南县
+4461,罗平县
+77143,罗平镇
+156542,罗平镇
+10693,骡坪镇
+98490,罗山村
+2518,罗山县
+2737,罗田县
+49711,洛香镇
+57236,罗秀镇
+98494,罗秀镇
+1155,洛阳
+55453,罗阳村
+10719,洛阳镇
+46949,洛阳镇
+80744,洛阳镇
+1579,罗源县
+56429,罗源镇
+4592,路桥区
+2475,鲁山县
+2485,卢氏县
+89217,陆屋村
+10394,陆屋镇
+4181,泸县
+4420,泸西县
+2925,泸溪县
+3241,芦溪县
+2550,鹿邑县
+1346,泸州
+1046,马鞍山
+5165,马鞍镇
+75589,马安镇
+70637,马坝镇
+6998,麻布岗镇
+165292,马场坪
+58913,马场坪村
+144693,马场平村
+46887,马场坪村
+165869,马场坪村
+60078,马场坪村
+2738,麻城
+168717,马达
+47582,马房镇
+7011,麻岗镇
+11001,马贵镇
+64782,卖酒镇
+2146,麻江县
+55697,麻江乡
+158172,马家湾
+96103,马家湾村
+67502,马街
+105366,马街村
+11051,马街乡
+35767,马街镇
+11008,马迹塘镇
+4481,麻栗坡县
+11424,芒部镇
+7291,茅草街镇
+7294,茅店镇
+163335,毛咀村
+48229,茂兰镇
+1862,茂南区
+89291,茅湾村
+84983,茂芝村
+59627,毛嘴镇
+156236,马平村
+11144,马坪镇
+75455,马坪镇
+5347,马坡镇
+143737,马山村
+35741,马山
+2074,马山县
+114527,马山乡
+57014,马山乡
+11187,马水镇
+39330,马踏镇
+56049,马踏镇
+7144,马田镇
+11224,马头镇
+41218,马头镇
+79948,码头镇
+10897,麻尾镇
+11237,马圩镇
+10902,麻溪铺镇
+2885,麻阳县
+56234,马牙
+1942,麻章区
+5440,梅城镇
+69432,梅城镇
+83485,梅川镇
+4093,美姑县
+95903,梅花村
+87580,梅花镇
+1872,梅江区
+22078,梅林村
+88408,梅林镇
+87851,梅陇镇
+1336,眉山
+82207,梅山镇
+5513,梅山镇
+105332,美台村
+159804,美台乡
+46951,梅塘镇
+2201,湄潭县
+1867,梅县区
+79206,梅仙镇
+68180,梅州村
+1551,蒙城县
+39501,濛江镇
+2092,蒙山县
+94900,蒙圩镇
+7481,棉湖镇
+3880,勉县
+1337,绵阳
+7485,棉洋镇
+7710,庙岔镇
+7457,米场镇
+4388,弥渡县
+4416,弥勒
+2952,汨罗
+37202,明港
+71262,明港镇
+5147,明港镇
+86857,茗山村
+4149,名山区
+78451,明溪村
+1641,明溪县
+1763,民乐县
+155605,民乐镇
+1581,闽清县
+2492,民权县
+5235,民众镇
+2558,泌阳县
+4473,墨江县
+91187,谟岭村
+57352,木格镇
+5424,那大镇
+7899,那霍镇
+5445,那良镇
+7904,那林镇
+46893,那龙镇
+7982,那马镇
+35832,那蒙镇
+1630,南安
+4649,南岸区
+91255,南安乡
+59467,南白镇
+4121,南部县
+1774,南城
+3172,南城县
+4628,南川区
+102452,南大村
+92397,南渡镇
+13183,南渡镇
+5373,南丰镇
+72405,南丰镇
+61877,南岗村
+164826,南岗村
+164705,南岗村
+1806,南海区
+4376,南华县
+8144,南江口镇
+4003,南江县
+47329,南江乡
+4389,南涧县
+1227,南京
+56490,南径镇
+62067,南经镇
+3185,南康区
+71945,南岭镇
+83420,南龙村
+13366,南龙村
+46942,南门镇
+1055,南平
+86944,南平镇
+87184,南桥镇
+1821,南沙区
+48363,南沙镇
+113847,南沙镇
+8353,楠市镇
+13065,南宝镇
+52006,南塘镇
+1228,南通
+2927,南县
+61885,南晓镇
+53436,南溪村
+39982,南溪
+89307,南兴镇
+4158,南溪区
+4533,南浔区
+48675,南阳油田
+35813,南油社区
+2463,南召县
+102653,南镇
+94639,南镇村
+5249,南庄镇
+1987,那坡县
+55503,那坡镇
+8004,那洒镇
+8006,那桐镇
+12984,那务镇
+4184,纳溪区
+2117,纳雍县
+2412,内黄县
+48037,内湖镇
+2464,内乡县
+1394,宁波
+1056,宁德
+4567,宁海县
+1642,宁化县
+2493,宁陵县
+2003,宁明县
+117587,宁潭镇
+1742,宁县
+2846,宁乡
+156239,牛山村
+165106,牛头村
+49784,牛头村
+146128,排坊
+1498,潘集区
+163336,潘龙村
+96679,蟠龙乡
+100262,蟠龙镇
+166544,盘县
+165646,盘县村
+1340,攀枝花
+2135,盘州
+98804,炮台
+125691,炮台镇
+4123,蓬安县
+5305,蓬壶镇
+5382,朋口镇
+4107,彭山区
+4629,彭水县
+4141,蓬溪县
+3220,彭泽县
+8602,彭寨镇
+4012,彭州
+56688,坪坝镇
+2108,平坝区
+4417,屏边县
+32437,屏边乡
+82188,平昌村
+4004,平昌县
+78749,平定村
+1157,平顶山
+3833,平定县
+42619,平定镇
+42788,平伐村
+1988,平果县
+1664,平和县
+4538,平湖
+2948,平江县
+82092,平江镇
+68613,平吉镇
+2019,平乐县
+1071,平凉
+8635,平陵镇
+3854,平利县
+3838,平陆县
+125633,平南
+1613,屏南县
+2032,平南县
+83119,屏南镇
+157728,平桥村
+92203,平桥村
+2337,平山县
+4159,屏山县
+92330,屏山乡
+45686,屏山乡
+78759,平山镇
+92250,平山镇
+60567,平山镇
+62845,平山镇
+91944,坪石村
+64255,坪石村
+10229,坪石乡
+2164,平塘县
+73738,平塘乡
+72169,平塘镇
+10216,平望镇
+1243,萍乡
+2004,凭祥
+69021,平阳村
+4596,平阳县
+6462,平阳镇
+32187,平远镇
+2559,平舆县
+10220,平政镇
+48161,平海镇
+1941,坡头区
+10510,坡头镇
+3246,鄱阳县
+1602,浦城县
+2109,普定县
+3979,浦东新区
+1384,普洱
+4013,蒲江县
+1854,普宁
+37051,普坪镇
+146039,埔前
+8706,埔前镇
+105674,蒲圻
+38411,蒲塘镇
+1057,莆田
+8734,普宜镇
+10624,黔城镇
+1115,黔东南州
+8851,千官镇
+1188,潜江
+8869,迁江镇
+1116,黔南州
+10612,钱排镇
+10519,千山红镇
+1426,潜山县
+10630,浅水镇
+10618,钱塘镇
+3930,乾县
+1117,黔西南州
+2118,黔西县
+10590,前詹镇
+1866,蕉岭县
+113447,蕉门村
+1791,桥头镇
+10710,桥圩镇
+8996,乔贤镇
+3081,启东
+2873,祁东县
+89290,祈福新村
+8742,七拱镇
+10384,漆河镇
+4644,綦江区
+49507,七江镇
+122198,七里湖村
+10349,七里湖乡
+84823,歧岭村
+91613,歧岭村
+149234,岐岭村
+84833,歧岭乡
+8814,岐岭镇
+91706,歧岭镇
+46939,麒麟镇
+1881,清城区
+4070,青川县
+1296,青岛
+46886,清华园
+5344,清湖镇
+1643,清流县
+2171,晴隆县
+91821,清平村
+10840,青平镇
+4108,青神县
+9592,青树坪镇
+57029,青塘
+70656,青塘
+95899,清塘镇
+40508,青塘镇
+4557,青田县
+10962,清湾镇
+48045,青溪村
+39844,清溪村
+1877,清新区
+2079,青秀区
+1783,清溪镇
+1072,庆阳
+1086,清远
+4558,庆元县
+47366,青云镇
+2125,清镇
+3691,青州
+2035,覃塘区
+2426,沁阳
+4016,邛崃
+144507,岐山村
+3870,岐山县
+10480,歧山乡
+4483,丘北县
+11071,球溪镇
+2439,杞县
+2419,淇县
+2940,祁阳县
+1464,全椒县
+3187,全南县
+48499,荃湾村
+4675,荃湾区
+1058,泉州
+2020,全州县
+2560,确山县
+3624,曲阜
+1910,曲江区
+48076,瞿家湾镇
+1383,曲靖
+9662,渠旧镇
+9663,渠黎镇
+1399,衢州
+1770,饶平县
+39943,仁和
+103875,仁和村
+93846,仁和乡
+2192,仁怀
+1902,仁化县
+9776,任市镇
+4109,仁寿县
+9764,仁寿镇
+1297,日照
+2064,融安县
+4630,荣昌区
+1855,榕城区
+48271,容城镇
+36940,冗渡镇
+2153,榕江县
+9788,溶江镇
+47594,容奇镇
+26449,榕山镇
+2065,融水县
+4172,荣县
+2861,汝城县
+3083,如皋
+4597,瑞安
+3846,芮城县
+3188,瑞金
+4400,瑞丽
+2561,汝南县
+2447,汝阳县
+1903,乳源县
+2477,汝州
+38561,沙梨乡
+91727,三都村
+2165,三都县
+103868,三都镇
+11457,三阁司乡
+2957,桑植县
+5977,散花镇
+111928,三惠县
+87545,三江村
+98303,三江村
+96919,三江村
+11493,三江县
+155976,三江镇
+5492,三江镇
+54138,三江镇
+46610,三江镇
+153058,三角
+103865,三角村
+11505,三角塘村
+43981,三角塘村
+1956,三角镇
+55244,三甲镇
+70083,三里镇
+11539,三里镇
+1158,三门峡
+4584,三门县
+1059,三明
+61441,三水村
+1808,三水区
+111925,三水乡
+2147,三穗县
+4116,三台县
+94875,三塘镇
+92257,三乡村
+3931,三原县
+79622,沙扒镇
+91265,沙步村
+163533,沙洞村
+57013,沙垌镇
+81893,沙河店镇
+73921,沙河口村
+5348,沙河镇
+5462,沙井
+55640,沙口镇
+22516,上壁村
+69345,上壁村
+2562,上蔡县
+2519,商城县
+96597,尚店村
+6281,上奉镇
+5495,上冈镇
+3268,上高县
+1325,上海
+1024,上海
+1593,上杭县
+35914,上磺镇
+3243,上栗县
+1318,商洛
+3891,商南县
+88284,上坪
+12386,上坪村
+70647,上坪村
+103867,上坪镇
+93426,上坪镇
+1159,商丘
+1244,上饶
+98525,上水
+91682,上水村
+2551,商水县
+2009,上思县
+6434,上映乡
+3189,上犹县
+4576,上虞区
+3894,商州区
+48052,珊瑚镇
+154361,山口村
+91678,山口镇
+5264,山口镇
+47830,山门镇
+1088,汕尾
+5346,山心镇
+2431,山阳区
+3889,山阳县
+2487,陕州区
+2902,邵东县
+6464,筲箕湾镇
+2912,韶山
+1603,邵武
+1395,绍兴
+1206,邵阳
+2904,邵阳县
+41188,沙坪镇
+57368,沙坪镇
+48752,沙坡镇
+48279,沙陂镇
+45637,沙市
+164968,沙市区
+5502,沙市镇
+11972,沙塘乡
+55638,沙塘镇
+4672,沙田区
+99658,沙田镇
+44673,沙田镇
+1801,沙田镇
+5259,沙头镇
+42564,沙头镇
+1645,沙县
+2755,沙洋县
+89209,沙子坪
+87990,沙子坪
+102723,沙子坪村
+87991,沙子镇
+5431,社港镇
+4142,射洪
+6502,畲江镇
+5357,盛泽镇
+1122,昌江县
+1127,乐东县
+16203,升钟镇
+4581,嵊州
+5324,深井镇
+2552,沈丘县
+6584,神泉镇
+1090,深圳
+6512,佘田桥镇
+3142,射阳县
+33609,石板村
+46945,石板镇
+96696,石宝镇
+48084,石坝镇
+2148,施秉县
+103870,石槽村
+96113,石槽村
+26727,石槽村
+117530,石曹镇
+3190,石城县
+36032,施洞镇
+88439,十二岭村
+5458,十方镇
+6662,师岗镇
+2877,石鼓区
+61876,石鼓镇
+5021,石花镇
+12930,石江镇
+90494,石角村
+73226,石角村
+71891,石角塘村
+61588,石角镇
+42193,石角镇
+1146,石家庄
+1785,石碣镇
+6821,石康镇
+48697,石坎镇
+6833,石陵镇
+118417,石岭镇
+4429,石林县
+81934,石林镇
+70602,十里铺村
+6726,十里铺乡
+7641,十里铺镇
+48199,石龙村
+1786,石龙镇
+56314,石龙镇
+63119,石龙镇
+95159,石门村
+4965,石门区
+2839,石门县
+4150,石棉县
+1796,石排镇
+39232,石盘镇
+2179,石阡县
+105651,市桥
+6604,石桥镇
+1958,石岐区
+85616,石泉村
+3855,石泉县
+6670,诗山镇
+1632,石狮
+2762,石首
+13135,石塘镇
+83260,石塘村
+35391,石塘镇
+71448,石塘镇
+68122,石塘镇
+42213,石潭镇
+55715,石潭镇
+91667,石窝
+91684,石窝镇
+1905,始兴县
+1190,十堰
+6907,石正镇
+4631,石柱县
+5136,石柱镇
+31648,狮子林
+154824,石子岭
+165122,十字路乡
+103055,十字乡
+38963,石子镇
+4464,师宗县
+44726,寿宁南阳
+112076,双东圩镇
+168438,双东镇
+2896,双峰县
+81483,双河村
+87197,双河镇
+47973,双河镇
+91688,双滘镇
+4014,双流区
+155673,双排村
+2941,双牌县
+147976,双牌镇
+10072,双旺镇
+47047,水边镇
+48715,水边镇
+2136,水城县
+10088,水东江镇
+40623,水东镇
+71163,水墩镇
+4505,水富县
+89207,水浸坪乡
+42738,水口镇
+5322,水口镇
+70337,水口镇
+7107,水鸣镇
+72165,水清镇
+89050,水头
+43595,水头镇
+5205,水头镇
+7131,水汶镇
+11279,水寨镇
+1604,顺昌县
+83056,顺天镇
+152613,树田
+3104,沭阳县
+68639,寺村镇
+38742,四公里村
+159212,思贺镇
+3105,泗洪县
+1948,四会
+87198,四角楼村
+7186,思林镇
+7341,泗纶镇
+5317,司马浦镇
+10181,四马桥镇
+56993,司门前镇
+2180,思南县
+7176,司前乡
+37130,司前镇
+54202,司前镇
+7305,寺山乡
+7282,四通镇
+1524,泗县
+3106,泗阳县
+168073,思州
+62966,宋河镇
+5434,松口镇
+62734,松涛村
+2181,松桃县
+44706,松旺镇
+7388,松烟镇
+88214,松源镇
+4506,绥江县
+1341,遂宁
+3132,睢宁县
+2905,绥宁县
+2563,遂平县
+2499,睢县
+2501,睢阳区
+2193,绥阳县
+87954,素龙
+164239,孙家镇
+64299,锁龙村
+1230,宿迁
+57062,苏区镇
+1427,宿松县
+7462,苏圩镇
+1229,苏州
+1047,宿州
+1298,泰安
+3089,太仓
+97122,台城村
+5002,台城镇
+3716,台儿庄区
+169200,太和村
+7599,太和堂镇
+1473,太和县
+94699,太和镇
+46323,太和镇
+1428,太湖县
+79079,太湖镇
+1585,台江区
+2149,台江县
+7652,泰美镇
+1646,泰宁县
+45949,太平
+48529,太平村
+112098,太平村
+95131,太平村
+36092,太平古镇
+95274,太平镇
+5302,太平镇
+55455,太平镇
+3679,泰山
+3111,泰兴
+1311,太原
+1231,泰州
+1396,台州
+57394,太子
+7644,太子庙镇
+7860,塘坝镇
+15832,塘缀镇
+7868,塘渡口镇
+42732,塘掇镇
+2466,唐河县
+7871,塘红乡
+98911,唐家
+105323,唐家村
+74098,塘家洞村
+5268,唐江镇
+112887,塘坑村
+106953,塘坑村
+48224,塘口镇
+158225,塘莲村
+159711,塘蓬镇
+7778,汤塘镇
+98965,塘田
+7883,塘田市镇
+146161,塘头村
+165293,塘湾
+101966,塘湾镇
+61880,棠下村
+1784,塘厦镇
+7855,棠下镇
+2413,汤阴县
+157263,谭连村
+46432,潭连村
+122183,潭莲村
+94854,潭莲村
+7661,坛洛镇
+7715,潭水镇
+48717,潭头镇
+7696,谭庄镇
+7895,桃川镇
+7929,陶邓乡
+15846,桃花源镇
+2928,桃江县
+78523,桃江乡
+100219,桃源
+2840,桃源县
+7976,藤田镇
+2093,藤县
+3713,滕州
+1465,天长
+2005,天等县
+2045,天峨县
+5426,田畈街镇
+48577,天河
+1816,天河区
+10276,天河镇
+1347,天津
+1990,田林县
+111806,天门村
+1073,天水
+4586,天台县
+46900,天台镇
+52113,天堂村
+52990,天堂镇
+10291,天堂镇
+24643,田头村
+87209,田头镇
+103871,天西村
+74925,田心村
+170649,田心围村
+88216,田心镇
+98623,田心镇
+10389,田心镇
+2150,天柱县
+1998,铁山港区
+45917,亭亮镇
+1656,同安区
+45021,同安镇
+51178,同安镇
+2467,桐柏县
+1429,桐城
+2797,通城县
+1319,铜川
+2886,通道县
+55672,铜鼓村
+3269,铜鼓县
+99662,铜鼓镇
+78680,铜鼓镇
+4496,通海县
+4005,通江县
+4632,铜梁区
+1048,铜陵
+4519,桐庐县
+85933,桐木村
+74325,桐木镇
+85056,桐木镇
+47888,桐木镇
+4642,潼南区
+1118,铜仁
+2798,通山县
+4539,桐乡
+2437,通许县
+2194,桐梓县
+161294,桐梓镇
+8273,头排镇
+2739,团风县
+8308,土关垭镇
+1133,屯昌县
+37913,屯脚镇
+4673,屯门区
+19150,托洞镇
+8405,驮卢镇
+23194,土坪镇
+72540,瓦店乡
+125812,瓦店镇
+31581,外海
+64256,外沙
+91241,外沙村
+60493,外砂镇
+3203,万安县
+8756,望埠镇
+4071,旺苍县
+57010,旺甫镇
+8762,望高镇
+1430,望江县
+46794,旺角
+38562,王灵镇
+8749,旺茂镇
+1803,望牛墩镇
+79174,汪桥镇
+10738,汪桥镇
+10170,万合镇
+1775,万江
+158250,挽澜镇
+3253,万年县
+1134,万宁
+4651,万盛区
+21513,宛田乡
+8481,湾头桥镇
+4033,万源
+4654,万州区
+4665,湾仔区
+8446,瓦溪镇
+1300,潍坊
+1299,威海
+2506,卫辉
+157726,魏家村
+1320,渭南
+2119,威宁县
+8806,维桥乡
+2438,尉氏县
+96668,维新村
+4507,威信县
+165868,维新镇
+53960,威远村
+4133,威远县
+1135,文昌
+83430,文昌镇
+8937,汶村镇
+5345,文地镇
+2166,瓮安县
+144669,翁安寨村
+5504,翁城镇
+16773,瓮城镇
+1906,翁源县
+4587,温岭
+36178,文渠
+48178,温泉镇
+8911,文渠乡
+1385,文山
+4484,文山市
+2427,温县
+1397,温州
+1552,涡阳县
+2783,武昌区
+1936,吴川
+2195,务川县
+51959,武当山镇
+9241,雾渡河镇
+158962,五丰铺镇
+9071,五峰铺镇
+2906,武冈
+2478,舞钢
+3932,武功县
+9198,武功镇
+1972,五桂山
+1270,乌海
+1193,武汉
+1049,芜湖
+3101,吴江区
+20311,乌江镇
+9097,五经富镇
+112884,五块石
+84430,五里牌
+9112,五里铺镇
+9211,武利镇
+4635,武隆区
+2077,武鸣区
+3222,武宁县
+1594,武平县
+9119,五强溪镇
+23104,五桥
+11313,五山
+4633,巫山县
+79302,五山镇
+52757,乌石村
+8998,乌石乡
+73521,乌石镇
+95124,乌石镇
+9177,伍市镇
+9224,武潭镇
+4084,五通桥区
+86844,五通镇
+11319,五通镇
+1450,无为县
+1232,无锡
+2740,武穴
+11251,吴圩镇
+2570,舞阳县
+3257,婺源县
+91810,五云镇
+1276,吴忠
+1109,梧州
+152340,厦边村
+58106,下车村
+152546,厦岗村
+98309,霞湖村
+3204,峡江县
+93756,下坑村
+23548,夏涝池
+11730,下老乡
+1060,厦门
+1321,西安
+5965,仙城镇
+2732,咸丰县
+2555,项城
+2528,襄城县
+48696,香花镇
+4568,象山县
+46317,响水
+1208,湘西
+2915,湘乡
+2949,湘阴县
+4391,祥云县
+1976,香洲区
+5428,巷子口镇
+1195,咸宁
+11848,羡塘乡
+1194,仙桃
+23701,仙下村
+1322,咸阳
+1620,仙游县
+9245,西岸镇
+2814,孝昌县
+6353,小董镇
+1197,孝感
+165574,小鸡街
+86845,小金村
+58394,小金口镇
+4000,小金县
+38422,小平山镇
+95582,小平阳镇
+4526,萧山区
+89208,小塘镇
+5256,小塘镇
+1523,萧县
+5676,细坳镇
+1615,霞浦县
+5858,下桥镇
+88421,峡山村
+1940,霞山区
+5312,峡山镇
+55612,夏石镇
+59564,下洋镇
+118453,下洋镇
+5306,下洋镇
+2495,夏邑县
+98291,霞涌镇
+4098,西昌
+46875,西冲
+4124,西充县
+4485,西畴县
+2471,淅川县
+37500,西渡镇
+30644,协和乡
+6721,谢鸡镇
+1747,西峰区
+2126,息烽县
+99359,西峰乡
+91951,西华村
+2554,西华县
+79910,西湖村
+23909,西湖村
+69016,西湖区
+3235,西湖区
+102254,西湖镇
+98568,锡坑镇
+95555,西林村
+1992,西林县
+95554,西林乡
+9369,西胪镇
+5654,洗马乡
+84008,洗马镇
+9379,西庙岗乡
+51163,西南镇
+52844,新宝镇
+77495,新蔡村
+2565,新蔡县
+4579,新昌县
+2058,忻城县
+96025,新店村
+100746,新店乡
+6796,新地镇
+154418,新渡口
+7084,信都镇
+52172,新风
+91697,新风村
+1907,新丰县
+3191,信丰县
+98577,新丰镇
+57006,新丰镇
+6813,新福乡
+48232,新福镇
+1273,兴安盟
+3205,新干县
+2021,兴安县
+3192,兴国县
+3113,兴化
+156339,星火村
+124241,星火村
+33989,星火镇
+38232,杏林镇
+52657,兴隆村
+91695,兴隆镇
+100132,兴隆镇
+2080,兴宁区
+105919,新沟村
+103869,新沟镇
+64865,新沟镇
+3934,兴平
+2173,兴仁市
+39849,兴仁镇
+78786,兴仁镇
+2623,兴山区
+2822,兴山县
+7093,星沙镇
+4160,兴文县
+42536,兴文镇
+2103,兴业县
+61888,杏子村
+7095,星子镇
+4287,新和县
+57198,新和镇
+2887,新晃县
+2897,新化县
+1848,新会区
+29804,新江村
+4015,新津县
+2342,新乐
+2536,新密
+2907,新宁县
+6839,新亨镇
+6873,新平安镇
+160094,新泉镇
+2908,新邵县
+6883,新哨镇
+6885,新盛店镇
+89259,新盛镇
+46455,新盛镇
+34021,新盛镇
+7689,新市镇
+39329,新市镇
+38934,新市镇
+75866,新市镇
+164827,新塘村
+168309,新塘村
+70685,新塘村
+82792,新塘村
+43033,新塘镇
+84405,新田村
+29907,新天村
+114021,新田村
+6901,新田铺镇
+61882,新天寨
+98616,新田镇
+152952,新田镇
+68993,新田镇
+2521,新县
+1160,新乡
+150724,新锡边村
+114019,新兴村
+1929,新兴县
+112265,新兴乡
+87201,新溪乡
+6910,新溪镇
+30004,新墟村
+51124,新圩乡
+99935,新圩镇
+82786,新墟镇
+29967,新圩镇
+55413,新圩镇
+1161,信阳
+2469,新野县
+3129,新沂
+2537,新郑
+2791,新洲区
+2564,西平县
+81806,西坪镇
+5461,西坪镇
+19843,西樵村
+5245,西樵镇
+2196,习水县
+152619,溪头村
+71175,绣缎镇
+106380,秀山
+4636,秀山县
+2127,修文县
+2208,秀英区
+2520,息县
+2082,西乡塘区
+3883,西乡县
+123180,溪西镇
+82175,西阳镇
+89090,西阳镇
+11465,西阳镇
+9439,西燕镇
+9436,西岩镇
+103078,西中村
+1050,宣城
+4034,宣汉县
+1162,许昌
+75510,巡场镇
+74875,巽寮镇
+3193,寻乌县
+3856,旬阳县
+3935,旬邑县
+2891,溆浦县
+3052,盱眙县
+4180,叙永县
+1233,徐州
+4059,雅江县
+91606,雅江镇
+46892,雅居乐
+1323,延安
+1234,盐城
+7454,沿渡河镇
+7531,羊册镇
+86694,杨村
+91246,杨村镇
+56029,杨村镇
+1923,阳东区
+1091,阳江
+30782,羊角村
+89275,羊角镇
+5289,羊街镇
+156586,羊街村
+87920,洋里村
+37785,杨林村
+94367,杨林村
+162991,杨林村
+97479,杨林
+112889,杨林镇
+4830,杨梅
+85890,杨梅村
+2225,洋浦经济开发区
+118036,杨桥镇
+7691,洋桥镇
+64239,杨桥镇
+1313,阳泉
+1879,阳山县
+87148,杨市
+2022,阳朔县
+60057,杨田
+103864,杨田村
+60510,杨田镇
+146040,杨屋村
+3884,洋县
+61472,洋溪乡
+2747,阳新县
+91269,洋溪镇
+90226,洋溪镇
+5594,洋溪镇
+3159,扬中
+1235,扬州
+123470,沿河乡
+4508,盐津县
+2509,延津县
+2532,鄢陵县
+2962,炎陵县
+4486,砚山县
+78302,砚山镇
+1301,烟台
+4177,沿滩区
+7439,岩滩镇
+38291,严田镇
+4117,盐亭县
+7722,腰古镇
+7771,姚集乡
+94463,姚集镇
+112178,姚集镇
+47517,姚圩镇
+3897,耀州区
+60896,亚山镇
+7348,鸭溪镇
+7844,野三关镇
+91692,野山关镇
+7858,叶塘镇
+46950,叶潭镇
+2479,叶县
+1343,宜宾
+1198,宜昌
+2806,宜城
+1180,伊春
+2824,宜都
+7861,一渡水镇
+3174,宜黄县
+38170,一六圩村
+98838,一六镇
+48221,一六镇
+4125,仪陇县
+2488,义马
+2816,应城
+1880,英德
+8001,英都镇
+4401,盈江县
+5325,英林镇
+8003,英利镇
+99943,英山村
+1474,颍上县
+61157,应山
+2741,英山县
+32905,营山乡
+1247,鹰潭
+7971,银坑镇
+7933,义容镇
+4550,义乌
+3118,宜兴
+1209,益阳
+3256,弋阳县
+2862,宜章县
+3153,仪征
+2046,宜州区
+1647,永安
+46845,永安街道
+84406,永安新村
+70133,永安镇
+2496,永城
+4637,永川区
+1633,永春县
+4450,永德县
+1595,永定区
+3206,永丰
+166482,永丰村
+71108,永丰村
+2023,永福县
+4601,永嘉县
+46644,永嘉镇
+4551,永康
+52882,永康镇
+2078,邕宁县
+4380,永仁县
+4510,永善县
+3936,永寿县
+95032,永顺村
+2924,永顺县
+1583,永泰县
+155918,永头村
+81901,永兴镇
+3207,永新县
+3225,永修县
+1210,永州
+27725,油墩街镇
+64199,油坑村
+47562,油田镇
+161468,邮亭村
+8106,邮亭镇
+2965,攸县
+1648,尤溪县
+2825,远安县
+153011,院垌村
+2930,沅江
+5452,源头镇
+2510,原阳县
+4419,元阳县
+5484,园洲镇
+2497,虞城县
+87370,悦城镇
+4066,岳池县
+8325,岳口镇
+91623,月岭村
+70224,月岭村
+1431,岳西县
+8336,岳溪镇
+1211,岳阳
+2953,岳阳楼区
+61962,月屿村
+2068,鱼峰区
+57058,余关乡
+158324,雨河镇
+4589,玉环
+100437,鱼化乡
+3277,余江区
+1324,榆林
+1931,云安区
+1930,郁南县
+8358,云表镇
+1314,运城
+1933,云城区
+2817,云梦县
+44273,云门镇
+8369,云潭镇
+4451,云县
+1665,云霄县
+2769,郧西县
+2770,郧阳区
+4639,云阳县
+61174,云阳镇
+2186,玉屏县
+2197,余庆县
+8204,郁山镇
+3255,玉山县
+125278,余田村
+8165,余田乡
+169843,鱼窝头
+1387,玉溪
+4569,余姚
+2531,禹州
+84583,皂市镇
+40752,皂市镇
+2807,枣阳
+1302,枣庄
+5186,泽国镇
+1812,增城区
+8516,寨沙镇
+8520,寨圩镇
+8468,渣江镇
+28245,闸口乡
+94873,闸口镇
+8593,张公庙镇
+8596,张沟镇
+3091,张家港
+1212,张家界
+57252,樟木村
+1782,樟木头镇
+103862,樟木乡
+5343,樟木镇
+1666,漳浦县
+3273,樟树
+1061,漳州
+36506,湛江镇
+8555,占陇镇
+1668,诏安县
+8804,赵店子镇
+35073,朝东镇
+84830,赵家镇
+46958,赵家镇
+2050,昭平县
+1094,肇庆
+1388,昭通
+8860,肇兴乡
+5515,闸坡镇
+2498,柘城县
+89423,浙川县
+47029,柘港乡
+60495,折弓乡
+3892,镇安县
+42224,镇安镇
+3885,镇巴县
+8918,郑店镇
+123702,震东乡
+2198,正安县
+88416,郑场镇
+91326,郑店
+1607,政和县
+1746,正宁县
+2566,正阳县
+1163,郑州
+1236,镇江
+8898,镇江镇
+57640,镇隆镇
+5279,镇隆镇
+44569,镇隆镇
+8904,镇宁堡乡
+2110,镇宁县
+76931,镇平村
+2470,镇平县
+3857,镇坪县
+4512,镇雄县
+2151,镇远县
+1617,柘荣县
+158226,者塘村
+5026,枝城镇
+2826,枝江
+2889,芷江县
+78235,芷江镇
+2120,织金县
+5597,织里镇
+155978,芝山镇
+28805,值夏镇
+58964,中坝镇
+2888,中方县
+1401,中国香港
+32797,中和镇
+4041,中江县
+28844,中平村
+99946,中平镇
+78230,钟山村
+2137,钟山区
+2051,钟山县
+1787,中堂镇
+8986,中垌镇
+4640,忠县
+2756,钟祥
+42237,忠信镇
+4661,中西区
+165117,中寨村
+158251,中寨镇
+83441,中寨镇
+43404,中寨子村
+55702,周安村
+42241,周安村
+9083,周党镇
+9098,周江镇
+1164,周口
+105924,周老镇
+9100,周老嘴镇
+9104,周鹿镇
+1616,周宁县
+40750,周旺铺村
+78354,周旺镇
+81895,砖店镇
+9322,转水镇
+9295,竹篙镇
+9296,竹沟镇
+9144,朱河镇
+4580,诸暨
+39807,珠街乡
+56715,朱街镇
+28192,珠街镇
+71867,朱里镇
+106166,竹山村
+155975,竹山村
+3214,珠山区
+2771,竹山县
+74619,珠山镇
+158249,竹山镇
+87979,竹市镇
+91621,竹市镇
+9290,株潭镇
+81814,竹溪村
+2772,竹溪县
+9301,竹溪乡
+116191,竹溪镇
+33477,竹园坝
+88170,竹园镇
+117300,竹园镇
+39408,竹园镇
+1213,株洲
+1303,淄博
+1345,自贡
+2827,秭归县
+82278,紫金镇
+52732,紫市镇
+4118,梓潼县
+103863,梓潼镇
+2864,资兴
+1344,资阳
+2932,资阳区
+2111,紫云县
+4134,资中县
+9417,陬市镇
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/files/city_cap.txt b/applications/common/scrapySpiders/wangModel/wangModel/files/city_cap.txt
new file mode 100644
index 0000000..ff90a66
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/files/city_cap.txt
@@ -0,0 +1,546 @@
+1096,珠海
+1095,中山
+1824,廉江
+1110,玉林
+1126,海口
+1078,佛山
+1106,柳州
+1082,江门
+1928,罗定
+1871,兴宁
+2174,兴义
+2099,北流
+1859,化州
+2095,岑溪
+1077,东莞
+1080,河源
+1084,茂名
+1107,南宁
+1087,汕头
+1092,云浮
+1097,百色
+1202,郴州
+1099,崇左
+1237,抚州
+1085,梅州
+4415,蒙自
+1119,遵义
+1113,贵阳
+1103,河池
+1081,惠州
+1876,连州
+1130,琼海
+1132,三亚
+1089,韶关
+1101,桂林
+1105,来宾
+1334,乐山
+1338,南充
+1900,南雄
+1845,台山
+1192,天门
+1207,湘潭
+1093,湛江
+1165,驻马店
+1111,安顺
+1150,安阳
+1098,北海
+4544,东阳
+1100,防城港
+3264,丰城
+38433,拱北
+1332,广安
+1102,贵港
+1185,黄石
+1239,吉安
+2922,吉首
+2143,凯里
+1129,陵水
+2015,荔浦
+1114,六盘水
+1895,陆丰
+1242,南昌
+1156,南阳
+1339,内江
+1167,濮阳
+1108,钦州
+22290,容桂
+2763,松滋
+1191,随州
+1196,襄阳
+1861,信宜
+1245,新余
+1342,雅安
+1922,阳春
+1246,宜春
+1400,重庆
+1315,安康
+2811,安陆
+1035,安庆
+1402,中国澳门
+46889,柏布
+88217,白庙
+1120,白沙
+1036,蚌埠
+1316,宝鸡
+1373,保山
+94650,八一
+97340,八一农场
+1327,巴中
+50984,北大
+35043,北栅
+1112,毕节
+1051,亳州
+1200,常德
+2527,长葛
+1575,长乐
+101301,长隆
+2867,常宁
+89263,长庆桥
+1201,长沙
+3086,常熟
+1304,长治
+1224,常州
+1037,巢湖
+1076,潮州
+92375,城东
+1328,成都
+48033,陈家
+2794,赤壁
+46535,赤水
+2189,赤水
+1038,池州
+4008,崇州
+97498,楚雄
+1374,楚雄州
+1039,滁州
+4564,慈溪
+2972,大安市
+16437,大付
+16050,达濠
+35101,大理
+1251,大连
+1375,大理州
+2820,当阳
+58664,大宁
+2766,丹江口
+39858,淡水
+3157,丹阳
+1137,儋州
+70592,大坪
+3384,大石桥
+1305,大同
+98594,大围
+2745,大冶
+1329,达州
+114691,德城
+2534,登封
+3350,灯塔
+169775,登云
+2460,邓州
+3247,德兴
+1330,德阳
+87196,东城
+116819,东城
+1125,东方
+95933,东方
+3312,东港
+69724,洞利
+3139,东台
+2007,东兴
+1289,东营
+2156,都匀
+4076,峨眉山
+1841,恩平市
+2727,恩施
+1183,恩施州
+1182,鄂州
+87907,凤凰
+5575,汾水道
+1609,福安
+1610,福鼎
+1576,福清
+2158,福泉
+1040,阜阳
+1053,福州
+1238,赣州
+3266,高安
+89238,高坪
+89195,高坡
+46631,高桥
+86519,高沙
+3150,高邮
+1858,高州
+4408,个旧
+43701,巩桥
+6253,共青城
+1773,莞城
+12853,广华
+71131,广华岭
+2776,广水
+1333,广元
+1079,广州
+31406,桂城
+2030,桂平
+3275,贵溪
+1274,固原
+3078,海安
+3079,海门
+2813,汉川
+1142,邯郸
+1389,杭州
+2792,汉口
+1317,汉中
+1151,鹤壁
+1041,合肥
+1010,河南
+1203,衡阳
+91610,河浦
+1842,鹤山
+2053,合山
+56623,河西
+1290,菏泽
+1104,贺州
+1378,红河州
+1199,洪湖
+2881,洪江
+1225,淮安
+1042,淮北
+152609,怀德
+1204,怀化
+1043,淮南
+1184,黄冈
+169664,黄桥
+72597,华阳
+4063,华蓥
+105908,胡集
+3775,霍州
+1390,湖州
+165462,加禾
+4517,建德
+150783,江口
+4612,江山
+3116,江阴
+1600,建瓯
+93346,尖沙咀
+32532,简阳
+1153,焦作
+1391,嘉兴
+1471,界首
+1083,揭阳
+1291,济南
+1306,晋城
+3200,井冈山
+4488,景洪
+3110,靖江
+1186,荆门
+2754,京山市
+1983,靖西
+1187,荆州
+1392,金华
+1292,济宁
+1629,晋江
+93532,金沙
+165284,金沙
+2837,津市
+1241,九江
+75482,九隆
+29675,九龙塘
+1152,济源市
+1154,开封
+1844,开平
+4413,开远
+4626,开州
+1379,昆明
+3087,昆山
+4127,阆中
+1068,兰州
+51436,老城
+2803,老河口
+1899,乐昌
+2874,耒阳
+1934,雷州
+2893,冷水江
+3212,乐平
+4595,乐清
+2894,涟源
+1226,连云港
+1294,聊城
+2731,利川
+1380,丽江
+2966,醴陵
+1381,临沧
+1308,临汾
+2484,灵宝
+4583,临海
+2981,临江
+2947,临湘
+1295,临沂
+2411,林州市
+1393,丽水
+1045,六安
+91242,流沙
+2848,浏阳
+3042,溧阳
+46917,李寨
+4131,隆昌
+146450,龙港市
+1662,龙海
+110469,龙江
+3703,龙口
+80393,龙门
+89567,龙南
+48189,龙山
+46852,龙溪
+1054,龙岩
+1205,娄底
+1166,漯河
+32770,罗家桥
+1155,洛阳
+1346,泸州
+1046,马鞍山
+165292,马场坪
+2738,麻城
+168717,马达
+158172,马家湾
+67502,马街
+35741,马山
+56234,马牙
+1336,眉山
+1337,绵阳
+4416,弥勒
+2952,汨罗
+37202,明港
+1630,南安
+1774,南城
+1227,南京
+1055,南平
+1228,南通
+39982,南溪
+48675,南阳油田
+1394,宁波
+1056,宁德
+146128,排坊
+1340,攀枝花
+2135,盘州
+98804,炮台
+4012,彭州
+1157,平顶山
+4538,平湖
+1071,平凉
+125633,平南
+2004,凭祥
+1384,普洱
+1854,普宁
+146039,埔前
+105674,蒲圻
+1057,莆田
+1115,黔东南州
+1188,潜江
+1116,黔南州
+1117,黔西南州
+3081,启东
+1296,青岛
+46886,清华园
+57029,青塘
+70656,青塘
+1072,庆阳
+1086,清远
+3691,青州
+2426,沁阳
+4016,邛崃
+1058,泉州
+3624,曲阜
+1383,曲靖
+1399,衢州
+39943,仁和
+2192,仁怀
+1297,日照
+3083,如皋
+4597,瑞安
+3188,瑞金
+4400,瑞丽
+2477,汝州
+153058,三角
+1158,三门峡
+1059,三明
+5462,沙井
+1325,上海
+1318,商洛
+88284,上坪
+1159,商丘
+1244,上饶
+98525,上水
+1088,汕尾
+2912,韶山
+1603,邵武
+1395,绍兴
+1206,邵阳
+45637,沙市
+89209,沙子坪
+87990,沙子坪
+4142,射洪
+4581,嵊州
+1090,深圳
+1146,石家庄
+105651,市桥
+1632,石狮
+2762,石首
+91667,石窝
+1190,十堰
+31648,狮子林
+154824,石子岭
+44726,寿宁南阳
+89050,水头
+152613,树田
+1948,四会
+168073,思州
+1341,遂宁
+87954,素龙
+1230,宿迁
+1229,苏州
+1047,宿州
+1298,泰安
+3089,太仓
+45949,太平
+3679,泰山
+3111,泰兴
+1311,太原
+1231,泰州
+1396,台州
+57394,太子
+98911,唐家
+98965,塘田
+165293,塘湾
+100219,桃源
+3713,滕州
+1465,天长
+48577,天河
+1347,天津
+1073,天水
+1429,桐城
+1319,铜川
+1048,铜陵
+1118,铜仁
+31581,外海
+64256,外沙
+46794,旺角
+1775,万江
+1134,万宁
+4033,万源
+1300,潍坊
+1299,威海
+2506,卫辉
+1320,渭南
+1135,文昌
+4587,温岭
+36178,文渠
+1385,文山
+4484,文山市
+1397,温州
+1936,吴川
+2906,武冈
+2478,舞钢
+1972,五桂山
+1270,乌海
+1193,武汉
+1049,芜湖
+112884,五块石
+84430,五里牌
+23104,五桥
+11313,五山
+1232,无锡
+2740,武穴
+1276,吴忠
+1109,梧州
+23548,夏涝池
+1060,厦门
+1321,西安
+2555,项城
+46317,响水
+1208,湘西
+1195,咸宁
+1194,仙桃
+1322,咸阳
+1197,孝感
+165574,小鸡街
+4098,西昌
+46875,西冲
+154418,新渡口
+52172,新风
+1273,兴安盟
+3113,兴化
+3934,兴平
+2173,兴仁市
+2342,新乐
+2536,新密
+61882,新天寨
+1161,信阳
+3129,新沂
+2537,新郑
+106380,秀山
+1050,宣城
+1162,许昌
+1233,徐州
+46892,雅居乐
+1323,延安
+1234,盐城
+1091,阳江
+97479,杨林
+4830,杨梅
+1313,阳泉
+87148,杨市
+60057,杨田
+3159,扬中
+1235,扬州
+1301,烟台
+1343,宜宾
+1198,宜昌
+2806,宜城
+1180,伊春
+2824,宜都
+2488,义马
+2816,应城
+1880,英德
+61157,应山
+1247,鹰潭
+4550,义乌
+3118,宜兴
+1209,益阳
+3153,仪征
+1647,永安
+46845,永安街道
+2496,永城
+3206,永丰
+4551,永康
+1210,永州
+2930,沅江
+1211,岳阳
+4589,玉环
+1324,榆林
+1314,运城
+169843,鱼窝头
+1387,玉溪
+4569,余姚
+2531,禹州
+2807,枣阳
+1302,枣庄
+3091,张家港
+1212,张家界
+3273,樟树
+1061,漳州
+1094,肇庆
+1388,昭通
+91326,郑店
+1163,郑州
+2826,枝江
+1401,中国香港
+2756,钟祥
+1164,周口
+4580,诸暨
+33477,竹园坝
+1213,株洲
+1303,淄博
+1345,自贡
+2864,资兴
+1344,资阳
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/files/scenic b/applications/common/scrapySpiders/wangModel/wangModel/files/scenic
new file mode 100644
index 0000000..61eaad8
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/files/scenic
@@ -0,0 +1,95 @@
+1,桂林漓江景区
+2,桂林两江四湖象山景区
+3,七星景区
+4,桂林穿山景区
+5,桂林尧山景区
+6,桂林市南溪山景区
+7,桂万福广场休闲旅游城
+8,桂林市瓦窑小镇景区
+9,万象山区侗情水庄景区
+10,海之鑫洞藏酒文化馆
+11,桂林独秀峰王城景区
+12,芦笛景区
+13,桂林经典刘三姐大观园景区
+14,桂林西山景区
+15,桂林桂花公社景区
+16,桂林芦笛岩鸡血玉文化艺术中心景区
+17,桂林冠岩景区
+18,桂林愚自乐园艺术园
+19,桂林旅苑景区
+20,桂林市神龙水世界景区
+21,桂林多耶古寨蛇王李景区
+22,桂林在水一汸景区
+23,桂林新区环城水系景区
+24,桂林罗山湖玛雅水上乐园景区
+25,桂林红溪景区
+26,桂林黄沙秘境大峡谷景区
+27,美国飞虎队桂林遗址公园
+28,李宗仁故居
+29,会仙喀斯特国家湿地公园景区
+30,临桂十二滩漂流景区
+31,抱璞文化展示中心
+32,桂林崇华中医街
+33,一院两馆景区
+34,佑子湾民俗风情园景区
+35,桂林世外桃源旅游区
+36,阳朔图腾古道聚龙潭景区
+37,桂林阳朔县蝴蝶泉旅游景区
+38,阳朔西街景区
+39,阳朔三千漓中国山水人文度假区
+40,桂林乐满地休闲世界
+41,红军长征突破湘江烈士纪念碑园景区(红色景区)
+42,兴安灵渠景区
+43,桂林市猫儿山景区
+44,桂林龙胜温泉旅游度假区
+45,龙胜龙脊梯田景区
+46,龙胜县白面瑶寨景区(红色景区)
+47,龙胜艺江南中国红玉文化园景区
+48,桂林银子岩旅游度假区
+49,桂林丰鱼岩旅游度假区
+50,荔浦荔江湾景区
+51,荔浦县马岭鼓寨民族风情园
+52,荔浦天河瀑布景区
+53,荔浦县柘村景区
+54,恭城三庙两馆景区
+55,恭城红岩村景区
+56,黄岭景区
+57,杨溪景区
+58,瑶族文化村景区
+59,北洞源景区
+60,恭城龙虎关景区
+61,恭城矮寨景区
+62,恭城社山景区
+63,红军长征湘江战役新圩狙击战纪念园(红色景区)
+64,灌阳千家洞文旅度假区
+65,灌阳唐景崧故里景区
+66,灌阳茶博园
+67,灌阳神农稻博园
+68,灌阳洞井古民居景区
+69,灌阳都庞岭大峡谷景区
+70,灌阳文市石林景区
+71,灵川县大圩古镇景区
+72,灵川县漓水人家景区
+73,桂林古东瀑布景区
+74,桂林市逍遥湖景区
+75,桂林希宇欢乐城景区
+76,八路军桂林办事处路莫村物资转运站景区(红色景区)
+77,灵川龙门瀑布景区
+78,灵川县江头景区
+79,平乐仙家温泉景区
+80,大碧头国际旅游度假区
+81,红军长征湘江战役纪念园(红色景区)
+82,桂林全州县湘山·湘源历史文化旅游区
+83,桂林国际茶花谷旅游休闲度假区
+84,桂林湘山酿酒生态园景区
+85,全州县炎井温泉
+86,永福金钟山旅游度假区
+87,永福县凤山景区
+88,罗汉果小镇
+89,桂林资江·天门山景区
+90,桂林八角寨景区
+91,桂林资江灯谷景区
+92,资源县宝鼎景区
+93,资源县塘洞景区(红色景区)
+
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/items.py b/applications/common/scrapySpiders/wangModel/wangModel/items.py
new file mode 100644
index 0000000..7e0ca1e
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/items.py
@@ -0,0 +1,72 @@
+# Define here the models for your scraped items
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+# Tuniu scenic-spot info and reviews
+class WangmodelItem(scrapy.Item):
+    # define the fields for your item here like:
+    id = scrapy.Field()                # scenic spot id
+    name = scrapy.Field()              # scenic spot name
+    begin_price = scrapy.Field()       # starting price
+    satisfy_present = scrapy.Field()   # satisfaction rate
+    remarkAmount = scrapy.Field()      # total number of reviews
+    compGrade3Amount = scrapy.Field()  # number of "satisfied" reviews
+    compGrade2Amount = scrapy.Field()  # number of "average" reviews
+    compGrade1Amount = scrapy.Field()  # number of "unsatisfied" reviews
+    img = scrapy.Field()               # cover image of the spot
+    address = scrapy.Field()           # address of the spot
+    time_arrange = scrapy.Field()      # opening hours etc.
+    commentlist = scrapy.Field()       # reviews
+
+# Tuniu hotel info and hotel reviews
+class TuniuhotelItem(scrapy.Item):
+ id=scrapy.Field()
+ hname=scrapy.Field()
+ starname=scrapy.Field()
+ hpic=scrapy.Field()
+ haddress=scrapy.Field()
+ business=scrapy.Field()
+ distance=scrapy.Field()
+ hlowstprice=scrapy.Field()
+ hcomments=scrapy.Field()
+ others=scrapy.Field()
+
+class HornetNestNoteItem(scrapy.Item):
+ id = scrapy.Field()
+ url=scrapy.Field()
+ title = scrapy.Field()
+ total=scrapy.Field()
+ see=scrapy.Field()
+ collect=scrapy.Field()
+ commentNum=scrapy.Field()
+
+class WeiboItem(scrapy.Item):
+ id=scrapy.Field()
+ userid=scrapy.Field()
+ screen_name=scrapy.Field()
+ fins=scrapy.Field()
+ artilelist=scrapy.Field()
+ total_artiles=scrapy.Field()
+
+
+class TongchenTrainItem(scrapy.Item):
+    id = scrapy.Field()    # id
+    site = scrapy.Field()  # source site
+ place_from = scrapy.Field()
+ place_to = scrapy.Field()
+ date = scrapy.Field()
+ total_count = scrapy.Field()
+ from_station=scrapy.Field()
+ to_station=scrapy.Field()
+ from_time=scrapy.Field()
+ to_time=scrapy.Field()
+ seat_name=scrapy.Field()
+ seat_price=scrapy.Field()
+ seats_left=scrapy.Field()
+ type=scrapy.Field()
+ status=scrapy.Field()
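+    # Spiders fill these items like dicts, e.g. (as done in tongchen.py):
+    #   item = TongchenTrainItem()
+    #   item['site'] = "同城旅行"
+    #   item['type'] = "火车"
+    #   yield item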
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/middlewares.py b/applications/common/scrapySpiders/wangModel/wangModel/middlewares.py
new file mode 100644
index 0000000..1c72e5c
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/middlewares.py
@@ -0,0 +1,56 @@
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+
+from scrapy import signals
+import time
+import logging
+# useful for handling different item types with a single interface
+from scrapy.utils.project import get_project_settings
+import random
+
+
+# Pick a random User-Agent for every request
+class RandowSpiderMiddleware(object):
+    def process_request(self, request, spider):
+        settings = get_project_settings()
+        user_agent = settings["USER_AGENT_LIST"]
+        # choose a random User-Agent from the list
+        ua = random.choice(user_agent)
+        request.headers['USER-AGENT'] = ua
+
+
+# Pick a random proxy for every request
+class RandowProxy(object):
+    def process_request(self, request, spider):
+        settings = get_project_settings()
+        proxy_list = settings["PROXY_LIST"]
+        proxy = random.choice(proxy_list)
+        print("proxy: ", proxy['ip_port'])
+        # apply the chosen proxy to the request
+        request.meta['proxy'] = 'http://' + proxy['ip_port']
+
+
+# Add a random delay before each request
+class RandomDelayMiddleware(object):
+    def __init__(self, delay):
+        self.delay = delay
+
+    @classmethod
+    def from_crawler(cls, crawler):
+        delay = crawler.spider.settings.get("DOWNLOAD_DELAY", 3)  # upper bound from settings.py; defaults to 3s
+        if not isinstance(delay, int):
+            raise ValueError("DOWNLOAD_DELAY needs to be an int")
+        return cls(delay)
+
+    def process_request(self, request, spider):
+        delay = random.randint(0, self.delay)
+        logging.debug("### random delay: %s s ###" % delay)
+        time.sleep(delay)
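+    # A minimal sketch of enabling this middleware (mirrors settings.py in this
+    # project; the sleep drawn per request is a random integer in 0..DOWNLOAD_DELAY):
+    #   DOWNLOADER_MIDDLEWARES = {
+    #       'wangModel.middlewares.RandomDelayMiddleware': 150,
+    #   }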
+
+
+
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/pipelines.py b/applications/common/scrapySpiders/wangModel/wangModel/pipelines.py
new file mode 100644
index 0000000..9bb8f15
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/pipelines.py
@@ -0,0 +1,211 @@
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+
+
+# useful for handling different item types with a single interface
+import datetime
+import uuid
+
+from itemadapter import ItemAdapter
+import csv
+from scrapy.exceptions import DropItem
+import happybase
+import json
+from wangModel.items import WangmodelItem
+from wangModel.items import TuniuhotelItem
+from wangModel.items import WeiboItem
+from wangModel.items import TongchenTrainItem
+from wangModel.utils.mysqlConn import insert,update
+
+from wangModel.utils.HbaseConn import HbaseUtil
+
+
+class DuplicatesPipeline(object):
+ """
+    Drop duplicate items (keyed by id)
+ """
+ def __init__(self):
+ self.book_set = set()
+
+ def process_item(self, item, spider):
+        key = item['id']
+        if key in self.book_set:
+            raise DropItem("Duplicate item found: %s" % item)
+
+        self.book_set.add(key)
+ return item
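+    # Design note: the seen-id set lives in memory and is reset on every run;
+    # for long or distributed crawls a persistent store (e.g. Redis) would be
+    # needed to deduplicate across runs.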
+
+
+class WangmodelPipeline(object):
+ def process_item(self, item, spider):
+ f = open('test.csv', 'a+', encoding='utf-8')
+ csv_writer = csv.writer(f)
+ csv_writer.writerow([item['name'], item['begin_price'],item['satisfy_present'],item['img'],item['address'],item['time_arrange']])
+ f.close()
+ return item
+
+class tuniuHBasePipeline(object):
+ def __init__(self):
+ # host = '192.168.174.129'
+ host = '202.193.53.106'
+ table_name1 = 'tuniu_scenic'
+ table_name2 = 'scenic_hotel'
+ hbase=HbaseUtil(host)
+ self.hbase=hbase
+ self.tablename1=table_name1
+ self.tablename2=table_name2
+
+ def process_item(self, item, spider):
+ """
+        Store Tuniu scenic-spot data
+ :param item:
+ :param spider:
+ :return:
+ """
+ if isinstance(item,WangmodelItem):
+ host = '202.193.53.106'
+ hbase = HbaseUtil(host)
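+            # Insert-if-absent: INSERT ... SELECT ... FROM dual WHERE NOT EXISTS
+            # skips rows whose name, crawl date and source site were already
+            # stored today, so re-running the spider does not duplicate rows.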
+ sql="INSERT INTO scenic_comment(scenicId,scenicName,satisfy_present,num,good,middle,bad,crawlTime,siteFrom) select %s,%s,%s,%s,%s,%s,%s,%s,%s from dual where not exists (select scenicName,crawlTime,siteFrom from scenic_comment where scenicName=%s and crawlTime=%s and siteFrom='途牛');"
+ insert(sql,(item['id'],item['name'],item['satisfy_present'],item['remarkAmount'],item['compGrade3Amount'],item['compGrade2Amount'],item['compGrade1Amount'],datetime.date.today(),"途牛",item['name'],datetime.date.today()))
+ id = item['id']
+ commentlist=[]
+ # print(item)
+ commentlist=item['commentlist']
+ obj={}
+ if len(commentlist)>0:
+ for data in commentlist:
+ userId=str(data['userId'])
+ userName=str(data['userName'])
+ content=str(data['content'])
+                others = {}
+                if data['subCompGrade'] is not None:
+                    for k, v in data['subCompGrade'].items():
+                        others[k] = str(v)
+ remarkSatisfaction=str(data['remarkSatisfaction'])
+ compGrade=str(data['compGrade'])
+                key = uuid.uuid1().hex
+                print(others)
+ putInfo={
+ "info:userid": userId,
+ "info:username": userName,
+ "info:scenicid": str(item['id']),
+ "info:scenicname": item['name'],
+ "info:content": content,
+ "info:others": str(others),
+ "info:satisfaction": remarkSatisfaction,
+ "info:compgrade": compGrade,
+ "info:datafrom": "途牛",
+ "info:postDate": data['remarkTime']
+
+ }
+ print(putInfo)
+ try:
+ self.hbase.batchTable("scenics_comment",str(key),putInfo)
+ except:
+ self.hbase.closeCon()
+ hbase = HbaseUtil('202.193.53.106')
+ hbase.batchTable("scenics_comment",str(key),putInfo)
+
+
+ """
+ 存储途牛酒店数据
+ """
+ elif isinstance(item,TuniuhotelItem):
+ for child in item['hcomments']:
+ print("存入Hbase",child)
+ userId=child['reviewerId']
+ userName=child['reviewerName']
+ content=child['content']
+ score=str(child['score'])
+ remarkTime=child['remarkTime']
+
+ try:
+ self.hbase.batchTable("hotel_comments", str(uuid.uuid1().hex),
+ {
+ 'info:hid': str(item['id']),
+ 'info:hname': item['hname'],
+ 'info:userid': userId,
+ 'info:username': userName,
+ 'info:content': content,
+ 'info:score': score,
+ 'info:postDate': remarkTime,
+ })
+ except:
+ self.hbase.closeCon()
+ hbase = HbaseUtil('202.193.53.106')
+ hbase.batchTable("scenics_comment", str(key), putInfo)
+ hbase.batchTable("hotel_comments",str(uuid.uuid1().hex),
+ {
+ 'info:hid':str(item['id']),
+ 'info:hname':item['hname'],
+ 'info:userid':userId,
+ 'info:username':userName,
+ 'info:content':content,
+ 'info:score':score,
+ 'info:postDate':remarkTime,
+ })
+ """
+ 存储微博数据:桂林官方旅游微博每一条文章
+ """
+ elif isinstance(item, WeiboItem):
+ print("存储该页的微博文章",item['artilelist'])
+ for artile_content in item['artilelist']:
+ self.hbase.putTable("weibo",artile_content['artile_id'],{
+ 'info:userid':str(item['userid']),
+ 'info:screen_name':item['screen_name'],
+ 'info:fins':item['fins'],
+ 'info:total_artiles':str(item['total_artiles']),
+ 'info:artile_id':artile_content['artile_id'],
+ 'info:attitudes_count':artile_content['attitudes_count'],
+ 'info:comments_count':artile_content['comments_count'],
+ 'info:reposts_count':artile_content['reposts_count'],
+ 'info:postDate':artile_content['postDate'],
+ 'info:text':artile_content['text']
+ })
+ """
+ 存储同城旅游
+ """
+
+ elif isinstance(item, TongchenTrainItem):
+ print("获取对象",item)
+ host = '202.193.53.106'
+ hbase = HbaseUtil(host)
+            row = {
+                'info:place_from': item['place_from'],      # departure city
+                'info:place_to': item['place_to'],          # arrival city: Guilin
+                'info:date': item['date'],                  # date
+                'info:total_count': item['total_count'],    # number of services from this city to Guilin
+                'info:from_station': item['from_station'],  # departure station
+                'info:to_station': item['to_station'],      # arrival station
+                'info:type': item['type'],                  # mode: train / coach
+                'info:from_time': item['from_time'],        # departure time
+                'info:to_time': item['to_time'],            # arrival time
+                'info:seat_name': item['seat_name'],        # seat class (coaches have none)
+                'info:seat_price': item['seat_price'],      # seat price
+                'info:seats_left': item['seats_left'],      # tickets left
+                'info:status': item['status']               # seat status
+            }
+            try:
+                hbase.batchTable("leftticket", item['id'], row)
+            except:
+                # reconnect and retry once
+                hbase.closeCon()
+                hbase = HbaseUtil(host)
+                hbase.batchTable("leftticket", item['id'], row)
+ return item
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/readme.md b/applications/common/scrapySpiders/wangModel/wangModel/readme.md
new file mode 100644
index 0000000..e1266a1
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/readme.md
@@ -0,0 +1,11 @@
+### Create a scrapy project
+scrapy startproject <project-name>
+
+### cd into the spiders folder and create a spider
+scrapy genspider <spider-name> "<domain>"
+
+### Run a spider
+scrapy crawl <spider-name>
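+
+For example, to run the spiders defined in this project:
+
+    scrapy crawl tongchen
+    scrapy crawl tuniu_scenic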
+
+# Entry script that runs all spiders
+wangModel/spiders/main.py
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/settings.py b/applications/common/scrapySpiders/wangModel/wangModel/settings.py
new file mode 100644
index 0000000..7580dde
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/settings.py
@@ -0,0 +1,135 @@
+# Scrapy settings for wangModel project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+# https://docs.scrapy.org/en/latest/topics/settings.html
+# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'wangModel'
+
+SPIDER_MODULES = ['wangModel.spiders']
+NEWSPIDER_MODULE = 'wangModel.spiders'
+DOWNLOAD_DELAY = 3     # download delay of 3 seconds
+DOWNLOAD_TIMEOUT = 60  # download timeout
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
+# User-Agent list
+USER_AGENT_LIST=[
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36 QIHU 360EE/13.0.2256.0',
+ 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19041',
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
+ 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
+ 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36',
+
+ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 10.0; Trident/6.0)",
+ "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2752.40 Safari/537.36",
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 5.1; WOW64; Trident/5.0)",
+ "Mozilla/5.0 (Windows NT 5.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0"
+]
+# Obey robots.txt rules
+# ROBOTSTXT_OBEY = False
+PROXY_LIST=[
+ {"ip_port":"49.87.250.13:4325"},
+ {"ip_port":"114.106.173.42:4313"},
+ {"ip_port":"115.239.16.241:4314"},
+ {"ip_port":"183.165.249.249:4310"},
+ {"ip_port":"182.128.45.57:4315"},
+ {"ip_port":"183.154.221.57:4356"},
+ {"ip_port":"114.233.169.249:4313"},
+ {"ip_port":"124.161.212.165:4358"},
+ {"ip_port":"114.239.29.114:4345"},
+ {"ip_port":"220.201.85.63:4331"},
+ {"ip_port":"113.243.33.56:4343"},
+ {"ip_port":"113.65.125.60:4386"},
+ {"ip_port":"114.103.89.96:4354"},
+ {"ip_port":"115.209.123.141:4326"},
+ {"ip_port":"42.56.3.70:4361"},
+
+
+]
+
+URLLENGTH_LIMIT = 5000  # maximum allowed request URL length
+HTTPERROR_ALLOWED_CODES = [521]
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+#CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+# DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
+#CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+# 'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+# 'wangModel.middlewares.WangmodelSpiderMiddleware': 543,
+#}
+
+# Enable or disable downloader middlewares
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
+DOWNLOADER_MIDDLEWARES = {
+    'wangModel.middlewares.RandowProxy': 543,             # random proxy middleware
+    'wangModel.middlewares.RandowSpiderMiddleware': 543,  # random User-Agent middleware
+    'wangModel.middlewares.RandomDelayMiddleware': 150,   # random delay middleware
+}
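+# NOTE: custom_settings below is only honored when set as an attribute on a
+# Spider class; defining it at module level in settings.py has no effect.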
+custom_settings = {
+    "RANDOM_DELAY": 3,
+    "DOWNLOADER_MIDDLEWARES": {
+        'wangModel.middlewares.RandomDelayMiddleware': 150,  # random delay middleware
+    }
+}
+
+# Enable or disable extensions
+# See https://docs.scrapy.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+# 'scrapy.extensions.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
+ITEM_PIPELINES = {
+ 'wangModel.pipelines.tuniuHBasePipeline': 300,
+ 'wangModel.pipelines.DuplicatesPipeline': 280,
+}
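+# Lower order values run first: DuplicatesPipeline (280) filters duplicates
+# before tuniuHBasePipeline (300) writes items to HBase/MySQL.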
+FEED_EXPORT_ENCODING='utf-8'
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
+#AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED = True
+#HTTPCACHE_EXPIRATION_SECS = 0
+#HTTPCACHE_DIR = 'httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES = []
+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/__init__.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/__init__.py
new file mode 100644
index 0000000..ebd689a
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/__init__.py
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/a.html b/applications/common/scrapySpiders/wangModel/wangModel/spiders/a.html
new file mode 100644
index 0000000..135d0bc
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/a.html
@@ -0,0 +1,163 @@
+上海到北京火车票预订与代购-高铁票价,动车票价-高铁订票,动车订票网-携程火车票订购中心上海-北京 单程 2022-11-30 (共16车次)
+11-29
+今天
+11-30
+明天
+12-01
+周四
+12-02
+周五
+12-03
+周六
+12-04
+周日
+中转方案推荐
+650
+- 第一程 G234 二等座有票
+- 第二程 G36 二等座有票
+- 二等座21张
+146
+- 一等座无票
+247
+- 商务座无票
+490
+- 二等座21张
+504
+- 一等座21张
+832
+- 商务座无票
+1735
+617
+- 第一程 G1826 二等座有票
+- 第二程 G40 二等座有票
+- 二等座21张
+135
+- 一等座21张
+228
+- 商务座7张
+428
+- 二等座21张
+482
+- 一等座21张
+809
+- 商务座14张
+1735
+218
+- 第一程 Z172 硬座有票
+- 第二程 C2018 二等座有票
+- 硬座7张
+163.5
+- 硬卧17张
+280.5
+- 软卧无票
+438.5
+- 无座无票
+163.5
+- 二等座21张
+54.5
+- 一等座14张
+88
+- 商务座无票
+174
+564
+- 第一程 G8358 二等座有票
+- 第二程 G2582 二等座有票
+- 二等座有票
+118
+- 一等座有票
+186
+- 商务座10张
+374
+- 无座无票
+118
+- 二等座21张
+446
+- 一等座15张
+739
+- 商务座10张
+1348
+560
+- 第一程 D2282 二等座有票
+- 第二程 G102 二等座有票
+- 二等座21张
+115
+- 一等座20张
+184
+- 无座无票
+115
+- 二等座有票
+445
+- 一等座有票
+747
+- 商务座10张
+1405
+查看更多中转方案
+- 高铁(G/C)
+- 动车(D)
+- 普通(Z/T/K)
+- 其他(L/Y)
+-
+00:00
+-
+06:00
+-
+06:00
+-
+12:00
+-
+12:00
+-
+18:00
+-
+18:00
+-
+24:00
+-
+00:00
+-
+06:00
+-
+06:00
+-
+12:00
+-
+12:00
+-
+18:00
+-
+18:00
+-
+24:00
+展开
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/gw.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/gw.py
new file mode 100644
index 0000000..d0ac153
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/gw.py
@@ -0,0 +1,58 @@
+import scrapy
+import json
+import datetime
+import uuid
+import time
+from urllib import parse
+from wangModel.items import TongchenTrainItem
+
+class GwSpider(scrapy.Spider):
+ name = 'gw'
+ allowed_domains = ['qunar.com']
+ start_urls = ['http://qunar.com/']
+ today = str(datetime.date.today())
+ def parse(self, response):
+ with open("../files/city_cap.txt", encoding="utf-8") as f:
+ for cityInfo in f:
+ city_name = cityInfo.split(",")[1]
+ kw = parse.quote(city_name.strip())
+ currentTime = str(round(time.time() * 1000))
+ time.sleep(2)
+ url="https://train.qunar.com/dict/open/s2s.do?callback=jQuery172018610690190401646_1669725079415&dptStation="+kw+"&arrStation=%E6%A1%82%E6%9E%97&date="+str(datetime.date.today())+"&type=normal&user=neibu&source=site&start=1&num=500&sort=3&_="+currentTime
+ yield scrapy.Request(
+ url=url,
+ callback=self.parseItem
+ )
+
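+    # Note: the s2s.do endpoint returns JSONP (JSON wrapped in a jQuery
+    # callback), so parseItem below slices from the first '{' and drops the
+    # trailing ");" before calling json.loads.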
+ def parseItem(self, response):
+ item=TongchenTrainItem()
+ data = response.text
+        index = data.index("{")  # index of the first opening brace (start of the JSON body)
+ result = json.loads(response.text[index:-2])
+ if result['ret']:
+ info = result['data']
+ print(info)
+            item['site'] = "去哪儿旅行"  # source site
+            item['place_from'] = info['dptStation']  # departure city
+            item['place_to'] = info['arrStation']  # destination city
+            item['date'] = str(self.today)  # departure date
+            item['id'] = str(uuid.uuid1().hex)
+
+            item['total_count'] = str(len(info['s2sBeanList']))  # number of services
+            train_list = info['s2sBeanList']
+            if len(info['s2sBeanList']) > 0:
+                for train in train_list:  # loop over services
+                    item['from_station'] = train['dptStationName']  # departure station
+                    item['to_station'] = train['arrStationName']  # arrival station
+                    item['from_time'] = train['dptTime']  # departure time
+                    item['to_time'] = train['arrTime']  # arrival time
+                    item['status'] = train['note']  # status
+                    ticketState = train['seats']
+                    for seatType, content in ticketState.items():  # loop over the seat classes of one service
+                        print(seatType, content)
+                        item['seat_name'] = content['seatName']  # seat class
+                        item['seat_price'] = str(content['price'])  # seat price
+                        item['seats_left'] = str(content['count'])  # seats left
+                        item['type'] = "火车"
+ print(item)
+ yield item
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/main.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/main.py
new file mode 100644
index 0000000..54c4779
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/main.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File : crawler project -> spider runner
+@IDE     : PyCharm
+@Author  : sandmswift
+@Date    : 2022-12-11 14:20
+@Desc
+=================================================='''
+from scrapy.cmdline import execute
+from wangModel.common_spiders.baidusearch import BaiduSpider
+from wangModel.common_spiders.baiduacc import baiduacc
+from wangModel.common_spiders.baiduwords import BaiDuWords
+from wangModel.common_spiders.weibosign import WeiboSignSpider
+from wangModel.common_spiders.tuniu_route import temp
+
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+import os
+import sys
+
+settings = get_project_settings()
+crawler = CrawlerProcess(settings)
+
+# scrapy spiders
+crawler.crawl('tongchen')
+crawler.crawl('tuniu_scenic')
+crawler.crawl('tuniu_hotel')
+crawler.crawl('weibo')
+
+# start() blocks until all scheduled crawls finish and must be called only once
+crawler.start()
+
+
+# run the plain (non-scrapy) crawlers
+"""1. Baidu index"""
+acc = baiduacc()
+acc.parse1()
+
+"""2. Baidu search"""
+run = BaiduSpider()
+run.parse()
+
+"""3. Baidu word entries"""
+baiduWord = BaiDuWords()
+baiduWord.run()
+
+"""4. Weibo check-ins"""
+web = WeiboSignSpider()
+web.run()
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/tongchen.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tongchen.py
new file mode 100644
index 0000000..3ba64b6
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tongchen.py
@@ -0,0 +1,137 @@
+import time
+import uuid
+
+import scrapy
+from urllib import parse
+from wangModel.items import TongchenTrainItem
+import json
+import datetime
+from scrapy.http import JsonRequest
+
+from wangModel.utils.HbaseConn import HbaseUtil
+"""
+Crawl today's train and bus tickets from Tongcheng Travel (ly.com)
+"""
+class TongchenSpider(scrapy.Spider):
+ name = 'tongchen'
+ allowed_domains = ['ly.com']
+ start_urls = ['http://ly.com/']
+ today = str(datetime.date.today())
+
+
+
+ def start_requests(self):
+ with open("../files/city_cap.txt",encoding="utf-8") as f:
+ car_url = "https://bus.ly.com/busresapi/schedule/getScheduleList?plateId=3"
+ for cityInfo in f:
+ item = TongchenTrainItem()
+ city_id=cityInfo.split(",")[0]
+ city_name=cityInfo.split(",")[1]
+ kw = parse.quote(city_name.strip())
+ currentTime=str(round(time.time()*1000))
+ print("-------------正在爬取的城市是 %s-----------------"%kw)
+ train_url = "https://www.ly.com/uniontrain/trainapi/TrainPCCommon/SearchTrainRemainderTickets?callback=jQuery18305629279457315504_1668857363483¶={%22To%22:%22%E6%A1%82%E6%9E%97%22,%22From%22:%22" + kw + "%22,%22TrainDate%22:%22" + self.today + "%22,%22PassType%22:%22%22,%22TrainClass%22:%22%22,%22FromTimeSlot%22:%22%22,%22ToTimeSlot%22:%22%22,%22FromStation%22:%22%22,%22ToStation%22:%22%22,%22SortBy%22:%22fromTime%22,%22callback%22:%22%22,%22tag%22:%22%22,%22memberId%22:%22%22,%22constId%22:%22TzXdqT-dUJYltDmsdvGtjh4huQTPXw1489UB3g7-exI%22,%22headct%22:%220%22,%22platId%22:1,%22headver%22:%221.0.0%22,%22headtime%22:1668590089068}&_="+currentTime
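+                # The "para" query string parameter is URL-encoded JSON:
+                # %22 decodes to '"' and %E6%A1%82%E6%9E%97 to the fixed
+                # destination 桂林; only From/TrainDate vary per request.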
+
+
+ """
+            1. Crawl remaining train-ticket availability
+ """
+ yield scrapy.Request(
+ url=train_url,
+ callback=self.parse_item,
+ dont_filter=False,
+ meta={"item": item}
+ )
+
+ json_request={
+ "departure": city_name.strip(),
+ "destination": "桂林",
+ "departureDate": self.today,
+ "depId": city_id, #城市id
+ "desId": 1101,
+ "page": 1,
+ "pageSize": 25,
+ "orderTime": 0,
+ "orderPrice": 0,
+ "dptTimeSpan": "",
+ "departureStation": "",
+ "arrivalStation": "",
+ "hasCategory": True
+ }
+ """
+            2. Crawl remaining bus-ticket availability
+ """
+ yield JsonRequest(
+ url=car_url,
+ callback=self.parse_car,
+ data=json_request,
+ dont_filter=False,
+ meta={"item":item,"departure":city_name.strip()}
+ )
+
+ """
+    Parse train tickets
+ """
+ def parse_item(self, response):
+ item=response.meta.get('item')
+ data=response.text
+
+        index = data.index("{")  # index of the first opening brace (start of the JSON body)
+        result = json.loads(response.text[index:-1])
+        info = result['data']
+        print("train-ticket response data", info)
+        if info is not None:
+            flag = result['data']['trains']
+            if len(flag) > 0:
+                item['site'] = "同城旅行"  # source site
+                item['place_from'] = info['from']  # departure city
+                item['place_to'] = info['to']  # destination city
+                item['date'] = str(self.today)  # departure date
+                item['total_count'] = str(info['totalCount'])  # number of services
+                train_list = result['data']['trains']
+                for train in train_list:  # loop over services
+                    item['from_station'] = train['fromCity']  # departure station
+                    item['to_station'] = train['toCity']  # arrival station
+                    item['from_time'] = train['fromTime']  # departure time
+                    item['to_time'] = train['toTime']  # arrival time
+                    ticketState = train['ticketState']
+
+                    for seatType, content in ticketState.items():  # loop over the seat classes of one service
+                        item['id'] = str(uuid.uuid1().hex)
+                        item['seat_name'] = content['cn']  # seat class
+                        item['seat_price'] = content['price']  # seat price
+                        item['seats_left'] = content['seats']  # seats left
+                        item['type'] = "火车"
+                        item['status'] = content['state']  # status: 1 = tickets available, 0 = sold out
+                        yield item
+            # print("train ticket", item)
+
+ "解析汽车票"
+ def parse_car(self, response):
+ item = response.meta.get('item')
+ departure=response.meta.get('departure')
+ responseData=response.json()
+        status = responseData['header']['isSuccess']
+        # print("bus-ticket response data", responseData)
+        if status == True:
+            print("bus-ticket response data", responseData)
+            cars_list = responseData['body']['schedule']
+            item['site'] = "同城旅行"  # source site
+            item['place_from'] = departure  # departure city
+            item['place_to'] = "桂林"  # destination city
+            item['date'] = str(self.today)  # departure date
+            item['type'] = "客车"
+            item['total_count'] = str(len(cars_list))  # number of services
+            for car in cars_list:
+                item['id'] = str(uuid.uuid1().hex)
+                item['seat_name'] = ""  # bus tickets have no seat class
+                item['seat_price'] = str(car['ticketPrice'])  # ticket price
+                item['from_station'] = car['dptStation']
+                item['to_station'] = car['arrStation']
+                item['from_time'] = car['dptTime']
+                item['to_time'] = ""
+                item['seats_left'] = car['ticketLeft']
+                item['status'] = car['bookingDesc']  # ticket sale status
+                yield item
+        # print("bus ticket", item)
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_hotel.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_hotel.py
new file mode 100644
index 0000000..2eac80f
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_hotel.py
@@ -0,0 +1,187 @@
+import time
+import scrapy
+import json
+from scrapy.http import JsonRequest
+from wangModel.items import TuniuhotelItem
+from math import ceil
+import datetime
+from wangModel.utils.mysqlConn import getRows,insert
+
+class TuniuHotelSpider(scrapy.Spider):
+ name = 'tuniu_hotel'
+ allowed_domains = ['tuniu.com']
+ start_urls = ['https://s.tuniu.com/search_complex/hotel-nn-0-%E6%A1%82%E6%9E%97/']
+ page = 1
+ count = 0
+ pageNum=0
+ today = datetime.date.today()
+ tomorrow = today + datetime.timedelta(days=1)
+
+ data = {"primary": {"cityCode": "705",
+ "cityType": 0,
+ "checkIn": str(today),
+ "checkOut": str(tomorrow),
+ "roomNum": 1,
+ "adultNum": 2,
+ "childNum": 0,
+ "childAges": [],
+ "keyword": ""},
+ "secondary": {
+ "poi": {
+ "locationType": 2,
+ "pois": []},
+ "prices": [],
+ "stars": [],
+ "brands": [],
+ "features": [],
+ "facilities": [],
+ "commentScore": "",
+ "bedTypes": []},
+ "threeStages": [],
+ "suggest": {},
+ "pageNo": 1,
+ "pageSize": 10,
+ "sort": 0,
+ "customerClient": 2,
+ "returnDistance": True,
+ "secondaryDist": {"pValue": "", "userType": 0}}
+ def start_requests(self):
+
+ url = "https://hotel.tuniu.com/hotel-api/hotel/list?c=%7B%22ct%22%3A20000%7D"
+ # url = ' https://hotel.tuniu.com/hotel-api/hotel/detail?c={"ct":20000}&d={"hotelId":"351748651"}'
+
+ yield JsonRequest(
+ url=url,
+ callback=self.parse,
+ data=self.data,
+ )
+
+    # Parse the hotel list data
+ def parse(self, response):
+ print(f"爬取第{self.page}页")
+ data = response.json()
+ # print(data)
+ item = TuniuhotelItem()
+ self.count = data['data']['count']
+ hotellist = data['data']['hotels']
+ for i in range(len(hotellist)):
+ hotel = hotellist[i]['hotel']
+ refer = hotellist[i]['reference']
+ item['id'] =hotel['hotelId']
+ item['hname'] = hotel['chineseName']
+ item['starname'] = hotel['starName']
+ item['hpic'] = hotel['firstPic']
+ item['haddress'] = hotel['address']
+ item['business'] = hotel['business']
+ item['distance'] = refer['distanceText']
+ item['hlowstprice'] = hotellist[i]['lowestPrice']
+ comment = hotellist[i]['comment']
+
+
+            ## crawl detail info only for hotels that have reviews
+ if 'score' in comment:
+ # print("有评论内容",comment)
+ hotel_name = str(item['hname']).replace("(", "(").replace(")", ")")
+ # print("原始酒店名称", hotel_name)
+ sql = "select id,name from hotels where name = %s"
+ dataRows=getRows(sql,hotel_name)
+ print("数据库的数据查询结果",dataRows)
+ print("酒店id",item['id'])
+ if dataRows:
+ id = getRows(sql, hotel_name)[0][0]
+ baseName = getRows(sql, hotel_name)[0][1]
+ print("开始爬取酒店:",baseName)
+ yield scrapy.Request(
+ url=f"https://hotel.tuniu.com/hotel-api/comment/summary?c=%7B%22ct%22:20000%7D&d=%7B%22hotelId%22:%22{item['id']}%22%7D",
+ callback=self.parse_summary_comments,
+ dont_filter=False,
+ meta={"hotelId":id,"hotelName":baseName,"item":item}
+ )
+
+ # print(item)
+ self.pageNum = ceil(self.count / 10)
+ print("总页数", self.pageNum)
+
+        # paginate the hotel list
+        if (self.pageNum > 1):
+            self.page = self.page + 1
+            print(f"start crawling page {self.page}")
+            self.data['pageNo'] = self.page
+            if (self.page <= self.pageNum):
+ yield JsonRequest(
+ url="https://hotel.tuniu.com/hotel-api/hotel/list?c=%7B%22ct%22%3A20000%7D",
+ callback=self.parse,
+ data=self.data,
+ )
+ time.sleep(2)
+
+    # fetch the review summary, including per-aspect scores
+ def parse_summary_comments(self, response):
+ item=response.meta.get('item')
+ # print(item)
+ id=response.meta.get('hotelId')
+ hotelName=response.meta.get('hotelName')
+ summary=response.json()
+ otherComment=summary['data']['aspects']
+ commentSum=summary['data']['commentCount']
+ print("爬取分类的评论",otherComment)
+ item['others']=otherComment
+ item['hcomments']=[]
+ requestbody = {
+ "hotelId": str(item['id']),
+ "grade": "ALL",
+ "pageNo": 1,
+ "pageSize": 8
+ }
+        pages = ceil(commentSum / 8)
+        for i in range(1, pages + 1):
+            requestbody['pageNo'] = i  # request pages 1..pages
+ time.sleep(3)
+ yield JsonRequest(
+ url="https://hotel.tuniu.com/hotel-api/comment/list?c=%7B%22ct%22%3A20000%7D",
+ callback=self.parse_comments,
+ dont_filter=False,
+ data=requestbody,
+ meta={"hotelId": id, "hotelName": hotelName, "item": item,"body":requestbody,"pages":pages,"currentPage":i}
+ )
+ # yield item
+    # parse the hotel review details
+ def parse_comments(self, response):
+ # print("进入解析",response)
+ item = response.meta.get('item')
+ pages = response.meta.get('pages')
+ currentPage = response.meta.get('currentPage')
+ print(f"解析{item['hname']}第{currentPage}页,共{pages}页")
+ data=response.json()
+ print("json数据",data)
+ id = response.meta.get('hotelId')
+ others=item['others']
+
+ otherslist=[]
+ # print("其他评论详情",others)
+ for contentInfo in others:
+ # print(type(contentInfo))
+ # print(contentInfo)
+ categroy = {}
+ categroy["cnName"]=contentInfo['cnName']
+ categroy["enName"]=contentInfo['enName']
+ categroy["score"]=str(contentInfo['aspectScore'])
+ otherslist.append(categroy)
+ # print("其他评论", otherslist)
+ hotelName = response.meta.get('hotelName')
+        comment_sum = data['data']['groupCount']['ALL']  # total number of reviews
+        good = data['data']['groupCount']['GOOD']
+        middle = data['data']['groupCount']['COMMON']
+        bad = data['data']['groupCount']['BAD']
+        sql="insert into hotel_comment(hotelId,hotelName,num,good,middle,bad,othersComment,siteFrom,crawlTime) select %s,%s,%s,%s,%s,%s,%s,%s,%s from dual where not exists (select hotelName,siteFrom,crawlTime from hotel_comment where hotelName= %s and siteFrom='途牛' and crawlTime=%s);"
+        insert(sql,(id,hotelName,comment_sum,good,middle,bad,str(otherslist),"途牛",datetime.date.today(),hotelName,datetime.date.today()))
+        if 'comments' in data['data']:
+            print(data['data']['comments'])
+            item['hcomments'] = item['hcomments'] + data['data']['comments']  # reviews on this page
+ if currentPage==pages:
+ print( item['hcomments'])
+ yield item
+
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_scenic.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_scenic.py
new file mode 100644
index 0000000..b466026
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/tuniu_scenic.py
@@ -0,0 +1,97 @@
+# -*- coding:utf-8 -*-
+import json
+import time
+
+import requests
+import scrapy
+from wangModel.items import WangmodelItem
+import re
+from selenium import webdriver
+from scrapy.http import HtmlResponse
+from selenium.webdriver.common.action_chains import ActionChains
+from math import ceil
+from wangModel.utils.mysqlConn import query
+import datetime
+
+# Scenic-spot spider (originally based on RedisSpider; now a plain scrapy.Spider)
+class ItcastSpider(scrapy.Spider):
+ name = 'tuniu_scenic'
+    # redis_key = 'tuniu:start_urls'  # key to listen on
+ allowed_domains = ['tuniu.com']
+
+ start_urls = ['https://www.tuniu.com/menpiao/787427#/index']
+ # start_urls = ["https://s.tuniu.com/search_complex/ticket-nn-0-%E6%A1%82%E6%9E%97/"]
+ # start_urls = ["https://www.tuniu.com/resp-detail/api/menpiao/getMenpiaoComment?currentPage=3&specId=1167&stamp=078776045436755181667991933212"]
+ flag=1
+
+ def start_requests(self):
+ url_list= query("select id,name,tn_url from scenics where tn_url !='' ",None)
+ # print(self.start_urls)
+ # yield scrapy.Request(
+ # url=url_list[0]['url'],
+ # callback=self.parse,
+ # meta={"scenic": url_list[0]}
+ # )
+ for redatas in url_list:
+ time.sleep(2)
+ yield scrapy.Request(
+ url=redatas['url'],
+ dont_filter=False,
+ callback=self.parse,
+ meta={"scenic":redatas}
+ )
+ """
+    Crawl the listing page
+ """
+    def parse(self, response):  # handle the detail page (JSON data)
+ time.sleep(3)
+ # print("---------------进入详情页面爬取-----------------")
+ item = WangmodelItem()
+ data=response.json()
+ scenic=response.meta.get('scenic')
+ item['id']=scenic['id']
+ item['name']=scenic['name']
+ item['satisfy_present']=data['data']['summary']['satisfaction']
+ item['remarkAmount']=data['data']['summary']['remarkAmount']
+ item['compGrade3Amount']=data['data']['summary']['compGrade3Amount']
+ item['compGrade2Amount']=data['data']['summary']['compGrade2Amount']
+ item['compGrade1Amount']=data['data']['summary']['compGrade1Amount']
+ commentlist=data['data']['remarkList']
+ item['commentlist'] = []
+ # print(item)
+ # yield item
+
+ """
+        Review details
+ """
+ if commentlist is not None:
+ item['commentlist']=commentlist
+            comment_page = ceil(item['remarkAmount'] / 10)  # number of review pages
+ if(comment_page)>1:
+ flag = 1
+ while flag <= comment_page:
+ currentPage=flag+1
+ flag+=1
+ productId=re.search("specId=.*&",scenic['url']).group().replace("specId=","").replace("&","")
+ detail_page_url = f"https://www.tuniu.com/resp-detail/api/menpiao/getMenpiaoComment?currentPage={currentPage}&specId={productId}&stamp=078776045436755181667991933212"
+ yield scrapy.Request(
+ url=detail_page_url,
+ callback=self.parse_detail_nextPage,
+ meta={"item": item, "url": detail_page_url,"currentPage":currentPage,"comment_page":comment_page},
+ dont_filter=True
+ )
+
+
+    def parse_detail_nextPage(self, response):  # handle paginated review pages (JSON data)
+ time.sleep(2)
+ item = response.meta.get('item')
+ currentPage=response.meta.get('currentPage')
+ comment_page=response.meta.get('comment_page')
+ data = response.json()
+ # print("翻页评论详情",data)
+ commentlist = data['data']['remarkList']
+ if commentlist is not None:
+ item['commentlist'] =item['commentlist']+commentlist
+ if currentPage==comment_page:
+ yield item
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/weibo.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/weibo.py
new file mode 100644
index 0000000..42651f8
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/weibo.py
@@ -0,0 +1,87 @@
+import time
+
+import scrapy
+from wangModel.items import WeiboItem
+from math import ceil
+import re
+import uuid
+
+class WeiboSpider(scrapy.Spider):
+ name = 'weibo'
+ allowed_domains = ['weibo.com']
+ start_urls = ['http://weibo.com/']
+ articles_url = "https://m.weibo.cn/api/container/getIndex?uid=1989772524&luicode=10000011&lfid=100103type=3&q=桂林旅游&t=&type=uid&value=1989772524&containerid=1076031989772524"
+
+ total_pages=0
+ current_page=1
+    # profile info: https://weibo.com/ajax/profile/info?screen_name=桂林市文化广电和旅游局
+    # post list: https://weibo.com/ajax/statuses/mymblog?uid=1989772524&page=1&feature=0
+ def start_requests(self):
+ yield scrapy.Request(
+ url="https://m.weibo.cn/api/container/getIndex?uid=1989772524&luicode=10000011&lfid=100103type%3D3%26q%3D%E6%A1%82%E6%9E%97%E6%97%85%E6%B8%B8%26t%3D&type=uid&value=1989772524&containerid=1005051989772524",
+ callback=self.parse,
+ )
+    # parse the user profile: follower count etc.
+ def parse(self, response):
+ data=response.json()
+ item=WeiboItem()
+ item['userid']=data['data']['userInfo']['id']
+ item['screen_name']=data['data']['userInfo']['screen_name']
+ item['fins']=data['data']['userInfo']['followers_count_str']
+ item['artilelist']=[]
+
+ yield scrapy.Request(
+ url=self.articles_url,
+ callback=self.parse_articles,
+ dont_filter=True,
+ meta={"item":item}
+ )
+
+
+    # parse each page of posts, following the pagination cursor
+ def parse_articles(self, response):
+ item=response.meta.get('item')
+ item['id'] = str(uuid.uuid1().hex)
+ artilePage=response.json()
+ if 'cardlistInfo' in artilePage['data']:
+            nextPage_id = artilePage['data']['cardlistInfo']['since_id']  # cursor for the next page
+            item['total_artiles'] = artilePage['data']['cardlistInfo']['total']
+            self.total_pages = ceil(artilePage['data']['cardlistInfo']['total'] / 12)
+            print(f"{self.total_pages} pages in total")
+            content_list = artilePage['data']['cards']  # list of posts
+ card=[]
+ item['artilelist']=[]
+ for i in range(len(content_list)):
+ card_type=content_list[i]['card_type']
+ if card_type==9:
+ print(content_list[i]['mblog']['id'])
+                    artile_id = content_list[i]['mblog']['id']  # post id
+                    reposts_count = content_list[i]['mblog']['reposts_count']  # repost count
+                    comments_count = content_list[i]['mblog']['comments_count']  # comment count
+                    attitudes_count = content_list[i]['mblog']['attitudes_count']  # like count
+ content_text = re.sub("[A-Za-z0-9\!\%\[\]\,\。\<\-\=\"\:\/\.\?\&\_\>\'\;\ ]", "",
+ content_list[i]['mblog']['text'])
+ postDate = content_list[i]['mblog']['created_at']
+ card.append({
+ "artile_id":str(artile_id),
+ "reposts_count":str(reposts_count),
+ "comments_count":str(comments_count),
+ "attitudes_count":str(attitudes_count),
+ "text":str(content_text),
+ 'postDate':postDate
+ })
+            # item['artilelist'] = item['artilelist'] + card
+            item['artilelist'] = card
+            print(f"crawling page {self.current_page}")
+            # follow the since_id cursor one page at a time rather than
+            # queueing the same cursor for every remaining page at once
+            if self.current_page <= self.total_pages:
+                time.sleep(2)
+                yield scrapy.Request(
+                    url=self.articles_url + "&since_id=" + str(nextPage_id),
+                    callback=self.parse_articles,
+                    dont_filter=True,
+                    meta={"item": item}
+                )
+                print(f"----------------- page {self.current_page} queued -----------------")
+                self.current_page = self.current_page + 1
+        yield item
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/spiders/weixin.py b/applications/common/scrapySpiders/wangModel/wangModel/spiders/weixin.py
new file mode 100644
index 0000000..012044f
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/spiders/weixin.py
@@ -0,0 +1,46 @@
+import scrapy
+
+"""
+The fetched articles carry no like or favorite counts
+"""
+class WeixinSpider(scrapy.Spider):
+ name = 'weixin'
+ allowed_domains = ['weixin.qq.com']
+ start_urls = ['http://weixin.qq.com/']
+
+ def parse(self, response):
+ url="https://mp.weixin.qq.com/cgi-bin/appmsg?action=list_ex&begin=0&count=5&fakeid=MjM5MTU4MDA3NA==&type=9&query=&token=1865697574&lang=zh_CN&f=json&ajax=1"
+ cookie={"ua_id":"M0pQpE2KNnw1HvOXAAAAAE1fDecySy9uYPcTbbxXQRU=",
+ "wxuin":"46810516647178",
+ "mm_lang":"zh_CN",
+ "RK":"79EdPle1Va",
+ "ptcz":"f456c97e2c8f1090c61d121feb1eeef1419024051b0ed67a796600b76d0188ce",
+ "tvfe_boss_uuid":"6b9a9980f35eae48",
+ "pgv_pvid":"5260654758", "o_cookie":"1732095688",
+ "sd_userid":"30241653648365907",
+ "sd_cookie_crttime":"1653648365907",
+ "pgv_pvi":"3187809280",
+ "_hjSessionUser_3021617":"eyJpZCI6IjlhOTNkZWFiLTMzMDgtNTE5Yi05NWFlLTY4NGRlNGRjM2RhNSIsImNyZWF0ZWQiOjE2NTgwMjcyNTc1MTIsImV4aXN0aW5nIjpmYWxzZX0=",
+ "fqm_pvqid":"6afd8062-36ba-409d-b8b7-5b81ed4b79a6",
+ "eas_sid":"t1s6C6I0l577k3v243y953C9j9",
+ "Qs_lvt_323937":"1660573260",
+ "Qs_pv_323937":"715607012924411500",
+ "pgv_info":"ssid=s8054505600",
+ "uuid":"290f168055a3887964b014be8c572aeb",
+ "rand_info":"CAESIMvL6//JBy3GkYFANvsjpopfu+U1CadTWcrGvE5/iUkg",
+ "slave_bizuin":"3865832081", "data_bizuin":"3865832081", "bizuin":"3865832081",
+ "data_ticket":"3PNmNqEn/TJReP5OnXXQDWRy8NSPvxdRXgAP1zpmBJEEXd373AHCceq4yOquFumT",
+ "slave_sid":"RHZjMFdod1pQdzdHVjEzTUgyZkREeUZYVDR0YUFZUlpreXJYTWZLUDB3TTJIUklGdkhlX213UVVQeFg2cVdtX1FSRDhkcWVTcE5tRm1BZm52R2E2RkpaQkRrbzdoWFpCRWtvVmtZajYydmNMajdwTmpFOUhHWjRYbHlGcGppQ2tBYTc5cmNSVm02RFE1VTNk",
+ "slave_user":"gh_0a3b16a337ce",
+ "xid":"a1ceb4b8eea06c75fdfd31d65f3767f5",
+ "_clck":"3865832081|1|f6k|0"}
+ yield scrapy.Request(
+ url=url,
+ callback=self.parse_item,
+ cookies=cookie
+ )
+
+ def parse_item(self, response):
+ print(response.json())
+ data=response.json()
+ artilelist=data['app_msg_list']
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/test.csv b/applications/common/scrapySpiders/wangModel/wangModel/test.csv
new file mode 100644
index 0000000..4135feb
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/test.csv
@@ -0,0 +1,271 @@
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+飞拉达攀岩基地 ,238,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市阳朔县遇龙河生态公园门口,开放时间:周一-周日:9:00-11:00,15:00-17:00。
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+虞山公园 ,220,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,87,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,39,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,96,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,82,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,79,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,75,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,98,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,74,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+桂林相公山 ,80,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,94,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,76,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,95,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,81,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,69,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,70,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,87,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,39,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,96,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,82,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,79,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,75,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,98,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,74,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+桂林相公山 ,80,76,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,94,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,95,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,81,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,69,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,70,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+桂林相公山 ,80,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西桂林阳朔兴坪镇境内的漓江西岸。,
+虞山公园 ,220,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,87,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,74,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,98,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,79,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,75,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,67,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,39,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,96,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,83,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,82,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,99,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+虞山公园 ,220,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市城叠彩区北极路东,漓江西岸。,开放时间:8:00-17:30。
+靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+靖江王陵 ,28,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:桂林市尧山路,开放时间:09:00-17:00
+山水园 ,11,76,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,90,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,91,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,88,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,93,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,94,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,86,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,69,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,89,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,84,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,100,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,92,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
+山水园 ,11,0,//img3.tuniucdn.com/img/20161227/common/TN320.jpg,地 址:广西省桂林市阳朔县城滨江路2号。,开放时间:08:30-17:30
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/tuniu.csv b/applications/common/scrapySpiders/wangModel/wangModel/tuniu.csv
new file mode 100644
index 0000000..e69de29
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/HbaseConn.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/HbaseConn.py
new file mode 100644
index 0000000..44b63d9
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/HbaseConn.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> connTest
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-11 11:31
+@Desc
+=================================================='''
+import happybase
+# con = happybase.Connection(host='localhost', port=9090, timeout=None, autoconnect=True, table_prefix=None, table_prefix_separator=b'_', compat='0.98', transport='buffered', protocol='binary')
+# Without extra arguments a plain connection is made; Thrift's default port is 9090.
+
+class HbaseUtil:
+    def __init__(self, host):
+        # host: address of the HBase Thrift server
+        self.con = happybase.Connection(host)
+        self.con.open()
+ """
+ 插入数据
+ 参数:表名,行键,数据:键值对
+ 数据实例:左边是列族:列名,右边是插入的数据
+ data= { "info:name": "lisa",
+ "info:address":"Beijing"
+ }
+ """
+    def putTable(self, tablename, rowkey, data):
+        table = self.con.table(tablename)
+        table.put(rowkey, data)
+ # self.con.close()
+
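+    # batch() groups several puts into one round-trip; with a single put, as
+    # here, it behaves the same as putTable.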
+    def batchTable(self, tablename, rowkey, data):
+        table = self.con.table(tablename)
+        bat = table.batch()
+        bat.put(rowkey, data)
+        bat.send()
+ # self.con.close()
+
+ """
+ 获取所有表名
+ """
+ def getTables(self):
+ print(self.con.tables())
+
+ def closeCon(self):
+ self.con.close()
+
+
+# example usage:
+# obj = HbaseUtil('202.193.53.106')  # connect
+# obj.getTables()                    # list the existing tables
+
+
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/citydeal.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/citydeal.py
new file mode 100644
index 0000000..5ad314a
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/citydeal.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> citydeal
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-30 20:08
+@Desc
+=================================================='''
+import re
+with open('../files/city.txt', encoding="utf-8") as f:
+    for cityInfo in f:
+        city_id = cityInfo.split(",")[0]
+        city_name = cityInfo.split(",")[1]
+        kw = city_name.strip()
+        # keep only names that do not end in 镇/县/村/区/乡
+        if re.search("[镇县村区乡]$", kw) is None:
+            print(kw)
+            with open("../files/city_cap.txt", "a+", encoding='utf-8') as city:
+                city.write(city_id + "," + kw + "\n")
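+# each line of city.txt is assumed to look like "<id>,<name>,..." (comma-separated)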
+
+# data="灌阳县"
+# data1="灌阳镇"
+# data2="灌阳村"
+#
+# print(len(re.findall("([.*镇]|[.*县]|[.*村])",data))>0)
+# print(re.findall("([.*镇]|[.*县]|[.*村])",data1))
+# print(re.findall("([.*镇]|[.*县]|[.*村])",data2))
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/createTables.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/createTables.py
new file mode 100644
index 0000000..37b5c38
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/createTables.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> test
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-17 16:21
+@Desc
+=================================================='''
+import happybase
+
+con = happybase.Connection("202.193.53.106")
+con.open()
+
+con.create_table("tuniu_scenic",{
+ 'info':dict(),
+ 'comments':dict(),
+})
+con.create_table("scenic_hotel",{
+ 'info':dict()
+})
+con.create_table("weibo",{
+ 'info':dict()
+})
+con.create_table("tongchen",{
+ 'info':dict()
+})
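+# NOTE: the names "bauduacc" and "baudusearch" below look like typos of
+# "baiduacc"/"baidusearch"; they are kept as-is because the spiders that
+# write to HBase may already reference these exact table names.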
+con.create_table("bauduacc",{
+ 'info':dict(),
+ 'all':dict(),
+ 'wise':dict()
+})
+con.create_table("baiduwords",{
+ 'info':dict()
+
+})
+con.create_table("baudusearch", {
+ 'info': dict()
+
+})
+
+con.close()
\ No newline at end of file
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/hostory_weather.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/hostory_weather.py
new file mode 100644
index 0000000..5f892e1
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/hostory_weather.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> hostory_weather
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-12-13 12:24
+@Desc
+=================================================='''
+"""
+Created on Mon Apr 13 11:48:58 2020
+
+@author: ZAN
+"""
+
+import requests
+import pandas as pd
+from bs4 import BeautifulSoup
+from collections import defaultdict
+from dateutil.relativedelta import relativedelta
+from datetime import datetime
+
+
+class WeatherData:
+    def __init__(self, city, start_year, end_year, start_month=1, end_month=12):
+        """
+        :param city: full pinyin of the city to crawl (e.g. "guilin")
+        :param start_year: first year to crawl
+        :param end_year: last year to crawl
+        :param start_month: first month to crawl
+        :param end_month: last month to crawl
+        """
+ self.city = city
+ self.start_time = datetime.strptime(f"{start_year}-{start_month}", '%Y-%m')
+ self.end_time = datetime.strptime(f"{end_year}-{end_month}", '%Y-%m')
+
+ def _get_original_html(self):
+ """
+ 网页爬取
+ """
+
+ url = f"https://tianqi.911cha.com/{self.city}/{self.start_time.year}-{self.start_time.month}.html"
+ print(url)
+        header = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"}  # use your own browser's User-Agent string
+ response = requests.get(url, headers=header)
+ return response.content.decode("utf-8")
+
+ def _parse_data(self):
+        # parse one month of rows at a time
+ soup = BeautifulSoup(self.html, "html.parser")
+ data = defaultdict(dict)
+ for n, tr in enumerate(soup.find_all("tr")):
+ if n == 0:
+ continue
+
+ if n % 2 != 0:
+ date = tr.find("a").get_text()
+                # start this date's dict
+                # fields: [time, image, weather, temperature, humidity, wind_force, wind_scale, precipitation, sendible_temperature, cloud_amount]
+ data[date]["Day"] = {str(self.start_time.year) + '-' + key: con.get_text() for key, con in
+ zip(['time', 'image', 'weather', 'temperature', 'humidity', 'wind_force',
+ 'wind_scale',
+ 'precipitation', 'sendible_temperature', 'cloud_amount'], tr.find_all("td"))}
+
+ else:
+ data[date]["Night"] = {key: con.get_text() for key, con in zip(
+ ['time', 'image', 'weather', 'temperature', 'humidity', 'wind_force', 'wind_scale',
+ 'precipitation', 'sendible_temperature', 'cloud_amount'], tr.find_all("td"))}
+ return data
+
+ def main(self):
+
+ data = []
+ while self.start_time <= self.end_time:
+ self.html = self._get_original_html()
+ data.append(self._parse_data())
+ self.start_time += relativedelta(months=1)
+
+ return data
+
+
+result = []
+if __name__ == "__main__":
+    T = WeatherData(city="guilin", start_year=2018, end_year=2019, start_month=1, end_month=12)
+ with open('weather_dict.txt', 'w', encoding='UTF-8') as f:
+ for line in T.main():
+ result.append(line)
+ f.writelines(str(line))
+key_list = []
+key2_list = []
+val_list = []
+val3_list = []
+val5_list = []
+for data in result:
+ key_value = list(data.keys())
+ key_list.append(key_value)
+ val_value = list(data.values())
+ val_list.append(val_value)
+
+for i in key_list:
+    key2_list = key2_list + i
+
+# everything below reshapes the parsed values
+for val2 in val_list:
+ for val3 in val2:
+ val3_value = list(val3.values())
+ val3_list.append(val3_value)
+
+for nu in range(len(val3_list)):
+ for val4 in val3_list[nu]:
+ val5 = list(val4.values())
+        val6 = ['0' if i == '-' else i for i in val5]  # treat '-' (no precipitation) as '0'
+ val5_list.append(val6)
+
+data_key = pd.DataFrame(key2_list)  # dates
+data_val = pd.DataFrame(val5_list)  # weather readings; pick different columns if needed
+
+# strip the units
+temp = data_val[3].str.strip('℃')
+humd = data_val[4].str.strip('%')
+rain = data_val[7].str.strip('mm')
+
+weather = pd.DataFrame([temp, humd, rain]).T
+
+# even-indexed rows are daytime data
+day = weather[weather.index % 2 == 0].reset_index(drop=True)
+# odd-indexed rows are night-time data
+night = weather[weather.index % 2 == 1].reset_index(drop=True)
+
+fin = pd.concat([data_key, night, day], axis=1)
+fin.to_csv('桂林气象.csv', encoding="utf_8_sig")  # output file named after the crawled city (guilin)
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/mysqlConn.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/mysqlConn.py
new file mode 100644
index 0000000..ad7a6a6
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/mysqlConn.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> mysqlConn
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-21 10:54
+@Desc
+=================================================='''
+import pymysql
+
+def get_conn():
+ conn = pymysql.connect(host='202.193.53.151', port=3306, user='root', passwd='root', db='travel')
+ return conn
+
+"""
+查询数据库
+"""
+def getRows(sql,args):
+ conn = get_conn()
+ cur = conn.cursor()
+ cur.execute(sql, args)
+ results = cur.fetchall()
+ return results
+
+def query(sql, args):
+    conn = get_conn()
+    cur = conn.cursor()
+    cur.execute(sql, args)
+    results = cur.fetchall()  # fetchall returns a tuple of tuples
+    rows = []
+    for row in results:
+        rows.append({
+            "id": row[0],
+            "name": row[1],
+            "url": row[2]
+        })
+    print(rows)
+    conn.commit()
+    cur.close()
+    conn.close()
+    return rows
+
+"""
+插入数据库
+"""
+def insert(sql, args):
+ conn = get_conn()
+ cur = conn.cursor()
+ result = cur.execute(sql, args)
+ # print(result)
+ conn.commit()
+ cur.close()
+ conn.close()
+
+"""更新"""
+def update(sql,args):
+ conn = get_conn()
+ cur = conn.cursor()
+ result = cur.execute(sql,args)
+ print(result)
+ conn.commit()
+ cur.close()
+ conn.close()
+
+
+# if __name__ == '__main__':
+#     sql = "select id,name,tn_url from scenics where tn_url != '' "
+#     query(sql, None)
+#     sql = 'INSERT INTO scenic_comment(scenicId,scenicName,satisfy_present,num,good,middle,bad) VALUES(%s,%s,%s,%s,%s,%s,%s);'
+#     insert(sql, (2, 'wang', 13))  # note: this INSERT needs seven values, not three
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/proxys.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/proxys.py
new file mode 100644
index 0000000..e4d7121
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/proxys.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> proxys
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-11-15 18:17
+@Desc
+=================================================='''
+# proxy pool
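+# NOTE: a dict literal cannot hold duplicate keys, so only the last "http"
+# entry below actually takes effect; iterate over `ips` to rotate proxies.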
+PROXY = {
+ "http": "http://1.83.249.30:4329",
+ "http": "http://182.45.41.139:4314",
+ "http": "http://175.146.208.97:4356",
+ "http": "http://49.85.179.62:4331",
+ "http": "http://110.86.177.255:4385",
+ "http": "http://218.72.80.179:4343"
+
+}
+
+asyncProxy = [
+    "http://175.6.60.172:6666",
+    "http://61.171.99.128:6666",
+]
+
+ips=[
+"106.110.86.175:4331",
+"49.82.49.157:4315",
+"117.32.77.213:4315",
+"111.76.67.53:4315",
+"115.234.245.144:4375",
+"218.85.249.120:4331",
+"113.138.147.101:4314",
+"140.224.61.27:4324",
+"27.158.34.243:4335",
+"49.85.49.161:4313",
+"49.85.188.172:4331",
+"113.138.144.253:4328",
+"183.92.199.101:4324",
+"27.29.150.18:4367",
+"182.128.44.94:4331",
+"123.115.202.150:4325",
+"1.83.249.21:4326",
+"106.110.86.229:4331",
+"59.59.158.244:4331",
+"117.34.231.111:4315",
+"124.72.100.151:4352",
+"117.32.78.202:4315",
+"117.26.231.36:4345",
+"117.26.131.113:4324",
+"27.156.194.18:4368",
+"183.165.247.91:4345",
+"115.204.59.33:4343",
+"42.7.4.243:4331",
+"125.105.110.229:4345",
+"27.190.72.105:4341",
+"114.237.193.227:4348",
+"125.79.192.81:4313",
+"220.189.78.156:4314",
+"124.116.116.162:4328",
+"42.7.30.64:4313",
+"27.156.196.94:4332",
+"59.59.215.82:4313",
+"175.146.210.246:4356",
+"60.169.115.208:4323",
+"14.157.103.161:4313",
+"114.216.46.137:4357",
+"120.42.191.38:4313",
+"42.57.148.171:4356",
+"114.106.170.131:4345",
+"114.106.170.146:4354",
+"114.103.88.180:4345",
+"115.208.46.183:4331",
+"117.34.230.147:4315",
+"114.106.156.218:4354",
+"122.246.91.178:4305",
+"115.229.247.100:4331",
+"114.99.2.196:4378",
+"175.146.68.134:4385",
+"49.87.250.61:4315",
+"183.143.135.162:4326"
+]
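+
+# a minimal sketch of using the pool with requests (assumed usage):
+# import random
+# proxy = {"http": "http://" + random.choice(ips)}
+# requests.get(url, proxies=proxy)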
diff --git a/applications/common/scrapySpiders/wangModel/wangModel/utils/weather_deal.py b/applications/common/scrapySpiders/wangModel/wangModel/utils/weather_deal.py
new file mode 100644
index 0000000..b50451f
--- /dev/null
+++ b/applications/common/scrapySpiders/wangModel/wangModel/utils/weather_deal.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :爬虫 -> weather_deal
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-12-13 12:13
+@Desc
+=================================================='''
+from mysqlConn import getRows, update
+
+select_weather = "select id,max_tem,min_tem,"  # query left unfinished here
diff --git a/applications/common/tasks/tasks.py b/applications/common/tasks/tasks.py
index fca30e2..20a61a6 100644
--- a/applications/common/tasks/tasks.py
+++ b/applications/common/tasks/tasks.py
@@ -1,23 +1,48 @@
-
+from applications.common.tasks.微博签到.weibosign import WeiBoSign
from applications.common.tasks.景区评论标题.scenic_start import Scenic
+from applications.common.tasks.百度.baidu_start import BaiduCrawl
from applications.common.tasks.线路评论标题.route_start import Route
from applications.common.tasks.酒店评论标题.hotel_title_start import Hotel
from applications.common.tasks.景区攻略.guide_start import Guide
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+
+settings = get_project_settings()
+crawler = CrawlerProcess(settings)
+
+task_list = ['景区评论标题', '线路评论标题', '景区攻略', '酒店评论标题']
-task_list = ['景区评论标题', '线路评论标题', '景区攻略','酒店评论标题']
def 景区评论标题(id, name):
scenic_start = Scenic()
scenic_start.run()
+
def 线路评论标题(id, name):
scenic_start = Route()
scenic_start.run()
+
def 景区攻略(id, name):
scenic_start = Guide()
scenic_start.run()
+
def 酒店评论标题(id, name):
scenic_start = Hotel()
scenic_start.run()
+
+
+def 交通拥堵爬取():
+    crawler.crawl('tongchen')  # only crawls Tongcheng train and bus tickets
+ crawler.start()
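+    # NOTE: start() runs the Twisted reactor, which cannot be restarted within
+    # one process, so a repeating interval job calling this will fail on its
+    # second run; CrawlerRunner or a subprocess would avoid that.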
+
+
+def 微博签到爬取():
+    weibosign = WeiBoSign()
+    weibosign.run()
+
+
+def 百度相关指数爬取():
+ baidu_start = BaiduCrawl()
+ baidu_start.run()
diff --git "a/applications/common/tasks/\345\276\256\345\215\232\347\255\276\345\210\260/weibosign.py" "b/applications/common/tasks/\345\276\256\345\215\232\347\255\276\345\210\260/weibosign.py"
new file mode 100644
index 0000000..d889821
--- /dev/null
+++ "b/applications/common/tasks/\345\276\256\345\215\232\347\255\276\345\210\260/weibosign.py"
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :pear-admin-flask -> weibosign
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-12-14 18:14
+@Desc
+=================================================='''
+from wangModel.common_spiders.weibosign import WeiboSignSpider
+
+
+class WeiBoSign:
+
+ def run(self):
+ print("开始爬取微博签到")
+ web = WeiboSignSpider()
+ web.run()
+ print("爬取微博签到结束")
diff --git "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py" "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py"
index 82ce79c..d7d511b 100644
--- "a/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py"
+++ "b/applications/common/tasks/\346\231\257\345\214\272\350\257\204\350\256\272\346\240\207\351\242\230/scenic_start.py"
@@ -8,16 +8,21 @@ from applications.common.tasks.景区评论标题.tongcheng_scenic_comment_title
from applications.common.tasks.景区评论标题.xiecheng_scenic_comment_title import Xiecheng_Scenic
import asyncio
import time
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+settings = get_project_settings()
+crawler = CrawlerProcess(settings)
mafengwo = Mafengwo_Scenic()
qunaer = Qunaer_Scenic()
tongcheng = Tongcheng_Scenic()
xiecheng = Xiecheng_Scenic()
+
class Scenic:
def run(self):
print("开始爬取各个网站的评论标题!")
- time_start=time.time()
+ time_start = time.time()
asyncio.run(xiecheng.getScenic())
print("携程爬取结束")
@@ -28,11 +33,9 @@ class Scenic:
asyncio.run(mafengwo.getScenic())
print("马蜂窝爬取结束")
- time_end=time.time()
- print(' time cost ',time_end-time_start,'s')
-
-
-
-
-
+ crawler.crawl('tuniu_scenic')
+ crawler.start()
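+        # start() blocks until the 'tuniu_scenic' spider finishes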
+ print("途牛景区爬取结束")
+ time_end = time.time()
+ print(' time cost ', time_end - time_start, 's')
diff --git "a/applications/common/tasks/\347\231\276\345\272\246/baidu_start.py" "b/applications/common/tasks/\347\231\276\345\272\246/baidu_start.py"
new file mode 100644
index 0000000..9e27728
--- /dev/null
+++ "b/applications/common/tasks/\347\231\276\345\272\246/baidu_start.py"
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+'''=================================================
+@Project -> File :pear-admin-flask -> baidu_start
+@IDE :PyCharm
+@Author :sandmswift
+@Date :2022-12-14 18:20
+@Desc
+=================================================='''
+from wangModel.common_spiders.baiduacc import baiduacc
+from wangModel.common_spiders.baidusearch import BaiduSpider
+from wangModel.common_spiders.baiduwords import BaiDuWords
+
+
+class BaiduCrawl:
+ def run(self):
+        # run the plain (non-Scrapy) Baidu crawlers in sequence
+        """ 1. Baidu index """
+        print("开始爬取百度指数")
+        acc = baiduacc()
+        acc.parse1()
+ print("百度指数爬取完毕")
+
+ """ 2.百度搜索"""
+ print("开始爬取百度搜索")
+ run = BaiduSpider()
+ run.parse()
+ print("百度搜索爬取完毕")
+
+ """3.百度词条"""
+ print("开始爬取百度词条")
+ baiduWord = BaiDuWords()
+ baiduWord.run()
+ print("百度词条爬取完毕")
\ No newline at end of file
diff --git "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py" "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
index 39a67b6..897bf3b 100644
--- "a/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
+++ "b/applications/common/tasks/\351\205\222\345\272\227\350\257\204\350\256\272\346\240\207\351\242\230/hotel_title_start.py"
@@ -7,6 +7,10 @@ from applications.common.tasks.酒店评论标题.qunaer_hotel_comment_title imp
from applications.common.tasks.酒店评论标题.tongcheng_hotel_comment_title import Tongcheng_Hotel
import asyncio
import time
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.project import get_project_settings
+settings = get_project_settings()
+crawler = CrawlerProcess(settings)
qunaer = Qunaer_Hotel()
tongcheng = Tongcheng_Hotel()
@@ -24,6 +28,10 @@ class Hotel:
asyncio.run(qunaer.getHotel())
print("去哪儿爬取结束")
+ crawler.crawl('tuniu_hotel')
+ crawler.start()
+ print("途牛酒店爬取结束")
+
time_end=time.time()
print(' time cost ',time_end-time_start,'s')
diff --git a/applications/view/__init__.py b/applications/view/__init__.py
index a168078..9cecc83 100644
--- a/applications/view/__init__.py
+++ b/applications/view/__init__.py
@@ -3,7 +3,7 @@ from applications.view.index import register_index_views
from applications.view.passport import register_passport_views
from applications.view.rights import register_rights_view
from applications.view.department import register_dept_views
-from applications.view.test import register_test_views
+# from applications.view.test import register_test_views
def init_view(app):
@@ -11,5 +11,5 @@ def init_view(app):
register_index_views(app)
register_rights_view(app)
register_passport_views(app)
- register_test_views(app)
+ # register_test_views(app)
register_dept_views(app)
--
Gitee