diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/Scrapy\344\270\255\351\227\264\344\273\266\345\217\212debug-10-3noteyun.md" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/Scrapy\344\270\255\351\227\264\344\273\266\345\217\212debug-10-3noteyun.md"
new file mode 100644
index 0000000000000000000000000000000000000000..4364a5132190977964187bcd2b540b46d6af7d6a
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/Scrapy\344\270\255\351\227\264\344\273\266\345\217\212debug-10-3noteyun.md"
@@ -0,0 +1,84 @@
+10-3noteyun-
+
+# Launching and debugging Scrapy
+
+- Command line
+
+ ```python
+  scrapy crawl jd_search  # run from the command line
+ ```
+
+- Launcher script
+
+ ```python
+  # create a new run.py launcher script
+
+ from scrapy import cmdline
+
+ command = "scrapy crawl jd_search".split()
+ cmdline.execute(command)
+ ```
+
+# Scrapy Item
+
+An Item simply constrains the structured results of parsing, so data errors can be detected before the items reach the pipeline.
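+
+A minimal sketch of such an Item (field names taken from this project's items.py):
+
+```python
+import scrapy
+
+
+class JdCrawlerScrapyItem(scrapy.Item):
+    # only declared fields may be assigned; an undeclared key raises KeyError
+    sku_id = scrapy.Field()
+    title = scrapy.Field()
+    price = scrapy.Field()
+```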
+
+# Scrapy settings
+
+- ROBOTSTXT_OBEY
+
+  Controls whether the crawler fetches and obeys the target site's robots.txt, i.e. whether the site allows crawlers to collect its data.
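+
+  For example, this project's settings.py sets:
+
+  ```python
+  ROBOTSTXT_OBEY = False  # do not obey robots.txt for this crawler
+  ```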
+
+- Configuring middlewares
+
+  The smaller the number, the closer the middleware sits to the `ENGINE` and the earlier it runs.
+
+ ```python
+ DOWNLOADER_MIDDLEWARES = {
+ # 'jd_crawler_scrapy.middlewares.JdCrawlerScrapyDownloaderMiddleware': 543,
+ 'jd_crawler_scrapy.middlewares.UAMiddleware': 100,
+ }
+ ```
+
+ 
+
+- Configuring pipelines
+
+ ```python
+ ITEM_PIPELINES = {
+ 'jd_crawler_scrapy.pipelines.JdCrawlerScrapyPipeline': 300,
+ }
+ ```
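+
+  The class referenced above lives in pipelines.py and implements `process_item`; a minimal sketch (the real one in this project writes the item to MySQL):
+
+  ```python
+  class JdCrawlerScrapyPipeline:
+      def process_item(self, item, spider):
+          # the project's real pipeline inserts the item into a MySQL table here
+          return item
+  ```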
+
+- LOG (logging; see the example after this list)
+
+  - LOG_ENABLED
+
+    Whether logging is used; defaults to `True`.
+
+  - LOG_FILE
+
+    Path of the file the log is written to.
+
+  - LOG_LEVEL (ordered by severity)
+
+    - CRITICAL
+    - ERROR
+    - WARNING
+    - INFO
+    - DEBUG (the most verbose output)
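+
+  For example, this project's settings.py (later in this diff) sets:
+
+  ```python
+  LOG_FILE = "E:/PycharmProjects/log/jd_search.log"  # where the log is written
+  LOG_LEVEL = "ERROR"  # only errors; switch to "DEBUG" for full detail
+  ```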
+
+# Scrapy middlewares
+
+- Request-header middleware
+
+ ```python
+ class UAMiddleware:
+ def process_request(self, request, spider):
+ request.headers["user-agent"] = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
+ ```
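+
+- Proxy middleware
+
+  A minimal sketch for the homework below (the proxy URL is a placeholder; Scrapy's built-in HttpProxyMiddleware honours `request.meta["proxy"]`):
+
+  ```python
+  class ProxyMiddleware:
+      def process_request(self, request, spider):
+          # placeholder proxy address -- replace it with a real proxy
+          request.meta["proxy"] = "http://127.0.0.1:8888"
+  ```
+
+  Register it in `DOWNLOADER_MIDDLEWARES` just like `UAMiddleware` above.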
+
+# Homework
+
+- Finish and polish jd_crawler_scrapy.
+- Write the proxy middleware (consult the documentation).
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/Scrapy\347\210\254\350\231\253\346\241\206\346\236\266\344\273\213\347\273\215-10-2noteyun-.md" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/Scrapy\347\210\254\350\231\253\346\241\206\346\236\266\344\273\213\347\273\215-10-2noteyun-.md"
new file mode 100644
index 0000000000000000000000000000000000000000..38b37e6c3573448a94aca50dc90f5318555c349a
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/Scrapy\347\210\254\350\231\253\346\241\206\346\236\266\344\273\213\347\273\215-10-2noteyun-.md"
@@ -0,0 +1,145 @@
+10-2noteyun
+
+# Introduction to the Scrapy crawler framework
+
+- Documentation
+
+  - [English documentation](https://docs.scrapy.org/en/latest/)
+  - [Chinese documentation](https://scrapy-chs.readthedocs.io/zh_CN/0.24/intro/tutorial.html)
+
+- What is Scrapy
+
+  An asynchronous crawler framework built on top of `twisted`.
+
+  Thanks to its component-based design and rich middleware system, Scrapy is both high-performance and highly extensible.
+
+- Main features Scrapy provides
+
+  - **A scheduler with priority support**
+  - **Request deduplication**
+  - A retry mechanism for failed requests
+  - Concurrency limits
+  - Limits on how many times an IP is used
+  - ....
+
+- When to use Scrapy
+
+  - Scenarios where a Scrapy project is not a good fit
+    - The task is very simple and performance requirements are modest; a multi-process, multi-threaded, or async script is enough.
+    - The task is very complex, with ordering and expiry-time constraints between requests.
+    - **If you are not going to follow the framework's core design philosophy, do not use the framework**
+  - Scenarios that suit a Scrapy project
+    - Large data volumes, some performance requirements, plus a need for **deduplication** and the **priority-aware scheduler**
+
+- **Scrapy components**
+
+ 
+
+  - The `ENGINE` obtains the initial request tasks (`Requests`) from the `SPIDERS`
+
+  - The `ENGINE` passes the `Requests` to the `SCHEDULER`, which schedules them and produces tasks.
+
+  - The `SCHEDULER` returns the next request task to the `ENGINE`
+
+  - The `ENGINE` hands the request task to the `DOWNLOADER`, passing through the downloader middlewares.
+
+  - Once the download is complete, the `DOWNLOADER` produces a `Response` object for the `ENGINE`, again passing through the downloader middlewares
+
+  - When the `ENGINE` receives the `Response`, it sends it to the `SPIDERS` for parsing and processing, passing through the spider middlewares
+
+  - The `SPIDERS` parse the returned result:
+
+    - the parsed `ITEMS` are sent to the `ENGINE`
+    - new `REQUESTS` may be generated and sent to the `ENGINE` (e.g. when a detail page contains further links)
+
+  - If what the `ENGINE` receives is `ITEMS`, they are sent to the `ITEM PIPELINES` for data processing; if it is `REQUESTS`, they go to the `SCHEDULER`
+
+  - The cycle repeats until no more tasks are produced
+
+# Scrapy tutorial
+
+- Installation
+
+ ```python
+ pip install scrapy
+ ```
+
+- Create a project
+
+ ```python
+ scrapy startproject jd_crawler_scrapy
+ ```
+
+- Directory structure
+
+  - spiders (directory)
+
+    Holds the `SPIDERS` files; one Scrapy project may contain several spiders.
+
+    All spiders in the project share the same middlewares, pipelines, settings, and so on.
+
+  - items
+
+    The structured results of parsing.
+
+  - middlewares
+
+    Where the downloader middlewares and spider middlewares live.
+
+  - pipelines
+
+    The components that process items; inserting items into database tables (database operations) is usually done here.
+
+  - settings
+
+    The unified, global crawler configuration file.
+
+  - scrapy.cfg
+
+    The project configuration file.
+
+
+
+
+
+  Scrapy spider demo
+
+ ```python
+ import scrapy
+
+
+ class JdSearch(scrapy.Spider):
+ name = "jd_search"
+
+ def start_requests(self):
+ for keyword in ["鼠标", "键盘", "显卡", "耳机"]:
+              for page_num in range(1, 11):  # 10 pages
+ url = f"https://search.jd.com/Search?keyword={keyword}&page={page_num}"
+
+                  # FormRequest is chosen because it can send both GET and POST requests
+ yield scrapy.FormRequest(
+ url=url,
+ method='GET',
+                      # formdata=data,  # for a POST request, pass the payload via the formdata parameter
+                      callback=self.parse_search  # callback that handles the response object
+ )
+
+
+ def parse_search(self, response):
+ print(response)
+ ```
+
+- Run the spider
+
+ ```python
+ scrapy crawl spider_name
+ ```
+
+# Homework
+
+- Memorize the `scrapy` component flow (guaranteed to be examined)
+- Finish the Scrapy project demo
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/__init__.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/__init__.py"
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/items.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/items.py"
new file mode 100644
index 0000000000000000000000000000000000000000..cd9f2584bf386aac18cbd6833fb5a5780d071899
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/items.py"
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+
+# Define here the models for your scraped items
+#
+# See documentation in:
+# http://doc.scrapy.org/en/latest/topics/items.html
+
+import scrapy
+
+
+class JdCrawlerScrapyItem(scrapy.Item):
+ # define the fields for your item here like:
+ # name = scrapy.Field()
+    sku_id = scrapy.Field()
+    img = scrapy.Field()
+    price = scrapy.Field()
+    title = scrapy.Field()
+    shop = scrapy.Field()
+    icons = scrapy.Field()
+
+
+ # name=scrapy.Field()
+ # tel=scrapy.Field()
+ # address=scrapy.Field()
+
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/middlewares.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/middlewares.py"
new file mode 100644
index 0000000000000000000000000000000000000000..cd2c42d3e7e8d80bd753f4fb14403f069806e3a1
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/middlewares.py"
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+# Downloader middlewares and spider middlewares
+# Define here the models for your spider middleware
+#
+# See documentation in:
+# http://doc.scrapy.org/en/latest/topics/spider-middleware.html
+
+from scrapy import signals
+# Custom middleware class
+class UAMiddleware:
+    def process_request(self, request, spider):
+        """
+        Add headers to the request before it is actually sent.
+        :param request:
+        :param spider:
+        :return:
+        """
+        request.headers["user-agent"] = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
+
+class JdCrawlerScrapySpiderMiddleware(object):
+ # Not all methods need to be defined. If a method is not defined,
+ # scrapy acts as if the spider middleware does not modify the
+ # passed objects.
+
+ @classmethod
+ def from_crawler(cls, crawler):
+ # This method is used by Scrapy to create your spiders.
+ s = cls()
+ crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
+ return s
+
+    def process_spider_input(self, response, spider):
+ # Called for each response that goes through the spider
+ # middleware and into the spider.
+
+ # Should return None or raise an exception.
+ return None
+
+    def process_spider_output(self, response, result, spider):
+ # Called with the results returned from the Spider, after
+ # it has processed the response.
+
+ # Must return an iterable of Request, dict or Item objects.
+ for i in result:
+ yield i
+
+    def process_spider_exception(self, response, exception, spider):
+ # Called when a spider or process_spider_input() method
+ # (from other spider middleware) raises an exception.
+
+ # Should return either None or an iterable of Response, dict
+ # or Item objects.
+ pass
+
+    def process_start_requests(self, start_requests, spider):
+ # Called with the start requests of the spider, and works
+ # similarly to the process_spider_output() method, except
+ # that it doesn’t have a response associated.
+
+ # Must return only requests (not items).
+ for r in start_requests:
+ yield r
+
+ def spider_opened(self, spider):
+ spider.logger.info('Spider opened: %s' % spider.name)
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/pipelines.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/pipelines.py"
new file mode 100644
index 0000000000000000000000000000000000000000..db82e4222720cc90aeb148f55c1a4ec0f6293f97
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/pipelines.py"
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+import pymysql
+
+from jd_crawler_scrapy.items import JdCrawlerScrapyItem
+
+
+
+# Define your item pipelines here
+#
+# Don't forget to add your pipeline to the ITEM_PIPELINES setting
+# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
+
+
+class JdCrawlerScrapyPipeline(object):
+ def __init__(self):
+ self.mysql_con=None
+
+
+ def process_item(self, item, spider):
+        if not self.mysql_con:  # lazily connect on first use
+ self.mysql_con=pymysql.connect(**spider.settings["MYSQL_CONF"])
+ # mysql_con = pymysql.connect(**MYSQL_CONF)
+
+ if isinstance(item,JdCrawlerScrapyItem):
+ cursor=self.mysql_con.cursor()
+ print(type(cursor))
+ SQL="""INSERT INTO jd_search(sku_id,img,price, title, shop, icons)
+ VALUES ('{}', '{}', '{}', '{}', '{}', '{}')""".format(
+ item['sku_id'],item['img'],item['price'],item['title'],item['shop'],item['icons']
+ )
+ cursor.execute(SQL)
+            self.mysql_con.commit()  # commit, writing the row to the database
+            cursor.close()  # close the cursor
+ return item
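+
+# A parameterized alternative (a sketch, not part of the lesson): letting the
+# driver substitute the values avoids quoting problems and SQL injection:
+#     SQL = ("INSERT INTO jd_search(sku_id, img, price, title, shop, icons) "
+#            "VALUES (%s, %s, %s, %s, %s, %s)")
+#     cursor.execute(SQL, (item['sku_id'], item['img'], item['price'],
+#                          item['title'], item['shop'], item['icons']))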
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/settings.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/settings.py"
new file mode 100644
index 0000000000000000000000000000000000000000..e7bc1ee8c873dd0d60f7cb8a32dc52b888c0b884
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/settings.py"
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+# Unified, global crawler configuration file
+# Scrapy settings for jd_crawler_scrapy project
+#
+# For simplicity, this file contains only settings considered important or
+# commonly used. You can find more settings consulting the documentation:
+#
+# http://doc.scrapy.org/en/latest/topics/settings.html
+# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
+# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
+
+BOT_NAME = 'jd_crawler_scrapy'
+
+SPIDER_MODULES = ['jd_crawler_scrapy.spiders']
+NEWSPIDER_MODULE = 'jd_crawler_scrapy.spiders'
+
+
+# Crawl responsibly by identifying yourself (and your website) on the user-agent
+#USER_AGENT = 'jd_crawler_scrapy (+http://www.yourdomain.com)'
+
+# Obey robots.txt rules
+ROBOTSTXT_OBEY = False
+
+# Configure maximum concurrent requests performed by Scrapy (default: 16)
+#CONCURRENT_REQUESTS = 32
+
+# Configure a delay for requests for the same website (default: 0)
+# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
+# See also autothrottle settings and docs
+#DOWNLOAD_DELAY = 3
+# The download delay setting will honor only one of:
+#CONCURRENT_REQUESTS_PER_DOMAIN = 16
+#CONCURRENT_REQUESTS_PER_IP = 16
+
+# Disable cookies (enabled by default)
+#COOKIES_ENABLED = False
+
+# Disable Telnet Console (enabled by default)
+#TELNETCONSOLE_ENABLED = False
+
+# Override the default request headers:
+#DEFAULT_REQUEST_HEADERS = {
+# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+# 'Accept-Language': 'en',
+#}
+
+# Enable or disable spider middlewares
+# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
+#SPIDER_MIDDLEWARES = {
+# 'jd_crawler_scrapy.middlewares.JdCrawlerScrapySpiderMiddleware': 543,
+#}
+
+# Downloader middlewares -- customized here
+# Enable or disable downloader middlewares
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
+DOWNLOADER_MIDDLEWARES = {
+ # 'jd_crawler_scrapy.middlewares.MyCustomDownloaderMiddleware': 543,
+ 'jd_crawler_scrapy.middlewares.UAMiddleware': 100,
+}
+
+# Enable or disable extensions
+# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
+#EXTENSIONS = {
+# 'scrapy.extensions.telnet.TelnetConsole': None,
+#}
+
+# Configure item pipelines
+# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
+ITEM_PIPELINES = {
+ 'jd_crawler_scrapy.pipelines.JdCrawlerScrapyPipeline': 300,
+}
+
+# Enable and configure the AutoThrottle extension (disabled by default)
+# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
+#AUTOTHROTTLE_ENABLED = True
+# The initial download delay
+#AUTOTHROTTLE_START_DELAY = 5
+# The maximum download delay to be set in case of high latencies
+#AUTOTHROTTLE_MAX_DELAY = 60
+# The average number of requests Scrapy should be sending in parallel to
+# each remote server
+#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
+# Enable showing throttling stats for every response received:
+#AUTOTHROTTLE_DEBUG = False
+
+# Enable and configure HTTP caching (disabled by default)
+# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
+#HTTPCACHE_ENABLED = True
+#HTTPCACHE_EXPIRATION_SECS = 0
+#HTTPCACHE_DIR = 'httpcache'
+#HTTPCACHE_IGNORE_HTTP_CODES = []
+#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
+
+# MySQL configuration
+MYSQL_CONF = {
+ "host": "127.0.0.1",
+ "user": "root",
+ "password": "123456",
+ "db": "py_class"
+}
+
+LOG_FILE = "E:/PycharmProjects/log/jd_search.log"
+
+# LOG_LEVEL = 'DEBUG'  # very verbose output
+LOG_LEVEL = 'ERROR'  # only errors are logged
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/spiders/__init__.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/spiders/__init__.py"
new file mode 100644
index 0000000000000000000000000000000000000000..ebd689ac51d69c5e1dbbe80083c2b20a39f8bb79
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/spiders/__init__.py"
@@ -0,0 +1,4 @@
+# This package will contain the spiders of your Scrapy project
+#
+# Please refer to the documentation for information on how to create and manage
+# your spiders.
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/spiders/jd_search.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/spiders/jd_search.py"
new file mode 100644
index 0000000000000000000000000000000000000000..f420d2aeca9085233d7ae2e570dfec0dc7a83095
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/spiders/jd_search.py"
@@ -0,0 +1,61 @@
+import scrapy
+from bs4 import BeautifulSoup
+import json
+from jd_crawler_scrapy.items import JdCrawlerScrapyItem
+
+
+class JdSearch(scrapy.Spider):
+    name = "jd_search"  # name used to select this spider when crawling
+
+    def start_requests(self):  # task producer
+ for keyword in ["鼠标","键盘","显卡","耳机"]:
+ for page_num in range(1,11):
+ url=f"https://search.jd.com/Search?keyword={keyword}&page={page_num}"
+                # FormRequest supports both GET and POST
+ yield scrapy.FormRequest(
+ url=url,
+ method="GET",
+                    # formdata=data,  # for a POST request, pass the payload via the formdata parameter
+                    callback=self.parse_search  # callback that handles the response object
+                )  # supports both GET and POST
+                # yield returns tasks lazily (this method is a generator)
+                break  # NOTE: stops after the first page of each keyword (likely left in for testing)
+
+    def parse_search(self, response):  # parse function
+ print(response)
+
+ soup = BeautifulSoup(response.text, "lxml")
+ item_array = soup.select("ul[class='gl-warp clearfix'] li[class='gl-item']")
+ # $("ul[class='gl-warp clearfix'] li[class='gl-item']")
+ for item in item_array:
+ try:
+ sku_id = item.attrs["data-sku"]
+ img = item.select("img[data-img='1']")
+ price = item.select("div[class='p-price']")
+ title = item.select("div[class='p-name p-name-type-2']")
+ shop = item.select("div[class='p-shop']")
+ icons = item.select("div[class='p-icons']")
+ # print(img)
+
+ img = img[0].attrs['data-lazy-img'] if img else ""
+ price = price[0].strong.i.text if price else ""
+ title = title[0].text.strip() if title else ""
+ shop = shop[0].a.attrs['title'] if shop[0].text.strip() else ""
+ icons = json.dumps([tag_ele.text for tag_ele in icons[0].select("i")]) if icons else '[]'
+
+                # result.append((sku_id, img, price, title, shop, icons))  # collect results
+                # constrain the parsed fields with an Item
+ item=JdCrawlerScrapyItem()
+ item["sku_id"]=sku_id
+ item["img"]=img
+ item["price"]=price
+ item["title"]=title
+ item["shop"]=shop
+ item["icons"]=icons
+                yield item  # yield the item to the (asynchronous) framework
+
+ except Exception as e:
+ print(e.args)
+
+
+
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/spiders/run.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/spiders/run.py"
new file mode 100644
index 0000000000000000000000000000000000000000..288d5060bddffbe5e8d3b60d6ca86b7fcc6805cf
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/jd_crawler_scrapy/spiders/run.py"
@@ -0,0 +1,5 @@
+from scrapy import cmdline
+
+command="scrapy crawl jd_search".split()
+print(command)
+cmdline.execute(command)
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/scrapy.cfg" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/scrapy.cfg"
new file mode 100644
index 0000000000000000000000000000000000000000..fa0c0f404b8118ee48947a4e0f6446d0a7e89e82
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/jd_crawler_scrapy/scrapy.cfg"
@@ -0,0 +1,11 @@
+# Automatically created by: scrapy startproject
+#
+# For more information about the [deploy] section see:
+# https://scrapyd.readthedocs.org/en/latest/deploy.html
+
+[settings]
+default = jd_crawler_scrapy.settings
+
+[deploy]
+#url = http://localhost:6800/
+project = jd_crawler_scrapy
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/\350\267\257\345\276\204\347\256\241\347\220\20610-1noteyun-0311.md" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/\350\267\257\345\276\204\347\256\241\347\220\20610-1noteyun-0311.md"
new file mode 100644
index 0000000000000000000000000000000000000000..06659a6c552eab3f1dacb55d831672116f13acaf
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/week_10/\350\267\257\345\276\204\347\256\241\347\220\20610-1noteyun-0311.md"
@@ -0,0 +1,153 @@
+10-1noteyun
+
+# Path management
+
+## Paths
+
+- Absolute path
+
+  Always starts from the root directory
+
+ ```python
+ H:\PyCharmProjects\tutorials_2\jd_crawler\main.py
+ ```
+
+- Relative path
+
+ ```python
+ jd_crawler\main.py
+ ```
+
+  - `.` and `..`
+
+    `.` is the current directory, `..` is the parent directory
+
+- Working directory
+
+  The directory in which the current command is executed
+
+ ```python
+  import os
+  import sys
+
+  # add the working directory to the current path list
+  sys.path.append(os.getcwd())
+ ```
+
+# The path list
+
+- Inspect the current path list
+
+  Only packages and modules that are on the path list can be imported and called
+
+ ```python
+ import sys
+ print(sys.path)
+ ['E:\\PycharmProjects\\train002_1231\\second-python-bootcamp\\第二期训练营\\5班\\5班_云\\第9周0222--0228\\9-3\\jd_crawler',
+ 'E:\\PycharmProjects\\train002_1231\\second-python-bootcamp',
+ 'E:\\PycharmProjects\\train002_1231\\second-python-bootcamp\\第二期训练营\\5班\\5班_云\\第9周0222--0228\\9-3\\jd_crawler',
+ 'E:\\PycharmProjects\\train002_1231\\second-python-bootcamp\\第二期训练营\\5班\\5班_云\\第9周0222--0228\\9-3',
+ 'D:\\bsoft\\ananaconda\\python37.zip',
+ 'D:\\bsoft\\ananaconda\\DLLs',
+ 'D:\\bsoft\\ananaconda\\lib',
+ 'D:\\bsoft\\ananaconda',
+ 'D:\\bsoft\\ananaconda\\lib\\site-packages', 'D:\\bsoft\\ananaconda\\lib\\site-packages\\win32', 'D:\\bsoft\\ananaconda\\lib\\site-packages\\win32\\lib',
+ 'D:\\bsoft\\ananaconda\\lib\\site-packages\\Pythonwin']
+
+ ```
+
+  From the command line:
+
+ ```python
+ python main.py
+ python jd_crawler main.py
+
+ ```
+
+
+
+- Path search order
+
+  - The current script's path, i.e. the directory of the file being executed
+
+  - The `PYTHONPATH` paths
+
+  - The virtual environment's paths
+
+  - `site-packages`
+
+    - the directory where installed third-party libraries live
+
+- Paths can be appended to the path list, as shown below
+
+ ```python
+ sys.path.append(r"H:\PyCharmProjects\tutorials_2")
+  # inflexible: the path is hard-coded
+ ```
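+
+  A more flexible variant (a sketch, not from the lesson) derives the project root from the current file instead of hard-coding it:
+
+  ```python
+  import os
+  import sys
+
+  # assume this file sits one level below the project root
+  project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+  sys.path.append(project_root)
+  ```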
+
+# Common errors
+
+
+
+(1) `ModuleNotFoundError: No module named 'xxxx'`
+
+- Why does it not fail in PyCharm but fail on the command line?
+
+  ```python
+  # PyCharm automatically adds the project's root directory to the path list
+ ```
+
+(2) `ModuleNotFoundError: No module named 'parser.search'; 'parser' is not a package`
+
+- A custom package name conflicts with the name of a built-in package
+
+  Simply rename the package
+
+- What is being imported is not a package
+
+
+
+(3) `ModuleNotFoundError: No module named '__main__.jd_parser'; '__main__' is not a package`
+
+- **The entry-point program cannot use relative imports**
+
+- `__main__`
+
+  The main program's module name is changed to `__main__`
+
+ ```python
+
+  if __name__ == "__main__":  # entry point
+      # stands in for the producer
+      mysql_con = pymysql.connect(**MYSQL_CONF)
+ ```
+
+
+
+(4) `ValueError: attempted relative import beyond top-level package`
+
+The import reaches beyond the top-most path Python knows about
+
+```python
+from tutorial_2.jd_crawler.jd_parser.search import parse_jd_item
+
+# the top-level package is the first path segment of the from-import above (tutorial_2), not something determined by the directory structure
+```
+
+- Add the working directory to the path list
+- Run the command from inside the project root directory
+- Both of the above amount to adding the project root to the path list; see the sketch below
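+
+A minimal entry-script sketch of that idea (the import mirrors this project's main.py; run the script from the project root):
+
+```python
+import os
+import sys
+
+sys.path.append(os.getcwd())  # the working directory, i.e. the project root
+
+from jd_crawler.jd_parser.search import parse_jd_item  # absolute project import now resolves
+```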
+
+
+
+# Things to keep in mind
+
+- Decide on the entry-point program; without an anchored path there is no way to manage relative paths
+- Add the project root to the path list in the entry-point program
+- Run commands from inside the project root directory
+- Do not nest the project directory structure too deeply
+- For standalone scripts, or when temporarily running functions from a single module, the root directory can be added to the **path list** temporarily
+
+# Homework
+
+- Launch `jd_crawler` from the command line
+- Add a `parser_test.py` module under the `/test` directory for parser testing.
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/CSS-BeautifulSoup\345\205\203\347\264\240\345\256\232\344\275\215-9-2noteyun -0309\350\241\245.md" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/CSS-BeautifulSoup\345\205\203\347\264\240\345\256\232\344\275\215-9-2noteyun -0309\350\241\245.md"
new file mode 100644
index 0000000000000000000000000000000000000000..e5b75e91ba6c6f6880e3ba5ad765e1eaa8dcac6e
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/CSS-BeautifulSoup\345\205\203\347\264\240\345\256\232\344\275\215-9-2noteyun -0309\350\241\245.md"
@@ -0,0 +1,251 @@
+9-2noteyun
+
+# css-selector
+
+> Try to avoid positional information in your selector paths
+
+> The Chrome DevTools console provides a built-in jQuery-style helper, exposed as the `$` symbol
+
+## Locating elements directly
+
+- Locate by id
+
+ ```python
+ $("#id值")
+ ```
+
+- Locate by class
+
+ ```python
+ $(".class值")
+ ```
+
+- **Locate by attribute**
+
+ ```python
+ $("标签名[属性名='属性值']")
+
+ $("ul[class='gl-warp clearfix']")
+ ```
+
+## Getting sibling nodes
+
+- Get the node after the current one
+
+  - A DOM interface, not css-selector syntax
+
+ ```python
+ tmp = $("li[data-sku='6039832']")[0]
+    tmp.nextElementSibling  # next sibling: the product to the right
+ ```
+
+  - Via css-selector (not recommended)
+
+ ```python
+ $("ul[class='gl-warp clearfix'] li:first-child + li")
+ ```
+
+- Get the node before the current one
+
+  - A DOM interface, not css-selector syntax
+
+ ```python
+ tmp = $("li[data-sku='2136538']")[0]
+    tmp.previousElementSibling  # previous sibling: the product to the left
+ ```
+
+## Getting parent and child nodes
+
+- Get the parent node
+
+  - A DOM interface, not css-selector syntax
+
+ ```python
+ tmp.parentElement
+ ```
+
+- Get child nodes
+
+  - Get all child nodes
+
+    - **Iterate** over all matching elements
+
+      ```python
+      $("ul[class='gl-warp clearfix'] div[class='gl-i-wrap']")
+
+      $("ul[class='gl-warp clearfix'] li[class='gl-item']")[0]
+      ```
+
+    - A DOM interface, not css-selector syntax
+
+ ```python
+ $("ul[class='gl-warp clearfix']")[0].children
+ ```
+
+ 
+
+  - Get the first child node
+
+ ```python
+    :first-child
+ $("ul[class='gl-warp clearfix'] li:first-child")[0]
+ ```
+
+  - Get the last child node
+
+ ```python
+ :last-child
+ $("ul[class='gl-warp clearfix'] li:last-child")[0]
+ ```
+
+  - Get the N-th child node
+
+    ```python
+    # :nth-child(index) -- here, get the fifth child
+ $("ul[class='gl-warp clearfix'] li:nth-child(5)")[0]
+ ```
+
+ 
+
+# Fuzzy matching
+
+- Match the beginning
+
+  `^`
+
+  ```python
+  # match elements whose data-sku attribute value starts with 2
+ $("li[data-sku^='2']")
+ ```
+
+ 
+
+- Match the end
+
+ `$`
+
+ ```python
+ $("li[data-sku$='2']")
+ ```
+
+- Match a substring
+
+ `*`
+
+ ```python
+ $("li[data-sku*='2']")
+ ```
+
+ 
+
+- Get the text value
+
+ ```python
+ $("li[data-sku='6039832'] div[class='p-name p-name-type-2'] em")[0].innerText
+ ```
+
+ 
+
+ 
+
+
+
+- Get an attribute value
+
+ ```python
+ $("ul[class='gl-warp clearfix'] li")[0].getAttribute("data-sku")
+ ```
+
+ 
+
+# BeautifulSoup
+
+- Installation
+
+ ```python
+ pip install bs4
+ pip install lxml
+ ```
+
+- Using BeautifulSoup
+
+ ```python
+ from bs4 import BeautifulSoup
+
+
+ def jd_search_parse(html):
+ soup = BeautifulSoup(html, "lxml")
+ item = soup.select("li[data-sku='6039832']")[0]
+ ```
+
+- Locating elements directly
+
+  Same approach as in the css-selector section; a minimal sketch follows.
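+
+  The attribute selectors shown above also work with `soup.select` (a sketch; the HTML snippet is made up for illustration):
+
+  ```python
+  from bs4 import BeautifulSoup
+
+  html = "<ul class='gl-warp clearfix'><li class='gl-item' data-sku='6039832'>item</li></ul>"
+  soup = BeautifulSoup(html, "lxml")
+
+  by_attr = soup.select("li[data-sku='6039832']")           # by attribute value
+  by_class = soup.select("li.gl-item")                      # by class
+  first_child = soup.select("ul.gl-warp > li:first-child")  # positional selector
+  ```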
+
+- Strip whitespace characters
+
+ ```python
+ html = html.replace('\r\n', "").replace("\n", "").replace("\t", "")
+ ```
+
+ 
+
+- Get sibling nodes
+
+  - Get the previous node
+
+ ```python
+ tmp_ele.previous_sibling
+ ```
+
+  - Get the next node
+
+ ```python
+ tmp_ele.next_sibling
+ ```
+
+- Get parent and child nodes
+
+  - Get the parent node
+
+ ```python
+ tmp_ele.parent
+ ```
+
+  - Get child nodes
+
+ ```python
+ tmp_ele.children
+ ```
+
+- Fuzzy matching
+
+  The same `^=`, `$=`, and `*=` attribute selectors work here as well; a minimal sketch follows.
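+
+  Reusing `soup` from the sketch above:
+
+  ```python
+  soup.select("li[data-sku^='2']")  # attribute value starts with 2
+  soup.select("li[data-sku$='2']")  # attribute value ends with 2
+  soup.select("li[data-sku*='2']")  # attribute value contains 2
+  ```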
+
+- Get the text value
+
+ ```python
+ content = tmp_ele.text.strip()
+ ```
+
+- Get an attribute value
+
+ ```python
+ value = tmp_ele.attrs["data-sku"]
+ ```
+
+ 
+
+ 
+
+ 
+
+  (Debugging is covered from roughly the 50-minute mark of the lesson.)
+
+# Homework
+
+- Practice css-selector
+- Practice parsing pages with BeautifulSoup
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/001.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/001.py"
new file mode 100644
index 0000000000000000000000000000000000000000..229d72edceb621b08c99bb6919500be82b3d178a
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/001.py"
@@ -0,0 +1,6 @@
+import sys
+print(sys.path)  # print the path list
+# ['E:\\PycharmProjects\\train002_1231\\second-python-bootcamp\\第二期训练营\\5班\\5班_云\\第9周0222--0228\\9-3\\jd_crawler', 'E:\\PycharmProjects\\train002_1231\\second-python-bootcamp', 'E:\\PycharmProjects\\train002_1231\\second-python-bootcamp\\第二期训练营\\5班\\5班_云\\第9周0222--0228\\9-3\\jd_crawler', 'E:\\PycharmProjects\\train002_1231\\second-python-bootcamp\\第二期训练营\\5班\\5班_云\\第9周0222--0228\\9-3', 'D:\\bsoft\\ananaconda\\python37.zip', 'D:\\bsoft\\ananaconda\\DLLs', 'D:\\bsoft\\ananaconda\\lib', 'D:\\bsoft\\ananaconda', 'D:\\bsoft\\ananaconda\\lib\\site-packages', 'D:\\bsoft\\ananaconda\\lib\\site-packages\\win32', 'D:\\bsoft\\ananaconda\\lib\\site-packages\\win32\\lib', 'D:\\bsoft\\ananaconda\\lib\\site-packages\\Pythonwin']
+# sys.path.append(r"H:\PyCharmProjects\tutorials_2")#不灵活
+print("001",__name__)
+# sys.path.append(r"E:\PycharmProjects\Spider_Code\9-3")
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/jd_parser/detail.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/jd_parser/detail.py"
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/jd_parser/search.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/jd_parser/search.py"
new file mode 100644
index 0000000000000000000000000000000000000000..1d1c7de8d90ceee46722cdda79b5159b969ed987
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/jd_parser/search.py"
@@ -0,0 +1,39 @@
+from bs4 import BeautifulSoup
+import json
+
+def parse_jd_item(html):  # parser
+    result = []  # list that collects the parsed results
+
+ soup = BeautifulSoup(html, "lxml")
+ print(soup)
+ item_array = soup.select("ul[class='gl-warp clearfix'] li[class='gl-item']")
+ # $("ul[class='gl-warp clearfix'] li[class='gl-item']")
+ for item in item_array:
+ try:
+ sku_id = item.attrs["data-sku"]
+ img = item.select("img[data-img='1']")
+ price = item.select("div[class='p-price']")
+ title = item.select("div[class='p-name p-name-type-2']")
+ shop = item.select("div[class='p-shop']")
+ icons = item.select("div[class='p-icons']")
+ # print(img)
+
+
+ img = img[0].attrs['data-lazy-img'] if img else ""
+ price = price[0].strong.i.text if price else ""
+ title = title[0].text.strip() if title else ""
+ shop = shop[0].a.attrs['title'] if shop[0].text.strip() else ""
+ icons = json.dumps([tag_ele.text for tag_ele in icons[0].select("i")]) if icons else '[]'
+
+            result.append((sku_id, img, price, title, shop, icons))  # collect the result
+ except Exception as e:
+ print(e.args)
+ return result
+
+#
+# if __name__ == "__main__":
+# with open(r"..\\test\\search.html", "r", encoding="utf-8") as f:
+#
+# html = f.read()
+# result = parse_jd_item(html)
+# print(result)
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/main.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/main.py"
new file mode 100644
index 0000000000000000000000000000000000000000..b3056f5fb2dfbd7e29986db1f876daf392bd6cba
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/main.py"
@@ -0,0 +1,61 @@
+import random
+import pymysql
+import requests
+import sys
+sys.path.append(r"E:\PycharmProjects\Spider_Code\week9_3")
+sys.path.append(r"E:\PycharmProjects\Spider_Code\week9_3\jd_crawler")
+print(sys.path)
+
+from jd_crawler.jd_parser.search import parse_jd_item
+from jd_crawler.settings import MYSQL_CONF, HEADERS
+import sys
+print(sys.path)
+
+
+def saver(item_array):  # save to the MySQL database
+    """
+    Persist the crawl results
+ :param item_array:
+ :return:
+ """
+    cursor = mysql_con.cursor()  # create a new cursor each time
+ SQL = """INSERT INTO jd_search(sku_id,img,price, title, shop, icons)
+ VALUES ( %s,%s, %s, %s, %s, %s)"""
+ cursor.executemany(SQL, item_array)
+    mysql_con.commit()  # commit, writing the rows to the database
+    cursor.close()  # close the cursor
+
+def downloader(task):  # downloader
+    """
+    Downloader:
+    the component that requests the target URL
+ :param task:
+ :return:
+ """
+ url = "https://search.jd.com/Search"
+ params = {
+ "keyword": task #关键词的列表
+ }
+ res = requests.get(url=url, params=params, headers=HEADERS, timeout=10,
+ # proxies={"https": f"https:144.255.48.62","http": f"http:144.255.48.62"}
+ )
+ return res
+
+
+def main(task_array):  # main function: the scheduler
+    """
+    Schedules the crawl tasks
+ :return:
+ """
+ for task in task_array:
+        result = downloader(task)  # downloader
+        item_array = parse_jd_item(result.text)  # parser
+        print("GET ITEMS", item_array)  # debug print to check that each step runs correctly
+ saver(item_array)
+
+
+if __name__ == "__main__":#入口
+ # 用来代替生产者
+ mysql_con = pymysql.connect(**MYSQL_CONF)
+ task_array = ["鼠标", "键盘", "显卡", "耳机"]
+ main(task_array)
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/settings.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/settings.py"
new file mode 100644
index 0000000000000000000000000000000000000000..d260ebbe81a4237833c331fedfe1fe8d34055935
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/settings.py"
@@ -0,0 +1,14 @@
+# Settings file
+# Request headers
+HEADERS = {
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36",
+ # "upgrade-insecure-requests": "1"
+}
+
+# MySQL configuration
+MYSQL_CONF = {
+ "host": "127.0.0.1",
+ "user": "root",
+ "password": "123456",
+ "db": "py_class"
+}
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/test/parser_test.py" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/test/parser_test.py"
new file mode 100644
index 0000000000000000000000000000000000000000..779883b2bb5919b18abb6846b2bc2e9791fef706
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/test/parser_test.py"
@@ -0,0 +1,12 @@
+import sys
+sys.path.append(r"E:\PycharmProjects\Spider_Code\week9_3\jd_crawler")
+print(sys.path)
+
+from jd_crawler.jd_parser.search import parse_jd_item
+
+with open(r"..\\test\\search.html", "r", encoding="utf-8") as f:
+ html = f.read()
+ result = parse_jd_item(html)
+ print(result)
+
+ # with open(r"..\\test\\search.html", "r", encoding="utf-8") as f:
\ No newline at end of file
diff --git "a/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/test/search.html" "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/test/search.html"
new file mode 100644
index 0000000000000000000000000000000000000000..0f67859bf30f108ca01618bdfc3a29b171484be0
--- /dev/null
+++ "b/\347\254\254\344\272\214\346\234\237\350\256\255\347\273\203\350\220\245/5\347\217\255/5\347\217\255_\344\272\221/\347\254\2549\345\221\2500222--0228/jd_crawler/test/search.html"
@@ -0,0 +1,6615 @@
+
+