From 1dab5cb51f5d66c888920e465d8b9856e9ee3eb8 Mon Sep 17 00:00:00 2001
From: 4班助教 | 康康
Date: Mon, 8 Mar 2021 17:02:51 +0800
Subject: [PATCH] Revert 'Pull Request !741: 第十周_第二节 第一个 scrapy 爬虫项目'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../.keep               |   0
 .../items.py            |  18 ---
 .../jd_search_spider.py |  52 ---------
 .../middlewares.py      | 110 ------------------
 .../pipelines.py        |  32 -----
 .../proxymiddleware.txt |  25 ----
 .../run.py              |   4 -
 .../settings.py         | 103 ----------------
 .../.keep               |   0
 .../Scrapy.md           |  83 -------------
 .../quotes_spider.py    |  20 ----
 11 files changed, 447 deletions(-)
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第三节/.keep
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第三节/items.py
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第三节/jd_search_spider.py
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第三节/middlewares.py
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第三节/pipelines.py
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第三节/proxymiddleware.txt
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第三节/run.py
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第三节/settings.py
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第二节/.keep
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第二节/Scrapy.md
 delete mode 100644 第二期训练营/2班/2班_chaos/第十周_第二节/quotes_spider.py

diff --git a/第二期训练营/2班/2班_chaos/第十周_第三节/.keep b/第二期训练营/2班/2班_chaos/第十周_第三节/.keep
deleted file mode 100644
index e69de29b..00000000
diff --git a/第二期训练营/2班/2班_chaos/第十周_第三节/items.py b/第二期训练营/2班/2班_chaos/第十周_第三节/items.py
deleted file mode 100644
index b72f7a4c..00000000
--- a/第二期训练营/2班/2班_chaos/第十周_第三节/items.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Define here the models for your scraped items
-#
-# See documentation in:
-# https://docs.scrapy.org/en/latest/topics/items.html
-
-import scrapy
-
-
-class JdSearchItem(scrapy.Item):
-    # define the fields for your item here like:
-    # name = scrapy.Field()
-    sku_id = scrapy.Field()
-    img = scrapy.Field()
-    price = scrapy.Field()
-    title = scrapy.Field()
-    shop = scrapy.Field()
-    icons = scrapy.Field()
-
diff --git a/第二期训练营/2班/2班_chaos/第十周_第三节/jd_search_spider.py b/第二期训练营/2班/2班_chaos/第十周_第三节/jd_search_spider.py
deleted file mode 100644
index 87b1a945..00000000
--- a/第二期训练营/2班/2班_chaos/第十周_第三节/jd_search_spider.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import scrapy
-from bs4 import BeautifulSoup
-import json
-from W10_L3.jd_search.jd_search.items import JdSearchItem
-
-
-class JdSearchSpider(scrapy.Spider):
-    name = "jd_search"
-
-    def start_requests(self):
-        search_array = ["手机", "电脑", "显卡", "内存"]
-        for keyword in search_array:
-            for page in range(1, 4):
-                url = f'https://search.jd.com/Search?keyword={keyword}&page={page}'
-
-                yield scrapy.FormRequest(
-                    url=url,
-                    method='GET',
-                    callback=self.parse_search
-                )
-
-
-    def parse_search(self, response):
-        html = response.text
-        soup = BeautifulSoup(html, 'lxml')
-        content = soup.select("ul[class='gl-warp clearfix'] li[class='gl-item']")
-        for item in content:
-            try:
-                sku_id = item.attrs["data-sku"]
-                img = item.select("img[data-img='1']")
-                price = item.select("div[class='p-price']")
-                title = item.select("div[class='p-name p-name-type-2'] em")
-                shop = item.select("div[class='p-shop']")
-                icons = item.select("div[class='p-icons']")
-
-                img = img[0].attrs['data-lazy-img'] if img else ""
-                price = price[0].strong.i.text.strip() if price else ""
-                title = title[0].text.strip() if title else ""
-                shop = shop[0].text.strip() if shop else ""
-                icons = json.dumps([ele.text.strip() for ele in icons[0].select('i')]) if icons else '[]'
-
-                items = JdSearchItem()
-                items["sku_id"] = sku_id
-                items["img"] = img
-                items["price"] = price
-                items["title"] = title
-                items["shop"] = shop
-                items["icons"] = icons
-                yield items
-
-            except Exception as e:
-                print(e.args)
\ No newline at end of file
diff --git a/第二期训练营/2班/2班_chaos/第十周_第三节/middlewares.py b/第二期训练营/2班/2班_chaos/第十周_第三节/middlewares.py
deleted file mode 100644
index 1c8b8fdc..00000000
--- a/第二期训练营/2班/2班_chaos/第十周_第三节/middlewares.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# Define here the models for your spider middleware
-#
-# See documentation in:
-# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
-
-from scrapy import signals
-
-# useful for handling different item types with a single interface
-from itemadapter import is_item, ItemAdapter
-
-
-class JdSearchSpiderMiddleware:
-    # Not all methods need to be defined. If a method is not defined,
-    # scrapy acts as if the spider middleware does not modify the
-    # passed objects.
-
-    @classmethod
-    def from_crawler(cls, crawler):
-        # This method is used by Scrapy to create your spiders.
-        s = cls()
-        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
-        return s
-
-    def process_spider_input(self, response, spider):
-        # Called for each response that goes through the spider
-        # middleware and into the spider.
-
-        # Should return None or raise an exception.
-        return None
-
-    def process_spider_output(self, response, result, spider):
-        # Called with the results returned from the Spider, after
-        # it has processed the response.
-
-        # Must return an iterable of Request, or item objects.
-        for i in result:
-            yield i
-
-    def process_spider_exception(self, response, exception, spider):
-        # Called when a spider or process_spider_input() method
-        # (from other spider middleware) raises an exception.
-
-        # Should return either None or an iterable of Request or item objects.
-        pass
-
-    def process_start_requests(self, start_requests, spider):
-        # Called with the start requests of the spider, and works
-        # similarly to the process_spider_output() method, except
-        # that it doesn’t have a response associated.
-
-        # Must return only requests (not items).
-        for r in start_requests:
-            yield r
-
-    def spider_opened(self, spider):
-        spider.logger.info('Spider opened: %s' % spider.name)
-
-
-class JdSearchDownloaderMiddleware:
-    # Not all methods need to be defined. If a method is not defined,
-    # scrapy acts as if the downloader middleware does not modify the
-    # passed objects.
-
-    @classmethod
-    def from_crawler(cls, crawler):
-        # This method is used by Scrapy to create your spiders.
-        s = cls()
-        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
-        return s
-
-    def process_request(self, request, spider):
-        # Called for each request that goes through the downloader
-        # middleware.
-
-        # Must either:
-        # - return None: continue processing this request
-        # - or return a Response object
-        # - or return a Request object
-        # - or raise IgnoreRequest: process_exception() methods of
-        #   installed downloader middleware will be called
-        return None
-
-    def process_response(self, request, response, spider):
-        # Called with the response returned from the downloader.
-
-        # Must either;
-        # - return a Response object
-        # - return a Request object
-        # - or raise IgnoreRequest
-        return response
-
-    def process_exception(self, request, exception, spider):
-        # Called when a download handler or a process_request()
-        # (from other downloader middleware) raises an exception.
-
-        # Must either:
-        # - return None: continue processing this exception
-        # - return a Response object: stops process_exception() chain
-        # - return a Request object: stops process_exception() chain
-        pass
-
-    def spider_opened(self, spider):
-        spider.logger.info('Spider opened: %s' % spider.name)
-
-
-class JdSearchUAMiddleware:
-
-    def process_request(self, request, spider):
-        # This method is used by Scrapy to add user agent headers.
-        request.headers["user-agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
diff --git a/第二期训练营/2班/2班_chaos/第十周_第三节/pipelines.py b/第二期训练营/2班/2班_chaos/第十周_第三节/pipelines.py
deleted file mode 100644
index 79f7f7c3..00000000
--- a/第二期训练营/2班/2班_chaos/第十周_第三节/pipelines.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Define your item pipelines here
-#
-# Don't forget to add your pipeline to the ITEM_PIPELINES setting
-# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
-
-
-# useful for handling different item types with a single interface
-from itemadapter import ItemAdapter
-import pymysql
-from W10_L3.jd_search.jd_search.items import JdSearchItem
-
-
-class JdSearchPipeline:
-
-    def __init__(self):
-        self.mysql_con = None
-
-    def process_item(self, item, spider):
-        if not self.mysql_con:
-            self.mysql_con = pymysql.connect(**spider.settings['MYSQL_CONF'])
-
-        if isinstance(item, JdSearchItem):
-            cursor = self.mysql_con.cursor()
-            SQL = """INSERT INTO jd_search(sku_id, img, price, title, shop, icons)
-            VALUES ('{}', '{}', '{}', '{}', '{}', '{}')""".format(
-                item['sku_id'], item['img'], item['price'], item['title'], item['shop'], item['icons']
-            )
-            cursor.execute(SQL)
-            self.mysql_con.commit()
-            cursor.close()
-
-        return item
diff --git a/第二期训练营/2班/2班_chaos/第十周_第三节/proxymiddleware.txt b/第二期训练营/2班/2班_chaos/第十周_第三节/proxymiddleware.txt
deleted file mode 100644
index 3d030072..00000000
--- a/第二期训练营/2班/2班_chaos/第十周_第三节/proxymiddleware.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-
-use HttpProxyMiddleware
-"""
-This middleware sets the HTTP proxy to use for requests, by setting the proxy meta value for Request objects.
-You can also set the meta key proxy per-request,
-a value like http://some_proxy_server:port or http://username:password@some_proxy_server:port
-"""
-
-1. enable HttpProxyMiddleware in settings.py
-
-DOWNLOADER_MIDDLEWARES = {
-    #'jd_search.middlewares.JdSearchDownloaderMiddleware': 543,
-    'jd_search.middlewares.JdSearchUAMiddleware': 100,
-    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 100
-}
-
-2. add meta in request
-yield scrapy.FormRequest(
-    url=url,
-    method='GET',
-    meta={
-        'proxy': 'http://proxy_ip:port'
-    },
-    callback=self.parse_search
-)
diff --git a/第二期训练营/2班/2班_chaos/第十周_第三节/run.py b/第二期训练营/2班/2班_chaos/第十周_第三节/run.py
deleted file mode 100644
index bb7a2068..00000000
--- a/第二期训练营/2班/2班_chaos/第十周_第三节/run.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from scrapy import cmdline as cmd
-
-command = "scrapy crawl jd_search".split()
-cmd.execute(command)
\ No newline at end of file
diff --git a/第二期训练营/2班/2班_chaos/第十周_第三节/settings.py b/第二期训练营/2班/2班_chaos/第十周_第三节/settings.py
deleted file mode 100644
index 00c25bf3..00000000
--- a/第二期训练营/2班/2班_chaos/第十周_第三节/settings.py
+++ /dev/null
@@ -1,103 +0,0 @@
-# Scrapy settings for jd_search project
-#
-# For simplicity, this file contains only settings considered important or
-# commonly used. You can find more settings consulting the documentation:
-#
-#     https://docs.scrapy.org/en/latest/topics/settings.html
-#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
-#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
-
-BOT_NAME = 'jd_search'
-
-SPIDER_MODULES = ['jd_search.spiders']
-NEWSPIDER_MODULE = 'jd_search.spiders'
-
-
-# Crawl responsibly by identifying yourself (and your website) on the user-agent
-#USER_AGENT = 'jd_search (+http://www.yourdomain.com)'
-
-# Obey robots.txt rules
-ROBOTSTXT_OBEY = False
-
-# Configure maximum concurrent requests performed by Scrapy (default: 16)
-#CONCURRENT_REQUESTS = 32
-
-# Configure a delay for requests for the same website (default: 0)
-# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
-# See also autothrottle settings and docs
-#DOWNLOAD_DELAY = 3
-# The download delay setting will honor only one of:
-#CONCURRENT_REQUESTS_PER_DOMAIN = 16
-#CONCURRENT_REQUESTS_PER_IP = 16
-
-# Disable cookies (enabled by default)
-#COOKIES_ENABLED = False
-
-# Disable Telnet Console (enabled by default)
-#TELNETCONSOLE_ENABLED = False
-
-# Override the default request headers:
-#DEFAULT_REQUEST_HEADERS = {
-#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-#   'Accept-Language': 'en',
-#}
-
-# Enable or disable spider middlewares
-# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
-#SPIDER_MIDDLEWARES = {
-#    'jd_search.middlewares.JdSearchSpiderMiddleware': 543,
-#}
-
-# Enable or disable downloader middlewares
-# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
-DOWNLOADER_MIDDLEWARES = {
-    #'jd_search.middlewares.JdSearchDownloaderMiddleware': 543,
-    'jd_search.middlewares.JdSearchUAMiddleware': 100,
-    #'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 100
-}
-
-# Enable or disable extensions
-# See https://docs.scrapy.org/en/latest/topics/extensions.html
-#EXTENSIONS = {
-#    'scrapy.extensions.telnet.TelnetConsole': None,
-#}
-
-# Configure item pipelines
-# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
-ITEM_PIPELINES = {
-    'jd_search.pipelines.JdSearchPipeline': 300,
-}
-
-# Enable and configure the AutoThrottle extension (disabled by default)
-# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
-#AUTOTHROTTLE_ENABLED = True
-# The initial download delay
-#AUTOTHROTTLE_START_DELAY = 5
-# The maximum download delay to be set in case of high latencies
-#AUTOTHROTTLE_MAX_DELAY = 60
-# The average number of requests Scrapy should be sending in parallel to
-# each remote server
-#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
-# Enable showing throttling stats for every response received:
-#AUTOTHROTTLE_DEBUG = False
-
-# Enable and configure HTTP caching (disabled by default)
-# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
-#HTTPCACHE_ENABLED = True
-#HTTPCACHE_EXPIRATION_SECS = 0
-#HTTPCACHE_DIR = 'httpcache'
-#HTTPCACHE_IGNORE_HTTP_CODES = []
-#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
-
-# MYSQL CONF
-MYSQL_CONF = {
-    'host': '127.0.0.1',
-    'port': 3306,
-    'user': 'root',
-    'password': 'wxx33043',
-    'db': 'jd_search'
-}
-
-# Log setting
-LOG_FILE = "C:\\Users\\chaos\\Desktop\\scrapy_log\\execution.log"
-LOG_LEVEL = "DEBUG"
\ No newline at end of file
diff --git a/第二期训练营/2班/2班_chaos/第十周_第二节/.keep b/第二期训练营/2班/2班_chaos/第十周_第二节/.keep
deleted file mode 100644
index e69de29b..00000000
diff --git a/第二期训练营/2班/2班_chaos/第十周_第二节/Scrapy.md b/第二期训练营/2班/2班_chaos/第十周_第二节/Scrapy.md
deleted file mode 100644
index ea17cf2b..00000000
--- a/第二期训练营/2班/2班_chaos/第十周_第二节/Scrapy.md
+++ /dev/null
@@ -1,83 +0,0 @@
-# Scrapy
-
-## Architecture
-
-### Data flow
-
-![](https://docs.scrapy.org/en/latest/_images/scrapy_architecture_02.png)
-
-The data flow in Scrapy is controlled by the execution engine and goes like this:
-
-1. The `Engine` gets the initial requests to crawl from the `Spider`
-2. The `Engine` schedules the requests in the `Scheduler` and asks for the next requests to crawl
-3. The `Scheduler` returns the next requests to the `Engine`
-4. The `Engine` sends the requests to the `Downloader`, passing through the `Downloader Middleware`
-5. Once the page finishes downloading, the `Downloader` returns a response to the `Engine`, passing through the `Downloader Middleware`
-6. The `Engine` receives the response and sends it to the `Spider` for parsing, passing through the `Spider Middleware`
-7. After the `Spider` has parsed the response, it returns scraped items and a new request to the `Engine`, passing through the `Spider Middleware`
-8. The `Engine` sends the scraped items to the `Item Pipelines`, then sends that new request to the `Scheduler` and asks for the next requests to crawl
-9. The process repeats until there are no more requests from the `Scheduler`
-
-### Event-driven networking framework
-
-Scrapy is written with Twisted, a popular event-driven networking framework for Python. Thus, it's implemented using non-blocking (aka **`asynchronous`**) code for concurrency.
-
-## Scrapy Tutorial
-
-### Creating a project
-
-```
-scrapy startproject
-```
-
-Directory structure
-
-```
-tutorial/
-|___scrapy.cfg           # deployment configuration file
-|___tutorial/
-    |___ __init__.py
-    |___ items.py        # item definitions
-    |___ middleware.py   # middleware definitions
-    |___ pipelines.py    # pipeline definitions
-    |___ settings.py     # project settings
-    |___ spiders/        # spiders directory
-        |___ __init__.py
-```
-
-### First Spider
-
-Under the spiders directory, create a new quotes_spider.py:
-
-```python
-import scrapy
-
-
-class QuotesSpider(scrapy.Spider):
-    name = "quotes"
-
-    def start_requests(self):
-        urls = [
-            'http://quotes.toscrape.com/page/1/',
-            'http://quotes.toscrape.com/page/2/',
-        ]
-        for url in urls:
-            yield scrapy.Request(url=url, callback=self.parse)
-
-    def parse(self, response):
-        page = response.url.split("/")[-2]
-        filename = f'quotes-{page}.html'
-        with open(filename, 'wb') as f:
-            f.write(response.body)
-        self.log(f'Saved file {filename}')
-```
-
-### Run our spider
-
-```
-# run the command below from the top-level directory of the project
-scrapy crawl
-```
-
-### Extracting data
-
diff --git a/第二期训练营/2班/2班_chaos/第十周_第二节/quotes_spider.py b/第二期训练营/2班/2班_chaos/第十周_第二节/quotes_spider.py
deleted file mode 100644
index 2d058723..00000000
--- a/第二期训练营/2班/2班_chaos/第十周_第二节/quotes_spider.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import scrapy
-
-
-class QuoteSpider(scrapy.Spider):
-    name = "quotes"
-
-    def start_requests(self):
-        urls = [
-            'http://quotes.toscrape.com/page/1/',
-            'http://quotes.toscrape.com/page/2/',
-        ]
-        for url in urls:
-            yield scrapy.Request(url=url, callback=self.parse)
-
-    def parse(self, response):
-        page = response.split("/")[-2]
-        filename = f'quote_{page}.html'
-        with open(filename, 'wb') as f:
-            f.write(response.body)
-        self.log(f'Saved file {filename}')
--
Gitee