中国地震台网数据爬取

地震台网

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import scrapy
import re

from scrapy import Request
from urllib import parse

from SpiderDemo.items import SpiderdemoItem
class CeicSpider(scrapy.Spider):
    """Spider for the earthquake listing served under www.ceic.ac.cn.

    ``parse`` walks the listing pages: it schedules one request per detail
    link found on the page and then follows the pagination link.
    ``parse_detail`` reads the text cells of a detail page and fills a
    ``SpiderdemoItem`` with them.
    """

    name = 'ceic'
    # allowed_domains takes bare host names only; an entry carrying a path or
    # query string never matches a request host.
    allowed_domains = ['www.ceic.ac.cn']
    # NOTE(review): the trailing "/" ends up inside the ``time`` query value;
    # kept as in the original - confirm the server expects it.
    start_urls = ['http://www.ceic.ac.cn/speedsearch?time=6/']

    def parse(self, response):
        """Yield a request per detail link, then a request for the next page.

        Args:
            response: the downloaded listing page.

        Yields:
            ``Request`` objects for every detail page (handled by
            ``parse_detail``) and, when a pagination link is present, for the
            following listing page (handled by ``parse`` again).
        """
        detail_links = response.xpath('//*[@align="left"]/a/@href').extract()
        for detail_link in detail_links:
            yield Request(
                url=parse.urljoin(response.url, detail_link),
                callback=self.parse_detail,
                dont_filter=True,
            )

        # The second-to-last <li> of the pager is used as the "next" link.
        # Take it defensively: a page without a pager yields an empty list,
        # and indexing [0] directly would raise IndexError.
        next_links = response.xpath(
            '//*[@id="paging"]/div/div/ul/li[last()-1]/a/@href'
        ).extract()
        next_url = next_links[0] if next_links else None
        if next_url:
            yield Request(
                url=parse.urljoin(response.url, next_url),
                callback=self.parse,
                dont_filter=True,
            )

    def parse_detail(self, response):
        """Build a ``SpiderdemoItem`` from the text cells of a detail page.

        The values are taken from the odd positions (1, 3, ..., 11) of the
        text nodes selected by ``//*[@valign="middle"]/text()``.

        Args:
            response: the downloaded detail page.

        Returns:
            The populated ``SpiderdemoItem``, or ``None`` when the page does
            not contain the twelve text cells the extraction relies on.
        """
        # Evaluate the XPath once instead of once per field.
        cells = response.xpath('//*[@valign="middle"]/text()').extract()
        if len(cells) < 12:
            # Unexpected layout: skip the page instead of failing on an index.
            self.logger.warning(
                "Skipping %s: expected at least 12 text cells, found %d",
                response.url,
                len(cells),
            )
            return None

        spider_item = SpiderdemoItem()
        spider_item["time"] = cells[1].strip()
        spider_item["latitude"] = cells[3].replace("°", "")
        spider_item["longitude"] = cells[5].replace("°", "")
        spider_item["depth"] = cells[7].replace("千米", "")
        spider_item["level"] = cells[9]
        spider_item["area"] = cells[11]

        return spider_item

Powered by Hexo and Hexo-theme-hiker

Copyright © 2013 - 2021 Inner peace All Rights Reserved.

UV : | PV :