本人初学python,是菜鸟级,写得不好勿喷。
python爬虫用了比较简单的urllib.parse和requests,把爬来的数据显示在地图上。接下来我们话不多说,直接上代码:
1.安装python环境和编辑器(自行度娘)
2.本人以58品牌公寓为例,爬取在杭州地区价格在2000-4000的公寓。
1
2
3
4
5
6
|
#-*- coding:utf-8 -*-
import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
|
以上是需要引入的模块
1
2
3
|
url = "http://hz.58.com/pinpaigongyu/pn/{page}/?minprice=2000_4000"
# Index of the last page already fetched; starts at 0.
page = 0
|
以上是全局变量
1
2
|
# Output file for the scraped listings. Mode "a+" appends, so repeated runs keep
# adding rows to the same file; newline='' hands line-ending control to the csv
# module, which avoids blank lines between rows on Windows.
csv_file = open(r"c:\users\****\desktop\houosenew.csv", "a+", newline='')
# Comma-separated writer used by the crawl loop to emit one row per listing.
csv_writer = csv.writer(csv_file, delimiter=',')
|
自定义某个位置来保存爬取的数据,本人把爬取的数据保存为csv格式便于编辑(其中"a+"表示以追加方式打开,可以多次运行并在文件末尾继续插入数据,建议不要使用"wb"哦!newline=''表示写入时不会产生多余的空行)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
# Crawl the listing pages one after another and append every listing to the CSV.
# The file is closed in `finally` so rows already written survive a failed request.
try:
    while True:
        # Wait 5 seconds between requests so the site is less likely to block our IP.
        time.sleep(5)
        page += 1
        # Substitute the page number into the URL template.
        page_url = url.format(page=page)
        print(page_url + "ok")
        # A timeout keeps one stalled connection from hanging the crawl forever.
        response = requests.get(page_url, timeout=30)
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        html = BeautifulSoup(response.text, "html.parser")
        # Every listing is an <li> directly under the element with class "list".
        house_list = html.select(".list > li")
        # Stop as soon as a page yields no listings.
        if not house_list:
            break
        for house in house_list:
            # Pull the fields we need out of the listing's DOM nodes.
            house_title = house.select("h2")[0].string
            house_url = urljoin(url, house.select("a")[0]["href"])
            house_pic = urljoin(url, house.select("img")[0]["lazy_src"])
            # `.string` is None when the <h2> has nested markup; treat that as "no title".
            house_info_list = house_title.split() if house_title else []
            if not house_info_list:
                continue
            # If the first token is itself an apartment / youth-community name, use it
            # as the location; otherwise take the second token (falling back to the
            # first when the title has only one token).
            if "公寓" in house_info_list[0] or "青年社区" in house_info_list[0]:
                house_location = house_info_list[0]
            elif len(house_info_list) > 1:
                house_location = house_info_list[1]
            else:
                house_location = house_info_list[0]
            house_money = house.select(".money")[0].select("b")[0].string
            csv_writer.writerow([house_title, house_location, house_money, house_pic, house_url])
finally:
    # Always release the output file handle.
    csv_file.close()
|
如果网站屏蔽了你的ip,你可以做一个ip地址数组放在http的头部具体度娘一下吧。
接下来我们写html
只是简单的写了一下写的不好见谅。用的是高德地图,具体的js api可以到高德开发者上去看。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
|
<body>
<!-- Map canvas; the script below attaches the map to this element by its id. -->
<div id="container"></div>
<!-- Control panel: work location, commute mode and listing-file import. -->
<div class="control-panel">
    <div class="control-entry">
        <label>选择工作地点:</label>
        <div class="control-input">
            <!-- The address autocomplete in the script is bound to this input by id. -->
            <input id="work-location" type="text">
        </div>
    </div>
    <div class="control-entry">
        <label>选择通勤方式:</label>
        <div class="control-input">
            <input type="radio" name="vehicle" value="subway,bus" onclick="takebus(this)" checked /> 公交 + 地铁
            <input type="radio" name="vehicle" value="subway" onclick="takesubway(this)" /> 地铁
            <input type="radio" name="vehicle" value="walk" onclick="takewalk(this)" /> 走路
            <input type="radio" name="vehicle" value="bike" onclick="takebike(this)" /> 骑车
        </div>
    </div>
    <div class="control-entry">
        <label>导入房源文件:</label>
        <div class="control-input">
            <!-- CSV file produced by the scraper; read by changecsv() on button click. -->
            <input type="file" name="file" id="filecsv" />
            <button style="margin-top: 10px;width: 50%;" onclick="changecsv()">开始</button>
        </div>
    </div>
</div>
<!-- Route-planning results are rendered into this panel. -->
<div id="transfer-panel"></div>
<script>
// Create the map inside the #container element with an initial centre and zoom level.
var map = new AMap.Map("container", {
    resizeEnable: true,
    zoomEnable: true,
    center: [120.1256856402492, 30.27289264553506],
    zoom: 12
});
// Add the scale control.
var scale = new AMap.Scale();
map.addControl(scale);
// Arrival-range (reachable area) service object.
var arrivalrange = new AMap.ArrivalRange();
// Longitude, latitude, time (unused) and commute mode (initially "subway,bus").
var x, y, t, vehicle = "subway,bus";
// Work address and the marker placed at it.
var workaddress, workmarker;
// Markers of the imported rental listings.
var rentmarkerarray = [];
// Polygons holding the computed arrival-range result.
var polygonarray = [];
// Route planner; assigned when a listing marker is clicked.
var amaptransfer;
// Info window shared by all listing markers.
var infowindow = new AMap.InfoWindow({
    offset: new AMap.Pixel(0, -30)
});
// Address autocompletion.
var auto = new AMap.Autocomplete({
    // The input element is designated by its id.
    input: "work-location"
});
// After a completed address is chosen, worklocationselected is called.
AMap.event.addListener(auto, "select", worklocationselected);
/**
 * Common body of the four commute-mode handlers: remember the value of the
 * clicked radio button as the current commute mode, then reload the
 * work-location overlays.
 * @param {{value: string}} radio - the radio input that was clicked
 */
function applyVehicleChoice(radio) {
    vehicle = radio.value;
    loadworklocation();
}

/** onclick handler of the "subway,bus" radio button. */
function takebus(radio) {
    applyVehicleChoice(radio);
}

/** onclick handler of the "subway" radio button. */
function takesubway(radio) {
    applyVehicleChoice(radio);
}

/** onclick handler of the "walk" radio button. */
function takewalk(radio) {
    applyVehicleChoice(radio);
}

/** onclick handler of the "bike" radio button. */
function takebike(radio) {
    applyVehicleChoice(radio);
}
/**
 * Read the CSV file chosen in the #filecsv input (via the csv2arr jQuery
 * plugin) and place one map marker per listing row.
 */
function changecsv() {
    $("#filecsv").csv2arr(function (res) {
        $.each(res, function (k, row) {
            // Blank or short rows have no address column and cannot be geocoded.
            if (row && row[1]) {
                // Column order written by the scraper: 1 = address, 2 = price, 3 = picture URL.
                addmarkerbyaddress(row[1], row[2], row[3]);
            }
        });
    });
}
/**
 * Autocomplete "select" callback: store the name of the chosen POI as the
 * work address and reload the work-location overlays.
 * @param {{poi: {name: string}}} e - selection event
 */
function worklocationselected(e) {
    var chosenpoi = e.poi;
    workaddress = chosenpoi.name;
    loadworklocation();
}
/**
 * Put a red marker for the work location on the map and keep it in the
 * global `workmarker` so it can be removed later.
 * @param {number} x - longitude
 * @param {number} y - latitude
 * @param {string} locationname - used as the marker title
 */
function loadworkmarker(x, y, locationname) {
    workmarker = new AMap.Marker({
        map: map,
        title: locationname,
        icon: 'http://webapi.amap.com/theme/v1.3/markers/n/mark_r.png',
        position: [x, y]
    });
}
/**
 * Query the arrival range around a point and draw every returned boundary as
 * a polygon; the polygons are collected in the global `polygonarray`.
 * @param {number} x - longitude of the centre
 * @param {number} y - latitude of the centre
 * @param {number} t - time budget passed to the arrival-range search
 * @param {string} color - fill and stroke colour of the polygons
 * @param {string} v - commute policy passed to the search (e.g. "subway,bus")
 */
function loadworkrange(x, y, t, color, v) {
    arrivalrange.search([x, y], t, function (status, result) {
        // When the callback carries no bounds there is nothing to draw.
        if (!result || !result.bounds) {
            return;
        }
        for (var i = 0; i < result.bounds.length; i++) {
            // Create a new polygon object.
            var polygon = new AMap.Polygon({
                map: map,
                fillColor: color,
                fillOpacity: "0.4",
                strokeColor: color,
                strokeOpacity: "0.8",
                strokeWeight: 1
            });
            // Use the arrival-range boundary as the polygon path.
            polygon.setPath(result.bounds[i]);
            polygonarray.push(polygon);
        }
    }, {
        policy: v
    });
}
/** Escape a value for safe use inside HTML text and quoted attribute values. */
function escapeRentHtml(text) {
    return String(text == null ? "" : text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}

/** Pick the marker icon for a price: red above 3000 per month, blue otherwise. */
function pickRentIcon(money) {
    var price = String(money == null ? "" : money);
    // For a range such as "2000-3000" the upper bound decides.
    if (price.indexOf("-") > -1) {
        price = price.split("-")[1];
    }
    return parseFloat(price) > 3000
        ? "http://webapi.amap.com/theme/v1.3/markers/n/mark_r.png"
        : "http://webapi.amap.com/theme/v1.3/markers/n/mark_b.png";
}

/**
 * Geocode a listing address and, on success, drop a marker for it on the map.
 * Clicking the marker opens an info window with picture, link and price, and
 * plans a transit route from the work address to the listing.
 * @param {string} address - listing address (also used as marker title)
 * @param {string} money - price text, either a number or a "low-high" range
 * @param {string} imgurl - picture URL shown in the info window
 */
function addmarkerbyaddress(address, money, imgurl) {
    var geocoder = new AMap.Geocoder({
        city: "杭州",
        radius: 1000
    });
    geocoder.getLocation(address, function (status, result) {
        // Only a successful lookup with at least one hit yields a marker.
        if (status !== "complete" || !result || result.info !== 'OK' ||
                !result.geocodes || !result.geocodes.length) {
            return;
        }
        var geocode = result.geocodes[0];
        var rentmarker = new AMap.Marker({
            map: map,
            title: address,
            icon: pickRentIcon(money),
            animation: "AMAP_ANIMATION_DROP",
            position: [geocode.location.getLng(), geocode.location.getLat()]
        });
        rentmarkerarray.push(rentmarker);
        // Info-window markup. The values come from an imported CSV file, so they
        // are escaped before being spliced into HTML. The link targets the
        // Hangzhou site, the same one the listings were scraped from.
        rentmarker.content = "<img src='" + escapeRentHtml(imgurl) + "'/>" +
            "<div>房源:<a target = '_blank' href='http://hz.58.com/pinpaigongyu/?key=" +
            escapeRentHtml(encodeURIComponent(address)) + "'>" + escapeRentHtml(address) +
            "</a><p>价格:" + escapeRentHtml(money) + "</p></div>";
        // Clicking the marker shows its content and plans the commute route.
        rentmarker.on('click', function (e) {
            infowindow.setContent(e.target.content);
            infowindow.open(map, e.target.getPosition());
            if (amaptransfer) {
                amaptransfer.clear();
            }
            // Route planning needs a start point; skip it until a work address is chosen.
            if (!workaddress) {
                return;
            }
            amaptransfer = new AMap.Transfer({
                map: map,
                policy: AMap.TransferPolicy.LEAST_TIME,
                city: "杭州市",
                panel: 'transfer-panel'
            });
            // The result is rendered into the map and panel configured above,
            // so the callback itself has nothing to do.
            amaptransfer.search([{
                keyword: workaddress
            }, {
                keyword: address
            }], function (status, result) {
            });
        });
    });
}
/**
 * Remove the arrival-range polygons and the work marker from the map, then
 * reset the polygon list to an empty array.
 */
function delworklocation() {
    var stale = [polygonarray, workmarker];
    for (var i = 0; i < stale.length; i++) {
        if (stale[i]) {
            map.remove(stale[i]);
        }
    }
    polygonarray = [];
}
/**
 * Remove all listing markers from the map and reset the marker list to an
 * empty array.
 */
function delrentlocation() {
    var shown = rentmarkerarray;
    if (shown) {
        map.remove(shown);
    }
    rentmarkerarray = [];
}
/**
 * Redraw everything that depends on the work location: clear the old
 * overlays, geocode the current work address, then place the work marker,
 * draw the 60-minute arrival range for the current commute mode and centre
 * the map on the work location.
 */
function loadworklocation() {
    // First clear the arrival range already on the map.
    delworklocation();
    // A commute-mode radio can be clicked before any address is chosen;
    // there is nothing to geocode in that case.
    if (!workaddress) {
        return;
    }
    var geocoder = new AMap.Geocoder({
        city: "杭州",
        radius: 1000
    });
    geocoder.getLocation(workaddress, function (status, result) {
        // Only a successful lookup with at least one hit is usable.
        if (status !== "complete" || !result || result.info !== 'OK' ||
                !result.geocodes || !result.geocodes.length) {
            return;
        }
        var geocode = result.geocodes[0];
        x = geocode.location.getLng();
        y = geocode.location.getLat();
        // Place the work-location marker, titled with the work address.
        loadworkmarker(x, y, workaddress);
        // Draw the area reachable from the work location within 60 minutes.
        loadworkrange(x, y, 60, "#3f67a5", vehicle);
        // Move the map to the work location.
        map.setZoomAndCenter(12, [x, y]);
    });
}
</script>
</body>
|
想要获取完整的代码github:https://github.com/divibear/pythondemo.git
新手上路,老司机们勿喷!
以上所述是小编给大家介绍的python爬虫租房信息在地图上显示的方法详解整合,希望对大家有所帮助,如果大家有任何疑问请给我留言,小编会及时回复大家的。在此也非常感谢大家对服务器之家网站的支持!
原文链接:https://blog.csdn.net/qq_36091581/article/details/76944053