4.1 find简介
MongoDB中使用find来进行查询。查询就是返回一个集合中文档的子集,子集合的范围从0个文档到整个集合。find的第一个参数决定了要返回哪些文档,其形式也是一个文档,说明要执行的查询细节。
空的查询文档{}会匹配集合的全部内容。要是不指定查询文档,默认就是{}
例如:
db.c.find()
将返回集合c中的所有内容。当我们开始向查询文档中添加键/值对时,就意味着限定了查找的条件。例如:
想要查找所有"age"的值为27的文档,直接将这样的键/值对写进查询文档就好了:
> db.users.find() { "_id" : ObjectId("503b08dea53a93158463f02e"), "emails" : [ "joe@gmail.com" ], "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "name" : "joe1", "age" : 65 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "name" : "joe1", "age" : 20 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "name" : "joe1", "age" : 49 } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "username" : "joe" }
> db.users.find({"age":20}) { "_id" : ObjectId("503b0d78a53a93158463f030"), "name" : "joe1", "age" : 20 }
> db.users.update({"age":20},{"$set":{"age":27}},true,true) > db.users.find({"age":27}) { "_id" : ObjectId("503b0d78a53a93158463f030"), "name" : "joe1", "age" : 27 } > db.users.find() { "_id" : ObjectId("503b08dea53a93158463f02e"), "emails" : [ "joe@gmail.com" ], "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "name" : "joe1", "age" : 65 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "name" : "joe1", "age" : 27 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "name" : "joe1", "age" : 49 } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "username" : "joe" }
要想匹配一个字符串,比如值为"joe"的"username"键,那么直接写就好了:
> db.users.find({"username":"joe"}) { "_id" : ObjectId("503b08dea53a93158463f02e"), "emails" : [ "joe@gmail.com" ], "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "username" : "joe" }
可以通过向查询文档加入多个键/值对的方式来将多个查询条件组合在一起,会解释成”条件1 AND 条件2 AND ... 条件N“。例如,要想查询所有用户名为"joe"且年龄为27岁的用户,可以像下面这样:
> db.users.update({"_id" : ObjectId("503b08dea53a93158463f02e")},{"$set":{"age":27}}); > db.users.find(); { "_id" : ObjectId("503b08dea53a93158463f02e"), "age" : 27, "emails" : [ "joe@gmail.com" ], "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "name" : "joe1", "age" : 65 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "name" : "joe1", "age" : 27 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "name" : "joe1", "age" : 49 } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "username" : "joe" }
> db.users.find({"username":"joe","age":27});
{ "_id" : ObjectId("503b08dea53a93158463f02e"), "age" : 27, "emails" : [ "joe@gmail.com" ], "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" }
4.1.1 指定返回的键
有时并不需要将文档中所有键/值对都返回。遇到这种情况,可以通过find(或者findOne)的第二个参数来指定想要的键。
例如,如果只对用户集合的"username"和"email"键感兴趣,可以使用如下查询返回这些键:
> db.users.find(); { "_id" : ObjectId("503b08dea53a93158463f02e"), "emails" : [ "joe@gmail.com" ], "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "name" : "joe1", "age" : 65 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "name" : "joe1", "age" : 20 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "name" : "joe1", "age" : 49 } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "username" : "joe" } > db.users.find({},{"name":1}); { "_id" : ObjectId("503b08dea53a93158463f02e") } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "name" : "joe1" } { "_id" : ObjectId("503b0d78a53a93158463f030"), "name" : "joe1" } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "name" : "joe1" } { "_id" : ObjectId("503b338da53a93158463f033"), "name" : "joe" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b") } > db.users.find({},{"name":1,"_id":0}); { } { "name" : "joe1" } { "name" : "joe1" } { "name" : "joe1" } { "name" : "joe" } { } >
可以看到”_id“这个键总是被返回,即便是没有指定也一样。
也可以用第二个参数来剔除查询结果中的某个键/值对。例如,文档中有很多键,但是不希望结果中含有"fatal_weakness"键:
db.users.find({},{"fatal_weakness":0})
也可以用来防止返回"_id"
> db.users.find(); { "_id" : ObjectId("503b08dea53a93158463f02e"), "age" : 27, "emails" : [ "joe@gmail.com" ], "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "name" : "joe1", "age" : 65 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "name" : "joe1", "age" : 27 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "name" : "joe1", "age" : 49 } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "username" : "joe" }
> db.users.find({"username":"joe"});
{ "_id" : ObjectId("503b08dea53a93158463f02e"), "age" : 27, "emails" : [ "joe@gmail.com" ], "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" }
{ "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "username" : "joe" }
> db.users.find({"username":"joe","age":27});
{ "_id" : ObjectId("503b08dea53a93158463f02e"), "age" : 27, "emails" : [ "joe@gmail.com" ], "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" }
> db.users.find({},{"age":1,"_id":0})
{ "age" : 27 }
{ "age" : 65 }
{ "age" : 27 }
{ "age" : 49 }
{ "age" : 30 }
{ }
4.1.2 限制 (疑惑)
查询使用上还是有些限制的。数据库所关心的查询文档的值必须是常量。
4.2 查询条件
4.2.1 查询条件
"$lt" 、"$lte"、 "$gt" 、"$gte"就是全部的比较操作符,分别对应< 、<=、>和>=
可以将其组合起来以便查找一个范围的值。例如,查询在18~30岁(含)的用户,就可以像下面这样
> db.users.find({"age":{"$gte":18,"$lte":30}},{"age":1,"_id":0}) { "age" : 27 } { "age" : 27 } { "age" : 30 }
这样的范围查询对日期尤为有用。例如,要查找在2007年1月1日前注册的人,可以像下面这样
start = new Date("01/01/2007")
db.users.find({"registered":{"$lt":start}})
> db.users.update({"username":"joe"},{"$set":{"registered":new Date("01/01/2007")}},true,true) > db.users.find({"username":"joe"}) { "_id" : ObjectId("503b08dea53a93158463f02e"), "age" : 27, "emails" : [ "joe@gmail.com" ], "registered" : ISODate("2006-12-31T16:00:00Z"), "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" }
> db.users.find({"registered":{"$lte":new Date("01/01/2007")}}); { "_id" : ObjectId("503b08dea53a93158463f02e"), "age" : 27, "emails" : [ "joe@gmail.com" ], "registered" : ISODate("2006-12-31T16:00:00Z"), "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" }
精确匹配日期是徒劳的,因为日期只精确到毫秒。通常只是想得到关于一天、一周或者是一个月的数据,这样范围查询就很有必要了。
对于文档的键值不等于某个特定值的情况,就要使用另外一种条件操作符"$ne"了,它表示”不相等“。若是想要查询所有名字不为"joe"的用户,可以像下面这样查询
db.users.find({"username":{"$ne":"joe"}})
> db.users.find({"username":{"$ne":"joe"}}) { "_id" : ObjectId("503b0d70a53a93158463f02f"), "name" : "joe1", "age" : 65 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "name" : "joe1", "age" : 27 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "name" : "joe1", "age" : 49 } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "location" : "Wisconsin", "name" : "joe", "sex" : "male" }
> db.users.find({"username":"joe"}) { "_id" : ObjectId("503b08dea53a93158463f02e"), "age" : 27, "emails" : [ "joe@gmail.com" ], "registered" : ISODate("2006-12-31T16:00:00Z"), "relationships" : { "friends" : 32, "enemies" : 23 }, "username" : "joe" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" }
”$ne“能用于所有类型的数据
4.2.2 OR查询
MongoDB中有两种方式进行OR查询。”$in“可以用来查询一个键的多个值。"$or"更通用一些,用来完成多个键值的任意给定值。
对于单一键要是有多个值与其匹配的话,就要用"$in"加一个条件数组。例如,抽奖活动的中奖号码是725、542和390。要找出全部这些中奖数据,可以构建如下查询:
db.raffle.find({"ticket_no":{"$in":[725,542,390]}})
> db.users.update({"_id" : ObjectId("503b08dea53a93158463f02e")},{"ticket_no":725}); > db.users.update({"_id" : ObjectId("503b0d70a53a93158463f02f")},{"ticket_no":542}); > db.users.update({"_id" : ObjectId("503b0d78a53a93158463f030")},{"ticket_no":390}); > db.users.find() { "_id" : ObjectId("503b08dea53a93158463f02e"), "ticket_no" : 725 } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "ticket_no" : 542 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "ticket_no" : 390 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "name" : "joe1", "age" : 49 } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" } >
> db.users.find({"ticket_no":{"$in":[725,542,390]}}) { "_id" : ObjectId("503b08dea53a93158463f02e"), "ticket_no" : 725 } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "ticket_no" : 542 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "ticket_no" : 390 }
"$in"非常灵活,可以指定不同的类型的条件和值。例如,在逐步将用户的ID号迁移成用户名的过程中,要做兼顾二者的查询:
db.user.find({"user_id":{”$in“:[12345,"joe"]})
这会匹配"user_id"等于123456的文档,也会匹配"user_id"等于"joe"的文档。
要是"$in"对应的数据只有一个值,那么和直接匹配这个值效果是一样的。例如,{ticket_no:{$in:[725]}}和{ticket_no:725}的效果是一样的。
与"$in"相对的是"$nin",将返回与数组中所有条件都不匹配的文档。要是想返回所有没有中奖的人,就可以用如下方法进行查询:
db.raffle.find("ticket_no":{"$nin":[725,542,390]})
> db.users.find({"ticket_no":{"$nin":[725,542,390]}}) { "_id" : ObjectId("503b0d7ca53a93158463f031"), "name" : "joe1", "age" : 49 } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" }
查询将会返回没有那些号码的人。
”$in“能对单个键做OR查询,但要是想到"ticket_no"为725或者"winner"为true的文档改怎么办呢?对于这种情况,应该使用"$or"。"$or"接受一个包含所有可能条件的数组作为参数。上面中奖的例子如果用"$or"改写会是下面这个样子的:
> .update({"_id" : ObjectId("503b0d7ca53a93158463f031")},{"$set":{"winner":true} > db.users.find(); { "_id" : ObjectId("503b08dea53a93158463f02e"), "ticket_no" : 725 } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "ticket_no" : 542 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "ticket_no" : 390 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "name" : "joe1", "winner" : true } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" }
> db.users.find({"$or":[{"ticket_no":725},{"winner":true}]}); { "_id" : ObjectId("503b08dea53a93158463f02e"), "ticket_no" : 725 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "name" : "joe1", "winner" : true }
”$or“可以含有其他条件句。例如,如果想要将"ticket_no"与那3个值匹配上,外加"winner"键,就可以这么做:
> db.users.find({"$or":[{"ticket_no":{"$in":[725,542,390]}},{"winner":true}]});
{ "_id" : ObjectId("503b08dea53a93158463f02e"), "ticket_no" : 725 }
{ "_id" : ObjectId("503b0d70a53a93158463f02f"), "ticket_no" : 542 }
{ "_id" : ObjectId("503b0d78a53a93158463f030"), "ticket_no" : 390 }
{ "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "name" : "joe1", "winner" : true }
使用普通的AND型的查询时,总是想尽可能地用最少的条件来限定结果的范围【也就是说将最严苛的条件放置在最前面】。OR型的查询正相反:第一个条件尽可能地匹配更多的文档,这样才是最为有效的。
4.2.3 $not
”$not"是元条件句,即可以用在任何其他条件之上。例如,就拿取模运算符"$mod"来说。"$mod"会将查询的值除以第一个给定值,若余数等于第二个给定值则返回该结果:
> db.users.update({"_id" :ObjectId("503b08dea53a93158463f02e")},{"$set":{"id_num":1}}) > db.users.update({"_id" :ObjectId("503b0d70a53a93158463f02f")},{"$set":{"id_num":2}}) > db.users.update({"_id" :ObjectId("503b0d78a53a93158463f030")},{"$set":{"id_num":3}}) > db.users.update({"_id" :ObjectId("503b0d7ca53a93158463f031")},{"$set":{"id_num":4}}) > db.users.update({"_id" :ObjectId("503b338da53a93158463f033")},{"$set":{"id_num":5}}) > db.users.update({"_id" :ObjectId("503b5937d2f71a75825a1b5b")},{"$set":{"id_num":6}}) > db.users.find(); { "_id" : ObjectId("503b08dea53a93158463f02e"), "id_num" : 1, "ticket_no" : 725 } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "id_num" : 2, "ticket_no" : 542 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "id_num" : 3, "ticket_no" : 390 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "id_num" : 4, "name" : "joe1", "winner" : true } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "id_num" : 5, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "id_num" : 6, "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" } > db.users.find({"id_num":{"$mod":[5,1]}}); { "_id" : ObjectId("503b08dea53a93158463f02e"), "id_num" : 1, "ticket_no" : 725 } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "id_num" : 6, "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" }上面的查询会返回"id_num"值为1、6、11、16等的用户。但是要想返回”id_num“为2、3、4、5、7、8、9、10、12等的用户,就要用"$not"了:
> db.users.find({"id_num":{"$not":{"$mod":[5,1]}}}); { "_id" : ObjectId("503b0d70a53a93158463f02f"), "id_num" : 2, "ticket_no" : 542 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "id_num" : 3, "ticket_no" : 390 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "id_num" : 4, "name" : "joe1", "winner" : true } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "id_num" : 5, "location" : "Wisconsin", "name" : "joe", "sex" : "male" }"$not"与正则表达式联合使用的时候极为有用,用来查找那些与特定模式不符的文档。
4.2.4 条件句的规则
如果比较一下上一章的更新修改器和前面的查询文档,会发现以$开头的键处在不同的位置。在查询中,"$lt"在内层文档,而更新中"$inc"则是外层文档的键。
基本可以肯定:条件句是内层文档的键,而修改器则是外层文档的键。
可对一个键应用多个条件。例如,要查找年龄为20~30的所有用户,可以在"age"键上使用"$gt"和”$lt“:
> db.users.find({"age":{"$lte":30,"$gte":20}}); { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "id_num" : 5, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } > db.users.find(); { "_id" : ObjectId("503b08dea53a93158463f02e"), "id_num" : 1, "ticket_no" : 725 } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "id_num" : 2, "ticket_no" : 542 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "id_num" : 3, "ticket_no" : 390 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "id_num" : 4, "name" : "joe1", "winner" : true } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "id_num" : 5, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "id_num" : 6, "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" }
4.3 特定于类型的查询
4.3.1 null
null就有点奇怪,它确实能匹配自身,所以要是有一个包含如下文档的集合
> db.c.insert({"y":null}) > db.c.insert({"y":1}) > db.c.insert({"y":}) Wed Aug 29 16:57:04 SyntaxError: syntax error (shell):1 > db.c.insert({"y":2})
> db.c.find(); { "_id" : ObjectId("503dd95a4ba09fc49a57808d"), "y" : null } { "_id" : ObjectId("503dd95e4ba09fc49a57808e"), "y" : 1 } { "_id" : ObjectId("503dd9624ba09fc49a57808f"), "y" : 2 }
就可以按照预期的方式查询"y"键为null的文档。
> db.c.find({"y":null}) { "_id" : ObjectId("503dd95a4ba09fc49a57808d"), "y" : null }
但是, null不仅仅匹配自身,而且匹配”不存在的“。所以,这种匹配还会返回缺少这个键的所有文档:
> db.c.find({"z":null}) { "_id" : ObjectId("503dd95a4ba09fc49a57808d"), "y" : null } { "_id" : ObjectId("503dd95e4ba09fc49a57808e"), "y" : 1 } { "_id" : ObjectId("503dd9624ba09fc49a57808f"), "y" : 2 }如果仅仅想要匹配键值为null的文档,既要检查该键的值是否为null,还要 通过"$exists"条件判定键值是否已经存在:
> db.c.find({"z":{"$in":[null]}}); { "_id" : ObjectId("503dd95a4ba09fc49a57808d"), "y" : null } { "_id" : ObjectId("503dd95e4ba09fc49a57808e"), "y" : 1 } { "_id" : ObjectId("503dd9624ba09fc49a57808f"), "y" : 2 } > db.c.find({"z":{"$in":[null],"$exists":true}});不幸的是,没有”$eq“操作符,所以看上去有些费解,但是只有一个元素的"$in"操作符效果是一样的。
4.3.2 正则表达式
正则表达式能够灵活有效地匹配字符串。例如,想要查找所有名为Joe或者joe的用户,就可以使用正则表达式执行忽略大小写的匹配。
> db.users.find(); { "_id" : ObjectId("503b08dea53a93158463f02e"), "id_num" : 1, "ticket_no" : 725 } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "id_num" : 2, "ticket_no" : 542 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "id_num" : 3, "ticket_no" : 390 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "id_num" : 4, "name" : "joe1", "winner" : true } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "id_num" : 5, "location" : "Wisconsin", "name" : "joe", "sex" : "male" } { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "id_num" : 6, "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" } > db.users.find({"name":/joe/i}) { "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "id_num" : 4, "name" : "joe1", "winner" : true } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "id_num" : 5, "location" : "Wisconsin", "name" : "joe", "sex" : "male" }
系统可以接受正则表达式标识(i),但不一定要有。现在匹配了各种大小写组合形式的joe,要是还要匹配各种大小写组合形式的joey,就可以略微修改一下正则表达式:
> db.users.find({"name":/joey?/i}) { "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "id_num" : 4, "name" : "joe1", "winner" : true } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "id_num" : 5, "location" : "Wisconsin", "name" : "joe", "sex" : "male" }MongoDB使用Perl兼容的正则表达式(PCRE)库来匹配正则表达式,建议在查询中使用正则表达式前,先在JavaScript shell中检查一下语法,确保匹配与设想的一致。
MongoDB可以为前缀型正则表达式(比如/^joey/查询创建索引),所以这种类型的查询会非常高效。
正则表达式也可以匹配自身:
> db.foo.insert({"bar":/barz/}); > db.foo.find({"bar":/barz/}); { "_id" : ObjectId("503de12a4ba09fc49a578090"), "bar" : /barz/ }
4.3.3 查询数组
查询数组中的元素是非常容易的。数组绝大多数情况下可以这样理解:每一个元素都是整个键的值。例如,如果数组是一个水果清单,比如下面这样:
> db.food.insert({"fruit":["apple","banana","peach"]}); > db.food.find(); { "_id" : ObjectId("503de65e4ba09fc49a578091"), "fruit" : [ "apple", "banana", "peach" ] }
下面的查询语句:
> db.food.find({"fruit":"banana"}) { "_id" : ObjectId("503de65e4ba09fc49a578091"), "fruit" : [ "apple", "banana", "peach" ] }
会成功匹配该文档。
1.$all
如果需要通过多个元素来匹配数组,就要用”$all“了。这样就会匹配一组元素。例如,假设创建包含3个元素的如下集合:
> db.food.insert({"_id":1,"fruit":["apple","banana","peach"]}); > db.food.insert({"_id":2,"fruit":["apple","kumquat","orange"]}); > db.food.insert({"_id":3,"fruit":["cherry","banana","apple"]});
要找到既有"apple"又有"banana"的文档,就得用"$all"来查询
> db.food.find({"fruit":{"$all":["apple","banana"]}}) { "_id" : ObjectId("503de65e4ba09fc49a578091"), "fruit" : [ "apple", "banana", "peach" ] } { "_id" : 1, "fruit" : [ "apple", "banana", "peach" ] } { "_id" : 3, "fruit" : [ "cherry", "banana", "apple" ] }
顺序无关紧要。注意,第三个结果中"banana"在"apple"之前。要是对只有一个元素的数组使用"$all",就和不用"$all"一样了。
例如,{fruit:{"$all":['apple']}}和{fruit:'apple'}的查询效果是等价的。
也可以使用完整的数组精确匹配。但是,精确匹配对于有缺少或者冗余元素的情况就不太灵了。例如,下面的方法会匹配之前的第一个文档:
> db.food.find({"fruit":["apple","banana","peach"]}) { "_id" : ObjectId("503de65e4ba09fc49a578091"), "fruit" : [ "apple", "banana", "peach" ] } { "_id" : 1, "fruit" : [ "apple", "banana", "peach" ] }
但是下面这个就不会匹配
> db.food.find({"fruit":["apple","banana"]}) >
这个亦不会匹配:
> db.food.find({"fruit":["banana","apple","peach"]}) >
要是想查询数组指定位置的元素,则需使用key.index语法指定下标,例如:
> db.food.find({"fruit.2":"peach"}) { "_id" : ObjectId("503de65e4ba09fc49a578091"), "fruit" : [ "apple", "banana", "peach" ] } { "_id" : 1, "fruit" : [ "apple", "banana", "peach" ] }数组下表都是从0开始的,所以上面的表达式会用数组的第三个元素和"peach"匹配。
2.$size
”$size“对于查询数组来说也是意义非凡,顾名思义,可以用其查询指定长度的数组。见下面的列子:
> db.food.find({"fruit":{"$size":3}}); { "_id" : ObjectId("503de65e4ba09fc49a578091"), "fruit" : [ "apple", "banana", "peach" ] } { "_id" : 1, "fruit" : [ "apple", "banana", "peach" ] } { "_id" : 2, "fruit" : [ "apple", "kumquat", "orange" ] } { "_id" : 3, "fruit" : [ "cherry", "banana", "apple" ] } >一种常见的查询需求是需要一个长度范围。"$size"不能与其他查询子句组合(比如"$gt"),但是这种查询可以通过在文档中添加一个"size"键的方式来实现。
这样每一次向指定数组添加元素的时候, 同时增加"size"的值。原来这样的更新:
> db.food.update({ "_id" : ObjectId("503de65e4ba09fc49a578091")},{"$push":{"fruit":"strawberry1"},"$inc":{"size":1}}) > db.food.find(); { "_id" : 1, "fruit" : [ "apple", "banana", "peach" ] } { "_id" : 2, "fruit" : [ "apple", "kumquat", "orange" ] } { "_id" : 3, "fruit" : [ "cherry", "banana", "apple" ] } { "_id" : ObjectId("503de65e4ba09fc49a578091"), "fruit" : [ "apple", "banana", "peach", "strawberry", "strawberry1" ], "size" : 1 } > db.food.update({ "_id" : ObjectId("503de65e4ba09fc49a578091")},{"$push":{"fruit":"strawberry2"},"$inc":{"size":1}}) > db.food.find(); { "_id" : 1, "fruit" : [ "apple", "banana", "peach" ] } { "_id" : 2, "fruit" : [ "apple", "kumquat", "orange" ] } { "_id" : 3, "fruit" : [ "cherry", "banana", "apple" ] } { "_id" : ObjectId("503de65e4ba09fc49a578091"), "fruit" : [ "apple", "banana", "peach", "strawberry", "strawberry1", "strawberry2" ], "size" : 2 } >
增加的操作非常快,所以对性能的影响微乎其微。这样存储文档后,就可以像下面这样查询了:
> db.food.find({"size":{"$gt":3}}); > db.food.find({"size":{"$gte":2}}); { "_id" : ObjectId("503de65e4ba09fc49a578091"), "fruit" : [ "apple", "banana", "peach", "strawberry", "strawberry1", "strawberry2" ], "size" : 2 }
不幸的是,这种技巧并不能与"$addToSet"操作符同时使用。【注”$addToSet“可以避免重复,如果一个值不在数组连就把它加进去】。
3.$slice操作符
本章前面已经提及,find的第二个参数是可选的,可以指定返回哪些键。"$slice"返回数组的一个子集合。
例如,假设现在有一个博客文章的文档,要想返回前10条评论,可以
> db.blog.posts.insert({"title":"A blog post","content":"...","comments":[{"name":"bob","email":"bob@example.com","content":"good post."},{"name":"bob1","email":"bob1@example.com","content":"good post1."}]})
> db.blog.posts.find(); { "_id" : ObjectId("503b3d9109b5fcd77ef4a2a0"), "author" : { "email" : "joe@example.com", "name" : "joe schmoe" }, "content" : "...", "title" : "A Blog Post" } { "_id" : ObjectId("503b54efd2f71a75825a1b5a"), "comments" : [ { "name" : "joe", "email" : "joe@example.com", "content" : "nice post." }, { "name" : "joe", "email" : "joe@example.com", "content" : "nice post." }, { "name" : "joe", "email" : "joe@example.com", "content" : "nice post." }, { "name" : "joe", "email" : "joe@example.com", "content" : "nice post." }, { "name" : "joe", "email" : "joe@example.com", "content" : "nice post." }, { "name" : "joe1", "email" : "joe1@example.com", "content" : "nice post." } ], "content" : "...", "title" : "A blog post" } { "_id" : ObjectId("503b6fb7d2f71a75825a1b5d"), "content" : "...", "comments" : [ { "comment" : "good post", "author" : "John", "votes" : 0 }, { "comment" : "i thought it was too short", "author" : "Claire", "votes" : 3 }, { "comment" : "free watches", "author" : "Alice", "votes" : -1 } ] } { "_id" : ObjectId("503e0cc07d2cae668c71ac55"), "title" : "A blog post", "content" : "...", "comments" : [ { "name" : "bob", "email" : "bob@example.com", "content" : "good post." }, { "name" : "bob1", "email" : "bob1@example.com", "content" : "good post1." } ] } > db.blog.posts.find({"_id" : ObjectId("503e0cc07d2cae668c71ac55")}); { "_id" : ObjectId("503e0cc07d2cae668c71ac55"), "title" : "A blog post", "content" : "...", "comments" : [ { "name" : "bob", "email" : "bob@example.com", "content" : "good post." }, { "name" : "bob1", "email" : "bob1@example.com", "content" : "good post1." } ] }
> db.blog.posts.findOne({"_id" : ObjectId("503e0cc07d2cae668c71ac55")}); { "_id" : ObjectId("503e0cc07d2cae668c71ac55"), "title" : "A blog post", "content" : "...", "comments" : [ { "name" : "bob", "email" : "bob@example.com", "content" : "good post." }, { "name" : "bob1", "email" : "bob1@example.com", "content" : "good post1." } ] }例如,假如现在有一个博客文章的文档,要想返回前1条评论,可以:
> > db.blog.posts.findOne({"_id" : ObjectId("503e0cc07d2cae668c71ac55")},{"comments":{"$slice":1}}); { "_id" : ObjectId("503e0cc07d2cae668c71ac55"), "title" : "A blog post", "content" : "...", "comments" : [ { "name" : "bob", "email" : "bob@example.com", "content" : "good post." } ] }也可以返回后10条评论,只要用-1就可以了:
> db.blog.posts.findOne({"_id" : ObjectId("503e0cc07d2cae668c71ac55")},{"comments":{"$slice":-1}}); { "_id" : ObjectId("503e0cc07d2cae668c71ac55"), "title" : "A blog post", "content" : "...", "comments" : [ { "name" : "bob1", "email" : "bob1@example.com", "content" : "good post1." } ] }
”$slice“也可以接受偏移值和要返回的元素数量,来返回中间的结果:
> db.blog.posts.findOne({"_id" : ObjectId("503e0cc07d2cae668c71ac55")},{"comments":{"$slice":[1,1]}}); { "_id" : ObjectId("503e0cc07d2cae668c71ac55"), "title" : "A blog post", "content" : "...", "comments" : [ { "name" : "bob1", "email" : "bob1@example.com", "content" : "good post1." } ] }
db.blog.posts.findOne(criteria,{"comments":{"$slice":[23,10]}})
这个操作会跳过前23个元素,返回地24个~第33个元素。如果数组不够33个元素,则返回第23个元素后面你的所有元素。除非特别声明,否则使用"$slice"时将返回文档中的所有键。别的键说明符都是默认不返回未提及的键,这点与"$slice"不太一样。例如,有如下的博客文章文档:
> db.blog.posts.findOne({"_id" : ObjectId("503e0cc07d2cae668c71ac55")}); { "_id" : ObjectId("503e0cc07d2cae668c71ac55"), "title" : "A blog post", "content" : "...", "comments" : [ { "name" : "bob", "email" : "bob@example.com", "content" : "good post." }, { "name" : "bob1", "email" : "bob1@example.com", "content" : "good post1." } ] }
并且我们用"$slice"来获取最后一条评论,可以这样:
> db.blog.posts.findOne({"_id" : ObjectId("503e0cc07d2cae668c71ac55")},{"comments":{"$slice":-1}}); { "_id" : ObjectId("503e0cc07d2cae668c71ac55"), "title" : "A blog post", "content" : "...", "comments" : [ { "name" : "bob1", "email" : "bob1@example.com", "content" : "good post1." } ] }”title“和"content"都被返回了,即便是并没有显示地出现在键说明符中。
4.3.4 查询内嵌文档
有两种方法查询内嵌文档:查询整个文档,或者只针对其键/值对进行查询。
查询整个内嵌文档与普通查询完全相同。例如,有如下文档:
> db.peope.insert({"name":{"first":"Joe","last":"Schmoe"},"age":45}) > db.peope.find(); { "_id" : ObjectId("503e11327d2cae668c71ac56"), "name" : { "first" : "Joe", "last" : "Schmoe" }, "age" : 45 } > db.peope.findOne(); { "_id" : ObjectId("503e11327d2cae668c71ac56"), "name" : { "first" : "Joe", "last" : "Schmoe" }, "age" : 45 }要查询姓名为Joe Schmoe的人可以这样:
> db.peope.find({"name":{"first":"Joe","last":"Schmoe"}}) { "_id" : ObjectId("503e11327d2cae668c71ac56"), "name" : { "first" : "Joe", "last" : "Schmoe" }, "age" : 45 } { "_id" : ObjectId("503e123c7d2cae668c71ac57"), "name" : { "first" : "Joe", "last" : "Schmoe" }, "age" : 45 }
> db.peope.find({"name":{"last":"Schmoe","first":"Joe"}})然而,如果Joe决定添加一个代表中间名的键,这个查询就不好用了,因为这样就不匹配整个内嵌文档了。这种查询还是与顺序相关的,db.peope.find({"name":{"last":"Schmoe","first":"Joe"}})就什么都匹配不到了。
如果允许的话,通常只针对内嵌文档的特定键值进行查询才是比较好的做法。这样,即便数据模式改变,也不会导致所有查询因为要精确匹配而一下子都挂掉。我们可以使用点表示法查询内嵌的键:
> db.peope.find({"name.first":"Joe","name.last":"Schmoe"}) { "_id" : ObjectId("503e11327d2cae668c71ac56"), "name" : { "first" : "Joe", "last" : "Schmoe" }, "age" : 45 } { "_id" : ObjectId("503e123c7d2cae668c71ac57"), "name" : { "first" : "Joe", "last" : "Schmoe" }, "age" : 45 } > db.peope.find({"name.last":"Schmoe","name.first":"Joe"}) { "_id" : ObjectId("503e11327d2cae668c71ac56"), "name" : { "first" : "Joe", "last" : "Schmoe" }, "age" : 45 } { "_id" : ObjectId("503e123c7d2cae668c71ac57"), "name" : { "first" : "Joe", "last" : "Schmoe" }, "age" : 45 }
现在,如果Joe增加了更多的键,这个查询依然会匹配他的姓和名
这种点表示法是查询文档区别于其他文档的主要特点。查询文档可以包含点,来表达”深入内嵌文档内部“的意思。点表示法也是待插入的文档不能包含"."的原因。将键作为URL保存的时候经常会遇到此类问题。一种解决方法就是在插入前或者提取后执行一个全局替换,将"."替换成一个URL中的非法字符。
当文档结构变得更加复杂以后,内嵌文档的匹配需要些许技巧。例如,假设有博客文章若干,要找到由Joe发表的5分以上的评论。博客文章的结构如下例所示:
> db.blog.insert({"content":"...","comments":[{"author":"joe","score":3,"comment":"nice post"},{"author":"mary","score":6,"comment":"terrible post"}]})
> db.blog.find(); { "_id" : ObjectId("503e16517d2cae668c71ac58"), "content" : "...", "comments" : [ { "author" : "joe", "score" : 3, "comment" : "nice post" }, { "author" : "mary", "score" : 6, "comment" : "terrible post" } ] } > db.blog.findOne(); { "_id" : ObjectId("503e16517d2cae668c71ac58"), "content" : "...", "comments" : [ { "author" : "joe", "score" : 3, "comment" : "nice post" }, { "author" : "mary", "score" : 6, "comment" : "terrible post" } ] } >
不能直接使用
> db.blog.find({"comment":{"author":"joe","score":{"$gte":5}}})
来查询。内嵌文档匹配要求整个文档完全匹配,而不会匹配”comment“键。使用
> db.blog.find({"comment.author":"joe","comment.score":{"$gte":5}});
同样也不会达到目的。因为符合author条件的评论和符合score条件的评论可能不是同一条评论。也就是说,会返回刚才显示的那个文档。因为"author":"joe"在第一条评论中匹配了,"score":6在第二条评论中匹配了。
要正确地指定一组条件,而不用指定每个键,要使用"$elemMatch"。这种模糊的命名条件句能用来部分指定匹配数组中的单个内嵌文档的限定条件。所以正确的写法应该是这样的:
> db.blog.find({"comments":{"$elemMatch":{"author":"joe","score":{"$gte":5}}}})
4.4 $where 查询
键/值对是很有表现力的查询方式,但是依然有些需求它无法表达。当其他方法都败下阵的时候,就轮到"$where"子句了,用它可以执行任意JavaScript作为查询的一部分。这就使得查询能做(几乎)任何事情。
最典型的应用就是比较稳当中的两个键的值是否相等。例如,有个条目列表,如果其中的
两个值相等则返回文档。请看如下示例:> db.foo.insert({"apple":1,"banana":6,"peach":3});
> db.foo.insert({"apple":8,"spinach":4,"watermelon":4});第二个文档中,"spinach"和"watermelon"的值相同,所以需要返回该文档。
MongoDB似乎永远不会提供一个$条件符来做这个,所以只能用"$where"子句借助JavaScript来完成了。
> db.foo.find({"$where":function(){ ... ... for(var current in this){ ... ... for(var other in this){ ... ... if(current!=other && this[current]==this[other]){ ... ... return true; ... ... } ... ... } ... ... } ... ... return false; ... ... }}); { "_id" : ObjectId("503e1a287d2cae668c71ac5a"), "apple" : 8, "spinach" : 4, "watermelon" : 4 }
如果函数返回true,文档就作为结果的一部分被返回;如果为false;则不然。
前面用的是一个函数,也可以用一个字符串来指定"$where"查询。下面两种表达是完全等价的:
> db.foo.find({"$where":"this.x+this.y==10"}); > db.foo.find({"$where":"function(){return this.x+this.y==10;}"})
不是非常必要时,一定要避免使用"$where"查询,因为它们在速度上要比常规查询慢很多。每个文档都要从BSON转换成JavaScript对象,然后通过"$where"的表达式来运行。同样还不能利用索引。所以,只在走投无路时才考虑"$where"这种用法。将常规查询作为前置过滤,与"$where"组合使用可以不牺牲性能。如果可能的话,用索引根据非”$where“子句进行过滤,”$where“只作用于对结果进行调优。另一种复杂查询的方式是利用MapReduce.
4.5 游标
数据库使用游标来返回find的执行结果。客户端对游标的实现通常能够对最终结果进行有效的控制。可以限制结果的数量,掠过部分结果,根据任意方向任意键的组合对结果进行各种排序,或者是执行其他一些功能强大的操作。
要想从shell中创建一个游标,首先要对集合填充一些文档,然后对其执行查询,并将结果分配给一个局部变量(用var声明的变量就是局部变量)。这里,先创建一个简单的集合,而后做个查询,并用cursor变量保存结果:
> for(i=0;i<100;i++){ ... db.c.insert({x:i}); ... } > var cursor = db.collection.find();
这么做的好处是一次可以查看一条结果。如果将结果放在全局变量或者就没有放在变量中,MongoDB shell会自动迭代,自动显示最开始的若干文档。也就是在这之前我们看大的种种例子,一般大家只想通过shell看看集合里面有什么,而不是想在其中市级运行程序,这样设计也就很合适。
要迭代结果,可以使用游标的next方法。也可以使用hasNext来查看有没有其他结果。典型的结果遍历如下:
> while(cursor.hasNext()){ ... obj=cursor.next(); ... }
cursor.hasNext()检查是否有后续结果存在,然后用cursor.next()将其获得。游标类还实现了迭代器接口,所以可以在foreach循环中使用。
> var cursor = db.peope.find(); > cursor.forEach(function(x) ... { ... print(x.age); ... }); 45 45当调用find的时候,shell并不立即查询数据库,而是等待真正开始要求获得结果的时候才发送查询,这样在执行之前可以给查询附加额外的选项。几乎所有游标对象的方法都返回游标本身,这样就可以按任意顺序组成方法链。例如,下面几种表达是等价的:
> var cursor = db.foo.find().sort({"x":1}).limit(1).skip(10); > var cursor = db.foo.find().limit(1).sort({"x":1}).skip(10); > var cursor = db.foo.find().skip(10).limit(1).sort({"x":1});
此时,查询还没有执行,所有这些函数都只是构造查询。现在,假设我们执行如下操作:
> cursor.hasNext(); false
这时,查询被发往服务器。shell立即获取前100个结果或者前4M数据(两者之中较小者),这样下次调用next或者hasNext时就不必兴师动众跑到服务器上去了。客户端用光了第一组结果,shell会再一次联系数据库,并要求更多的结果。这个过程一直会持续打扫游标耗尽或者结果全部返回。
4.5.1 limit、skip和sort
最常用的查询选项就是限制返回结果的数量,忽略一定数量的结果并排序。所有这些选项一定要在查询被派发到服务器之前添加。
要限制结果数量,可以在find后使用limit函数。例如,只返回3个结果,可以这样:
> db.c.find(); { "_id" : ObjectId("503dd95a4ba09fc49a57808d"), "y" : null } { "_id" : ObjectId("503dd95e4ba09fc49a57808e"), "y" : 1 } { "_id" : ObjectId("503dd9624ba09fc49a57808f"), "y" : 2 } { "_id" : ObjectId("503e38157d2cae668c71ac5b"), "x" : 0 } { "_id" : ObjectId("503e38157d2cae668c71ac5c"), "x" : 1 } { "_id" : ObjectId("503e38157d2cae668c71ac5d"), "x" : 2 } { "_id" : ObjectId("503e38157d2cae668c71ac5e"), "x" : 3 } { "_id" : ObjectId("503e38157d2cae668c71ac5f"), "x" : 4 } { "_id" : ObjectId("503e38157d2cae668c71ac60"), "x" : 5 } { "_id" : ObjectId("503e38157d2cae668c71ac61"), "x" : 6 } { "_id" : ObjectId("503e38157d2cae668c71ac62"), "x" : 7 } { "_id" : ObjectId("503e38157d2cae668c71ac63"), "x" : 8 } { "_id" : ObjectId("503e38157d2cae668c71ac64"), "x" : 9 } { "_id" : ObjectId("503e38157d2cae668c71ac65"), "x" : 10 } { "_id" : ObjectId("503e38157d2cae668c71ac66"), "x" : 11 } { "_id" : ObjectId("503e38157d2cae668c71ac67"), "x" : 12 } { "_id" : ObjectId("503e38157d2cae668c71ac68"), "x" : 13 } { "_id" : ObjectId("503e38157d2cae668c71ac69"), "x" : 14 } { "_id" : ObjectId("503e38157d2cae668c71ac6a"), "x" : 15 } { "_id" : ObjectId("503e38157d2cae668c71ac6b"), "x" : 16 } has more
> db.c.find().limit(3); { "_id" : ObjectId("503dd95a4ba09fc49a57808d"), "y" : null } { "_id" : ObjectId("503dd95e4ba09fc49a57808e"), "y" : 1 } { "_id" : ObjectId("503dd9624ba09fc49a57808f"), "y" : 2 }
要是匹配的结果不到3个,则返回匹配数量的结果。limit指定的是上限,而非下限。skip与limit类似:
> db.c.find().skip(3); { "_id" : ObjectId("503e38157d2cae668c71ac5b"), "x" : 0 } { "_id" : ObjectId("503e38157d2cae668c71ac5c"), "x" : 1 } { "_id" : ObjectId("503e38157d2cae668c71ac5d"), "x" : 2 } { "_id" : ObjectId("503e38157d2cae668c71ac5e"), "x" : 3 } { "_id" : ObjectId("503e38157d2cae668c71ac5f"), "x" : 4 } { "_id" : ObjectId("503e38157d2cae668c71ac60"), "x" : 5 } { "_id" : ObjectId("503e38157d2cae668c71ac61"), "x" : 6 } { "_id" : ObjectId("503e38157d2cae668c71ac62"), "x" : 7 } { "_id" : ObjectId("503e38157d2cae668c71ac63"), "x" : 8 } { "_id" : ObjectId("503e38157d2cae668c71ac64"), "x" : 9 } { "_id" : ObjectId("503e38157d2cae668c71ac65"), "x" : 10 } { "_id" : ObjectId("503e38157d2cae668c71ac66"), "x" : 11 } { "_id" : ObjectId("503e38157d2cae668c71ac67"), "x" : 12 } { "_id" : ObjectId("503e38157d2cae668c71ac68"), "x" : 13 } { "_id" : ObjectId("503e38157d2cae668c71ac69"), "x" : 14 } { "_id" : ObjectId("503e38157d2cae668c71ac6a"), "x" : 15 } { "_id" : ObjectId("503e38157d2cae668c71ac6b"), "x" : 16 } { "_id" : ObjectId("503e38157d2cae668c71ac6c"), "x" : 17 } { "_id" : ObjectId("503e38157d2cae668c71ac6d"), "x" : 18 } { "_id" : ObjectId("503e38157d2cae668c71ac6e"), "x" : 19 } has more上面的操作会略过前三个匹配的文档,然后返回余下的文档。如果集合里面能匹配的文档少于3个,则不会返回任何文档。
sort用一个对象作为参数:一组键/值对,键对应文档的键名,值代表排序的方向。排序方向可以是1(升序)或者-1(降序)。如果指定了多个键,则按照多个键的顺序逐个排序。例如,要按照username升序及age降序排序,可以这样写:
> db.c.find().sort({"username":1,"age":-1})
{ "_id" : ObjectId("503dd95a4ba09fc49a57808d"), "y" : null }
{ "_id" : ObjectId("503dd95e4ba09fc49a57808e"), "y" : 1 }
{ "_id" : ObjectId("503dd9624ba09fc49a57808f"), "y" : 2 }
{ "_id" : ObjectId("503e38157d2cae668c71ac5b"), "x" : 0 }
{ "_id" : ObjectId("503e38157d2cae668c71ac5c"), "x" : 1 }
{ "_id" : ObjectId("503e38157d2cae668c71ac5d"), "x" : 2 }
{ "_id" : ObjectId("503e38157d2cae668c71ac5e"), "x" : 3 }
{ "_id" : ObjectId("503e38157d2cae668c71ac5f"), "x" : 4 }
{ "_id" : ObjectId("503e38157d2cae668c71ac60"), "x" : 5 }
{ "_id" : ObjectId("503e38157d2cae668c71ac61"), "x" : 6 }
{ "_id" : ObjectId("503e38157d2cae668c71ac62"), "x" : 7 }
{ "_id" : ObjectId("503e38157d2cae668c71ac63"), "x" : 8 }
{ "_id" : ObjectId("503e38157d2cae668c71ac64"), "x" : 9 }
{ "_id" : ObjectId("503e38157d2cae668c71ac65"), "x" : 10 }
{ "_id" : ObjectId("503e38157d2cae668c71ac66"), "x" : 11 }
{ "_id" : ObjectId("503e38157d2cae668c71ac67"), "x" : 12 }
{ "_id" : ObjectId("503e38157d2cae668c71ac68"), "x" : 13 }
{ "_id" : ObjectId("503e38157d2cae668c71ac69"), "x" : 14 }
{ "_id" : ObjectId("503e38157d2cae668c71ac6a"), "x" : 15 }
{ "_id" : ObjectId("503e38157d2cae668c71ac6b"), "x" : 16 }
has more
这三个方法可以组合使用。这对于分页非常有用。例如,你有个在线商店,有人想搜索mp3.若是想每页返回50个结果,而且按照价格从高到低排序,可以这样写:
db.stock.find({"desc":"mp3"}).limit(50).sort({"price":-1})
点击”下一页“可以看到更多的结果,通过skip也可以非常简单地实现,只需要略过前50个结果就好了(已经在第一页显示了):
db.stock.find({"desc":"mp3"}).limit(50).skip(50).sort({"price":-1})
然而,掠过过多的结果会导致性能问题,所以建议尽量避免。
比较顺序
MongoDB处理不同类型的数据是有一个顺序的。有时候一个键的值可能是多种类型的,例如,整数和布尔类型,或者字符串和null。如果对这种混合类型的键排序,其排序顺序是预先定义好的。从小到大,其顺序如下:
1 最小值
2 null
3 数字(整型、长整型、双精度)
4 字符串
5、对象/文档
6、数组
7、二进制数据
8、对象ID
9、布尔型
10、日期型
11、时间戳
12、正则表达式
13、最大值
4.5.2 避免使用skip掠过大量结果
用skip略过少量的文档还是不错的。但是要是数量非常多的话,skip就会变得很慢。所以要尽量避免。通常可以像文档本身内置查询条件,来避免过大的skip,或者利用上次的结果来计算下一次查询。
1.不用skip对结果分页
最简单的分页方法就是用limit返回结果的第一页,然后将每个后续页面作为相对于开始的偏移量返回。
//do not use:slow for large skips
> db.c.find().limit(10); { "_id" : ObjectId("503dd95a4ba09fc49a57808d"), "y" : null } { "_id" : ObjectId("503dd95e4ba09fc49a57808e"), "y" : 1 } { "_id" : ObjectId("503dd9624ba09fc49a57808f"), "y" : 2 } { "_id" : ObjectId("503e38157d2cae668c71ac5b"), "x" : 0 } { "_id" : ObjectId("503e38157d2cae668c71ac5c"), "x" : 1 } { "_id" : ObjectId("503e38157d2cae668c71ac5d"), "x" : 2 } { "_id" : ObjectId("503e38157d2cae668c71ac5e"), "x" : 3 } { "_id" : ObjectId("503e38157d2cae668c71ac5f"), "x" : 4 } { "_id" : ObjectId("503e38157d2cae668c71ac60"), "x" : 5 } { "_id" : ObjectId("503e38157d2cae668c71ac61"), "x" : 6 } > db.c.find().skip(10).limit(10); { "_id" : ObjectId("503e38157d2cae668c71ac62"), "x" : 7 } { "_id" : ObjectId("503e38157d2cae668c71ac63"), "x" : 8 } { "_id" : ObjectId("503e38157d2cae668c71ac64"), "x" : 9 } { "_id" : ObjectId("503e38157d2cae668c71ac65"), "x" : 10 } { "_id" : ObjectId("503e38157d2cae668c71ac66"), "x" : 11 } { "_id" : ObjectId("503e38157d2cae668c71ac67"), "x" : 12 } { "_id" : ObjectId("503e38157d2cae668c71ac68"), "x" : 13 } { "_id" : ObjectId("503e38157d2cae668c71ac69"), "x" : 14 } { "_id" : ObjectId("503e38157d2cae668c71ac6a"), "x" : 15 }
{ "_id" : ObjectId("503e38157d2cae668c71ac6b"), "x" : 16 }
> db.c.find().skip(20).limit(10); { "_id" : ObjectId("503e38157d2cae668c71ac6c"), "x" : 17 } { "_id" : ObjectId("503e38157d2cae668c71ac6d"), "x" : 18 } { "_id" : ObjectId("503e38157d2cae668c71ac6e"), "x" : 19 } { "_id" : ObjectId("503e38157d2cae668c71ac6f"), "x" : 20 } { "_id" : ObjectId("503e38157d2cae668c71ac70"), "x" : 21 } { "_id" : ObjectId("503e38157d2cae668c71ac71"), "x" : 22 } { "_id" : ObjectId("503e38157d2cae668c71ac72"), "x" : 23 } { "_id" : ObjectId("503e38157d2cae668c71ac73"), "x" : 24 } { "_id" : ObjectId("503e38157d2cae668c71ac74"), "x" : 25 } { "_id" : ObjectId("503e38157d2cae668c71ac75"), "x" : 26 } >然而,一般来讲可以找到一种方法实现不用skip的分页,这取决于查询本身。例如,要按照"date"降序显示文档。可以用如下方式获取结果的第一页:
var page1 = db.foo.find().sort({"date":-1}).limit(100)
> db.users.find().sort({"registered":-1}).limit(100); { "_id" : ObjectId("503b5937d2f71a75825a1b5b"), "emails" : [ "joe@example", "joe@gmail.com", "joe@yahoo.com", "joe@hotmail.com", "joe@php.net", "joe@example.com", "joe@python.org" ], "id_num" : 6, "registered" : ISODate("2006-12-31T16:00:00Z"), "username" : "joe" } { "_id" : ObjectId("503b08dea53a93158463f02e"), "id_num" : 1, "ticket_no" : 725 } { "_id" : ObjectId("503b0d70a53a93158463f02f"), "id_num" : 2, "ticket_no" : 542 } { "_id" : ObjectId("503b0d78a53a93158463f030"), "id_num" : 3, "ticket_no" : 390 } { "_id" : ObjectId("503b0d7ca53a93158463f031"), "age" : 49, "id_num" : 4, "name" : "joe1", "winner" : true } { "_id" : ObjectId("503b338da53a93158463f033"), "age" : 30, "id_num" : 5, "location" : "Wisconsin", "name" : "joe", "sex" : "male" }然后,可以利用最后一个文档中"date"的值作为查询条件,来获取下一页:
var latest = null;
//display first page
while(page1.hasNext()){
latest = page1.next();
display(latest);
}
//get next page
var page2 = db.foo.find({"date":{"$gt":latest.date}});
page2.sort({"date":-1}).limit(100);
2、随机选取文档
从集合里面随机挑选一个文档算是个常见问题。最笨的(也是最慢的)做法就是先计算文档总数,然后选择一个从0到文档数量之间的随机数,利用find做一次查询,掠过这个随机数那么多的文档,这个随机数的取值范围为0到集合中文档的总数。
//do not use
var total = db.foo.count();
var random = Math.floor(Math.random()*total)
db.foo.find.skip(random).limit(1)
这种选取随机文档的做法实在是低效:首先得计算总数(要是有查询条件就会很费时),然后大量的skip也会非常耗时。
略微东东脑筋,从集合里面查找一个随机元素还是好得多的办法的。秘诀就是在插入文档时给每个文档都添加一个额外的随机键。例如,在shell中,可以用Math.random()产生一个0~1的随机数:
> db.people.insert({"name":"joe","random":Math.random()}); > db.people.insert({"name":"john","random":Math.random()}); > db.people.insert({"name":"jim","random":Math.random()}); >这样,想要从集合中查找一个随机文档,只要计算个随机数并将其作为查询条件就好了,完全不用skip:
> db.people.find(); { "_id" : ObjectId("503e45f77d2cae668c71acbf"), "name" : "joe", "random" : 0.8444473752020685 } { "_id" : ObjectId("503e45fe7d2cae668c71acc0"), "name" : "john", "random" : 0.04516828536238293 } { "_id" : ObjectId("503e46027d2cae668c71acc1"), "name" : "jim", "random" : 0.896214824631229 }var random = Math.random();
result = db.foo.findOne({"random":{"$gt":random}})
也有偶尔遇到随机数比所有集合里面存着的随机值大的情况【还会有极少数相等的时候,$lte可能更加严谨】,这时就没有结果返回了。那就换个方向,这样就万事大吉了:
if(result==null){
result = db.foo.findOne({"random":{"$lt":random}})
}
> db.people.findOne({"random":{"$lte":random}}) { "_id" : ObjectId("503e45fe7d2cae668c71acc0"), "name" : "john", "random" : 0.04516828536238293 }
要是集合里面本就没有文档,则会返回null,这说得通。
这种技巧还可以和其他各种复杂的查询一同使用,仅需要确保有包含随机键的索引即可。例如,想随机找一个加州的水暖工,可以对"profession"、"state"和"random"建立索引
db.people.ensureIndex({"profession":1,"state":1,"random":1})
这样就能很快得出一个随机结果了
4.5.3 高级查询选项
查询分包装的和普通的两类。普通的查询就像下面这个:
var cursor = db.foo.find({"foo":"bar"})
有几个选项用于包装查询。例如,假如我们执行一个排序:
var cursor = db.foo.find({"foo","bar"}).sort({"x",-1})
实际上不是将{"foo":"bar"}作为查询直接发送给数据库,而是将查询包装在一个更大的文档中。shell会把查询从{"foo","bar"}转换成{"$query":{"foo":"bar"},"$orderby":{"x":1}}
绝大多数驱动程序有些辅助措施想查询添加各种选项。下面列举了其他一些有用的选项。
$maxscan:integer
指定查询最多扫描的文档数量
$min:document
查询的开始条件
$max:document
查询的结束条件
$hint:document
指定服务器使用哪个索引进行查询
$explain:boolean
获取查询执行的细节(用到的索引、结果数量、耗时等),而并非真正执行查询
$snapshot:boolean
确保查询的结果是在查询执行那一刻的一致快照。
4.5.4 获取一致结果
数据处理通常的一种做法就是先把数据从MongoDB中取出来,然后经过某种变换,最后再存回去:
cursor = db.foo.find();
while(cursor.hasNext()){
var doc = cursor.next();
doc = process(doc);
db.foo.save(doc);
}
结果比较少时还好,文档较多的时候就玩不转了。
做查找的时候,从集合的开头返回结果,并向右移动。程序获取前100个文档并处理。当要将其保存回数据库时,如果文档体积增加而预留空间不足,则需要将其移动。通常会将其挪置集合的末尾处。应对这个问题的方法就是对查询进行快照。如果使用了”$snapshot“选项,查询就是针对不变的集合视图进行的。所有返回一组结果的查询实际上都进行了快照,不一致只在游标等待结果时集合内容被改变的情况下发生的。