MongoDB and distinct values in arrays
This post is a mini-tutorial on how to deal with array fields in MongoDB, specifically arrays of items that may or may not be duplicated.
Let's create a collection of unicorns, smilarly to The Little MongoDB Book examples:
> db.unicorns.insert( { name: 'Scott', loves: ['sweets', 'movies'] } ) | |
> db.unicorns.insert( { name: 'Nega Scott', loves: ['evil things', ], evil: true} ) |
Find should now return something like this, the _ids will vary:
> db.unicorns.find() | |
{ "_id" : ObjectId("4f7b80fffbd1718d939a8801"), "name" : "Scott", "loves" : [ "sweets", "movies"] } | |
{ "_id" : ObjectId("4f7b8106fbd1718d939a8802"), "name" : "Nega Scott", "loves" : [ "evil things"], "evil" : true } |
Adding a value into an array
We've found out that our unicorn Scott also loves playing games, so lets push a new value into our document's loves array:
> db.unicorns.update( {name: 'Playja' }, { $push: { loves: 'playing games' } } ) | |
> db.unicorns.find() | |
{ "_id" : ObjectId("4f7b80fffbd1718d939a8801"), "loves" : [ "sweets", "movies", "playing games" ], "name" : "Scott" } | |
{ "_id" : ObjectId("4f7b8106fbd1718d939a8802"), "name" : "Nega Scott", "loves" : [ "evil things" ], "evil" : true } |
Using push, we could insert into our the same value multiple times.
Pushing a value if its unique
We have found out that all our unicorns love movies and want to update the whole collection. Unfortunately Scott already has got movies in his loves field. As shown here, we can use addToSet, which won't add a duplicated value to an array:
> db.unicorns.update({}, {$addToSet: { loves : 'movies' } }, false, true) | |
> db.unicorns.find() | |
{ "_id" : ObjectId("4f7b80fffbd1718d939a8801"), "loves" : [ "sweets", "playing games", "movies"], "name" : "Scott" } | |
{ "_id" : ObjectId("4f7b8106fbd1718d939a8802"), "evil" : true, "loves" : [ "evil things", "movies" ], "name" : "Nega Scott" } |
The third parameter of update is upsert(false by default). The fourth is multi which makes the query update every document that matches the criteria (false by default meaning that it only updates the first one). You can read more here.
Getting the distinct values
> db.unicorns.distinct( "loves" ) //all distinct values from <i>loves</i> field of every unicorn | |
[ "movies", "playing games", "sweets", "evil things" ] | |
> db.unicorns.distinct( "loves", {name: /Nega/ }) //all distinct values from <i>loves</i> field of unicorn that matches the regex /nega/ | |
[ "evil things", "movies" ] |
Removing duplicate values from array
Lets say that we missused push and added some duplicate values into our array. Here's now to fix this:
> db.unicorns.update({}, {$push: { loves : 'movies' } }, false, true) | |
> db.unicorns.update({}, {$push: { loves : 'movies' } }, false, true) | |
> db.unicorns.find() | |
{ "_id" : ObjectId("4f7b8106fbd1718d939a8802"), "evil" : true, "loves" : [ "evil things", "movies", "movies", "movies" ], "name" : "negaPlayja" } | |
{ "_id" : ObjectId("4f7b80fffbd1718d939a8801"), "loves" : [ "sweets", "playing games", "movies", "movies" "movies" ], "name" : "Playja" } | |
// We can remove the duplicates for negaPlayja like this: | |
> db.unicorns.update({ name: /nega/ }, { $set:{ loves : db.unicorns.distinct( "loves", { name: /nega/ }) } } ) | |
// To do this for every document we can use a JS function: | |
db.unicorns.find().forEach(uni => { | |
var distLoves = db.unicorns.distinct( 'loves', { _id: uni._id } ) | |
db.unicorns.update( { _id: uni._id }, {$set: {loves: distLoves}}) | |
}) |
I hope I could help a little bit, especially with removing duplicate values.
Comments
Post a Comment