Create a Multikey Index in MongoDB

In MongoDB, when you create an index on a field that holds an array, it’s automatically created as a multikey index.

Multikey indexes support efficient queries against array fields.

Multikey indexes can be created for arrays that hold scalar data (e.g. strings, numbers, etc) and nested documents.

Example

Suppose we have a collection called products that contains the following documents:

{ "_id" : 1, "product" : "Bat", "sizes" : [ "S", "M", "L" ] }
{ "_id" : 2, "product" : "Hat", "sizes" : [ "S", "L", "XL" ] }
{ "_id" : 3, "product" : "Cap", "sizes" : [ "M", "L" ] }

We can create a multikey index on that collection like this:

db.products.createIndex(
   {
     "sizes": 1
   }
)

It’s just like creating a regular index. You don’t need to explicitly specify that it’s a multikey index. MongoDB is able to determine that the field holds an array, and therefore create it as a multikey index.

With multikey indexes, MongoDB creates an index key for each element in the array.

Compound Multikey Index on Embedded Documents

As mentioned, you can create multikey indexes for arrays that hold embedded documents.

You can create a compound index on these, so that your index is created against multiple fields in the array.

Suppose we have a collection called restaurants with documents like this:

db.restaurants.insertMany([
   {
    _id: 1,
    name: "The Rat",
    reviews: [{
        name: "Stanley",
        date: "04 December, 2020",
        ordered: "Dinner",
        rating: 1
      },
      {
        name: "Tom",
        date: "04 October, 2020",
        ordered: "Lunch",
        rating: 2
      }]
   },
   {
    _id: 2,
    name: "Yum Palace",
    reviews: [{
        name: "Stacey",
        date: "08 December, 2020",
        ordered: "Lunch",
        rating: 3
      },
      {
        name: "Tom",
        date: "08 October, 2020",
        ordered: "Breakfast",
        rating: 4
      }]
   },
   {
    _id: 3,
    name: "Boardwalk Cafe",
    reviews: [{
        name: "Steve",
        date: "20 December, 2020",
        ordered: "Breakfast",
        rating: 5
      },
      {
        name: "Lisa",
        date: "25 October, 2020",
        ordered: "Dinner",
        rating: 5
      },
      {
        name: "Kim",
        date: "21 October, 2020",
        ordered: "Dinner",
        rating: 5
      }]
   }
])

We could create a compound multikey index like this:

db.restaurants.createIndex( 
  { 
    "reviews.ordered": 1,
    "reviews.rating": -1
  } 
)

Now, the multikey index will be used whenever we run queries that involve those fields.

Here’s what the query plan looks like when we search against one of those fields:

db.restaurants.find( { "reviews.ordered": "Dinner" } ).explain()

Result:

{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "krankykranes.restaurants",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"reviews.ordered" : {
				"$eq" : "Dinner"
			}
		},
		"queryHash" : "A01226B4",
		"planCacheKey" : "0E761583",
		"winningPlan" : {
			"stage" : "FETCH",
			"inputStage" : {
				"stage" : "IXSCAN",
				"keyPattern" : {
					"reviews.ordered" : 1,
					"reviews.rating" : -1
				},
				"indexName" : "reviews.ordered_1_reviews.rating_-1",
				"isMultiKey" : true,
				"multiKeyPaths" : {
					"reviews.ordered" : [
						"reviews"
					],
					"reviews.rating" : [
						"reviews"
					]
				},
				"isUnique" : false,
				"isSparse" : false,
				"isPartial" : false,
				"indexVersion" : 2,
				"direction" : "forward",
				"indexBounds" : {
					"reviews.ordered" : [
						"[\"Dinner\", \"Dinner\"]"
					],
					"reviews.rating" : [
						"[MaxKey, MinKey]"
					]
				}
			}
		},
		"rejectedPlans" : [ ]
	},
	"ok" : 1
}

The part that reads IXSCAN means that it did an index scan. If it hadn’t used the index, it would have done a collection scan (COLLSCAN).

It’s the same when we do a query that involves both fields in the index:

db.restaurants.find( { "reviews.ordered": "Dinner", "reviews.rating": { $gt: 3 } } ).explain()

Result:

{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "krankykranes.restaurants",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"$and" : [
				{
					"reviews.ordered" : {
						"$eq" : "Dinner"
					}
				},
				{
					"reviews.rating" : {
						"$gt" : 3
					}
				}
			]
		},
		"queryHash" : "C770E210",
		"planCacheKey" : "447B5666",
		"winningPlan" : {
			"stage" : "FETCH",
			"filter" : {
				"reviews.rating" : {
					"$gt" : 3
				}
			},
			"inputStage" : {
				"stage" : "IXSCAN",
				"keyPattern" : {
					"reviews.ordered" : 1,
					"reviews.rating" : -1
				},
				"indexName" : "reviews.ordered_1_reviews.rating_-1",
				"isMultiKey" : true,
				"multiKeyPaths" : {
					"reviews.ordered" : [
						"reviews"
					],
					"reviews.rating" : [
						"reviews"
					]
				},
				"isUnique" : false,
				"isSparse" : false,
				"isPartial" : false,
				"indexVersion" : 2,
				"direction" : "forward",
				"indexBounds" : {
					"reviews.ordered" : [
						"[\"Dinner\", \"Dinner\"]"
					],
					"reviews.rating" : [
						"[MaxKey, MinKey]"
					]
				}
			}
		},
		"rejectedPlans" : [ ]
	},
	"ok" : 1
}

However, if one of the fields in the query isn’t included in the index, then it results in a collection scan:

db.restaurants.find( { "reviews.name": "Lisa", "reviews.rating": { $gt: 3 } } ).explain()

Result:

{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "krankykranes.restaurants",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"$and" : [
				{
					"reviews.name" : {
						"$eq" : "Lisa"
					}
				},
				{
					"reviews.rating" : {
						"$gt" : 3
					}
				}
			]
		},
		"queryHash" : "49EF83EC",
		"planCacheKey" : "3C60321C",
		"winningPlan" : {
			"stage" : "COLLSCAN",
			"filter" : {
				"$and" : [
					{
						"reviews.name" : {
							"$eq" : "Lisa"
						}
					},
					{
						"reviews.rating" : {
							"$gt" : 3
						}
					}
				]
			},
			"direction" : "forward"
		},
		"rejectedPlans" : [ ]
	},
	"ok" : 1
}