Reformulação do documento MongoDB

Aqui está uma agregação que encontra os dois primeiros por duração em cada categoria (ela quebra "empates" arbitrariamente, o que parece estar alinhado com a saída da amostra):

var pregroup = { "$group" : {
        "_id" : {
            "type" : "$tracked_item_type",
            "name" : "$tracked_item_name"
        },
        "duration" : {
            "$sum" : "$duration"
        }
    }
};
var sort = { "$sort" : { "_id.type" : 1, "duration" : -1 } };
var group1 = { "$group" : {
        "_id" : "$_id.type",
        "num1" : {
            "$first" : {
                "name" : "$_id.name",
                "dur" : "$duration"
            }
        },
        "other" : {
            "$push" : {
                "name" : "$_id.name",
                "dur" : "$duration"
            }
        },
    "all" : {
        "$push" : {
            "name" : "$_id.name",
            "dur" : "$duration"
        }
    }
    }
};
var unwind = { "$unwind" : "$other" };
project = {
    "$project" : {
        "keep" : {
            "$ne" : [
                "$num1.name",
                "$other.name"
            ]
        },
        "num1" : 1,
        "all" : 1,
        "other" : 1
    }
};
var match = { "$match" : { "keep" : true } };
var sort2 = { "$sort" : { "_id" : 1, "other.dur" : -1 } };
var group2 = { "$group" : {
        "_id" : "$_id",
        "numberOne" : {
            "$first" : "$num1"
        },
        "numberTwo" : {
            "$first" : "$other"
        },
    "all" : {
        "$first" : "$all"
    }
    }
};
unwind2 = { "$unwind" : "$all" };
project2 = { "$project" : {
    "_id" : 0,
    "tracked_item_type" : "$_id",
    "tracked_item_name" : {
        "$cond" : [
            {
                "$or" : [
                    {
                        "$eq" : [
                            "$all.name",
                            "$numberOne.name"
                        ]
                    },
                    {
                        "$eq" : [
                            "$all.name",
                            "$numberTwo.name"
                        ]
                    }
                ]
            },
            "$all.name",
            null
        ]
    },
    "duration" : {
        "$cond" : [
            {
                "$or" : [
                    {
                        "$eq" : [
                            "$all.name",
                            "$numberOne.name"
                        ]
                    },
                    {
                        "$eq" : [
                            "$all.name",
                            "$numberTwo.name"
                        ]
                    }
                ]
            },
            "$all.dur",
            null
        ]
    }
}
}
match2 = { "$match" : { "tracked_item_name" : { "$ne" : null } } };

Executando isso com seus dados de amostra:

db.top2.aggregate(pregroup, sort, group1, unwind, project, match, sort2, group2, unwind2, project2, match2).toArray()
[
    {
        "tracked_item_type" : "Software",
        "tracked_item_name" : "Word",
        "duration" : 9540
    },
    {
        "tracked_item_type" : "Software",
        "tracked_item_name" : "Notepad",
        "duration" : 4000
    },
    {
        "tracked_item_type" : "Site",
        "tracked_item_name" : "Digital Blasphemy",
        "duration" : 8000
    },
    {
        "tracked_item_type" : "Site",
        "tracked_item_name" : "Facebook",
        "duration" : 7920
    }
]

Isso funcionará com um número arbitrário de domínios (diferentes valores de tipo de item rastreado) e você não precisa saber todos os nomes antecipadamente. No entanto, generalizar para os três primeiros, quatro primeiros, cinco primeiros, etc., adicionará mais quatro estágios para cada valor "N" superior adicional - não muito prático ou bonito.

Por favor, vote neste tíquete jira para obter uma implementação mais nativa da funcionalidade "top N" na estrutura de agregação.