diff --git a/chainercv/evaluations/__init__.py b/chainercv/evaluations/__init__.py
index 1f12332cdb..53017c6bb1 100644
--- a/chainercv/evaluations/__init__.py
+++ b/chainercv/evaluations/__init__.py
@@ -5,6 +5,7 @@
 from chainercv.evaluations.eval_instance_segmentation_coco import eval_instance_segmentation_coco  # NOQA
 from chainercv.evaluations.eval_instance_segmentation_voc import calc_instance_segmentation_voc_prec_rec  # NOQA
 from chainercv.evaluations.eval_instance_segmentation_voc import eval_instance_segmentation_voc  # NOQA
+from chainercv.evaluations.eval_keypoint_detection_coco import eval_keypoint_detection_coco  # NOQA
 from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_confusion  # NOQA
 from chainercv.evaluations.eval_semantic_segmentation import calc_semantic_segmentation_iou  # NOQA
 from chainercv.evaluations.eval_semantic_segmentation import eval_semantic_segmentation  # NOQA
diff --git a/chainercv/evaluations/eval_keypoint_detection_coco.py b/chainercv/evaluations/eval_keypoint_detection_coco.py
new file mode 100644
index 0000000000..97dfc75b6f
--- /dev/null
+++ b/chainercv/evaluations/eval_keypoint_detection_coco.py
@@ -0,0 +1,308 @@
+import itertools
+import numpy as np
+import os
+import six
+
+from chainercv.evaluations.eval_detection_coco import _redirect_stdout
+from chainercv.evaluations.eval_detection_coco import _summarize
+
+try:
+    import pycocotools.coco
+    import pycocotools.cocoeval
+    _available = True
+except ImportError:
+    _available = False
+
+
+def eval_keypoint_detection_coco(
+        pred_points, pred_labels, pred_scores,
+        gt_points, gt_visibles, gt_labels=None, gt_bboxes=None,
+        gt_areas=None, gt_crowdeds=None):
+    """Evaluate keypoint detection based on evaluation code of MS COCO.
+
+    This function evaluates predicted keypints obtained by using average
+    precision for each class.
+    The code is based on the evaluation code used in MS COCO.
+
+    Args:
+        pred_points (iterable of numpy.ndarray): See the table below.
+        pred_labels (iterable of numpy.ndarray): See the table below.
+        pred_scores (iterable of numpy.ndarray): See the table below.
+            This is used to rank instances. Note that this is not
+            the confidene for each keypoint.
+        gt_points (iterable of numpy.ndarray): See the table below.
+        gt_visibles (iterable of numpy.ndarray): See the table below.
+        gt_labels (iterable of numpy.ndarray): See the table below.
+        gt_bboxes (iterable of numpy.ndarray): See the table below.
+            This is optional. If this is :obj:`None`, the ground truth
+            bounding boxes are esitmated from the ground truth
+            keypoints.
+        gt_areas (iterable of numpy.ndarray): See the table below. If
+            :obj:`None`, some scores are not returned.
+        gt_crowdeds (iterable of numpy.ndarray): See the table below.
+
+    .. csv-table::
+        :header: name, shape, dtype, format
+
+        :obj:`pred_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \
+        ":math:`(y, x)`"
+        :obj:`pred_labels`, ":math:`[(R,)]`", :obj:`int32`, \
+        ":math:`[0, \#fg\_class - 1]`"
+        :obj:`pred_scores`, ":math:`[(R,)]`", :obj:`float32`, \
+        --
+        :obj:`gt_points`, ":math:`[(R, K, 2)]`", :obj:`float32`, \
+        ":math:`(y, x)`"
+        :obj:`gt_visibles`, ":math:`[(R, K)]`", :obj:`bool`, --
+        :obj:`gt_labels`, ":math:`[(R,)]`", :obj:`int32`, \
+        ":math:`[0, \#fg\_class - 1]`"
+        :obj:`gt_bboxes`, ":math:`[(R, 4)]`", :obj:`float32`, \
+        ":math:`(y_{min}, x_{min}, y_{max}, x_{max})`"
+        :obj:`gt_areas`, ":math:`[(R,)]`", \
+        :obj:`float32`, --
+        :obj:`gt_crowdeds`, ":math:`[(R,)]`", :obj:`bool`, --
+
+
+    Returns:
+        dict:
+
+        The keys, value-types and the description of the values are listed
+        below. The APs and ARs calculated with different iou
+        thresholds, sizes of objects, and numbers of detections
+        per image. For more details on the 12 patterns of evaluation metrics,
+        please refer to COCO's official `evaluation page`_.
+
+        .. csv-table::
+            :header: key, type, description
+
+            ap/iou=0.50:0.95/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_
+            ap/iou=0.50/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_
+            ap/iou=0.75/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_
+            ap/iou=0.50:0.95/area=medium/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_ [#coco_kp_eval_5]_
+            ap/iou=0.50:0.95/area=large/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_1]_ [#coco_kp_eval_5]_
+            ar/iou=0.50:0.95/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_
+            ar/iou=0.50/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_
+            ar/iou=0.75/area=all/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_
+            ar/iou=0.50:0.95/area=medium/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_ [#coco_kp_eval_5]_
+            ar/iou=0.50:0.95/area=large/max_dets=20, *numpy.ndarray*, \
+                [#coco_kp_eval_2]_ [#coco_kp_eval_5]_
+            map/iou=0.50:0.95/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_
+            map/iou=0.50/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_
+            map/iou=0.75/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_
+            map/iou=0.50:0.95/area=medium/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_ [#coco_kp_eval_5]_
+            map/iou=0.50:0.95/area=large/max_dets=20, *float*, \
+                [#coco_kp_eval_3]_ [#coco_kp_eval_5]_
+            mar/iou=0.50:0.95/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_
+            mar/iou=0.50/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_
+            mar/iou=0.75/area=all/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_
+            mar/iou=0.50:0.95/area=medium/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_ [#coco_kp_eval_5]_
+            mar/iou=0.50:0.95/area=large/max_dets=20, *float*, \
+                [#coco_kp_eval_4]_ [#coco_kp_eval_5]_
+            coco_eval, *pycocotools.cocoeval.COCOeval*, \
+                result from :obj:`pycocotools`
+            existent_labels, *numpy.ndarray*, \
+                used labels \
+
+    .. [#coco_kp_eval_1] An array of average precisions. \
+        The :math:`l`-th value corresponds to the average precision \
+        for class :math:`l`. If class :math:`l` does not exist in \
+        either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \
+        value is set to :obj:`numpy.nan`.
+    .. [#coco_kp_eval_2] An array of average recalls. \
+        The :math:`l`-th value corresponds to the average precision \
+        for class :math:`l`. If class :math:`l` does not exist in \
+        either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \
+        value is set to :obj:`numpy.nan`.
+    .. [#coco_kp_eval_3] The average of average precisions over classes.
+    .. [#coco_kp_eval_4] The average of average recalls over classes.
+    .. [#coco_kp_eval_5] Skip if :obj:`gt_areas` is :obj:`None`.
+
+    """
+    if not _available:
+        raise ValueError(
+            'Please install pycocotools \n'
+            'pip install -e \'git+https://github.com/cocodataset/coco.git'
+            '#egg=pycocotools&subdirectory=PythonAPI\'')
+
+    gt_coco = pycocotools.coco.COCO()
+    pred_coco = pycocotools.coco.COCO()
+
+    pred_points = iter(pred_points)
+    pred_labels = iter(pred_labels)
+    pred_scores = iter(pred_scores)
+    gt_points = iter(gt_points)
+    gt_visibles = iter(gt_visibles)
+    gt_labels = iter(gt_labels)
+    gt_bboxes = (iter(gt_bboxes) if gt_bboxes is not None
+                 else itertools.repeat(None))
+    if gt_areas is None:
+        compute_area_dependent_metrics = False
+        gt_areas = itertools.repeat(None)
+    else:
+        compute_area_dependent_metrics = True
+        gt_areas = iter(gt_areas)
+    gt_crowdeds = (iter(gt_crowdeds) if gt_crowdeds is not None
+                   else itertools.repeat(None))
+
+    ids = []
+    pred_annos = []
+    gt_annos = []
+    existent_labels = {}
+    for i, (pred_point, pred_label, pred_score, gt_point, gt_visible,
+            gt_label, gt_bbox,
+            gt_area, gt_crowded) in enumerate(six.moves.zip(
+                pred_points, pred_labels, pred_scores,
+                gt_points, gt_visibles, gt_labels, gt_bboxes,
+                gt_areas, gt_crowdeds)):
+        if gt_bbox is None:
+            gt_bbox = itertools.repeat(None)
+        if gt_area is None:
+            gt_area = itertools.repeat(None)
+        if gt_crowded is None:
+            gt_crowded = itertools.repeat(None)
+        # Starting ids from 1 is important when using COCO.
+        img_id = i + 1
+
+        for pred_pnt, pred_lb, pred_sc in zip(pred_point, pred_label,
+                                              pred_score):
+            # http://cocodataset.org/#format-results
+            # Visibility flag is currently not used for evaluation
+            v = np.ones(len(pred_pnt))
+            pred_annos.append(
+                _create_anno(pred_pnt, v,
+                             pred_lb, pred_sc, None,
+                             img_id=img_id, anno_id=len(pred_annos) + 1,
+                             ar=None, crw=0))
+            existent_labels[pred_lb] = True
+
+        for gt_pnt, gt_v, gt_lb, gt_bb, gt_ar, gt_crw in zip(
+                gt_point, gt_visible, gt_label, gt_bbox, gt_area, gt_crowded):
+            gt_annos.append(
+                _create_anno(gt_pnt, gt_v, gt_lb, None, gt_bb,
+                             img_id=img_id, anno_id=len(gt_annos) + 1,
+                             ar=gt_ar, crw=gt_crw))
+        ids.append({'id': img_id})
+    existent_labels = sorted(existent_labels.keys())
+
+    pred_coco.dataset['categories'] = [{'id': i} for i in existent_labels]
+    gt_coco.dataset['categories'] = [{'id': i} for i in existent_labels]
+    pred_coco.dataset['annotations'] = pred_annos
+    gt_coco.dataset['annotations'] = gt_annos
+    pred_coco.dataset['images'] = ids
+    gt_coco.dataset['images'] = ids
+
+    with _redirect_stdout(open(os.devnull, 'w')):
+        pred_coco.createIndex()
+        gt_coco.createIndex()
+        coco_eval = pycocotools.cocoeval.COCOeval(
+            gt_coco, pred_coco, 'keypoints')
+        coco_eval.evaluate()
+        coco_eval.accumulate()
+
+    results = {'coco_eval': coco_eval}
+    p = coco_eval.params
+    common_kwargs = {
+        'prec': coco_eval.eval['precision'],
+        'rec': coco_eval.eval['recall'],
+        'iou_threshs': p.iouThrs,
+        'area_ranges': p.areaRngLbl,
+        'max_detection_list': p.maxDets,
+    }
+    all_kwargs = {
+        'ap/iou=0.50:0.95/area=all/max_dets=20': {
+            'ap': True, 'iou_thresh': None, 'area_range': 'all',
+            'max_detection': 20},
+        'ap/iou=0.50/area=all/max_dets=20': {
+            'ap': True, 'iou_thresh': 0.5, 'area_range': 'all',
+            'max_detection': 20},
+        'ap/iou=0.75/area=all/max_dets=20': {
+            'ap': True, 'iou_thresh': 0.75, 'area_range': 'all',
+            'max_detection': 20},
+        'ar/iou=0.50:0.95/area=all/max_dets=20': {
+            'ap': False, 'iou_thresh': None, 'area_range': 'all',
+            'max_detection': 20},
+        'ar/iou=0.50/area=all/max_dets=20': {
+            'ap': False, 'iou_thresh': 0.5, 'area_range': 'all',
+            'max_detection': 20},
+        'ar/iou=0.75/area=all/max_dets=20': {
+            'ap': False, 'iou_thresh': 0.75, 'area_range': 'all',
+            'max_detection': 20},
+    }
+    if compute_area_dependent_metrics:
+        all_kwargs.update({
+            'ap/iou=0.50:0.95/area=medium/max_dets=20': {
+                'ap': True, 'iou_thresh': None, 'area_range': 'medium',
+                'max_detection': 20},
+            'ap/iou=0.50:0.95/area=large/max_dets=20': {
+                'ap': True, 'iou_thresh': None, 'area_range': 'large',
+                'max_detection': 20},
+            'ar/iou=0.50:0.95/area=medium/max_dets=20': {
+                'ap': False, 'iou_thresh': None, 'area_range': 'medium',
+                'max_detection': 20},
+            'ar/iou=0.50:0.95/area=large/max_dets=20': {
+                'ap': False, 'iou_thresh': None, 'area_range': 'large',
+                'max_detection': 20},
+        })
+
+    for key, kwargs in all_kwargs.items():
+        kwargs.update(common_kwargs)
+        metrics, mean_metric = _summarize(**kwargs)
+
+        # pycocotools ignores classes that are not included in
+        # either gt or prediction, but lies between 0 and
+        # the maximum label id.
+        # We set values for these classes to np.nan.
+        results[key] = np.nan * np.ones(np.max(existent_labels) + 1)
+        results[key][existent_labels] = metrics
+        results['m' + key] = mean_metric
+
+    results['existent_labels'] = existent_labels
+    return results
+
+
+def _create_anno(pnt, v, lb, sc, bb, img_id, anno_id, ar=None, crw=None):
+    # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L342
+    y_min = np.min(pnt[:, 0])
+    x_min = np.min(pnt[:, 1])
+    y_max = np.max(pnt[:, 0])
+    x_max = np.max(pnt[:, 1])
+    if ar is None:
+        ar = (y_max - y_min) * (x_max - x_min)
+
+    if crw is None:
+        crw = False
+    # Rounding is done to make the result consistent with COCO.
+
+    if bb is None:
+        bb_xywh = [x_min, y_min, x_max - x_min, y_max - y_min]
+    else:
+        bb_xywh = [bb[1], bb[0], bb[3] - bb[1], bb[2] - bb[0]]
+    pnt = np.concatenate((pnt[:, [1, 0]], v[:, None]), axis=1)
+    anno = {
+        'image_id': img_id, 'category_id': lb,
+        'keypoints': pnt.reshape((-1)).tolist(),
+        'area': ar,
+        'bbox': bb_xywh,
+        'id': anno_id,
+        'iscrowd': crw,
+        'num_keypoints': (pnt[:, 0] > 0).sum()
+    }
+    if sc is not None:
+        anno.update({'score': sc})
+    return anno
diff --git a/docs/source/reference/evaluations.rst b/docs/source/reference/evaluations.rst
index 2befc38e47..553f1b52f6 100644
--- a/docs/source/reference/evaluations.rst
+++ b/docs/source/reference/evaluations.rst
@@ -45,6 +45,10 @@ calc_instance_segmentation_voc_prec_rec
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autofunction:: calc_instance_segmentation_voc_prec_rec
 
+Keypoint Detection COCO
+-----------------------
+.. autofunction:: eval_keypoint_detection_coco
+
 Semantic Segmentation IoU
 -------------------------
 
diff --git a/tests/evaluations_tests/test_eval_keypoint_detection_coco.py b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
new file mode 100644
index 0000000000..8112f007f8
--- /dev/null
+++ b/tests/evaluations_tests/test_eval_keypoint_detection_coco.py
@@ -0,0 +1,171 @@
+import numpy as np
+import os
+from six.moves.urllib import request
+import unittest
+
+from chainer import testing
+
+from chainercv.datasets import coco_keypoint_names
+from chainercv.evaluations import eval_keypoint_detection_coco
+
+try:
+    import pycocotools  # NOQA
+    _available = True
+except ImportError:
+    _available = False
+
+
+human_id = 0
+
+
+def _generate_point(n_inst, size):
+    H, W = size
+    n_joint = len(coco_keypoint_names[human_id])
+    ys = np.random.uniform(0, H, size=(n_inst, n_joint))
+    xs = np.random.uniform(0, W, size=(n_inst, n_joint))
+    point = np.stack((ys, xs), axis=2).astype(np.float32)
+
+    valid = np.random.randint(0, 2, size=(n_inst, n_joint)).astype(np.bool)
+    return point, valid
+
+
+@unittest.skipUnless(_available, 'pycocotools is not installed')
+class TestEvalKeypointDetectionCOCOSimple(unittest.TestCase):
+
+    n_inst = 3
+
+    def setUp(self):
+        self.pred_points = []
+        self.pred_labels = []
+        self.pred_scores = []
+        self.gt_points = []
+        self.gt_visibles = []
+        self.gt_bboxes = []
+        self.gt_labels = []
+        for i in range(2):
+            point, valid = _generate_point(self.n_inst, (32, 48))
+            self.pred_points.append(point)
+            self.pred_labels.append(np.zeros((self.n_inst,), dtype=np.int32))
+            self.pred_scores.append(np.random.uniform(
+                0.5, 1, size=(self.n_inst,)).astype(np.float32))
+            self.gt_points.append(point)
+            self.gt_visibles.append(valid)
+            bbox = np.zeros((self.n_inst, 4), dtype=np.float32)
+            for i, pnt in enumerate(point):
+                y_min = np.min(pnt[:, 0])
+                x_min = np.min(pnt[:, 1])
+                y_max = np.max(pnt[:, 0])
+                x_max = np.max(pnt[:, 1])
+                bbox[i] = [y_min, x_min, y_max, x_max]
+            self.gt_bboxes.append(bbox)
+            self.gt_labels.append(np.zeros((self.n_inst,), dtype=np.int32))
+
+    def _check(self, result):
+        self.assertEqual(result['map/iou=0.50:0.95/area=all/max_dets=20'], 1)
+        self.assertEqual(result['map/iou=0.50/area=all/max_dets=20'], 1)
+        self.assertEqual(result['map/iou=0.75/area=all/max_dets=20'], 1)
+        self.assertEqual(result['mar/iou=0.50:0.95/area=all/max_dets=20'], 1)
+        self.assertEqual(result['mar/iou=0.50/area=all/max_dets=20'], 1)
+        self.assertEqual(result['mar/iou=0.75/area=all/max_dets=20'], 1)
+
+    def test_gt_bboxes_not_supplied(self):
+        result = eval_keypoint_detection_coco(
+            self.pred_points, self.pred_labels, self.pred_scores,
+            self.gt_points, self.gt_visibles, self.gt_labels, None)
+        self._check(result)
+
+    def test_area_not_supplied(self):
+        result = eval_keypoint_detection_coco(
+            self.pred_points, self.pred_labels, self.pred_scores,
+            self.gt_points, self.gt_visibles, self.gt_labels, self.gt_bboxes)
+        self._check(result)
+
+        self.assertFalse(
+            'map/iou=0.50:0.95/area=medium/max_dets=20' in result)
+        self.assertFalse(
+            'map/iou=0.50:0.95/area=large/max_dets=20' in result)
+        self.assertFalse(
+            'mar/iou=0.50:0.95/area=medium/max_dets=20' in result)
+        self.assertFalse(
+            'mar/iou=0.50:0.95/area=large/max_dets=20' in result)
+
+    def test_area_supplied(self):
+        gt_areas = [[100] * self.n_inst for _ in range(2)]
+        result = eval_keypoint_detection_coco(
+            self.pred_points, self.pred_labels, self.pred_scores,
+            self.gt_points, self.gt_visibles, self.gt_labels, self.gt_bboxes,
+            gt_areas=gt_areas,
+        )
+        self._check(result)
+        self.assertTrue(
+            'map/iou=0.50:0.95/area=medium/max_dets=20' in result)
+        self.assertTrue(
+            'map/iou=0.50:0.95/area=large/max_dets=20' in result)
+        self.assertTrue(
+            'mar/iou=0.50:0.95/area=medium/max_dets=20' in result)
+        self.assertTrue(
+            'mar/iou=0.50:0.95/area=large/max_dets=20' in result)
+
+    def test_crowded_supplied(self):
+        gt_crowdeds = [[True] * self.n_inst for _ in range(2)]
+        result = eval_keypoint_detection_coco(
+            self.pred_points, self.pred_labels, self.pred_scores,
+            self.gt_points, self.gt_visibles, self.gt_labels, self.gt_bboxes,
+            gt_crowdeds=gt_crowdeds,
+        )
+        # When the only ground truth is crowded, nothing is evaluated.
+        # In that case, all the results are nan.
+        self.assertTrue(
+            np.isnan(result['map/iou=0.50:0.95/area=all/max_dets=20']))
+
+
+@unittest.skipUnless(_available, 'pycocotools is not installed')
+class TestEvalKeypointDetectionCOCO(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        base_url = 'https://chainercv-models.preferred.jp/tests'
+
+        cls.dataset = np.load(request.urlretrieve(os.path.join(
+            base_url,
+            'eval_keypoint_detection_coco_dataset_2019_02_21.npz'))[0])
+        cls.result = np.load(request.urlretrieve(os.path.join(
+            base_url,
+            'eval_keypoint_detection_coco_result_2019_02_20.npz'))[0])
+
+    def test_eval_keypoint_detection_coco(self):
+        pred_points = self.result['points']
+        pred_labels = self.result['labels']
+        pred_scores = self.result['scores']
+
+        gt_points = self.dataset['points']
+        gt_visibles = self.dataset['visibles']
+        gt_labels = self.dataset['labels']
+        gt_bboxes = self.dataset['bboxes']
+        gt_areas = self.dataset['areas']
+        gt_crowdeds = self.dataset['crowdeds']
+
+        result = eval_keypoint_detection_coco(
+            pred_points, pred_labels, pred_scores,
+            gt_points, gt_visibles, gt_labels, gt_bboxes,
+            gt_areas, gt_crowdeds)
+
+        expected = {
+            'map/iou=0.50:0.95/area=all/max_dets=20': 0.37733572721481323,
+            'map/iou=0.50/area=all/max_dets=20': 0.6448841691017151,
+            'map/iou=0.75/area=all/max_dets=20': 0.35469090938568115,
+            'map/iou=0.50:0.95/area=medium/max_dets=20': 0.3894105851650238,
+            'map/iou=0.50:0.95/area=large/max_dets=20': 0.39169296622276306,
+            'mar/iou=0.50:0.95/area=all/max_dets=20': 0.5218977928161621,
+            'mar/iou=0.50/area=all/max_dets=20': 0.7445255517959595,
+            'mar/iou=0.75/area=all/max_dets=20': 0.510948896408081,
+            'mar/iou=0.50:0.95/area=medium/max_dets=20': 0.5150684714317322,
+            'mar/iou=0.50:0.95/area=large/max_dets=20': 0.5296875238418579,
+        }
+
+        for key, item in expected.items():
+            np.testing.assert_almost_equal(
+                result[key], expected[key], decimal=5)
+
+
+testing.run_module(__name__, __file__)