Parallel coordinates plots with elm-vega

Let's start with the well-known Iris dataset used for benchmarking clustering and visualization approaches. It comprises a set of 150 samples from three species of Iris, each with four morphological measurements: petal length, petal width, sepal length and sepal width.

A simple scatterplot of two of those measurements, coloured by species might look like this:


    {-| Colour scale properties that map each Iris species name to a fixed RGB colour.
    -}
    irisColors : List ScaleProperty
    irisColors =
        [ ( "setosa", "rgb(125,200,125)" )
        , ( "versicolor", "rgb(190,175,210)" )
        , ( "virginica", "rgb(255,190,130)" )
        ]
            |> categoricalDomainMap

    {-| Scatterplot of sepal length (x) against petal length (y), with one circle
    per sample coloured by species.
    -}
    scatter : Spec
    scatter =
        let
            -- The pipeline applies the same functions, in the same order, as the
            -- composed form `dataFromColumns [] << dataColumn … << dataColumn …`
            -- applied to `[]` (the last-listed column is added first).
            irisTable =
                []
                    |> dataColumn "species" (Strings species)
                    |> dataColumn "petal length" (Numbers pLength)
                    |> dataColumn "sepal length" (Numbers sLength)
                    |> dataFromColumns []

            channels =
                []
                    |> color [ MName "species", MmType Nominal, MScale irisColors ]
                    |> position Y [ PName "petal length", PmType Quantitative ]
                    |> position X [ PName "sepal length", PmType Quantitative ]
                    |> encoding
        in
        toVegaLite [ irisTable, mark Circle [], channels ]
  

Because we will be manipulating the data below, we will start by storing the raw Iris data inline in Elm:


  {-| Sepal length measurement for each Iris sample. Used as a data column
  alongside `sWidth`, `pLength`, `pWidth` and `species`, so element i of each of
  those lists describes the same sample.
  -}
  sLength : List Float
  sLength =
      [ 5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6, 5, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5, 5, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5, 5.5, 4.9, 4.4, 5.1, 5, 4.5, 4.4, 5, 5.1, 4.8, 5.1, 4.6, 5.3, 5, 7, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5, 5.9, 6, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6, 5.7, 5.5, 5.5, 5.8, 6, 5.4, 6, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3, 6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5, 7.7, 7.7, 6, 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2, 7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6, 6.9, 6.7, 6.9, 5.8, 6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9 ]


  {-| Sepal width measurement for each Iris sample, listed in the same sample
  order as the other measurement lists and `species`.
  -}
  sWidth : List Float
  sWidth =
      [ 3.5, 3, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3, 3, 4, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2, 3, 2.2, 2.9, 2.9, 3.1, 3, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3, 2.8, 3, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3, 3.4, 3.1, 2.3, 3, 2.5, 2.6, 3, 2.6, 2.3, 2.7, 3, 2.9, 2.9, 2.5, 2.8, 3.3, 2.7, 3, 2.9, 3, 3, 2.5, 2.9, 2.5, 3.6, 3.2, 2.7, 3, 2.5, 2.8, 3.2, 3, 3.8, 2.6, 2.2, 3.2, 2.8, 2.8, 2.7, 3.3, 3.2, 2.8, 3, 2.8, 3, 2.8, 3.8, 2.8, 2.8, 2.6, 3, 3.4, 3.1, 3, 3.1, 3.1, 3.1, 2.7, 3.2, 3.3, 3, 2.5, 3, 3.4, 3 ]


  {-| Petal length measurement for each Iris sample, listed in the same sample
  order as the other measurement lists and `species`.
  -}
  pLength : List Float
  pLength =
      [ 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4, 4.9, 4.7, 4.3, 4.4, 4.8, 5, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4, 4.4, 4.6, 4, 3.3, 4.2, 4.2, 4.2, 4.3, 3, 4.1, 6, 5.1, 5.9, 5.6, 5.8, 6.6, 4.5, 6.3, 5.8, 6.1, 5.1, 5.3, 5.5, 5, 5.1, 5.3, 5.5, 6.7, 6.9, 5, 5.7, 4.9, 6.7, 4.9, 5.7, 6, 4.8, 4.9, 5.6, 5.8, 6.1, 6.4, 5.6, 5.1, 5.6, 6.1, 5.6, 5.5, 4.8, 5.4, 5.6, 5.1, 5.1, 5.9, 5.7, 5.2, 5, 5.2, 5.4, 5.1 ]


  {-| Petal width measurement for each Iris sample, listed in the same sample
  order as the other measurement lists and `species`.
  -}
  pWidth : List Float
  pWidth =
      [ 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1, 1.3, 1.4, 1, 1.5, 1, 1.4, 1.3, 1.4, 1.5, 1, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1, 1.1, 1, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3, 2.5, 1.9, 2.1, 1.8, 2.2, 2.1, 1.7, 1.8, 1.8, 2.5, 2, 1.9, 2.1, 2, 2.4, 2.3, 1.8, 2.2, 2.3, 1.5, 2.3, 2, 2, 1.8, 2.1, 1.8, 1.8, 1.8, 2.1, 1.6, 1.9, 2, 2.2, 1.5, 1.4, 2.3, 2.4, 1.8, 1.8, 2.1, 2.4, 2.3, 1.9, 2.3, 2.5, 2.3, 1.9, 2, 2.3, 1.8 ]


  {-| Species label for each Iris sample, in the same sample order as the
  measurement lists: a run of "setosa", then "versicolor", then "virginica".
  -}
  species : List String
  species =
      let
          -- Each species contributes the same number of consecutive samples.
          samplesPerSpecies =
              50
      in
      [ "setosa", "versicolor", "virginica" ]
          |> List.map (List.repeat samplesPerSpecies)
          |> List.concat
  

To compare all possible pairs of morphological measurements as scatterplots, we can create a scatterplot matrix (SPLOM):


  {-| Scatterplot matrix (SPLOM): one 120x120 scatterplot for every row/column
  pairing of the four measurements, coloured by species.
  -}
  splom : Spec
  splom =
      let
          -- The same field list drives both the rows and the columns of the matrix.
          measurements =
              [ "sepal length", "sepal width", "petal length", "petal width" ]

          -- Same functions in the same application order as the composed
          -- `dataFromColumns [] << dataColumn … ` form applied to `[]`.
          irisTable =
              []
                  |> dataColumn "species" (Strings species)
                  |> dataColumn "sepal width" (Numbers sWidth)
                  |> dataColumn "sepal length" (Numbers sLength)
                  |> dataColumn "petal width" (Numbers pWidth)
                  |> dataColumn "petal length" (Numbers pLength)
                  |> dataFromColumns []

          channels =
              []
                  |> color [ MName "species", MmType Nominal, MScale irisColors ]
                  |> position Y [ PRepeat Row, PmType Quantitative ]
                  |> position X [ PRepeat Column, PmType Quantitative ]
                  |> encoding

          cell =
              asSpec [ width 120, height 120, irisTable, mark Circle [], channels ]
      in
      toVegaLite
          [ repeat [ RowFields measurements, ColumnFields measurements ]
          , specification cell
          ]
  

The SPLOM ensures every permutation of pairs is displayed and even the scatterplot of a measurement against itself (diagonal) has value in showing the distribution of every measurement. It does though take up quite some space and there is redundancy both in the data being shown (the top-right is a mirror of the bottom left) and the visualization decorations (axes, tick marks, titles etc.).

A Parallel Coordinates plot

A parallel coordinates plot (PCP) can be thought of as a set of line graphs where the x-axis is an ordered sequence of the variables to show and the y-axis is the magnitude of each of those variables. It has the advantage of being able to show a large number of variables in a small space. To create a PCP we need to reshape our data from:

| variable A | variable B | variable C |
|------------|------------|------------|
| a1         | b1         | c1         |
| a2         | b2         | c2         |
| a3         | b3         | c3         |
| a4         | b4         | c4         |

to

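| Group | Variable | Value |
|-------|----------|-------|
| 1     | a        | a1    |
| 1     | b        | b1    |
| 1     | c        | c1    |
| 2     | a        | a2    |
| 2     | b        | b2    |
| 2     | c        | c2    |
| 3     | a        | a3    |
| 3     | b        | b3    |
| 3     | c        | c3    |
| 4     | a        | a4    |
| 4     | b        | b4    |
| 4     | c        | c4    |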

This kind of list reshaping is something well suited to functional programming, so can be managed easily in the 'elm' bit of elm-vega:


  {-| Reshapes a table supplied as a list of columns into the 'long' form needed
  for a parallel coordinates plot.

  Returns three flattened lists of equal length, ordered record by record:

    - the group (record) index, starting at 0, repeated once per variable;
    - the variable number, running from 1 to the number of columns;
    - the corresponding value.

  -}
  toPCData : List (List a) -> ( List Float, List Float, List a )
  toPCData xss =
      let
          numVars =
              List.length xss

          -- One inner list per record (row) of the original table.
          records =
              transpose xss

          -- The lambda deliberately ignores the record (`_`) rather than binding
          -- a name that shadows another local, which Elm 0.19 rejects.
          groupIds =
              List.indexedMap (\i _ -> List.repeat numVars (toFloat i)) records

          varIds =
              List.range 1 numVars |> List.map toFloat
      in
      ( List.concat groupIds
      , List.concat (List.repeat (List.length records) varIds)
      , List.concat records
      )


  {-| Repeats every element of a list `n` times in place, keeping the original
  order, e.g. `spread 2 [ "a", "b" ]` gives `[ "a", "a", "b", "b" ]`.
  -}
  spread : Int -> List a -> List a
  spread n items =
      List.concatMap (List.repeat n) items


  {-| Transposes a list of lists, swapping rows for columns.

  If the inner lists have different lengths, the result is truncated to the
  length of the shortest one. An empty outer list transposes to an empty list.

  -}
  transpose : List (List a) -> List (List a)
  transpose ll =
      case ll of
          [] ->
              -- Without this guard, zero heads would always equal zero rows
              -- and the recursion below would never terminate.
              []

          _ ->
              let
                  heads =
                      List.filterMap List.head ll

                  tails =
                      List.filterMap List.tail ll
              in
              -- Stop as soon as any inner list has run out of elements.
              if List.length heads == List.length ll then
                  heads :: transpose tails
              else
                  []
  

If we then encode the variable with x-position and value with y-position and break lines by group (with Vega-Lite's detail encoding), we get:


    {-| First attempt at a parallel coordinates plot: one thin line per sample,
    with the variable number on the x-axis and the raw measurement on the y-axis,
    coloured by species.
    -}
    pc1 : Spec
    pc1 =
        let
            columns =
                [ sLength, sWidth, pLength, pWidth ]

            ( lineIds, axisIds, measurements ) =
                toPCData columns

            -- Each sample yields one point per column, so its species label is
            -- repeated that many times.
            speciesPerPoint =
                spread (List.length columns) species

            -- Same functions in the same application order as the composed
            -- `dataFromColumns [] << dataColumn …` form applied to `[]`.
            longTable =
                []
                    |> dataColumn "species" (Strings speciesPerPoint)
                    |> dataColumn "vars" (Numbers axisIds)
                    |> dataColumn "groups" (Numbers lineIds)
                    |> dataColumn "morphology" (Numbers measurements)
                    |> dataFromColumns []

            channels =
                []
                    |> color [ MName "species", MmType Nominal, MScale irisColors ]
                    |> detail [ DName "groups", DmType Nominal ]
                    |> position Y [ PName "morphology", PmType Quantitative ]
                    |> position X [ PName "vars", PmType Ordinal ]
                    |> encoding
        in
        toVegaLite [ width 300, longTable, mark Line [ MStrokeWidth 0.3 ], channels ]
  

While a pretty good approximation of a parallel coordinates plot, it does have several problems. Firstly, the variables are shown on the x-axis by their variable ordering number (1-4) by default rather than as labelled vertical axes. More significantly, all variables are scaled to the same range (0-8) and assumed to be of the same type (here they are all lengths but in many PCPs, they can be a range of different data types and measurement scales).

To overcome these problems we can use Elm to rescale all variables to the range [0, 1], hide all axes and then create a separate 'axis-annotation layer' for the four morphology variables:


    {-| Linearly rescales a list of numbers so that its minimum maps to 0 and its
    maximum maps to 1. An empty list is returned unchanged. If every value is the
    same (so there is no range to scale by), all values map to 0 instead of
    producing NaN from a division by zero.
    -}
    normalize : List Float -> List Float
    normalize xs =
        let
            minX =
                List.minimum xs |> Maybe.withDefault 0

            maxX =
                List.maximum xs |> Maybe.withDefault 1

            extent =
                maxX - minX
        in
        if extent == 0 then
            List.map (always 0) xs
        else
            List.map (\x -> (x - minX) / extent) xs

    {-| Parallel coordinates plot with every variable independently rescaled to
    the range 0 to 1. The line layer hides its own axes; a second layer of rule
    marks draws one labelled vertical axis per variable.
    -}
    pc2 : Spec
    pc2 =
        let
            table =
                [ normalize sLength, normalize sWidth, normalize pLength, normalize pWidth ]

            ( groups, vars, xs ) =
                toPCData table

            -- `toPCData` supplies variable numbers as Floats. Matching on the
            -- rounded Int avoids pattern matching a Float against integer
            -- literals, which the Elm 0.19 compiler rejects.
            varLabel v =
                case round v of
                    1 ->
                        "sepal length"

                    2 ->
                        "sepal width"

                    3 ->
                        "petal length"

                    4 ->
                        "petal width"

                    _ ->
                        "unknown"

            data =
                dataFromColumns []
                    << dataColumn "morphology" (Numbers xs)
                    << dataColumn "groups" (Numbers groups)
                    << dataColumn "vars" (Strings (List.map varLabel vars))
                    << dataColumn "species" (Strings (spread (List.length table) species))

            -- Remove the border normally drawn around the view.
            config =
                configure
                    << configuration (View [ Stroke Nothing ])

            enc =
                encoding
                    << position X [ PName "vars", PmType Ordinal, PAxis [ AxDomain False, AxGrid False ] ]
                    << position Y [ PName "morphology", PmType Quantitative, PAxis [] ]
                    << detail [ DName "groups", DmType Nominal ]
                    << color [ MName "species", MmType Nominal, MScale irisColors ]

            pcSpec =
                asSpec [ width 300, mark Line [ MStrokeWidth 0.3 ], enc [] ]

            -- Axis-annotation layer: one rule per variable, with no axis title.
            axisEnc =
                encoding
                    << position X [ PName "vars", PmType Ordinal, PAxis [ AxTitle "" ] ]

            axisSpec =
                asSpec [ width 300, mark Rule [], axisEnc [] ]
        in
        toVegaLite [ config [], data [], layer [ pcSpec, axisSpec ] ]
  

Finally, the rather lengthy inline data generated via elm can be placed in its own file to keep the Vega-Lite specification clean and readable.


    {-| Colour scale properties giving each Iris species its own fixed RGB colour.
    -}
    irisColors : List ScaleProperty
    irisColors =
        let
            speciesPalette =
                [ ( "setosa", "rgb(125,200,125)" )
                , ( "versicolor", "rgb(190,175,210)" )
                , ( "virginica", "rgb(255,190,130)" )
                ]
        in
        categoricalDomainMap speciesPalette

    {-| Final parallel coordinates plot. It has the same two layers as the inline
    version (thin lines per sample plus a rule-mark axis layer), but reads the
    already reshaped data from "data/irispc.json" instead of building it in Elm.
    -}
    pcFinal : Spec
    pcFinal =
        let
            -- Line layer: one polyline per group, with its own axes hidden.
            lineLayer =
                asSpec
                    [ width 300
                    , mark Line [ MStrokeWidth 0.3 ]
                    , encoding
                        <| position X [ PName "vars", PmType Ordinal, PAxis [ AxDomain False, AxGrid False ] ]
                        <| position Y [ PName "morphology", PmType Quantitative, PAxis [] ]
                        <| detail [ DName "groups", DmType Nominal ]
                        <| color [ MName "species", MmType Nominal, MScale irisColors ] []
                    ]

            -- Axis-annotation layer: one rule per variable, with no axis title.
            axisLayer =
                asSpec
                    [ width 300
                    , mark Rule []
                    , encoding
                        <| position X [ PName "vars", PmType Nominal, PAxis [ AxTitle "" ] ] []
                    ]

            -- Remove the border normally drawn around the view.
            borderless =
                configure (configuration (View [ Stroke Nothing ]) [])
        in
        toVegaLite
            [ borderless
            , dataFromUrl "data/irispc.json" []
            , layer [ lineLayer, axisLayer ]
            ]