在 R 中使用包 mclust 的 coordProj 绘图,如何更改 "errors" 图中的符号和颜色

plotting with coordProj of package mclust in R, how to change the symbols and colors in the "errors" plot

我正在使用包 mclust 的 coordProj 函数,当您使用选项 "errors" 绘图时,我想更改符号和颜色。

我的代码

library("mclust")
data(iris)
init=sample(1:3,length(iris[,5]),replace=TRUE)
est <- meVVV(iris[,-5], unmap(init))
coordProj(iris[,-5],parameters = est$parameters,z=est$z,
truth = iris[,5],what="errors",symbols=c("T","F"),colors=c("blue","red"))

剧情在这里(http://i.stack.imgur.com/Nmgbq.png)

我正在尝试更改字符为 'F' 的黑点和其他字符为 'T' 的黑点。 不知道是不是不行还是我做错了

如果您查看带有 page(coordProj) 的函数,您会发现,如果不更改它是不可能的。 一个快速的解决方法是更改​​它以允许额外的参数,此处 symbol2=。以下是在第 171 行进行编辑的函数:

coordProj<-function (data, dimens = c(1, 2), parameters = NULL, z = NULL, 
    classification = NULL, truth = NULL, uncertainty = NULL, 
    what = c("classification", "errors", "uncertainty"), quantiles = c(0.75, 
        0.95), symbols = NULL, colors = NULL, scale = FALSE, 
    xlim = NULL, ylim = NULL, CEX = 1, PCH = ".", identify = FALSE,symbols2, 
    ...) 
{
    if (is.null(dimens)) 
        dimens <- c(1, 2)
    if (is.null(classification) && !is.null(z)) 
        classification <- map(z)
    if (is.null(uncertainty) && !is.null(z)) 
        uncertainty <- 1 - apply(z, 1, max)
    if (!is.null(parameters)) {
        mu <- parameters$mean
        L <- ncol(mu)
        sigma <- parameters$variance$sigma
        haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && 
            !any(is.na(sigma))
    }
    else haveParams <- FALSE
    data <- data[, dimens, drop = FALSE]
    if (dim(data)[2] != 2) 
        stop("need two dimensions")
    if (is.null(xlim)) 
        xlim <- range(data[, 1])
    if (is.null(ylim)) 
        ylim <- range(data[, 2])
    if (scale) {
        par(pty = "s")
        d <- diff(xlim) - diff(ylim)
        if (d > 0) {
            ylim <- c(ylim[1] - d/2, ylim[2] + d/2)
        }
        else {
            xlim <- c(xlim[1] + d/2, xlim[2] - d/2)
        }
    }
    if (is.null(dnames <- dimnames(data)[[2]])) 
        xlab <- ylab <- ""
    else {
        xlab <- dnames[1]
        ylab <- dnames[2]
    }
    if (haveParams) {
        G <- ncol(mu)
        dimpar <- dim(sigma)
        if (length(dimpar) != 3) {
            haveParams <- FALSE
            warning("covariance must be a 3D matrix")
        }
        if (G != dimpar[3]) {
            haveParams <- FALSE
            warning("means and variance parameters are incompatible")
        }
        mu <- array(mu[dimens, ], c(2, G))
        sigma <- array(sigma[dimens, dimens, ], c(2, 2, G))
    }
    if (!is.null(truth)) {
        if (is.null(classification)) {
            classification <- truth
            truth <- NULL
        }
    }
    if (!is.null(classification)) {
        classification <- as.character(classification)
        U <- sort(unique(classification))
        L <- length(U)
        noise <- classification[1] == "0"
        if (is.null(symbols)) {
            if (L <= length(.mclust$classPlotSymbols)) {
                symbols <- .mclust$classPlotSymbols
                if (noise) {
                  first <- symbols[1]
                  symbols[symbols == 16] <- first
                  symbols[1] <- 16
                }
            }
            else if (L <= 9) {
                symbols <- as.character(1:9)
            }
            else if (L <= 26) {
                symbols <- LETTERS
            }
        }
        else if (length(symbols) == 1) 
            symbols <- rep(symbols, L)
        if (is.null(colors)) {
            if (L <= length(.mclust$classPlotColors)) {
                colors <- .mclust$classPlotColors[1:L]
                if (noise) {
                  first <- colors[1]
                  colors[colors == "black"] <- first
                  colors[1] <- "black"
                }
            }
        }
        else if (length(colors) == 1) 
            colors <- rep(colors, L)
        if (length(symbols) < L) {
            warning("more symbols needed to show classification ")
            symbols <- rep(16, L)
        }
        if (length(colors) < L) {
            warning("more colors needed to show classification ")
            colors <- rep("black", L)
        }
    }
    if (length(what) > 1) 
        what <- what[1]
    choices <- c("classification", "errors", "uncertainty")
    m <- charmatch(what, choices, nomatch = 0)
    if (m) {
        what <- choices[m]
        bad <- what == "classification" && is.null(classification)
        bad <- bad || (what == "uncertainty" && is.null(uncertainty))
        bad <- bad || (what == "errors" && (is.null(classification) || 
            is.null(truth)))
        if (bad) 
            warning("insufficient input for specified plot")
        badClass <- (what == "errors" && (length(unique(classification)) != 
            length(unique(truth))))
        if (badClass && !bad) 
            warning("classification and truth differ in number of groups")
        bad <- bad && badClass
    }
    else {
        bad <- !m
        warning("what improperly specified")
    }
    if (bad) 
        what <- "bad"
    switch(EXPR = what, classification = {
        plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
            xlim = xlim, ylim = ylim, main = "", ...)
        if (identify) {
            TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Classification")
            title(main = TITLE)
        }
        for (k in 1:L) {
            I <- classification == U[k]
            points(data[I, 1], data[I, 2], pch = symbols[k], 
                col = colors[k], cex = if (U[k] == "0") CEX/4 else CEX)
        }
    }, errors = {
        ERRORS <- classError(classification, truth)$misclassified
        plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
            xlim = xlim, ylim = ylim, main = "", ...)
        if (identify) {
            TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Errors")
            title(main = TITLE)
        }
        CLASSES <- unique(as.character(truth))
        symOpen <- c(2, 0, 1, 5)
        symFill <- c(17, 15, 16, 18)
        good <- rep(TRUE, length(classification))
        good[ERRORS] <- FALSE
        if (L > 4) {
            points(data[good, 1], data[good, 2], pch = 1, col = colors, 
                cex = CEX)
            points(data[!good, 1], data[!good, 2], pch = 16, 
                cex = CEX)
        } else {
            for (k in 1:L) {
                K <- truth == CLASSES[k]
                if (any(I <- (K & good))) {
                  points(data[I, 1], data[I, 2], pch = symOpen[k], 
                    col = colors[k], cex = CEX)
                }
                if (any(I <- (K & !good))) {
                  points(data[I, 1], data[I, 2], pch = symbols2[k], #Before: pch = symFill[k]
                    cex = CEX)
                }
            }
        }
    }, uncertainty = {
        plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
            xlim = xlim, ylim = ylim, main = "", ...)
        if (identify) {
            TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Uncertainty")
            title(main = TITLE)
        }
        breaks <- quantile(uncertainty, probs = sort(quantiles))
        I <- uncertainty <= breaks[1]
        points(data[I, 1], data[I, 2], pch = 16, col = "gray75", 
            cex = 0.5 * CEX)
        I <- uncertainty <= breaks[2] & !I
        points(data[I, 1], data[I, 2], pch = 16, col = "gray50", 
            cex = 1 * CEX)
        I <- uncertainty > breaks[2] & !I
        points(data[I, 1], data[I, 2], pch = 16, col = "black", 
            cex = 1.5 * CEX)
    }, {
        plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
            xlim = xlim, ylim = ylim, main = "", ...)
        if (identify) {
            TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection")
            title(main = TITLE)
        }
        points(data[, 1], data[, 2], pch = PCH, cex = CEX)
    })
    if (haveParams) {
        for (k in 1:G) mvn2plot(mu = mu[, k], sigma = sigma[, 
            , k], k = 15)
    }
    invisible()
}    

然后调用该函数(注意您实际上需要 3 个参数用于 colsymbols、...):

coordProj(iris[,-5],parameters = est$parameters,z=est$z,
truth = iris[,5],what="errors",colors=c(4,2,3),symbols=".",symbols2=c("A","T","F"))

导致:

我最后用到的功能就是这个

coordProj2<-function (data, dimens = c(1, 2), parameters = NULL, z = NULL, 
                      classification = NULL, truth = NULL, uncertainty = NULL, 
                      what = c("classification", "errors", "uncertainty"), quantiles = c(0.75, 
                                                                                         0.95), symbols = NULL, colors = NULL, scale = FALSE, 
                      xlim = NULL, ylim = NULL, CEX = 1, PCH = ".", identify = FALSE,symbolsTrue,symbolsFalse,colorsTrue,colorsFalse, 
                      ...) 
{
  if (is.null(dimens)) 
    dimens <- c(1, 2)
  if (is.null(classification) && !is.null(z)) 
    classification <- map(z)
  if (is.null(uncertainty) && !is.null(z)) 
    uncertainty <- 1 - apply(z, 1, max)
  if (!is.null(parameters)) {
    mu <- parameters$mean
    L <- ncol(mu)
    sigma <- parameters$variance$sigma
    haveParams <- !is.null(mu) && !is.null(sigma) && !any(is.na(mu)) && 
      !any(is.na(sigma))
  }
  else haveParams <- FALSE
  data <- data[, dimens, drop = FALSE]
  if (dim(data)[2] != 2) 
    stop("need two dimensions")
  if (is.null(xlim)) 
    xlim <- range(data[, 1])
  if (is.null(ylim)) 
    ylim <- range(data[, 2])
  if (scale) {
    par(pty = "s")
    d <- diff(xlim) - diff(ylim)
    if (d > 0) {
      ylim <- c(ylim[1] - d/2, ylim[2] + d/2)
    }
    else {
      xlim <- c(xlim[1] + d/2, xlim[2] - d/2)
    }
  }
  if (is.null(dnames <- dimnames(data)[[2]])) 
    xlab <- ylab <- ""
  else {
    xlab <- dnames[1]
    ylab <- dnames[2]
  }
  if (haveParams) {
    G <- ncol(mu)
    dimpar <- dim(sigma)
    if (length(dimpar) != 3) {
      haveParams <- FALSE
      warning("covariance must be a 3D matrix")
    }
    if (G != dimpar[3]) {
      haveParams <- FALSE
      warning("means and variance parameters are incompatible")
    }
    mu <- array(mu[dimens, ], c(2, G))
    sigma <- array(sigma[dimens, dimens, ], c(2, 2, G))
  }
  if (!is.null(truth)) {
    if (is.null(classification)) {
      classification <- truth
      truth <- NULL
    }
  }
  if (!is.null(classification)) {
    classification <- as.character(classification)
    U <- sort(unique(classification))
    L <- length(U)
    noise <- classification[1] == "0"
    if (is.null(symbols)) {
      if (L <= length(.mclust$classPlotSymbols)) {
        symbols <- .mclust$classPlotSymbols
        if (noise) {
          first <- symbols[1]
          symbols[symbols == 16] <- first
          symbols[1] <- 16
        }
      }
      else if (L <= 9) {
        symbols <- as.character(1:9)
      }
      else if (L <= 26) {
        symbols <- LETTERS
      }
    }
    else if (length(symbols) == 1) 
      symbols <- rep(symbols, L)
    if (is.null(colors)) {
      if (L <= length(.mclust$classPlotColors)) {
        colors <- .mclust$classPlotColors[1:L]
        if (noise) {
          first <- colors[1]
          colors[colors == "black"] <- first
          colors[1] <- "black"
        }
      }
    }
    else if (length(colors) == 1) 
      colors <- rep(colors, L)
    if (length(symbols) < L) {
      warning("more symbols needed to show classification ")
      symbols <- rep(16, L)
    }
    if (length(colors) < L) {
      warning("more colors needed to show classification ")
      colors <- rep("black", L)
    }
  }
  if (length(what) > 1) 
    what <- what[1]
  choices <- c("classification", "errors", "uncertainty")
  m <- charmatch(what, choices, nomatch = 0)
  if (m) {
    what <- choices[m]
    bad <- what == "classification" && is.null(classification)
    bad <- bad || (what == "uncertainty" && is.null(uncertainty))
    bad <- bad || (what == "errors" && (is.null(classification) || 
                                          is.null(truth)))
    if (bad) 
      warning("insufficient input for specified plot")
    badClass <- (what == "errors" && (length(unique(classification)) != 
                                        length(unique(truth))))
    if (badClass && !bad) 
      warning("classification and truth differ in number of groups")
    bad <- bad && badClass
  }
  else {
    bad <- !m
    warning("what improperly specified")
  }
  if (bad) 
    what <- "bad"
  switch(EXPR = what, classification = {
    plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
         xlim = xlim, ylim = ylim, main = "", ...)
    if (identify) {
      TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Classification")
      title(main = TITLE)
    }
    for (k in 1:L) {
      I <- classification == U[k]
      points(data[I, 1], data[I, 2], pch = symbols[k], 
             col = colors[k], cex = if (U[k] == "0") CEX/4 else CEX)
    }
  }, errors = {
    ERRORS <- classError(classification, truth)$misclassified
    plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
         xlim = xlim, ylim = ylim, main = "", ...)
    if (identify) {
      TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Errors")
      title(main = TITLE)
    }
    CLASSES <- unique(as.character(truth))
    symOpen <- c(2, 0, 1, 5)
    symFill <- c(17, 15, 16, 18)
    good <- rep(TRUE, length(classification))
    good[ERRORS] <- FALSE
    if (L > 4) {
      points(data[good, 1], data[good, 2], pch = 1, col = colors, 
             cex = CEX)
      points(data[!good, 1], data[!good, 2], pch = 16, 
             cex = CEX)
    } else {
      for (k in 1:L) {
        K <- truth == CLASSES[k]
        if (any(I <- (K & good))) {
          points(data[I, 1], data[I, 2], pch = symbolsTrue[k], 
                 col = colorsTrue[k], cex = CEX)
        }
        if (any(I <- (K & !good))) {
          points(data[I, 1], data[I, 2], pch = symbolsFalse[k], #Before: pch = symFill[k]
                 col=colorsFalse[k], cex = CEX)
        }
      }
    }
  }, uncertainty = {
    plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
         xlim = xlim, ylim = ylim, main = "", ...)
    if (identify) {
      TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection showing Uncertainty")
      title(main = TITLE)
    }
    breaks <- quantile(uncertainty, probs = sort(quantiles))
    I <- uncertainty <= breaks[1]
    points(data[I, 1], data[I, 2], pch = 16, col = "gray75", 
           cex = 0.5 * CEX)
    I <- uncertainty <= breaks[2] & !I
    points(data[I, 1], data[I, 2], pch = 16, col = "gray50", 
           cex = 1 * CEX)
    I <- uncertainty > breaks[2] & !I
    points(data[I, 1], data[I, 2], pch = 16, col = "black", 
           cex = 1.5 * CEX)
  }, {
    plot(data[, 1], data[, 2], type = "n", xlab = xlab, ylab = ylab, 
         xlim = xlim, ylim = ylim, main = "", ...)
    if (identify) {
      TITLE <- paste(paste(dimens, collapse = ","), "Coordinate Projection")
      title(main = TITLE)
    }
    points(data[, 1], data[, 2], pch = PCH, cex = CEX)
  })
  if (haveParams) {
    for (k in 1:G) mvn2plot(mu = mu[, k], sigma = sigma[, 
                                                        , k], k = 15)
  }
  invisible()
}   

使用

调用
colorsTrue=c("red","blue","yellow")
colorsFalse=c("pink","gray","black")
symbolsTrue=c("T1","T2","T3")
symbolsFalse=c("F1","F2","F3")

coordProj2(iris[,-5],parameters = est$parameters,z=est$z,
truth = iris[,5],what="errors",colors=c(4,2,3),symbols=".",
symbolsTrue=symbolsTrue,symbolsFalse=symbolsFalse,
colorsTrue=colorsTrue,colorsFalse=colorsFalse)

http://i.stack.imgur.com/W3KNa.png