Este gráfico foi criado em resposta a essa pergunta no StackOverflow. O objetivo é replicar o seguinte gráfico: ! [] (waterfall_original.png)
O truque para plotar gráficos em cascata com ggplot2
é criar um conjunto de dados com os grupos (valores x - estou chamando isso no meu código como x.axis.Var
) no ordem exata que você deseja plotar. Depois disso, você precisa obter os pontos inicial e final das barras para cada categoria (categorias em sua legenda - cat.Var
) dentro dos grupos. Em seguida, crie outro grupo com os * totais por categoria *. Você também precisará de um índice numérico para os grupos manipularem as barras. Finalmente, obtenha uma coluna com o * total por grupo * para os números acima das barras.
Suponha que seu banco de dados esteja assim:
df <-
data.frame(
x.axis.Var = rep(c("Widgets", "Gridgets", "Groms", "Wobs"), 3),
cat.Var = rep(c("High End", "Mid Range", "Low End"), each = 4),
values = c(600, 500, 300, 200, # high end
300, 200, 300, 250, # mid range
100, 80, 200, 150 # low end
)
)
Ou,
x.axis.Var cat.Var values
1 Widgets High End 600
2 Gridgets High End 500
3 Groms High End 300
4 Wobs High End 200
5 Widgets Mid Range 300
6 Gridgets Mid Range 200
7 Groms Mid Range 300
8 Wobs Mid Range 250
9 Widgets Low End 100
10 Gridgets Low End 80
11 Groms Low End 200
12 Wobs Low End 150
Siga os passos abaixo para obter um novo banco de dados:
library('tidyverse')
df.tmp <- df %>%
# \_Set the factor levels in the order you want ----
mutate(
x.axis.Var = factor(x.axis.Var,
levels = c("Widgets", "Gridgets", "Groms", "Wobs")),
cat.Var = factor(cat.Var,
levels = c("Low End", "Mid Range", "High End"))
) %>%
# \_Sort by Group and Category ----
arrange(x.axis.Var, desc(cat.Var)) %>%
# \_Get the start and end points of the bars ----
mutate(end.Bar = cumsum(values),
start.Bar = c(0, head(end.Bar, -1))) %>%
# \_Add a new Group called 'Total' with total by category ----
rbind(
df %>%
# \___Sum by Categories ----
group_by(cat.Var) %>%
summarise(values = sum(values)) %>%
# \___Create new Group: 'Total' ----
mutate(
x.axis.Var = "Total",
cat.Var = factor(cat.Var,
levels = c("Low End", "Mid Range", "High End"))
) %>%
# \___Sort by Group and Category ----
arrange(x.axis.Var, desc(cat.Var)) %>%
# \___Get the start and end points of the bars ----
mutate(end.Bar = cumsum(values),
start.Bar = c(0, head(end.Bar, -1))) %>%
# \___Put variables in the same order ----
select(names(df),end.Bar,start.Bar)
) %>%
# \_Get numeric index for the groups ----
mutate(group.id = group_indices(., x.axis.Var)) %>%
# \_Create new variable with total by group ----
group_by(x.axis.Var) %>%
mutate(total.by.x = sum(values)) %>%
# \_Order the columns ----
select(x.axis.Var, cat.Var, group.id, start.Bar, values, end.Bar, total.by.x)
Isso produz:
x.axis.Var cat.Var group.id start.Bar values end.Bar total.by.x
<fct> <fct> <int> <dbl> <dbl> <dbl> <dbl>
1 Widgets High End 1 0 600 600 1000
2 Widgets Mid Range 1 600 300 900 1000
3 Widgets Low End 1 900 100 1000 1000
4 Gridgets High End 2 1000 500 1500 780
5 Gridgets Mid Range 2 1500 200 1700 780
6 Gridgets Low End 2 1700 80 1780 780
7 Groms High End 3 1780 300 2080 800
8 Groms Mid Range 3 2080 300 2380 800
9 Groms Low End 3 2380 200 2580 800
10 Wobs High End 4 2580 200 2780 600
11 Wobs Mid Range 4 2780 250 3030 600
12 Wobs Low End 4 3030 150 3180 600
13 Total High End 5 0 1600 1600 3180
14 Total Mid Range 5 1600 1050 2650 3180
15 Total Low End 5 2650 530 3180 3180
Então, eu posso usar o seguinte código para obter o gráfico que eu quero:
ggplot(df.tmp, aes(x = group.id, fill = cat.Var)) +
# \_Simple Waterfall Chart ----
geom_rect(aes(x = group.id,
xmin = group.id - 0.25, # control bar gap width
xmax = group.id + 0.25,
ymin = end.Bar,
ymax = start.Bar),
color="black",
alpha=0.95) +
# \_Lines Between Bars ----
geom_segment(aes(x=ifelse(group.id == last(group.id),
last(group.id),
group.id+0.25),
xend=ifelse(group.id == last(group.id),
last(group.id),
group.id+0.75),
y=ifelse(cat.Var == "Low End",
end.Bar,
# these will be removed once we set the y limits
max(end.Bar)*2),
yend=ifelse(cat.Var == "Low End",
end.Bar,
# these will be removed once we set the y limits
max(end.Bar)*2)),
colour="black") +
# \_Numbers inside bars (each category) ----
geom_text(
mapping =
aes(
label = ifelse(values < 150,
"",
ifelse(nchar(values) == 3,
as.character(values),
sub("(.{1})(.*)", "\\1.\\2",
as.character(values)
)
)
),
y = rowSums(cbind(start.Bar,values/2))
),
color = "white",
fontface = "bold"
) +
# \_Total for each category above bars ----
geom_text(
mapping =
aes(
label = ifelse(cat.Var != "Low End",
"",
ifelse(nchar(total.by.x) == 3,
as.character(total.by.x),
sub("(.{1})(.*)", "\\1.\\2",
as.character(total.by.x)
)
)
),
y = end.Bar+200
),
color = "#4e4d47",
fontface = "bold"
) +
# \_Change colors ----
scale_fill_manual(values=c('#c8f464','#ff6969','#55646e')) +
# \_Change y axis to same scale as original ----
scale_y_continuous(
expand=c(0,0),
limits = c(0, 3500),
breaks = seq(0, 3500, 500),
labels = ifelse(nchar(seq(0, 3500, 500)) < 4,
as.character(seq(0, 3500, 500)),
sub("(.{1})(.*)", "\\1.\\2",
as.character(seq(0, 3500, 500))
)
)
) +
# \_Add tick marks on x axis to look like the original plot ----
scale_x_continuous(
expand=c(0,0),
limits = c(min(df.tmp$group.id)-0.5,max(df.tmp$group.id)+0.5),
breaks = c(min(df.tmp$group.id)-0.5,
unique(df.tmp$group.id),
unique(df.tmp$group.id) + 0.5
),
labels =
c("",
as.character(unique(df.tmp$x.axis.Var)),
rep(c(""), length(unique(df.tmp$x.axis.Var)))
)
) +
# \_Theme options to make it look like the original plot ----
theme(
text = element_text(size = 14, color = "#4e4d47"),
axis.text = element_text(size = 10, color = "#4e4d47", face = "bold"),
axis.text.y = element_text(margin = margin(r = 0.3, unit = "cm")),
axis.ticks.x =
element_line(color =
c("black",
rep(NA, length(unique(df.tmp$x.axis.Var))),
rep("black", length(unique(df.tmp$x.axis.Var))-1)
)
),
axis.line = element_line(colour = "#4e4d47", size = 0.5),
axis.ticks.length = unit(.15, "cm"),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.background = element_blank(),
plot.margin = unit(c(1, 1, 1, 1), "lines"),
legend.text = element_text(size = 10,
color = "#4e4d47",
face = "bold",
margin = margin(l = 0.25, unit = "cm")
),
legend.title = element_blank()
)
E o gráfico final: