# Load necessary libraries
library(ggplot2)
library(dplyr)
library(ggpubr)
library(corrplot)
# Fix the RNG state so the simulated data are the same on every run
set.seed(43)
################################
# The graph was too complicated
################################
# Shuffle the eight predefined means into a random order; position i of the
# shuffled vector is used as the true mean of group i
group_means <- sample(c(10, 15, 20, 20.01, 30, 35, 40, 40.2))
# Show which mean ended up assigned to which group
group_means
# Simulate 30 observations for each of the 8 groups (240 rows in total):
#   Group - factor with levels "1" to "8", each filling 30 consecutive rows
#   Value - normal draws with standard deviation 5, centred on the row's
#           group mean
data_too_complicated <- data.frame(
  Group = factor(rep(seq_len(8), each = 30)),
  # rnorm() is vectorised over `mean`, so one call with every group mean
  # repeated 30 times yields the same values, in the same order, as a
  # separate rnorm(30, ...) call per group
  Value = rnorm(8 * 30, mean = rep(group_means, each = 30), sd = 5)
)
# Boxplot of Value per client group with raw points and every pairwise
# t-test overlaid (the example this section presents as "too complicated")
ggplot(data_too_complicated, aes(x = Group, y = Value, fill = Group)) +
  # One box per group summarising the distribution of its values
  geom_boxplot() +
  # Overlay the individual observations; width = 0.2 limits the horizontal
  # spread and alpha = 0.5 makes the points semi-transparent
  geom_jitter(width = 0.2, alpha = 0.5) +
  # Bracket every pairwise combination of Group levels with a t-test.
  # label = "p.signif" prints significance stars instead of p-values and
  # hide.ns = TRUE suppresses the "ns" label of non-significant comparisons
  stat_compare_means(
    comparisons = combn(
      levels(data_too_complicated$Group), 2,
      simplify = FALSE
    ),
    method = "t.test", label = "p.signif", hide.ns = TRUE
  ) +
  # Minimal base theme
  theme_minimal() +
  # Title, axis labels and the legend title of the fill scale
  labs(
    title = "Estimated Profit Per Client Group",
    x = "Client Group",
    y = "Value",
    fill = "Client Group"
  ) +
  # y-axis: labels in scientific notation, roughly 10 "pretty" breaks
  scale_y_continuous(
    labels = scales::scientific,
    breaks = scales::pretty_breaks(n = 10)
  ) +
  # Larger y-axis tick labels and a black frame around the whole plot
  theme(
    axis.text.y = element_text(size = 12),
    # `linewidth` replaces the `size` argument of element_rect(), which is
    # deprecated as of ggplot2 3.4.0 and triggers a warning when used
    plot.background = element_rect(colour = "black", fill = NA, linewidth = 1)
  )
###########################
# The graph was too simple
###########################
# Hand-entered summary data for the two promotions, one row per promotion:
#   Promotion - promotion label
#   Value     - purchase figure; 72.5 and 48 are approximately
#               23 * 1.5 * 2.1 and 23 * 2.1 respectively
#   SE        - standard error attached to each Value
data_too_simple <- data.frame(
  Promotion = c("Promotion A", "Promotion B"),
  Value = c(72.5, 48),
  SE = c(2.5, 2)
)
# Store the labels as a factor (levels in alphabetical order: A, then B)
data_too_simple$Promotion <- factor(data_too_simple$Promotion)
# Bar chart comparing the two promotions, with standard-error bars and a
# percentage label above each bar
ggplot(data_too_simple, aes(x = Promotion, y = Value, fill = Promotion)) +
  # Bars whose heights are the Value column itself; geom_col() is the
  # idiomatic equivalent of geom_bar(stat = "identity")
  geom_col(position = position_dodge(), width = 0.7) +
  # Error bars spanning Value +/- SE; width sets the cap size and the dodge
  # width matches the bar width so the bars stay centred
  geom_errorbar(
    aes(ymin = Value - SE, ymax = Value + SE),
    width = 0.2,
    position = position_dodge(0.7)
  ) +
  # Rounded Value followed by "%"; vjust = -1.5 lifts the label above the bar
  geom_text(aes(label = paste0(round(Value), "%")), vjust = -1.5, size = 5) +
  # Fixed fill colour for each promotion
  scale_fill_manual(
    values = c("Promotion A" = "#FF5733", "Promotion B" = "#33C3FF")
  ) +
  # Minimal base theme
  theme_minimal() +
  # Title, axis labels and a subtitle explaining the error bars
  labs(
    title = "Comparison of Promotion A and Promotion B",
    x = "Promotion",
    y = "Customer Purchase (%)",
    subtitle = "Error bars represent standard errors"
  ) +
  # y-axis fixed to 0-100 with a break every 10 units
  scale_y_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 10)) +
  # Text sizes, bold title and a black frame around the whole plot
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16, face = "bold"),
    # `linewidth` replaces the `size` argument of element_rect(), which is
    # deprecated as of ggplot2 3.4.0 and triggers a warning when used
    plot.background = element_rect(colour = "black", fill = NA, linewidth = 1)
  )
##########################################
# Graph leads to stakeholder tunnel vision
##########################################
# Make the built-in mtcars data set available in the workspace
data(mtcars)
# Keep only the seven variables of interest. This overwrites the workspace
# copy, so `mtcars` holds just these columns from here on
mtcars <- select(mtcars, wt, hp, cyl, disp, qsec, mpg, drat)
# Pairwise correlations between the retained columns
cor_matrix <- cor(mtcars)
# Replace the terse column codes with readable labels on both dimensions;
# the labels are listed in the same order as the columns selected above
dimnames(cor_matrix) <- rep(
  list(c(
    "Weight", "Horsepower", "Cylinders", "Displacement",
    "1/4 Mile Time", "Miles per Gallon", "Rear Axle Ratio"
  )),
  times = 2
)
# Draw the upper triangle of the correlation matrix, one circle per pair
corrplot(
  cor_matrix,
  method = "circle",
  type = "upper",
  # 200-step colour ramp running from red through white to blue, so negative
  # correlations appear red and positive ones blue
  col = colorRampPalette(c("red", "white", "blue"))(200),
  # Variable labels: slightly reduced size, drawn in black
  tl.col = "black",
  tl.cex = 0.8,
  # Colour legend: placed on the right, slightly reduced text size
  cl.pos = "r",
  cl.cex = 0.8,
  # Plot title
  title = "Correlation of Important Variables",
  # Margins in the order bottom, left, top, right
  mar = c(1, 1, 2, 1)
)
# Frame the whole figure region with a thick black border
box(which = "figure", col = "black", lwd = 3)
##########################################
# When Data Reveal an Uncomfortable Truth
##########################################
# Made-up business metric for seven European cities, one row per city,
# entered from the highest metric value down to the lowest
data_uncomfortable_truth <- data.frame(
  City = c(
    "London", "Paris", "Berlin", "Madrid",
    "Rome", "Amsterdam", "Vienna"
  ),
  Business_Metric = c(
    85, 78, 65, 60,
    55, 50, 45
  )
)
# Sort the rows by Business Metric, largest first, and keep the sorted frame
data_uncomfortable_truth <- data_uncomfortable_truth %>%
  arrange(desc(Business_Metric))
# Bar chart of the metric per city, bars ordered from highest to lowest
data_uncomfortable_truth %>%
  # reorder() on the negated metric places the largest value first on the
  # x-axis; each city gets its own fill colour
  ggplot(aes(
    x = reorder(City, -Business_Metric),
    y = Business_Metric,
    fill = City
  )) +
  # Bars whose heights are the metric itself; geom_col() is the idiomatic
  # equivalent of geom_bar(stat = "identity")
  geom_col(width = 0.7) +
  # Minimal base theme
  theme_minimal() +
  # Title and axis labels
  labs(
    title = "Business Metric by City",
    x = "City",
    y = "Business Metric"
  ) +
  # Slanted city names, bold title, no legend (the x-axis already names each
  # bar) and a black frame around the whole plot
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none",
    # `linewidth` replaces the `size` argument of element_rect(), which is
    # deprecated as of ggplot2 3.4.0 and triggers a warning when used
    plot.background = element_rect(colour = "black", fill = NA, linewidth = 1)
  )
# Povești adevărate din câmpul de luptă (de date) – Partea 1: Comunicarea despre date
# Pe același subiect
