I know this is probably simple and straightforward yet all the different methods I have tried fail to keep the fill color. I would like to have the first geom_point(data = may_sonde_ph_situ) have a black outline around the points so they stand out on the figure. There is only 12 data points being plotted with that geom_point.
I would also like to have the legend for those geom_point shapes be the appropriate color as what is displayed on the figure. Right now they are shape and color coded to their sight name and it matches colors based on sight for the other geom_point call however on the legend they are black.
Code
annotation_ph_sonde <- data.frame( x = c("2023-05-20", "2023-05-20"), y = c(8.6,6.4), label = c("TMDL Numeric Target Upper Limit = 8.5", "TMDL Numeric Target Lower Limit = 6.5"))ggplot() + geom_point(data = may_sonde_ph_situ, aes(x = sample_date_time, y = result, color = station_name, shape = station_name, group = station_name), size = 3)+ geom_point(data = may_sonde_ph, aes(x = sample_date_time, y = result, color = station_name, group = station_name), size = 0.8, alpha = 0.6) + theme_classic() + geom_hline(yintercept = c(8.5, 6.5), linetype = "solid", color = "black", size = 0.7) + scale_color_viridis( discrete = TRUE, name = NULL, guide = guide_legend(order = 1) ) + ggnewscale::new_scale_color() + scale_color_viridis( discrete = TRUE, name = NULL, guide = guide_legend(order = 2) ) + ggtitle(paste0("May 2023 - pH")) + theme( plot.title = element_text(hjust = 0.5), axis.title.x = element_blank(), legend.position = "bottom", legend.box = "vertical", legend.direction = "horizontal", panel.grid.major.y = element_line(size = .01, color = "grey60"), legend.margin = margin(), panel.grid.major.x = element_line(size=.01, color="grey60" ), axis.text.x = element_text(angle = 45, hjust = 1), legend.title=element_blank()) + labs(y="pH") + scale_y_continuous(limits = c(6, 9), breaks = seq(6, 9, by = 0.5), expand = c(0,0)) + geom_text(data = annotation_ph_sonde, aes(x=x, y=y, label=label))+ guides(color = guide_legend(nrow = 1, byrow = TRUE)) + scale_x_discrete(breaks = unique(may_sonde_ph$date), labels = unique(may_sonde_ph$date), expand = c(0.04,0.04))
Sample Data
may_sonde_ph <- structure(list(station_name = c("TMDL-R1", "TMDL-R2", "TMDL-R3", "TMDL-R4", "TMDL-R1", "TMDL-R2", "TMDL-R3", "TMDL-R4", "TMDL-R1", "TMDL-R2", "TMDL-R3", "TMDL-R4", "TMDL-R1", "TMDL-R2", "TMDL-R3", "TMDL-R4", "TMDL-R1", "TMDL-R2", "TMDL-R3", "TMDL-R4", "TMDL-R1", "TMDL-R2", "TMDL-R3", "TMDL-R4", "TMDL-R1", "TMDL-R2", "TMDL-R3", "TMDL-R4", "TMDL-R1", "TMDL-R2"), sample_date_time = c("2023-05-12 11:00:00", "2023-05-12 11:00:00", "2023-05-12 11:00:00", "2023-05-12 11:00:00", "2023-05-12 11:15:00", "2023-05-12 11:15:00", "2023-05-12 11:15:00", "2023-05-12 11:15:00", "2023-05-12 11:30:00", "2023-05-12 11:30:00", "2023-05-12 11:30:00", "2023-05-12 11:30:00", "2023-05-12 11:45:00", "2023-05-12 11:45:00", "2023-05-12 11:45:00", "2023-05-12 11:45:00", "2023-05-12 12:00:00", "2023-05-12 12:00:00", "2023-05-12 12:00:00", "2023-05-12 12:00:00", "2023-05-12 12:15:00", "2023-05-12 12:15:00", "2023-05-12 12:15:00", "2023-05-12 12:15:00", "2023-05-12 12:30:00", "2023-05-12 12:30:00", "2023-05-12 12:30:00", "2023-05-12 12:30:00", "2023-05-12 12:45:00", "2023-05-12 12:45:00"), parameter = c("pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH"), result = c(8.32, 8.24, 8.39, 8.17, 8.32, 8.23, 8.37, 8.12, 8.32, 8.22, 8.35, 8.05, 8.32, 8.21, 8.34, 8, 8.32, 8.19, 8.32, 7.97, 8.32, 8.17, 8.3, 7.94, 8.32, 8.14, 8.29, 7.9, 8.32, 8.12), units = c("None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None"), comments = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), date = c("2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12", "2023-05-12"), ym = c("2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05")), row.names = c(NA, 30L), class = "data.frame")may_sonde_ph_situ <- structure(list(program = c("Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL", "Ventura River TMDL"), station_name = c("TMDL-R1", "TMDL-R1", "TMDL-R1", "TMDL-R2", "TMDL-R2", "TMDL-R2", "TMDL-R3", "TMDL-R3", "TMDL-R3", "TMDL-R4", "TMDL-R4", "TMDL-R4"), sample_date_time = c("2023-05-12 06:17:00", "2023-05-19 04:42:00", "2023-05-26 04:55:00", "2023-05-12 05:26:00", "2023-05-19 04:11:00", "2023-05-26 04:20:00", "2023-05-12 04:30:00", "2023-05-19 03:33:00", "2023-05-26 03:25:00", "2023-05-12 03:36:00", "2023-05-19 02:40:00", "2023-05-26 02:45:00"), parameter = c("pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH", "pH"), result = c(8.29, 8.38, 8.35, 8.35, 8.41, 8.3, 8.3, 8.34, 8.26, 8, 7.84, 7.79), units = c("None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None", "None"), date = c("2023-05-12", "2023-05-19", "2023-05-26", "2023-05-12", "2023-05-19", "2023-05-26", "2023-05-12", "2023-05-19", "2023-05-26", "2023-05-12", "2023-05-19", "2023-05-26"), ym = c("2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05", "2023-05")), row.names = c(NA, -12L), class = "data.frame")