Y축으로 놓는 그래프는 언제나 힘들었다. log Scale을 통해 값의 크기를 줄이기는 하지만, Y축을 어떻게 표현하는 것이 좋을지에 대한 고민은 늘 있어왔다. log Scale을 적용했을 때와 그렇지 않을 때의 그래프를 비교해본다. dviz.supp 패키지는 저자인 Claus O. Wilke의 GitHub Repo에서 가져와야 한다.
> sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.6
# One-time setup: install only the packages that are missing, so re-running
# the script does not reinstall everything on each execution.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("cowplot", quietly = TRUE)) {
  devtools::install_github("wilkelab/cowplot")
}
if (!requireNamespace("colorspace", quietly = TRUE)) {
  install.packages("colorspace")
}
if (!requireNamespace("colorblindr", quietly = TRUE)) {
  devtools::install_github("clauswilke/colorblindr")
}
if (!requireNamespace("dviz.supp", quietly = TRUE)) {
  devtools::install_github("clauswilke/dviz.supp")
}

# Attach the packages in the same order as before so masking is unchanged.
library(cowplot)
library(colorspace)
library(colorblindr)
library(dviz.supp) # book author's personal repo package: required for the book's data and figures
library(lubridate) # date handling
library(forcats) # factor handling
library(tidyr) # data wrangling
library(ggrepel) # non-overlapping text labels for plots
library(kableExtra) # styling of output tables as HTML
dviz.supp 패키지의 `US_census` 데이터를 기반으로 한다.
소스코드 참조: https://github.com/clauswilke/dataviz/blob/master/coordinate_systems_axes.Rmd
# Fix the RNG seed so the randomly chosen county labels are reproducible.
set.seed(3878)

# One row per Texas county, ordered from most to least populous.
#   popratio    : county population divided by the median county population
#   index       : rank after sorting by popratio (1 = largest)
#   label       : name for the top 3, the bottom 3, and a random ~4% of counties
#   label_large : name for the top 6 counties only
tx_counties <- US_census %>%
  filter(state == "Texas") %>%
  select(name, pop2010) %>%
  # Keep only the part of the name before " County".
  extract(name, "county", regex = "(.+) County") %>%
  mutate(popratio = pop2010 / median(pop2010)) %>%
  arrange(desc(popratio)) %>%
  mutate(
    # row_number() is safe on an empty table, unlike 1:n().
    index = row_number(),
    label = ifelse(index <= 3 | index > n() - 3 | runif(n()) < .04, county, ""),
    label_large = ifelse(index <= 6, county, "")
  )

# Render the result as a striped, scrollable HTML table. kable() is called
# through its namespace because no library() call visible in this file
# attaches knitr.
knitr::kable(tx_counties) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "500px", height = "200px")
| county | pop2010 | popratio | index | label | label_large |
|---|---|---|---|---|---|
| Harris | 4092459 | 222.6461564 | 1 | Harris | Harris |
| Dallas | 2368139 | 128.8362439 | 2 | Dallas | Dallas |
| Tarrant | 1809034 | 98.4186932 | 3 | Tarrant | Tarrant |
| Bexar | 1714773 | 93.2905174 | 4 | | Bexar |
| Travis | 1024266 | 55.7241717 | 5 | | Travis |
| El Paso | 800647 | 43.5584027 | 6 | | El Paso |
| Collin | 782341 | 42.5624830 | 7 | ||
| Hidalgo | 774769 | 42.1505359 | 8 | ||
| Denton | 662614 | 36.0488548 | 9 | ||
| Fort Bend | 585375 | 31.8467439 | 10 | ||
| Montgomery | 455746 | 24.7944073 | 11 | ||
| Williamson | 422679 | 22.9954301 | 12 | ||
| Cameron | 406220 | 22.0999946 | 13 | ||
| Nueces | 340223 | 18.5094935 | 14 | ||
| Brazoria | 313166 | 17.0374844 | 15 | ||
| Bell | 310235 | 16.8780262 | 16 | ||
| Galveston | 291309 | 15.8483760 | 17 | Galveston | |
| Lubbock | 278831 | 15.1695229 | 18 | Lubbock | |
| Jefferson | 252273 | 13.7246613 | 19 | ||
| Webb | 250304 | 13.6175399 | 20 | ||
| McLennan | 234906 | 12.7798270 | 21 | ||
| Smith | 209714 | 11.4092813 | 22 | ||
| Brazos | 194851 | 10.6006746 | 23 | ||
| Hays | 157107 | 8.5472499 | 24 | ||
| Johnson | 150934 | 8.2114140 | 25 | ||
| Ellis | 149610 | 8.1393831 | 26 | ||
| Ector | 137130 | 7.4604211 | 27 | ||
| Midland | 136872 | 7.4463849 | 28 | ||
| Guadalupe | 131533 | 7.1559219 | 29 | ||
| Taylor | 131506 | 7.1544530 | 30 | ||
| Wichita | 131500 | 7.1541265 | 31 | ||
| Gregg | 121730 | 6.6225994 | 32 | ||
| Potter | 121073 | 6.5868560 | 33 | ||
| Grayson | 120877 | 6.5761928 | 34 | ||
| Randall | 120725 | 6.5679234 | 35 | ||
| Parker | 116927 | 6.3612970 | 36 | ||
| Tom Green | 110224 | 5.9966270 | 37 | ||
| Comal | 108472 | 5.9013111 | 38 | Comal | |
| Kaufman | 103350 | 5.6226538 | 39 | ||
| Bowie | 92565 | 5.0359066 | 40 | Bowie | |
| Victoria | 86793 | 4.7218867 | 41 | ||
| Angelina | 86771 | 4.7206898 | 42 | ||
| Hunt | 86129 | 4.6857625 | 43 | ||
| Orange | 81837 | 4.4522605 | 44 | ||
| Henderson | 78532 | 4.2724553 | 45 | ||
| Rockwall | 78337 | 4.2618465 | 46 | ||
| Liberty | 75643 | 4.1152821 | 47 | ||
| Coryell | 75388 | 4.1014091 | 48 | ||
| Bastrop | 74171 | 4.0351994 | 49 | ||
| Walker | 67861 | 3.6919101 | 50 | ||
| Harrison | 65631 | 3.5705892 | 51 | ||
| San Patricio | 64804 | 3.5255971 | 52 | ||
| Nacogdoches | 64524 | 3.5103640 | 53 | ||
| Starr | 60968 | 3.3169033 | 54 | ||
| Wise | 59127 | 3.2167456 | 55 | ||
| Anderson | 58458 | 3.1803493 | 56 | ||
| Hardin | 54635 | 2.9723628 | 57 | ||
| Maverick | 54258 | 2.9518525 | 58 | ||
| Rusk | 53330 | 2.9013655 | 59 | ||
| Van Zandt | 52579 | 2.8605081 | 60 | ||
| Hood | 51182 | 2.7845057 | 61 | ||
| Cherokee | 50845 | 2.7661716 | 62 | ||
| Lamar | 49793 | 2.7089386 | 63 | ||
| Kerr | 49625 | 2.6997987 | 64 | ||
| Val Verde | 48879 | 2.6592133 | 65 | ||
| Navarro | 47735 | 2.5969751 | 66 | ||
| Medina | 46006 | 2.5029106 | 67 | ||
| Polk | 45413 | 2.4706490 | 68 | ||
| Atascosa | 44911 | 2.4433382 | 69 | ||
| Waller | 43205 | 2.3505250 | 70 | Waller | |
| Wilson | 42918 | 2.3349110 | 71 | ||
| Burnet | 42750 | 2.3257712 | 72 | ||
| Wood | 41964 | 2.2830096 | 73 | ||
| Wharton | 41280 | 2.2457973 | 74 | ||
| Jim Wells | 40838 | 2.2217507 | 75 | ||
| Upshur | 39309 | 2.1385670 | 76 | ||
| Cooke | 38437 | 2.0911267 | 77 | ||
| Brown | 38106 | 2.0731190 | 78 | ||
| Caldwell | 38066 | 2.0709428 | 79 | ||
| Erath | 37890 | 2.0613677 | 80 | ||
| Matagorda | 36702 | 1.9967358 | 81 | ||
| Hale | 36273 | 1.9733964 | 82 | ||
| Jasper | 35710 | 1.9427670 | 83 | ||
| Hopkins | 35161 | 1.9128992 | 84 | ||
| Chambers | 35096 | 1.9093629 | 85 | ||
| Hill | 35089 | 1.9089821 | 86 | ||
| Howard | 35012 | 1.9047930 | 87 | ||
| Fannin | 33915 | 1.8451118 | 88 | ||
| Washington | 33718 | 1.8343942 | 89 | ||
| Kendall | 33410 | 1.8176378 | 90 | ||
| Titus | 32334 | 1.7590991 | 91 | ||
| Kleberg | 32061 | 1.7442468 | 92 | ||
| Bee | 31861 | 1.7333660 | 93 | ||
| Cass | 30464 | 1.6573636 | 94 | ||
| Austin | 28417 | 1.5459986 | 95 | ||
| Palo Pinto | 28111 | 1.5293510 | 96 | ||
| Grimes | 26604 | 1.4473641 | 97 | ||
| Uvalde | 26405 | 1.4365377 | 98 | ||
| San Jacinto | 26384 | 1.4353952 | 99 | San Jacinto | |
| Shelby | 25448 | 1.3844731 | 100 | ||
| Gillespie | 24837 | 1.3512323 | 101 | ||
| Milam | 24757 | 1.3468799 | 102 | ||
| Fayette | 24554 | 1.3358359 | 103 | ||
| Panola | 23796 | 1.2945977 | 104 | ||
| Houston | 23732 | 1.2911158 | 105 | ||
| Limestone | 23384 | 1.2721832 | 106 | ||
| Aransas | 23158 | 1.2598879 | 107 | ||
| Hockley | 22935 | 1.2477558 | 108 | ||
| Gray | 22535 | 1.2259942 | 109 | ||
| Hutchinson | 22150 | 1.2050487 | 110 | ||
| Willacy | 22134 | 1.2041782 | 111 | ||
| Moore | 21904 | 1.1916653 | 112 | ||
| Tyler | 21766 | 1.1841576 | 113 | ||
| Calhoun | 21381 | 1.1632120 | 114 | ||
| Colorado | 20874 | 1.1356292 | 115 | ||
| Bandera | 20485 | 1.1144660 | 116 | ||
| Jones | 20202 | 1.0990697 | 117 | ||
| DeWitt | 20097 | 1.0933573 | 118 | ||
| Freestone | 19816 | 1.0780697 | 119 | ||
| Gonzales | 19807 | 1.0775801 | 120 | ||
| Montague | 19719 | 1.0727926 | 121 | ||
| Lampasas | 19677 | 1.0705076 | 122 | ||
| Deaf Smith | 19372 | 1.0539144 | 123 | ||
| Llano | 19301 | 1.0500517 | 124 | ||
| Lavaca | 19263 | 1.0479843 | 125 | ||
| Eastland | 18583 | 1.0109896 | 126 | ||
| Young | 18550 | 1.0091943 | 127 | ||
| Bosque | 18212 | 0.9908057 | 128 | ||
| Falls | 17866 | 0.9719819 | 129 | ||
| Gaines | 17526 | 0.9534846 | 130 | ||
| Frio | 17217 | 0.9366737 | 131 | ||
| Burleson | 17187 | 0.9350416 | 132 | ||
| Scurry | 16921 | 0.9205702 | 133 | ||
| Leon | 16801 | 0.9140417 | 134 | ||
| Robertson | 16622 | 0.9043034 | 135 | ||
| Lee | 16612 | 0.9037593 | 136 | ||
| Pecos | 15507 | 0.8436429 | 137 | ||
| Nolan | 15216 | 0.8278113 | 138 | ||
| Karnes | 14824 | 0.8064850 | 139 | ||
| Andrews | 14786 | 0.8044176 | 140 | ||
| Trinity | 14585 | 0.7934824 | 141 | ||
| Newton | 14445 | 0.7858658 | 142 | ||
| Jackson | 14075 | 0.7657364 | 143 | ||
| Zapata | 14018 | 0.7626353 | 144 | ||
| Lamb | 13977 | 0.7604048 | 145 | ||
| Comanche | 13974 | 0.7602416 | 146 | ||
| Dawson | 13833 | 0.7525706 | 147 | ||
| Reeves | 13783 | 0.7498504 | 148 | ||
| Madison | 13664 | 0.7433763 | 149 | ||
| Callahan | 13544 | 0.7368478 | 150 | ||
| Wilbarger | 13535 | 0.7363582 | 151 | ||
| Morris | 12934 | 0.7036614 | 152 | ||
| Red River | 12860 | 0.6996355 | 153 | ||
| Terry | 12651 | 0.6882651 | 154 | ||
| Camp | 12401 | 0.6746641 | 155 | ||
| Duval | 11782 | 0.6409880 | 156 | ||
| Zavala | 11677 | 0.6352756 | 157 | ||
| Live Oak | 11531 | 0.6273326 | 158 | ||
| Rains | 10914 | 0.5937653 | 159 | ||
| Sabine | 10834 | 0.5894130 | 160 | ||
| Clay | 10752 | 0.5849519 | 161 | ||
| Ward | 10658 | 0.5798379 | 162 | ||
| Franklin | 10605 | 0.5769545 | 163 | ||
| Marion | 10546 | 0.5737446 | 164 | ||
| Runnels | 10501 | 0.5712964 | 165 | ||
| Blanco | 10497 | 0.5710788 | 166 | ||
| Parmer | 10269 | 0.5586747 | 167 | ||
| Ochiltree | 10223 | 0.5561721 | 168 | ||
| Dimmit | 9996 | 0.5438224 | 169 | ||
| Stephens | 9630 | 0.5239106 | 170 | ||
| Mitchell | 9403 | 0.5115609 | 171 | Mitchell | |
| Brewster | 9232 | 0.5022578 | 172 | ||
| Archer | 9054 | 0.4925739 | 173 | ||
| Jack | 9044 | 0.4920298 | 174 | ||
| Coleman | 8895 | 0.4839236 | 175 | ||
| San Augustine | 8865 | 0.4822915 | 176 | ||
| Hamilton | 8517 | 0.4633589 | 177 | ||
| Somervell | 8490 | 0.4618900 | 178 | ||
| McCulloch | 8283 | 0.4506284 | 179 | ||
| Castro | 8062 | 0.4386051 | 180 | ||
| Yoakum | 7879 | 0.4286491 | 181 | ||
| Swisher | 7854 | 0.4272890 | 182 | ||
| Presidio | 7818 | 0.4253305 | 183 | ||
| Refugio | 7383 | 0.4016648 | 184 | ||
| Brooks | 7223 | 0.3929601 | 185 | ||
| Goliad | 7210 | 0.3922529 | 186 | Goliad | |
| Bailey | 7165 | 0.3898047 | 187 | ||
| Winkler | 7110 | 0.3868125 | 188 | ||
| Childress | 7041 | 0.3830586 | 189 | ||
| La Salle | 6886 | 0.3746260 | 190 | ||
| Dallam | 6703 | 0.3646700 | 191 | ||
| Garza | 6461 | 0.3515043 | 192 | ||
| Floyd | 6446 | 0.3506882 | 193 | ||
| Carson | 6182 | 0.3363256 | 194 | ||
| San Saba | 6131 | 0.3335509 | 195 | ||
| Hartley | 6062 | 0.3297971 | 196 | ||
| Crosby | 6059 | 0.3296339 | 197 | ||
| Lynn | 5915 | 0.3217997 | 198 | ||
| Haskell | 5899 | 0.3209292 | 199 | ||
| Hansford | 5613 | 0.3053697 | 200 | ||
| Wheeler | 5410 | 0.2943257 | 201 | ||
| Jim Hogg | 5300 | 0.2883412 | 202 | ||
| Delta | 5231 | 0.2845873 | 203 | ||
| Mills | 4936 | 0.2685382 | 204 | ||
| Martin | 4799 | 0.2610848 | 205 | ||
| Kimble | 4607 | 0.2506392 | 206 | ||
| Crane | 4375 | 0.2380175 | 207 | ||
| Hardeman | 4139 | 0.2251782 | 208 | ||
| Sutton | 4128 | 0.2245797 | 209 | ||
| Concho | 4087 | 0.2223492 | 210 | ||
| Mason | 4012 | 0.2182689 | 211 | ||
| Fisher | 3974 | 0.2162015 | 212 | ||
| Hemphill | 3807 | 0.2071160 | 213 | ||
| Baylor | 3726 | 0.2027093 | 214 | ||
| Crockett | 3719 | 0.2023285 | 215 | ||
| Knox | 3719 | 0.2023285 | 216 | ||
| Donley | 3677 | 0.2000435 | 217 | ||
| Kinney | 3598 | 0.1957456 | 218 | ||
| Hudspeth | 3476 | 0.1891083 | 219 | ||
| Schleicher | 3461 | 0.1882923 | 220 | ||
| Shackelford | 3378 | 0.1837767 | 221 | ||
| Reagan | 3367 | 0.1831783 | 222 | ||
| Upton | 3355 | 0.1825254 | 223 | ||
| Hall | 3353 | 0.1824166 | 224 | ||
| Coke | 3320 | 0.1806213 | 225 | ||
| Real | 3309 | 0.1800228 | 226 | Real | |
| Lipscomb | 3302 | 0.1796420 | 227 | Lipscomb | |
| Cochran | 3127 | 0.1701213 | 228 | ||
| Collingsworth | 3057 | 0.1663130 | 229 | ||
| Sherman | 3034 | 0.1650617 | 230 | ||
| Dickens | 2444 | 0.1329634 | 231 | ||
| Culberson | 2398 | 0.1304608 | 232 | ||
| Jeff Davis | 2342 | 0.1274142 | 233 | ||
| Menard | 2242 | 0.1219738 | 234 | ||
| Oldham | 2052 | 0.1116370 | 235 | ||
| Edwards | 2002 | 0.1089168 | 236 | ||
| Armstrong | 1901 | 0.1034220 | 237 | ||
| Throckmorton | 1641 | 0.0892770 | 238 | ||
| Briscoe | 1637 | 0.0890594 | 239 | ||
| Irion | 1599 | 0.0869920 | 240 | ||
| Cottle | 1505 | 0.0818780 | 241 | ||
| Stonewall | 1490 | 0.0810620 | 242 | ||
| Foard | 1336 | 0.0726837 | 243 | ||
| Glasscock | 1226 | 0.0666993 | 244 | ||
| Motley | 1210 | 0.0658288 | 245 | ||
| Sterling | 1143 | 0.0621838 | 246 | ||
| Terrell | 984 | 0.0535335 | 247 | ||
| Roberts | 929 | 0.0505413 | 248 | ||
| Kent | 808 | 0.0439584 | 249 | ||
| McMullen | 707 | 0.0384636 | 250 | ||
| Borden | 641 | 0.0348730 | 251 | ||
| Kenedy | 416 | 0.0226321 | 252 | Kenedy | |
| King | 286 | 0.0155595 | 253 | King | |
| Loving | 82 | 0.0044611 | 254 | Loving |
# Font family applied to the text elements of the figures.
temp_font <- "Times New Roman"

# Scatter plot of county rank against population ratio on a linear y-axis,
# with the counties named in label_large annotated.
counties_lin <- ggplot(tx_counties, aes(x = index, y = popratio)) +
  geom_point(color = "#0072B2", size = 0.5) +
  geom_text_repel(
    aes(label = label_large),
    family = temp_font,
    color = "black",
    point.padding = .4,
    min.segment.length = 0
  ) +
  scale_x_continuous(
    name = "Texas counties, from most to least populous",
    breaks = NULL, # no x-axis ticks; commented-out alternative: c(1, 50*(1:5))
    limits = c(.5, nrow(tx_counties) + .5),
    expand = c(0, 0)
  ) +
  scale_y_continuous(name = "population number / median") +
  theme_dviz_hgrid(font_family = temp_font) +
  theme(
    plot.margin = margin(3, 7, 3, 1.5),
    axis.line = element_blank()
  )

# Stamp the figure as "bad": a red label anchored at the top-right corner
# and a red bar drawn along the right edge of the canvas.
ggdraw(counties_lin, clip = "on") +
  draw_text(
    paste0("bad", " "),
    x = 1, y = 1, hjust = 1, vjust = 1.1,
    size = 14, angle = 0, alpha = 1,
    color = "#d02138", family = temp_font, fontface = "plain"
  ) +
  draw_line(c(1, 1), c(0, 1), color = "#d02138", size = 2.8, alpha = 1)
선형(linear) 스케일은 대개 데이터를 정확하게 보여주기도 하지만, 독자에게 보다 강한 메시지를 주기 위해서는 비선형 스케일이 더 좋을 때가 있다.

ggplot(tx_counties, aes(x = index, y = popratio)) +
# Layers of the log-scale figure, continuing the ggplot() call opened on the
# line above.
  # Dashed reference line at ratio 1, i.e. a county at the median population.
  geom_hline(yintercept = 1, color = "grey40", linetype = 2) +
  geom_point(color = "#0072B2", size = 0.5) +
  geom_text_repel(
    aes(label = label),
    family = temp_font,
    color = "black",
    point.padding = .4,
    min.segment.length = 0
  ) +
  scale_x_continuous(
    name = "Texas counties, from most to least populous",
    breaks = NULL, # no x-axis ticks; commented-out alternative: c(1, 50*(1:5))
    limits = c(.5, nrow(tx_counties) + .5),
    expand = c(0, 0)
  ) +
  # Log10 y-axis with one break per power of ten from 0.01 to 100.
  scale_y_log10(
    name = "population number / median",
    labels = label_log10,
    breaks = c(.01, .1, 1, 10, 100)
  ) +
  theme_dviz_hgrid(font_family = temp_font) +
  theme(
    plot.margin = margin(3, 7, 3, 1.5),
    axis.line = element_blank()
  )
자세한 건 교재를 구매하자.
Wilke, C. (2019). Fundamentals of Data Visualization. Retrieved July 28, 2020, from https://serialmentor.com/dataviz/