Y축으로 놓는 그래프는 언제나 힘들었다. log Scale을 통해 값의 크기를 줄이기는 하지만, Y축을 어떻게 표현하는 것이 좋을지에 대한 고민은 늘 있어왔다. log Scale을 적용했을 때와 그렇지 않을 때의 그래프를 비교해본다.
dviz.supp는 저자인 Claus O. Wilke의 Github Repo에서 가져와야 한다.
> sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.6
# One-time setup: install the CRAN and GitHub packages, then attach them.
# devtools is only needed for devtools::install_github() below.
install.packages("devtools")
devtools::install_github("wilkelab/cowplot")
library(cowplot)
install.packages("colorspace")
library(colorspace)
devtools::install_github("clauswilke/colorblindr")
library(colorblindr)
devtools::install_github("clauswilke/dviz.supp")
library(dviz.supp)
library(lubridate) # date/time handling
library(forcats) # factor (categorical variable) handling
library(tidyr) # data reshaping/handling
library(ggrepel) # repelled (non-overlapping) text labels for plots
library(dviz.supp) # book author's personal GitHub package (already attached above, so this call is redundant); required for the book's data sets and for reproducing its figures
library(kableExtra) # renders the printed data table as styled HTML
US_census 데이터를 기반으로 한다.
소스코드 참조: https://github.com/clauswilke/dataviz/blob/master/coordinate_systems_axes.Rmd
# Build the Texas county table used by the plots below.
#
# Columns produced:
#   county      - county name with the trailing " County" removed
#   pop2010     - 2010 population as stored in US_census
#   popratio    - pop2010 divided by the median pop2010 of the Texas counties
#   index       - rank after sorting by popratio, 1 = most populous
#   label       - county name for the top 3, the bottom 3, and a random
#                 ~4% of the remaining rows; "" otherwise
#   label_large - county name for the top 6 rows; "" otherwise
#
# The seed is fixed so the randomly labelled counties are reproducible.
set.seed(3878)
tx_counties <- US_census %>%
  filter(state == "Texas") %>%
  select(name, pop2010) %>%
  extract(name, "county", regex = "(.+) County") %>%
  mutate(popratio = pop2010 / median(pop2010)) %>%
  arrange(desc(popratio)) %>%
  mutate(
    # seq_len() instead of 1:n() so an empty input yields an empty index
    # rather than c(1, 0).
    index = seq_len(n()),
    label = ifelse(
      index <= 3 | index > n() - 3 | runif(n()) < .04,
      county,
      ""
    ),
    label_large = ifelse(index <= 6, county, "")
  )
# Print the county table as a striped, hover-highlighted HTML table inside a
# 500px-wide, 200px-tall scrollable box.
scroll_box(
  kable_styling(
    kable(tx_counties),
    bootstrap_options = c("striped", "hover")
  ),
  width = "500px",
  height = "200px"
)
county | pop2010 | popratio | index | label | label_large |
---|---|---|---|---|---|
Harris | 4092459 | 222.6461564 | 1 | Harris | Harris |
Dallas | 2368139 | 128.8362439 | 2 | Dallas | Dallas |
Tarrant | 1809034 | 98.4186932 | 3 | Tarrant | Tarrant |
Bexar | 1714773 | 93.2905174 | 4 | Bexar | |
Travis | 1024266 | 55.7241717 | 5 | Travis | |
El Paso | 800647 | 43.5584027 | 6 | El Paso | |
Collin | 782341 | 42.5624830 | 7 | ||
Hidalgo | 774769 | 42.1505359 | 8 | ||
Denton | 662614 | 36.0488548 | 9 | ||
Fort Bend | 585375 | 31.8467439 | 10 | ||
Montgomery | 455746 | 24.7944073 | 11 | ||
Williamson | 422679 | 22.9954301 | 12 | ||
Cameron | 406220 | 22.0999946 | 13 | ||
Nueces | 340223 | 18.5094935 | 14 | ||
Brazoria | 313166 | 17.0374844 | 15 | ||
Bell | 310235 | 16.8780262 | 16 | ||
Galveston | 291309 | 15.8483760 | 17 | Galveston | |
Lubbock | 278831 | 15.1695229 | 18 | Lubbock | |
Jefferson | 252273 | 13.7246613 | 19 | ||
Webb | 250304 | 13.6175399 | 20 | ||
McLennan | 234906 | 12.7798270 | 21 | ||
Smith | 209714 | 11.4092813 | 22 | ||
Brazos | 194851 | 10.6006746 | 23 | ||
Hays | 157107 | 8.5472499 | 24 | ||
Johnson | 150934 | 8.2114140 | 25 | ||
Ellis | 149610 | 8.1393831 | 26 | ||
Ector | 137130 | 7.4604211 | 27 | ||
Midland | 136872 | 7.4463849 | 28 | ||
Guadalupe | 131533 | 7.1559219 | 29 | ||
Taylor | 131506 | 7.1544530 | 30 | ||
Wichita | 131500 | 7.1541265 | 31 | ||
Gregg | 121730 | 6.6225994 | 32 | ||
Potter | 121073 | 6.5868560 | 33 | ||
Grayson | 120877 | 6.5761928 | 34 | ||
Randall | 120725 | 6.5679234 | 35 | ||
Parker | 116927 | 6.3612970 | 36 | ||
Tom Green | 110224 | 5.9966270 | 37 | ||
Comal | 108472 | 5.9013111 | 38 | Comal | |
Kaufman | 103350 | 5.6226538 | 39 | ||
Bowie | 92565 | 5.0359066 | 40 | Bowie | |
Victoria | 86793 | 4.7218867 | 41 | ||
Angelina | 86771 | 4.7206898 | 42 | ||
Hunt | 86129 | 4.6857625 | 43 | ||
Orange | 81837 | 4.4522605 | 44 | ||
Henderson | 78532 | 4.2724553 | 45 | ||
Rockwall | 78337 | 4.2618465 | 46 | ||
Liberty | 75643 | 4.1152821 | 47 | ||
Coryell | 75388 | 4.1014091 | 48 | ||
Bastrop | 74171 | 4.0351994 | 49 | ||
Walker | 67861 | 3.6919101 | 50 | ||
Harrison | 65631 | 3.5705892 | 51 | ||
San Patricio | 64804 | 3.5255971 | 52 | ||
Nacogdoches | 64524 | 3.5103640 | 53 | ||
Starr | 60968 | 3.3169033 | 54 | ||
Wise | 59127 | 3.2167456 | 55 | ||
Anderson | 58458 | 3.1803493 | 56 | ||
Hardin | 54635 | 2.9723628 | 57 | ||
Maverick | 54258 | 2.9518525 | 58 | ||
Rusk | 53330 | 2.9013655 | 59 | ||
Van Zandt | 52579 | 2.8605081 | 60 | ||
Hood | 51182 | 2.7845057 | 61 | ||
Cherokee | 50845 | 2.7661716 | 62 | ||
Lamar | 49793 | 2.7089386 | 63 | ||
Kerr | 49625 | 2.6997987 | 64 | ||
Val Verde | 48879 | 2.6592133 | 65 | ||
Navarro | 47735 | 2.5969751 | 66 | ||
Medina | 46006 | 2.5029106 | 67 | ||
Polk | 45413 | 2.4706490 | 68 | ||
Atascosa | 44911 | 2.4433382 | 69 | ||
Waller | 43205 | 2.3505250 | 70 | Waller | |
Wilson | 42918 | 2.3349110 | 71 | ||
Burnet | 42750 | 2.3257712 | 72 | ||
Wood | 41964 | 2.2830096 | 73 | ||
Wharton | 41280 | 2.2457973 | 74 | ||
Jim Wells | 40838 | 2.2217507 | 75 | ||
Upshur | 39309 | 2.1385670 | 76 | ||
Cooke | 38437 | 2.0911267 | 77 | ||
Brown | 38106 | 2.0731190 | 78 | ||
Caldwell | 38066 | 2.0709428 | 79 | ||
Erath | 37890 | 2.0613677 | 80 | ||
Matagorda | 36702 | 1.9967358 | 81 | ||
Hale | 36273 | 1.9733964 | 82 | ||
Jasper | 35710 | 1.9427670 | 83 | ||
Hopkins | 35161 | 1.9128992 | 84 | ||
Chambers | 35096 | 1.9093629 | 85 | ||
Hill | 35089 | 1.9089821 | 86 | ||
Howard | 35012 | 1.9047930 | 87 | ||
Fannin | 33915 | 1.8451118 | 88 | ||
Washington | 33718 | 1.8343942 | 89 | ||
Kendall | 33410 | 1.8176378 | 90 | ||
Titus | 32334 | 1.7590991 | 91 | ||
Kleberg | 32061 | 1.7442468 | 92 | ||
Bee | 31861 | 1.7333660 | 93 | ||
Cass | 30464 | 1.6573636 | 94 | ||
Austin | 28417 | 1.5459986 | 95 | ||
Palo Pinto | 28111 | 1.5293510 | 96 | ||
Grimes | 26604 | 1.4473641 | 97 | ||
Uvalde | 26405 | 1.4365377 | 98 | ||
San Jacinto | 26384 | 1.4353952 | 99 | San Jacinto | |
Shelby | 25448 | 1.3844731 | 100 | ||
Gillespie | 24837 | 1.3512323 | 101 | ||
Milam | 24757 | 1.3468799 | 102 | ||
Fayette | 24554 | 1.3358359 | 103 | ||
Panola | 23796 | 1.2945977 | 104 | ||
Houston | 23732 | 1.2911158 | 105 | ||
Limestone | 23384 | 1.2721832 | 106 | ||
Aransas | 23158 | 1.2598879 | 107 | ||
Hockley | 22935 | 1.2477558 | 108 | ||
Gray | 22535 | 1.2259942 | 109 | ||
Hutchinson | 22150 | 1.2050487 | 110 | ||
Willacy | 22134 | 1.2041782 | 111 | ||
Moore | 21904 | 1.1916653 | 112 | ||
Tyler | 21766 | 1.1841576 | 113 | ||
Calhoun | 21381 | 1.1632120 | 114 | ||
Colorado | 20874 | 1.1356292 | 115 | ||
Bandera | 20485 | 1.1144660 | 116 | ||
Jones | 20202 | 1.0990697 | 117 | ||
DeWitt | 20097 | 1.0933573 | 118 | ||
Freestone | 19816 | 1.0780697 | 119 | ||
Gonzales | 19807 | 1.0775801 | 120 | ||
Montague | 19719 | 1.0727926 | 121 | ||
Lampasas | 19677 | 1.0705076 | 122 | ||
Deaf Smith | 19372 | 1.0539144 | 123 | ||
Llano | 19301 | 1.0500517 | 124 | ||
Lavaca | 19263 | 1.0479843 | 125 | ||
Eastland | 18583 | 1.0109896 | 126 | ||
Young | 18550 | 1.0091943 | 127 | ||
Bosque | 18212 | 0.9908057 | 128 | ||
Falls | 17866 | 0.9719819 | 129 | ||
Gaines | 17526 | 0.9534846 | 130 | ||
Frio | 17217 | 0.9366737 | 131 | ||
Burleson | 17187 | 0.9350416 | 132 | ||
Scurry | 16921 | 0.9205702 | 133 | ||
Leon | 16801 | 0.9140417 | 134 | ||
Robertson | 16622 | 0.9043034 | 135 | ||
Lee | 16612 | 0.9037593 | 136 | ||
Pecos | 15507 | 0.8436429 | 137 | ||
Nolan | 15216 | 0.8278113 | 138 | ||
Karnes | 14824 | 0.8064850 | 139 | ||
Andrews | 14786 | 0.8044176 | 140 | ||
Trinity | 14585 | 0.7934824 | 141 | ||
Newton | 14445 | 0.7858658 | 142 | ||
Jackson | 14075 | 0.7657364 | 143 | ||
Zapata | 14018 | 0.7626353 | 144 | ||
Lamb | 13977 | 0.7604048 | 145 | ||
Comanche | 13974 | 0.7602416 | 146 | ||
Dawson | 13833 | 0.7525706 | 147 | ||
Reeves | 13783 | 0.7498504 | 148 | ||
Madison | 13664 | 0.7433763 | 149 | ||
Callahan | 13544 | 0.7368478 | 150 | ||
Wilbarger | 13535 | 0.7363582 | 151 | ||
Morris | 12934 | 0.7036614 | 152 | ||
Red River | 12860 | 0.6996355 | 153 | ||
Terry | 12651 | 0.6882651 | 154 | ||
Camp | 12401 | 0.6746641 | 155 | ||
Duval | 11782 | 0.6409880 | 156 | ||
Zavala | 11677 | 0.6352756 | 157 | ||
Live Oak | 11531 | 0.6273326 | 158 | ||
Rains | 10914 | 0.5937653 | 159 | ||
Sabine | 10834 | 0.5894130 | 160 | ||
Clay | 10752 | 0.5849519 | 161 | ||
Ward | 10658 | 0.5798379 | 162 | ||
Franklin | 10605 | 0.5769545 | 163 | ||
Marion | 10546 | 0.5737446 | 164 | ||
Runnels | 10501 | 0.5712964 | 165 | ||
Blanco | 10497 | 0.5710788 | 166 | ||
Parmer | 10269 | 0.5586747 | 167 | ||
Ochiltree | 10223 | 0.5561721 | 168 | ||
Dimmit | 9996 | 0.5438224 | 169 | ||
Stephens | 9630 | 0.5239106 | 170 | ||
Mitchell | 9403 | 0.5115609 | 171 | Mitchell | |
Brewster | 9232 | 0.5022578 | 172 | ||
Archer | 9054 | 0.4925739 | 173 | ||
Jack | 9044 | 0.4920298 | 174 | ||
Coleman | 8895 | 0.4839236 | 175 | ||
San Augustine | 8865 | 0.4822915 | 176 | ||
Hamilton | 8517 | 0.4633589 | 177 | ||
Somervell | 8490 | 0.4618900 | 178 | ||
McCulloch | 8283 | 0.4506284 | 179 | ||
Castro | 8062 | 0.4386051 | 180 | ||
Yoakum | 7879 | 0.4286491 | 181 | ||
Swisher | 7854 | 0.4272890 | 182 | ||
Presidio | 7818 | 0.4253305 | 183 | ||
Refugio | 7383 | 0.4016648 | 184 | ||
Brooks | 7223 | 0.3929601 | 185 | ||
Goliad | 7210 | 0.3922529 | 186 | Goliad | |
Bailey | 7165 | 0.3898047 | 187 | ||
Winkler | 7110 | 0.3868125 | 188 | ||
Childress | 7041 | 0.3830586 | 189 | ||
La Salle | 6886 | 0.3746260 | 190 | ||
Dallam | 6703 | 0.3646700 | 191 | ||
Garza | 6461 | 0.3515043 | 192 | ||
Floyd | 6446 | 0.3506882 | 193 | ||
Carson | 6182 | 0.3363256 | 194 | ||
San Saba | 6131 | 0.3335509 | 195 | ||
Hartley | 6062 | 0.3297971 | 196 | ||
Crosby | 6059 | 0.3296339 | 197 | ||
Lynn | 5915 | 0.3217997 | 198 | ||
Haskell | 5899 | 0.3209292 | 199 | ||
Hansford | 5613 | 0.3053697 | 200 | ||
Wheeler | 5410 | 0.2943257 | 201 | ||
Jim Hogg | 5300 | 0.2883412 | 202 | ||
Delta | 5231 | 0.2845873 | 203 | ||
Mills | 4936 | 0.2685382 | 204 | ||
Martin | 4799 | 0.2610848 | 205 | ||
Kimble | 4607 | 0.2506392 | 206 | ||
Crane | 4375 | 0.2380175 | 207 | ||
Hardeman | 4139 | 0.2251782 | 208 | ||
Sutton | 4128 | 0.2245797 | 209 | ||
Concho | 4087 | 0.2223492 | 210 | ||
Mason | 4012 | 0.2182689 | 211 | ||
Fisher | 3974 | 0.2162015 | 212 | ||
Hemphill | 3807 | 0.2071160 | 213 | ||
Baylor | 3726 | 0.2027093 | 214 | ||
Crockett | 3719 | 0.2023285 | 215 | ||
Knox | 3719 | 0.2023285 | 216 | ||
Donley | 3677 | 0.2000435 | 217 | ||
Kinney | 3598 | 0.1957456 | 218 | ||
Hudspeth | 3476 | 0.1891083 | 219 | ||
Schleicher | 3461 | 0.1882923 | 220 | ||
Shackelford | 3378 | 0.1837767 | 221 | ||
Reagan | 3367 | 0.1831783 | 222 | ||
Upton | 3355 | 0.1825254 | 223 | ||
Hall | 3353 | 0.1824166 | 224 | ||
Coke | 3320 | 0.1806213 | 225 | ||
Real | 3309 | 0.1800228 | 226 | Real | |
Lipscomb | 3302 | 0.1796420 | 227 | Lipscomb | |
Cochran | 3127 | 0.1701213 | 228 | ||
Collingsworth | 3057 | 0.1663130 | 229 | ||
Sherman | 3034 | 0.1650617 | 230 | ||
Dickens | 2444 | 0.1329634 | 231 | ||
Culberson | 2398 | 0.1304608 | 232 | ||
Jeff Davis | 2342 | 0.1274142 | 233 | ||
Menard | 2242 | 0.1219738 | 234 | ||
Oldham | 2052 | 0.1116370 | 235 | ||
Edwards | 2002 | 0.1089168 | 236 | ||
Armstrong | 1901 | 0.1034220 | 237 | ||
Throckmorton | 1641 | 0.0892770 | 238 | ||
Briscoe | 1637 | 0.0890594 | 239 | ||
Irion | 1599 | 0.0869920 | 240 | ||
Cottle | 1505 | 0.0818780 | 241 | ||
Stonewall | 1490 | 0.0810620 | 242 | ||
Foard | 1336 | 0.0726837 | 243 | ||
Glasscock | 1226 | 0.0666993 | 244 | ||
Motley | 1210 | 0.0658288 | 245 | ||
Sterling | 1143 | 0.0621838 | 246 | ||
Terrell | 984 | 0.0535335 | 247 | ||
Roberts | 929 | 0.0505413 | 248 | ||
Kent | 808 | 0.0439584 | 249 | ||
McMullen | 707 | 0.0384636 | 250 | ||
Borden | 641 | 0.0348730 | 251 | ||
Kenedy | 416 | 0.0226321 | 252 | Kenedy | |
King | 286 | 0.0155595 | 253 | King | |
Loving | 82 | 0.0044611 | 254 | Loving |
# Font family shared by the text elements of the plots.
temp_font <- "Times New Roman"

# Linear-scale scatter plot of population ratio against county rank.
# Only the six most populous counties (label_large) are annotated.
counties_lin <- ggplot(data = tx_counties, mapping = aes(index, popratio)) +
  geom_point(color = "#0072B2", size = 0.5) +
  geom_text_repel(
    mapping = aes(label = label_large),
    family = temp_font,
    color = "black",
    point.padding = 0.4,
    min.segment.length = 0
  ) +
  scale_x_continuous(
    name = "Texas counties, from most to least populous",
    breaks = NULL, # no x-axis breaks; an alternative was c(1, 50 * (1:5))
    limits = c(0.5, nrow(tx_counties) + 0.5),
    expand = c(0, 0)
  ) +
  scale_y_continuous(name = "population number / median") +
  theme_dviz_hgrid(font_family = temp_font) +
  theme(
    plot.margin = margin(t = 3, r = 7, b = 3, l = 1.5),
    axis.line = element_blank()
  )
# Draw the linear-scale plot and stamp it as "bad": a red text label anchored
# at the top-right corner plus a red vertical line along the right edge.
ggdraw(counties_lin, clip = "on") +
  draw_text(
    "bad ",
    x = 1, y = 1,
    hjust = 1, vjust = 1.1,
    size = 14, angle = 0,
    family = temp_font, fontface = "plain",
    color = "#d02138", alpha = 1
  ) +
  draw_line(
    x = c(1, 1), y = c(0, 1),
    size = 2.8, color = "#d02138", alpha = 1
  )
선형(linear)스케일
은 대개 데이터를 정확하게 보여주기도 하지만, 독자에게 보다 강한 메시지를 주기 위해서는 비선형 스케일이 더 좋을 때가 있다.
# Log-scale version of the county plot: same data and x axis as the linear
# plot, but the y axis is log10 with breaks at each power of ten from 0.01
# to 100. The dashed grey line at y = 1 marks the median county, because
# popratio is population divided by the median population.
# NOTE(review): label_log10 and theme_dviz_hgrid are not defined in this
# file; presumably they come from dviz.supp - confirm.
ggplot(tx_counties, aes(x = index, y = popratio)) +
  geom_hline(yintercept = 1, linetype = 2, color = "grey40") +
  geom_point(size = 0.5, color = "#0072B2") +
  # Uses `label` (top 3, bottom 3 and a random subset) rather than
  # `label_large`.
  geom_text_repel(
    aes(label = label),
    point.padding = .4,
    color = "black",
    min.segment.length = 0,
    family = temp_font
  ) +
  scale_y_log10(
    breaks = c(.01, .1, 1, 10, 100),
    name = "population number / median",
    labels = label_log10
  ) +
  scale_x_continuous(
    limits = c(.5, nrow(tx_counties) + .5),
    expand = c(0, 0),
    breaks = NULL, # no x-axis breaks; an alternative was c(1, 50 * (1:5))
    name = "Texas counties, from most to least populous"
  ) +
  theme_dviz_hgrid(font_family = temp_font) +
  theme(
    axis.line = element_blank(),
    plot.margin = margin(3, 7, 3, 1.5)
  )
자세한 건 교재를 구매하자.
Wilke, C. (2019). Fundamentals of Data Visualization. Retrieved July 28, 2020, from https://serialmentor.com/dataviz/