加载包和数据
library(tidyverse)
## -- Attaching packages ----------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.0 √ purrr 0.3.3
## √ tibble 2.1.3 √ dplyr 0.8.5
## √ tidyr 1.0.2 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.5.0
## -- Conflicts -------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## here() starts at D:/RBookLearning/Data-Science-and-Economics
library(skimr)
library(stringr)
数据见文件夹data1
# Read the Dragon Ball power-level data set from the project's data1 folder.
# The path is handed to here() as root-relative components with no leading
# slash: a first component that starts with "/" looks like an absolute path,
# and newer here/rprojroot releases return absolute paths unchanged instead
# of resolving them against the project root.
dball_data <- read_csv(here("R高级编程", "data1", "Dragon_Ball_Data_Set.csv"))
## Parsed with column specification:
## cols(
## Character = col_character(),
## Power_Level = col_character(),
## Saga_or_Movie = col_character(),
## Dragon_Ball_Series = col_character()
## )
# Inspect the column types: every column was read in as character.
glimpse(dball_data)
## Observations: 1,244
## Variables: 4
## $ Character <chr> "Goku", "Bulma", "Bear Thief", "Master Roshi", "...
## $ Power_Level <chr> "10", "1.5", "7", "30", "5", "8.5", "4", "8", "2...
## $ Saga_or_Movie <chr> "Emperor Pilaf Saga", "Emperor Pilaf Saga", "Emp...
## $ Dragon_Ball_Series <chr> "Dragon Ball", "Dragon Ball", "Dragon Ball", "Dr...
# Preview the first rows of the raw data.
head(dball_data)
## # A tibble: 6 x 4
## Character Power_Level Saga_or_Movie Dragon_Ball_Series
## <chr> <chr> <chr> <chr>
## 1 Goku 10 Emperor Pilaf Saga Dragon Ball
## 2 Bulma 1.5 Emperor Pilaf Saga Dragon Ball
## 3 Bear Thief 7 Emperor Pilaf Saga Dragon Ball
## 4 Master Roshi 30 Emperor Pilaf Saga Dragon Ball
## 5 Oolong 5 Emperor Pilaf Saga Dragon Ball
## 6 Yamcha 8.5 Emperor Pilaf Saga Dragon Ball
开始
在这里,我们使用正则表达式字符串匹配来过滤 Character(角色)列。stringr 包中的 str_detect() 函数检测字符串中是否存在某个模式或表达式,并返回 TRUE 或 FALSE 的逻辑值(这正是 dplyr::filter() 第二个参数所需要的值)。我还使用了 stringr::regex() 函数,并将 ignore_case 参数设置为 TRUE,使过滤不区分大小写;因此,无论写作 'Kame' 还是 'kAMe',都会被匹配到。
# Keep the rows whose Character contains "kameha", matched case-insensitively,
# then render the result as a table.
dball_data_1 <- filter(
  dball_data,
  str_detect(Character, regex(pattern = "kameha", ignore_case = TRUE))
)
knitr::kable(dball_data_1)
Master Roshi’s Max Power Kamehameha |
180 |
Emperor Pilaf Saga |
Dragon Ball |
Goku’s Kamehameha |
12 |
Emperor Pilaf Saga |
Dragon Ball |
Jackie Chun’s Max power Kamehamha |
330 |
Tournament Saga |
Dragon Ball |
Goku’s Kamehameha |
90 |
Red Ribbon Army Saga |
Dragon Ball |
Goku’s Kamehameha |
90 |
Red Ribbon Army Saga |
Dragon Ball |
Goku’s Super Kamehameha |
740 |
Piccolo Jr. Saga |
Dragon Ball |
Goku’s Kamehameha |
950 |
Saiyan Saga |
Dragon Ball Z |
Goku’s Kamehameha x3 |
36,000 |
Saiyan Saga |
Dragon Ball Z |
Goku’s Kamehameha x4 |
44,000 |
Saiyan Saga |
Dragon Ball Z |
Goku’s Angry Kamehameha |
180,000,000 |
Frieza Saga |
Dragon Ball Z |
Android 19 (Goku’s kamehameha absorbed) |
230,000,000 |
Android Saga |
Dragon Ball Z |
Goku’s Instant Kamehameha |
28,750,000,000 |
Cell Games Saga |
Dragon Ball Z |
Perfect Cell’s Earth-Destroying Kamehameha |
30,200,000,000 |
Cell Games Saga |
Dragon Ball Z |
Gohan’s Standing Kamehameha |
45,000,000,000 |
Cell Games Saga |
Dragon Ball Z |
Super Perfect Cell’s Solar Kamehameha |
80,000,000,000 |
Cell Games Saga |
Dragon Ball Z |
Gohan’s Father-Son Kamehameha |
200,000,000,000 |
Cell Games Saga |
Dragon Ball Z |
Gohan’s Electric Kamehameha |
47,100,000,000 |
Babidi Saga/Majin Buu Saga |
Dragon Ball Z |
Goku’s True Kamehameha |
2,500,000,000,000 |
Kid Buu Saga |
Dragon Ball Z |
Goku’s 10x Kamehameha |
200,000,000,000,000 |
Baby Saga |
Dragon Ball GT |
Goku’s 10x Kamehameha |
4,000,000,000,000,000 |
Baby Saga |
Dragon Ball GT |
Gohan’s Super Kamehameha |
7,000,000,000,000 |
Super 17 saga |
Dragon Ball GT |
Goku’s 10x Kamehameha |
2,000,000,000,000,000 |
Super 17 saga |
Dragon Ball GT |
Super 17 (10x Kamehameha absorbed) |
530,000,000,000,000 |
Super 17 saga |
Dragon Ball GT |
Goku’s Super Kamehameha |
34,000,000,000,000 |
Super 17 saga |
Dragon Ball GT |
Student-Teacher Kamehameha |
17,000 |
Movie 2: World’s Strongest |
Dragon Ball Z |
Goku’s Kaio-ken x3 Kamehameha |
32,000 |
Movie 2: World’s Strongest |
Dragon Ball Z |
Goku’s Kaio-ken x4 Kamehameha |
45,000 |
Movie 2: World’s Strongest |
Dragon Ball Z |
Goku’s Kaio-ken x20 Kamehameha |
400,000,000 |
Movie 5: Cooler’s Revenge |
Dragon Ball Z |
Goku’s Super Kamehameha |
25,300,000,000 |
OVA: Plan to Eradicate the Super Saiyans |
Dragon Ball Z |
Family Kamehameha |
300,000,000,000 |
Movie 10: Broly- The Second Coming |
Dragon Ball Z |
Krillin’s Kamehameha |
8,000,000 |
Movie 11: Bio-Broly |
Dragon Ball Z |
Goten’s Kamehameha |
950,000,000 |
Movie 11: Bio-Broly |
Dragon Ball Z |
Trunk’s Kamehameha |
980,000,000 |
Movie 11: Bio-Broly |
Dragon Ball Z |
Goten’s Super Kamehameha |
3,000,000,000 |
Movie 11: Bio-Broly |
Dragon Ball Z |
# Case-sensitive variant: a plain string pattern only matches the
# all-lowercase spelling "kamehameha".
dball_data_1b <- filter(dball_data, str_detect(Character, "kamehameha"))
knitr::kable(dball_data_1b)
Android 19 (Goku’s kamehameha absorbed) |
230,000,000 |
Android Saga |
Dragon Ball Z |
# Show the rows of dball_data_1 whose Character has no match in dball_data_1b,
# i.e. rows found by the case-insensitive "kameha" filter but not by the
# case-sensitive "kamehameha" filter.
knitr::kable(
  dplyr::anti_join(dball_data_1, dball_data_1b, by = "Character")
)
Master Roshi’s Max Power Kamehameha |
180 |
Emperor Pilaf Saga |
Dragon Ball |
Goku’s Kamehameha |
12 |
Emperor Pilaf Saga |
Dragon Ball |
Jackie Chun’s Max power Kamehamha |
330 |
Tournament Saga |
Dragon Ball |
Goku’s Kamehameha |
90 |
Red Ribbon Army Saga |
Dragon Ball |
Goku’s Kamehameha |
90 |
Red Ribbon Army Saga |
Dragon Ball |
Goku’s Super Kamehameha |
740 |
Piccolo Jr. Saga |
Dragon Ball |
Goku’s Kamehameha |
950 |
Saiyan Saga |
Dragon Ball Z |
Goku’s Kamehameha x3 |
36,000 |
Saiyan Saga |
Dragon Ball Z |
Goku’s Kamehameha x4 |
44,000 |
Saiyan Saga |
Dragon Ball Z |
Goku’s Angry Kamehameha |
180,000,000 |
Frieza Saga |
Dragon Ball Z |
Goku’s Instant Kamehameha |
28,750,000,000 |
Cell Games Saga |
Dragon Ball Z |
Perfect Cell’s Earth-Destroying Kamehameha |
30,200,000,000 |
Cell Games Saga |
Dragon Ball Z |
Gohan’s Standing Kamehameha |
45,000,000,000 |
Cell Games Saga |
Dragon Ball Z |
Super Perfect Cell’s Solar Kamehameha |
80,000,000,000 |
Cell Games Saga |
Dragon Ball Z |
Gohan’s Father-Son Kamehameha |
200,000,000,000 |
Cell Games Saga |
Dragon Ball Z |
Gohan’s Electric Kamehameha |
47,100,000,000 |
Babidi Saga/Majin Buu Saga |
Dragon Ball Z |
Goku’s True Kamehameha |
2,500,000,000,000 |
Kid Buu Saga |
Dragon Ball Z |
Goku’s 10x Kamehameha |
200,000,000,000,000 |
Baby Saga |
Dragon Ball GT |
Goku’s 10x Kamehameha |
4,000,000,000,000,000 |
Baby Saga |
Dragon Ball GT |
Gohan’s Super Kamehameha |
7,000,000,000,000 |
Super 17 saga |
Dragon Ball GT |
Goku’s 10x Kamehameha |
2,000,000,000,000,000 |
Super 17 saga |
Dragon Ball GT |
Super 17 (10x Kamehameha absorbed) |
530,000,000,000,000 |
Super 17 saga |
Dragon Ball GT |
Goku’s Super Kamehameha |
34,000,000,000,000 |
Super 17 saga |
Dragon Ball GT |
Student-Teacher Kamehameha |
17,000 |
Movie 2: World’s Strongest |
Dragon Ball Z |
Goku’s Kaio-ken x3 Kamehameha |
32,000 |
Movie 2: World’s Strongest |
Dragon Ball Z |
Goku’s Kaio-ken x4 Kamehameha |
45,000 |
Movie 2: World’s Strongest |
Dragon Ball Z |
Goku’s Kaio-ken x20 Kamehameha |
400,000,000 |
Movie 5: Cooler’s Revenge |
Dragon Ball Z |
Goku’s Super Kamehameha |
25,300,000,000 |
OVA: Plan to Eradicate the Super Saiyans |
Dragon Ball Z |
Family Kamehameha |
300,000,000,000 |
Movie 10: Broly- The Second Coming |
Dragon Ball Z |
Krillin’s Kamehameha |
8,000,000 |
Movie 11: Bio-Broly |
Dragon Ball Z |
Goten’s Kamehameha |
950,000,000 |
Movie 11: Bio-Broly |
Dragon Ball Z |
Trunk’s Kamehameha |
980,000,000 |
Movie 11: Bio-Broly |
Dragon Ball Z |
Goten’s Super Kamehameha |
3,000,000,000 |
Movie 11: Bio-Broly |
Dragon Ball Z |
# Power_Level is stored as text with thousands separators ("36,000"):
# strip the commas, then convert the column to numeric.
dball_data_2 <- dball_data_1 %>%
  mutate(Power_Level = as.numeric(str_remove_all(Power_Level, ",")))
knitr::kable(dball_data_2)
Master Roshi’s Max Power Kamehameha |
1.800e+02 |
Emperor Pilaf Saga |
Dragon Ball |
Goku’s Kamehameha |
1.200e+01 |
Emperor Pilaf Saga |
Dragon Ball |
Jackie Chun’s Max power Kamehamha |
3.300e+02 |
Tournament Saga |
Dragon Ball |
Goku’s Kamehameha |
9.000e+01 |
Red Ribbon Army Saga |
Dragon Ball |
Goku’s Kamehameha |
9.000e+01 |
Red Ribbon Army Saga |
Dragon Ball |
Goku’s Super Kamehameha |
7.400e+02 |
Piccolo Jr. Saga |
Dragon Ball |
Goku’s Kamehameha |
9.500e+02 |
Saiyan Saga |
Dragon Ball Z |
Goku’s Kamehameha x3 |
3.600e+04 |
Saiyan Saga |
Dragon Ball Z |
Goku’s Kamehameha x4 |
4.400e+04 |
Saiyan Saga |
Dragon Ball Z |
Goku’s Angry Kamehameha |
1.800e+08 |
Frieza Saga |
Dragon Ball Z |
Android 19 (Goku’s kamehameha absorbed) |
2.300e+08 |
Android Saga |
Dragon Ball Z |
Goku’s Instant Kamehameha |
2.875e+10 |
Cell Games Saga |
Dragon Ball Z |
Perfect Cell’s Earth-Destroying Kamehameha |
3.020e+10 |
Cell Games Saga |
Dragon Ball Z |
Gohan’s Standing Kamehameha |
4.500e+10 |
Cell Games Saga |
Dragon Ball Z |
Super Perfect Cell’s Solar Kamehameha |
8.000e+10 |
Cell Games Saga |
Dragon Ball Z |
Gohan’s Father-Son Kamehameha |
2.000e+11 |
Cell Games Saga |
Dragon Ball Z |
Gohan’s Electric Kamehameha |
4.710e+10 |
Babidi Saga/Majin Buu Saga |
Dragon Ball Z |
Goku’s True Kamehameha |
2.500e+12 |
Kid Buu Saga |
Dragon Ball Z |
Goku’s 10x Kamehameha |
2.000e+14 |
Baby Saga |
Dragon Ball GT |
Goku’s 10x Kamehameha |
4.000e+15 |
Baby Saga |
Dragon Ball GT |
Gohan’s Super Kamehameha |
7.000e+12 |
Super 17 saga |
Dragon Ball GT |
Goku’s 10x Kamehameha |
2.000e+15 |
Super 17 saga |
Dragon Ball GT |
Super 17 (10x Kamehameha absorbed) |
5.300e+14 |
Super 17 saga |
Dragon Ball GT |
Goku’s Super Kamehameha |
3.400e+13 |
Super 17 saga |
Dragon Ball GT |
Student-Teacher Kamehameha |
1.700e+04 |
Movie 2: World’s Strongest |
Dragon Ball Z |
Goku’s Kaio-ken x3 Kamehameha |
3.200e+04 |
Movie 2: World’s Strongest |
Dragon Ball Z |
Goku’s Kaio-ken x4 Kamehameha |
4.500e+04 |
Movie 2: World’s Strongest |
Dragon Ball Z |
Goku’s Kaio-ken x20 Kamehameha |
4.000e+08 |
Movie 5: Cooler’s Revenge |
Dragon Ball Z |
Goku’s Super Kamehameha |
2.530e+10 |
OVA: Plan to Eradicate the Super Saiyans |
Dragon Ball Z |
Family Kamehameha |
3.000e+11 |
Movie 10: Broly- The Second Coming |
Dragon Ball Z |
Krillin’s Kamehameha |
8.000e+06 |
Movie 11: Bio-Broly |
Dragon Ball Z |
Goten’s Kamehameha |
9.500e+08 |
Movie 11: Bio-Broly |
Dragon Ball Z |
Trunk’s Kamehameha |
9.800e+08 |
Movie 11: Bio-Broly |
Dragon Ball Z |
Goten’s Super Kamehameha |
3.000e+09 |
Movie 11: Bio-Broly |
Dragon Ball Z |
# Build the per-character data set:
#  * drop the rows whose Character mentions an "absorbed" attack;
#  * derive Character_Single by deleting the apostrophe and everything after
#    it (names without an apostrophe are left unchanged);
#  * move Character_Single to the first column.
dball_data_3 <- dball_data_2 %>%
  filter(str_detect(Character, "absorbed", negate = TRUE)) %>%
  mutate(Character_Single = str_remove_all(Character, "\\'.+")) %>%
  select(Character_Single, everything())
注意,代码中在撇号前加了两个反斜杠(\\')对它进行转义。严格来说,撇号并不是正则表达式的元字符,这里的转义并非必需,但也无害。点(.)匹配任意字符,而 + 告诉 R 让前面的点匹配一次或多次。
# Mean Power_Level per character, sorted from strongest to weakest.
kame_data_grouped <- dball_data_3 %>%
  group_by(Character_Single) %>%
  summarise(Power_Level = mean(Power_Level)) %>%
  arrange(desc(Power_Level))
kame_data_grouped
## # A tibble: 11 x 2
## Character_Single Power_Level
## <chr> <dbl>
## 1 Goku 3.46e14
## 2 Gohan 1.82e12
## 3 Family Kamehameha 3.00e11
## 4 Super Perfect Cell 8.00e10
## 5 Perfect Cell 3.02e10
## 6 Goten 1.98e 9
## 7 Trunk 9.80e 8
## 8 Krillin 8.00e 6
## 9 Student-Teacher Kamehameha 1.70e 4
## 10 Jackie Chun 3.30e 2
## 11 Master Roshi 1.80e 2
# Five-number summary (plus mean) of the per-character mean power levels.
summary(kame_data_grouped[["Power_Level"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.800e+02 4.008e+06 1.975e+09 3.170e+13 1.900e+11 3.465e+14
# Horizontal bar chart of each character's mean Kamehameha power level on a
# natural-log scale; bars are ordered by the raw mean power level.
kame_data_grouped %>%
  mutate(Power_Index = log(Power_Level)) %>%
  ggplot(
    aes(
      x = reorder(Character_Single, Power_Level),
      y = Power_Index,
      fill = Character_Single
    )
  ) +
  geom_col() +
  # Label each bar with its rounded log value, nudged past the bar end.
  geom_text(
    aes(label = round(Power_Index, 1)),
    hjust = -.2,
    colour = "#FFFFFF"
  ) +
  coord_flip() +
  scale_fill_brewer(palette = "Spectral") +
  labs(
    title = "Power Levels of Kamehamehas",
    subtitle = "By Dragon Ball characters",
    x = " ",
    y = "Power Levels (log transformed)"
  ) +
  # Dark theme: grey background, white text and axes, no grid, no legend.
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = "grey20"),
    panel.grid = element_blank(),
    legend.position = "none",
    text = element_text(colour = "#FFFFFF"),
    plot.title = element_text(colour = "#FFFFFF", face = "bold", size = 20),
    axis.line = element_line(colour = "#FFFFFF"),
    axis.title = element_text(colour = "#FFFFFF", size = 12),
    axis.text = element_text(colour = "#FFFFFF", size = 12)
  )
# Bar chart of Goku's Kamehameha power per saga/movie: power levels are
# log-transformed first and the log values are then averaged within each
# saga/movie; bars are ordered by that mean.
dball_data_3 %>%
  filter(Character_Single == "Goku") %>%
  mutate(Power_Index = log(Power_Level)) %>%
  group_by(Saga_or_Movie) %>%
  summarise(Power_Index = mean(Power_Index)) %>%
  ggplot(
    aes(
      x = reorder(Saga_or_Movie, Power_Index),
      y = Power_Index
    )
  ) +
  geom_col(fill = "#F85B1A") +
  # Label each bar with its rounded log value, placed above the bar top.
  geom_text(
    aes(label = round(Power_Index, 1)),
    vjust = -.5,
    colour = "#FFFFFF"
  ) +
  # Fixed y range of 0 to 40 (presumably to leave headroom for the labels).
  scale_y_continuous(limits = c(0, 40)) +
  labs(
    title = "Power Levels of Goku's Kamehamehas",
    subtitle = "By Saga/Movie",
    x = " ",
    y = "Power Levels (log transformed)"
  ) +
  # Dark theme: grey background, white text and axes, no grid, no legend;
  # x-axis labels are rotated 45 degrees.
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = "grey20"),
    panel.grid = element_blank(),
    legend.position = "none",
    text = element_text(colour = "#FFFFFF"),
    plot.title = element_text(colour = "#FFFFFF", face = "bold", size = 20),
    plot.subtitle = element_text(colour = "#FFFFFF", face = "bold", size = 12),
    axis.line = element_line(colour = "#FFFFFF"),
    axis.title = element_text(colour = "#FFFFFF", size = 10),
    axis.text.y = element_text(colour = "#FFFFFF", size = 8),
    axis.text.x = element_text(
      colour = "#FFFFFF", size = 8, angle = 45, hjust = 1
    )
  )