加载包和数据

library(tidyverse)
## -- Attaching packages ----------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.0     √ purrr   0.3.3
## √ tibble  2.1.3     √ dplyr   0.8.5
## √ tidyr   1.0.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## -- Conflicts -------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(here)
## here() starts at D:/RBookLearning/Data-Science-and-Economics
library(skimr)
library(stringr)

数据见文件夹data1

# Read the Dragon Ball data set from the project's data1 folder.
# The path is given to here() as separate components with no leading
# slash: a path that starts with "/" can be treated as absolute by newer
# here/rprojroot versions and would then not be resolved against the
# project root.
dball_data <- read_csv(
  here("R高级编程", "data1", "Dragon_Ball_Data_Set.csv")
)
## Parsed with column specification:
## cols(
##   Character = col_character(),
##   Power_Level = col_character(),
##   Saga_or_Movie = col_character(),
##   Dragon_Ball_Series = col_character()
## )
# Inspect the column types: every column was parsed as character.
glimpse(dball_data)
## Observations: 1,244
## Variables: 4
## $ Character          <chr> "Goku", "Bulma", "Bear Thief", "Master Roshi", "...
## $ Power_Level        <chr> "10", "1.5", "7", "30", "5", "8.5", "4", "8", "2...
## $ Saga_or_Movie      <chr> "Emperor Pilaf Saga", "Emperor Pilaf Saga", "Emp...
## $ Dragon_Ball_Series <chr> "Dragon Ball", "Dragon Ball", "Dragon Ball", "Dr...
# Preview the first rows of the raw data.
head(dball_data)
## # A tibble: 6 x 4
##   Character    Power_Level Saga_or_Movie      Dragon_Ball_Series
##   <chr>        <chr>       <chr>              <chr>             
## 1 Goku         10          Emperor Pilaf Saga Dragon Ball       
## 2 Bulma        1.5         Emperor Pilaf Saga Dragon Ball       
## 3 Bear Thief   7           Emperor Pilaf Saga Dragon Ball       
## 4 Master Roshi 30          Emperor Pilaf Saga Dragon Ball       
## 5 Oolong       5           Emperor Pilaf Saga Dragon Ball       
## 6 Yamcha       8.5         Emperor Pilaf Saga Dragon Ball

开始

在这里,我们使用正则表达式字符串匹配来过滤 Character 列。stringr 包中的 str_detect() 函数检测字符串中是否存在某个模式或表达式,并返回 TRUE 或 FALSE 的逻辑值(这正是 dplyr::filter() 第二个参数所需要的值)。我还使用了 stringr::regex() 函数,并将 ignore_case 参数设置为 TRUE,使过滤不区分大小写,因此像 "Kame" 和 "kAMe" 这样的写法也都会被匹配到。

# Keep the rows whose Character contains "kameha", ignoring letter case.
dball_data_1 <- filter(
  dball_data,
  str_detect(Character, regex("kameha", ignore_case = TRUE))
)

# Render the matched rows as a table.
knitr::kable(dball_data_1)
Character Power_Level Saga_or_Movie Dragon_Ball_Series
Master Roshi’s Max Power Kamehameha 180 Emperor Pilaf Saga Dragon Ball
Goku’s Kamehameha 12 Emperor Pilaf Saga Dragon Ball
Jackie Chun’s Max power Kamehamha 330 Tournament Saga Dragon Ball
Goku’s Kamehameha 90 Red Ribbon Army Saga Dragon Ball
Goku’s Kamehameha 90 Red Ribbon Army Saga Dragon Ball
Goku’s Super Kamehameha 740 Piccolo Jr. Saga Dragon Ball
Goku’s Kamehameha 950 Saiyan Saga Dragon Ball Z
Goku’s Kamehameha x3 36,000 Saiyan Saga Dragon Ball Z
Goku’s Kamehameha x4 44,000 Saiyan Saga Dragon Ball Z
Goku’s Angry Kamehameha 180,000,000 Frieza Saga Dragon Ball Z
Android 19 (Goku’s kamehameha absorbed) 230,000,000 Android Saga Dragon Ball Z
Goku’s Instant Kamehameha 28,750,000,000 Cell Games Saga Dragon Ball Z
Perfect Cell’s Earth-Destroying Kamehameha 30,200,000,000 Cell Games Saga Dragon Ball Z
Gohan’s Standing Kamehameha 45,000,000,000 Cell Games Saga Dragon Ball Z
Super Perfect Cell’s Solar Kamehameha 80,000,000,000 Cell Games Saga Dragon Ball Z
Gohan’s Father-Son Kamehameha 200,000,000,000 Cell Games Saga Dragon Ball Z
Gohan’s Electric Kamehameha 47,100,000,000 Babidi Saga/Majin Buu Saga Dragon Ball Z
Goku’s True Kamehameha 2,500,000,000,000 Kid Buu Saga Dragon Ball Z
Goku’s 10x Kamehameha 200,000,000,000,000 Baby Saga Dragon Ball GT
Goku’s 10x Kamehameha 4,000,000,000,000,000 Baby Saga Dragon Ball GT
Gohan’s Super Kamehameha 7,000,000,000,000 Super 17 saga Dragon Ball GT
Goku’s 10x Kamehameha 2,000,000,000,000,000 Super 17 saga Dragon Ball GT
Super 17 (10x Kamehameha absorbed) 530,000,000,000,000 Super 17 saga Dragon Ball GT
Goku’s Super Kamehameha 34,000,000,000,000 Super 17 saga Dragon Ball GT
Student-Teacher Kamehameha 17,000 Movie 2: World’s Strongest Dragon Ball Z
Goku’s Kaio-ken x3 Kamehameha 32,000 Movie 2: World’s Strongest Dragon Ball Z
Goku’s Kaio-ken x4 Kamehameha 45,000 Movie 2: World’s Strongest Dragon Ball Z
Goku’s Kaio-ken x20 Kamehameha 400,000,000 Movie 5: Cooler’s Revenge Dragon Ball Z
Goku’s Super Kamehameha 25,300,000,000 OVA: Plan to Eradicate the Super Saiyans Dragon Ball Z
Family Kamehameha 300,000,000,000 Movie 10: Broly- The Second Coming Dragon Ball Z
Krillin’s Kamehameha 8,000,000 Movie 11: Bio-Broly Dragon Ball Z
Goten’s Kamehameha 950,000,000 Movie 11: Bio-Broly Dragon Ball Z
Trunk’s Kamehameha 980,000,000 Movie 11: Bio-Broly Dragon Ball Z
Goten’s Super Kamehameha 3,000,000,000 Movie 11: Bio-Broly Dragon Ball Z
# Same kind of filter, but with a plain pattern string, so the match is
# case-sensitive and only the all-lowercase spelling is found.
dball_data_1b <- filter(dball_data, str_detect(Character, "kamehameha"))

knitr::kable(dball_data_1b)
Character Power_Level Saga_or_Movie Dragon_Ball_Series
Android 19 (Goku’s kamehameha absorbed) 230,000,000 Android Saga Dragon Ball Z
# Show the rows of dball_data_1 whose Character has no match in
# dball_data_1b, i.e. rows found only by the case-insensitive filter.
knitr::kable(
  dplyr::anti_join(dball_data_1, dball_data_1b, by = "Character")
)
Character Power_Level Saga_or_Movie Dragon_Ball_Series
Master Roshi’s Max Power Kamehameha 180 Emperor Pilaf Saga Dragon Ball
Goku’s Kamehameha 12 Emperor Pilaf Saga Dragon Ball
Jackie Chun’s Max power Kamehamha 330 Tournament Saga Dragon Ball
Goku’s Kamehameha 90 Red Ribbon Army Saga Dragon Ball
Goku’s Kamehameha 90 Red Ribbon Army Saga Dragon Ball
Goku’s Super Kamehameha 740 Piccolo Jr. Saga Dragon Ball
Goku’s Kamehameha 950 Saiyan Saga Dragon Ball Z
Goku’s Kamehameha x3 36,000 Saiyan Saga Dragon Ball Z
Goku’s Kamehameha x4 44,000 Saiyan Saga Dragon Ball Z
Goku’s Angry Kamehameha 180,000,000 Frieza Saga Dragon Ball Z
Goku’s Instant Kamehameha 28,750,000,000 Cell Games Saga Dragon Ball Z
Perfect Cell’s Earth-Destroying Kamehameha 30,200,000,000 Cell Games Saga Dragon Ball Z
Gohan’s Standing Kamehameha 45,000,000,000 Cell Games Saga Dragon Ball Z
Super Perfect Cell’s Solar Kamehameha 80,000,000,000 Cell Games Saga Dragon Ball Z
Gohan’s Father-Son Kamehameha 200,000,000,000 Cell Games Saga Dragon Ball Z
Gohan’s Electric Kamehameha 47,100,000,000 Babidi Saga/Majin Buu Saga Dragon Ball Z
Goku’s True Kamehameha 2,500,000,000,000 Kid Buu Saga Dragon Ball Z
Goku’s 10x Kamehameha 200,000,000,000,000 Baby Saga Dragon Ball GT
Goku’s 10x Kamehameha 4,000,000,000,000,000 Baby Saga Dragon Ball GT
Gohan’s Super Kamehameha 7,000,000,000,000 Super 17 saga Dragon Ball GT
Goku’s 10x Kamehameha 2,000,000,000,000,000 Super 17 saga Dragon Ball GT
Super 17 (10x Kamehameha absorbed) 530,000,000,000,000 Super 17 saga Dragon Ball GT
Goku’s Super Kamehameha 34,000,000,000,000 Super 17 saga Dragon Ball GT
Student-Teacher Kamehameha 17,000 Movie 2: World’s Strongest Dragon Ball Z
Goku’s Kaio-ken x3 Kamehameha 32,000 Movie 2: World’s Strongest Dragon Ball Z
Goku’s Kaio-ken x4 Kamehameha 45,000 Movie 2: World’s Strongest Dragon Ball Z
Goku’s Kaio-ken x20 Kamehameha 400,000,000 Movie 5: Cooler’s Revenge Dragon Ball Z
Goku’s Super Kamehameha 25,300,000,000 OVA: Plan to Eradicate the Super Saiyans Dragon Ball Z
Family Kamehameha 300,000,000,000 Movie 10: Broly- The Second Coming Dragon Ball Z
Krillin’s Kamehameha 8,000,000 Movie 11: Bio-Broly Dragon Ball Z
Goten’s Kamehameha 950,000,000 Movie 11: Bio-Broly Dragon Ball Z
Trunk’s Kamehameha 980,000,000 Movie 11: Bio-Broly Dragon Ball Z
Goten’s Super Kamehameha 3,000,000,000 Movie 11: Bio-Broly Dragon Ball Z
# Power_Level is text with thousands separators ("36,000"): strip the
# commas first, then convert the column to numeric.
dball_data_2 <- mutate(
  dball_data_1,
  Power_Level = as.numeric(str_remove_all(Power_Level, ","))
)

knitr::kable(dball_data_2)
Character Power_Level Saga_or_Movie Dragon_Ball_Series
Master Roshi’s Max Power Kamehameha 1.800e+02 Emperor Pilaf Saga Dragon Ball
Goku’s Kamehameha 1.200e+01 Emperor Pilaf Saga Dragon Ball
Jackie Chun’s Max power Kamehamha 3.300e+02 Tournament Saga Dragon Ball
Goku’s Kamehameha 9.000e+01 Red Ribbon Army Saga Dragon Ball
Goku’s Kamehameha 9.000e+01 Red Ribbon Army Saga Dragon Ball
Goku’s Super Kamehameha 7.400e+02 Piccolo Jr. Saga Dragon Ball
Goku’s Kamehameha 9.500e+02 Saiyan Saga Dragon Ball Z
Goku’s Kamehameha x3 3.600e+04 Saiyan Saga Dragon Ball Z
Goku’s Kamehameha x4 4.400e+04 Saiyan Saga Dragon Ball Z
Goku’s Angry Kamehameha 1.800e+08 Frieza Saga Dragon Ball Z
Android 19 (Goku’s kamehameha absorbed) 2.300e+08 Android Saga Dragon Ball Z
Goku’s Instant Kamehameha 2.875e+10 Cell Games Saga Dragon Ball Z
Perfect Cell’s Earth-Destroying Kamehameha 3.020e+10 Cell Games Saga Dragon Ball Z
Gohan’s Standing Kamehameha 4.500e+10 Cell Games Saga Dragon Ball Z
Super Perfect Cell’s Solar Kamehameha 8.000e+10 Cell Games Saga Dragon Ball Z
Gohan’s Father-Son Kamehameha 2.000e+11 Cell Games Saga Dragon Ball Z
Gohan’s Electric Kamehameha 4.710e+10 Babidi Saga/Majin Buu Saga Dragon Ball Z
Goku’s True Kamehameha 2.500e+12 Kid Buu Saga Dragon Ball Z
Goku’s 10x Kamehameha 2.000e+14 Baby Saga Dragon Ball GT
Goku’s 10x Kamehameha 4.000e+15 Baby Saga Dragon Ball GT
Gohan’s Super Kamehameha 7.000e+12 Super 17 saga Dragon Ball GT
Goku’s 10x Kamehameha 2.000e+15 Super 17 saga Dragon Ball GT
Super 17 (10x Kamehameha absorbed) 5.300e+14 Super 17 saga Dragon Ball GT
Goku’s Super Kamehameha 3.400e+13 Super 17 saga Dragon Ball GT
Student-Teacher Kamehameha 1.700e+04 Movie 2: World’s Strongest Dragon Ball Z
Goku’s Kaio-ken x3 Kamehameha 3.200e+04 Movie 2: World’s Strongest Dragon Ball Z
Goku’s Kaio-ken x4 Kamehameha 4.500e+04 Movie 2: World’s Strongest Dragon Ball Z
Goku’s Kaio-ken x20 Kamehameha 4.000e+08 Movie 5: Cooler’s Revenge Dragon Ball Z
Goku’s Super Kamehameha 2.530e+10 OVA: Plan to Eradicate the Super Saiyans Dragon Ball Z
Family Kamehameha 3.000e+11 Movie 10: Broly- The Second Coming Dragon Ball Z
Krillin’s Kamehameha 8.000e+06 Movie 11: Bio-Broly Dragon Ball Z
Goten’s Kamehameha 9.500e+08 Movie 11: Bio-Broly Dragon Ball Z
Trunk’s Kamehameha 9.800e+08 Movie 11: Bio-Broly Dragon Ball Z
Goten’s Super Kamehameha 3.000e+09 Movie 11: Bio-Broly Dragon Ball Z
dball_data_3 <- dball_data_2 %>%
  # Drop the "absorbed" rows, which are not kamehameha attacks themselves.
  filter(str_detect(Character, "absorbed", negate = TRUE)) %>%
  # Cut the name at the apostrophe (apostrophe included) so that only the
  # owner of the attack remains; names without an apostrophe are unchanged.
  mutate(Character_Single = str_replace_all(Character, "\\'.+", "")) %>%
  # Put the new column first, followed by all original columns.
  select(Character_Single, everything())

注意,这里在撇号前面加了两个反斜杠(\\')对它进行转义(严格来说,撇号并不是正则表达式的元字符,不转义同样可以匹配)。点(.)匹配任意字符,而 + 表示前面的点要匹配一次或多次。

# Mean power level per character, strongest first.
kame_data_grouped <- dball_data_3 %>%
  group_by(Character_Single) %>%
  summarise_at("Power_Level", mean) %>%
  arrange(desc(Power_Level))

kame_data_grouped
## # A tibble: 11 x 2
##    Character_Single           Power_Level
##    <chr>                            <dbl>
##  1 Goku                           3.46e14
##  2 Gohan                          1.82e12
##  3 Family Kamehameha              3.00e11
##  4 Super Perfect Cell             8.00e10
##  5 Perfect Cell                   3.02e10
##  6 Goten                          1.98e 9
##  7 Trunk                          9.80e 8
##  8 Krillin                        8.00e 6
##  9 Student-Teacher Kamehameha     1.70e 4
## 10 Jackie Chun                    3.30e 2
## 11 Master Roshi                   1.80e 2
# Five-number summary (plus mean) of the per-character mean power levels.
summary(pull(kame_data_grouped, Power_Level))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.800e+02 4.008e+06 1.975e+09 3.170e+13 1.900e+11 3.465e+14
# Horizontal bar chart of the mean Kamehameha power level per character.
# Bars show the natural log of the power level and are ordered by the raw
# power level.
kame_data_grouped %>%
  mutate(Power_Index = log(Power_Level)) %>%
  ggplot(
    aes(
      x = reorder(Character_Single, Power_Level),
      y = Power_Index,
      fill = Character_Single
    )
  ) +
  geom_col() +
  # Label each bar with its rounded log value; the negative hjust shifts
  # the text away from the bar end.
  geom_text(
    aes(label = round(Power_Index, 1), hjust = -.2),
    colour = "#FFFFFF"
  ) +
  coord_flip() +
  scale_fill_brewer(palette = "Spectral") +
  labs(
    title = "Power Levels of Kamehamehas",
    subtitle = "By Dragon Ball characters",
    x = " ",
    y = "Power Levels (log transformed)"
  ) +
  theme_minimal() +
  # Dark background with white text and axes; the fill legend is hidden
  # because the axis labels already name each character.
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    plot.background = element_rect(fill = "grey20"),
    plot.title = element_text(colour = "#FFFFFF", face = "bold", size = 20),
    text = element_text(colour = "#FFFFFF"),
    axis.line = element_line(colour = "#FFFFFF"),
    axis.title = element_text(colour = "#FFFFFF", size = 12),
    axis.text = element_text(colour = "#FFFFFF", size = 12)
  )

# Bar chart of Goku's Kamehamehas: the mean of the natural-log power level
# within each saga/movie, with bars ordered by that mean.
dball_data_3 %>%
  filter(Character_Single == "Goku") %>%
  mutate(Power_Index = log(Power_Level)) %>%
  group_by(Saga_or_Movie) %>%
  summarise(Power_Index = mean(Power_Index)) %>%
  ggplot(
    aes(
      x = reorder(Saga_or_Movie, Power_Index),
      y = Power_Index
    )
  ) +
  geom_col(fill = "#F85B1A") +
  # Label each bar with its rounded value; the negative vjust lifts the
  # text above the bar top.
  geom_text(
    aes(label = round(Power_Index, 1), vjust = -.5),
    colour = "#FFFFFF"
  ) +
  # Fixed y range, leaving headroom above the bars for the labels.
  scale_y_continuous(limits = c(0, 40)) +
  labs(
    title = "Power Levels of Goku's Kamehamehas",
    subtitle = "By Saga/Movie",
    x = " ",
    y = "Power Levels (log transformed)"
  ) +
  theme_minimal() +
  # Dark background with white text; x-axis labels are rotated 45 degrees.
  theme(
    legend.position = "none",
    panel.grid = element_blank(),
    plot.background = element_rect(fill = "grey20"),
    plot.title = element_text(colour = "#FFFFFF", face = "bold", size = 20),
    plot.subtitle = element_text(colour = "#FFFFFF", face = "bold", size = 12),
    text = element_text(colour = "#FFFFFF"),
    axis.line = element_line(colour = "#FFFFFF"),
    axis.title = element_text(colour = "#FFFFFF", size = 10),
    axis.text.x = element_text(
      colour = "#FFFFFF", size = 8, angle = 45, hjust = 1
    ),
    axis.text.y = element_text(colour = "#FFFFFF", size = 8)
  )