library(pacman)
# 模型
p_load(tidyverse,grf,glmnet,caret,tidytext,fpp2,
forecast,car,tseries,hdm,tidymodels,broom)
# 读数据
p_load(readxl,writexl,data.table,openxlsx,haven,rvest)
# 数据探索
p_load(DT,skimr,DataExplorer,explore,vtable,stringr,lubridate)
# 可视化
p_load(patchwork,ggrepel,ggcorrplot,gghighlight,ggthemes,shiny)
# 其它常用包
p_load(magrittr,listviewer,devtools,here,janitor,reticulate,jsonlite)
## [1] 1 2 3 4 5
## [1] 1 2 3 4 5 6 7 8 9 10
## [1] 1 2 3 4 5 6 7 8 9 10
## [1] 1 3 5 7 9
## [1] 1.000000 1.183673 1.367347 1.551020 1.734694 1.918367 2.102041
## [8] 2.285714 2.469388 2.653061 2.836735 3.020408 3.204082 3.387755
## [15] 3.571429 3.755102 3.938776 4.122449 4.306122 4.489796 4.673469
## [22] 4.857143 5.040816 5.224490 5.408163 5.591837 5.775510 5.959184
## [29] 6.142857 6.326531 6.510204 6.693878 6.877551 7.061224 7.244898
## [36] 7.428571 7.612245 7.795918 7.979592 8.163265 8.346939 8.530612
## [43] 8.714286 8.897959 9.081633 9.265306 9.448980 9.632653 9.816327
## [50] 10.000000
## [1] 1 2 3 4 5 1 2 3 4 5
## [1] 1 1 2 2 3 3 4 4 5 5
## [1] 1 1 2 2 3 3 4 4 5 5 1 1 2 2 3 3 4 4 5 5
# sample() draws `size` values at random from the vector `x`.
# `replace` controls whether a value may be drawn more than once; with
# replace = FALSE the ten draws below are all distinct.
# Spell out FALSE rather than F: F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
# NOTE(review): no seed is set in this snippet, so the result changes from
# run to run unless set.seed() was called earlier — confirm if
# reproducibility matters.
y <- sample(x = 1:100, size = 10, replace = FALSE)
print(y)
## [1] 48 85 81 76 93 38 92 51 88 43
## [1] 0 0 0 0 0 0 1 0 0 0 1 1 0 1 1 0 0 1 1 1
## [1] 119.44954 115.66702 112.75420 77.97959 80.62497 110.95895 90.65271
## [8] 93.93836 102.55506 148.90882
## [1] "2014-01-01"
## [1] "2014-04-01"
## [1] "2014-01-01" "2014-01-08" "2014-01-15" "2014-01-22" "2014-01-29"
## [6] "2014-02-05" "2014-02-12" "2014-02-19" "2014-02-26" "2014-03-05"
## [11] "2014-03-12" "2014-03-19" "2014-03-26"
## [1] "Date"
# or:
## [1] "2020-10-10"
## [1] "2020-10-10"
## [1] "2020-10-10"
## [1] "604800s (~1 weeks)"
## [1] "31557600s (~1 years)"
## [1] "1y 0m 0d 0H 0M 0S"
## [1] "2020-10-10" "2020-10-11" "2020-10-12" "2020-10-13" "2020-10-14"
## [6] "2020-10-15" "2020-10-16" "2020-10-17" "2020-10-18" "2020-10-19"
## [11] "2020-10-20"
## [1] "2009-01-01 UTC"
## Warning: All formats failed to parse. No formats found.
## [1] NA
## 00:01:25
## Time difference of 1.157396 days
## [1] NA
## [1] "2018-02-28"
## [1] 16177
## [1] 16177
## attr(,"origin")
## [1] "1970-01-01"
## [1] "2014-04-17"
## [1] "1970-01-01 00:01:00 UTC"
## [1] "2020-05-13 12:40:31 CST"
## [1] 2020
## [1] 5
## [1] 13
## [1] 134
## [1] 4
## [1] 4
## [1] 星期三
## Levels: 星期日 < 星期一 < 星期二 < 星期三 < 星期四 < 星期五 < 星期六
## [1] 周三
## Levels: 周日 < 周一 < 周二 < 周三 < 周四 < 周五 < 周六
## [1] 周一 周二 周三 周四 周五 周六 周日
## Levels: 周日 < 周一 < 周二 < 周三 < 周四 < 周五 < 周六
## [,1] [,2] [,3] [,4]
## [1,] 1 6 11 16
## [2,] 2 7 12 17
## [3,] 3 8 13 18
## [4,] 4 9 14 19
## [5,] 5 10 15 20
## [,1] [,2] [,3] [,4]
## [1,] 3.14 3.14 3.14 3.14
## [2,] 3.14 3.14 3.14 3.14
## [3,] 3.14 3.14 3.14 3.14
## [4,] 3.14 3.14 3.14 3.14
## [5,] 3.14 3.14 3.14 3.14
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] 16.281108 6.480255 8.652127 21.7569551
## [2,] 8.021999 13.736055 13.023824 0.8162136
## [3,] 10.800302 7.805840 7.685018 12.9801556
## [4,] 7.839010 6.474145 3.345148 4.5540304
## [5,] 5.352209 11.386933 12.682583 17.2448605
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 13.066487 6.794657 1.133897 8.347505
## [2,] 10.415106 8.472372 17.972329 7.987675
## [3,] 5.496948 7.782532 6.973205 16.983654
## [4,] 17.221882 1.729233 20.104030 3.230302
## [5,] 10.401133 14.992161 18.379429 14.473569
## [1] 40
# Data frames
# A data frame is laid out much like an Excel sheet.
# Each column can be viewed as a vector of a single type; different columns
# may hold different types.
# The key difference from a list: every column of a data frame must have the
# same length.
# Each row can be read as one record, and each column as an attribute of
# that record.
# Data frames are a central data structure in data processing and are used
# everywhere, so they are worth understanding in depth.
df <- data.frame(
  symbols  = c("MSFT", "KO", "CSCO"),
  price    = c(40.40, 40.56, 23.02),
  currency = rep("USD", times = 3),
  country  = rep("USA", times = 3),
  type     = rep("STOCK", times = 3),
  # Keep the text columns as character vectors rather than factors.
  stringsAsFactors = FALSE
)
df
## symbols price currency country type
## 1 MSFT 40.40 USD USA STOCK
## 2 KO 40.56 USD USA STOCK
## 3 CSCO 23.02 USD USA STOCK
## [1] "data.frame"
## df数据框行数为:3
## df数据框列数为:5
## df数据框总元素数量为:15
# Lists are created with list(). Each element may be a single value, a
# vector, or even another list.
# The vectors inside a list may differ in length; this is the key difference
# between a list and a data frame.
# A data frame is a special form of list.
lst <- list(
  symbols  = c("MSFT", "KO", "CSCO"),
  price    = c(40.40, 40.56, 23.02),
  currency = "USD",
  country  = "USA",
  type     = "STOCK"
)
lst # the author's analogy: a "mooncake box"
## $symbols
## [1] "MSFT" "KO" "CSCO"
##
## $price
## [1] 40.40 40.56 23.02
##
## $currency
## [1] "USD"
##
## $country
## [1] "USA"
##
## $type
## [1] "STOCK"
## [1] 5
## [1] "MSFT" "KO" "CSCO"
## [1] "MSFT" "KO" "CSCO"
## $symbols
## [1] "MSFT" "KO" "CSCO"