library(tidyverse)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.4 ✔ readr 2.1.5
#> ✔ forcats 1.0.0 ✔ stringr 1.5.2
#> ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
#> ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
#> ✔ purrr 1.1.0
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Simulate a reproducible demo data set of 400 rows: two equally sized groups
# ("Diabetes" first, then "Non-Diabetes") with group-specific normal draws for
# FBS, BMI and HbA1c, plus binary Smoking and Gender indicators.
#
# The order of the random draws (FBS, BMI, HbA1c, Smoking, Gender, then
# serum_creat) fixes the simulated values for a given seed, so keep the
# columns in this order when editing.
set.seed(1234)

# Group size is defined once; every per-group and whole-table length below is
# derived from it so the column lengths cannot drift apart.
n_per_group <- 200
n_total <- 2 * n_per_group

df <- data.frame(
  Previous_status = factor(
    rep(c("Diabetes", "Non-Diabetes"), each = n_per_group)
  ),
  # First n_per_group draws belong to the "Diabetes" rows, the rest to
  # "Non-Diabetes"; FBS is rounded to whole numbers.
  FBS = round(c(
    rnorm(n_per_group, mean = 160, sd = 20),
    rnorm(n_per_group, mean = 100, sd = 20)
  )),
  BMI = round(c(
    rnorm(n_per_group, mean = 32, sd = 8),
    rnorm(n_per_group, mean = 30.5, sd = 7)
  ), 1),
  HbA1c = round(c(
    rnorm(n_per_group, mean = 10.60, sd = 1.5),
    rnorm(n_per_group, mean = 6.1, sd = 0.5)
  ), 1),
  # Map the 0/1 codes to labels explicitly instead of renaming whatever levels
  # happen to be observed: 0 -> first label, 1 -> second label.
  Smoking = factor(
    rbinom(n = n_total, size = 1, prob = 0.30),
    levels = 0:1,
    labels = c("Non-Smoker", "Smoker")
  ),
  Gender = factor(
    rbinom(n = n_total, size = 1, prob = 0.45),
    levels = 0:1,
    labels = c("Female", "Male")
  )
) %>%
  mutate(
    # BMI > 30 -> "obese", BMI < 22 -> "Not-obese", everything else
    # (22 to 30 inclusive) -> "Pre-obese". Level order is left to factor()'s
    # default sort of the labels.
    BMI_Cat = factor(case_when(
      BMI > 30 ~ "obese",
      BMI < 22 ~ "Not-obese",
      .default = "Pre-obese"
    )),
    serum_creat = round(rnorm(n(), mean = 1.2, sd = 0.2), 1),
    # NOTE(review): diet is assigned in the same block order as
    # Previous_status (first n_per_group rows vs. the rest), so the two
    # columns are perfectly aligned -- confirm this is intended.
    diet = factor(rep(c("Vegetarian", "NonVegetarian"), each = n_per_group)),
    # Row-based identifier of the form "<row>-XYZ".
    ID = paste(row_number(), "XYZ", sep = "-")
  )
str(df)
#> 'data.frame': 400 obs. of 10 variables:
#> $ Previous_status: Factor w/ 2 levels "Diabetes","Non-Diabetes": 1 1 1 1 1 1 1 1 1 1 ...
#> $ FBS : num 136 166 182 113 169 170 149 149 149 142 ...
#> $ BMI : num 22.2 32.3 28.6 24.8 35.3 33.2 43.7 23 27.9 31.4 ...
#> $ HbA1c : num 9.1 8.5 10.5 13.3 10.5 11.8 8.9 10.3 11.4 10.1 ...
#> $ Smoking : Factor w/ 2 levels "Non-Smoker","Smoker": 1 1 2 1 2 2 1 2 2 2 ...
#> $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 1 2 2 1 1 2 2 ...
#> $ BMI_Cat : Factor w/ 3 levels "Not-obese","obese",..: 3 2 3 3 2 2 2 3 3 2 ...
#> $ serum_creat : num 1.2 1 1.2 1.5 1.1 1.2 1.4 0.9 0.9 1 ...
#> $ diet : Factor w/ 2 levels "NonVegetarian",..: 2 2 2 2 2 2 2 2 2 2 ...
#> $ ID : chr "1-XYZ" "2-XYZ" "3-XYZ" "4-XYZ" ...
summary(df)
#> Previous_status FBS BMI HbA1c
#> Diabetes :200 Min. : 32.0 Min. : 9.10 Min. : 4.60
#> Non-Diabetes:200 1st Qu.:103.0 1st Qu.:25.75 1st Qu.: 6.10
#> Median :132.0 Median :30.70 Median : 7.00
#> Mean :130.1 Mean :30.78 Mean : 8.32
#> 3rd Qu.:157.0 3rd Qu.:35.52 3rd Qu.:10.50
#> Max. :221.0 Max. :57.60 Max. :14.30
#> Smoking Gender BMI_Cat serum_creat
#> Non-Smoker:272 Female:222 Not-obese: 41 Min. :0.600
#> Smoker :128 Male :178 obese :210 1st Qu.:1.100
#> Pre-obese:149 Median :1.200
#> Mean :1.204
#> 3rd Qu.:1.300
#> Max. :1.800
#> diet ID
#> NonVegetarian:200 Length:400
#> Vegetarian :200 Class :character
#> Mode :character
head(df)
#> Previous_status FBS BMI HbA1c Smoking Gender BMI_Cat serum_creat
#> 1 Diabetes 136 22.2 9.1 Non-Smoker Female Pre-obese 1.2
#> 2 Diabetes 166 32.3 8.5 Non-Smoker Female obese 1.0
#> 3 Diabetes 182 28.6 10.5 Smoker Female Pre-obese 1.2
#> 4 Diabetes 113 24.8 13.3 Non-Smoker Female Pre-obese 1.5
#> 5 Diabetes 169 35.3 10.5 Smoker Male obese 1.1
#> 6 Diabetes 170 33.2 11.8 Smoker Male obese 1.2
#> diet ID
#> 1 Vegetarian 1-XYZ
#> 2 Vegetarian 2-XYZ
#> 3 Vegetarian 3-XYZ
#> 4 Vegetarian 4-XYZ
#> 5 Vegetarian 5-XYZ
#> 6 Vegetarian 6-XYZ