14  Quy trình phân tích logistic regression

14.1 Example dataset

Sử dụng ví dụ ở chapter 10 trong cuốn sách này.

https://studyr.netlify.app/ref/introductory_biostatistics_2016.pdf

cancer <- read.csv("prostatecancer.dat", sep = "&")
cancer
   Xray Grade Stage Age Acid Nodes
1     0     1     1  64   40     0
2     0     0     1  63   40     0
3     1     0     0  65   46     0
4     0     1     0  67   47     0
5     0     0     0  66   48     0
6     0     1     1  65   48     0
7     0     0     0  60   49     0
8     0     0     0  51   49     0
9     0     0     0  66   50     0
10    0     0     0  58   50     0
11    0     1     0  56   50     0
12    0     0     1  61   50     0
13    0     1     1  64   50     0
14    0     0     0  56   52     0
15    0     0     0  67   52     0
16    1     0     0  49   55     0
17    0     1     1  52   55     0
18    0     0     0  68   56     0
19    0     1     1  66   59     0
20    1     0     0  60   62     0
21    0     0     0  61   62     0
22    1     1     1  59   63     0
23    0     0     0  51   65     0
24    0     1     1  53   66     0
25    0     0     0  58   71     0
26    0     0     0  63   75     0
27    0     0     1  53   76     0
28    0     0     0  60   78     0
29    0     0     0  52   83     0
30    0     0     1  67   95     0
31    0     0     0  56   98     0
32    0     0     1  61  102     0
33    0     0     0  64  187     0
34    1     0     1  58   48     1
35    0     0     1  65   49     1
36    1     1     1  57   51     1
37    0     1     0  50   56     1
38    1     1     0  67   67     1
39    0     0     1  67   67     1
40    0     1     1  57   67     1
41    0     1     1  45   70     1
42    0     0     1  46   70     1
43    1     0     1  51   72     1
44    1     1     1  60   76     1
45    1     1     1  56   78     1
46    1     1     1  50   81     1
47    0     0     0  56   82     1
48    0     0     1  63   82     1
49    1     1     1  65   84     1
50    1     0     1  64   89     1
51    0     1     0  59   99     1
52    1     1     1  68  126     1
53    1     0     0  61  136     1

14.2 Logistic Regression Model

Effect of Measurement Scale

acid.glm <- glm(Nodes ~ Acid, 
                data = cancer, 
                family = binomial(link = "logit"))
# acid.glm

summary(acid.glm)

Call:
glm(formula = Nodes ~ Acid, family = binomial(link = "logit"), 
    data = cancer)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)  
(Intercept) -1.92703    0.92104  -2.092   0.0364 *
Acid         0.02040    0.01257   1.624   0.1045  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 70.252  on 52  degrees of freedom
Residual deviance: 67.116  on 51  degrees of freedom
AIC: 71.116

Number of Fisher Scoring iterations: 4
or.50acid <- exp((100-50)*acid.glm$coefficients[2])

or.50acid
    Acid 
2.773301 

Testing Hypotheses in Multiple Logistic Regression

all.glm <- glm(Nodes ~ Xray + Grade + Stage + Age + Acid, 
              data = cancer,
              family = binomial(link = "logit"))

summary(all.glm)

Call:
glm(formula = Nodes ~ Xray + Grade + Stage + Age + Acid, family = binomial(link = "logit"), 
    data = cancer)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)  
(Intercept)  0.06180    3.45992   0.018   0.9857  
Xray         2.04534    0.80718   2.534   0.0113 *
Grade        0.76142    0.77077   0.988   0.3232  
Stage        1.56410    0.77401   2.021   0.0433 *
Age         -0.06926    0.05788  -1.197   0.2314  
Acid         0.02434    0.01316   1.850   0.0643 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 70.252  on 52  degrees of freedom
Residual deviance: 48.126  on 47  degrees of freedom
AIC: 60.126

Number of Fisher Scoring iterations: 5
chisq.LR <- all.glm$null.deviance - all.glm$deviance

chisq.LR
[1] 22.12636
df.LR <- all.glm$df.null - all.glm$df.residual

df.LR
[1] 5
1-pchisq(chisq.LR,df.LR)
[1] 0.000495414

Stepwise

library(MASS)
MASS::stepAIC(all.glm)
Start:  AIC=60.13
Nodes ~ Xray + Grade + Stage + Age + Acid

        Df Deviance    AIC
- Grade  1   49.097 59.097
- Age    1   49.615 59.615
<none>       48.126 60.126
- Acid   1   51.572 61.572
- Stage  1   52.558 62.558
- Xray   1   55.350 65.350

Step:  AIC=59.1
Nodes ~ Xray + Stage + Age + Acid

        Df Deviance    AIC
- Age    1   50.660 58.660
<none>       49.097 59.097
- Acid   1   52.085 60.085
- Stage  1   55.381 63.381
- Xray   1   57.016 65.016

Step:  AIC=58.66
Nodes ~ Xray + Stage + Acid

        Df Deviance    AIC
<none>       50.660 58.660
- Acid   1   53.353 59.353
- Stage  1   57.059 63.059
- Xray   1   58.613 64.613

Call:  glm(formula = Nodes ~ Xray + Stage + Acid, family = binomial(link = "logit"), 
    data = cancer)

Coefficients:
(Intercept)         Xray        Stage         Acid  
   -3.57565      2.06179      1.75556      0.02063  

Degrees of Freedom: 52 Total (i.e. Null);  49 Residual
Null Deviance:      70.25 
Residual Deviance: 50.66    AIC: 58.66

ROC curve

library(epiDisplay)
Loading required package: foreign
Loading required package: survival
Warning: package 'survival' was built under R version 4.4.1

Attaching package: 'survival'
The following object is masked _by_ '.GlobalEnv':

    cancer
Loading required package: nnet
all.glm.roc <- lroc(all.glm, title = TRUE, auc.coords = c(0.5, 0.1))

all.glm.roc
$model.description
[1] "Nodes ~ Xray + Grade + Stage + Age + Acid"

$auc
[1] 0.8454545

$predicted.table
 predicted.prob Non-diseased Diseased
         0.0341            1        0
         0.0351            1        0
         0.0358            1        0
         0.0361            1        0
         0.0521            1        0
         0.0607            1        0
         0.0645            1        0
         0.0657            1        0
         0.0723            1        0
         0.0775            1        0
         0.0929            1        0
         0.0973            1        0
         0.1002            1        0
         0.1314            1        0
         0.1372            1        0
         0.1393            0        1
         0.1463            1        0
         0.1566            0        1
         0.1795            1        0
         0.1929            1        0
         0.2004            0        1
         0.2007            1        0
         0.2181            0        1
         0.2184            1        0
         0.2551            1        0
         0.2796            1        0
         0.2988            0        1
         0.3040            1        0
         0.3213            1        0
         0.3227            0        1
         0.3314            1        0
         0.3684            1        0
         0.4514            1        0
         0.4648            0        1
         0.4710            1        0
         0.5130            1        0
         0.5176            0        1
         0.5311            1        0
         0.5359            0        1
         0.5452            1        0
         0.5801            1        0
         0.6948            0        1
         0.7260            0        1
         0.7673            0        1
         0.8030            0        1
         0.8489            0        1
         0.8676            1        0
         0.8689            0        1
         0.8782            0        1
         0.8935            0        1
         0.9207            0        1
         0.9421            0        1
         0.9498            0        1

$diagnostic.table
   1-Specificity Sensitivity
      1.00000000        1.00
>     0.96969697        1.00
>     0.93939394        1.00
>     0.90909091        1.00
>     0.87878788        1.00
>     0.84848485        1.00
>     0.81818182        1.00
>     0.78787879        1.00
>     0.75757576        1.00
>     0.72727273        1.00
>     0.69696970        1.00
>     0.66666667        1.00
>     0.63636364        1.00
>     0.60606061        1.00
>     0.57575758        1.00
>     0.54545455        1.00
>     0.54545455        0.95
>     0.51515152        0.95
>     0.51515152        0.90
>     0.48484848        0.90
>     0.45454545        0.90
>     0.45454545        0.85
>     0.42424242        0.85
>     0.42424242        0.80
>     0.39393939        0.80
>     0.36363636        0.80
>     0.33333333        0.80
>     0.33333333        0.75
>     0.30303030        0.75
>     0.27272727        0.75
>     0.27272727        0.70
>     0.24242424        0.70
>     0.21212121        0.70
>     0.18181818        0.70
>     0.18181818        0.65
>     0.15151515        0.65
>     0.12121212        0.65
>     0.12121212        0.60
>     0.09090909        0.60
>     0.09090909        0.55
>     0.06060606        0.55
>     0.03030303        0.55
>     0.03030303        0.50
>     0.03030303        0.45
>     0.03030303        0.40
>     0.03030303        0.35
>     0.03030303        0.30
>     0.00000000        0.30
>     0.00000000        0.25
>     0.00000000        0.20
>     0.00000000        0.15
>     0.00000000        0.10
>     0.00000000        0.05
>     0.00000000        0.00