각 항목이 어느 정도의 상관관계를 가지고 있는지 알고 싶다면 다음과 같이 입력합니다.
df.corr()
그러면 다음과 같이 출력됩니다.
|           |  pregnant |    plasma |  pressure | thickness |   insulin |       bmi |  pedigree |       age |  diabetes |
|-----------|----------:|----------:|----------:|----------:|----------:|----------:|----------:|----------:|----------:|
| pregnant  |  1.000000 |  0.129459 |  0.141282 | -0.081672 | -0.073535 |  0.017683 | -0.033523 |  0.544341 |  0.221898 |
| plasma    |  0.129459 |  1.000000 |  0.152590 |  0.057328 |  0.331357 |  0.221071 |  0.137337 |  0.263514 |  0.466581 |
| pressure  |  0.141282 |  0.152590 |  1.000000 |  0.207371 |  0.088933 |  0.281805 |  0.041265 |  0.239528 |  0.065068 |
| thickness | -0.081672 |  0.057328 |  0.207371 |  1.000000 |  0.436783 |  0.392573 |  0.183928 | -0.113970 |  0.074752 |
| insulin   | -0.073535 |  0.331357 |  0.088933 |  0.436783 |  1.000000 |  0.197859 |  0.185071 | -0.042163 |  0.130548 |
| bmi       |  0.017683 |  0.221071 |  0.281805 |  0.392573 |  0.197859 |  1.000000 |  0.140647 |  0.036242 |  0.292695 |
| pedigree  | -0.033523 |  0.137337 |  0.041265 |  0.183928 |  0.185071 |  0.140647 |  1.000000 |  0.033561 |  0.173844 |
| age       |  0.544341 |  0.263514 |  0.239528 | -0.113970 | -0.042163 |  0.036242 |  0.033561 |  1.000000 |  0.238356 |
| diabetes  |  0.221898 |  0.466581 |  0.065068 |  0.074752 |  0.130548 |  0.292695 |  0.173844 |  0.238356 |  1.000000 |