# 20250601 | 특수목적코딩학원 광교본점

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

# Korean font setup (Windows environment): select the 'Malgun Gothic' family
# so Hangul labels can be drawn, and turn off the Unicode minus glyph.
plt.rcParams.update({
    'axes.unicode_minus': False,
    'font.family': 'Malgun Gothic',
})

# Load the two CSV files. Header names are stripped of surrounding whitespace
# so the column lookups below match exactly.
df_delivery = pd.read_csv('./배달_사용_통계.csv', encoding='utf-8')
df_delivery.columns = df_delivery.columns.str.strip()

df_covid = pd.read_csv('./코로나_확진자_수_통계.csv', encoding='utf-8')
df_covid.columns = df_covid.columns.str.strip()

# Inner-join the two tables on their shared '연도' column.
df = pd.merge(df_delivery, df_covid, on='연도', how='inner')

# '연도' stays text: the charts below use these strings as x tick labels.
df['연도'] = df['연도'].astype(str)

# Coerce every plotted column to numbers; cells that cannot be parsed become NaN.
# '야생동물 교통사고' is converted in place. Previously the converted values were
# written to a separate '야생' column that nothing read, so the charts plotted
# the unconverted column.
plot_columns = [
    '배달앱', '배달대행', '확진자', '사망자', '백신', '사망률 감소',
    '온라인 쇼핑', '미세먼지', '야생동물 교통사고', '향수',
]
df[plot_columns] = df[plot_columns].apply(pd.to_numeric, errors='coerce')

# Drop rows that lack a year or either delivery figure.
df = df.dropna(subset=['연도', '배달앱', '배달대행'])

# Line charts: a 3x2 grid where every panel overlays its own indicator(s)
# on the two COVID series.
fig, axes = plt.subplots(3, 2, figsize=(12, 8))

# Series drawn first on every panel: (column, colour, legend label).
line_covid_series = [
    ('확진자', 'red', '코로나 확진률(누적)'),
    ('사망자', 'gray', '코로나 사망률(누적)'),
]

# One entry per panel: (grid position, title, panel-specific series).
line_panels = [
    # Panel 1: delivery apps and delivery agencies vs. COVID
    ((0, 0), '배달앱 및 배달대행과 코로나의 관계', [
        ('배달앱', 'yellow', '배달앱 이용률'),
        ('배달대행', 'orange', '배달대행 이용률'),
    ]),
    # Panel 2: vaccination and the decrease in mortality
    ((0, 1), '코로나 발생과 백신 효과 분석', [
        ('백신', 'cyan', '코로나 백신 접종률(누적)'),
        ('사망률 감소', 'orchid', '예상 사망률 감소율'),
    ]),
    # Panel 3: online shopping
    ((1, 0), '코로나와 온라인 쇼핑의 관계', [
        ('온라인 쇼핑', 'sandybrown', '온라인 쇼핑 증가율'),
    ]),
    # Panel 4: fine dust
    ((1, 1), '코로나와 미세먼지의 관계', [
        ('미세먼지', 'tan', '미세먼지 평균 농도'),
    ]),
    # Panel 5: wildlife road accidents
    ((2, 1), '코로나와 야생동물 교통사고율의 관계', [
        ('야생동물 교통사고', 'teal', '야생동물 교통사고 건 수의 증가율'),
    ]),
    # Panel 6: perfume sales
    ((2, 0), '코로나와 향수 판매율의 관계', [
        ('향수', 'pink', '향수 판매량 증가율'),
    ]),
]

for position, title, extra_series in line_panels:
    ax = axes[position]
    # COVID series first so the legend order is the same on every panel.
    for column, color, label in line_covid_series + extra_series:
        ax.plot(df['연도'], df[column], marker='o', linestyle='-',
                color=color, label=label)
    ax.set_title(title)
    ax.set_ylabel('비율 (%)')
    ax.grid()
    ax.legend()

# X-axis labels: one tick per row, labelled with the year string.
# (The loop body is indented here; without indentation the file does not parse.)
for ax in axes.flat:
    ax.set_xticks(range(len(df["연도"])))
    ax.set_xticklabels(df["연도"], rotation=45)

plt.tight_layout()
plt.show()

# Grouped bar charts: the same six panels as bars, side by side per year.
bar_width = 0.15
index = range(len(df["연도"]))  # one x position per row

fig, axes = plt.subplots(3, 2, figsize=(12, 8))

# Each series is (column, slot, colour, legend label). `slot` is the bar's
# horizontal offset from the year position, in units of bar_width.
bar_covid_series = [
    ('확진자', 0, 'red', "코로나 확진률(누적)"),
    ('사망자', 1, 'gray', "코로나 사망률(누적)"),
]

# One entry per panel: (grid position, title, panel-specific series).
bar_panels = [
    # Panel 1: delivery apps and delivery agencies vs. COVID
    ((0, 0), '배달앱 및 배달대행과 코로나의 관계', [
        ('배달앱', -2, 'yellow', "배달앱 이용률"),
        ('배달대행', -1, 'orange', "배달대행 이용률"),
    ]),
    # Panel 2: vaccination and the decrease in mortality
    ((0, 1), '코로나 발생과 백신 효과 분석', [
        ('백신', -1, 'cyan', "백신 접종률(누적)"),
        ('사망률 감소', -2, 'orchid', "사망률 감소율"),
    ]),
    # Panel 3: online shopping
    ((1, 0), '코로나와 온라인 쇼핑의 관계', [
        ('온라인 쇼핑', -1, 'salmon', "작년 대비 온라인 쇼핑 거래액 증가율"),
    ]),
    # Panel 4: fine dust
    ((1, 1), '코로나와 미세먼지의 관계', [
        ('미세먼지', -1, 'tan', "평균 미세먼지 농도"),
    ]),
    # Panel 5: wildlife road accidents
    ((2, 1), '코로나와 야생동물 교통사고율의 관계', [
        ('야생동물 교통사고', -1, 'teal', "야생동물 교통사고 건 수의 증가율"),
    ]),
    # Panel 6: perfume sales
    ((2, 0), '코로나와 향수 판매율의 관계', [
        ('향수', -1, 'pink', "향수 판매량 증가율"),
    ]),
]

for position, title, extra_series in bar_panels:
    ax = axes[position]
    # COVID series first so the legend order is the same on every panel.
    for column, slot, color, label in bar_covid_series + extra_series:
        positions = [i + slot * bar_width for i in index]
        ax.bar(positions, df[column], bar_width, label=label, color=color)
    ax.set_title(title)
    ax.set_ylabel('비율 (%)')
    ax.grid()
    ax.legend()

# X-axis labels: one tick per year position, labelled with the year string.
# (The loop body is indented here; without indentation the file does not parse.)
for ax in axes.flat:
    ax.set_xticks(index)
    ax.set_xticklabels(df["연도"], rotation=45)

plt.tight_layout()
plt.show()

# Rebuild the merged table from the two loaded frames (inner join on '연도').
df = df_delivery.merge(df_covid, on='연도', how='inner')

# Reduce the year text to its first four characters and store it as an
# integer, e.g. '2017-01' -> 2017.
year_text = df['연도'].astype(str)
df['연도'] = year_text.str[:4].astype(int)

# Columns to coerce to numbers; unparseable cells become NaN.
numeric_cols = ['배달앱', '배달대행', '확진자', '사망자', '백신', '사망률 감소', '온라인 쇼핑', '미세먼지', '야생동물 교통사고', '향수']
for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Discard every row that has a missing value in any column.
df = df.dropna()

# Only numeric columns take part in the correlation matrix.
df_numeric = df.select_dtypes(include='number')
corr_matrix = df_numeric.corr()

# Annotated correlation heatmap.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title("데이터 간 상관 관계 히트맵")
plt.show()

# Reshape to long form so the two fine-dust series can be compared per year:
# one row per (year, series), with the reading in 'PM2.5'. Rows are sorted by
# year first so the categories appear in ascending order.
df_melted = df.sort_values("연도").melt(
    id_vars=["연도"],
    value_vars=["미세먼지", "중국 미세먼지"],
    var_name="나라",
    value_name="PM2.5",
)

# '중국 미세먼지' is never coerced elsewhere in this script, so make sure the
# plotted values are numeric (unparseable cells become NaN).
df_melted["PM2.5"] = pd.to_numeric(df_melted["PM2.5"], errors="coerce")

# Use string years for BOTH layers. The box plot places its groups at
# categorical positions 0..n-1, whereas the line plot would place numeric
# years at their actual values (2017, ...), so the layers would not line up.
# With text years both layers share the same categorical x axis.
df_melted["연도"] = df_melted["연도"].astype(str)

fig, ax = plt.subplots(figsize=(14, 6))

# Box plot layer: transparent boxes, drawn behind the lines (lower zorder).
sns.boxplot(x="연도", y="PM2.5", hue="나라", data=df_melted, ax=ax, showcaps=False,
            boxprops={'facecolor': 'None', 'edgecolor': 'gray'},
            whiskerprops={'color': 'gray'},
            zorder=1)

# Line plot layer: markers on, no dashes, drawn in front (higher zorder).
sns.lineplot(x="연도", y="PM2.5", hue="나라", data=df_melted, markers=True,
             dashes=False, linewidth=2, alpha=0.8, marker='o', ax=ax, zorder=2)

# Tick at the categorical positions, labelled with the years in the order
# they appear in the (year-sorted) long frame.
year_labels = list(df_melted["연도"].unique())
ax.set_xticks(range(len(year_labels)))
ax.set_xticklabels(year_labels, rotation=30, ha="right")

# Titles, axis labels, legend and a light horizontal grid for readability.
plt.title("연도별 한국 vs 중국 미세먼지 농도 비교 (Boxplot + Line Plot)")
plt.ylabel("PM2.5 (µg/m³)")
plt.xlabel("연도")
plt.legend(title="나라")
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.show()