import pandas as pd
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import numpy as np
import json
import os
# --- 1. Data preparation (four academic categories + psychological traits) ---
DATA_FILE = "sample.cvd"

# The flag is defined unconditionally so the fallback check below never has to
# probe locals() for a name that only some branches assigned.
load_default = False
try:
    # The data file is parsed as JSON despite its ".cvd" extension.
    with open(DATA_FILE, 'r', encoding='utf-8') as f:
        data = json.load(f)
except FileNotFoundError:
    # No data file present: quietly fall back to the built-in sample data.
    load_default = True
except (OSError, ValueError) as e:
    # Unreadable file, or malformed JSON / bad encoding (JSONDecodeError and
    # UnicodeDecodeError are both ValueError subclasses).
    print(f"Error loading {DATA_FILE}: {e}. Using default data.")
    load_default = True
else:
    print(f"Loaded data from {DATA_FILE}")

if load_default:
    data = {
        'Name': [
            '講師A (理系エース)', '講師B (英語特化)', '講師C (文系オール)', '講師D (数学・物理)', '講師E (心理・生物)',
            '生徒1 (理系志望)', '生徒2 (英語苦手)', '生徒3 (国語のみ得意)', '生徒4 (全教科平均)', '生徒5 (難関国立)', '生徒6 (勉強嫌い)'
        ],
        'Type': ['Teacher', 'Teacher', 'Teacher', 'Teacher', 'Teacher', 'Student', 'Student', 'Student', 'Student', 'Student', 'Student'],
        # --- Four broad categories of academic / teaching ability
        #     (0.0: unable / weak ~ 1.0: can teach / strong) ---
        # 1. Math & logic (mathematics in general)
        'Score_Math_Logic': [0.9, 0.1, 0.2, 0.9, 0.4, 0.8, 0.4, 0.1, 0.5, 0.9, 0.1],
        # 2. English & languages (English in general)
        'Score_English': [0.6, 0.9, 0.8, 0.3, 0.5, 0.6, 0.2, 0.3, 0.5, 0.9, 0.2],
        # 3. Natural sciences (physics, chemistry, biology)
        'Score_Science': [0.9, 0.1, 0.1, 0.9, 0.8, 0.7, 0.2, 0.1, 0.4, 0.8, 0.1],
        # 4. Context & liberal arts (Japanese language, social studies)
        'Score_Humanities': [0.2, 0.5, 0.9, 0.1, 0.6, 0.3, 0.5, 0.9, 0.5, 0.8, 0.2],
        # --- Personality / behavioural traits ---
        # Extraversion: 0.0 (quiet, introverted) ~ 1.0 (talkative, extraverted)
        'Extraversion': [0.5, 0.8, 0.9, 0.2, 0.7, 0.2, 0.8, 0.4, 0.6, 0.5, 0.6],
        # Empathy/Sensitivity: 0.0 (dry, logical) ~ 1.0 (empathetic, delicate)
        # Note: read as "vulnerability" for students and "receptiveness" for teachers.
        'Sensitivity': [0.3, 0.6, 0.8, 0.1, 0.9, 0.7, 0.4, 0.9, 0.5, 0.3, 0.5],
        # Independence: 0.0 (dependent, needs attention) ~ 1.0 (autonomous, can be left alone)
        'Independence': [0.8, 0.5, 0.7, 0.9, 0.6, 0.5, 0.3, 0.4, 0.7, 0.9, 0.1],
        # Gender: 0.0 (female) / 1.0 (male); intended as a hard constraint for seating
        'Gender_Code': [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0]
    }

# One row per person, one column per attribute.
df = pd.DataFrame(data)
# Numeric columns fed into the projection: the four score columns followed by
# the three trait columns and the gender code.
features = [
    'Score_Math_Logic',
    'Score_English',
    'Score_Science',
    'Score_Humanities',
    'Extraversion',
    'Sensitivity',
    'Independence',
    'Gender_Code',
]

# --- 2. Preprocessing ---
# Standardise the feature columns before the decomposition.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df[features])

# --- 3. Extract six dimensions with PCA ---
# Components 1-3 -> position in space (x, y, z)
# Components 4-6 -> direction and magnitude of the vector (u, v, w)
pca = PCA(n_components=6)
components = pca.fit_transform(df_scaled)

# Share of variance retained by the first three and by all six components,
# expressed as percentages.
_variance_ratios = pca.explained_variance_ratio_
retention_3d = sum(_variance_ratios[:3]) * 100
retention_6d = sum(_variance_ratios) * 100
print(f"3次元(位置)での情報保持率: {retention_3d:.2f}%")
print(f"6次元(位置+矢印)での情報保持率: {retention_6d:.2f}%")

# Attach each principal component to the data frame under its axis name.
for _column_index, _axis_name in enumerate(('x', 'y', 'z', 'u', 'v', 'w')):
    df[_axis_name] = components[:, _column_index]

# Euclidean length of the (u, v, w) vector, intended for arrow sizing.
_squared_length = df['u'] ** 2 + df['v'] ** 2 + df['w'] ** 2
df['norm'] = np.sqrt(_squared_length)
# --- 4. 3D visualisation (Scatter + Cone) ---
# Trace order matters: Teacher markers, Student markers, then the cones.
fig = go.Figure()

# (1) Points and names (scatter plot), split into teachers and students.
# Teachers
df_teacher = df[df['Type'] == 'Teacher']
fig.add_trace(go.Scatter3d(
    x=df_teacher['x'], y=df_teacher['y'], z=df_teacher['z'],
    mode='markers',
    marker=dict(
        size=10,
        color='gold',
        line=dict(width=2, color='DarkSlateGrey')
    ),
    text=df_teacher['Name'],
    name='Teacher'
))

# Students
df_student = df[df['Type'] == 'Student']
fig.add_trace(go.Scatter3d(
    x=df_student['x'], y=df_student['y'], z=df_student['z'],
    mode='markers',
    marker=dict(
        size=10,
        color='lightskyblue',
        line=dict(width=2, color='DarkSlateGrey')
    ),
    text=df_student['Name'],
    name='Student'
))

# (2) Vectors (cones).
# u, v, w are the arrow components; they represent the "hidden detail
# parameters" carried by principal components 4-6.
fig.add_trace(go.Cone(
    x=df['x'], y=df['y'], z=df['z'],
    u=df['u'], v=df['v'], w=df['w'],
    sizemode="scaled",
    sizeref=2,
    anchor="tail",
    colorscale='Viridis',
    showscale=True,
    colorbar=dict(
        title="隠された特性の大きさ",
        x=1.0,
        y=0.4,
        len=0.8
    ),
    name='Vector (マイナー属性)',
    # Per-point payload for the hover text: name followed by the three
    # vector components.
    customdata=np.stack((df['Name'], df['u'], df['v'], df['w']), axis=-1),
    # A single template string; "<br>" is the line separator inside hover
    # labels. A string literal must not span raw source lines.
    hovertemplate=(
        "%{customdata[0]}<br>"
        "u: %{customdata[1]:.2f}<br>"
        "v: %{customdata[2]:.2f}<br>"
        "w: %{customdata[3]:.2f}"
    )
))
# --- Coordinates for the 10x mode ---
# 1x (Normal): the position columns as they are.
x_1x, y_1x, z_1x = df['x'], df['y'], df['z']

# 10x (Wide): every position coordinate spread out by a factor of ten.
x_10x, y_10x, z_10x = (axis_values * 10 for axis_values in (x_1x, y_1x, z_1x))
# --- Build the scene annotations (name labels), wrapped in a function ---
def create_annotations(scale_factor=1.0, frame=None):
    """Build one 3D scene annotation (name label) per row.

    Each label is placed at the row's ``x``/``y``/``z`` position multiplied
    by ``scale_factor``, so any factor works rather than only 1 and 10.

    Args:
        scale_factor: Multiplier applied to the ``x``, ``y`` and ``z``
            columns before placing the labels.
        frame: Data frame providing the ``Name``, ``x``, ``y`` and ``z``
            columns. Defaults to the module-level ``df``.

    Returns:
        A list of annotation dicts in row order; empty if the frame has no
        rows.
    """
    source = df if frame is None else frame

    # Iterate positionally (zip) rather than by index label so frames with a
    # non-default index are handled too.
    placed = zip(
        source['Name'],
        source['x'] * scale_factor,
        source['y'] * scale_factor,
        source['z'] * scale_factor,
    )
    return [
        dict(
            x=float(pos_x), y=float(pos_y), z=float(pos_z),
            text=label,
            xanchor='left',
            yanchor='bottom',
            showarrow=False,
            # Nudge the label up and to the right so it does not cover the marker.
            yshift=10,
            xshift=10,
            font=dict(family="sans serif", size=12, color="black"),
            bgcolor="rgba(255, 255, 255, 0.7)",
            bordercolor="black",
            borderwidth=1,
            opacity=0.9
        )
        for label, pos_x, pos_y, pos_z in placed
    ]
# Label sets for the two zoom modes.
annotations_1x = create_annotations(1.0)
annotations_10x = create_annotations(10.0)


def _scale_button(label, title_tag, xs, ys, zs, notes, marker_size, cone_sizeref):
    """Build one updatemenu button that switches the figure to a zoom mode.

    The restyle payload lists one value per trace in the order the traces
    were added (Teacher markers, Student markers, cones); ``None`` marks an
    attribute that does not apply to that trace.

    Args:
        label: Text shown on the button.
        title_tag: Mode name inserted into the figure title.
        xs, ys, zs: Coordinate Series for all rows in this mode.
        notes: Scene annotations matching these coordinates.
        marker_size: Marker size for the two scatter traces.
        cone_sizeref: ``sizeref`` for the cone trace.

    Returns:
        A button dict for ``layout.updatemenus[].buttons``.
    """
    teacher_rows = df['Type'] == 'Teacher'
    student_rows = df['Type'] == 'Student'

    def per_trace(values):
        # Plain lists keep the button arguments directly JSON-serialisable.
        return [
            values[teacher_rows].tolist(),
            values[student_rows].tolist(),
            values.tolist(),
        ]

    return dict(
        label=label,
        method="update",
        args=[
            # restyle: data updates
            {
                "x": per_trace(xs),
                "y": per_trace(ys),
                "z": per_trace(zs),
                "marker.size": [marker_size, marker_size, None],
                "sizeref": [None, None, cone_sizeref],
            },
            # relayout: labels and title. "title.text" is used because these
            # free-form args bypass the Python-side coercion of a bare
            # string title.
            {
                "scene.annotations": notes,
                "title.text": f'講師・生徒の属性空間 ({title_tag}) - 情報保持率: {retention_6d:.2f}%',
            },
        ],
    )


fig.update_layout(
    title=f'講師・生徒の属性空間 (情報保持率: {retention_6d:.2f}%)',
    scene=dict(
        xaxis_title='PC1 (主成分1)',
        yaxis_title='PC2 (主成分2)',
        zaxis_title='PC3 (主成分3)',
        aspectmode='cube',
        annotations=annotations_1x  # the initial state is 1x
    ),
    margin=dict(l=0, r=0, b=0, t=50),
    # --- 10x mode toggle buttons ---
    updatemenus=[
        dict(
            type="buttons",
            direction="left",
            buttons=[
                _scale_button(
                    "Normal (1x)", "Normal 1x",
                    x_1x, y_1x, z_1x, annotations_1x,
                    marker_size=10, cone_sizeref=2,
                ),
                # Wide mode uses smaller markers and a smaller cone sizeref
                # than Normal mode.
                _scale_button(
                    "Wide (10x)", "Wide 10x",
                    x_10x, y_10x, z_10x, annotations_10x,
                    marker_size=5, cone_sizeref=0.5,
                ),
            ],
            pad={"r": 10, "t": 10},
            showactive=True,
            x=0.05,
            xanchor="left",
            y=1.1,
            yanchor="top"
        )
    ]
)
fig.show()