基于Python机器学习的文本分类系统(python+django+mysql+html+bootstrap)

运行环境

Python≥3.10、MySQL≥5.7

开发工具

Pycharm(推荐)

适用

课程设计,大作业,毕业设计,项目练习,学习演示等

功能说明

011125321805

151125311805

441125291805

551125291805

591125291805

基于Python机器学习的文本分类系统(python+django+mysql+html+bootstrap)

1 设置虚拟环境
2 安装依赖:

1
2
pip install -i https://mirrors.aliyun.com/pypi/simple/ bs4 pymysql Django==3.1 requests gensim jieba
pip install torch==1.12.1+cpu torchvision==0.13.1+cpu -f https://download.pytorch.org/whl/torch_stable.html

3 启动:

1
python manage.py runserver 8091

4 打开页面:
http://127.0.0.1:8091

账户: 用户名:admin 密码:123456

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
    username = request.POST.get('username')
phone = request.POST.get('phone')
UserTable.objects.filter(id=user_id).update(
name=username,
phone=phone)
response_data['msg'] = 'success'
return JsonResponse(response_data, status=201)


def del_user(request):
    """
    Delete the user whose primary key is posted as ``id``.

    Reads ``id`` from ``request.POST`` and looks the row up in ``UserTable``.

    Returns a ``JsonResponse``:
      * status 403 with ``error``/``message`` keys when no such user exists;
      * status 403 with a ``message`` key when the delete itself raises;
      * status 201 with a ``message`` key on success.
    """
    import logging

    user_id = request.POST.get('id')
    result = UserTable.objects.filter(id=user_id).first()

    # Guard clause: nothing to delete.
    if not result:
        response_data = {'error': '删除用户信息失败!', 'message': '找不到id为%s的用户' % user_id}
        return JsonResponse(response_data, status=403)

    try:
        result.delete()
    except Exception:
        # Keep the original best-effort response, but record the cause
        # instead of silently discarding it.
        logging.getLogger(__name__).exception("Failed to delete user id=%s", user_id)
        response_data = {'message': '删除失败!'}
        return JsonResponse(response_data, status=403)

    response_data = {'message': '删除成功!'}
    return JsonResponse(response_data, status=201)


def change_password(request):
    """
    Change the password of the user named in the current session.

    Reads the user name from ``request.session["username"]`` and the new
    password from the ``changePassword`` POST field.

    Returns a ``JsonResponse``:
      * status 404 when the session has no user name or no matching user row;
      * status 406 when the new password equals the stored one;
      * status 200 with ``{"msg": "success"}`` after the update, in which case
        ``username`` is also removed from the session so the client has to log
        in again.
    """
    # NOTE(review): the password is compared and written as the raw POST
    # value, so it appears to be stored in plain text -- confirm and consider
    # hashing.
    username = request.session.get("username")
    new_password = request.POST.get('changePassword')

    user = UserTable.objects.filter(name=username).first()
    if user is None:
        # Previously this path raised (KeyError / AttributeError).
        return JsonResponse({"msg": "用户不存在"}, status=404)

    if user.password == new_password:
        # New password equals the current one: reject the change.
        # The status must be passed as a keyword; returning a
        # ``(response, 406)`` tuple is not a valid Django response.
        return JsonResponse({"msg": "修改密码与原密码重复"}, status=406)

    # Passwords differ: persist the new one.
    UserTable.objects.filter(name=username).update(password=new_password)
    # Clear the session entry so the client returns to the login page.
    del request.session['username']
    return JsonResponse({"msg": "success"})



# coding:utf-8

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47

# coding:utf-8

# Absolute directory containing this module. Resolving it up front keeps the
# paths below valid even when ``__file__`` is a bare relative file name.
module_path = os.path.dirname(os.path.abspath(__file__))

# Where the serialized model and the saved epoch value are stored.
# ``os.path.join`` is used instead of string concatenation so an empty
# directory component can never turn these into root-level paths.
save_path = {
    'model': os.path.join(module_path, 'model_save', 'model.pth'),
    'epoch': os.path.join(module_path, 'model_save', 'epoch.pth'),
}
def get_trained_net():
    """
    Load the saved network and prepare it for inference.

    The object stored at ``save_path['model']`` is deserialized onto the CPU,
    moved to the module-level ``device`` and switched to evaluation mode.

    Returns the loaded network.
    """
    model_file = save_path.get('model')
    trained = torch.load(model_file, map_location='cpu')
    trained.to(device)
    trained.eval()
    return trained


def get_epoch():
    """
    Return the object stored in the file at ``save_path['epoch']``.
    """
    epoch_file = save_path.get('epoch')
    return torch.load(epoch_file)


def save_net_epoch(net, epoch):
    """
    Persist the network and the epoch value to their configured files.

    ``net`` is switched to evaluation mode before saving and is left in that
    mode when the function returns. ``net`` is written to
    ``save_path['model']`` and ``epoch`` to ``save_path['epoch']``.
    """
    net.eval()
    for payload, key in ((net, 'model'), (epoch, 'epoch')):
        torch.save(payload, save_path.get(key))


# The model and its inputs/outputs are all kept on this device:
# the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# embedding_weight = get_vector_weight(get_vector_model())
# Config = {
# # 'embedding_weight': get_vector_weight(model_save), 如果每次将这个config输入进去,之前的优化都失效了,只能让模型随机了
# 'kernel_size': (2, 3, 4),
# 'output_channels': 300,
# 'class_num': 12,
# 'linear_one': 250,
# 'linear_two': 120,
# 'dropout': 0.5,
# 'embedding_weight': embedding_weight
# }


Config = {
'kernel_size': (3, 4, 5), # 卷积核的不同尺寸
'output_channels': 200, # 每种尺寸的卷积核有多少个
'class_num': 12, # 分类数量,见data_loader.categories
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
    net = torch.load(save_path.get('model') ,map_location='cpu')
net.to(device)
net.eval()
return net


def get_epoch():
    """
    Return the object stored in the file at ``save_path['epoch']``.
    """
    epoch_file = save_path.get('epoch')
    return torch.load(epoch_file)


def save_net_epoch(net, epoch):
    """
    Persist the network and the epoch value to their configured files.

    ``net`` is switched to evaluation mode before saving and is left in that
    mode when the function returns. ``net`` is written to
    ``save_path['model']`` and ``epoch`` to ``save_path['epoch']``.
    """
    net.eval()
    for payload, key in ((net, 'model'), (epoch, 'epoch')):
        torch.save(payload, save_path.get(key))


# The model and its inputs/outputs are all kept on this device:
# the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# embedding_weight = get_vector_weight(get_vector_model())
# Config = {
# # 'embedding_weight': get_vector_weight(model_save), 如果每次将这个config输入进去,之前的优化都失效了,只能让模型随机了
# 'kernel_size': (2, 3, 4),
# 'output_channels': 300,
# 'class_num': 12,
# 'linear_one': 250,
# 'linear_two': 120,
# 'dropout': 0.5,
# 'embedding_weight': embedding_weight
# }


# Hyper-parameters of the text-classification network.
Config = dict(
    kernel_size=(3, 4, 5),   # the different convolution kernel sizes
    output_channels=200,     # number of kernels for each kernel size
    class_num=12,            # number of classes, see data_loader.categories
    linear_one=250,          # output width of the first fully connected layer
    # linear_two=120,
    dropout=0.5,             # fraction of nodes randomly dropped
    vocab_size=283302,       # vocabulary size, i.e. number of words; len(model.wv.index_to_key) of the word2vec model
    vector_size=100,         # length of each word vector
    # embedding_weight=embedding_weight,
)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
    'epoch': module_path + '/model_save/epoch.pth'
}
def get_trained_net():
    """
    Load the saved network and prepare it for inference.

    The object stored at ``save_path['model']`` is deserialized onto the CPU,
    moved to the module-level ``device`` and switched to evaluation mode.

    Returns the loaded network.
    """
    model_file = save_path.get('model')
    trained = torch.load(model_file, map_location='cpu')
    trained.to(device)
    trained.eval()
    return trained


def get_epoch():
    """
    Return the object stored in the file at ``save_path['epoch']``.
    """
    epoch_file = save_path.get('epoch')
    return torch.load(epoch_file)


def save_net_epoch(net, epoch):
    """
    Persist the network and the epoch value to their configured files.

    ``net`` is switched to evaluation mode before saving and is left in that
    mode when the function returns. ``net`` is written to
    ``save_path['model']`` and ``epoch`` to ``save_path['epoch']``.
    """
    net.eval()
    for payload, key in ((net, 'model'), (epoch, 'epoch')):
        torch.save(payload, save_path.get(key))


# The model and its inputs/outputs are all kept on this device:
# the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# embedding_weight = get_vector_weight(get_vector_model())
# Config = {
# # 'embedding_weight': get_vector_weight(model_save), 如果每次将这个config输入进去,之前的优化都失效了,只能让模型随机了
# 'kernel_size': (2, 3, 4),
# 'output_channels': 300,
# 'class_num': 12,
# 'linear_one': 250,
# 'linear_two': 120,
# 'dropout': 0.5,
# 'embedding_weight': embedding_weight
# }


# Hyper-parameters of the text-classification network.
Config = dict(
    kernel_size=(3, 4, 5),   # the different convolution kernel sizes
    output_channels=200,     # number of kernels for each kernel size
    class_num=12,            # number of classes, see data_loader.categories
    linear_one=250,          # output width of the first fully connected layer
    # linear_two=120,
    dropout=0.5,             # fraction of nodes randomly dropped
    vocab_size=283302,       # vocabulary size, i.e. number of words; len(model.wv.index_to_key) of the word2vec model
    vector_size=100,         # length of each word vector
    # embedding_weight=embedding_weight,
)



# in_channels 就是词向量的维度, out_channels则是卷积核(每个kernel_size都一样多)的数量
# 对于一种尺寸的卷积核 kernel_size = 3,in_channels=100,out_channels=50时,
# 设 x = [32][100]即一个新闻数据, 进行卷积操作前,先对x维度进行变换->[100][32],即每一列是一个词向量,conv1d卷积层左右扫描即可
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
        self.fc1 = nn.Linear(len(self.kernel_size) * self.output_channels, self.liner_one)
# self.fc2 = nn.Linear(self.liner_one, self.liner_two)
self.fc2 = nn.Linear(self.liner_one, self.class_num)
self.dropout = nn.Dropout(self.dropout)

# embedding_matrix就是word2vec的get_vector_weight
def init_embedding(self, embedding_matrix):
    """
    Replace the embedding layer's weight with the given matrix.

    ``embedding_matrix`` is converted with ``torch.tensor``, moved to the
    module-level ``device`` and installed as a new ``nn.Parameter`` on
    ``self.embedding``.
    """
    # Presumably the word2vec weight matrix (get_vector_weight) -- confirm with caller.
    weights = torch.tensor(embedding_matrix)
    weights = weights.to(device)
    self.embedding.weight = nn.Parameter(weights)

# x = torch.tensor([[word1_index, word2_index,..],[],[]])
# forward这个函数定义了前向传播的运算
def forward(self, x):
    """
    Run the forward pass and return the output of the final linear layer.

    ``x`` holds word indices. Per the original comments it is laid out as
    (news_num, words_num) and embeds to (news_num, words_num, vector_size).
    """
    # Word indices -> word vectors via the embedding layer.
    embedded = self.embedding(x)
    # Swap the last two axes so the convolution layers scan along the words.
    channels_first = embedded.permute(0, 2, 1)

    # Convolve + pool with every conv layer, then concatenate the results.
    pooled = []
    for conv in self.convs:
        pooled.append(self.conv_and_pool(channels_first, conv))
    features = torch.cat(pooled, 1)

    # Flatten to one feature row per sample.
    feature_width = len(self.kernel_size) * self.output_channels
    flat = features.view(-1, feature_width)

    dropped = self.dropout(flat)
    hidden = F.relu(self.fc1(dropped))
    return self.fc2(hidden)

@staticmethod
def conv_and_pool(x, conv):
x = F.relu(conv(x))
x = F.max_pool1d(x, x.size(2)) # 最大池化
x = x.squeeze(2) # 只保存2维
return x
# coding:utf-8

# Absolute directory containing this module. Resolving it up front keeps the
# paths below valid even when ``__file__`` is a bare relative file name.
module_path = os.path.dirname(os.path.abspath(__file__))

# Where the serialized model and the saved epoch value are stored.
# ``os.path.join`` is used instead of string concatenation so an empty
# directory component can never turn these into root-level paths.
save_path = {
    'model': os.path.join(module_path, 'model_save', 'model.pth'),
    'epoch': os.path.join(module_path, 'model_save', 'epoch.pth'),
}
def get_trained_net():
    """
    Load the saved network and prepare it for inference.

    The object stored at ``save_path['model']`` is deserialized onto the CPU,
    moved to the module-level ``device`` and switched to evaluation mode.

    Returns the loaded network.
    """
    model_file = save_path.get('model')
    trained = torch.load(model_file, map_location='cpu')
    trained.to(device)
    trained.eval()
    return trained



项目链接:
https://javayms.github.io?id=581125381805201py
https://javayms.pages.dev?id=581125381805201py