从‘空训练集’报错深入理解sklearn的train_test_split:参数、源码与最佳实践
2026/6/15 15:00:05
作为陕西某软件公司项目负责人,针对公司产品部门提出的大文件传输需求,我经过深入调研和技术评估,提出以下专业解决方案。
[客户端(Vue2)] ——HTTP/WebSocket——> [Nginx反向代理] | [应用服务器(JSP)] ——> [文件分片服务] ——> [阿里云OSS] | [MySQL] <—— [传输状态管理服务] <—— [断点续传控制]// FileUploader.vueexportdefault{data(){return{fileList:[],folderList:[],uploadStatus:{}}},methods:{// 处理文件夹选择handleFolderSelect(e){constitems=e.dataTransfer?.items||e.target.files;this.scanFolder(items).then(tree=>{this.folderList=tree;});},// 递归扫描文件夹asyncscanFolder(items,path=''){consttree=[];for(leti=0;i<items.length;i++){constitem=items[i];if(item.isFile){constfile=awaitnewPromise(resolve=>item.getAsFile(resolve));file.relativePath=path+file.name;tree.push(file);}elseif(item.isDirectory){constdirReader=item.createReader();constentries=awaitnewPromise(resolve=>dirReader.readEntries(resolve));constchildren=awaitthis.scanFolder(entries,path+item.name+'/');tree.push(...children);}}returntree;},// 分片上传asyncuploadFile(file,chunkSize=4*1024*1024){constfileId=this.generateFileId(file);consttotalChunks=Math.ceil(file.size/chunkSize);// 检查已上传分片const{uploadedChunks}=awaitthis.$http.get(`/api/upload/status?fileId=${fileId}`);for(leti=0;i<totalChunks;i++){if(uploadedChunks.includes(i))continue;constchunk=file.slice(i*chunkSize,(i+1)*chunkSize);constformData=newFormData();formData.append('file',chunk);formData.append('chunkIndex',i);formData.append('totalChunks',totalChunks);formData.append('fileId',fileId);formData.append('fileName',file.name);awaitthis.$http.post('/api/upload/chunk',formData,{onUploadProgress:(e)=>{this.updateProgress(fileId,i,e.loaded/e.total);}});// 保存进度到本地存储this.saveProgress(fileId,i);}// 通知服务器合并文件awaitthis.$http.post('/api/upload/merge',{fileId,fileName:file.name,totalChunks});},// IE8兼容方案initIE8Upload(){if(!window.FileReader){// 使用传统表单上传+ActiveX控件this.$refs.uploadForm.style.display='block';}}}}// FileUploadServlet.java@WebServlet("/api/upload/chunk")publicclassFileUploadServletextendsHttpServlet{privatestaticfinalintCHUNK_SIZE=4*1024*1024;// 
4MBprotectedvoiddoPost(HttpServletRequestreq,HttpServletResponseresp){try{StringfileId=req.getParameter("fileId");intchunkIndex=Integer.parseInt(req.getParameter("chunkIndex"));PartfilePart=req.getPart("file");// 存储分片到临时目录StringtempDir=getTempDir(fileId);FilechunkFile=newFile(tempDir,"chunk_"+chunkIndex);try(InputStreamin=filePart.getInputStream();OutputStreamout=newFileOutputStream(chunkFile)){IOUtils.copy(in,out);}// 更新分片状态到数据库UploadDao.updateChunkStatus(fileId,chunkIndex,"completed");resp.getWriter().write("{\"status\":\"success\"}");}catch(Exceptione){resp.sendError(500,"Upload failed: "+e.getMessage());}}privateStringgetTempDir(StringfileId){StringtempDir=Config.get("upload.temp.dir")+"/"+fileId;newFile(tempDir).mkdirs();returntempDir;}}// UploadDao.javapublicclassUploadDao{publicstaticvoidupdateChunkStatus(StringfileId,intchunkIndex,Stringstatus){Stringsql="INSERT INTO upload_chunks (file_id, chunk_index, status) VALUES (?, ?, ?) "+"ON DUPLICATE KEY UPDATE status = VALUES(status)";try(Connectionconn=DataSource.getConnection();PreparedStatementstmt=conn.prepareStatement(sql)){stmt.setString(1,fileId);stmt.setInt(2,chunkIndex);stmt.setString(3,status);stmt.executeUpdate();}catch(SQLExceptione){thrownewRuntimeException(e);}}publicstaticListgetUploadedChunks(StringfileId){Listchunks=newArrayList<>();Stringsql="SELECT chunk_index FROM upload_chunks WHERE file_id = ? 
AND status = 'completed'";try(Connectionconn=DataSource.getConnection();PreparedStatementstmt=conn.prepareStatement(sql)){stmt.setString(1,fileId);ResultSetrs=stmt.executeQuery();while(rs.next()){chunks.add(rs.getInt(1));}}catch(SQLExceptione){thrownewRuntimeException(e);}returnchunks;}}// UploadStatusService.javapublicclassUploadStatusService{privatestaticfinalStringREDIS_PREFIX="upload:status:";publicUploadStatusgetUploadStatus(StringfileId){// 优先从Redis获取try(Jedisjedis=RedisPool.getResource()){Stringkey=REDIS_PREFIX+fileId;Stringjson=jedis.get(key);if(json!=null){returnJSON.parseObject(json,UploadStatus.class);}}// Redis中没有则从数据库加载UploadStatusstatus=newUploadStatus();status.setFileId(fileId);status.setUploadedChunks(UploadDao.getUploadedChunks(fileId));// 存入Redistry(Jedisjedis=RedisPool.getResource()){jedis.setex(REDIS_PREFIX+fileId,3600,JSON.toJSONString(status));}returnstatus;}publicvoidsaveUploadStatus(UploadStatusstatus){// 异步保存到数据库newThread(()->{UploadDao.saveUploadStatus(status);}).start();// 更新Redistry(Jedisjedis=RedisPool.getResource()){jedis.setex(REDIS_PREFIX+status.getFileId(),3600,JSON.toJSONString(status));}}}CREATETABLEfile_transfers(idVARCHAR(64)PRIMARYKEY,user_idINTNOTNULL,file_nameVARCHAR(255),file_pathVARCHAR(1024),file_sizeBIGINT,statusENUM('pending','uploading','completed','failed'),created_atDATETIMEDEFAULTCURRENT_TIMESTAMP,updated_atDATETIMEONUPDATECURRENT_TIMESTAMP,INDEXidx_user_status(user_id,status));CREATETABLEfolder_hierarchy(idINTAUTO_INCREMENTPRIMARYKEY,transfer_idVARCHAR(64)NOTNULL,parent_idINTDEFAULTNULL,nameVARCHAR(255)NOTNULL,relative_pathVARCHAR(1024)NOTNULL,is_directoryTINYINT(1)NOTNULL,file_sizeBIGINTDEFAULT0,FOREIGNKEY(transfer_id)REFERENCESfile_transfers(id),FOREIGNKEY(parent_id)REFERENCESfolder_hierarchy(id),INDEXidx_transfer(transfer_id));relativePath字段,重建文件夹结构// 
FileDownloadServlet.java@WebServlet("/api/download/folder")publicclassFileDownloadServletextendsHttpServlet{protectedvoiddoGet(HttpServletRequestreq,HttpServletResponseresp){StringfolderId=req.getParameter("folderId");try{// 获取文件夹结构Listitems=FolderService.getFolderItems(folderId);// 生成下载清单ListdownloadList=newArrayList<>();for(FileItemitem:items){if(!item.isDirectory()){DownloadItemdi=newDownloadItem();di.setPath(item.getRelativePath());di.setUrl(OSSUtil.generatePresignedUrl(item.getOssKey()));downloadList.add(di);}}// 返回JSON格式的下载清单resp.setContentType("application/json");resp.getWriter().write(JSON.toJSONString(downloadList));}catch(Exceptione){resp.sendError(500,"Download failed: "+e.getMessage());}}}// OSSUtil.javapublicclassOSSUtil{privatestaticOSSclient=newOSSClientBuilder().build(Config.get("oss.endpoint"),Config.get("oss.accessKeyId"),Config.get("oss.accessKeySecret"));publicstaticStringgeneratePresignedUrl(StringossKey){Dateexpiration=newDate(System.currentTimeMillis()+3600*1000);GeneratePresignedUrlRequestrequest=newGeneratePresignedUrlRequest(Config.get("oss.bucket"),ossKey,HttpMethod.GET);request.setExpiration(expiration);returnclient.generatePresignedUrl(request).toString();}}上传方案:
下载方案:
使用 window.location.href 直接下载单个文件
进度显示:
[负载均衡] | [Nginx集群] —— [应用服务器集群] | | [Redis] [MySQL主从] | [阿里云OSS]
基于贵司需求,建议考虑以下商业化方案:
技术授权模式:
服务内容:
实施周期:
如需进一步讨论技术细节或商务条款,可安排技术团队进行深度对接。
导入到Eclipse:点击查看教程
导入到IDEA:点击查看教程
springboot统一配置:点击查看教程
NOSQL示例不需要任何配置,可以直接访问测试
选择对应的数据表脚本,这里以SQL为例
up6/upload/年/月/日/guid/filename
支持离线保存文件进度,关闭或刷新浏览器后进度不丢失,仍然能够继续上传
支持上传文件夹并保留层级结构,同样支持进度信息离线保存,刷新页面,关闭页面,重启系统不丢失上传进度。
点击下载完整示例