博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
POI读取Word与PowerPoint
阅读量:6984 次
发布时间:2019-06-27

本文共 11345 字,大约阅读时间需要 37 分钟。

import java.io.BufferedWriter;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.File;import java.io.OutputStreamWriter;import java.util.HashSet;import org.apache.poi.hslf.HSLFSlideShow;import org.apache.poi.hslf.model.Slide;import org.apache.poi.hslf.model.TextRun;import org.apache.poi.hslf.usermodel.RichTextRun;import org.apache.poi.hslf.usermodel.SlideShow;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.extractor.WordExtractor;import org.apache.poi.hwpf.usermodel.CharacterRun;import org.apache.poi.hwpf.usermodel.Paragraph;import org.apache.poi.hwpf.usermodel.Range;import org.apache.poi.hwpf.usermodel.Section;import org.apache.poi.xslf.usermodel.XMLSlideShow;import org.apache.poi.xslf.usermodel.XSLFShape;import org.apache.poi.xslf.usermodel.XSLFSlide;import org.apache.poi.xslf.usermodel.XSLFTable;import org.apache.poi.xslf.usermodel.XSLFTableCell;import org.apache.poi.xslf.usermodel.XSLFTableRow;import org.apache.poi.xslf.usermodel.XSLFTextParagraph;import org.apache.poi.xslf.usermodel.XSLFTextRun;import org.apache.poi.xslf.usermodel.XSLFTextShape;public class read {    private static InputStream is =null;    public static void readFiles(String path){        File f = new File(path);        File[] files = f.listFiles();        for(File everyfile : files){            StringBuilder sb = new StringBuilder();            if(everyfile.isDirectory())    continue;            String Filename = everyfile.getName();            if(Filename.startsWith("~")) continue;            if(!Filename.endsWith("doc")) continue;            System.out.println(Filename);            sb.append("###@@@").append(Filename.substring(0,Filename.lastIndexOf("."))).append("\n");            sb.append("----------------------").append("\n");            try {                //输入文件流                is = new FileInputStream(everyfile);                
if(Filename.toLowerCase().endsWith("ppt")||Filename.toLowerCase().endsWith("pptm")){                    try {                        XMLSlideShow pptx = new XMLSlideShow(is);                        is.close();                        for(int x= 0 ; x< pptx.getSlides().length ; x++){                            XSLFSlide slide = pptx.getSlides()[x];                            if (slide.getShapes().length == 0) continue;                            String title = getTitle(slide);                            if(title != null) sb.append(title).append("\t").append("title##@@").append("\n");                            for(XSLFShape shape : slide){                                if(shape instanceof XSLFTextShape){                                    XSLFTextShape content = (XSLFTextShape)shape;                                    for( XSLFTextParagraph ttp: content.getTextParagraphs()){                                                                                    if(ttp.getText().equals(title)) continue;                                        //用一个set统计到底有多少字体大小,如果只有一种字体大小,则直接添加paragraph                                        HashSet
sizeset = new HashSet
(); for(XSLFTextRun tr : ttp.getTextRuns()){ if (tr.getText().trim().equals("")) continue; if(tr.getText().trim().equals(title)) continue; float size = (float) tr.getFontSize(); sizeset.add(size); } if(sizeset.size()!=1){ for(XSLFTextRun tr : ttp.getTextRuns()){ if (tr.getText().trim().equals("")) continue; if(tr.getText().trim().equals(title)) continue; String text = tr.getText(); float size = (float) tr.getFontSize(); sb.append(text.trim()).append("\t").append(size).append("##@@").append("\n"); } }else{ sb.append(ttp.getText().trim().replaceAll("[\\n\\r]", " ")).append("\t").append((float)sizeset.toArray()[0]).append("##@@").append("\n"); } } }else if(shape instanceof XSLFTable){ XSLFTable txShape = (XSLFTable)shape; for(XSLFTableRow row : txShape.getRows()){ for(XSLFTableCell cell: row.getCells()){ XSLFTextShape content = (XSLFTextShape)cell; for( XSLFTextParagraph ttp: content.getTextParagraphs()){ if(ttp.getText().equals(title)) continue; //用一个set统计到底有多少字体大小,如果只有一种字体大小,则直接添加paragraph HashSet
sizeset = new HashSet
(); for(XSLFTextRun tr : ttp.getTextRuns()){ if (tr.getText().trim().equals("")) continue; if(tr.getText().trim().equals(title)) continue; float size = (float) tr.getFontSize(); sizeset.add(size); } if(sizeset.size()!=1){ for(XSLFTextRun tr : ttp.getTextRuns()){ if (tr.getText().trim().equals("")) continue; if(tr.getText().trim().equals(title)) continue; String text = tr.getText(); float size = (float) tr.getFontSize(); sb.append(text.trim()).append("\t").append(size).append("##@@").append("\n"); } }else{ sb.append(ttp.getText().trim().replaceAll("[\\n\\r]", " ")).append("\t").append((float)sizeset.toArray()[0]).append("##@@").append("\n"); } } } } } } if(x!=pptx.getSlides().length-1) sb.append("----------------------").append("\n"); } } catch (IOException e) { e.printStackTrace(); } }else if(Filename.endsWith("ppt")){ try { SlideShow ss = new SlideShow(new HSLFSlideShow(is)); is.close(); for(int x = 0 ; x < ss.getSlides().length ; x ++){ Slide slide = ss.getSlides()[x]; if (slide.getShapes().length ==0) continue; String title = getTitle(slide); if(title != null) sb.append(title).append("\t").append("title##@@").append("\n"); for(TextRun tr : slide.getTextRuns()){ HashSet
sizeset = new HashSet
(); for(RichTextRun rtr : tr.getRichTextRuns()){ if (rtr.getText().trim().equals("")|| rtr.getText() ==null) continue; if(rtr.getText().trim().equals(title)) continue; sizeset.add((float)rtr.getFontSize()); } if(sizeset.size()!=1){ for(RichTextRun rtr : tr.getRichTextRuns()){ if (rtr.getText().trim().equals("") || rtr.getText() ==null) continue; if(rtr.getText().trim().equals(title)) continue; String text = rtr.getText(); float size = (float) rtr.getFontSize(); sb.append(text.trim()).append("\t").append(size).append("##@@").append("\n"); } }else { for(RichTextRun rtr : tr.getRichTextRuns()){ if (rtr.getText().trim().equals("")|| rtr.getText() ==null) continue; if(rtr.getText().trim().equals(title)) continue; sb.append(rtr.getText().trim()).append(" "); } sb.append("\t").append((float)sizeset.toArray()[0]).append("##@@").append("\n"); } } if(x!=ss.getSlides().length-1) sb.append("----------------------").append("\n"); } } catch (IOException e) { e.printStackTrace(); } }else if(Filename.endsWith("doc")){ try { HWPFDocument hwpf = new HWPFDocument(is); Range range = hwpf.getRange(); for (int x = 0; x < range.numSections(); x++) { Section s = range.getSection(x); for (int y = 0; y < s.numParagraphs(); y++) { Paragraph p = s.getParagraph(y); for (int z = 0; z < p.numCharacterRuns(); z++) { CharacterRun run = p.getCharacterRun(z); //字符串文本 String text = run.text().trim(); if(text ==null ||text == " "|| text=="") continue; sb.append(text.trim()).append("\t").append(run.getFontSize()).append("##@@").append("\n"); } } if (x != range.numSections()-1) sb.append("----------------------").append("\n"); } } catch (IOException e) { e.printStackTrace(); } } } catch (FileNotFoundException e) { e.printStackTrace(); } write(sb.toString()); } } public static String getTitle(XSLFSlide slide){ String title = null; if (slide.getTitle() != null && !slide.getTitle().trim().equals("")){ title= slide.getTitle().trim(); } return title; } public static String getTitle(Slide slide){ String title 
= null; if (slide.getTitle() != null && !slide.getTitle().trim().equals("")){ title= slide.getTitle().trim(); } return title; } static FileOutputStream fos =null; static OutputStreamWriter osw =null; static BufferedWriter bw =null; public static void write(String content){ File f = new File("ressss.csv"); try { fos = new FileOutputStream(f,true); osw = new OutputStreamWriter(fos,"utf-8"); bw = new BufferedWriter(osw); bw.write(content); bw.flush(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); }finally{ if(bw !=null){ try { bw.close(); } catch (IOException e) { e.printStackTrace(); } bw =null; } if(osw !=null){ try { osw.close(); } catch (IOException e) { e.printStackTrace(); } osw =null; } if(fos !=null){ try { fos.close(); } catch (IOException e) { e.printStackTrace(); } fos =null; } } } public static void main(String[] args) throws Exception { readFiles("C:\\Users\\ooon\\Desktop\\DKM_data\\DKM_data"); }}

 

转载于:https://www.cnblogs.com/ooon/p/4828007.html

你可能感兴趣的文章
JCheckBox使用示例
查看>>
LaTeX使用listings宏包插入代码时,将代码字体设为 Monaco
查看>>
设计模式之迭代子模式
查看>>
代码评审的不可能三角
查看>>
揭秘ThreadLocal
查看>>
七年蜕变 感恩献礼
查看>>
共享经济、短视频、新零售、AI:寻觅2019年新经济未来走向
查看>>
zabbix配置邮箱报警
查看>>
使用ulimit设置文件最大打开数
查看>>
[Step By Step]SAP HANA PAL指数回归预测分析Exponential Regression编程实例EXPREGRESSION(模型)...
查看>>
VMware Data Recovery备份恢复vmware虚拟机
查看>>
solr多core的处理
查看>>
解决DeferredResult 使用 @ResponseBody 注解返回中文乱码
查看>>
C# WinForm开发系列 - TextBox
查看>>
28岁少帅统领旷视南京研究院,LAMDA魏秀参专访
查看>>
java文件传输
查看>>
Xen虚拟机迁移技术
查看>>
安装Sql Server 2005出现“性能监视器计数器要求”错误解决方法。
查看>>
[.NET领域驱动设计实战系列]专题八:DDD案例:网上书店分布式消息队列和分布式缓存的实现...
查看>>
Icomparer和Icomparable集合排序
查看>>