亲宝软件园·资讯

展开

利用openoffice+jodconverter-core-3.0-beta-4实现ppt转图片

人气:0

安装openoffice4  (用于把文档(ppt)转成pdf)根据系统的位数安装
使用jodconverter-core3.0-beta-4(要上传maven本地仓库)
安装ImageMagick:yum install ImageMagick(用于pdf转图片)
安装pdftotext  用于提取文字大纲  yum install poppler-utils
perl脚本(用于提取pdf文档的文字大纲)

使用jodconverter调用OpenOffice将office文档转换为PDF时,如果转换程序异常中止而OpenOffice并没有停止运行,残留的OpenOffice进程可能会影响之后的启动和转换,因此需要考虑OpenOffice服务的启动方式。

openoffice

1、启动tomcat时,启动openoffice服务(个人感觉有风险问题)

2、手工用命令启动openoffice服务,再使用连接服务(推荐)

package com.document.servers.impl;
 
import java.io.File;
import java.net.ConnectException;
 
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
 
import org.artofsolving.jodconverter.OfficeDocumentConverter;
import org.artofsolving.jodconverter.office.DefaultOfficeManagerConfiguration;
import org.artofsolving.jodconverter.office.ExternalOfficeManagerConfiguration;
import org.artofsolving.jodconverter.office.OfficeConnectionProtocol;
import org.artofsolving.jodconverter.office.OfficeManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
 
 
 
//import com.artofsolving.jodconverter.DefaultDocumentFormatRegistry;
//import com.artofsolving.jodconverter.DocumentConverter;
//import com.artofsolving.jodconverter.DocumentFamily;
//import com.artofsolving.jodconverter.DocumentFormat;
//import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
//import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
//import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
import com.document.servers.OfficeService;
 
/**
 * linux下:
 * cd /opt/openoffice4/program 
 * ./soffice "-accept=socket,host=localhost,port=8100;urp;StarOffice.ServiceManager" -nologo -headless -nofirststartwizard &
 */
@Service("officeService")
public class OfficeServiceImpl implements OfficeService {
 
  private static final Logger logger = LoggerFactory.getLogger(OfficeServiceImpl.class);
 
  private OfficeManager officeManager;
  private OfficeDocumentConverter documentConverter;
 
//  @PostConstruct
//  public void init() throws Exception {
//    // TODO Auto-generated method stub
//    officeManager = new DefaultOfficeManagerConfiguration().setOfficeHome("/opt/openoffice4").buildOfficeManager();
//
//    documentConverter = new OfficeDocumentConverter(officeManager);
//    // officeManager.stop();
//
//    logger.warn("openoffice starting....");
//    try {
//      officeManager.start();
//      logger.warn("openoffice started");
//    } catch (Exception e) {
//      logger.error("office start failed:{}", e);
//    }
//  }
//
//  @PreDestroy
//  public void destroy() throws Exception {
//    // TODO Auto-generated method stub
//    logger.info("shutdown office service....");
//    if (officeManager != null) {
//      try {
//        
//        officeManager.stop();
//        logger.info("office closed");
//      } catch (Exception e) {
//        logger.error("office close failed:{}", e);
//      }
//    }
//  }
 
//  public void convert(String inputfilename, String outputfilename) {
//    logger.info("convert...." + inputfilename + " to " + outputfilename);
//    documentConverter.convert(new File(inputfilename), new File(outputfilename));
//  }
  
  
  public void manualConvert(String inputfilename, String outputfilename) {
    logger.info("convert...." + inputfilename + " to " + outputfilename);
    // connect to an OpenOffice.org instance running on port 8100 
    ExternalOfficeManagerConfiguration externalProcessOfficeManager = new 
        ExternalOfficeManagerConfiguration(); 
    externalProcessOfficeManager.setConnectOnStart(true); 
    externalProcessOfficeManager.setPortNumber(8100); 
    officeManager = externalProcessOfficeManager.buildOfficeManager(); 
    officeManager.start(); 
    logger.info("openoffice服务已链接");
    documentConverter = new OfficeDocumentConverter(officeManager);
    documentConverter.convert(new File(inputfilename), new File(outputfilename));
  }
  
 
 
}

转换处理方法

package com.document.servers.impl;
 
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
 
import javax.servlet.http.HttpServletRequest;
 
import org.apache.pdfbox.pdmodel.PDDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
 
import com.document.defined.model.ImagePPT;
import com.document.servers.OfficeService;
import com.document.servers.PPTConvertServers;
import com.document.tool.ImageMagickUtils;
import com.document.tool.SystemConfig;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ppt.util.Command;
 
@Service("pPTConvertServers")
public class PPTConvertServersImpl implements PPTConvertServers {
 
  private static final Logger logger = LoggerFactory.getLogger(PPTConvertServersImpl.class);
 
  @Autowired
  private OfficeService officeService;
 
  /**
   * Converts a newly uploaded presentation to a PDF (via OpenOffice) and then to one JPG per page.
   *
   * @param request   current request; its scheme and server name form the base of the image URLs
   * @param filePath  absolute path of the uploaded file
   * @param extension extension of the uploaded file; {@code ".pdf"} skips the office conversion
   * @param title     fallback title for pages that have no outline entry
   * @param filename  storage-relative file name, used to build the image URLs
   * @return a map with {@code "json"} (JSON array of {@link ImagePPT}) and {@code "totalPage"}
   * @throws Exception if the conversion, the external command or reading the PDF fails
   */
  public Map<String, Object> deal_ppt(HttpServletRequest request, String filePath, String extension, String title, String filename)
      throws Exception {
 
    logger.info("ppt转pdf,{}", filePath);
    return convertToImages(request, filePath, extension, title, filename, "创建图片目录");
  }
 
  /**
   * Re-runs the conversion for a file that was already uploaded before, replacing its PDF and
   * page images. Parameters and result are the same as for
   * {@link #deal_ppt(HttpServletRequest, String, String, String, String)}.
   */
  public Map<String, Object> replace_ppt(HttpServletRequest request, String filePath, String extension, String title,
      String filename) throws Exception {
 
    logger.info("替换,ppt转pdf,{}", filePath);
    return convertToImages(request, filePath, extension, title, filename, "替换创建图片目录");
  }
 
  /** Shared implementation of deal_ppt / replace_ppt; only the directory-creation log text differs. */
  private Map<String, Object> convertToImages(HttpServletRequest request, String filePath, String extension,
      String title, String filename, String mkdirLogMessage) throws Exception {
 
    // The PDF sits next to the upload; the images go into a directory named after it.
    String path = stripExtension(filePath);
    String pdfTargetFile = path + ".pdf";
    // "%d" is replaced with the page index by the external convert command.
    String imagesTargetFile = path + "/jpg-%d.jpg";
 
    // Constant-first comparison: a null extension is treated as "not a PDF" instead of throwing.
    if (!".pdf".equals(extension)) {
      officeService.manualConvert(filePath, pdfTargetFile);
    }
 
    // One outline entry per page, when the outline script produced any.
    String[] pdfLines = extractOutLineFromPDF(pdfTargetFile);
 
    File imageDir = new File(imagesTargetFile).getParentFile();
    if (!imageDir.exists()) {
      logger.info(mkdirLogMessage);
      // mkdirs() also returns false when another request created the directory meanwhile.
      if (!imageDir.mkdirs() && !imageDir.isDirectory()) {
        throw new IllegalStateException("cannot create image directory " + imageDir);
      }
    }
 
    // NOTE(review): the command line is assembled by string concatenation, so paths containing
    // spaces or shell metacharacters will break it. The paths are generated server-side; keep it
    // that way or switch Command to an argument list.
    StringWriter writer = new StringWriter();
    Command.run("convert " + pdfTargetFile + " " + imagesTargetFile, writer);
 
    String basePath = request.getScheme() + "://" + request.getServerName() + "/";
 
    int pageCount;
    PDDocument document = PDDocument.load(new File(pdfTargetFile));
    try {
      pageCount = document.getNumberOfPages();
    } finally {
      // Close even when reading the page count fails.
      document.close();
    }
 
    List<ImagePPT> list = new ArrayList<ImagePPT>();
    String pathUrl = stripExtension(filename);
    for (int i = 0; i < pageCount; i++) {
      ImagePPT imagePPT = new ImagePPT();
      imagePPT.setId(i + 1);
      if (i < pdfLines.length) {
        imagePPT.setTitle(pdfLines[i]);
      } else {
        imagePPT.setTitle(title);
        if (pdfLines.length > 0) {
          // The outline has fewer entries than the PDF has pages.
          logger.info("title,数组越界");
        }
      }
      // Image names are numbered from 0 (loop index) while ids start at 1.
      imagePPT.setUrl(basePath + "images/" + pathUrl + "/jpg-" + i + ".jpg");
      imagePPT.setPreviewUrl(basePath + "preview/images/" + pathUrl + "/preview/pjpg-" + i + ".jpg");
      list.add(imagePPT);
    }
 
    ObjectMapper objectMapper = new ObjectMapper();
    String jsonlist = objectMapper.writeValueAsString(list);
    Map<String, Object> map = new HashMap<String, Object>();
    map.put("json", jsonlist);
    map.put("totalPage", pageCount);
    return map;
  }
 
  /**
   * Returns {@code name} without its extension; names whose last path segment contains no dot
   * are returned unchanged instead of raising StringIndexOutOfBoundsException.
   */
  private static String stripExtension(String name) {
    int dot = name.lastIndexOf('.');
    int separator = Math.max(name.lastIndexOf('/'), name.lastIndexOf('\\'));
    return dot > separator ? name.substring(0, dot) : name;
  }
 
  /**
   * Extracts the text outline of a PDF by running the Perl script
   * {@code WEB-INF/sh/pdf_outline.pl} and splitting its output on {@code "///"}.
   *
   * @param pdf_file path of the PDF file handed to the script
   * @return the outline entries in script output order; an empty array when the script
   *         produced no (or only blank) output, so callers can fall back to a default title
   * @throws UnsupportedEncodingException if UTF-8 is not supported when decoding the output
   */
  public static String[] extractOutLineFromPDF(String pdf_file) throws UnsupportedEncodingException {
    // Locate the web application root: everything in front of "WEB-INF" in this class's location.
    String svndir = PPTConvertServersImpl.class.getResource("").getPath();
    svndir = svndir.split("WEB-INF")[0];
    svndir = svndir.replaceFirst("file:", "");
    logger.info(svndir);
 
    // NOTE(review): the command is built by concatenation; a pdf_file containing spaces or
    // shell metacharacters will not survive this - paths are expected to be generated server-side.
    String command = "/usr/bin/perl " + svndir + "WEB-INF/sh/pdf_outline.pl " + pdf_file;
    logger.info(command);
 
    ByteArrayOutputStream writer = new ByteArrayOutputStream();
 
    Command.run2(command, writer);
    String outline = writer.toString("utf-8");
    logger.info("title pdf,{}", outline);
 
    // "".split("///") yields a one-element array holding "", which callers would mistake for a
    // real (empty) title of the first page.
    if (outline.trim().isEmpty()) {
      return new String[0];
    }
    return outline.split("///");
  }
 
  /**
   * Tells whether a file or directory exists at the given path.
   *
   * @param filename path to check
   * @return {@code true} if something exists at {@code filename}; {@code false} otherwise,
   *         including when the check itself throws (for example for a {@code null} name)
   */
  public static boolean exists(String filename) {
    boolean present;
    try {
      present = new File(filename).exists();
    } catch (Exception ignored) {
      // Any failure while checking is reported as "does not exist".
      present = false;
    }
    return present;
  }
 
  /**
   * Deletes a regular file.
   *
   * @param filename path of the file to delete
   * @return {@code true} only if {@code filename} was a regular file and it was actually
   *         deleted; {@code false} for directories, missing files, a {@code null} name or
   *         when the deletion failed
   */
  public static boolean unlink(String filename) {
    try {
      File file = new File(filename);
      // Report the real outcome of delete() rather than assuming success.
      return file.isFile() && file.delete();
    } catch (Exception e) {
      return false;
    }
  }
 
  /**
   * Copies a file, creating the destination's parent directories when needed.
   *
   * @param file      source file
   * @param newname   path of the destination file
   * @param overwrite whether an existing destination may be replaced
   * @return {@code true} if the destination holds a copy of the source afterwards;
   *         {@code false} if the destination exists and {@code overwrite} is {@code false},
   *         or if anything fails (the exception is printed, not thrown)
   */
  public static boolean copy(File file, String newname, boolean overwrite) {
    FileInputStream input = null;
    FileOutputStream output = null;
    try {
      File dest = new File(newname);
      if (!overwrite && dest.exists()) {
        return false;
      }
      // Opening the destination truncates it, so copying a file onto itself would wipe it.
      if (file.getCanonicalFile().equals(dest.getCanonicalFile())) {
        return file.isFile();
      }
      // Open the source first so that a missing source does not leave empty directories behind.
      input = new FileInputStream(file);
      // A destination without a parent component lives in the working directory: nothing to create.
      File parent = dest.getParentFile();
      if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
        return false;
      }
      output = new FileOutputStream(dest);
      byte[] b = new byte[1024 * 5];
      int len;
      while ((len = input.read(b)) != -1) {
        output.write(b, 0, len);
      }
      output.flush();
      return true;
    } catch (Exception e) {
      e.printStackTrace();
      return false;
    } finally {
      // Release both streams on every path, including early returns and failures.
      if (output != null) {
        try {
          output.close();
        } catch (Exception ignored) {
          // data was already flushed; nothing useful to do here
        }
      }
      if (input != null) {
        try {
          input.close();
        } catch (Exception ignored) {
          // read side only; nothing useful to do here
        }
      }
    }
  }
 
  /**
   * Makes sure a directory exists, creating it and any missing parents.
   *
   * @param dir path of the directory
   * @return {@code true} if {@code dir} is a directory when this method returns; {@code false}
   *         if it is {@code null}, names an existing non-directory, or could not be created
   */
  public static boolean mkdir(String dir) {
    if (dir == null) {
      return false;
    }
    try {
      File file = new File(dir);
      // The trailing isDirectory() covers the case where another thread created the directory
      // between the first check and mkdirs().
      return file.isDirectory() || file.mkdirs() || file.isDirectory();
    } catch (Exception e) {
      e.printStackTrace();
      return false;
    }
  }
}

上传ppt文件处理类:

package com.document.handle.controller;
 
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
 
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
 
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
 
import com.document.servers.PPTConvertServers;
import com.document.tool.FilenameUtils;
import com.document.tool.SystemConfig;
import com.fasterxml.jackson.annotation.PropertyAccessor;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
 
@Controller
public class PptToImageController {
 
  private static final Logger logger = LoggerFactory.getLogger(PptToImageController.class);
 
  private static final String TYPE_BLOB = "BLOB";
 
  private static final String CALLBACK = "callback"; // 回调函数的参数名
 
  @Autowired
  private PPTConvertServers pPTConvertServers;
 
  /**
   * Renders the upload page for {@code /convert/upload}.
   *
   * @return the {@code /ppt/uploadFile} view with a {@code name} model attribute
   */
  @RequestMapping(value = "/convert/upload")
  public ModelAndView updateFile(HttpServletRequest request, HttpServletResponse response) {
    logger.info("/convert/upload");
    ModelAndView uploadPage = new ModelAndView();
    uploadPage.setViewName("/ppt/uploadFile");
    uploadPage.addObject("name", "Hello Word");
    return uploadPage;
  }
 
  /**
   * Builds the upload form view for the given upload type.
   *
   * <p>Every request parameter with a non-null value is copied into a map that is exposed to
   * the view as {@code parameters}; the view name is {@code /upload/} followed by the
   * lower-cased type.
   */
  @SuppressWarnings("unchecked")
  private ModelAndView showUploadForm(HttpServletRequest request, String type) {
    Map<String, String> parameters = new HashMap<String, String>();
    for (Enumeration<String> names = request.getParameterNames(); names.hasMoreElements();) {
      String key = names.nextElement();
      String val = request.getParameter(key);
      if (val == null) {
        continue;
      }
      parameters.put(key, val);
    }
 
    ModelAndView form = new ModelAndView();
    form.setViewName("/upload/" + type.toLowerCase());
    form.addObject("parameters", parameters);
    return form;
  }
 
  /**
   * Stores an uploaded file under the blob directory and converts it to page images.
   *
   * <p>The stored file name is derived from the MD5 of the content, so uploading the same
   * content again finds the existing file and only re-runs the conversion.
   *
   * @param request current request; all its parameters are echoed into the result map
   * @param file    the uploaded file
   * @param type    upload type (currently unused)
   * @return the request parameters plus {@code title}, {@code status} ({@code "success"} or
   *         {@code "fail"}), {@code data} (JSON page list or an error message) and
   *         {@code totalPage}
   */
  private Map<String, Object> saveUploadedFile(HttpServletRequest request, MultipartFile file, String type) {
 
    // Read the upload once: the same bytes feed the MD5 digest and the file written to disk.
    byte[] fileBytes = null;
    String md5 = null;
    try {
      fileBytes = file.getBytes();
      md5 = DigestUtils.md5Hex(fileBytes);
      logger.info("文件内容MD5串,{}", md5);
    } catch (IOException e) {
      logger.error("读取上传文件内容时发生异常,Cause: ", e);
    }
 
    String originalFilename = file.getOriginalFilename();
    String extension = FilenameUtils.getExtension(originalFilename);
 
    String filename;
    if (md5 != null) {
      filename = FilenameUtils.generateFileNameMd5(extension, md5);
    } else {
      filename = FilenameUtils.generateFileName(extension);
    }
 
    String absoluteFilename = SystemConfig.getBlobDirectory() + filename;
 
    // Create the target directory when it does not exist yet.
    File filepath = new File(absoluteFilename);
    File parentFile = filepath.getParentFile();
    if (!parentFile.exists()) {
      parentFile.mkdirs();
    }
 
    // Echo every request parameter back to the caller.
    Map<String, Object> params = new HashMap<String, Object>();
    Enumeration<String> paramNames = request.getParameterNames();
    while (paramNames.hasMoreElements()) {
      String name = paramNames.nextElement();
      String value = request.getParameter(name);
      if (null != value) {
        params.put(name, value);
      }
    }
 
    // Title = original file name without extension; names without a dot are used as they are.
    String pdftitle = originalFilename == null ? "" : originalFilename;
    int dot = pdftitle.lastIndexOf('.');
    if (dot >= 0) {
      pdftitle = pdftitle.substring(0, dot);
    }
    params.put("title", pdftitle);
 
    boolean alreadyUploaded = filepath.exists();
    if (!alreadyUploaded && !writeUploadedFile(filepath, fileBytes, originalFilename)) {
      // Without a stored file there is nothing to convert.
      params.put("status", "fail");
      params.put("data", "保存上传文件失败");
      params.put("totalPage", 0);
      return params;
    }
 
    try {
      Map<String, Object> officeMap = alreadyUploaded
          // Same content was uploaded before: re-run the conversion over the stored file.
          ? pPTConvertServers.replace_ppt(request, absoluteFilename, extension, pdftitle, filename)
          : pPTConvertServers.deal_ppt(request, absoluteFilename, extension, pdftitle, filename);
      params.put("totalPage", officeMap.get("totalPage"));
      params.put("data", officeMap.get("json"));
      params.put("status", "success");
    } catch (Exception e) {
      logger.error("把ppt文件转pdf失败", e);
      params.put("status", "fail");
      params.put("data", "把ppt文件转pdf失败");
      params.put("totalPage", 0);
    }
 
    return params;
  }
 
  /**
   * Writes the uploaded bytes to {@code target}. On failure the partially written file is
   * removed so that a later upload of the same content is not mistaken for an existing one.
   *
   * @return {@code true} if the file was written completely
   */
  private boolean writeUploadedFile(File target, byte[] content, String originalFilename) {
    if (content == null) {
      return false;
    }
    boolean written = false;
    BufferedOutputStream bos = null;
    try {
      bos = new BufferedOutputStream(new FileOutputStream(target));
      bos.write(content);
      bos.flush();
      written = true;
    } catch (IOException e) {
      logger.error("保存'" + originalFilename + "'时发生异常,Cause: ", e);
    } finally {
      if (null != bos) {
        try {
          bos.close();
        } catch (IOException e) {
          logger.error("保存'" + originalFilename + "'时发生异常,Cause: ", e);
          written = false;
        }
      }
    }
    if (!written && target.exists()) {
      target.delete();
    }
    return written;
  }
 
  /**
   * Handles the file upload and returns the conversion result as JSON, or as JSONP when a
   * valid {@code callback} parameter is supplied.
   *
   * @param request current request; may carry the JSONP {@code callback} parameter
   * @param file    the uploaded file
   * @return a JSON object with at least {@code status} and {@code data}, wrapped as
   *         {@code callback(...);} for JSONP requests
   * @throws IOException if a failure response cannot be serialised
   */
  @RequestMapping(value = "/convert/upload", method = RequestMethod.POST,produces = "text/html;charset=UTF-8")
  public @ResponseBody String uploadFilePost(HttpServletRequest request,
      @RequestParam("file") MultipartFile file) throws IOException {
 
    String callback = request.getParameter(CALLBACK); // name of the JSONP callback function
 
    ObjectMapper mapper = new ObjectMapper();
    mapper.setVisibility(PropertyAccessor.FIELD, Visibility.ANY);
    ObjectWriter writer = mapper.writerWithType(Map.class);
 
    if (file.isEmpty()) {
      // Nothing was uploaded.
      Map<String, Object> params = new HashMap<String, Object>();
      params.put("status", "fail");
      params.put("data", "请上传文件");
      return writer.writeValueAsString(params);
    }
 
    Map<String, Object> params = saveUploadedFile(request, file, TYPE_BLOB);
    if (params == null) {
      // Defensive: kept from the original in case saveUploadedFile ever returns null.
      params = new HashMap<String, Object>();
      params.put("status", "fail");
      params.put("data", "文件已上传过");
      return writer.writeValueAsString(params);
    }
 
    String json;
    try {
      json = writer.writeValueAsString(params);
    } catch (Exception e) {
      logger.error("转换Blob上传参数为JSON时发生异常,Cause: ", e);
      // Answer with a well-formed failure object instead of a bare text message.
      Map<String, Object> failure = new HashMap<String, Object>();
      failure.put("status", "fail");
      failure.put("data", "转换结果序列化失败");
      json = writer.writeValueAsString(failure);
    }
 
    // The callback name is client-controlled and the response is served as text/html, so only
    // a plain (optionally dotted) JavaScript identifier is accepted; anything else is ignored
    // and the plain JSON is returned.
    // NOTE(review): the JSON body still echoes request parameters verbatim; consider serving
    // it with a JSON content type.
    if (StringUtils.isBlank(callback)
        || !callback.matches("[A-Za-z_$][A-Za-z0-9_$]*(\\.[A-Za-z_$][A-Za-z0-9_$]*)*")) {
      return json;
    }
    return callback + "(" + json + ");";
  }
 
}

预览图代理输出-----处理类:

package com.document.handle.controller;
 
import java.io.File;
import java.io.IOException;
 
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
 
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
 
import com.document.tool.ImageMagickUtils;
import com.document.tool.SystemConfig;
 
 
 
/**
 * Serves preview images of converted slides: the original page image is scaled to 240x180,
 * stored in a {@code preview} sub-directory and streamed back to the client.
 */
@Controller
public class ImageAgentController {
 
  private static final Logger LOG = LoggerFactory.getLogger(ImageAgentController.class);
  
  /**
   * Scales one page image and writes the preview to the response.
   *
   * <p>The preview file name is the original image name with one extra leading character
   * (e.g. {@code pjpg-0} for {@code jpg-0}); that first character is stripped to find the source.
   *
   * @param year     first directory level below the blob directory
   * @param month    second directory level
   * @param md5id    directory named after the uploaded file
   * @param preview  path segment in front of the file name (not used for the lookup)
   * @param filename preview file name without extension
   * @param ext      image extension, also used as the image content sub-type
   * @throws IOException if the preview cannot be read or sent
   */
  @RequestMapping("/preview/images/{year}/{month}/{md5id}/{preview}/{filename}.{ext}")
  public void cropImage(@PathVariable String year, @PathVariable String month, @PathVariable String md5id,@PathVariable String preview, @PathVariable String filename, @PathVariable String ext, HttpServletRequest request, HttpServletResponse response) throws IOException {
    // All segments come straight from the URL: refuse anything that could leave the blob
    // directory or inject characters into the Content-Type header.
    if (!isSafeSegment(year) || !isSafeSegment(month) || !isSafeSegment(md5id)
        || !isSafeSegment(filename) || filename.length() < 2 || !ext.matches("[A-Za-z0-9]+")) {
      response.sendError(HttpServletResponse.SC_BAD_REQUEST);
      return;
    }
 
    //String rootDir = "/data05/ovp/images/";
    String rootDir = SystemConfig.getBlobDirectory();
    String oname = filename.substring(1); // name of the original image
    String dirString = rootDir + year+"/" +month + "/" + md5id + "/"+oname+"." + ext;
    String targetFileString = rootDir + year+"/" +month + "/" + md5id + "/preview/" + filename + "." + ext;
    
    LOG.info("corpImage..." + dirString + " -> " +targetFileString );
    File pathFile = new File(targetFileString).getParentFile();
    LOG.info("pathString...{}" , pathFile.getPath());
    if(!pathFile.exists()){
      LOG.info("---create file---");
      pathFile.mkdirs();
    }
 
    boolean status = ImageMagickUtils.scale(dirString, targetFileString, 240, 180);
    if (!status) {
      // No preview could be produced: answer 404 rather than an empty 200.
      response.sendError(HttpServletResponse.SC_NOT_FOUND);
      return;
    }
 
    response.reset(); 
    response.setContentType("image/" + ext);
 
    java.io.InputStream in = new java.io.FileInputStream(targetFileString);
    try {
      java.io.OutputStream out = response.getOutputStream();
      byte[] b = new byte[1024]; 
      int len;
      while ((len = in.read(b)) != -1) {
        // Write only the bytes actually read; writing the whole buffer appends stale data
        // after the final, shorter read.
        out.write(b, 0, len);
      }
    } finally {
      in.close();
    }
  }
  
  /** Accepts a single path segment that is non-empty, not "." or "..", and has no separators. */
  private static boolean isSafeSegment(String segment) {
    return segment != null
        && segment.length() > 0
        && !".".equals(segment)
        && !"..".equals(segment)
        && segment.indexOf('/') < 0
        && segment.indexOf('\\') < 0;
  }
 
}

提取文字大纲的perl脚本:

# Prints a one-line outline of a PDF: entries separated by "///", built from the text that
# pdftotext extracts. Usage: pdf_outline.pl <pdf-file>
use strict;
use warnings;
use utf8;
use open ':encoding(utf8)';
binmode(STDOUT, ":utf8");

# Returns the argument with leading and trailing whitespace removed.
sub trim($)
{
    my $string = shift;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

if(!$ARGV[0]){
 die "usage: $0 <pdf-file>\n";
}

# List-form pipe open: the file name is handed to pdftotext as a single argument without going
# through a shell, so spaces or shell metacharacters in the path cannot break or alter the command.
open(my $fh, '-|', 'pdftotext', '-layout', '-enc', 'UTF-8', $ARGV[0], '-') or die $!;

# The first line of text becomes the first entry; pdftotext may produce no output at all.
my $firstline = <$fh>;
print trim($firstline) if defined $firstline;
my $pageNum = 1;
 
while ( my $line = <$fh> ) {
  # Only lines containing a form feed (\x0C) are looked at.
  if ( $line =~ /\x0C/ ) {
    my $count = ($line =~ tr/\x0C//);
    # Several form feeds on one line: emit the running page number for all but the last of them.
    for(my $i=0;$i<$count-1;$i++){
        print "///".$pageNum;
        $pageNum++;
    }
    # The text on the form-feed line is emitted as an entry, unless it is blank.
    if(trim($line)){
        print "///".trim($line);
    }
    $pageNum++;
  }
}
close $fh;

可能遇到的问题:

1、ppt转pdf时,遇到启动失败(不清楚是不是再次启动引起的)

2、转换后的pdf 表格里的中文会出现乱码

3、有时会出现关闭服务器的所有服务(尚不清楚什么原因引起的)

4、处理请求时,经常出现超时504

您可能感兴趣的文章:

加载全部内容

相关教程
猜你喜欢
用户评论