java : pdfbox 读取 PDF文件内书签

时间:2025-02-18 13:00:36

从 https://pdfbox.apache.org/download.html 下载 pdfbox-app-2.0.9.jar 包
pdfbox-app-2.0.9.jar

pdfbox-2.0.9-src.zip

示例源码位于: \pdfbox-2.0.9-src\examples\src\main\java\org\apache\pdfbox\examples\pdmodel\PrintBookmarks.java

package test;
import java.io.*;

import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;

public class PrintBookmarks
{

    /**
     * This will print the documents 书签 to .
     *
     * @param bookmark The 书签 to print out.
     * @param indentation A pretty printing parameter
     *
     * @throws IOException If there is an error getting the page count.
     */
    public void printBookmark( PDOutlineNode bookmark, String indentation ) throws IOException
    {
        
        PDOutlineItem current = ();
        while( current != null )
        {
        	int pages =0;
	        if (() instanceof PDPageDestination)
			{
			    PDPageDestination pd = (PDPageDestination) ();
			    pages = (() +1);
			}
			if (() instanceof PDActionGoTo)
			{
			    PDActionGoTo gta = (PDActionGoTo) ();
			    if (() instanceof PDPageDestination)
			    {
			        PDPageDestination pd = (PDPageDestination) ();
			        pages = (() +1);
			    }
			}
			if (pages ==0)
				( indentation + ());
			else
				( indentation + () +"  "+ pages);
            printBookmark( current, indentation + "    " );  // 递归调用
            current = ();
        }

    }
    
    // 
    public static void main( String[] args ) throws Exception
    {
        if(  != 1 )
        {
            ( " usage: java PrintBookmarks  " );
            return;
        }

		File file1 = new File(args[0]);
		if (!()){
			(" file is not exists ");
			return; 
		}
		//  开始读取 PDF文档
            PDDocument document = null;
            FileInputStream fis = null;
            try
            {
                fis = new FileInputStream(file1);
                PDFParser parser = new PDFParser(new RandomAccessBuffer(fis));
                ();
                document = ();

                PrintBookmarks the = new PrintBookmarks();
                PDDocumentOutline outline =  ().getDocumentOutline();
                if( outline != null )
                {
                    ( outline, "" );
                }
                else
                {
                    ( "This document does not contain any bookmarks" );
                }
            }
            finally
            {
                if( fis != null ) ();
                if( document != null ) ();
            }
        
    }
}

编译

set JAR=pdfbox-app-2.0.9.jar
javac -cp %JAR% -d . PrintBookmarks.java

运行

set JAR=pdfbox-app-2.0.9.jar
java -Xms128m -Xmx512m -cp %JAR%;. test.PrintBookmarks %1

例如, 在 cmd 中执行 (将书签输出重定向到文本文件):

java -Xms128m -Xmx512m -cp %JAR%;. test.PrintBookmarks Hadoop权威指南_第四版_中文版.pdf > bookmarks.txt

运行 cmd
java -jar pdfbox-app-2.0.9.jar
PDFBox version: "2.0.9"
Usage: java -jar pdfbox-app-x.y.z.jar <command> <args..>

Possible commands are:
  ConvertColorspace
  Decrypt
  Encrypt
  ExtractText
  ExtractImages
  OverlayPDF
  PrintPDF
  PDFDebugger
  PDFMerger
  PDFReader
  PDFSplit
  PDFToImage
  TextToPDF
  WriteDecodedDoc