splitting a string which contains MIME content

More precisely, the code below tries to auto-detect whether the string contains MIME content or something else (which is then assumed to be XML).

/**
 * Checks whether a web service response is plain XML or a MIME multipart message and gives access to the XML
 * payload and to the attachments.
 * <p>
 * A message which starts with {@link #MIME_HEADER} is split on its multipart boundary. Every part which carries a
 * <code>Content-ID</code> header is stored as attachment under the value of that header (surrounding whitespace
 * removed, otherwise verbatim); the body of a part without that header is used as the XML. Any other message is
 * considered to be the XML as a whole.
 * <p>
 * Parsing is lenient: a MIME message without usable boundary, or a part without the blank line which separates
 * headers from body, is ignored instead of causing an exception. In that case {@link #getXml()} may return null.
 * <p>
 * See <a href="http://en.wikipedia.org/wiki/MIME">http://en.wikipedia.org/wiki/MIME</a>.
 *
 * @author Joachim Van der Auwera
 */
public class WsResponseExtractor
{
    public static final String MIME_HEADER = "MIME-Version: 1.0";
    public static final int MIME_HEADER_LENGTH = MIME_HEADER.length();
    public static final String BOUNDARY = "boundary=";
    public static final String CONTENT_ID = "Content-ID: ";

    /** Header name including the colon but without trailing space, as the space after the colon is optional. */
    private static final String CONTENT_ID_NAME = CONTENT_ID.trim();

    /** Two dashes: prefix of every delimiter line and suffix of the closing delimiter. */
    private static final String DASHES = "--";

    private String xml;
    private final Map<String, String> attachments = new HashMap<String, String>();

    /**
     * Analyse the message and extract the XML and the attachments.
     *
     * @param message full response, either XML or a MIME multipart message
     * @throws NullPointerException when message is null
     */
    public WsResponseExtractor( String message )
    {
        if ( message == null )
        {
            throw new NullPointerException( "message" );
        }
        if ( message.startsWith( MIME_HEADER ) )
        {
            splitParts( message );
        }
        else
        {
            xml = message;
        }
    }

    /**
     * Get the XML content of the message.
     *
     * @return the message itself when it is not MIME, otherwise the body of the last part without Content-ID,
     *         or null when the MIME message contains no such part
     */
    public String getXml()
    {
        return xml;
    }

    /**
     * Get the (still encoded) body of an attachment.
     *
     * @param name value of the Content-ID header of the part
     * @return body of the part, or null when there is no part with that Content-ID
     */
    public String getAttachment( String name )
    {
        return attachments.get( name );
    }

    /**
     * Split a MIME multipart message on its boundary and process every part.
     *
     * @param message message which starts with {@link #MIME_HEADER}
     */
    private void splitParts( String message )
    {
        String newLine = detectNewLine( message );
        if ( newLine == null )
        {
            return; // nothing but the version header, so there are no parts
        }
        String boundary = findBoundary( message, newLine );
        if ( boundary == null )
        {
            return; // not a (valid) multipart message, nothing to split on
        }

        // a delimiter always starts on a new line, including the line end avoids matches inside a line
        String delimiter = newLine + DASHES + boundary;
        int delimiterLength = delimiter.length();
        int current = message.indexOf( delimiter );
        while ( current >= 0 )
        {
            int partStart = current + delimiterLength;
            if ( message.startsWith( DASHES, partStart ) )
            {
                break; // closing delimiter, what follows is the epilogue and not a part
            }
            int next = message.indexOf( delimiter, partStart );
            if ( next < 0 )
            {
                break; // a part which is not terminated by a delimiter is ignored
            }
            processPart( message.substring( partStart, next ), newLine );
            current = next;
        }
    }

    /**
     * Determine the line separator from the end of the first line.
     * Only one line end is taken, so a blank line directly after the version header is not seen as part of it.
     *
     * @param message message which starts with {@link #MIME_HEADER}
     * @return "\r\n", "\n" or "\r", or null when the message contains no line end
     */
    private static String detectNewLine( String message )
    {
        int length = message.length();
        for ( int i = MIME_HEADER_LENGTH; i < length; i++ )
        {
            char c = message.charAt( i );
            if ( c == '\n' )
            {
                return "\n";
            }
            if ( c == '\r' )
            {
                boolean followedByLf = i + 1 < length && message.charAt( i + 1 ) == '\n';
                return followedByLf ? "\r\n" : "\r";
            }
        }
        return null;
    }

    /**
     * Extract the value of the boundary parameter.
     * <p>
     * A value in double quotes runs until the closing quote, an unquoted value until the next ';' or the end of the
     * line, so parameters which follow the boundary are not seen as part of it. A value with only an opening or
     * only a closing quote is used as is, quote included.
     *
     * @param message full message
     * @param newLine line separator used in the message
     * @return boundary without the leading dashes, or null when there is none or it is empty
     */
    private static String findBoundary( String message, String newLine )
    {
        int marker = message.indexOf( BOUNDARY );
        if ( marker < 0 )
        {
            return null;
        }
        int valueStart = marker + BOUNDARY.length();
        int valueEnd = message.indexOf( newLine, valueStart );
        if ( valueEnd < 0 )
        {
            valueEnd = message.length();
        }
        String value = message.substring( valueStart, valueEnd );

        int closingQuote = value.startsWith( "\"" ) ? value.indexOf( '"', 1 ) : -1;
        if ( closingQuote > 0 )
        {
            value = value.substring( 1, closingQuote );
        }
        else
        {
            int semicolon = value.indexOf( ';' );
            if ( semicolon >= 0 )
            {
                value = value.substring( 0, semicolon );
            }
            value = value.trim();
        }
        return value.length() == 0 ? null : value;
    }

    /**
     * Store one part, as attachment when it has a Content-ID and as XML otherwise.
     * A part without blank line between headers and body is skipped.
     *
     * @param message text between two delimiters, starting with the line end of the delimiter line
     * @param newLine line separator used in the message
     */
    private void processPart( String message, String newLine )
    {
        // headers and body are separated by a blank line
        int sep = message.indexOf( newLine + newLine );
        if ( sep < 0 )
        {
            return;
        }
        String headers = message.substring( 0, sep );
        String body = message.substring( sep + 2 * newLine.length() );

        String contentId = findContentId( headers, newLine );
        if ( contentId == null )
        {
            xml = body;
        }
        else
        {
            attachments.put( contentId, body );
        }
    }

    /**
     * Find the value of the Content-ID header.
     * The header name is matched at the start of a line and without regard to case, as header names are
     * case-insensitive.
     *
     * @param headers header block of a part
     * @param newLine line separator used in the message
     * @return value of the header with surrounding whitespace removed, or null when the header is not present
     */
    private static String findContentId( String headers, String newLine )
    {
        int nameLength = CONTENT_ID_NAME.length();
        int lineStart = 0;
        while ( lineStart <= headers.length() )
        {
            int lineEnd = headers.indexOf( newLine, lineStart );
            if ( lineEnd < 0 )
            {
                lineEnd = headers.length();
            }
            if ( headers.regionMatches( true, lineStart, CONTENT_ID_NAME, 0, nameLength ) )
            {
                return headers.substring( lineStart + nameLength, lineEnd ).trim();
            }
            lineStart = lineEnd + newLine.length();
        }
        return null;
    }
}

/**
 * Test for {@link WsResponseExtractor}.
 *
 * @author Joachim Van der Auwera
 */
public class WsResponseExtractorTest
{
    @Test
    public void testSimple() {
        String msg = "" +
                "MIME-Version: 1.0\n" +
                "Content-Type: multipart/mixed; boundary=\"frontier\"\n" +
                "\n" +
                "This is a message with multiple parts in MIME format.\n" +
                "--frontier\n" +
                "Content-Type: text/plain\n" +
                "\n" +
                "<message>Hi!</message>\n" +
                "--frontier\n" +
                "Content-Type: application/octet-stream\n" +
                "Content-Transfer-Encoding: base64\n" +
                "Content-ID: bla.txt\n" +
                "\n" +
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\n" +
                "--frontier--\n" +
                "";
        WsResponseExtractor re = new WsResponseExtractor(msg);
        Assert.assertEquals( "<message>Hi!</message>", re.getXml() );
        Assert.assertNull( re.getAttachment("bla") );
        Assert.assertNotNull( re.getAttachment("bla.txt") );
        Assert.assertEquals(
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==",
                re.getAttachment("bla.txt") );
    }

    @Test
    public void testSimpleCrNl() {
        String msg = "" +
                "MIME-Version: 1.0\r\n" +
                "Content-Type: multipart/mixed; boundary=\"frontier\"\r\n" +
                "\r\n" +
                "This is a message with multiple parts in MIME format.\r\n" +
                "--frontier\r\n" +
                "Content-Type: text/plain\r\n" +
                "\r\n" +
                "<message>Hi!</message>\r\n" +
                "--frontier\r\n" +
                "Content-Type: application/octet-stream\r\n" +
                "Content-Transfer-Encoding: base64\r\n" +
                "Content-ID: bla.txt\r\n" +
                "\r\n" +
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\r\n" +
                "--frontier--\r\n" +
                "";
        WsResponseExtractor re = new WsResponseExtractor(msg);
        Assert.assertEquals( "<message>Hi!</message>", re.getXml() );
        Assert.assertNull( re.getAttachment("bla") );
        Assert.assertNotNull( re.getAttachment("bla.txt") );
        Assert.assertEquals(
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==",
                re.getAttachment("bla.txt") );
    }

    @Test
    public void testSimpleNoQuotes() {
        String msg = "" +
                "MIME-Version: 1.0\n" +
                "Content-Type: multipart/mixed; boundary=frontier\n" +
                "\n" +
                "This is a message with multiple parts in MIME format.\n" +
                "--frontier\n" +
                "Content-Type: text/plain\n" +
                "\n" +
                "<message>Hi!</message>\n" +
                "--frontier\n" +
                "Content-Type: application/octet-stream\n" +
                "Content-Transfer-Encoding: base64\n" +
                "Content-ID: bla.txt\n" +
                "\n" +
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\n" +
                "--frontier--" +
                "";
        WsResponseExtractor re = new WsResponseExtractor(msg);
        Assert.assertEquals( "<message>Hi!</message>", re.getXml() );
        Assert.assertNull( re.getAttachment("bla") );
        Assert.assertNotNull( re.getAttachment("bla.txt") );
        Assert.assertEquals(
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==",
                re.getAttachment("bla.txt") );
    }

    @Test
    public void testSimpleContentIdNotLast() {
        String msg = "" +
                "MIME-Version: 1.0\n" +
                "Content-Type: multipart/mixed; boundary=frontier\n" +
                "\n" +
                "This is a message with multiple parts in MIME format.\n" +
                "--frontier\n" +
                "Content-Type: text/plain\n" +
                "\n" +
                "<message>Hi!</message>\n" +
                "--frontier\n" +
                "Content-Type: application/octet-stream\n" +
                "Content-ID: bla.txt\n" +
                "Content-Transfer-Encoding: base64\n" +
                "\n" +
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\n" +
                "--frontier--" +
                "";
        WsResponseExtractor re = new WsResponseExtractor(msg);
        Assert.assertEquals( "<message>Hi!</message>", re.getXml() );
        Assert.assertNull( re.getAttachment("bla") );
        Assert.assertNotNull( re.getAttachment("bla.txt") );
        Assert.assertEquals(
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==",
                re.getAttachment("bla.txt") );
    }

    @Test
    public void testSimpleFrontierStartQuote() {
        String msg = "" +
                "MIME-Version: 1.0\n" +
                "Content-Type: multipart/mixed; boundary=\"frontier\n" +
                "\n" +
                "This is a message with multiple parts in MIME format.\n" +
                "--\"frontier\n" +
                "Content-Type: text/plain\n" +
                "\n" +
                "<message>Hi!</message>\n" +
                "--\"frontier\n" +
                "Content-Type: application/octet-stream\n" +
                "Content-ID: bla.txt\n" +
                "Content-Transfer-Encoding: base64\n" +
                "\n" +
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\n" +
                "--\"frontier--" +
                "";
        WsResponseExtractor re = new WsResponseExtractor(msg);
        Assert.assertEquals( "<message>Hi!</message>", re.getXml() );
        Assert.assertNull( re.getAttachment("bla") );
        Assert.assertNotNull( re.getAttachment("bla.txt") );
        Assert.assertEquals(
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==",
                re.getAttachment("bla.txt") );
    }

    @Test
    public void testSimpleFrontierEndQuote() {
        String msg = "" +
                "MIME-Version: 1.0\n" +
                "Content-Type: multipart/mixed; boundary=frontier\"\n" +
                "\n" +
                "This is a message with multiple parts in MIME format.\n" +
                "--frontier\"\n" +
                "Content-Type: text/plain\n" +
                "\n" +
                "<message>Hi!</message>\n" +
                "--frontier\"\n" +
                "Content-Type: application/octet-stream\n" +
                "Content-ID: bla.txt\n" +
                "Content-Transfer-Encoding: base64\n" +
                "\n" +
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\n" +
                "--frontier\"--" +
                "";
        WsResponseExtractor re = new WsResponseExtractor(msg);
        Assert.assertEquals( "<message>Hi!</message>", re.getXml() );
        Assert.assertNull( re.getAttachment("bla") );
        Assert.assertNotNull( re.getAttachment("bla.txt") );
        Assert.assertEquals(
                "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\n" +
                "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==",
                re.getAttachment("bla.txt") );
    }

    @Test
    public void testNoMime() {
        String msg = "<message>Hi!</message>";
        WsResponseExtractor re = new WsResponseExtractor(msg);
        Assert.assertEquals( "<message>Hi!</message>", re.getXml() );
        Assert.assertNull( re.getAttachment("bla") );
        Assert.assertNull( re.getAttachment("bla.txt") );
    }
}

Leave a Reply

Your email address will not be published. Required fields are marked *

question razz sad evil exclaim smile redface biggrin surprised eek confused cool lol mad twisted rolleyes wink idea arrow neutral cry mrgreen

*