Advertisement
Guest User

Untitled

a guest
Apr 29th, 2015
266
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 15.27 KB | None | 0 0
  1. /* $Id: getoptargv.cpp $ */
  2. /** @file
  3.  * IPRT - Command Line Parsing, Argument Vector.
  4.  */
  5.  
  6. /*
  7.  * Copyright (C) 2010-2012 Oracle Corporation
  8.  *
  9.  * This file is part of VirtualBox Open Source Edition (OSE), as
  10.  * available from http://www.virtualbox.org. This file is free software;
  11.  * you can redistribute it and/or modify it under the terms of the GNU
  12.  * General Public License (GPL) as published by the Free Software
  13.  * Foundation, in version 2 as it comes in the "COPYING" file of the
  14.  * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
  15.  * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
  16.  *
  17.  * The contents of this file may alternatively be used under the terms
  18.  * of the Common Development and Distribution License Version 1.0
  19.  * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
  20.  * VirtualBox OSE distribution, in which case the provisions of the
  21.  * CDDL are applicable instead of those of the GPL.
  22.  *
  23.  * You may elect to license modified versions of this file under the
  24.  * terms and conditions of either the GPL or the CDDL or both.
  25.  */
  26.  
  27. /*******************************************************************************
  28. *   Header Files                                                               *
  29. *******************************************************************************/
  30. #include <iprt/getopt.h>
  31. #include "internal/iprt.h"
  32.  
  33. #include <iprt/asm.h>
  34. #include <iprt/assert.h>
  35. #include <iprt/err.h>
  36. #include <iprt/mem.h>
  37. #include <iprt/string.h>
  38.  
  39.  
  40. /*******************************************************************************
  41. *   Header Files                                                               *
  42. *******************************************************************************/
  43. /**
  44.  * Array indexed by the quoting type and 7-bit ASCII character.
  45.  *
  46.  * We include some extra stuff here that the corresponding shell would normally
  47.  * require quoting of.
  48.  */
  49. static uint8_t const g_abmQuoteChars[RTGETOPTARGV_CNV_QUOTE_MASK + 1][128/8] =
  50. {
  51.     { 0xfe, 0xff, 0x0f, 0x00, 0x65, 0x00, 0x00, 0x50 },
  52.     { 0xfe, 0xff, 0x0f, 0x00, 0xd7, 0x07, 0x00, 0xd8 },
  53. };
  54.  
  55.  
  #if 0   /* To re-generate the bitmaps. */
  #include <stdio.h>
  /**
   * Bitmap generator: prints the initializer rows for g_abmQuoteChars.
   *
   * The rows are built in a local scratch array because g_abmQuoteChars itself
   * is const and cannot be written to.  Every byte of each row is printed so
   * that characters at or above 64 are not lost from the table.
   *
   * @returns 0.
   */
  int main()
  {
      uint8_t abmNew[RTGETOPTARGV_CNV_QUOTE_MASK + 1][128/8];
      RT_ZERO(abmNew);

  /* Flags a character for every quoting type. */
  # define SET_ALL(ch) \
          do { \
              for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++) \
                  ASMBitSet(&abmNew[iType], (ch)); \
          } while (0)
  /* Flags a character for one quoting type (RTGETOPTARGV_CNV_QUOTE_<ConstSuffix>). */
  # define SET(ConstSuffix, ch) \
          ASMBitSet(&abmNew[RTGETOPTARGV_CNV_QUOTE_##ConstSuffix], (ch))

      /* just flag all the control chars as in need of quoting. */
      for (char ch = 1; ch < 0x20; ch++)
          SET_ALL(ch);

      /* ... and space of course */
      SET_ALL(' ');

      /* MS CRT / CMD.EXE: */
      SET(MS_CRT, '"');
      SET(MS_CRT, '&');
      SET(MS_CRT, '>');
      SET(MS_CRT, '<');
      SET(MS_CRT, '|');
      SET(MS_CRT, '%');

      /* Bourne shell: */
      SET(BOURNE_SH, '!');
      SET(BOURNE_SH, '"');
      SET(BOURNE_SH, '$');
      SET(BOURNE_SH, '&');
      SET(BOURNE_SH, '(');
      SET(BOURNE_SH, ')');
      SET(BOURNE_SH, '*');
      SET(BOURNE_SH, ';');
      SET(BOURNE_SH, '<');
      SET(BOURNE_SH, '>');
      SET(BOURNE_SH, '?');
      SET(BOURNE_SH, '[');
      SET(BOURNE_SH, '\'');
      SET(BOURNE_SH, '\\');
      SET(BOURNE_SH, '`');
      SET(BOURNE_SH, '|');
      SET(BOURNE_SH, '~');

  # undef SET
  # undef SET_ALL

      /* Print one initializer row per quoting type, covering the whole row. */
      for (size_t iType = 0; iType <= RTGETOPTARGV_CNV_QUOTE_MASK; iType++)
      {
          printf("    {");
          for (size_t iByte = 0; iByte < sizeof(abmNew[iType]); iByte++)
              printf(iByte == 0 ? " 0x%02x" : ", 0x%02x", abmNew[iType][iByte]);
          printf(" },\n");
      }
      return 0;
  }
  #endif /* To re-generate the bitmaps. */
  114.  
  115.  
  116. /**
  117.  * Look for an unicode code point in the separator string.
  118.  *
  119.  * @returns true if it's a separator, false if it isn't.
  120.  * @param   Cp              The code point.
  121.  * @param   pszSeparators   The separators.
  122.  */
  123. static bool rtGetOptIsUniCpInString(RTUNICP Cp, const char *pszSeparators)
  124. {
  125.     /* This could be done in a more optimal fashion.  Probably worth a
  126.        separate RTStr function at some point. */
  127.     for (;;)
  128.     {
  129.         RTUNICP CpSep;
  130.         int rc = RTStrGetCpEx(&pszSeparators, &CpSep);
  131.         AssertRCReturn(rc, false);
  132.         if (CpSep == Cp)
  133.             return true;
  134.         if (!CpSep)
  135.             return false;
  136.     }
  137. }
  138.  
  139.  
  140. /**
  141.  * Look for an 7-bit ASCII character in the separator string.
  142.  *
  143.  * @returns true if it's a separator, false if it isn't.
  144.  * @param   ch              The character.
  145.  * @param   pszSeparators   The separators.
  146.  * @param   cchSeparators   The number of separators chars.
  147.  */
  148. DECLINLINE(bool) rtGetOptIsAsciiInSet(char ch, const char *pszSeparators, size_t cchSeparators)
  149. {
  150.     switch (cchSeparators)
  151.     {
  152.         case 8: if (ch == pszSeparators[7]) return true;
  153.         case 7: if (ch == pszSeparators[6]) return true;
  154.         case 6: if (ch == pszSeparators[5]) return true;
  155.         case 5: if (ch == pszSeparators[4]) return true;
  156.         case 4: if (ch == pszSeparators[3]) return true;
  157.         case 3: if (ch == pszSeparators[2]) return true;
  158.         case 2: if (ch == pszSeparators[1]) return true;
  159.         case 1: if (ch == pszSeparators[0]) return true;
  160.             return false;
  161.         default:
  162.             return memchr(pszSeparators, ch, cchSeparators) != NULL;
  163.     }
  164. }
  165.  
  166.  
  167. /**
  168.  * Checks if the character is in the set of separators
  169.  *
  170.  * @returns true if it is, false if it isn't.
  171.  *
  172.  * @param   Cp              The code point.
  173.  * @param   pszSeparators   The separators.
  174.  * @param   cchSeparators   The length of @a pszSeparators.
  175.  */
  176. DECL_FORCE_INLINE(bool) rtGetOptIsCpInSet(RTUNICP Cp, const char *pszSeparators, size_t cchSeparators)
  177. {
  178.     if (RT_LIKELY(Cp <= 127))
  179.         return rtGetOptIsAsciiInSet((char)Cp, pszSeparators, cchSeparators);
  180.     return rtGetOptIsUniCpInString(Cp, pszSeparators);
  181. }
  182.  
  183.  
  184. /**
  185.  * Skips any delimiters at the start of the string that is pointed to.
  186.  *
  187.  * @returns VINF_SUCCESS or RTStrGetCpEx status code.
  188.  * @param   ppszSrc         Where to get and return the string pointer.
  189.  * @param   pszSeparators   The separators.
  190.  * @param   cchSeparators   The length of @a pszSeparators.
  191.  */
  192. static int rtGetOptSkipDelimiters(const char **ppszSrc, const char *pszSeparators, size_t cchSeparators)
  193. {
  194.     const char *pszSrc = *ppszSrc;
  195.     const char *pszRet;
  196.     for (;;)
  197.     {
  198.         pszRet = pszSrc;
  199.         RTUNICP Cp;
  200.         int rc = RTStrGetCpEx(&pszSrc, &Cp);
  201.         if (RT_FAILURE(rc))
  202.         {
  203.             *ppszSrc = pszRet;
  204.             return rc;
  205.         }
  206.         if (   !Cp
  207.             || !rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
  208.             break;
  209.     }
  210.  
  211.     *ppszSrc = pszRet;
  212.     return VINF_SUCCESS;
  213. }
  214.  
  215.  
  216. RTDECL(int) RTGetOptArgvFromString(char ***ppapszArgv, int *pcArgs, const char *pszCmdLine, const char *pszSeparators)
  217. {
  218.     /*
  219.      * Some input validation.
  220.      */
  221.     AssertPtr(pszCmdLine);
  222.     AssertPtr(pcArgs);
  223.     AssertPtr(ppapszArgv);
  224.     if (!pszSeparators)
  225.         pszSeparators = " \t\n\r";
  226.     else
  227.         AssertPtr(pszSeparators);
  228.     size_t const cchSeparators = strlen(pszSeparators);
  229.     AssertReturn(cchSeparators > 0, VERR_INVALID_PARAMETER);
  230.  
  231.     /*
  232.      * Parse the command line and chop off it into argv individual argv strings.
  233.      */
  234.     int         rc        = VINF_SUCCESS;
  235.     const char *pszSrc    = pszCmdLine;
  236.     char       *pszDup    = (char *)RTMemAlloc(strlen(pszSrc) + 1);
  237.     char       *pszDst    = pszDup;
  238.     if (!pszDup)
  239.         return VERR_NO_STR_MEMORY;
  240.     char      **papszArgs = NULL;
  241.     unsigned    iArg      = 0;
  242.     while (*pszSrc)
  243.     {
  244.         /* Skip stuff */
  245.         rc = rtGetOptSkipDelimiters(&pszSrc, pszSeparators, cchSeparators);
  246.         if (RT_FAILURE(rc))
  247.             break;
  248.         if (!*pszSrc)
  249.             break;
  250.  
  251.         /* Start a new entry. */
  252.         if ((iArg % 32) == 0)
  253.         {
  254.             void *pvNew = RTMemRealloc(papszArgs, (iArg + 33) * sizeof(char *));
  255.             if (!pvNew)
  256.             {
  257.                 rc = VERR_NO_MEMORY;
  258.                 break;
  259.             }
  260.             papszArgs = (char **)pvNew;
  261.         }
  262.         papszArgs[iArg++] = pszDst;
  263.  
  264.         /* Parse and copy the string over. */
  265.         RTUNICP CpQuote = 0;
  266.         RTUNICP CpLast = 0;
  267.         RTUNICP Cp;
  268.        
  269.         for (;;)
  270.         {
  271.             rc = RTStrGetCpEx(&pszSrc, &Cp);
  272.             if (RT_FAILURE(rc) || !Cp)
  273.                 break;
  274.             if (!CpQuote)
  275.             {
  276.                 if (CpLast != '\\' && (Cp == '"' || Cp == '\''))
  277.                     CpQuote = Cp;
  278.                 else if (rtGetOptIsCpInSet(Cp, pszSeparators, cchSeparators))
  279.                     break;
  280.                 else
  281.                     pszDst = RTStrPutCp(pszDst, Cp);
  282.             }
  283.             else if (CpQuote != Cp || CpLast == '\\')
  284.             {
  285.                 if (CpLast == '\\')
  286.                 {
  287.                     if (Cp != '"' && Cp != '\\')
  288.                         pszDst = RTStrPutCp(pszDst, CpLast);
  289.  
  290.                     pszDst = RTStrPutCp(pszDst, Cp);
  291.                 }
  292.                 else if (Cp != '\\')
  293.                 {
  294.                     pszDst = RTStrPutCp(pszDst, Cp);
  295.                 }
  296.             }
  297.             else
  298.                 CpQuote = 0;
  299.            
  300.             CpLast = Cp;
  301.         }
  302.         *pszDst++ = '\0';
  303.         if (RT_FAILURE(rc) || !Cp)
  304.             break;
  305.     }
  306.  
  307.     if (RT_FAILURE(rc))
  308.     {
  309.         RTMemFree(pszDup);
  310.         RTMemFree(papszArgs);
  311.         return rc;
  312.     }
  313.  
  314.     /*
  315.      * Terminate the array.
  316.      * Check for empty string to make sure we've got an array.
  317.      */
  318.     if (iArg == 0)
  319.     {
  320.         RTMemFree(pszDup);
  321.         papszArgs = (char **)RTMemAlloc(1 * sizeof(char *));
  322.         if (!papszArgs)
  323.             return VERR_NO_MEMORY;
  324.     }
  325.     papszArgs[iArg] = NULL;
  326.  
  327.     *pcArgs     = iArg;
  328.     *ppapszArgv = papszArgs;
  329.     return VINF_SUCCESS;
  330. }
  331.  
  332.  
  333. RTDECL(void) RTGetOptArgvFree(char **papszArgv)
  334. {
  335.     if (papszArgv)
  336.     {
  337.         /*
  338.          * We've really only _two_ allocations here. Check the code in
  339.          * RTGetOptArgvFromString for the particulars.
  340.          */
  341.         RTMemFree(papszArgv[0]);
  342.         RTMemFree(papszArgv);
  343.     }
  344. }
  345.  
  346.  
  347. /**
  348.  * Checks if the argument needs quoting or not.
  349.  *
  350.  * @returns true if it needs, false if it don't.
  351.  * @param   pszArg              The argument.
  352.  * @param   fFlags              Quoting style.
  353.  * @param   pcch                Where to store the argument length when quoting
  354.  *                              is not required.  (optimization)
  355.  */
  356. DECLINLINE(bool) rtGetOpArgvRequiresQuoting(const char *pszArg, uint32_t fFlags, size_t *pcch)
  357. {
  358.     char const *psz = pszArg;
  359.     unsigned char ch;
  360.     while ((ch = (unsigned char)*psz))
  361.     {
  362.         if ((psz == 0 || ch != '"')
  363.             && ch < 128
  364.             && ASMBitTest(&g_abmQuoteChars[fFlags & RTGETOPTARGV_CNV_QUOTE_MASK], ch))
  365.             return true;
  366.         psz++;
  367.     }
  368.  
  369.     *pcch = psz - pszArg;
  370.     return false;
  371. }
  372.  
  373.  
  374. /**
  375.  * Grows the command line string buffer.
  376.  *
  377.  * @returns VINF_SUCCESS or VERR_NO_STR_MEMORY.
  378.  * @param   ppszCmdLine     Pointer to the command line string pointer.
  379.  * @param   pcbCmdLineAlloc Pointer to the allocation length variable.
  380.  * @param   cchMin          The minimum size to grow with, kind of.
  381.  */
  382. static int rtGetOptArgvToStringGrow(char **ppszCmdLine, size_t *pcbCmdLineAlloc, size_t cchMin)
  383. {
  384.     size_t cb = *pcbCmdLineAlloc;
  385.     while (cb < cchMin)
  386.         cb *= 2;
  387.     cb *= 2;
  388.     *pcbCmdLineAlloc = cb;
  389.     return RTStrRealloc(ppszCmdLine, cb);
  390. }
  391.  
  392. /**
  393.  * Checks if we have a sequence of DOS slashes followed by a double quote char.
  394.  *
  395.  * @returns true / false accordingly.
  396.  * @param   psz             The string.
  397.  */
  398. DECLINLINE(bool) rtGetOptArgvMsCrtIsSlashQuote(const char *psz)
  399. {
  400.     while (*psz == '\\')
  401.         psz++;
  402.     return *psz == '"' || *psz == '\0';
  403. }
  404.  
  405.  
  406. RTDECL(int) RTGetOptArgvToString(char **ppszCmdLine, const char * const *papszArgv, uint32_t fFlags)
  407. {
  408.     AssertReturn(!(fFlags & ~RTGETOPTARGV_CNV_QUOTE_MASK), VERR_INVALID_PARAMETER);
  409.  
  410. #define PUT_CH(ch) \
  411.         if (RT_UNLIKELY(off + 1 >= cbCmdLineAlloc)) { \
  412.             rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, 1); \
  413.             if (RT_FAILURE(rc)) \
  414.                 break; \
  415.         } \
  416.         pszCmdLine[off++] = (ch)
  417.  
  418. #define PUT_PSZ(psz, cch) \
  419.         if (RT_UNLIKELY(off + (cch) >= cbCmdLineAlloc)) { \
  420.             rc = rtGetOptArgvToStringGrow(&pszCmdLine, &cbCmdLineAlloc, (cch)); \
  421.             if (RT_FAILURE(rc)) \
  422.                 break; \
  423.         } \
  424.         memcpy(&pszCmdLine[off], (psz), (cch)); \
  425.         off += (cch);
  426. #define PUT_SZ(sz)  PUT_PSZ(sz, sizeof(sz) - 1)
  427.  
  428.     /*
  429.      * Take the realloc approach, it requires less code and is probably more
  430.      * efficient than figuring out the size first.
  431.      */
  432.     int     rc              = VINF_SUCCESS;
  433.     size_t  off             = 0;
  434.     size_t  cbCmdLineAlloc  = 256;
  435.     char   *pszCmdLine      = RTStrAlloc(256);
  436.     if (!pszCmdLine)
  437.         return VERR_NO_STR_MEMORY;
  438.  
  439.     for (size_t i = 0; papszArgv[i]; i++)
  440.     {
  441.         if (i > 0)
  442.         {
  443.             PUT_CH(' ');
  444.         }
  445.  
  446.         /* does it need quoting? */
  447.         const char *pszArg = papszArgv[i];
  448.         size_t      cchArg;
  449.         if (!rtGetOpArgvRequiresQuoting(pszArg, fFlags, &cchArg))
  450.         {
  451.             /* No quoting needed, just append the argument. */
  452.             PUT_PSZ(pszArg, cchArg);
  453.         }
  454.         else if ((fFlags & RTGETOPTARGV_CNV_QUOTE_MASK) == RTGETOPTARGV_CNV_QUOTE_MS_CRT)
  455.         {
  456.             /*
  457.              * Microsoft CRT quoting.  Quote the whole argument in double
  458.              * quotes to make it easier to read and code.
  459.              */
  460.             PUT_CH('"');
  461.             char ch;
  462.             while ((ch = *pszArg++))
  463.             {
  464.                 if (   ch == '\\'
  465.                     && rtGetOptArgvMsCrtIsSlashQuote(pszArg))
  466.                 {
  467.                     PUT_SZ("\\\\");
  468.                 }
  469.                 else if (ch == '"')
  470.                 {
  471.                     PUT_SZ("\\\"");
  472.                 }
  473.                 else
  474.                 {
  475.                     PUT_CH(ch);
  476.                 }
  477.             }
  478.             PUT_CH('"');
  479.         }
  480.         else
  481.         {
  482.             /*
  483.              * Bourne Shell quoting.  Quote the whole thing in single quotes
  484.              * and use double quotes for any single quote chars.
  485.              */
  486.             PUT_CH('\'');
  487.             char ch;
  488.             while ((ch = *pszArg++))
  489.             {
  490.                 if (ch == '\'')
  491.                 {
  492.                     PUT_SZ("'\"'\"'");
  493.                 }
  494.                 else
  495.                 {
  496.                     PUT_CH(ch);
  497.                 }
  498.             }
  499.             PUT_CH('\'');
  500.         }
  501.     }
  502.  
  503.     /* Set return value / cleanup. */
  504.     if (RT_SUCCESS(rc))
  505.     {
  506.         pszCmdLine[off] = '\0';
  507.         *ppszCmdLine    = pszCmdLine;
  508.     }
  509.     else
  510.         RTStrFree(pszCmdLine);
  511. #undef PUT_SZ
  512. #undef PUT_PSZ
  513. #undef PUT_CH
  514.     return rc;
  515. }
  516.  
  517.  
  518. RTDECL(int) RTGetOptArgvToUtf16String(PRTUTF16 *ppwszCmdLine, const char * const *papszArgv, uint32_t fFlags)
  519. {
  520.     char *pszCmdLine;
  521.     int rc = RTGetOptArgvToString(&pszCmdLine, papszArgv, fFlags);
  522.     if (RT_SUCCESS(rc))
  523.     {
  524.         rc = RTStrToUtf16(pszCmdLine, ppwszCmdLine);
  525.         RTStrFree(pszCmdLine);
  526.     }
  527.     return rc;
  528. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement