| 354 | | /* |
|---|
| 355 | | * Unescape a string that may have escaped characters %xx |
|---|
| 356 | | * where xx is the hexadecimal number equal to the character ascii value. |
|---|
| 357 | | */ |
|---|
| 358 | | ret_t |
|---|
| 359 | | cherokee_buffer_unescape_uri (cherokee_buffer_t *buffer) |
|---|
| 360 | | { |
|---|
| 361 | | static const char hex2dec_tab[256] = { |
|---|
| 362 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00-0F */ |
|---|
| 363 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10-1F */ |
|---|
| 364 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20-2F */ |
|---|
| 365 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 30-3F */ |
|---|
| 366 | | 0,10,11,12,13,14,15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40-4F */ |
|---|
| 367 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50-5F */ |
|---|
| 368 | | 0,10,11,12,13,14,15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60-6F */ |
|---|
| 369 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70-7F */ |
|---|
| 370 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8F */ |
|---|
| 371 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90-9F */ |
|---|
| 372 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0-AF */ |
|---|
| 373 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0-BF */ |
|---|
| 374 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0-CF */ |
|---|
| 375 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0-DF */ |
|---|
| 376 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0-EF */ |
|---|
| 377 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F0-FF */ |
|---|
| 378 | | }; |
|---|
| 379 | | |
|---|
| 380 | | char *psrc; |
|---|
| 381 | | char *ptgt; |
|---|
| 382 | | int len; |
|---|
| 383 | | |
|---|
| 384 | | #define hex2dec_m(c) ( (int) hex2dec_tab[ ( (unsigned char )(c) ) ] ) |
|---|
| 385 | | |
|---|
| 386 | | if (buffer->buf == NULL) { |
|---|
| 387 | | return ret_error; |
|---|
| 388 | | } |
|---|
| 389 | | |
|---|
| 390 | | /* Verify if decoding is needed. |
|---|
| 391 | | */ |
|---|
| 392 | | if ((psrc = strchr (buffer->buf, '%')) == NULL) |
|---|
| 393 | | return ret_ok; |
|---|
| 394 | | |
|---|
| 395 | | /* Decode string. |
|---|
| 396 | | */ |
|---|
| 397 | | len = buffer->len; |
|---|
| 398 | | for (ptgt = psrc; *psrc != '\0'; ++ptgt, ++psrc) { |
|---|
| 399 | | if (psrc[0] == '%' && isxdigit(psrc[1]) && isxdigit(psrc[2])) { |
|---|
| 400 | | if (unlikely (((psrc[1] == '0') && (psrc[2] == '0')))) { |
|---|
| 401 | | /* Replace null bytes (%00) with |
|---|
| 402 | | * spaces, to prevent attacks |
|---|
| 403 | | */ |
|---|
| 404 | | *ptgt = ' '; |
|---|
| 405 | | } else { |
|---|
| 406 | | *ptgt = hex2dec_m(psrc[1]) * 16 + hex2dec_m(psrc[2]); |
|---|
| 407 | | } |
|---|
| 408 | | |
|---|
| 409 | | psrc += 2; |
|---|
| 410 | | len -= 2; |
|---|
| 411 | | } else { |
|---|
| 412 | | *ptgt = *psrc; |
|---|
| 413 | | } |
|---|
| 414 | | } |
|---|
| 415 | | *ptgt = '\0'; |
|---|
| 416 | | buffer->len = len; |
|---|
| 417 | | |
|---|
| 418 | | #undef hex2dec_m |
|---|
| 419 | | return ret_ok; |
|---|
| 420 | | } |
|---|
| 421 | | |
|---|
| 422 | | |
|---|
| | 704 | /* |
|---|
| | 705 | * Unescape a string that may have escaped characters %xx |
|---|
| | 706 | * where xx is the hexadecimal number equal to the character ascii value. |
|---|
| | 707 | */ |
|---|
| | 708 | ret_t |
|---|
| | 709 | cherokee_buffer_unescape_uri (cherokee_buffer_t *buffer) |
|---|
| | 710 | { |
|---|
| | 711 | static const char hex2dec_tab[256] = { |
|---|
| | 712 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00-0F */ |
|---|
| | 713 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10-1F */ |
|---|
| | 714 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20-2F */ |
|---|
| | 715 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 30-3F */ |
|---|
| | 716 | 0,10,11,12,13,14,15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40-4F */ |
|---|
| | 717 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50-5F */ |
|---|
| | 718 | 0,10,11,12,13,14,15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60-6F */ |
|---|
| | 719 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70-7F */ |
|---|
| | 720 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80-8F */ |
|---|
| | 721 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90-9F */ |
|---|
| | 722 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0-AF */ |
|---|
| | 723 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0-BF */ |
|---|
| | 724 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0-CF */ |
|---|
| | 725 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0-DF */ |
|---|
| | 726 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0-EF */ |
|---|
| | 727 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F0-FF */ |
|---|
| | 728 | }; |
|---|
| | 729 | |
|---|
| | 730 | char *psrc; |
|---|
| | 731 | char *ptgt; |
|---|
| | 732 | int len; |
|---|
| | 733 | |
|---|
| | 734 | #define hex2dec_m(c) ( (int) hex2dec_tab[ ( (unsigned char )(c) ) ] ) |
|---|
| | 735 | #define hex2dec_m2(c1, c2) ( hex2dec_m(c1) * 16 + hex2dec_m(c2) ) |
|---|
| | 736 | |
|---|
| | 737 | if (unlikely (buffer->buf == NULL)) { |
|---|
| | 738 | return ret_error; |
|---|
| | 739 | } |
|---|
| | 740 | |
|---|
| | 741 | /* Verify if unescaping is needed. |
|---|
| | 742 | */ |
|---|
| | 743 | if ((psrc = strchr (buffer->buf, '%')) == NULL) |
|---|
| | 744 | return ret_ok; |
|---|
| | 745 | |
|---|
| | 746 | /* Yes, unescape string. |
|---|
| | 747 | */ |
|---|
| | 748 | len = buffer->len; |
|---|
| | 749 | for (ptgt = psrc; *psrc != '\0'; ++ptgt, ++psrc) { |
|---|
| | 750 | if (psrc[0] != '%' || |
|---|
| | 751 | !isxdigit(psrc[1]) || !isxdigit(psrc[2])) { |
|---|
| | 752 | *ptgt = *psrc; |
|---|
| | 753 | continue; |
|---|
| | 754 | } |
|---|
| | 755 | /* Escape sequence %xx |
|---|
| | 756 | */ |
|---|
| | 757 | if (likely ((*ptgt = hex2dec_m2(psrc[1], psrc[2])) != '\0')) { |
|---|
| | 758 | psrc += 2; |
|---|
| | 759 | len -= 2; |
|---|
| | 760 | continue; |
|---|
| | 761 | } |
|---|
| | 762 | /* Replace null bytes (%00) with |
|---|
| | 763 | * spaces, to prevent attacks |
|---|
| | 764 | */ |
|---|
| | 765 | *ptgt = ' '; |
|---|
| | 766 | psrc += 2; |
|---|
| | 767 | len -= 2; |
|---|
| | 768 | } |
|---|
| | 769 | *ptgt = '\0'; |
|---|
| | 770 | buffer->len = len; |
|---|
| | 771 | |
|---|
| | 772 | #undef hex2dec_m2 |
|---|
| | 773 | #undef hex2dec_m |
|---|
| | 774 | return ret_ok; |
|---|
| | 775 | } |
|---|
| | 776 | |
|---|
| | 777 | |
|---|
| | 778 | ret_t |
|---|
| | 779 | cherokee_buffer_escape_html (cherokee_buffer_t *buf, cherokee_buffer_t **maybe_new) |
|---|
| | 780 | { |
|---|
| | 781 | ret_t ret; |
|---|
| | 782 | cuint_t i; |
|---|
| | 783 | cuint_t j; |
|---|
| | 784 | cuint_t extra = 0; |
|---|
| | 785 | char *p0, *p; |
|---|
| | 786 | |
|---|
| | 787 | /* Verify string termination, |
|---|
| | 788 | * we assume there are no '\0' inside buffer. |
|---|
| | 789 | */ |
|---|
| | 790 | if (buf->buf[buf->len] != '\0') |
|---|
| | 791 | buf->buf[buf->len] = '\0'; |
|---|
| | 792 | |
|---|
| | 793 | /* Verify if string has to be escaped. |
|---|
| | 794 | */ |
|---|
| | 795 | if ((p0 = strpbrk(buf->buf, "<>&\"")) == NULL) |
|---|
| | 796 | return ret_not_found; |
|---|
| | 797 | |
|---|
| | 798 | /* Count extra characters |
|---|
| | 799 | */ |
|---|
| | 800 | for (p = p0; *p != '\0'; ++p) { |
|---|
| | 801 | switch(*p) { |
|---|
| | 802 | case '<': |
|---|
| | 803 | case '>': |
|---|
| | 804 | extra += 3; |
|---|
| | 805 | continue; |
|---|
| | 806 | case '&': |
|---|
| | 807 | extra += 4; |
|---|
| | 808 | continue; |
|---|
| | 809 | case '"': |
|---|
| | 810 | extra += 5; |
|---|
| | 811 | continue; |
|---|
| | 812 | default: |
|---|
| | 813 | continue; |
|---|
| | 814 | } |
|---|
| | 815 | } |
|---|
| | 816 | |
|---|
| | 817 | /* Verify there are no embedded '\0'. |
|---|
| | 818 | */ |
|---|
| | 819 | if ( ((int) (p - buf->buf)) != buf->len) |
|---|
| | 820 | return ret_error; |
|---|
| | 821 | |
|---|
| | 822 | /* Create a new buffer |
|---|
| | 823 | */ |
|---|
| | 824 | ret = cherokee_buffer_new (maybe_new); |
|---|
| | 825 | if (unlikely(ret != ret_ok)) return ret; |
|---|
| | 826 | |
|---|
| | 827 | ret = cherokee_buffer_ensure_size (*maybe_new, buf->len + extra + 1); |
|---|
| | 828 | if (unlikely(ret != ret_ok)) return ret; |
|---|
| | 829 | |
|---|
| | 830 | ret = cherokee_buffer_add_buffer (*maybe_new, buf); |
|---|
| | 831 | if (unlikely(ret != ret_ok)) return ret; |
|---|
| | 832 | |
|---|
| | 833 | buf = *maybe_new; |
|---|
| | 834 | |
|---|
| | 835 | /* Make the changes |
|---|
| | 836 | */ |
|---|
| | 837 | for (i = 0, j = 0; i < buf->len; i++) { |
|---|
| | 838 | char c = buf->buf[i+j]; |
|---|
| | 839 | |
|---|
| | 840 | switch (c) { |
|---|
| | 841 | case '<': |
|---|
| | 842 | memmove (&buf->buf[i+j+4], &buf->buf[i+j+1], buf->len-i); |
|---|
| | 843 | memcpy (&buf->buf[i+j], "<", 4); |
|---|
| | 844 | j += 3; |
|---|
| | 845 | break; |
|---|
| | 846 | |
|---|
| | 847 | case '>': |
|---|
| | 848 | memmove (&buf->buf[i+j+4], &buf->buf[i+j+1], buf->len-i); |
|---|
| | 849 | memcpy (&buf->buf[i+j], ">", 4); |
|---|
| | 850 | j += 3; |
|---|
| | 851 | break; |
|---|
| | 852 | |
|---|
| | 853 | case '&': |
|---|
| | 854 | memmove (&buf->buf[i+j+5], &buf->buf[i+j+1], buf->len-i); |
|---|
| | 855 | memcpy (&buf->buf[i+j], "&", 5); |
|---|
| | 856 | j += 4; |
|---|
| | 857 | break; |
|---|
| | 858 | |
|---|
| | 859 | case '"': |
|---|
| | 860 | memmove (&buf->buf[i+j+6], &buf->buf[i+j+1], buf->len-i); |
|---|
| | 861 | memcpy (&buf->buf[i+j], """, 6); |
|---|
| | 862 | j += 5; |
|---|
| | 863 | break; |
|---|
| | 864 | } |
|---|
| | 865 | } |
|---|
| | 866 | |
|---|
| | 867 | /* Set the new length |
|---|
| | 868 | */ |
|---|
| | 869 | buf->len += extra; |
|---|
| | 870 | |
|---|
| | 871 | return ret_ok; |
|---|
| | 872 | } |
|---|
| | 873 | |
|---|
| | 874 | |
|---|
| 858 | | ret_t |
|---|
| 859 | | cherokee_buffer_escape_html (cherokee_buffer_t *buf, cherokee_buffer_t **maybe_new) |
|---|
| 860 | | { |
|---|
| 861 | | ret_t ret; |
|---|
| 862 | | cuint_t i; |
|---|
| 863 | | cuint_t j; |
|---|
| 864 | | cuint_t extra = 0; |
|---|
| 865 | | char *p0, *p; |
|---|
| 866 | | |
|---|
| 867 | | /* Verify string termination, |
|---|
| 868 | | * we assume there are no '\0' inside buffer. |
|---|
| 869 | | */ |
|---|
| 870 | | if (buf->buf[buf->len] != '\0') |
|---|
| 871 | | buf->buf[buf->len] = '\0'; |
|---|
| 872 | | |
|---|
| 873 | | /* Verify if string has to be escaped. |
|---|
| 874 | | */ |
|---|
| 875 | | if ((p0 = strpbrk(buf->buf, "<>&\"")) == NULL) |
|---|
| 876 | | return ret_not_found; |
|---|
| 877 | | |
|---|
| 878 | | /* Count extra characters |
|---|
| 879 | | */ |
|---|
| 880 | | for (p = p0; *p != '\0'; ++p) { |
|---|
| 881 | | switch(*p) { |
|---|
| 882 | | case '<': |
|---|
| 883 | | case '>': |
|---|
| 884 | | extra += 3; |
|---|
| 885 | | continue; |
|---|
| 886 | | case '&': |
|---|
| 887 | | extra += 4; |
|---|
| 888 | | continue; |
|---|
| 889 | | case '"': |
|---|
| 890 | | extra += 5; |
|---|
| 891 | | continue; |
|---|
| 892 | | default: |
|---|
| 893 | | continue; |
|---|
| 894 | | } |
|---|
| 895 | | } |
|---|
| 896 | | |
|---|
| 897 | | /* Verify there are no embedded '\0'. |
|---|
| 898 | | */ |
|---|
| 899 | | if ( ((int) (p - buf->buf)) != buf->len) |
|---|
| 900 | | return ret_error; |
|---|
| 901 | | |
|---|
| 902 | | /* Create a new buffer |
|---|
| 903 | | */ |
|---|
| 904 | | ret = cherokee_buffer_new (maybe_new); |
|---|
| 905 | | if (unlikely(ret != ret_ok)) return ret; |
|---|
| 906 | | |
|---|
| 907 | | ret = cherokee_buffer_ensure_size (*maybe_new, buf->len + extra + 1); |
|---|
| 908 | | if (unlikely(ret != ret_ok)) return ret; |
|---|
| 909 | | |
|---|
| 910 | | ret = cherokee_buffer_add_buffer (*maybe_new, buf); |
|---|
| 911 | | if (unlikely(ret != ret_ok)) return ret; |
|---|
| 912 | | |
|---|
| 913 | | buf = *maybe_new; |
|---|
| 914 | | |
|---|
| 915 | | /* Make the changes |
|---|
| 916 | | */ |
|---|
| 917 | | for (i = 0, j = 0; i < buf->len; i++) { |
|---|
| 918 | | char c = buf->buf[i+j]; |
|---|
| 919 | | |
|---|
| 920 | | switch (c) { |
|---|
| 921 | | case '<': |
|---|
| 922 | | memmove (&buf->buf[i+j+4], &buf->buf[i+j+1], buf->len-i); |
|---|
| 923 | | memcpy (&buf->buf[i+j], "<", 4); |
|---|
| 924 | | j += 3; |
|---|
| 925 | | break; |
|---|
| 926 | | |
|---|
| 927 | | case '>': |
|---|
| 928 | | memmove (&buf->buf[i+j+4], &buf->buf[i+j+1], buf->len-i); |
|---|
| 929 | | memcpy (&buf->buf[i+j], ">", 4); |
|---|
| 930 | | j += 3; |
|---|
| 931 | | break; |
|---|
| 932 | | |
|---|
| 933 | | case '&': |
|---|
| 934 | | memmove (&buf->buf[i+j+5], &buf->buf[i+j+1], buf->len-i); |
|---|
| 935 | | memcpy (&buf->buf[i+j], "&", 5); |
|---|
| 936 | | j += 4; |
|---|
| 937 | | break; |
|---|
| 938 | | |
|---|
| 939 | | case '"': |
|---|
| 940 | | memmove (&buf->buf[i+j+6], &buf->buf[i+j+1], buf->len-i); |
|---|
| 941 | | memcpy (&buf->buf[i+j], """, 6); |
|---|
| 942 | | j += 5; |
|---|
| 943 | | break; |
|---|
| 944 | | } |
|---|
| 945 | | } |
|---|
| 946 | | |
|---|
| 947 | | /* Set the new length |
|---|
| 948 | | */ |
|---|
| 949 | | buf->len += extra; |
|---|
| 950 | | |
|---|
| 951 | | return ret_ok; |
|---|
| 952 | | } |
|---|
| 953 | | |
|---|
| 954 | | |
|---|