00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <cstdlib>
00011 #include <cstring>
00012 #include <boost/regex.hpp>
00013 #include <boost/assert.hpp>
00014 #include <boost/logic/tribool.hpp>
00015 #include <boost/algorithm/string.hpp>
00016 #include <pion/algorithm.hpp>
00017 #include <pion/http/parser.hpp>
00018 #include <pion/http/request.hpp>
00019 #include <pion/http/response.hpp>
00020 #include <pion/http/message.hpp>
00021
00022
00023 namespace pion {
00024 namespace http {
00025
00026
00027
00028
// Definitions of the parser's static data members.
// The *_MAX values below are size limits compared against the length of the
// string being accumulated; the routine doing the comparison fails once the
// limit has been reached.

// Limit on the response status message length (checked in parse_headers()).
00029 const boost::uint32_t parser::STATUS_MESSAGE_MAX = 1024;
// Limit on the request method length (checked in parse_headers()).
00030 const boost::uint32_t parser::METHOD_MAX = 1024;
// Limit on the request resource (URI stem) length: 256 KiB.
00031 const boost::uint32_t parser::RESOURCE_MAX = 256 * 1024;
// Limit on the raw query string length: 1 MiB.
00032 const boost::uint32_t parser::QUERY_STRING_MAX = 1024 * 1024;
// Limit on a single header name length.
00033 const boost::uint32_t parser::HEADER_NAME_MAX = 1024;
// Limit on a single header value length: 1 MiB.
00034 const boost::uint32_t parser::HEADER_VALUE_MAX = 1024 * 1024;
// Limit on a url-encoded parameter name (checked in parse_url_encoded()).
00035 const boost::uint32_t parser::QUERY_NAME_MAX = 1024;
// Limit on a url-encoded parameter value: 1 MiB.
00036 const boost::uint32_t parser::QUERY_VALUE_MAX = 1024 * 1024;
// Limit on a cookie name (checked in parse_cookie_header()).
00037 const boost::uint32_t parser::COOKIE_NAME_MAX = 1024;
// Limit on a cookie value: 1 MiB.
00038 const boost::uint32_t parser::COOKIE_VALUE_MAX = 1024 * 1024;
// NOTE(review): presumably the initial value of m_max_content_length (1 MiB);
// it is not referenced in this part of the file -- confirm in the header.
00039 const std::size_t parser::DEFAULT_CONTENT_MAX = 1024 * 1024;
// Starts out NULL. NOTE(review): presumably created on first use, guarded by
// m_instance_flag below -- confirm against the code that assigns it.
00040 parser::error_category_t * parser::m_error_category_ptr = NULL;
// boost::call_once flag, statically initialised with BOOST_ONCE_INIT.
00041 boost::once_flag parser::m_instance_flag = BOOST_ONCE_INIT;
00042
00043
00044
00045
00046 boost::tribool parser::parse(http::message& http_msg,
00047 boost::system::error_code& ec)
00048 {
00049 BOOST_ASSERT(! eof() );
00050
00051 boost::tribool rc = boost::indeterminate;
00052 std::size_t total_bytes_parsed = 0;
00053
00054 if(http_msg.has_missing_packets()) {
00055 http_msg.set_data_after_missing_packet(true);
00056 }
00057
00058 do {
00059 switch (m_message_parse_state) {
00060
00061 case PARSE_START:
00062 m_message_parse_state = PARSE_HEADERS;
00063
00064
00065
00066 case PARSE_HEADERS:
00067 case PARSE_FOOTERS:
00068 rc = parse_headers(http_msg, ec);
00069 total_bytes_parsed += m_bytes_last_read;
00070
00071 if (rc == true && m_message_parse_state == PARSE_HEADERS) {
00072
00073
00074 rc = finish_header_parsing(http_msg, ec);
00075 }
00076 break;
00077
00078
00079 case PARSE_CHUNKS:
00080 rc = parse_chunks(http_msg.get_chunk_cache(), ec);
00081 total_bytes_parsed += m_bytes_last_read;
00082
00083 if (rc == true && !m_payload_handler) {
00084 http_msg.concatenate_chunks();
00085
00086
00087 rc = ((m_message_parse_state == PARSE_FOOTERS) ?
00088 boost::indeterminate : (boost::tribool)true);
00089 }
00090 break;
00091
00092
00093 case PARSE_CONTENT:
00094 rc = consume_content(http_msg, ec);
00095 total_bytes_parsed += m_bytes_last_read;
00096 break;
00097
00098
00099 case PARSE_CONTENT_NO_LENGTH:
00100 consume_content_as_next_chunk(http_msg.get_chunk_cache());
00101 total_bytes_parsed += m_bytes_last_read;
00102 break;
00103
00104
00105 case PARSE_END:
00106 rc = true;
00107 break;
00108 }
00109 } while ( boost::indeterminate(rc) && ! eof() );
00110
00111
00112 if (rc == true) {
00113 m_message_parse_state = PARSE_END;
00114 finish(http_msg);
00115 } else if(rc == false) {
00116 compute_msg_status(http_msg, false);
00117 }
00118
00119
00120 m_bytes_last_read = total_bytes_parsed;
00121
00122 return rc;
00123 }
00124
00125 boost::tribool parser::parse_missing_data(http::message& http_msg,
00126 std::size_t len, boost::system::error_code& ec)
00127 {
00128 static const char MISSING_DATA_CHAR = 'X';
00129 boost::tribool rc = boost::indeterminate;
00130
00131 http_msg.set_missing_packets(true);
00132
00133 switch (m_message_parse_state) {
00134
00135
00136 case PARSE_START:
00137 case PARSE_HEADERS:
00138 case PARSE_FOOTERS:
00139 set_error(ec, ERROR_MISSING_HEADER_DATA);
00140 rc = false;
00141 break;
00142
00143
00144 case PARSE_CHUNKS:
00145
00146 if (m_chunked_content_parse_state == PARSE_CHUNK
00147 && m_bytes_read_in_current_chunk < m_size_of_current_chunk
00148 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
00149 {
00150
00151 if (m_payload_handler) {
00152 for (std::size_t n = 0; n < len; ++n)
00153 m_payload_handler(&MISSING_DATA_CHAR, 1);
00154 } else {
00155 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
00156 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00157 }
00158
00159 m_bytes_read_in_current_chunk += len;
00160 m_bytes_last_read = len;
00161 m_bytes_total_read += len;
00162 m_bytes_content_read += len;
00163
00164 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00165 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00166 }
00167 } else {
00168
00169 set_error(ec, ERROR_MISSING_CHUNK_DATA);
00170 rc = false;
00171 }
00172 break;
00173
00174
00175 case PARSE_CONTENT:
00176
00177 if (m_bytes_content_remaining == 0) {
00178
00179 rc = true;
00180 } else if (m_bytes_content_remaining < len) {
00181
00182 set_error(ec, ERROR_MISSING_TOO_MUCH_CONTENT);
00183 rc = false;
00184 } else {
00185
00186
00187 if (m_payload_handler) {
00188 for (std::size_t n = 0; n < len; ++n)
00189 m_payload_handler(&MISSING_DATA_CHAR, 1);
00190 } else if ( (m_bytes_content_read+len) <= m_max_content_length) {
00191
00192 for (std::size_t n = 0; n < len; ++n)
00193 http_msg.get_content()[m_bytes_content_read++] = MISSING_DATA_CHAR;
00194 } else {
00195 m_bytes_content_read += len;
00196 }
00197
00198 m_bytes_content_remaining -= len;
00199 m_bytes_total_read += len;
00200 m_bytes_last_read = len;
00201
00202 if (m_bytes_content_remaining == 0)
00203 rc = true;
00204 }
00205 break;
00206
00207
00208 case PARSE_CONTENT_NO_LENGTH:
00209
00210 if (m_payload_handler) {
00211 for (std::size_t n = 0; n < len; ++n)
00212 m_payload_handler(&MISSING_DATA_CHAR, 1);
00213 } else {
00214 for (std::size_t n = 0; n < len && http_msg.get_chunk_cache().size() < m_max_content_length; ++n)
00215 http_msg.get_chunk_cache().push_back(MISSING_DATA_CHAR);
00216 }
00217 m_bytes_last_read = len;
00218 m_bytes_total_read += len;
00219 m_bytes_content_read += len;
00220 break;
00221
00222
00223 case PARSE_END:
00224 rc = true;
00225 break;
00226 }
00227
00228
00229 if (rc == true) {
00230 m_message_parse_state = PARSE_END;
00231 finish(http_msg);
00232 } else if(rc == false) {
00233 compute_msg_status(http_msg, false);
00234 }
00235
00236 return rc;
00237 }
00238
00239 boost::tribool parser::parse_headers(http::message& http_msg,
00240 boost::system::error_code& ec)
00241 {
00242
00243
00244
00245
00246
00247
00248
00249 const char *read_start_ptr = m_read_ptr;
00250 m_bytes_last_read = 0;
00251 while (m_read_ptr < m_read_end_ptr) {
00252
00253 if (m_save_raw_headers)
00254 m_raw_headers += *m_read_ptr;
00255
00256 switch (m_headers_parse_state) {
00257 case PARSE_METHOD_START:
00258
00259 if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') {
00260 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00261 set_error(ec, ERROR_METHOD_CHAR);
00262 return false;
00263 }
00264 m_headers_parse_state = PARSE_METHOD;
00265 m_method.erase();
00266 m_method.push_back(*m_read_ptr);
00267 }
00268 break;
00269
00270 case PARSE_METHOD:
00271
00272 if (*m_read_ptr == ' ') {
00273 m_resource.erase();
00274 m_headers_parse_state = PARSE_URI_STEM;
00275 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00276 set_error(ec, ERROR_METHOD_CHAR);
00277 return false;
00278 } else if (m_method.size() >= METHOD_MAX) {
00279 set_error(ec, ERROR_METHOD_SIZE);
00280 return false;
00281 } else {
00282 m_method.push_back(*m_read_ptr);
00283 }
00284 break;
00285
00286 case PARSE_URI_STEM:
00287
00288 if (*m_read_ptr == ' ') {
00289 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00290 } else if (*m_read_ptr == '?') {
00291 m_query_string.erase();
00292 m_headers_parse_state = PARSE_URI_QUERY;
00293 } else if (*m_read_ptr == '\r') {
00294 http_msg.set_version_major(0);
00295 http_msg.set_version_minor(0);
00296 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00297 } else if (*m_read_ptr == '\n') {
00298 http_msg.set_version_major(0);
00299 http_msg.set_version_minor(0);
00300 m_headers_parse_state = PARSE_EXPECTING_CR;
00301 } else if (is_control(*m_read_ptr)) {
00302 set_error(ec, ERROR_URI_CHAR);
00303 return false;
00304 } else if (m_resource.size() >= RESOURCE_MAX) {
00305 set_error(ec, ERROR_URI_SIZE);
00306 return false;
00307 } else {
00308 m_resource.push_back(*m_read_ptr);
00309 }
00310 break;
00311
00312 case PARSE_URI_QUERY:
00313
00314 if (*m_read_ptr == ' ') {
00315 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00316 } else if (*m_read_ptr == '\r') {
00317 http_msg.set_version_major(0);
00318 http_msg.set_version_minor(0);
00319 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00320 } else if (*m_read_ptr == '\n') {
00321 http_msg.set_version_major(0);
00322 http_msg.set_version_minor(0);
00323 m_headers_parse_state = PARSE_EXPECTING_CR;
00324 } else if (is_control(*m_read_ptr)) {
00325 set_error(ec, ERROR_QUERY_CHAR);
00326 return false;
00327 } else if (m_query_string.size() >= QUERY_STRING_MAX) {
00328 set_error(ec, ERROR_QUERY_SIZE);
00329 return false;
00330 } else {
00331 m_query_string.push_back(*m_read_ptr);
00332 }
00333 break;
00334
00335 case PARSE_HTTP_VERSION_H:
00336
00337 if (*m_read_ptr == '\r') {
00338
00339 if (! m_is_request) {
00340 set_error(ec, ERROR_VERSION_EMPTY);
00341 return false;
00342 }
00343 http_msg.set_version_major(0);
00344 http_msg.set_version_minor(0);
00345 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00346 } else if (*m_read_ptr == '\n') {
00347
00348 if (! m_is_request) {
00349 set_error(ec, ERROR_VERSION_EMPTY);
00350 return false;
00351 }
00352 http_msg.set_version_major(0);
00353 http_msg.set_version_minor(0);
00354 m_headers_parse_state = PARSE_EXPECTING_CR;
00355 } else if (*m_read_ptr != 'H') {
00356 set_error(ec, ERROR_VERSION_CHAR);
00357 return false;
00358 }
00359 m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
00360 break;
00361
00362 case PARSE_HTTP_VERSION_T_1:
00363
00364 if (*m_read_ptr != 'T') {
00365 set_error(ec, ERROR_VERSION_CHAR);
00366 return false;
00367 }
00368 m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
00369 break;
00370
00371 case PARSE_HTTP_VERSION_T_2:
00372
00373 if (*m_read_ptr != 'T') {
00374 set_error(ec, ERROR_VERSION_CHAR);
00375 return false;
00376 }
00377 m_headers_parse_state = PARSE_HTTP_VERSION_P;
00378 break;
00379
00380 case PARSE_HTTP_VERSION_P:
00381
00382 if (*m_read_ptr != 'P') {
00383 set_error(ec, ERROR_VERSION_CHAR);
00384 return false;
00385 }
00386 m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
00387 break;
00388
00389 case PARSE_HTTP_VERSION_SLASH:
00390
00391 if (*m_read_ptr != '/') {
00392 set_error(ec, ERROR_VERSION_CHAR);
00393 return false;
00394 }
00395 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
00396 break;
00397
00398 case PARSE_HTTP_VERSION_MAJOR_START:
00399
00400 if (!is_digit(*m_read_ptr)) {
00401 set_error(ec, ERROR_VERSION_CHAR);
00402 return false;
00403 }
00404 http_msg.set_version_major(*m_read_ptr - '0');
00405 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
00406 break;
00407
00408 case PARSE_HTTP_VERSION_MAJOR:
00409
00410 if (*m_read_ptr == '.') {
00411 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
00412 } else if (is_digit(*m_read_ptr)) {
00413 http_msg.set_version_major( (http_msg.get_version_major() * 10)
00414 + (*m_read_ptr - '0') );
00415 } else {
00416 set_error(ec, ERROR_VERSION_CHAR);
00417 return false;
00418 }
00419 break;
00420
00421 case PARSE_HTTP_VERSION_MINOR_START:
00422
00423 if (!is_digit(*m_read_ptr)) {
00424 set_error(ec, ERROR_VERSION_CHAR);
00425 return false;
00426 }
00427 http_msg.set_version_minor(*m_read_ptr - '0');
00428 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
00429 break;
00430
00431 case PARSE_HTTP_VERSION_MINOR:
00432
00433 if (*m_read_ptr == ' ') {
00434
00435 if (! m_is_request) {
00436 m_headers_parse_state = PARSE_STATUS_CODE_START;
00437 }
00438 } else if (*m_read_ptr == '\r') {
00439
00440 if (! m_is_request) {
00441 set_error(ec, ERROR_STATUS_EMPTY);
00442 return false;
00443 }
00444 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00445 } else if (*m_read_ptr == '\n') {
00446
00447 if (! m_is_request) {
00448 set_error(ec, ERROR_STATUS_EMPTY);
00449 return false;
00450 }
00451 m_headers_parse_state = PARSE_EXPECTING_CR;
00452 } else if (is_digit(*m_read_ptr)) {
00453 http_msg.set_version_minor( (http_msg.get_version_minor() * 10)
00454 + (*m_read_ptr - '0') );
00455 } else {
00456 set_error(ec, ERROR_VERSION_CHAR);
00457 return false;
00458 }
00459 break;
00460
00461 case PARSE_STATUS_CODE_START:
00462
00463 if (!is_digit(*m_read_ptr)) {
00464 set_error(ec, ERROR_STATUS_CHAR);
00465 return false;
00466 }
00467 m_status_code = (*m_read_ptr - '0');
00468 m_headers_parse_state = PARSE_STATUS_CODE;
00469 break;
00470
00471 case PARSE_STATUS_CODE:
00472
00473 if (*m_read_ptr == ' ') {
00474 m_status_message.erase();
00475 m_headers_parse_state = PARSE_STATUS_MESSAGE;
00476 } else if (is_digit(*m_read_ptr)) {
00477 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
00478 } else if (*m_read_ptr == '\r') {
00479
00480 m_status_message.erase();
00481 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00482 } else if (*m_read_ptr == '\n') {
00483
00484 m_status_message.erase();
00485 m_headers_parse_state = PARSE_EXPECTING_CR;
00486 } else {
00487 set_error(ec, ERROR_STATUS_CHAR);
00488 return false;
00489 }
00490 break;
00491
00492 case PARSE_STATUS_MESSAGE:
00493
00494 if (*m_read_ptr == '\r') {
00495 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00496 } else if (*m_read_ptr == '\n') {
00497 m_headers_parse_state = PARSE_EXPECTING_CR;
00498 } else if (is_control(*m_read_ptr)) {
00499 set_error(ec, ERROR_STATUS_CHAR);
00500 return false;
00501 } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
00502 set_error(ec, ERROR_STATUS_CHAR);
00503 return false;
00504 } else {
00505 m_status_message.push_back(*m_read_ptr);
00506 }
00507 break;
00508
00509 case PARSE_EXPECTING_NEWLINE:
00510
00511 if (*m_read_ptr == '\n') {
00512 m_headers_parse_state = PARSE_HEADER_START;
00513 } else if (*m_read_ptr == '\r') {
00514
00515
00516
00517 ++m_read_ptr;
00518 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00519 m_bytes_total_read += m_bytes_last_read;
00520 return true;
00521 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00522 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00523 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00524 set_error(ec, ERROR_HEADER_CHAR);
00525 return false;
00526 } else {
00527
00528 m_header_name.erase();
00529 m_header_name.push_back(*m_read_ptr);
00530 m_headers_parse_state = PARSE_HEADER_NAME;
00531 }
00532 break;
00533
00534 case PARSE_EXPECTING_CR:
00535
00536 if (*m_read_ptr == '\r') {
00537 m_headers_parse_state = PARSE_HEADER_START;
00538 } else if (*m_read_ptr == '\n') {
00539
00540
00541
00542 ++m_read_ptr;
00543 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00544 m_bytes_total_read += m_bytes_last_read;
00545 return true;
00546 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00547 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00548 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00549 set_error(ec, ERROR_HEADER_CHAR);
00550 return false;
00551 } else {
00552
00553 m_header_name.erase();
00554 m_header_name.push_back(*m_read_ptr);
00555 m_headers_parse_state = PARSE_HEADER_NAME;
00556 }
00557 break;
00558
00559 case PARSE_HEADER_WHITESPACE:
00560
00561 if (*m_read_ptr == '\r') {
00562 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00563 } else if (*m_read_ptr == '\n') {
00564 m_headers_parse_state = PARSE_EXPECTING_CR;
00565 } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
00566 if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00567 set_error(ec, ERROR_HEADER_CHAR);
00568 return false;
00569 }
00570
00571 m_header_name.erase();
00572 m_header_name.push_back(*m_read_ptr);
00573 m_headers_parse_state = PARSE_HEADER_NAME;
00574 }
00575 break;
00576
00577 case PARSE_HEADER_START:
00578
00579 if (*m_read_ptr == '\r') {
00580 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
00581 } else if (*m_read_ptr == '\n') {
00582 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
00583 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00584 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00585 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00586 set_error(ec, ERROR_HEADER_CHAR);
00587 return false;
00588 } else {
00589
00590 m_header_name.erase();
00591 m_header_name.push_back(*m_read_ptr);
00592 m_headers_parse_state = PARSE_HEADER_NAME;
00593 }
00594 break;
00595
00596 case PARSE_HEADER_NAME:
00597
00598 if (*m_read_ptr == ':') {
00599 m_header_value.erase();
00600 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
00601 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00602 set_error(ec, ERROR_HEADER_CHAR);
00603 return false;
00604 } else if (m_header_name.size() >= HEADER_NAME_MAX) {
00605 set_error(ec, ERROR_HEADER_NAME_SIZE);
00606 return false;
00607 } else {
00608
00609 m_header_name.push_back(*m_read_ptr);
00610 }
00611 break;
00612
00613 case PARSE_SPACE_BEFORE_HEADER_VALUE:
00614
00615 if (*m_read_ptr == ' ') {
00616 m_headers_parse_state = PARSE_HEADER_VALUE;
00617 } else if (*m_read_ptr == '\r') {
00618 http_msg.add_header(m_header_name, m_header_value);
00619 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00620 } else if (*m_read_ptr == '\n') {
00621 http_msg.add_header(m_header_name, m_header_value);
00622 m_headers_parse_state = PARSE_EXPECTING_CR;
00623 } else if (!is_char(*m_read_ptr) || is_control(*m_read_ptr) || is_special(*m_read_ptr)) {
00624 set_error(ec, ERROR_HEADER_CHAR);
00625 return false;
00626 } else {
00627
00628 m_header_value.push_back(*m_read_ptr);
00629 m_headers_parse_state = PARSE_HEADER_VALUE;
00630 }
00631 break;
00632
00633 case PARSE_HEADER_VALUE:
00634
00635 if (*m_read_ptr == '\r') {
00636 http_msg.add_header(m_header_name, m_header_value);
00637 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00638 } else if (*m_read_ptr == '\n') {
00639 http_msg.add_header(m_header_name, m_header_value);
00640 m_headers_parse_state = PARSE_EXPECTING_CR;
00641 } else if (*m_read_ptr != '\t' && is_control(*m_read_ptr)) {
00642
00643
00644
00645
00646
00647
00648 set_error(ec, ERROR_HEADER_CHAR);
00649 return false;
00650 } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
00651 set_error(ec, ERROR_HEADER_VALUE_SIZE);
00652 return false;
00653 } else {
00654
00655 m_header_value.push_back(*m_read_ptr);
00656 }
00657 break;
00658
00659 case PARSE_EXPECTING_FINAL_NEWLINE:
00660 if (*m_read_ptr == '\n') ++m_read_ptr;
00661 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00662 m_bytes_total_read += m_bytes_last_read;
00663 return true;
00664
00665 case PARSE_EXPECTING_FINAL_CR:
00666 if (*m_read_ptr == '\r') ++m_read_ptr;
00667 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00668 m_bytes_total_read += m_bytes_last_read;
00669 return true;
00670 }
00671
00672 ++m_read_ptr;
00673 }
00674
00675 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00676 m_bytes_total_read += m_bytes_last_read;
00677 return boost::indeterminate;
00678 }
00679
00680 void parser::update_message_with_header_data(http::message& http_msg) const
00681 {
00682 if (is_parsing_request()) {
00683
00684
00685
00686 http::request& http_request(dynamic_cast<http::request&>(http_msg));
00687 http_request.set_method(m_method);
00688 http_request.set_resource(m_resource);
00689 http_request.set_query_string(m_query_string);
00690
00691
00692 if (! m_query_string.empty()) {
00693 if (! parse_url_encoded(http_request.get_queries(),
00694 m_query_string.c_str(),
00695 m_query_string.size()))
00696 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)");
00697 }
00698
00699
00700 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00701 cookie_pair = http_request.get_headers().equal_range(http::types::HEADER_COOKIE);
00702 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00703 cookie_iterator != http_request.get_headers().end()
00704 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00705 {
00706 if (! parse_cookie_header(http_request.get_cookies(),
00707 cookie_iterator->second, false) )
00708 PION_LOG_WARN(m_logger, "Cookie header parsing failed");
00709 }
00710
00711 } else {
00712
00713
00714
00715 http::response& http_response(dynamic_cast<http::response&>(http_msg));
00716 http_response.set_status_code(m_status_code);
00717 http_response.set_status_message(m_status_message);
00718
00719
00720 std::pair<ihash_multimap::const_iterator, ihash_multimap::const_iterator>
00721 cookie_pair = http_response.get_headers().equal_range(http::types::HEADER_SET_COOKIE);
00722 for (ihash_multimap::const_iterator cookie_iterator = cookie_pair.first;
00723 cookie_iterator != http_response.get_headers().end()
00724 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00725 {
00726 if (! parse_cookie_header(http_response.get_cookies(),
00727 cookie_iterator->second, true) )
00728 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
00729 }
00730
00731 }
00732 }
00733
00734 boost::tribool parser::finish_header_parsing(http::message& http_msg,
00735 boost::system::error_code& ec)
00736 {
00737 boost::tribool rc = boost::indeterminate;
00738
00739 m_bytes_content_remaining = m_bytes_content_read = 0;
00740 http_msg.set_content_length(0);
00741 http_msg.update_transfer_encoding_using_header();
00742 update_message_with_header_data(http_msg);
00743
00744 if (http_msg.is_chunked()) {
00745
00746
00747 m_message_parse_state = PARSE_CHUNKS;
00748
00749
00750 if (m_parse_headers_only)
00751 rc = true;
00752
00753 } else if (http_msg.is_content_length_implied()) {
00754
00755
00756 m_message_parse_state = PARSE_END;
00757 rc = true;
00758
00759 } else {
00760
00761
00762 if (http_msg.has_header(http::types::HEADER_CONTENT_LENGTH)) {
00763
00764
00765 try {
00766 http_msg.update_content_length_using_header();
00767 } catch (...) {
00768 PION_LOG_ERROR(m_logger, "Unable to update content length");
00769 set_error(ec, ERROR_INVALID_CONTENT_LENGTH);
00770 return false;
00771 }
00772
00773
00774 if (http_msg.get_content_length() == 0) {
00775 m_message_parse_state = PARSE_END;
00776 rc = true;
00777 } else {
00778 m_message_parse_state = PARSE_CONTENT;
00779 m_bytes_content_remaining = http_msg.get_content_length();
00780
00781
00782 if (m_bytes_content_remaining > m_max_content_length)
00783 http_msg.set_content_length(m_max_content_length);
00784
00785 if (m_parse_headers_only) {
00786
00787 rc = true;
00788 } else {
00789
00790 http_msg.create_content_buffer();
00791 }
00792 }
00793
00794 } else {
00795
00796
00797
00798
00799 if (! m_is_request) {
00800
00801 http_msg.get_chunk_cache().clear();
00802
00803
00804 m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
00805
00806
00807 if (m_parse_headers_only)
00808 rc = true;
00809 } else {
00810 m_message_parse_state = PARSE_END;
00811 rc = true;
00812 }
00813 }
00814 }
00815
00816 finished_parsing_headers(ec);
00817
00818 return rc;
00819 }
00820
00821 bool parser::parse_uri(const std::string& uri, std::string& proto,
00822 std::string& host, boost::uint16_t& port,
00823 std::string& path, std::string& query)
00824 {
00825 size_t proto_end = uri.find("://");
00826 size_t proto_len = 0;
00827
00828 if(proto_end != std::string::npos) {
00829 proto = uri.substr(0, proto_end);
00830 proto_len = proto_end + 3;
00831 } else {
00832 proto.clear();
00833 }
00834
00835
00836
00837 size_t server_port_end = uri.find('/', proto_len);
00838 if(server_port_end == std::string::npos) {
00839 return false;
00840 }
00841
00842
00843 std::string t;
00844 t = uri.substr(proto_len, server_port_end - proto_len);
00845 size_t port_pos = t.find(':', 0);
00846
00847
00848
00849 host = t.substr(0, port_pos);
00850 if(host.length() == 0) {
00851 return false;
00852 }
00853
00854
00855 if(port_pos != std::string::npos) {
00856 try {
00857 port = boost::lexical_cast<int>(t.substr(port_pos+1));
00858 } catch (boost::bad_lexical_cast &) {
00859 return false;
00860 }
00861 } else if (proto == "http" || proto == "HTTP") {
00862 port = 80;
00863 } else if (proto == "https" || proto == "HTTPS") {
00864 port = 443;
00865 } else {
00866 port = 0;
00867 }
00868
00869
00870 path = uri.substr(server_port_end);
00871
00872
00873 size_t query_pos = path.find('?', 0);
00874
00875 if(query_pos != std::string::npos) {
00876 query = path.substr(query_pos + 1, path.length() - query_pos - 1);
00877 path = path.substr(0, query_pos);
00878 } else {
00879 query.clear();
00880 }
00881
00882 return true;
00883 }
00884
00885 bool parser::parse_url_encoded(ihash_multimap& dict,
00886 const char *ptr, const size_t len)
00887 {
00888
00889 if (ptr == NULL || len == 0)
00890 return true;
00891
00892
00893 enum QueryParseState {
00894 QUERY_PARSE_NAME, QUERY_PARSE_VALUE
00895 } parse_state = QUERY_PARSE_NAME;
00896
00897
00898 const char * const end = ptr + len;
00899 std::string query_name;
00900 std::string query_value;
00901
00902
00903 while (ptr < end) {
00904 switch (parse_state) {
00905
00906 case QUERY_PARSE_NAME:
00907
00908 if (*ptr == '=') {
00909
00910 parse_state = QUERY_PARSE_VALUE;
00911 } else if (*ptr == '&') {
00912
00913 if (! query_name.empty()) {
00914
00915 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00916 query_name.erase();
00917 }
00918 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00919
00920 } else if (is_control(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
00921
00922 return false;
00923 } else {
00924
00925 query_name.push_back(*ptr);
00926 }
00927 break;
00928
00929 case QUERY_PARSE_VALUE:
00930
00931 if (*ptr == '&') {
00932
00933 if (! query_name.empty()) {
00934 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00935 query_name.erase();
00936 }
00937 query_value.erase();
00938 parse_state = QUERY_PARSE_NAME;
00939 } else if (*ptr == ',') {
00940
00941 if (! query_name.empty())
00942 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00943 query_value.erase();
00944 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00945
00946 } else if (is_control(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
00947
00948 return false;
00949 } else {
00950
00951 query_value.push_back(*ptr);
00952 }
00953 break;
00954 }
00955
00956 ++ptr;
00957 }
00958
00959
00960 if (! query_name.empty())
00961 dict.insert( std::make_pair(algorithm::url_decode(query_name), algorithm::url_decode(query_value)) );
00962
00963 return true;
00964 }
00965
00966 bool parser::parse_multipart_form_data(ihash_multimap& dict,
00967 const std::string& content_type,
00968 const char *ptr, const size_t len)
00969 {
00970
00971 if (ptr == NULL || len == 0)
00972 return true;
00973
00974
00975 std::size_t pos = content_type.find("boundary=");
00976 if (pos == std::string::npos)
00977 return false;
00978 const std::string boundary = std::string("--") + content_type.substr(pos+9);
00979
00980
00981 enum MultiPartParseState {
00982 MP_PARSE_START,
00983 MP_PARSE_HEADER_CR, MP_PARSE_HEADER_LF,
00984 MP_PARSE_HEADER_NAME, MP_PARSE_HEADER_SPACE, MP_PARSE_HEADER_VALUE,
00985 MP_PARSE_HEADER_LAST_LF, MP_PARSE_FIELD_DATA
00986 } parse_state = MP_PARSE_START;
00987
00988
00989 std::string header_name;
00990 std::string header_value;
00991 std::string field_name;
00992 std::string field_value;
00993 bool found_parameter = false;
00994 bool save_current_field = true;
00995 const char * const end_ptr = ptr + len;
00996
00997 ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
00998
00999 while (ptr != NULL && ptr < end_ptr) {
01000 switch (parse_state) {
01001 case MP_PARSE_START:
01002
01003 header_name.clear();
01004 header_value.clear();
01005 field_name.clear();
01006 field_value.clear();
01007 save_current_field = true;
01008 ptr += boundary.size() - 1;
01009 parse_state = MP_PARSE_HEADER_CR;
01010 break;
01011 case MP_PARSE_HEADER_CR:
01012
01013 if (*ptr == '\r') {
01014
01015 parse_state = MP_PARSE_HEADER_LF;
01016 } else if (*ptr == '\n') {
01017
01018 parse_state = MP_PARSE_HEADER_NAME;
01019 } else if (*ptr == '-' && ptr+1 < end_ptr && ptr[1] == '-') {
01020
01021 return true;
01022 } else return false;
01023 break;
01024 case MP_PARSE_HEADER_LF:
01025
01026 if (*ptr == '\n') {
01027
01028 parse_state = MP_PARSE_HEADER_NAME;
01029 } else return false;
01030 break;
01031 case MP_PARSE_HEADER_NAME:
01032
01033 if (*ptr == '\r' || *ptr == '\n') {
01034 if (header_name.empty()) {
01035
01036 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LAST_LF : MP_PARSE_FIELD_DATA);
01037 } else {
01038
01039 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01040 }
01041 } else if (*ptr == ':') {
01042
01043 parse_state = MP_PARSE_HEADER_SPACE;
01044 } else {
01045
01046 header_name += *ptr;
01047 }
01048 break;
01049 case MP_PARSE_HEADER_SPACE:
01050
01051 if (*ptr == '\r') {
01052
01053 parse_state = MP_PARSE_HEADER_LF;
01054 } else if (*ptr == '\n') {
01055
01056 parse_state = MP_PARSE_HEADER_NAME;
01057 } else if (*ptr != ' ') {
01058
01059 header_value += *ptr;
01060 parse_state = MP_PARSE_HEADER_VALUE;
01061 }
01062
01063 break;
01064 case MP_PARSE_HEADER_VALUE:
01065
01066 if (*ptr == '\r' || *ptr == '\n') {
01067
01068 if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_TYPE)) {
01069
01070 save_current_field = boost::algorithm::iequals(header_value.substr(0, 5), "text/");
01071 } else if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_DISPOSITION)) {
01072
01073 std::size_t name_pos = header_value.find("name=\"");
01074 if (name_pos != std::string::npos) {
01075 for (name_pos += 6; name_pos < header_value.size() && header_value[name_pos] != '\"'; ++name_pos) {
01076 field_name += header_value[name_pos];
01077 }
01078 }
01079 }
01080
01081 header_name.clear();
01082 header_value.clear();
01083 parse_state = (*ptr == '\r' ? MP_PARSE_HEADER_LF : MP_PARSE_HEADER_NAME);
01084 } else {
01085
01086 header_value += *ptr;
01087 }
01088 break;
01089 case MP_PARSE_HEADER_LAST_LF:
01090
01091 if (*ptr == '\n') {
01092
01093 if (save_current_field && !field_name.empty()) {
01094
01095 parse_state = MP_PARSE_FIELD_DATA;
01096 } else {
01097
01098 parse_state = MP_PARSE_START;
01099 ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01100 }
01101 } else return false;
01102 break;
01103 case MP_PARSE_FIELD_DATA:
01104
01105 const char *field_end_ptr = end_ptr;
01106 const char *next_ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
01107 if (next_ptr) {
01108
01109 const char *temp_ptr = next_ptr - 2;
01110 if (temp_ptr[0] == '\r' && temp_ptr[1] == '\n')
01111 field_end_ptr = temp_ptr;
01112 else field_end_ptr = next_ptr;
01113 }
01114 field_value.assign(ptr, field_end_ptr - ptr);
01115
01116 dict.insert( std::make_pair(field_name, field_value) );
01117 found_parameter = true;
01118
01119 parse_state = MP_PARSE_START;
01120 ptr = next_ptr;
01121 break;
01122 }
01123
01124 if (parse_state != MP_PARSE_START)
01125 ++ptr;
01126 }
01127
01128 return found_parameter;
01129 }
01130
01131 bool parser::parse_cookie_header(ihash_multimap& dict,
01132 const char *ptr, const size_t len,
01133 bool set_cookie_header)
01134 {
01135
01136
01137
01138
01139
01140
01141
01142 enum CookieParseState {
01143 COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
01144 } parse_state = COOKIE_PARSE_NAME;
01145
01146
01147 const char * const end = ptr + len;
01148 std::string cookie_name;
01149 std::string cookie_value;
01150 char value_quote_character = '\0';
01151
01152
01153 while (ptr < end) {
01154 switch (parse_state) {
01155
01156 case COOKIE_PARSE_NAME:
01157
01158 if (*ptr == '=') {
01159
01160 value_quote_character = '\0';
01161 parse_state = COOKIE_PARSE_VALUE;
01162 } else if (*ptr == ';' || *ptr == ',') {
01163
01164
01165 if (! cookie_name.empty()) {
01166
01167 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01168 dict.insert( std::make_pair(cookie_name, cookie_value) );
01169 cookie_name.erase();
01170 }
01171 } else if (*ptr != ' ') {
01172
01173 if (is_control(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
01174 return false;
01175
01176 cookie_name.push_back(*ptr);
01177 }
01178 break;
01179
01180 case COOKIE_PARSE_VALUE:
01181
01182 if (value_quote_character == '\0') {
01183
01184 if (*ptr == ';' || *ptr == ',') {
01185
01186 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01187 dict.insert( std::make_pair(cookie_name, cookie_value) );
01188 cookie_name.erase();
01189 cookie_value.erase();
01190 parse_state = COOKIE_PARSE_NAME;
01191 } else if (*ptr == '\'' || *ptr == '"') {
01192 if (cookie_value.empty()) {
01193
01194 value_quote_character = *ptr;
01195 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01196
01197 return false;
01198 } else {
01199
01200 cookie_value.push_back(*ptr);
01201 }
01202 } else if (*ptr != ' ' || !cookie_value.empty()) {
01203
01204 if (is_control(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
01205 return false;
01206
01207 cookie_value.push_back(*ptr);
01208 }
01209 } else {
01210
01211 if (*ptr == value_quote_character) {
01212
01213 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01214 dict.insert( std::make_pair(cookie_name, cookie_value) );
01215 cookie_name.erase();
01216 cookie_value.erase();
01217 parse_state = COOKIE_PARSE_IGNORE;
01218 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
01219
01220 return false;
01221 } else {
01222
01223 cookie_value.push_back(*ptr);
01224 }
01225 }
01226 break;
01227
01228 case COOKIE_PARSE_IGNORE:
01229
01230 if (*ptr == ';' || *ptr == ',')
01231 parse_state = COOKIE_PARSE_NAME;
01232 break;
01233 }
01234
01235 ++ptr;
01236 }
01237
01238
01239 if (! is_cookie_attribute(cookie_name, set_cookie_header))
01240 dict.insert( std::make_pair(cookie_name, cookie_value) );
01241
01242 return true;
01243 }
01244
01245 boost::tribool parser::parse_chunks(http::message::chunk_cache_t& chunks,
01246 boost::system::error_code& ec)
01247 {
01248
01249
01250
01251
01252
01253
01254
01255 const char *read_start_ptr = m_read_ptr;
01256 m_bytes_last_read = 0;
01257 while (m_read_ptr < m_read_end_ptr) {
01258
01259 switch (m_chunked_content_parse_state) {
01260 case PARSE_CHUNK_SIZE_START:
01261
01262 if (is_hex_digit(*m_read_ptr)) {
01263 m_chunk_size_str.erase();
01264 m_chunk_size_str.push_back(*m_read_ptr);
01265 m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
01266 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
01267
01268
01269 break;
01270 } else {
01271 set_error(ec, ERROR_CHUNK_CHAR);
01272 return false;
01273 }
01274 break;
01275
01276 case PARSE_CHUNK_SIZE:
01277 if (is_hex_digit(*m_read_ptr)) {
01278 m_chunk_size_str.push_back(*m_read_ptr);
01279 } else if (*m_read_ptr == '\x0D') {
01280 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01281 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01282
01283
01284 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
01285 } else if (*m_read_ptr == ';') {
01286
01287
01288 m_chunked_content_parse_state = PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE;
01289 } else {
01290 set_error(ec, ERROR_CHUNK_CHAR);
01291 return false;
01292 }
01293 break;
01294
01295 case PARSE_EXPECTING_IGNORED_TEXT_AFTER_CHUNK_SIZE:
01296 if (*m_read_ptr == '\x0D') {
01297 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01298 }
01299 break;
01300
01301 case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
01302 if (*m_read_ptr == '\x0D') {
01303 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01304 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01305
01306
01307 break;
01308 } else {
01309 set_error(ec, ERROR_CHUNK_CHAR);
01310 return false;
01311 }
01312 break;
01313
01314 case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
01315
01316
01317 if (*m_read_ptr == '\x0A') {
01318 m_bytes_read_in_current_chunk = 0;
01319 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
01320 if (m_size_of_current_chunk == 0) {
01321 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK;
01322 } else {
01323 m_chunked_content_parse_state = PARSE_CHUNK;
01324 }
01325 } else {
01326 set_error(ec, ERROR_CHUNK_CHAR);
01327 return false;
01328 }
01329 break;
01330
01331 case PARSE_CHUNK:
01332 if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
01333 if (m_payload_handler) {
01334 const std::size_t bytes_avail = bytes_available();
01335 const std::size_t bytes_in_chunk = m_size_of_current_chunk - m_bytes_read_in_current_chunk;
01336 const std::size_t len = (bytes_in_chunk > bytes_avail) ? bytes_avail : bytes_in_chunk;
01337 m_payload_handler(m_read_ptr, len);
01338 m_bytes_read_in_current_chunk += len;
01339 if (len > 1) m_read_ptr += (len - 1);
01340 } else if (chunks.size() < m_max_content_length) {
01341 chunks.push_back(*m_read_ptr);
01342 m_bytes_read_in_current_chunk++;
01343 }
01344 }
01345 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
01346 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
01347 }
01348 break;
01349
01350 case PARSE_EXPECTING_CR_AFTER_CHUNK:
01351
01352 if (*m_read_ptr == '\x0D') {
01353 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
01354 } else {
01355 set_error(ec, ERROR_CHUNK_CHAR);
01356 return false;
01357 }
01358 break;
01359
01360 case PARSE_EXPECTING_LF_AFTER_CHUNK:
01361
01362 if (*m_read_ptr == '\x0A') {
01363 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
01364 } else {
01365 set_error(ec, ERROR_CHUNK_CHAR);
01366 return false;
01367 }
01368 break;
01369
01370 case PARSE_EXPECTING_FINAL_CR_OR_FOOTERS_AFTER_LAST_CHUNK:
01371
01372 if (*m_read_ptr == '\x0D') {
01373 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
01374 } else {
01375
01376
01377 m_message_parse_state = PARSE_FOOTERS;
01378 m_headers_parse_state = PARSE_HEADER_START;
01379 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01380 m_bytes_total_read += m_bytes_last_read;
01381 m_bytes_content_read += m_bytes_last_read;
01382 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01383 return true;
01384 }
01385 break;
01386
01387 case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
01388
01389 if (*m_read_ptr == '\x0A') {
01390 ++m_read_ptr;
01391 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01392 m_bytes_total_read += m_bytes_last_read;
01393 m_bytes_content_read += m_bytes_last_read;
01394 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01395 return true;
01396 } else {
01397 set_error(ec, ERROR_CHUNK_CHAR);
01398 return false;
01399 }
01400 }
01401
01402 ++m_read_ptr;
01403 }
01404
01405 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01406 m_bytes_total_read += m_bytes_last_read;
01407 m_bytes_content_read += m_bytes_last_read;
01408 return boost::indeterminate;
01409 }
01410
01411 boost::tribool parser::consume_content(http::message& http_msg,
01412 boost::system::error_code& ec)
01413 {
01414 size_t content_bytes_to_read;
01415 size_t content_bytes_available = bytes_available();
01416 boost::tribool rc = boost::indeterminate;
01417
01418 if (m_bytes_content_remaining == 0) {
01419
01420 return true;
01421 } else {
01422 if (content_bytes_available >= m_bytes_content_remaining) {
01423
01424 rc = true;
01425 content_bytes_to_read = m_bytes_content_remaining;
01426 } else {
01427
01428 content_bytes_to_read = content_bytes_available;
01429 }
01430 m_bytes_content_remaining -= content_bytes_to_read;
01431 }
01432
01433
01434 if (m_payload_handler) {
01435 m_payload_handler(m_read_ptr, content_bytes_to_read);
01436 } else if (m_bytes_content_read < m_max_content_length) {
01437 if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
01438
01439
01440 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr,
01441 m_max_content_length - m_bytes_content_read);
01442 } else {
01443
01444 memcpy(http_msg.get_content() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
01445 }
01446 }
01447
01448 m_read_ptr += content_bytes_to_read;
01449 m_bytes_content_read += content_bytes_to_read;
01450 m_bytes_total_read += content_bytes_to_read;
01451 m_bytes_last_read = content_bytes_to_read;
01452
01453 return rc;
01454 }
01455
01456 std::size_t parser::consume_content_as_next_chunk(http::message::chunk_cache_t& chunks)
01457 {
01458 if (bytes_available() == 0) {
01459 m_bytes_last_read = 0;
01460 } else {
01461
01462 m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
01463 if (m_payload_handler) {
01464 m_payload_handler(m_read_ptr, m_bytes_last_read);
01465 m_read_ptr += m_bytes_last_read;
01466 } else {
01467 while (m_read_ptr < m_read_end_ptr) {
01468 if (chunks.size() < m_max_content_length)
01469 chunks.push_back(*m_read_ptr);
01470 ++m_read_ptr;
01471 }
01472 }
01473 m_bytes_total_read += m_bytes_last_read;
01474 m_bytes_content_read += m_bytes_last_read;
01475 }
01476 return m_bytes_last_read;
01477 }
01478
01479 void parser::finish(http::message& http_msg) const
01480 {
01481 switch (m_message_parse_state) {
01482 case PARSE_START:
01483 http_msg.set_is_valid(false);
01484 http_msg.set_content_length(0);
01485 http_msg.create_content_buffer();
01486 return;
01487 case PARSE_END:
01488 http_msg.set_is_valid(true);
01489 break;
01490 case PARSE_HEADERS:
01491 case PARSE_FOOTERS:
01492 http_msg.set_is_valid(false);
01493 update_message_with_header_data(http_msg);
01494 http_msg.set_content_length(0);
01495 http_msg.create_content_buffer();
01496 break;
01497 case PARSE_CONTENT:
01498 http_msg.set_is_valid(false);
01499 if (get_content_bytes_read() < m_max_content_length)
01500 http_msg.set_content_length(get_content_bytes_read());
01501 break;
01502 case PARSE_CHUNKS:
01503 http_msg.set_is_valid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START);
01504 if (!m_payload_handler)
01505 http_msg.concatenate_chunks();
01506 break;
01507 case PARSE_CONTENT_NO_LENGTH:
01508 http_msg.set_is_valid(true);
01509 if (!m_payload_handler)
01510 http_msg.concatenate_chunks();
01511 break;
01512 }
01513
01514 compute_msg_status(http_msg, http_msg.is_valid());
01515
01516 if (is_parsing_request() && !m_payload_handler && !m_parse_headers_only) {
01517
01518
01519
01520 http::request& http_request(dynamic_cast<http::request&>(http_msg));
01521 const std::string& content_type_header = http_request.get_header(http::types::HEADER_CONTENT_TYPE);
01522 if (content_type_header.compare(0, http::types::CONTENT_TYPE_URLENCODED.length(),
01523 http::types::CONTENT_TYPE_URLENCODED) == 0)
01524 {
01525 if (! parse_url_encoded(http_request.get_queries(),
01526 http_request.get_content(),
01527 http_request.get_content_length()))
01528 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST urlencoded)");
01529 } else if (content_type_header.compare(0, http::types::CONTENT_TYPE_MULTIPART_FORM_DATA.length(),
01530 http::types::CONTENT_TYPE_MULTIPART_FORM_DATA) == 0)
01531 {
01532 if (! parse_multipart_form_data(http_request.get_queries(),
01533 content_type_header,
01534 http_request.get_content(),
01535 http_request.get_content_length()))
01536 PION_LOG_WARN(m_logger, "Request form data parsing failed (POST multipart)");
01537 }
01538 }
01539 }
01540
01541 void parser::compute_msg_status(http::message& http_msg, bool msg_parsed_ok )
01542 {
01543 http::message::data_status_t st = http::message::STATUS_NONE;
01544
01545 if(http_msg.has_missing_packets()) {
01546 st = http_msg.has_data_after_missing_packets() ?
01547 http::message::STATUS_PARTIAL : http::message::STATUS_TRUNCATED;
01548 } else {
01549 st = msg_parsed_ok ? http::message::STATUS_OK : http::message::STATUS_TRUNCATED;
01550 }
01551
01552 http_msg.set_status(st);
01553 }
01554
01555 void parser::create_error_category(void)
01556 {
01557 static error_category_t UNIQUE_ERROR_CATEGORY;
01558 m_error_category_ptr = &UNIQUE_ERROR_CATEGORY;
01559 }
01560
01561 bool parser::parse_forwarded_for(const std::string& header, std::string& public_ip)
01562 {
01563
01564 static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}");
01565
01571 static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}");
01572
01573
01574 if (header.empty())
01575 return false;
01576
01577
01578 boost::match_results<std::string::const_iterator> m;
01579 std::string::const_iterator start_it = header.begin();
01580
01581
01582 while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) {
01583
01584 std::string ip_str(m[0].first, m[0].second);
01585
01586 if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) {
01587
01588 public_ip = ip_str;
01589 return true;
01590 }
01591
01592 start_it = m[0].second;
01593 }
01594
01595
01596 return false;
01597 }
01598
01599 }
01600 }