Source code

Revision control

Other Tools

1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
* License, v. 2.0. If a copy of the MPL was not distributed with this
4
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
#include <string.h>
7
8
#include "mozilla/RangedPtr.h"
9
#include "mozilla/TextUtils.h"
10
11
#include "nsCRTGlue.h"
12
#include "nsURLParsers.h"
13
#include "nsURLHelper.h"
14
#include "nsString.h"
15
16
using namespace mozilla;
17
18
//----------------------------------------------------------------------------
19
20
static uint32_t CountConsecutiveSlashes(const char* str, int32_t len) {
21
RangedPtr<const char> p(str, len);
22
uint32_t count = 0;
23
while (len-- && *p++ == '/') ++count;
24
return count;
25
}
26
27
//----------------------------------------------------------------------------
28
// nsBaseURLParser implementation
29
//----------------------------------------------------------------------------
30
31
// nsAuthURLParser is registered with nsIURLParser as its interface.
// NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; presumably it
// generates the nsISupports boilerplate -- confirm against its definition.
NS_IMPL_ISUPPORTS(nsAuthURLParser, nsIURLParser)
32
// nsNoAuthURLParser is registered with nsIURLParser as its interface.
// NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; presumably it
// generates the nsISupports boilerplate -- confirm against its definition.
NS_IMPL_ISUPPORTS(nsNoAuthURLParser, nsIURLParser)
33
34
// Writes a component's position and length through the out-pointers named
// |<component>Pos| and |<component>Len| in the enclosing scope. Either
// pointer may be null, in which case that half of the result is skipped.
#define SET_RESULT(component, position, length)        \
  PR_BEGIN_MACRO                                       \
  if (component##Pos) {                                \
    *component##Pos = static_cast<uint32_t>(position); \
  }                                                    \
  if (component##Len) {                                \
    *component##Len = static_cast<int32_t>(length);    \
  }                                                    \
  PR_END_MACRO
39
40
// Advances the position already stored through |<component>Pos| by |delta|.
// A null |<component>Pos| is left alone.
#define OFFSET_RESULT(component, delta) \
  PR_BEGIN_MACRO                        \
  if (component##Pos) {                 \
    *component##Pos += (delta);         \
  }                                     \
  PR_END_MACRO
44
45
// Splits |spec| into its scheme, authority and path components.
//
// A negative |specLen| means "use strlen(spec)". Leading ' ', '\t', '\r' and
// '\n' characters are skipped, and trailing bytes <= ' ' are ignored. The
// positions written for scheme, authority and path are adjusted by the number
// of leading characters skipped. A scheme length of -1 means "no scheme".
// Every out-parameter may be null.
//
// Returns NS_ERROR_INVALID_POINTER when |spec| is null and
// NS_ERROR_MALFORMED_URI when the text before the first ':' is rejected by
// net_IsValidScheme or is directly followed by a second ':'. Otherwise NS_OK.
NS_IMETHODIMP
nsBaseURLParser::ParseURL(const char* spec, int32_t specLen,
                          uint32_t* schemePos, int32_t* schemeLen,
                          uint32_t* authorityPos, int32_t* authorityLen,
                          uint32_t* pathPos, int32_t* pathLen) {
  if (NS_WARN_IF(!spec)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (specLen < 0) specLen = strlen(spec);

  const char* stop = nullptr;
  const char* colon = nullptr;
  const char* slash = nullptr;
  const char* p = spec;
  uint32_t offset = 0;
  int32_t len = specLen;

  // skip leading whitespace. The scan is bounded by |len| so that it never
  // reads past the caller-supplied length and never drives |specLen|
  // negative when the buffer holds nothing but whitespace.
  while (len > 0 && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) {
    spec++;
    specLen--;
    offset++;

    p++;
    len--;
  }

  // find the first ':' or the first of '/', '?', '#', whichever comes first,
  // remembering the first '@' or '[' seen on the way.
  for (; len && *p && !colon && !slash; ++p, --len) {
    switch (*p) {
      case ':':
        if (!colon) colon = p;
        break;
      case '/':  // start of filepath
      case '?':  // start of query
      case '#':  // start of ref
        if (!slash) slash = p;
        break;
      case '@':  // username@hostname
      case '[':  // start of IPv6 address literal
        if (!stop) stop = p;
        break;
    }
  }
  // disregard the first colon if it follows an '@' or a '['
  if (colon && stop && colon > stop) colon = nullptr;

  // if the spec only contained whitespace ...
  if (specLen == 0) {
    SET_RESULT(scheme, 0, -1);
    SET_RESULT(authority, 0, 0);
    SET_RESULT(path, 0, 0);
    return NS_OK;
  }

  // ignore trailing whitespace and control characters
  for (p = spec + specLen - 1;
       (static_cast<unsigned char>(*p) <= ' ') && (p != spec); --p)
    ;

  specLen = p - spec + 1;

  if (colon && (colon < slash || !slash)) {
    //
    // spec = <scheme>:/<the-rest>
    //
    // or
    //
    // spec = <scheme>:<authority>
    // spec = <scheme>:<path-no-slashes>
    //
    // Only look at the character after the colon when it is still inside the
    // (trimmed) spec; a colon in the last position has nothing after it.
    const char* afterColon = colon + 1;
    const bool doubleColon = afterColon < spec + specLen && *afterColon == ':';
    if (!net_IsValidScheme(spec, colon - spec) || doubleColon) {
      return NS_ERROR_MALFORMED_URI;
    }
    SET_RESULT(scheme, offset, colon - spec);
    if (authorityLen || pathLen) {
      // number of characters consumed by "<scheme>:"
      uint32_t schemeSpan = afterColon - spec;
      offset += schemeSpan;
      ParseAfterScheme(afterColon, specLen - schemeSpan, authorityPos,
                       authorityLen, pathPos, pathLen);
      OFFSET_RESULT(authority, offset);
      OFFSET_RESULT(path, offset);
    }
  } else {
    //
    // spec = <authority-no-port-or-password>/<path>
    // spec = <path>
    //
    // or
    //
    // spec = <authority-no-port-or-password>/<path-with-colon>
    // spec = <path-with-colon>
    //
    // or
    //
    // spec = <authority-no-port-or-password>
    // spec = <path-no-slashes-or-colon>
    //
    SET_RESULT(scheme, 0, -1);
    if (authorityLen || pathLen) {
      ParseAfterScheme(spec, specLen, authorityPos, authorityLen, pathPos,
                       pathLen);
      OFFSET_RESULT(authority, offset);
      OFFSET_RESULT(path, offset);
    }
  }
  return NS_OK;
}
152
153
// Base-class authority parsing: the entire authority is reported as the
// hostname. Username and password are reported with length -1 and the port
// as -1. A negative |authLen| means "use strlen(auth)".
//
// Returns NS_ERROR_INVALID_POINTER when |auth| is null, NS_OK otherwise.
NS_IMETHODIMP
nsBaseURLParser::ParseAuthority(const char* auth, int32_t authLen,
                                uint32_t* usernamePos, int32_t* usernameLen,
                                uint32_t* passwordPos, int32_t* passwordLen,
                                uint32_t* hostnamePos, int32_t* hostnameLen,
                                int32_t* port) {
  if (NS_WARN_IF(!auth)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (authLen < 0) {
    authLen = strlen(auth);
  }

  if (port) {
    *port = -1;
  }
  SET_RESULT(hostname, 0, authLen);
  SET_RESULT(password, 0, -1);
  SET_RESULT(username, 0, -1);
  return NS_OK;
}
171
172
// Base-class user-info parsing: the input is not inspected. Username and
// password are both reported at position 0 with length -1. Always NS_OK.
NS_IMETHODIMP
nsBaseURLParser::ParseUserInfo(const char* userinfo, int32_t userinfoLen,
                               uint32_t* usernamePos, int32_t* usernameLen,
                               uint32_t* passwordPos, int32_t* passwordLen) {
  SET_RESULT(password, 0, -1);
  SET_RESULT(username, 0, -1);
  return NS_OK;
}
180
181
// Base-class server-info parsing: the input is not inspected. The hostname
// is reported at position 0 with length -1 and the port as -1. Always NS_OK.
NS_IMETHODIMP
nsBaseURLParser::ParseServerInfo(const char* serverinfo, int32_t serverinfoLen,
                                 uint32_t* hostnamePos, int32_t* hostnameLen,
                                 int32_t* port) {
  if (port) {
    *port = -1;
  }
  SET_RESULT(hostname, 0, -1);
  return NS_OK;
}
189
190
// Splits |path| into file path, query and ref:
//
//   path = [/]<segment1>/<segment2>/<...>/<segmentN>?<query>#<ref>
//
// The query starts after the first '?' that appears before any '#'. The ref
// starts after the first '#'. A component that is absent is reported with
// length -1, and so is an empty file path. A negative |pathLen| means
// "use strlen(path)".
//
// Returns NS_ERROR_INVALID_POINTER when |path| is null, NS_OK otherwise.
NS_IMETHODIMP
nsBaseURLParser::ParsePath(const char* path, int32_t pathLen,
                           uint32_t* filepathPos, int32_t* filepathLen,
                           uint32_t* queryPos, int32_t* queryLen,
                           uint32_t* refPos, int32_t* refLen) {
  if (NS_WARN_IF(!path)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (pathLen < 0) {
    pathLen = strlen(path);
  }

  const char* const pathEnd = path + pathLen;

  // One forward pass: remember the first '?', stop at the first '#'.
  const char* queryStart = nullptr;
  const char* refStart = nullptr;
  for (const char* cur = path; cur < pathEnd; ++cur) {
    if (*cur == '#') {
      refStart = cur + 1;
      break;
    }
    if (*cur == '?' && !queryStart) {
      queryStart = cur + 1;
    }
  }

  if (!queryStart) {
    SET_RESULT(query, 0, -1);
  } else {
    // the query runs up to the '#' if there is one, else to the end
    const char* const queryStop = refStart ? refStart - 1 : pathEnd;
    SET_RESULT(query, queryStart - path, queryStop - queryStart);
  }

  if (!refStart) {
    SET_RESULT(ref, 0, -1);
  } else {
    SET_RESULT(ref, refStart - path, pathEnd - refStart);
  }

  // the file path stops at whichever delimiter came first
  const char* fileStop = pathEnd;
  if (queryStart) {
    fileStop = queryStart - 1;
  } else if (refStart) {
    fileStop = refStart - 1;
  }

  // an empty file path is no file path
  if (fileStop == path) {
    SET_RESULT(filepath, 0, -1);
  } else {
    SET_RESULT(filepath, 0, fileStop - path);
  }
  return NS_OK;
}
248
249
// Splits |filepath| into directory, basename and extension.
//
// The directory is everything up to and including the last '/'. The rest is
// handed to ParseFileName. A trailing "/." or "/.." is counted as part of
// the directory. An empty |filepath| yields no directory, a zero-length
// basename and no extension. A negative |filepathLen| means
// "use strlen(filepath)".
//
// Returns NS_ERROR_INVALID_POINTER when |filepath| is null, NS_OK otherwise.
NS_IMETHODIMP
nsBaseURLParser::ParseFilePath(const char* filepath, int32_t filepathLen,
                               uint32_t* directoryPos, int32_t* directoryLen,
                               uint32_t* basenamePos, int32_t* basenameLen,
                               uint32_t* extensionPos, int32_t* extensionLen) {
  if (NS_WARN_IF(!filepath)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (filepathLen < 0) {
    filepathLen = strlen(filepath);
  }

  if (filepathLen == 0) {
    SET_RESULT(directory, 0, -1);
    SET_RESULT(basename, 0, 0);  // assume a zero length file basename
    SET_RESULT(extension, 0, -1);
    return NS_OK;
  }

  const char* const end = filepath + filepathLen;

  // walk backwards to the last '/', or to the first character if none
  const char* lastSlash = end - 1;
  while (*lastSlash != '/' && lastSlash > filepath) {
    --lastSlash;
  }

  if (*lastSlash != '/') {
    // filepath = <filename>.<extension>
    SET_RESULT(directory, 0, -1);
    ParseFileName(filepath, filepathLen, basenamePos, basenameLen,
                  extensionPos, extensionLen);
    return NS_OK;
  }

  // filepath = <directory><filename>.<extension>
  const char* name = lastSlash + 1;
  const auto nameLen = end - name;
  const bool isDot = nameLen == 1 && name[0] == '.';
  const bool isDotDot = nameLen == 2 && name[0] == '.' && name[1] == '.';
  if (isDot || isDotDot) {
    // catch /.. and /. : they belong to the directory, leaving no filename
    name = end;
  }

  const auto nameOffset = name - filepath;
  SET_RESULT(directory, 0, nameOffset);
  ParseFileName(name, end - name, basenamePos, basenameLen, extensionPos,
                extensionLen);
  OFFSET_RESULT(basename, nameOffset);
  OFFSET_RESULT(extension, nameOffset);
  return NS_OK;
}
292
293
// Splits |filename| into basename and extension at its last '.'.
//
// A '.' in the first position does not start an extension, and a filename
// that ends with '.' has no extension. With no extension the whole filename
// is the basename and the extension length is -1. An empty filename yields a
// zero-length basename and no extension. A negative |filenameLen| means
// "use strlen(filename)".
//
// Returns NS_ERROR_INVALID_POINTER when |filename| is null, NS_OK otherwise.
nsresult nsBaseURLParser::ParseFileName(
    const char* filename, int32_t filenameLen, uint32_t* basenamePos,
    int32_t* basenameLen, uint32_t* extensionPos, int32_t* extensionLen) {
  if (NS_WARN_IF(!filename)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (filenameLen < 0) filenameLen = strlen(filename);

  // An empty filename has no last character to inspect: skip the search so
  // that filename[-1] is never read. ParseFilePath passes a length of 0 for
  // paths that end in '/', "/." or "/..".
  //
  // no extension if filename ends with a '.'
  if (filenameLen > 0 && filename[filenameLen - 1] != '.') {
    // ignore '.' at the beginning
    for (const char* p = filename + filenameLen - 1; p > filename; --p) {
      if (*p == '.') {
        // filename = <basename.extension>
        SET_RESULT(basename, 0, p - filename);
        SET_RESULT(extension, p + 1 - filename,
                   filenameLen - (p - filename + 1));
        return NS_OK;
      }
    }
  }
  // filename = <basename>
  SET_RESULT(basename, 0, filenameLen);
  SET_RESULT(extension, 0, -1);
  return NS_OK;
}
320
321
//----------------------------------------------------------------------------
322
// nsNoAuthURLParser implementation
323
//----------------------------------------------------------------------------
324
325
// Not expected to be called on this parser: it asserts that it is
// unreachable and returns NS_ERROR_UNEXPECTED without touching any
// out-parameter.
NS_IMETHODIMP
nsNoAuthURLParser::ParseAuthority(const char* auth, int32_t authLen,
                                  uint32_t* usernamePos, int32_t* usernameLen,
                                  uint32_t* passwordPos, int32_t* passwordLen,
                                  uint32_t* hostnamePos, int32_t* hostnameLen,
                                  int32_t* port) {
  MOZ_ASSERT_UNREACHABLE("Shouldn't parse auth in a NoAuthURL!");

  return NS_ERROR_UNEXPECTED;
}
334
335
void nsNoAuthURLParser::ParseAfterScheme(const char* spec, int32_t specLen,
336
uint32_t* authPos, int32_t* authLen,
337
uint32_t* pathPos, int32_t* pathLen) {
338
MOZ_ASSERT(specLen >= 0, "unexpected");
339
340
// everything is the path
341
uint32_t pos = 0;
342
switch (CountConsecutiveSlashes(spec, specLen)) {
343
case 0:
344
case 1:
345
break;
346
case 2: {
347
const char* p = nullptr;
348
if (specLen > 2) {
349
// looks like there is an authority section
350
351
// if the authority looks like a drive number then we
352
// really want to treat it as part of the path
353
// [a-zA-Z][:|]{/\}
354
// i.e one of: c: c:\foo c:/foo c| c|\foo c|/foo
355
if ((specLen > 3) && (spec[3] == ':' || spec[3] == '|') &&
356
IsAsciiAlpha(spec[2]) &&
357
((specLen == 4) || (spec[4] == '/') || (spec[4] == '\\'))) {
358
pos = 1;
359
break;
360
}
361
// Ignore apparent authority; path is everything after it
362
for (p = spec + 2; p < spec + specLen; ++p) {
363
if (*p == '/' || *p == '?' || *p == '#') break;
364
}
365
}
366
SET_RESULT(auth, 0, -1);
367
if (p && p != spec + specLen)
368
SET_RESULT(path, p - spec, specLen - (p - spec));
369
else
370
SET_RESULT(path, 0, -1);
371
return;
372
}
373
default:
374
pos = 2;
375
break;
376
}
377
SET_RESULT(auth, pos, 0);
378
SET_RESULT(path, pos, specLen - pos);
379
}
380
381
#if defined(XP_WIN)
382
// Windows-only override. A file path that is nothing but a drive specifier
// ("c:" or "c|", with or without a leading '/') is reported entirely as the
// directory, with no basename and no extension. Every other path goes
// through nsBaseURLParser::ParseFilePath. A negative |filepathLen| means
// "use strlen(filepath)".
//
// Returns NS_ERROR_INVALID_POINTER when |filepath| is null.
NS_IMETHODIMP
nsNoAuthURLParser::ParseFilePath(const char* filepath, int32_t filepathLen,
                                 uint32_t* directoryPos, int32_t* directoryLen,
                                 uint32_t* basenamePos, int32_t* basenameLen,
                                 uint32_t* extensionPos,
                                 int32_t* extensionLen) {
  if (NS_WARN_IF(!filepath)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (filepathLen < 0) {
    filepathLen = strlen(filepath);
  }

  // look for a filepath consisting of only a drive number, which may or
  // may not have a leading slash.
  if (filepathLen == 2 || filepathLen == 3) {
    const char* const drive = (filepath[0] == '/') ? filepath + 1 : filepath;
    const bool twoCharsLeft = (filepath + filepathLen) - drive == 2;
    if (twoCharsLeft && (drive[1] == ':' || drive[1] == '|') &&
        IsAsciiAlpha(drive[0])) {
      // filepath = <drive-number>:
      SET_RESULT(directory, 0, filepathLen);
      SET_RESULT(basename, 0, -1);
      SET_RESULT(extension, 0, -1);
      return NS_OK;
    }
  }

  // otherwise fallback on common implementation
  return nsBaseURLParser::ParseFilePath(filepath, filepathLen, directoryPos,
                                        directoryLen, basenamePos, basenameLen,
                                        extensionPos, extensionLen);
}
414
#endif
415
416
//----------------------------------------------------------------------------
417
// nsAuthURLParser implementation
418
//----------------------------------------------------------------------------
419
420
// Splits |auth| into user info and server info at its last '@', then
// delegates to ParseUserInfo and ParseServerInfo. Without an '@' the whole
// authority is server info and username/password are reported as absent
// (-1). An empty authority yields an empty hostname and port -1. A negative
// |authLen| means "use strlen(auth)".
//
// Returns NS_ERROR_INVALID_POINTER when |auth| is null and
// NS_ERROR_MALFORMED_URI when an '@' is present, the caller asked for the
// username or password position, and no non-empty hostname was reported.
// Failures from ParseUserInfo and ParseServerInfo are passed through.
NS_IMETHODIMP
nsAuthURLParser::ParseAuthority(const char* auth, int32_t authLen,
                                uint32_t* usernamePos, int32_t* usernameLen,
                                uint32_t* passwordPos, int32_t* passwordLen,
                                uint32_t* hostnamePos, int32_t* hostnameLen,
                                int32_t* port) {
  nsresult rv;

  if (NS_WARN_IF(!auth)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (authLen < 0) authLen = strlen(auth);

  if (authLen == 0) {
    SET_RESULT(username, 0, -1);
    SET_RESULT(password, 0, -1);
    SET_RESULT(hostname, 0, 0);
    if (port) *port = -1;
    return NS_OK;
  }

  // search backwards for @
  const char* p = auth + authLen - 1;
  for (; (*p != '@') && (p > auth); --p) {
  }
  if (*p == '@') {
    // auth = <user-info@server-info>
    rv = ParseUserInfo(auth, p - auth, usernamePos, usernameLen, passwordPos,
                       passwordLen);
    if (NS_FAILED(rv)) return rv;
    rv = ParseServerInfo(p + 1, authLen - (p - auth + 1), hostnamePos,
                         hostnameLen, port);
    if (NS_FAILED(rv)) return rv;
    OFFSET_RESULT(hostname, p + 1 - auth);

    // malformed if has a username or password
    // but no host info, such as: http://u:p@/
    //
    // |hostnameLen| is an optional out-parameter like all the others, so it
    // is checked before being dereferenced; a missing |hostnameLen| is
    // treated the same way as a missing |hostnamePos|.
    if ((usernamePos || passwordPos) &&
        (!hostnamePos || !hostnameLen || !*hostnameLen)) {
      return NS_ERROR_MALFORMED_URI;
    }
  } else {
    // auth = <server-info>
    SET_RESULT(username, 0, -1);
    SET_RESULT(password, 0, -1);
    rv = ParseServerInfo(auth, authLen, hostnamePos, hostnameLen, port);
    if (NS_FAILED(rv)) return rv;
  }
  return NS_OK;
}
470
471
// Splits |userinfo| into username and password at its first ':'.
//
// Without a ':' the whole string is the username and the password is absent
// (-1). An empty |userinfo| reports both as absent. A negative
// |userinfoLen| means "use strlen(userinfo)".
//
// Returns NS_ERROR_INVALID_POINTER when |userinfo| is null, NS_OK otherwise.
NS_IMETHODIMP
nsAuthURLParser::ParseUserInfo(const char* userinfo, int32_t userinfoLen,
                               uint32_t* usernamePos, int32_t* usernameLen,
                               uint32_t* passwordPos, int32_t* passwordLen) {
  if (NS_WARN_IF(!userinfo)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (userinfoLen < 0) {
    userinfoLen = strlen(userinfo);
  }

  if (userinfoLen == 0) {
    SET_RESULT(username, 0, -1);
    SET_RESULT(password, 0, -1);
    return NS_OK;
  }

  const char* const separator =
      static_cast<const char*>(memchr(userinfo, ':', userinfoLen));
  if (!separator) {
    // userinfo = <username>
    SET_RESULT(username, 0, userinfoLen);
    SET_RESULT(password, 0, -1);
    return NS_OK;
  }

  // userinfo = <username:password>
  const auto passwordStart = separator - userinfo + 1;
  SET_RESULT(username, 0, separator - userinfo);
  SET_RESULT(password, passwordStart, userinfoLen - passwordStart);
  return NS_OK;
}
499
500
// Splits |serverinfo| into hostname and port.
//
// The string is scanned backwards, stopping before its first character. The
// port separator is the leftmost ':' found to the right of every ']', so
// colons inside a bracketed IPv6 literal are not mistaken for it. A space
// anywhere after the first character is rejected. The port must consist of
// decimal digits only and fit in a uint16_t. An empty or missing port is
// reported as -1. A hostname of the form "[...]" is additionally checked
// with net_IsValidIPv6Addr. A negative |serverinfoLen| means
// "use strlen(serverinfo)".
//
// Returns NS_ERROR_INVALID_POINTER when |serverinfo| is null,
// NS_ERROR_MALFORMED_URI for any of the rejections above, NS_OK otherwise.
NS_IMETHODIMP
nsAuthURLParser::ParseServerInfo(const char* serverinfo, int32_t serverinfoLen,
                                 uint32_t* hostnamePos, int32_t* hostnameLen,
                                 int32_t* port) {
  if (NS_WARN_IF(!serverinfo)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (serverinfoLen < 0) serverinfoLen = strlen(serverinfo);

  if (serverinfoLen == 0) {
    SET_RESULT(hostname, 0, 0);
    if (port) *port = -1;
    return NS_OK;
  }

  // search backwards for a ':' but stop on ']' (IPv6 address literal
  // delimiter). check for illegal characters in the hostname.
  const char* p = serverinfo + serverinfoLen - 1;
  const char *colon = nullptr, *bracket = nullptr;
  for (; p > serverinfo; --p) {
    switch (*p) {
      case ']':
        bracket = p;
        break;
      case ':':
        if (bracket == nullptr) colon = p;
        break;
      case ' ':
        // hostname must not contain a space
        return NS_ERROR_MALFORMED_URI;
    }
  }

  // The hostname always starts at position 0 of |serverinfo|. Its length is
  // kept in a local so that the IPv6 check below does not depend on the
  // optional |hostnamePos| / |hostnameLen| out-pointers being non-null.
  int32_t hostLen;

  if (colon) {
    // serverinfo = <hostname:port>
    hostLen = int32_t(colon - serverinfo);
    SET_RESULT(hostname, 0, hostLen);
    if (port) {
      // XXX unfortunately ToInteger is not defined for substrings
      nsAutoCString buf(colon + 1, serverinfoLen - (colon + 1 - serverinfo));
      if (buf.Length() == 0) {
        *port = -1;
      } else {
        const char* nondigit = NS_strspnp("0123456789", buf.get());
        if (nondigit && *nondigit) return NS_ERROR_MALFORMED_URI;

        nsresult err;
        *port = buf.ToInteger(&err);
        if (NS_FAILED(err) || *port < 0 ||
            *port > std::numeric_limits<uint16_t>::max())
          return NS_ERROR_MALFORMED_URI;
      }
    }
  } else {
    // serverinfo = <hostname>
    hostLen = serverinfoLen;
    SET_RESULT(hostname, 0, hostLen);
    if (port) *port = -1;
  }

  // In case of IPv6 address check its validity
  if (hostLen > 1 && serverinfo[0] == '[' && serverinfo[hostLen - 1] == ']' &&
      !net_IsValidIPv6Addr(Substring(serverinfo + 1, hostLen - 2)))
    return NS_ERROR_MALFORMED_URI;

  return NS_OK;
}
568
569
void nsAuthURLParser::ParseAfterScheme(const char* spec, int32_t specLen,
570
uint32_t* authPos, int32_t* authLen,
571
uint32_t* pathPos, int32_t* pathLen) {
572
MOZ_ASSERT(specLen >= 0, "unexpected");
573
574
uint32_t nslash = CountConsecutiveSlashes(spec, specLen);
575
576
// search for the end of the authority section
577
const char* end = spec + specLen;
578
const char* p;
579
for (p = spec + nslash; p < end; ++p) {
580
if (*p == '/' || *p == '?' || *p == '#') break;
581
}
582
if (p < end) {
583
// spec = [/]<auth><path>
584
SET_RESULT(auth, nslash, p - (spec + nslash));
585
SET_RESULT(path, p - spec, specLen - (p - spec));
586
} else {
587
// spec = [/]<auth>
588
SET_RESULT(auth, nslash, specLen - nslash);
589
SET_RESULT(path, 0, -1);
590
}
591
}
592
593
//----------------------------------------------------------------------------
594
// nsStdURLParser implementation
595
//----------------------------------------------------------------------------
596
597
void nsStdURLParser::ParseAfterScheme(const char* spec, int32_t specLen,
598
uint32_t* authPos, int32_t* authLen,
599
uint32_t* pathPos, int32_t* pathLen) {
600
MOZ_ASSERT(specLen >= 0, "unexpected");
601
602
uint32_t nslash = CountConsecutiveSlashes(spec, specLen);
603
604
// search for the end of the authority section
605
const char* end = spec + specLen;
606
const char* p;
607
for (p = spec + nslash; p < end; ++p) {
608
if (strchr("/?#;", *p)) break;
609
}
610
switch (nslash) {
611
case 0:
612
case 2:
613
if (p < end) {
614
// spec = (//)<auth><path>
615
SET_RESULT(auth, nslash, p - (spec + nslash));
616
SET_RESULT(path, p - spec, specLen - (p - spec));
617
} else {
618
// spec = (//)<auth>
619
SET_RESULT(auth, nslash, specLen - nslash);
620
SET_RESULT(path, 0, -1);
621
}
622
break;
623
case 1:
624
// spec = /<path>
625
SET_RESULT(auth, 0, -1);
626
SET_RESULT(path, 0, specLen);
627
break;
628
default:
629
// spec = ///[/]<path>
630
SET_RESULT(auth, 2, 0);
631
SET_RESULT(path, 2, specLen - 2);
632
}
633
}