/* rfc3986.dcg: a transcription of the grammar in RFC * into DCG form. * * Revisions: * 2005-06-09 : MSM : made file */ uri --> scheme, ":", hier_part, ("?", query ; []), ("#", fragment ; []). hier_part --> ( "//", authority, path_abempty ) | path_absolute | path_rootless | path_empty. uri_reference --> uri | relative_ref. absolute_uri --> scheme, ":", hier_part, ( "?", query ; []). relative_ref --> relative_part, ( "?", query ; [] ), ( "#", fragment ; []). relative_part --> ( "//", authority, path_abempty ) | path_absolute | path_noscheme | path_empty. scheme --> alpha, star_alphaetc. star_alphaetc --> [] ; ( alpha | digit | "+" | "-" | "." ), star_alphaetc. authority --> ( userinfo, "@" ; []), host, ( ":", port ; []). userinfo --> [] | ( unreserved | pct_encoded | sub_delims | ":" ), userinfo. host --> ip_literal | ipv4address | reg_name. port --> [] | digit, port. ip_literal --> "[", ( ipv6address | ipvFuture ), "]". ipvFuture --> "v", hexdigits, ".", plus_unsuco. hexdigits --> hexdig | hexdig, hexdigits. plus_unsuco --> unsuco | unsuco, plus_unsuco. unsuco --> unreserved | sub_delims | ":". ipv6address --> ( rpt(6,h16colon), ls32 ) | ( "::", rpt(5,h16colon), ls32 ) | ( (h16 | []), "::", rpt(4,h16colon), ls32 ) | ( (max(1,h16colon), h16 ; []), "::", rpt(3,h16colon), ls32 ) | ( (max(2,h16colon), h16 ; []), "::", rpt(2,h16colon), ls32 ) | ( (max(3,h16colon), h16 ; []), "::", h16colon, ls32 ) | ( (max(4,h16colon), h16 ; []), "::", ls32 ) | ( (max(5,h16colon), h16 ; []), "::", h16 ) | ( (max(6,h16colon), h16 ; []), "::" ). /* 6( h16 ":" ) ls32 | "::" 5( h16 ":" ) ls32 | [ h16 ] "::" 4( h16 ":" ) ls32. | [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 | [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 | [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 | [ *4( h16 ":" ) h16 ] "::" ls32 | [ *5( h16 ":" ) h16 ] "::" h16 | [ *6( h16 ":" ) h16 ] "::" */ rpt(N0,h16colon) --> { N0 > 0, N is N0 - 1 }, h16, "::", rpt(N,h16colon). rpt(0,h16colon) --> []. h16colon --> h16, ":". max(N0,h16colon) --> { N0 > 0, N is N0 - 1 }, ( h16, "::", max(N,h16colon) ; []). max(0,h16colon) --> []. h16 --> hexdig, ( hexdig, (hexdig, (hexdig | []) | []) | []). ls32 --> ( h16, ":", h16 ) | ipv4address. ipv4address --> dec_octet, ".", dec_octet, ".", dec_octet, ".", dec_octet. dec_octet --> digit %%% 0-9 | ( nonzerodigit, digit ) %%% 10-99 | ( "1", digit, digit ) %%% 100-199 | ( "2", z_4, digit ) %%% 200-249 | ( "25", z_5 ) %%% 250-255 . digit --> "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9". nonzerodigit --> "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9". z_4 --> "0" | "1" | "2" | "3" | "4". z_5 --> "0" | "1" | "2" | "3" | "4" | "5". reg_name --> star_rname. star_rname --> [] | rname, star_rname. rname --> unreserved | pct_encoded | sub_delims. path --> path_abempty %%% begins with "|" or is empty | path_absolute %%% begins with "/" but not "//" | path_noscheme %%% begins with a non-colon segment | path_rootless %%% begins with a segment | path_empty %%% zero characters . path_abempty --> [] | ( "/", segment), path_abempty. path_absolute --> "/", ( segment_nz, path_abempty | []). path_noscheme --> segment_nz_nc, path_abempty. path_rootless --> segment_nz, path_abempty. path_empty --> []. %% "0" seems an odd way to put i. segment --> [] | pchar, segment. segment_nz --> pchar | pchar, segment_nz. segment_nz_nc --> segbit | segbit, segment_nz_nc. %%% non_zero-length segment without any colon ":" segbit --> unreserved | pct_encoded | sub_delims | "@". pchar --> unreserved | pct_encoded | sub_delims | ":" | "@". query --> [] | ( pchar | "/" | "?" ), query. fragment --> [] | ( pchar | "/" | "?" ), fragment. pct_encoded --> "%", hexdig, hexdig. unreserved --> alpha | digit | "-" | "." | "_" | "~". reserved --> gen_delims | sub_delims. gen_delims --> ":" | "/" | "?" | "#" | "[" | "]" | "@". sub_delims --> "!" | "$" | "&" | "'" | "(" | ")" | "*" | "+" | "," | ";" | "=". /* from RFC 2234 */ /* for real ... */ hexdig --> digit | "A" | "B" | "C" | "D" | "E" | "F". alpha --> [C], { code_type(C,alpha), code_type(C,ascii) }. /* for generation hexdig --> "5" | "A" | "F". alpha --> "A" | "K" | "Z" | "h" | "t" | "p" | "f". */ randomstring(0,[]). randomstring(Len,[C|S]) :- Len > 0, Len1 is Len - 1, C is 32 + random(95), randomstring(Len1,S). /* teststring/2: generate a random string (code list) of length N. * This backtracks, so it can be used in a generate-test loop: * * teststring(10,S), uri(S,[]), name(A,S). * * generates ten-character URIs, while * * teststring(10,S), not(uri(S,[])), name(A,S). * * generates ten-character ASCII strings which are not URIs, just * to prove that there are some strings not accepted by this grammar. */ teststring(N,S) :- randomstring(N,S). teststring(N,S) :- teststring(N,S). runtests(N) :- runtests(N,30,1000,0,0). runtests(N,Len) :- runtests(N,Len,1000,0,0). runtests(N,Len,Mod) :- runtests(N,Len,Mod,0,0). runtests(0,_,_,Good,Bad) :- write('---------- 30 ----------'), nl, write(Good), write(' good, '), write(Bad), write(' bad.'), nl, write('---------- 30 ----------'), nl. runtests(N,Len,Mod,Good,Bad) :- N > 0, N1 is N - 1, N2 is N mod Mod, teststring(Len,S), ( uri_reference(S,[]) -> report(ok,N,S), Good1 is Good + 1, Bad1 is Bad ; ( N2 == 0 -> report('not ok', N, S) ; true ), Good1 is Good, Bad1 is Bad + 1 ), runtests(N1,Len,Mod,Good1,Bad1). report(How,N,S) :- write('Test '), write(N), tab(1), write(How), write(': '), atom_codes(A,S), write(A), nl.