1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // FS-safe encoding of module paths and versions. 6 // Copied from cmd/go/internal/module and unexported. 7 8 package sumweb 9 10 import ( 11 "fmt" 12 "unicode/utf8" 13 ) 14 15 // Safe encodings 16 // 17 // Module paths appear as substrings of file system paths 18 // (in the download cache) and of web server URLs in the proxy protocol. 19 // In general we cannot rely on file systems to be case-sensitive, 20 // nor can we rely on web servers, since they read from file systems. 21 // That is, we cannot rely on the file system to keep rsc.io/QUOTE 22 // and rsc.io/quote separate. Windows and macOS don't. 23 // Instead, we must never require two different casings of a file path. 24 // Because we want the download cache to match the proxy protocol, 25 // and because we want the proxy protocol to be possible to serve 26 // from a tree of static files (which might be stored on a case-insensitive 27 // file system), the proxy protocol must never require two different casings 28 // of a URL path either. 29 // 30 // One possibility would be to make the safe encoding be the lowercase 31 // hexadecimal encoding of the actual path bytes. This would avoid ever 32 // needing different casings of a file path, but it would be fairly illegible 33 // to most programmers when those paths appeared in the file system 34 // (including in file paths in compiler errors and stack traces) 35 // in web server logs, and so on. Instead, we want a safe encoding that 36 // leaves most paths unaltered. 37 // 38 // The safe encoding is this: 39 // replace every uppercase letter with an exclamation mark 40 // followed by the letter's lowercase equivalent. 41 // 42 // For example, 43 // github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go. 
//	github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
//	github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus.
//
// Import paths that avoid upper-case letters are left unchanged.
// Note that because import paths are ASCII-only and avoid various
// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
// and avoids the same problematic punctuation.
//
// Import paths have never allowed exclamation marks, so there is no
// need to define how to encode a literal !.
//
// Although paths are disallowed from using Unicode (see pathOK in
// cmd/go/internal/module, from which this file was copied),
// the eventual plan is to allow Unicode letters as well, to assume that
// file systems and URLs are Unicode-safe (storing UTF-8), and apply
// the !-for-uppercase convention. Note however that not all runes that
// are different but case-fold equivalent are an upper/lower pair.
// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
// are considered to case-fold to each other. When we do add Unicode
// letters, we must not assume that upper/lower are the only case-equivalent pairs.
// Perhaps the Kelvin symbol would be disallowed entirely, for example.
// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
//
// Also, it would be nice to allow Unicode marks as well as letters,
// but marks include combining marks, and then we must deal not
// only with case folding but also normalization: both U+00E9 ('é')
// and U+0065 U+0301 ('e' followed by combining acute accent)
// look the same on the page and are treated by some file systems
// as the same path. If we do allow Unicode marks in paths, there
// must be some kind of normalization to allow only one canonical
// encoding of any character used in an import path.

// encodePath returns the safe encoding of the given module path.
// It fails if the path contains an exclamation mark or a non-ASCII character.
func encodePath(path string) (encoding string, err error) {
	// Paths and versions share a single encoding; encodeString implements
	// it and decides which inputs are rejected.
	return encodeString(path)
}

// encodeVersion returns the safe encoding of the module version v.
// The version does not have to be in semver form. It is encoded by
// encodeString, so it is rejected if it contains an exclamation mark
// or a non-ASCII byte.
func encodeVersion(v string) (encoding string, err error) {
	return encodeString(v)
}

// encodeString returns s with every ASCII uppercase letter replaced by
// an exclamation mark followed by the corresponding lowercase letter.
// A string without uppercase letters is returned unchanged.
//
// It returns an error if s contains '!' or any byte outside the ASCII
// range (which includes every byte of a multi-byte or invalid UTF-8
// sequence).
func encodeString(s string) (encoding string, err error) {
	// Validation pass. Once it succeeds, s is known to be pure ASCII, so
	// working byte by byte is the same as working rune by rune.
	upper := 0
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == '!' || c >= utf8.RuneSelf:
			// Callers are expected to have validated s already, but
			// diagnose anyway: the rewriting pass below relies on it.
			return "", fmt.Errorf("internal error: inconsistency in EncodePath")
		case 'A' <= c && c <= 'Z':
			upper++
		}
	}

	if upper == 0 {
		return s, nil
	}

	// Every uppercase letter grows the result by exactly one byte.
	out := make([]byte, 0, len(s)+upper)
	for i := 0; i < len(s); i++ {
		c := s[i]
		if 'A' <= c && c <= 'Z' {
			out = append(out, '!', c+'a'-'A')
			continue
		}
		out = append(out, c)
	}
	return string(out), nil
}

// decodePath returns the module path represented by the given safe
// encoding. It returns an error if decodeString rejects the encoding.
func decodePath(encoding string) (path string, err error) {
	if decoded, ok := decodeString(encoding); ok {
		return decoded, nil
	}
	return "", fmt.Errorf("invalid module path encoding %q", encoding)
}

// decodeVersion returns the version string represented by the given
// safe encoding. It returns an error if decodeString rejects the
// encoding. The version does not have to be in semver form.
func decodeVersion(encoding string) (v string, err error) {
	if decoded, ok := decodeString(encoding); ok {
		return decoded, nil
	}
	return "", fmt.Errorf("invalid version encoding %q", encoding)
}

// decodeString reverses encodeString: every '!' followed by a lowercase
// ASCII letter becomes the corresponding uppercase letter and all other
// bytes are copied through.
//
// The boolean result is false, and the string result empty, if the
// encoding contains a non-ASCII byte, contains an uppercase ASCII
// letter, or contains a '!' that is not immediately followed by a
// lowercase ASCII letter (including a '!' at the very end).
func decodeString(encoding string) (string, bool) {
	// The decoded form is never longer than the encoded one.
	out := make([]byte, 0, len(encoding))

	for i := 0; i < len(encoding); i++ {
		c := encoding[i]
		switch {
		case c >= utf8.RuneSelf, 'A' <= c && c <= 'Z':
			return "", false
		case c == '!':
			// Consume the escaped letter together with its marker.
			i++
			if i == len(encoding) {
				return "", false
			}
			letter := encoding[i]
			if letter < 'a' || 'z' < letter {
				return "", false
			}
			out = append(out, letter+'A'-'a')
		default:
			out = append(out, c)
		}
	}
	return string(out), true
}