Source file src/pkg/cmd/go/internal/sumweb/encode.go

     1	// Copyright 2018 The Go Authors. All rights reserved.
     2	// Use of this source code is governed by a BSD-style
     3	// license that can be found in the LICENSE file.
     4	
     5	// FS-safe encoding of module paths and versions.
     6	// Copied from cmd/go/internal/module and unexported.
     7	
     8	package sumweb
     9	
    10	import (
    11		"fmt"
    12		"unicode/utf8"
    13	)
    14	
    15	// Safe encodings
    16	//
    17	// Module paths appear as substrings of file system paths
    18	// (in the download cache) and of web server URLs in the proxy protocol.
    19	// In general we cannot rely on file systems to be case-sensitive,
    20	// nor can we rely on web servers, since they read from file systems.
    21	// That is, we cannot rely on the file system to keep rsc.io/QUOTE
    22	// and rsc.io/quote separate. Windows and macOS don't.
    23	// Instead, we must never require two different casings of a file path.
    24	// Because we want the download cache to match the proxy protocol,
    25	// and because we want the proxy protocol to be possible to serve
    26	// from a tree of static files (which might be stored on a case-insensitive
    27	// file system), the proxy protocol must never require two different casings
    28	// of a URL path either.
    29	//
    30	// One possibility would be to make the safe encoding be the lowercase
    31	// hexadecimal encoding of the actual path bytes. This would avoid ever
    32	// needing different casings of a file path, but it would be fairly illegible
    33	// to most programmers when those paths appeared in the file system
    34	// (including in file paths in compiler errors and stack traces),
    35	// in web server logs, and so on. Instead, we want a safe encoding that
    36	// leaves most paths unaltered.
    37	//
    38	// The safe encoding is this:
    39	// replace every uppercase letter with an exclamation mark
    40	// followed by the letter's lowercase equivalent.
    41	//
    42	// For example,
    43	// github.com/Azure/azure-sdk-for-go ->  github.com/!azure/azure-sdk-for-go.
    44	// github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
    45	// github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus.
    46	//
    47	// Import paths that avoid upper-case letters are left unchanged.
    48	// Note that because import paths are ASCII-only and avoid various
    49	// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
    50	// and avoids the same problematic punctuation.
    51	//
    52	// Import paths have never allowed exclamation marks, so there is no
    53	// need to define how to encode a literal !.
    54	//
    55	// Although paths are disallowed from using Unicode (see pathOK in cmd/go/internal/module),
    56	// the eventual plan is to allow Unicode letters as well, to assume that
    57	// file systems and URLs are Unicode-safe (storing UTF-8), and apply
    58	// the !-for-uppercase convention. Note however that not all runes that
    59	// are different but case-fold equivalent are an upper/lower pair.
    60	// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
    61	// are considered to case-fold to each other. When we do add Unicode
    62	// letters, we must not assume that upper/lower are the only case-equivalent pairs.
    63	// Perhaps the Kelvin symbol would be disallowed entirely, for example.
    64	// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
    65	//
    66	// Also, it would be nice to allow Unicode marks as well as letters,
    67	// but marks include combining marks, and then we must deal not
    68	// only with case folding but also normalization: both U+00E9 ('é')
    69	// and U+0065 U+0301 ('e' followed by combining acute accent)
    70	// look the same on the page and are treated by some file systems
    71	// as the same path. If we do allow Unicode marks in paths, there
    72	// must be some kind of normalization to allow only one canonical
    73	// encoding of any character used in an import path.
    74	
    75	// encodePath returns the safe encoding of the given module path.
    76	// It fails if the module path is invalid.
    77	func encodePath(path string) (encoding string, err error) {
    78		return encodeString(path)
    79	}
    80	
    81	// encodeVersion returns the safe encoding of the given module version.
    82	// Versions are allowed to be in non-semver form but must be valid file names
    83	// and not contain exclamation marks.
    84	func encodeVersion(v string) (encoding string, err error) {
    85		return encodeString(v)
    86	}
    87	
    88	func encodeString(s string) (encoding string, err error) {
    89		haveUpper := false
    90		for _, r := range s {
    91			if r == '!' || r >= utf8.RuneSelf {
    92				// This should be disallowed by CheckPath, but diagnose anyway.
    93				// The correctness of the encoding loop below depends on it.
    94				return "", fmt.Errorf("internal error: inconsistency in EncodePath")
    95			}
    96			if 'A' <= r && r <= 'Z' {
    97				haveUpper = true
    98			}
    99		}
   100	
   101		if !haveUpper {
   102			return s, nil
   103		}
   104	
   105		var buf []byte
   106		for _, r := range s {
   107			if 'A' <= r && r <= 'Z' {
   108				buf = append(buf, '!', byte(r+'a'-'A'))
   109			} else {
   110				buf = append(buf, byte(r))
   111			}
   112		}
   113		return string(buf), nil
   114	}
   115	
   116	// decodePath returns the module path of the given safe encoding.
   117	// It fails if the encoding is invalid or encodes an invalid path.
   118	func decodePath(encoding string) (path string, err error) {
   119		path, ok := decodeString(encoding)
   120		if !ok {
   121			return "", fmt.Errorf("invalid module path encoding %q", encoding)
   122		}
   123		return path, nil
   124	}
   125	
   126	// decodeVersion returns the version string for the given safe encoding.
   127	// It fails if the encoding is invalid or encodes an invalid version.
   128	// Versions are allowed to be in non-semver form but must be valid file names
   129	// and not contain exclamation marks.
   130	func decodeVersion(encoding string) (v string, err error) {
   131		v, ok := decodeString(encoding)
   132		if !ok {
   133			return "", fmt.Errorf("invalid version encoding %q", encoding)
   134		}
   135		return v, nil
   136	}
   137	
   138	func decodeString(encoding string) (string, bool) {
   139		var buf []byte
   140	
   141		bang := false
   142		for _, r := range encoding {
   143			if r >= utf8.RuneSelf {
   144				return "", false
   145			}
   146			if bang {
   147				bang = false
   148				if r < 'a' || 'z' < r {
   149					return "", false
   150				}
   151				buf = append(buf, byte(r+'A'-'a'))
   152				continue
   153			}
   154			if r == '!' {
   155				bang = true
   156				continue
   157			}
   158			if 'A' <= r && r <= 'Z' {
   159				return "", false
   160			}
   161			buf = append(buf, byte(r))
   162		}
   163		if bang {
   164			return "", false
   165		}
   166		return string(buf), true
   167	}
   168
View as plain text