As soon as you have people uploading files with all kinds of funky characters and umlauts in their names, you might run into trouble once you have to download them again.
The same applies if you create files on the fly whose names contain, for example, the name of a person or a company.
This was exactly the situation we had, so I came up with a little cleaning function in C#.
Feel free to adjust the mapping list according to your requirements. I mapped some of the more common special chars to the minus sign (-) and tried to get a sensible mapping for all the umlauts and other special characters.
The method CleanFileName() first maps all the known chars and then gets rid of everything that is left.
//http://www.pjb.com.au/comp/diacritics.html
// Table of { character, ASCII replacement } pairs applied by CleanFileName().
// Every replacement consists only of ASCII letters, digits and '-', so no
// replacement is matched by another row and all of them survive the final
// RemoveNonWordChars() pass.
// Convention used below: upper-case umlauts map to their base letter, while
// lower-case ä/ö/ü use the German transliteration (ae/oe/ue).
private static readonly string[,] CharacterReplacements =
{
    { " ", "-" },
    { "&", "-" },
    { "?", "-" },
    { "!", "-" },
    { "%", "-" },
    { "+", "-" },
    { "#", "-" },
    { ":", "-" },
    { ";", "-" },
    { ".", "-" },
    { "¢", "c" },    //cent
    { "£", "P" },    //Pound
    { "€", "E" },    //Euro
    { "¥", "Y" },    //Yen
    { "°", "d" },    //degree
    { "¼", "1-4" },  //fraction one-quarter
    { "½", "1-2" },  //fraction half
    { "¾", "3-4" },  //fraction three-quarters
    { "@", "AT" },   //at
    { "Œ", "OE" },   //OE ligature, French (in ISO-8859-15)
    { "œ", "oe" },   //OE ligature, French (in ISO-8859-15)
    { "À", "A" },    //grave accent
    { "Á", "A" },    //acute accent
    { "Â", "A" },    //circumflex accent
    { "Ã", "A" },    //tilde
    { "Ä", "A" },    //umlaut mark
    { "Å", "A" },    //ring
    { "Æ", "AE" },   //diphthong
    { "Ç", "C" },    //cedilla
    { "È", "E" },    //grave accent
    { "É", "E" },    //acute accent
    { "Ê", "E" },    //circumflex accent
    { "Ë", "E" },    //umlaut mark
    { "Ì", "I" },    //grave accent
    { "Í", "I" },    //acute accent
    { "Î", "I" },    //circumflex accent
    { "Ï", "I" },    //umlaut mark
    { "Ð", "Eth" },  //Icelandic
    { "Ñ", "N" },    //tilde
    { "Ò", "O" },    //grave accent
    { "Ó", "O" },    //acute accent
    { "Ô", "O" },    //circumflex accent
    { "Õ", "O" },    //tilde
    { "Ö", "O" },    //umlaut mark
    { "Ø", "O" },    //slash
    { "Ù", "U" },    //grave accent
    { "Ú", "U" },    //acute accent
    { "Û", "U" },    //circumflex accent
    { "Ü", "U" },    //umlaut mark
    { "Ý", "Y" },    //acute accent
    { "Þ", "Th" },   //Icelandic - http://en.wikipedia.org/wiki/Thorn_(letter)
    { "ß", "ss" },   //German
    { "à", "a" },    //grave accent
    { "á", "a" },    //acute accent
    { "â", "a" },    //circumflex accent
    { "ã", "a" },    //tilde
    { "ä", "ae" },   //umlaut mark
    { "å", "a" },    //ring
    { "æ", "ae" },   //diphthong
    { "ç", "c" },    //cedilla
    { "è", "e" },    //grave accent
    { "é", "e" },    //acute accent
    { "ê", "e" },    //circumflex accent
    { "ë", "e" },    //umlaut mark
    { "ì", "i" },    //grave accent
    { "í", "i" },    //acute accent
    { "î", "i" },    //circumflex accent
    { "ï", "i" },    //umlaut mark
    { "ð", "eth" },  //Icelandic
    { "ñ", "n" },    //tilde
    { "ò", "o" },    //grave accent
    { "ó", "o" },    //acute accent
    { "ô", "o" },    //circumflex accent
    { "õ", "o" },    //tilde
    { "ö", "oe" },   //umlaut mark
    { "ø", "o" },    //slash
    { "ù", "u" },    //grave accent
    { "ú", "u" },    //acute accent
    { "û", "u" },    //circumflex accent
    { "ü", "ue" },   //umlaut mark
    { "ý", "y" },    //acute accent
    { "þ", "th" },   //Icelandic - http://en.wikipedia.org/wiki/Thorn_(letter)
    { "ÿ", "y" },    //umlaut mark
};

// Matches every run of characters that are NOT ASCII letters, digits or '-'.
// Note that this is deliberately not \W: the underscore is treated as invalid
// and the hyphen is kept. Built once instead of on every call.
private static readonly Regex InvalidCharsRegex = new Regex(@"[^a-zA-Z0-9-]+");

//http://stackoverflow.com/questions/3885964/regex-to-replace-invalid-characters
/// <summary>
/// Removes every character that is not an ASCII letter, a digit or '-'.
/// </summary>
/// <param name="source">The text to clean.</param>
/// <returns><paramref name="source"/> without the invalid characters.</returns>
public static string RemoveNonWordChars(string source)
{
    return RemoveNonWordChars(source, "");
}

//http://stackoverflow.com/questions/3885964/regex-to-replace-invalid-characters
/// <summary>
/// Replaces every run of characters that are not ASCII letters, digits or '-'
/// with <paramref name="replacement"/>. A run of several consecutive invalid
/// characters is replaced once, not once per character.
/// </summary>
/// <param name="source">The text to clean.</param>
/// <param name="replacement">The text inserted in place of each invalid run.</param>
/// <returns>The cleaned text.</returns>
public static string RemoveNonWordChars(string source, string replacement)
{
    return InvalidCharsRegex.Replace(source, replacement);
}

// Applies every row of CharacterReplacements to the given text.
private static string ApplyCharacterReplacements(string text)
{
    for (int i = 0; i < CharacterReplacements.GetLength(0); i++)
    {
        text = text.Replace(CharacterReplacements[i, 0], CharacterReplacements[i, 1]);
    }

    return text;
}

/// <summary>
/// Produces a file name consisting only of ASCII letters, digits and '-',
/// plus a single '.' in front of the file ending (if there is one).
/// Known special characters are first transliterated through
/// CharacterReplacements; everything that is still invalid afterwards is
/// removed.
/// </summary>
/// <param name="filename">The original file name (without a directory part).</param>
/// <returns>The cleaned file name.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="filename"/> is null.
/// </exception>
public static string CleanFileName(string filename)
{
    if (filename == null)
    {
        throw new System.ArgumentNullException("filename");
    }

    string fileEnding = string.Empty;

    // The char overload is an ordinal search; the string overload would be
    // culture-sensitive.
    int index = filename.LastIndexOf('.');

    // Split off the file ending and clean it separately so the separating
    // dot is not turned into '-' by the replacement table.
    if (index != -1)
    {
        string cleanedEnding = RemoveNonWordChars(ApplyCharacterReplacements(filename.Substring(index + 1)));
        filename = filename.Substring(0, index);

        // Skip the dot when nothing usable is left of the ending, so the
        // result never ends in a dangling '.'.
        if (cleanedEnding.Length > 0)
        {
            fileEnding = "." + cleanedEnding;
        }
    }

    return RemoveNonWordChars(ApplyCharacterReplacements(filename)) + fileEnding;
}
You could use HttpUtility.UrlEncode, but that does not solve all the issues that my solution handles. My solution should also work in all browsers, whereas my tests with UrlEncode worked fine in Chrome, but not in IE.