Closed Bug 331404 Opened 18 years ago Closed 17 years ago

NSS may crash in initialization when windows file system contains REALLY OLD files

Categories

(NSS :: Libraries, defect, P3)

x86
Windows XP
defect

Tracking

(Not tracked)

RESOLVED FIXED
3.11.7

People

(Reporter: bugs, Assigned: nelson, NeedInfo)

References

Details

Attachments

(5 files, 4 obsolete files)

I am running a Firefox build of my own creation, not having touched anything at all outside my little world of bookmarks/history. 

I start the build like so:

firefox -profile p -chrome chrome://browser/content/places/places.xul

After a while, I get this assertion and a crash:

Debug Assertion Failed!
Program: path\to\firefox.exe
File: dtoxtm64.c
Line: 67

Expression: (((long)(yr-1900) >= _BASE_YEAR) && ((long)(yr - 1900) <= _MAX_YEAR64))

When I debug, I see that an automated software update check was spawned, which caused the browser to try and load a file on https://aus2.mozilla.org/... this caused SSL to be initialized. As part of the entropy collection for seeding the random number generator, NSS appears to scan the system files and use the number of them as a seed. This scan of the system files is what crashes - it crashes in a windows system DLL, not in the browser! ... because one of my system files has a yr of 1617.

Nevertheless, there is a crash.

I built on Visual Studio 2005.

Here's the stack:

msvcr80d.dll!__loctotime64_t(int yr=1617, int mo=11, int dy=3, int hr=1, int mn=26, int sc=9, int dstflag=-1)  Line 67 + 0x57 bytes	C
msvcr80d.dll!__time64_t_from_ft(_FILETIME * pft=0x0012c52c)  Line 253 + 0x25 bytes	C
msvcr80d.dll!_findnext64i32(int hFile=1484232, _finddata64i32_t * pfd=0x0012c684)  Line 187 + 0xc bytes	C
freebl3.dll!EnumSystemFiles(int (const char *)* func=0x04092f20)  Line 277 + 0x14 bytes	C
freebl3.dll!ReadSystemFiles()  Line 320 + 0xa bytes	C
freebl3.dll!RNG_SystemInfoForRNG()  Line 462	C
softokn3.dll!RNG_SystemInfoForRNG()  Line 1633	C
softokn3.dll!nsc_CommonInitialize(void * pReserved=0x0012cc28, int isFIPS=0)  Line 2988	C
softokn3.dll!NSC_Initialize(void * pReserved=0x0012cc28)  Line 3052 + 0xb bytes	C
nss3.dll!secmod_ModuleInit(SECMODModuleStr * mod=0x03cdb598, int * alreadyLoaded=0x0012ccc8)  Line 150 + 0xf bytes	C
nss3.dll!SECMOD_LoadPKCS11Module(SECMODModuleStr * mod=0x03cdb598)  Line 322 + 0xd bytes	C
nss3.dll!SECMOD_LoadModule(char * modulespec=0x03cd9ca8, SECMODModuleStr * parent=0x03cea788, int recurse=1)  Line 323 + 0x9 bytes	C
nss3.dll!SECMOD_LoadModule(char * modulespec=0x03ce3390, SECMODModuleStr * parent=0x00000000, int recurse=1)  Line 338 + 0x11 bytes	C
nss3.dll!nss_Init(const char * configdir=0x0012ce4c, const char * certPrefix=0x02cb1c0e, const char * keyPrefix=0x02cb1c0d, const char * secmodName=0x02c9c234, int readOnly=0, int noCertDB=0, int noModDB=0, int forceOpen=0, int noRootInit=0, int optimizeSpace=1, int noSingleThreadedModules=0, int allowAlreadyInitializedModules=0, int dontFinalizeModules=0)  Line 467 + 0xd bytes	C
nss3.dll!NSS_InitReadWrite(const char * configdir=0x0012ce4c)  Line 511 + 0x2a bytes	C
pipnss.dll!nsNSSComponent::InitializeNSS(int showWarningBox=1)  Line 1383 + 0xe bytes	C++
pipnss.dll!nsNSSComponent::Init()  Line 1547 + 0xa bytes	C++
pipnss.dll!nsNSSComponentConstructor(nsISupports * aOuter=0x00000000, const nsID & aIID={...}, void * * aResult=0x0012cf7c)  Line 157 + 0x8b bytes	C++
xpcom_core.dll!nsGenericFactory::CreateInstance(nsISupports * aOuter=0x00000000, const nsID & aIID={...}, void * * aResult=0x0012cf7c)  Line 80 + 0x17 bytes	C++
xpcom_core.dll!nsComponentManagerImpl::CreateInstanceByContractID(const char * aContractID=0x015edd10, nsISupports * aDelegate=0x00000000, const nsID & aIID={...}, void * * aResult=0x0012cf7c)  Line 1808 + 0x1a bytes	C++
xpcom_core.dll!nsComponentManagerImpl::GetServiceByContractID(const char * aContractID=0x015edd10, const nsID & aIID={...}, void * * result=0x0012cfe8)  Line 2233 + 0x34 bytes	C++
xpcom_core.dll!CallGetService(const char * aContractID=0x015edd10, const nsID & aIID={...}, void * * aResult=0x0012cfe8)  Line 95	C++
xpcom_core.dll!nsGetServiceByContractID::operator()(const nsID & aIID={...}, void * * aInstancePtr=0x0012cfe8)  Line 278 + 0x13 bytes	C++
xpcom_core.dll!nsCOMPtr_base::assign_from_gs_contractid(nsGetServiceByContractID gs={...}, const nsID & iid={...})  Line 132 + 0xf bytes	C++
pipnss.dll!nsCOMPtr<nsISupports>::nsCOMPtr<nsISupports>(nsGetServiceByContractID gs={...})  Line 999	C++
pipnss.dll!EnsureNSSInitialized(int triggeredByNSSComponent=0)  Line 91	C++
pipnss.dll!nsSSLSocketProviderConstructor(nsISupports * aOuter=0x00000000, const nsID & aIID={...}, void * * aResult=0x0012d0b0)  Line 165 + 0xd bytes	C++
xpcom_core.dll!nsGenericFactory::CreateInstance(nsISupports * aOuter=0x00000000, const nsID & aIID={...}, void * * aResult=0x0012d0b0)  Line 80 + 0x17 bytes	C++
xpcom_core.dll!nsComponentManagerImpl::CreateInstanceByContractID(const char * aContractID=0x0012d158, nsISupports * aDelegate=0x00000000, const nsID & aIID={...}, void * * aResult=0x0012d0b0)  Line 1808 + 0x1a bytes	C++
xpcom_core.dll!nsComponentManagerImpl::GetServiceByContractID(const char * aContractID=0x0012d158, const nsID & aIID={...}, void * * result=0x0012d1e4)  Line 2233 + 0x34 bytes	C++
xpcom_core.dll!CallGetService(const char * aContractID=0x0012d158, const nsID & aIID={...}, void * * aResult=0x0012d1e4)  Line 95	C++
necko.dll!CallGetService<nsISocketProvider>(const char * aContractID=0x0012d158, nsISocketProvider * * aDestination=0x0012d1e4)  Line 130 + 0x14 bytes	C++
necko.dll!nsSocketProviderService::GetSocketProvider(const char * type=0x015084d8, nsISocketProvider * * result=0x0012d1e4)  Line 71 + 0x12 bytes	C++
necko.dll!nsHttpHandler::NewProxiedChannel(nsIURI * uri=0x03cd7418, nsIProxyInfo * givenProxyInfo=0x00000000, nsIChannel * * result=0x0012d370)  Line 1509 + 0x36 bytes	C++
necko.dll!nsHttpHandler::NewChannel(nsIURI * uri=0x03cd7418, nsIChannel * * result=0x0012d370)  Line 1455	C++
necko.dll!nsHttpsHandler::NewChannel(nsIURI * aURI=0x03cd7418, nsIChannel * * _retval=0x0012d370)  Line 1788	C++
necko.dll!nsIOService::NewChannelFromURI(nsIURI * aURI=0x03cd7418, nsIChannel * * result=0x0012d370)  Line 546 + 0x2a bytes	C++
xmlextras.dll!NS_NewChannel(nsIChannel * * result=0x03cd47f4, nsIURI * uri=0x03cd7418, nsIIOService * ioService=0x00dab3c0, nsILoadGroup * loadGroup=0x01d43b58, nsIInterfaceRequestor * callbacks=0x00000000, unsigned int loadFlags=1)  Line 171 + 0x16 bytes	C++
xmlextras.dll!nsXMLHttpRequest::OpenRequest(const nsACString_internal & method={...}, const nsACString_internal & url={...}, int async=1, const nsAString_internal & user={...}, const nsAString_internal & password={...})  Line 974 + 0x3d bytes	C++
xmlextras.dll!nsXMLHttpRequest::Open(const nsACString_internal & method={...}, const nsACString_internal & url={...})  Line 1070 + 0x28 bytes	C++
xpcom_core.dll!XPTC_InvokeByIndex(nsISupports * that=0x0000000c, unsigned int methodIndex=2, unsigned int paramCount=1234792, nsXPTCVariant * params=0x00dcda14)  Line 102	C++
xpc3250.dll!XPCWrappedNative::CallMethod(XPCCallContext & ccx={...}, XPCWrappedNative::CallMode mode=12)  Line 2152 + 0x1e bytes	C++
xpc3250.dll!XPCWrappedNative::CallMethod(XPCCallContext & ccx={...}, XPCWrappedNative::CallMode mode=CALL_METHOD)  Line 2152 + 0x1e bytes	C++
xpc3250.dll!XPC_WN_CallMethod(JSContext * cx=0x01d73a58, JSObject * obj=0x03ccc688, unsigned int argc=3, long * argv=0x03cc8160, long * vp=0x0012da2c)  Line 1444 + 0xe bytes	C++
js3250.dll!js_Invoke(JSContext * cx=0x01d73a58, unsigned int argc=3, unsigned int flags=0)  Line 1246 + 0x20 bytes	C
js3250.dll!js_Interpret(JSContext * cx=0x01d73a58, unsigned char * pc=0x01d25bd9, long * result=0x0012e4cc)  Line 3884 + 0xf bytes	C
js3250.dll!js_Invoke(JSContext * cx=0x01d73a58, unsigned int argc=1, unsigned int flags=2)  Line 1270 + 0x13 bytes	C
xpc3250.dll!nsXPCWrappedJSClass::CallMethod(nsXPCWrappedJS * wrapper=0x03cc4ae8, unsigned short methodIndex=3, const nsXPTMethodInfo * info=0x01d53108, nsXPTCMiniVariant * nativeParams=0x0012e80c)  Line 1379 + 0x14 bytes	C++
xpc3250.dll!nsXPCWrappedJS::CallMethod(unsigned short methodIndex=3, const nsXPTMethodInfo * info=0x01d53108, nsXPTCMiniVariant * params=0x0012e80c)  Line 466	C++
xpcom_core.dll!PrepareAndDispatch(nsXPTCStubBase * self=0x03cc4ae8, unsigned int methodIndex=3, unsigned int * args=0x0012e8d4, unsigned int * stackBytesToPop=0x0012e8c4)  Line 117 + 0x1e bytes	C++
xpcom_core.dll!SharedStub()  Line 147	C++
xpcom_core.dll!XPTC_InvokeByIndex(nsISupports * that=0x00000003, unsigned int methodIndex=1, unsigned int paramCount=1239568, nsXPTCVariant * params=0x00000001)  Line 102	C++
xpc3250.dll!XPCWrappedNative::CallMethod(XPCCallContext & ccx={...}, XPCWrappedNative::CallMode mode=30703552)  Line 2152 + 0x1e bytes	C++
xpcom_core.dll!XPTC_InvokeByIndex(nsISupports * that=0x00000003, unsigned int methodIndex=1, unsigned int paramCount=1239568, nsXPTCVariant * params=0x00000001)  Line 102	C++
xpc3250.dll!XPCWrappedNative::CallMethod(XPCCallContext & ccx={...}, XPCWrappedNative::CallMode mode=3)  Line 2152 + 0x1e bytes	C++
xpc3250.dll!XPCWrappedNative::CallMethod(XPCCallContext & ccx={...}, XPCWrappedNative::CallMode mode=CALL_METHOD)  Line 2152 + 0x1e bytes	C++
xpc3250.dll!XPC_WN_CallMethod(JSContext * cx=0x01d73a58, JSObject * obj=0x01d193a8, unsigned int argc=1, long * argv=0x03cc8090, long * vp=0x0012ecd4)  Line 1444 + 0xe bytes	C++
js3250.dll!js_Invoke(JSContext * cx=0x01d73a58, unsigned int argc=1, unsigned int flags=0)  Line 1246 + 0x20 bytes	C
js3250.dll!js_Interpret(JSContext * cx=0x01d73a58, unsigned char * pc=0x01d30d09, long * result=0x0012f774)  Line 3884 + 0xf bytes	C
js3250.dll!js_Invoke(JSContext * cx=0x01d73a58, unsigned int argc=1, unsigned int flags=2)  Line 1270 + 0x13 bytes	C
xpc3250.dll!nsXPCWrappedJSClass::CallMethod(nsXPCWrappedJS * wrapper=0x01d53e00, unsigned short methodIndex=3, const nsXPTMethodInfo * info=0x01d53108, nsXPTCMiniVariant * nativeParams=0x0012fab4)  Line 1379 + 0x14 bytes	C++
xpc3250.dll!nsXPCWrappedJS::CallMethod(unsigned short methodIndex=3, const nsXPTMethodInfo * info=0x01d53108, nsXPTCMiniVariant * params=0x0012fab4)  Line 466	C++
xpcom_core.dll!PrepareAndDispatch(nsXPTCStubBase * self=0x01d53e00, unsigned int methodIndex=3, unsigned int * args=0x0012fb7c, unsigned int * stackBytesToPop=0x0012fb6c)  Line 117 + 0x1e bytes	C++
xpcom_core.dll!SharedStub()  Line 147	C++
xpcom_core.dll!nsTimerImpl::Fire()  Line 405	C++
xpcom_core.dll!nsTimerImpl::Fire()  Line 405	C++
xpcom_core.dll!nsTimerManager::FireNextIdleTimer()  Line 637	C++
gkwidget.dll!nsAppShell::Run()  Line 142	C++
tkitcmps.dll!nsAppStartup::Run()  Line 161 + 0x1c bytes	C++
xul.dll!XRE_main(int argc=3, char * * argv=0x00b37d00, const nsXREAppData * aAppData=0x004036b0)  Line 2364 + 0x25 bytes	C++
firefox.exe!main(int argc=3, char * * argv=0x00b37d00)  Line 61 + 0x13 bytes	C++
firefox.exe!__tmainCRTStartup()  Line 586 + 0x19 bytes	C
firefox.exe!mainCRTStartup()  Line 403	C
kernel32.dll!7c816d4f() 	
[Frames below may be incorrect and/or missing, no symbols loaded for kernel32.dll]	
kernel32.dll!7c8399f3()
How do you propose this be fixed?
Shall we stop enumerating any files from windows file systems in all of mozilla?
Does file:///c:/<directory containing old file>  also crash?
Assignee: wtchang → nobody
QA Contact: jason.m.reid → libraries
Ben, the question I asked above was not rhetorical.
What do you suggest we do about a crash in OS file system code caused by 
a corrupt file system?
Summary: Crash initializing SSL → NSS may crash in initialization when windows file system is corrupt
Attached patch try to use win32 api (obsolete) — Splinter Review
i don't think the os/2 api was being used properly either...
Attachment #216503 - Flags: review?(nelson)
Some pre-review comments/questions:

1. Does this change the minimum set of windows DLLs with which an NSS-based
program must be linked on Windows?  Does this change necessitate linking with *ANY* different windows DLLs than are now already being used in NSS product
builds?  (Note, I don't mean browser products, that are already linked with 
TONS of windows DLLs).  In particular, does this necessitate the use of any
C++ DLLs?

2. Are these newly called  functions available on all windows platforms, 
including WinCE? (a.k.a., Pocket PC) ?
i should note that i wrote this from MoCo on a linux box, it's not compiled ...

fwiw, api docs are available from microsoft, i generally use:
http://www.google.com/search?q=site:msdn.microsoft.com%20FindFirstFile
replace to the lsat word w/ the function you care about...

at the bottom, you'll see:
Requirements
Client 	Requires Windows Vista, Windows XP, Windows 2000 Professional, Windows NT Workstation, Windows Me, Windows 98, or Windows 95.
Server 	Requires Windows Server "Longhorn", Windows Server 2003, Windows 2000 Server, or Windows NT Server.
Header 	

Declared in Winbase.h; include Windows.h.
Library 	

Link to Kernel32.lib.
DLL 	Requires Kernel32.dll.

So as long as we're already linking to Kernel32, we're fine.

for nt3.1 and friends, load:
http://msdn.microsoft.com/library/default.asp?url=/library/en-us/winprog/winprog/windows_nt_3_51.asp

if the function is there (it is), then it was available in nt3.1

for wince,
http://www.google.com/search?num=100&hl=en&lr=&safe=active&q=site%3Amsdn.microsoft.com+FindFirstFile+wince

works well enough

Requirements

OS Versions: Windows CE 1.0 and later.
Header: Winbase.h.
Link Library: Coredll.lib.

So, as long as pocket pc is later than wince1.0, ...
Any idea how to get a file with a 4 century old time stamp in an NTFS
file system on WinXP, for testing?
Is there any utility program, already compiled, like "touch", that can 
set the time stamp way back?  I can write one, but would prefer to use one
already written, especially if it is a standard feature of windows.
now you're getting really greedy. i'd just use:

http://msdn.microsoft.com/library/default.asp?url=/library/en-us/sysinfo/base/changing_a_file_time_to_the_current_time.asp
http://msdn.microsoft.com/library/default.asp?url=/library/en-us/sysinfo/base/filetime_str.asp

it should be sufficient to use 0,0 as the filetime. that'd be 
a 64-bit value representing January 1, 1601 (UTC).
mcsmurf: could you find someone to make a program that just sets the date to 0,0 using the api i mentioned? :(
Is there any way to work out which file is causing this crash? This is something I've now hit as well, I think.
Severity: normal → critical
Incidentally, as far as I can tell, if this is what I'm hitting, it doesn't trigger Talkback. So we have no way to know how often it is occurring.
you wouldn't get talkback, the runtime is asserting which basically tails to abort.

try using dir /od or dir /o-d.
Comment on attachment 216503 [details] [diff] [review]
try to use win32 api

This patch doesn't come close to building.  
I'll attach one that does.
Attachment #216503 - Flags: review?(nelson) → review-
I'll do some testing on this next.
Assignee: nobody → nelson
Attachment #216503 - Attachment is obsolete: true
Status: NEW → ASSIGNED
Comment on attachment 222989 [details] [diff] [review]
patch that builds and works on WinXP (dunno about other Windows flavors, or OS/2).

Works on WinXP.  Dunno about other Windows flavors.
Attachment #222989 - Attachment description: untested patch that builds → patch that builds and works on WinXP (dunno about other Windows flavors, or OS/2).
Attachment #222989 - Flags: review?(julien.pierre.bugs)
Ben and Hixie: are you willing to test this latest patch on your systems,
where you can reproduce the problem?
Priority: -- → P3
Version: unspecified → 3.0
Comment on attachment 222989 [details] [diff] [review]
patch that builds and works on WinXP (dunno about other Windows flavors, or OS/2).

The patch is correct. It doesn't affect OS/2, which uses another file, os2_rand.c .

Nit #1 : there is a C++ comment (//) in the code introduced by this patch. But there are such comments all over the file already ... It must be that all the Windows compilers are OK with them in .c files.

Nit #2 :
This patch makes changes to the WIN16 section of the file. We don't support WIN16 anymore, so this is dead code. I suggest that the section just be deleted, at least for this part, if not for the whole file.
Attachment #222989 - Flags: review?(julien.pierre.bugs) → review+
Summary: NSS may crash in initialization when windows file system is corrupt → NSS may crash in initialization when windows file system contains REALLY OLD files
This touch program lets you set the date of a file to any date/time since 
1600 AD.  Usage message explains.  Date/Time is in UTC, not local time zone.
Made this for Hixie.
To test your NSS program's resilience in the presence of REALLY OLD system files
try this command (using touch.exe in the above attachment)

touch 18000101 C:\WINDOWS\system32\ReallyOldFile

This will create a file that is about 2 centuries old.  
Then use program findold.exe to find it.  (Just run the command findold)
Finally test your NSS program and see how it behaves. 

Note: findold also finds files with future timestamps.  So if you get a bunch
of filenames whose timesstamps seem to be the present time, just run it again.
Note to self:  fix findold to ignore future ACCESS times.
This program checks all the "system files" (same ones checked by NSS's 
startup code) looking for files whose creation, modification, or access 
time stamp is outside of the range that can be represented in a time_t.
IIRC, this is the same test as performed by the assertion in the MSVC 8 
runtime.  (Note that older MSVCs don't have this assertion).  This program
will tell you what file(s) have timestamps that will cause the MSVC80 
run time to assert, and it will tell you which one(s) of the file's 3 
time stamps are out of this range.  

BTW, a reminder.  The NSS patch is now awaiting someone to verify it. 
To do so, someone must:
a) reproduce the RTL assertion failure, perhaps with the tools I've 
attached to this bug.
b) recompile NSS with this patch in place, and then
c) verify that the problem is no longer reproduced with this patch in place.
compiling it with the version of MSVC that has the problematic RTL, 
I do not have the version of MSVC with that RTL.  The older RTLs don't have
this problem.  So I invite someone to do this test.
Attachment #224185 - Attachment is obsolete: true
Workaround bogus assertion failure in MSVC 8 (Express, 2005) RTL by switching
from the old _findfirst, _findnext file enumeration API to the newer 
FindFirstFile, FindNextFile API.  Might be slower, but won't crash if it finds
files older than 1970.  Bug 331404. r=julien.pierre

Checking in win_rand.c;  new revision: 1.10; previous revision: 1.9

Commited on trunk.  Will commit on 3.11 branch IFF this doesn't invalidate
FIPS evaluation in progress.
This is fixed on the NSS trunk, and the fix will be in NSS 3.12.
I would fix this on the 3.11 branch, but I believe that freebl is 
frozen on that branch due to the ongoing FIPS evaluation.  

If that should change, and if there arises a window of opportunity to get
this fix into the 3.11 branch at some later date, please repoen this bug
and change the target fix version to 3.11.<whatever>
Status: ASSIGNED → RESOLVED
Closed: 18 years ago
Resolution: --- → FIXED
Target Milestone: --- → 3.12
*** Bug 330271 has been marked as a duplicate of this bug. ***
*** Bug 357106 has been marked as a duplicate of this bug. ***
*** Bug 360892 has been marked as a duplicate of this bug. ***
I think we really need to get this into the branch ASAP.  Trunk builds of Firefox/Thunderbird/Seamonkey are crashing for a fair amount of people because of this.
*** Bug 357709 has been marked as a duplicate of this bug. ***
*** Bug 359669 has been marked as a duplicate of this bug. ***
The NSS crypto module, which consists of softokn3.dll and freebl3.dll
on Windows, is undergoing a validation by the US government for conformance
to the FIPS 140-2 standard.  The validation is in the final review phase.
Unfortunately the fix for this bug is in freebl3.dll.  I need to check
with the FIPS testing lab to find out how to make a change to the NSS
crypto module at this stage.

While I'm working on getting this fix into the NSS crypto module,
please look into working around this problem.  Some ideas are:
1. When the Mozilla installer installs msvcr80.dll, it can fix any
invalid time stamps on the files in the "system32" directory.  The
installer is the best place to fix the files' time stamps because it
has the privilege to make changes to the "system32" directory and
can afford to spend a little more time.
2. If the files with invalid time stamps are known, the main NSS library,
nss3.dll, can fix the time stamps on those files before initializing
the NSS crypto module.

Has anyone reported this msvcr80.dll (or Windows) bug to Microsoft?
If not, I will do that.
Comment on attachment 222989 [details] [diff] [review]
patch that builds and works on WinXP (dunno about other Windows flavors, or OS/2).

r=wtc.  Regarding Julien's review comments in comment 19:

The C++ comment // is copied and pasted from existing code.  So
it's not introduced by this patch.

I agree that it's not necessary to touch the WIN16 code and I
would also just delete it while we're at it.

I tested this patch in the debugger.
Attachment #222989 - Flags: superreview+
it's not a bug. microsoft has made this perfectly clear, and they're right.

and yes, we talked to microsoft about this at the beginning of the process (comment 3).

That msvcr80 and not msvcr80d is causing this problem wasn't discussed in the original report and I'm investigating that now.
Timeless,  One takes a program that has been running without error for years,
recompiles the very same source code with a newer compiler, one that links 
with msvcr80, and the same program crashes in msvcr80.  msvcr80 asserts 
that the date is in some valid range.  However, the failure of that 
assertion is NOT the fault of the calling program, and is not due to a 
programming error in the calling program.  This is a bug in msvcr80, pure
and simple.

As far as prior attempts to communicate with Microsoft, I'm only aware of 
communication attempts through microsoft's newsgroup for MSVC libraries:
news://news.microsoft.com:119/microsoft.public.dotnet.languages.vc.libraries

That channel is no more official than mozilla's news groups are.  Replies 
usually come from volunteers who are not Microsoft product developers.
Thanks to the link provided by timeless.

To avoid changing softokn3.dll and freebl3.dll and causing delays
to our FIPS validation, I implemented this workaround to install
a no-op invalid parameter handler for MSVC 2005 (8).

Please refer to the stack in comment 0.  Any function in softokn3.dll
and freebl3.dll is off limit.  So I am setting the invalid parameter
handler inside the last function (secmod_ModuleInit) before we call
into softokn3.dll.

Do not check in this patch on the NSS trunk because this bug is
fixed on the NSS trunk.
Attachment #247255 - Flags: superreview?(nelson)
Attachment #247255 - Flags: review?(timeless)
This patch also disables the message windows for the assertion failures
if the debug C run-time library (msvcrd80.dll) is used.  Unfortunately
this patch requires that we fix bug 362577 first.  I'm attaching this
patch as a reference.
Attachment #247255 - Flags: review?(timeless) → review+
on average i get responses from people who work for ms.

but just because something works doesn't mean it was guaranteed to work. if gecko has a function and we say it might crash for odd input and it happens to work today for odd input and we change it tomorrow to crash for odd input, we'd claim this is perfectly acceptable, and it is.

it's not necessarily the nicest thing to do, but the contract didn't guarantee more.

you're not obligated to use the new compiler. and personally i think anyone switching is crazy. all it leads to are crashes because of various uncaught exceptions, e.g. new throwing. none of these things are events we've written code to handle. we knew that our code wasn't correct, but we didn't care, and we most certainly didn't prepare.
Comment on attachment 247255 [details] [diff] [review]
Workaround for the NSS_3_11_BRANCH

Bob, what's the best way to determine if 'mod' is the softoken here?
I only need to call _set_invalid_parameter_handler for the softoken.
Should I put the _set_invalid_parameter_handler calls inside
"if (mod->libraryParams != NULL)" blocks?
Attachment #247255 - Flags: review?(rrelyea)
Only use the workaround on the softoken (the "internal" PKCS #11 module).
Attachment #247304 - Attachment is obsolete: true
Comment on attachment 247255 [details] [diff] [review]
Workaround for the NSS_3_11_BRANCH

The main problem I have with all variants of this patch is that 
it masks ALL failures of this type, not only those caused by 
invalid dates on file system objects, for the entire duration of 
NSS_Initialize.  This will mask failures that we do not wish to 
mask, as well as one that we do.  And it will create a new backwards
binary compatiblity challenge for us.  

The date on the file system object is NOT a "parameter" supplied 
by the caller of the function _findnext (a.k.a., _findnext64i32).
The date comes from the file system, from the OS itself.
If the date is deemed invalid, then the fault lies with whatever
software SET that date on the file system object.  It is not the 
fault of every program that enumerates the contents of the 
directory.  The program that enumerates those contents has passed
no invalid parameter, and should not be penalized for invalid file
system contents.   It is wrong to fault the innocent program for the
sins of the file system.

I'd say we should supply a program that goes through the file system
(at least the directories about which we care) and fixes any invalid
dates it finds.  Tell people, "It's unfortunate that some software 
on your system (not ours) is trashing your file system, but being the 
good guys that we are, we're giving you a tool to mitigate the damage 
done by that other malfeasant software.".
Nelson, I agree with your proposal (the same as my first idea
in comment 33).  I proposed these _set_invalid_parameter_handler
patches as a contingency plan, in case nobody is willing to
write this program for the Mozilla installer.

Another problem with these _set_invalid_parameter_handler patches
is that the invalid parameter handler is a process-wide property,
so it is a bad idea for a library like NSS to change a process
property.  Even though I attempt to restore it, it cannot be done
in a thread-safe way.
Because the mozilla installer may be run from an unprivileged user, there is no reliable way to set any timestamps on system files from it (such a patch would not be accepted).
Ben: I guess I made a wrong assumption.  Does the Mozilla installer
install msvcr80.dll in the system32 directory, or does Mozilla use
msvcr80.dll as a private DLL (installed in the directory where the
.exe resides)?
The old Mozilla/Firefox installer tries to copy msvcrt.dll to the system32 folder, the new Firefox installer installs msvcrt8.dll to the FF program folder.
Comment on attachment 247255 [details] [diff] [review]
Workaround for the NSS_3_11_BRANCH

The subsequent patches are preferred to this one, if any of the "just ignore asserts because the msvc8 runtime is broken" style patches are acceptable.
Attachment #247255 - Flags: review?(rrelyea)
Updated the patch to test mod->internal so that we only use
the workaround on the softoken.
Attachment #247255 - Attachment is obsolete: true
Attachment #247627 - Flags: superreview?(rrelyea)
Attachment #247627 - Flags: review?(nelson)
Attachment #247255 - Flags: superreview?(nelson)
Attachment 247508 [details] [diff] adds a test for mod->internal, which I favor.
Attachment 247627 [details] [diff] removes the code that hides the assertion failure dialogs.
I think I don't understand this combination.
Does this have the effect of keeping the assertion failure dialogs, but 
making them non-fatal?
The full workaround requires that we fix the _DEBUG bug (bug 362577)
first.  This is why I also provided a smaller workaround, which can
be checked in right away.  With the smaller workaround, a debug build
of Mozilla will still get an assertion failure dialog from the debug
C run-time library.  I also prefer the full workaround, but we'd need
to fix the _DEBUG bug first.
*** Bug 363090 has been marked as a duplicate of this bug. ***
I downloaded and installed a SeaMonkey trunk nightly built 2006/12/26.
I immediately started experiencing this bug.  After launching SM, the first 
act I did that necessitated the use of NSS caused SM to crash.  No Talkback.  
The cause was very old file I had created with the touch program (attachd to
this bug) and had put into /Windows/system32 to test this bug, and the fix. 
I found the old file with the findold program (also attached to this bug).
Removing that very old file file made SM stop crashing.  

This experience made me think that we really do need to fix this for 3.11
and not let the fix wait for 3.12, which is why I'm reopening this bug.  

I had been concerned about Wan-Teh's patch named 
"Workaround for the NSS_3_11_BRANCH v2" because it seems to me that this 
patch will cause us to ignore ALL run time errors during NSS initialization
and not only the one error caused by old files.  I decided to try to see if
the new error handler callback function in Wan-Teh's patch could be made to 
examine its arguments to recognize the specific case we're trying to fix.

But to test it, I had to download and install MSVC 8 (a.k.a. 2005 Express Edition).  I also downloaded Service Pack 1 for it and installed that too.
I already had the Platform SDK installed.  Soon, I had figured out how to 
build NSS with it, and it passed all.sh.

There's one problem with patch "Workaround for the NSS_3_11_BRANCH v2".  
The patch code references symbols defined in stddef.h but doesn't include
<stddef.h>.  Those symbols are only defined in the stddef.h that comes 
with MSVC 8.  They're NOT defined in the stddef.h that is part of the 
Platform SDK.  One must re-arrange the order of the directories searched 
for system header files by MSVC8, to make it seach its own headers before 
searching the PSDK headers, to get the code to build with this patch.

Once I got past that, I went back to test the bug, to ensure that I could 
reproduce it before testing the patch's effectiveness.  To my surprise and
dismay, I found that I can no longer reproduce this crash AT ALL, not even
with the SM code from 12/26 (which I never modified).  That is, I can not
reproduce the crash with code I built, nor with code built by others, code
that previously crashed many times.

I have created numerous very old files (dated in the 1700's) in system32.  
I have stepped through the code that uses _findnext (the crt version) and 
watched it succesfully find the old files.  The creation timestamp on them 
shows up as (64-bit) -1, but "finding" it doesn't cause an exception/crash 
any more.

So, now I'm stumped.  It seems something has made the problem "go away".
I wish I knew what it was.  Maybe THAT's the best answer for this bug, 
to get the problem to "go away" for everybody.  I just wish I knew how/why
it went away.  
Status: RESOLVED → REOPENED
Resolution: FIXED → ---
Target Milestone: 3.12 → 3.11.5
the runtime library from express might be different than the other one...
MSDN documentation says I only need to include <stdlib.h> to
use _set_invalid_parameter_handler.  pk11load.c includes
"seccomon.h", which includes "secport.h", which includes
<stdlib.h>.  My header file search path is:

INCLUDE=C:\Program Files\Microsoft Platform SDK\Include;C:\Program Files\Microso
ft Visual Studio 8\VC\INCLUDE;

I suspect that Nelson's using a very old version of the
Platform SDK.  The version I'm using is "Microsoft Platform
SDK for Windows Server 2003 SP1".
Comment on attachment 247627 [details] [diff] [review]
Workaround for the NSS_3_11_BRANCH v2

This patch doesn't compile because the newly reference symbols are defined in <stddef.h> which is not #included by this patch.

Further, if possible,  the new function myInvalidParameterHandler should somehow test that it has caught the specific error that we're trying to ignore, and not some other error that we should not be ignoring.
Attachment #247627 - Flags: review?(nelson) → review-
Wan-Teh, I'm using the same PSDK as you are (I believe).
I was using an INCLUDE variable equivalent to yours, one that specified
the PSDK first, and then the compiler's include directory.  I found it 
necessary to reverse that order to get this to build.
(In reply to comment #53)
> the runtime library from express might be different than the other one...

The RTL msvcr80.dll in MSVC8 SP1 differs from the one in the original vc8. 
One is version 8.00.50727.42 created Friday, September 23, 2005, 06:29:16
SP1 is version 8.0.50727.762 created Friday, December 01, 2006, 22:54:32

I installed SP1 on December 28, 2006, so that December 01 date is not the
date I installed it.

But it's not immediately clear to me how installing a new msvcr80.dll in
C:\WINDOWS\WinSxS would change the behavior of SeaMonkey.  Each of the
directories in which I have installed a version SeaMonkey or FireFox has 
its own copy of msvcr80.dll.  The copy in each of those directories is 
version 8.00.50727.42.  

As I understand it, when a DLL is loaded by simple name (not path name), 
the directory where the .exe resides is searched first for that DLL, so 
FF and SM should always be using the copy of msvcr80.dll in their own 
directory, and a change to a copy in another directory ought to have no 
effect.  If my understanding is mistaken, please enlighten me!

If we can fix this bug by shipping the newer SP1 version of msvcr80.dll
with SM and FF, let's do that with all haste!
Here's a question for anyone using VC8 (and/or 2005 Express):
Do you have a copy of the PDB file for VC8's msvcr80D.dll ?
If so, where does it reside?  How did you get it?

I don't have it, and that entirely defeats the purpose of using the Debug RTL. 
:(
I found this explanation of how the new msvcr80.dll overrides the one in 
SM's directory.

MSVC 8 installs its DLLs in the "Side by Side" directory:  /Windows/WinSxS 
This is explained in some detail in this blog article:
http://blogs.gotdotnet.com/martynl/archive/2005/10/13/480880.aspx

The blog article http://blog.kalmbachnet.de/?postid=80 says "If a DLL is 
installed in the WinSxS folder, the local DLLs will be ignored."  (The author 
uses the term "local DLLs" to mean DLLs installed in the same directory as 
the application's EXE file.)  So, that explains why SM stopped crashing after
I installed MSVC 8 (2005 Express) SP1. 

At the very minimum, I think mozilla applications that are now being built
with MSVC 8 (2005 Express) should begin to redistribute this new msvcr80.dll
instead of the one they're distributing now.  

Other relevant links:
http://forums.microsoft.com/MSDN/ShowPost.aspx?PostID=836407&SiteID=1
http://forums.microsoft.com/MSDN/ShowPost.aspx?PostID=1034641&SiteID=1
http://msdn2.microsoft.com/en-us/library/aa983349(VS.80).aspx

That last URL says: "If you originally deployed your application with an 
application local copy of a Visual C++ DLL, you should deploy your update 
with an application local copy of the Visual C++ DLL that was included with 
the service pack or hotfix."  But probably the best solution is to distribute
the "redistributable" CRT installer that (reportedly) comes with SP1. 

Caveat: I didn't get a copy of this redistributable program vcredist_*.exe 
when I installed SP1. Your mileage may vary.
Definitive document on Windows' DLL search sequence:
http://msdn2.microsoft.com/en-us/library/aa374224.aspx
pdbs can be retrieved using the microsoft symbol server, or you can build your own versions of the runtime library and force those pdbs to be used.

personally i use the ms symbol server:
http://developer.mozilla.org/en/docs/How_to_get_a_stacktrace_with_WinDbg
It sounds like Microsoft fixed this bug in Visual C++ 2005 SP1, but I can't find anything to verify that.  If that's the case, then I think the solution would be to update our build machines to use VC2k5 SP1, so we ship the latest CRT.

FYI, a list of CRT bugs fixed in SP1 can be found here:
http://blogs.msdn.com/vcblog/pages/643313.aspx
Ted, thanks for the link to the Visual C++ team blog.  After browsing around,
I found this bug report, which is essentially the same as our bug:

http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=101425

That bug was marked as a duplicate of

http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=98949

which has a comment that says it's fixed, although I can't find it listed in
this knowledge base article either:

http://support.microsoft.com/kb/918526
Wan-Teh and I are agreed: the solution for this bug is to build NSS with 
service pack 1 of MSVC 8 (MSVC 2005 Express).
Status: REOPENED → RESOLVED
Closed: 18 years ago17 years ago
Resolution: --- → FIXED
Slight clarification:  The solution to this bug, for users of NSS 3.11.x who 
are building with MSVC8 (MSVC 2005 Express) is for those users to upgrade to
Service Pack 1, and build it with that.   NSS 3.12 contains a workaround in
NSS source code.  We won't be committing that change to 3.11.x due to FIPS.
Target Milestone: 3.11.5 → 3.12
Attachment #247627 - Flags: superreview?(rrelyea)
Wan-Teh, 
Since we're going to modify softoken in NSS 3.11.6 to fix bug 364684,
should we reconsider applying the workaround patch for *this* bug 
(which we previously applied to the trunk) to the 3_11 branch, also?
I prefer to ask developers to upgrade to Visual C++ 2005 Service
Pack 1.  SP1 fixes a lot of bugs, so I expect most developers will
upgrade in a few months anyway.  But it's fine to apply the patch
for this bug to the NSS_3_11_BRANCH.
I checked in the patch (attachment 222989 [details] [diff] [review]) on the NSS_3_11_BRANCH
for NSS 3.11.7.

Checking in win_rand.c;
/cvsroot/mozilla/security/nss/lib/freebl/win_rand.c,v  <--  win_rand.c
new revision: 1.9.28.2; previous revision: 1.9.28.1
done
Target Milestone: 3.12 → 3.11.7
The MSVC++ 2005 SP1 Redistributable Package (x86) may be downloaded from
http://www.microsoft.com/downloads/details.aspx?familyid=200B2FD9-AE1A-4A14-984D-389C36F85647&displaylang=en

Installing it is believed to cure this problem.
Comment on attachment 222989 [details] [diff] [review]
patch that builds and works on WinXP (dunno about other Windows flavors, or OS/2).

>Index: win_rand.c
>===================================================================
>RCS file: /cvsroot/mozilla/security/nss/lib/freebl/win_rand.c,v
>retrieving revision 1.9
>diff -u -9 -r1.9 win_rand.c
>--- win_rand.c	25 Apr 2004 15:03:08 -0000	1.9
>+++ win_rand.c	23 May 2006 03:42:58 -0000
>@@ -239,55 +239,54 @@
>     rv |= EnumSystemFilesWithNSPR("\\Windows\\Temporary Internet Files", TRUE, func);
>     rv |= EnumSystemFilesWithNSPR("\\Temp",    FALSE, func);
>     rv |= EnumSystemFilesWithNSPR("\\Windows", FALSE, func);
>     return rv;
> #else
>     int                 iStatus;
>     char                szSysDir[_MAX_PATH];
>     char                szFileName[_MAX_PATH];
> #ifdef _WIN32
>-    struct _finddata_t  fdData;
>-    long                lFindHandle;
>+    WIN32_FIND_DATA     fdData;
>+    HANDLE              lFindHandle;
> #else
>     struct _find_t  fdData;
> #endif
> 
>     if (!GetSystemDirectory(szSysDir, sizeof(szSysDir)))
>         return FALSE;
> 
>     // tack *.* on the end so we actually look for files. this will
>     // not overflow
>     strcpy(szFileName, szSysDir);
>     strcat(szFileName, "\\*.*");
> 
> #ifdef _WIN32
>-    lFindHandle = _findfirst(szFileName, &fdData);
>-    if (lFindHandle == -1)
>+    lFindHandle = FindFirstFile(szFileName, &fdData);
>+    if (lFindHandle == INVALID_HANDLE_VALUE)
>         return FALSE;
>+    do {
>+        // pass the full pathname to the callback
>+        sprintf(szFileName, "%s\\%s", szSysDir, fdData.cFileName);
>+        (*func)(szFileName);
>+        iStatus = FindNextFile(lFindHandle, &fdData);
>+    } while (iStatus != 0);
>+    FindClose(lFindHandle);
> #else
>-    if (_dos_findfirst(szFileName, _A_NORMAL | _A_RDONLY | _A_ARCH | _A_SUBDIR, &fdData) != 0)
>+    if (_dos_findfirst(szFileName, 
>+             _A_NORMAL | _A_RDONLY | _A_ARCH | _A_SUBDIR, &fdData) != 0)
>         return FALSE;
>-#endif
>-
>     do {
>         // pass the full pathname to the callback
>         sprintf(szFileName, "%s\\%s", szSysDir, fdData.name);
>         (*func)(szFileName);
>-
>-#ifdef _WIN32
>-        iStatus = _findnext(lFindHandle, &fdData);
>-#else
>         iStatus = _dos_findnext(&fdData);
>-#endif
>     } while (iStatus == 0);
>-
>-#ifdef _WIN32
>-    _findclose(lFindHandle);
>+    _dos_findclose(&fdData);
> #endif
> 
>     return TRUE;
> #endif
> }
> 
> static PRInt32
> CountFiles(const char *file)
> {
Flags: needinfo?(nelson)
You need to log in before you can comment on or make changes to this bug.