Closed Bug 485145 Opened 15 years ago Closed 15 years ago

Miscellaneous crashes in signtool on Windows

Categories

(NSS :: Tools, defect, P2)

x86_64
Windows Vista
defect

Tracking

(Not tracked)

RESOLVED FIXED
3.12.4

People

(Reporter: julien.pierre, Assigned: julien.pierre)

References

Details

Attachments

(1 file, 2 obsolete files)

This occurred in the 32-bit debug build on my home machine. The test was :

tools.sh: Show who signed jar ------------------------------

signtool -w nojs.jar -d ../alicedir

Here is the stack :

>	sqlite3.dll!pager_lookup(Pager * pPager=0x0260f710, unsigned int pgno=37)  Line 17629 + 0x9 bytes	C
 	sqlite3.dll!sqlite3PagerAcquire(Pager * pPager=0x0260f710, unsigned int pgno=37, PgHdr * * ppPage=0x0017f1b8, int noContent=0)  Line 19760 + 0xd bytes	C
 	sqlite3.dll!getPage(BtShared * pBt=0x02769740, unsigned int pgno=37, MemPage * * ppPage=0x0017f1f8, int noContent=0)  Line 22501 + 0x17 bytes	C
 	sqlite3.dll!getAndInitPage(BtShared * pBt=0x02769740, unsigned int pgno=37, MemPage * * ppPage=0x0017f1f8, MemPage * pParent=0x02629ae8)  Line 22528 + 0x13 bytes	C
 	sqlite3.dll!moveToChild(BtCursor * pCur=0x026124b0, unsigned int newPgno=37)  Line 24274 + 0x18 bytes	C
 	sqlite3.dll!sqlite3BtreeMoveto(BtCursor * pCur=0x026124b0, const void * pKey=0x00000000, __int64 nKey=38, int biasRight=0, int * pRes=0x0017f274)  Line 24588 + 0xd bytes	C
 	sqlite3.dll!sqlite3VdbeCursorMoveto(Cursor * p=0x02617cb8)  Line 30458 + 0x1e bytes	C
 	sqlite3.dll!sqlite3VdbeExec(Vdbe * p=0x02629428)  Line 33749 + 0xc bytes	C
 	sqlite3.dll!sqlite3Step(Vdbe * p=0x02629428)  Line 31150 + 0x9 bytes	C
 	sqlite3.dll!sqlite3_step(sqlite3_stmt * pStmt=0x02629428)  Line 31205 + 0x9 bytes	C
 	softokn3.dll!sdb_FindObjects(SDBStr * sdb=0x02614320, SDBFindStr * sdbFind=0x0262aae0, unsigned long * object=0x02614258, unsigned long arraySize=5, unsigned long * count=0x0017fa40)  Line 761 + 0x9 bytes	C
 	softokn3.dll!sftkdb_FindObjects(SFTKDBHandleStr * handle=0x026101b8, SDBFindStr * find=0x0262aae0, unsigned long * ids=0x02614258, int arraySize=5, unsigned long * count=0x0017fa40)  Line 1242 + 0x1c bytes	C
 	softokn3.dll!sftk_searchDatabase(SFTKDBHandleStr * handle=0x026101b8, SFTKSearchResultsStr * search=0x02624550, const CK_ATTRIBUTE * pTemplate=0x0017fb8c, long ulCount=4)  Line 4106 + 0x19 bytes	C
 	softokn3.dll!sftk_searchTokenList(SFTKSlotStr * slot=0x0260f1f8, SFTKSearchResultsStr * search=0x02624550, CK_ATTRIBUTE * pTemplate=0x0017fb8c, long ulCount=4, int * tokenOnly=0x0017fa90, int isLoggedIn=0)  Line 4227 + 0x15 bytes	C
 	softokn3.dll!NSC_FindObjectsInit(unsigned long hSession=16777217, CK_ATTRIBUTE * pTemplate=0x0017fb8c, unsigned long ulCount=4)  Line 4280 + 0x1d bytes	C
 	nss3.dll!find_objects(NSSTokenStr * tok=0x0263f638, nssSessionStr * sessionOpt=0x0263d6a0, CK_ATTRIBUTE * obj_template=0x0017fb8c, unsigned long otsize=4, unsigned int maximumOpt=1, PRStatus * statusOpt=0x0017fbe8)  Line 335 + 0x17 bytes	C
 	nss3.dll!find_objects_by_template(NSSTokenStr * token=0x0263f638, nssSessionStr * sessionOpt=0x0263d6a0, CK_ATTRIBUTE * obj_template=0x0017fb8c, unsigned long otsize=4, unsigned int maximumOpt=1, PRStatus * statusOpt=0x0017fbe8)  Line 465 + 0x1d bytes	C
 	nss3.dll!nssToken_FindCertificateByIssuerAndSerialNumber(NSSTokenStr * token=0x0263f638, nssSessionStr * sessionOpt=0x0263d6a0, NSSItemStr * issuer=0x0017fc20, NSSItemStr * serial=0x0017fc34, nssTokenSearchType searchType=nssTokenSearchType_TokenOnly, PRStatus * statusOpt=0x0017fbe8)  Line 866 + 0x1b bytes	C
 	nss3.dll!nssTrustDomain_FindCertificateByIssuerAndSerialNumber(NSSTrustDomainStr * td=0x0263d5b8, NSSItemStr * issuer=0x0017fc20, NSSItemStr * serial=0x0017fc34)  Line 818 + 0x1d bytes	C
 	nss3.dll!nssTrustDomain_FindCertificateByEncodedCertificate(NSSTrustDomainStr * td=0x0263d5b8, NSSItemStr * ber=0x0017fc70)  Line 885 + 0x11 bytes	C
 	nss3.dll!NSSTrustDomain_FindCertificateByEncodedCertificate(NSSTrustDomainStr * td=0x0263d5b8, NSSItemStr * ber=0x0017fc70)  Line 897 + 0xd bytes	C
 	nss3.dll!CERT_NewTempCertificate(NSSTrustDomainStr * handle=0x0263d5b8, SECItemStr * derCert=0x0276dc68, char * nickname=0x00000000, int isperm=0, int copyDER=1)  Line 380 + 0xd bytes	C
 	nss3.dll!CERT_ImportCerts(NSSTrustDomainStr * certdb=0x0263d5b8, SECCertUsageEnum usage=certUsageObjectSigner, unsigned int ncerts=1, SECItemStr * * derCerts=0x0276df00, CERTCertificateStr * * * retCerts=0x0017fd60, int keepCerts=0, int caOnly=0, char * nickname=0x00000000)  Line 2554 + 0x19 bytes	C
 	smime3.dll!sec_pkcs7_verify_signature(SEC_PKCS7ContentInfoStr * cinfo=0x0276db98, SECCertUsageEnum certusage=certUsageObjectSigner, SECItemStr * detached_digest=0x0017fdf4, HASH_HashType digest_type=HASH_AlgSHA1, int keepcerts=0)  Line 1548 + 0x21 bytes	C
 	smime3.dll!SEC_PKCS7VerifyDetachedSignature(SEC_PKCS7ContentInfoStr * cinfo=0x0276db98, SECCertUsageEnum certusage=certUsageObjectSigner, SECItemStr * detached_digest=0x0017fdf4, HASH_HashType digest_type=HASH_AlgSHA1, int keepcerts=0)  Line 1928 + 0x19 bytes	C
 	signtool.exe!jar_validate_pkcs7(JAR_ * jar=0x026171c0, JAR_Signer_ * signer=0x02614158, char * data=0x02628fd8, long length=1090)  Line 1651 + 0x13 bytes	C
 	signtool.exe!jar_parse_digital_signature(char * raw_manifest=0x02628fd8, JAR_Signer_ * signer=0x02614158, long length=1090, JAR_ * jar=0x026171c0)  Line 813 + 0x15 bytes	C
 	signtool.exe!jar_parse_sig(JAR_ * jar=0x026171c0, const char * path=0x026140d8, char * raw_manifest=0x02628fd8, long length=1090)  Line 243 + 0x15 bytes	C
 	signtool.exe!JAR_parse_manifest(JAR_ * jar=0x026171c0, char * raw_manifest=0x02628fd8, long length=1090, const char * path=0x026140d8, const char * url=0x0043eff0)  Line 180 + 0x15 bytes	C
 	signtool.exe!jar_extract_mf(JAR_ * jar=0x026171c0, jarArch format=jarArchZip, PRFileDesc * fp=0x02614058, char * ext=0x0043efdc)  Line 735 + 0x1c bytes	C
 	signtool.exe!jar_extract_manifests(JAR_ * jar=0x026171c0, jarArch format=jarArchZip, PRFileDesc * fp=0x02614058)  Line 615 + 0x16 bytes	C
 	signtool.exe!JAR_pass_archive(JAR_ * jar=0x026171c0, jarArch format=jarArchZip, char * filename=0x02761bc0, const char * url=0x0043e5fc)  Line 134 + 0x11 bytes	C
 	signtool.exe!JarWho(char * filename=0x02761bc0)  Line 305 + 0x14 bytes	C
 	signtool.exe!main(int argc=5, char * * argv=0x0260ed50)  Line 1027 + 0xb bytes	C
 	signtool.exe!__tmainCRTStartup()  Line 586 + 0x17 bytes	C
 	kernel32.dll!7699e3f3() 	
 	[Frames below may be incorrect and/or missing, no symbols loaded for kernel32.dll]	
 	ntdll.dll!7740cfed() 	
 	ntdll.dll!7740d1ff() 	


Here is the value of pPager

-		pPager	0x0260f710 {journalOpen=0 journalStarted=0 useJournal='' ...}	Pager *
		journalOpen	0	unsigned char
		journalStarted	0	unsigned char
		useJournal	1 ''	unsigned char
		noReadlock	0	unsigned char
		stmtOpen	0	unsigned char
		stmtInUse	0	unsigned char
		stmtAutoopen	0	unsigned char
		noSync	0	unsigned char
		fullSync	1 ''	unsigned char
		full_fsync	0	unsigned char
		state	1 ''	unsigned char
		tempFile	0	unsigned char
		readOnly	0	unsigned char
		needSync	0	unsigned char
		dirtyCache	0	unsigned char
		alwaysRollback	0	unsigned char
		memDb	0	unsigned char
		setMaster	0	unsigned char
		doNotSync	0	unsigned char
		exclusiveMode	0	unsigned char
		changeCountDone	0	unsigned char
		errCode	0	int
		dbSize	39	int
		origDbSize	0	int
		stmtSize	0	int
		nRec	0	int
		cksumInit	0	unsigned int
		stmtNRec	0	int
		nExtra	88	int
		pageSize	1024	int
		nPage	30	int
		nMaxPage	30	int
		nRef	4	int
		mxPage	2000	int
+		aInJournal	0x00000000 <Bad Ptr>	unsigned char *
+		aInStmt	0x00000000 <Bad Ptr>	unsigned char *
+		zFilename	0x0260f7e8 "c:\NSS\tip\mozilla\tests_results\security\localhost.16\upgradedb\alicedir\cert9.db"	char *
+		zJournal	0x0260f88e "c:\NSS\tip\mozilla\tests_results\security\localhost.16\upgradedb\alicedir\cert9.db-journal"	char *
+		zDirectory	0x0260f83b "c:\NSS\tip\mozilla\tests_results\security\localhost.16\upgradedb\alicedir\cert9.db"	char *
+		fd	0x0276fb78 {pMethod=0x027509cc }	OsFile *
+		jfd	0x00000000 {pMethod=??? }	OsFile *
+		stfd	0x00000000 {pMethod=??? }	OsFile *
+		pBusyHandler	0x0260f59c {xFunc=0x0273a570 pArg=0x0260f488 nBusy=0 }	BusyHandler *
+		pFirst	0x02613940 {pPager=0x0260f710 pgno=4 pNextHash=0x00000000 ...}	PgHdr *
+		pLast	0x0263b200 {pPager=0x0260f710 pgno=39 pNextHash=0x00000000 ...}	PgHdr *
+		pFirstSynced	0x02613940 {pPager=0x0260f710 pgno=4 pNextHash=0x00000000 ...}	PgHdr *
+		pAll	0x0263bb90 {pPager=0x0260f710 pgno=29 pNextHash=0x00000000 ...}	PgHdr *
+		pStmt	0x00000000 {pPager=??? pgno=??? pNextHash=??? ...}	PgHdr *
+		pDirty	0x00000000 {pPager=??? pgno=??? pNextHash=??? ...}	PgHdr *
		journalOff	0	__int64
		journalHdr	0	__int64
		stmtHdrOff	0	__int64
		stmtCksum	0	__int64
		stmtJSize	0	__int64
		sectorSize	512	int
		xDestructor	0x026ff2e0 pageDestructor(PgHdr *, int)	void (PgHdr *, int)*
		xReiniter	0x026ff350 pageReinit(PgHdr *, int)	void (PgHdr *, int)*
		nHash	256	int
+		aHash	0x02612090	PgHdr * *
+		pTmpSpace	0x0260f908 "Ä"	char *
+		dbFileVers	0x0260f7d8 ""	char [16]

pgno is 37 .

p was 0x8000a870, an invalid pointer .

It seems that the linked list got corrupted somehow.
Here is another crash stack I got in signtool, not in sqlite this time, but similar enough :

 	ntdll.dll!7707f773() 	
 	[Frames below may be incorrect and/or missing, no symbols loaded for ntdll.dll]	
>	ntdll.dll!7707f285() 	
 	kernel32.dll!76943593() 	
 	msvcr90.dll!free(void * pBlock=0x026d8228)  Line 110	C
 	libnspr4.dll!PR_Free(void * ptr=0x026d8228)  Line 536 + 0xa bytes	C
 	nssutil3.dll!PORT_Free_Util(void * ptr=0x026d8228)  Line 152 + 0xa bytes	C
 	signtool.exe!jar_extract_mf(JAR_ * jar=0x026d4958, jarArch format=jarArchZip, PRFileDesc * fp=0x026bdbc0, char * ext=0x0043efd8)  Line 737 + 0x9 bytes	C
 	signtool.exe!jar_extract_manifests(JAR_ * jar=0x026d4958, jarArch format=jarArchZip, PRFileDesc * fp=0x026bdbc0)  Line 611 + 0x16 bytes	C
 	signtool.exe!JAR_pass_archive(JAR_ * jar=0x026d4958, jarArch format=jarArchZip, char * filename=0x02691bc0, const char * url=0x0043e5fc)  Line 134 + 0x11 bytes	C
 	signtool.exe!JarWho(char * filename=0x02691bc0)  Line 305 + 0x14 bytes	C
 	signtool.exe!main(int argc=5, char * * argv=0x026bed50)  Line 1027 + 0xb bytes	C
 	signtool.exe!__tmainCRTStartup()  Line 586 + 0x17 bytes	C
 	kernel32.dll!7694e3f3() 	
 	ntdll.dll!770ccfed() 	
 	ntdll.dll!770cd1ff()
This happens very frequently on my home system which has been running QA non-stop.
I wish I knew the libjar and signtool code better in order to add assertions to track down this bug.
Another crash, in 32 bit optimized build this time, that shows heap corruption.

 	msvcr90.dll!_calloc_impl(unsigned int num=1088, unsigned int size=1088, int * errno_tmp=0x0017fe44)  Line 94 + 0xf bytes	C
 	msvcr90.dll!calloc(unsigned int num=1, unsigned int size=1088)  Line 50 + 0xf bytes	C
 	nssutil3.dll!PORT_ZAlloc_Util(unsigned int bytes=1088)  Line 140 + 0x17 bytes	C
>	signtool.exe!jar_inflate_memory(unsigned int method=4239747, long * length=0x0017fed4, long expected_out_len=0, char * * data=0x0017fed0)  Line 485 + 0x6 bytes	C
 	signtool.exe!jar_extract_mf(JAR_ * jar=0x00db7d30, jarArch format=4240252, PRFileDesc * fp=0x00db6e70, char * ext=0x00417410)  Line 717 + 0x12 bytes	C
 	signtool.exe!jar_extract_manifests(JAR_ * jar=0x00000000, jarArch format=jarArchGuess, PRFileDesc * fp=0x00000000)  Line 615 + 0xc bytes	C
 	signtool.exe!JAR_pass_archive(JAR_ * jar=0x00db7d30, jarArch format=jarArchGuess, char * filename=0x003c17f0, const char * url=0x0041991c)  Line 134 + 0x9 bytes	C
 	signtool.exe!JarWho(char * filename=0x003c17f0)  Line 307	C
 	signtool.exe!main(int argc=5, char * * argv=0x003c1520)  Line 1027 + 0x6 bytes	C
 	signtool.exe!__tmainCRTStartup()  Line 586 + 0x17 bytes	C
 	kernel32.dll!7610e3f3() 	
 	ntdll.dll!7708cfed() 	
 	ntdll.dll!7708d1ff()
I think signtool crashes that do not involve sqlite3 belong in a separate bug.
Nelson,

That may be, or perhaps not. I'm not sure if this crash is related to sqlite after all. If it is memory corruption, then sqlite could just be the victim. I only got the signtool crash with a stack in sqlite once. But I have gotten plenty of others that were just from lib/jar like the ones I posted. These signtool crashes are fairly reproducible on my system. I am peppering jar_extract_mf with _heapchk() calls in hope that it will narrow down the problem. That function appears in all my signtool crash stacks so far.
I hit this assertion after JAR_parse_manifest overnight in signtool.

        hc = _heapchk();
        PORT_Assert(_HEAPOK == hc);

        status = JAR_parse_manifest 
           (jar, manifest, length, it->pathname, "url");

        hc = _heapchk();
        PORT_Assert(_HEAPOK == hc);
Summary: signtool crash on Windows in sqlite → Miscellaneous crashes in signtool on Windows
This last assert happened with the shared DB, i.e. with sqlite. Here is the last thing from output.log :

tools.sh: #6066: Listing signed files in jar (signtool -v)  - PASSED
tools.sh: Show who signed jar ------------------------------
signtool -w nojs.jar -d ../alicedir

Unfortunately, I don't think that is the real command that was executed. I think the script just lies and the real command was :
signtool -w nojs.jar -d sql:../alicedir

I tried to run it again, but it succeeded this time. I will add more assertions in JAR_parse_manifest .
I finally was able to obtain a better stack trace. I am now using the debug C runtime. I also had to add a lot more assertions in every function of jarver.c .

Here is the stack trace :

 	kernel32.dll!0000000076db2fda() 	
 	msvcr90d.dll!_heapchk()  Line 96 + 0x12 bytes	C
>	signtool.exe!jar_eat_line(int lines=0, int eating=1, char * data=0x000000000276a7d9, long * len=0x000000000012f2e0)  Line 1254 + 0x6 bytes	C
 	signtool.exe!jar_parse_any(JAR_ * jar=0x0000000002749a00, int type=3, JAR_Signer_ * signer=0x00000000027484a0, char * raw_manifest=0x000000000276a7a1, long length=491, const char * path=0x0000000002747290, const char * url=0x0000000140053744)  Line 737 + 0x1c bytes	C
 	signtool.exe!jar_parse_sf(JAR_ * jar=0x0000000002749a00, char * raw_manifest=0x000000000276a5e0, long length=491, const char * path=0x0000000002747290, const char * url=0x0000000140053744)  Line 449 + 0x3e bytes	C
 	signtool.exe!JAR_parse_manifest(JAR_ * jar=0x0000000002749a00, char * raw_manifest=0x000000000276a5e0, long length=491, const char * path=0x0000000002747290, const char * url=0x0000000140053744)  Line 203 + 0x23 bytes	C
 	signtool.exe!jar_extract_mf(JAR_ * jar=0x0000000002749a00, jarArch format=jarArchZip, PRFileDesc * fp=0x0000000002748120, char * ext=0x0000000140053738)  Line 763 + 0x2b bytes	C
 	signtool.exe!jar_extract_manifests(JAR_ * jar=0x0000000002749a00, jarArch format=jarArchZip, PRFileDesc * fp=0x0000000002748120)  Line 620 + 0x1a bytes	C
 	signtool.exe!JAR_pass_archive(JAR_ * jar=0x0000000002749a00, jarArch format=jarArchZip, char * filename=0x00000000002e9160, const char * url=0x0000000140052840)  Line 140 + 0x13 bytes	C
 	signtool.exe!VerifyJar(char * filename=0x00000000002e9160)  Line 74 + 0x1b bytes	C
 	signtool.exe!main(int argc=9, char * * argv=0x0000000002717b90)  Line 1007 + 0xc bytes	C
 	signtool.exe!__tmainCRTStartup()  Line 586 + 0x19 bytes	C
 	signtool.exe!mainCRTStartup()  Line 403	C
 	kernel32.dll!0000000076dd495d() 	
 	ntdll.dll!0000000076fd8791() 	


The _heapchk() assertion that was hit is just before the last return statement in jar_eat_line . I also had one on the entry of this function, and the heap was OK. So, the memory got corrupted within jar_eat_line.

I can't tell what the value of the raw_len argument passed in was because it has changed within the execution of that function, but the value of raw_manifest was

+		raw_manifest	0x000000000276a7a1 "SHA1-Digest: rMgZOq0YIjcx5XBk/3WwqOQpkAA="	char *
A cursory examination of jar_eat_line shows that it is obviously bogus,
and will run right past the end of the input buffer. 

I do wonder why this is seen only on Windows and why the rest of the 
NSS developers are not experiencing this.  

Please prioritize this signtool work below the CRL cache indexing work.
Attached patch patch moved to bug 487007 (obsolete) — Splinter Review
I ran jarver.c through the "c beautifier" program (cb) to convert it to
NSS coding style.  The resultant patch is pretty big (no surprise).  

I haven't even tested this yet, other than to see that it still compiles,
but I'd like to commit this before we go fixing any real bugs.
Nelson,

re: comment 9,

I don't know why we haven't seen this bug before. I have only ever seen this signtool crash on the Vista x64 OS at home. As far as I know, nobody else runs the QA on that platform. Perhaps I'm seeing it due to the behavior of the memory allocator and the ordering of the heap on Vista. Maybe the part of the memory that is corrupt is not reused on other platforms so the error goes undetected.

I would expect tools like purify to detect this type of error on any platform, but our QA only runs memory leak checking (which I think also checks for other errors) on the SSL tests, and not signtool. If our coverage for those tests was expanded to all the programs in all.sh, we probably would have found this problem before too.

Regarding your patch, I think you should wait until 3.12.3 is a go before committing it.
Priority: -- → P2
Target Milestone: --- → 3.12.4
I got a couple more signtool stacks today. This was running strictly with the trunk - no _heapchk() asserts, but with the debug memory allocator.

 	ntdll.dll!77ec094b() 	
 	[Frames below may be incorrect and/or missing, no symbols loaded for ntdll.dll]	
 	ntdll.dll!77e92c4b() 	
 	ntdll.dll!77e92ace() 	
 	ntdll.dll!77e8f945() 	
 	ntdll.dll!77e8fa88() 	
 	ntdll.dll!77e8f945() 	
 	ntdll.dll!77ef556f() 	
 	msvcr90d.dll!_realloc_base(void * pBlock=0x00000000, unsigned int newsize=1570024)  Line 323 + 0x17 bytes	C
 	9e547915()	
 	msvcr90d.dll!_nh_malloc_dbg_impl(unsigned int nSize=10, int nhFlag=0, int nBlockUse=1, const char * szFileName=0x00000000, int nLine=0, int * errno_tmp=0x0017f50c)  Line 239 + 0x19 bytes	C++
 	msvcr90d.dll!_nh_malloc_dbg(unsigned int nSize=10, int nhFlag=0, int nBlockUse=1, const char * szFileName=0x00000000, int nLine=0)  Line 296 + 0x1d bytes	C++
 	msvcr90d.dll!malloc(unsigned int nSize=10)  Line 56 + 0x15 bytes	C++
 	sqlite3.dll!sqlite3GenericMalloc(int n=10)  Line 15026 + 0xa bytes	C
 	sqlite3.dll!sqlite3MallocRaw(int n=10, int doMemManage=1)  Line 9003 + 0x9 bytes	C
 	sqlite3.dll!sqlite3StrNDup(const char * z=0x025b0db0, int n=9)  Line 9146 + 0xe bytes	C
 	sqlite3.dll!sqlite3NameFromToken(Token * pName=0x025ec894)  Line 41584 + 0x1a bytes	C
 	sqlite3.dll!sqlite3ExprListAppend(ExprList * pList=0x025b6088, Expr * pExpr=0x025ec880, Token * pName=0x025ec894)  Line 37487 + 0x9 bytes	C
 	sqlite3.dll!prepSelectStmt(Parse * pParse=0x0017f894, Select * p=0x025a9c98)  Line 52357 + 0x14 bytes	C
 	sqlite3.dll!sqlite3SelectResolve(Parse * pParse=0x0017f894, Select * p=0x025a9c98, NameContext * pOuterNC=0x00000000)  Line 53553 + 0xd bytes	C
 	sqlite3.dll!sqlite3Select(Parse * pParse=0x0017f894, Select * p=0x025a9c98, int eDest=4, int iParm=0, Select * pParent=0x00000000, int parentTab=0, int * pParentAgg=0x00000000, char * aff=0x00000000)  Line 53850 + 0xf bytes	C
 	sqlite3.dll!yy_reduce(yyParser * yypParser=0x025e8ee0, int yyruleno=104)  Line 61984 + 0x1c bytes	C
 	sqlite3.dll!sqlite3Parser(void * yyp=0x025e8ee0, int yymajor=1, Token yyminor={...}, Parse * pParse=0x0017f894)  Line 62806 + 0x12 bytes	C
 	sqlite3.dll!sqlite3RunParser(Parse * pParse=0x0017f894, const char * zSql=0x025ad170, char * * pzErrMsg=0x0017f960)  Line 63457 + 0x22 bytes	C
 	sqlite3.dll!sqlite3Prepare(sqlite3 * db=0x025a5ee8, const char * zSql=0x025ad170, int nBytes=-1, int saveSqlFlag=1, sqlite3_stmt * * ppStmt=0x0017f9c8, const char * * pzTail=0x00000000)  Line 50818 + 0x14 bytes	C
 	sqlite3.dll!sqlite3_prepare_v2(sqlite3 * db=0x025a5ee8, const char * zSql=0x025ad170, int nBytes=-1, sqlite3_stmt * * ppStmt=0x0017f9c8, const char * * pzTail=0x00000000)  Line 50939 + 0x1b bytes	C
 	softokn3.dll!sdb_FindObjectsInit(SDBStr * sdb=0x025a7af8, const CK_ATTRIBUTE * template=0x025d0bd0, unsigned long count=4, SDBFindStr * * find=0x0017fa20)  Line 707 + 0x15 bytes	C
 	softokn3.dll!sftkdb_FindObjectsInit(SFTKDBHandleStr * handle=0x025b4fb0, const CK_ATTRIBUTE * template=0x0017fb78, unsigned long count=4, SDBFindStr * * find=0x0017fa20)  Line 1220 + 0x18 bytes	C
 	softokn3.dll!sftk_searchDatabase(SFTKDBHandleStr * handle=0x025b4fb0, SFTKSearchResultsStr * search=0x025b5fc8, const CK_ATTRIBUTE * pTemplate=0x0017fb78, long ulCount=4)  Line 4140 + 0x15 bytes	C
 	softokn3.dll!sftk_searchTokenList(SFTKSlotStr * slot=0x025a2258, SFTKSearchResultsStr * search=0x025b5fc8, CK_ATTRIBUTE * pTemplate=0x0017fb78, long ulCount=4, int * tokenOnly=0x0017fa7c, int isLoggedIn=0)  Line 4265 + 0x15 bytes	C
 	softokn3.dll!NSC_FindObjectsInit(unsigned long hSession=16777217, CK_ATTRIBUTE * pTemplate=0x0017fb78, unsigned long ulCount=4)  Line 4318 + 0x1d bytes	C
 	nss3.dll!find_objects(NSSTokenStr * tok=0x025e40b8, nssSessionStr * sessionOpt=0x025f6370, CK_ATTRIBUTE * obj_template=0x0017fb78, unsigned long otsize=4, unsigned int maximumOpt=1, PRStatus * statusOpt=0x0017fbd4)  Line 335 + 0x17 bytes	C
 	nss3.dll!find_objects_by_template(NSSTokenStr * token=0x025e40b8, nssSessionStr * sessionOpt=0x025f6370, CK_ATTRIBUTE * obj_template=0x0017fb78, unsigned long otsize=4, unsigned int maximumOpt=1, PRStatus * statusOpt=0x0017fbd4)  Line 465 + 0x1d bytes	C
 	nss3.dll!nssToken_FindCertificateByIssuerAndSerialNumber(NSSTokenStr * token=0x025e40b8, nssSessionStr * sessionOpt=0x025f6370, NSSItemStr * issuer=0x0017fc0c, NSSItemStr * serial=0x0017fc20, nssTokenSearchType searchType=nssTokenSearchType_TokenOnly, PRStatus * statusOpt=0x0017fbd4)  Line 866 + 0x1b bytes	C
 	nss3.dll!nssTrustDomain_FindCertificateByIssuerAndSerialNumber(NSSTrustDomainStr * td=0x025f6288, NSSItemStr * issuer=0x0017fc0c, NSSItemStr * serial=0x0017fc20)  Line 818 + 0x1d bytes	C
 	nss3.dll!nssTrustDomain_FindCertificateByEncodedCertificate(NSSTrustDomainStr * td=0x025f6288, NSSItemStr * ber=0x0017fc5c)  Line 885 + 0x11 bytes	C
 	nss3.dll!NSSTrustDomain_FindCertificateByEncodedCertificate(NSSTrustDomainStr * td=0x025f6288, NSSItemStr * ber=0x0017fc5c)  Line 897 + 0xd bytes	C
 	nss3.dll!CERT_NewTempCertificate(NSSTrustDomainStr * handle=0x025f6288, SECItemStr * derCert=0x025f6b98, char * nickname=0x00000000, int isperm=0, int copyDER=1)  Line 380 + 0xd bytes	C
 	nss3.dll!CERT_ImportCerts(NSSTrustDomainStr * certdb=0x025f6288, SECCertUsageEnum usage=certUsageObjectSigner, unsigned int ncerts=1, SECItemStr * * derCerts=0x025f6e30, CERTCertificateStr * * * retCerts=0x0017fd4c, int keepCerts=0, int caOnly=0, char * nickname=0x00000000)  Line 2554 + 0x19 bytes	C
 	smime3.dll!sec_pkcs7_verify_signature(SEC_PKCS7ContentInfoStr * cinfo=0x025f6ac8, SECCertUsageEnum certusage=certUsageObjectSigner, SECItemStr * detached_digest=0x0017fde0, HASH_HashType digest_type=HASH_AlgSHA1, int keepcerts=0)  Line 1548 + 0x21 bytes	C
 	smime3.dll!SEC_PKCS7VerifyDetachedSignature(SEC_PKCS7ContentInfoStr * cinfo=0x025f6ac8, SECCertUsageEnum certusage=certUsageObjectSigner, SECItemStr * detached_digest=0x0017fde0, HASH_HashType digest_type=HASH_AlgSHA1, int keepcerts=0)  Line 1928 + 0x19 bytes	C
>	signtool.exe!jar_validate_pkcs7(JAR_ * jar=0x025b5630, JAR_Signer_ * signer=0x0276a8a8, char * data=0x025debe8, long length=1088)  Line 1651 + 0x13 bytes	C
 	signtool.exe!jar_parse_digital_signature(char * raw_manifest=0x025debe8, JAR_Signer_ * signer=0x0276a8a8, long length=1088, JAR_ * jar=0x025b5630)  Line 813 + 0x15 bytes	C
 	signtool.exe!jar_parse_sig(JAR_ * jar=0x025b5630, const char * path=0x0276a818, char * raw_manifest=0x025debe8, long length=1088)  Line 243 + 0x15 bytes	C
 	signtool.exe!JAR_parse_manifest(JAR_ * jar=0x025b5630, char * raw_manifest=0x025debe8, long length=1088, const char * path=0x0276a818, const char * url=0x0043efc4)  Line 180 + 0x15 bytes	C
 	signtool.exe!jar_extract_mf(JAR_ * jar=0x025b5630, jarArch format=jarArchZip, PRFileDesc * fp=0x0276a740, char * ext=0x0043efb0)  Line 735 + 0x1c bytes	C
 	signtool.exe!jar_extract_manifests(JAR_ * jar=0x025b5630, jarArch format=jarArchZip, PRFileDesc * fp=0x0276a740)  Line 615 + 0x16 bytes	C
 	signtool.exe!JAR_pass_archive(JAR_ * jar=0x025b5630, jarArch format=jarArchZip, char * filename=0x02761778, const char * url=0x0043e5d0)  Line 134 + 0x11 bytes	C
 	signtool.exe!JarWho(char * filename=0x02761778)  Line 305 + 0x14 bytes	C
 	signtool.exe!main(int argc=5, char * * argv=0x0259e940)  Line 1027 + 0xb bytes	C
 	signtool.exe!__tmainCRTStartup()  Line 586 + 0x19 bytes	C
 	signtool.exe!mainCRTStartup()  Line 403	C
 	kernel32.dll!7670e3f3() 	
 	ntdll.dll!77edcfed() 	
 	ntdll.dll!77edd1ff() 	

 	ntdll.dll!0000000077d04ea0() 	
 	[Frames below may be incorrect and/or missing, no symbols loaded for ntdll.dll]	
 	ntdll.dll!0000000077d655e7() 	
 	ntdll.dll!0000000077d6562a() 	
 	ntdll.dll!0000000077d66ee5() 	
 	ntdll.dll!0000000077cfecad() 	
 	ntdll.dll!0000000077cfe13d() 	
 	ntdll.dll!0000000077cfea57() 	
 	ntdll.dll!0000000077cfee41() 	
 	ntdll.dll!0000000077d66e97() 	
 	ntdll.dll!0000000077d679d6() 	
 	ntdll.dll!0000000077d69136() 	
 	ntdll.dll!0000000077d6aa44() 	
 	ntdll.dll!0000000077d13a57() 	
 	ntdll.dll!0000000077d091f7() 	
 	kernel32.dll!0000000077aeceaa() 	
 	msvcr90d.dll!_free_base(void * pBlock=0x000000000271a5b0)  Line 109 + 0x14 bytes	C
 	msvcr90d.dll!_free_dbg_nolock(void * pUserData=0x000000000271a5e0, int nBlockUse=1)  Line 1428	C++
 	msvcr90d.dll!_free_dbg(void * pUserData=0x000000000271a5e0, int nBlockUse=1)  Line 1258 + 0xe bytes	C++
>	msvcr90d.dll!free(void * pUserData=0x000000000271a5e0)  Line 50	C++
 	libnspr4.dll!PR_Free(void * ptr=0x000000000271a5e0)  Line 538	C
 	nssutil3.dll!PORT_Free_Util(void * ptr=0x000000000271a5e0)  Line 154	C
 	signtool.exe!jar_extract_mf(JAR_ * jar=0x00000000026f9a00, jarArch format=jarArchZip, PRFileDesc * fp=0x00000000026f8120, char * ext=0x00000001400506a4)  Line 739	C
 	signtool.exe!jar_extract_manifests(JAR_ * jar=0x00000000026f9a00, jarArch format=jarArchZip, PRFileDesc * fp=0x00000000026f8120)  Line 611 + 0x1a bytes	C
 	signtool.exe!JAR_pass_archive(JAR_ * jar=0x00000000026f9a00, jarArch format=jarArchZip, char * filename=0x0000000000d49160, const char * url=0x000000014004f840)  Line 134 + 0x13 bytes	C
 	signtool.exe!VerifyJar(char * filename=0x0000000000d49160)  Line 74 + 0x1b bytes	C
 	signtool.exe!main(int argc=9, char * * argv=0x00000000026c7b90)  Line 1007 + 0xc bytes	C
 	signtool.exe!__tmainCRTStartup()  Line 586 + 0x19 bytes	C
 	signtool.exe!mainCRTStartup()  Line 403	C
 	kernel32.dll!0000000077ae495d() 	
 	ntdll.dll!0000000077ce8791() 	

I don't know if it's helpful to look at them before we fix the jar_eat_line issues. I'm just recording them in case doing that doesn't solve the whole issue.
This patch depends on the patch for bug 487007 being applied first.

Obviously, the standard test setups aren't adequate for testing signtool,
since it only fails for Julien, and not in tinderbox or on other platforms.
So, I cannot say that I have adequately tested this patch, or the patch 
for bug 487007, because I cannot reproduce any of the failures, so I do not
know that this patch (or the patch for bug 487007) actually fixes anything.

Julien, I'd appreciate it if you would test these patches on your Win64 box,
the one for bug 487007 first, and then this one.

I won't ask for review for this patch until the patch for bug 487007 is resolved, due to the dependency.
Attachment #370754 - Attachment is obsolete: true
Attachment #371202 - Attachment is patch: true
My patch contains a comment that says:

>      It skips right past NUL characters, so it might
> *    go off the end of the world.

That was true before my patch.  I put that comment in before I started
working on the patch.  I believe the patch corrects that problem, 
assuming that the callers pass correct values of the *len argument.
So, I will remove that comment before committing.
Summary: Miscellaneous crashes in signtool on Windows → Miscellaneous crashes in signtool on Windows 64
Nelson,

I will test your patch on my system.

Re: the change of description, even though I have only seen the problem on my Vista 64 system, the same problems have happened both with my builds targeted for 32-bits or 64-bits. You need only browse this bug to find that half the stacks are 32 bits and the others 64 bits. So, I don't think the root cause of these problems is specific to Win64.
Your patch caused all of my 4 concurrent QAs (32 and 64 bit, OPT + DBG) to spin in an infinite loop in their respective version of signtool.exe .

Here is the stack of one looping process. This one never returns from jar_parse_any :

>	signtool.exe!jar_parse_any(JAR_ * jar=0x00000000028275d0, int type=2, JAR_Signer_ * signer=0x0000000000000000, char * raw_manifest=0x00000000028278e0, long length=383, const char * path=0x0000000002827040, const char * url=0x000000014004f6bc)  Line 474 + 0x17 bytes	C
 	signtool.exe!jar_parse_mf(JAR_ * jar=0x00000000028275d0, char * raw_manifest=0x0000000002827760, long length=383, const char * path=0x0000000002827040, const char * url=0x000000014004f6bc)  Line 252	C
 	signtool.exe!JAR_parse_manifest(JAR_ * jar=0x00000000028275d0, char * raw_manifest=0x0000000002827760, long length=383, const char * path=0x0000000002827040, const char * url=0x000000014004f6bc)  Line 161 + 0x23 bytes	C
 	signtool.exe!jar_extract_mf(JAR_ * jar=0x00000000028275d0, jarArch format=jarArchZip, PRFileDesc * fp=0x000000000274e790, char * ext=0x000000014004f6a0)  Line 627 + 0x2b bytes	C
 	signtool.exe!jar_extract_manifests(JAR_ * jar=0x00000000028275d0, jarArch format=jarArchZip, PRFileDesc * fp=0x000000000274e790)  Line 515 + 0x1a bytes	C
 	signtool.exe!JAR_pass_archive(JAR_ * jar=0x00000000028275d0, jarArch format=jarArchZip, char * filename=0x0000000002746d20, const char * url=0x000000014004e840)  Line 140 + 0x13 bytes	C
 	signtool.exe!VerifyJar(char * filename=0x0000000002746d20)  Line 74 + 0x1b bytes	C
 	signtool.exe!main(int argc=9, char * * argv=0x0000000002807960)  Line 1007 + 0xc bytes	C

Another one :

>	signtool.exe!jar_digest_section(char * manifest=0x02692d0a, long length=125)  Line 828 + 0x11 bytes	C
 	signtool.exe!jar_parse_any(JAR_ * jar=0x0268fff0, int type=2, JAR_Signer_ * signer=0x00000000, char * raw_manifest=0x02692d09, long length=383, const char * path=0x02628220, const char * url=0x0043efc4)  Line 466 + 0x13 bytes	C
 	signtool.exe!jar_parse_mf(JAR_ * jar=0x0268fff0, char * raw_manifest=0x02692c08, long length=383, const char * path=0x02628220, const char * url=0x0043efc4)  Line 251 + 0x1d bytes	C
 	signtool.exe!JAR_parse_manifest(JAR_ * jar=0x0268fff0, char * raw_manifest=0x02692c08, long length=383, const char * path=0x02628220, const char * url=0x0043efc4)  Line 161 + 0x19 bytes	C
 	signtool.exe!jar_extract_mf(JAR_ * jar=0x0268fff0, jarArch format=jarArchZip, PRFileDesc * fp=0x026281e0, char * ext=0x0043efa8)  Line 627 + 0x1c bytes	C
 	signtool.exe!jar_extract_manifests(JAR_ * jar=0x0268fff0, jarArch format=jarArchZip, PRFileDesc * fp=0x026281e0)  Line 515 + 0x16 bytes	C
 	signtool.exe!JAR_pass_archive(JAR_ * jar=0x0268fff0, jarArch format=jarArchZip, char * filename=0x0267e8f8, const char * url=0x0043e210)  Line 140 + 0x11 bytes	C
 	signtool.exe!VerifyJar(char * filename=0x0267e8f8)  Line 74 + 0x14 bytes	C
 	signtool.exe!main(int argc=9, char * * argv=0x0267e5b8)  Line 1007 + 0xb bytes	C
 	signtool.exe!__tmainCRTStartup()  Line 586 + 0x17 bytes	C

This one never returns from jar_digest_section.
Julien,  Did you test this patch on top of the patch for bug 487007? 
If so, there is something truly bizarre about your system. 

I tested these patches on Win32 with ordinary debug builds and the 
USE_DEBUG_RTL builds.  tools.sh (the only part of all.sh that uses signtool) 
passes with them all.  No loops, no crashes, nothing odd.  

I wonder what is SO different about your system.
Julien, please add a comment to this bug specifying the exact command line
that led to the behavior you described in comment 16 (infinite loops).
If that command was operating on an existing JAR file, please attach that
JAR file to this bug.  I'd like to see how that command with that JAR file
behaves on my machines.
Attachment #371202 - Attachment description: Patch v1, fix jar_eat_line, depends on patch for bug → Patch v1, fix jar_eat_line, patch for bug 487007 must be applied first
I think no one but Julien can get to the bottom of why the behavior on 
his system is so different than other systems.
Assignee: nobody → julien.pierre.boogz
Hardware: x86 → x86_64
Nelson,

Re: comment 16, yes, I applied both patches. I was not running with USE_DEBUG_RTL this time. I have some other major problems with USE_DEBUG_RTL (pk12util) which I filed bugs about. I am running all.sh in a loop, not just tools.sh . I will try running just tools.sh next time. The only thing that I know is different about my system is that it's running Vista x64, AFAIK. It also does have a large amount of RAM (8 GB physical, + 16 GB pagefile) which could account for addresses and the memory allocation pattern being different than on other machines.

Since this is considered low priority, I don't know when we will get to the bottom of this.

One thing I would suggest however is that we set up a Windows Server 2008 machine for QA in the office. Its kernel is supposed to share much of the same characteristics as Vista. We might be able to reproduce the failures that way.

Also, it may be worth considering having us do nightly builds and QA with USE_DEBUG_RTL=1, since that would be a relatively inexpensive way to get better coverage for all our tools - without any surgery needed to our QA scripts to run the rest of the tools under Purify / valgrind. Perhaps the Windows tinderboxes should do that, or we should have additional tinderboxes for USE_DEBUG_RTL . The  only concern is that I'm not sure if the pop-ups from the debug RTL would be trapped by the OS or not.
QA Contact: tools → slavomir.katuscak
Nelson,

Re: comment 18, I will attach the command line next time I am home. I must tell you however that I have tried to run the same command against the same JAR file again previously with the same binaries, and it did not fail in the same way - it ran just fine. The bug is not reproducible every time. Maybe there are some uninitialized variables in signtool / libjar that are causing this random behavior.
Nelson,

Another reason why your test may have passed and mine did not - my machine runs all the different passes in all.sh, which is the default. Ie. regular, PKIX, database upgrade, shared DB. I think the problems I have seen in signtool were usually in combination with the shared DB. I have deleted most of my old mozilla/test_results  because they filled too much space (30 + GB !), so I can't look back. I will create another partition on my 1 TB drive so I don't have to delete the old logs. Right now I only have a single C: 100 GB partition.
Nevertheless, the problem does not happen every single time even on my machine. That's why I run all.sh in a loop on it - 4 concurrent loops actually - it is a quad-core box so it can handle it. But usually within a 24 hour period, the signtool problem will show up at least a couple of times. In fact typically all 4 concurrent instances end up hitting the problem at some point, and when I wake up, or come back home from the office, the machine has gone into sleep mode because it was idle due to the 4 concurrent debugger prompts.
Well, it's officially not just happening on my machine anymore. The signtool failure just happened in tinderbox on goride , Win32 optimized .

See :
http://tinderbox.mozilla.org/showlog.cgi?log=NSS/1240011438.1240024237.13994.gz&fulltext=1
Attachment #370754 - Attachment is obsolete: false
Attachment #370754 - Flags: review?(julien.pierre.boogz)
Comment on attachment 370754 [details] [diff] [review]
patch moved to bug 487007

I don't know how this patch got marked obsolete.  I've been running it for days, and I want to commit it.
Comment on attachment 370754 [details] [diff] [review]
patch moved to bug 487007

Never mind.  I see now that I moved this patch to another bug.  

But don't ignore this patch, It blocks the other patch to this bug.
Attachment #370754 - Attachment description: convert jarver.c to NSS coding style → patch moved to bug 487007
Attachment #370754 - Attachment is obsolete: true
Attachment #370754 - Flags: review?(julien.pierre.boogz)
Ah HA! 
I built NSS with a different libc, with a very different heap (jemalloc),
and then, the first time I tried to test this program, I ran into the 
infinite loop.  So, I have developed a newer patch that definitely fixes
that particular problem.  I will do more testing to see if any other 
problems are found.
OK, there was an off-by-one error in that last patch.  :(

This code had several problems, some of which were fixed by my previous
patch (despite the off-by-one error).  But there were also additional 
problems with the *callers* of jar_eat_line.  All but one of them assumed 
that jar_eat_line always returned the address of a character in the buffer, and that it was valid to look at that character and try to parse the "line" 
of which it was the first character.  

But every buffer has an end, and when jar_eat_line came to the end of it, 
it would return the address of the first character past the end of the 
buffer, and would set the number of characters remaining to zero.  That 
wasn't wrong, but only one of the callers ever checked that the count of 
remaining characters was greater than zero before dereferencing that 
pointer.  So, at the end of the buffer, they'd go right on into oblivion,
parsing characters past the end of the buffer.  

This was OK most of the time, because that address, just past the end of 
the buffer would normally be a valid address, and most of the time would
contain a value that was NOT valid as the first character on the next line.
But sometimes, that character was either (a) on a non-existent data page, or 
(b) a character that looked like a valid line header, causing the caller
to go right on looking for characters until it ran into trouble.

So, the solution was to change all but one of the callers to check the 
count of valid characters remaining (characters on this line whose starting
address has been returned), and not start parsing the line unless that count
is greater than zero.  This patch does that.  

This patch also fixes the off-by-one error, and corrects the end-of-line
detection code.
Attachment #373543 - Flags: review?(julien.pierre.boogz)
Attachment #371202 - Attachment is obsolete: true
Thanks, Nelson. I will test your patch.
Summary: Miscellaneous crashes in signtool on Windows 64 → Miscellaneous crashes in signtool on Windows
I have been testing with attachment 373543 [details] [diff] [review] and its dependency for about 12 hours, with four all.sh looping in parallel, and so far so good - no signtool crashes, infinite loops, or failures. I'll let it run some more.
Comment on attachment 373543 [details] [diff] [review]
Patch v2, fix jar_eat_line and its callers (checked in)

This patch still applies cleanly to the trunk.
Attachment #373543 - Flags: review?(rrelyea)
Comment on attachment 373543 [details] [diff] [review]
Patch v2, fix jar_eat_line and its callers (checked in)

r+.

My only comment is about the comment... Your new comment says:

"if "lines" is non-zero, it reads and discards that many lines from the input.  It skips right past NULL characters, so it might go off the end of the world."

It seems to me more appropriate to say:

"if "lines" is non-zero, it reads and discards that many lines from the input. data is presumed to be of length *len and is not considered NULL terminated (imbedded NULL are treated as new lines)."

Or something similar. It's pretty clear from the code and callers that the buffer is a buf/len rather than a null terminated buffer.

bob
Attachment #373543 - Flags: review?(rrelyea) → review+
Comment on attachment 373543 [details] [diff] [review]
Patch v2, fix jar_eat_line and its callers (checked in)

I modified the comment as Bob suggested.
Checking in jarver.c; new revision: 1.17; previous revision: 1.16
Attachment #373543 - Attachment description: Patch v2, fix jar_eat_line and its callers → Patch v2, fix jar_eat_line and its callers (checked in)
I'm marking this fixed. If I see the problem again I will reopen.
Thanks, Nelson.
Status: NEW → RESOLVED
Closed: 15 years ago
Resolution: --- → FIXED
Attachment #373543 - Flags: review?(julien.pierre.boogz)
You need to log in before you can comment on or make changes to this bug.

Attachment

General

Created:
Updated:
Size: